/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_MICRO_ALLOCATOR_H_
#define TENSORFLOW_LITE_MICRO_MICRO_ALLOCATOR_H_

#include <cstddef>
#include <cstdint>

#include "flatbuffers/flatbuffers.h"  // from @flatbuffers
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/micro/compatibility.h"
#include "tensorflow/lite/micro/micro_op_resolver.h"
#include "tensorflow/lite/micro/simple_memory_allocator.h"
#include "tensorflow/lite/schema/schema_generated.h"

namespace tflite {

// Namespace used for unittests.
namespace internal {

// Sets up all of the data structure members for a TfLiteTensor based on the
// contents of a serialized tensor in the flatbuffer.
// TODO(b/160894903): Once all kernels have been updated to the new
// TfLiteEvalTensor API, drop the allocate_temp flag. This enables internal
// flatbuffer quantization or dimension allocations to take place in either the
// temp or tail section of the arena.
TfLiteStatus InitializeTfLiteTensorFromFlatbuffer(
    SimpleMemoryAllocator* allocator, bool allocate_temp,
    const tflite::Tensor& flatbuffer_tensor,
    const flatbuffers::Vector<flatbuffers::Offset<Buffer>>* buffers,
    ErrorReporter* error_reporter, TfLiteTensor* result);

// A handle tracking scratch buffer allocation. This handle is created by
// `RequestScratchBufferInArena`. The `data` field is populated in
// `FinishModelAllocation` after static memory planning.
// TODO(b/150257460): As a future optimization, this struct could be replaced
// by a union, since once `data` is populated, `bytes` and `node_idx` are no
// longer needed.
typedef struct {
  // Pointer to the scratch buffer.
  uint8_t* data;
  // Number of bytes required by the buffer. The actual allocated size might be
  // greater than `bytes` due to buffer alignment.
  size_t bytes;
  // The node that the buffer is allocated for. This provides useful
  // information to determine the lifetime of the buffer. In AllocationInfo,
  // this buffer will have `before` = node_idx and `after` = node_idx.
  int node_idx;
} ScratchBufferHandle;
}  // namespace internal

typedef struct {
  TfLiteNode node;
  const TfLiteRegistration* registration;
} NodeAndRegistration;

// Allocator responsible for allocating memory for all intermediate tensors
// necessary to invoke a model.
//
// The lifetime of the model, tensor arena and error reporter must be at
// least as long as that of the allocator object, since the allocator needs
// them to be accessible during its entire lifetime.
//
// The MicroAllocator simply plans out additional allocations that are required
// to stand up a model for inference in TF Micro. This class currently relies
// on an additional allocator - SimpleMemoryAllocator - for all allocations
// from an arena. These allocations are divided into head (non-persistent) and
// tail (persistent) regions:
//
// Memory layout to help understand how it works.
// This information could change in future versions.
// ************** .memory_allocator->GetBuffer()
// Tensors/Scratch buffers (head)
// ************** .head_watermark
// unused memory
// ************** .memory_allocator->GetBuffer() + ->GetMaxBufferSize()
//                                              - ->GetDataSize()
// persistent area (tail)
// ************** .memory_allocator->GetBuffer() + ->GetMaxBufferSize()
class MicroAllocator {
 public:
  // Creates a MicroAllocator instance from a given tensor arena. This arena
  // will be managed by the created instance.
  // Note: Please use __declspec(align(16)) to make sure tensor_arena is
  // 16-byte aligned; otherwise some headroom will be wasted.
  // TODO(b/157615197): Cleanup constructor + factory usage.
  static MicroAllocator* Create(uint8_t* tensor_arena, size_t arena_size,
                                ErrorReporter* error_reporter);
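
  // A minimal usage sketch, assuming a MicroErrorReporter and a hypothetical
  // arena size (real sizes are model-specific). alignas(16) is the standard
  // C++ equivalent of the alignment note above:
  //
  //   constexpr size_t kArenaSize = 16 * 1024;
  //   alignas(16) static uint8_t tensor_arena[kArenaSize];
  //   tflite::MicroErrorReporter micro_error_reporter;
  //   tflite::MicroAllocator* allocator = tflite::MicroAllocator::Create(
  //       tensor_arena, kArenaSize, &micro_error_reporter);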

  // Creates a MicroAllocator instance using the provided SimpleMemoryAllocator
  // instance. This allocator instance will use the SimpleMemoryAllocator
  // instance to manage allocations internally.
  static MicroAllocator* Create(SimpleMemoryAllocator* memory_allocator,
                                ErrorReporter* error_reporter);

  // Begins allocating internal resources required for model inference.
  // This method will run through the flatbuffer data supplied in the model to
  // properly allocate tensor, node, and op registration data. This method is
  // expected to be followed with a call to FinishModelAllocation() before
  // resuming allocation with another model. All persistent tensor buffers are
  // stored in the out-param eval_tensors. This value is allocated from the
  // persistent memory arena and will be used to host runtime tensor buffers.
  TfLiteStatus StartModelAllocation(
      const Model* model, const MicroOpResolver& op_resolver,
      NodeAndRegistration** node_and_registrations,
      TfLiteEvalTensor** eval_tensors);

  // Finishes allocating internal resources required for model inference.
  // This method will plan non-persistent buffers and commit a memory plan to
  // the 'head' section of the memory arena. All variable tensor data will also
  // be allocated. This method should be called after assigning model resources
  // in StartModelAllocation(). The eval_tensors pointer should be the value
  // passed into this class during StartModelAllocation().
  TfLiteStatus FinishModelAllocation(const Model* model,
                                     TfLiteEvalTensor* eval_tensors);
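
  // A sketch of the two-phase flow, assuming `model`, `op_resolver` and
  // `allocator` are already set up (error handling elided):
  //
  //   tflite::NodeAndRegistration* node_and_registrations = nullptr;
  //   TfLiteEvalTensor* eval_tensors = nullptr;
  //   allocator->StartModelAllocation(model, op_resolver,
  //                                   &node_and_registrations, &eval_tensors);
  //   // ... per-kernel init/prepare allocations happen in between ...
  //   allocator->FinishModelAllocation(model, eval_tensors);
  //   size_t arena_used = allocator->used_bytes();  // Valid only after this.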

  // Allocates a TfLiteTensor struct and populates the returned value with
  // properties from the model flatbuffer. This struct is allocated from
  // persistent arena memory and is only guaranteed for the lifetime of the
  // application. The eval_tensors pointer should be the value passed into this
  // class during StartModelAllocation() and contains the source-of-truth for
  // buffers.
  virtual TfLiteTensor* AllocatePersistentTfLiteTensor(
      const Model* model, TfLiteEvalTensor* eval_tensors, int tensor_index);

  // Allocates a TfLiteTensor struct and populates the returned value with
  // properties from the model flatbuffer. This struct is allocated from
  // temporary arena memory and is only guaranteed until a call is made to
  // ResetTempAllocations(). The eval_tensors pointer should be the value
  // passed into this class during StartModelAllocation() and contains the
  // source-of-truth for buffers.
  virtual TfLiteTensor* AllocateTempTfLiteTensor(const Model* model,
                                                 TfLiteEvalTensor* eval_tensors,
                                                 int tensor_index);

  // Resets all temporary allocations. This method should be called after a
  // chain of temp allocations (e.g. chain of TfLiteTensor objects via
  // AllocateTempTfLiteTensor()).
  virtual void ResetTempAllocations();
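
  // A sketch of a temp-allocation chain (hypothetical tensor indices; assumes
  // StartModelAllocation() has already populated `eval_tensors`):
  //
  //   TfLiteTensor* input =
  //       allocator->AllocateTempTfLiteTensor(model, eval_tensors, 0);
  //   TfLiteTensor* output =
  //       allocator->AllocateTempTfLiteTensor(model, eval_tensors, 1);
  //   // ... inspect dims/quantization ...
  //   allocator->ResetTempAllocations();  // Both pointers are now invalid.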

  // Allocates a persistent buffer which has the same lifetime as the
  // allocator. The memory is immediately available and is allocated from the
  // tail of the arena.
  void* AllocatePersistentBuffer(size_t bytes);
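
  // A sketch, using a hypothetical per-op state struct for illustration:
  //
  //   struct OpData { int32_t multiplier; int shift; };
  //   auto* op_data = static_cast<OpData*>(
  //       allocator->AllocatePersistentBuffer(sizeof(OpData)));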

  // Registers a scratch buffer of size `bytes` for the node with `node_id`.
  // This method only allocates a BufferHandle holding information for memory
  // planning. The buffer pointer is ready after `FinishModelAllocation` and
  // can be retrieved by the `GetScratchBuffer` method using the returned
  // `buffer_idx`. Note that there should be no tail allocation between two
  // consecutive `RequestScratchBufferInArena` calls.
  TfLiteStatus RequestScratchBufferInArena(int node_id, size_t bytes,
                                           int* buffer_idx);

  // Returns the pointer to the planned scratch buffer.
  void* GetScratchBuffer(int buffer_idx) const;
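
  // A sketch of the request/retrieve pattern (hypothetical node id and size;
  // requests must happen before FinishModelAllocation(), retrieval after):
  //
  //   int buffer_idx = -1;
  //   allocator->RequestScratchBufferInArena(/*node_id=*/3, /*bytes=*/256,
  //                                          &buffer_idx);
  //   // ... FinishModelAllocation() runs the static memory planner ...
  //   auto* scratch =
  //       static_cast<uint8_t*>(allocator->GetScratchBuffer(buffer_idx));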

  // Returns the arena usage in bytes, only available after
  // `FinishModelAllocation`. Otherwise, it will return 0.
  size_t used_bytes() const;

 protected:
  MicroAllocator(SimpleMemoryAllocator* memory_allocator,
                 ErrorReporter* error_reporter);
  virtual ~MicroAllocator();

  // Allocates an array in the arena to hold the node and registration pairs
  // required to represent the inference graph of the model.
  virtual TfLiteStatus AllocateNodeAndRegistrations(
      const Model* model, NodeAndRegistration** node_and_registrations);

  // Populates node and registration pointers representing the inference graph
  // of the model from values inside the flatbuffer (loaded from the
  // TfLiteModel instance). Persistent data (e.g. operator data) is allocated
  // from the arena.
  virtual TfLiteStatus PrepareNodeAndRegistrationDataFromFlatbuffer(
      const Model* model, const MicroOpResolver& op_resolver,
      NodeAndRegistration* node_and_registrations);

  // Allocates the list of persistent TfLiteEvalTensors that are used for the
  // "eval" phase of model inference. These structs will be the source of truth
  // for all tensor buffers. Allocation results are stored in the out-param
  // eval_tensors.
  virtual TfLiteStatus AllocateTfLiteEvalTensors(
      const Model* model, TfLiteEvalTensor** eval_tensors);

  // Allocates persistent tensor buffers for variable tensors in the subgraph.
  virtual TfLiteStatus AllocateVariables(const SubGraph* subgraph,
                                         TfLiteEvalTensor* eval_tensors);

  // TODO(b/160894903): Once all kernels have been updated to the new API, drop
  // this method. It is only used to record TfLiteTensor persistent
  // allocations.
  virtual TfLiteTensor* AllocatePersistentTfLiteTensorInternal(
      const Model* model, TfLiteEvalTensor* eval_tensors, int tensor_index);

  // Populates a TfLiteTensor struct with data from the model flatbuffer. Any
  // quantization data is allocated from either the tail (persistent) or temp
  // sections of the arena based on the allocation flag.
  // TODO(b/160894903): Once all kernels have been updated to the new API, drop
  // this function since all allocations for quantized data will take place in
  // the temp section.
  virtual TfLiteStatus PopulateTfLiteTensorFromFlatbuffer(
      const Model* model, const SubGraph* subgraph, TfLiteTensor* tensor,
      int tensor_index, bool allocate_temp);

  ErrorReporter* error_reporter() const;

  // Returns the first subgraph from the model.
  const SubGraph* GetSubGraphFromModel(const Model* model);

 private:
  // Commits a memory plan for all non-persistent buffer allocations in the
  // 'head' section of the memory arena. The eval_tensors pointer is the list
  // of pre-allocated TfLiteEvalTensor structs that will point to the buffers
  // that will be allocated into the head section in this function call.
  virtual TfLiteStatus CommitStaticMemoryPlan(const Model* model,
                                              const SubGraph* subgraph,
                                              TfLiteEvalTensor* eval_tensors);

  // A simple memory allocator that always allocates from the arena tail or
  // head.
  SimpleMemoryAllocator* memory_allocator_;
  ErrorReporter* error_reporter_;
  bool model_is_allocating_;

  // Stored in reverse order for efficiency: scratch_buffer_handles_[0] is the
  // handle for the last buffer, i.e. the one corresponding to the most recent
  // RequestScratchBufferInArena call.
  internal::ScratchBufferHandle* scratch_buffer_handles_ = nullptr;
  // How many scratch buffers have been allocated.
  size_t scratch_buffer_count_ = 0;

  TF_LITE_REMOVE_VIRTUAL_DELETE
};

}  // namespace tflite
#endif  // TENSORFLOW_LITE_MICRO_MICRO_ALLOCATOR_H_