Procházet zdrojové kódy

Merge pull request #1789 from hathach/fix-fifo-memory-overflow

Fix fifo memory overflow
Ha Thach před 3 roky
rodič
revize
79e5d7aa69

+ 267 - 207
src/common/tusb_fifo.c

@@ -28,21 +28,22 @@
 #include "osal/osal.h"
 #include "tusb_fifo.h"
 
+#define TU_FIFO_DBG   0
+
 // Suppress IAR warning
 // Warning[Pa082]: undefined behavior: the order of volatile accesses is undefined in this statement
 #if defined(__ICCARM__)
 #pragma diag_suppress = Pa082
 #endif
 
-// implement mutex lock and unlock
-#if CFG_FIFO_MUTEX
+#if OSAL_MUTEX_REQUIRED
 
-static inline void _ff_lock(tu_fifo_mutex_t mutex)
+TU_ATTR_ALWAYS_INLINE static inline void _ff_lock(osal_mutex_t mutex)
 {
   if (mutex) osal_mutex_lock(mutex, OSAL_TIMEOUT_WAIT_FOREVER);
 }
 
-static inline void _ff_unlock(tu_fifo_mutex_t mutex)
+TU_ATTR_ALWAYS_INLINE static inline void _ff_unlock(osal_mutex_t mutex)
 {
   if (mutex) osal_mutex_unlock(mutex);
 }
@@ -66,23 +67,20 @@ typedef enum
 
 bool tu_fifo_config(tu_fifo_t *f, void* buffer, uint16_t depth, uint16_t item_size, bool overwritable)
 {
-  if (depth > 0x8000) return false;               // Maximum depth is 2^15 items
+  // Limit index space to 2*depth - this allows for a fast "modulo" calculation
+  // but limits the maximum depth to 2^16/2 = 2^15 and buffer overflows are detectable
+  // only if overflow happens once (important for unsupervised DMA applications)
+  if (depth > 0x8000) return false;
 
   _ff_lock(f->mutex_wr);
   _ff_lock(f->mutex_rd);
 
-  f->buffer = (uint8_t*) buffer;
-  f->depth  = depth;
-  f->item_size = item_size;
+  f->buffer       = (uint8_t*) buffer;
+  f->depth        = depth;
+  f->item_size    = (uint16_t) (item_size & 0x7FFF);
   f->overwritable = overwritable;
-
-  // Limit index space to 2*depth - this allows for a fast "modulo" calculation
-  // but limits the maximum depth to 2^16/2 = 2^15 and buffer overflows are detectable
-  // only if overflow happens once (important for unsupervised DMA applications)
-  f->max_pointer_idx = (uint16_t) (2*depth - 1);
-  f->non_used_index_space = UINT16_MAX - f->max_pointer_idx;
-
-  f->rd_idx = f->wr_idx = 0;
+  f->rd_idx       = 0;
+  f->wr_idx       = 0;
 
   _ff_unlock(f->mutex_wr);
   _ff_unlock(f->mutex_rd);
@@ -90,25 +88,22 @@ bool tu_fifo_config(tu_fifo_t *f, void* buffer, uint16_t depth, uint16_t item_si
   return true;
 }
 
-// Static functions are intended to work on local variables
-static inline uint16_t _ff_mod(uint16_t idx, uint16_t depth)
-{
-  while ( idx >= depth) idx -= depth;
-  return idx;
-}
+//--------------------------------------------------------------------+
+// Pull & Push
+//--------------------------------------------------------------------+
 
 // Intended to be used to read from hardware USB FIFO in e.g. STM32 where all data is read from a constant address
 // Code adapted from dcd_synopsys.c
 // TODO generalize with configurable 1 byte or 4 byte each read
 static void _ff_push_const_addr(uint8_t * ff_buf, const void * app_buf, uint16_t len)
 {
-  volatile const uint32_t * rx_fifo = (volatile const uint32_t *) app_buf;
+  volatile const uint32_t * reg_rx = (volatile const uint32_t *) app_buf;
 
   // Reading full available 32 bit words from const app address
   uint16_t full_words = len >> 2;
   while(full_words--)
   {
-    tu_unaligned_write32(ff_buf, *rx_fifo);
+    tu_unaligned_write32(ff_buf, *reg_rx);
     ff_buf += 4;
   }
 
@@ -116,7 +111,7 @@ static void _ff_push_const_addr(uint8_t * ff_buf, const void * app_buf, uint16_t
   uint8_t const bytes_rem = len & 0x03;
   if ( bytes_rem )
   {
-    uint32_t tmp32 = *rx_fifo;
+    uint32_t tmp32 = *reg_rx;
     memcpy(ff_buf, &tmp32, bytes_rem);
   }
 }
@@ -125,49 +120,49 @@ static void _ff_push_const_addr(uint8_t * ff_buf, const void * app_buf, uint16_t
 // where all data is written to a constant address in full word copies
 static void _ff_pull_const_addr(void * app_buf, const uint8_t * ff_buf, uint16_t len)
 {
-  volatile uint32_t * tx_fifo = (volatile uint32_t *) app_buf;
+  volatile uint32_t * reg_tx = (volatile uint32_t *) app_buf;
 
-  // Pushing full available 32 bit words to const app address
+  // Write full available 32 bit words to const address
   uint16_t full_words = len >> 2;
   while(full_words--)
   {
-    *tx_fifo = tu_unaligned_read32(ff_buf);
+    *reg_tx = tu_unaligned_read32(ff_buf);
     ff_buf += 4;
   }
 
-  // Write the remaining 1-3 bytes into const app address
+  // Write the remaining 1-3 bytes into const address
   uint8_t const bytes_rem = len & 0x03;
   if ( bytes_rem )
   {
     uint32_t tmp32 = 0;
     memcpy(&tmp32, ff_buf, bytes_rem);
 
-    *tx_fifo = tmp32;
+    *reg_tx = tmp32;
   }
 }
 
-// send one item to FIFO WITHOUT updating write pointer
+// send one item to fifo WITHOUT updating write pointer
 static inline void _ff_push(tu_fifo_t* f, void const * app_buf, uint16_t rel)
 {
   memcpy(f->buffer + (rel * f->item_size), app_buf, f->item_size);
 }
 
-// send n items to FIFO WITHOUT updating write pointer
-static void _ff_push_n(tu_fifo_t* f, void const * app_buf, uint16_t n, uint16_t rel, tu_fifo_copy_mode_t copy_mode)
+// send n items to fifo WITHOUT updating write pointer
+static void _ff_push_n(tu_fifo_t* f, void const * app_buf, uint16_t n, uint16_t wr_ptr, tu_fifo_copy_mode_t copy_mode)
 {
-  uint16_t const nLin = f->depth - rel;
-  uint16_t const nWrap = n - nLin;
+  uint16_t const lin_count = f->depth - wr_ptr;
+  uint16_t const wrap_count = n - lin_count;
 
-  uint16_t nLin_bytes = nLin * f->item_size;
-  uint16_t nWrap_bytes = nWrap * f->item_size;
+  uint16_t lin_bytes = lin_count * f->item_size;
+  uint16_t wrap_bytes = wrap_count * f->item_size;
 
   // current buffer of fifo
-  uint8_t* ff_buf = f->buffer + (rel * f->item_size);
+  uint8_t* ff_buf = f->buffer + (wr_ptr * f->item_size);
 
   switch (copy_mode)
   {
     case TU_FIFO_COPY_INC:
-      if(n <= nLin)
+      if(n <= lin_count)
       {
         // Linear only
         memcpy(ff_buf, app_buf, n*f->item_size);
@@ -177,16 +172,17 @@ static void _ff_push_n(tu_fifo_t* f, void const * app_buf, uint16_t n, uint16_t
         // Wrap around
 
         // Write data to linear part of buffer
-        memcpy(ff_buf, app_buf, nLin_bytes);
+        memcpy(ff_buf, app_buf, lin_bytes);
 
         // Write data wrapped around
-        memcpy(f->buffer, ((uint8_t const*) app_buf) + nLin_bytes, nWrap_bytes);
+        // TU_ASSERT(nWrap_bytes <= f->depth, );
+        memcpy(f->buffer, ((uint8_t const*) app_buf) + lin_bytes, wrap_bytes);
       }
       break;
 
     case TU_FIFO_COPY_CST_FULL_WORDS:
       // Intended for hardware buffers from which it can be read word by word only
-      if(n <= nLin)
+      if(n <= lin_count)
       {
         // Linear only
         _ff_push_const_addr(ff_buf, app_buf, n*f->item_size);
@@ -196,17 +192,18 @@ static void _ff_push_n(tu_fifo_t* f, void const * app_buf, uint16_t n, uint16_t
         // Wrap around case
 
         // Write full words to linear part of buffer
-        uint16_t nLin_4n_bytes = nLin_bytes & 0xFFFC;
+        uint16_t nLin_4n_bytes = lin_bytes & 0xFFFC;
         _ff_push_const_addr(ff_buf, app_buf, nLin_4n_bytes);
         ff_buf += nLin_4n_bytes;
 
         // There could be odd 1-3 bytes before the wrap-around boundary
-        volatile const uint32_t * rx_fifo = (volatile const uint32_t *) app_buf;
-        uint8_t rem = nLin_bytes & 0x03;
+        uint8_t rem = lin_bytes & 0x03;
         if (rem > 0)
         {
-          uint8_t remrem = (uint8_t) tu_min16(nWrap_bytes, 4-rem);
-          nWrap_bytes -= remrem;
+          volatile const uint32_t * rx_fifo = (volatile const uint32_t *) app_buf;
+
+          uint8_t remrem = (uint8_t) tu_min16(wrap_bytes, 4-rem);
+          wrap_bytes -= remrem;
 
           uint32_t tmp32 = *rx_fifo;
           uint8_t * src_u8 = ((uint8_t *) &tmp32);
@@ -224,34 +221,34 @@ static void _ff_push_n(tu_fifo_t* f, void const * app_buf, uint16_t n, uint16_t
         }
 
         // Write data wrapped part
-        if (nWrap_bytes > 0) _ff_push_const_addr(ff_buf, app_buf, nWrap_bytes);
+        if (wrap_bytes > 0) _ff_push_const_addr(ff_buf, app_buf, wrap_bytes);
       }
       break;
   }
 }
 
-// get one item from FIFO WITHOUT updating read pointer
+// get one item from fifo WITHOUT updating read pointer
 static inline void _ff_pull(tu_fifo_t* f, void * app_buf, uint16_t rel)
 {
   memcpy(app_buf, f->buffer + (rel * f->item_size), f->item_size);
 }
 
-// get n items from FIFO WITHOUT updating read pointer
-static void _ff_pull_n(tu_fifo_t* f, void* app_buf, uint16_t n, uint16_t rel, tu_fifo_copy_mode_t copy_mode)
+// get n items from fifo WITHOUT updating read pointer
+static void _ff_pull_n(tu_fifo_t* f, void* app_buf, uint16_t n, uint16_t rd_ptr, tu_fifo_copy_mode_t copy_mode)
 {
-  uint16_t const nLin = f->depth - rel;
-  uint16_t const nWrap = n - nLin; // only used if wrapped
+  uint16_t const lin_count = f->depth - rd_ptr;
+  uint16_t const wrap_count = n - lin_count; // only used if wrapped
 
-  uint16_t nLin_bytes = nLin * f->item_size;
-  uint16_t nWrap_bytes = nWrap * f->item_size;
+  uint16_t lin_bytes = lin_count * f->item_size;
+  uint16_t wrap_bytes = wrap_count * f->item_size;
 
   // current buffer of fifo
-  uint8_t* ff_buf = f->buffer + (rel * f->item_size);
+  uint8_t* ff_buf = f->buffer + (rd_ptr * f->item_size);
 
   switch (copy_mode)
   {
     case TU_FIFO_COPY_INC:
-      if ( n <= nLin )
+      if ( n <= lin_count )
       {
         // Linear only
         memcpy(app_buf, ff_buf, n*f->item_size);
@@ -261,15 +258,15 @@ static void _ff_pull_n(tu_fifo_t* f, void* app_buf, uint16_t n, uint16_t rel, tu
         // Wrap around
 
         // Read data from linear part of buffer
-        memcpy(app_buf, ff_buf, nLin_bytes);
+        memcpy(app_buf, ff_buf, lin_bytes);
 
         // Read data wrapped part
-        memcpy((uint8_t*) app_buf + nLin_bytes, f->buffer, nWrap_bytes);
+        memcpy((uint8_t*) app_buf + lin_bytes, f->buffer, wrap_bytes);
       }
     break;
 
     case TU_FIFO_COPY_CST_FULL_WORDS:
-      if ( n <= nLin )
+      if ( n <= lin_count )
       {
         // Linear only
         _ff_pull_const_addr(app_buf, ff_buf, n*f->item_size);
@@ -279,17 +276,18 @@ static void _ff_pull_n(tu_fifo_t* f, void* app_buf, uint16_t n, uint16_t rel, tu
         // Wrap around case
 
         // Read full words from linear part of buffer
-        uint16_t nLin_4n_bytes = nLin_bytes & 0xFFFC;
-        _ff_pull_const_addr(app_buf, ff_buf, nLin_4n_bytes);
-        ff_buf += nLin_4n_bytes;
+        uint16_t lin_4n_bytes = lin_bytes & 0xFFFC;
+        _ff_pull_const_addr(app_buf, ff_buf, lin_4n_bytes);
+        ff_buf += lin_4n_bytes;
 
         // There could be odd 1-3 bytes before the wrap-around boundary
-        volatile uint32_t * tx_fifo = (volatile uint32_t *) app_buf;
-        uint8_t rem = nLin_bytes & 0x03;
+        uint8_t rem = lin_bytes & 0x03;
         if (rem > 0)
         {
-          uint8_t remrem = (uint8_t) tu_min16(nWrap_bytes, 4-rem);
-          nWrap_bytes -= remrem;
+          volatile uint32_t * reg_tx = (volatile uint32_t *) app_buf;
+
+          uint8_t remrem = (uint8_t) tu_min16(wrap_bytes, 4-rem);
+          wrap_bytes -= remrem;
 
           uint32_t tmp32=0;
           uint8_t * dst_u8 = (uint8_t *)&tmp32;
@@ -301,7 +299,7 @@ static void _ff_pull_n(tu_fifo_t* f, void* app_buf, uint16_t n, uint16_t rel, tu
           ff_buf = f->buffer;
           while(remrem--) *dst_u8++ = *ff_buf++;
 
-          *tx_fifo = tmp32;
+          *reg_tx = tmp32;
         }
         else
         {
@@ -309,7 +307,7 @@ static void _ff_pull_n(tu_fifo_t* f, void* app_buf, uint16_t n, uint16_t rel, tu
         }
 
         // Read data wrapped part
-        if (nWrap_bytes > 0) _ff_pull_const_addr(app_buf, ff_buf, nWrap_bytes);
+        if (wrap_bytes > 0) _ff_pull_const_addr(app_buf, ff_buf, wrap_bytes);
       }
     break;
 
@@ -317,178 +315,232 @@ static void _ff_pull_n(tu_fifo_t* f, void* app_buf, uint16_t n, uint16_t rel, tu
   }
 }
 
-// Advance an absolute pointer
-static uint16_t advance_pointer(tu_fifo_t* f, uint16_t p, uint16_t offset)
+//--------------------------------------------------------------------+
+// Helper
+//--------------------------------------------------------------------+
+
+// return only the index difference and as such can be used to determine an overflow i.e overflowable count
+TU_ATTR_ALWAYS_INLINE static inline
+uint16_t _ff_count(uint16_t depth, uint16_t wr_idx, uint16_t rd_idx)
 {
-  // We limit the index space of p such that a correct wrap around happens
-  // Check for a wrap around or if we are in unused index space - This has to be checked first!!
-  // We are exploiting the wrap around to the correct index
-  if ((p > (uint16_t)(p + offset)) || ((uint16_t)(p + offset) > f->max_pointer_idx))
+  // In case we have non-power of two depth we need a further modification
+  if (wr_idx >= rd_idx)
   {
-    p = (uint16_t) ((p + offset) + f->non_used_index_space);
-  }
-  else
+    return (uint16_t) (wr_idx - rd_idx);
+  } else
   {
-    p += offset;
+    return (uint16_t) (2*depth - (rd_idx - wr_idx));
   }
-  return p;
 }
 
-// Backward an absolute pointer
-static uint16_t backward_pointer(tu_fifo_t* f, uint16_t p, uint16_t offset)
+// return remaining slot in fifo
+TU_ATTR_ALWAYS_INLINE static inline
+uint16_t _ff_remaining(uint16_t depth, uint16_t wr_idx, uint16_t rd_idx)
+{
+  uint16_t const count = _ff_count(depth, wr_idx, rd_idx);
+  return (depth > count) ? (depth - count) : 0;
+}
+
+//--------------------------------------------------------------------+
+// Index Helper
+//--------------------------------------------------------------------+
+
+// Advance an absolute index
+// "absolute" index is only in the range of [0..2*depth)
+static uint16_t advance_index(uint16_t depth, uint16_t idx, uint16_t offset)
 {
   // We limit the index space of p such that a correct wrap around happens
   // Check for a wrap around or if we are in unused index space - This has to be checked first!!
   // We are exploiting the wrap around to the correct index
-  if ((p < (uint16_t)(p - offset)) || ((uint16_t)(p - offset) > f->max_pointer_idx))
+  uint16_t new_idx = (uint16_t) (idx + offset);
+  if ( (idx > new_idx) || (new_idx >= 2*depth) )
   {
-    p = (uint16_t) ((p - offset) - f->non_used_index_space);
+    uint16_t const non_used_index_space = (uint16_t) (UINT16_MAX - (2*depth-1));
+    new_idx = (uint16_t) (new_idx + non_used_index_space);
   }
-  else
-  {
-    p -= offset;
-  }
-  return p;
-}
 
-// get relative from absolute pointer
-static uint16_t get_relative_pointer(tu_fifo_t* f, uint16_t p)
-{
-  return _ff_mod(p, f->depth);
+  return new_idx;
 }
 
-// Works on local copies of w and r - return only the difference and as such can be used to determine an overflow
-static inline uint16_t _tu_fifo_count(tu_fifo_t* f, uint16_t wAbs, uint16_t rAbs)
+#if 0 // not used but
+// Backward an absolute index
+static uint16_t backward_index(uint16_t depth, uint16_t idx, uint16_t offset)
 {
-  uint16_t cnt = wAbs-rAbs;
-
-  // In case we have non-power of two depth we need a further modification
-  if (rAbs > wAbs) cnt -= f->non_used_index_space;
+  // We limit the index space of p such that a correct wrap around happens
+  // Check for a wrap around or if we are in unused index space - This has to be checked first!!
+  // We are exploiting the wrap around to the correct index
+  uint16_t new_idx = (uint16_t) (idx - offset);
+  if ( (idx < new_idx) || (new_idx >= 2*depth) )
+  {
+    uint16_t const non_used_index_space = (uint16_t) (UINT16_MAX - (2*depth-1));
+    new_idx = (uint16_t) (new_idx - non_used_index_space);
+  }
 
-  return cnt;
+  return new_idx;
 }
+#endif
 
-// Works on local copies of w and r
-static inline bool _tu_fifo_empty(uint16_t wAbs, uint16_t rAbs)
+// index to pointer, simply an modulo with minus.
+TU_ATTR_ALWAYS_INLINE static inline
+uint16_t idx2ptr(uint16_t depth, uint16_t idx)
 {
-  return wAbs == rAbs;
+  // Only run at most 3 times since index is limit in the range of [0..2*depth)
+  while ( idx >= depth ) idx -= depth;
+  return idx;
 }
 
-// Works on local copies of w and r
-static inline bool _tu_fifo_full(tu_fifo_t* f, uint16_t wAbs, uint16_t rAbs)
+// Works on local copies of w
+// When an overwritable fifo is overflowed, rd_idx will be re-index so that it forms
+// an full fifo i.e _ff_count() = depth
+TU_ATTR_ALWAYS_INLINE static inline
+uint16_t _ff_correct_read_index(tu_fifo_t* f, uint16_t wr_idx)
 {
-  return (_tu_fifo_count(f, wAbs, rAbs) == f->depth);
-}
+  uint16_t rd_idx;
+  if ( wr_idx >= f->depth )
+  {
+    rd_idx = wr_idx - f->depth;
+  }else
+  {
+    rd_idx = wr_idx + f->depth;
+  }
 
-// Works on local copies of w and r
-// BE AWARE - THIS FUNCTION MIGHT NOT GIVE A CORRECT ANSWERE IN CASE WRITE POINTER "OVERFLOWS"
-// Only one overflow is allowed for this function to work e.g. if depth = 100, you must not
-// write more than 2*depth-1 items in one rush without updating write pointer. Otherwise
-// write pointer wraps and you pointer states are messed up. This can only happen if you
-// use DMAs, write functions do not allow such an error.
-static inline bool _tu_fifo_overflowed(tu_fifo_t* f, uint16_t wAbs, uint16_t rAbs)
-{
-  return (_tu_fifo_count(f, wAbs, rAbs) > f->depth);
-}
+  f->rd_idx = rd_idx;
 
-// Works on local copies of w
-// For more details see _tu_fifo_overflow()!
-static inline void _tu_fifo_correct_read_pointer(tu_fifo_t* f, uint16_t wAbs)
-{
-  f->rd_idx = backward_pointer(f, wAbs, f->depth);
+  return rd_idx;
 }
 
 // Works on local copies of w and r
 // Must be protected by mutexes since in case of an overflow read pointer gets modified
-static bool _tu_fifo_peek(tu_fifo_t* f, void * p_buffer, uint16_t wAbs, uint16_t rAbs)
+static bool _tu_fifo_peek(tu_fifo_t* f, void * p_buffer, uint16_t wr_idx, uint16_t rd_idx)
 {
-  uint16_t cnt = _tu_fifo_count(f, wAbs, rAbs);
+  uint16_t cnt = _ff_count(f->depth, wr_idx, rd_idx);
+
+  // nothing to peek
+  if ( cnt == 0 ) return false;
 
   // Check overflow and correct if required
-  if (cnt > f->depth)
+  if ( cnt > f->depth )
   {
-    _tu_fifo_correct_read_pointer(f, wAbs);
+    rd_idx = _ff_correct_read_index(f, wr_idx);
     cnt = f->depth;
   }
 
-  // Skip beginning of buffer
-  if (cnt == 0) return false;
-
-  uint16_t rRel = get_relative_pointer(f, rAbs);
+  uint16_t rd_ptr = idx2ptr(f->depth, rd_idx);
 
   // Peek data
-  _ff_pull(f, p_buffer, rRel);
+  _ff_pull(f, p_buffer, rd_ptr);
 
   return true;
 }
 
 // Works on local copies of w and r
 // Must be protected by mutexes since in case of an overflow read pointer gets modified
-static uint16_t _tu_fifo_peek_n(tu_fifo_t* f, void * p_buffer, uint16_t n, uint16_t wAbs, uint16_t rAbs, tu_fifo_copy_mode_t copy_mode)
+static uint16_t _tu_fifo_peek_n(tu_fifo_t* f, void * p_buffer, uint16_t n, uint16_t wr_idx, uint16_t rd_idx, tu_fifo_copy_mode_t copy_mode)
 {
-  uint16_t cnt = _tu_fifo_count(f, wAbs, rAbs);
+  uint16_t cnt = _ff_count(f->depth, wr_idx, rd_idx);
+
+  // nothing to peek
+  if ( cnt == 0 ) return 0;
 
   // Check overflow and correct if required
-  if (cnt > f->depth)
+  if ( cnt > f->depth )
   {
-    _tu_fifo_correct_read_pointer(f, wAbs);
-    rAbs = f->rd_idx;
+    rd_idx = _ff_correct_read_index(f, wr_idx);
     cnt = f->depth;
   }
 
-  // Skip beginning of buffer
-  if (cnt == 0) return 0;
-
   // Check if we can read something at and after offset - if too less is available we read what remains
-  if (cnt < n) n = cnt;
+  if ( cnt < n ) n = cnt;
 
-  uint16_t rRel = get_relative_pointer(f, rAbs);
+  uint16_t rd_ptr = idx2ptr(f->depth, rd_idx);
 
   // Peek data
-  _ff_pull_n(f, p_buffer, n, rRel, copy_mode);
+  _ff_pull_n(f, p_buffer, n, rd_ptr, copy_mode);
 
   return n;
 }
 
-// Works on local copies of w and r
-static inline uint16_t _tu_fifo_remaining(tu_fifo_t* f, uint16_t wAbs, uint16_t rAbs)
-{
-  return f->depth - _tu_fifo_count(f, wAbs, rAbs);
-}
-
 static uint16_t _tu_fifo_write_n(tu_fifo_t* f, const void * data, uint16_t n, tu_fifo_copy_mode_t copy_mode)
 {
   if ( n == 0 ) return 0;
 
   _ff_lock(f->mutex_wr);
 
-  uint16_t w = f->wr_idx, r = f->rd_idx;
+  uint16_t wr_idx = f->wr_idx;
+  uint16_t rd_idx = f->rd_idx;
+
   uint8_t const* buf8 = (uint8_t const*) data;
 
-  if (!f->overwritable)
+  TU_LOG(TU_FIFO_DBG, "rd = %3u, wr = %3u, count = %3u, remain = %3u, n = %3u:  ",
+                       rd_idx, wr_idx, _ff_count(f->depth, wr_idx, rd_idx), _ff_remaining(f->depth, wr_idx, rd_idx), n);
+
+  if ( !f->overwritable )
   {
-    // Not overwritable limit up to full
-    n = tu_min16(n, _tu_fifo_remaining(f, w, r));
+    // limit up to full
+    uint16_t const remain = _ff_remaining(f->depth, wr_idx, rd_idx);
+    n = tu_min16(n, remain);
   }
-  else if (n >= f->depth)
+  else
   {
-    // Only copy last part
-    buf8 = buf8 + (n - f->depth) * f->item_size;
-    n = f->depth;
-
-    // We start writing at the read pointer's position since we fill the complete
-    // buffer and we do not want to modify the read pointer within a write function!
-    // This would end up in a race condition with read functions!
-    w = r;
+    // In over-writable mode, fifo_write() is allowed even when fifo is full. In such case,
+    // oldest data in fifo i.e at read pointer data will be overwritten
+    // Note: we can modify read buffer contents but we must not modify the read index itself within a write function!
+    // Since it would end up in a race condition with read functions!
+    if ( n >= f->depth )
+    {
+      // Only copy last part
+      if ( copy_mode == TU_FIFO_COPY_INC )
+      {
+        buf8 += (n - f->depth) * f->item_size;
+      }else
+      {
+        // TODO should read from hw fifo to discard data, however reading an odd number could
+        // accidentally discard data.
+      }
+
+      n = f->depth;
+
+      // We start writing at the read pointer's position since we fill the whole buffer
+      wr_idx = rd_idx;
+    }
+    else
+    {
+      uint16_t const overflowable_count = _ff_count(f->depth, wr_idx, rd_idx);
+      if (overflowable_count + n >= 2*f->depth)
+      {
+        // Double overflowed
+        // Index is bigger than the allowed range [0,2*depth)
+        // re-position write index to have a full fifo after pushed
+        wr_idx = advance_index(f->depth, rd_idx, f->depth - n);
+
+        // TODO we should also shift out n bytes from read index since we avoid changing rd index !!
+        // However memmove() is expensive due to actual copying + wrapping consideration.
+        // Also race condition could happen anyway if read() is invoke while moving result in corrupted memory
+        // currently deliberately not implemented --> result in incorrect data read back
+      }else
+      {
+        // normal + single overflowed:
+        // Index is in the range of [0,2*depth) and thus detect and recoverable. Recovering is handled in read()
+        // Therefore we just increase write index
+        // we will correct (re-position) read index later on in fifo_read() function
+      }
+    }
   }
 
-  uint16_t wRel = get_relative_pointer(f, w);
+  if (n)
+  {
+    uint16_t wr_ptr = idx2ptr(f->depth, wr_idx);
 
-  // Write data
-  _ff_push_n(f, buf8, n, wRel, copy_mode);
+    TU_LOG(TU_FIFO_DBG, "actual_n = %u, wr_ptr = %u", n, wr_ptr);
 
-  // Advance pointer
-  f->wr_idx = advance_pointer(f, w, n);
+    // Write data
+    _ff_push_n(f, buf8, n, wr_ptr, copy_mode);
+
+    // Advance index
+    f->wr_idx = advance_index(f->depth, wr_idx, n);
+
+    TU_LOG(TU_FIFO_DBG, "\tnew_wr = %u\n", f->wr_idx);
+  }
 
   _ff_unlock(f->mutex_wr);
 
@@ -504,12 +556,16 @@ static uint16_t _tu_fifo_read_n(tu_fifo_t* f, void * buffer, uint16_t n, tu_fifo
   n = _tu_fifo_peek_n(f, buffer, n, f->wr_idx, f->rd_idx, copy_mode);
 
   // Advance read pointer
-  f->rd_idx = advance_pointer(f, f->rd_idx, n);
+  f->rd_idx = advance_index(f->depth, f->rd_idx, n);
 
   _ff_unlock(f->mutex_rd);
   return n;
 }
 
+//--------------------------------------------------------------------+
+// Application API
+//--------------------------------------------------------------------+
+
 /******************************************************************************/
 /*!
     @brief Get number of items in FIFO.
@@ -527,7 +583,7 @@ static uint16_t _tu_fifo_read_n(tu_fifo_t* f, void * buffer, uint16_t n, tu_fifo
 /******************************************************************************/
 uint16_t tu_fifo_count(tu_fifo_t* f)
 {
-  return tu_min16(_tu_fifo_count(f, f->wr_idx, f->rd_idx), f->depth);
+  return tu_min16(_ff_count(f->depth, f->wr_idx, f->rd_idx), f->depth);
 }
 
 /******************************************************************************/
@@ -545,7 +601,7 @@ uint16_t tu_fifo_count(tu_fifo_t* f)
 /******************************************************************************/
 bool tu_fifo_empty(tu_fifo_t* f)
 {
-  return _tu_fifo_empty(f->wr_idx, f->rd_idx);
+  return f->wr_idx == f->rd_idx;
 }
 
 /******************************************************************************/
@@ -563,7 +619,7 @@ bool tu_fifo_empty(tu_fifo_t* f)
 /******************************************************************************/
 bool tu_fifo_full(tu_fifo_t* f)
 {
-  return _tu_fifo_full(f, f->wr_idx, f->rd_idx);
+  return _ff_count(f->depth, f->wr_idx, f->rd_idx) >= f->depth;
 }
 
 /******************************************************************************/
@@ -581,7 +637,7 @@ bool tu_fifo_full(tu_fifo_t* f)
 /******************************************************************************/
 uint16_t tu_fifo_remaining(tu_fifo_t* f)
 {
-  return _tu_fifo_remaining(f, f->wr_idx, f->rd_idx);
+  return _ff_remaining(f->depth, f->wr_idx, f->rd_idx);
 }
 
 /******************************************************************************/
@@ -607,14 +663,14 @@ uint16_t tu_fifo_remaining(tu_fifo_t* f)
 /******************************************************************************/
 bool tu_fifo_overflowed(tu_fifo_t* f)
 {
-  return _tu_fifo_overflowed(f, f->wr_idx, f->rd_idx);
+  return _ff_count(f->depth, f->wr_idx, f->rd_idx) > f->depth;
 }
 
 // Only use in case tu_fifo_overflow() returned true!
 void tu_fifo_correct_read_pointer(tu_fifo_t* f)
 {
   _ff_lock(f->mutex_rd);
-  _tu_fifo_correct_read_pointer(f, f->wr_idx);
+  _ff_correct_read_index(f, f->wr_idx);
   _ff_unlock(f->mutex_rd);
 }
 
@@ -643,7 +699,7 @@ bool tu_fifo_read(tu_fifo_t* f, void * buffer)
   bool ret = _tu_fifo_peek(f, buffer, f->wr_idx, f->rd_idx);
 
   // Advance pointer
-  f->rd_idx = advance_pointer(f, f->rd_idx, ret);
+  f->rd_idx = advance_index(f->depth, f->rd_idx, ret);
 
   _ff_unlock(f->mutex_rd);
   return ret;
@@ -740,20 +796,20 @@ bool tu_fifo_write(tu_fifo_t* f, const void * data)
   _ff_lock(f->mutex_wr);
 
   bool ret;
-  uint16_t const w = f->wr_idx;
+  uint16_t const wr_idx = f->wr_idx;
 
-  if ( _tu_fifo_full(f, w, f->rd_idx) && !f->overwritable )
+  if ( tu_fifo_full(f) && !f->overwritable )
   {
     ret = false;
   }else
   {
-    uint16_t wRel = get_relative_pointer(f, w);
+    uint16_t wr_ptr = idx2ptr(f->depth, wr_idx);
 
     // Write data
-    _ff_push(f, data, wRel);
+    _ff_push(f, data, wr_ptr);
 
     // Advance pointer
-    f->wr_idx = advance_pointer(f, w, 1);
+    f->wr_idx = advance_index(f->depth, wr_idx, 1);
 
     ret = true;
   }
@@ -815,9 +871,8 @@ bool tu_fifo_clear(tu_fifo_t *f)
   _ff_lock(f->mutex_wr);
   _ff_lock(f->mutex_rd);
 
-  f->rd_idx = f->wr_idx = 0;
-  f->max_pointer_idx = (uint16_t) (2*f->depth-1);
-  f->non_used_index_space = UINT16_MAX - f->max_pointer_idx;
+  f->rd_idx = 0;
+  f->wr_idx = 0;
 
   _ff_unlock(f->mutex_wr);
   _ff_unlock(f->mutex_rd);
@@ -865,7 +920,7 @@ bool tu_fifo_set_overwritable(tu_fifo_t *f, bool overwritable)
 /******************************************************************************/
 void tu_fifo_advance_write_pointer(tu_fifo_t *f, uint16_t n)
 {
-  f->wr_idx = advance_pointer(f, f->wr_idx, n);
+  f->wr_idx = advance_index(f->depth, f->wr_idx, n);
 }
 
 /******************************************************************************/
@@ -886,7 +941,7 @@ void tu_fifo_advance_write_pointer(tu_fifo_t *f, uint16_t n)
 /******************************************************************************/
 void tu_fifo_advance_read_pointer(tu_fifo_t *f, uint16_t n)
 {
-  f->rd_idx = advance_pointer(f, f->rd_idx, n);
+  f->rd_idx = advance_index(f->depth, f->rd_idx, n);
 }
 
 /******************************************************************************/
@@ -907,17 +962,18 @@ void tu_fifo_advance_read_pointer(tu_fifo_t *f, uint16_t n)
 void tu_fifo_get_read_info(tu_fifo_t *f, tu_fifo_buffer_info_t *info)
 {
   // Operate on temporary values in case they change in between
-  uint16_t w = f->wr_idx, r = f->rd_idx;
+  uint16_t wr_idx = f->wr_idx;
+  uint16_t rd_idx = f->rd_idx;
 
-  uint16_t cnt = _tu_fifo_count(f, w, r);
+  uint16_t cnt = _ff_count(f->depth, wr_idx, rd_idx);
 
   // Check overflow and correct if required - may happen in case a DMA wrote too fast
   if (cnt > f->depth)
   {
     _ff_lock(f->mutex_rd);
-    _tu_fifo_correct_read_pointer(f, w);
+    rd_idx = _ff_correct_read_index(f, wr_idx);
     _ff_unlock(f->mutex_rd);
-    r = f->rd_idx;
+
     cnt = f->depth;
   }
 
@@ -932,22 +988,25 @@ void tu_fifo_get_read_info(tu_fifo_t *f, tu_fifo_buffer_info_t *info)
   }
 
   // Get relative pointers
-  w = get_relative_pointer(f, w);
-  r = get_relative_pointer(f, r);
+  uint16_t wr_ptr = idx2ptr(f->depth, wr_idx);
+  uint16_t rd_ptr = idx2ptr(f->depth, rd_idx);
 
   // Copy pointer to buffer to start reading from
-  info->ptr_lin = &f->buffer[r];
+  info->ptr_lin = &f->buffer[rd_ptr];
 
   // Check if there is a wrap around necessary
-  if (w > r) {
+  if (wr_ptr > rd_ptr)
+  {
     // Non wrapping case
     info->len_lin  = cnt;
+
     info->len_wrap = 0;
     info->ptr_wrap = NULL;
   }
   else
   {
-    info->len_lin  = f->depth - r;         // Also the case if FIFO was full
+    info->len_lin  = f->depth - rd_ptr;   // Also the case if FIFO was full
+
     info->len_wrap = cnt - info->len_lin;
     info->ptr_wrap = f->buffer;
   }
@@ -970,36 +1029,37 @@ void tu_fifo_get_read_info(tu_fifo_t *f, tu_fifo_buffer_info_t *info)
 /******************************************************************************/
 void tu_fifo_get_write_info(tu_fifo_t *f, tu_fifo_buffer_info_t *info)
 {
-  uint16_t w = f->wr_idx, r = f->rd_idx;
-  uint16_t free = _tu_fifo_remaining(f, w, r);
+  uint16_t wr_idx = f->wr_idx;
+  uint16_t rd_idx = f->rd_idx;
+  uint16_t remain = _ff_remaining(f->depth, wr_idx, rd_idx);
 
-  if (free == 0)
+  if (remain == 0)
   {
-    info->len_lin = 0;
+    info->len_lin  = 0;
     info->len_wrap = 0;
-    info->ptr_lin = NULL;
+    info->ptr_lin  = NULL;
     info->ptr_wrap = NULL;
     return;
   }
 
   // Get relative pointers
-  w = get_relative_pointer(f, w);
-  r = get_relative_pointer(f, r);
+  uint16_t wr_ptr = idx2ptr(f->depth, wr_idx);
+  uint16_t rd_ptr = idx2ptr(f->depth, rd_idx);
 
   // Copy pointer to buffer to start writing to
-  info->ptr_lin = &f->buffer[w];
+  info->ptr_lin = &f->buffer[wr_ptr];
 
-  if (w < r)
+  if (wr_ptr < rd_ptr)
   {
     // Non wrapping case
-    info->len_lin = r-w;
+    info->len_lin  = rd_ptr-wr_ptr;
     info->len_wrap = 0;
     info->ptr_wrap = NULL;
   }
   else
   {
-    info->len_lin = f->depth - w;
-    info->len_wrap = free - info->len_lin; // Remaining length - n already was limited to free or FIFO depth
-    info->ptr_wrap = f->buffer;            // Always start of buffer
+    info->len_lin  = f->depth - wr_ptr;
+    info->len_wrap = remain - info->len_lin; // Remaining length - n already was limited to remain or FIFO depth
+    info->ptr_wrap = f->buffer;              // Always start of buffer
   }
 }

+ 69 - 17
src/common/tusb_fifo.h

@@ -44,28 +44,82 @@ extern "C" {
 #include "common/tusb_common.h"
 #include "osal/osal.h"
 
-#define tu_fifo_mutex_t  osal_mutex_t
-
 // mutex is only needed for RTOS
 // for OS None, we don't get preempted
 #define CFG_FIFO_MUTEX      OSAL_MUTEX_REQUIRED
 
+/* Write/Read index is always in the range of:
+ *      0 .. 2*depth-1
+ * The extra window allow us to determine the fifo state of empty or full with only 2 indices
+ * Following are examples with depth = 3
+ *
+ * - empty: W = R
+ *                |
+ *    -------------------------
+ *    | 0 | RW| 2 | 3 | 4 | 5 |
+ *
+ * - full 1: W > R
+ *                |
+ *    -------------------------
+ *    | 0 | R | 2 | 3 | W | 5 |
+ *
+ * - full 2: W < R
+ *                |
+ *    -------------------------
+ *    | 0 | 1 | W | 3 | 4 | R |
+ *
+ * - Number of items in the fifo can be determined in either cases:
+ *    - case W >= R: Count = W - R
+ *    - case W <  R: Count = 2*depth - (R - W)
+ *
+ * In non-overwritable mode, computed Count (in above 2 cases) is at most equal to depth.
+ * However, in over-writable mode, write index can be repeatedly increased and count can be
+ * temporarily larger than depth (overflowed condition) e.g
+ *
+ *  - Overflowed 1: write(3), write(1)
+ *    In this case we will adjust Read index when read()/peek() is called so that count = depth.
+ *                  |
+ *      -------------------------
+ *      | R | 1 | 2 | 3 | W | 5 |
+ *
+ *  - Double Overflowed i.e index is out of allowed range [0,2*depth)
+ *    This occurs when we continue to write after 1st overflowed to 2nd overflowed. e.g:
+ *      write(3), write(1), write(2)
+ *    This must be prevented since it will cause unrecoverable state, in above example
+ *    if not handled the fifo will be empty instead of continue-to-be full. Since we must not modify
+ *    read index in write() function, which cause race condition. We will re-position write index so that
+ *    after data is written it is a full fifo i.e W = depth - R
+ *
+ *      re-position W = 1 before write(2)
+ *      Note: we should also move data from mem[3] to read index as well, but deliberately skipped here
+ *      since it is an expensive operation !!!
+ *                  |
+ *      -------------------------
+ *      | R | W | 2 | 3 | 4 | 5 |
+ *
+ *      perform write(2), result is still a full fifo.
+ *
+ *                  |
+ *      -------------------------
+ *      | R | 1 | 2 | W | 4 | 5 |
+
+ */
 typedef struct
 {
-  uint8_t* buffer               ; ///< buffer pointer
-  uint16_t depth                ; ///< max items
-  uint16_t item_size            ; ///< size of each item
-  bool overwritable             ;
+  uint8_t* buffer          ; // buffer pointer
+  uint16_t depth           ; // max items
 
-  uint16_t non_used_index_space ; ///< required for non-power-of-two buffer length
-  uint16_t max_pointer_idx      ; ///< maximum absolute pointer index
+  struct TU_ATTR_PACKED {
+    uint16_t item_size : 15; // size of each item
+    bool overwritable  : 1 ; // ovwerwritable when full
+  };
 
-  volatile uint16_t wr_idx      ; ///< write pointer
-  volatile uint16_t rd_idx      ; ///< read pointer
+  volatile uint16_t wr_idx ; // write index
+  volatile uint16_t rd_idx ; // read index
 
 #if OSAL_MUTEX_REQUIRED
-  tu_fifo_mutex_t mutex_wr;
-  tu_fifo_mutex_t mutex_rd;
+  osal_mutex_t mutex_wr;
+  osal_mutex_t mutex_rd;
 #endif
 
 } tu_fifo_t;
@@ -84,8 +138,6 @@ typedef struct
   .depth                = _depth,                           \
   .item_size            = sizeof(_type),                    \
   .overwritable         = _overwritable,                    \
-  .non_used_index_space = UINT16_MAX - (2*(_depth)-1),      \
-  .max_pointer_idx      = 2*(_depth)-1,                     \
 }
 
 #define TU_FIFO_DEF(_name, _depth, _type, _overwritable)                      \
@@ -99,10 +151,10 @@ bool tu_fifo_config(tu_fifo_t *f, void* buffer, uint16_t depth, uint16_t item_si
 
 #if OSAL_MUTEX_REQUIRED
 TU_ATTR_ALWAYS_INLINE static inline
-void tu_fifo_config_mutex(tu_fifo_t *f, tu_fifo_mutex_t write_mutex_hdl, tu_fifo_mutex_t read_mutex_hdl)
+void tu_fifo_config_mutex(tu_fifo_t *f, osal_mutex_t wr_mutex, osal_mutex_t rd_mutex)
 {
-  f->mutex_wr = write_mutex_hdl;
-  f->mutex_rd = read_mutex_hdl;
+  f->mutex_wr = wr_mutex;
+  f->mutex_rd = rd_mutex;
 }
 
 #else

+ 2 - 0
src/device/usbd.c

@@ -388,6 +388,8 @@ bool tud_init (uint8_t rhport)
 
   TU_LOG(USBD_DBG, "USBD init on controller %u\r\n", rhport);
   TU_LOG_INT(USBD_DBG, sizeof(usbd_device_t));
+  TU_LOG_INT(USBD_DBG, sizeof(tu_fifo_t));
+  TU_LOG_INT(USBD_DBG, sizeof(tu_edpt_stream_t));
 
   tu_varclr(&_usbd_dev);
 

+ 18 - 4
test/unit-test/project.yml

@@ -78,10 +78,24 @@
     :html_high_threshold: 90
     :xml_report: FALSE
 
-#:tools:
-# Ceedling defaults to using gcc for compiling, linking, etc.
-# As [:tools] is blank, gcc will be used (so long as it's in your system path)
-# See documentation to configure a given toolchain for use
+:tools:
+  :test_compiler:
+     :executable: clang
+     :name: 'clang compiler'
+     :arguments:
+        - -I"$": COLLECTION_PATHS_TEST_TOOLCHAIN_INCLUDE               #expands to -I search paths
+        - -I"$": COLLECTION_PATHS_TEST_SUPPORT_SOURCE_INCLUDE_VENDOR   #expands to -I search paths
+        - -D$: COLLECTION_DEFINES_TEST_AND_VENDOR  #expands to all -D defined symbols
+        - -fsanitize=address
+        - -c ${1}                       #source code input file (Ruby method call param list sub)
+        - -o ${2}                       #object file output (Ruby method call param list sub)
+  :test_linker:
+     :executable: clang
+     :name: 'clang linker'
+     :arguments:
+        - -fsanitize=address
+        - ${1}               #list of object files to link (Ruby method call param list sub)
+        - -o ${2}            #executable file output (Ruby method call param list sub)
 
 # LIBRARIES
 # These libraries are automatically injected into the build process. Those specified as

+ 101 - 43
test/unit-test/test/test_fifo.c

@@ -30,15 +30,23 @@
 #include "osal/osal.h"
 #include "tusb_fifo.h"
 
-#define FIFO_SIZE 10
-TU_FIFO_DEF(tu_ff, FIFO_SIZE, uint8_t, false);
+#define FIFO_SIZE   64
+uint8_t tu_ff_buf[FIFO_SIZE * sizeof(uint8_t)];
+tu_fifo_t tu_ff = TU_FIFO_INIT(tu_ff_buf, FIFO_SIZE, uint8_t, false);
+
 tu_fifo_t* ff = &tu_ff;
 tu_fifo_buffer_info_t info;
 
+uint8_t test_data[4096];
+uint8_t rd_buf[FIFO_SIZE];
+
 void setUp(void)
 {
   tu_fifo_clear(ff);
   memset(&info, 0, sizeof(tu_fifo_buffer_info_t));
+
+  for(int i=0; i<sizeof(test_data); i++) test_data[i] = i;
+  memset(rd_buf, 0, sizeof(rd_buf));
 }
 
 void tearDown(void)
@@ -62,86 +70,136 @@ void test_normal(void)
 
 void test_item_size(void)
 {
-  TU_FIFO_DEF(ff4, FIFO_SIZE, uint32_t, false);
-  tu_fifo_clear(&ff4);
+  uint8_t ff4_buf[FIFO_SIZE * sizeof(uint32_t)];
+  tu_fifo_t ff4 = TU_FIFO_INIT(ff4_buf, FIFO_SIZE, uint32_t, false);
 
-  uint32_t data[20];
-  for(uint32_t i=0; i<sizeof(data)/4; i++) data[i] = i;
+  uint32_t data4[2*FIFO_SIZE];
+  for(uint32_t i=0; i<sizeof(data4)/4; i++) data4[i] = i;
 
-  tu_fifo_write_n(&ff4, data, 10);
+  // fill up fifo
+  tu_fifo_write_n(&ff4, data4, FIFO_SIZE);
 
-  uint32_t rd[10];
+  uint32_t rd_buf4[FIFO_SIZE];
   uint16_t rd_count;
 
   // read 0 -> 4
-  rd_count = tu_fifo_read_n(&ff4, rd, 5);
+  rd_count = tu_fifo_read_n(&ff4, rd_buf4, 5);
   TEST_ASSERT_EQUAL( 5, rd_count );
-  TEST_ASSERT_EQUAL_UINT32_ARRAY( data, rd, rd_count ); // 0 -> 4
+  TEST_ASSERT_EQUAL_UINT32_ARRAY( data4, rd_buf4, rd_count ); // 0 -> 4
 
-  tu_fifo_write_n(&ff4, data+10, 5);
+  tu_fifo_write_n(&ff4, data4+FIFO_SIZE, 5);
 
-  // read 5 -> 14
-  rd_count = tu_fifo_read_n(&ff4, rd, 10);
-  TEST_ASSERT_EQUAL( 10, rd_count );
-  TEST_ASSERT_EQUAL_UINT32_ARRAY( data+5, rd, rd_count ); // 5 -> 14
+  // read all 5 -> 68
+  rd_count = tu_fifo_read_n(&ff4, rd_buf4, FIFO_SIZE);
+  TEST_ASSERT_EQUAL( FIFO_SIZE, rd_count );
+  TEST_ASSERT_EQUAL_UINT32_ARRAY( data4+5, rd_buf4, rd_count ); // 5 -> 68
 }
 
 void test_read_n(void)
 {
-  // prepare data
-  uint8_t data[20];
-  for(int i=0; i<sizeof(data); i++) data[i] = i;
-
-  for(uint8_t i=0; i < FIFO_SIZE; i++) tu_fifo_write(ff, data+i);
-
-  uint8_t rd[10];
   uint16_t rd_count;
 
+  // fill up fifo
+  for(uint8_t i=0; i < FIFO_SIZE; i++) tu_fifo_write(ff, test_data+i);
+
   // case 1: Read index + count < depth
   // read 0 -> 4
-  rd_count = tu_fifo_read_n(ff, rd, 5);
+  rd_count = tu_fifo_read_n(ff, rd_buf, 5);
   TEST_ASSERT_EQUAL( 5, rd_count );
-  TEST_ASSERT_EQUAL_MEMORY( data, rd, rd_count ); // 0 -> 4
+  TEST_ASSERT_EQUAL_MEMORY( test_data, rd_buf, rd_count ); // 0 -> 4
 
   // case 2: Read index + count > depth
   // write 10, 11, 12
-  tu_fifo_write(ff, data+10);
-  tu_fifo_write(ff, data+11);
-  tu_fifo_write(ff, data+12);
+  tu_fifo_write(ff, test_data+FIFO_SIZE);
+  tu_fifo_write(ff, test_data+FIFO_SIZE+1);
+  tu_fifo_write(ff, test_data+FIFO_SIZE+2);
 
-  rd_count = tu_fifo_read_n(ff, rd, 7);
+  rd_count = tu_fifo_read_n(ff, rd_buf, 7);
   TEST_ASSERT_EQUAL( 7, rd_count );
 
-  TEST_ASSERT_EQUAL_MEMORY( data+5, rd, rd_count ); // 5 -> 11
+  TEST_ASSERT_EQUAL_MEMORY( test_data+5, rd_buf, rd_count ); // 5 -> 11
 
   // Should only read until empty
-  TEST_ASSERT_EQUAL( 1, tu_fifo_read_n(ff, rd, 100) );
+  TEST_ASSERT_EQUAL( FIFO_SIZE-5+3-7, tu_fifo_read_n(ff, rd_buf, 100) );
 }
 
 void test_write_n(void)
 {
-  // prepare data
-  uint8_t data[20];
-  for(int i=0; i<sizeof(data); i++) data[i] = i;
-
   // case 1: wr + count < depth
-  tu_fifo_write_n(ff, data, 8); // wr = 8, count = 8
+  tu_fifo_write_n(ff, test_data, 32); // wr = 32, count = 32
 
-  uint8_t rd[10];
   uint16_t rd_count;
 
-  rd_count = tu_fifo_read_n(ff, rd, 5); // wr = 8, count = 3
-  TEST_ASSERT_EQUAL( 5, rd_count );
-  TEST_ASSERT_EQUAL_MEMORY( data, rd, rd_count ); // 0 -> 4
+  rd_count = tu_fifo_read_n(ff, rd_buf, 16); // wr = 32, count = 16
+  TEST_ASSERT_EQUAL( 16, rd_count );
+  TEST_ASSERT_EQUAL_MEMORY( test_data, rd_buf, rd_count );
 
   // case 2: wr + count > depth
-  tu_fifo_write_n(ff, data+8, 6); // wr = 3, count = 9
+  tu_fifo_write_n(ff, test_data+32, 40); // wr = 72 -> 8, count = 56
+
+  tu_fifo_read_n(ff, rd_buf, 32); // count = 24
+  TEST_ASSERT_EQUAL_MEMORY( test_data+16, rd_buf, rd_count);
+
+  TEST_ASSERT_EQUAL(24, tu_fifo_count(ff));
+}
+
+void test_write_double_overflowed(void)
+{
+  tu_fifo_set_overwritable(ff, true);
 
-  for(rd_count=0; rd_count<7; rd_count++) tu_fifo_read(ff, rd+rd_count); // wr = 3, count = 2
+  uint8_t rd_buf[FIFO_SIZE] = { 0 };
+  uint8_t* buf = test_data;
 
-  TEST_ASSERT_EQUAL_MEMORY( data+5, rd, rd_count); // 5 -> 11
+  // full
+  buf += tu_fifo_write_n(ff, buf, FIFO_SIZE);
+  TEST_ASSERT_EQUAL(FIFO_SIZE, tu_fifo_count(ff));
 
-  TEST_ASSERT_EQUAL(2, tu_fifo_count(ff));
+  // write more, should still full
+  buf += tu_fifo_write_n(ff, buf, FIFO_SIZE-8);
+  TEST_ASSERT_EQUAL(FIFO_SIZE, tu_fifo_count(ff));
+
+  // double overflowed: in total, write more than > 2*FIFO_SIZE
+  buf += tu_fifo_write_n(ff, buf, 16);
+  TEST_ASSERT_EQUAL(FIFO_SIZE, tu_fifo_count(ff));
+
+  // reading back should give back data from last FIFO_SIZE write
+  tu_fifo_read_n(ff, rd_buf, FIFO_SIZE);
+
+  TEST_ASSERT_EQUAL_MEMORY(buf-16, rd_buf+FIFO_SIZE-16, 16);
+
+  // TODO whole buffer should match, but we deliberately not implement it
+  // TEST_ASSERT_EQUAL_MEMORY(buf-FIFO_SIZE, rd_buf, FIFO_SIZE);
+}
+
+static uint16_t help_write(uint16_t total, uint16_t n)
+{
+  tu_fifo_write_n(ff, test_data, n);
+  total = tu_min16(FIFO_SIZE, total + n);
+
+  TEST_ASSERT_EQUAL(total, tu_fifo_count(ff));
+  TEST_ASSERT_EQUAL(FIFO_SIZE - total, tu_fifo_remaining(ff));
+
+  return total;
+}
+
+void test_write_overwritable2(void)
+{
+  tu_fifo_set_overwritable(ff, true);
+
+  // based on actual crash tests detected by fuzzing
+  uint16_t total = 0;
+
+  total = help_write(total, 12);
+  total = help_write(total, 55);
+  total = help_write(total, 73);
+  total = help_write(total, 55);
+  total = help_write(total, 75);
+  total = help_write(total, 84);
+  total = help_write(total, 1);
+  total = help_write(total, 10);
+  total = help_write(total, 12);
+  total = help_write(total, 25);
+  total = help_write(total, 192);
 }
 
 void test_peek(void)