Browse Source

esp32s2 SHA: fall back to hashing block by block for non-DMA-capable memory

Also adds a unit test for SHA with the input buffer in flash

Closes IDF-1529
Marius Vikhammer 5 years ago
parent
commit
b75edc84e3

+ 1 - 1
components/idf_test/include/esp32s2/idf_performance_target.h

@@ -7,7 +7,7 @@
 #define IDF_PERFORMANCE_MIN_SHA256_THROUGHPUT_MBSEC                             90.0
 // esp_sha() time to process 32KB of input data from RAM
 #define IDF_PERFORMANCE_MAX_TIME_SHA1_32KB                                      900
-#define IDF_PERFORMANCE_MAX_TIME_SHA512_32KB                                    800
+#define IDF_PERFORMANCE_MAX_TIME_SHA512_32KB                                    900
 
 #define IDF_PERFORMANCE_MAX_RSA_2048KEY_PUBLIC_OP                               13500
 #define IDF_PERFORMANCE_MAX_RSA_2048KEY_PRIVATE_OP                              130000

+ 81 - 47
components/mbedtls/port/esp32s2/sha.c

@@ -54,6 +54,9 @@
 */
 #define SHA_DMA_MAX_BYTES 3968
 
+/* The longest single block is the SHA512 block: 128 bytes */
+#define SHA_MAX_BLK_LEN 128
+
 const static char *TAG = "esp-sha";
 
 /* Return block size (in bytes) for a given SHA type */
@@ -196,6 +199,59 @@ int esp_sha_512_t_init_hash(uint16_t t)
     return 0;
 }
 
+/* Copy one block (block_length(sha_type) bytes) from input into the words at SHA_TEXT_BASE */
+static void esp_sha_fill_text_block(esp_sha_type sha_type, const void *input)
+{
+    /* volatile keeps the compiler from dropping these stores or moving them past other volatile accesses */
+    volatile uint32_t *text_regs = (volatile uint32_t *)(SHA_TEXT_BASE);
+    const uint32_t *src_words = (const uint32_t *)input;  /* input is only read, so keep it const */
+    const int word_count = block_length(sha_type) / 4;    /* 32-bit words per block, computed once */
+    for (int i = 0; i < word_count; i++) {
+        text_regs[i] = src_words[i];
+    }
+    asm volatile ("memw");
+}
+
+/* Load one block into the SHA text registers and trigger its processing (non-DMA) */
+static void esp_sha_block(esp_sha_type sha_type, const void *input, bool is_first_block)
+{
+    esp_sha_fill_text_block(sha_type, input);
+    esp_sha_wait_idle();
+
+    /* Only the first block writes SHA_START_REG; every later block writes SHA_CONTINUE_REG */
+    if (!is_first_block) {
+        REG_WRITE(SHA_CONTINUE_REG, 1);
+        return;
+    }
+    REG_WRITE(SHA_START_REG, 1);
+}
+
+/* Feed buf (if any) and then every whole block of input through the non-DMA, block-by-block path */
+static void esp_sha_block_mode(esp_sha_type sha_type, const uint8_t *input, uint32_t ilen,
+                                const uint8_t *buf, uint32_t buf_len, bool is_first_block)
+{
+    const size_t block_bytes = block_length(sha_type);
+    REG_WRITE(SHA_MODE_REG, sha_type);
+    const int whole_blocks = ilen / block_bytes;
+    const uint8_t *cursor = input;
+
+    /* buf is hashed ahead of input and is consumed as one full block */
+    if (buf_len != 0) {
+        esp_sha_block(sha_type, buf, is_first_block);
+        is_first_block = false;
+    }
+
+    /* Any tail of input shorter than one block is not processed here */
+    for (int remaining = whole_blocks; remaining > 0; remaining--) {
+        esp_sha_block(sha_type, cursor, is_first_block);
+        is_first_block = false;
+        cursor += block_bytes;
+    }
+
+    esp_sha_wait_idle();
+}
+
+
 
 static int esp_sha_dma_process(esp_sha_type sha_type, const void *input, uint32_t ilen,
                                 const void *buf, uint32_t buf_len, bool is_first_block);
@@ -207,87 +263,65 @@ int esp_sha_dma(esp_sha_type sha_type, const void *input, uint32_t ilen,
                 const void *buf, uint32_t buf_len, bool is_first_block)
 {
     int ret = 0;
-    const void *dma_input;
-    unsigned char *non_icache_input = NULL;
-    unsigned char *non_icache_buf = NULL;
-    int dma_op_num;
-    size_t dma_max_chunk_len = SHA_DMA_MAX_BYTES;
+    unsigned char *dma_cap_buf = NULL;
+    int dma_op_num = ( ilen / (SHA_DMA_MAX_BYTES + 1) ) + 1;
 
-    if (buf_len > 128) {
+    if (buf_len > block_length(sha_type)) {
         ESP_LOGE(TAG, "SHA DMA buf_len cannot exceed max size for a single block");
         return -1;
     }
 
-#if (CONFIG_SPIRAM_USE_CAPS_ALLOC || CONFIG_SPIRAM_USE_MALLOC)
-    if (esp_ptr_external_ram(input) || esp_ptr_external_ram(buf)) {
-        Cache_WriteBack_All();
+    /* DMA cannot access memory in the iCache range, hash block by block instead of using DMA */
+    if (!esp_ptr_dma_ext_capable(input) && !esp_ptr_dma_capable(input) && (ilen != 0)) {
+        esp_sha_block_mode(sha_type, input, ilen, buf, buf_len, is_first_block);
+        return 0;
+    }
+
+#if (CONFIG_ESP32S2_SPIRAM_SUPPORT)
+    if (esp_ptr_external_ram(input)) {
+        Cache_WriteBack_Addr((uint32_t)input, ilen);
+    }
+    if (esp_ptr_external_ram(buf)) {
+        Cache_WriteBack_Addr((uint32_t)buf, buf_len);
     }
 #endif
 
+    /* Copy to internal buf if buf is in non DMA capable memory */
     if (!esp_ptr_dma_ext_capable(buf) && !esp_ptr_dma_capable(buf) && (buf_len != 0)) {
-        non_icache_buf = heap_caps_malloc(sizeof(unsigned char) * buf_len, MALLOC_CAP_DMA);
-        if (non_icache_buf == NULL) {
+        dma_cap_buf = heap_caps_malloc(sizeof(unsigned char) * buf_len, MALLOC_CAP_DMA);
+        if (dma_cap_buf == NULL) {
             ESP_LOGE(TAG, "Failed to allocate buf memory");
-            ret = ESP_ERR_NO_MEM;
+            ret = -1;
             goto cleanup;
         }
-        memcpy(non_icache_buf, buf, buf_len);
-        buf = non_icache_buf;
-    }
-
-    /* DMA cannot access memory in the iCache range, copy data to temporary buffers before transfer */
-    if (!esp_ptr_dma_ext_capable(input) && !esp_ptr_dma_capable(input) && (ilen != 0)) {
-        non_icache_input = heap_caps_malloc(sizeof(unsigned char) * MIN(ilen, dma_max_chunk_len), MALLOC_CAP_DMA);
-
-        if (non_icache_input == NULL) {
-            /* Allocate biggest available heap */
-            size_t max_alloc_len = heap_caps_get_largest_free_block(MALLOC_CAP_DMA);
-            dma_max_chunk_len = max_alloc_len - max_alloc_len % block_length(sha_type);
-            non_icache_input = heap_caps_malloc(sizeof(unsigned char) * MIN(ilen, dma_max_chunk_len), MALLOC_CAP_DMA);
-
-            if (non_icache_input == NULL) {
-                ESP_LOGE(TAG, "Failed to allocate input memory");
-                ret = ESP_ERR_NO_MEM;
-                goto cleanup;
-            }
-        }
+        memcpy(dma_cap_buf, buf, buf_len);
+        buf = dma_cap_buf;
     }
 
 
     /* The max amount of blocks in a single hardware operation is 2^6 - 1 = 63
        Thus we only do a single DMA input list + dma buf list,
        which is max 3968/64 + 64/64 = 63 blocks */
-    dma_op_num = ( ilen / (dma_max_chunk_len + 1) ) + 1;
     for (int i = 0; i < dma_op_num; i++) {
-        int dma_chunk_len = MIN(ilen, dma_max_chunk_len);
 
+        int dma_chunk_len = MIN(ilen, SHA_DMA_MAX_BYTES);
 
-        /* Input depends on if it's a temp alloc buffer or supplied by user */
-        if (non_icache_input != NULL) {
-            memcpy(non_icache_input, input, dma_chunk_len);
-            dma_input = non_icache_input;
-        } else {
-            dma_input = input;
-        }
-
-        ret = esp_sha_dma_process(sha_type, dma_input, dma_chunk_len, buf, buf_len, is_first_block);
-
+        ret = esp_sha_dma_process(sha_type, input, dma_chunk_len, buf, buf_len, is_first_block);
 
         if (ret != 0) {
             goto cleanup;
         }
 
-        is_first_block = false;
         ilen -= dma_chunk_len;
         input += dma_chunk_len;
 
         // Only append buf to the first operation
         buf_len = 0;
+        is_first_block = false;
     }
 
 cleanup:
-    free(non_icache_input);
-    free(non_icache_buf);
+    free(dma_cap_buf);
     return ret;
 }
 

+ 60 - 0
components/mbedtls/test/test_mbedtls_sha.c

@@ -305,6 +305,66 @@ TEST_CASE("mbedtls SHA session passed between tasks", "[mbedtls]")
     TEST_ASSERT_EQUAL_MEMORY_MESSAGE(sha256_thousand_as, param.result, 32, "SHA256 result from other task");
 }
 
+
+
+
+/* Random input generated and hashed using python:
+
+    import hashlib
+    import os, binascii
+
+    input = bytearray(os.urandom(150))
+    arr = ''
+    for idx, b in enumerate(input):
+        if idx % 8 == 0:
+            arr += '\n'
+        arr += "{}, ".format(hex(b))
+    digest = hashlib.sha256(input).hexdigest()
+
+*/
+const uint8_t test_vector[] = {   /* 150 random bytes produced by the Python snippet in the comment above */
+    0xe4, 0x1a, 0x1a, 0x30, 0x71, 0xd3, 0x94, 0xb0,
+    0xc3, 0x7e, 0x99, 0x9f, 0x1a, 0xde, 0x4a, 0x36,
+    0xb1, 0x1, 0x81, 0x2b, 0x41, 0x91, 0x11, 0x7f,
+    0xd8, 0xe1, 0xd5, 0xe5, 0x52, 0x6d, 0x92, 0xee,
+    0x6c, 0xf7, 0x70, 0xea, 0x3a, 0xb, 0xc9, 0x97,
+    0xc0, 0x12, 0x6f, 0x10, 0x5b, 0x90, 0xd8, 0x52,
+    0x91, 0x69, 0xea, 0xc4, 0x1f, 0xc, 0xcf, 0xc6,
+    0xf0, 0x43, 0xc6, 0xa3, 0x1f, 0x46, 0x3c, 0x3d,
+    0x25, 0xe5, 0xa8, 0x27, 0x86, 0x85, 0x32, 0x3f,
+    0x33, 0xd8, 0x40, 0xc4, 0x41, 0xf6, 0x4b, 0x12,
+    0xd8, 0x5e, 0x4, 0x27, 0x42, 0x90, 0x73, 0x4,
+    0x8, 0x42, 0xd1, 0x64, 0xd, 0x84, 0x3, 0x1,
+    0x76, 0x88, 0xe4, 0x95, 0xdf, 0xe7, 0x62, 0xb4,
+    0xb3, 0xb2, 0x7e, 0x6d, 0x78, 0xca, 0x79, 0x82,
+    0xcc, 0xba, 0x22, 0xd2, 0x90, 0x2e, 0xe3, 0xa8,
+    0x2a, 0x53, 0x3a, 0xb1, 0x9a, 0x7f, 0xb7, 0x8b,
+    0xfa, 0x32, 0x47, 0xc1, 0x5c, 0x6, 0x4f, 0x7b,
+    0xcd, 0xb3, 0xf4, 0xf1, 0xd0, 0xb5, 0xbf, 0xfb,
+    0x7c, 0xc3, 0xa5, 0xb2, 0xc4, 0xd4,
+};
+
+const uint8_t test_vector_digest[] = {   /* expected 32-byte SHA-256 digest of test_vector */
+    0xff, 0x1c, 0x60, 0xcb, 0x21, 0xf0, 0x63, 0x68,
+    0xb9, 0xfc, 0xfe, 0xad, 0x3e, 0xb0, 0x2e, 0xd1,
+    0xf9, 0x08, 0x82, 0x82, 0x83, 0x06, 0xc1, 0x8a,
+    0x98, 0x5d, 0x36, 0xc0, 0xb7, 0xeb, 0x35, 0xe0,
+};
+
+TEST_CASE("mbedtls SHA, input in flash", "[mbedtls]")
+{
+    mbedtls_sha256_context sha256_ctx;
+    unsigned char sha256[32];
+
+    /* One-shot SHA-256 (is224 = false) over the const test_vector, checked against test_vector_digest */
+    mbedtls_sha256_init(&sha256_ctx);
+    TEST_ASSERT_EQUAL(0, mbedtls_sha256_starts_ret(&sha256_ctx, false));
+    TEST_ASSERT_EQUAL(0, mbedtls_sha256_update_ret(&sha256_ctx, test_vector, sizeof(test_vector)));
+    TEST_ASSERT_EQUAL(0, mbedtls_sha256_finish_ret(&sha256_ctx, sha256));
+    mbedtls_sha256_free(&sha256_ctx);  /* release the context set up by mbedtls_sha256_init */
+    TEST_ASSERT_EQUAL_MEMORY_MESSAGE(test_vector_digest, sha256, 32, "SHA256 calculation");
+}
+
 /* ESP32 do not have SHA512/t functions */
 #if !DISABLED_FOR_TARGETS(ESP32)