فهرست منبع

crypto accelerator support on esp32s3

SHA: passing unit tests
RSA: pass tests
AES: tests passing
Marius Vikhammer 5 سال پیش
والد
کامیت
bff0016eb8

+ 1 - 0
components/esp32s3/ld/esp32s3.peripherals.ld

@@ -29,5 +29,6 @@ PROVIDE ( I2C1 = 0x60027000 );
 PROVIDE ( GPSPI4 = 0x60037000 );
 PROVIDE ( GDMA = 0x6003F000 );
 PROVIDE ( UART2  = 0x60010000 );
+PROVIDE ( DMA = 0x6003F000 );
 PROVIDE ( APB_SARADC = 0x60040000 );
 PROVIDE ( LCD_CAM    = 0x60041000 );

+ 35 - 0
components/hal/esp32s3/include/hal/clk_gate_ll.h

@@ -94,6 +94,12 @@ static inline uint32_t periph_ll_get_clk_en_mask(periph_module_t periph)
         return SYSTEM_SYSTIMER_CLK_EN;
     case PERIPH_GDMA_MODULE:
         return SYSTEM_DMA_CLK_EN;
+    case PERIPH_AES_MODULE:
+        return SYSTEM_CRYPTO_AES_CLK_EN;
+    case PERIPH_SHA_MODULE:
+        return SYSTEM_CRYPTO_SHA_CLK_EN;
+    case PERIPH_RSA_MODULE:
+        return SYSTEM_CRYPTO_RSA_CLK_EN;
     default:
         return 0;
     }
@@ -159,6 +165,30 @@ static inline uint32_t periph_ll_get_rst_en_mask(periph_module_t periph, bool en
         return SYSTEM_SYSTIMER_RST;
     case PERIPH_GDMA_MODULE:
         return SYSTEM_DMA_RST;
+    case PERIPH_AES_MODULE:
+        if (enable == true) {
+            // Clear reset on digital signature, otherwise AES unit is held in reset also.
+            return (SYSTEM_CRYPTO_AES_RST | SYSTEM_CRYPTO_DS_RST);
+        } else {
+            //Don't return other units to reset, as this pulls reset on RSA & SHA units, respectively.
+            return SYSTEM_CRYPTO_AES_RST;
+        }
+    case PERIPH_SHA_MODULE:
+        if (enable == true) {
+            // Clear reset on digital signature and HMAC, otherwise SHA is held in reset
+            return (SYSTEM_CRYPTO_SHA_RST | SYSTEM_CRYPTO_DS_RST | SYSTEM_CRYPTO_HMAC_RST | SYSTEM_DMA_RST) ;
+        } else {
+            // Don't assert reset on secure boot, otherwise AES is held in reset
+            return SYSTEM_CRYPTO_SHA_RST | SYSTEM_DMA_RST;
+        }
+    case PERIPH_RSA_MODULE:
+        if (enable == true) {
+            /* also clear reset on digital signature, otherwise RSA is held in reset */
+            return (SYSTEM_CRYPTO_RSA_RST | SYSTEM_CRYPTO_DS_RST);
+        } else {
+            /* don't reset digital signature unit, as this resets AES also */
+            return SYSTEM_CRYPTO_RSA_RST;
+        }
     default:
         return 0;
     }
@@ -177,6 +207,9 @@ static uint32_t periph_ll_get_clk_en_reg(periph_module_t periph)
     case PERIPH_UART2_MODULE:
     case PERIPH_SDMMC_MODULE:
     case PERIPH_GDMA_MODULE:
+    case PERIPH_AES_MODULE:
+    case PERIPH_SHA_MODULE:
+    case PERIPH_RSA_MODULE:
         return SYSTEM_PERIP_CLK_EN1_REG;
     default:
         return SYSTEM_PERIP_CLK_EN0_REG;
@@ -196,6 +229,10 @@ static uint32_t periph_ll_get_rst_en_reg(periph_module_t periph)
     case PERIPH_UART2_MODULE:
     case PERIPH_SDMMC_MODULE:
     case PERIPH_GDMA_MODULE:
+    /* SHA must be listed alongside AES and RSA: its reset mask includes SYSTEM_DMA_RST, which GDMA maps to this same register */
+    case PERIPH_AES_MODULE:
+    case PERIPH_SHA_MODULE:
+    case PERIPH_RSA_MODULE:
         return SYSTEM_PERIP_RST_EN1_REG;
     default:
         return SYSTEM_PERIP_RST_EN0_REG;

+ 4 - 4
components/idf_test/include/esp32s3/idf_performance_target.h

@@ -8,10 +8,10 @@
 #define IDF_PERFORMANCE_MAX_TIME_SHA1_32KB                                      1000
 #define IDF_PERFORMANCE_MAX_TIME_SHA512_32KB                                    900
 
-#define IDF_PERFORMANCE_MAX_RSA_2048KEY_PUBLIC_OP                               14000
-#define IDF_PERFORMANCE_MAX_RSA_2048KEY_PRIVATE_OP                              100000
-#define IDF_PERFORMANCE_MAX_RSA_4096KEY_PUBLIC_OP                               60000
-#define IDF_PERFORMANCE_MAX_RSA_4096KEY_PRIVATE_OP                              600000
+#define IDF_PERFORMANCE_MAX_RSA_2048KEY_PUBLIC_OP                               18000
+#define IDF_PERFORMANCE_MAX_RSA_2048KEY_PRIVATE_OP                              210000
+#define IDF_PERFORMANCE_MAX_RSA_4096KEY_PUBLIC_OP                               80000
+#define IDF_PERFORMANCE_MAX_RSA_4096KEY_PRIVATE_OP                              1500000
 
 #define IDF_PERFORMANCE_MAX_SPI_PER_TRANS_NO_POLLING                            32
 #define IDF_PERFORMANCE_MAX_SPI_PER_TRANS_NO_POLLING_NO_DMA                     30

+ 1059 - 0
components/mbedtls/port/esp32s3/aes.c

@@ -0,0 +1,1059 @@
+/**
+ * \brief AES block cipher, ESP32-S3 hardware accelerated version
+ * Based on mbedTLS FIPS-197 compliant version.
+ *
+ *  Copyright (C) 2006-2015, ARM Limited, All Rights Reserved
+ *  Additions Copyright (C) 2016-2020, Espressif Systems (Shanghai) PTE Ltd
+ *  SPDX-License-Identifier: Apache-2.0
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License"); you may
+ *  not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ *  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+/*
+ *  The AES block cipher was designed by Vincent Rijmen and Joan Daemen.
+ *
+ *  http://csrc.nist.gov/encryption/aes/rijndael/Rijndael.pdf
+ *  http://csrc.nist.gov/publications/fips/fips197/fips-197.pdf
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <sys/lock.h>
+#include "mbedtls/aes.h"
+#include "esp32s3/aes.h"
+#include "soc/cpu.h"
+#include "soc/dport_reg.h"
+#include "soc/hwcrypto_reg.h"
+#include "soc/periph_defs.h"
+#include "esp32s3/rom/lldesc.h"
+#include "esp32s3/rom/cache.h"
+#include "esp_intr_alloc.h"
+#include "driver/periph_ctrl.h"
+#include "esp_log.h"
+#include "soc/lldesc.h"
+#include "esp_heap_caps.h"
+#include "sys/param.h"
+#include "esp_pm.h"
+#include "soc/soc_memory_layout.h"
+#include "soc/gdma_reg.h"
+#include "soc/gdma_struct.h"
+#include "soc/extmem_reg.h"
+#include "freertos/FreeRTOS.h"
+#include "freertos/semphr.h"
+
+#define AES_BLOCK_BYTES 16
+#define IV_WORDS        4
+
+#define DMA_PERIPH_AES 6 /* DMA peripheral indexes */
+#define DMA_PERIPH_SHA 7
+/* Max size of each chunk to process when the output buffer is in unaligned
+   external RAM; must be a multiple of the AES block size.
+*/
+#define AES_MAX_CHUNK_WRITE_SIZE 1600
+
+/* Input over this length (in bytes) will yield and wait for an interrupt instead
+   of busy-waiting. For scale, 30000 bytes is approx 0.5 ms of processing */
+#define AES_DMA_INTR_TRIG_LEN 2000
+
+#define ESP_PUT_BE64(a, val)                                    \
+    do {                                                        \
+        *(uint64_t*)(a) = __builtin_bswap64( (uint64_t)(val) ); \
+    } while (0)
+
+
+/* DMA AES working modes.
+ * esp_aes_mode_init() writes the numeric value of the selected enumerator
+ * directly to AES_BLOCK_MODE_REG, so the values are presumably dictated by
+ * the hardware register encoding (TODO confirm against the TRM). */
+typedef enum {
+    ESP_AES_BLOCK_MODE_ECB = 0,
+    ESP_AES_BLOCK_MODE_CBC,
+    ESP_AES_BLOCK_MODE_OFB,
+    ESP_AES_BLOCK_MODE_CTR,
+    ESP_AES_BLOCK_MODE_CFB8,
+    ESP_AES_BLOCK_MODE_CFB128,
+} esp_aes_mode_t;
+
+
+#if defined(CONFIG_MBEDTLS_AES_USE_INTERRUPT)
+static SemaphoreHandle_t op_complete_sem;
+#if defined(CONFIG_PM_ENABLE)
+static esp_pm_lock_handle_t s_pm_cpu_lock;
+static esp_pm_lock_handle_t s_pm_sleep_lock;
+#endif
+#endif
+
+static const char *TAG = "esp-aes";
+
+static _lock_t s_aes_lock;
+
+/* Report whether the context holds a key size this port handles:
+   16 bytes (AES-128) or 32 bytes (AES-256). */
+static inline bool valid_key_length(const esp_aes_context *ctx)
+{
+    switch (ctx->key_bytes) {
+    case 128 / 8:
+    case 256 / 8:
+        return true;
+    default:
+        return false;
+    }
+}
+
+
+/* Take the AES lock, then ungate the AES and DMA clocks and release their resets */
+void esp_aes_acquire_hardware( void )
+{
+    const uint32_t clk_bits = SYSTEM_CRYPTO_AES_CLK_EN | SYSTEM_DMA_CLK_EN;
+    /* The digital signature reset is released as well:
+       while it is asserted the AES unit is held in reset too. */
+    const uint32_t rst_bits = SYSTEM_CRYPTO_AES_RST | SYSTEM_DMA_RST | SYSTEM_CRYPTO_DS_RST;
+
+    _lock_acquire(&s_aes_lock);
+
+    REG_SET_BIT(SYSTEM_PERIP_CLK_EN1_REG, clk_bits);
+    REG_CLR_BIT(SYSTEM_PERIP_RST_EN1_REG, rst_bits);
+}
+
+/* Put AES and DMA back into reset, gate their clocks, then release the AES lock */
+void esp_aes_release_hardware( void )
+{
+    /* Only the AES and DMA reset bits are asserted: returning other units
+       to reset would pull reset on the RSA & SHA units, respectively. */
+    const uint32_t rst_bits = SYSTEM_CRYPTO_AES_RST | SYSTEM_DMA_RST;
+    const uint32_t clk_bits = SYSTEM_CRYPTO_AES_CLK_EN | SYSTEM_DMA_CLK_EN;
+
+    REG_SET_BIT(SYSTEM_PERIP_RST_EN1_REG, rst_bits);
+    REG_CLR_BIT(SYSTEM_PERIP_CLK_EN1_REG, clk_bits);
+
+    _lock_release(&s_aes_lock);
+}
+
+
+/* Reset an AES context to the all-zero state; a NULL context is ignored */
+void esp_aes_init( esp_aes_context *ctx )
+{
+    if ( ctx != NULL ) {
+        memset( ctx, 0, sizeof( *ctx ) );
+    }
+}
+
+/* Wipe an AES context (including its stored key) by zeroing it; a NULL context is ignored */
+void esp_aes_free( esp_aes_context *ctx )
+{
+    if ( ctx != NULL ) {
+        memset( ctx, 0, sizeof( *ctx ) );
+    }
+}
+
+/*
+ * Store an AES key in the context. The same call serves encryption and
+ * decryption, because the hardware derives the key schedule itself.
+ *
+ * Only 128- and 256-bit keys are accepted: 192 bits is reported as
+ * MBEDTLS_ERR_AES_FEATURE_UNAVAILABLE, any other size as
+ * MBEDTLS_ERR_AES_INVALID_KEY_LENGTH. Returns 0 on success.
+ */
+int esp_aes_setkey( esp_aes_context *ctx, const unsigned char *key,
+                    unsigned int keybits )
+{
+    switch (keybits) {
+    case 128:
+    case 256:
+        break;
+    case 192:
+        return MBEDTLS_ERR_AES_FEATURE_UNAVAILABLE;
+    default:
+        return MBEDTLS_ERR_AES_INVALID_KEY_LENGTH;
+    }
+
+    ctx->key_bytes = keybits / 8;
+    memcpy(ctx->key, key, ctx->key_bytes);
+    /* Mark the key as not yet loaded into the hardware key registers */
+    ctx->key_in_hardware = 0;
+
+    return 0;
+}
+
+/*
+ * Helper function to copy key from esp_aes_context buffer
+ * to hardware key registers, and to program AES_MODE_REG with the
+ * key size and direction.
+ *
+ * ctx        - context whose key/key_bytes were filled in by esp_aes_setkey();
+ *              ctx->key_in_hardware is rewritten to the number of key bytes
+ *              actually written.
+ * crypt_mode - ESP_AES_ENCRYPT selects encryption, any other value decryption.
+ *
+ * Calls abort() if fewer than 16 bytes, or a different number of bytes than
+ * ctx->key_bytes, ended up being counted as written.
+ *
+ * Call only while holding esp_aes_acquire_hardware().
+ */
+static void esp_aes_setkey_hardware( esp_aes_context *ctx, int crypt_mode)
+{
+    const uint32_t MODE_DECRYPT_BIT = 4;
+    unsigned mode_reg_base = (crypt_mode == ESP_AES_ENCRYPT) ? 0 : MODE_DECRYPT_BIT;
+
+    ctx->key_in_hardware = 0;
+
+    for (int i = 0; i < ctx->key_bytes / 4; ++i) { /* one 32-bit key register per iteration */
+        REG_WRITE(AES_KEY_BASE + i * 4, *(((uint32_t *)ctx->key) + i));
+        ctx->key_in_hardware += 4;
+    }
+
+    REG_WRITE(AES_MODE_REG, mode_reg_base + ((ctx->key_bytes / 8) - 2)); /* 16-byte key -> +0, 32-byte key -> +2 */
+
+    /* Fault injection check: all words of key data should have been written to hardware */
+    if (ctx->key_in_hardware < 16
+            || ctx->key_in_hardware != ctx->key_bytes) {
+        abort();
+    }
+}
+
+/*
+ * Select the AES DMA block mode (ECB, CBC, OFB, CTR, CFB8 or CFB128)
+ * and initialize the extra register that CTR mode requires
+ */
+static inline void esp_aes_mode_init(esp_aes_mode_t mode)
+{
+    const bool is_ctr = (mode == ESP_AES_BLOCK_MODE_CTR);
+
+    /* Program the algorithm mode (CBC, CFB, ...) */
+    REG_WRITE(AES_BLOCK_MODE_REG, mode);
+
+    if (is_ctr) {
+        /* The INC function is presently hard-coded to 32 bit */
+        REG_WRITE(AES_INC_SEL_REG, 0);
+    }
+}
+
+/*
+ * Load the IV into the hardware IV registers, one 32-bit word at a time
+ * (IV_WORDS words in total)
+ */
+static inline void esp_aes_set_iv(uint8_t *iv)
+{
+    uint32_t *src_words = (uint32_t *)iv;
+    uint32_t *iv_regs = (uint32_t *)(AES_IV_BASE);
+    int word = 0;
+
+    while (word < IV_WORDS) {
+        REG_WRITE(&iv_regs[word], src_words[word]);
+        word++;
+    }
+}
+
+/*
+ * Copy the IV_WORDS words held in the hardware IV registers into iv
+ */
+static inline void esp_aes_get_iv(uint8_t *iv)
+{
+    uint32_t *iv_dst = (uint32_t *)iv;
+
+    esp_dport_access_read_buffer(iv_dst, AES_IV_BASE, IV_WORDS);
+}
+
+
+#if defined (CONFIG_MBEDTLS_AES_USE_INTERRUPT)
+/* AES completion interrupt handler: acknowledges the interrupt and gives
+   op_complete_sem so the task waiting in esp_aes_dma_wait_complete() resumes.
+   arg is unused. */
+static IRAM_ATTR void esp_aes_complete_isr(void *arg)
+{
+    /* Must start out as pdFALSE: xSemaphoreGiveFromISR() only ever sets it
+       to pdTRUE, so an uninitialized value could trigger a spurious yield. */
+    BaseType_t higher_woken = pdFALSE;
+    REG_WRITE(AES_INT_CLR_REG, 1);
+    xSemaphoreGiveFromISR(op_complete_sem, &higher_woken);
+    if (higher_woken) {
+        portYIELD_FROM_ISR();
+    }
+}
+
+/* Prepare interrupt-driven completion for one AES operation.
+
+   Clears and enables the AES interrupt, lazily creates the completion
+   semaphore and registers esp_aes_complete_isr(), and (with CONFIG_PM_ENABLE)
+   lazily creates and then acquires the CPU-frequency and no-light-sleep
+   power management locks. The locks are released again in
+   esp_aes_dma_wait_complete().
+
+   Returns ESP_OK on success, ESP_FAIL if the semaphore, the interrupt or a
+   PM lock could not be set up.
+*/
+static esp_err_t esp_aes_isr_initialise( void )
+{
+    REG_WRITE(AES_INT_CLR_REG, 1);
+    REG_WRITE(AES_INT_ENA_REG, 1);
+    if (op_complete_sem == NULL) {
+        op_complete_sem = xSemaphoreCreateBinary();
+
+        if (op_complete_sem == NULL) {
+            ESP_LOGE(TAG, "Failed to create intr semaphore");
+            return ESP_FAIL;
+        }
+
+        /* Without a registered handler the semaphore is never given and the
+           waiter would time out and abort, so treat this as a setup failure
+           and roll back so that a later call retries the whole setup. */
+        if (esp_intr_alloc(ETS_AES_INTR_SOURCE, 0, esp_aes_complete_isr, NULL, NULL) != ESP_OK) {
+            ESP_LOGE(TAG, "Failed to allocate AES interrupt");
+            vSemaphoreDelete(op_complete_sem);
+            op_complete_sem = NULL;
+            return ESP_FAIL;
+        }
+    }
+
+    /* AES is clocked proportionally to CPU clock, take power management lock */
+#ifdef CONFIG_PM_ENABLE
+    /* Each lock is checked on its own so that a retry after a partial
+       failure does not create (and leak) the sleep lock a second time. */
+    if (s_pm_sleep_lock == NULL) {
+        if (esp_pm_lock_create(ESP_PM_NO_LIGHT_SLEEP, 0, "aes_sleep", &s_pm_sleep_lock) != ESP_OK) {
+            ESP_LOGE(TAG, "Failed to create PM sleep lock");
+            return ESP_FAIL;
+        }
+    }
+    if (s_pm_cpu_lock == NULL) {
+        if (esp_pm_lock_create(ESP_PM_CPU_FREQ_MAX, 0, "aes_cpu", &s_pm_cpu_lock) != ESP_OK) {
+            ESP_LOGE(TAG, "Failed to create PM CPU lock");
+            return ESP_FAIL;
+        }
+    }
+    esp_pm_lock_acquire(s_pm_cpu_lock);
+    esp_pm_lock_acquire(s_pm_sleep_lock);
+#endif
+
+    return ESP_OK;
+}
+#endif // CONFIG_MBEDTLS_AES_USE_INTERRUPT
+
+/* Wait for AES hardware block operation to complete.
+
+   use_intr    - when true (and CONFIG_MBEDTLS_AES_USE_INTERRUPT is set), block
+                 on op_complete_sem for up to 2000 ms before polling; a timeout
+                 is logged and followed by abort().
+   output_desc - DMA descriptor whose owner field is polled until it reads 0.
+
+   In the interrupt path this also releases the PM locks that
+   esp_aes_isr_initialise() acquired. */
+static void esp_aes_dma_wait_complete(bool use_intr, lldesc_t *output_desc)
+{
+    __attribute__((unused)) volatile uint32_t dma_done; // written below but never read
+
+#if defined (CONFIG_MBEDTLS_AES_USE_INTERRUPT)
+    if (use_intr) {
+        if (!xSemaphoreTake(op_complete_sem, 2000 / portTICK_PERIOD_MS)) {
+            /* indicates a fundamental problem with driver */
+            ESP_LOGE("AES", "Timed out waiting for completion of AES Interrupt");
+            abort();
+        }
+#ifdef CONFIG_PM_ENABLE
+        esp_pm_lock_release(s_pm_cpu_lock);
+        esp_pm_lock_release(s_pm_sleep_lock);
+#endif  // CONFIG_PM_ENABLE
+    }
+#endif
+
+    /* The state register is polled even when the interrupt was used,
+       to avoid issues with AES fault injection
+    */
+    while (REG_READ(AES_STATE_REG) != AES_STATE_DONE) {
+    }
+
+
+    /* Wait for DMA write operation to complete */
+    while (1) {
+        dma_done = REG_READ(CRYPTO_DMA_INT_RAW_REG);
+        // Wait for ownership of buffer to be transferred back to CPU
+        if (  (output_desc->owner == 0) ) { // only the descriptor passed in is checked
+            break;
+        }
+    }
+}
+
+/* Init DMA related registers for AES operation and start the transfer.
+
+   input  - head of the descriptor list holding the data fed to the AES unit
+   output - head of the descriptor list receiving the AES result
+
+   Enables AES DMA mode, toggles the DMA clock enable and reset bits, routes
+   GDMA channel 0 to the AES peripheral, resets the channel and starts both
+   links. */
+static void esp_aes_dma_init(lldesc_t *input, lldesc_t *output)
+{
+    /* Enable DMA mode */
+    REG_WRITE(AES_DMA_ENABLE_REG, 1);
+    REG_CLR_BIT(SYSTEM_PERIP_CLK_EN1_REG, SYSTEM_DMA_CLK_EN);
+    REG_SET_BIT(SYSTEM_PERIP_CLK_EN1_REG, SYSTEM_DMA_CLK_EN);
+    REG_SET_BIT(SYSTEM_PERIP_RST_EN1_REG, SYSTEM_DMA_RST);
+    REG_CLR_BIT(SYSTEM_PERIP_RST_EN1_REG, SYSTEM_DMA_RST);
+
+    /* Initialize DMA registers - this is probably mostly one off initialization
+
+         Note: hardcoded to DMA channel 0
+      */
+    /* Note: burst mode has alignment requirements that we have not checked here */
+    GDMA.conf0[0].outdscr_burst_en = 0;
+    GDMA.conf0[0].indscr_burst_en = 0;
+    GDMA.conf0[0].out_data_burst_en = 0;
+    GDMA.conf0[0].in_data_burst_en = 0;
+
+    GDMA.peri_sel[0].peri_out_sel = DMA_PERIPH_AES;
+    GDMA.peri_sel[0].peri_in_sel = DMA_PERIPH_AES;
+
+    /* Set descriptor addresses: NOTE BACKWARDS AS DMA IN/OUT is reverse of AES in/out */
+    GDMA.out_link[0].addr = (uint32_t)input;
+    GDMA.in_link[0].addr = (uint32_t)output;
+
+    GDMA.sram_size[0].in_size = 3; /* 40 bytes, also minimum size for EDMA */
+    GDMA.sram_size[0].out_size = 3;
+    GDMA.conf1[0].in_ext_mem_bk_size = 0; // 16 bytes
+    GDMA.conf1[0].out_ext_mem_bk_size = 0; // 16 bytes
+
+    /*
+      Disabled debug output, kept for reference:
+
+      printf("DESC HEAD pointers IN/outlink %p OUT/inlink / %p\n", in_desc_head, out_desc_head);
+
+      printf("before starting in_desc_head owner %d out_desc_head owner %d INT_RAW 0x%08x\n",
+      in_desc_head->owner,
+      out_desc_head->owner,
+      DMA.int_raw.val);
+
+    */
+
+    //REG_SET_BIT(EXTMEM_CACHE_MMU_OWNER_REG, 1<<23);  //mark PSRAM DCache as belonging to DMA
+
+    GDMA.conf0[0].in_rst = 1; // pulse the reset bit of each direction of channel 0
+    GDMA.conf0[0].in_rst = 0;
+    GDMA.conf0[0].out_rst = 1;
+    GDMA.conf0[0].out_rst = 0;
+
+    /* Start transfer */
+    GDMA.out_link[0].start = 1;
+    GDMA.in_link[0].start = 1;
+}
+
+static int esp_aes_process_dma(esp_aes_context *ctx, const unsigned char *input, unsigned char *output, size_t len, uint8_t *stream_out);
+
+
+/* Fallback path for buffers the DMA cannot use directly.
+
+   Output buffers in external RAM need to be 16-byte aligned, and the DMA
+   cannot access input in the iCache memory range. Such buffers are replaced
+   by a bounce buffer allocated with MALLOC_CAP_DMA, and the data is processed
+   in chunks of at most AES_MAX_CHUNK_WRITE_SIZE bytes to avoid having to
+   malloc too big of a buffer.
+
+   realloc_input / realloc_output select which side gets a bounce buffer.
+   Returns 0 on success, -1 if an allocation or a chunk operation failed.
+*/
+
+static int esp_aes_process_dma_ext_ram(esp_aes_context *ctx, const unsigned char *input, unsigned char *output, size_t len, uint8_t *stream_out, bool realloc_input, bool realloc_output)
+{
+    size_t chunk_len;
+    int ret = 0;
+    int offset = 0;
+    unsigned char *input_buf = NULL;
+    unsigned char *output_buf = NULL;
+    const unsigned char *dma_input;
+    chunk_len = MIN(AES_MAX_CHUNK_WRITE_SIZE, len); /* bounce buffers are sized for the largest chunk */
+
+    if (realloc_input) {
+        input_buf = heap_caps_malloc(chunk_len, MALLOC_CAP_DMA);
+
+        if (input_buf == NULL) {
+            ESP_LOGE(TAG, "Failed to allocate memory");
+            ret = -1;
+            goto cleanup;
+        }
+    }
+
+    if (realloc_output) {
+        output_buf = heap_caps_malloc(chunk_len, MALLOC_CAP_DMA);
+
+        if (output_buf == NULL) {
+            ESP_LOGE(TAG, "Failed to allocate memory");
+            ret = -1;
+            goto cleanup;
+        }
+    } else {
+        output_buf = output; /* write straight into the caller's buffer */
+    }
+
+    while (len) {
+        chunk_len = MIN(AES_MAX_CHUNK_WRITE_SIZE, len);
+
+        /* If input needs realloc then copy it, else use the input with offset*/
+        if (realloc_input) {
+            memcpy(input_buf, input + offset, chunk_len);
+            dma_input = input_buf;
+        } else {
+            dma_input = input + offset;
+        }
+
+        if (esp_aes_process_dma(ctx, dma_input, output_buf, chunk_len, stream_out) != 0) {
+            ret = -1;
+            goto cleanup;
+        }
+
+        if (realloc_output) {
+            memcpy(output + offset, output_buf, chunk_len); /* copy the chunk out of the bounce buffer */
+        } else {
+            output_buf = output + offset + chunk_len; /* advance to where the next chunk is written */
+        }
+
+        len -= chunk_len;
+        offset += chunk_len;
+    }
+
+cleanup:
+
+    if (realloc_input) {
+        free(input_buf);
+    }
+    if (realloc_output) { /* output_buf aliases the caller's buffer otherwise and must not be freed */
+        free(output_buf);
+    }
+
+    return ret;
+}
+
+/* Encrypt/decrypt the input using DMA.
+
+   ctx        - context whose key must already be loaded into the hardware
+                (ctx->key_in_hardware == ctx->key_bytes)
+   input      - source data, len bytes
+   output     - destination, len bytes
+   len        - number of bytes to process; must be greater than zero
+   stream_out - 16-byte buffer receiving the result of the final partial
+                block; may be NULL when len is a multiple of AES_BLOCK_BYTES
+
+   The caller is expected to have set key, mode and IV beforehand. Buffers the
+   DMA cannot use directly are diverted to esp_aes_process_dma_ext_ram().
+
+   Returns 0 on success, -1 if the descriptor allocation or the interrupt
+   setup failed, MBEDTLS_ERR_AES_INVALID_INPUT_LENGTH (with output zeroed) if
+   the key is not in hardware, or the result of the ext-RAM fallback. */
+static int esp_aes_process_dma(esp_aes_context *ctx, const unsigned char *input, unsigned char *output, size_t len, uint8_t *stream_out)
+{
+    lldesc_t stream_in_desc, stream_out_desc;
+    lldesc_t *in_desc_head, *out_desc_head;
+    lldesc_t *block_desc = NULL, *block_in_desc, *block_out_desc;
+    size_t lldesc_num;
+    uint8_t stream_in[16] = {};
+    unsigned stream_bytes = len % AES_BLOCK_BYTES; // bytes which aren't in a full block
+    unsigned block_bytes = len - stream_bytes;     // bytes which are in a full block
+    unsigned char *non_icache_input = NULL; // never assigned in this function; only passed to free() at cleanup
+    unsigned blocks = (block_bytes / AES_BLOCK_BYTES) + ((stream_bytes > 0) ? 1 : 0);
+    bool use_intr = false;
+    bool input_needs_realloc = false;
+    bool output_needs_realloc = false;
+    int ret = 0;
+
+    assert(len > 0); // caller shouldn't ever have len set to zero
+    assert(stream_bytes == 0 || stream_out != NULL); // stream_out can be NULL if we're processing full block(s)
+
+    /* If no key is written to hardware yet, either the user hasn't called
+       mbedtls_aes_setkey_enc/mbedtls_aes_setkey_dec - meaning we also don't
+       know which mode to use - or a fault skipped the
+       key write to hardware. Treat this as a fatal error and zero the output block.
+    */
+    if (ctx->key_in_hardware != ctx->key_bytes) {
+        bzero(output, len);
+        return MBEDTLS_ERR_AES_INVALID_INPUT_LENGTH;
+    }
+
+    if (block_bytes > 0) {
+        /* Flush cache if input in external ram */
+#if (CONFIG_SPIRAM_USE_CAPS_ALLOC || CONFIG_SPIRAM_USE_MALLOC)
+        if (esp_ptr_external_ram(input)) {
+            Cache_WriteBack_All();
+        }
+        if (esp_ptr_external_ram(output)) {
+            if (((intptr_t)(output) & 0xF) != 0) {
+                // Non aligned ext-mem buffer
+                output_needs_realloc = true;
+            }
+        }
+#endif
+        /* DMA cannot access memory in the iCache range, copy input to internal ram */
+        if (!esp_ptr_dma_ext_capable(input) && !esp_ptr_dma_capable(input)) {
+            input_needs_realloc = true;
+        }
+
+        if (!esp_ptr_dma_ext_capable(output) && !esp_ptr_dma_capable(output)) {
+            output_needs_realloc = true;
+        }
+
+        /* If either input or output is unaccessible to the DMA then they need to be reallocated */
+        if (input_needs_realloc || output_needs_realloc) {
+            return esp_aes_process_dma_ext_ram(ctx, input, output, len, stream_out, input_needs_realloc, output_needs_realloc);
+        }
+
+
+        /* Set up dma descriptors for input and output */
+        lldesc_num = lldesc_get_required_num(block_bytes);
+
+        /* Allocate both in and out descriptors to save a malloc/free per function call */
+        block_desc = heap_caps_malloc(sizeof(lldesc_t) * lldesc_num * 2, MALLOC_CAP_DMA);
+        if (block_desc == NULL) {
+            ESP_LOGE(TAG, "Failed to allocate memory");
+            ret = -1;
+            goto cleanup;
+        }
+
+        block_in_desc = block_desc; // first half of the allocation: input descriptors
+        block_out_desc = block_desc + lldesc_num; // second half: output descriptors
+
+        lldesc_setup_link(block_desc, input, block_bytes, 0);
+        lldesc_setup_link(block_desc + lldesc_num, output, block_bytes, 0);
+    }
+
+    /* Any leftover bytes which are appended as an additional DMA list */
+    if (stream_bytes > 0) {
+        memcpy(stream_in, input + block_bytes, stream_bytes); // rest of stream_in stays zero
+
+        lldesc_setup_link(&stream_in_desc, stream_in, AES_BLOCK_BYTES, 0);
+        lldesc_setup_link(&stream_out_desc, stream_out, AES_BLOCK_BYTES, 0);
+
+        if (block_bytes > 0) {
+            /* Link with block descriptors*/
+            block_in_desc[lldesc_num - 1].empty = (uint32_t)&stream_in_desc;
+            block_out_desc[lldesc_num - 1].empty = (uint32_t)&stream_out_desc;
+        }
+    }
+
+    // block buffers are sent to DMA first, unless there aren't any
+    in_desc_head =  (block_bytes > 0) ? block_in_desc : &stream_in_desc;
+    out_desc_head = (block_bytes > 0) ? block_out_desc : &stream_out_desc;
+
+    esp_aes_dma_init(in_desc_head, out_desc_head);
+
+    /* Write the number of blocks */
+    REG_WRITE(AES_BLOCK_NUM_REG, blocks);
+
+
+#if defined (CONFIG_MBEDTLS_AES_USE_INTERRUPT)
+    /* Only use interrupt for long AES operations */
+    if (len > AES_DMA_INTR_TRIG_LEN) {
+        use_intr = true;
+        if (esp_aes_isr_initialise() == ESP_FAIL) {
+            ret = -1;
+            goto cleanup;
+        }
+    } else
+#endif
+    {
+        REG_WRITE(AES_INT_ENA_REG, 0); // short operation: completion is polled instead
+    }
+
+    /* Start AES operation */
+    REG_WRITE(AES_TRIGGER_REG, 1);
+    esp_aes_dma_wait_complete(use_intr, out_desc_head);
+
+
+
+#if (CONFIG_SPIRAM_USE_CAPS_ALLOC || CONFIG_SPIRAM_USE_MALLOC)
+    if (block_bytes > 0) {
+        if (esp_ptr_external_ram(output)) {
+            Cache_Invalidate_DCache_All();
+        }
+    }
+#endif
+
+    REG_WRITE(AES_DMA_EXIT_REG, 0);
+    /* Disable DMA mode */
+    REG_WRITE(AES_DMA_ENABLE_REG, 0);
+
+    if (stream_bytes > 0) {
+        memcpy(output + block_bytes, stream_out, stream_bytes); // only the valid bytes of the partial block reach output
+    }
+
+cleanup:
+    free(non_icache_input);
+    free(block_desc);
+    return ret;
+}
+
+
+/* Check that context, input and output pointers are all non-NULL.
+   Logs the first missing argument and returns -1; returns 0 when all are present. */
+static int esp_aes_validate_input(esp_aes_context *ctx, const unsigned char *input,
+                                  unsigned char *output )
+{
+    int status = -1;
+
+    if (ctx == NULL) {
+        ESP_LOGE(TAG, "No AES context supplied");
+    } else if (input == NULL) {
+        ESP_LOGE(TAG, "No input supplied");
+    } else if (output == NULL) {
+        ESP_LOGE(TAG, "No output supplied");
+    } else {
+        status = 0;
+    }
+
+    return status;
+}
+
+
+/*
+ * Encrypt exactly one 16-byte block in AES-ECB mode.
+ *
+ * Returns -1 if an argument is NULL, MBEDTLS_ERR_AES_INVALID_KEY_LENGTH if
+ * the context key is not 128 or 256 bits, otherwise the result of the DMA
+ * operation (0 on success).
+ */
+int esp_internal_aes_encrypt( esp_aes_context *ctx,
+                              const unsigned char input[16],
+                              unsigned char output[16] )
+{
+    if (esp_aes_validate_input(ctx, input, output) != 0) {
+        return -1;
+    }
+    if (valid_key_length(ctx) == false) {
+        return MBEDTLS_ERR_AES_INVALID_KEY_LENGTH;
+    }
+
+    /* The hardware is held only for the duration of this single block */
+    esp_aes_acquire_hardware();
+
+    ctx->key_in_hardware = 0;
+    esp_aes_setkey_hardware(ctx, ESP_AES_ENCRYPT);
+    esp_aes_mode_init(ESP_AES_BLOCK_MODE_ECB);
+    const int status = esp_aes_process_dma(ctx, input, output, AES_BLOCK_BYTES, NULL);
+
+    esp_aes_release_hardware();
+
+    return status;
+}
+
+/* Void-returning wrapper around esp_internal_aes_encrypt(); its status code is discarded */
+void esp_aes_encrypt( esp_aes_context *ctx,
+                      const unsigned char input[16],
+                      unsigned char output[16] )
+{
+    (void) esp_internal_aes_encrypt(ctx, input, output);
+}
+
+/*
+ * Decrypt exactly one 16-byte block in AES-ECB mode.
+ *
+ * Returns -1 if an argument is NULL, MBEDTLS_ERR_AES_INVALID_KEY_LENGTH if
+ * the context key is not 128 or 256 bits, otherwise the result of the DMA
+ * operation (0 on success).
+ */
+int esp_internal_aes_decrypt( esp_aes_context *ctx,
+                              const unsigned char input[16],
+                              unsigned char output[16] )
+{
+    if (esp_aes_validate_input(ctx, input, output) != 0) {
+        return -1;
+    }
+    if (valid_key_length(ctx) == false) {
+        return MBEDTLS_ERR_AES_INVALID_KEY_LENGTH;
+    }
+
+    /* The hardware is held only for the duration of this single block */
+    esp_aes_acquire_hardware();
+
+    ctx->key_in_hardware = 0;
+    esp_aes_setkey_hardware(ctx, ESP_AES_DECRYPT);
+    esp_aes_mode_init(ESP_AES_BLOCK_MODE_ECB);
+    const int status = esp_aes_process_dma(ctx, input, output, AES_BLOCK_BYTES, NULL);
+
+    esp_aes_release_hardware();
+
+    return status;
+}
+
+/* Void-returning wrapper around esp_internal_aes_decrypt(); its status code is discarded */
+void esp_aes_decrypt( esp_aes_context *ctx,
+                      const unsigned char input[16],
+                      unsigned char output[16] )
+{
+    (void) esp_internal_aes_decrypt(ctx, input, output);
+}
+
+
+/*
+ * AES-ECB: encrypt or decrypt exactly one 16-byte block.
+ *
+ * mode is handed to esp_aes_setkey_hardware() unchanged. Returns -1 if an
+ * argument is NULL, MBEDTLS_ERR_AES_INVALID_KEY_LENGTH if the context key is
+ * not 128 or 256 bits, otherwise the result of the DMA operation.
+ */
+int esp_aes_crypt_ecb( esp_aes_context *ctx,
+                       int mode,
+                       const unsigned char input[16],
+                       unsigned char output[16] )
+{
+    if (esp_aes_validate_input(ctx, input, output) != 0) {
+        return -1;
+    }
+    if (valid_key_length(ctx) == false) {
+        return MBEDTLS_ERR_AES_INVALID_KEY_LENGTH;
+    }
+
+    esp_aes_acquire_hardware();
+
+    ctx->key_in_hardware = 0;
+    esp_aes_setkey_hardware(ctx, mode);
+    esp_aes_mode_init(ESP_AES_BLOCK_MODE_ECB);
+    const int status = esp_aes_process_dma(ctx, input, output, AES_BLOCK_BYTES, NULL);
+
+    esp_aes_release_hardware();
+
+    return status;
+}
+
+/*
+ * AES-CBC buffer encryption/decryption.
+ *
+ * length must be a non-zero multiple of AES_BLOCK_BYTES, otherwise
+ * ERR_ESP_AES_INVALID_INPUT_LENGTH is returned. On success iv is overwritten
+ * with the IV read back from the hardware, so consecutive calls can be
+ * chained. Returns -1 on a NULL argument and
+ * MBEDTLS_ERR_AES_INVALID_KEY_LENGTH on an unsupported key size.
+ */
+int esp_aes_crypt_cbc( esp_aes_context *ctx,
+                       int mode,
+                       size_t length,
+                       unsigned char iv[16],
+                       const unsigned char *input,
+                       unsigned char *output )
+{
+    int r = 0;
+
+    if (esp_aes_validate_input(ctx, input, output) != 0) {
+        return -1;
+    }
+
+    if (iv == NULL) {
+        ESP_LOGE(TAG, "No IV supplied");
+        return -1;
+    }
+
+    /* CBC only works on whole blocks, and at least one of them */
+    if ( length == 0 || (length % AES_BLOCK_BYTES) != 0 ) {
+        return ERR_ESP_AES_INVALID_INPUT_LENGTH;
+    }
+
+    if (valid_key_length(ctx) == false) {
+        return MBEDTLS_ERR_AES_INVALID_KEY_LENGTH;
+    }
+
+    esp_aes_acquire_hardware();
+    ctx->key_in_hardware = 0;
+    esp_aes_setkey_hardware(ctx, mode);
+    esp_aes_mode_init(ESP_AES_BLOCK_MODE_CBC);
+    esp_aes_set_iv(iv);
+
+    r = esp_aes_process_dma(ctx, input, output, length, NULL);
+    if (r == 0) {
+        /* the IV is read back only after a successful run */
+        esp_aes_get_iv(iv);
+    }
+    esp_aes_release_hardware();
+
+    return r;
+}
+
+/*
+ * AES-CFB8 buffer encryption/decryption
+ *
+ * The part of the input that is a whole multiple of AES_BLOCK_BYTES is run
+ * through the hardware CFB8 mode in one DMA operation. Any remaining bytes
+ * are then handled one at a time in software, using a hardware ECB
+ * encryption of the IV for each byte.
+ *
+ * iv is updated in place. Returns 0 on success, -1 on a NULL argument,
+ * MBEDTLS_ERR_AES_INVALID_KEY_LENGTH on an unsupported key size, or the
+ * error returned by esp_aes_process_dma().
+ */
+int esp_aes_crypt_cfb8( esp_aes_context *ctx,
+                        int mode,
+                        size_t length,
+                        unsigned char iv[16],
+                        const unsigned char *input,
+                        unsigned char *output )
+{
+    unsigned char c;
+    unsigned char ov[17]; // 16 IV bytes plus the one ciphertext byte shifted in
+    int r = 0;
+    size_t block_bytes = length - (length % AES_BLOCK_BYTES);
+
+    if (esp_aes_validate_input(ctx, input, output)) {
+        return -1;
+    }
+
+    if (!iv) {
+        ESP_LOGE(TAG, "No IV supplied");
+        return -1;
+    }
+
+
+    if (!valid_key_length(ctx)) {
+        return MBEDTLS_ERR_AES_INVALID_KEY_LENGTH;
+    }
+
+    /* The DMA engine will only output correct IV if it runs
+       full blocks of input in CFB8 mode
+    */
+    esp_aes_acquire_hardware();
+
+    if (block_bytes > 0) {
+
+        ctx->key_in_hardware = 0;
+        esp_aes_setkey_hardware(ctx, mode);
+        esp_aes_mode_init(ESP_AES_BLOCK_MODE_CFB8);
+        esp_aes_set_iv(iv);
+        r = esp_aes_process_dma(ctx, input, output, block_bytes, NULL);
+        esp_aes_get_iv(iv); // read back before r is checked
+
+        if (r != 0) {
+            esp_aes_release_hardware();
+            return r;
+        }
+
+        length -= block_bytes;
+        input += block_bytes;
+        output += block_bytes;
+    }
+
+    // Process remaining bytes block-at-a-time in ECB mode
+    if (length > 0) {
+        ctx->key_in_hardware = 0;
+        esp_aes_setkey_hardware(ctx, MBEDTLS_AES_ENCRYPT); // the ECB step always encrypts, also when decrypting
+        esp_aes_mode_init(ESP_AES_BLOCK_MODE_ECB);
+
+        while ( length-- ) {
+            memcpy( ov, iv, 16 );
+
+            r = esp_aes_process_dma(ctx, iv, iv, AES_BLOCK_BYTES, NULL); // encrypt the IV in place
+            if (r != 0) {
+                esp_aes_release_hardware();
+                return r;
+            }
+
+            if ( mode == MBEDTLS_AES_DECRYPT ) {
+                ov[16] = *input; // when decrypting, the input byte is the ciphertext
+            }
+
+            c = *output++ = ( iv[0] ^ *input++ );
+
+            if ( mode == MBEDTLS_AES_ENCRYPT ) {
+                ov[16] = c; // when encrypting, the output byte is the ciphertext
+            }
+            memcpy( iv, ov + 1, 16 ); // next IV: previous IV shifted by one byte, ciphertext byte appended
+        }
+
+    }
+    esp_aes_release_hardware();
+
+    return r;
+}
+
+/*
+ * AES-CFB128 buffer encryption/decryption
+ *
+ * *iv_off is the offset into the current IV block left over from a previous
+ * call. Bytes up to the end of that block are processed in software first;
+ * the rest of the data goes through the hardware CFB128 mode, with iv used
+ * as the output buffer for a trailing partial block.
+ *
+ * On success iv and *iv_off are updated so the stream can be continued.
+ * Returns 0 on success, -1 on a NULL argument,
+ * MBEDTLS_ERR_AES_INVALID_KEY_LENGTH on an unsupported key size, or the
+ * error returned by esp_aes_process_dma() (in which case *iv_off is left
+ * unchanged).
+ */
+int esp_aes_crypt_cfb128( esp_aes_context *ctx,
+                          int mode,
+                          size_t length,
+                          size_t *iv_off,
+                          unsigned char iv[16],
+                          const unsigned char *input,
+                          unsigned char *output )
+
+{
+    uint8_t c;
+    int r = 0;
+    size_t stream_bytes = 0;
+    size_t n;
+
+    if (esp_aes_validate_input(ctx, input, output)) {
+        return -1;
+    }
+
+    if (!iv) {
+        ESP_LOGE(TAG, "No IV supplied");
+        return -1;
+    }
+
+    if (!iv_off) {
+        ESP_LOGE(TAG, "No IV offset supplied");
+        return -1;
+    }
+
+    if (!valid_key_length(ctx)) {
+        return MBEDTLS_ERR_AES_INVALID_KEY_LENGTH;
+    }
+
+    n = *iv_off;
+
+    /* First process the *iv_off bytes
+     * which are pending from the previous call to this API
+     */
+    while (n > 0 && length > 0) {
+        if (mode == MBEDTLS_AES_ENCRYPT) {
+            iv[n] = *output++ = *input++ ^ iv[n];
+        } else {
+            c = *input++;
+            *output++ = c ^ iv[n];
+            iv[n] = c; // the IV always keeps the ciphertext byte
+        }
+        n = (n + 1) % AES_BLOCK_BYTES; // wraps to 0 at the end of the block, which ends the loop
+        length--;
+    }
+
+
+    if (length > 0) {
+        stream_bytes = length % AES_BLOCK_BYTES;
+        esp_aes_acquire_hardware();
+        ctx->key_in_hardware = 0;
+        esp_aes_setkey_hardware(ctx, mode);
+        esp_aes_mode_init(ESP_AES_BLOCK_MODE_CFB128);
+        esp_aes_set_iv(iv);
+
+        r = esp_aes_process_dma(ctx, input, output, length, iv); // iv doubles as stream_out for the partial block
+        if (r != 0) {
+            esp_aes_release_hardware();
+            return r;
+        }
+
+        if (stream_bytes == 0) {
+            // if we didn't need the partial 'stream block' then the new IV is in the IV register
+            esp_aes_get_iv(iv);
+        } else {
+            // if we did process a final partial block the new IV is already processed via DMA (and has some bytes of output in it),
+            // In decrypt mode any partial bytes are output plaintext (iv ^ c) and need to be swapped back to ciphertext (as the next
+            // block uses ciphertext as its IV input)
+            //
+            // Note: It may be more efficient to not process the partial block via DMA in this case.
+            if (mode == MBEDTLS_AES_DECRYPT) {
+                memcpy(iv, input + length - stream_bytes, stream_bytes);
+            }
+        }
+        esp_aes_release_hardware();
+    }
+
+    *iv_off = n + stream_bytes;
+    return r;
+}
+
+/*
+ * AES-OFB (Output Feedback Mode) buffer encryption/decryption
+ *
+ * *iv_off is the offset into the current IV block left over from a previous
+ * call. Bytes up to the end of that block are XORed with iv in software; the
+ * rest of the data goes through the hardware OFB mode, with iv used as the
+ * output buffer for a trailing partial block.
+ *
+ * On success iv and *iv_off are updated so the stream can be continued.
+ * Returns 0 on success, -1 on a NULL argument,
+ * MBEDTLS_ERR_AES_INVALID_KEY_LENGTH on an unsupported key size, or the
+ * error returned by esp_aes_process_dma() (in which case *iv_off is left
+ * unchanged).
+ */
+
+int esp_aes_crypt_ofb( esp_aes_context *ctx,
+                       size_t length,
+                       size_t *iv_off,
+                       unsigned char iv[16],
+                       const unsigned char *input,
+                       unsigned char *output )
+{
+    int r = 0;
+    size_t n;
+    size_t stream_bytes = 0;
+
+    if (esp_aes_validate_input(ctx, input, output)) {
+        return -1;
+    }
+
+    if (!iv) {
+        ESP_LOGE(TAG, "No IV supplied");
+        return -1;
+    }
+
+    if (!iv_off) {
+        ESP_LOGE(TAG, "No IV offset supplied");
+        return -1;
+    }
+
+    /* Same guard as the other modes: without it a context that never had a
+       key set would reach the abort() in esp_aes_setkey_hardware() instead
+       of returning an error to the caller. */
+    if (!valid_key_length(ctx)) {
+        return MBEDTLS_ERR_AES_INVALID_KEY_LENGTH;
+    }
+
+    n = *iv_off;
+
+    /* If there is an offset then use the output of the previous AES block
+        (the updated IV) to calculate the new output */
+    while (n > 0 && length > 0) {
+        *output++ = (*input++ ^ iv[n]);
+        n = (n + 1) & 0xF;
+        length--;
+    }
+    if (length > 0) {
+        stream_bytes = (length % AES_BLOCK_BYTES);
+
+        esp_aes_acquire_hardware();
+        ctx->key_in_hardware = 0;
+        esp_aes_setkey_hardware(ctx, ESP_AES_DECRYPT);
+        esp_aes_mode_init(ESP_AES_BLOCK_MODE_OFB);
+        esp_aes_set_iv(iv);
+
+        /* iv doubles as stream_out for a trailing partial block */
+        r = esp_aes_process_dma(ctx, input, output, length, iv);
+        if (r != 0) {
+            esp_aes_release_hardware();
+            return r;
+        }
+
+        esp_aes_get_iv(iv);
+        esp_aes_release_hardware();
+    }
+
+    *iv_off = n + stream_bytes;
+
+    return r;
+}
+
+/*
+ * AES-CTR buffer encryption/decryption.
+ *
+ * *nc_off is the offset into stream_block left over from a previous call.
+ * Those leftover keystream bytes are consumed in software first; the rest
+ * of the data goes through the hardware CTR mode, with stream_block used as
+ * the output buffer for a trailing partial block.
+ *
+ * On success nonce_counter and *nc_off are updated so the stream can be
+ * continued. Returns 0 on success, -1 on a NULL argument,
+ * MBEDTLS_ERR_AES_INVALID_KEY_LENGTH on an unsupported key size, or the
+ * error returned by esp_aes_process_dma() (in which case *nc_off is left
+ * unchanged).
+ */
+int esp_aes_crypt_ctr( esp_aes_context *ctx,
+                       size_t length,
+                       size_t *nc_off,
+                       unsigned char nonce_counter[16],
+                       unsigned char stream_block[16],
+                       const unsigned char *input,
+                       unsigned char *output )
+{
+    int r = 0;
+    size_t n;
+
+    if (esp_aes_validate_input(ctx, input, output) != 0) {
+        return -1;
+    }
+
+    if (nonce_counter == NULL) {
+        ESP_LOGE(TAG, "No nonce supplied");
+        return -1;
+    }
+
+    if (nc_off == NULL) {
+        ESP_LOGE(TAG, "No nonce offset supplied");
+        return -1;
+    }
+
+    n = *nc_off;
+
+    if (valid_key_length(ctx) == false) {
+        return MBEDTLS_ERR_AES_INVALID_KEY_LENGTH;
+    }
+
+    /* Use up whatever is left of the stream block from the last operation */
+    for (; n > 0 && length > 0; length--) {
+        *output++ = (unsigned char)(*input++ ^ stream_block[n]);
+        n = (n + 1) % AES_BLOCK_BYTES;
+    }
+
+    if (length > 0) {
+        esp_aes_acquire_hardware();
+        ctx->key_in_hardware = 0;
+        esp_aes_setkey_hardware(ctx, ESP_AES_DECRYPT);
+        esp_aes_mode_init(ESP_AES_BLOCK_MODE_CTR);
+        esp_aes_set_iv(nonce_counter);
+
+        r = esp_aes_process_dma(ctx, input, output, length, stream_block);
+        if (r == 0) {
+            /* the counter is read back only after a successful run */
+            esp_aes_get_iv(nonce_counter);
+        }
+        esp_aes_release_hardware();
+
+        if (r != 0) {
+            return r;
+        }
+    }
+
+    *nc_off = n + (length % AES_BLOCK_BYTES);
+
+    return r;
+}

+ 223 - 0
components/mbedtls/port/esp32s3/bignum.c

@@ -0,0 +1,223 @@
+/**
+ * \brief  Multi-precision integer library, ESP32 S3 hardware accelerated parts
+ *
+ *  based on mbedTLS implementation
+ *
+ *  Copyright (C) 2006-2015, ARM Limited, All Rights Reserved
+ *  Additions Copyright (C) 2016-2020, Espressif Systems (Shanghai) PTE Ltd
+ *  SPDX-License-Identifier: Apache-2.0
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License"); you may
+ *  not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ *  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+#include "soc/hwcrypto_periph.h"
+#include "driver/periph_ctrl.h"
+#include <mbedtls/bignum.h>
+#include "bignum_impl.h"
+#include "soc/dport_reg.h"
+#include "soc/system_reg.h"
+#include "soc/periph_defs.h"
+#include <sys/param.h>
+
+/* Number of hardware words used for an operand of 'words' words.
+   This implementation returns the count unchanged. */
+size_t esp_mpi_hardware_words(size_t words)
+{
+    const size_t hw_words = words;
+
+    return hw_words;
+}
+
+void esp_mpi_enable_hardware_hw_op( void )
+{
+    /* Enable RSA hardware: enable the RSA peripheral module, clear the
+     * SYSTEM_RSA_MEM_PD bit in SYSTEM_RSA_PD_CTRL_REG (presumably the RSA
+     * memory power-down control - confirm against the TRM), then busy-wait
+     * until RSA_QUERY_CLEAN_REG reads 1 before returning.
+     */
+    periph_module_enable(PERIPH_RSA_MODULE);
+
+    REG_CLR_BIT(SYSTEM_RSA_PD_CTRL_REG, SYSTEM_RSA_MEM_PD);
+
+    while (DPORT_REG_READ(RSA_QUERY_CLEAN_REG) != 1) {
+    }
+    // Note: from enabling RSA clock to here takes about 1.3us
+
+
+}
+
+void esp_mpi_disable_hardware_hw_op( void )
+{
+    REG_SET_BIT(SYSTEM_RSA_PD_CTRL_REG, SYSTEM_RSA_MEM_PD);
+
+    /* Disable RSA hardware. The SYSTEM_RSA_MEM_PD bit has already been set
+     * above, so this undoes esp_mpi_enable_hardware_hw_op() in reverse
+     * order.
+     */
+    periph_module_disable(PERIPH_RSA_MODULE);
+}
+
+
+/* Copy mbedTLS MPI bignum 'mpi' to hardware memory block at 'mem_base'.
+
+   If num_words is higher than the number of words in the bignum then
+   these additional words will be zeroed in the memory buffer. If it is
+   lower, only the first num_words limbs of the bignum are copied.
+
+   Exactly num_words 32-bit words are written starting at mem_base.
+*/
+static inline void mpi_to_mem_block(uint32_t mem_base, const mbedtls_mpi *mpi, size_t num_words)
+{
+    uint32_t *pbase = (uint32_t *)mem_base;
+    const size_t copy_words = MIN(num_words, mpi->n);
+
+    /* Copy MPI data to memory block registers (unsigned index to match
+       the unsigned word counts) */
+    for (size_t i = 0; i < copy_words; i++) {
+        pbase[i] = mpi->p[i];
+    }
+
+    /* Zero any remaining memory block data */
+    for (size_t i = copy_words; i < num_words; i++) {
+        pbase[i] = 0;
+    }
+}
+
+/* Read mbedTLS MPI bignum back from hardware memory block.
+
+   Reads num_words words from the block at mem_base into x->p and zeroes
+   every limb of x beyond num_words.
+
+   NOTE(review): x is not grown here; the caller presumably guarantees
+   x->n >= num_words - confirm.
+*/
+static inline void mem_block_to_mpi(mbedtls_mpi *x, uint32_t mem_base, size_t num_words)
+{
+
+    /* Copy data from memory block registers */
+    esp_dport_access_read_buffer(x->p, mem_base, num_words);
+    /* Zero any remaining limbs in the bignum, if the buffer is bigger
+       than num_words */
+    for (size_t i = num_words; i < x->n; i++) {
+        x->p[i] = 0;
+    }
+}
+
+
+
+/* Begin an RSA operation. op_reg specifies which 'START' register
+   to write to.
+
+   Writes 1 to RSA_CLEAR_INTERRUPT_REG first and then 1 to op_reg. Does
+   not wait for the operation to finish; see wait_op_complete().
+*/
+static inline void start_op(uint32_t op_reg)
+{
+    /* Clear interrupt status */
+    DPORT_REG_WRITE(RSA_CLEAR_INTERRUPT_REG, 1);
+
+    /* Note: above REG_WRITE includes a memw, so we know any writes
+       to the memory blocks are also complete. */
+
+    DPORT_REG_WRITE(op_reg, 1);
+}
+
+/* Wait for an RSA operation to complete.
+
+   Busy-waits (no timeout) until RSA_QUERY_INTERRUPT_REG reads 1, then
+   writes 1 to RSA_CLEAR_INTERRUPT_REG.
+*/
+static inline void wait_op_complete(void)
+{
+    while (DPORT_REG_READ(RSA_QUERY_INTERRUPT_REG) != 1)
+    { }
+
+    /* clear the interrupt */
+    DPORT_REG_WRITE(RSA_CLEAR_INTERRUPT_REG, 1);
+}
+
+
+/* Read result from last MPI operation.
+
+   Blocks in wait_op_complete() until the hardware signals completion,
+   then copies z_words words from the Z memory block into Z.
+*/
+void esp_mpi_read_result_hw_op(mbedtls_mpi *Z, size_t z_words)
+{
+    wait_op_complete();
+    mem_block_to_mpi(Z, RSA_MEM_Z_BLOCK_BASE, z_words);
+}
+
+
+/* Z = (X * Y) mod M
+
+   Not an mbedTLS function
+
+   Programs the operand length and operands and starts the modular
+   multiplication; it does not wait for or read the result
+   (see esp_mpi_read_result_hw_op()).
+*/
+void esp_mpi_mul_mpi_mod_hw_op(const mbedtls_mpi *X, const mbedtls_mpi *Y, const mbedtls_mpi *M, const mbedtls_mpi *Rinv, mbedtls_mpi_uint Mprime, size_t num_words)
+{
+    DPORT_REG_WRITE(RSA_LENGTH_REG, (num_words - 1));
+
+    /* Load X, Y, M, Rinv, Mprime (Mprime is mod 2^32) */
+    mpi_to_mem_block(RSA_MEM_X_BLOCK_BASE, X, num_words);
+    mpi_to_mem_block(RSA_MEM_Y_BLOCK_BASE, Y, num_words);
+    mpi_to_mem_block(RSA_MEM_M_BLOCK_BASE, M, num_words);
+    mpi_to_mem_block(RSA_MEM_RB_BLOCK_BASE, Rinv, num_words);
+    DPORT_REG_WRITE(RSA_M_DASH_REG, Mprime);
+
+    start_op(RSA_MOD_MULT_START_REG);
+}
+
+/* Z = (X ^ Y) mod M
+
+   Programs the operand length, operands and search options, then starts
+   the modular exponentiation; it does not wait for or read the result
+   (see esp_mpi_read_result_hw_op()).
+
+   NOTE(review): for Y == 0, mbedtls_mpi_bitlen() yields 0 and y_bits - 1
+   wraps around; callers presumably handle a zero exponent before calling
+   this - confirm.
+*/
+void esp_mpi_exp_mpi_mod_hw_op(const mbedtls_mpi *X, const mbedtls_mpi *Y, const mbedtls_mpi *M, const mbedtls_mpi *Rinv, mbedtls_mpi_uint Mprime, size_t num_words)
+{
+    size_t y_bits = mbedtls_mpi_bitlen(Y);
+
+    DPORT_REG_WRITE(RSA_LENGTH_REG, (num_words - 1));
+
+    /* Load X, Y, M, Rinv, Mprime (Mprime is mod 2^32) */
+    mpi_to_mem_block(RSA_MEM_X_BLOCK_BASE, X, num_words);
+    mpi_to_mem_block(RSA_MEM_Y_BLOCK_BASE, Y, num_words);
+    mpi_to_mem_block(RSA_MEM_M_BLOCK_BASE, M, num_words);
+    mpi_to_mem_block(RSA_MEM_RB_BLOCK_BASE, Rinv, num_words);
+    DPORT_REG_WRITE(RSA_M_DASH_REG, Mprime);
+
+    /* Enable acceleration options: constant-time register cleared, search
+       enabled, search position set to the index of Y's top set bit */
+    DPORT_REG_WRITE(RSA_CONSTANT_TIME_REG, 0);
+    DPORT_REG_WRITE(RSA_SEARCH_OPEN_REG, 1);
+    DPORT_REG_WRITE(RSA_SEARCH_POS_REG, y_bits - 1);
+
+    /* Execute first stage montgomery multiplication */
+    start_op(RSA_MODEXP_START_REG);
+
+    DPORT_REG_WRITE(RSA_SEARCH_OPEN_REG, 0);
+}
+
+
+/* Z = X * Y (plain multiplication; starts the operation, does not wait) */
+void esp_mpi_mul_mpi_hw_op(const mbedtls_mpi *X, const mbedtls_mpi *Y, size_t num_words)
+{
+    /* Copy X (right-extended) & Y (left-extended) to memory block.
+       Y is written into the Z block, num_words words above its base. */
+    mpi_to_mem_block(RSA_MEM_X_BLOCK_BASE, X, num_words);
+    mpi_to_mem_block(RSA_MEM_Z_BLOCK_BASE + num_words * 4, Y, num_words);
+    /* NB: as Y is left-extended, we don't zero the bottom num_words words of the Z block.
+       This is OK for now because zeroing is done by hardware when we do esp_mpi_acquire_hardware().
+    */
+    DPORT_REG_WRITE(RSA_LENGTH_REG, (num_words * 2 - 1));
+    start_op(RSA_MULT_START_REG);
+}
+
+
+
+/**
+ * @brief Special-case of (X * Y), where we use hardware montgomery mod
+ *        multiplication to calculate result where either X or Y are >2048 bits so
+ *        can't use the standard multiplication method.
+ *
+ * The modulus block is filled with all-ones words, Mprime is set to 1 and
+ * Rinv to 1 before the modular multiplication is started. The function
+ * does not wait for or read the result.
+ *
+ * @param X          first operand
+ * @param Y          second operand
+ * @param num_words  operand length in 32-bit words
+ */
+void esp_mpi_mult_mpi_failover_mod_mult_hw_op(const mbedtls_mpi *X, const mbedtls_mpi *Y, size_t num_words)
+{
+    /* M = 2^(32 * num_words) - 1, so block is entirely FF */
+    for (size_t i = 0; i < num_words; i++) {
+        DPORT_REG_WRITE(RSA_MEM_M_BLOCK_BASE + i * 4, UINT32_MAX);
+    }
+
+    /* Mprime = 1 */
+    DPORT_REG_WRITE(RSA_M_DASH_REG, 1);
+    DPORT_REG_WRITE(RSA_LENGTH_REG, num_words - 1);
+
+    /* Load X & Y */
+    mpi_to_mem_block(RSA_MEM_X_BLOCK_BASE, X, num_words);
+    mpi_to_mem_block(RSA_MEM_Y_BLOCK_BASE, Y, num_words);
+
+    /* Rinv = 1, write first word */
+    DPORT_REG_WRITE(RSA_MEM_RB_BLOCK_BASE, 1);
+
+    /* Zero out rest of the Rinv words */
+    for (size_t i = 1; i < num_words; i++) {
+        DPORT_REG_WRITE(RSA_MEM_RB_BLOCK_BASE + i * 4, 0);
+    }
+
+    start_op(RSA_MOD_MULT_START_REG);
+}

+ 255 - 0
components/mbedtls/port/esp32s3/esp_sha1.c

@@ -0,0 +1,255 @@
+/*
+ *  SHA-1 implementation with hardware ESP32 support added.
+ *  Uses mbedTLS software implementation for failover when concurrent
+ *  SHA operations are in use.
+ *
+ *  Copyright (C) 2006-2015, ARM Limited, All Rights Reserved
+ *  Additions Copyright (C) 2016-2020, Espressif Systems (Shanghai) PTE LTD
+ *  SPDX-License-Identifier: Apache-2.0
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License"); you may
+ *  not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ *  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+/*
+ *  The SHA-1 standard was published by NIST in 1993.
+ *
+ *  http://www.itl.nist.gov/fipspubs/fip180-1.htm
+ */
+
+#if !defined(MBEDTLS_CONFIG_FILE)
+#include "mbedtls/config.h"
+#else
+#include MBEDTLS_CONFIG_FILE
+#endif
+
+#if defined(MBEDTLS_SHA1_C) && defined(MBEDTLS_SHA1_ALT)
+
+#include "mbedtls/sha1.h"
+
+#include <string.h>
+
+#if defined(MBEDTLS_SELF_TEST)
+#if defined(MBEDTLS_PLATFORM_C)
+#include "mbedtls/platform.h"
+#else
+#include <stdio.h>
+#define mbedtls_printf printf
+#endif /* MBEDTLS_PLATFORM_C */
+#endif /* MBEDTLS_SELF_TEST */
+
+#include "esp32s3/sha.h"
+
+/* Implementation that should never be optimized out by the compiler.
+ *
+ * Writes n zero bytes starting at v, one byte at a time, through a
+ * volatile-qualified pointer.
+ */
+static void mbedtls_zeroize( void *v, size_t n )
+{
+    volatile unsigned char *p = (unsigned char *)v; while ( n-- ) *p++ = 0;
+}
+
+/*
+ * 32-bit integer manipulation macros (big endian)
+ */
+
+#ifndef PUT_UINT32_BE
+#define PUT_UINT32_BE(n,b,i)                            \
+{                                                       \
+    (b)[(i)    ] = (unsigned char) ( (n) >> 24 );       \
+    (b)[(i) + 1] = (unsigned char) ( (n) >> 16 );       \
+    (b)[(i) + 2] = (unsigned char) ( (n) >>  8 );       \
+    (b)[(i) + 3] = (unsigned char) ( (n)       );       \
+}
+#endif
+
+/* Put a SHA-1 context into its all-zero initial state. */
+void mbedtls_sha1_init( mbedtls_sha1_context *ctx )
+{
+    memset( ctx, 0, sizeof( *ctx ) );
+}
+
+/* Wipe a SHA-1 context. A NULL context is accepted and ignored. */
+void mbedtls_sha1_free( mbedtls_sha1_context *ctx )
+{
+    if ( ctx != NULL ) {
+        mbedtls_zeroize( ctx, sizeof( *ctx ) );
+    }
+}
+
+/* Copy the complete state of the SHA-1 context src into dst. */
+void mbedtls_sha1_clone( mbedtls_sha1_context *dst,
+                         const mbedtls_sha1_context *src )
+{
+    memcpy( dst, src, sizeof( *dst ) );
+}
+
+/*
+ * SHA-1 context setup
+ *
+ * Clears the whole context (byte counters included) and selects SHA1 as
+ * the hardware mode. Always returns 0.
+ */
+int mbedtls_sha1_starts_ret( mbedtls_sha1_context *ctx )
+{
+    /* The memset covers ctx->total[] as well, so no separate reset is needed */
+    memset( ctx, 0, sizeof( *ctx ) );
+
+    ctx->mode = SHA1;
+
+    return 0;
+}
+
+#if !defined(MBEDTLS_DEPRECATED_REMOVED)
+/* Wrapper around mbedtls_sha1_starts_ret() that discards the result. */
+void mbedtls_sha1_starts( mbedtls_sha1_context *ctx )
+{
+    (void) mbedtls_sha1_starts_ret( ctx );
+}
+#endif
+
+/* Forward one SHA-1 DMA request to esp_sha_dma(): buf/buf_len and
+   data/len are passed through unchanged, together with the context's
+   first_block flag. Returns the esp_sha_dma() result. */
+static int esp_internal_sha1_dma_process(mbedtls_sha1_context *ctx,
+        const uint8_t *data, size_t len,
+        uint8_t *buf, size_t buf_len)
+{
+    const int status = esp_sha_dma(SHA1, data, len, buf, buf_len, ctx->first_block);
+
+    return status;
+}
+
+/* Hash one 64-byte block on the SHA peripheral while holding the SHA
+   hardware; returns the esp_sha_dma() result.
+
+   NOTE(review): unlike mbedtls_sha1_update_ret(), this path neither writes
+   ctx->state to the hardware before the DMA call nor reads it back
+   afterwards - confirm that this is intended. */
+int mbedtls_internal_sha1_process( mbedtls_sha1_context *ctx, const unsigned char data[64] )
+{
+    int status;
+
+    esp_sha_acquire_hardware();
+    status = esp_sha_dma(ctx->mode, data, 64, 0, 0, ctx->first_block);
+    esp_sha_release_hardware();
+
+    return status;
+}
+
+#if !defined(MBEDTLS_DEPRECATED_REMOVED)
+/* Wrapper around mbedtls_internal_sha1_process() that discards the result. */
+void mbedtls_sha1_process( mbedtls_sha1_context *ctx,
+                           const unsigned char data[64] )
+{
+    (void) mbedtls_internal_sha1_process( ctx, data );
+}
+#endif
+
+/*
+ * SHA-1 process buffer
+ *
+ * Feeds ilen bytes from input into the hash. Data that completes the
+ * partially filled ctx->buffer, plus every whole 64-byte block of the
+ * remaining input, is hashed in one DMA request; a trailing partial block
+ * is kept in ctx->buffer for the next call.
+ *
+ * Returns 0 on success (also for empty or NULL input), otherwise the
+ * error reported by the DMA request.
+ */
+int mbedtls_sha1_update_ret( mbedtls_sha1_context *ctx, const unsigned char *input, size_t ilen )
+{
+    int status;
+    size_t room;
+    uint32_t buffered, bulk_len, pending_len = 0;
+
+    /* Nothing to hash */
+    if ( ilen == 0 || input == NULL ) {
+        return 0;
+    }
+
+    /* Bytes already waiting in ctx->buffer, and the space left in it */
+    buffered = ctx->total[0] & 0x3F;
+    room = 64 - buffered;
+
+    /* Update the running byte count, carrying into total[1] on wrap */
+    ctx->total[0] += (uint32_t) ilen;
+    ctx->total[0] &= 0xFFFFFFFF;
+    if ( ctx->total[0] < (uint32_t) ilen ) {
+        ctx->total[1]++;
+    }
+
+    /* Top up a partially filled buffer so it becomes one full block */
+    if ( buffered != 0 && ilen >= room ) {
+        memcpy( (void *) (ctx->buffer + buffered), input, room );
+
+        input += room;
+        ilen -= room;
+        buffered = 0;
+        pending_len = 64;
+    }
+
+    /* Whole blocks that can be taken straight from the caller's input */
+    bulk_len = (ilen / 64) * 64;
+
+    if ( bulk_len != 0 || pending_len != 0 ) {
+
+        esp_sha_acquire_hardware();
+
+        if (ctx->sha_state == ESP_SHA1_STATE_INIT) {
+            /* First request for this context */
+            ctx->first_block = true;
+            ctx->sha_state = ESP_SHA1_STATE_IN_PROCESS;
+        } else if (ctx->sha_state == ESP_SHA1_STATE_IN_PROCESS) {
+            /* Continue from the digest state saved by the previous request */
+            ctx->first_block = false;
+            esp_sha_write_digest_state(SHA1, ctx->state);
+        }
+
+        status = esp_internal_sha1_dma_process(ctx, input, bulk_len, ctx->buffer, pending_len);
+
+        /* Save the digest state before giving the hardware up */
+        esp_sha_read_digest_state(SHA1, ctx->state);
+
+        esp_sha_release_hardware();
+
+        if (status != 0) {
+            return status;
+        }
+
+    }
+
+    /* Keep the trailing partial block for the next call */
+    if ( ilen > 0 ) {
+        memcpy( (void *) (ctx->buffer + buffered), input + bulk_len, ilen - bulk_len );
+    }
+
+    return 0;
+}
+
+#if !defined(MBEDTLS_DEPRECATED_REMOVED)
+/* Wrapper around mbedtls_sha1_update_ret() that discards the result. */
+void mbedtls_sha1_update( mbedtls_sha1_context *ctx,
+                          const unsigned char *input,
+                          size_t ilen )
+{
+    (void) mbedtls_sha1_update_ret( ctx, input, ilen );
+}
+#endif
+
+/* Padding source for the final block(s): a single 0x80 byte followed by
+   zeros (the remaining 63 elements are zero-initialised). */
+static const unsigned char sha1_padding[64] = { 0x80 };
+
+/*
+ * SHA-1 final digest
+ *
+ * Appends the padding and the 64-bit big-endian message length in bits
+ * through mbedtls_sha1_update_ret(), then copies the first 20 bytes of
+ * ctx->state to output. Returns 0 on success or the first update error.
+ */
+int mbedtls_sha1_finish_ret( mbedtls_sha1_context *ctx, unsigned char output[20] )
+{
+    int ret;
+    uint32_t used, pad_len;
+    uint32_t bits_hi, bits_lo;
+    unsigned char length_be[8];
+
+    /* Message length in bits, derived from the byte counters */
+    bits_hi = ( ctx->total[0] >> 29 )
+              | ( ctx->total[1] <<  3 );
+    bits_lo = ( ctx->total[0] <<  3 );
+
+    PUT_UINT32_BE( bits_hi, length_be, 0 );
+    PUT_UINT32_BE( bits_lo, length_be, 4 );
+
+    /* Pad so that exactly 8 bytes remain in the last block for the length */
+    used = ctx->total[0] & 0x3F;
+    if ( used < 56 ) {
+        pad_len = 56 - used;
+    } else {
+        pad_len = 120 - used;
+    }
+
+    ret = mbedtls_sha1_update_ret( ctx, sha1_padding, pad_len );
+    if ( ret != 0 ) {
+        return ret;
+    }
+
+    ret = mbedtls_sha1_update_ret( ctx, length_be, 8 );
+    if ( ret != 0 ) {
+        return ret;
+    }
+
+    memcpy(output, ctx->state, 20);
+
+    return ret;
+}
+
+#if !defined(MBEDTLS_DEPRECATED_REMOVED)
+/* Wrapper around mbedtls_sha1_finish_ret() that discards the result. */
+void mbedtls_sha1_finish( mbedtls_sha1_context *ctx,
+                          unsigned char output[20] )
+{
+    (void) mbedtls_sha1_finish_ret( ctx, output );
+}
+#endif
+
+#endif /* MBEDTLS_SHA1_C && MBEDTLS_SHA1_ALT */

+ 267 - 0
components/mbedtls/port/esp32s3/esp_sha256.c

@@ -0,0 +1,267 @@
+/*
+ *  SHA-256 implementation with hardware ESP32 support added.
+ *  Uses mbedTLS software implementation for failover when concurrent
+ *  SHA operations are in use.
+ *
+ *  Copyright (C) 2006-2015, ARM Limited, All Rights Reserved
+ *  Additions Copyright (C) 2016-2020, Espressif Systems (Shanghai) PTE LTD
+ *  SPDX-License-Identifier: Apache-2.0
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License"); you may
+ *  not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ *  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+
+/*
+ *  The SHA-256 Secure Hash Standard was published by NIST in 2002.
+ *
+ *  http://csrc.nist.gov/publications/fips/fips180-2/fips180-2.pdf
+ */
+
+#if !defined(MBEDTLS_CONFIG_FILE)
+#include "mbedtls/config.h"
+#else
+#include MBEDTLS_CONFIG_FILE
+#endif
+
+#if defined(MBEDTLS_SHA256_C) && defined(MBEDTLS_SHA256_ALT)
+
+#include "mbedtls/sha256.h"
+
+#include <string.h>
+
+#if defined(MBEDTLS_SELF_TEST)
+#if defined(MBEDTLS_PLATFORM_C)
+#include "mbedtls/platform.h"
+#else
+#include <stdio.h>
+#define mbedtls_printf printf
+#endif /* MBEDTLS_PLATFORM_C */
+#endif /* MBEDTLS_SELF_TEST */
+
+#include "esp32s3/sha.h"
+
+/* Implementation that should never be optimized out by the compiler.
+ *
+ * Writes n zero bytes starting at v, one byte at a time, through a
+ * volatile-qualified pointer.
+ */
+static void mbedtls_zeroize( void *v, size_t n )
+{
+    volatile unsigned char *p = v; while ( n-- ) *p++ = 0;
+}
+
+/*
+ * 32-bit integer manipulation macros (big endian)
+ *
+ * GET_UINT32_BE(n, b, i) loads bytes b[i..i+3] into n, most significant
+ * byte first; PUT_UINT32_BE(n, b, i) stores n into b[i..i+3] in the same
+ * order. Both are only defined here if not already defined, and both
+ * evaluate their arguments more than once.
+ */
+#ifndef GET_UINT32_BE
+#define GET_UINT32_BE(n,b,i)                            \
+do {                                                    \
+    (n) = ( (uint32_t) (b)[(i)    ] << 24 )             \
+        | ( (uint32_t) (b)[(i) + 1] << 16 )             \
+        | ( (uint32_t) (b)[(i) + 2] <<  8 )             \
+        | ( (uint32_t) (b)[(i) + 3]       );            \
+} while( 0 )
+#endif
+
+#ifndef PUT_UINT32_BE
+#define PUT_UINT32_BE(n,b,i)                            \
+do {                                                    \
+    (b)[(i)    ] = (unsigned char) ( (n) >> 24 );       \
+    (b)[(i) + 1] = (unsigned char) ( (n) >> 16 );       \
+    (b)[(i) + 2] = (unsigned char) ( (n) >>  8 );       \
+    (b)[(i) + 3] = (unsigned char) ( (n)       );       \
+} while( 0 )
+#endif
+
+/* Put a SHA-256 context into its all-zero initial state. */
+void mbedtls_sha256_init( mbedtls_sha256_context *ctx )
+{
+    memset( ctx, 0, sizeof( *ctx ) );
+}
+
+/* Wipe a SHA-256 context. A NULL context is accepted and ignored. */
+void mbedtls_sha256_free( mbedtls_sha256_context *ctx )
+{
+    if ( ctx != NULL ) {
+        mbedtls_zeroize( ctx, sizeof( *ctx ) );
+    }
+}
+
+/* Copy the complete state of the SHA-256 context src into dst
+   (dst and src are expected to be distinct objects). */
+void mbedtls_sha256_clone( mbedtls_sha256_context *dst,
+                           const mbedtls_sha256_context *src )
+{
+    memcpy( dst, src, sizeof( *dst ) );
+}
+
+/*
+ * SHA-256 context setup
+ *
+ * Clears the whole context and selects the hardware mode: SHA2_224 when
+ * is224 is non-zero, SHA2_256 otherwise. Always returns 0.
+ */
+int mbedtls_sha256_starts_ret( mbedtls_sha256_context *ctx, int is224 )
+{
+    memset( ctx, 0, sizeof( *ctx ) );
+
+    ctx->mode = is224 ? SHA2_224 : SHA2_256;
+
+    return 0;
+}
+
+#if !defined(MBEDTLS_DEPRECATED_REMOVED)
+/* Wrapper around mbedtls_sha256_starts_ret() that discards the result. */
+void mbedtls_sha256_starts( mbedtls_sha256_context *ctx,
+                            int is224 )
+{
+    (void) mbedtls_sha256_starts_ret( ctx, is224 );
+}
+#endif
+
+
+/* Hash one 64-byte block on the SHA peripheral while holding the SHA
+   hardware; returns the esp_sha_dma() result.
+
+   NOTE(review): unlike mbedtls_sha256_update_ret(), this path neither
+   writes ctx->state to the hardware before the DMA call nor reads it back
+   afterwards - confirm that this is intended. */
+int mbedtls_internal_sha256_process( mbedtls_sha256_context *ctx, const unsigned char data[64] )
+{
+    int status;
+
+    esp_sha_acquire_hardware();
+    status = esp_sha_dma(ctx->mode, data, 64, 0, 0, ctx->first_block);
+    esp_sha_release_hardware();
+
+    return status;
+}
+
+#if !defined(MBEDTLS_DEPRECATED_REMOVED)
+/* Wrapper around mbedtls_internal_sha256_process() that discards the result. */
+void mbedtls_sha256_process( mbedtls_sha256_context *ctx,
+                             const unsigned char data[64] )
+{
+    (void) mbedtls_internal_sha256_process( ctx, data );
+}
+#endif
+
+/*
+ * SHA-256 process buffer
+ *
+ * Feeds ilen bytes from input into the hash. Data that completes the
+ * partially filled ctx->buffer, plus every whole 64-byte block of the
+ * remaining input, is hashed in one DMA request; a trailing partial block
+ * is kept in ctx->buffer for the next call.
+ *
+ * Returns 0 on success (also for ilen == 0), otherwise the error reported
+ * by esp_sha_dma().
+ */
+int mbedtls_sha256_update_ret( mbedtls_sha256_context *ctx, const unsigned char *input,
+                               size_t ilen )
+{
+    int status = 0;
+    size_t room;
+    uint32_t buffered, bulk_len, pending_len = 0;
+
+    /* Nothing to hash */
+    if ( ilen == 0 ) {
+        return 0;
+    }
+
+    /* Bytes already waiting in ctx->buffer, and the space left in it */
+    buffered = ctx->total[0] & 0x3F;
+    room = 64 - buffered;
+
+    /* Update the running byte count, carrying into total[1] on wrap */
+    ctx->total[0] += (uint32_t) ilen;
+    ctx->total[0] &= 0xFFFFFFFF;
+    if ( ctx->total[0] < (uint32_t) ilen ) {
+        ctx->total[1]++;
+    }
+
+    /* Top up data pending from a previous call so it becomes one full block */
+    if ( buffered != 0 && ilen >= room ) {
+        memcpy( (void *) (ctx->buffer + buffered), input, room );
+
+        input += room;
+        ilen -= room;
+        buffered = 0;
+        pending_len = 64;
+    }
+
+    /* Whole blocks that can be taken straight from the caller's input */
+    bulk_len = (ilen / 64) * 64;
+
+    if ( bulk_len != 0 || pending_len != 0 ) {
+        esp_sha_acquire_hardware();
+
+        if (ctx->sha_state == ESP_SHA256_STATE_INIT) {
+            /* First request for this context */
+            ctx->first_block = true;
+            ctx->sha_state = ESP_SHA256_STATE_IN_PROCESS;
+        } else if (ctx->sha_state == ESP_SHA256_STATE_IN_PROCESS) {
+            /* Continue from the digest state saved by the previous request */
+            ctx->first_block = false;
+            esp_sha_write_digest_state(ctx->mode, ctx->state);
+        }
+
+        status = esp_sha_dma(ctx->mode, input, bulk_len,  ctx->buffer, pending_len, ctx->first_block);
+
+        /* Save the digest state before giving the hardware up */
+        esp_sha_read_digest_state(ctx->mode, ctx->state);
+
+        esp_sha_release_hardware();
+
+        if (status != 0) {
+            return status;
+        }
+    }
+
+    /* Keep the trailing partial block for the next call */
+    if ( ilen > 0 ) {
+        memcpy( (void *) (ctx->buffer + buffered), input + bulk_len, ilen - bulk_len );
+    }
+
+    return 0;
+}
+
+#if !defined(MBEDTLS_DEPRECATED_REMOVED)
+/* Wrapper around mbedtls_sha256_update_ret() that discards the result. */
+void mbedtls_sha256_update( mbedtls_sha256_context *ctx,
+                            const unsigned char *input,
+                            size_t ilen )
+{
+    (void) mbedtls_sha256_update_ret( ctx, input, ilen );
+}
+#endif
+
+/* Padding source for the final block(s): a single 0x80 byte followed by
+   zeros (the remaining 63 elements are zero-initialised). */
+static const unsigned char sha256_padding[64] = { 0x80 };
+
+/*
+ * SHA-256 final digest
+ *
+ * Appends the padding and the 64-bit big-endian message length in bits
+ * through mbedtls_sha256_update_ret(), then copies 32 bytes of ctx->state
+ * to output (32 bytes are written whatever mode was selected). Returns 0
+ * on success or the first update error.
+ */
+int mbedtls_sha256_finish_ret( mbedtls_sha256_context *ctx, unsigned char output[32] )
+{
+    int ret;
+    uint32_t used, pad_len;
+    uint32_t bits_hi, bits_lo;
+    unsigned char length_be[8];
+
+    /* Message length in bits, derived from the byte counters */
+    bits_hi = ( ctx->total[0] >> 29 )
+              | ( ctx->total[1] <<  3 );
+    bits_lo = ( ctx->total[0] <<  3 );
+
+    PUT_UINT32_BE( bits_hi, length_be, 0 );
+    PUT_UINT32_BE( bits_lo, length_be, 4 );
+
+    /* Pad so that exactly 8 bytes remain in the last block for the length */
+    used = ctx->total[0] & 0x3F;
+    if ( used < 56 ) {
+        pad_len = 56 - used;
+    } else {
+        pad_len = 120 - used;
+    }
+
+    ret = mbedtls_sha256_update_ret( ctx, sha256_padding, pad_len );
+    if ( ret != 0 ) {
+        return ret;
+    }
+
+    ret = mbedtls_sha256_update_ret( ctx, length_be, 8 );
+    if ( ret != 0 ) {
+        return ret;
+    }
+
+    memcpy(output, ctx->state, 32);
+
+    return ret;
+}
+
+#if !defined(MBEDTLS_DEPRECATED_REMOVED)
+/* Wrapper around mbedtls_sha256_finish_ret() that discards the result. */
+void mbedtls_sha256_finish( mbedtls_sha256_context *ctx,
+                            unsigned char output[32] )
+{
+    (void) mbedtls_sha256_finish_ret( ctx, output );
+}
+#endif
+
+#endif /* MBEDTLS_SHA256_C && MBEDTLS_SHA256_ALT */

+ 317 - 0
components/mbedtls/port/esp32s3/esp_sha512.c

@@ -0,0 +1,317 @@
+/*
+ *  SHA-512 implementation with hardware ESP32 support added.
+ *  Uses mbedTLS software implementation for failover when concurrent
+ *  SHA operations are in use.
+ *
+ *  Copyright (C) 2006-2015, ARM Limited, All Rights Reserved
+ *  Additions Copyright (C) 2016-2020, Espressif Systems (Shanghai) PTE LTD
+ *  SPDX-License-Identifier: Apache-2.0
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License"); you may
+ *  not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ *  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+
+/*
+ *  The SHA-512 Secure Hash Standard was published by NIST in 2002.
+ *
+ *  http://csrc.nist.gov/publications/fips/fips180-2/fips180-2.pdf
+ */
+
+#if !defined(MBEDTLS_CONFIG_FILE)
+#include "mbedtls/config.h"
+#else
+#include MBEDTLS_CONFIG_FILE
+#endif
+
+#if defined(MBEDTLS_SHA512_C) && defined(MBEDTLS_SHA512_ALT)
+
+#include "mbedtls/sha512.h"
+
+#if defined(_MSC_VER) || defined(__WATCOMC__)
+#define UL64(x) x##ui64
+#else
+#define UL64(x) x##ULL
+#endif
+
+#include <string.h>
+
+#if defined(MBEDTLS_SELF_TEST)
+#if defined(MBEDTLS_PLATFORM_C)
+#include "mbedtls/platform.h"
+#else
+#include <stdio.h>
+#define mbedtls_printf printf
+#endif /* MBEDTLS_PLATFORM_C */
+#endif /* MBEDTLS_SELF_TEST */
+
+#include "esp32s3/sha.h"
+
+/* Implementation that should never be optimized out by the compiler.
+ *
+ * Writes n zero bytes starting at v, one byte at a time, through a
+ * volatile-qualified pointer.
+ */
+static void mbedtls_zeroize( void *v, size_t n )
+{
+    volatile unsigned char *p = v; while ( n-- ) *p++ = 0;
+}
+
+/*
+ * 64-bit integer manipulation macros (big endian)
+ */
+#ifndef PUT_UINT64_BE
+#define PUT_UINT64_BE(n,b,i)                            \
+{                                                       \
+    (b)[(i)    ] = (unsigned char) ( (n) >> 56 );       \
+    (b)[(i) + 1] = (unsigned char) ( (n) >> 48 );       \
+    (b)[(i) + 2] = (unsigned char) ( (n) >> 40 );       \
+    (b)[(i) + 3] = (unsigned char) ( (n) >> 32 );       \
+    (b)[(i) + 4] = (unsigned char) ( (n) >> 24 );       \
+    (b)[(i) + 5] = (unsigned char) ( (n) >> 16 );       \
+    (b)[(i) + 6] = (unsigned char) ( (n) >>  8 );       \
+    (b)[(i) + 7] = (unsigned char) ( (n)       );       \
+}
+#endif /* PUT_UINT64_BE */
+
+/* Select the hardware mode for a SHA-512 family context. SHA2_384,
+   SHA2_512224, SHA2_512256 and SHA2_512T are stored as given; any other
+   value selects SHA2_512. */
+void esp_sha512_set_mode(mbedtls_sha512_context *ctx, esp_sha_type type)
+{
+    if (type == SHA2_384 || type == SHA2_512224 ||
+            type == SHA2_512256 || type == SHA2_512T) {
+        ctx->mode = type;
+    } else {
+        ctx->mode = SHA2_512;
+    }
+}
+
+
+/* For SHA512/t mode the initial hash value will depend on t.
+   Stores t_val in the context; it is used by mbedtls_sha512_update_ret()
+   when the mode is SHA2_512T. */
+void esp_sha512_set_t( mbedtls_sha512_context *ctx, uint16_t t_val)
+{
+    ctx->t_val = t_val;
+}
+
+/* Put a SHA-512 context into its all-zero initial state. */
+void mbedtls_sha512_init( mbedtls_sha512_context *ctx )
+{
+    memset( ctx, 0, sizeof( *ctx ) );
+}
+
+/* Wipe a SHA-512 context. A NULL context is accepted and ignored. */
+void mbedtls_sha512_free( mbedtls_sha512_context *ctx )
+{
+    if ( ctx != NULL ) {
+        mbedtls_zeroize( ctx, sizeof( *ctx ) );
+    }
+}
+
+/* Copy the complete state of the SHA-512 context src into dst. */
+void mbedtls_sha512_clone( mbedtls_sha512_context *dst,
+                           const mbedtls_sha512_context *src )
+{
+    memcpy( dst, src, sizeof( *dst ) );
+}
+
+/*
+ * SHA-512 context setup
+ *
+ * Wipes the whole context and selects the hardware mode: SHA2_384 when
+ * is384 is non-zero, SHA2_512 otherwise. Always returns 0.
+ */
+int mbedtls_sha512_starts_ret( mbedtls_sha512_context *ctx, int is384 )
+{
+    mbedtls_zeroize( ctx, sizeof( *ctx ) );
+
+    ctx->mode = is384 ? SHA2_384 : SHA2_512;
+
+    return 0;
+}
+
+#if !defined(MBEDTLS_DEPRECATED_REMOVED)
+/* Wrapper around mbedtls_sha512_starts_ret() that discards the result. */
+void mbedtls_sha512_starts( mbedtls_sha512_context *ctx,
+                            int is384 )
+{
+    (void) mbedtls_sha512_starts_ret( ctx, is384 );
+}
+#endif
+
+/* Forward one DMA request to esp_sha_dma() using the context's mode and
+   first_block flag; buf/buf_len and data/len are passed through
+   unchanged. Returns the esp_sha_dma() result. */
+static int esp_internal_sha512_dma_process(mbedtls_sha512_context *ctx,
+        const uint8_t *data, size_t len,
+        uint8_t *buf, size_t buf_len)
+{
+    const int status = esp_sha_dma(ctx->mode, data, len, buf, buf_len, ctx->first_block);
+
+    return status;
+}
+
+/* Hash one 128-byte block on the SHA peripheral while holding the SHA
+   hardware; returns the result of the DMA request.
+
+   NOTE(review): unlike mbedtls_sha512_update_ret(), this path neither
+   writes ctx->state to the hardware before the DMA call nor reads it back
+   afterwards - confirm that this is intended. */
+int mbedtls_internal_sha512_process( mbedtls_sha512_context *ctx, const unsigned char data[128] )
+{
+    int status;
+
+    esp_sha_acquire_hardware();
+    status = esp_internal_sha512_dma_process(ctx, data, 128, 0, 0);
+    esp_sha_release_hardware();
+
+    return status;
+}
+
+#if !defined(MBEDTLS_DEPRECATED_REMOVED)
+/* Wrapper around mbedtls_internal_sha512_process() that discards the result. */
+void mbedtls_sha512_process( mbedtls_sha512_context *ctx,
+                             const unsigned char data[128] )
+{
+    (void) mbedtls_internal_sha512_process( ctx, data );
+}
+#endif
+
+/*
+ * SHA-512 process buffer
+ *
+ * Feeds ilen bytes from input into the hash. Data that completes the
+ * partially filled ctx->buffer, plus every whole 128-byte block of the
+ * remaining input, is hashed in one DMA request; a trailing partial block
+ * is kept in ctx->buffer for the next call. In SHA2_512T mode the first
+ * request initialises the hash through esp_sha_512_t_init_hash().
+ *
+ * Returns 0 on success (also for ilen == 0), otherwise the error reported
+ * by the DMA request.
+ */
+int mbedtls_sha512_update_ret( mbedtls_sha512_context *ctx, const unsigned char *input,
+                               size_t ilen )
+{
+    int status;
+    size_t room;
+    unsigned int buffered, bulk_len, pending_len = 0;
+
+    /* Nothing to hash */
+    if ( ilen == 0 ) {
+        return 0;
+    }
+
+    /* Bytes already waiting in ctx->buffer, and the space left in it */
+    buffered = (unsigned int) (ctx->total[0] & 0x7F);
+    room = 128 - buffered;
+
+    /* Update the running byte count, carrying into total[1] on wrap */
+    ctx->total[0] += (uint64_t) ilen;
+    if ( ctx->total[0] < (uint64_t) ilen ) {
+        ctx->total[1]++;
+    }
+
+    /* Top up a partially filled buffer so it becomes one full block */
+    if ( buffered != 0 && ilen >= room ) {
+        memcpy( (void *) (ctx->buffer + buffered), input, room );
+
+        input += room;
+        ilen -= room;
+        buffered = 0;
+        pending_len = 128;
+    }
+
+    /* Whole blocks that can be taken straight from the caller's input */
+    bulk_len = (ilen / 128) * 128;
+
+    if ( bulk_len != 0 || pending_len != 0 ) {
+
+        esp_sha_acquire_hardware();
+
+        if (ctx->sha_state == ESP_SHA512_STATE_INIT) {
+            /* First request for this context */
+            if (ctx->mode == SHA2_512T) {
+                esp_sha_512_t_init_hash(ctx->t_val);
+                ctx->first_block = false;
+            } else {
+                ctx->first_block = true;
+            }
+            ctx->sha_state = ESP_SHA512_STATE_IN_PROCESS;
+
+        } else if (ctx->sha_state == ESP_SHA512_STATE_IN_PROCESS) {
+            /* Continue from the digest state saved by the previous request */
+            ctx->first_block = false;
+            esp_sha_write_digest_state(ctx->mode, ctx->state);
+        }
+
+        status = esp_internal_sha512_dma_process(ctx, input, bulk_len, ctx->buffer, pending_len);
+
+        /* Save the digest state before giving the hardware up */
+        esp_sha_read_digest_state(ctx->mode, ctx->state);
+
+        esp_sha_release_hardware();
+
+        if (status != 0) {
+            return status;
+        }
+
+    }
+
+    /* Keep the trailing partial block for the next call */
+    if ( ilen > 0 ) {
+        memcpy( (void *) (ctx->buffer + buffered), input + bulk_len, ilen - bulk_len );
+    }
+
+    return 0;
+}
+
+#if !defined(MBEDTLS_DEPRECATED_REMOVED)
+/* Wrapper around mbedtls_sha512_update_ret() that discards the result. */
+void mbedtls_sha512_update( mbedtls_sha512_context *ctx,
+                            const unsigned char *input,
+                            size_t ilen )
+{
+    (void) mbedtls_sha512_update_ret( ctx, input, ilen );
+}
+#endif
+
+
+/* Padding source for the final block(s): a single 0x80 byte followed by
+   zeros (the remaining 127 elements are zero-initialised). */
+static const unsigned char sha512_padding[128] = { 0x80 };
+
+/*
+ * SHA-512 final digest
+ *
+ * Appends the padding and the 128-bit big-endian message length in bits
+ * through mbedtls_sha512_update_ret(), then copies ctx->state to output:
+ * 48 bytes in SHA2_384 mode, 64 bytes in every other mode. Returns 0 on
+ * success or the first update error.
+ */
+int mbedtls_sha512_finish_ret( mbedtls_sha512_context *ctx, unsigned char output[64] )
+{
+    int ret;
+    size_t used, pad_len, digest_len;
+    uint64_t bits_hi, bits_lo;
+    unsigned char length_be[16];
+
+    /* Message length in bits, derived from the byte counters */
+    bits_hi = ( ctx->total[0] >> 61 )
+              | ( ctx->total[1] <<  3 );
+    bits_lo = ( ctx->total[0] <<  3 );
+
+    PUT_UINT64_BE( bits_hi, length_be, 0 );
+    PUT_UINT64_BE( bits_lo, length_be, 8 );
+
+    /* Pad so that exactly 16 bytes remain in the last block for the length */
+    used = (size_t)( ctx->total[0] & 0x7F );
+    if ( used < 112 ) {
+        pad_len = 112 - used;
+    } else {
+        pad_len = 240 - used;
+    }
+
+    ret = mbedtls_sha512_update_ret( ctx, sha512_padding, pad_len );
+    if ( ret != 0 ) {
+        return ret;
+    }
+
+    ret = mbedtls_sha512_update_ret( ctx, length_be, 16 );
+    if ( ret != 0 ) {
+        return ret;
+    }
+
+    digest_len = (ctx->mode == SHA2_384) ? 48 : 64;
+    memcpy(output, ctx->state, digest_len);
+
+    return ret;
+}
+
+#if !defined(MBEDTLS_DEPRECATED_REMOVED)
+/* Wrapper around mbedtls_sha512_finish_ret() that discards the result. */
+void mbedtls_sha512_finish( mbedtls_sha512_context *ctx,
+                            unsigned char output[64] )
+{
+    (void) mbedtls_sha512_finish_ret( ctx, output );
+}
+#endif
+
+#endif /* MBEDTLS_SHA512_C && MBEDTLS_SHA512_ALT */

+ 384 - 0
components/mbedtls/port/esp32s3/sha.c

@@ -0,0 +1,384 @@
+/*
+ *  ESP32 hardware accelerated SHA1/256/512 implementation
+ *  based on mbedTLS FIPS-180 compliant version.
+ *
+ *  Copyright (C) 2006-2015, ARM Limited, All Rights Reserved
+ *  Additions Copyright (C) 2016-2020, Espressif Systems (Shanghai) PTE Ltd
+ *  SPDX-License-Identifier: Apache-2.0
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License"); you may
+ *  not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ *  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+/*
+ *  The SHA-1 standard was published by NIST in 1993.
+ *
+ *  http://www.itl.nist.gov/fipspubs/fip180-1.htm
+ */
+
+typedef int _lock_t;
+
+#include <string.h>
+#include <stdio.h>
+#include <sys/lock.h>
+#include "esp_err.h"
+#include "esp_log.h"
+#include "esp32s3/rom/ets_sys.h"
+#include "soc/dport_reg.h"
+#include "soc/hwcrypto_reg.h"
+#include "soc/soc_memory_layout.h"
+
+#include "esp32s3/rom/cache.h"
+
+#include "soc/cache_memory.h"
+
+#include "freertos/FreeRTOS.h"
+#include "freertos/semphr.h"
+
+#include "esp32s3/sha.h"
+#include "esp32s3/rom/lldesc.h"
+#include "soc/periph_defs.h"
+#include "driver/periph_ctrl.h"
+#include "sys/param.h"
+#include "soc/gdma_struct.h"
+#include "soc/extmem_reg.h"
+
+#define DMA_PERIPH_AES 6 /* DMA peripheral indexes */
+#define DMA_PERIPH_SHA 7 /* value written to GDMA peri_out_sel for SHA transfers */
+#define DMA_CHANNEL 1 /* note: hard-coded */
+
+/* Max amount of bytes in a single DMA operation is 4095,
+   for SHA this means that the biggest safe amount of bytes is
+   31 blocks of 128 bytes = 3968
+*/
+#define SHA_DMA_MAX_BYTES 3968
+
+/* Lock for SHA engine */
+static _lock_t s_sha_lock;
+
+const static char *TAG = "esp-sha"; /* tag passed to ESP_LOGE in this file */
+
+/* Return block size (in bytes) for a given SHA type, or 0 for an unknown type */
+inline static size_t block_length(esp_sha_type type)
+{
+    size_t bytes = 0;
+
+    switch (type) {
+    case SHA1:
+    case SHA2_224:
+    case SHA2_256:
+        bytes = 512 / 8;
+        break;
+    case SHA2_384:
+    case SHA2_512:
+    case SHA2_512224:
+    case SHA2_512256:
+    case SHA2_512T:
+        bytes = 1024 / 8;
+        break;
+    default:
+        break;
+    }
+
+    return bytes;
+}
+
+/* Return state size (in bytes) for a given SHA type, or 0 for an unknown type */
+inline static size_t state_length(esp_sha_type type)
+{
+    size_t bits = 0;
+
+    switch (type) {
+    case SHA1:
+        bits = 160;
+        break;
+    case SHA2_224:
+    case SHA2_256:
+        bits = 256;
+        break;
+    case SHA2_384:
+    case SHA2_512:
+    case SHA2_512224:
+    case SHA2_512256:
+    case SHA2_512T:
+        bits = 512;
+        break;
+    default:
+        break;
+    }
+
+    return bits / 8;
+}
+
+/* Take the SHA engine lock, then enable the SHA and DMA clocks and
+ * de-assert the SHA, HMAC, DMA and digital-signature reset bits. */
+void esp_sha_acquire_hardware()
+{
+    const uint32_t clk_bits = SYSTEM_CRYPTO_SHA_CLK_EN | SYSTEM_DMA_CLK_EN;
+    const uint32_t rst_bits = SYSTEM_CRYPTO_SHA_RST | SYSTEM_CRYPTO_HMAC_RST |
+                              SYSTEM_DMA_RST | SYSTEM_CRYPTO_DS_RST;
+
+    _lock_acquire(&s_sha_lock);
+
+    /* Clocks on first, then release the resets */
+    REG_SET_BIT(SYSTEM_PERIP_CLK_EN1_REG, clk_bits);
+    REG_CLR_BIT(SYSTEM_PERIP_RST_EN1_REG, rst_bits);
+}
+
+/* Assert the SHA, DMA and digital-signature reset bits, gate the SHA and
+ * DMA clocks, then give the SHA engine lock back. */
+void esp_sha_release_hardware()
+{
+    const uint32_t rst_bits = SYSTEM_CRYPTO_SHA_RST | SYSTEM_DMA_RST |
+                              SYSTEM_CRYPTO_DS_RST;
+    const uint32_t clk_bits = SYSTEM_CRYPTO_SHA_CLK_EN | SYSTEM_DMA_CLK_EN;
+
+    /* Back into reset first, then clocks off */
+    REG_SET_BIT(SYSTEM_PERIP_RST_EN1_REG, rst_bits);
+    REG_CLR_BIT(SYSTEM_PERIP_CLK_EN1_REG, clk_bits);
+
+    _lock_release(&s_sha_lock);
+}
+
+
+/* Spin until SHA_BUSY_REG reads back as zero */
+static void esp_sha_wait_idle(void)
+{
+    for (;;) {
+        if (DPORT_REG_READ(SHA_BUSY_REG) == 0) {
+            break;
+        }
+    }
+}
+
+
+/* Copy the caller-supplied digest state, word by word, into the registers
+ * starting at SHA_H_BASE. The number of words depends on sha_type. */
+void esp_sha_write_digest_state(esp_sha_type sha_type, void *digest_state)
+{
+    const uint32_t *src_words = (const uint32_t *)digest_state;
+    uint32_t *h_regs = (uint32_t *)(SHA_H_BASE);
+    const size_t n_words = state_length(sha_type) / 4;
+
+    for (size_t w = 0; w < n_words; w++) {
+        REG_WRITE(&h_regs[w], src_words[w]);
+    }
+}
+
+/* Read the SHA digest state from the registers at SHA_H_BASE into digest_state.
+ * Aborts if every word read back is zero. */
+void esp_sha_read_digest_state(esp_sha_type sha_type, void *digest_state)
+{
+    uint32_t *out_words = (uint32_t *)digest_state;
+    int n_words = state_length(sha_type) / 4;
+    uint32_t seen_bits = 0;
+
+    esp_dport_access_read_buffer(out_words, SHA_H_BASE, n_words);
+
+    /* Fault injection check: an all-zero state means the SHA engine
+       did not actually run. */
+    for (int w = 0; w < n_words; w++) {
+        seen_bits |= out_words[w];
+    }
+
+    if (seen_bits == 0) {
+        abort(); // SHA peripheral returned all zero state, probably due to fault injection
+    }
+}
+
+
+static int esp_sha_dma_process(esp_sha_type sha_type, const void *input, uint32_t ilen,
+                               const void *buf, uint32_t buf_len, bool is_first_block);
+
+/* Performs SHA on multiple blocks at a time using DMA
+   splits up into smaller operations for inputs that exceed a single DMA list.
+
+   input/ilen:     message data, processed in chunks of at most SHA_DMA_MAX_BYTES
+   buf/buf_len:    optional extra data (at most 128 bytes) handed to the first
+                   DMA operation only
+   is_first_block: forwarded to the first DMA operation; every later operation
+                   is issued with false
+
+   Returns 0 on success, -1 if buf_len exceeds 128, ESP_ERR_NO_MEM if a
+   temporary buffer cannot be allocated, or the non-zero status of
+   esp_sha_dma_process(). The temporary buffers are freed on every path
+   that may have allocated them.
+ */
+int esp_sha_dma(esp_sha_type sha_type, const void *input, uint32_t ilen,
+                const void *buf, uint32_t buf_len, bool is_first_block)
+{
+    int ret = 0;
+    const void *dma_input;
+    /* Byte-typed cursor over the input: arithmetic on a void pointer is a compiler extension */
+    const unsigned char *input_pos = (const unsigned char *)input;
+    unsigned char *non_icache_input = NULL;
+    unsigned char *non_icache_buf = NULL;
+    int dma_op_num = ( ilen / (SHA_DMA_MAX_BYTES + 1) ) + 1;
+
+    if (buf_len > 128) {
+        ESP_LOGE(TAG, "SHA DMA buf_len cannot exceed max size for a single block");
+        return -1;
+    }
+
+    /* DMA cannot access memory in the iCache range, copy data to temporary buffers before transfer */
+    if (!esp_ptr_dma_capable(input) && ilen) {
+        non_icache_input = malloc(sizeof(unsigned char) * MIN(ilen, SHA_DMA_MAX_BYTES));
+        if (non_icache_input == NULL) {
+            ESP_LOGE(TAG, "Failed to allocate memory");
+            ret = ESP_ERR_NO_MEM;
+            goto cleanup;
+        }
+    }
+
+    if (!esp_ptr_dma_capable(buf) && buf_len) {
+        non_icache_buf = malloc(sizeof(unsigned char) * buf_len);
+        if (non_icache_buf == NULL) {
+            ESP_LOGE(TAG, "Failed to allocate memory");
+            ret = ESP_ERR_NO_MEM;
+            goto cleanup;
+        }
+        memcpy(non_icache_buf, buf, buf_len);
+        buf = non_icache_buf;
+    }
+
+    /* The max amount of blocks in a single hardware operation is 2^6 - 1 = 63
+       Thus we only do a single DMA input list + dma buf list,
+       which is max 3968/64 + 64/64 = 63 blocks */
+    for (int i = 0; i < dma_op_num; i++) {
+        int dma_chunk_len = MIN(ilen, SHA_DMA_MAX_BYTES);
+
+        /* Input depends on if it's a temp alloc buffer or supplied by user */
+        if (non_icache_input != NULL) {
+            memcpy(non_icache_input, input_pos, dma_chunk_len);
+            dma_input = non_icache_input;
+        } else {
+            dma_input = input_pos;
+        }
+
+        ret = esp_sha_dma_process(sha_type, dma_input, dma_chunk_len, buf, buf_len, is_first_block);
+        if (ret != 0) {
+            /* Leave through cleanup so the temporary buffers are not leaked */
+            goto cleanup;
+        }
+
+        is_first_block = false;
+
+        ilen -= dma_chunk_len;
+        input_pos += dma_chunk_len;
+
+        // Only append buf to the first operation
+        buf_len = 0;
+    }
+
+cleanup:
+    free(non_icache_input);
+    free(non_icache_buf);
+    return ret;
+}
+
+/* Reset the DMA block, configure GDMA index 0 so its outgoing side feeds the
+ * SHA peripheral, and start the transfer described by the descriptor list
+ * whose head is `input`. */
+static void esp_sha_dma_init(lldesc_t *input)
+{
+    /* Reset DMA: toggle the DMA clock enable off and on, then pulse the DMA reset bit */
+    REG_CLR_BIT(SYSTEM_PERIP_CLK_EN1_REG, SYSTEM_DMA_CLK_EN);
+    REG_SET_BIT(SYSTEM_PERIP_CLK_EN1_REG, SYSTEM_DMA_CLK_EN);
+    REG_SET_BIT(SYSTEM_PERIP_RST_EN1_REG, SYSTEM_DMA_RST);
+    REG_CLR_BIT(SYSTEM_PERIP_RST_EN1_REG, SYSTEM_DMA_RST);
+
+    /* NOTE: the channel is hard-coded: every GDMA register array below is
+       indexed with [0] (the DMA_CHANNEL define is not used here) */
+    /* Note: burst mode has alignment requirements that we have not checked here */
+    GDMA.conf0[0].outdscr_burst_en = 0; /* was 1*/
+    GDMA.conf0[0].out_data_burst_en = 0; /* was 1*/
+    GDMA.conf0[0].out_auto_wrback = 0;
+
+    /* Select the SHA engine as the peripheral served by the outgoing side */
+    GDMA.peri_sel[0].peri_out_sel = DMA_PERIPH_SHA;
+
+    GDMA.sram_size[0].in_size = 3; /* 40 bytes, also minimum size for EDMA */
+    GDMA.sram_size[0].out_size = 3;
+    GDMA.conf1[0].in_ext_mem_bk_size = 0; // 16 bytes
+    GDMA.conf1[0].out_ext_mem_bk_size = 0; // 16 bytes
+
+    /* Set descriptors */
+    GDMA.out_link[0].addr = (uint32_t)input;
+
+    /* Pulse the in/out reset bits before starting */
+    GDMA.conf0[0].in_rst = 1;
+    GDMA.conf0[0].in_rst = 0;
+    GDMA.conf0[0].out_rst = 1;
+    GDMA.conf0[0].out_rst = 0;
+
+    /* Start transfer */
+    GDMA.out_link[0].start = 1;
+}
+
+/* The initial hash value for SHA512/t is generated according to the
+   algorithm described in the TRM, chapter SHA-Accelerator.
+
+   The decimal digits of t are packed as ASCII into the top bytes of the
+   T_STRING register value, with a single set bit directly below the last
+   digit; T_LENGTH is 0x48, 0x50 or 0x58 for one, two or three digits.
+   Returns 0 on success, -1 when t is 384 or greater than 512.
+*/
+int esp_sha_512_t_init_hash(uint16_t t)
+{
+    uint32_t t_string;
+    uint8_t t_len;
+
+    if (t == 384) {
+        ESP_LOGE(TAG, "Invalid t for SHA512/t, t = %u,cannot be 384", t);
+        return -1;
+    }
+
+    if (t > 512) {
+        ESP_LOGE(TAG, "Invalid t for SHA512/t, t = %u, must equal or less than 512", t);
+        return -1;
+    }
+
+    /* ASCII codes of the decimal digits of t */
+    const uint32_t ones = 0x30 + (uint32_t)(t % 10);
+    const uint32_t tens = 0x30 + (uint32_t)((t / 10) % 10);
+    const uint32_t hundreds = 0x30 + (uint32_t)(t / 100);
+
+    if (t <= 9) {
+        t_string = (ones << 24) | (1u << 23);
+        t_len = 0x48;
+    } else if (t <= 99) {
+        t_string = (tens << 24) | (ones << 16) | (1u << 15);
+        t_len = 0x50;
+    } else {
+        t_string = (hundreds << 24) | (tens << 16) | (ones << 8) | (1u << 7);
+        t_len = 0x58;
+    }
+
+    REG_WRITE(SHA_T_LENGTH_REG, t_len);
+    REG_WRITE(SHA_T_STRING_REG, t_string);
+    REG_WRITE(SHA_MODE_REG, SHA2_512T);
+    REG_WRITE(SHA_START_REG, 1);
+
+    esp_sha_wait_idle();
+
+    return 0;
+}
+
+/* Performs SHA on multiple blocks at a time.
+
+   Builds up to two DMA descriptors on the stack (one for buf, one for input),
+   links buf ahead of input when both are present, programs the SHA mode and
+   block count, starts the DMA transfer and busy-waits until the SHA engine
+   is idle.
+
+   is_first_block selects SHA_DMA_START_REG (true) or SHA_DMA_CONTINUE_REG (false).
+
+   Returns 0 on success (including when there is nothing to hash) and -1 when
+   sha_type has no known block length.
+ */
+static int esp_sha_dma_process(esp_sha_type sha_type, const void *input, uint32_t ilen,
+                               const void *buf, uint32_t buf_len, bool is_first_block)
+{
+    int ret = 0;
+    lldesc_t dma_descr_input = {0};
+    lldesc_t dma_descr_buf = {0};
+    lldesc_t *dma_descr_head = NULL;
+    const size_t blk_len = block_length(sha_type);
+
+    /* block_length() reports 0 for an unknown type, which would divide by zero below */
+    if (blk_len == 0) {
+        return -1;
+    }
+
+    /* No data at all: there is no descriptor to give to the DMA engine */
+    if (ilen == 0 && buf_len == 0) {
+        return ret;
+    }
+
+    REG_WRITE(SHA_MODE_REG, sha_type);
+    REG_WRITE(SHA_BLOCK_NUM_REG, ((ilen + buf_len) / blk_len));
+
+    /* DMA descriptor for Memory to DMA-SHA transfer */
+    if (ilen) {
+        dma_descr_input.length = ilen;
+        dma_descr_input.size = ilen;
+        dma_descr_input.owner = 1;
+        dma_descr_input.eof = 1;
+        dma_descr_input.buf = (void *)input;
+        dma_descr_head = &dma_descr_input;
+    }
+    /* Check after input to override head if there is any buf */
+    if (buf_len) {
+        dma_descr_buf.length = buf_len;
+        dma_descr_buf.size = buf_len;
+        dma_descr_buf.owner = 1;
+        dma_descr_buf.eof = 1;
+        dma_descr_buf.buf = (void *)buf;
+        dma_descr_head = &dma_descr_buf;
+    }
+
+    /* Link DMA lists: buf goes first and points on to input */
+    if (buf_len && ilen) {
+        dma_descr_buf.eof = 0;
+        dma_descr_buf.empty = (uint32_t)(&dma_descr_input);
+    }
+
+    esp_sha_dma_init(dma_descr_head);
+
+    /* Start hashing */
+    if (is_first_block) {
+        REG_WRITE(SHA_DMA_START_REG, 1);
+    } else {
+        REG_WRITE(SHA_DMA_CONTINUE_REG, 1);
+    }
+
+    esp_sha_wait_idle();
+
+    return ret;
+}
+

+ 7 - 1
components/mbedtls/port/esp_aes_xts.c

@@ -40,10 +40,16 @@
 
 #if CONFIG_IDF_TARGET_ESP32
 #include "esp32/aes.h"
-#elif CONFIG_IDF_TARGET_ESP32S2
+#endif
+
+#if CONFIG_IDF_TARGET_ESP32S2
 #include "esp32s2/aes.h"
 #endif
 
+#if CONFIG_IDF_TARGET_ESP32S3
+#include "esp32s3/aes.h"
+#endif
+
 void esp_aes_xts_init( esp_aes_xts_context *ctx )
 {
     esp_aes_init( &ctx->crypt );

+ 279 - 1
components/mbedtls/test/test_aes.c

@@ -12,6 +12,284 @@
 #include "esp_heap_caps.h"
 #include "test_utils.h"
 
+/* 32-byte (256-bit) AES key: the byte values 0x00 through 0x1f in order */
+static const uint8_t key_256[] = {
+    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+    0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+    0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
+    0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
+};
+
+/* 16-byte IV / nonce: the byte values 0x10 down to 0x01 */
+static const uint8_t iv[] = {
+    0x10, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09,
+    0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01,
+};
+
+/* Cipher produced via this Python:
+    from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
+    from cryptography.hazmat.backends import default_backend
+
+    def as_c_array(byte_arr):
+
+        hex_str = ''
+        for idx, byte in enumerate(byte_arr):
+            hex_str += "0x{:02x}, ".format(byte)
+            bytes_per_line = 8
+            if idx % bytes_per_line == bytes_per_line - 1:
+                hex_str += '\n'
+
+        return hex_str
+
+    key = bytearray(range(32))
+    iv = bytearray(range(16, 0, -1))
+
+    print("Key: \n{}".format(as_c_array(key)))
+    print("IV: \n{}".format(as_c_array(iv)))
+
+    # Replace CTR with desired mode
+    cipher = Cipher(algorithms.AES(key), modes.CTR(iv), backend=default_backend())
+    encryptor = cipher.encryptor()
+
+    input_len = 1000
+
+    plain = b'\x3A'*input_len
+    print(as_c_array(plain))
+    ct = encryptor.update(plain) + encryptor.finalize()
+
+    print("Ciphertext: {}".format(as_c_array(ct)))
+*/
+/* CBC round trip over 1600 bytes of 0x3A with key_256/iv: checks the last
+   32 ciphertext bytes against a reference, then decrypts and compares with
+   the plaintext. */
+TEST_CASE("mbedtls CBC AES-256 test", "[aes]")
+{
+    const unsigned SZ = 1600;
+    mbedtls_aes_context ctx;
+    uint8_t nonce[16];
+
+    const uint8_t expected_cipher_end[] = {
+        0x3e, 0x68, 0x8a, 0x02, 0xe6, 0xf2, 0x6a, 0x9e,
+        0x9b, 0xb2, 0xc0, 0xc4, 0x63, 0x63, 0xd9, 0x25,
+        0x51, 0xdc, 0xc2, 0x71, 0x96, 0xb3, 0xe5, 0xcd,
+        0xbd, 0x0e, 0xf2, 0xef, 0xa9, 0xab, 0xab, 0x2d,
+    };
+
+    memcpy(nonce, iv, 16);
+
+    // allocate internal memory
+    uint8_t *chipertext = heap_caps_malloc(SZ, MALLOC_CAP_8BIT|MALLOC_CAP_INTERNAL);
+    uint8_t *plaintext = heap_caps_malloc(SZ, MALLOC_CAP_8BIT|MALLOC_CAP_INTERNAL);
+    uint8_t *decryptedtext = heap_caps_malloc(SZ, MALLOC_CAP_8BIT|MALLOC_CAP_INTERNAL);
+
+    TEST_ASSERT_NOT_NULL(chipertext);
+    TEST_ASSERT_NOT_NULL(plaintext);
+    TEST_ASSERT_NOT_NULL(decryptedtext);
+
+    mbedtls_aes_init(&ctx);
+    mbedtls_aes_setkey_enc(&ctx, key_256, 256);
+
+    memset(plaintext, 0x3A, SZ);
+    memset(decryptedtext, 0x0, SZ);
+
+    // Encrypt
+    mbedtls_aes_crypt_cbc(&ctx, MBEDTLS_AES_ENCRYPT, SZ, nonce, plaintext, chipertext);
+    TEST_ASSERT_EQUAL_HEX8_ARRAY(expected_cipher_end, chipertext + SZ - 32, 32);
+
+    // Decrypt
+    memcpy(nonce, iv, 16);
+    mbedtls_aes_setkey_dec(&ctx, key_256, 256);
+    mbedtls_aes_crypt_cbc(&ctx, MBEDTLS_AES_DECRYPT, SZ, nonce, chipertext, decryptedtext);
+
+    TEST_ASSERT_EQUAL_HEX8_ARRAY(plaintext, decryptedtext, SZ);
+
+    // Pair mbedtls_aes_init() with mbedtls_aes_free() so the context is released
+    mbedtls_aes_free(&ctx);
+    free(plaintext);
+    free(chipertext);
+    free(decryptedtext);
+}
+
+/* CTR round trip over 1000 bytes of 0x3A with key_256/iv: checks the last
+   32 ciphertext bytes against a reference, then decrypts and compares with
+   the plaintext. */
+TEST_CASE("mbedtls CTR AES-256 test", "[aes]")
+{
+    const unsigned SZ = 1000;
+    mbedtls_aes_context ctx;
+    uint8_t nonce[16];
+    uint8_t stream_block[16];
+    size_t nc_off = 0;
+
+    const uint8_t expected_cipher_end[] = {
+        0xd4, 0xdc, 0x4f, 0x8f, 0xfe, 0x86, 0xee, 0xb5,
+        0x14, 0x7f, 0xba, 0x30, 0x25, 0xa6, 0x7f, 0x6c,
+        0xb5, 0x73, 0xaf, 0x90, 0xd7, 0xff, 0x36, 0xba,
+        0x2b, 0x1d, 0xec, 0xb9, 0x38, 0xfa, 0x0d, 0xeb,
+    };
+
+    memcpy(nonce, iv, 16);
+
+    // allocate internal memory
+    uint8_t *chipertext = heap_caps_malloc(SZ, MALLOC_CAP_8BIT|MALLOC_CAP_INTERNAL);
+    uint8_t *plaintext = heap_caps_malloc(SZ, MALLOC_CAP_8BIT|MALLOC_CAP_INTERNAL);
+    uint8_t *decryptedtext = heap_caps_malloc(SZ, MALLOC_CAP_8BIT|MALLOC_CAP_INTERNAL);
+
+    TEST_ASSERT_NOT_NULL(chipertext);
+    TEST_ASSERT_NOT_NULL(plaintext);
+    TEST_ASSERT_NOT_NULL(decryptedtext);
+
+    mbedtls_aes_init(&ctx);
+    mbedtls_aes_setkey_enc(&ctx, key_256, 256);
+
+    memset(plaintext, 0x3A, SZ);
+    memset(decryptedtext, 0x0, SZ);
+
+    // Encrypt
+    mbedtls_aes_crypt_ctr(&ctx, SZ, &nc_off, nonce, stream_block, plaintext, chipertext);
+    TEST_ASSERT_EQUAL_HEX8_ARRAY(expected_cipher_end, chipertext + SZ - 32, 32);
+
+    // Decrypt
+    nc_off = 0;
+    memcpy(nonce, iv, 16);
+    mbedtls_aes_crypt_ctr(&ctx, SZ, &nc_off, nonce, stream_block, chipertext, decryptedtext);
+
+    TEST_ASSERT_EQUAL_HEX8_ARRAY(plaintext, decryptedtext, SZ);
+
+    // Pair mbedtls_aes_init() with mbedtls_aes_free() so the context is released
+    mbedtls_aes_free(&ctx);
+    free(plaintext);
+    free(chipertext);
+    free(decryptedtext);
+}
+
+/* OFB round trip over 1000 bytes of 0x3A with key_256/iv: checks the last
+   32 ciphertext bytes against a reference, then decrypts and compares with
+   the plaintext. */
+TEST_CASE("mbedtls OFB AES-256 test", "[aes]")
+{
+    const unsigned SZ = 1000;
+    mbedtls_aes_context ctx;
+    uint8_t nonce[16];
+    size_t nc_off = 0;
+
+    const uint8_t expected_cipher_end[] = {
+        0xca, 0xc3, 0x05, 0x77, 0xae, 0xb9, 0x38, 0xd6,
+        0x03, 0x0a, 0xad, 0x90, 0x6e, 0xdd, 0xf3, 0x9a,
+        0x41, 0x4d, 0x71, 0x30, 0x04, 0x9f, 0xd3, 0x53,
+        0xb7, 0x5e, 0xb4, 0xfd, 0x93, 0xf8, 0x31, 0x6a,
+    };
+
+    memcpy(nonce, iv, 16);
+
+    // allocate internal memory
+    uint8_t *chipertext = heap_caps_malloc(SZ, MALLOC_CAP_8BIT|MALLOC_CAP_INTERNAL);
+    uint8_t *plaintext = heap_caps_malloc(SZ, MALLOC_CAP_8BIT|MALLOC_CAP_INTERNAL);
+    uint8_t *decryptedtext = heap_caps_malloc(SZ, MALLOC_CAP_8BIT|MALLOC_CAP_INTERNAL);
+
+    TEST_ASSERT_NOT_NULL(chipertext);
+    TEST_ASSERT_NOT_NULL(plaintext);
+    TEST_ASSERT_NOT_NULL(decryptedtext);
+
+    mbedtls_aes_init(&ctx);
+    mbedtls_aes_setkey_enc(&ctx, key_256, 256);
+
+    memset(plaintext, 0x3A, SZ);
+    memset(decryptedtext, 0x0, SZ);
+
+    // Encrypt
+    mbedtls_aes_crypt_ofb(&ctx, SZ, &nc_off, nonce, plaintext, chipertext);
+    TEST_ASSERT_EQUAL_HEX8_ARRAY(expected_cipher_end, chipertext + SZ - 32, 32);
+
+    // Decrypt
+    nc_off = 0;
+    memcpy(nonce, iv, 16);
+    mbedtls_aes_crypt_ofb(&ctx, SZ, &nc_off, nonce, chipertext, decryptedtext);
+
+    TEST_ASSERT_EQUAL_HEX8_ARRAY(plaintext, decryptedtext, SZ);
+
+    // Pair mbedtls_aes_init() with mbedtls_aes_free() so the context is released
+    mbedtls_aes_free(&ctx);
+    free(plaintext);
+    free(chipertext);
+    free(decryptedtext);
+}
+
+/* CFB-8 round trip over 1000 bytes of 0x3A with key_256/iv: checks the last
+   32 ciphertext bytes against a reference, then decrypts and compares with
+   the plaintext. */
+TEST_CASE("mbedtls CFB-8 AES-256 test", "[aes]")
+{
+    const unsigned SZ = 1000;
+    mbedtls_aes_context ctx;
+    uint8_t nonce[16];
+
+    const uint8_t expected_cipher_end[] = {
+        0x69, 0xdc, 0x1d, 0x8a, 0x0b, 0x9e, 0xbc, 0x84,
+        0x29, 0xa2, 0x04, 0xb6, 0x91, 0x6b, 0xb2, 0x83,
+        0x13, 0x23, 0x54, 0xcb, 0xf9, 0x6d, 0xcc, 0x53,
+        0x04, 0x59, 0xd1, 0xc9, 0xff, 0xab, 0xe2, 0x37,
+    };
+
+    memcpy(nonce, iv, 16);
+
+    // allocate internal memory
+    uint8_t *chipertext = heap_caps_malloc(SZ, MALLOC_CAP_8BIT|MALLOC_CAP_INTERNAL);
+    uint8_t *plaintext = heap_caps_malloc(SZ, MALLOC_CAP_8BIT|MALLOC_CAP_INTERNAL);
+    uint8_t *decryptedtext = heap_caps_malloc(SZ, MALLOC_CAP_8BIT|MALLOC_CAP_INTERNAL);
+
+    TEST_ASSERT_NOT_NULL(chipertext);
+    TEST_ASSERT_NOT_NULL(plaintext);
+    TEST_ASSERT_NOT_NULL(decryptedtext);
+
+    mbedtls_aes_init(&ctx);
+    mbedtls_aes_setkey_enc(&ctx, key_256, 256);
+
+    memset(plaintext, 0x3A, SZ);
+    memset(decryptedtext, 0x0, SZ);
+
+    // Encrypt
+    mbedtls_aes_crypt_cfb8(&ctx, MBEDTLS_AES_ENCRYPT, SZ, nonce, plaintext, chipertext);
+    TEST_ASSERT_EQUAL_HEX8_ARRAY(expected_cipher_end, chipertext + SZ - 32, 32);
+
+    // Decrypt
+    memcpy(nonce, iv, 16);
+    mbedtls_aes_crypt_cfb8(&ctx, MBEDTLS_AES_DECRYPT, SZ, nonce, chipertext, decryptedtext);
+
+    TEST_ASSERT_EQUAL_HEX8_ARRAY(plaintext, decryptedtext, SZ);
+
+    // Pair mbedtls_aes_init() with mbedtls_aes_free() so the context is released
+    mbedtls_aes_free(&ctx);
+    free(plaintext);
+    free(chipertext);
+    free(decryptedtext);
+}
+
+/* CFB-128 round trip over 1000 bytes of 0x3A with key_256/iv: checks the
+   last 32 ciphertext bytes against a reference, then decrypts and compares
+   with the plaintext. */
+TEST_CASE("mbedtls CFB-128 AES-256 test", "[aes]")
+{
+    const unsigned SZ = 1000;
+    mbedtls_aes_context ctx;
+    uint8_t nonce[16];
+    size_t nc_off = 0;
+
+    const uint8_t expected_cipher_end[] = {
+        0xf3, 0x64, 0x20, 0xa1, 0x70, 0x2a, 0xd9, 0x3f,
+        0xb7, 0x48, 0x8c, 0x2c, 0x1f, 0x65, 0x53, 0xc2,
+        0xac, 0xfd, 0x82, 0xe5, 0x31, 0x24, 0x1f, 0x30,
+        0xaf, 0xcc, 0x8d, 0xb3, 0xf3, 0x63, 0xe1, 0xa0,
+    };
+
+    memcpy(nonce, iv, 16);
+
+    // allocate internal memory
+    uint8_t *chipertext = heap_caps_malloc(SZ, MALLOC_CAP_8BIT|MALLOC_CAP_INTERNAL);
+    uint8_t *plaintext = heap_caps_malloc(SZ, MALLOC_CAP_8BIT|MALLOC_CAP_INTERNAL);
+    uint8_t *decryptedtext = heap_caps_malloc(SZ, MALLOC_CAP_8BIT|MALLOC_CAP_INTERNAL);
+
+    TEST_ASSERT_NOT_NULL(chipertext);
+    TEST_ASSERT_NOT_NULL(plaintext);
+    TEST_ASSERT_NOT_NULL(decryptedtext);
+
+    mbedtls_aes_init(&ctx);
+    mbedtls_aes_setkey_enc(&ctx, key_256, 256);
+
+    memset(plaintext, 0x3A, SZ);
+    memset(decryptedtext, 0x0, SZ);
+
+    // Encrypt
+    mbedtls_aes_crypt_cfb128(&ctx, MBEDTLS_AES_ENCRYPT,  SZ, &nc_off, nonce, plaintext, chipertext);
+    TEST_ASSERT_EQUAL_HEX8_ARRAY(expected_cipher_end, chipertext + SZ - 32, 32);
+
+    // Decrypt
+    nc_off = 0;
+    memcpy(nonce, iv, 16);
+    mbedtls_aes_crypt_cfb128(&ctx, MBEDTLS_AES_DECRYPT, SZ, &nc_off, nonce, chipertext, decryptedtext);
+
+    TEST_ASSERT_EQUAL_HEX8_ARRAY(plaintext, decryptedtext, SZ);
+
+    // Pair mbedtls_aes_init() with mbedtls_aes_free() so the context is released
+    mbedtls_aes_free(&ctx);
+    free(plaintext);
+    free(chipertext);
+    free(decryptedtext);
+}
+
 TEST_CASE("mbedtls CTR stream test", "[aes]")
 {
     const unsigned SZ = 100;
@@ -79,11 +357,11 @@ TEST_CASE("mbedtls CTR stream test", "[aes]")
         memset(decryptedtext, 0x0, SZ);
 
         size_t offset = 0;
-
         // Encrypt
         for (int idx = 0; idx < SZ; idx = idx + bytes_to_process) {
             // Limit length of last call to avoid exceeding buffer size
             size_t length = (idx + bytes_to_process > SZ) ? (SZ - idx) : bytes_to_process;
+
             mbedtls_aes_crypt_ctr(&ctx, length, &offset, nonce,
                                   stream_block, plaintext + idx, chipertext + idx );
         }