diff --git a/blog/content/post/2023-06-07-twpm-spi-fix.md b/blog/content/post/2023-06-07-twpm-spi-fix.md index cb62dc7d..04c14b08 100644 --- a/blog/content/post/2023-06-07-twpm-spi-fix.md +++ b/blog/content/post/2023-06-07-twpm-spi-fix.md @@ -498,6 +498,279 @@ can reach stable 22 MHz (contrary to stable 18 MHz on old cables), 24 MHz is mostly stable but sometimes problems occur, if I connect logic analyzer 24 Mhz becomes broken completely. +## Extending tests + +I could achieve 22 Mhz frequency, while this is not the target I aim for, I +deciced to continue tests on lower frequency, for now (hoping that better cables +will make the problem go away). I extended MCU code to speak over a TPM-like +protocol. + +```c +#define STATE_WAIT_HEADER 0 +#define STATE_WAIT_STATE 1 +#define STATE_WAIT_STATE_LAST 2 +#define STATE_PAYLOAD_TRANSFER 3 + +static uint8_t ff_buffer[64]; +static uint8_t waitstate_insert[4] = {0xff, 0xff, 0xff, 0xfe}; +static uint8_t waitstate_hold[1] = {0x00}; + +static uint8_t waitstate_cancel[1] = {0x01}; +static uint8_t trash[1] = {0}; +static uint8_t header[4] = {0}; +static bool b_txdma_complete = false; +static bool b_rxdma_complete = false; +static uint8_t state = STATE_WAIT_HEADER; + +static uint8_t scratch_buffer[64] = {0}; + +static uint8_t transfer_is_read = 0; +static uint32_t transfer_length = 0; + +static void update_state(SPI_HandleTypeDef *hspi) +{ + b_rxdma_complete = false; + b_txdma_complete = false; + + switch (state) { + case STATE_WAIT_HEADER: + // We don't care what host sends during wait state, but we start DMA anyway to avoid overrun errors. + HAL_DMA_Start_IT(hspi->hdmarx, (uint32_t)&hspi->Instance->DR, (uint32_t)trash, sizeof trash); + // Wait state got inserted while reading header. + HAL_DMA_Start_IT(hspi->hdmatx, (uint32_t)waitstate_cancel, (uint32_t)&hspi->Instance->DR, sizeof waitstate_cancel); + + // This follows a real TPM protocol, except we ignore addr currently. + transfer_is_read = !!(header[0] & (1 << 7)); + transfer_length = (header[0] & 0x3f) + 1; + + state = STATE_WAIT_STATE_LAST; + break; + + case STATE_WAIT_STATE_LAST: + if (transfer_is_read) { + HAL_DMA_Start_IT(hspi->hdmatx, (uint32_t)scratch_buffer, (uint32_t)&hspi->Instance->DR, transfer_length); + HAL_DMA_Start_IT(hspi->hdmarx, (uint32_t)&hspi->Instance->DR, (uint32_t)ff_buffer, transfer_length); + } else { + HAL_DMA_Start_IT(hspi->hdmatx, (uint32_t)ff_buffer, (uint32_t)&hspi->Instance->DR, transfer_length); + HAL_DMA_Start_IT(hspi->hdmarx, (uint32_t)&hspi->Instance->DR, (uint32_t)scratch_buffer, transfer_length); + } + + state = STATE_PAYLOAD_TRANSFER; + break; + + case STATE_PAYLOAD_TRANSFER: + HAL_DMA_Start_IT(hspi->hdmarx, (uint32_t)&hspi->Instance->DR, (uint32_t)header, sizeof header); + HAL_DMA_Start_IT(hspi->hdmatx, (uint32_t)waitstate_insert, (uint32_t)&hspi->Instance->DR, sizeof waitstate_insert); + + state = STATE_WAIT_HEADER; + break; + } +} + +static void rxdma_complete(DMA_HandleTypeDef *hdma) +{ + SPI_HandleTypeDef *hspi = (SPI_HandleTypeDef *)(((DMA_HandleTypeDef *)hdma)->Parent); + if (b_txdma_complete) + update_state(hspi); + else + b_rxdma_complete = true; +} + +static void txdma_complete(DMA_HandleTypeDef *hdma) +{ + SPI_HandleTypeDef *hspi = (SPI_HandleTypeDef *)(((DMA_HandleTypeDef *)hdma)->Parent); + if (b_rxdma_complete) + update_state(hspi); + else + b_txdma_complete = true; +} +``` + +On Python side I introduce a new function: + +```python +def tpm_read(size: int) -> bytes: + assert size > 0 and size <= 64 + + header = bytes([ + (1 << 7) | (size - 1), + 0, 0, 0 + ]) + + waitstate = spi.xfer(header) + assert waitstate == [0xff, 0xff, 0xff, 0xfe] + + while True: + status = spi.xfer([0]) + if status[0] == 1: + break + assert status[0] == 0 + + empty = [0] * size + return spi.xfer(empty) +``` + +And update the test: + +```python +@test +def test_read(): + x = tpm_read(0, 8) + assert x == [0] * 8 +``` + +After running the test code I immediatelly got an error, logic analyzer shows +this: + +![](/img/stm32-spi-failure.png) + +I can see here two problems: the first problem is that MISO goes high between +header, wait states and payload (MISO high in the middle of a transfer cancels +the transfers). The second, far worse problem, is that Nucleo transmits wrong +data (0xff) instead of (0x01). + +To solve the problem I went a step back, I hardcoded a few data patterns to +replicate transfer sequence: + +```c +struct pattern { + uint8_t *data; + uint8_t len; +}; + +#define ARRAY_SIZE(x) (sizeof((x)) / sizeof((x)[0])) + +static uint8_t pattern_0[] = {0xff, 0xff, 0xff, 0xfe}; +static uint8_t pattern_1[] = {1}; +static uint8_t pattern_2[] = { + 0x3e, 0x60, 0xc3, 0x4f, 0x35, 0x2e, 0xa6, 0xaa, 0xa6, 0x61, 0x64, 0xcb, + 0x10, 0xd7, 0x45, 0x35, 0x82, 0xc9, 0x91, 0xbc, 0x35, 0x43, 0xbb, 0xe1, + 0xea, 0x08, 0xdf, 0xdd, 0x4d, 0xd8, 0xd5, 0x94, 0x71, 0x75, 0xfd, 0x23, + 0x24, 0xf8, 0x95, 0x85, 0x7b, 0x11, 0xf9, 0xdd, 0xa0, 0xaa, 0x60, 0xc5, + 0xd2, 0x07, 0x6b, 0x3a, 0xd4, 0xd2, 0xac, 0xac, 0x1b, 0x54, 0xfe, 0x2f, + 0xa2 +}; + +static struct pattern patterns[] = { + { .data = pattern_0, .len = sizeof pattern_0 }, + { .data = pattern_1, .len = sizeof pattern_1 }, + { .data = pattern_2, .len = sizeof pattern_2 }, +}; +int current_pattern = 1; + +static void txdma_complete(DMA_HandleTypeDef *hdma) +{ + SPI_HandleTypeDef *hspi = (SPI_HandleTypeDef *)(((DMA_HandleTypeDef *)hdma)->Parent); + HAL_DMA_Start_IT(hspi->hdmatx, (uint32_t)patterns[current_pattern].data, (uint32_t)&hspi->Instance->DR, patterns[current_pattern].len); + + if (++current_pattern == ARRAY_SIZE(patterns)) + current_pattern = 0; +} + +void app_main() { + hspi->hdmatx->XferCpltCallback = txdma_complete; + + HAL_DMA_Start_IT(hspi->hdmatx, (uint32_t)pattern_0, (uint32_t)&hspi->Instance->DR, sizeof pattern_0); + SET_BIT(hspi->Instance->CR2, SPI_CR2_TXDMAEN); + + __HAL_SPI_ENABLE(hspi); +} +``` + +And on Python side: + +```python +patterns = [ + [0xff, 0xff, 0xff, 0xfe], + [0x01], + [ + 0x3e, 0x60, 0xc3, 0x4f, 0x35, 0x2e, 0xa6, 0xaa, 0xa6, 0x61, 0x64, 0xcb, + 0x10, 0xd7, 0x45, 0x35, 0x82, 0xc9, 0x91, 0xbc, 0x35, 0x43, 0xbb, 0xe1, + 0xea, 0x08, 0xdf, 0xdd, 0x4d, 0xd8, 0xd5, 0x94, 0x71, 0x75, 0xfd, 0x23, + 0x24, 0xf8, 0x95, 0x85, 0x7b, 0x11, 0xf9, 0xdd, 0xa0, 0xaa, 0x60, 0xc5, + 0xd2, 0x07, 0x6b, 0x3a, 0xd4, 0xd2, 0xac, 0xac, 0x1b, 0x54, 0xfe, 0x2f, + 0xa2 + ] +] + +spi = Spi() +spi.set_frequency(22000000) + +for i in range(100000): + for pattern in patterns: + data = spi.xfer([0] * len(pattern)) + print(f'iter {i} test {data} == {pattern}') + assert data == pattern +``` + +The code works just fine (100k iterations) + +``` +... +iter 99999 test [255, 255, 255, 254] == [255, 255, 255, 254] +iter 99999 test [1] == [1] +iter 99999 test [62, 96, 195, 79, 53, 46, 166, 170, 166, 97, 100, 203, 16, 215, 69, 53, 130, 201, 145, 188, 53, 67, 187, 225, 234, 8, 223, 221, 77, 216, 213, 148, 113, 117, 253, 35, 36, 248, 149, 133, 123, 17, 249, 221, 160, 170, 96, 197, 210, 7, 107, 58, 212, 210, 172, 172, 27, 84, 254, 47, 162] == [62, 96, 195, 79, 53, 46, 166, 170, 166, 97, 100, 203, 16, 215, 69, 53, 130, 201, 145, 188, 53, 67, 187, 225, 234, 8, 223, 221, 77, 216, 213, 148, 113, 117, 253, 35, 36, 248, 149, 133, 123, 17, 249, 221, 160, 170, 96, 197, 210, 7, 107, 58, 212, 210, 172, 172, 27, 84, 254, 47, 162] +``` + +The main difference is that the full code performs both read and write, contrary +to only writing. Currently we wait for both TX and RX DMA to complete before +re-programming DMA channels and updating state machine. TX and RX are always +the same size, so should complete with a similar time. So, instead of using +interrupts for both channels I changed the code so that interrupts are used for +TX, and polling for RX. + +```c +static void txdma_complete(DMA_HandleTypeDef *hdma) +{ + SPI_HandleTypeDef *hspi = (SPI_HandleTypeDef *)(((DMA_HandleTypeDef *)hdma)->Parent); + + switch (state) { + case STATE_WAIT_HEADER: + // Wait state got inserted while reading header. + HAL_DMA_Start_IT(hspi->hdmatx, (uint32_t)waitstate_cancel, (uint32_t)&hspi->Instance->DR, sizeof waitstate_cancel); + + // We don't care what host sends during wait state, but we start DMA anyway to avoid overrun errors. + HAL_DMA_PollForTransfer(hspi->hdmarx, HAL_DMA_FULL_TRANSFER, HAL_MAX_DELAY); + HAL_DMA_Start_IT(hspi->hdmarx, (uint32_t)&hspi->Instance->DR, (uint32_t)trash, sizeof trash); + + transfer_is_read = !!(header[0] & (1 << 7)); + transfer_length = (header[0] & 0x3f) + 1; + + state = STATE_WAIT_STATE_LAST; + break; + + case STATE_WAIT_STATE_LAST: + if (transfer_is_read) { + HAL_DMA_Start_IT(hspi->hdmatx, (uint32_t)scratch_buffer, (uint32_t)&hspi->Instance->DR, transfer_length); + HAL_DMA_PollForTransfer(hspi->hdmarx, HAL_DMA_FULL_TRANSFER, HAL_MAX_DELAY); + HAL_DMA_Start_IT(hspi->hdmarx, (uint32_t)&hspi->Instance->DR, (uint32_t)ff_buffer, transfer_length); + } else { + HAL_DMA_Start_IT(hspi->hdmatx, (uint32_t)ff_buffer, (uint32_t)&hspi->Instance->DR, transfer_length); + HAL_DMA_PollForTransfer(hspi->hdmarx, HAL_DMA_FULL_TRANSFER, HAL_MAX_DELAY); + HAL_DMA_Start_IT(hspi->hdmarx, (uint32_t)&hspi->Instance->DR, (uint32_t)scratch_buffer, transfer_length); + } + + state = STATE_PAYLOAD_TRANSFER; + break; + + case STATE_PAYLOAD_TRANSFER: + HAL_DMA_Start_IT(hspi->hdmatx, (uint32_t)waitstate_insert, (uint32_t)&hspi->Instance->DR, sizeof waitstate_insert); + HAL_DMA_PollForTransfer(hspi->hdmarx, HAL_DMA_FULL_TRANSFER, HAL_MAX_DELAY); + HAL_DMA_Start_IT(hspi->hdmarx, (uint32_t)&hspi->Instance->DR, (uint32_t)header, sizeof header); + + state = STATE_WAIT_HEADER; + break; + } +} +``` + +Note that I start TX transfer first, then poll for RX DMA completion before +re-programming DMA channel. Now, the test succeeds. + +## Extending tests + +TODO + ## Summary Summary of the post. diff --git a/blog/static/img/stm32-spi-failure.png b/blog/static/img/stm32-spi-failure.png new file mode 100644 index 00000000..82b2586c Binary files /dev/null and b/blog/static/img/stm32-spi-failure.png differ