Skip to content

Commit

Permalink
wip
Browse files Browse the repository at this point in the history
  • Loading branch information
arturkow2000 committed Jun 22, 2023
1 parent 5ea6384 commit d4d7921
Show file tree
Hide file tree
Showing 2 changed files with 273 additions and 0 deletions.
273 changes: 273 additions & 0 deletions blog/content/post/2023-06-07-twpm-spi-fix.md
Original file line number Diff line number Diff line change
Expand Up @@ -498,6 +498,279 @@ can reach stable 22 MHz (contrary to stable 18 MHz on old cables), 24 MHz is
mostly stable but sometimes problems occur, if I connect logic analyzer 24 Mhz
becomes broken completely.

## Extending tests

I could achieve 22 Mhz frequency, while this is not the target I aim for, I
deciced to continue tests on lower frequency, for now (hoping that better cables
will make the problem go away). I extended MCU code to speak over a TPM-like
protocol.

```c
#define STATE_WAIT_HEADER 0
#define STATE_WAIT_STATE 1
#define STATE_WAIT_STATE_LAST 2
#define STATE_PAYLOAD_TRANSFER 3

static uint8_t ff_buffer[64];
static uint8_t waitstate_insert[4] = {0xff, 0xff, 0xff, 0xfe};
static uint8_t waitstate_hold[1] = {0x00};

static uint8_t waitstate_cancel[1] = {0x01};
static uint8_t trash[1] = {0};
static uint8_t header[4] = {0};
static bool b_txdma_complete = false;
static bool b_rxdma_complete = false;
static uint8_t state = STATE_WAIT_HEADER;

static uint8_t scratch_buffer[64] = {0};

static uint8_t transfer_is_read = 0;
static uint32_t transfer_length = 0;

static void update_state(SPI_HandleTypeDef *hspi)
{
b_rxdma_complete = false;
b_txdma_complete = false;

switch (state) {
case STATE_WAIT_HEADER:
// We don't care what host sends during wait state, but we start DMA anyway to avoid overrun errors.
HAL_DMA_Start_IT(hspi->hdmarx, (uint32_t)&hspi->Instance->DR, (uint32_t)trash, sizeof trash);
// Wait state got inserted while reading header.
HAL_DMA_Start_IT(hspi->hdmatx, (uint32_t)waitstate_cancel, (uint32_t)&hspi->Instance->DR, sizeof waitstate_cancel);

// This follows a real TPM protocol, except we ignore addr currently.
transfer_is_read = !!(header[0] & (1 << 7));
transfer_length = (header[0] & 0x3f) + 1;

state = STATE_WAIT_STATE_LAST;
break;

case STATE_WAIT_STATE_LAST:
if (transfer_is_read) {
HAL_DMA_Start_IT(hspi->hdmatx, (uint32_t)scratch_buffer, (uint32_t)&hspi->Instance->DR, transfer_length);
HAL_DMA_Start_IT(hspi->hdmarx, (uint32_t)&hspi->Instance->DR, (uint32_t)ff_buffer, transfer_length);
} else {
HAL_DMA_Start_IT(hspi->hdmatx, (uint32_t)ff_buffer, (uint32_t)&hspi->Instance->DR, transfer_length);
HAL_DMA_Start_IT(hspi->hdmarx, (uint32_t)&hspi->Instance->DR, (uint32_t)scratch_buffer, transfer_length);
}

state = STATE_PAYLOAD_TRANSFER;
break;

case STATE_PAYLOAD_TRANSFER:
HAL_DMA_Start_IT(hspi->hdmarx, (uint32_t)&hspi->Instance->DR, (uint32_t)header, sizeof header);
HAL_DMA_Start_IT(hspi->hdmatx, (uint32_t)waitstate_insert, (uint32_t)&hspi->Instance->DR, sizeof waitstate_insert);

state = STATE_WAIT_HEADER;
break;
}
}

static void rxdma_complete(DMA_HandleTypeDef *hdma)
{
SPI_HandleTypeDef *hspi = (SPI_HandleTypeDef *)(((DMA_HandleTypeDef *)hdma)->Parent);
if (b_txdma_complete)
update_state(hspi);
else
b_rxdma_complete = true;
}

static void txdma_complete(DMA_HandleTypeDef *hdma)
{
SPI_HandleTypeDef *hspi = (SPI_HandleTypeDef *)(((DMA_HandleTypeDef *)hdma)->Parent);
if (b_rxdma_complete)
update_state(hspi);
else
b_txdma_complete = true;
}
```
On Python side I introduce a new function:
```python
def tpm_read(size: int) -> bytes:
assert size > 0 and size <= 64
header = bytes([
(1 << 7) | (size - 1),
0, 0, 0
])
waitstate = spi.xfer(header)
assert waitstate == [0xff, 0xff, 0xff, 0xfe]
while True:
status = spi.xfer([0])
if status[0] == 1:
break
assert status[0] == 0
empty = [0] * size
return spi.xfer(empty)
```

And update the test:

```python
@test
def test_read():
x = tpm_read(0, 8)
assert x == [0] * 8
```

After running the test code I immediatelly got an error, logic analyzer shows
this:

![](/img/stm32-spi-failure.png)

I can see here two problems: the first problem is that MISO goes high between
header, wait states and payload (MISO high in the middle of a transfer cancels
the transfers). The second, far worse problem, is that Nucleo transmits wrong
data (0xff) instead of (0x01).

To solve the problem I went a step back, I hardcoded a few data patterns to
replicate transfer sequence:

```c
struct pattern {
uint8_t *data;
uint8_t len;
};

#define ARRAY_SIZE(x) (sizeof((x)) / sizeof((x)[0]))

static uint8_t pattern_0[] = {0xff, 0xff, 0xff, 0xfe};
static uint8_t pattern_1[] = {1};
static uint8_t pattern_2[] = {
0x3e, 0x60, 0xc3, 0x4f, 0x35, 0x2e, 0xa6, 0xaa, 0xa6, 0x61, 0x64, 0xcb,
0x10, 0xd7, 0x45, 0x35, 0x82, 0xc9, 0x91, 0xbc, 0x35, 0x43, 0xbb, 0xe1,
0xea, 0x08, 0xdf, 0xdd, 0x4d, 0xd8, 0xd5, 0x94, 0x71, 0x75, 0xfd, 0x23,
0x24, 0xf8, 0x95, 0x85, 0x7b, 0x11, 0xf9, 0xdd, 0xa0, 0xaa, 0x60, 0xc5,
0xd2, 0x07, 0x6b, 0x3a, 0xd4, 0xd2, 0xac, 0xac, 0x1b, 0x54, 0xfe, 0x2f,
0xa2
};

static struct pattern patterns[] = {
{ .data = pattern_0, .len = sizeof pattern_0 },
{ .data = pattern_1, .len = sizeof pattern_1 },
{ .data = pattern_2, .len = sizeof pattern_2 },
};
int current_pattern = 1;

static void txdma_complete(DMA_HandleTypeDef *hdma)
{
SPI_HandleTypeDef *hspi = (SPI_HandleTypeDef *)(((DMA_HandleTypeDef *)hdma)->Parent);
HAL_DMA_Start_IT(hspi->hdmatx, (uint32_t)patterns[current_pattern].data, (uint32_t)&hspi->Instance->DR, patterns[current_pattern].len);

if (++current_pattern == ARRAY_SIZE(patterns))
current_pattern = 0;
}

void app_main() {
hspi->hdmatx->XferCpltCallback = txdma_complete;

HAL_DMA_Start_IT(hspi->hdmatx, (uint32_t)pattern_0, (uint32_t)&hspi->Instance->DR, sizeof pattern_0);
SET_BIT(hspi->Instance->CR2, SPI_CR2_TXDMAEN);

__HAL_SPI_ENABLE(hspi);
}
```
And on Python side:
```python
patterns = [
[0xff, 0xff, 0xff, 0xfe],
[0x01],
[
0x3e, 0x60, 0xc3, 0x4f, 0x35, 0x2e, 0xa6, 0xaa, 0xa6, 0x61, 0x64, 0xcb,
0x10, 0xd7, 0x45, 0x35, 0x82, 0xc9, 0x91, 0xbc, 0x35, 0x43, 0xbb, 0xe1,
0xea, 0x08, 0xdf, 0xdd, 0x4d, 0xd8, 0xd5, 0x94, 0x71, 0x75, 0xfd, 0x23,
0x24, 0xf8, 0x95, 0x85, 0x7b, 0x11, 0xf9, 0xdd, 0xa0, 0xaa, 0x60, 0xc5,
0xd2, 0x07, 0x6b, 0x3a, 0xd4, 0xd2, 0xac, 0xac, 0x1b, 0x54, 0xfe, 0x2f,
0xa2
]
]
spi = Spi()
spi.set_frequency(22000000)
for i in range(100000):
for pattern in patterns:
data = spi.xfer([0] * len(pattern))
print(f'iter {i} test {data} == {pattern}')
assert data == pattern
```

The code works just fine (100k iterations)

```
...
iter 99999 test [255, 255, 255, 254] == [255, 255, 255, 254]
iter 99999 test [1] == [1]
iter 99999 test [62, 96, 195, 79, 53, 46, 166, 170, 166, 97, 100, 203, 16, 215, 69, 53, 130, 201, 145, 188, 53, 67, 187, 225, 234, 8, 223, 221, 77, 216, 213, 148, 113, 117, 253, 35, 36, 248, 149, 133, 123, 17, 249, 221, 160, 170, 96, 197, 210, 7, 107, 58, 212, 210, 172, 172, 27, 84, 254, 47, 162] == [62, 96, 195, 79, 53, 46, 166, 170, 166, 97, 100, 203, 16, 215, 69, 53, 130, 201, 145, 188, 53, 67, 187, 225, 234, 8, 223, 221, 77, 216, 213, 148, 113, 117, 253, 35, 36, 248, 149, 133, 123, 17, 249, 221, 160, 170, 96, 197, 210, 7, 107, 58, 212, 210, 172, 172, 27, 84, 254, 47, 162]
```

The main difference is that the full code performs both read and write, contrary
to only writing. Currently we wait for both TX and RX DMA to complete before
re-programming DMA channels and updating state machine. TX and RX are always
the same size, so should complete with a similar time. So, instead of using
interrupts for both channels I changed the code so that interrupts are used for
TX, and polling for RX.

```c
static void txdma_complete(DMA_HandleTypeDef *hdma)
{
SPI_HandleTypeDef *hspi = (SPI_HandleTypeDef *)(((DMA_HandleTypeDef *)hdma)->Parent);

switch (state) {
case STATE_WAIT_HEADER:
// Wait state got inserted while reading header.
HAL_DMA_Start_IT(hspi->hdmatx, (uint32_t)waitstate_cancel, (uint32_t)&hspi->Instance->DR, sizeof waitstate_cancel);

// We don't care what host sends during wait state, but we start DMA anyway to avoid overrun errors.
HAL_DMA_PollForTransfer(hspi->hdmarx, HAL_DMA_FULL_TRANSFER, HAL_MAX_DELAY);
HAL_DMA_Start_IT(hspi->hdmarx, (uint32_t)&hspi->Instance->DR, (uint32_t)trash, sizeof trash);

transfer_is_read = !!(header[0] & (1 << 7));
transfer_length = (header[0] & 0x3f) + 1;

state = STATE_WAIT_STATE_LAST;
break;

case STATE_WAIT_STATE_LAST:
if (transfer_is_read) {
HAL_DMA_Start_IT(hspi->hdmatx, (uint32_t)scratch_buffer, (uint32_t)&hspi->Instance->DR, transfer_length);
HAL_DMA_PollForTransfer(hspi->hdmarx, HAL_DMA_FULL_TRANSFER, HAL_MAX_DELAY);
HAL_DMA_Start_IT(hspi->hdmarx, (uint32_t)&hspi->Instance->DR, (uint32_t)ff_buffer, transfer_length);
} else {
HAL_DMA_Start_IT(hspi->hdmatx, (uint32_t)ff_buffer, (uint32_t)&hspi->Instance->DR, transfer_length);
HAL_DMA_PollForTransfer(hspi->hdmarx, HAL_DMA_FULL_TRANSFER, HAL_MAX_DELAY);
HAL_DMA_Start_IT(hspi->hdmarx, (uint32_t)&hspi->Instance->DR, (uint32_t)scratch_buffer, transfer_length);
}

state = STATE_PAYLOAD_TRANSFER;
break;

case STATE_PAYLOAD_TRANSFER:
HAL_DMA_Start_IT(hspi->hdmatx, (uint32_t)waitstate_insert, (uint32_t)&hspi->Instance->DR, sizeof waitstate_insert);
HAL_DMA_PollForTransfer(hspi->hdmarx, HAL_DMA_FULL_TRANSFER, HAL_MAX_DELAY);
HAL_DMA_Start_IT(hspi->hdmarx, (uint32_t)&hspi->Instance->DR, (uint32_t)header, sizeof header);

state = STATE_WAIT_HEADER;
break;
}
}
```
Note that I start TX transfer first, then poll for RX DMA completion before
re-programming DMA channel. Now, the test succeeds.
## Extending tests
TODO
## Summary
Summary of the post.
Expand Down
Binary file added blog/static/img/stm32-spi-failure.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.

0 comments on commit d4d7921

Please sign in to comment.