Skip to content

Commit f3f8f53

Browse files
committed
Versal port cleanups and improvements
1 parent 9824b15 commit f3f8f53

5 files changed

Lines changed: 158 additions & 106 deletions

File tree

.gitignore

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -308,3 +308,16 @@ hal/mpfs.dtb
308308

309309
# Emulator test: generated linker scripts
310310
test-app/emu-test-apps/*/target.ld
311+
312+
# AMD/Xilinx Versal test artifacts
313+
uart_log*.txt
314+
build_test*.log
315+
optional_data.txt
316+
wolfboot_output_*.log
317+
BOOT.BIN
318+
Image
319+
fitImage
320+
image.ub
321+
*.pdi
322+
system-default.dtb
323+
test_output/

hal/versal.c

Lines changed: 71 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
* Target: VMK180 Evaluation Board (VM1802 Versal Prime)
2323
*
2424
* Features:
25-
* - UART driver (Cadence UART / ARM PL011)
25+
* - UART driver (ARM PL011 UART / UARTPSV)
2626
* - ARM Generic Timer
2727
* - QSPI flash driver (GQSPI - dual parallel MT25QU01GBBB)
2828
*
@@ -590,6 +590,29 @@ static int qspi_fifo_rx(uint8_t *data, uint32_t len)
590590
return 0;
591591
}
592592

593+
/* Receive rxLen bytes into rxData in IO mode (RX FIFO polling).
 *
 * Shared by both IO-mode read paths of qspi_transfer() so the loop lives in
 * one place. The request is serviced in chunks: for each chunk
 * qspi_calc_exp() picks the chunk size and fills in *rxEntry, the entry is
 * pushed to the GenFIFO, the transfer is started and waited on, and the
 * chunk is then drained from the RX FIFO into the destination buffer.
 *
 * rxData:  destination buffer, at least rxLen bytes
 * rxLen:   number of bytes to receive (0 is a no-op)
 * rxEntry: in/out GenFIFO entry; updated by qspi_calc_exp() for each chunk
 *
 * Returns 0 on success, otherwise the first non-zero code returned by the
 * push, start/wait or FIFO-read step (no further chunks are attempted).
 */
static int qspi_rx_io_mode(uint8_t *rxData, uint32_t rxLen, uint32_t *rxEntry)
{
    uint8_t *dst = rxData;
    uint32_t bytesLeft = rxLen;

    while (bytesLeft > 0) {
        /* Chunk size for this pass; also prepares *rxEntry for the push */
        uint32_t chunk = qspi_calc_exp(bytesLeft, rxEntry);
        int rc = qspi_gen_fifo_push(*rxEntry);

        if (rc != 0) {
            return rc;
        }

        rc = qspi_gen_fifo_start_and_wait();
        if (rc != 0) {
            return rc;
        }

        rc = qspi_fifo_rx(dst, chunk);
        if (rc != 0) {
            return rc;
        }

        dst += chunk;
        bytesLeft -= chunk;
    }

    return 0;
}
615+
593616
/* Core QSPI transfer function using GenFIFO */
594617
static int qspi_transfer(QspiDev_t *dev, const uint8_t *txData, uint32_t txLen,
595618
uint8_t *rxData, uint32_t rxLen, uint32_t dummyClocks,
@@ -601,11 +624,11 @@ static int qspi_transfer(QspiDev_t *dev, const uint8_t *txData, uint32_t txLen,
601624
uint32_t chunkLen;
602625
uint32_t txEntry, chunkEntry;
603626
const uint8_t *writePtr;
604-
uint32_t remaining, offset, xferSz;
627+
uint32_t remaining, xferSz;
605628
uint32_t rxEntry;
606629

607630
/* Enable GQSPI controller */
608-
/* Set DMA mode only for Quad reads (when dummyClocks > 0) and not in IO mode */
631+
/* Set DMA mode for fast/quad reads (indicated by dummyClocks > 0) unless IO mode forced */
609632
if (dummyClocks > 0 && rxLen > 0) {
610633
#ifndef GQSPI_MODE_IO
611634
GQSPI_CFG = (GQSPI_CFG & ~GQSPI_CFG_MODE_EN_MASK) | GQSPI_CFG_MODE_EN_DMA;
@@ -704,9 +727,14 @@ static int qspi_transfer(QspiDev_t *dev, const uint8_t *txData, uint32_t txLen,
704727
if (((uintptr_t)rxData & (GQSPI_DMA_ALIGN - 1)) || (rxLen & 3)) {
705728
/* Use temp buffer for unaligned data */
706729
dmaPtr = dma_tmpbuf;
707-
dmaLen = (rxLen + GQSPI_DMA_ALIGN - 1) & ~(GQSPI_DMA_ALIGN - 1);
708-
if (dmaLen > sizeof(dma_tmpbuf)) {
730+
/* Bounds check before alignment to prevent integer overflow */
731+
if (rxLen > sizeof(dma_tmpbuf)) {
709732
dmaLen = sizeof(dma_tmpbuf);
733+
} else {
734+
dmaLen = (rxLen + GQSPI_DMA_ALIGN - 1) & ~(GQSPI_DMA_ALIGN - 1);
735+
if (dmaLen > sizeof(dma_tmpbuf)) {
736+
dmaLen = sizeof(dma_tmpbuf);
737+
}
710738
}
711739
useTemp = 1;
712740
} else {
@@ -755,37 +783,11 @@ static int qspi_transfer(QspiDev_t *dev, const uint8_t *txData, uint32_t txLen,
755783
}
756784
} else {
757785
/* IO mode: Use FIFO polling (fallback when DMA mode not enabled) */
758-
remaining = rxLen;
759-
offset = 0;
760-
while (ret == 0 && remaining > 0) {
761-
xferSz = qspi_calc_exp(remaining, &rxEntry);
762-
ret = qspi_gen_fifo_push(rxEntry);
763-
if (ret == 0) {
764-
ret = qspi_gen_fifo_start_and_wait();
765-
}
766-
if (ret == 0) {
767-
ret = qspi_fifo_rx(&rxData[offset], xferSz);
768-
}
769-
offset += xferSz;
770-
remaining -= xferSz;
771-
}
786+
ret = qspi_rx_io_mode(rxData, rxLen, &rxEntry);
772787
}
773788
#else /* GQSPI_MODE_IO */
774789
/* IO mode: Use FIFO polling */
775-
remaining = rxLen;
776-
offset = 0;
777-
while (ret == 0 && remaining > 0) {
778-
xferSz = qspi_calc_exp(remaining, &rxEntry);
779-
ret = qspi_gen_fifo_push(rxEntry);
780-
if (ret == 0) {
781-
ret = qspi_gen_fifo_start_and_wait();
782-
}
783-
if (ret == 0) {
784-
ret = qspi_fifo_rx(&rxData[offset], xferSz);
785-
}
786-
offset += xferSz;
787-
remaining -= xferSz;
788-
}
790+
ret = qspi_rx_io_mode(rxData, rxLen, &rxEntry);
789791
#endif /* !GQSPI_MODE_IO */
790792
} else {
791793
/* SPI mode for simple reads */
@@ -837,76 +839,39 @@ static int qspi_read_id(QspiDev_t *dev, uint8_t *id, uint32_t len)
837839
return ret;
838840
}
839841

840-
/* Read flash status register */
841-
static int qspi_read_status(QspiDev_t *dev, uint8_t *status)
842+
/* Generic flash register read helper (handles dual parallel) */
843+
static int qspi_read_register(QspiDev_t *dev, uint8_t cmd, uint8_t *status)
842844
{
843-
uint8_t cmd[1];
845+
uint8_t cmdByte[1];
844846
uint8_t data[4]; /* Space for 2 bytes from each chip */
845847
int ret;
846848
QspiDev_t tmpDev;
847849

848-
/* For dual parallel, read status from each chip separately */
849-
if (dev->stripe) {
850-
/* Read from lower chip */
851-
tmpDev = *dev;
852-
tmpDev.bus = GQSPI_GEN_FIFO_BUS_LOW;
853-
tmpDev.cs = GQSPI_GEN_FIFO_CS_LOWER;
854-
tmpDev.stripe = 0;
855-
cmd[0] = FLASH_CMD_READ_STATUS;
856-
ret = qspi_transfer(&tmpDev, cmd, 1, &data[0], 1, 0, NULL, 0);
857-
if (ret != 0) return ret;
858-
859-
/* Read from upper chip */
860-
tmpDev.bus = GQSPI_GEN_FIFO_BUS_UP;
861-
tmpDev.cs = GQSPI_GEN_FIFO_CS_UPPER;
862-
ret = qspi_transfer(&tmpDev, cmd, 1, &data[1], 1, 0, NULL, 0);
863-
if (ret != 0) return ret;
864-
865-
/* AND the status from both chips */
866-
*status = data[0] & data[1];
867-
return 0;
868-
}
869-
870-
cmd[0] = FLASH_CMD_READ_STATUS;
871-
ret = qspi_transfer(dev, cmd, 1, data, 1, 0, NULL, 0);
872-
if (ret == 0) {
873-
*status = data[0];
874-
}
875-
return ret;
876-
}
877-
878-
/* Read flash flag status register */
879-
static int qspi_read_flag_status(QspiDev_t *dev, uint8_t *status)
880-
{
881-
uint8_t cmd[1];
882-
uint8_t data[4];
883-
int ret;
884-
QspiDev_t tmpDev;
850+
cmdByte[0] = cmd;
885851

886-
/* For dual parallel, read status from each chip separately */
852+
/* For dual parallel, read from each chip separately and AND the results */
887853
if (dev->stripe) {
888854
/* Read from lower chip */
889855
tmpDev = *dev;
890856
tmpDev.bus = GQSPI_GEN_FIFO_BUS_LOW;
891857
tmpDev.cs = GQSPI_GEN_FIFO_CS_LOWER;
892858
tmpDev.stripe = 0;
893-
cmd[0] = FLASH_CMD_READ_FLAG_STATUS;
894-
ret = qspi_transfer(&tmpDev, cmd, 1, &data[0], 1, 0, NULL, 0);
859+
ret = qspi_transfer(&tmpDev, cmdByte, 1, &data[0], 1, 0, NULL, 0);
895860
if (ret != 0) return ret;
896861

897862
/* Read from upper chip */
898863
tmpDev.bus = GQSPI_GEN_FIFO_BUS_UP;
899864
tmpDev.cs = GQSPI_GEN_FIFO_CS_UPPER;
900-
ret = qspi_transfer(&tmpDev, cmd, 1, &data[1], 1, 0, NULL, 0);
865+
ret = qspi_transfer(&tmpDev, cmdByte, 1, &data[1], 1, 0, NULL, 0);
901866
if (ret != 0) return ret;
902867

903868
/* AND the status from both chips */
904869
*status = data[0] & data[1];
905870
return 0;
906871
}
907872

908-
cmd[0] = FLASH_CMD_READ_FLAG_STATUS;
909-
ret = qspi_transfer(dev, cmd, 1, data, 1, 0, NULL, 0);
873+
/* Single chip mode */
874+
ret = qspi_transfer(dev, cmdByte, 1, data, 1, 0, NULL, 0);
910875
if (ret == 0) {
911876
*status = data[0];
912877
}
@@ -921,10 +886,14 @@ static int qspi_wait_ready(QspiDev_t *dev)
921886
int ret;
922887

923888
while (timeout-- > 0) {
924-
ret = qspi_read_flag_status(dev, &status);
889+
ret = qspi_read_register(dev, FLASH_CMD_READ_FLAG_STATUS, &status);
925890
if (ret == 0 && (status & FLASH_FSR_READY)) {
926891
return 0;
927892
}
893+
/* Add small delay every 100 polls to reduce bus traffic during erase/write ops */
894+
if ((timeout % 100) == 0) {
895+
hal_delay_us(10);
896+
}
928897
}
929898
QSPI_DEBUG_PRINTF("QSPI: Flash ready timeout\n");
930899
return -1;
@@ -963,7 +932,7 @@ static int qspi_write_enable(QspiDev_t *dev)
963932

964933
/* Wait for WEL bit to be set */
965934
while (timeout-- > 0) {
966-
ret = qspi_read_status(dev, &status);
935+
ret = qspi_read_register(dev, FLASH_CMD_READ_STATUS, &status);
967936
if (ret == 0 && (status & FLASH_SR_WEL)) {
968937
return 0;
969938
}
@@ -1446,6 +1415,13 @@ int ext_flash_write(uintptr_t address, const uint8_t *data, int len)
14461415
return -1;
14471416
}
14481417

1418+
/* Validate flash address bounds */
1419+
if (address >= FLASH_TOTAL_SIZE || (address + len) > FLASH_TOTAL_SIZE) {
1420+
QSPI_DEBUG_PRINTF("ext_flash_write: address 0x%lx+%d exceeds flash size\n",
1421+
(unsigned long)address, len);
1422+
return -1;
1423+
}
1424+
14491425
QSPI_DEBUG_PRINTF("ext_flash_write: addr=0x%lx, len=%d\n",
14501426
(unsigned long)address, len);
14511427

@@ -1496,6 +1472,13 @@ int ext_flash_read(uintptr_t address, uint8_t *data, int len)
14961472
return -1;
14971473
}
14981474

1475+
/* Validate flash address bounds */
1476+
if (address >= FLASH_TOTAL_SIZE || (address + len) > FLASH_TOTAL_SIZE) {
1477+
QSPI_DEBUG_PRINTF("ext_flash_read: address 0x%lx+%d exceeds flash size\n",
1478+
(unsigned long)address, len);
1479+
return -1;
1480+
}
1481+
14991482
QSPI_DEBUG_PRINTF("ext_flash_read: addr=0x%lx len=%d\n",
15001483
(unsigned long)address, len);
15011484

@@ -1532,6 +1515,13 @@ int ext_flash_erase(uintptr_t address, int len)
15321515
return -1;
15331516
}
15341517

1518+
/* Validate flash address bounds */
1519+
if (address >= FLASH_TOTAL_SIZE || (address + len) > FLASH_TOTAL_SIZE) {
1520+
QSPI_DEBUG_PRINTF("ext_flash_erase: address 0x%lx+%d exceeds flash size\n",
1521+
(unsigned long)address, len);
1522+
return -1;
1523+
}
1524+
15351525
QSPI_DEBUG_PRINTF("ext_flash_erase: addr=0x%lx, len=%d\n",
15361526
(unsigned long)address, len);
15371527

hal/versal.h

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,12 +59,14 @@
5959
/* ARM Errata */
6060
#define CONFIG_ARM_ERRATA_855873 1
6161

62-
/* BL31-applied errata/CVEs (preserve when modifying CPUACTLR):
62+
/* BL31-applied errata/CVEs:
63+
* ARM Trusted Firmware (BL31) applies these CPU workarounds at EL3:
6364
* - Erratum 859971
6465
* - Erratum 1319367
6566
* - CVE-2017-5715 (Spectre V2)
6667
* - CVE-2018-3639 (SSB)
6768
* - CVE-2022-23960
69+
* Note: Do not modify CPUACTLR_EL1 in application code without preserving these.
6870
*/
6971

7072
#endif /* USE_BUILTIN_STARTUP */
@@ -495,12 +497,19 @@
495497
#define FLASH_SECTOR_SIZE 0x10000 /* 64KB */
496498
#define FLASH_DEVICE_SIZE 0x8000000 /* 128MB per chip */
497499

500+
/* Total flash size depends on single vs dual parallel configuration.
 * FLASH_TOTAL_SIZE is the exclusive upper bound used by the address range
 * checks in ext_flash_write(), ext_flash_read() and ext_flash_erase().
 *
 * NOTE(review): the selector below is spelled GQPI_ while the other QSPI
 * macros in this header use the GQSPI_ prefix - confirm it matches the name
 * that is actually defined. An identifier that is not defined evaluates to 0
 * inside #if, which would silently select the single-chip size. */
501+
#if GQPI_USE_DUAL_PARALLEL == 1
502+
#define FLASH_TOTAL_SIZE (FLASH_DEVICE_SIZE * 2) /* 256MB total */
503+
#else
504+
#define FLASH_TOTAL_SIZE FLASH_DEVICE_SIZE /* 128MB total */
505+
#endif
506+
498507
/* QSPI Configuration (bare-metal driver) */
499508
#ifndef GQSPI_CLK_REF
500509
#define GQSPI_CLK_REF 300000000 /* 300 MHz */
501510
#endif
502511
#ifndef GQSPI_CLK_DIV
503-
#define GQSPI_CLK_DIV 1 /* Divide by 4 (300MHz / 4 = 75MHz) */
512+
#define GQSPI_CLK_DIV 1 /* Formula: 300MHz / (2 << 1) = 75MHz */
504513
#endif
505514
#define GQSPI_CS_ASSERT_CLOCKS 5 /* CS Setup Time (tCSS) */
506515
#define GQSPI_CS_DEASSERT_CLOCKS 4 /* CS Hold Time */

src/boot_aarch64_start.S

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -198,8 +198,10 @@ InitEL3:
198198
orr w1, w1, #(1 << 1) /* Set IRQ bit (IRQs routed to EL3) */
199199
msr SCR_EL3, x1
200200

201-
/* Configure CPUACTLR_EL1 - read-modify-write to preserve BL31 workarounds:
202-
* 859971, 1319367, CVE-2017-5715, CVE-2018-3639, CVE-2022-23960 */
201+
/* Configure CPUACTLR_EL1 - CPU auxiliary control register
202+
* Read-modify-write to preserve any pre-configured bits and add prefetch settings.
203+
* Note: On Versal, BL31 will later apply additional errata workarounds after EL3 init:
204+
* 859971, 1319367, CVE-2017-5715, CVE-2018-3639, CVE-2022-23960 */
203205
mrs x0, S3_1_C15_C2_0 /* Read current CPUACTLR_EL1 */
204206
ldr x1,=0x80CA000 /* L1 Data prefetch control - 5, Enable device split throttle, 2 independent data prefetch streams */
205207
orr x0, x0, x1 /* Merge with existing value */

0 commit comments

Comments
 (0)