Skip to content

Commit 61638a4

Browse files
committed
Relocation to DDR and enabling cache
1 parent c9c3b95 commit 61638a4

5 files changed

Lines changed: 154 additions & 57 deletions

File tree

docs/Targets.md

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3129,12 +3129,22 @@ Example Boot Debug Output:
31293129

31303130
```
31313131
wolfBoot Init
3132-
Part: Active 0, Address E8080000
3133-
Image size 1028
3132+
Build: Feb 19 2026 13:24:56
3133+
DDR Test: PASSED
3134+
Ramcode: copied 2852 bytes to DDR, TLB9 remapped
3135+
CPC: Released SRAM, full 2MB L2 cache enabled
3136+
Flash: caching enabled (L1+L2+CPC)
3137+
Versions: Boot 1, Update 0
3138+
Trying Boot partition at 0xEFFC0000
3139+
Boot partition: 0xEFFC0000 (sz 3164, ver 0x1, type 0x601)
3140+
Checking integrity...done
3141+
Verifying signature...done
3142+
Successfully selected image in part: 0
31343143
Firmware Valid
3135-
Loading 1028 bytes to RAM at 19000
3136-
Failed parsing DTB to load.
3137-
Booting at 19000
3144+
Copying image from 0xEFFC0200 to RAM at 0x19000 (3164 bytes)
3145+
Failed parsing DTB to load
3146+
Booting at 0x19000
3147+
FDT: Invalid header! -1
31383148
Test App
31393149
31403150
0x00000001

hal/nxp_ppc.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,11 @@
144144
#define DDR_STACK_TOP 0x02000000UL /* Top of first 32MB */
145145
#define DDR_STACK_BASE (DDR_STACK_TOP - DDR_STACK_SIZE)
146146

147+
/* DDR address where .ramcode is copied before CPC SRAM is released.
148+
* TLB9 is remapped: VA 0xF8F00000 -> PA DDR_RAMCODE_ADDR so that
149+
* RAMFUNCTION code continues to work after CPC becomes L2 cache. */
150+
#define DDR_RAMCODE_ADDR 0x03000000UL /* 48MB into DDR */
151+
147152
#define FLASH_BASE_ADDR 0xE8000000UL
148153
#define FLASH_BASE_PHYS_HIGH 0x0ULL
149154
#define FLASH_LAW_SIZE LAW_SIZE_128MB

hal/nxp_t2080.c

Lines changed: 92 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
2020
*/
2121
#include <stdint.h>
22-
#include <string.h>
2322
#include "target.h"
2423
#include "printf.h"
2524
#include "image.h" /* for RAMFUNCTION */
@@ -374,59 +373,105 @@ static void hal_cpld_init(void)
374373
}
375374

376375
#ifdef ENABLE_DDR
377-
/* Relocate stack from CPC SRAM to DDR for more stack space.
378-
* Call this after DDR is initialized and verified working.
379-
* This allows signature verification (ECC P384) which needs ~20-30KB stack. */
380-
static void hal_relocate_stack_to_ddr(void)
376+
/* Release CPC SRAM back to cache mode (the CPC then serves as L3 cache).
377+
* Call after stack is relocated to DDR (done in boot_entry_C).
378+
* This gives us the full 2MB CPC as L3 cache for better performance.
379+
*
380+
* Before releasing CPC SRAM, .ramcode (RAMFUNCTION) is copied to DDR
381+
* and TLB9 is remapped: VA 0xF8F00000 -> PA DDR_RAMCODE_ADDR so that
382+
* RAMFUNCTION code (memcpy, wolfBoot_start, etc.) continues to work. */
383+
static void hal_reconfigure_cpc_as_cache(void)
381384
{
382-
uint32_t new_sp = DDR_STACK_TOP - 64; /* 64-byte alignment, room for frame */
385+
volatile uint32_t *cpc_csr0 = (volatile uint32_t *)(CPC_BASE + CPCCSR0);
386+
volatile uint32_t *cpc_srcr0 = (volatile uint32_t *)(CPC_BASE + CPCSRCR0);
387+
uint32_t reg;
383388

384-
/* Zero the DDR stack area for clean operation */
385-
memset((void*)DDR_STACK_BASE, 0, DDR_STACK_SIZE);
389+
/* Linker symbols for .ramcode section boundaries */
390+
extern unsigned int _start_ramcode;
391+
extern unsigned int _end_ramcode;
392+
uint32_t ramcode_size = (uint32_t)&_end_ramcode - (uint32_t)&_start_ramcode;
393+
394+
/* Step 1: Copy .ramcode from CPC SRAM to DDR.
395+
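* Use a volatile word loop, not memcpy: memcpy itself lives in .ramcode (the region being moved) and volatile keeps the compiler from turning the loop into a memcpy call. */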
396+
if (ramcode_size > 0) {
397+
volatile const uint32_t *src = (volatile const uint32_t *)&_start_ramcode;
398+
volatile uint32_t *dst = (volatile uint32_t *)DDR_RAMCODE_ADDR;
399+
volatile uint32_t *end = (volatile uint32_t *)(DDR_RAMCODE_ADDR +
400+
ramcode_size);
401+
while (dst < end) {
402+
*dst++ = *src++;
403+
}
404+
405+
/* Flush D-cache and invalidate I-cache for the DDR copy */
406+
flush_cache(DDR_RAMCODE_ADDR, ramcode_size);
386407

387-
/* Switch stack pointer from CPC SRAM to DDR.
388-
* r1 is the stack pointer in PowerPC ABI. */
389-
__asm__ __volatile__(
390-
"mr 1, %0\n" /* Move new stack address to r1 */
391-
"sync\n"
392-
:
393-
: "r" (new_sp)
394-
: "memory"
395-
);
408+
/* Step 2: Remap TLB9: same VA (0xF8F00000) -> DDR physical address.
409+
* All .ramcode references use VA 0xF8F00000, so this makes them
410+
* transparently access the DDR copy instead of CPC SRAM. */
411+
set_tlb(1, 9,
412+
L2SRAM_ADDR, DDR_RAMCODE_ADDR, 0,
413+
MAS3_SX | MAS3_SW | MAS3_SR, MAS2_M, 0,
414+
INITIAL_SRAM_BOOKE_SZ, 1);
415+
416+
/* Ensure TLB update and I-cache pick up new mapping */
417+
invalidate_icache();
418+
}
396419

397420
#ifdef DEBUG_UART
398-
wolfBoot_printf("Stack relocated to DDR (SP=0x%x)\n", new_sp);
421+
wolfBoot_printf("Ramcode: copied %d bytes to DDR, TLB9 remapped\n",
422+
ramcode_size);
399423
#endif
400-
}
401-
402-
/* Release CPC SRAM back to L2 cache mode after stack is relocated to DDR.
403-
* This gives us the full 2MB CPC as instruction/data cache for better performance. */
404-
static void hal_reconfigure_cpc_as_cache(void)
405-
{
406-
volatile uint32_t *cpc_csr0 = (volatile uint32_t *)(CPC_BASE + CPCCSR0);
407-
volatile uint32_t *cpc_srcr0 = (volatile uint32_t *)(CPC_BASE + CPCSRCR0);
408-
uint32_t reg;
409424

410-
/* Step 1: Flush the CPC to ensure no stale SRAM data.
411-
* IMPORTANT: Read-modify-write to preserve CPCE/CPCPE enable bits! */
425+
/* Step 3: Flush the CPC to push any dirty SRAM data out.
426+
* Read-modify-write to preserve CPCE/CPCPE enable bits. */
412427
reg = *cpc_csr0;
413428
reg |= CPCCSR0_CPCFL;
414429
*cpc_csr0 = reg;
415430
__asm__ __volatile__("sync; isync" ::: "memory");
416431

417-
/* Step 2: Poll until flush completes (CPCFL clears) */
418-
do {
419-
reg = *cpc_csr0;
420-
} while (reg & CPCCSR0_CPCFL);
432+
/* Step 4: Poll until flush completes (CPCFL clears) */
433+
while (*cpc_csr0 & CPCCSR0_CPCFL);
421434

422-
/* Step 3: Disable SRAM mode - release ways back to cache */
423-
*cpc_srcr0 = 0; /* Clear SRAMEN and SRAMSZ */
435+
/* Step 5: Disable SRAM mode - release all ways back to cache */
436+
*cpc_srcr0 = 0;
424437
__asm__ __volatile__("sync; isync" ::: "memory");
425438

426-
/* CPC remains enabled (CPCE/CPCPE preserved), now with all ways as cache */
439+
/* Step 6: Disable CPC SRAM LAW (no longer needed — TLB9 now routes
440+
* to DDR via LAW4, not CPC SRAM via LAW2).
441+
* Keep TLB9 — it's remapped to DDR and still in use. */
442+
set32(LAWAR(2), 0);
443+
444+
/* Step 7: Flash invalidate CPC to start fresh as cache */
445+
reg = *cpc_csr0;
446+
reg |= CPCCSR0_CPCFI;
447+
*cpc_csr0 = reg;
448+
__asm__ __volatile__("sync; isync" ::: "memory");
449+
while (*cpc_csr0 & CPCCSR0_CPCFI);
450+
451+
/* CPC remains enabled (CPCE/CPCPE preserved), now all 2MB as cache */
427452

428453
#ifdef DEBUG_UART
429-
wolfBoot_printf("CPC: Released SRAM, full L2 cache enabled\n");
454+
wolfBoot_printf("CPC: Released SRAM, full 2MB L2 cache enabled\n");
455+
#endif
456+
}
457+
458+
/* Make flash TLB cacheable for XIP code performance.
459+
* Changes TLB Entry 2 (flash) from MAS2_I|MAS2_G to MAS2_M.
460+
* This enables L1 I-cache + L2 + CPC to cache flash instructions. */
461+
static void hal_flash_enable_caching(void)
462+
{
463+
/* Rewrite flash TLB entry with cacheable attributes.
464+
* MAS2_M = memory coherence required; dropping MAS2_I|MAS2_G is what makes the region cacheable */
465+
set_tlb(1, 2,
466+
FLASH_BASE_ADDR, FLASH_BASE_ADDR, FLASH_BASE_PHYS_HIGH,
467+
MAS3_SX | MAS3_SW | MAS3_SR, MAS2_M, 0,
468+
FLASH_TLB_PAGESZ, 1);
469+
470+
/* Invalidate L1 I-cache so new TLB attributes take effect */
471+
invalidate_icache();
472+
473+
#ifdef DEBUG_UART
474+
wolfBoot_printf("Flash: caching enabled (L1+L2+CPC)\n");
430475
#endif
431476
}
432477
#endif /* ENABLE_DDR */
@@ -594,22 +639,17 @@ void hal_init(void)
594639
#ifdef DEBUG_UART
595640
hal_ddr_test();
596641
#endif
597-
/* TODO: Implement proper assembly-based stack relocation to DDR.
598-
* The current C-based approach corrupts return addresses because:
599-
* 1. hal_init's return address is saved on CPC SRAM stack
600-
* 2. Stack switch changes SP to DDR (zeroed area)
601-
* 3. CPC release makes old stack contents invalid
602-
* 4. Function returns read garbage addresses
642+
643+
/* Stack is already in DDR (relocated in boot_entry_C via
644+
* ddr_call_with_stack trampoline before main() was called).
603645
*
604-
* For now, keep using CPC SRAM stack (1MB should be enough for P384).
605-
* Stack relocation needs to be done in assembly with proper LR handling.
606-
*/
607-
#if 0 /* Disabled until proper assembly implementation */
608-
{
609-
hal_relocate_stack_to_ddr();
610-
hal_reconfigure_cpc_as_cache();
611-
}
612-
#endif
646+
* Now release CPC SRAM back to cache mode and enable flash caching.
647+
* This dramatically improves ECC signature verification performance:
648+
* - CPC (2MB) becomes L3 cache for all memory accesses
649+
* - Flash code is cached by L1 I-cache + L2 + CPC
650+
* - Stack/data in DDR is cached by L1 D-cache + L2 + CPC */
651+
hal_reconfigure_cpc_as_cache();
652+
hal_flash_enable_caching();
613653
#endif
614654
}
615655

src/boot_ppc.c

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,7 +179,25 @@ void boot_entry_C(void)
179179
}
180180

181181
/* Run wolfBoot! */
182+
#ifdef ENABLE_DDR
183+
/* DDR is initialized, .data and .bss are set up.
184+
* Switch stack from CPC SRAM to DDR for:
185+
* 1. Better performance (DDR stack is cacheable by L1/L2/CPC)
186+
* 2. More stack space (64KB vs shared CPC SRAM)
187+
* Uses assembly trampoline since we can't return after stack switch.
188+
* The CPC SRAM will be released back to L2 cache in hal_init(). */
189+
{
190+
extern void ddr_call_with_stack(uint32_t func, uint32_t sp);
191+
/* Zero DDR stack area using volatile to prevent memset transform */
192+
volatile uint32_t *p = (volatile uint32_t *)DDR_STACK_BASE;
193+
volatile uint32_t *e = (volatile uint32_t *)DDR_STACK_TOP;
194+
while (p < e) { *p++ = 0; }
195+
ddr_call_with_stack((uint32_t)main, DDR_STACK_TOP - 64);
196+
/* Does not return */
197+
}
198+
#else
182199
main();
200+
#endif
183201
}
184202

185203
#ifndef BUILD_LOADER_STAGE1

src/boot_ppc_start.S

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1001,6 +1001,30 @@ dcache_disable:
10011001
blr
10021002
#endif
10031003

1004+
/* void ddr_call_with_stack(uint32_t func_ptr, uint32_t new_sp)
1005+
* Switches stack pointer to DDR and calls the given function.
1006+
* Used by boot_entry_C to transition from CPC SRAM stack to DDR stack
1007+
* before calling main(). Does not return.
1008+
* r3 = function pointer to call
1009+
* r4 = new stack pointer (top of DDR stack area) */
1010+
.global ddr_call_with_stack
1011+
ddr_call_with_stack:
1012+
mr r1, r4 /* Set stack pointer to DDR */
1013+
/* Create minimal PPC ABI stack frame with terminated back chain */
1014+
li r0, 0
1015+
stwu r0, -4(r1) /* Terminate back chain */
1016+
stwu r0, -4(r1)
1017+
stwu r1, -8(r1) /* Save back chain and move SP */
1018+
lis r0, RESET_VECTOR@h
1019+
ori r0, r0, RESET_VECTOR@l
1020+
stwu r1, -8(r1) /* Save back chain and move SP */
1021+
stw r0, +12(r1) /* Save return addr (underflow vector) */
1022+
/* Call the function */
1023+
mtctr r3
1024+
bctrl
1025+
/* Should never reach here */
1026+
1: b 1b
1027+
10041028
#ifdef USE_GOT
10051029

10061030
/* function to relocate code, handling cache flushing and continue to

0 commit comments

Comments
 (0)