ESP32 Forum

Posted: **Mon Oct 17, 2022 8:27 pm**

When I insert the ee.vld.128.ip instruction into my code, I get a trap. The address I am passing is correct: it is 16-byte aligned (actually 64-byte aligned) and readable using l32i. This is on an ESP32-S3.

Posted: **Mon Oct 17, 2022 8:28 pm**

Note that almost identical code that doesn't use the ee.vld.128.ip instruction works fine.

Here is my code:

Code: [Select all] [Expand/Collapse]

    volatile register uint32_t *pItem = (uint32_t *)mRMT_mem_ptr;
    for (register int i = 0; i < PULSES_PER_FILL/8; i++) {
        if (mCur < mSize) {
            register uint32_t tmp1, tmp2, tmp3, tmp4;
#if 1
            register uint8_t pData = mPixelData[mCur];
            register rmt_item32_t *bitTablePtr = &bitTable[0][0];
#if 1
            // This is a slight speedup by not waiting for the load stall
            // cycles.  By the time the load is finished, the store should
            // be ready.  Stores aren't as critical due to a write buffer,
            // but you can't immediately store after loading without losing
            // a couple of cycles due to pipeline stalls.
            __asm__ __volatile__(
                // Get upper nibble of color and multiply it by 16 to
                // get the address in the table
                "   srli            %[tmp], %[p], 4             \n"
                "   slli            %[tmp], %[tmp], 4           \n"
                // Add color nibble as offset to table
                "   add.n           %[tmp], %[tmp], %[bitTable] \n"
                // Load 4 words from table
                "   l32i            %[tmp1], %[tmp],0           \n"
                "   l32i            %[tmp2], %[tmp],4           \n"
                "   l32i            %[tmp3], %[tmp],8           \n"
                "   l32i            %[tmp4], %[tmp],12          \n"
                // Store 4 words to RMT memory
                "   s32i            %[tmp1], %[pRmtMem], 0x0     \n"
                "   s32i            %[tmp2], %[pRmtMem], 0x4     \n"
                "   s32i            %[tmp3], %[pRmtMem], 0x8     \n"
                "   s32i            %[tmp4], %[pRmtMem], 0xc     \n"
                // Take the lower nibble and multiply it by 16 to
                // get the address in the table
                "   extui           %[tmp], %[p], 0, 4          \n"
                "   slli            %[tmp], %[tmp], 4           \n"
                // Add color nibble as offset to table
                "   add.n           %[tmp], %[tmp], %[bitTable] \n"
                // Load 4 words from table
                "   l32i            %[tmp1], %[tmp],0           \n"
                "   l32i            %[tmp2], %[tmp],4           \n"
                "   l32i            %[tmp3], %[tmp],8           \n"
                "   l32i            %[tmp4], %[tmp],12          \n"
                // Store 4 words to RMT memory
                "   s32i            %[tmp1], %[pRmtMem], 0x10   \n"
                "   s32i            %[tmp2], %[pRmtMem], 0x14   \n"
                "   s32i            %[tmp3], %[pRmtMem], 0x18   \n"
                "   s32i            %[tmp4], %[pRmtMem], 0x1c   \n"
                // Update RMT memory pointer
                "   addi.n          %[pRmtMem], %[pRmtMem], 0x20\n"
                // Flush the writes
                "   memw                                        \n"
                : [tmp] "=&r"(tmp), [pRmtMem] "+r"(pItem),
                  [tmp1] "=&r"(tmp1),[tmp2] "=&r"(tmp2),
                  [tmp3] "=&r"(tmp3), [tmp4] "=&r"(tmp4)
                : [bitTable] "r"(bitTablePtr), [p] "r"(pData)
                : );
#else
            // This attempts to do the same thing but with the vector load
            // instructions.  This crashes for some unknown reason when the
            // ee.vld.128.ip instruction hits.  It's not due to alignment
            // but for some reason the ee instruction is barfing on the address.
            __asm__ __volatile__(
                "   srli            %[tmp], %[p], 4             \n"
                "   slli            %[tmp], %[tmp], 4           \n"
                "   add.n           %[tmp], %[tmp], %[bitTable] \n"
                "   mov.n           a15, %[tmp]                 \n"
                "   l32i            a14, %[tmp], 0              \n"
                "   ee.vld.128.ip   q0,%[tmp],0                 \n"
                "   extui           %[tmp], %[p], 0, 4          \n"
                "   slli            %[tmp], %[tmp], 4           \n"
                "   add.n           %[tmp], %[tmp], %[bitTable] \n"
                "   ee.vld.128.ip   q1,%[tmp],0                 \n"
                "   ee.movi.32.a    q0, %[tmp], 3               \n"
                "   s32i            %[tmp], %[pRmtMem], 0x0     \n"
                "   ee.movi.32.a    q0, %[tmp], 2               \n"
                "   s32i            %[tmp], %[pRmtMem], 0x4     \n"
                "   ee.movi.32.a    q0, %[tmp], 1               \n"
                "   s32i            %[tmp], %[pRmtMem], 0x8     \n"
                "   ee.movi.32.a    q0, %[tmp], 0               \n"
                "   s32i            %[tmp], %[pRmtMem], 0xc     \n"
                "   ee.movi.32.a    q1, %[tmp], 3               \n"
                "   s32i            %[tmp], %[pRmtMem], 0x10    \n"
                "   ee.movi.32.a    q1, %[tmp], 2               \n"
                "   s32i            %[tmp], %[pRmtMem], 0x14    \n"
                "   ee.movi.32.a    q1, %[tmp], 1               \n"
                "   s32i            %[tmp], %[pRmtMem], 0x18    \n"
                "   ee.movi.32.a    q1, %[tmp], 0               \n"
                "   s32i            %[tmp], %[pRmtMem], 0x1c    \n"
                "   addi            %[pRmtMem],%[pRmtMem], 0x20 \n"
                "   memw                                        \n"
                : [tmp] "=&r"(tmp), [pRmtMem] "+r"(pItem)
                : [bitTable] "r"(bitTablePtr), [p] "r"(pData)
                : "a14", "a15");
#endif
            mCur++;

Posted: **Mon Oct 17, 2022 9:17 pm**

Is this the same issue you described in https://esp32.com/viewtopic.php?f=2&t=30121? If so, please avoid creating duplicate topics. If not, could you please point out the difference?

ESP32 Forum

ESP32 S3 ee.vld.128.ip instruction crashes

ESP32 S3 ee.vld.128.ip instruction crashes

Re: ESP32 S3 ee.vld.128.ip instruction crashes

Re: ESP32 S3 ee.vld.128.ip instruction crashes