P4 Assembly for Art-Net

This commit is contained in:
Troy
2024-11-07 13:32:45 -05:00
parent 3f02ba73df
commit e9ce495d83

31
wled00/p4_mul16x16.S Normal file
View File

@@ -0,0 +1,31 @@
#if defined(ARDUINO_ARCH_ESP32P4)
# -----------------------------------------------------------------------------
# p4_mul16x16: scale a byte buffer by a brightness byte, 16 bytes per pass,
# using the ESP32-P4 PIE vector instructions (GNU as syntax, RISC-V ilp32f).
#
# Build flags: -march rv32imafc_zicsr_zifencei_xesppie -mabi ilp32f
#
# In:       a0 = out_packet   destination, advanced by 16 bytes per pass
#           a1 = brightness   ADDRESS of the brightness byte (it is read from
#                             memory by lbu and by esp.vldbc.8.ip)
#           a2 = num_loops    number of 16-byte passes; values <= 0 do nothing
#           a3 = pixelbuffer  source, advanced by 16 bytes per pass
# Result:   brightness == 255 : out_packet[i] = pixelbuffer[i]
#           otherwise         : out_packet[i] = (pixelbuffer[i] * brightness) >> 8
#           a0 and a3 are left pointing just past the last byte processed.
# Clobbers: t1, t4, t5, t6, q0, q1, q2. Bit 1 of the PIE CFG register stays
#           set and SAR stays at 8; neither is restored on return.
# NOTE(review): the original comment describes CFG bit 1 as enabling aligned
#           data access. Confirm against the PIE documentation which
#           alignment a0 and a3 must have for the 128-bit load and store.
# -----------------------------------------------------------------------------
        .text
        .align  4
        .global p4_mul16x16
        .type   p4_mul16x16,@function
p4_mul16x16:
        esp.movx.r.cfg  t6                      # t6 = current PIE CFG
        ori             t6, t6, 2               # set CFG bit 1 (data access mode)
        esp.movx.w.cfg  t6                      # write CFG back
        li              t6, 8
        esp.movx.w.sar  t6                      # SAR = 8, so vmul results are >> 8

        blez            a2, .Lp4_mul16x16_done  # zero or negative pass count

        lbu             t4, 0(a1)               # t4 = brightness value (a1 is a pointer)
        li              t5, 255
        li              t1, 0                   # t1 = passes completed
        beq             t4, t5, .Lp4_mul16x16_copy  # full brightness: plain copy

        # Scale path. Invariant here: 0 <= t1 < a2.
        esp.vldbc.8.ip  q1, a1, 0               # q1 = brightness broadcast to all 16 lanes
.Lp4_mul16x16_scale:
        esp.vld.128.ip  q0, a3, 16              # q0 = next 16 source bytes, a3 += 16
        esp.vmul.u8     q2, q0, q1              # q2 = (q0 * q1) >> SAR, per u8 lane
        esp.vst.128.ip  q2, a0, 16              # store 16 scaled bytes, a0 += 16
        addi            t1, t1, 1
        blt             t1, a2, .Lp4_mul16x16_scale
        ret

        # Copy path. Multiplying by 255 and shifting by 8 would darken every
        # byte slightly, so the source is stored unmodified instead.
.Lp4_mul16x16_copy:
        esp.vld.128.ip  q0, a3, 16              # q0 = next 16 source bytes, a3 += 16
        esp.vst.128.ip  q0, a0, 16              # store them unchanged, a0 += 16
        addi            t1, t1, 1
        blt             t1, a2, .Lp4_mul16x16_copy
.Lp4_mul16x16_done:
        ret
        .size   p4_mul16x16, .-p4_mul16x16
#endif