replacement for fastled sqrt16() (#4426)
* Added a bitwise-operation-based sqrt16 as a replacement for the FastLED one; it is about 10% slower for numbers smaller than 128 but faster for larger numbers. The speed difference is irrelevant to WLED, but it saves some flash. * Updated to 32 bits and improved for typical WLED use - making it 32-bit allows for larger numbers - added another initial condition check for medium-sized numbers - increased the "small number" optimization threshold to larger numbers: the function is currently only used to calculate sqrt(x^2+y^2), which even for small segments is larger than the initially used 64, so optimizing for 1024 makes more sense, although the value is arbitrarily chosen
This commit is contained in:
@@ -6179,15 +6179,15 @@ uint16_t mode_2Dmetaballs(void) { // Metaballs by Stefan Petrick. Cannot have
|
||||
// and add them together with weightening
|
||||
uint16_t dx = abs(x - x1);
|
||||
uint16_t dy = abs(y - y1);
|
||||
uint16_t dist = 2 * sqrt16((dx * dx) + (dy * dy));
|
||||
uint16_t dist = 2 * sqrt32_bw((dx * dx) + (dy * dy));
|
||||
|
||||
dx = abs(x - x2);
|
||||
dy = abs(y - y2);
|
||||
dist += sqrt16((dx * dx) + (dy * dy));
|
||||
dist += sqrt32_bw((dx * dx) + (dy * dy));
|
||||
|
||||
dx = abs(x - x3);
|
||||
dy = abs(y - y3);
|
||||
dist += sqrt16((dx * dx) + (dy * dy));
|
||||
dist += sqrt32_bw((dx * dx) + (dy * dy));
|
||||
|
||||
// inverse result
|
||||
byte color = dist ? 1000 / dist : 255;
|
||||
@@ -11744,7 +11744,7 @@ uint16_t mode_particle1DsonicStream(void) {
|
||||
else PartSys->particles[i].ttl = 0;
|
||||
}
|
||||
if (SEGMENT.check1) { // modulate colors by mid frequencies
|
||||
int mids = sqrt16((int)fftResult[5] + (int)fftResult[6] + (int)fftResult[7] + (int)fftResult[8] + (int)fftResult[9] + (int)fftResult[10]); // average the mids, bin 5 is ~500Hz, bin 10 is ~2kHz (see audio_reactive.h)
|
||||
int mids = sqrt32_bw((int)fftResult[5] + (int)fftResult[6] + (int)fftResult[7] + (int)fftResult[8] + (int)fftResult[9] + (int)fftResult[10]); // average the mids, bin 5 is ~500Hz, bin 10 is ~2kHz (see audio_reactive.h)
|
||||
PartSys->particles[i].hue += (mids * perlin8(PartSys->particles[i].x << 2, SEGMENT.step << 2)) >> 9; // color by perlin noise from mid frequencies
|
||||
}
|
||||
}
|
||||
@@ -11832,7 +11832,7 @@ uint16_t mode_particle1DsonicBoom(void) {
|
||||
// particle manipulation
|
||||
for (uint32_t i = 0; i < PartSys->usedParticles; i++) {
|
||||
if (SEGMENT.check1) { // modulate colors by mid frequencies
|
||||
int mids = sqrt16((int)fftResult[5] + (int)fftResult[6] + (int)fftResult[7] + (int)fftResult[8] + (int)fftResult[9] + (int)fftResult[10]); // average the mids, bin 5 is ~500Hz, bin 10 is ~2kHz (see audio_reactive.h)
|
||||
int mids = sqrt32_bw((int)fftResult[5] + (int)fftResult[6] + (int)fftResult[7] + (int)fftResult[8] + (int)fftResult[9] + (int)fftResult[10]); // average the mids, bin 5 is ~500Hz, bin 10 is ~2kHz (see audio_reactive.h)
|
||||
PartSys->particles[i].hue += (mids * perlin8(PartSys->particles[i].x << 2, SEGMENT.step << 2)) >> 9; // color by perlin noise from mid frequencies
|
||||
}
|
||||
if (PartSys->particles[i].ttl > 16) {
|
||||
|
||||
@@ -932,7 +932,7 @@ uint16_t Segment::calc_virtualLength() const {
|
||||
break;
|
||||
case M12_pArc:
|
||||
{ unsigned vLen2 = vW * vW + vH * vH; // length ^2
|
||||
if (vLen2 < UINT16_MAX) vLen = sqrt16(vLen2); // use faster function for 16bit values
|
||||
if (vLen2 < UINT16_MAX) vLen = sqrt32_bw(vLen2); // use faster function for 16bit values
|
||||
else vLen = sqrtf(vLen2); // fall-back to float if bigger
|
||||
if (vW != vH) vLen++; // round up
|
||||
}
|
||||
|
||||
@@ -588,6 +588,7 @@ float fmod_t(float num, float denom);
|
||||
#define cos_t cosf
|
||||
#define tan_t tanf
|
||||
*/
|
||||
uint32_t sqrt32_bw(uint32_t x);
|
||||
|
||||
//wled_serial.cpp
|
||||
void handleSerial();
|
||||
|
||||
@@ -235,3 +235,27 @@ float fmod_t(float num, float denom) {
|
||||
}
|
||||
|
||||
#endif // WLEDMM
|
||||
|
||||
/*
 * Bit-wise integer square root (exact).
 *
 * Returns floor(sqrt(x)), i.e. the largest r with r*r <= x, for any 32-bit
 * unsigned input. Uses only shifts, additions and comparisons (no multiply,
 * divide or floating point).
 *
 * The result is built one binary digit at a time, from the most significant
 * candidate bit downwards. `bit` always holds a power of 4 (the square of the
 * result bit currently being tested), which is why it moves in steps of two
 * bit positions.
 *
 * All constants are formed as uint32_t so that comparisons against x are
 * unsigned-vs-unsigned and the shifts do not depend on the width of int.
 */
uint32_t sqrt32_bw(uint32_t x) {
  uint32_t res = 0;
  uint32_t bit;

  // Pick a starting power of 4 close to the input magnitude so that the
  // reduction loop below has fewer steps to do for typical (small) inputs.
  if (x < ((uint32_t)1 << 10)) {
    bit = (uint32_t)1 << 10;  // small numbers: x < 32^2
  } else if (x < ((uint32_t)1 << 20)) {
    bit = (uint32_t)1 << 20;  // medium numbers: x < 1024^2
  } else {
    bit = (uint32_t)1 << 30;  // largest power of 4 that fits in 32 bits
  }

  // Lower the start to the largest power of 4 that is <= x (ends at 0 for x == 0).
  while (bit > x) {
    bit >>= 2;
  }

  // x is consumed as the running remainder; it is a by-value parameter, so
  // the caller's variable is not affected.
  while (bit != 0) {
    if (x >= res + bit) {
      // Current result bit is set: subtract its contribution from the remainder.
      x -= res + bit;
      res = (res >> 1) + bit;
    } else {
      // Current result bit is clear.
      res >>= 1;
    }
    bit >>= 2;
  }

  return res;
}
|
||||
|
||||
Reference in New Issue
Block a user