63#if defined IPLUG_SIMDE
64 #if defined(__arm64__)
65 #define SIMDE_ENABLE_NATIVE_ALIASES
66 #include "simde/x86/sse2.h"
68 #include <emmintrin.h>
// Template parameters, as used by the code visible in this file:
//   T      - element type of the input history buffer and of the kernel tables.
//   NCHANS - number of channel rows allocated in the input history buffer.
//   A      - Lanczos kernel parameter; each table row holds 2 * A taps.
// NOTE(review): the class declaration line that follows this template header
// is not visible in this extract.
91template<
typename T =
double,
int NCHANS=2,
size_t A=12>
// Compile-time check that T is float. The message states this is required when
// SIMD instructions are used.
// NOTE(review): any preprocessor guard that makes this check conditional is
// not visible in this extract — confirm against the full file.
96 static_assert(std::is_same<T, float>::value,
"LanczosResampler requires T to be float when using SIMD instructions");
// Length of one copy of the per-channel input history. The buffer itself is
// declared with kBufferSize * 2 entries, and (kBufferSize - 1) is used as a
// wrap mask, so this value has to remain a power of two.
101 static constexpr size_t kBufferSize = 4096;
// Number of kernel taps stored per table row (A on each side).
103 static constexpr size_t kFilterWidth = A * 2;
// Number of fractional steps the kernel is tabulated at; the tables are
// declared with kTablePoints + 1 rows.
105 static constexpr size_t kTablePoints = 8192;
// Spacing in x between two consecutive table rows.
106 static constexpr double kDeltaX = 1.0 / (kTablePoints);
// Constructor body: stores the two sample rates, derives the output phase
// increment, and fills the shared kernel tables the first time it runs.
// NOTE(review): the constructor signature and some statements are not visible
// in this extract.
114 : mInputSampleRate(inputRate)
115 , mOutputSamplerate(outputRate)
// Input frames consumed per output frame. Both operands are float members, so
// the ratio is computed in float before being stored in a double.
116 , mPhaseOutIncr(mInputSampleRate / mOutputSamplerate)
// Lanczos kernel evaluated at x, converted to T:
//   A * sin(pi*x) * sin(pi*x / A) / (pi^2 * x^2)
120 auto kernel = [](
double x) {
// Special-cases |x| < 1e-7, where the general formula would divide by ~0.
// NOTE(review): the statement executed for this case is not visible in this
// extract.
121 if (std::fabs(x) < 1e-7)
124 const auto pi = iplug::PI;
125 return T(A * std::sin(pi * x) * std::sin(pi * x / A) / (pi * pi * x * x));
// The tables are static, so they are only filled while the flag is still false.
// NOTE(review): the flag is a plain bool with no synchronisation visible here;
// confirm that first-time construction cannot happen on several threads at once.
128 if (!sTablesInitialized)
// Fill sTable: row t holds the kernel sampled at fractional offset t * kDeltaX
// for every tap position i - A. kTablePoints + 1 rows are written.
130 for (
auto t=0; t<kTablePoints+1; ++t)
132 const double x0 = kDeltaX * t;
134 for (
auto i=0; i<kFilterWidth; ++i)
136 const double x = x0 + i - A;
137 sTable[t][i] = kernel(x);
// Fill sDeltaTable: difference between each row and the next one, used by
// ReadSamples to interpolate linearly between adjacent rows.
141 for (
auto t=0; t<kTablePoints; ++t)
143 for (
auto i=0; i<kFilterWidth; ++i)
145 sDeltaTable[t][i] = sTable[t + 1][i] - sTable[t][i];
// The last delta row has no successor row in sTable, so it reuses the deltas of row 0.
149 for (
auto i=0; i<kFilterWidth; ++i)
152 sDeltaTable[kTablePoints][i] = sDeltaTable[0][i];
154 sTablesInitialized =
true;
158 inline size_t GetNumSamplesRequiredFor(
size_t nOutputSamples)
const
166 auto res = A + 1.0 - (mPhaseIn - mPhaseOut - mPhaseOutIncr * nOutputSamples);
168 return static_cast<size_t>(std::max(res + 1.0, 0.0));
171 inline void PushBlock(T** inputs,
size_t nFrames,
int nChans)
173 for (
auto s=0; s<nFrames; s++)
175 for (
auto c=0; c<nChans; c++)
177 mInputBuffer[c][mWritePos] = inputs[c][s];
178 mInputBuffer[c][mWritePos + kBufferSize] = inputs[c][s];
181 mWritePos = (mWritePos + 1) & (kBufferSize - 1);
182 mPhaseIn += mPhaseInIncr;
// Produces up to `max` output frames into `outputs`, one ReadSamples() call
// per frame, for `nChans` channels.
// NOTE(review): the declaration, increment and return of `populated` are not
// visible in this extract — presumably it counts the frames written so far and
// is the return value; confirm against the full file.
186 size_t PopBlock(T** outputs,
size_t max,
int nChans)
// Keep going while more frames are wanted and the input phase still leads the
// output phase by more than A + 1.
189 while (populated < max && (mPhaseIn - mPhaseOut) > A + 1)
// The phase lead is passed as the distance to read back from the write position;
// `populated` is the index of the output frame being written.
191 ReadSamples((mPhaseIn - mPhaseOut), outputs, populated, nChans);
192 mPhaseOut += mPhaseOutIncr;
// Re-expresses the input phase relative to the current output phase. Only the
// difference of the two phases is used by the read path, and this keeps the
// stored value small.
// NOTE(review): only the mPhaseIn adjustment is visible in this extract; a
// matching reset of mPhaseOut is presumably adjacent — confirm against the full file.
198 inline void RenormalizePhases()
200 mPhaseIn -= mPhaseOut;
// Zero-fills the whole input history buffer: NCHANS rows of kBufferSize * 2
// elements of T, which matches the declared size of mInputBuffer.
// NOTE(review): the signature of the enclosing function is not visible in this extract.
211 memset(mInputBuffer, 0, NCHANS * kBufferSize * 2 *
sizeof(T));
// SSE variant: computes one output frame per channel by convolving 2 * A input
// samples around the read position with a linearly interpolated kernel row.
//   xBack   - distance (in input frames) to read back from the write position
//   outputs - per-channel output pointers; outputs[c][s] is written
//   s       - index of the output frame to write
//   nChans  - number of channels to process
// NOTE(review): the declarations of `sum` and `sumArray`, and the preprocessor
// condition selecting this overload, are not visible in this extract.
216 inline void ReadSamples(
double xBack, T** outputs,
int s,
int nChans)
const
// Fractional read position in the history buffer, narrowed to float.
// NOTE(review): for positions in [2048, 4096) adjacent floats are 2^-12 apart,
// which is coarser than the table step kDeltaX = 2^-13; confirm that this loss
// of resolution is acceptable here.
218 float bufferReadPosition =
static_cast<float>(mWritePos - xBack);
219 int bufferReadIndex =
static_cast<int>(std::floor(bufferReadPosition));
// One minus the fractional part of the read position; selects the kernel row below.
220 float bufferFracPosition = 1.0f - (bufferReadPosition -
static_cast<float>(bufferReadIndex));
// Wrap the index into [0, kBufferSize).
222 bufferReadIndex = (bufferReadIndex + kBufferSize) & (kBufferSize - 1);
// Taps are read from bufferReadIndex - A upwards. When the index is <= A, move
// to the mirrored second copy of the data so that lower bound stays in range.
223 bufferReadIndex += (bufferReadIndex <= static_cast<int>(A)) * kBufferSize;
// Split the fractional position into a table row and a remainder used to
// interpolate towards the next row.
225 float tablePosition = bufferFracPosition * kTablePoints;
226 int tableIndex =
static_cast<int>(tablePosition);
227 float tableFracPosition = (tablePosition - tableIndex);
// Clear the per-channel accumulators.
230 for (
auto & v : sum) {
231 v = _mm_setzero_ps();
// Four taps per iteration from each half of the kernel row.
// NOTE(review): the step of 4 and the aligned loads at offset A + i look like
// they require A to be a multiple of 4 — confirm.
234 for (
int i=0; i<A; i+=4)
// Taps i..i+3 (first half) and A+i..A+i+3 (second half), with their deltas.
237 __m128 f0 = _mm_load_ps(&sTable[tableIndex][i]);
238 __m128 df0 = _mm_load_ps(&sDeltaTable[tableIndex][i]);
239 __m128 f1 = _mm_load_ps(&sTable[tableIndex][A + i]);
240 __m128 df1 = _mm_load_ps(&sDeltaTable[tableIndex][A + i]);
// Linear interpolation between table rows: f += delta * fraction.
243 __m128 tfp = _mm_set1_ps(tableFracPosition);
244 f0 = _mm_add_ps(f0, _mm_mul_ps(df0, tfp));
245 f1 = _mm_add_ps(f1, _mm_mul_ps(df1, tfp));
247 for (
int c=0; c<nChans; c++)
// _mm_set_ps takes its arguments from the highest lane to the lowest, so the
// sample at offset i lands in lane 0, lining up with tap i of f0 / f1.
250 __m128 d0 = _mm_set_ps(mInputBuffer[c][bufferReadIndex - A + i + 3],
251 mInputBuffer[c][bufferReadIndex - A + i + 2],
252 mInputBuffer[c][bufferReadIndex - A + i + 1],
253 mInputBuffer[c][bufferReadIndex - A + i]);
254 __m128 d1 = _mm_set_ps(mInputBuffer[c][bufferReadIndex + i + 3],
255 mInputBuffer[c][bufferReadIndex + i + 2],
256 mInputBuffer[c][bufferReadIndex + i + 1],
257 mInputBuffer[c][bufferReadIndex + i]);
// Multiply taps by samples and accumulate lane-wise for this channel.
260 __m128 result0 = _mm_mul_ps(f0, d0);
261 __m128 result1 = _mm_mul_ps(f1, d1);
262 sum[c] = _mm_add_ps(sum[c], _mm_add_ps(result0, result1));
// Sum the four lanes of each accumulator to get the output sample.
267 for (
int c=0; c<nChans; c++)
270 _mm_store_ps(sumArray, sum[c]);
271 outputs[c][s] = sumArray[0] + sumArray[1] + sumArray[2] + sumArray[3];
// Scalar variant: computes one output frame per channel by convolving 2 * A
// input samples around the read position with a linearly interpolated kernel row.
//   xBack   - distance (in input frames) to read back from the write position
//   outputs - per-channel output pointers; outputs[c][s] is written
//   s       - index of the output frame to write
//   nChans  - number of channels to process
// NOTE(review): the preprocessor condition selecting this overload is not
// visible in this extract.
275 inline void ReadSamples(
double xBack, T** outputs,
int s,
int nChans)
const
// Fractional read position in the history buffer and its integer part.
277 double bufferReadPosition = mWritePos - xBack;
278 int bufferReadIndex = std::floor(bufferReadPosition);
// One minus the fractional part of the read position; selects the kernel row below.
279 double bufferFracPosition = 1.0 - (bufferReadPosition - bufferReadIndex);
// Wrap the index into [0, kBufferSize).
281 bufferReadIndex = (bufferReadIndex + kBufferSize) & (kBufferSize - 1);
// Taps are read from bufferReadIndex - A upwards. When the index is <= A, move
// to the mirrored second copy of the data so that lower bound stays in range.
282 bufferReadIndex += (bufferReadIndex <= static_cast<int>(A)) * kBufferSize;
// Split the fractional position into a table row and a remainder used to
// interpolate towards the next row.
284 double tablePosition = bufferFracPosition * kTablePoints;
285 int tableIndex =
static_cast<int>(tablePosition);
286 double tableFracPosition = (tablePosition - tableIndex);
// Per-channel accumulators, zero-initialised.
288 T sum[NCHANS] = {0.0};
// Each iteration handles tap i of the first half and tap A + i of the second half.
290 for (
auto i=0; i<A; i++)
// Linear interpolation between table rows: f += delta * fraction.
292 auto f0 = sTable[tableIndex][i];
293 const auto df0 = sDeltaTable[tableIndex][i];
294 f0 += df0 * tableFracPosition;
296 auto f1 = sTable[tableIndex][A+i];
297 const auto df1 = sDeltaTable[tableIndex][A+i];
298 f1 += df1 * tableFracPosition;
300 for (
auto c=0; c<nChans;c++)
// Samples paired with the two taps: A entries before the read index, and at it.
302 const auto d0 = mInputBuffer[c][bufferReadIndex - A + i];
303 const auto d1 = mInputBuffer[c][bufferReadIndex + i];
// NOTE(review): the statement that accumulates rv into sum[c] is not visible
// in this extract — confirm against the full file.
304 const auto rv = (f0 * d0) + (f1 * d1);
// Write the accumulated value of each channel to output frame s.
309 for (
auto c=0; c<nChans;c++)
311 outputs[c][s] = sum[c];
// Kernel lookup table shared by all instances of one specialisation:
// kTablePoints + 1 rows of kFilterWidth taps, 16-byte aligned.
316 static T sTable
alignas(16)[kTablePoints + 1][kFilterWidth];
// Row-to-row differences of sTable, used for linear interpolation between rows.
317 static T sDeltaTable
alignas(16)[kTablePoints + 1][kFilterWidth];
// Set to true once the two tables above have been filled.
318 static bool sTablesInitialized;
// Per-channel input history. Each row holds two copies of the data,
// kBufferSize entries apart.
320 T mInputBuffer[NCHANS][kBufferSize * 2];
// Sample rates passed to the constructor.
// NOTE(review): the declaration of mWritePos, used by the push and read code,
// is not visible in this extract.
322 const float mInputSampleRate;
323 const float mOutputSamplerate;
// Input phase: grows by mPhaseInIncr for every input frame pushed.
324 double mPhaseIn = 0.0;
// Output phase: grows by mPhaseOutIncr for every output frame produced.
325 double mPhaseOut = 0.0;
326 double mPhaseInIncr = 1.0;
// Overwritten in the constructor with input rate / output rate.
327 double mPhaseOutIncr = 0.0;
330template<
typename T,
int NCHANS,
size_t A>
331T LanczosResampler<T, NCHANS, A>::sTable
alignas(16) [LanczosResampler<T, NCHANS, A>::kTablePoints + 1][LanczosResampler::kFilterWidth];
333template<
typename T,
int NCHANS,
size_t A>
334T LanczosResampler<T, NCHANS, A>::sDeltaTable
alignas(16) [LanczosResampler<T, NCHANS, A>::kTablePoints + 1][LanczosResampler::kFilterWidth];
336template<
typename T,
int NCHANS,
size_t A>
337bool LanczosResampler<T, NCHANS, A>::sTablesInitialized{
false};
IPlug Constant definitions, Types, magic numbers.
LanczosResampler(float inputRate, float outputRate)
Constructor.