iPlug2 - C++ Audio Plug-in Framework
Loading...
Searching...
No Matches
LanczosResampler.h
1/*
2 ==============================================================================
3
4 This file is part of the iPlug 2 library. Copyright (C) the iPlug 2 developers.
5
6 See LICENSE.txt for more info.
7
8 ==============================================================================
9*/
10
11/*
12 This code is derived from
13 https://github.com/surge-synthesizer/sst-basic-blocks/blob/main/include/sst/basic-blocks/dsp/LanczosResampler.h
14
15 The following license info is copied from the above file:
16
17 * sst-basic-blocks - an open source library of core audio utilities
18 * built by Surge Synth Team.
19 *
20 * Provides a collection of tools useful on the audio thread for blocks,
21 * modulation, etc... or useful for adapting code to multiple environments.
22 *
23 * Copyright 2023, various authors, as described in the GitHub
24 * transaction log. Parts of this code are derived from similar
25 * functions original in Surge or ShortCircuit.
26 *
27 * sst-basic-blocks is released under the GNU General Public Licence v3
28 * or later (GPL-3.0-or-later). The license is found in the "LICENSE"
29 * file in the root of this repository, or at
30 * https://www.gnu.org/licenses/gpl-3.0.en.html
31 *
32 * All source in sst-basic-blocks available at
33 * https://github.com/surge-synthesizer/sst-basic-blocks
34
35 * A special note on licensing: This file (and only this file)
36 * has Paul Walker (baconpaul) as the sole author to date.
37 *
38 * In order to make this handy small function based on public
39 * information available to a set of open source projects
40 * adapting hardware to software, but which are licensed under
41 * MIT or BSD or similar licenses, this file and only this file
42 * can be used in an MIT/BSD context as well as a GPL3 context, by
43 * copying it and modifying it as you see fit.
44 *
45 * If you do that, you will need to replace the `sum_ps_to_float`
46 * call below with either an hadd if you are SSE3 or higher or
47 * an appropriate reduction operator from your toolkit.
48 *
49 * But basically: Need to resample 48k to variable rate with
50 * a small window and want to use this? Go for it!
51 *
52 * For avoidance of doubt, this license exception only
53 * applies to this file.
54 */
55
56#pragma once
57
58#include <algorithm>
59#include <utility>
60#include <cmath>
61#include <cstring>
62
63#if defined IPLUG_SIMDE
64 #if defined(__arm64__)
65 #define SIMDE_ENABLE_NATIVE_ALIASES
66 #include "simde/x86/sse2.h"
67 #else
68 #include <emmintrin.h>
69 #endif
70#endif
71
72#include "IPlugConstants.h"
73
74namespace iplug
75{
76/* LanczosResampler
77 *
78 * A class that implements Lanczos resampling, optionally using SIMD instructions.
79 * Define IPLUG_SIMDE at project level in order to use SIMD and if on non-x86_64
80 * include the SIMDE library in your search paths in order to translate intel
81 * intrinsics to e.g. arm64
82 *
83 * See https://en.wikipedia.org/wiki/Lanczos_resampling
84 *
85 * @tparam T the sampletype
86 * @tparam NCHANS the number of channels
87 * @tparam A The Lanczos filter size. A higher value makes the filter closer to an
88 ideal stop-band that rejects high-frequency content (anti-aliasing),
89 but at the expense of higher latency
90 */
91template<typename T = double, int NCHANS=2, size_t A=12>
93{
94private:
95#if IPLUG_SIMDE
96 static_assert(std::is_same<T, float>::value, "LanczosResampler requires T to be float when using SIMD instructions");
97#endif
98
99 // The buffer size. This needs to be at least as large as the largest block of samples
100 // that the input side will see.
101 static constexpr size_t kBufferSize = 4096;
102 // The filter width. 2x because the filter goes from -A to A
103 static constexpr size_t kFilterWidth = A * 2;
104 // The discretization resolution for the filter table.
105 static constexpr size_t kTablePoints = 8192;
106 static constexpr double kDeltaX = 1.0 / (kTablePoints);
107
108public:
113 LanczosResampler(float inputRate, float outputRate)
114 : mInputSampleRate(inputRate)
115 , mOutputSamplerate(outputRate)
116 , mPhaseOutIncr(mInputSampleRate / mOutputSamplerate)
117 {
118 ClearBuffer();
119
120 auto kernel = [](double x) {
121 if (std::fabs(x) < 1e-7)
122 return T(1.0);
123
124 const auto pi = iplug::PI;
125 return T(A * std::sin(pi * x) * std::sin(pi * x / A) / (pi * pi * x * x));
126 };
127
128 if (!sTablesInitialized)
129 {
130 for (auto t=0; t<kTablePoints+1; ++t)
131 {
132 const double x0 = kDeltaX * t;
133
134 for (auto i=0; i<kFilterWidth; ++i)
135 {
136 const double x = x0 + i - A;
137 sTable[t][i] = kernel(x);
138 }
139 }
140
141 for (auto t=0; t<kTablePoints; ++t)
142 {
143 for (auto i=0; i<kFilterWidth; ++i)
144 {
145 sDeltaTable[t][i] = sTable[t + 1][i] - sTable[t][i];
146 }
147 }
148
149 for (auto i=0; i<kFilterWidth; ++i)
150 {
151 // Wrap at the end - delta is the same
152 sDeltaTable[kTablePoints][i] = sDeltaTable[0][i];
153 }
154 sTablesInitialized = true;
155 }
156 }
157
158 inline size_t GetNumSamplesRequiredFor(size_t nOutputSamples) const
159 {
160 /*
161 * So (mPhaseIn + mPhaseInIncr * res - mPhaseOut - mPhaseOutIncr * nOutputSamples) * sri > A + 1
162 *
163 * Use the fact that mPhaseInIncr = mInputSampleRate and find
164 * res > (A+1) - (mPhaseIn - mPhaseOut + mPhaseOutIncr * desiredOutputs) * sri
165 */
166 auto res = A + 1.0 - (mPhaseIn - mPhaseOut - mPhaseOutIncr * nOutputSamples);
167
168 return static_cast<size_t>(std::max(res + 1.0, 0.0));
169 }
170
171 inline void PushBlock(T** inputs, size_t nFrames, int nChans)
172 {
173 for (auto s=0; s<nFrames; s++)
174 {
175 for (auto c=0; c<nChans; c++)
176 {
177 mInputBuffer[c][mWritePos] = inputs[c][s];
178 mInputBuffer[c][mWritePos + kBufferSize] = inputs[c][s]; // this way we can always wrap
179 }
180
181 mWritePos = (mWritePos + 1) & (kBufferSize - 1);
182 mPhaseIn += mPhaseInIncr;
183 }
184 }
185
186 size_t PopBlock(T** outputs, size_t max, int nChans)
187 {
188 int populated = 0;
189 while (populated < max && (mPhaseIn - mPhaseOut) > A + 1)
190 {
191 ReadSamples((mPhaseIn - mPhaseOut), outputs, populated, nChans);
192 mPhaseOut += mPhaseOutIncr;
193 populated++;
194 }
195 return populated;
196 }
197
198 inline void RenormalizePhases()
199 {
200 mPhaseIn -= mPhaseOut;
201 mPhaseOut = 0;
202 }
203
204 void Reset()
205 {
206 ClearBuffer();
207 }
208
209 void ClearBuffer()
210 {
211 memset(mInputBuffer, 0, NCHANS * kBufferSize * 2 * sizeof(T));
212 }
213
214private:
215#ifdef IPLUG_SIMDE
216 inline void ReadSamples(double xBack, T** outputs, int s, int nChans) const
217 {
218 float bufferReadPosition = static_cast<float>(mWritePos - xBack);
219 int bufferReadIndex = static_cast<int>(std::floor(bufferReadPosition));
220 float bufferFracPosition = 1.0f - (bufferReadPosition - static_cast<float>(bufferReadIndex));
221
222 bufferReadIndex = (bufferReadIndex + kBufferSize) & (kBufferSize - 1);
223 bufferReadIndex += (bufferReadIndex <= static_cast<int>(A)) * kBufferSize;
224
225 float tablePosition = bufferFracPosition * kTablePoints;
226 int tableIndex = static_cast<int>(tablePosition);
227 float tableFracPosition = (tablePosition - tableIndex);
228
229 __m128 sum[NCHANS];
230 for (auto & v : sum) {
231 v = _mm_setzero_ps(); // Initialize sum vectors to zero
232 }
233
234 for (int i=0; i<A; i+=4) // Process four samples at a time
235 {
236 // Load filter coefficients and input samples into SSE registers
237 __m128 f0 = _mm_load_ps(&sTable[tableIndex][i]);
238 __m128 df0 = _mm_load_ps(&sDeltaTable[tableIndex][i]);
239 __m128 f1 = _mm_load_ps(&sTable[tableIndex][A + i]);
240 __m128 df1 = _mm_load_ps(&sDeltaTable[tableIndex][A + i]);
241
242 // Interpolate filter coefficients
243 __m128 tfp = _mm_set1_ps(tableFracPosition);
244 f0 = _mm_add_ps(f0, _mm_mul_ps(df0, tfp));
245 f1 = _mm_add_ps(f1, _mm_mul_ps(df1, tfp));
246
247 for (int c=0; c<nChans; c++)
248 {
249 // Load input data
250 __m128 d0 = _mm_set_ps(mInputBuffer[c][bufferReadIndex - A + i + 3],
251 mInputBuffer[c][bufferReadIndex - A + i + 2],
252 mInputBuffer[c][bufferReadIndex - A + i + 1],
253 mInputBuffer[c][bufferReadIndex - A + i]);
254 __m128 d1 = _mm_set_ps(mInputBuffer[c][bufferReadIndex + i + 3],
255 mInputBuffer[c][bufferReadIndex + i + 2],
256 mInputBuffer[c][bufferReadIndex + i + 1],
257 mInputBuffer[c][bufferReadIndex + i]);
258
259 // Perform multiplication and accumulate
260 __m128 result0 = _mm_mul_ps(f0, d0);
261 __m128 result1 = _mm_mul_ps(f1, d1);
262 sum[c] = _mm_add_ps(sum[c], _mm_add_ps(result0, result1));
263 }
264 }
265
266 // Extract the final sums and store them in the output
267 for (int c=0; c<nChans; c++)
268 {
269 float sumArray[4];
270 _mm_store_ps(sumArray, sum[c]);
271 outputs[c][s] = sumArray[0] + sumArray[1] + sumArray[2] + sumArray[3];
272 }
273 }
274#else // scalar
275 inline void ReadSamples(double xBack, T** outputs, int s, int nChans) const
276 {
277 double bufferReadPosition = mWritePos - xBack;
278 int bufferReadIndex = std::floor(bufferReadPosition);
279 double bufferFracPosition = 1.0 - (bufferReadPosition - bufferReadIndex);
280
281 bufferReadIndex = (bufferReadIndex + kBufferSize) & (kBufferSize - 1);
282 bufferReadIndex += (bufferReadIndex <= static_cast<int>(A)) * kBufferSize;
283
284 double tablePosition = bufferFracPosition * kTablePoints;
285 int tableIndex = static_cast<int>(tablePosition);
286 double tableFracPosition = (tablePosition - tableIndex);
287
288 T sum[NCHANS] = {0.0};
289
290 for (auto i=0; i<A; i++)
291 {
292 auto f0 = sTable[tableIndex][i];
293 const auto df0 = sDeltaTable[tableIndex][i];
294 f0 += df0 * tableFracPosition;
295
296 auto f1 = sTable[tableIndex][A+i];
297 const auto df1 = sDeltaTable[tableIndex][A+i];
298 f1 += df1 * tableFracPosition;
299
300 for (auto c=0; c<nChans;c++)
301 {
302 const auto d0 = mInputBuffer[c][bufferReadIndex - A + i];
303 const auto d1 = mInputBuffer[c][bufferReadIndex + i];
304 const auto rv = (f0 * d0) + (f1 * d1);
305 sum[c] += rv;
306 }
307 }
308
309 for (auto c=0; c<nChans;c++)
310 {
311 outputs[c][s] = sum[c];
312 }
313 }
314#endif
315
316 static T sTable alignas(16)[kTablePoints + 1][kFilterWidth];
317 static T sDeltaTable alignas(16)[kTablePoints + 1][kFilterWidth];
318 static bool sTablesInitialized;
319
320 T mInputBuffer[NCHANS][kBufferSize * 2];
321 int mWritePos = 0;
322 const float mInputSampleRate;
323 const float mOutputSamplerate;
324 double mPhaseIn = 0.0;
325 double mPhaseOut = 0.0;
326 double mPhaseInIncr = 1.0;
327 double mPhaseOutIncr = 0.0;
328} WDL_FIXALIGN;
329
330template<typename T, int NCHANS, size_t A>
331T LanczosResampler<T, NCHANS, A>::sTable alignas(16) [LanczosResampler<T, NCHANS, A>::kTablePoints + 1][LanczosResampler::kFilterWidth];
332
333template<typename T, int NCHANS, size_t A>
334T LanczosResampler<T, NCHANS, A>::sDeltaTable alignas(16) [LanczosResampler<T, NCHANS, A>::kTablePoints + 1][LanczosResampler::kFilterWidth];
335
336template<typename T, int NCHANS, size_t A>
337bool LanczosResampler<T, NCHANS, A>::sTablesInitialized{false};
338
339} // namespace iplug
340
IPlug Constant definitions, Types, magic numbers.
LanczosResampler(float inputRate, float outputRate)
Constructor.