summaryrefslogtreecommitdiffstats
path: root/media/libsoundtouch/src/TDStretch.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'media/libsoundtouch/src/TDStretch.cpp')
-rw-r--r--media/libsoundtouch/src/TDStretch.cpp1108
1 files changed, 1108 insertions, 0 deletions
diff --git a/media/libsoundtouch/src/TDStretch.cpp b/media/libsoundtouch/src/TDStretch.cpp
new file mode 100644
index 0000000000..709e979d1d
--- /dev/null
+++ b/media/libsoundtouch/src/TDStretch.cpp
@@ -0,0 +1,1108 @@
+///////////////////////////////////////////////////////////////////////////////
+///
+/// Sampled sound tempo changer/time stretch algorithm. Changes the sound tempo
+/// while maintaining the original pitch by using a time domain WSOLA-like
+/// method with several performance-increasing tweaks.
+///
+/// Notes : MMX optimized functions reside in a separate, platform-specific
+/// file, e.g. 'mmx_win.cpp' or 'mmx_gcc.cpp'.
+///
+/// This source file contains OpenMP optimizations that allow speeding up the
+/// corss-correlation algorithm by executing it in several threads / CPU cores
+/// in parallel. See the following article link for more detailed discussion
+/// about SoundTouch OpenMP optimizations:
+/// http://www.softwarecoven.com/parallel-computing-in-embedded-mobile-devices
+///
+/// Author : Copyright (c) Olli Parviainen
+/// Author e-mail : oparviai 'at' iki.fi
+/// SoundTouch WWW: http://www.surina.net/soundtouch
+///
+////////////////////////////////////////////////////////////////////////////////
+//
+// License :
+//
+// SoundTouch audio processing library
+// Copyright (c) Olli Parviainen
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+////////////////////////////////////////////////////////////////////////////////
+
+#include <string.h>
+#include <limits.h>
+#include <assert.h>
+#include <math.h>
+#include <float.h>
+
+#include "STTypes.h"
+#include "cpu_detect.h"
+#include "TDStretch.h"
+
+using namespace soundtouch;
+
+#define max(x, y) (((x) > (y)) ? (x) : (y))
+
+/*****************************************************************************
+ *
+ * Constant definitions
+ *
+ *****************************************************************************/
+
+// Table for the hierarchical mixing position seeking algorithm
+const short _scanOffsets[5][24]={
+ { 124, 186, 248, 310, 372, 434, 496, 558, 620, 682, 744, 806,
+ 868, 930, 992, 1054, 1116, 1178, 1240, 1302, 1364, 1426, 1488, 0},
+ {-100, -75, -50, -25, 25, 50, 75, 100, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ { -20, -15, -10, -5, 5, 10, 15, 20, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ { -4, -3, -2, -1, 1, 2, 3, 4, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ { 121, 114, 97, 114, 98, 105, 108, 32, 104, 99, 117, 111,
+ 116, 100, 110, 117, 111, 115, 0, 0, 0, 0, 0, 0}};
+
+/*****************************************************************************
+ *
+ * Implementation of the class 'TDStretch'
+ *
+ *****************************************************************************/
+
+
+TDStretch::TDStretch() : FIFOProcessor(&outputBuffer)
+{
+ bQuickSeek = false;
+ channels = 2;
+
+ pMidBuffer = NULL;
+ pMidBufferUnaligned = NULL;
+ overlapLength = 0;
+
+ bAutoSeqSetting = true;
+ bAutoSeekSetting = true;
+
+ tempo = 1.0f;
+ setParameters(44100, DEFAULT_SEQUENCE_MS, DEFAULT_SEEKWINDOW_MS, DEFAULT_OVERLAP_MS);
+ setTempo(1.0f);
+
+ clear();
+}
+
+
+
+TDStretch::~TDStretch()
+{
+ delete[] pMidBufferUnaligned;
+}
+
+
+
+// Sets routine control parameters. These control are certain time constants
+// defining how the sound is stretched to the desired duration.
+//
+// 'sampleRate' = sample rate of the sound
+// 'sequenceMS' = one processing sequence length in milliseconds (default = 82 ms)
+// 'seekwindowMS' = seeking window length for scanning the best overlapping
+// position (default = 28 ms)
+// 'overlapMS' = overlapping length (default = 12 ms)
+
+void TDStretch::setParameters(int aSampleRate, int aSequenceMS,
+ int aSeekWindowMS, int aOverlapMS)
+{
+ // accept only positive parameter values - if zero or negative, use old values instead
+ if (aSampleRate > 0)
+ {
+ if (aSampleRate > 192000) ST_THROW_RT_ERROR("Error: Excessive samplerate");
+ this->sampleRate = aSampleRate;
+ }
+
+ if (aOverlapMS > 0) this->overlapMs = aOverlapMS;
+
+ if (aSequenceMS > 0)
+ {
+ this->sequenceMs = aSequenceMS;
+ bAutoSeqSetting = false;
+ }
+ else if (aSequenceMS == 0)
+ {
+ // if zero, use automatic setting
+ bAutoSeqSetting = true;
+ }
+
+ if (aSeekWindowMS > 0)
+ {
+ this->seekWindowMs = aSeekWindowMS;
+ bAutoSeekSetting = false;
+ }
+ else if (aSeekWindowMS == 0)
+ {
+ // if zero, use automatic setting
+ bAutoSeekSetting = true;
+ }
+
+ calcSeqParameters();
+
+ calculateOverlapLength(overlapMs);
+
+ // set tempo to recalculate 'sampleReq'
+ setTempo(tempo);
+}
+
+
+
+/// Get routine control parameters, see setParameters() function.
+/// Any of the parameters to this function can be NULL, in such case corresponding parameter
+/// value isn't returned.
+void TDStretch::getParameters(int *pSampleRate, int *pSequenceMs, int *pSeekWindowMs, int *pOverlapMs) const
+{
+ if (pSampleRate)
+ {
+ *pSampleRate = sampleRate;
+ }
+
+ if (pSequenceMs)
+ {
+ *pSequenceMs = (bAutoSeqSetting) ? (USE_AUTO_SEQUENCE_LEN) : sequenceMs;
+ }
+
+ if (pSeekWindowMs)
+ {
+ *pSeekWindowMs = (bAutoSeekSetting) ? (USE_AUTO_SEEKWINDOW_LEN) : seekWindowMs;
+ }
+
+ if (pOverlapMs)
+ {
+ *pOverlapMs = overlapMs;
+ }
+}
+
+
+// Overlaps samples in 'midBuffer' with the samples in 'pInput'
+void TDStretch::overlapMono(SAMPLETYPE *pOutput, const SAMPLETYPE *pInput) const
+{
+ int i;
+ SAMPLETYPE m1, m2;
+
+ m1 = (SAMPLETYPE)0;
+ m2 = (SAMPLETYPE)overlapLength;
+
+ for (i = 0; i < overlapLength ; i ++)
+ {
+ pOutput[i] = (pInput[i] * m1 + pMidBuffer[i] * m2 ) / overlapLength;
+ m1 += 1;
+ m2 -= 1;
+ }
+}
+
+
+
+void TDStretch::clearMidBuffer()
+{
+ memset(pMidBuffer, 0, channels * sizeof(SAMPLETYPE) * overlapLength);
+}
+
+
+void TDStretch::clearInput()
+{
+ inputBuffer.clear();
+ clearMidBuffer();
+ isBeginning = true;
+ maxnorm = 0;
+ maxnormf = 1e8;
+ skipFract = 0;
+}
+
+
+// Clears the sample buffers
+void TDStretch::clear()
+{
+ outputBuffer.clear();
+ clearInput();
+}
+
+
+
+// Enables/disables the quick position seeking algorithm. Zero to disable, nonzero
+// to enable
+void TDStretch::enableQuickSeek(bool enable)
+{
+ bQuickSeek = enable;
+}
+
+
+// Returns nonzero if the quick seeking algorithm is enabled.
+bool TDStretch::isQuickSeekEnabled() const
+{
+ return bQuickSeek;
+}
+
+
+// Seeks for the optimal overlap-mixing position.
+int TDStretch::seekBestOverlapPosition(const SAMPLETYPE *refPos)
+{
+ if (bQuickSeek)
+ {
+ return seekBestOverlapPositionQuick(refPos);
+ }
+ else
+ {
+ return seekBestOverlapPositionFull(refPos);
+ }
+}
+
+
+// Overlaps samples in 'midBuffer' with the samples in 'pInputBuffer' at position
+// of 'ovlPos'.
+inline void TDStretch::overlap(SAMPLETYPE *pOutput, const SAMPLETYPE *pInput, uint ovlPos) const
+{
+#ifndef USE_MULTICH_ALWAYS
+ if (channels == 1)
+ {
+ // mono sound.
+ overlapMono(pOutput, pInput + ovlPos);
+ }
+ else if (channels == 2)
+ {
+ // stereo sound
+ overlapStereo(pOutput, pInput + 2 * ovlPos);
+ }
+ else
+#endif // USE_MULTICH_ALWAYS
+ {
+ assert(channels > 0);
+ overlapMulti(pOutput, pInput + channels * ovlPos);
+ }
+}
+
+
+// Seeks for the optimal overlap-mixing position. The 'stereo' version of the
+// routine
+//
+// The best position is determined as the position where the two overlapped
+// sample sequences are 'most alike', in terms of the highest cross-correlation
+// value over the overlapping period
+int TDStretch::seekBestOverlapPositionFull(const SAMPLETYPE *refPos)
+{
+ int bestOffs;
+ double bestCorr;
+ int i;
+ double norm;
+
+ bestCorr = -FLT_MAX;
+ bestOffs = 0;
+
+ // Scans for the best correlation value by testing each possible position
+ // over the permitted range.
+ bestCorr = calcCrossCorr(refPos, pMidBuffer, norm);
+ bestCorr = (bestCorr + 0.1) * 0.75;
+
+ #pragma omp parallel for
+ for (i = 1; i < seekLength; i ++)
+ {
+ double corr;
+ // Calculates correlation value for the mixing position corresponding to 'i'
+#if defined(_OPENMP) || defined(ST_SIMD_AVOID_UNALIGNED)
+ // in parallel OpenMP mode, can't use norm accumulator version as parallel executor won't
+ // iterate the loop in sequential order
+ // in SIMD mode, avoid accumulator version to allow avoiding unaligned positions
+ corr = calcCrossCorr(refPos + channels * i, pMidBuffer, norm);
+#else
+ // In non-parallel version call "calcCrossCorrAccumulate" that is otherwise same
+ // as "calcCrossCorr", but saves time by reusing & updating previously stored
+ // "norm" value
+ corr = calcCrossCorrAccumulate(refPos + channels * i, pMidBuffer, norm);
+#endif
+ // heuristic rule to slightly favour values close to mid of the range
+ double tmp = (double)(2 * i - seekLength) / (double)seekLength;
+ corr = ((corr + 0.1) * (1.0 - 0.25 * tmp * tmp));
+
+ // Checks for the highest correlation value
+ if (corr > bestCorr)
+ {
+ // For optimal performance, enter critical section only in case that best value found.
+ // in such case repeat 'if' condition as it's possible that parallel execution may have
+ // updated the bestCorr value in the mean time
+ #pragma omp critical
+ if (corr > bestCorr)
+ {
+ bestCorr = corr;
+ bestOffs = i;
+ }
+ }
+ }
+
+#ifdef SOUNDTOUCH_INTEGER_SAMPLES
+ adaptNormalizer();
+#endif
+
+ // clear cross correlation routine state if necessary (is so e.g. in MMX routines).
+ clearCrossCorrState();
+
+ return bestOffs;
+}
+
+
+// Quick seek algorithm for improved runtime-performance: First roughly scans through the
+// correlation area, and then scan surroundings of two best preliminary correlation candidates
+// with improved precision
+//
+// Based on testing:
+// - This algorithm gives on average 99% as good match as the full algorithm
+// - this quick seek algorithm finds the best match on ~90% of cases
+// - on those 10% of cases when this algorithm doesn't find best match,
+// it still finds on average ~90% match vs. the best possible match
+int TDStretch::seekBestOverlapPositionQuick(const SAMPLETYPE *refPos)
+{
+#define _MIN(a, b) (((a) < (b)) ? (a) : (b))
+#define SCANSTEP 16
+#define SCANWIND 8
+
+ int bestOffs;
+ int i;
+ int bestOffs2;
+ float bestCorr, corr;
+ float bestCorr2;
+ double norm;
+
+ // note: 'float' types used in this function in case that the platform would need to use software-fp
+
+ bestCorr =
+ bestCorr2 = -FLT_MAX;
+ bestOffs =
+ bestOffs2 = SCANWIND;
+
+ // Scans for the best correlation value by testing each possible position
+ // over the permitted range. Look for two best matches on the first pass to
+ // increase possibility of ideal match.
+ //
+ // Begin from "SCANSTEP" instead of SCANWIND to make the calculation
+ // catch the 'middlepoint' of seekLength vector as that's the a-priori
+ // expected best match position
+ //
+ // Roughly:
+ // - 15% of cases find best result directly on the first round,
+ // - 75% cases find better match on 2nd round around the best match from 1st round
+ // - 10% cases find better match on 2nd round around the 2nd-best-match from 1st round
+ for (i = SCANSTEP; i < seekLength - SCANWIND - 1; i += SCANSTEP)
+ {
+ // Calculates correlation value for the mixing position corresponding
+ // to 'i'
+ corr = (float)calcCrossCorr(refPos + channels*i, pMidBuffer, norm);
+ // heuristic rule to slightly favour values close to mid of the seek range
+ float tmp = (float)(2 * i - seekLength - 1) / (float)seekLength;
+ corr = ((corr + 0.1f) * (1.0f - 0.25f * tmp * tmp));
+
+ // Checks for the highest correlation value
+ if (corr > bestCorr)
+ {
+ // found new best match. keep the previous best as 2nd best match
+ bestCorr2 = bestCorr;
+ bestOffs2 = bestOffs;
+ bestCorr = corr;
+ bestOffs = i;
+ }
+ else if (corr > bestCorr2)
+ {
+ // not new best, but still new 2nd best match
+ bestCorr2 = corr;
+ bestOffs2 = i;
+ }
+ }
+
+ // Scans surroundings of the found best match with small stepping
+ int end = _MIN(bestOffs + SCANWIND + 1, seekLength);
+ for (i = bestOffs - SCANWIND; i < end; i++)
+ {
+ if (i == bestOffs) continue; // this offset already calculated, thus skip
+
+ // Calculates correlation value for the mixing position corresponding
+ // to 'i'
+ corr = (float)calcCrossCorr(refPos + channels*i, pMidBuffer, norm);
+ // heuristic rule to slightly favour values close to mid of the range
+ float tmp = (float)(2 * i - seekLength - 1) / (float)seekLength;
+ corr = ((corr + 0.1f) * (1.0f - 0.25f * tmp * tmp));
+
+ // Checks for the highest correlation value
+ if (corr > bestCorr)
+ {
+ bestCorr = corr;
+ bestOffs = i;
+ }
+ }
+
+ // Scans surroundings of the 2nd best match with small stepping
+ end = _MIN(bestOffs2 + SCANWIND + 1, seekLength);
+ for (i = bestOffs2 - SCANWIND; i < end; i++)
+ {
+ if (i == bestOffs2) continue; // this offset already calculated, thus skip
+
+ // Calculates correlation value for the mixing position corresponding
+ // to 'i'
+ corr = (float)calcCrossCorr(refPos + channels*i, pMidBuffer, norm);
+ // heuristic rule to slightly favour values close to mid of the range
+ float tmp = (float)(2 * i - seekLength - 1) / (float)seekLength;
+ corr = ((corr + 0.1f) * (1.0f - 0.25f * tmp * tmp));
+
+ // Checks for the highest correlation value
+ if (corr > bestCorr)
+ {
+ bestCorr = corr;
+ bestOffs = i;
+ }
+ }
+
+ // clear cross correlation routine state if necessary (is so e.g. in MMX routines).
+ clearCrossCorrState();
+
+#ifdef SOUNDTOUCH_INTEGER_SAMPLES
+ adaptNormalizer();
+#endif
+
+ return bestOffs;
+}
+
+
+
+
+/// For integer algorithm: adapt normalization factor divider with music so that
+/// it'll not be pessimistically restrictive that can degrade quality on quieter sections
+/// yet won't cause integer overflows either
+void TDStretch::adaptNormalizer()
+{
+ // Do not adapt normalizer over too silent sequences to avoid averaging filter depleting to
+ // too low values during pauses in music
+ if ((maxnorm > 1000) || (maxnormf > 40000000))
+ {
+ //norm averaging filter
+ maxnormf = 0.9f * maxnormf + 0.1f * (float)maxnorm;
+
+ if ((maxnorm > 800000000) && (overlapDividerBitsNorm < 16))
+ {
+ // large values, so increase divider
+ overlapDividerBitsNorm++;
+ if (maxnorm > 1600000000) overlapDividerBitsNorm++; // extra large value => extra increase
+ }
+ else if ((maxnormf < 1000000) && (overlapDividerBitsNorm > 0))
+ {
+ // extra small values, decrease divider
+ overlapDividerBitsNorm--;
+ }
+ }
+
+ maxnorm = 0;
+}
+
+
+/// clear cross correlation routine state if necessary
+void TDStretch::clearCrossCorrState()
+{
+ // default implementation is empty.
+}
+
+
+/// Calculates processing sequence length according to tempo setting
+void TDStretch::calcSeqParameters()
+{
+ // Adjust tempo param according to tempo, so that variating processing sequence length is used
+ // at various tempo settings, between the given low...top limits
+ #define AUTOSEQ_TEMPO_LOW 0.5 // auto setting low tempo range (-50%)
+ #define AUTOSEQ_TEMPO_TOP 2.0 // auto setting top tempo range (+100%)
+
+ // sequence-ms setting values at above low & top tempo
+ #define AUTOSEQ_AT_MIN 90.0
+ #define AUTOSEQ_AT_MAX 40.0
+ #define AUTOSEQ_K ((AUTOSEQ_AT_MAX - AUTOSEQ_AT_MIN) / (AUTOSEQ_TEMPO_TOP - AUTOSEQ_TEMPO_LOW))
+ #define AUTOSEQ_C (AUTOSEQ_AT_MIN - (AUTOSEQ_K) * (AUTOSEQ_TEMPO_LOW))
+
+ // seek-window-ms setting values at above low & top tempoq
+ #define AUTOSEEK_AT_MIN 20.0
+ #define AUTOSEEK_AT_MAX 15.0
+ #define AUTOSEEK_K ((AUTOSEEK_AT_MAX - AUTOSEEK_AT_MIN) / (AUTOSEQ_TEMPO_TOP - AUTOSEQ_TEMPO_LOW))
+ #define AUTOSEEK_C (AUTOSEEK_AT_MIN - (AUTOSEEK_K) * (AUTOSEQ_TEMPO_LOW))
+
+ #define CHECK_LIMITS(x, mi, ma) (((x) < (mi)) ? (mi) : (((x) > (ma)) ? (ma) : (x)))
+
+ double seq, seek;
+
+ if (bAutoSeqSetting)
+ {
+ seq = AUTOSEQ_C + AUTOSEQ_K * tempo;
+ seq = CHECK_LIMITS(seq, AUTOSEQ_AT_MAX, AUTOSEQ_AT_MIN);
+ sequenceMs = (int)(seq + 0.5);
+ }
+
+ if (bAutoSeekSetting)
+ {
+ seek = AUTOSEEK_C + AUTOSEEK_K * tempo;
+ seek = CHECK_LIMITS(seek, AUTOSEEK_AT_MAX, AUTOSEEK_AT_MIN);
+ seekWindowMs = (int)(seek + 0.5);
+ }
+
+ // Update seek window lengths
+ seekWindowLength = (sampleRate * sequenceMs) / 1000;
+ if (seekWindowLength < 2 * overlapLength)
+ {
+ seekWindowLength = 2 * overlapLength;
+ }
+ seekLength = (sampleRate * seekWindowMs) / 1000;
+}
+
+
+
+// Sets new target tempo. Normal tempo = 'SCALE', smaller values represent slower
+// tempo, larger faster tempo.
+void TDStretch::setTempo(double newTempo)
+{
+ int intskip;
+
+ tempo = newTempo;
+
+ // Calculate new sequence duration
+ calcSeqParameters();
+
+ // Calculate ideal skip length (according to tempo value)
+ nominalSkip = tempo * (seekWindowLength - overlapLength);
+ intskip = (int)(nominalSkip + 0.5);
+
+ // Calculate how many samples are needed in the 'inputBuffer' to
+ // process another batch of samples
+ //sampleReq = max(intskip + overlapLength, seekWindowLength) + seekLength / 2;
+ sampleReq = max(intskip + overlapLength, seekWindowLength) + seekLength;
+}
+
+
+
+// Sets the number of channels, 1 = mono, 2 = stereo
+void TDStretch::setChannels(int numChannels)
+{
+ if (!verifyNumberOfChannels(numChannels) ||
+ (channels == numChannels)) return;
+
+ channels = numChannels;
+ inputBuffer.setChannels(channels);
+ outputBuffer.setChannels(channels);
+
+ // re-init overlap/buffer
+ overlapLength=0;
+ setParameters(sampleRate);
+}
+
+
+// nominal tempo, no need for processing, just pass the samples through
+// to outputBuffer
+/*
+void TDStretch::processNominalTempo()
+{
+ assert(tempo == 1.0f);
+
+ if (bMidBufferDirty)
+ {
+ // If there are samples in pMidBuffer waiting for overlapping,
+ // do a single sliding overlapping with them in order to prevent a
+ // clicking distortion in the output sound
+ if (inputBuffer.numSamples() < overlapLength)
+ {
+ // wait until we've got overlapLength input samples
+ return;
+ }
+ // Mix the samples in the beginning of 'inputBuffer' with the
+ // samples in 'midBuffer' using sliding overlapping
+ overlap(outputBuffer.ptrEnd(overlapLength), inputBuffer.ptrBegin(), 0);
+ outputBuffer.putSamples(overlapLength);
+ inputBuffer.receiveSamples(overlapLength);
+ clearMidBuffer();
+ // now we've caught the nominal sample flow and may switch to
+ // bypass mode
+ }
+
+ // Simply bypass samples from input to output
+ outputBuffer.moveSamples(inputBuffer);
+}
+*/
+
+
+// Processes as many processing frames of the samples 'inputBuffer', store
+// the result into 'outputBuffer'
+void TDStretch::processSamples()
+{
+ int ovlSkip;
+ int offset = 0;
+ int temp;
+
+ /* Removed this small optimization - can introduce a click to sound when tempo setting
+ crosses the nominal value
+ if (tempo == 1.0f)
+ {
+ // tempo not changed from the original, so bypass the processing
+ processNominalTempo();
+ return;
+ }
+ */
+
+ // Process samples as long as there are enough samples in 'inputBuffer'
+ // to form a processing frame.
+ while ((int)inputBuffer.numSamples() >= sampleReq)
+ {
+ if (isBeginning == false)
+ {
+ // apart from the very beginning of the track,
+ // scan for the best overlapping position & do overlap-add
+ offset = seekBestOverlapPosition(inputBuffer.ptrBegin());
+
+ // Mix the samples in the 'inputBuffer' at position of 'offset' with the
+ // samples in 'midBuffer' using sliding overlapping
+ // ... first partially overlap with the end of the previous sequence
+ // (that's in 'midBuffer')
+ overlap(outputBuffer.ptrEnd((uint)overlapLength), inputBuffer.ptrBegin(), (uint)offset);
+ outputBuffer.putSamples((uint)overlapLength);
+ offset += overlapLength;
+ }
+ else
+ {
+ // Adjust processing offset at beginning of track by not perform initial overlapping
+ // and compensating that in the 'input buffer skip' calculation
+ isBeginning = false;
+ int skip = (int)(tempo * overlapLength + 0.5 * seekLength + 0.5);
+
+ #ifdef ST_SIMD_AVOID_UNALIGNED
+ // in SIMD mode, round the skip amount to value corresponding to aligned memory address
+ if (channels == 1)
+ {
+ skip &= -4;
+ }
+ else if (channels == 2)
+ {
+ skip &= -2;
+ }
+ #endif
+ skipFract -= skip;
+ if (skipFract <= -nominalSkip)
+ {
+ skipFract = -nominalSkip;
+ }
+ }
+
+ // ... then copy sequence samples from 'inputBuffer' to output:
+
+ // crosscheck that we don't have buffer overflow...
+ if ((int)inputBuffer.numSamples() < (offset + seekWindowLength - overlapLength))
+ {
+ continue; // just in case, shouldn't really happen
+ }
+
+ // length of sequence
+ temp = (seekWindowLength - 2 * overlapLength);
+ outputBuffer.putSamples(inputBuffer.ptrBegin() + channels * offset, (uint)temp);
+
+ // Copies the end of the current sequence from 'inputBuffer' to
+ // 'midBuffer' for being mixed with the beginning of the next
+ // processing sequence and so on
+ assert((offset + temp + overlapLength) <= (int)inputBuffer.numSamples());
+ memcpy(pMidBuffer, inputBuffer.ptrBegin() + channels * (offset + temp),
+ channels * sizeof(SAMPLETYPE) * overlapLength);
+
+ // Remove the processed samples from the input buffer. Update
+ // the difference between integer & nominal skip step to 'skipFract'
+ // in order to prevent the error from accumulating over time.
+ skipFract += nominalSkip; // real skip size
+ ovlSkip = (int)skipFract; // rounded to integer skip
+ skipFract -= ovlSkip; // maintain the fraction part, i.e. real vs. integer skip
+ inputBuffer.receiveSamples((uint)ovlSkip);
+ }
+}
+
+
+// Adds 'numsamples' pcs of samples from the 'samples' memory position into
+// the input of the object.
+void TDStretch::putSamples(const SAMPLETYPE *samples, uint nSamples)
+{
+ // Add the samples into the input buffer
+ inputBuffer.putSamples(samples, nSamples);
+ // Process the samples in input buffer
+ processSamples();
+}
+
+
+
+/// Set new overlap length parameter & reallocate RefMidBuffer if necessary.
+void TDStretch::acceptNewOverlapLength(int newOverlapLength)
+{
+ int prevOvl;
+
+ assert(newOverlapLength >= 0);
+ prevOvl = overlapLength;
+ overlapLength = newOverlapLength;
+
+ if (overlapLength > prevOvl)
+ {
+ delete[] pMidBufferUnaligned;
+
+ pMidBufferUnaligned = new SAMPLETYPE[overlapLength * channels + 16 / sizeof(SAMPLETYPE)];
+ // ensure that 'pMidBuffer' is aligned to 16 byte boundary for efficiency
+ pMidBuffer = (SAMPLETYPE *)SOUNDTOUCH_ALIGN_POINTER_16(pMidBufferUnaligned);
+
+ clearMidBuffer();
+ }
+}
+
+
+// Operator 'new' is overloaded so that it automatically creates a suitable instance
+// depending on if we've a MMX/SSE/etc-capable CPU available or not.
+void * TDStretch::operator new(size_t s)
+{
+ // Notice! don't use "new TDStretch" directly, use "newInstance" to create a new instance instead!
+ ST_THROW_RT_ERROR("Error in TDStretch::new: Don't use 'new TDStretch' directly, use 'newInstance' member instead!");
+ return newInstance();
+}
+
+
+TDStretch * TDStretch::newInstance()
+{
+#if defined(SOUNDTOUCH_ALLOW_MMX) || defined(SOUNDTOUCH_ALLOW_SSE)
+ uint uExtensions;
+
+ uExtensions = detectCPUextensions();
+#endif
+
+ // Check if MMX/SSE instruction set extensions supported by CPU
+
+#ifdef SOUNDTOUCH_ALLOW_MMX
+ // MMX routines available only with integer sample types
+ if (uExtensions & SUPPORT_MMX)
+ {
+ return ::new TDStretchMMX;
+ }
+ else
+#endif // SOUNDTOUCH_ALLOW_MMX
+
+
+#ifdef SOUNDTOUCH_ALLOW_SSE
+ if (uExtensions & SUPPORT_SSE)
+ {
+ // SSE support
+ return ::new TDStretchSSE;
+ }
+ else
+#endif // SOUNDTOUCH_ALLOW_SSE
+
+ {
+ // ISA optimizations not supported, use plain C version
+ return ::new TDStretch;
+ }
+}
+
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// Integer arithmetic specific algorithm implementations.
+//
+//////////////////////////////////////////////////////////////////////////////
+
+#ifdef SOUNDTOUCH_INTEGER_SAMPLES
+
+// Overlaps samples in 'midBuffer' with the samples in 'input'. The 'Stereo'
+// version of the routine.
+void TDStretch::overlapStereo(short *poutput, const short *input) const
+{
+ int i;
+ short temp;
+ int cnt2;
+
+ for (i = 0; i < overlapLength ; i ++)
+ {
+ temp = (short)(overlapLength - i);
+ cnt2 = 2 * i;
+ poutput[cnt2] = (input[cnt2] * i + pMidBuffer[cnt2] * temp ) / overlapLength;
+ poutput[cnt2 + 1] = (input[cnt2 + 1] * i + pMidBuffer[cnt2 + 1] * temp ) / overlapLength;
+ }
+}
+
+
+// Overlaps samples in 'midBuffer' with the samples in 'input'. The 'Multi'
+// version of the routine.
+void TDStretch::overlapMulti(short *poutput, const short *input) const
+{
+ short m1;
+ int i = 0;
+
+ for (m1 = 0; m1 < overlapLength; m1 ++)
+ {
+ short m2 = (short)(overlapLength - m1);
+ for (int c = 0; c < channels; c ++)
+ {
+ poutput[i] = (input[i] * m1 + pMidBuffer[i] * m2) / overlapLength;
+ i++;
+ }
+ }
+}
+
+// Calculates the x having the closest 2^x value for the given value
+static int _getClosest2Power(double value)
+{
+ return (int)(log(value) / log(2.0) + 0.5);
+}
+
+
+/// Calculates overlap period length in samples.
+/// Integer version rounds overlap length to closest power of 2
+/// for a divide scaling operation.
+void TDStretch::calculateOverlapLength(int aoverlapMs)
+{
+ int newOvl;
+
+ assert(aoverlapMs >= 0);
+
+ // calculate overlap length so that it's power of 2 - thus it's easy to do
+ // integer division by right-shifting. Term "-1" at end is to account for
+ // the extra most significatnt bit left unused in result by signed multiplication
+ overlapDividerBitsPure = _getClosest2Power((sampleRate * aoverlapMs) / 1000.0) - 1;
+ if (overlapDividerBitsPure > 9) overlapDividerBitsPure = 9;
+ if (overlapDividerBitsPure < 3) overlapDividerBitsPure = 3;
+ newOvl = (int)pow(2.0, (int)overlapDividerBitsPure + 1); // +1 => account for -1 above
+
+ acceptNewOverlapLength(newOvl);
+
+ overlapDividerBitsNorm = overlapDividerBitsPure;
+
+ // calculate sloping divider so that crosscorrelation operation won't
+ // overflow 32-bit register. Max. sum of the crosscorrelation sum without
+ // divider would be 2^30*(N^3-N)/3, where N = overlap length
+ slopingDivider = (newOvl * newOvl - 1) / 3;
+}
+
+
+double TDStretch::calcCrossCorr(const short *mixingPos, const short *compare, double &norm)
+{
+ long corr;
+ unsigned long lnorm;
+ int i;
+
+ #ifdef ST_SIMD_AVOID_UNALIGNED
+ // in SIMD mode skip 'mixingPos' positions that aren't aligned to 16-byte boundary
+ if (((ulongptr)mixingPos) & 15) return -1e50;
+ #endif
+
+ // hint compiler autovectorization that loop length is divisible by 8
+ int ilength = (channels * overlapLength) & -8;
+
+ corr = lnorm = 0;
+ // Same routine for stereo and mono
+ for (i = 0; i < ilength; i += 2)
+ {
+ corr += (mixingPos[i] * compare[i] +
+ mixingPos[i + 1] * compare[i + 1]) >> overlapDividerBitsNorm;
+ lnorm += (mixingPos[i] * mixingPos[i] +
+ mixingPos[i + 1] * mixingPos[i + 1]) >> overlapDividerBitsNorm;
+ // do intermediate scalings to avoid integer overflow
+ }
+
+ if (lnorm > maxnorm)
+ {
+ // modify 'maxnorm' inside critical section to avoid multi-access conflict if in OpenMP mode
+ #pragma omp critical
+ if (lnorm > maxnorm)
+ {
+ maxnorm = lnorm;
+ }
+ }
+ // Normalize result by dividing by sqrt(norm) - this step is easiest
+ // done using floating point operation
+ norm = (double)lnorm;
+ return (double)corr / sqrt((norm < 1e-9) ? 1.0 : norm);
+}
+
+
+/// Update cross-correlation by accumulating "norm" coefficient by previously calculated value
+double TDStretch::calcCrossCorrAccumulate(const short *mixingPos, const short *compare, double &norm)
+{
+ long corr;
+ long lnorm;
+ int i;
+
+ // hint compiler autovectorization that loop length is divisible by 8
+ int ilength = (channels * overlapLength) & -8;
+
+ // cancel first normalizer tap from previous round
+ lnorm = 0;
+ for (i = 1; i <= channels; i ++)
+ {
+ lnorm -= (mixingPos[-i] * mixingPos[-i]) >> overlapDividerBitsNorm;
+ }
+
+ corr = 0;
+ // Same routine for stereo and mono.
+ for (i = 0; i < ilength; i += 2)
+ {
+ corr += (mixingPos[i] * compare[i] +
+ mixingPos[i + 1] * compare[i + 1]) >> overlapDividerBitsNorm;
+ }
+
+ // update normalizer with last samples of this round
+ for (int j = 0; j < channels; j ++)
+ {
+ i --;
+ lnorm += (mixingPos[i] * mixingPos[i]) >> overlapDividerBitsNorm;
+ }
+
+ norm += (double)lnorm;
+ if (norm > maxnorm)
+ {
+ maxnorm = (unsigned long)norm;
+ }
+
+ // Normalize result by dividing by sqrt(norm) - this step is easiest
+ // done using floating point operation
+ return (double)corr / sqrt((norm < 1e-9) ? 1.0 : norm);
+}
+
+#endif // SOUNDTOUCH_INTEGER_SAMPLES
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// Floating point arithmetic specific algorithm implementations.
+//
+
+#ifdef SOUNDTOUCH_FLOAT_SAMPLES
+
+// Overlaps samples in 'midBuffer' with the samples in 'pInput'
+void TDStretch::overlapStereo(float *pOutput, const float *pInput) const
+{
+ int i;
+ float fScale;
+ float f1;
+ float f2;
+
+ fScale = 1.0f / (float)overlapLength;
+
+ f1 = 0;
+ f2 = 1.0f;
+
+ for (i = 0; i < 2 * (int)overlapLength ; i += 2)
+ {
+ pOutput[i + 0] = pInput[i + 0] * f1 + pMidBuffer[i + 0] * f2;
+ pOutput[i + 1] = pInput[i + 1] * f1 + pMidBuffer[i + 1] * f2;
+
+ f1 += fScale;
+ f2 -= fScale;
+ }
+}
+
+
+// Overlaps samples in 'midBuffer' with the samples in 'input'.
+void TDStretch::overlapMulti(float *pOutput, const float *pInput) const
+{
+ int i;
+ float fScale;
+ float f1;
+ float f2;
+
+ fScale = 1.0f / (float)overlapLength;
+
+ f1 = 0;
+ f2 = 1.0f;
+
+ i=0;
+ for (int i2 = 0; i2 < overlapLength; i2 ++)
+ {
+ // note: Could optimize this slightly by taking into account that always channels > 2
+ for (int c = 0; c < channels; c ++)
+ {
+ pOutput[i] = pInput[i] * f1 + pMidBuffer[i] * f2;
+ i++;
+ }
+ f1 += fScale;
+ f2 -= fScale;
+ }
+}
+
+
+/// Calculates overlapInMsec period length in samples.
+void TDStretch::calculateOverlapLength(int overlapInMsec)
+{
+ int newOvl;
+
+ assert(overlapInMsec >= 0);
+ newOvl = (sampleRate * overlapInMsec) / 1000;
+ if (newOvl < 16) newOvl = 16;
+
+ // must be divisible by 8
+ newOvl -= newOvl % 8;
+
+ acceptNewOverlapLength(newOvl);
+}
+
+
+/// Calculate cross-correlation
+double TDStretch::calcCrossCorr(const float *mixingPos, const float *compare, double &anorm)
+{
+ float corr;
+ float norm;
+ int i;
+
+ #ifdef ST_SIMD_AVOID_UNALIGNED
+ // in SIMD mode skip 'mixingPos' positions that aren't aligned to 16-byte boundary
+ if (((ulongptr)mixingPos) & 15) return -1e50;
+ #endif
+
+ // hint compiler autovectorization that loop length is divisible by 8
+ int ilength = (channels * overlapLength) & -8;
+
+ corr = norm = 0;
+ // Same routine for stereo and mono
+ for (i = 0; i < ilength; i ++)
+ {
+ corr += mixingPos[i] * compare[i];
+ norm += mixingPos[i] * mixingPos[i];
+ }
+
+ anorm = norm;
+ return corr / sqrt((norm < 1e-9 ? 1.0 : norm));
+}
+
+
+/// Update cross-correlation by accumulating "norm" coefficient by previously calculated value
+double TDStretch::calcCrossCorrAccumulate(const float *mixingPos, const float *compare, double &norm)
+{
+ float corr;
+ int i;
+
+ corr = 0;
+
+ // cancel first normalizer tap from previous round
+ for (i = 1; i <= channels; i ++)
+ {
+ norm -= mixingPos[-i] * mixingPos[-i];
+ }
+
+ // hint compiler autovectorization that loop length is divisible by 8
+ int ilength = (channels * overlapLength) & -8;
+
+ // Same routine for stereo and mono
+ for (i = 0; i < ilength; i ++)
+ {
+ corr += mixingPos[i] * compare[i];
+ }
+
+ // update normalizer with last samples of this round
+ for (int j = 0; j < channels; j ++)
+ {
+ i --;
+ norm += mixingPos[i] * mixingPos[i];
+ }
+
+ return corr / sqrt((norm < 1e-9 ? 1.0 : norm));
+}
+
+
+#endif // SOUNDTOUCH_FLOAT_SAMPLES