diff options
Diffstat (limited to 'media/libsoundtouch/src/TDStretch.cpp')
-rw-r--r-- | media/libsoundtouch/src/TDStretch.cpp | 1108 |
1 files changed, 1108 insertions, 0 deletions
diff --git a/media/libsoundtouch/src/TDStretch.cpp b/media/libsoundtouch/src/TDStretch.cpp new file mode 100644 index 0000000000..709e979d1d --- /dev/null +++ b/media/libsoundtouch/src/TDStretch.cpp @@ -0,0 +1,1108 @@ +/////////////////////////////////////////////////////////////////////////////// +/// +/// Sampled sound tempo changer/time stretch algorithm. Changes the sound tempo +/// while maintaining the original pitch by using a time domain WSOLA-like +/// method with several performance-increasing tweaks. +/// +/// Notes : MMX optimized functions reside in a separate, platform-specific +/// file, e.g. 'mmx_win.cpp' or 'mmx_gcc.cpp'. +/// +/// This source file contains OpenMP optimizations that allow speeding up the +/// corss-correlation algorithm by executing it in several threads / CPU cores +/// in parallel. See the following article link for more detailed discussion +/// about SoundTouch OpenMP optimizations: +/// http://www.softwarecoven.com/parallel-computing-in-embedded-mobile-devices +/// +/// Author : Copyright (c) Olli Parviainen +/// Author e-mail : oparviai 'at' iki.fi +/// SoundTouch WWW: http://www.surina.net/soundtouch +/// +//////////////////////////////////////////////////////////////////////////////// +// +// License : +// +// SoundTouch audio processing library +// Copyright (c) Olli Parviainen +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +//////////////////////////////////////////////////////////////////////////////// + +#include <string.h> +#include <limits.h> +#include <assert.h> +#include <math.h> +#include <float.h> + +#include "STTypes.h" +#include "cpu_detect.h" +#include "TDStretch.h" + +using namespace soundtouch; + +#define max(x, y) (((x) > (y)) ? (x) : (y)) + +/***************************************************************************** + * + * Constant definitions + * + *****************************************************************************/ + +// Table for the hierarchical mixing position seeking algorithm +const short _scanOffsets[5][24]={ + { 124, 186, 248, 310, 372, 434, 496, 558, 620, 682, 744, 806, + 868, 930, 992, 1054, 1116, 1178, 1240, 1302, 1364, 1426, 1488, 0}, + {-100, -75, -50, -25, 25, 50, 75, 100, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + { -20, -15, -10, -5, 5, 10, 15, 20, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + { -4, -3, -2, -1, 1, 2, 3, 4, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + { 121, 114, 97, 114, 98, 105, 108, 32, 104, 99, 117, 111, + 116, 100, 110, 117, 111, 115, 0, 0, 0, 0, 0, 0}}; + +/***************************************************************************** + * + * Implementation of the class 'TDStretch' + * + *****************************************************************************/ + + +TDStretch::TDStretch() : FIFOProcessor(&outputBuffer) +{ + bQuickSeek = false; + channels = 2; + + pMidBuffer = NULL; + pMidBufferUnaligned = NULL; + overlapLength = 0; + + bAutoSeqSetting = true; + bAutoSeekSetting = true; + + tempo = 1.0f; + setParameters(44100, DEFAULT_SEQUENCE_MS, DEFAULT_SEEKWINDOW_MS, DEFAULT_OVERLAP_MS); + setTempo(1.0f); + + clear(); +} + + + +TDStretch::~TDStretch() +{ + delete[] pMidBufferUnaligned; +} + + + +// Sets routine control parameters. These control are certain time constants +// defining how the sound is stretched to the desired duration. +// +// 'sampleRate' = sample rate of the sound +// 'sequenceMS' = one processing sequence length in milliseconds (default = 82 ms) +// 'seekwindowMS' = seeking window length for scanning the best overlapping +// position (default = 28 ms) +// 'overlapMS' = overlapping length (default = 12 ms) + +void TDStretch::setParameters(int aSampleRate, int aSequenceMS, + int aSeekWindowMS, int aOverlapMS) +{ + // accept only positive parameter values - if zero or negative, use old values instead + if (aSampleRate > 0) + { + if (aSampleRate > 192000) ST_THROW_RT_ERROR("Error: Excessive samplerate"); + this->sampleRate = aSampleRate; + } + + if (aOverlapMS > 0) this->overlapMs = aOverlapMS; + + if (aSequenceMS > 0) + { + this->sequenceMs = aSequenceMS; + bAutoSeqSetting = false; + } + else if (aSequenceMS == 0) + { + // if zero, use automatic setting + bAutoSeqSetting = true; + } + + if (aSeekWindowMS > 0) + { + this->seekWindowMs = aSeekWindowMS; + bAutoSeekSetting = false; + } + else if (aSeekWindowMS == 0) + { + // if zero, use automatic setting + bAutoSeekSetting = true; + } + + calcSeqParameters(); + + calculateOverlapLength(overlapMs); + + // set tempo to recalculate 'sampleReq' + setTempo(tempo); +} + + + +/// Get routine control parameters, see setParameters() function. +/// Any of the parameters to this function can be NULL, in such case corresponding parameter +/// value isn't returned. +void TDStretch::getParameters(int *pSampleRate, int *pSequenceMs, int *pSeekWindowMs, int *pOverlapMs) const +{ + if (pSampleRate) + { + *pSampleRate = sampleRate; + } + + if (pSequenceMs) + { + *pSequenceMs = (bAutoSeqSetting) ? (USE_AUTO_SEQUENCE_LEN) : sequenceMs; + } + + if (pSeekWindowMs) + { + *pSeekWindowMs = (bAutoSeekSetting) ? (USE_AUTO_SEEKWINDOW_LEN) : seekWindowMs; + } + + if (pOverlapMs) + { + *pOverlapMs = overlapMs; + } +} + + +// Overlaps samples in 'midBuffer' with the samples in 'pInput' +void TDStretch::overlapMono(SAMPLETYPE *pOutput, const SAMPLETYPE *pInput) const +{ + int i; + SAMPLETYPE m1, m2; + + m1 = (SAMPLETYPE)0; + m2 = (SAMPLETYPE)overlapLength; + + for (i = 0; i < overlapLength ; i ++) + { + pOutput[i] = (pInput[i] * m1 + pMidBuffer[i] * m2 ) / overlapLength; + m1 += 1; + m2 -= 1; + } +} + + + +void TDStretch::clearMidBuffer() +{ + memset(pMidBuffer, 0, channels * sizeof(SAMPLETYPE) * overlapLength); +} + + +void TDStretch::clearInput() +{ + inputBuffer.clear(); + clearMidBuffer(); + isBeginning = true; + maxnorm = 0; + maxnormf = 1e8; + skipFract = 0; +} + + +// Clears the sample buffers +void TDStretch::clear() +{ + outputBuffer.clear(); + clearInput(); +} + + + +// Enables/disables the quick position seeking algorithm. Zero to disable, nonzero +// to enable +void TDStretch::enableQuickSeek(bool enable) +{ + bQuickSeek = enable; +} + + +// Returns nonzero if the quick seeking algorithm is enabled. +bool TDStretch::isQuickSeekEnabled() const +{ + return bQuickSeek; +} + + +// Seeks for the optimal overlap-mixing position. +int TDStretch::seekBestOverlapPosition(const SAMPLETYPE *refPos) +{ + if (bQuickSeek) + { + return seekBestOverlapPositionQuick(refPos); + } + else + { + return seekBestOverlapPositionFull(refPos); + } +} + + +// Overlaps samples in 'midBuffer' with the samples in 'pInputBuffer' at position +// of 'ovlPos'. +inline void TDStretch::overlap(SAMPLETYPE *pOutput, const SAMPLETYPE *pInput, uint ovlPos) const +{ +#ifndef USE_MULTICH_ALWAYS + if (channels == 1) + { + // mono sound. + overlapMono(pOutput, pInput + ovlPos); + } + else if (channels == 2) + { + // stereo sound + overlapStereo(pOutput, pInput + 2 * ovlPos); + } + else +#endif // USE_MULTICH_ALWAYS + { + assert(channels > 0); + overlapMulti(pOutput, pInput + channels * ovlPos); + } +} + + +// Seeks for the optimal overlap-mixing position. The 'stereo' version of the +// routine +// +// The best position is determined as the position where the two overlapped +// sample sequences are 'most alike', in terms of the highest cross-correlation +// value over the overlapping period +int TDStretch::seekBestOverlapPositionFull(const SAMPLETYPE *refPos) +{ + int bestOffs; + double bestCorr; + int i; + double norm; + + bestCorr = -FLT_MAX; + bestOffs = 0; + + // Scans for the best correlation value by testing each possible position + // over the permitted range. + bestCorr = calcCrossCorr(refPos, pMidBuffer, norm); + bestCorr = (bestCorr + 0.1) * 0.75; + + #pragma omp parallel for + for (i = 1; i < seekLength; i ++) + { + double corr; + // Calculates correlation value for the mixing position corresponding to 'i' +#if defined(_OPENMP) || defined(ST_SIMD_AVOID_UNALIGNED) + // in parallel OpenMP mode, can't use norm accumulator version as parallel executor won't + // iterate the loop in sequential order + // in SIMD mode, avoid accumulator version to allow avoiding unaligned positions + corr = calcCrossCorr(refPos + channels * i, pMidBuffer, norm); +#else + // In non-parallel version call "calcCrossCorrAccumulate" that is otherwise same + // as "calcCrossCorr", but saves time by reusing & updating previously stored + // "norm" value + corr = calcCrossCorrAccumulate(refPos + channels * i, pMidBuffer, norm); +#endif + // heuristic rule to slightly favour values close to mid of the range + double tmp = (double)(2 * i - seekLength) / (double)seekLength; + corr = ((corr + 0.1) * (1.0 - 0.25 * tmp * tmp)); + + // Checks for the highest correlation value + if (corr > bestCorr) + { + // For optimal performance, enter critical section only in case that best value found. + // in such case repeat 'if' condition as it's possible that parallel execution may have + // updated the bestCorr value in the mean time + #pragma omp critical + if (corr > bestCorr) + { + bestCorr = corr; + bestOffs = i; + } + } + } + +#ifdef SOUNDTOUCH_INTEGER_SAMPLES + adaptNormalizer(); +#endif + + // clear cross correlation routine state if necessary (is so e.g. in MMX routines). + clearCrossCorrState(); + + return bestOffs; +} + + +// Quick seek algorithm for improved runtime-performance: First roughly scans through the +// correlation area, and then scan surroundings of two best preliminary correlation candidates +// with improved precision +// +// Based on testing: +// - This algorithm gives on average 99% as good match as the full algorithm +// - this quick seek algorithm finds the best match on ~90% of cases +// - on those 10% of cases when this algorithm doesn't find best match, +// it still finds on average ~90% match vs. the best possible match +int TDStretch::seekBestOverlapPositionQuick(const SAMPLETYPE *refPos) +{ +#define _MIN(a, b) (((a) < (b)) ? (a) : (b)) +#define SCANSTEP 16 +#define SCANWIND 8 + + int bestOffs; + int i; + int bestOffs2; + float bestCorr, corr; + float bestCorr2; + double norm; + + // note: 'float' types used in this function in case that the platform would need to use software-fp + + bestCorr = + bestCorr2 = -FLT_MAX; + bestOffs = + bestOffs2 = SCANWIND; + + // Scans for the best correlation value by testing each possible position + // over the permitted range. Look for two best matches on the first pass to + // increase possibility of ideal match. + // + // Begin from "SCANSTEP" instead of SCANWIND to make the calculation + // catch the 'middlepoint' of seekLength vector as that's the a-priori + // expected best match position + // + // Roughly: + // - 15% of cases find best result directly on the first round, + // - 75% cases find better match on 2nd round around the best match from 1st round + // - 10% cases find better match on 2nd round around the 2nd-best-match from 1st round + for (i = SCANSTEP; i < seekLength - SCANWIND - 1; i += SCANSTEP) + { + // Calculates correlation value for the mixing position corresponding + // to 'i' + corr = (float)calcCrossCorr(refPos + channels*i, pMidBuffer, norm); + // heuristic rule to slightly favour values close to mid of the seek range + float tmp = (float)(2 * i - seekLength - 1) / (float)seekLength; + corr = ((corr + 0.1f) * (1.0f - 0.25f * tmp * tmp)); + + // Checks for the highest correlation value + if (corr > bestCorr) + { + // found new best match. keep the previous best as 2nd best match + bestCorr2 = bestCorr; + bestOffs2 = bestOffs; + bestCorr = corr; + bestOffs = i; + } + else if (corr > bestCorr2) + { + // not new best, but still new 2nd best match + bestCorr2 = corr; + bestOffs2 = i; + } + } + + // Scans surroundings of the found best match with small stepping + int end = _MIN(bestOffs + SCANWIND + 1, seekLength); + for (i = bestOffs - SCANWIND; i < end; i++) + { + if (i == bestOffs) continue; // this offset already calculated, thus skip + + // Calculates correlation value for the mixing position corresponding + // to 'i' + corr = (float)calcCrossCorr(refPos + channels*i, pMidBuffer, norm); + // heuristic rule to slightly favour values close to mid of the range + float tmp = (float)(2 * i - seekLength - 1) / (float)seekLength; + corr = ((corr + 0.1f) * (1.0f - 0.25f * tmp * tmp)); + + // Checks for the highest correlation value + if (corr > bestCorr) + { + bestCorr = corr; + bestOffs = i; + } + } + + // Scans surroundings of the 2nd best match with small stepping + end = _MIN(bestOffs2 + SCANWIND + 1, seekLength); + for (i = bestOffs2 - SCANWIND; i < end; i++) + { + if (i == bestOffs2) continue; // this offset already calculated, thus skip + + // Calculates correlation value for the mixing position corresponding + // to 'i' + corr = (float)calcCrossCorr(refPos + channels*i, pMidBuffer, norm); + // heuristic rule to slightly favour values close to mid of the range + float tmp = (float)(2 * i - seekLength - 1) / (float)seekLength; + corr = ((corr + 0.1f) * (1.0f - 0.25f * tmp * tmp)); + + // Checks for the highest correlation value + if (corr > bestCorr) + { + bestCorr = corr; + bestOffs = i; + } + } + + // clear cross correlation routine state if necessary (is so e.g. in MMX routines). + clearCrossCorrState(); + +#ifdef SOUNDTOUCH_INTEGER_SAMPLES + adaptNormalizer(); +#endif + + return bestOffs; +} + + + + +/// For integer algorithm: adapt normalization factor divider with music so that +/// it'll not be pessimistically restrictive that can degrade quality on quieter sections +/// yet won't cause integer overflows either +void TDStretch::adaptNormalizer() +{ + // Do not adapt normalizer over too silent sequences to avoid averaging filter depleting to + // too low values during pauses in music + if ((maxnorm > 1000) || (maxnormf > 40000000)) + { + //norm averaging filter + maxnormf = 0.9f * maxnormf + 0.1f * (float)maxnorm; + + if ((maxnorm > 800000000) && (overlapDividerBitsNorm < 16)) + { + // large values, so increase divider + overlapDividerBitsNorm++; + if (maxnorm > 1600000000) overlapDividerBitsNorm++; // extra large value => extra increase + } + else if ((maxnormf < 1000000) && (overlapDividerBitsNorm > 0)) + { + // extra small values, decrease divider + overlapDividerBitsNorm--; + } + } + + maxnorm = 0; +} + + +/// clear cross correlation routine state if necessary +void TDStretch::clearCrossCorrState() +{ + // default implementation is empty. +} + + +/// Calculates processing sequence length according to tempo setting +void TDStretch::calcSeqParameters() +{ + // Adjust tempo param according to tempo, so that variating processing sequence length is used + // at various tempo settings, between the given low...top limits + #define AUTOSEQ_TEMPO_LOW 0.5 // auto setting low tempo range (-50%) + #define AUTOSEQ_TEMPO_TOP 2.0 // auto setting top tempo range (+100%) + + // sequence-ms setting values at above low & top tempo + #define AUTOSEQ_AT_MIN 90.0 + #define AUTOSEQ_AT_MAX 40.0 + #define AUTOSEQ_K ((AUTOSEQ_AT_MAX - AUTOSEQ_AT_MIN) / (AUTOSEQ_TEMPO_TOP - AUTOSEQ_TEMPO_LOW)) + #define AUTOSEQ_C (AUTOSEQ_AT_MIN - (AUTOSEQ_K) * (AUTOSEQ_TEMPO_LOW)) + + // seek-window-ms setting values at above low & top tempoq + #define AUTOSEEK_AT_MIN 20.0 + #define AUTOSEEK_AT_MAX 15.0 + #define AUTOSEEK_K ((AUTOSEEK_AT_MAX - AUTOSEEK_AT_MIN) / (AUTOSEQ_TEMPO_TOP - AUTOSEQ_TEMPO_LOW)) + #define AUTOSEEK_C (AUTOSEEK_AT_MIN - (AUTOSEEK_K) * (AUTOSEQ_TEMPO_LOW)) + + #define CHECK_LIMITS(x, mi, ma) (((x) < (mi)) ? (mi) : (((x) > (ma)) ? (ma) : (x))) + + double seq, seek; + + if (bAutoSeqSetting) + { + seq = AUTOSEQ_C + AUTOSEQ_K * tempo; + seq = CHECK_LIMITS(seq, AUTOSEQ_AT_MAX, AUTOSEQ_AT_MIN); + sequenceMs = (int)(seq + 0.5); + } + + if (bAutoSeekSetting) + { + seek = AUTOSEEK_C + AUTOSEEK_K * tempo; + seek = CHECK_LIMITS(seek, AUTOSEEK_AT_MAX, AUTOSEEK_AT_MIN); + seekWindowMs = (int)(seek + 0.5); + } + + // Update seek window lengths + seekWindowLength = (sampleRate * sequenceMs) / 1000; + if (seekWindowLength < 2 * overlapLength) + { + seekWindowLength = 2 * overlapLength; + } + seekLength = (sampleRate * seekWindowMs) / 1000; +} + + + +// Sets new target tempo. Normal tempo = 'SCALE', smaller values represent slower +// tempo, larger faster tempo. +void TDStretch::setTempo(double newTempo) +{ + int intskip; + + tempo = newTempo; + + // Calculate new sequence duration + calcSeqParameters(); + + // Calculate ideal skip length (according to tempo value) + nominalSkip = tempo * (seekWindowLength - overlapLength); + intskip = (int)(nominalSkip + 0.5); + + // Calculate how many samples are needed in the 'inputBuffer' to + // process another batch of samples + //sampleReq = max(intskip + overlapLength, seekWindowLength) + seekLength / 2; + sampleReq = max(intskip + overlapLength, seekWindowLength) + seekLength; +} + + + +// Sets the number of channels, 1 = mono, 2 = stereo +void TDStretch::setChannels(int numChannels) +{ + if (!verifyNumberOfChannels(numChannels) || + (channels == numChannels)) return; + + channels = numChannels; + inputBuffer.setChannels(channels); + outputBuffer.setChannels(channels); + + // re-init overlap/buffer + overlapLength=0; + setParameters(sampleRate); +} + + +// nominal tempo, no need for processing, just pass the samples through +// to outputBuffer +/* +void TDStretch::processNominalTempo() +{ + assert(tempo == 1.0f); + + if (bMidBufferDirty) + { + // If there are samples in pMidBuffer waiting for overlapping, + // do a single sliding overlapping with them in order to prevent a + // clicking distortion in the output sound + if (inputBuffer.numSamples() < overlapLength) + { + // wait until we've got overlapLength input samples + return; + } + // Mix the samples in the beginning of 'inputBuffer' with the + // samples in 'midBuffer' using sliding overlapping + overlap(outputBuffer.ptrEnd(overlapLength), inputBuffer.ptrBegin(), 0); + outputBuffer.putSamples(overlapLength); + inputBuffer.receiveSamples(overlapLength); + clearMidBuffer(); + // now we've caught the nominal sample flow and may switch to + // bypass mode + } + + // Simply bypass samples from input to output + outputBuffer.moveSamples(inputBuffer); +} +*/ + + +// Processes as many processing frames of the samples 'inputBuffer', store +// the result into 'outputBuffer' +void TDStretch::processSamples() +{ + int ovlSkip; + int offset = 0; + int temp; + + /* Removed this small optimization - can introduce a click to sound when tempo setting + crosses the nominal value + if (tempo == 1.0f) + { + // tempo not changed from the original, so bypass the processing + processNominalTempo(); + return; + } + */ + + // Process samples as long as there are enough samples in 'inputBuffer' + // to form a processing frame. + while ((int)inputBuffer.numSamples() >= sampleReq) + { + if (isBeginning == false) + { + // apart from the very beginning of the track, + // scan for the best overlapping position & do overlap-add + offset = seekBestOverlapPosition(inputBuffer.ptrBegin()); + + // Mix the samples in the 'inputBuffer' at position of 'offset' with the + // samples in 'midBuffer' using sliding overlapping + // ... first partially overlap with the end of the previous sequence + // (that's in 'midBuffer') + overlap(outputBuffer.ptrEnd((uint)overlapLength), inputBuffer.ptrBegin(), (uint)offset); + outputBuffer.putSamples((uint)overlapLength); + offset += overlapLength; + } + else + { + // Adjust processing offset at beginning of track by not perform initial overlapping + // and compensating that in the 'input buffer skip' calculation + isBeginning = false; + int skip = (int)(tempo * overlapLength + 0.5 * seekLength + 0.5); + + #ifdef ST_SIMD_AVOID_UNALIGNED + // in SIMD mode, round the skip amount to value corresponding to aligned memory address + if (channels == 1) + { + skip &= -4; + } + else if (channels == 2) + { + skip &= -2; + } + #endif + skipFract -= skip; + if (skipFract <= -nominalSkip) + { + skipFract = -nominalSkip; + } + } + + // ... then copy sequence samples from 'inputBuffer' to output: + + // crosscheck that we don't have buffer overflow... + if ((int)inputBuffer.numSamples() < (offset + seekWindowLength - overlapLength)) + { + continue; // just in case, shouldn't really happen + } + + // length of sequence + temp = (seekWindowLength - 2 * overlapLength); + outputBuffer.putSamples(inputBuffer.ptrBegin() + channels * offset, (uint)temp); + + // Copies the end of the current sequence from 'inputBuffer' to + // 'midBuffer' for being mixed with the beginning of the next + // processing sequence and so on + assert((offset + temp + overlapLength) <= (int)inputBuffer.numSamples()); + memcpy(pMidBuffer, inputBuffer.ptrBegin() + channels * (offset + temp), + channels * sizeof(SAMPLETYPE) * overlapLength); + + // Remove the processed samples from the input buffer. Update + // the difference between integer & nominal skip step to 'skipFract' + // in order to prevent the error from accumulating over time. + skipFract += nominalSkip; // real skip size + ovlSkip = (int)skipFract; // rounded to integer skip + skipFract -= ovlSkip; // maintain the fraction part, i.e. real vs. integer skip + inputBuffer.receiveSamples((uint)ovlSkip); + } +} + + +// Adds 'numsamples' pcs of samples from the 'samples' memory position into +// the input of the object. +void TDStretch::putSamples(const SAMPLETYPE *samples, uint nSamples) +{ + // Add the samples into the input buffer + inputBuffer.putSamples(samples, nSamples); + // Process the samples in input buffer + processSamples(); +} + + + +/// Set new overlap length parameter & reallocate RefMidBuffer if necessary. +void TDStretch::acceptNewOverlapLength(int newOverlapLength) +{ + int prevOvl; + + assert(newOverlapLength >= 0); + prevOvl = overlapLength; + overlapLength = newOverlapLength; + + if (overlapLength > prevOvl) + { + delete[] pMidBufferUnaligned; + + pMidBufferUnaligned = new SAMPLETYPE[overlapLength * channels + 16 / sizeof(SAMPLETYPE)]; + // ensure that 'pMidBuffer' is aligned to 16 byte boundary for efficiency + pMidBuffer = (SAMPLETYPE *)SOUNDTOUCH_ALIGN_POINTER_16(pMidBufferUnaligned); + + clearMidBuffer(); + } +} + + +// Operator 'new' is overloaded so that it automatically creates a suitable instance +// depending on if we've a MMX/SSE/etc-capable CPU available or not. +void * TDStretch::operator new(size_t s) +{ + // Notice! don't use "new TDStretch" directly, use "newInstance" to create a new instance instead! + ST_THROW_RT_ERROR("Error in TDStretch::new: Don't use 'new TDStretch' directly, use 'newInstance' member instead!"); + return newInstance(); +} + + +TDStretch * TDStretch::newInstance() +{ +#if defined(SOUNDTOUCH_ALLOW_MMX) || defined(SOUNDTOUCH_ALLOW_SSE) + uint uExtensions; + + uExtensions = detectCPUextensions(); +#endif + + // Check if MMX/SSE instruction set extensions supported by CPU + +#ifdef SOUNDTOUCH_ALLOW_MMX + // MMX routines available only with integer sample types + if (uExtensions & SUPPORT_MMX) + { + return ::new TDStretchMMX; + } + else +#endif // SOUNDTOUCH_ALLOW_MMX + + +#ifdef SOUNDTOUCH_ALLOW_SSE + if (uExtensions & SUPPORT_SSE) + { + // SSE support + return ::new TDStretchSSE; + } + else +#endif // SOUNDTOUCH_ALLOW_SSE + + { + // ISA optimizations not supported, use plain C version + return ::new TDStretch; + } +} + + +////////////////////////////////////////////////////////////////////////////// +// +// Integer arithmetic specific algorithm implementations. +// +////////////////////////////////////////////////////////////////////////////// + +#ifdef SOUNDTOUCH_INTEGER_SAMPLES + +// Overlaps samples in 'midBuffer' with the samples in 'input'. The 'Stereo' +// version of the routine. +void TDStretch::overlapStereo(short *poutput, const short *input) const +{ + int i; + short temp; + int cnt2; + + for (i = 0; i < overlapLength ; i ++) + { + temp = (short)(overlapLength - i); + cnt2 = 2 * i; + poutput[cnt2] = (input[cnt2] * i + pMidBuffer[cnt2] * temp ) / overlapLength; + poutput[cnt2 + 1] = (input[cnt2 + 1] * i + pMidBuffer[cnt2 + 1] * temp ) / overlapLength; + } +} + + +// Overlaps samples in 'midBuffer' with the samples in 'input'. The 'Multi' +// version of the routine. +void TDStretch::overlapMulti(short *poutput, const short *input) const +{ + short m1; + int i = 0; + + for (m1 = 0; m1 < overlapLength; m1 ++) + { + short m2 = (short)(overlapLength - m1); + for (int c = 0; c < channels; c ++) + { + poutput[i] = (input[i] * m1 + pMidBuffer[i] * m2) / overlapLength; + i++; + } + } +} + +// Calculates the x having the closest 2^x value for the given value +static int _getClosest2Power(double value) +{ + return (int)(log(value) / log(2.0) + 0.5); +} + + +/// Calculates overlap period length in samples. +/// Integer version rounds overlap length to closest power of 2 +/// for a divide scaling operation. +void TDStretch::calculateOverlapLength(int aoverlapMs) +{ + int newOvl; + + assert(aoverlapMs >= 0); + + // calculate overlap length so that it's power of 2 - thus it's easy to do + // integer division by right-shifting. Term "-1" at end is to account for + // the extra most significatnt bit left unused in result by signed multiplication + overlapDividerBitsPure = _getClosest2Power((sampleRate * aoverlapMs) / 1000.0) - 1; + if (overlapDividerBitsPure > 9) overlapDividerBitsPure = 9; + if (overlapDividerBitsPure < 3) overlapDividerBitsPure = 3; + newOvl = (int)pow(2.0, (int)overlapDividerBitsPure + 1); // +1 => account for -1 above + + acceptNewOverlapLength(newOvl); + + overlapDividerBitsNorm = overlapDividerBitsPure; + + // calculate sloping divider so that crosscorrelation operation won't + // overflow 32-bit register. Max. sum of the crosscorrelation sum without + // divider would be 2^30*(N^3-N)/3, where N = overlap length + slopingDivider = (newOvl * newOvl - 1) / 3; +} + + +double TDStretch::calcCrossCorr(const short *mixingPos, const short *compare, double &norm) +{ + long corr; + unsigned long lnorm; + int i; + + #ifdef ST_SIMD_AVOID_UNALIGNED + // in SIMD mode skip 'mixingPos' positions that aren't aligned to 16-byte boundary + if (((ulongptr)mixingPos) & 15) return -1e50; + #endif + + // hint compiler autovectorization that loop length is divisible by 8 + int ilength = (channels * overlapLength) & -8; + + corr = lnorm = 0; + // Same routine for stereo and mono + for (i = 0; i < ilength; i += 2) + { + corr += (mixingPos[i] * compare[i] + + mixingPos[i + 1] * compare[i + 1]) >> overlapDividerBitsNorm; + lnorm += (mixingPos[i] * mixingPos[i] + + mixingPos[i + 1] * mixingPos[i + 1]) >> overlapDividerBitsNorm; + // do intermediate scalings to avoid integer overflow + } + + if (lnorm > maxnorm) + { + // modify 'maxnorm' inside critical section to avoid multi-access conflict if in OpenMP mode + #pragma omp critical + if (lnorm > maxnorm) + { + maxnorm = lnorm; + } + } + // Normalize result by dividing by sqrt(norm) - this step is easiest + // done using floating point operation + norm = (double)lnorm; + return (double)corr / sqrt((norm < 1e-9) ? 1.0 : norm); +} + + +/// Update cross-correlation by accumulating "norm" coefficient by previously calculated value +double TDStretch::calcCrossCorrAccumulate(const short *mixingPos, const short *compare, double &norm) +{ + long corr; + long lnorm; + int i; + + // hint compiler autovectorization that loop length is divisible by 8 + int ilength = (channels * overlapLength) & -8; + + // cancel first normalizer tap from previous round + lnorm = 0; + for (i = 1; i <= channels; i ++) + { + lnorm -= (mixingPos[-i] * mixingPos[-i]) >> overlapDividerBitsNorm; + } + + corr = 0; + // Same routine for stereo and mono. + for (i = 0; i < ilength; i += 2) + { + corr += (mixingPos[i] * compare[i] + + mixingPos[i + 1] * compare[i + 1]) >> overlapDividerBitsNorm; + } + + // update normalizer with last samples of this round + for (int j = 0; j < channels; j ++) + { + i --; + lnorm += (mixingPos[i] * mixingPos[i]) >> overlapDividerBitsNorm; + } + + norm += (double)lnorm; + if (norm > maxnorm) + { + maxnorm = (unsigned long)norm; + } + + // Normalize result by dividing by sqrt(norm) - this step is easiest + // done using floating point operation + return (double)corr / sqrt((norm < 1e-9) ? 1.0 : norm); +} + +#endif // SOUNDTOUCH_INTEGER_SAMPLES + +////////////////////////////////////////////////////////////////////////////// +// +// Floating point arithmetic specific algorithm implementations. +// + +#ifdef SOUNDTOUCH_FLOAT_SAMPLES + +// Overlaps samples in 'midBuffer' with the samples in 'pInput' +void TDStretch::overlapStereo(float *pOutput, const float *pInput) const +{ + int i; + float fScale; + float f1; + float f2; + + fScale = 1.0f / (float)overlapLength; + + f1 = 0; + f2 = 1.0f; + + for (i = 0; i < 2 * (int)overlapLength ; i += 2) + { + pOutput[i + 0] = pInput[i + 0] * f1 + pMidBuffer[i + 0] * f2; + pOutput[i + 1] = pInput[i + 1] * f1 + pMidBuffer[i + 1] * f2; + + f1 += fScale; + f2 -= fScale; + } +} + + +// Overlaps samples in 'midBuffer' with the samples in 'input'. +void TDStretch::overlapMulti(float *pOutput, const float *pInput) const +{ + int i; + float fScale; + float f1; + float f2; + + fScale = 1.0f / (float)overlapLength; + + f1 = 0; + f2 = 1.0f; + + i=0; + for (int i2 = 0; i2 < overlapLength; i2 ++) + { + // note: Could optimize this slightly by taking into account that always channels > 2 + for (int c = 0; c < channels; c ++) + { + pOutput[i] = pInput[i] * f1 + pMidBuffer[i] * f2; + i++; + } + f1 += fScale; + f2 -= fScale; + } +} + + +/// Calculates overlapInMsec period length in samples. +void TDStretch::calculateOverlapLength(int overlapInMsec) +{ + int newOvl; + + assert(overlapInMsec >= 0); + newOvl = (sampleRate * overlapInMsec) / 1000; + if (newOvl < 16) newOvl = 16; + + // must be divisible by 8 + newOvl -= newOvl % 8; + + acceptNewOverlapLength(newOvl); +} + + +/// Calculate cross-correlation +double TDStretch::calcCrossCorr(const float *mixingPos, const float *compare, double &anorm) +{ + float corr; + float norm; + int i; + + #ifdef ST_SIMD_AVOID_UNALIGNED + // in SIMD mode skip 'mixingPos' positions that aren't aligned to 16-byte boundary + if (((ulongptr)mixingPos) & 15) return -1e50; + #endif + + // hint compiler autovectorization that loop length is divisible by 8 + int ilength = (channels * overlapLength) & -8; + + corr = norm = 0; + // Same routine for stereo and mono + for (i = 0; i < ilength; i ++) + { + corr += mixingPos[i] * compare[i]; + norm += mixingPos[i] * mixingPos[i]; + } + + anorm = norm; + return corr / sqrt((norm < 1e-9 ? 1.0 : norm)); +} + + +/// Update cross-correlation by accumulating "norm" coefficient by previously calculated value +double TDStretch::calcCrossCorrAccumulate(const float *mixingPos, const float *compare, double &norm) +{ + float corr; + int i; + + corr = 0; + + // cancel first normalizer tap from previous round + for (i = 1; i <= channels; i ++) + { + norm -= mixingPos[-i] * mixingPos[-i]; + } + + // hint compiler autovectorization that loop length is divisible by 8 + int ilength = (channels * overlapLength) & -8; + + // Same routine for stereo and mono + for (i = 0; i < ilength; i ++) + { + corr += mixingPos[i] * compare[i]; + } + + // update normalizer with last samples of this round + for (int j = 0; j < channels; j ++) + { + i --; + norm += mixingPos[i] * mixingPos[i]; + } + + return corr / sqrt((norm < 1e-9 ? 1.0 : norm)); +} + + +#endif // SOUNDTOUCH_FLOAT_SAMPLES |