summaryrefslogtreecommitdiffstats
path: root/audio/filter
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--audio/filter/af_drop.c114
-rw-r--r--audio/filter/af_format.c143
-rw-r--r--audio/filter/af_lavcac3enc.c437
-rw-r--r--audio/filter/af_rubberband.c382
-rw-r--r--audio/filter/af_scaletempo.c626
-rw-r--r--audio/filter/af_scaletempo2.c254
-rw-r--r--audio/filter/af_scaletempo2_internals.c873
-rw-r--r--audio/filter/af_scaletempo2_internals.h134
8 files changed, 2963 insertions, 0 deletions
diff --git a/audio/filter/af_drop.c b/audio/filter/af_drop.c
new file mode 100644
index 0000000..724c482
--- /dev/null
+++ b/audio/filter/af_drop.c
@@ -0,0 +1,114 @@
+#include "audio/aframe.h"
+#include "audio/format.h"
+#include "common/common.h"
+#include "filters/f_autoconvert.h"
+#include "filters/filter_internal.h"
+#include "filters/user_filters.h"
+
+struct priv {
+ double speed; // speed factor applied to each forwarded frame; 1.0 at creation, changed by MP_FILTER_COMMAND_SET_SPEED
+ double diff; // running balance: duration of frames sent (after speed scaling) minus duration of input frames read
+ struct mp_aframe *last; // for repeating: extra reference to the last sent frame, with its pts moved to that frame's end
+};
+
+// Writes at most one frame to the output pin per call. Depending on the
+// running duration balance in p->diff this is the next input frame, a repeat
+// of the previously sent frame, or nothing because the input was dropped.
+static void process(struct mp_filter *f)
+{
+    struct priv *p = f->priv;
+    struct mp_pin *src = f->ppins[0];
+    struct mp_pin *dst = f->ppins[1];
+
+    if (!mp_pin_in_needs_data(dst))
+        return;
+
+    struct mp_frame frame = {0};
+
+    double last_dur = p->last ? mp_aframe_duration(p->last) : 0;
+    bool repeat = p->last && p->diff < 0 && -p->diff > last_dur / 2;
+
+    if (repeat) {
+        // The balance is negative by more than half of the kept frame: send
+        // the kept reference again instead of reading new input.
+        MP_VERBOSE(f, "repeat\n");
+        frame = MAKE_FRAME(MP_FRAME_AUDIO, p->last);
+        p->last = NULL;
+    } else {
+        frame = mp_pin_out_read(src);
+
+        if (frame.type == MP_FRAME_AUDIO) {
+            last_dur = mp_aframe_duration(frame.data);
+            p->diff -= last_dur;
+            if (p->diff > last_dur / 2) {
+                // Still more than half a frame ahead: discard this input.
+                MP_VERBOSE(f, "drop\n");
+                mp_frame_unref(&frame);
+                mp_filter_internal_mark_progress(f);
+            }
+        }
+    }
+
+    if (frame.type == MP_FRAME_EOF) {
+        TA_FREEP(&p->last);
+    } else if (frame.type == MP_FRAME_AUDIO) {
+        struct mp_aframe *cur = frame.data;
+        talloc_free(p->last);
+        p->last = mp_aframe_new_ref(cur);
+        mp_aframe_mul_speed(cur, p->speed);
+        p->diff += mp_aframe_duration(cur);
+        // The kept copy is stamped with the end pts of the frame being sent.
+        mp_aframe_set_pts(p->last, mp_aframe_end_pts(cur));
+    }
+    mp_pin_in_write(dst, frame);
+}
+
+// Handles filter commands. Only MP_FILTER_COMMAND_SET_SPEED is accepted; it
+// stores the new speed factor. Returns false for every other command type.
+static bool command(struct mp_filter *f, struct mp_filter_command *cmd)
+{
+    if (cmd->type != MP_FILTER_COMMAND_SET_SPEED)
+        return false;
+
+    struct priv *p = f->priv;
+    p->speed = cmd->speed;
+    return true;
+}
+
+// Clears playback state: zeroes the duration balance and releases the frame
+// kept for repeating.
+static void reset(struct mp_filter *f)
+{
+    struct priv *state = f->priv;
+
+    state->diff = 0;
+    TA_FREEP(&state->last);
+}
+
+// Releases everything the filter holds; performs the same steps as reset().
+static void destroy(struct mp_filter *f)
+{
+    struct priv *p = f->priv;
+
+    TA_FREEP(&p->last);
+    p->diff = 0;
+}
+
+// Filter descriptor wiring the "drop" callbacks together.
+static const struct mp_filter_info af_drop_filter = {
+    .name = "drop",
+    .process = process,
+    .command = command,
+    .reset = reset,
+    .destroy = destroy,
+    .priv_size = sizeof(struct priv),
+};
+
+// Creates the "drop" filter with one input pin, one output pin and an initial
+// speed of 1.0. Returns NULL if the filter could not be created.
+//
+// Ownership of options passes to this function (the failure path already
+// freed it). The filter never reads it, so it is released on every path
+// instead of being leaked when creation succeeds.
+static struct mp_filter *af_drop_create(struct mp_filter *parent, void *options)
+{
+    talloc_free(options);
+
+    struct mp_filter *f = mp_filter_create(parent, &af_drop_filter);
+    if (!f)
+        return NULL;
+
+    mp_filter_add_pin(f, MP_PIN_IN, "in");
+    mp_filter_add_pin(f, MP_PIN_OUT, "out");
+
+    struct priv *p = f->priv;
+    p->speed = 1.0;
+
+    return f;
+}
+
+// User-visible registration of the "drop" filter (it exposes no options).
+const struct mp_user_filter_entry af_drop = {
+    .create = af_drop_create,
+    .desc = {
+        .name = "drop",
+        .description = "Change audio speed by dropping/repeating frames",
+        .priv_size = sizeof(struct priv),
+    },
+};
diff --git a/audio/filter/af_format.c b/audio/filter/af_format.c
new file mode 100644
index 0000000..2d1c1cc
--- /dev/null
+++ b/audio/filter/af_format.c
@@ -0,0 +1,143 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "audio/aframe.h"
+#include "audio/format.h"
+#include "filters/f_autoconvert.h"
+#include "filters/filter_internal.h"
+#include "filters/user_filters.h"
+#include "options/m_option.h"
+
+struct f_opts {
+ int in_format; // "format" option; if non-zero, added to the autoconvert filter as accepted sample format
+ int in_srate; // "srate" option; if non-zero, added to the autoconvert filter as accepted sample rate
+ struct m_channels in_channels; // "channels" option; the first chmap (if any) is added to the autoconvert filter
+ int out_format; // not referenced by the option table or the code visible in this file
+ int out_srate; // "out-srate" option; if non-zero, set on each frame with mp_aframe_set_rate()
+ struct m_channels out_channels; // "out-channels" option; the first chmap (if any) is set on each frame
+
+ bool fail; // "fail" option; when set, process() logs an error and marks the filter as failed
+};
+
+struct priv {
+ struct f_opts *opts; // option struct handed to af_format_create(), reparented to this priv
+ struct mp_pin *in_pin; // pins[1] of the autoconvert sub-filter; process() reads frames from here
+};
+
+// Moves one frame from the autoconvert sub-filter to the output pin. Audio
+// frames get the configured output channel map and sample rate set on them
+// first; EOF is forwarded unchanged. Any other frame type, or the "fail"
+// option, marks the filter as failed.
+static void process(struct mp_filter *f)
+{
+    struct priv *p = f->priv;
+    struct f_opts *opts = p->opts;
+    struct mp_pin *dst = f->ppins[1];
+
+    if (!mp_pin_can_transfer_data(dst, p->in_pin))
+        return;
+
+    struct mp_frame frame = mp_pin_out_read(p->in_pin);
+
+    if (opts->fail) {
+        MP_ERR(f, "Failing on purpose.\n");
+        goto error;
+    }
+
+    if (frame.type == MP_FRAME_EOF) {
+        mp_pin_in_write(dst, frame);
+        return;
+    }
+
+    if (frame.type != MP_FRAME_AUDIO) {
+        MP_ERR(f, "audio frame expected\n");
+        goto error;
+    }
+
+    struct mp_aframe *audio = frame.data;
+
+    if (opts->out_channels.num_chmaps > 0 &&
+        !mp_aframe_set_chmap(audio, &opts->out_channels.chmaps[0]))
+    {
+        MP_ERR(f, "could not force output channels\n");
+        goto error;
+    }
+
+    if (opts->out_srate)
+        mp_aframe_set_rate(audio, opts->out_srate);
+
+    mp_pin_in_write(dst, frame);
+    return;
+
+error:
+    mp_frame_unref(&frame);
+    mp_filter_internal_mark_failed(f);
+}
+
+// Filter descriptor for "format"; only a process callback is needed.
+static const struct mp_filter_info af_format_filter = {
+    .name = "format",
+    .process = process,
+    .priv_size = sizeof(struct priv),
+};
+
+// Creates the "format" filter. Takes ownership of options (a struct f_opts).
+// An autoconvert sub-filter is connected to the input pin and configured
+// with the requested input format, rate and channel map; process() then
+// reads from that sub-filter's output. Returns NULL if the filter itself
+// cannot be created; aborts if the autoconvert filter cannot be created.
+static struct mp_filter *af_format_create(struct mp_filter *parent,
+                                          void *options)
+{
+    struct mp_filter *f = mp_filter_create(parent, &af_format_filter);
+    if (!f) {
+        talloc_free(options);
+        return NULL;
+    }
+
+    struct priv *p = f->priv;
+    struct f_opts *opts = talloc_steal(p, options);
+    p->opts = opts;
+
+    mp_filter_add_pin(f, MP_PIN_IN, "in");
+    mp_filter_add_pin(f, MP_PIN_OUT, "out");
+
+    struct mp_autoconvert *autoconv = mp_autoconvert_create(f);
+    if (!autoconv)
+        abort();
+
+    if (opts->in_format)
+        mp_autoconvert_add_afmt(autoconv, opts->in_format);
+    if (opts->in_srate)
+        mp_autoconvert_add_srate(autoconv, opts->in_srate);
+    if (opts->in_channels.num_chmaps > 0)
+        mp_autoconvert_add_chmap(autoconv, &opts->in_channels.chmaps[0]);
+
+    mp_pin_connect(autoconv->f->pins[0], f->ppins[0]);
+    p->in_pin = autoconv->f->pins[1];
+
+    return f;
+}
+
+#define OPT_BASE_STRUCT struct f_opts
+
+// User-visible registration of the "format" filter and its option table.
+const struct mp_user_filter_entry af_format = {
+    .create = af_format_create,
+    .desc = {
+        .description = "Force audio format",
+        .name = "format",
+        .priv_size = sizeof(struct f_opts),
+        .options = (const struct m_option[]) {
+            // Input side: constraints given to the autoconvert sub-filter.
+            {"format", OPT_AUDIOFORMAT(in_format)},
+            {"srate", OPT_INT(in_srate), M_RANGE(1000, 8*48000)},
+            {"channels", OPT_CHANNELS(in_channels),
+                .flags = M_OPT_CHANNELS_LIMITED},
+            // Output side: values set on each frame in process().
+            {"out-srate", OPT_INT(out_srate), M_RANGE(1000, 8*48000)},
+            {"out-channels", OPT_CHANNELS(out_channels),
+                .flags = M_OPT_CHANNELS_LIMITED},
+            {"fail", OPT_BOOL(fail)},
+            {0}
+        },
+    },
+};
diff --git a/audio/filter/af_lavcac3enc.c b/audio/filter/af_lavcac3enc.c
new file mode 100644
index 0000000..b4a1d59
--- /dev/null
+++ b/audio/filter/af_lavcac3enc.c
@@ -0,0 +1,437 @@
+/*
+ * audio filter for runtime AC-3 encoding with libavcodec.
+ *
+ * Copyright (C) 2007 Ulion <ulion A gmail P com>
+ *
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <inttypes.h>
+#include <assert.h>
+
+#include <libavcodec/avcodec.h>
+#include <libavutil/intreadwrite.h>
+#include <libavutil/common.h>
+#include <libavutil/bswap.h>
+#include <libavutil/mem.h>
+
+#include "config.h"
+
+#include "audio/aframe.h"
+#include "audio/chmap_avchannel.h"
+#include "audio/chmap_sel.h"
+#include "audio/fmt-conversion.h"
+#include "audio/format.h"
+#include "common/av_common.h"
+#include "common/common.h"
+#include "filters/f_autoconvert.h"
+#include "filters/f_utils.h"
+#include "filters/filter_internal.h"
+#include "filters/user_filters.h"
+#include "options/m_option.h"
+
+
+#define AC3_MAX_CHANNELS 6
+#define AC3_MAX_CODED_FRAME_SIZE 3840
+#define AC3_FRAME_SIZE (6 * 256)
+// Bitrates accepted for the "bitrate" option; multiplied by 1000 before being
+// passed to the encoder. The storage-class specifier comes first: placing it
+// after the qualifier is an obsolescent form.
+static const uint16_t ac3_bitrate_tab[19] = {
+    32, 40, 48, 56, 64, 80, 96, 112, 128,
+    160, 192, 224, 256, 320, 384, 448, 512, 576, 640
+};
+
+struct f_opts {
+ bool add_iec61937_header; // "tospdif": prepend an 8-byte IEC 61937 header and pad each packet to a fixed frame size
+ int bit_rate; // "bitrate": value from ac3_bitrate_tab; 0 ("auto"/"default") lets reinit() pick by channel count
+ int min_channel_num; // "minch": frames with fewer channels are passed through without encoding
+ char *encoder; // "encoder": name given to avcodec_find_encoder_by_name() (default "ac3")
+ char **avopts; // "o": key/value list applied to the codec context with mp_set_avopts()
+};
+
+struct priv {
+ struct f_opts *opts; // option struct handed to the create function, reparented to this priv
+
+ struct mp_pin *in_pin; // pins[1] of the fixed-frame-size sub-filter; process() reads input here
+ struct mp_aframe *cur_format; // config the encoder was last opened for; reset and refilled by reinit()
+ struct mp_aframe *in_frame; // most recent input frame; its attributes are copied to the output frame
+ struct mp_aframe_pool *out_pool; // pool used to allocate output frames
+
+ const struct AVCodec *lavc_acodec; // encoder found by name in the create function
+ struct AVCodecContext *lavc_actx; // codec context; reopened by reinit() on format changes
+ AVPacket *lavc_pkt; // reusable packet receiving the encoder output
+ int bit_rate; // opts->bit_rate times 1000 if it matched ac3_bitrate_tab, else 0
+ int out_samples; // upper bound on encoded output per AC3 frame
+};
+
+// (Re)opens the encoder for the format of s->in_frame.
+//
+// Clears s->cur_format first, so a failed reinit is retried on the next
+// frame. On success s->cur_format holds the config of s->in_frame. Also
+// computes s->out_samples, the allocation size for output frames.
+// Returns false if the codec cannot be opened or reports no frame size.
+static bool reinit(struct mp_filter *f)
+{
+    struct priv *s = f->priv;
+
+    mp_aframe_reset(s->cur_format);
+
+    // Fallback bitrate indexed by channel count (index 0 is unused).
+    static const int default_bit_rate[AC3_MAX_CHANNELS+1] =
+        {0, 96000, 192000, 256000, 384000, 448000, 448000};
+
+    if (s->opts->add_iec61937_header) {
+        s->out_samples = AC3_FRAME_SIZE;
+    } else {
+        s->out_samples = AC3_MAX_CODED_FRAME_SIZE /
+                         mp_aframe_get_sstride(s->in_frame);
+    }
+
+    int format = mp_aframe_get_format(s->in_frame);
+    int rate = mp_aframe_get_rate(s->in_frame);
+    struct mp_chmap chmap = {0};
+    mp_aframe_get_chmap(s->in_frame, &chmap);
+
+    // No explicit bitrate: use the per-channel-count default when the channel
+    // count is within the table, otherwise leave it at 0.
+    int bit_rate = s->bit_rate;
+    if (!bit_rate && chmap.num < AC3_MAX_CHANNELS + 1)
+        bit_rate = default_bit_rate[chmap.num];
+
+    avcodec_close(s->lavc_actx);
+
+    // Put sample parameters
+    s->lavc_actx->sample_fmt = af_to_avformat(format);
+
+#if !HAVE_AV_CHANNEL_LAYOUT
+    s->lavc_actx->channels = chmap.num;
+    s->lavc_actx->channel_layout = mp_chmap_to_lavc(&chmap);
+#else
+    mp_chmap_to_av_layout(&s->lavc_actx->ch_layout, &chmap);
+#endif
+    s->lavc_actx->sample_rate = rate;
+    s->lavc_actx->bit_rate = bit_rate;
+
+    if (avcodec_open2(s->lavc_actx, s->lavc_acodec, NULL) < 0) {
+        // Report the configured encoder name; the encoder is selectable via
+        // the "encoder" option and is not necessarily "ac3".
+        MP_ERR(f, "Couldn't open codec %s, br=%d.\n", s->opts->encoder,
+               bit_rate);
+        return false;
+    }
+
+    if (s->lavc_actx->frame_size < 1) {
+        MP_ERR(f, "encoder didn't specify input frame size\n");
+        return false;
+    }
+
+    mp_aframe_config_copy(s->cur_format, s->in_frame);
+    return true;
+}
+
+// Drops the input frame kept from the last process() call.
+static void reset(struct mp_filter *f)
+{
+    struct priv *state = f->priv;
+    TA_FREEP(&state->in_frame);
+}
+
+// Releases the kept input frame (via reset()), then the libavcodec packet
+// and codec context.
+static void destroy(struct mp_filter *f)
+{
+    reset(f);
+
+    struct priv *state = f->priv;
+    av_packet_free(&state->lavc_pkt);
+    avcodec_free_context(&state->lavc_actx);
+}
+
+// Byte-swaps size consecutive 16-bit words in place, starting at ptr.
+static void swap_16(uint16_t *ptr, size_t size)
+{
+    while (size > 0) {
+        *ptr = av_bswap16(*ptr);
+        ptr++;
+        size--;
+    }
+}
+
+// Produces at most one output frame per call.
+//
+// Input frames are fed to the encoder until it returns a packet. The packet
+// is copied into a newly allocated output frame tagged as AC3 (stereo,
+// 48000 Hz), optionally preceded by an IEC 61937 header and zero-padded to a
+// fixed size, with the payload byte-swapped in 16-bit units. Frames with
+// fewer than "minch" channels are forwarded unencoded, and EOF is forwarded
+// as is. Any failure marks the filter as failed.
+static void process(struct mp_filter *f)
+{
+    struct priv *s = f->priv;
+
+    if (!mp_pin_in_needs_data(f->ppins[1]))
+        return;
+
+    bool err = true;
+    struct mp_aframe *out = NULL;
+    AVPacket *pkt = s->lavc_pkt;
+    // Kept at function scope so the shared cleanup releases it on every exit
+    // path; previously it leaked on pass-through and on reinit() failure.
+    AVFrame *frame = NULL;
+
+    // Send input as long as it wants.
+    while (1) {
+        if (avcodec_is_open(s->lavc_actx)) {
+            int lavc_ret = avcodec_receive_packet(s->lavc_actx, pkt);
+            if (lavc_ret >= 0)
+                break;
+            if (lavc_ret != AVERROR(EAGAIN)) {
+                MP_FATAL(f, "Encode failed (receive).\n");
+                goto error;
+            }
+        }
+        struct mp_frame input = mp_pin_out_read(s->in_pin);
+        // The following code assumes no sample data buffering in the encoder.
+        switch (input.type) {
+        case MP_FRAME_NONE:
+            goto done; // no data yet
+        case MP_FRAME_EOF:
+            mp_pin_in_write(f->ppins[1], input);
+            goto done;
+        case MP_FRAME_AUDIO:
+            TA_FREEP(&s->in_frame);
+            s->in_frame = input.data;
+            frame = mp_frame_to_av(input, NULL);
+            if (!frame)
+                goto error;
+            if (mp_aframe_get_channels(s->in_frame) < s->opts->min_channel_num) {
+                // Just pass it through. Ownership of the frame moves to the
+                // output pin, so it must not stay in s->in_frame.
+                s->in_frame = NULL;
+                mp_pin_in_write(f->ppins[1], input);
+                goto done;
+            }
+            if (!mp_aframe_config_equals(s->in_frame, s->cur_format)) {
+                if (!reinit(f))
+                    goto error;
+            }
+            break;
+        default:
+            // Unexpected frame type: release it instead of leaking it.
+            mp_frame_unref(&input);
+            goto error;
+        }
+        int lavc_ret = avcodec_send_frame(s->lavc_actx, frame);
+        av_frame_free(&frame); // also resets frame to NULL for the next pass
+        if (lavc_ret < 0 && lavc_ret != AVERROR(EAGAIN)) {
+            MP_FATAL(f, "Encode failed (send).\n");
+            goto error;
+        }
+    }
+
+    if (!s->in_frame)
+        goto error;
+
+    out = mp_aframe_create();
+    mp_aframe_set_format(out, AF_FORMAT_S_AC3);
+    mp_aframe_set_chmap(out, &(struct mp_chmap)MP_CHMAP_INIT_STEREO);
+    mp_aframe_set_rate(out, 48000);
+
+    if (mp_aframe_pool_allocate(s->out_pool, out, s->out_samples) < 0)
+        goto error;
+
+    int sstride = mp_aframe_get_sstride(out);
+
+    mp_aframe_copy_attributes(out, s->in_frame);
+
+    int frame_size = pkt->size;
+    int header_len = 0;
+    uint8_t hdr[8] = {0};
+
+    if (s->opts->add_iec61937_header && pkt->size > 5) {
+        int bsmod = pkt->data[5] & 0x7;
+        int len = frame_size;
+
+        // With the header, every output frame has the same fixed byte size.
+        frame_size = AC3_FRAME_SIZE * 2 * 2;
+        header_len = 8;
+
+        AV_WL16(hdr, 0xF872);   // iec 61937 syncword 1
+        AV_WL16(hdr + 2, 0x4E1F);  // iec 61937 syncword 2
+        hdr[5] = bsmod;         // bsmod
+        hdr[4] = 0x01;          // data-type ac3
+        AV_WL16(hdr + 6, len << 3); // number of bits in payload
+    }
+
+    // The allocated frame must be able to hold the whole output.
+    if (frame_size > s->out_samples * sstride)
+        abort();
+
+    uint8_t **planes = mp_aframe_get_data_rw(out);
+    if (!planes)
+        goto error;
+    uint8_t *buf = planes[0];
+    memcpy(buf, hdr, header_len);
+    memcpy(buf + header_len, pkt->data, pkt->size);
+    memset(buf + header_len + pkt->size, 0,
+           frame_size - (header_len + pkt->size));
+    swap_16((uint16_t *)(buf + header_len), pkt->size / 2);
+    mp_aframe_set_size(out, frame_size / sstride);
+    mp_pin_in_write(f->ppins[1], MAKE_FRAME(MP_FRAME_AUDIO, out));
+    out = NULL;
+
+done:
+    err = false;
+    // fall through
+error:
+    av_frame_free(&frame);
+    av_packet_unref(pkt);
+    talloc_free(out);
+    if (err)
+        mp_filter_internal_mark_failed(f);
+}
+
+// Filter descriptor wiring the "lavcac3enc" callbacks together.
+static const struct mp_filter_info af_lavcac3enc_filter = {
+    .name = "lavcac3enc",
+    .process = process,
+    .reset = reset,
+    .destroy = destroy,
+    .priv_size = sizeof(struct priv),
+};
+
+// Adds every channel layout the codec lists, and that converts to a valid
+// mp_chmap, to the autoconvert filter. With the AVChannelLayout API, layouts
+// that cannot be converted are logged and skipped.
+static void add_chmaps_to_autoconv(struct mp_filter *f,
+                                   struct mp_autoconvert *conv,
+                                   const struct AVCodec *codec)
+{
+#if !HAVE_AV_CHANNEL_LAYOUT
+    const uint64_t *layouts = codec->channel_layouts;
+    if (!layouts)
+        return;
+
+    // The list ends with a zero entry.
+    for (const uint64_t *cur = layouts; *cur; cur++) {
+        struct mp_chmap map = {0};
+        mp_chmap_from_lavc(&map, *cur);
+        if (mp_chmap_is_valid(&map))
+            mp_autoconvert_add_chmap(conv, &map);
+    }
+#else
+    const AVChannelLayout *layouts = codec->ch_layouts;
+    if (!layouts)
+        return;
+
+    // The list ends with an entry that has no channels.
+    for (const AVChannelLayout *cur = layouts; cur->nb_channels; cur++) {
+        struct mp_chmap map = {0};
+
+        if (!mp_chmap_from_av_layout(&map, cur)) {
+            char desc[128] = {0};
+            int res = av_channel_layout_describe(cur, desc, sizeof(desc));
+            MP_VERBOSE(f, "Skipping unsupported channel layout: %s\n",
+                       res < 0 ? "undefined" : desc);
+            continue;
+        }
+
+        if (mp_chmap_is_valid(&map))
+            mp_autoconvert_add_chmap(conv, &map);
+    }
+#endif
+}
+
+// Creates the "lavcac3enc" filter. Takes ownership of options (a struct
+// f_opts).
+//
+// Looks up the encoder, allocates the codec context and packet, applies the
+// user's libavcodec options and resolves the requested bitrate. The input is
+// then routed through an autoconvert filter (restricted to what the encoder
+// lists, plus 48000 Hz) and a fixed-frame-size filter; process() reads from
+// the latter. Returns NULL on failure.
+static struct mp_filter *af_lavcac3enc_create(struct mp_filter *parent,
+                                              void *options)
+{
+    struct mp_filter *f = mp_filter_create(parent, &af_lavcac3enc_filter);
+    if (!f) {
+        talloc_free(options);
+        return NULL;
+    }
+
+    mp_filter_add_pin(f, MP_PIN_IN, "in");
+    mp_filter_add_pin(f, MP_PIN_OUT, "out");
+
+    struct priv *s = f->priv;
+    s->opts = talloc_steal(s, options);
+    s->cur_format = talloc_steal(s, mp_aframe_create());
+    s->out_pool = mp_aframe_pool_create(s);
+
+    const struct AVCodec *codec = avcodec_find_encoder_by_name(s->opts->encoder);
+    s->lavc_acodec = codec;
+    if (!codec) {
+        MP_ERR(f, "Couldn't find encoder %s.\n", s->opts->encoder);
+        goto error;
+    }
+
+    s->lavc_actx = avcodec_alloc_context3(codec);
+    if (!s->lavc_actx) {
+        MP_ERR(f, "Audio LAVC, couldn't allocate context!\n");
+        goto error;
+    }
+
+    s->lavc_pkt = av_packet_alloc();
+    if (!s->lavc_pkt)
+        goto error;
+
+    if (mp_set_avopts(f->log, s->lavc_actx, s->opts->avopts) < 0)
+        goto error;
+
+    // The encoder must export lists of its supported sample formats and
+    // channel layouts. (Not all encoders do that, but the ones we're
+    // interested in do.)
+#if !HAVE_AV_CHANNEL_LAYOUT
+    bool has_layouts = codec->channel_layouts != NULL;
+#else
+    bool has_layouts = codec->ch_layouts != NULL;
+#endif
+    if (!codec->sample_fmts || !has_layouts) {
+        MP_ERR(f, "Audio encoder doesn't list supported parameters.\n");
+        goto error;
+    }
+
+    if (s->opts->bit_rate) {
+        // Only bitrates listed in ac3_bitrate_tab are accepted; anything
+        // else leaves s->bit_rate at 0 so the default is used.
+        const int num_rates =
+            sizeof(ac3_bitrate_tab) / sizeof(ac3_bitrate_tab[0]);
+        bool found = false;
+        for (int i = 0; i < num_rates && !found; i++) {
+            if (ac3_bitrate_tab[i] == s->opts->bit_rate) {
+                s->bit_rate = ac3_bitrate_tab[i] * 1000;
+                found = true;
+            }
+        }
+        if (!found) {
+            MP_WARN(f, "unable set unsupported bitrate %d, using default "
+                    "bitrate (check manpage to see supported bitrates).\n",
+                    s->opts->bit_rate);
+        }
+    }
+
+    struct mp_autoconvert *conv = mp_autoconvert_create(f);
+    if (!conv)
+        abort();
+
+    for (const enum AVSampleFormat *fmt = codec->sample_fmts;
+         fmt && *fmt != AV_SAMPLE_FMT_NONE; fmt++)
+    {
+        int mpfmt = af_from_avformat(*fmt);
+        if (mpfmt)
+            mp_autoconvert_add_afmt(conv, mpfmt);
+    }
+
+    add_chmaps_to_autoconv(f, conv, codec);
+
+    // At least currently, the AC3 encoder doesn't export sample rates.
+    mp_autoconvert_add_srate(conv, 48000);
+
+    mp_pin_connect(conv->f->pins[0], f->ppins[0]);
+
+    // Cut the converted audio into frames of exactly AC3_FRAME_SIZE samples.
+    struct mp_filter *framer =
+        mp_fixed_aframe_size_create(f, AC3_FRAME_SIZE, true);
+    if (!framer)
+        abort();
+
+    mp_pin_connect(framer->pins[0], conv->f->pins[1]);
+    s->in_pin = framer->pins[1];
+
+    return f;
+
+error:
+    av_packet_free(&s->lavc_pkt);
+    avcodec_free_context(&s->lavc_actx);
+    talloc_free(f);
+    return NULL;
+}
+
+#define OPT_BASE_STRUCT struct f_opts
+
+// User-visible registration of the "lavcac3enc" filter, its option defaults
+// and its option table.
+const struct mp_user_filter_entry af_lavcac3enc = {
+    .create = af_lavcac3enc_create,
+    .desc = {
+        .name = "lavcac3enc",
+        .description = "runtime encode to ac3 using libavcodec",
+        .priv_size = sizeof(OPT_BASE_STRUCT),
+        .priv_defaults = &(const OPT_BASE_STRUCT) {
+            .encoder = "ac3",
+            .bit_rate = 640,
+            .min_channel_num = 3,
+            .add_iec61937_header = true,
+        },
+        .options = (const struct m_option[]) {
+            {"tospdif", OPT_BOOL(add_iec61937_header)},
+            {"bitrate", OPT_CHOICE(bit_rate,
+                {"auto", 0}, {"default", 0}), M_RANGE(32, 640)},
+            {"minch", OPT_INT(min_channel_num), M_RANGE(2, 6)},
+            {"encoder", OPT_STRING(encoder)},
+            {"o", OPT_KEYVALUELIST(avopts)},
+            {0}
+        },
+    },
+};
diff --git a/audio/filter/af_rubberband.c b/audio/filter/af_rubberband.c
new file mode 100644
index 0000000..48e5cc1
--- /dev/null
+++ b/audio/filter/af_rubberband.c
@@ -0,0 +1,382 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <stdlib.h>
+#include <assert.h>
+
+#include <rubberband/rubberband-c.h>
+
+#include "config.h"
+
+#include "audio/aframe.h"
+#include "audio/format.h"
+#include "common/common.h"
+#include "filters/f_autoconvert.h"
+#include "filters/filter_internal.h"
+#include "filters/user_filters.h"
+#include "options/m_option.h"
+
+// command line options
+struct f_opts {
+ int transients, detector, phase, window, // RubberBandOption* flag values chosen by the same-named options
+ smoothing, formant, pitch, channels, engine; // all are OR-ed together in init_rubberband(); engine only with HAVE_RUBBERBAND_3
+ double scale; // "pitch-scale" option; initial value of priv.pitch
+};
+
+struct priv {
+ struct f_opts *opts; // option struct handed to the create function, reparented to this priv
+
+ struct mp_pin *in_pin; // pins[1] of the autoconvert sub-filter; process() reads input here
+ struct mp_aframe *cur_format; // config the current rubberband instance was created for
+ struct mp_aframe_pool *out_pool; // pool used to allocate output frames
+ bool sent_final; // set once rubberband_process() was called with the final flag; blocks further calls
+ RubberBandState rubber; // librubberband instance; NULL until the first frame and after a format change
+ double speed; // current speed; the time ratio given to librubberband is 1.0 / speed
+ double pitch; // current pitch scale given to librubberband
+ struct mp_aframe *pending; // input frame currently being fed; consumed samples are skipped off its front
+ // Estimate how much librubberband has buffered internally.
+ // I could not find a way to do this with the librubberband API.
+ double rubber_delay; // += input samples fed, -= retrieved samples * speed
+};
+
+// Stores the new speed and, if a rubberband instance exists, applies it as
+// time ratio 1.0 / speed.
+static void update_speed(struct priv *p, double new_speed)
+{
+    p->speed = new_speed;
+    if (!p->rubber)
+        return;
+    rubberband_set_time_ratio(p->rubber, 1.0 / new_speed);
+}
+
+// Sets a new pitch scale and, if a rubberband instance exists, applies it.
+//
+// Returns false and changes nothing unless new_pitch lies in [0.01, 100.0].
+// The range test is written positively so that NaN (which "set-pitch nan"
+// produces via strtod) fails it too; every comparison with NaN is false, so
+// the former "< 0.01 || > 100.0" test let NaN through to librubberband.
+static bool update_pitch(struct priv *p, double new_pitch)
+{
+    if (!(new_pitch >= 0.01 && new_pitch <= 100.0))
+        return false;
+
+    p->pitch = new_pitch;
+    if (p->rubber)
+        rubberband_set_pitch_scale(p->rubber, p->pitch);
+    return true;
+}
+
+// Creates the rubberband instance for the format of p->pending and applies
+// the current speed and pitch. Requires that no instance exists and that a
+// pending frame is set. Returns false if the pending frame is not
+// AF_FORMAT_FLOATP or if librubberband fails to initialize.
+static bool init_rubberband(struct mp_filter *f)
+{
+    struct priv *p = f->priv;
+    struct f_opts *o = p->opts;
+
+    assert(!p->rubber);
+    assert(p->pending);
+
+    // All option values are flag bits, combined into a single option word.
+    int flags = RubberBandOptionProcessRealTime;
+    flags |= o->transients | o->detector | o->phase | o->window;
+    flags |= o->smoothing | o->formant | o->pitch | o->channels;
+#if HAVE_RUBBERBAND_3
+    flags |= o->engine;
+#endif
+
+    int rate = mp_aframe_get_rate(p->pending);
+    int channels = mp_aframe_get_channels(p->pending);
+    if (mp_aframe_get_format(p->pending) != AF_FORMAT_FLOATP)
+        return false;
+
+    p->rubber = rubberband_new(rate, channels, flags, 1.0, 1.0);
+    if (!p->rubber) {
+        MP_FATAL(f, "librubberband initialization failed.\n");
+        return false;
+    }
+
+    mp_aframe_config_copy(p->cur_format, p->pending);
+
+    // The instance was created with neutral ratios; apply the real ones.
+    update_speed(p, p->speed);
+    update_pitch(p, p->pitch);
+
+    return true;
+}
+
+static void process(struct mp_filter *f) // feeds input to librubberband until output is available, then emits one frame
+{
+ struct priv *p = f->priv;
+
+ if (!mp_pin_in_needs_data(f->ppins[1]))
+ return;
+
+ while (!p->rubber || !p->pending || rubberband_available(p->rubber) <= 0) { // loop until an instance, a pending frame and output samples all exist
+ const float *dummy[MP_NUM_CHANNELS] = {0};
+ const float **in_data = dummy;
+ size_t in_samples = 0;
+
+ bool eof = false;
+ if (!p->pending || !mp_aframe_get_size(p->pending)) { // read new input only when nothing is pending or it is fully consumed
+ struct mp_frame frame = mp_pin_out_read(p->in_pin);
+ if (frame.type == MP_FRAME_AUDIO) {
+ TA_FREEP(&p->pending);
+ p->pending = frame.data;
+ } else if (frame.type == MP_FRAME_EOF) {
+ eof = true;
+ } else if (frame.type) {
+ MP_ERR(f, "unexpected frame type\n");
+ goto error;
+ } else {
+ return; // no new data yet
+ }
+ }
+ assert(p->pending || eof);
+
+ if (!p->rubber) {
+ if (!p->pending) { // EOF before any audio was seen: nothing to drain, forward EOF
+ mp_pin_in_write(f->ppins[1], MP_EOF_FRAME);
+ return;
+ }
+ if (!init_rubberband(f))
+ goto error;
+ }
+
+ bool format_change =
+ p->pending && !mp_aframe_config_equals(p->pending, p->cur_format);
+
+ if (p->pending && !format_change) {
+ size_t needs = rubberband_get_samples_required(p->rubber);
+ uint8_t **planes = mp_aframe_get_data_ro(p->pending);
+ int num_planes = mp_aframe_get_planes(p->pending);
+ for (int n = 0; n < num_planes; n++)
+ in_data[n] = (void *)planes[n];
+ in_samples = MPMIN(mp_aframe_get_size(p->pending), needs); // feed no more than librubberband asks for
+ }
+
+ bool final = format_change || eof; // on a format change or EOF no samples are fed (in_samples stays 0); only the final flag is passed
+ if (!p->sent_final)
+ rubberband_process(p->rubber, in_data, in_samples, final);
+ p->sent_final |= final;
+
+ p->rubber_delay += in_samples;
+
+ if (p->pending && !format_change)
+ mp_aframe_skip_samples(p->pending, in_samples);
+
+ if (rubberband_available(p->rubber) > 0) {
+ if (eof)
+ mp_pin_out_repeat_eof(p->in_pin); // drain more next time
+ } else {
+ if (eof) { // fully drained: forward EOF and return to the initial state
+ mp_pin_in_write(f->ppins[1], MP_EOF_FRAME);
+ rubberband_reset(p->rubber);
+ p->rubber_delay = 0;
+ TA_FREEP(&p->pending);
+ p->sent_final = false;
+ return;
+ } else if (format_change) {
+ // go on with proper reinit on the next iteration
+ rubberband_delete(p->rubber);
+ p->sent_final = false;
+ p->rubber = NULL;
+ }
+ }
+ }
+
+ assert(p->pending);
+
+ int out_samples = rubberband_available(p->rubber);
+ if (out_samples > 0) {
+ struct mp_aframe *out = mp_aframe_new_ref(p->cur_format);
+ if (mp_aframe_pool_allocate(p->out_pool, out, out_samples) < 0) {
+ talloc_free(out);
+ goto error;
+ }
+
+ mp_aframe_copy_attributes(out, p->pending);
+
+ float *out_data[MP_NUM_CHANNELS] = {0};
+ uint8_t **planes = mp_aframe_get_data_rw(out);
+ assert(planes);
+ int num_planes = mp_aframe_get_planes(out);
+ for (int n = 0; n < num_planes; n++)
+ out_data[n] = (void *)planes[n];
+
+ out_samples = rubberband_retrieve(p->rubber, out_data, out_samples);
+
+ if (!out_samples) {
+ mp_filter_internal_mark_progress(f); // unexpected, just try again
+ talloc_free(out);
+ return;
+ }
+
+ mp_aframe_set_size(out, out_samples);
+
+ p->rubber_delay -= out_samples * p->speed; // each retrieved sample is counted as speed buffered input samples
+
+ double pts = mp_aframe_get_pts(p->pending);
+ if (pts != MP_NOPTS_VALUE) {
+ // Note: rubberband_get_latency() does not do what you'd expect.
+ double delay = p->rubber_delay / mp_aframe_get_effective_rate(out);
+ mp_aframe_set_pts(out, pts - delay); // output pts = pending frame's pts minus the estimated buffered delay
+ }
+
+ mp_aframe_mul_speed(out, p->speed);
+
+ mp_pin_in_write(f->ppins[1], MAKE_FRAME(MP_FRAME_AUDIO, out));
+ }
+
+ return;
+error:
+ mp_filter_internal_mark_failed(f);
+}
+
+// Handles filter commands.
+//  - MP_FILTER_COMMAND_SET_SPEED: applies the new speed, returns true.
+//  - MP_FILTER_COMMAND_TEXT "set-pitch <x>": sets the pitch scale to x.
+//  - MP_FILTER_COMMAND_TEXT "multiply-pitch <x>": multiplies the current
+//    pitch scale by x, which must be greater than 0.
+// Returns false for unknown commands, for arguments with trailing characters
+// after the number, and when update_pitch() rejects the resulting value.
+static bool command(struct mp_filter *f, struct mp_filter_command *cmd)
+{
+    struct priv *p = f->priv;
+
+    if (cmd->type == MP_FILTER_COMMAND_SET_SPEED) {
+        update_speed(p, cmd->speed);
+        return true;
+    }
+    if (cmd->type != MP_FILTER_COMMAND_TEXT)
+        return false;
+
+    bool is_set = !strcmp(cmd->cmd, "set-pitch");
+    bool is_mul = !is_set && !strcmp(cmd->cmd, "multiply-pitch");
+    if (!is_set && !is_mul)
+        return false;
+
+    char *rest = NULL;
+    double value = strtod(cmd->arg, &rest);
+    if (*rest)
+        return false;
+
+    if (is_set)
+        return update_pitch(p, value);
+
+    if (value <= 0)
+        return false;
+    return update_pitch(p, p->pitch * value);
+}
+
+// Returns to the initial streaming state: drops the pending input frame,
+// clears the delay estimate and the final-flag marker, and resets the
+// rubberband instance if one exists.
+static void reset(struct mp_filter *f)
+{
+    struct priv *state = f->priv;
+
+    TA_FREEP(&state->pending);
+    state->sent_final = false;
+    state->rubber_delay = 0;
+    if (state->rubber)
+        rubberband_reset(state->rubber);
+}
+
+// Frees the pending input frame and deletes the rubberband instance, if any.
+static void destroy(struct mp_filter *f)
+{
+    struct priv *state = f->priv;
+
+    talloc_free(state->pending);
+    if (state->rubber)
+        rubberband_delete(state->rubber);
+}
+
+// Filter descriptor wiring the "rubberband" callbacks together.
+static const struct mp_filter_info af_rubberband_filter = {
+    .name = "rubberband",
+    .process = process,
+    .command = command,
+    .reset = reset,
+    .destroy = destroy,
+    .priv_size = sizeof(struct priv),
+};
+
+// Creates the "rubberband" filter. Takes ownership of options (a struct
+// f_opts). Speed starts at 1.0 and pitch at the "pitch-scale" option. Input
+// is routed through an autoconvert filter that accepts only
+// AF_FORMAT_FLOATP; process() reads from that filter's output. Returns NULL
+// if the filter cannot be created; aborts if the autoconvert filter cannot
+// be created.
+static struct mp_filter *af_rubberband_create(struct mp_filter *parent,
+                                              void *options)
+{
+    struct mp_filter *f = mp_filter_create(parent, &af_rubberband_filter);
+    if (!f) {
+        talloc_free(options);
+        return NULL;
+    }
+
+    mp_filter_add_pin(f, MP_PIN_IN, "in");
+    mp_filter_add_pin(f, MP_PIN_OUT, "out");
+
+    struct priv *p = f->priv;
+    struct f_opts *opts = talloc_steal(p, options);
+    p->opts = opts;
+    p->speed = 1.0;
+    p->pitch = opts->scale;
+    p->cur_format = talloc_steal(p, mp_aframe_create());
+    p->out_pool = mp_aframe_pool_create(p);
+
+    struct mp_autoconvert *autoconv = mp_autoconvert_create(f);
+    if (!autoconv)
+        abort();
+
+    mp_autoconvert_add_afmt(autoconv, AF_FORMAT_FLOATP);
+
+    mp_pin_connect(autoconv->f->pins[0], f->ppins[0]);
+    p->in_pin = autoconv->f->pins[1];
+
+    return f;
+}
+
+#define OPT_BASE_STRUCT struct f_opts
+
+// User-visible registration of the "rubberband" filter, its option defaults
+// and its option table. Each choice option maps directly to a
+// RubberBandOption* flag value.
+const struct mp_user_filter_entry af_rubberband = {
+    .create = af_rubberband_create,
+    .desc = {
+        .name = "rubberband",
+        .description = "Pitch conversion with librubberband",
+        .priv_size = sizeof(OPT_BASE_STRUCT),
+        .priv_defaults = &(const OPT_BASE_STRUCT) {
+            .transients = RubberBandOptionTransientsMixed,
+            .formant = RubberBandOptionFormantPreserved,
+            .pitch = RubberBandOptionPitchHighConsistency,
+            .channels = RubberBandOptionChannelsTogether,
+#if HAVE_RUBBERBAND_3
+            .engine = RubberBandOptionEngineFiner,
+#endif
+            .scale = 1.0,
+        },
+        .options = (const struct m_option[]) {
+            {"transients", OPT_CHOICE(transients,
+                {"crisp", RubberBandOptionTransientsCrisp},
+                {"mixed", RubberBandOptionTransientsMixed},
+                {"smooth", RubberBandOptionTransientsSmooth})},
+            {"detector", OPT_CHOICE(detector,
+                {"compound", RubberBandOptionDetectorCompound},
+                {"percussive", RubberBandOptionDetectorPercussive},
+                {"soft", RubberBandOptionDetectorSoft})},
+            {"phase", OPT_CHOICE(phase,
+                {"laminar", RubberBandOptionPhaseLaminar},
+                {"independent", RubberBandOptionPhaseIndependent})},
+            {"window", OPT_CHOICE(window,
+                {"standard", RubberBandOptionWindowStandard},
+                {"short", RubberBandOptionWindowShort},
+                {"long", RubberBandOptionWindowLong})},
+            {"smoothing", OPT_CHOICE(smoothing,
+                {"off", RubberBandOptionSmoothingOff},
+                {"on", RubberBandOptionSmoothingOn})},
+            {"formant", OPT_CHOICE(formant,
+                {"shifted", RubberBandOptionFormantShifted},
+                {"preserved", RubberBandOptionFormantPreserved})},
+            {"pitch", OPT_CHOICE(pitch,
+                {"quality", RubberBandOptionPitchHighQuality},
+                {"speed", RubberBandOptionPitchHighSpeed},
+                {"consistency", RubberBandOptionPitchHighConsistency})},
+            {"channels", OPT_CHOICE(channels,
+                {"apart", RubberBandOptionChannelsApart},
+                {"together", RubberBandOptionChannelsTogether})},
+#if HAVE_RUBBERBAND_3
+            {"engine", OPT_CHOICE(engine,
+                {"finer", RubberBandOptionEngineFiner},
+                {"faster", RubberBandOptionEngineFaster})},
+#endif
+            {"pitch-scale", OPT_DOUBLE(scale), M_RANGE(0.01, 100)},
+            {0}
+        },
+    },
+};
diff --git a/audio/filter/af_scaletempo.c b/audio/filter/af_scaletempo.c
new file mode 100644
index 0000000..f06478f
--- /dev/null
+++ b/audio/filter/af_scaletempo.c
@@ -0,0 +1,626 @@
+/*
+ * scaletempo audio filter
+ *
+ * scale tempo while maintaining pitch
+ * (WSOLA technique with cross correlation)
+ * inspired by SoundTouch library by Olli Parviainen
+ *
+ * basic algorithm
+ * - produce 'stride' output samples per loop
+ * - consume stride*scale input samples per loop
+ *
+ * to produce smoother transitions between strides, blend next overlap
+ * samples from last stride with correlated samples of current input
+ *
+ * Copyright (c) 2007 Robert Juliano
+ *
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <float.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include <assert.h>
+
+#include "audio/aframe.h"
+#include "audio/format.h"
+#include "common/common.h"
+#include "filters/f_autoconvert.h"
+#include "filters/filter_internal.h"
+#include "filters/user_filters.h"
+#include "options/m_option.h"
+
+// User-settable options of the scaletempo filter (see the option table of
+// the af_scaletempo entry for names and ranges).
+struct f_opts {
+ float scale_nominal; // multiplied with the speed factor in update_speed()
+ float ms_stride; // multiplied with the rate in kHz to get frames_stride
+ float ms_search; // multiplied with the rate in kHz to get frames_search
+ float factor_overlap; // overlap length as a fraction of frames_stride
+#define SCALE_TEMPO 1
+#define SCALE_PITCH 2
+ int speed_opt; // bitmask of SCALE_TEMPO and SCALE_PITCH
+};
+
+// Per-instance state of the scaletempo filter.
+struct priv {
+ struct f_opts *opts;
+
+ struct mp_pin *in_pin; // pin that input frames are read from
+ struct mp_aframe *cur_format; // config the filter is currently set up for
+ struct mp_aframe_pool *out_pool; // pool used to allocate output frames
+ double current_pts; // end PTS of the last input frame read
+ struct mp_aframe *in; // input frame being consumed, or NULL
+
+ // stride
+ float scale; // speed factor times opts->scale_nominal
+ float speed; // speed last passed to update_speed()
+ int frames_stride;
+ float frames_stride_scaled; // scale * frames_stride
+ float frames_stride_error; // fractional frames carried over between strides
+ int bytes_per_frame;
+ int bytes_stride;
+ int bytes_queue; // amount of queued data needed before filtering
+ int bytes_queued; // bytes currently held in buf_queue
+ int bytes_to_slide; // bytes still to be discarded from queue/input
+ int8_t *buf_queue;
+ // overlap
+ int samples_overlap;
+ int samples_standing;
+ int bytes_overlap;
+ int bytes_standing;
+ void *buf_overlap;
+ void *table_blend;
+ // Blend function for the active sample format; NULL if there is no overlap.
+ void (*output_overlap)(struct priv *s, void *out_buf,
+ int bytes_off);
+ // best overlap
+ int frames_search;
+ int num_channels;
+ void *buf_pre_corr;
+ void *table_window;
+ // Search function for the active sample format; NULL if search is disabled.
+ int (*best_overlap_offset)(struct priv *s);
+};
+
+static bool reinit(struct mp_filter *f);
+
+// Discard data that still has to be slid over, then top up buf_queue from the
+// current input frame (if any). The consumed samples are removed from s->in.
+// Returns true once the queue holds bytes_queue bytes, i.e. there is enough
+// data for filtering.
+static bool fill_queue(struct priv *s)
+{
+    int avail = 0;
+    if (s->in)
+        avail = mp_aframe_get_size(s->in) * s->bytes_per_frame;
+    int consumed = 0;
+
+    if (s->bytes_to_slide > 0) {
+        if (s->bytes_to_slide >= s->bytes_queued) {
+            // The whole queue is dropped; the remaining slide eats input.
+            s->bytes_to_slide -= s->bytes_queued;
+            s->bytes_queued = 0;
+            int skip = MPMIN(s->bytes_to_slide, avail);
+            s->bytes_to_slide -= skip;
+            consumed += skip;
+            avail -= skip;
+        } else {
+            // Keep the tail of the queue and move it to the front.
+            int keep = s->bytes_queued - s->bytes_to_slide;
+            memmove(s->buf_queue, s->buf_queue + s->bytes_to_slide, keep);
+            s->bytes_queued = keep;
+            s->bytes_to_slide = 0;
+        }
+    }
+
+    int missing = s->bytes_queue - s->bytes_queued;
+    assert(missing >= 0);
+
+    int take = MPMIN(missing, avail);
+    if (take > 0) {
+        uint8_t **planes = mp_aframe_get_data_ro(s->in);
+        memcpy(s->buf_queue + s->bytes_queued, planes[0] + consumed, take);
+        s->bytes_queued += take;
+        consumed += take;
+        missing -= take;
+    }
+
+    if (s->in)
+        mp_aframe_skip_samples(s->in, consumed / s->bytes_per_frame);
+
+    return missing == 0;
+}
+
+#define UNROLL_PADDING (4 * 4)
+
+// Find the offset into buf_queue whose samples correlate best with the
+// windowed overlap buffer (float samples). The first frame of the overlap is
+// skipped. Returns the best offset in bytes (frames * 4 * num_channels).
+static int best_overlap_offset_float(struct priv *s)
+{
+    // Start below every finite correlation value. INT_MIN is only about
+    // -2.1e9 as a float, which a large negative correlation can undercut; in
+    // that case no candidate would ever be selected.
+    float best_corr = -FLT_MAX;
+    int best_off = 0;
+
+    // Pre-multiply the overlap buffer with the window table once.
+    float *pw = s->table_window;
+    float *po = s->buf_overlap;
+    po += s->num_channels;
+    float *ppc = s->buf_pre_corr;
+    for (int i = s->num_channels; i < s->samples_overlap; i++)
+        *ppc++ = *pw++ * *po++;
+
+    // Correlate against every candidate position, one frame apart.
+    float *search_start = (float *)s->buf_queue + s->num_channels;
+    for (int off = 0; off < s->frames_search; off++) {
+        float corr = 0;
+        float *ps = search_start;
+        ppc = s->buf_pre_corr;
+        for (int i = s->num_channels; i < s->samples_overlap; i++)
+            corr += *ppc++ * *ps++;
+        if (corr > best_corr) {
+            best_corr = corr;
+            best_off = off;
+        }
+        search_start += s->num_channels;
+    }
+
+    return best_off * 4 * s->num_channels;
+}
+
+// Find the offset into buf_queue whose samples correlate best with the
+// windowed overlap buffer (S16 samples). The first frame of the overlap is
+// skipped. Returns the best offset in bytes (frames * 2 * num_channels).
+static int best_overlap_offset_s16(struct priv *s)
+{
+    int64_t best_corr = INT64_MIN;
+    int best_off = 0;
+    // Number of samples that take part in the correlation.
+    int count = s->samples_overlap - s->num_channels;
+
+    // Pre-multiply the overlap buffer with the window table once.
+    int32_t *pw = s->table_window;
+    int16_t *po = s->buf_overlap;
+    po += s->num_channels;
+    int32_t *ppc = s->buf_pre_corr;
+    for (int i = 0; i < count; i++)
+        ppc[i] = (pw[i] * po[i]) >> 15;
+
+    int16_t *search_start = (int16_t *)s->buf_queue + s->num_channels;
+    for (int off = 0; off < s->frames_search; off++) {
+        int64_t corr = 0;
+        // Sum exactly |count| products. The previous 4x unrolled loop rounded
+        // the count up and could read buf_pre_corr entries between |count|
+        // and samples_overlap, which are neither written above nor covered by
+        // the zeroed padding, so stale heap contents leaked into the result.
+        for (int i = 0; i < count; i++)
+            corr += ppc[i] * (int64_t)search_start[i];
+        if (corr > best_corr) {
+            best_corr = corr;
+            best_off = off;
+        }
+        search_start += s->num_channels;
+    }
+
+    return best_off * 2 * s->num_channels;
+}
+
+// Write samples_overlap float samples to buf_out, cross-fading from the saved
+// overlap buffer to the queued input at byte offset bytes_off using the
+// per-sample weights in table_blend.
+static void output_overlap_float(struct priv *s, void *buf_out,
+                                 int bytes_off)
+{
+    float *dst = buf_out;
+    const float *blend = s->table_blend;
+    const float *prev = s->buf_overlap;
+    const float *next = (float *)(s->buf_queue + bytes_off);
+    int count = s->samples_overlap;
+
+    for (int n = 0; n < count; n++)
+        dst[n] = prev[n] - blend[n] * (prev[n] - next[n]);
+}
+
+// Write samples_overlap S16 samples to buf_out, cross-fading from the saved
+// overlap buffer to the queued input at byte offset bytes_off. The weights in
+// table_blend are 16.16 fixed point, hence the shift by 16.
+static void output_overlap_s16(struct priv *s, void *buf_out,
+                               int bytes_off)
+{
+    int16_t *dst = buf_out;
+    const int32_t *blend = s->table_blend;
+    const int16_t *prev = s->buf_overlap;
+    const int16_t *next = (int16_t *)(s->buf_queue + bytes_off);
+    int count = s->samples_overlap;
+
+    for (int n = 0; n < count; n++)
+        dst[n] = prev[n] - ((blend[n] * (prev[n] - next[n])) >> 16);
+}
+
+// Filter process callback. Reads an input frame if none is pending, handles
+// EOF and format changes (draining queued data first), fills the queue and,
+// once enough data is queued or when draining, emits one output frame.
+// Marks the filter as failed on unexpected input or allocation failure.
+static void process(struct mp_filter *f)
+{
+    struct priv *s = f->priv;
+
+    if (!mp_pin_in_needs_data(f->ppins[1]))
+        return;
+
+    struct mp_aframe *out = NULL;
+
+    bool drain = false;
+    bool is_eof = false;
+    if (!s->in) {
+        struct mp_frame frame = mp_pin_out_read(s->in_pin);
+        if (!frame.type)
+            return; // no input yet
+        if (frame.type != MP_FRAME_AUDIO && frame.type != MP_FRAME_EOF) {
+            MP_ERR(f, "unexpected frame type\n");
+            // The frame was already taken from the pin and nothing else
+            // owns it, so release it here instead of leaking it.
+            mp_frame_unref(&frame);
+            goto error;
+        }
+
+        s->in = frame.type == MP_FRAME_AUDIO ? frame.data : NULL;
+        is_eof = drain = !s->in;
+
+        // EOF before it was even initialized once.
+        if (is_eof && !mp_aframe_config_is_valid(s->cur_format)) {
+            mp_pin_in_write(f->ppins[1], MP_EOF_FRAME);
+            return;
+        }
+
+        if (s->in && !mp_aframe_config_equals(s->in, s->cur_format)) {
+            if (s->bytes_queued) {
+                // Drain remaining data before executing the format change.
+                MP_VERBOSE(f, "draining\n");
+                mp_pin_out_unread(s->in_pin, frame);
+                s->in = NULL;
+                drain = true;
+            } else {
+                if (!reinit(f)) {
+                    MP_ERR(f, "initialization failed\n");
+                    goto error;
+                }
+            }
+        }
+
+        if (s->in)
+            s->current_pts = mp_aframe_end_pts(s->in);
+    }
+
+    if (!fill_queue(s) && !drain) {
+        TA_FREEP(&s->in);
+        mp_pin_out_request_data_next(s->in_pin);
+        return;
+    }
+
+    int max_out_samples = s->bytes_stride / s->bytes_per_frame;
+    if (drain) {
+        // bytes_queued is a byte count; convert it to samples (rounding up)
+        // rather than adding bytes to a sample count.
+        max_out_samples +=
+            (s->bytes_queued + s->bytes_per_frame - 1) / s->bytes_per_frame;
+    }
+
+    out = mp_aframe_new_ref(s->cur_format);
+    if (mp_aframe_pool_allocate(s->out_pool, out, max_out_samples) < 0)
+        goto error;
+
+    if (s->in)
+        mp_aframe_copy_attributes(out, s->in);
+
+    uint8_t **out_planes = mp_aframe_get_data_rw(out);
+    if (!out_planes)
+        goto error;
+    int8_t *pout = out_planes[0];
+    int out_offset = 0;
+    if (s->bytes_queued >= s->bytes_queue) {
+        int ti;
+        float tf;
+        int bytes_off = 0;
+
+        // output stride
+        if (s->output_overlap) {
+            if (s->best_overlap_offset)
+                bytes_off = s->best_overlap_offset(s);
+            s->output_overlap(s, pout + out_offset, bytes_off);
+        }
+        memcpy(pout + out_offset + s->bytes_overlap,
+               s->buf_queue + bytes_off + s->bytes_overlap,
+               s->bytes_standing);
+        out_offset += s->bytes_stride;
+
+        // input stride
+        memcpy(s->buf_overlap,
+               s->buf_queue + bytes_off + s->bytes_stride,
+               s->bytes_overlap);
+        // Carry the fractional part of the scaled stride to the next round.
+        tf = s->frames_stride_scaled + s->frames_stride_error;
+        ti = (int)tf;
+        s->frames_stride_error = tf - ti;
+        s->bytes_to_slide = ti * s->bytes_per_frame;
+    }
+    // Drain remaining buffered data.
+    if (drain && s->bytes_queued) {
+        memcpy(pout + out_offset, s->buf_queue, s->bytes_queued);
+        out_offset += s->bytes_queued;
+        s->bytes_queued = 0;
+    }
+    mp_aframe_set_size(out, out_offset / s->bytes_per_frame);
+
+    // This filter can have a negative delay when scale > 1:
+    // output corresponding to some length of input can be decided and written
+    // after receiving only a part of that input.
+    float delay = (out_offset * s->speed + s->bytes_queued - s->bytes_to_slide) /
+                  s->bytes_per_frame / mp_aframe_get_effective_rate(out)
+                  + (s->in ? mp_aframe_duration(s->in) : 0);
+
+    if (s->current_pts != MP_NOPTS_VALUE)
+        mp_aframe_set_pts(out, s->current_pts - delay);
+
+    mp_aframe_mul_speed(out, s->speed);
+
+    // Do not forward empty frames.
+    if (!mp_aframe_get_size(out))
+        TA_FREEP(&out);
+
+    if (is_eof && out) {
+        mp_pin_out_repeat_eof(s->in_pin);
+    } else if (is_eof && !out) {
+        mp_pin_in_write(f->ppins[1], MP_EOF_FRAME);
+    } else if (!is_eof && !out) {
+        mp_pin_out_request_data_next(s->in_pin);
+    }
+
+    if (out)
+        mp_pin_in_write(f->ppins[1], MAKE_FRAME(MP_FRAME_AUDIO, out));
+
+    return;
+
+error:
+    TA_FREEP(&s->in);
+    talloc_free(out);
+    mp_filter_internal_mark_failed(f);
+}
+
+// Store the new speed and recompute the values derived from it: the scale
+// (the speed, or its reciprocal in SCALE_PITCH mode, times scale_nominal) and
+// the scaled stride length. The carried stride error is clamped to the new
+// scaled stride.
+static void update_speed(struct priv *s, float speed)
+{
+    s->speed = speed;
+
+    double factor;
+    if (s->opts->speed_opt & SCALE_PITCH)
+        factor = 1.0 / s->speed;
+    else
+        factor = s->speed;
+
+    s->scale = factor * s->opts->scale_nominal;
+    s->frames_stride_scaled = s->scale * s->frames_stride;
+    s->frames_stride_error = MPMIN(s->frames_stride_error, s->frames_stride_scaled);
+}
+
+// (Re)configure the filter for the format of s->in: derive the stride,
+// overlap and search sizes from the options and the sample rate, (re)allocate
+// the work buffers and lookup tables, and select the S16 or float kernels.
+// Returns false for an unsupported sample format or on allocation failure.
+static bool reinit(struct mp_filter *f)
+{
+ struct priv *s = f->priv;
+
+ mp_aframe_reset(s->cur_format);
+
+ // Sample rate in kHz, so that it can be multiplied with the ms_* options.
+ float srate = mp_aframe_get_rate(s->in) / 1000.0;
+ int nch = mp_aframe_get_channels(s->in);
+ int format = mp_aframe_get_format(s->in);
+
+ int use_int = 0;
+ if (format == AF_FORMAT_S16) {
+ use_int = 1;
+ } else if (format != AF_FORMAT_FLOAT) {
+ return false;
+ }
+ int bps = use_int ? 2 : 4; // bytes per sample
+
+ s->frames_stride = srate * s->opts->ms_stride;
+ s->bytes_stride = s->frames_stride * bps * nch;
+
+ // Recompute the speed-dependent values for the new stride length.
+ update_speed(s, s->speed);
+
+ int frames_overlap = s->frames_stride * s->opts->factor_overlap;
+ if (frames_overlap <= 0) {
+ // No overlap: the whole stride is copied through unblended.
+ s->bytes_standing = s->bytes_stride;
+ s->samples_standing = s->bytes_standing / bps;
+ s->output_overlap = NULL;
+ s->bytes_overlap = 0;
+ } else {
+ s->samples_overlap = frames_overlap * nch;
+ s->bytes_overlap = frames_overlap * nch * bps;
+ s->bytes_standing = s->bytes_stride - s->bytes_overlap;
+ s->samples_standing = s->bytes_standing / bps;
+ // NOTE(review): assigning realloc() results directly loses the old
+ // block if the call fails.
+ s->buf_overlap = realloc(s->buf_overlap, s->bytes_overlap);
+ s->table_blend = realloc(s->table_blend, s->bytes_overlap * 4);
+ if (!s->buf_overlap || !s->table_blend) {
+ MP_FATAL(f, "Out of memory\n");
+ return false;
+ }
+ memset(s->buf_overlap, 0, s->bytes_overlap);
+ if (use_int) {
+ // Linear ramp from 0 towards 2^16, one value per frame, repeated
+ // for every channel.
+ int32_t *pb = s->table_blend;
+ int64_t blend = 0;
+ for (int i = 0; i < frames_overlap; i++) {
+ int32_t v = blend / frames_overlap;
+ for (int j = 0; j < nch; j++)
+ *pb++ = v;
+ blend += 65536; // 2^16
+ }
+ s->output_overlap = output_overlap_s16;
+ } else {
+ // Linear ramp from 0 towards 1, one value per frame, repeated for
+ // every channel.
+ float *pb = s->table_blend;
+ for (int i = 0; i < frames_overlap; i++) {
+ float v = i / (float)frames_overlap;
+ for (int j = 0; j < nch; j++)
+ *pb++ = v;
+ }
+ s->output_overlap = output_overlap_float;
+ }
+ }
+
+ // Searching for the best overlap needs more than one overlap frame.
+ s->frames_search = (frames_overlap > 1) ? srate * s->opts->ms_search : 0;
+ if (s->frames_search <= 0)
+ s->best_overlap_offset = NULL;
+ else {
+ if (use_int) {
+ int64_t t = frames_overlap;
+ int32_t n = 8589934588LL / (t * t); // 4 * (2^31 - 1) / t^2
+ s->buf_pre_corr = realloc(s->buf_pre_corr,
+ s->bytes_overlap * 2 + UNROLL_PADDING);
+ s->table_window = realloc(s->table_window,
+ s->bytes_overlap * 2 - nch * bps * 2);
+ if (!s->buf_pre_corr || !s->table_window) {
+ MP_FATAL(f, "Out of memory\n");
+ return false;
+ }
+ // NOTE(review): only the UNROLL_PADDING tail is zeroed here; the rest
+ // of buf_pre_corr keeps whatever realloc() returned until it is written.
+ memset((char *)s->buf_pre_corr + s->bytes_overlap * 2, 0,
+ UNROLL_PADDING);
+ // Window i * (t - i), scaled by n; the entry for frame 0 is omitted.
+ int32_t *pw = s->table_window;
+ for (int i = 1; i < frames_overlap; i++) {
+ int32_t v = (i * (t - i) * n) >> 15;
+ for (int j = 0; j < nch; j++)
+ *pw++ = v;
+ }
+ s->best_overlap_offset = best_overlap_offset_s16;
+ } else {
+ s->buf_pre_corr = realloc(s->buf_pre_corr, s->bytes_overlap);
+ s->table_window = realloc(s->table_window,
+ s->bytes_overlap - nch * bps);
+ if (!s->buf_pre_corr || !s->table_window) {
+ MP_FATAL(f, "Out of memory\n");
+ return false;
+ }
+ // Window i * (frames_overlap - i); the entry for frame 0 is omitted.
+ float *pw = s->table_window;
+ for (int i = 1; i < frames_overlap; i++) {
+ float v = i * (frames_overlap - i);
+ for (int j = 0; j < nch; j++)
+ *pw++ = v;
+ }
+ s->best_overlap_offset = best_overlap_offset_float;
+ }
+ }
+
+ s->bytes_per_frame = bps * nch;
+ s->num_channels = nch;
+
+ // The queue must hold the search range, one stride and the overlap.
+ s->bytes_queue = (s->frames_search + s->frames_stride + frames_overlap)
+ * bps * nch;
+ s->buf_queue = realloc(s->buf_queue, s->bytes_queue + UNROLL_PADDING);
+ if (!s->buf_queue) {
+ MP_FATAL(f, "Out of memory\n");
+ return false;
+ }
+
+ s->bytes_queued = 0;
+ s->bytes_to_slide = 0;
+
+ MP_DBG(f, ""
+ "%.2f stride_in, %i stride_out, %i standing, "
+ "%i overlap, %i search, %i queue, %s mode\n",
+ s->frames_stride_scaled,
+ (int)(s->bytes_stride / nch / bps),
+ (int)(s->bytes_standing / nch / bps),
+ (int)(s->bytes_overlap / nch / bps),
+ s->frames_search,
+ (int)(s->bytes_queue / nch / bps),
+ (use_int ? "s16" : "float"));
+
+ // Remember the format this configuration belongs to.
+ mp_aframe_config_copy(s->cur_format, s->in);
+
+ return true;
+}
+
+// Filter command callback. Only MP_FILTER_COMMAND_SET_SPEED is handled:
+//  - "tempo" mode: apply the speed and return true;
+//  - "pitch" mode: apply the speed but return false (do not signal OK);
+//  - "both" or "none": leave the speed untouched and return false.
+static bool command(struct mp_filter *f, struct mp_filter_command *cmd)
+{
+    struct priv *s = f->priv;
+
+    if (cmd->type != MP_FILTER_COMMAND_SET_SPEED)
+        return false;
+
+    bool tempo = s->opts->speed_opt & SCALE_TEMPO;
+    bool pitch = s->opts->speed_opt & SCALE_PITCH;
+    if (tempo == pitch)
+        return false; // both flags set, or neither
+
+    update_speed(s, cmd->speed);
+    return tempo;
+}
+
+// Filter reset callback: drop the pending input frame and all queued audio,
+// forget the current PTS and stride error, and clear the overlap buffer.
+static void reset(struct mp_filter *f)
+{
+    struct priv *priv = f->priv;
+
+    TA_FREEP(&priv->in);
+    priv->current_pts = MP_NOPTS_VALUE;
+    priv->frames_stride_error = 0;
+    priv->bytes_to_slide = 0;
+    priv->bytes_queued = 0;
+
+    if (priv->buf_overlap && priv->bytes_overlap)
+        memset(priv->buf_overlap, 0, priv->bytes_overlap);
+}
+
+// Filter destroy callback: release the malloc'ed work buffers and tables, the
+// pending input frame, and the child filters.
+static void destroy(struct mp_filter *f)
+{
+    struct priv *priv = f->priv;
+
+    free(priv->table_window);
+    free(priv->table_blend);
+    free(priv->buf_pre_corr);
+    free(priv->buf_overlap);
+    free(priv->buf_queue);
+
+    TA_FREEP(&priv->in);
+    mp_filter_free_children(f);
+}
+
+// Filter description: name, private state size and callbacks of scaletempo.
+static const struct mp_filter_info af_scaletempo_filter = {
+ .name = "scaletempo",
+ .priv_size = sizeof(struct priv),
+ .process = process,
+ .command = command,
+ .reset = reset,
+ .destroy = destroy,
+};
+
+// Create a scaletempo filter instance. Takes ownership of |options| (freed
+// if filter creation fails). Input is routed through an autoconvert filter
+// that is told to accept S16 and FLOAT.
+static struct mp_filter *af_scaletempo_create(struct mp_filter *parent,
+                                              void *options)
+{
+    struct mp_filter *f = mp_filter_create(parent, &af_scaletempo_filter);
+    if (f == NULL) {
+        talloc_free(options);
+        return NULL;
+    }
+
+    mp_filter_add_pin(f, MP_PIN_IN, "in");
+    mp_filter_add_pin(f, MP_PIN_OUT, "out");
+
+    struct priv *priv = f->priv;
+    priv->speed = 1.0;
+    priv->opts = talloc_steal(priv, options);
+    priv->cur_format = talloc_steal(priv, mp_aframe_create());
+    priv->out_pool = mp_aframe_pool_create(priv);
+
+    struct mp_autoconvert *conv = mp_autoconvert_create(f);
+    if (conv == NULL)
+        abort();
+
+    mp_autoconvert_add_afmt(conv, AF_FORMAT_S16);
+    mp_autoconvert_add_afmt(conv, AF_FORMAT_FLOAT);
+
+    // Filter input -> converter; this filter then reads the converter output.
+    mp_pin_connect(conv->f->pins[0], f->ppins[0]);
+    priv->in_pin = conv->f->pins[1];
+
+    return f;
+}
+
+// User filter entry for "scaletempo": option defaults, the option table
+// (names, target fields and value ranges) and the create function.
+#define OPT_BASE_STRUCT struct f_opts
+
+const struct mp_user_filter_entry af_scaletempo = {
+ .desc = {
+ .description = "Scale audio tempo while maintaining pitch",
+ .name = "scaletempo",
+ .priv_size = sizeof(OPT_BASE_STRUCT),
+ .priv_defaults = &(const OPT_BASE_STRUCT) {
+ .ms_stride = 60,
+ .factor_overlap = .20,
+ .ms_search = 14,
+ .speed_opt = SCALE_TEMPO,
+ .scale_nominal = 1.0,
+ },
+ .options = (const struct m_option[]) {
+ {"scale", OPT_FLOAT(scale_nominal), M_RANGE(0.01, DBL_MAX)},
+ {"stride", OPT_FLOAT(ms_stride), M_RANGE(0.01, DBL_MAX)},
+ {"overlap", OPT_FLOAT(factor_overlap), M_RANGE(0, 1)},
+ {"search", OPT_FLOAT(ms_search), M_RANGE(0, DBL_MAX)},
+ // Selects what a speed change affects; see command().
+ {"speed", OPT_CHOICE(speed_opt,
+ {"pitch", SCALE_PITCH},
+ {"tempo", SCALE_TEMPO},
+ {"none", 0},
+ {"both", SCALE_TEMPO | SCALE_PITCH})},
+ {0}
+ },
+ },
+ .create = af_scaletempo_create,
+};
diff --git a/audio/filter/af_scaletempo2.c b/audio/filter/af_scaletempo2.c
new file mode 100644
index 0000000..7ad8e35
--- /dev/null
+++ b/audio/filter/af_scaletempo2.c
@@ -0,0 +1,254 @@
+#include "audio/aframe.h"
+#include "audio/filter/af_scaletempo2_internals.h"
+#include "audio/format.h"
+#include "common/common.h"
+#include "filters/f_autoconvert.h"
+#include "filters/filter_internal.h"
+#include "filters/user_filters.h"
+#include "options/m_option.h"
+
+// Per-instance state of the scaletempo2 filter.
+struct priv {
+ struct mp_scaletempo2 data; // state passed to the mp_scaletempo2_* functions
+ struct mp_pin *in_pin; // pin that input frames are read from
+ struct mp_aframe *cur_format; // config copied in init_scaletempo2()
+ struct mp_aframe_pool *out_pool; // pool used to allocate output frames
+ bool sent_final; // mp_scaletempo2_set_final() was called since the last init
+ struct mp_aframe *pending; // input frame currently being fed, or NULL
+ bool initialized; // init_scaletempo2() succeeded and no reset/final since
+ float speed; // speed set via MP_FILTER_COMMAND_SET_SPEED (1.0 initially)
+};
+
+static bool init_scaletempo2(struct mp_filter *f);
+static void reset(struct mp_filter *f);
+
+// Filter process callback. Feeds input frames into the scaletempo2 state
+// until output frames are available, handling EOF and format changes by
+// marking the stream final and draining, then emits one output frame.
+// Marks the filter as failed on unexpected input or allocation failure.
+static void process(struct mp_filter *f)
+{
+    struct priv *p = f->priv;
+
+    if (!mp_pin_in_needs_data(f->ppins[1]))
+        return;
+
+    while (!p->initialized || !p->pending ||
+           !mp_scaletempo2_frames_available(&p->data, p->speed))
+    {
+        bool eof = false;
+        // Fetch new input once the pending frame is fully consumed.
+        if (!p->pending || !mp_aframe_get_size(p->pending)) {
+            struct mp_frame frame = mp_pin_out_read(p->in_pin);
+            if (frame.type == MP_FRAME_AUDIO) {
+                TA_FREEP(&p->pending);
+                p->pending = frame.data;
+            } else if (frame.type == MP_FRAME_EOF) {
+                eof = true;
+            } else if (frame.type) {
+                MP_ERR(f, "unexpected frame type\n");
+                // The frame was already taken from the pin and nothing else
+                // owns it, so release it here instead of leaking it.
+                mp_frame_unref(&frame);
+                goto error;
+            } else {
+                return; // no new data yet
+            }
+        }
+        assert(p->pending || eof);
+
+        if (!p->initialized) {
+            if (!p->pending) {
+                // EOF without any audio to initialize from.
+                mp_pin_in_write(f->ppins[1], MP_EOF_FRAME);
+                return;
+            }
+            if (!init_scaletempo2(f))
+                goto error;
+        }
+
+        bool format_change =
+            p->pending && !mp_aframe_config_equals(p->pending, p->cur_format);
+
+        bool final = format_change || eof;
+        if (p->pending && !format_change && !p->sent_final) {
+            int frame_size = mp_aframe_get_size(p->pending);
+            uint8_t **planes = mp_aframe_get_data_ro(p->pending);
+            int read = mp_scaletempo2_fill_input_buffer(&p->data,
+                planes, frame_size, p->speed);
+            mp_aframe_skip_samples(p->pending, read);
+        }
+        if (final && p->pending && !p->sent_final) {
+            mp_scaletempo2_set_final(&p->data);
+            p->sent_final = true;
+        }
+
+        if (mp_scaletempo2_frames_available(&p->data, p->speed)) {
+            if (eof) {
+                mp_pin_out_repeat_eof(p->in_pin); // drain more next time
+            }
+        } else if (final) {
+            p->initialized = false;
+            p->sent_final = false;
+            if (eof) {
+                mp_pin_in_write(f->ppins[1], MP_EOF_FRAME);
+                return;
+            }
+            // for format change go on with proper reinit on the next iteration
+        }
+    }
+
+    assert(p->pending);
+    if (mp_scaletempo2_frames_available(&p->data, p->speed)) {
+        struct mp_aframe *out = mp_aframe_new_ref(p->cur_format);
+        int out_samples = p->data.ola_hop_size;
+        if (mp_aframe_pool_allocate(p->out_pool, out, out_samples) < 0) {
+            talloc_free(out);
+            goto error;
+        }
+
+        mp_aframe_copy_attributes(out, p->pending);
+
+        uint8_t **planes = mp_aframe_get_data_rw(out);
+        assert(planes);
+        assert(mp_aframe_get_planes(out) == p->data.channels);
+
+        out_samples = mp_scaletempo2_fill_buffer(&p->data,
+            (float**)planes, out_samples, p->speed);
+
+        double pts = mp_aframe_get_pts(p->pending);
+        if (pts != MP_NOPTS_VALUE) {
+            // Output PTS = pending PTS minus the audio still buffered in the
+            // filter and the samples produced now, both in input time.
+            double frame_delay = mp_scaletempo2_get_latency(&p->data, p->speed)
+                + out_samples * p->speed;
+            mp_aframe_set_pts(out, pts - frame_delay / mp_aframe_get_effective_rate(out));
+
+            if (p->sent_final) {
+                double remain_pts = pts - mp_aframe_get_pts(out);
+                double rate = mp_aframe_get_effective_rate(out) / p->speed;
+                int max_samples = MPMAX(0, (int) (remain_pts * rate));
+                // truncate final packet to expected length
+                if (out_samples >= max_samples) {
+                    out_samples = max_samples;
+
+                    // reset the filter to ensure it stops generating audio
+                    // and mp_scaletempo2_frames_available returns false
+                    mp_scaletempo2_reset(&p->data);
+                }
+            }
+        }
+
+        mp_aframe_set_size(out, out_samples);
+        mp_aframe_mul_speed(out, p->speed);
+        mp_pin_in_write(f->ppins[1], MAKE_FRAME(MP_FRAME_AUDIO, out));
+    }
+
+    return;
+error:
+    mp_filter_internal_mark_failed(f);
+}
+
+// Initialize the scaletempo2 state from the pending frame's channel count and
+// sample rate and remember its config as the current format. Requires a
+// pending frame; returns false if its format is not AF_FORMAT_FLOATP.
+static bool init_scaletempo2(struct mp_filter *f)
+{
+    struct priv *priv = f->priv;
+    assert(priv->pending);
+
+    int format = mp_aframe_get_format(priv->pending);
+    if (format != AF_FORMAT_FLOATP)
+        return false;
+
+    priv->initialized = true;
+    priv->sent_final = false;
+
+    mp_aframe_reset(priv->cur_format);
+    mp_aframe_config_copy(priv->cur_format, priv->pending);
+
+    mp_scaletempo2_init(&priv->data, mp_aframe_get_channels(priv->pending),
+                        mp_aframe_get_rate(priv->pending));
+
+    return true;
+}
+
+// Filter command callback: MP_FILTER_COMMAND_SET_SPEED stores the new speed
+// and returns true; every other command returns false.
+static bool command(struct mp_filter *f, struct mp_filter_command *cmd)
+{
+    struct priv *priv = f->priv;
+
+    if (cmd->type == MP_FILTER_COMMAND_SET_SPEED) {
+        priv->speed = cmd->speed;
+        return true;
+    }
+
+    return false;
+}
+
+// Filter reset callback: drop the pending input frame, reset the scaletempo2
+// state and require a fresh init before the next use.
+static void reset(struct mp_filter *f)
+{
+    struct priv *priv = f->priv;
+
+    TA_FREEP(&priv->pending);
+    priv->initialized = false;
+    mp_scaletempo2_reset(&priv->data);
+}
+
+// Filter destroy callback: release the pending input frame and the
+// scaletempo2 state.
+static void destroy(struct mp_filter *f)
+{
+    struct priv *priv = f->priv;
+
+    talloc_free(priv->pending);
+    mp_scaletempo2_destroy(&priv->data);
+}
+
+// Filter description: name, private state size and callbacks of scaletempo2.
+static const struct mp_filter_info af_scaletempo2_filter = {
+ .name = "scaletempo2",
+ .priv_size = sizeof(struct priv),
+ .process = process,
+ .command = command,
+ .reset = reset,
+ .destroy = destroy,
+};
+
+// Create a scaletempo2 filter instance. Takes ownership of |options| (freed
+// if filter creation fails). Input is routed through an autoconvert filter
+// that is told to accept FLOATP.
+static struct mp_filter *af_scaletempo2_create(
+    struct mp_filter *parent, void *options)
+{
+    struct mp_filter *f = mp_filter_create(parent, &af_scaletempo2_filter);
+    if (f == NULL) {
+        talloc_free(options);
+        return NULL;
+    }
+
+    mp_filter_add_pin(f, MP_PIN_IN, "in");
+    mp_filter_add_pin(f, MP_PIN_OUT, "out");
+
+    struct priv *priv = f->priv;
+    priv->initialized = false;
+    priv->pending = NULL;
+    priv->speed = 1.0;
+    priv->data.opts = talloc_steal(priv, options);
+    priv->cur_format = talloc_steal(priv, mp_aframe_create());
+    priv->out_pool = mp_aframe_pool_create(priv);
+
+    struct mp_autoconvert *conv = mp_autoconvert_create(f);
+    if (conv == NULL)
+        abort();
+
+    mp_autoconvert_add_afmt(conv, AF_FORMAT_FLOATP);
+
+    // Filter input -> converter; this filter then reads the converter output.
+    mp_pin_connect(conv->f->pins[0], f->ppins[0]);
+    priv->in_pin = conv->f->pins[1];
+
+    return f;
+}
+
+// User filter entry for "scaletempo2": option defaults, the option table
+// (names, target fields and value ranges) and the create function.
+#define OPT_BASE_STRUCT struct mp_scaletempo2_opts
+const struct mp_user_filter_entry af_scaletempo2 = {
+ .desc = {
+ .description = "Scale audio tempo while maintaining pitch"
+ " (filter ported from chromium)",
+ .name = "scaletempo2",
+ .priv_size = sizeof(OPT_BASE_STRUCT),
+ .priv_defaults = &(const OPT_BASE_STRUCT) {
+ .min_playback_rate = 0.25,
+ .max_playback_rate = 8.0,
+ .ola_window_size_ms = 12,
+ .wsola_search_interval_ms = 40,
+ },
+ .options = (const struct m_option[]) {
+ {"search-interval",
+ OPT_FLOAT(wsola_search_interval_ms), M_RANGE(1, 1000)},
+ {"window-size",
+ OPT_FLOAT(ola_window_size_ms), M_RANGE(1, 1000)},
+ {"min-speed",
+ OPT_FLOAT(min_playback_rate), M_RANGE(0, FLT_MAX)},
+ {"max-speed",
+ OPT_FLOAT(max_playback_rate), M_RANGE(0, FLT_MAX)},
+ {0}
+ }
+ },
+ .create = af_scaletempo2_create,
+};
diff --git a/audio/filter/af_scaletempo2_internals.c b/audio/filter/af_scaletempo2_internals.c
new file mode 100644
index 0000000..534f4f6
--- /dev/null
+++ b/audio/filter/af_scaletempo2_internals.c
@@ -0,0 +1,873 @@
+#include <float.h>
+#include <math.h>
+
+#include "audio/chmap.h"
+#include "audio/filter/af_scaletempo2_internals.h"
+
+#include "config.h"
+
+// Algorithm overview (from chromium):
+// Waveform Similarity Overlap-and-add (WSOLA).
+//
+// One WSOLA iteration
+//
+// 1) Extract |target_block| as input frames at indices
+// [|target_block_index|, |target_block_index| + |ola_window_size|).
+// Note that |target_block| is the "natural" continuation of the output.
+//
+// 2) Extract |search_block| as input frames at indices
+// [|search_block_index|,
+// |search_block_index| + |num_candidate_blocks| + |ola_window_size|).
+//
+// 3) Find a block within the |search_block| that is most similar
+// to |target_block|. Let |optimal_index| be the index of such block and
+// write it to |optimal_block|.
+//
+// 4) Update:
+// |optimal_block| = |transition_window| * |target_block| +
+// (1 - |transition_window|) * |optimal_block|.
+//
+// 5) Overlap-and-add |optimal_block| to the |wsola_output|.
+//
+// 6) Update the target, output and search block indices for the next
+//    iteration.
+
+// Closed integer interval [lo, hi].
+struct interval {
+    int lo;
+    int hi;
+};
+
+// Return true if |n| lies within |q|, both bounds included.
+static bool in_interval(int n, struct interval q)
+{
+    return !(n < q.lo || n > q.hi);
+}
+
+// Allocate or resize an |x| by |y| float matrix as one block: |x| row
+// pointers followed by |x| * |y| floats. The row pointers are recomputed on
+// every call; sample data is not rearranged when the dimensions change.
+// Returns the (possibly moved) block, to be released with a single free().
+// Aborts if the allocation fails, as callers use the result unconditionally.
+static float **realloc_2d(float **p, int x, int y)
+{
+    size_t size = sizeof(float*) * x + sizeof(float) * x * y;
+    float **array = realloc(p, size);
+    if (!array) {
+        // A zero-sized request may legitimately yield NULL; anything else is
+        // an allocation failure that would otherwise be dereferenced below.
+        if (size)
+            abort();
+        return NULL;
+    }
+    float *data = (float *)(array + x);
+    for (int i = 0; i < x; ++i)
+        array[i] = data + i * y;
+    return array;
+}
+
+// Zero all |x| * |y| floats of a matrix laid out as |x| row pointers directly
+// followed by the sample data (the layout produced by realloc_2d()).
+static void zero_2d(float **a, int x, int y)
+{
+    float *data = (float *)(a + x);
+    size_t bytes = sizeof(float) * x * y;
+    memset(data, 0, bytes);
+}
+
+// Zero the first |y| floats of each of the first |x| rows of |a|, going
+// through the row pointers (rows may be longer than |y|).
+static void zero_2d_partial(float **a, int x, int y)
+{
+    const size_t row_bytes = sizeof(float) * y;
+    float **row = a;
+    float **end = a + x;
+    while (row < end) {
+        memset(*row, 0, row_bytes);
+        ++row;
+    }
+}
+
+// Compute the energy (sum of squares) of every sliding block of
+// |frames_per_block| frames in each channel of |input|.
+// There are |input_frames| - (|frames_per_block| - 1) blocks per channel, and
+// the results are interleaved by channel: the energy of block n of channel k
+// is stored at energy[k + n * channels]. |energy| must therefore hold at least
+// (|input_frames| - (|frames_per_block| - 1)) * |channels| floats.
+static void multi_channel_moving_block_energies(
+    float **input, int input_frames, int channels,
+    int frames_per_block, float *energy)
+{
+    const int block_count = input_frames - (frames_per_block - 1);
+
+    for (int ch = 0; ch < channels; ++ch) {
+        const float *samples = input[ch];
+
+        // Energy of the first block, summed directly.
+        energy[ch] = 0;
+        for (int i = 0; i < frames_per_block; ++i)
+            energy[ch] += samples[i] * samples[i];
+
+        // Every following block drops its oldest sample and gains a new one.
+        for (int blk = 1; blk < block_count; ++blk) {
+            const float *leaving = samples + (blk - 1);
+            const float *entering = leaving + frames_per_block;
+            energy[ch + blk * channels] = energy[ch + (blk - 1) * channels]
+                - *leaving * *leaving + *entering * *entering;
+        }
+    }
+}
+
+// Sum over all channels of the dot product divided by the square root of the
+// product of both energies. A small epsilon is added under the square root
+// so that zero energy does not cause a division by zero.
+static float multi_channel_similarity_measure(
+    const float* dot_prod_a_b,
+    const float* energy_a, const float* energy_b,
+    int channels)
+{
+    const float epsilon = 1e-12f;
+    float total = 0.0f;
+    int ch = 0;
+    while (ch < channels) {
+        total += dot_prod_a_b[ch]
+            / sqrtf(energy_a[ch] * energy_b[ch] + epsilon);
+        ++ch;
+    }
+    return total;
+}
+
+#if HAVE_VECTOR
+
+// Vector of 8 floats (32 bytes) declared with 1-byte alignment.
+typedef float v8sf __attribute__ ((vector_size (32), aligned (1)));
+
+// Dot-product of channels of two AudioBus. For each AudioBus an offset is
+// given. |dot_product[k]| is the dot-product of channel |k|. The caller should
+// allocate sufficient space for |dot_product|.
+// This variant (built when HAVE_VECTOR is set) processes 32 frames per
+// iteration with vector operations and handles the remaining
+// |num_frames| % 32 frames one at a time.
+static void multi_channel_dot_product(
+ float **a, int frame_offset_a,
+ float **b, int frame_offset_b,
+ int channels,
+ int num_frames, float *dot_product)
+{
+ assert(frame_offset_a >= 0);
+ assert(frame_offset_b >= 0);
+
+ for (int k = 0; k < channels; ++k) {
+ const float* ch_a = a[k] + frame_offset_a;
+ const float* ch_b = b[k] + frame_offset_b;
+ float sum = 0.0;
+ // Fewer than 32 frames: skip the vector part entirely.
+ if (num_frames < 32)
+ goto rest;
+
+ const v8sf *va = (const v8sf *) ch_a;
+ const v8sf *vb = (const v8sf *) ch_b;
+ v8sf vsum[4] = {
+ // Initialize to product of first 32 floats
+ va[0] * vb[0],
+ va[1] * vb[1],
+ va[2] * vb[2],
+ va[3] * vb[3],
+ };
+ va += 4;
+ vb += 4;
+
+ // Process `va` and `vb` across four vertical stripes
+ // (starting at n = 1, as the first 32 floats are already in `vsum`)
+ for (int n = 1; n < num_frames / 32; n++) {
+ vsum[0] += va[0] * vb[0];
+ vsum[1] += va[1] * vb[1];
+ vsum[2] += va[2] * vb[2];
+ vsum[3] += va[3] * vb[3];
+ va += 4;
+ vb += 4;
+ }
+
+ // Vertical sum across `vsum` entries
+ vsum[0] += vsum[1];
+ vsum[2] += vsum[3];
+ vsum[0] += vsum[2];
+
+ // Horizontal sum across `vsum[0]`, could probably be done better but
+ // this section is not super performance critical
+ float *vf = (float *) &vsum[0];
+ sum = vf[0] + vf[1] + vf[2] + vf[3] + vf[4] + vf[5] + vf[6] + vf[7];
+ // Continue the scalar loop where the vector loop stopped.
+ ch_a = (const float *) va;
+ ch_b = (const float *) vb;
+
+rest:
+ // Process the remainder
+ for (int n = 0; n < num_frames % 32; n++)
+ sum += *ch_a++ * *ch_b++;
+
+ dot_product[k] = sum;
+ }
+}
+
+#else // !HAVE_VECTOR
+
+// Scalar fallback: for every channel k, store in dot_product[k] the dot
+// product of |num_frames| samples of a[k] starting at |frame_offset_a| and of
+// b[k] starting at |frame_offset_b|. Both offsets must be non-negative.
+static void multi_channel_dot_product(
+    float **a, int frame_offset_a,
+    float **b, int frame_offset_b,
+    int channels,
+    int num_frames, float *dot_product)
+{
+    assert(frame_offset_a >= 0);
+    assert(frame_offset_b >= 0);
+
+    for (int ch = 0; ch < channels; ++ch) {
+        const float *lhs = &a[ch][frame_offset_a];
+        const float *rhs = &b[ch][frame_offset_b];
+        float acc = 0.0;
+        for (int i = 0; i < num_frames; i++)
+            acc += lhs[i] * rhs[i];
+        dot_product[ch] = acc;
+    }
+}
+
+#endif // HAVE_VECTOR
+
+// Fit the parabola f(x) = a * x^2 + b * x + c through the three points
+//   f(-1) = y_values[0], f(0) = y_values[1], f(1) = y_values[2]
+// and report the position of its extremum in |extremum| and the value there
+// in |extremum_value|. Callers are expected to pass a local maximum, i.e.
+// y_values[0] <= y_values[1] >= y_values[2]. If the points are collinear,
+// the middle point (x = 0) is reported.
+static void quadratic_interpolation(
+    const float* y_values, float* extremum, float* extremum_value)
+{
+    const float left = y_values[0];
+    const float mid = y_values[1];
+    const float right = y_values[2];
+
+    float a = 0.5f * (right + left) - mid;
+    float b = 0.5f * (right - left);
+    float c = mid;
+
+    if (a == 0.f) {
+        // No curvature (within floating-point error): nothing to interpolate.
+        *extremum = 0;
+        *extremum_value = mid;
+        return;
+    }
+
+    float x = -b / (2.f * a);
+    *extremum = x;
+    *extremum_value = a * x * x + b * x + c;
+}
+
+// Search a subset of all candidate blocks. The search is performed every
+// |decimation| frames. This reduces complexity by a factor of about
+// 1 / |decimation|. A quadratic interpolation is used to have a better
+// estimate of the best match.
+// Returns the index (in frames, relative to |search_segment|) of the best
+// candidate found. Candidates inside |exclude_interval| are not accepted as
+// a new optimum; the initial optimum is index 0.
+static int decimated_search(
+ int decimation, struct interval exclude_interval,
+ float **target_block, int target_block_frames,
+ float **search_segment, int search_segment_frames,
+ int channels,
+ const float *energy_target_block, const float *energy_candidate_blocks)
+{
+ int num_candidate_blocks = search_segment_frames - (target_block_frames - 1);
+ float dot_prod [MP_NUM_CHANNELS];
+ float similarity[3]; // Three elements for quadratic interpolation.
+
+ int n = 0;
+ multi_channel_dot_product(
+ target_block, 0,
+ search_segment, n,
+ channels,
+ target_block_frames, dot_prod);
+ similarity[0] = multi_channel_similarity_measure(
+ dot_prod, energy_target_block,
+ &energy_candidate_blocks[n * channels], channels);
+
+ // Set the starting point as optimal point.
+ float best_similarity = similarity[0];
+ int optimal_index = 0;
+
+ n += decimation;
+ if (n >= num_candidate_blocks) {
+ // Only a single candidate could be sampled.
+ return 0;
+ }
+
+ multi_channel_dot_product(
+ target_block, 0,
+ search_segment, n,
+ channels,
+ target_block_frames, dot_prod);
+ similarity[1] = multi_channel_similarity_measure(
+ dot_prod, energy_target_block,
+ &energy_candidate_blocks[n * channels], channels);
+
+ n += decimation;
+ if (n >= num_candidate_blocks) {
+ // We cannot do any more sampling. Compare these two values and return the
+ // optimal index.
+ return similarity[1] > similarity[0] ? decimation : 0;
+ }
+
+ // |similarity| holds the last three sampled values; the middle one is
+ // tested for being a local maximum.
+ for (; n < num_candidate_blocks; n += decimation) {
+ multi_channel_dot_product(
+ target_block, 0,
+ search_segment, n,
+ channels,
+ target_block_frames, dot_prod);
+
+ similarity[2] = multi_channel_similarity_measure(
+ dot_prod, energy_target_block,
+ &energy_candidate_blocks[n * channels], channels);
+
+ if ((similarity[1] > similarity[0] && similarity[1] >= similarity[2]) ||
+ (similarity[1] >= similarity[0] && similarity[1] > similarity[2]))
+ {
+ // A local maximum is found. Do a quadratic interpolation for a better
+ // estimate of candidate maximum.
+ float normalized_candidate_index;
+ float candidate_similarity;
+ quadratic_interpolation(similarity, &normalized_candidate_index,
+ &candidate_similarity);
+
+ // Map the interpolated position (in units of |decimation|, relative to
+ // the middle sample at n - decimation) back to a frame index, rounding
+ // via + 0.5f and truncation.
+ int candidate_index = n - decimation
+ + (int)(normalized_candidate_index * decimation + 0.5f);
+ if (candidate_similarity > best_similarity
+ && !in_interval(candidate_index, exclude_interval)) {
+ optimal_index = candidate_index;
+ best_similarity = candidate_similarity;
+ }
+ } else if (n + decimation >= num_candidate_blocks &&
+ similarity[2] > best_similarity &&
+ !in_interval(n, exclude_interval))
+ {
+ // If this is the end-point and has a better similarity-measure than
+ // optimal, then we accept it as optimal point.
+ optimal_index = n;
+ best_similarity = similarity[2];
+ }
+ // Shift the window: drop the oldest similarity value.
+ memmove(similarity, &similarity[1], 2 * sizeof(*similarity));
+ }
+ return optimal_index;
+}
+
+// Exhaustively search the candidates [|low_limit|, |high_limit|] (inclusive)
+// of |search_block| for the block that is most similar to |target_block|.
+// Indices inside |exclude_interval| are skipped.
+// |energy_target_block| is the per-channel energy of |target_block|.
+// |energy_candidate_blocks| holds the energies of all candidate blocks within
+// |search_block|, stored as |channels| consecutive floats per candidate.
+// Returns the index of the best candidate relative to the start of
+// |search_block|, or 0 if every index of the range was excluded.
+static int full_search(
+    int low_limit, int high_limit,
+    struct interval exclude_interval,
+    float **target_block, int target_block_frames,
+    float **search_block, int search_block_frames,
+    int channels,
+    const float *energy_target_block,
+    const float *energy_candidate_blocks)
+{
+    // One dot product per channel. The array length is an element count, not
+    // a byte count, so it must not be scaled by sizeof(float).
+    float dot_prod[MP_NUM_CHANNELS];
+
+    // Start below every finite similarity so that the first candidate which
+    // is not excluded always becomes the initial optimum.
+    float best_similarity = -FLT_MAX;
+    int optimal_index = 0;
+
+    for (int n = low_limit; n <= high_limit; ++n) {
+        if (in_interval(n, exclude_interval))
+            continue;
+
+        multi_channel_dot_product(target_block, 0, search_block, n, channels,
+                                  target_block_frames, dot_prod);
+
+        float similarity = multi_channel_similarity_measure(
+            dot_prod, energy_target_block,
+            &energy_candidate_blocks[n * channels], channels);
+
+        if (similarity > best_similarity) {
+            best_similarity = similarity;
+            optimal_index = n;
+        }
+    }
+
+    return optimal_index;
+}
+
+// Locate the block inside |search_block| that resembles |target_block| most.
+// The result is an index relative to the start of |search_block|.
+// Indices inside |exclude_interval| are not considered.
+// |energy_candidate_blocks| is scratch memory that must provide room for at
+// least channels * num_candidate_blocks floats.
+static int compute_optimal_index(
+    float **search_block, int search_block_frames,
+    float **target_block, int target_block_frames,
+    float *energy_candidate_blocks,
+    int channels,
+    struct interval exclude_interval)
+{
+    // Step width of the coarse search. It trades complexity against
+    // accuracy: a factor derived purely from the block sizes would minimize
+    // the work, but experiments showed that it misses the optimum too often.
+    // 5 is a heuristic value, not a proven optimum.
+    const int decimation = 5;
+
+    const int candidate_count = search_block_frames - (target_block_frames - 1);
+    float energy_target_block[MP_NUM_CHANNELS];
+
+    // Energy of the target block: its dot product with itself.
+    multi_channel_dot_product(
+        target_block, 0,
+        target_block, 0,
+        channels,
+        target_block_frames, energy_target_block);
+
+    // Energies of all candidate blocks.
+    multi_channel_moving_block_energies(
+        search_block,
+        search_block_frames,
+        channels,
+        target_block_frames,
+        energy_candidate_blocks);
+
+    // Coarse pass: sample only every |decimation|-th candidate.
+    int coarse_index = decimated_search(
+        decimation, exclude_interval,
+        target_block, target_block_frames,
+        search_block, search_block_frames,
+        channels,
+        energy_target_block,
+        energy_candidate_blocks);
+
+    // Fine pass: test every candidate within one decimation step of the
+    // coarse result, clamped to the valid candidate range.
+    int first = MPMAX(0, coarse_index - decimation);
+    int last = MPMIN(candidate_count - 1, coarse_index + decimation);
+    return full_search(
+        first, last, exclude_interval,
+        target_block, target_block_frames,
+        search_block, search_block_frames,
+        channels,
+        energy_target_block, energy_candidate_blocks);
+}
+
+// Copy |frames| frames of every channel from |input_buffer|, beginning at
+// frame |read_offset|, to |dest|, beginning at frame |write_offset|.
+// The input buffer is not modified.
+static void peek_buffer(struct mp_scaletempo2 *p,
+    int frames, int read_offset, int write_offset, float **dest)
+{
+    assert(p->input_buffer_frames >= frames);
+
+    const size_t bytes = frames * sizeof(float);
+    for (int ch = 0; ch < p->channels; ++ch) {
+        const float *src = p->input_buffer[ch] + read_offset;
+        memcpy(dest[ch] + write_offset, src, bytes);
+    }
+}
+
+// Drop the first |frames| frames of every channel of |input_buffer| and move
+// the remaining frames to the front. |input_buffer_final_frames| is reduced
+// by the same amount, but never below zero.
+static void seek_buffer(struct mp_scaletempo2 *p, int frames)
+{
+    assert(p->input_buffer_frames >= frames);
+
+    const int remaining = p->input_buffer_frames - frames;
+    p->input_buffer_frames = remaining;
+
+    if (p->input_buffer_final_frames > 0) {
+        int final_left = p->input_buffer_final_frames - frames;
+        p->input_buffer_final_frames = final_left > 0 ? final_left : 0;
+    }
+
+    for (int ch = 0; ch < p->channels; ++ch) {
+        float *base = p->input_buffer[ch];
+        // Source and destination overlap, hence memmove.
+        memmove(base, base + frames, remaining * sizeof(float));
+    }
+}
+
+// Move up to |requested_frames| completed frames from |wsola_output| to
+// |dest|, starting at frame |dest_offset| of |dest|.
+// Returns the number of frames that were written.
+static int write_completed_frames_to(struct mp_scaletempo2 *p,
+    int requested_frames, int dest_offset, float **dest)
+{
+    const int count = MPMIN(p->num_complete_frames, requested_frames);
+    if (count == 0)
+        return 0; // |wsola_output| has nothing to hand out.
+
+    // Frames that stay in |wsola_output| and have to move to its front.
+    const int keep = p->wsola_output_size - count;
+
+    for (int ch = 0; ch < p->channels; ++ch) {
+        float *out = p->wsola_output[ch];
+        memcpy(dest[ch] + dest_offset, out, count * sizeof(float));
+        memmove(out, &out[count], sizeof(*out) * keep);
+    }
+
+    p->num_complete_frames -= count;
+    return count;
+}
+
+// Value |output_time| will have after one more WSOLA iteration at
+// |playback_rate|: the current time advanced by one hop scaled by the rate.
+static double get_updated_time(struct mp_scaletempo2 *p, double playback_rate)
+{
+    return p->ola_hop_size * playback_rate + p->output_time;
+}
+
+// Index of the first frame of the search block whose center is at
+// |output_time|. The result may be negative.
+static int get_search_block_index(struct mp_scaletempo2 *p, double output_time)
+{
+    const double block_start = output_time - p->search_block_center_offset;
+    // Adding 0.5 before the truncating cast rounds to the nearest index for
+    // non-negative values.
+    return (int)(block_start + 0.5);
+}
+
+// Number of input frames still missing before the next WSOLA iteration can
+// run at |playback_rate|. Returns 0 if enough input is already buffered.
+static int frames_needed(struct mp_scaletempo2 *p, double playback_rate)
+{
+    // The search block position is the one the next iteration will use.
+    const int next_search_index =
+        get_search_block_index(p, get_updated_time(p, playback_rate));
+
+    // The buffer has to cover both the target block and the search block.
+    const int target_end = p->target_block_index + p->ola_window_size;
+    const int search_end = next_search_index + p->search_block_size;
+    const int missing = MPMAX(target_end, search_end) - p->input_buffer_frames;
+
+    return missing > 0 ? missing : 0;
+}
+
+// True if |input_buffer| holds enough frames for one WSOLA iteration at
+// |playback_rate|.
+static bool can_perform_wsola(struct mp_scaletempo2 *p, double playback_rate)
+{
+    const int missing = frames_needed(p, playback_rate);
+    return missing <= 0;
+}
+
+// Reallocate |input_buffer| to |size| frames per channel and record the new
+// capacity in |input_buffer_size|.
+static void resize_input_buffer(struct mp_scaletempo2 *p, int size)
+{
+    p->input_buffer = realloc_2d(p->input_buffer, p->channels, size);
+    p->input_buffer_size = size;
+}
+
+// Append as much silence to |input_buffer| as is required to run one more
+// WSOLA iteration at |playback_rate|. The amount of added silence is
+// accumulated in |input_buffer_added_silence|.
+static void add_input_buffer_final_silence(struct mp_scaletempo2 *p, double playback_rate)
+{
+    const int missing = frames_needed(p, playback_rate);
+    if (missing <= 0)
+        return; // An iteration is already possible without padding.
+
+    const int padded_frames = p->input_buffer_frames + missing;
+    if (padded_frames > p->input_buffer_size)
+        resize_input_buffer(p, padded_frames);
+
+    for (int ch = 0; ch < p->channels; ++ch) {
+        float *tail = p->input_buffer[ch] + p->input_buffer_frames;
+        for (int i = 0; i < missing; ++i)
+            tail[i] = 0.0f;
+    }
+
+    p->input_buffer_added_silence += missing;
+    p->input_buffer_frames = padded_frames;
+}
+
+// Declare the currently buffered input to be the end of the stream by
+// storing its frame count in |input_buffer_final_frames|. Does nothing if a
+// positive final frame count is already set.
+void mp_scaletempo2_set_final(struct mp_scaletempo2 *p)
+{
+    if (p->input_buffer_final_frames > 0)
+        return;
+
+    p->input_buffer_final_frames = p->input_buffer_frames;
+}
+
+// Append input frames from |planes| (one plane per channel, copied as float
+// samples) to |input_buffer|. At most |frame_size| frames are taken, and
+// never more than are needed for the next WSOLA iteration at
+// |playback_rate|. Returns the number of frames that were consumed.
+int mp_scaletempo2_fill_input_buffer(struct mp_scaletempo2 *p,
+    uint8_t **planes, int frame_size, double playback_rate)
+{
+    const int wanted = frames_needed(p, playback_rate);
+    const int count = MPMIN(wanted, frame_size);
+    if (count == 0)
+        return 0;
+
+    // Grow the buffer if the new frames do not fit.
+    const int new_frames = count + p->input_buffer_frames;
+    if (new_frames > p->input_buffer_size)
+        resize_input_buffer(p, new_frames);
+
+    const size_t bytes = count * sizeof(float);
+    for (int ch = 0; ch < p->channels; ++ch) {
+        float *tail = p->input_buffer[ch] + p->input_buffer_frames;
+        memcpy(tail, planes[ch], bytes);
+    }
+
+    p->input_buffer_frames = new_frames;
+    return count;
+}
+
+// True if the target block lies completely inside the current search block.
+static bool target_is_within_search_region(struct mp_scaletempo2 *p)
+{
+    if (p->target_block_index < p->search_block_index)
+        return false;
+
+    const int target_end = p->target_block_index + p->ola_window_size;
+    const int search_end = p->search_block_index + p->search_block_size;
+    return target_end <= search_end;
+}
+
+
+// Copy |dest_frames| frames starting at |read_offset_frames| from
+// |input_buffer| to |dest|. |read_offset_frames| may be negative: the frames
+// that would lie before the start of the buffer are written as zeros.
+static void peek_audio_with_zero_prepend(struct mp_scaletempo2 *p,
+    int read_offset_frames, float **dest, int dest_frames)
+{
+    assert(read_offset_frames + dest_frames <= p->input_buffer_frames);
+
+    // Number of zero frames placed in front of the copied audio.
+    int leading_zeros = 0;
+    if (read_offset_frames < 0) {
+        leading_zeros = MPMIN(-read_offset_frames, dest_frames);
+        zero_2d_partial(dest, p->channels, leading_zeros);
+        read_offset_frames = 0;
+    }
+
+    peek_buffer(p, dest_frames - leading_zeros, read_offset_frames,
+                leading_zeros, dest);
+}
+
+// Fill |optimal_block| with the block that is overlap-added next and advance
+// |target_block_index| to one hop behind the start of that block.
+static void get_optimal_block(struct mp_scaletempo2 *p)
+{
+    // Length of the interval around the previous optimal block which is
+    // excluded from the search in order to reduce buzzy sound. The value 160
+    // is rather arbitrary and was derived heuristically.
+    const int exclude_interval_length_frames = 160;
+    const int window = p->ola_window_size;
+    int chosen_index = 0;
+
+    if (target_is_within_search_region(p)) {
+        // The target block itself lies inside the search region, so it is
+        // used directly without any search.
+        chosen_index = p->target_block_index;
+        peek_audio_with_zero_prepend(p, chosen_index, p->optimal_block, window);
+    } else {
+        peek_audio_with_zero_prepend(p,
+            p->target_block_index, p->target_block, window);
+        peek_audio_with_zero_prepend(p,
+            p->search_block_index, p->search_block, p->search_block_size);
+
+        // Position of the previous optimal block relative to the start of
+        // the search block; the excluded interval is centered on it.
+        const int previous_optimal = p->target_block_index
+            - p->ola_hop_size - p->search_block_index;
+        struct interval exclude_interval = {
+            .lo = previous_optimal - exclude_interval_length_frames / 2,
+            .hi = previous_optimal + exclude_interval_length_frames / 2
+        };
+
+        // The search result is in frames, relative to the beginning of
+        // |search_block|.
+        chosen_index = compute_optimal_index(
+            p->search_block, p->search_block_size,
+            p->target_block, window,
+            p->energy_candidate_blocks,
+            p->channels,
+            exclude_interval);
+
+        // Make the index relative to the beginning of |input_buffer| and
+        // fetch the corresponding block.
+        chosen_index += p->search_block_index;
+        peek_audio_with_zero_prepend(p, chosen_index, p->optimal_block, window);
+
+        // Blend the target block into the optimal block. The target block
+        // continues the current output best, while the optimal block is the
+        // most similar one but may cause a discontinuity when overlap-added.
+        // The transition window is twice as long as a block: its first half
+        // weights the optimal block, its second half the target block, so
+        // the target dominates at the start of the block and fades out
+        // towards the end.
+        const float *weight_optimal = p->transition_window;
+        const float *weight_target = p->transition_window + window;
+        for (int ch = 0; ch < p->channels; ++ch) {
+            float *optimal = p->optimal_block[ch];
+            const float *target = p->target_block[ch];
+            for (int i = 0; i < window; ++i) {
+                optimal[i] = optimal[i] * weight_optimal[i]
+                           + target[i] * weight_target[i];
+            }
+        }
+    }
+
+    // The next target starts one hop after the block chosen now.
+    p->target_block_index = chosen_index + p->ola_hop_size;
+}
+
+// Store |output_time| and update |search_block_index| to match it.
+static void set_output_time(struct mp_scaletempo2 *p, double output_time)
+{
+    p->search_block_index = get_search_block_index(p, output_time);
+    p->output_time = output_time;
+}
+
+// Discard the input frames in front of both the target block and the search
+// block, and shift all indices that refer to |input_buffer| accordingly.
+static void remove_old_input_frames(struct mp_scaletempo2 *p)
+{
+    const int unused_frames = MPMIN(
+        p->target_block_index, p->search_block_index);
+
+    if (unused_frames > 0) {
+        seek_buffer(p, unused_frames);
+        p->search_block_index -= unused_frames;
+        p->target_block_index -= unused_frames;
+        p->output_time -= unused_frames;
+    }
+}
+
+// Run a single WSOLA iteration at |playback_rate|, which completes
+// |ola_hop_size| more frames in |wsola_output|.
+// Returns false, without changing any state, if not enough input is buffered.
+static bool run_one_wsola_iteration(struct mp_scaletempo2 *p, double playback_rate)
+{
+    if (!can_perform_wsola(p, playback_rate))
+        return false;
+
+    set_output_time(p, get_updated_time(p, playback_rate));
+    remove_old_input_frames(p);
+
+    assert(p->search_block_index + p->search_block_size <= p->input_buffer_frames);
+
+    get_optimal_block(p);
+
+    const int hop = p->ola_hop_size;
+    for (int ch = 0; ch < p->channels; ++ch) {
+        const float *block = p->optimal_block[ch];
+        float *out = p->wsola_output[ch] + p->num_complete_frames;
+
+        if (!p->wsola_output_started) {
+            // Nothing to overlap with yet: take the whole block as it is.
+            memcpy(out, block, sizeof(*block) * p->ola_window_size);
+            continue;
+        }
+
+        // Overlap-add the first half of the block onto the pending output.
+        for (int i = 0; i < hop; ++i) {
+            out[i] = out[i] * p->ola_window[hop + i] +
+                     block[i] * p->ola_window[i];
+        }
+
+        // The second half is stored unmodified.
+        memcpy(&out[hop], &block[hop], sizeof(*block) * hop);
+    }
+
+    p->num_complete_frames += hop;
+    p->wsola_output_started = true;
+    return true;
+}
+
+// Copy up to |dest_size| frames from |input_buffer|, starting at
+// |target_block_index|, to |dest| without any processing, and drop the same
+// number of frames from the front of the buffer.
+// Returns the number of frames that were copied.
+static int read_input_buffer(struct mp_scaletempo2 *p, int dest_size, float **dest)
+{
+    const int available = p->input_buffer_frames - p->target_block_index;
+    const int count = MPMIN(dest_size, available);
+    if (count <= 0)
+        return 0; // The input buffer has nothing to offer.
+
+    peek_buffer(p, count, p->target_block_index, 0, dest);
+    seek_buffer(p, count);
+    return count;
+}
+
+// Render up to |dest_size| output frames per channel into |dest| at
+// |playback_rate|. Returns the number of frames that were written, which
+// may be lower than |dest_size| if not enough input is buffered.
+int mp_scaletempo2_fill_buffer(struct mp_scaletempo2 *p,
+    float **dest, int dest_size, double playback_rate)
+{
+    if (playback_rate == 0)
+        return 0;
+
+    // Once the final input was announced, pad with silence so that the
+    // remaining audio can still be processed.
+    if (p->input_buffer_final_frames > 0)
+        add_input_buffer_final_silence(p, playback_rate);
+
+    // Rates outside of the supported range are muted. The upper limit only
+    // applies if |max_playback_rate| is greater than zero.
+    const bool below_min = playback_rate < p->opts->min_playback_rate;
+    const bool above_max = playback_rate > p->opts->max_playback_rate
+                           && p->opts->max_playback_rate > 0;
+    if (below_min || above_max) {
+        // Muted: emit silence with a single clear and skip the matching
+        // amount of input instead of running WSOLA on it.
+        const int silent_frames = MPMIN(dest_size,
+            (int) (p->input_buffer_frames / playback_rate));
+
+        // The amount of input to skip includes the fractional frame left
+        // over from the previous call. Only whole frames can be skipped, so
+        // a fraction may remain for the next call.
+        p->muted_partial_frame += silent_frames * playback_rate;
+        const int skipped_frames = (int) (p->muted_partial_frame);
+        zero_2d_partial(dest, p->channels, silent_frames);
+        seek_buffer(p, skipped_frames);
+
+        // Keep the fraction which could not be skipped. If playback resumes
+        // it is off by at most this partial frame; a later muted period
+        // accounts for it again.
+        p->muted_partial_frame -= skipped_frames;
+        return silent_frames;
+    }
+
+    const int step_if_slower = (int) ceilf(p->ola_window_size * playback_rate);
+    const int step_if_faster = (int) ceilf(p->ola_window_size / playback_rate);
+
+    // For the most common case of |playback_rate| ~= 1, pass the input
+    // through with one plain copy instead of running WSOLA.
+    if (step_if_faster >= p->ola_window_size
+        && step_if_slower >= p->ola_window_size)
+    {
+        if (p->wsola_output_started) {
+            p->wsola_output_started = false;
+
+            // Resynchronize precisely to the current target position.
+            set_output_time(p, p->target_block_index);
+            remove_old_input_frames(p);
+        }
+
+        return read_input_buffer(p, dest_size, dest);
+    }
+
+    // Alternate between draining completed frames and producing new ones,
+    // until |dest| is full or the input is exhausted.
+    int written = 0;
+    for (;;) {
+        written += write_completed_frames_to(p,
+            dest_size - written, written, dest);
+        if (written >= dest_size)
+            break;
+        if (!run_one_wsola_iteration(p, playback_rate))
+            break;
+    }
+    return written;
+}
+
+// Amount of audio held inside the filter: the buffered input behind
+// |output_time|, not counting silence padding, plus the completed but not
+// yet delivered output frames scaled by |playback_rate|.
+double mp_scaletempo2_get_latency(struct mp_scaletempo2 *p, double playback_rate)
+{
+    const double pending_input = p->input_buffer_frames - p->output_time
+                               - p->input_buffer_added_silence;
+    return pending_input + p->num_complete_frames * playback_rate;
+}
+
+// True if the filter can still deliver output: final input frames remain
+// behind the target block, a WSOLA iteration is possible at
+// |playback_rate|, or completed frames are waiting in |wsola_output|.
+bool mp_scaletempo2_frames_available(struct mp_scaletempo2 *p, double playback_rate)
+{
+    if (p->input_buffer_final_frames > p->target_block_index)
+        return true;
+    if (can_perform_wsola(p, playback_rate))
+        return true;
+    return p->num_complete_frames > 0;
+}
+
+// Release every buffer owned by |p|. |p| itself is not freed.
+// All buffer pointers are reset to NULL afterwards. mp_scaletempo2_init()
+// hands these members to realloc()/realloc_2d(), which must never receive a
+// pointer that was already freed; with NULL a later init simply allocates
+// fresh buffers, and calling destroy twice is harmless.
+void mp_scaletempo2_destroy(struct mp_scaletempo2 *p)
+{
+    free(p->ola_window);
+    p->ola_window = NULL;
+    free(p->transition_window);
+    p->transition_window = NULL;
+    free(p->wsola_output);
+    p->wsola_output = NULL;
+    free(p->optimal_block);
+    p->optimal_block = NULL;
+    free(p->search_block);
+    p->search_block = NULL;
+    free(p->target_block);
+    p->target_block = NULL;
+    free(p->input_buffer);
+    p->input_buffer = NULL;
+    free(p->energy_candidate_blocks);
+    p->energy_candidate_blocks = NULL;
+}
+
+// Discard all buffered input and pending output and return to the initial
+// processing position. Allocated buffers are kept.
+void mp_scaletempo2_reset(struct mp_scaletempo2 *p)
+{
+    // Input side.
+    p->input_buffer_frames = 0;
+    p->input_buffer_final_frames = 0;
+    p->input_buffer_added_silence = 0;
+
+    // Processing position.
+    p->output_time = 0.0;
+    p->search_block_index = 0;
+    p->target_block_index = 0;
+
+    // Output side: clear pending overlap data and forget completed frames.
+    zero_2d(p->wsola_output, p->channels, p->wsola_output_size);
+    p->num_complete_frames = 0;
+    p->wsola_output_started = false;
+}
+
+// Fill |window| with a "periodic" Hann window of |window_length| samples,
+// i.e. the first L samples of a Hann window of length L+1. Such a window
+// gives perfect reconstruction for overlap-and-add.
+static void get_symmetric_hanning_window(int window_length, float* window)
+{
+    // Phase increment per sample; one full cosine period spans the window.
+    const float scale = 2.0f * M_PI / window_length;
+
+    int n = 0;
+    while (n < window_length) {
+        window[n] = 0.5f * (1.0f - cosf(n * scale));
+        ++n;
+    }
+}
+
+
+// Set up |p| for a stream with |channels| channels and a sample rate of
+// |rate| Hz. |p->opts| must already point to valid options. All buffers are
+// (re)allocated for the sizes derived from the options and the rate, and
+// the processing state is cleared.
+void mp_scaletempo2_init(struct mp_scaletempo2 *p, int channels, int rate)
+{
+    const struct mp_scaletempo2_opts *opts = p->opts;
+
+    // Stream parameters and a clean processing state.
+    p->channels = channels;
+    p->samples_per_second = rate;
+    p->muted_partial_frame = 0;
+    p->output_time = 0;
+    p->search_block_index = 0;
+    p->target_block_index = 0;
+    p->num_complete_frames = 0;
+    p->wsola_output_started = false;
+
+    // Convert the configured durations from milliseconds to frames.
+    p->num_candidate_blocks = (int)(opts->wsola_search_interval_ms
+        * p->samples_per_second / 1000);
+    p->ola_window_size = (int)(opts->ola_window_size_ms
+        * p->samples_per_second / 1000);
+    // The window size has to be even; round odd sizes up.
+    p->ola_window_size += p->ola_window_size & 1;
+    p->ola_hop_size = p->ola_window_size / 2;
+
+    // |num_candidate_blocks| / 2 is the offset of the center of the search
+    // block to the center of the first (left most) candidate block. The
+    // offset of the center of a candidate block to its left most point is
+    // |ola_window_size| / 2 - 1. Note that |ola_window_size| is even and in
+    // our convention the center belongs to the left half, so one frame has
+    // to be subtracted to get the correct offset.
+    //
+    //                        Search Block
+    //              <------------------------------------------->
+    //
+    //                   |ola_window_size| / 2 - 1
+    //                   <----
+    //
+    //             |num_candidate_blocks| / 2
+    //                   <----------------
+    //                                 center
+    //              X----X----------------X---------------X-----X
+    //              <---------->                     <---------->
+    //                Candidate      ...               Candidate
+    //                   1,          ...         |num_candidate_blocks|
+    p->search_block_center_offset = p->num_candidate_blocks / 2
+        + (p->ola_window_size / 2 - 1);
+
+    // Overlap-and-add window, and a transition window of twice its length.
+    p->ola_window = realloc(p->ola_window, sizeof(float) * p->ola_window_size);
+    get_symmetric_hanning_window(p->ola_window_size, p->ola_window);
+    p->transition_window = realloc(p->transition_window,
+        sizeof(float) * p->ola_window_size * 2);
+    get_symmetric_hanning_window(2 * p->ola_window_size, p->transition_window);
+
+    // Output queue: one window plus one hop. It starts zeroed for the
+    // overlap-and-add of the first block.
+    p->wsola_output_size = p->ola_window_size + p->ola_hop_size;
+    p->wsola_output = realloc_2d(p->wsola_output, p->channels, p->wsola_output_size);
+    zero_2d(p->wsola_output, p->channels, p->wsola_output_size);
+
+    // Scratch blocks used by every iteration.
+    p->optimal_block = realloc_2d(p->optimal_block, p->channels, p->ola_window_size);
+    p->search_block_size = p->num_candidate_blocks + (p->ola_window_size - 1);
+    p->search_block = realloc_2d(p->search_block, p->channels, p->search_block_size);
+    p->target_block = realloc_2d(p->target_block, p->channels, p->ola_window_size);
+
+    // Input buffer, initially empty.
+    resize_input_buffer(p, 4 * MPMAX(p->ola_window_size, p->search_block_size));
+    p->input_buffer_frames = 0;
+    p->input_buffer_final_frames = 0;
+    p->input_buffer_added_silence = 0;
+
+    // One energy value per channel and candidate block.
+    p->energy_candidate_blocks = realloc(p->energy_candidate_blocks,
+        sizeof(float) * p->channels * p->num_candidate_blocks);
+}
diff --git a/audio/filter/af_scaletempo2_internals.h b/audio/filter/af_scaletempo2_internals.h
new file mode 100644
index 0000000..6c3c94c
--- /dev/null
+++ b/audio/filter/af_scaletempo2_internals.h
@@ -0,0 +1,134 @@
+// This filter was ported from Chromium
+// (https://chromium.googlesource.com/chromium/chromium/+/51ed77e3f37a9a9b80d6d0a8259e84a8ca635259/media/filters/audio_renderer_algorithm.cc)
+//
+// Copyright 2015 The Chromium Authors. All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "common/common.h"
+
+struct mp_scaletempo2_opts {
+ // Min/max supported playback rates for slow/fast audio. Audio at a rate outside
+ // of this range is muted. The upper limit only applies if it is greater than 0.
+ // Audio at these speeds would sound better under a frequency domain algorithm.
+ float min_playback_rate;
+ float max_playback_rate;
+ // Overlap-and-add window size in milliseconds (rounded up to an even frame count).
+ float ola_window_size_ms;
+ // Size of search interval in milliseconds. The search interval is
+ // [-delta delta] around |output_index| * |playback_rate|. So the search
+ // interval is 2 * delta.
+ float wsola_search_interval_ms;
+};
+
+struct mp_scaletempo2 {
+ struct mp_scaletempo2_opts *opts; // Options; read by mp_scaletempo2_init() and mp_scaletempo2_fill_buffer().
+ // Number of channels in audio stream.
+ int channels;
+ // Sample rate of audio stream.
+ int samples_per_second;
+ // If muted, keep track of partial frames that should have been skipped over.
+ double muted_partial_frame;
+ // Book keeping of the current time of generated audio, in frames.
+ // Corresponds to the center of |search_block|. This is increased in
+ // intervals of |ola_hop_size| multiplied by the current playback_rate,
+ // for every WSOLA iteration. This tracks the number of advanced frames as
+ // a double to achieve accurate playback rates beyond the integer precision
+ // of |search_block_index|.
+ // Needs to be adjusted like any other index when frames are evicted from
+ // |input_buffer|.
+ double output_time;
+ // The offset of the center frame of |search_block| w.r.t. its first frame.
+ int search_block_center_offset;
+ // Index of the beginning of the |search_block|, in frames. This may be
+ // negative, which is handled by |peek_audio_with_zero_prepend|.
+ int search_block_index;
+ // Number of blocks to search to find the most similar one to the target
+ // block.
+ int num_candidate_blocks;
+ // Index of the beginning of the target block, counted in frames.
+ int target_block_index;
+ // Overlap-and-add window size in frames.
+ int ola_window_size;
+ // The hop size of overlap-and-add in frames. This implementation assumes 50%
+ // overlap-and-add.
+ int ola_hop_size;
+ // Number of frames in |wsola_output| for which overlap-and-add is completed
+ // and which can be copied to output if fill_buffer() is called. It also
+ // specifies the index where the next WSOLA window has to overlap-and-add.
+ int num_complete_frames;
+ // Whether |wsola_output| contains an additional |ola_hop_size| of overlap
+ // frames for the next iteration.
+ bool wsola_output_started;
+ // Overlap-and-add window.
+ float *ola_window;
+ // Transition window, used to update |optimal_block| by a weighted sum of
+ // |optimal_block| and |target_block|.
+ float *transition_window;
+ // This stores a part of the output that is created but couldn't be rendered.
+ // Output is generated frame-by-frame which at some point might exceed the
+ // number of requested samples. Furthermore, due to overlap-and-add,
+ // the last half-window of the output is incomplete, which is stored in this
+ // buffer.
+ float **wsola_output;
+ int wsola_output_size; // Frames per channel in |wsola_output|: |ola_window_size| + |ola_hop_size|.
+ // Auxiliary variables to avoid allocation in every iteration.
+ // Stores the optimal block in every iteration. This is the most
+ // similar block to |target_block| within |search_block| and it is
+ // overlap-and-added to |wsola_output|.
+ float **optimal_block;
+ // A block of data that search is performed over to find the |optimal_block|.
+ float **search_block;
+ int search_block_size; // Frames per channel in |search_block|: |num_candidate_blocks| + |ola_window_size| - 1.
+ // Stores the target block. |search_block| is
+ // searched for a block (|optimal_block|) that is most similar to
+ // |target_block|.
+ float **target_block;
+ // Buffered audio data.
+ float **input_buffer;
+ int input_buffer_size; // Allocated capacity of |input_buffer|, in frames per channel.
+ int input_buffer_frames; // Number of frames currently stored in |input_buffer|.
+ // How many frames in |input_buffer| need to be flushed by padding with
+ // silence to process the final packet. While this is nonzero, the filter
+ // appends silence to |input_buffer| until these frames are processed.
+ int input_buffer_final_frames;
+ // How many additional frames of silence have been added to |input_buffer|
+ // for padding after the final packet.
+ int input_buffer_added_silence;
+ float *energy_candidate_blocks; // Scratch space for block energies: |channels| * |num_candidate_blocks| floats.
+};
+
+void mp_scaletempo2_destroy(struct mp_scaletempo2 *p); // Frees all buffers owned by |p|, but not |p| itself.
+void mp_scaletempo2_reset(struct mp_scaletempo2 *p); // Drops buffered input and pending output; allocations are kept.
+void mp_scaletempo2_init(struct mp_scaletempo2 *p, int channels, int rate); // (Re)allocates buffers; reads |p->opts|, which must be set before.
+double mp_scaletempo2_get_latency(struct mp_scaletempo2 *p, double playback_rate); // Audio currently held inside the filter (presumably in input frames; confirm with caller).
+int mp_scaletempo2_fill_input_buffer(struct mp_scaletempo2 *p,
+ uint8_t **planes, int frame_size, double playback_rate); // Returns the number of frames consumed from |planes| (may be fewer than |frame_size|).
+void mp_scaletempo2_set_final(struct mp_scaletempo2 *p); // Marks the buffered input as final, so that fill_buffer() pads it with silence.
+int mp_scaletempo2_fill_buffer(struct mp_scaletempo2 *p,
+ float **dest, int dest_size, double playback_rate); // Returns the number of frames written to |dest| (at most |dest_size|).
+bool mp_scaletempo2_frames_available(struct mp_scaletempo2 *p, double playback_rate); // True if fill_buffer() can still produce output.