diff options
Diffstat (limited to 'audio')
52 files changed, 18636 insertions, 0 deletions
diff --git a/audio/aframe.c b/audio/aframe.c new file mode 100644 index 0000000..cb6ea17 --- /dev/null +++ b/audio/aframe.c @@ -0,0 +1,720 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <libavutil/frame.h> +#include <libavutil/mem.h> + +#include "config.h" + +#include "common/common.h" + +#include "chmap.h" +#include "chmap_avchannel.h" +#include "fmt-conversion.h" +#include "format.h" +#include "aframe.h" + +struct mp_aframe { + AVFrame *av_frame; + // We support channel layouts different from AVFrame channel masks + struct mp_chmap chmap; + // We support spdif formats, which are allocated as AV_SAMPLE_FMT_S16. 
+ int format; + double pts; + double speed; +}; + +struct avframe_opaque { + double speed; +}; + +static void free_frame(void *ptr) +{ + struct mp_aframe *frame = ptr; + av_frame_free(&frame->av_frame); +} + +struct mp_aframe *mp_aframe_create(void) +{ + struct mp_aframe *frame = talloc_zero(NULL, struct mp_aframe); + frame->av_frame = av_frame_alloc(); + MP_HANDLE_OOM(frame->av_frame); + talloc_set_destructor(frame, free_frame); + mp_aframe_reset(frame); + return frame; +} + +struct mp_aframe *mp_aframe_new_ref(struct mp_aframe *frame) +{ + if (!frame) + return NULL; + + struct mp_aframe *dst = mp_aframe_create(); + + dst->chmap = frame->chmap; + dst->format = frame->format; + dst->pts = frame->pts; + dst->speed = frame->speed; + + if (mp_aframe_is_allocated(frame)) { + if (av_frame_ref(dst->av_frame, frame->av_frame) < 0) + abort(); + } else { + // av_frame_ref() would fail. + mp_aframe_config_copy(dst, frame); + } + + return dst; +} + +// Revert to state after mp_aframe_create(). +void mp_aframe_reset(struct mp_aframe *frame) +{ + av_frame_unref(frame->av_frame); + frame->chmap.num = 0; + frame->format = 0; + frame->pts = MP_NOPTS_VALUE; + frame->speed = 1.0; +} + +// Remove all actual audio data and leave only the metadata. +void mp_aframe_unref_data(struct mp_aframe *frame) +{ + // In a fucked up way, this is less complex than just unreffing the data. + struct mp_aframe *tmp = mp_aframe_create(); + MPSWAP(struct mp_aframe, *tmp, *frame); + mp_aframe_reset(frame); + mp_aframe_config_copy(frame, tmp); + talloc_free(tmp); +} + +// Allocate this much data. Returns false for failure (data already allocated, +// invalid sample count or format, allocation failures). +// Normally you're supposed to use a frame pool and mp_aframe_pool_allocate(). 
+bool mp_aframe_alloc_data(struct mp_aframe *frame, int samples) +{ + if (mp_aframe_is_allocated(frame)) + return false; + struct mp_aframe_pool *p = mp_aframe_pool_create(NULL); + int r = mp_aframe_pool_allocate(p, frame, samples); + talloc_free(p); + return r >= 0; +} + +// Return a new reference to the data in av_frame. av_frame itself is not +// touched. Returns NULL if not representable, or if input is NULL. +// Does not copy the timestamps. +struct mp_aframe *mp_aframe_from_avframe(struct AVFrame *av_frame) +{ + if (!av_frame || av_frame->width > 0 || av_frame->height > 0) + return NULL; + +#if HAVE_AV_CHANNEL_LAYOUT + if (!av_channel_layout_check(&av_frame->ch_layout)) + return NULL; + + struct mp_chmap converted_map = { 0 }; + if (!mp_chmap_from_av_layout(&converted_map, &av_frame->ch_layout)) { + return NULL; + } +#endif + + int format = af_from_avformat(av_frame->format); + if (!format && av_frame->format != AV_SAMPLE_FMT_NONE) + return NULL; + + struct mp_aframe *frame = mp_aframe_create(); + + // This also takes care of forcing refcounting. + if (av_frame_ref(frame->av_frame, av_frame) < 0) + abort(); + + frame->format = format; +#if !HAVE_AV_CHANNEL_LAYOUT + mp_chmap_from_lavc(&frame->chmap, frame->av_frame->channel_layout); + + // FFmpeg being a stupid POS again + if (frame->chmap.num != frame->av_frame->channels) + mp_chmap_from_channels(&frame->chmap, av_frame->channels); +#else + frame->chmap = converted_map; +#endif + + if (av_frame->opaque_ref) { + struct avframe_opaque *op = (void *)av_frame->opaque_ref->data; + frame->speed = op->speed; + } + + return frame; +} + +// Return a new reference to the data in frame. Returns NULL is not +// representable (), or if input is NULL. +// Does not copy the timestamps. 
+struct AVFrame *mp_aframe_to_avframe(struct mp_aframe *frame) +{ + if (!frame) + return NULL; + + if (af_to_avformat(frame->format) != frame->av_frame->format) + return NULL; + + if (!mp_chmap_is_lavc(&frame->chmap)) + return NULL; + + if (!frame->av_frame->opaque_ref && frame->speed != 1.0) { + frame->av_frame->opaque_ref = + av_buffer_alloc(sizeof(struct avframe_opaque)); + if (!frame->av_frame->opaque_ref) + return NULL; + + struct avframe_opaque *op = (void *)frame->av_frame->opaque_ref->data; + op->speed = frame->speed; + } + + return av_frame_clone(frame->av_frame); +} + +struct AVFrame *mp_aframe_to_avframe_and_unref(struct mp_aframe *frame) +{ + AVFrame *av = mp_aframe_to_avframe(frame); + talloc_free(frame); + return av; +} + +// You must not use this. +struct AVFrame *mp_aframe_get_raw_avframe(struct mp_aframe *frame) +{ + return frame->av_frame; +} + +// Return whether it has associated audio data. (If not, metadata only.) +bool mp_aframe_is_allocated(struct mp_aframe *frame) +{ + return frame->av_frame->buf[0] || frame->av_frame->extended_data[0]; +} + +// Clear dst, and then copy the configuration to it. +void mp_aframe_config_copy(struct mp_aframe *dst, struct mp_aframe *src) +{ + mp_aframe_reset(dst); + + dst->chmap = src->chmap; + dst->format = src->format; + + mp_aframe_copy_attributes(dst, src); + + dst->av_frame->sample_rate = src->av_frame->sample_rate; + dst->av_frame->format = src->av_frame->format; + +#if !HAVE_AV_CHANNEL_LAYOUT + dst->av_frame->channel_layout = src->av_frame->channel_layout; + // FFmpeg being a stupid POS again + dst->av_frame->channels = src->av_frame->channels; +#else + if (av_channel_layout_copy(&dst->av_frame->ch_layout, + &src->av_frame->ch_layout) < 0) + abort(); +#endif +} + +// Copy "soft" attributes from src to dst, excluding things which affect +// frame allocation and organization. 
+void mp_aframe_copy_attributes(struct mp_aframe *dst, struct mp_aframe *src) +{ + dst->pts = src->pts; + dst->speed = src->speed; + + int rate = dst->av_frame->sample_rate; + + if (av_frame_copy_props(dst->av_frame, src->av_frame) < 0) + abort(); + + dst->av_frame->sample_rate = rate; +} + +// Return whether a and b use the same physical audio format. Extra metadata +// such as PTS, per-frame signalling, and AVFrame side data is not compared. +bool mp_aframe_config_equals(struct mp_aframe *a, struct mp_aframe *b) +{ + struct mp_chmap ca = {0}, cb = {0}; + mp_aframe_get_chmap(a, &ca); + mp_aframe_get_chmap(b, &cb); + return mp_chmap_equals(&ca, &cb) && + mp_aframe_get_rate(a) == mp_aframe_get_rate(b) && + mp_aframe_get_format(a) == mp_aframe_get_format(b); +} + +// Return whether all required format fields have been set. +bool mp_aframe_config_is_valid(struct mp_aframe *frame) +{ + return frame->format && frame->chmap.num && frame->av_frame->sample_rate; +} + +// Return the pointer to the first sample for each plane. The pointers stay +// valid until the next call that mutates frame somehow. You must not write to +// the audio data. Returns NULL if no frame allocated. +uint8_t **mp_aframe_get_data_ro(struct mp_aframe *frame) +{ + return mp_aframe_is_allocated(frame) ? frame->av_frame->extended_data : NULL; +} + +// Like mp_aframe_get_data_ro(), but you can write to the audio data. +// Additionally, it will return NULL if copy-on-write fails. 
+uint8_t **mp_aframe_get_data_rw(struct mp_aframe *frame) +{ + if (!mp_aframe_is_allocated(frame)) + return NULL; + if (av_frame_make_writable(frame->av_frame) < 0) + return NULL; + return frame->av_frame->extended_data; +} + +int mp_aframe_get_format(struct mp_aframe *frame) +{ + return frame->format; +} + +bool mp_aframe_get_chmap(struct mp_aframe *frame, struct mp_chmap *out) +{ + if (!mp_chmap_is_valid(&frame->chmap)) + return false; + *out = frame->chmap; + return true; +} + +int mp_aframe_get_channels(struct mp_aframe *frame) +{ + return frame->chmap.num; +} + +int mp_aframe_get_rate(struct mp_aframe *frame) +{ + return frame->av_frame->sample_rate; +} + +int mp_aframe_get_size(struct mp_aframe *frame) +{ + return frame->av_frame->nb_samples; +} + +double mp_aframe_get_pts(struct mp_aframe *frame) +{ + return frame->pts; +} + +bool mp_aframe_set_format(struct mp_aframe *frame, int format) +{ + if (mp_aframe_is_allocated(frame)) + return false; + enum AVSampleFormat av_format = af_to_avformat(format); + if (av_format == AV_SAMPLE_FMT_NONE && format) { + if (!af_fmt_is_spdif(format)) + return false; + av_format = AV_SAMPLE_FMT_S16; + } + frame->format = format; + frame->av_frame->format = av_format; + return true; +} + +bool mp_aframe_set_chmap(struct mp_aframe *frame, struct mp_chmap *in) +{ + if (!mp_chmap_is_valid(in) && !mp_chmap_is_empty(in)) + return false; + if (mp_aframe_is_allocated(frame) && in->num != frame->chmap.num) + return false; + +#if !HAVE_AV_CHANNEL_LAYOUT + uint64_t lavc_layout = mp_chmap_to_lavc_unchecked(in); + if (!lavc_layout && in->num) + return false; +#endif + frame->chmap = *in; + +#if !HAVE_AV_CHANNEL_LAYOUT + frame->av_frame->channel_layout = lavc_layout; + // FFmpeg being a stupid POS again + frame->av_frame->channels = frame->chmap.num; +#else + mp_chmap_to_av_layout(&frame->av_frame->ch_layout, in); +#endif + return true; +} + +bool mp_aframe_set_rate(struct mp_aframe *frame, int rate) +{ + if (rate < 1 || rate > 10000000) + 
return false; + frame->av_frame->sample_rate = rate; + return true; +} + +bool mp_aframe_set_size(struct mp_aframe *frame, int samples) +{ + if (!mp_aframe_is_allocated(frame) || mp_aframe_get_size(frame) < samples) + return false; + frame->av_frame->nb_samples = MPMAX(samples, 0); + return true; +} + +void mp_aframe_set_pts(struct mp_aframe *frame, double pts) +{ + frame->pts = pts; +} + +// Set a speed factor. This is multiplied with the sample rate to get the +// "effective" samplerate (mp_aframe_get_effective_rate()), which will be used +// to do PTS calculations. If speed!=1.0, the PTS values always refer to the +// original PTS (before changing speed), and if you want reasonably continuous +// PTS between frames, you need to use the effective samplerate. +void mp_aframe_set_speed(struct mp_aframe *frame, double factor) +{ + frame->speed = factor; +} + +// Adjust current speed factor. +void mp_aframe_mul_speed(struct mp_aframe *frame, double factor) +{ + frame->speed *= factor; +} + +double mp_aframe_get_speed(struct mp_aframe *frame) +{ + return frame->speed; +} + +// Matters for speed changed frames (such as a frame which has been resampled +// to play at a different speed). +// Return the sample rate at which the frame would have to be played to result +// in the same duration as the original frame before the speed change. +// This is used for A/V sync. +double mp_aframe_get_effective_rate(struct mp_aframe *frame) +{ + return mp_aframe_get_rate(frame) / frame->speed; +} + +// Return number of data pointers. +int mp_aframe_get_planes(struct mp_aframe *frame) +{ + return af_fmt_is_planar(mp_aframe_get_format(frame)) + ? mp_aframe_get_channels(frame) : 1; +} + +// Return number of bytes between 2 consecutive samples on the same plane. +size_t mp_aframe_get_sstride(struct mp_aframe *frame) +{ + int format = mp_aframe_get_format(frame); + return af_fmt_to_bytes(format) * + (af_fmt_is_planar(format) ? 
1 : mp_aframe_get_channels(frame)); +} + +// Return total number of samples on each plane. +int mp_aframe_get_total_plane_samples(struct mp_aframe *frame) +{ + return frame->av_frame->nb_samples * + (af_fmt_is_planar(mp_aframe_get_format(frame)) + ? 1 : mp_aframe_get_channels(frame)); +} + +char *mp_aframe_format_str_buf(char *buf, size_t buf_size, struct mp_aframe *fmt) +{ + char ch[128]; + mp_chmap_to_str_buf(ch, sizeof(ch), &fmt->chmap); + char *hr_ch = mp_chmap_to_str_hr(&fmt->chmap); + if (strcmp(hr_ch, ch) != 0) + mp_snprintf_cat(ch, sizeof(ch), " (%s)", hr_ch); + snprintf(buf, buf_size, "%dHz %s %dch %s", fmt->av_frame->sample_rate, + ch, fmt->chmap.num, af_fmt_to_str(fmt->format)); + return buf; +} + +// Set data to the audio after the given number of samples (i.e. slice it). +void mp_aframe_skip_samples(struct mp_aframe *f, int samples) +{ + assert(samples >= 0 && samples <= mp_aframe_get_size(f)); + + if (av_frame_make_writable(f->av_frame) < 0) + return; // go complain to ffmpeg + + int num_planes = mp_aframe_get_planes(f); + size_t sstride = mp_aframe_get_sstride(f); + for (int n = 0; n < num_planes; n++) { + memmove(f->av_frame->extended_data[n], + f->av_frame->extended_data[n] + samples * sstride, + (f->av_frame->nb_samples - samples) * sstride); + } + + f->av_frame->nb_samples -= samples; + + if (f->pts != MP_NOPTS_VALUE) + f->pts += samples / mp_aframe_get_effective_rate(f); +} + +// sanitize a floating point sample value +#define sanitizef(f) do { \ + if (!isnormal(f)) \ + (f) = 0; \ +} while (0) + +void mp_aframe_sanitize_float(struct mp_aframe *mpa) +{ + int format = af_fmt_from_planar(mp_aframe_get_format(mpa)); + if (format != AF_FORMAT_FLOAT && format != AF_FORMAT_DOUBLE) + return; + int num_planes = mp_aframe_get_planes(mpa); + uint8_t **planes = mp_aframe_get_data_rw(mpa); + if (!planes) + return; + for (int p = 0; p < num_planes; p++) { + void *ptr = planes[p]; + int total = mp_aframe_get_total_plane_samples(mpa); + switch (format) { + case 
AF_FORMAT_FLOAT: + for (int s = 0; s < total; s++) + sanitizef(((float *)ptr)[s]); + break; + case AF_FORMAT_DOUBLE: + for (int s = 0; s < total; s++) + sanitizef(((double *)ptr)[s]); + break; + } + } +} + +// Return the timestamp of the sample just after the end of this frame. +double mp_aframe_end_pts(struct mp_aframe *f) +{ + double rate = mp_aframe_get_effective_rate(f); + if (f->pts == MP_NOPTS_VALUE || rate <= 0) + return MP_NOPTS_VALUE; + return f->pts + f->av_frame->nb_samples / rate; +} + +// Return the duration in seconds of the frame (0 if invalid). +double mp_aframe_duration(struct mp_aframe *f) +{ + double rate = mp_aframe_get_effective_rate(f); + if (rate <= 0) + return 0; + return f->av_frame->nb_samples / rate; +} + +// Clip the given frame to the given timestamp range. Adjusts the frame size +// and timestamp. +// Refuses to change spdif frames. +void mp_aframe_clip_timestamps(struct mp_aframe *f, double start, double end) +{ + double f_end = mp_aframe_end_pts(f); + double rate = mp_aframe_get_effective_rate(f); + if (f_end == MP_NOPTS_VALUE) + return; + if (end != MP_NOPTS_VALUE) { + if (f_end >= end) { + if (f->pts >= end) { + f->av_frame->nb_samples = 0; + } else { + if (af_fmt_is_spdif(mp_aframe_get_format(f))) + return; + int new = (end - f->pts) * rate; + f->av_frame->nb_samples = MPCLAMP(new, 0, f->av_frame->nb_samples); + } + } + } + if (start != MP_NOPTS_VALUE) { + if (f->pts < start) { + if (f_end <= start) { + f->av_frame->nb_samples = 0; + f->pts = f_end; + } else { + if (af_fmt_is_spdif(mp_aframe_get_format(f))) + return; + int skip = (start - f->pts) * rate; + skip = MPCLAMP(skip, 0, f->av_frame->nb_samples); + mp_aframe_skip_samples(f, skip); + } + } + } +} + +bool mp_aframe_copy_samples(struct mp_aframe *dst, int dst_offset, + struct mp_aframe *src, int src_offset, + int samples) +{ + if (!mp_aframe_config_equals(dst, src)) + return false; + + if (mp_aframe_get_size(dst) < dst_offset + samples || + mp_aframe_get_size(src) < 
src_offset + samples) + return false; + + uint8_t **s = mp_aframe_get_data_ro(src); + uint8_t **d = mp_aframe_get_data_rw(dst); + if (!s || !d) + return false; + + int planes = mp_aframe_get_planes(dst); + size_t sstride = mp_aframe_get_sstride(dst); + + for (int n = 0; n < planes; n++) { + memcpy(d[n] + dst_offset * sstride, s[n] + src_offset * sstride, + samples * sstride); + } + + return true; +} + +bool mp_aframe_set_silence(struct mp_aframe *f, int offset, int samples) +{ + if (mp_aframe_get_size(f) < offset + samples) + return false; + + int format = mp_aframe_get_format(f); + uint8_t **d = mp_aframe_get_data_rw(f); + if (!d) + return false; + + int planes = mp_aframe_get_planes(f); + size_t sstride = mp_aframe_get_sstride(f); + + for (int n = 0; n < planes; n++) + af_fill_silence(d[n] + offset * sstride, samples * sstride, format); + + return true; +} + +bool mp_aframe_reverse(struct mp_aframe *f) +{ + int format = mp_aframe_get_format(f); + size_t bps = af_fmt_to_bytes(format); + if (!af_fmt_is_pcm(format) || bps > 16) + return false; + + uint8_t **d = mp_aframe_get_data_rw(f); + if (!d) + return false; + + int planes = mp_aframe_get_planes(f); + int samples = mp_aframe_get_size(f); + int channels = mp_aframe_get_channels(f); + size_t sstride = mp_aframe_get_sstride(f); + + int plane_samples = channels; + if (af_fmt_is_planar(format)) + plane_samples = 1; + + for (int p = 0; p < planes; p++) { + for (int n = 0; n < samples / 2; n++) { + int s1_offset = n * sstride; + int s2_offset = (samples - 1 - n) * sstride; + for (int c = 0; c < plane_samples; c++) { + // Nobody said it'd be fast. + char tmp[16]; + uint8_t *s1 = d[p] + s1_offset + c * bps; + uint8_t *s2 = d[p] + s2_offset + c * bps; + memcpy(tmp, s2, bps); + memcpy(s2, s1, bps); + memcpy(s1, tmp, bps); + } + } + } + + return true; +} + +int mp_aframe_approx_byte_size(struct mp_aframe *frame) +{ + // God damn, AVFrame is too fucking annoying. 
Just go with the size that + // allocating a new frame would use. + int planes = mp_aframe_get_planes(frame); + size_t sstride = mp_aframe_get_sstride(frame); + int samples = frame->av_frame->nb_samples; + int plane_size = MP_ALIGN_UP(sstride * MPMAX(samples, 1), 32); + return plane_size * planes + sizeof(*frame); +} + +struct mp_aframe_pool { + AVBufferPool *avpool; + int element_size; +}; + +struct mp_aframe_pool *mp_aframe_pool_create(void *ta_parent) +{ + return talloc_zero(ta_parent, struct mp_aframe_pool); +} + +static void mp_aframe_pool_destructor(void *p) +{ + struct mp_aframe_pool *pool = p; + av_buffer_pool_uninit(&pool->avpool); +} + +// Like mp_aframe_allocate(), but use the pool to allocate data. +int mp_aframe_pool_allocate(struct mp_aframe_pool *pool, struct mp_aframe *frame, + int samples) +{ + int planes = mp_aframe_get_planes(frame); + size_t sstride = mp_aframe_get_sstride(frame); + // FFmpeg hardcodes similar hidden possibly-requirements in a number of + // places: av_frame_get_buffer(), libavcodec's get_buffer(), mem.c, + // probably more. + int align_samples = MP_ALIGN_UP(MPMAX(samples, 1), 32); + int plane_size = MP_ALIGN_UP(sstride * align_samples, 64); + int size = plane_size * planes; + + if (size <= 0 || mp_aframe_is_allocated(frame)) + return -1; + + if (!pool->avpool || size > pool->element_size) { + size_t alloc = ta_calc_prealloc_elems(size); + if (alloc >= INT_MAX) + return -1; + av_buffer_pool_uninit(&pool->avpool); + pool->element_size = alloc; + pool->avpool = av_buffer_pool_init(pool->element_size, NULL); + if (!pool->avpool) + return -1; + talloc_set_destructor(pool, mp_aframe_pool_destructor); + } + + // Yes, you have to do all this shit manually. + // At least it's less stupid than av_frame_get_buffer(), which just wipes + // the entire frame struct on error for no reason. 
+ AVFrame *av_frame = frame->av_frame; + if (av_frame->extended_data != av_frame->data) + av_freep(&av_frame->extended_data); // sigh + if (planes > AV_NUM_DATA_POINTERS) { + av_frame->extended_data = + av_calloc(planes, sizeof(av_frame->extended_data[0])); + MP_HANDLE_OOM(av_frame->extended_data); + } else { + av_frame->extended_data = av_frame->data; + } + av_frame->buf[0] = av_buffer_pool_get(pool->avpool); + if (!av_frame->buf[0]) + return -1; + av_frame->linesize[0] = samples * sstride; + for (int n = 0; n < planes; n++) + av_frame->extended_data[n] = av_frame->buf[0]->data + n * plane_size; + if (planes > AV_NUM_DATA_POINTERS) { + for (int n = 0; n < AV_NUM_DATA_POINTERS; n++) + av_frame->data[n] = av_frame->extended_data[n]; + } + av_frame->nb_samples = samples; + + return 0; +} diff --git a/audio/aframe.h b/audio/aframe.h new file mode 100644 index 0000000..d19c7e8 --- /dev/null +++ b/audio/aframe.h @@ -0,0 +1,75 @@ +#pragma once + +#include <stdint.h> +#include <stddef.h> +#include <stdbool.h> + +struct mp_aframe; +struct AVFrame; +struct mp_chmap; + +struct mp_aframe *mp_aframe_from_avframe(struct AVFrame *av_frame); +struct mp_aframe *mp_aframe_create(void); +struct mp_aframe *mp_aframe_new_ref(struct mp_aframe *frame); + +void mp_aframe_reset(struct mp_aframe *frame); +void mp_aframe_unref_data(struct mp_aframe *frame); + +struct AVFrame *mp_aframe_to_avframe(struct mp_aframe *frame); +struct AVFrame *mp_aframe_to_avframe_and_unref(struct mp_aframe *frame); +struct AVFrame *mp_aframe_get_raw_avframe(struct mp_aframe *frame); + +bool mp_aframe_is_allocated(struct mp_aframe *frame); +bool mp_aframe_alloc_data(struct mp_aframe *frame, int samples); + +void mp_aframe_config_copy(struct mp_aframe *dst, struct mp_aframe *src); +bool mp_aframe_config_equals(struct mp_aframe *a, struct mp_aframe *b); +bool mp_aframe_config_is_valid(struct mp_aframe *frame); + +void mp_aframe_copy_attributes(struct mp_aframe *dst, struct mp_aframe *src); + +uint8_t 
**mp_aframe_get_data_ro(struct mp_aframe *frame); +uint8_t **mp_aframe_get_data_rw(struct mp_aframe *frame); + +int mp_aframe_get_format(struct mp_aframe *frame); +bool mp_aframe_get_chmap(struct mp_aframe *frame, struct mp_chmap *out); +int mp_aframe_get_channels(struct mp_aframe *frame); +int mp_aframe_get_rate(struct mp_aframe *frame); +int mp_aframe_get_size(struct mp_aframe *frame); +double mp_aframe_get_pts(struct mp_aframe *frame); +double mp_aframe_get_speed(struct mp_aframe *frame); +double mp_aframe_get_effective_rate(struct mp_aframe *frame); + +bool mp_aframe_set_format(struct mp_aframe *frame, int format); +bool mp_aframe_set_chmap(struct mp_aframe *frame, struct mp_chmap *in); +bool mp_aframe_set_rate(struct mp_aframe *frame, int rate); +bool mp_aframe_set_size(struct mp_aframe *frame, int samples); +void mp_aframe_set_pts(struct mp_aframe *frame, double pts); +void mp_aframe_set_speed(struct mp_aframe *frame, double factor); +void mp_aframe_mul_speed(struct mp_aframe *frame, double factor); + +int mp_aframe_get_planes(struct mp_aframe *frame); +int mp_aframe_get_total_plane_samples(struct mp_aframe *frame); +size_t mp_aframe_get_sstride(struct mp_aframe *frame); + +bool mp_aframe_reverse(struct mp_aframe *frame); + +int mp_aframe_approx_byte_size(struct mp_aframe *frame); + +char *mp_aframe_format_str_buf(char *buf, size_t buf_size, struct mp_aframe *fmt); +#define mp_aframe_format_str(fmt) mp_aframe_format_str_buf((char[32]){0}, 32, (fmt)) + +void mp_aframe_skip_samples(struct mp_aframe *f, int samples); +void mp_aframe_sanitize_float(struct mp_aframe *f); +double mp_aframe_end_pts(struct mp_aframe *f); +double mp_aframe_duration(struct mp_aframe *f); +void mp_aframe_clip_timestamps(struct mp_aframe *f, double start, double end); +bool mp_aframe_copy_samples(struct mp_aframe *dst, int dst_offset, + struct mp_aframe *src, int src_offset, + int samples); +bool mp_aframe_set_silence(struct mp_aframe *f, int offset, int samples); + +struct 
mp_aframe_pool; +struct mp_aframe_pool *mp_aframe_pool_create(void *ta_parent); +int mp_aframe_pool_allocate(struct mp_aframe_pool *pool, struct mp_aframe *frame, + int samples); diff --git a/audio/chmap.c b/audio/chmap.c new file mode 100644 index 0000000..e2b95f4 --- /dev/null +++ b/audio/chmap.c @@ -0,0 +1,515 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <stdlib.h> +#include <assert.h> + +#include <libavutil/common.h> + +#include "common/common.h" +#include "common/msg.h" +#include "chmap.h" + +// Names taken from libavutil/channel_layout.c (Not accessible by API.) +// Use of these names is hard-coded in some places (e.g. 
ao_alsa.c) +static const char *const speaker_names[MP_SPEAKER_ID_COUNT][2] = { + [MP_SPEAKER_ID_FL] = {"fl", "front left"}, + [MP_SPEAKER_ID_FR] = {"fr", "front right"}, + [MP_SPEAKER_ID_FC] = {"fc", "front center"}, + [MP_SPEAKER_ID_LFE] = {"lfe", "low frequency"}, + [MP_SPEAKER_ID_BL] = {"bl", "back left"}, + [MP_SPEAKER_ID_BR] = {"br", "back right"}, + [MP_SPEAKER_ID_FLC] = {"flc", "front left-of-center"}, + [MP_SPEAKER_ID_FRC] = {"frc", "front right-of-center"}, + [MP_SPEAKER_ID_BC] = {"bc", "back center"}, + [MP_SPEAKER_ID_SL] = {"sl", "side left"}, + [MP_SPEAKER_ID_SR] = {"sr", "side right"}, + [MP_SPEAKER_ID_TC] = {"tc", "top center"}, + [MP_SPEAKER_ID_TFL] = {"tfl", "top front left"}, + [MP_SPEAKER_ID_TFC] = {"tfc", "top front center"}, + [MP_SPEAKER_ID_TFR] = {"tfr", "top front right"}, + [MP_SPEAKER_ID_TBL] = {"tbl", "top back left"}, + [MP_SPEAKER_ID_TBC] = {"tbc", "top back center"}, + [MP_SPEAKER_ID_TBR] = {"tbr", "top back right"}, + [MP_SPEAKER_ID_DL] = {"dl", "downmix left"}, + [MP_SPEAKER_ID_DR] = {"dr", "downmix right"}, + [MP_SPEAKER_ID_WL] = {"wl", "wide left"}, + [MP_SPEAKER_ID_WR] = {"wr", "wide right"}, + [MP_SPEAKER_ID_SDL] = {"sdl", "surround direct left"}, + [MP_SPEAKER_ID_SDR] = {"sdr", "surround direct right"}, + [MP_SPEAKER_ID_LFE2] = {"lfe2", "low frequency 2"}, + [MP_SPEAKER_ID_TSL] = {"tsl", "top side left"}, + [MP_SPEAKER_ID_TSR] = {"tsr", "top side right"}, + [MP_SPEAKER_ID_BFC] = {"bfc", "bottom front center"}, + [MP_SPEAKER_ID_BFL] = {"bfl", "bottom front left"}, + [MP_SPEAKER_ID_BFR] = {"bfr", "bottom front right"}, + [MP_SPEAKER_ID_NA] = {"na", "not available"}, +}; + +// Names taken from libavutil/channel_layout.c (Not accessible by API.) +// Channel order corresponds to lavc/waveex, except for the alsa entries. 
+static const char *const std_layout_names[][2] = { + {"empty", ""}, // not in lavc + {"mono", "fc"}, + {"1.0", "fc"}, // not in lavc + {"stereo", "fl-fr"}, + {"2.0", "fl-fr"}, // not in lavc + {"2.1", "fl-fr-lfe"}, + {"3.0", "fl-fr-fc"}, + {"3.0(back)", "fl-fr-bc"}, + {"4.0", "fl-fr-fc-bc"}, + {"quad", "fl-fr-bl-br"}, + {"quad(side)", "fl-fr-sl-sr"}, + {"3.1", "fl-fr-fc-lfe"}, + {"3.1(back)", "fl-fr-lfe-bc"}, // not in lavc + {"5.0", "fl-fr-fc-bl-br"}, + {"5.0(alsa)", "fl-fr-bl-br-fc"}, // not in lavc + {"5.0(side)", "fl-fr-fc-sl-sr"}, + {"4.1", "fl-fr-fc-lfe-bc"}, + {"4.1(alsa)", "fl-fr-bl-br-lfe"}, // not in lavc + {"5.1", "fl-fr-fc-lfe-bl-br"}, + {"5.1(alsa)", "fl-fr-bl-br-fc-lfe"}, // not in lavc + {"5.1(side)", "fl-fr-fc-lfe-sl-sr"}, + {"6.0", "fl-fr-fc-bc-sl-sr"}, + {"6.0(front)", "fl-fr-flc-frc-sl-sr"}, + {"hexagonal", "fl-fr-fc-bl-br-bc"}, + {"6.1", "fl-fr-fc-lfe-bc-sl-sr"}, + {"6.1(back)", "fl-fr-fc-lfe-bl-br-bc"}, + {"6.1(top)", "fl-fr-fc-lfe-bl-br-tc"}, // not in lavc + {"6.1(front)", "fl-fr-lfe-flc-frc-sl-sr"}, + {"7.0", "fl-fr-fc-bl-br-sl-sr"}, + {"7.0(front)", "fl-fr-fc-flc-frc-sl-sr"}, + {"7.0(rear)", "fl-fr-fc-bl-br-sdl-sdr"}, // not in lavc + {"7.1", "fl-fr-fc-lfe-bl-br-sl-sr"}, + {"7.1(alsa)", "fl-fr-bl-br-fc-lfe-sl-sr"}, // not in lavc + {"7.1(wide)", "fl-fr-fc-lfe-bl-br-flc-frc"}, + {"7.1(wide-side)", "fl-fr-fc-lfe-flc-frc-sl-sr"}, + {"7.1(top)", "fl-fr-fc-lfe-bl-br-tfl-tfr"}, + {"7.1(rear)", "fl-fr-fc-lfe-bl-br-sdl-sdr"}, // not in lavc + {"octagonal", "fl-fr-fc-bl-br-bc-sl-sr"}, + {"cube", "fl-fr-bl-br-tfl-tfr-tbl-tbr"}, + {"hexadecagonal", "fl-fr-fc-bl-br-bc-sl-sr-tfc-tfl-tfr-tbl-tbc-tbr-wl-wr"}, + {"downmix", "fl-fr"}, + {"22.2", "fl-fr-fc-lfe-bl-br-flc-frc-bc-sl-sr-tc-tfl-tfc-tfr-tbl-tbc-tbr-lfe2-tsl-tsr-bfc-bfl-bfr"}, + {"auto", ""}, // not in lavc + {0} +}; + +static const struct mp_chmap default_layouts[] = { + {0}, // empty + MP_CHMAP_INIT_MONO, // mono + MP_CHMAP2(FL, FR), // stereo + MP_CHMAP3(FL, FR, LFE), // 2.1 + MP_CHMAP4(FL, FR, 
FC, BC), // 4.0 + MP_CHMAP5(FL, FR, FC, BL, BR), // 5.0 + MP_CHMAP6(FL, FR, FC, LFE, BL, BR), // 5.1 + MP_CHMAP7(FL, FR, FC, LFE, BC, SL, SR), // 6.1 + MP_CHMAP8(FL, FR, FC, LFE, BL, BR, SL, SR), // 7.1 +}; + +// Returns true if speakers are mapped uniquely, and there's at least 1 channel. +bool mp_chmap_is_valid(const struct mp_chmap *src) +{ + bool mapped[MP_SPEAKER_ID_COUNT] = {0}; + for (int n = 0; n < src->num; n++) { + int sp = src->speaker[n]; + if (sp >= MP_SPEAKER_ID_COUNT || mapped[sp]) + return false; + if (sp != MP_SPEAKER_ID_NA) + mapped[sp] = true; + } + return src->num > 0; +} + +bool mp_chmap_is_empty(const struct mp_chmap *src) +{ + return src->num == 0; +} + +// Return true if the channel map defines the number of the channels only, and +// the channels have to meaning associated with them. +bool mp_chmap_is_unknown(const struct mp_chmap *src) +{ + for (int n = 0; n < src->num; n++) { + if (src->speaker[n] != MP_SPEAKER_ID_NA) + return false; + } + return mp_chmap_is_valid(src); +} + +// Note: empty channel maps compare as equal. Invalid ones can equal too. +bool mp_chmap_equals(const struct mp_chmap *a, const struct mp_chmap *b) +{ + if (a->num != b->num) + return false; + for (int n = 0; n < a->num; n++) { + if (a->speaker[n] != b->speaker[n]) + return false; + } + return true; +} + +// Whether they use the same speakers (even if in different order). +bool mp_chmap_equals_reordered(const struct mp_chmap *a, const struct mp_chmap *b) +{ + struct mp_chmap t1 = *a, t2 = *b; + mp_chmap_reorder_norm(&t1); + mp_chmap_reorder_norm(&t2); + return mp_chmap_equals(&t1, &t2); +} + +bool mp_chmap_is_stereo(const struct mp_chmap *src) +{ + static const struct mp_chmap stereo = MP_CHMAP_INIT_STEREO; + return mp_chmap_equals(src, &stereo); +} + +static int comp_uint8(const void *a, const void *b) +{ + return *(const uint8_t *)a - *(const uint8_t *)b; +} + +// Reorder channels to normal order, with monotonically increasing speaker IDs. 
+// We define this order as the same order used with waveex. +void mp_chmap_reorder_norm(struct mp_chmap *map) +{ + uint8_t *arr = &map->speaker[0]; + qsort(arr, map->num, 1, comp_uint8); +} + +// Remove silent (NA) channels, if any. +void mp_chmap_remove_na(struct mp_chmap *map) +{ + struct mp_chmap new = {0}; + for (int n = 0; n < map->num; n++) { + int sp = map->speaker[n]; + if (sp != MP_SPEAKER_ID_NA) + new.speaker[new.num++] = map->speaker[n]; + } + *map = new; +} + +// Add silent (NA) channels to map until map->num >= num. +void mp_chmap_fill_na(struct mp_chmap *map, int num) +{ + assert(num <= MP_NUM_CHANNELS); + while (map->num < num) + map->speaker[map->num++] = MP_SPEAKER_ID_NA; +} + +// Set *dst to a standard layout with the given number of channels. +// If the number of channels is invalid, an invalid map is set, and +// mp_chmap_is_valid(dst) will return false. +void mp_chmap_from_channels(struct mp_chmap *dst, int num_channels) +{ + *dst = (struct mp_chmap) {0}; + if (num_channels >= 0 && num_channels < MP_ARRAY_SIZE(default_layouts)) + *dst = default_layouts[num_channels]; + if (!dst->num) + mp_chmap_set_unknown(dst, num_channels); +} + +// Set *dst to an unknown layout for the given numbers of channels. +// If the number of channels is invalid, an invalid map is set, and +// mp_chmap_is_valid(dst) will return false. +// A mp_chmap with all entries set to NA is treated specially in some +// contexts (watch out for mp_chmap_is_unknown()). +void mp_chmap_set_unknown(struct mp_chmap *dst, int num_channels) +{ + if (num_channels < 0 || num_channels > MP_NUM_CHANNELS) { + *dst = (struct mp_chmap) {0}; + } else { + dst->num = num_channels; + for (int n = 0; n < dst->num; n++) + dst->speaker[n] = MP_SPEAKER_ID_NA; + } +} + +// Return the ffmpeg/libav channel layout as in <libavutil/channel_layout.h>. +// Speakers not representable by ffmpeg/libav are dropped. 
+// Warning: this ignores the order of the channels, and will return a channel +// mask even if the order is different from libavcodec's. +// Also, "unknown" channel maps are translated to non-sense channel +// maps with the same number of channels. +uint64_t mp_chmap_to_lavc_unchecked(const struct mp_chmap *src) +{ + struct mp_chmap t = *src; + if (t.num > 64) + return 0; + // lavc has no concept for unknown layouts yet, so pick something that does + // the job of signaling the number of channels, even if it makes no sense + // as a proper layout. + if (mp_chmap_is_unknown(&t)) + return t.num == 64 ? (uint64_t)-1 : (1ULL << t.num) - 1; + uint64_t mask = 0; + for (int n = 0; n < t.num; n++) { + if (t.speaker[n] < 64) // ignore MP_SPEAKER_ID_NA etc. + mask |= 1ULL << t.speaker[n]; + } + return mask; +} + +// Return the ffmpeg/libav channel layout as in <libavutil/channel_layout.h>. +// Returns 0 if the channel order doesn't match lavc's or if it's invalid. +uint64_t mp_chmap_to_lavc(const struct mp_chmap *src) +{ + if (!mp_chmap_is_lavc(src)) + return 0; + return mp_chmap_to_lavc_unchecked(src); +} + +// Set channel map from the ffmpeg/libav channel layout as in +// <libavutil/channel_layout.h>. +// If the number of channels exceed MP_NUM_CHANNELS, set dst to empty. +void mp_chmap_from_lavc(struct mp_chmap *dst, uint64_t src) +{ + dst->num = 0; + for (int n = 0; n < 64; n++) { + if (src & (1ULL << n)) { + if (dst->num >= MP_NUM_CHANNELS) { + dst->num = 0; + return; + } + dst->speaker[dst->num] = n; + dst->num++; + } + } +} + +bool mp_chmap_is_lavc(const struct mp_chmap *src) +{ + if (!mp_chmap_is_valid(src)) + return false; + if (mp_chmap_is_unknown(src)) + return true; + // lavc's channel layout is a bit mask, and channels are always ordered + // from LSB to MSB speaker bits, so speaker IDs have to increase. 
+ assert(src->num > 0); + for (int n = 1; n < src->num; n++) { + if (src->speaker[n - 1] >= src->speaker[n]) + return false; + } + for (int n = 0; n < src->num; n++) { + if (src->speaker[n] >= 64) + return false; + } + return true; +} + +// Warning: for "unknown" channel maps, this returns something that may not +// make sense. Invalid channel maps are not changed. +void mp_chmap_reorder_to_lavc(struct mp_chmap *map) +{ + if (!mp_chmap_is_valid(map)) + return; + uint64_t mask = mp_chmap_to_lavc_unchecked(map); + mp_chmap_from_lavc(map, mask); +} + +// Get reordering array for from->to reordering. from->to must have the same set +// of speakers (i.e. same number and speaker IDs, just different order). Then, +// for each speaker n, src[n] will be set such that: +// to->speaker[n] = from->speaker[src[n]] +// (src[n] gives the source channel for destination channel n) +// If *from and *to don't contain the same set of speakers, then the above +// invariant is not guaranteed. Instead, src[n] can be set to -1 if the channel +// at to->speaker[n] is unmapped. +void mp_chmap_get_reorder(int src[MP_NUM_CHANNELS], const struct mp_chmap *from, + const struct mp_chmap *to) +{ + for (int n = 0; n < MP_NUM_CHANNELS; n++) + src[n] = -1; + + if (mp_chmap_is_unknown(from) || mp_chmap_is_unknown(to)) { + for (int n = 0; n < to->num; n++) + src[n] = n < from->num ? n : -1; + return; + } + + for (int n = 0; n < to->num; n++) { + for (int i = 0; i < from->num; i++) { + if (to->speaker[n] == from->speaker[i]) { + src[n] = i; + break; + } + } + } + + for (int n = 0; n < to->num; n++) + assert(src[n] < 0 || (to->speaker[n] == from->speaker[src[n]])); +} + +// Return the number of channels only in a. +int mp_chmap_diffn(const struct mp_chmap *a, const struct mp_chmap *b) +{ + uint64_t a_mask = mp_chmap_to_lavc_unchecked(a); + uint64_t b_mask = mp_chmap_to_lavc_unchecked(b); + return av_popcount64((a_mask ^ b_mask) & a_mask); +} + +// Returns something like "fl-fr-fc". 
If there's a standard layout in lavc +// order, return that, e.g. "3.0" instead of "fl-fr-fc". +// Unassigned but valid speakers get names like "sp28". +char *mp_chmap_to_str_buf(char *buf, size_t buf_size, const struct mp_chmap *src) +{ + buf[0] = '\0'; + + if (mp_chmap_is_unknown(src)) { + snprintf(buf, buf_size, "unknown%d", src->num); + return buf; + } + + for (int n = 0; n < src->num; n++) { + int sp = src->speaker[n]; + const char *s = sp < MP_SPEAKER_ID_COUNT ? speaker_names[sp][0] : NULL; + char sp_buf[10]; + if (!s) { + snprintf(sp_buf, sizeof(sp_buf), "sp%d", sp); + s = sp_buf; + } + mp_snprintf_cat(buf, buf_size, "%s%s", n > 0 ? "-" : "", s); + } + + // To standard layout name + for (int n = 0; std_layout_names[n][0]; n++) { + if (strcmp(buf, std_layout_names[n][1]) == 0) { + snprintf(buf, buf_size, "%s", std_layout_names[n][0]); + break; + } + } + + return buf; +} + +// If src can be parsed as channel map (as produced by mp_chmap_to_str()), +// return true and set *dst. Otherwise, return false and don't change *dst. 
// Note: call mp_chmap_is_valid() to test whether the returned map is valid;
// the map could be empty, or contain multiply mapped channels.
+char *mp_chmap_to_str_hr_buf(char *buf, size_t buf_size, const struct mp_chmap *src) +{ + struct mp_chmap map = *src; + mp_chmap_remove_na(&map); + for (int n = 0; std_layout_names[n][0]; n++) { + struct mp_chmap s; + if (mp_chmap_from_str(&s, bstr0(std_layout_names[n][0])) && + mp_chmap_equals_reordered(&s, &map)) + { + map = s; + break; + } + } + return mp_chmap_to_str_buf(buf, buf_size, &map); +} + +mp_ch_layout_tuple *mp_iterate_builtin_layouts(void **opaque) +{ + uintptr_t i = (uintptr_t)*opaque; + + if (i >= MP_ARRAY_SIZE(std_layout_names) || + !std_layout_names[i][0]) + return NULL; + + *opaque = (void *)(i + 1); + + if (std_layout_names[i][1][0] == '\0') { + return mp_iterate_builtin_layouts(opaque); + } + + return &std_layout_names[i]; +} + +void mp_chmap_print_help(struct mp_log *log) +{ + mp_info(log, "Speakers:\n"); + for (int n = 0; n < MP_SPEAKER_ID_COUNT; n++) { + if (speaker_names[n][0]) + mp_info(log, " %-16s (%s)\n", + speaker_names[n][0], speaker_names[n][1]); + } + mp_info(log, "Standard layouts:\n"); + for (int n = 0; std_layout_names[n][0]; n++) { + mp_info(log, " %-16s (%s)\n", + std_layout_names[n][0], std_layout_names[n][1]); + } + for (int n = 0; n < MP_NUM_CHANNELS; n++) + mp_info(log, " unknown%d\n", n + 1); +} diff --git a/audio/chmap.h b/audio/chmap.h new file mode 100644 index 0000000..58a3f71 --- /dev/null +++ b/audio/chmap.h @@ -0,0 +1,163 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef MP_CHMAP_H +#define MP_CHMAP_H + +#include <inttypes.h> +#include <stdbool.h> +#include "misc/bstr.h" + +#define MP_NUM_CHANNELS 64 + +// Speaker a channel can be assigned to. +// This corresponds to WAVEFORMATEXTENSIBLE channel mask bit indexes. +// E.g. channel_mask = (1 << MP_SPEAKER_ID_FL) | ... +enum mp_speaker_id { + // Official WAVEFORMATEXTENSIBLE (shortened names) + MP_SPEAKER_ID_FL = 0, // FRONT_LEFT + MP_SPEAKER_ID_FR, // FRONT_RIGHT + MP_SPEAKER_ID_FC, // FRONT_CENTER + MP_SPEAKER_ID_LFE, // LOW_FREQUENCY + MP_SPEAKER_ID_BL, // BACK_LEFT + MP_SPEAKER_ID_BR, // BACK_RIGHT + MP_SPEAKER_ID_FLC, // FRONT_LEFT_OF_CENTER + MP_SPEAKER_ID_FRC, // FRONT_RIGHT_OF_CENTER + MP_SPEAKER_ID_BC, // BACK_CENTER + MP_SPEAKER_ID_SL, // SIDE_LEFT + MP_SPEAKER_ID_SR, // SIDE_RIGHT + MP_SPEAKER_ID_TC, // TOP_CENTER + MP_SPEAKER_ID_TFL, // TOP_FRONT_LEFT + MP_SPEAKER_ID_TFC, // TOP_FRONT_CENTER + MP_SPEAKER_ID_TFR, // TOP_FRONT_RIGHT + MP_SPEAKER_ID_TBL, // TOP_BACK_LEFT + MP_SPEAKER_ID_TBC, // TOP_BACK_CENTER + MP_SPEAKER_ID_TBR, // TOP_BACK_RIGHT + // Unofficial/libav* extensions + MP_SPEAKER_ID_DL = 29, // STEREO_LEFT (stereo downmix special speakers) + MP_SPEAKER_ID_DR, // STEREO_RIGHT + MP_SPEAKER_ID_WL, // WIDE_LEFT + MP_SPEAKER_ID_WR, // WIDE_RIGHT + MP_SPEAKER_ID_SDL, // SURROUND_DIRECT_LEFT + MP_SPEAKER_ID_SDR, // SURROUND_DIRECT_RIGHT + MP_SPEAKER_ID_LFE2, // LOW_FREQUENCY_2 + MP_SPEAKER_ID_TSL, // TOP_SIDE_LEFT + MP_SPEAKER_ID_TSR, // TOP_SIDE_RIGHT, + MP_SPEAKER_ID_BFC, // BOTTOM_FRONT_CENTER + MP_SPEAKER_ID_BFL, // BOTTOM_FRONT_LEFT + MP_SPEAKER_ID_BFR, // BOTTOM_FRONT_RIGHT + + // Speaker IDs >= 64 are not representable in WAVEFORMATEXTENSIBLE or libav*. + + // "Silent" channels. These are sometimes used to insert padding for + // unused channels. 
Unlike other speaker types, multiple of these can + // occur in a single mp_chmap. + MP_SPEAKER_ID_NA = 64, + + // Including the unassigned IDs in between. This is not a valid ID anymore, + // but is still within uint8_t. + MP_SPEAKER_ID_COUNT, +}; + +struct mp_chmap { + uint8_t num; // number of channels + // Given a channel n, speaker[n] is the speaker ID driven by that channel. + // Entries after speaker[num - 1] are undefined. + uint8_t speaker[MP_NUM_CHANNELS]; +}; + +typedef const char * const (mp_ch_layout_tuple)[2]; + +#define MP_SP(speaker) MP_SPEAKER_ID_ ## speaker + +#define MP_CHMAP2(a, b) \ + {2, {MP_SP(a), MP_SP(b)}} +#define MP_CHMAP3(a, b, c) \ + {3, {MP_SP(a), MP_SP(b), MP_SP(c)}} +#define MP_CHMAP4(a, b, c, d) \ + {4, {MP_SP(a), MP_SP(b), MP_SP(c), MP_SP(d)}} +#define MP_CHMAP5(a, b, c, d, e) \ + {5, {MP_SP(a), MP_SP(b), MP_SP(c), MP_SP(d), MP_SP(e)}} +#define MP_CHMAP6(a, b, c, d, e, f) \ + {6, {MP_SP(a), MP_SP(b), MP_SP(c), MP_SP(d), MP_SP(e), MP_SP(f)}} +#define MP_CHMAP7(a, b, c, d, e, f, g) \ + {7, {MP_SP(a), MP_SP(b), MP_SP(c), MP_SP(d), MP_SP(e), MP_SP(f), MP_SP(g)}} +#define MP_CHMAP8(a, b, c, d, e, f, g, h) \ + {8, {MP_SP(a), MP_SP(b), MP_SP(c), MP_SP(d), MP_SP(e), MP_SP(f), MP_SP(g), MP_SP(h)}} + +#define MP_CHMAP_INIT_MONO {1, {MP_SPEAKER_ID_FC}} +#define MP_CHMAP_INIT_STEREO MP_CHMAP2(FL, FR) + +bool mp_chmap_is_valid(const struct mp_chmap *src); +bool mp_chmap_is_empty(const struct mp_chmap *src); +bool mp_chmap_is_unknown(const struct mp_chmap *src); +bool mp_chmap_equals(const struct mp_chmap *a, const struct mp_chmap *b); +bool mp_chmap_equals_reordered(const struct mp_chmap *a, const struct mp_chmap *b); +bool mp_chmap_is_stereo(const struct mp_chmap *src); + +void mp_chmap_reorder_norm(struct mp_chmap *map); +void mp_chmap_remove_na(struct mp_chmap *map); +void mp_chmap_fill_na(struct mp_chmap *map, int num); + +void mp_chmap_from_channels(struct mp_chmap *dst, int num_channels); +void mp_chmap_set_unknown(struct mp_chmap *dst, 
int num_channels); + +uint64_t mp_chmap_to_lavc(const struct mp_chmap *src); +uint64_t mp_chmap_to_lavc_unchecked(const struct mp_chmap *src); +void mp_chmap_from_lavc(struct mp_chmap *dst, uint64_t src); + +bool mp_chmap_is_lavc(const struct mp_chmap *src); +void mp_chmap_reorder_to_lavc(struct mp_chmap *map); + +void mp_chmap_get_reorder(int src[MP_NUM_CHANNELS], const struct mp_chmap *from, + const struct mp_chmap *to); + +int mp_chmap_diffn(const struct mp_chmap *a, const struct mp_chmap *b); + +char *mp_chmap_to_str_buf(char *buf, size_t buf_size, const struct mp_chmap *src); +#define mp_chmap_to_str_(m, sz) mp_chmap_to_str_buf((char[sz]){0}, sz, (m)) +#define mp_chmap_to_str(m) mp_chmap_to_str_(m, MP_NUM_CHANNELS * 4) + +char *mp_chmap_to_str_hr_buf(char *buf, size_t buf_size, const struct mp_chmap *src); +#define mp_chmap_to_str_hr_(m, sz) mp_chmap_to_str_hr_buf((char[sz]){0}, sz, (m)) +#define mp_chmap_to_str_hr(m) mp_chmap_to_str_hr_(m, MP_NUM_CHANNELS * 4) + +bool mp_chmap_from_str(struct mp_chmap *dst, bstr src); + +/** + * Iterate over all built-in channel layouts which have mapped channels. + * + * @param opaque a pointer where the iteration state is stored. Must point + * to nullptr to start the iteration. + * + * @return nullptr when the iteration is finished. + * Otherwise a pointer to an array of two char pointers. + * - [0] being the human-readable layout name. + * - [1] being the string representation of the layout. + */ +mp_ch_layout_tuple *mp_iterate_builtin_layouts(void **opaque); + +struct mp_log; +void mp_chmap_print_help(struct mp_log *log); + +// Use these to avoid chaos in case lavc's definition should diverge from MS. 
+#define mp_chmap_to_waveext mp_chmap_to_lavc +#define mp_chmap_from_waveext mp_chmap_from_lavc +#define mp_chmap_is_waveext mp_chmap_is_lavc +#define mp_chmap_reorder_to_waveext mp_chmap_reorder_to_lavc + +#endif diff --git a/audio/chmap_avchannel.c b/audio/chmap_avchannel.c new file mode 100644 index 0000000..ec961de --- /dev/null +++ b/audio/chmap_avchannel.c @@ -0,0 +1,51 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <libavutil/channel_layout.h> + +#include "chmap.h" +#include "chmap_avchannel.h" + +bool mp_chmap_from_av_layout(struct mp_chmap *dst, const AVChannelLayout *src) +{ + *dst = (struct mp_chmap) {0}; + + switch (src->order) { + case AV_CHANNEL_ORDER_UNSPEC: + mp_chmap_from_channels(dst, src->nb_channels); + return dst->num == src->nb_channels; + case AV_CHANNEL_ORDER_NATIVE: + mp_chmap_from_lavc(dst, src->u.mask); + return dst->num == src->nb_channels; + default: + // TODO: handle custom layouts + return false; + } +} + +void mp_chmap_to_av_layout(AVChannelLayout *dst, const struct mp_chmap *src) +{ + *dst = (AVChannelLayout){ + .order = AV_CHANNEL_ORDER_UNSPEC, + .nb_channels = src->num, + }; + + // TODO: handle custom layouts + if (!mp_chmap_is_unknown(src)) { + av_channel_layout_from_mask(dst, mp_chmap_to_lavc(src)); + } +} diff --git a/audio/chmap_avchannel.h b/audio/chmap_avchannel.h new file mode 100644 index 0000000..e136ccc --- /dev/null +++ b/audio/chmap_avchannel.h @@ -0,0 +1,32 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#pragma once + +#include <libavutil/channel_layout.h> + +#include "config.h" + +#include "chmap.h" + +#if HAVE_AV_CHANNEL_LAYOUT + +bool mp_chmap_from_av_layout(struct mp_chmap *dst, const AVChannelLayout *src); + +void mp_chmap_to_av_layout(AVChannelLayout *dst, const struct mp_chmap *src); + +#endif diff --git a/audio/chmap_sel.c b/audio/chmap_sel.c new file mode 100644 index 0000000..4fb7544 --- /dev/null +++ b/audio/chmap_sel.c @@ -0,0 +1,389 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <stdlib.h> +#include <assert.h> +#include <limits.h> + +#include "common/common.h" +#include "common/msg.h" +#include "chmap_sel.h" + +static const struct mp_chmap speaker_replacements[][2] = { + // 5.1 <-> 5.1 (side) + { MP_CHMAP2(SL, SR), MP_CHMAP2(BL, BR) }, + // 7.1 <-> 7.1 (rear ext) + { MP_CHMAP2(SL, SR), MP_CHMAP2(SDL, SDR) }, +}; + +// Try to replace speakers from the left of the list with the ones on the +// right, or the other way around. +static bool replace_speakers(struct mp_chmap *map, struct mp_chmap list[2]) +{ + assert(list[0].num == list[1].num); + if (!mp_chmap_is_valid(map)) + return false; + for (int dir = 0; dir < 2; dir++) { + int from = dir ? 0 : 1; + int to = dir ? 
1 : 0; + bool replaced = false; + struct mp_chmap t = *map; + for (int n = 0; n < t.num; n++) { + for (int i = 0; i < list[0].num; i++) { + if (t.speaker[n] == list[from].speaker[i]) { + t.speaker[n] = list[to].speaker[i]; + replaced = true; + break; + } + } + } + if (replaced && mp_chmap_is_valid(&t)) { + *map = t; + return true; + } + } + return false; +} + +// These go strictly from the first to the second entry and always use the +// full layout (possibly reordered and/or padding channels added). +static const struct mp_chmap preferred_remix[][2] = { + // mono can be perfectly played as stereo + { MP_CHMAP_INIT_MONO, MP_CHMAP_INIT_STEREO }, +}; + +// Conversion from src to dst is explicitly encouraged and should be preferred +// over "mathematical" upmixes or downmixes (which minimize lost channels). +static bool test_preferred_remix(const struct mp_chmap *src, + const struct mp_chmap *dst) +{ + struct mp_chmap src_p = *src, dst_p = *dst; + mp_chmap_remove_na(&src_p); + mp_chmap_remove_na(&dst_p); + for (int n = 0; n < MP_ARRAY_SIZE(preferred_remix); n++) { + if (mp_chmap_equals_reordered(&src_p, &preferred_remix[n][0]) && + mp_chmap_equals_reordered(&dst_p, &preferred_remix[n][1])) + return true; + } + return false; +} + +// Allow all channel layouts that can be expressed with mp_chmap. +// (By default, all layouts are rejected.) +void mp_chmap_sel_add_any(struct mp_chmap_sel *s) +{ + s->allow_any = true; +} + +// Allow all waveext formats, and force waveext channel order. +void mp_chmap_sel_add_waveext(struct mp_chmap_sel *s) +{ + s->allow_waveext = true; +} + +// Add a channel map that should be allowed. 
+void mp_chmap_sel_add_map(struct mp_chmap_sel *s, const struct mp_chmap *map) +{ + if (!mp_chmap_is_valid(map)) + return; + if (!s->chmaps) + s->chmaps = s->chmaps_storage; + if (s->num_chmaps == MP_ARRAY_SIZE(s->chmaps_storage)) { + if (!s->tmp) + return; + s->chmaps = talloc_memdup(s->tmp, s->chmaps, sizeof(s->chmaps_storage)); + } + if (s->chmaps != s->chmaps_storage) + MP_TARRAY_GROW(s->tmp, s->chmaps, s->num_chmaps); + s->chmaps[s->num_chmaps++] = *map; +} + +// Allow all waveext formats in default order. +void mp_chmap_sel_add_waveext_def(struct mp_chmap_sel *s) +{ + for (int n = 1; n <= MP_NUM_CHANNELS; n++) { + struct mp_chmap map; + mp_chmap_from_channels(&map, n); + mp_chmap_sel_add_map(s, &map); + } +} + +// Whitelist a speaker (MP_SPEAKER_ID_...). All layouts that contain whitelisted +// speakers are allowed. +void mp_chmap_sel_add_speaker(struct mp_chmap_sel *s, int id) +{ + assert(id >= 0 && id < MP_SPEAKER_ID_COUNT); + s->speakers[id] = true; +} + +static bool test_speakers(const struct mp_chmap_sel *s, struct mp_chmap *map) +{ + for (int n = 0; n < map->num; n++) { + if (!s->speakers[map->speaker[n]]) + return false; + } + return true; +} + +static bool test_maps(const struct mp_chmap_sel *s, struct mp_chmap *map) +{ + for (int n = 0; n < s->num_chmaps; n++) { + if (mp_chmap_equals_reordered(&s->chmaps[n], map)) { + *map = s->chmaps[n]; + return true; + } + } + return false; +} + +static bool test_waveext(const struct mp_chmap_sel *s, struct mp_chmap *map) +{ + if (s->allow_waveext) { + struct mp_chmap t = *map; + mp_chmap_reorder_to_waveext(&t); + if (mp_chmap_is_waveext(&t)) { + *map = t; + return true; + } + } + return false; +} + +static bool test_layout(const struct mp_chmap_sel *s, struct mp_chmap *map) +{ + if (!mp_chmap_is_valid(map)) + return false; + + return s->allow_any || test_waveext(s, map) || test_speakers(s, map) || + test_maps(s, map); +} + +// Determine which channel map to use given a source channel map, and various +// 
parameters restricting possible choices. If the map doesn't match, select +// a fallback and set it. +// If no matching layout is found, a reordered layout may be returned. +// If that is not possible, a fallback for up/downmixing may be returned. +// If no choice is possible, set *map to empty. +bool mp_chmap_sel_adjust(const struct mp_chmap_sel *s, struct mp_chmap *map) +{ + if (test_layout(s, map)) + return true; + if (mp_chmap_is_unknown(map)) { + struct mp_chmap t = {0}; + if (mp_chmap_sel_get_def(s, &t, map->num) && test_layout(s, &t)) { + *map = t; + return true; + } + } + + if (mp_chmap_sel_fallback(s, map)) + return true; + + for (int i = 0; i < MP_ARRAY_SIZE(speaker_replacements); i++) { + struct mp_chmap t = *map; + struct mp_chmap *r = (struct mp_chmap *)speaker_replacements[i]; + if (replace_speakers(&t, r) && test_layout(s, &t)) { + *map = t; + return true; + } + } + + // Fallback to mono/stereo as last resort + *map = (struct mp_chmap) MP_CHMAP_INIT_STEREO; + if (test_layout(s, map)) + return true; + *map = (struct mp_chmap) MP_CHMAP_INIT_MONO; + if (test_layout(s, map)) + return true; + *map = (struct mp_chmap) {0}; + return false; +} + +// Like mp_chmap_diffn(), but find the minimum difference with all possible +// speaker replacements considered. +static int mp_chmap_diffn_r(const struct mp_chmap *a, const struct mp_chmap *b) +{ + int mindiff = INT_MAX; + + for (int i = -1; i < (int)MP_ARRAY_SIZE(speaker_replacements); i++) { + struct mp_chmap ar = *a; + if (i >= 0) { + struct mp_chmap *r = (struct mp_chmap *)speaker_replacements[i]; + if (!replace_speakers(&ar, r)) + continue; + } + int d = mp_chmap_diffn(&ar, b); + if (d < mindiff) + mindiff = d; + } + + // Special-case: we consider stereo a replacement for mono. (This is not + // true in the other direction. Also, fl-fr is generally not a replacement + // for fc. Thus it's not part of the speaker replacement list.) 
+ struct mp_chmap mono = MP_CHMAP_INIT_MONO; + struct mp_chmap stereo = MP_CHMAP_INIT_STEREO; + if (mp_chmap_equals(&mono, b) && mp_chmap_equals(&stereo, a)) + mindiff = 0; + + return mindiff; +} + +// Decide whether we should prefer old or new for the requested layout. +// Return true if new should be used, false if old should be used. +// If old is empty, always return new (initial case). +static bool mp_chmap_is_better(struct mp_chmap *req, struct mp_chmap *old, + struct mp_chmap *new) +{ + // Initial case + if (!old->num) + return true; + + // Exact pick - this also ensures that the best layout is chosen if the + // layouts are the same, but with different order of channels. + if (mp_chmap_equals(req, old)) + return false; + if (mp_chmap_equals(req, new)) + return true; + + // If there's no exact match, strictly do a preferred conversion. + bool old_pref = test_preferred_remix(req, old); + bool new_pref = test_preferred_remix(req, new); + if (old_pref && !new_pref) + return false; + if (!old_pref && new_pref) + return true; + + int old_lost_r = mp_chmap_diffn_r(req, old); // num. channels only in req + int new_lost_r = mp_chmap_diffn_r(req, new); + + // Imperfect upmix (no real superset) - minimize lost channels + if (new_lost_r != old_lost_r) + return new_lost_r < old_lost_r; + + struct mp_chmap old_p = *old, new_p = *new; + mp_chmap_remove_na(&old_p); + mp_chmap_remove_na(&new_p); + + // If the situation is equal with replaced speakers, but the replacement is + // perfect for only one of them, let the better one win. This prefers + // inexact equivalents over exact supersets. + bool perfect_r_new = !new_lost_r && new_p.num <= old_p.num; + bool perfect_r_old = !old_lost_r && old_p.num <= new_p.num; + if (perfect_r_new != perfect_r_old) + return perfect_r_new; + + int old_lost = mp_chmap_diffn(req, old); + int new_lost = mp_chmap_diffn(req, new); + // If the situation is equal with replaced speakers, pick the better one, + // even if it means an upmix. 
+ if (new_lost != old_lost) + return new_lost < old_lost; + + // Some kind of upmix. If it's perfect, prefer the smaller one. Even if not, + // both have equal loss, so also prefer the smaller one. + // Drop padding channels (NA) for the sake of this check, as the number of + // padding channels isn't really meaningful. + if (new_p.num != old_p.num) + return new_p.num < old_p.num; + + // Again, with physical channels (minimizes number of NA channels). + return new->num < old->num; +} + +// Determine which channel map to fallback to given a source channel map. +bool mp_chmap_sel_fallback(const struct mp_chmap_sel *s, struct mp_chmap *map) +{ + struct mp_chmap best = {0}; + + for (int n = 0; n < s->num_chmaps; n++) { + struct mp_chmap e = s->chmaps[n]; + + if (mp_chmap_is_unknown(&e)) + continue; + + if (mp_chmap_is_better(map, &best, &e)) + best = e; + } + + if (best.num) { + *map = best; + return true; + } + + return false; +} + +// Set map to a default layout with num channels. Used for audio APIs that +// return a channel count as part of format negotiation, but give no +// information about the channel layout. +// If the channel count is correct, do nothing and leave *map untouched. +bool mp_chmap_sel_get_def(const struct mp_chmap_sel *s, struct mp_chmap *map, + int num) +{ + if (map->num != num) { + *map = (struct mp_chmap) {0}; + // Set of speakers or waveext might allow it. + struct mp_chmap t; + mp_chmap_from_channels(&t, num); + mp_chmap_reorder_to_waveext(&t); + if (test_layout(s, &t)) { + *map = t; + } else { + for (int n = 0; n < s->num_chmaps; n++) { + if (s->chmaps[n].num == num) { + *map = s->chmaps[n]; + break; + } + } + } + } + return map->num > 0; +} + +// Print the set of allowed channel layouts. 
+void mp_chmal_sel_log(const struct mp_chmap_sel *s, struct mp_log *log, int lev) +{ + if (!mp_msg_test(log, lev)) + return; + + for (int i = 0; i < s->num_chmaps; i++) + mp_msg(log, lev, " - %s\n", mp_chmap_to_str(&s->chmaps[i])); + for (int i = 0; i < MP_SPEAKER_ID_COUNT; i++) { + if (!s->speakers[i]) + continue; + struct mp_chmap l = {.num = 1, .speaker = { i }}; + mp_msg(log, lev, " - #%s\n", + i == MP_SPEAKER_ID_FC ? "fc" : mp_chmap_to_str_hr(&l)); + } + if (s->allow_waveext) + mp_msg(log, lev, " - waveext\n"); + if (s->allow_any) + mp_msg(log, lev, " - anything\n"); +} + +// Select a channel map from the given list that fits best to c. Don't change +// *c if there's no match, or the list is empty. +void mp_chmap_sel_list(struct mp_chmap *c, struct mp_chmap *maps, int num_maps) +{ + // This is a separate function to keep messing with mp_chmap_sel internals + // within this source file. + struct mp_chmap_sel sel = { + .chmaps = maps, + .num_chmaps = num_maps, + }; + mp_chmap_sel_fallback(&sel, c); +} diff --git a/audio/chmap_sel.h b/audio/chmap_sel.h new file mode 100644 index 0000000..4b11557 --- /dev/null +++ b/audio/chmap_sel.h @@ -0,0 +1,52 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#ifndef MP_CHMAP_SEL_H +#define MP_CHMAP_SEL_H + +#include <stdbool.h> + +#include "chmap.h" + +struct mp_chmap_sel { + // should be considered opaque + bool allow_any, allow_waveext; + bool speakers[MP_SPEAKER_ID_COUNT]; + struct mp_chmap *chmaps; + int num_chmaps; + + struct mp_chmap chmaps_storage[20]; + + void *tmp; // set to any talloc context to allow more chmaps entries +}; + +void mp_chmap_sel_add_any(struct mp_chmap_sel *s); +void mp_chmap_sel_add_waveext(struct mp_chmap_sel *s); +void mp_chmap_sel_add_waveext_def(struct mp_chmap_sel *s); +void mp_chmap_sel_add_map(struct mp_chmap_sel *s, const struct mp_chmap *map); +void mp_chmap_sel_add_speaker(struct mp_chmap_sel *s, int id); +bool mp_chmap_sel_adjust(const struct mp_chmap_sel *s, struct mp_chmap *map); +bool mp_chmap_sel_fallback(const struct mp_chmap_sel *s, struct mp_chmap *map); +bool mp_chmap_sel_get_def(const struct mp_chmap_sel *s, struct mp_chmap *map, + int num); + +struct mp_log; +void mp_chmal_sel_log(const struct mp_chmap_sel *s, struct mp_log *log, int lev); + +void mp_chmap_sel_list(struct mp_chmap *c, struct mp_chmap *maps, int num_maps); + +#endif diff --git a/audio/decode/ad_lavc.c b/audio/decode/ad_lavc.c new file mode 100644 index 0000000..08b789a --- /dev/null +++ b/audio/decode/ad_lavc.c @@ -0,0 +1,325 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. 
If not, see <http://www.gnu.org/licenses/>. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <stdbool.h> +#include <assert.h> + +#include <libavcodec/avcodec.h> +#include <libavutil/opt.h> +#include <libavutil/common.h> +#include <libavutil/intreadwrite.h> + +#include "config.h" + +#include "mpv_talloc.h" +#include "audio/aframe.h" +#include "audio/chmap_avchannel.h" +#include "audio/fmt-conversion.h" +#include "common/av_common.h" +#include "common/codecs.h" +#include "common/global.h" +#include "common/msg.h" +#include "demux/packet.h" +#include "demux/stheader.h" +#include "filters/f_decoder_wrapper.h" +#include "filters/filter_internal.h" +#include "options/m_config.h" +#include "options/options.h" + +struct priv { + AVCodecContext *avctx; + AVFrame *avframe; + AVPacket *avpkt; + struct mp_chmap force_channel_map; + uint32_t skip_samples, trim_samples; + bool preroll_done; + double next_pts; + AVRational codec_timebase; + struct lavc_state state; + + struct mp_decoder public; +}; + +#define OPT_BASE_STRUCT struct ad_lavc_params +struct ad_lavc_params { + float ac3drc; + bool downmix; + int threads; + char **avopts; +}; + +const struct m_sub_options ad_lavc_conf = { + .opts = (const m_option_t[]) { + {"ac3drc", OPT_FLOAT(ac3drc), M_RANGE(0, 6)}, + {"downmix", OPT_BOOL(downmix)}, + {"threads", OPT_INT(threads), M_RANGE(0, 16)}, + {"o", OPT_KEYVALUELIST(avopts)}, + {0} + }, + .size = sizeof(struct ad_lavc_params), + .defaults = &(const struct ad_lavc_params){ + .ac3drc = 0, + .threads = 1, + }, +}; + +static bool init(struct mp_filter *da, struct mp_codec_params *codec, + const char *decoder) +{ + struct priv *ctx = da->priv; + struct MPOpts *mpopts = mp_get_config_group(ctx, da->global, &mp_opt_root); + struct ad_lavc_params *opts = + mp_get_config_group(ctx, da->global, &ad_lavc_conf); + AVCodecContext *lavc_context; + const AVCodec *lavc_codec; + + ctx->codec_timebase = mp_get_codec_timebase(codec); + + if (codec->force_channels) + 
ctx->force_channel_map = codec->channels; + + lavc_codec = avcodec_find_decoder_by_name(decoder); + if (!lavc_codec) { + MP_ERR(da, "Cannot find codec '%s' in libavcodec...\n", decoder); + return false; + } + + lavc_context = avcodec_alloc_context3(lavc_codec); + ctx->avctx = lavc_context; + ctx->avframe = av_frame_alloc(); + ctx->avpkt = av_packet_alloc(); + MP_HANDLE_OOM(ctx->avctx && ctx->avframe && ctx->avpkt); + lavc_context->codec_type = AVMEDIA_TYPE_AUDIO; + lavc_context->codec_id = lavc_codec->id; + lavc_context->pkt_timebase = ctx->codec_timebase; + + if (opts->downmix && mpopts->audio_output_channels.num_chmaps == 1) { + const struct mp_chmap *requested_layout = + &mpopts->audio_output_channels.chmaps[0]; +#if !HAVE_AV_CHANNEL_LAYOUT + lavc_context->request_channel_layout = + mp_chmap_to_lavc(requested_layout); +#else + AVChannelLayout av_layout = { 0 }; + mp_chmap_to_av_layout(&av_layout, requested_layout); + + // Always try to set requested output layout - currently only something + // supported by AC3, MLP/TrueHD, DTS and the fdk-aac wrapper. + av_opt_set_chlayout(lavc_context, "downmix", &av_layout, + AV_OPT_SEARCH_CHILDREN); + + av_channel_layout_uninit(&av_layout); +#endif + } + + // Always try to set - option only exists for AC3 at the moment + av_opt_set_double(lavc_context, "drc_scale", opts->ac3drc, + AV_OPT_SEARCH_CHILDREN); + + // Let decoder add AV_FRAME_DATA_SKIP_SAMPLES. 
+ av_opt_set(lavc_context, "flags2", "+skip_manual", AV_OPT_SEARCH_CHILDREN); + + mp_set_avopts(da->log, lavc_context, opts->avopts); + + if (mp_set_avctx_codec_headers(lavc_context, codec) < 0) { + MP_ERR(da, "Could not set decoder parameters.\n"); + return false; + } + + mp_set_avcodec_threads(da->log, lavc_context, opts->threads); + + /* open it */ + if (avcodec_open2(lavc_context, lavc_codec, NULL) < 0) { + MP_ERR(da, "Could not open codec.\n"); + return false; + } + + ctx->next_pts = MP_NOPTS_VALUE; + + return true; +} + +static void destroy(struct mp_filter *da) +{ + struct priv *ctx = da->priv; + + avcodec_free_context(&ctx->avctx); + av_frame_free(&ctx->avframe); + mp_free_av_packet(&ctx->avpkt); +} + +static void reset(struct mp_filter *da) +{ + struct priv *ctx = da->priv; + + avcodec_flush_buffers(ctx->avctx); + ctx->skip_samples = 0; + ctx->trim_samples = 0; + ctx->preroll_done = false; + ctx->next_pts = MP_NOPTS_VALUE; + ctx->state = (struct lavc_state){0}; +} + +static int send_packet(struct mp_filter *da, struct demux_packet *mpkt) +{ + struct priv *priv = da->priv; + AVCodecContext *avctx = priv->avctx; + + // If the decoder discards the timestamp for some reason, we use the + // interpolated PTS. Initialize it so that it works for the initial + // packet as well. + if (mpkt && priv->next_pts == MP_NOPTS_VALUE) + priv->next_pts = mpkt->pts; + + mp_set_av_packet(priv->avpkt, mpkt, &priv->codec_timebase); + + int ret = avcodec_send_packet(avctx, mpkt ? priv->avpkt : NULL); + if (ret < 0) + MP_ERR(da, "Error decoding audio.\n"); + return ret; +} + +static int receive_frame(struct mp_filter *da, struct mp_frame *out) +{ + struct priv *priv = da->priv; + AVCodecContext *avctx = priv->avctx; + + int ret = avcodec_receive_frame(avctx, priv->avframe); + + if (ret == AVERROR_EOF) { + // If flushing was initialized earlier and has ended now, make it start + // over in case we get new packets at some point in the future. 
        // (Don't reset the filter itself, we want to keep other state.)
        avcodec_flush_buffers(priv->avctx);
        return ret;
    } else if (ret < 0 && ret != AVERROR(EAGAIN)) {
        MP_ERR(da, "Error decoding audio.\n");
    }

    // Frames the decoder flagged as discard-only carry no presentable data.
    if (priv->avframe->flags & AV_FRAME_FLAG_DISCARD)
        av_frame_unref(priv->avframe);

    // No decoded data available (EAGAIN, error, or discarded frame).
    if (!priv->avframe->buf[0])
        return ret;

    double out_pts = mp_pts_from_av(priv->avframe->pts, &priv->codec_timebase);

    struct mp_aframe *mpframe = mp_aframe_from_avframe(priv->avframe);
    if (!mpframe) {
        MP_ERR(da, "Converting libavcodec frame to mpv frame failed.\n");
        return ret;
    }

    if (priv->force_channel_map.num)
        mp_aframe_set_chmap(mpframe, &priv->force_channel_map);

    // If the decoder discarded the timestamp, use the interpolated one
    // (maintained from the previous frame's end time; see send_packet()).
    if (out_pts == MP_NOPTS_VALUE)
        out_pts = priv->next_pts;
    mp_aframe_set_pts(mpframe, out_pts);

    priv->next_pts = mp_aframe_end_pts(mpframe);

    // Side data layout (as read below): u32 at offset 0 = samples to skip
    // from the start, u32 at offset 4 = samples to trim from the end.
    AVFrameSideData *sd =
        av_frame_get_side_data(priv->avframe, AV_FRAME_DATA_SKIP_SAMPLES);
    if (sd && sd->size >= 10) {
        char *d = sd->data;
        priv->skip_samples += AV_RL32(d + 0);
        priv->trim_samples += AV_RL32(d + 4);
    }

    if (!priv->preroll_done) {
        // Skip only if this isn't already handled by AV_FRAME_DATA_SKIP_SAMPLES.
+ if (!priv->skip_samples) + priv->skip_samples = avctx->delay; + priv->preroll_done = true; + } + + uint32_t skip = MPMIN(priv->skip_samples, mp_aframe_get_size(mpframe)); + if (skip) { + mp_aframe_skip_samples(mpframe, skip); + priv->skip_samples -= skip; + } + uint32_t trim = MPMIN(priv->trim_samples, mp_aframe_get_size(mpframe)); + if (trim) { + mp_aframe_set_size(mpframe, mp_aframe_get_size(mpframe) - trim); + priv->trim_samples -= trim; + } + + // Strip possibly bogus float values like Infinity, NaN, denormalized + mp_aframe_sanitize_float(mpframe); + + if (mp_aframe_get_size(mpframe) > 0) { + *out = MAKE_FRAME(MP_FRAME_AUDIO, mpframe); + } else { + talloc_free(mpframe); + } + + av_frame_unref(priv->avframe); + + return ret; +} + +static void process(struct mp_filter *ad) +{ + struct priv *priv = ad->priv; + + lavc_process(ad, &priv->state, send_packet, receive_frame); +} + +static const struct mp_filter_info ad_lavc_filter = { + .name = "ad_lavc", + .priv_size = sizeof(struct priv), + .process = process, + .reset = reset, + .destroy = destroy, +}; + +static struct mp_decoder *create(struct mp_filter *parent, + struct mp_codec_params *codec, + const char *decoder) +{ + struct mp_filter *da = mp_filter_create(parent, &ad_lavc_filter); + if (!da) + return NULL; + + mp_filter_add_pin(da, MP_PIN_IN, "in"); + mp_filter_add_pin(da, MP_PIN_OUT, "out"); + + da->log = mp_log_new(da, parent->log, NULL); + + struct priv *priv = da->priv; + priv->public.f = da; + + if (!init(da, codec, decoder)) { + talloc_free(da); + return NULL; + } + return &priv->public; +} + +static void add_decoders(struct mp_decoder_list *list) +{ + mp_add_lavc_decoders(list, AVMEDIA_TYPE_AUDIO); +} + +const struct mp_decoder_fns ad_lavc = { + .create = create, + .add_decoders = add_decoders, +}; diff --git a/audio/decode/ad_spdif.c b/audio/decode/ad_spdif.c new file mode 100644 index 0000000..393af8a --- /dev/null +++ b/audio/decode/ad_spdif.c @@ -0,0 +1,441 @@ +/* + * Copyright (C) 2012 Naoya 
OYAMA + * + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <string.h> +#include <assert.h> + +#include <libavformat/avformat.h> +#include <libavcodec/avcodec.h> +#include <libavutil/opt.h> + +#include "audio/aframe.h" +#include "audio/format.h" +#include "common/av_common.h" +#include "common/codecs.h" +#include "common/msg.h" +#include "demux/packet.h" +#include "demux/stheader.h" +#include "filters/f_decoder_wrapper.h" +#include "filters/filter_internal.h" +#include "options/options.h" + +#define OUTBUF_SIZE 65536 + +struct spdifContext { + struct mp_log *log; + enum AVCodecID codec_id; + AVFormatContext *lavf_ctx; + AVPacket *avpkt; + int out_buffer_len; + uint8_t out_buffer[OUTBUF_SIZE]; + bool need_close; + bool use_dts_hd; + struct mp_aframe *fmt; + int sstride; + struct mp_aframe_pool *pool; + + struct mp_decoder public; +}; + +static int write_packet(void *p, uint8_t *buf, int buf_size) +{ + struct spdifContext *ctx = p; + + int buffer_left = OUTBUF_SIZE - ctx->out_buffer_len; + if (buf_size > buffer_left) { + MP_ERR(ctx, "spdif packet too large.\n"); + buf_size = buffer_left; + } + + memcpy(&ctx->out_buffer[ctx->out_buffer_len], buf, buf_size); + ctx->out_buffer_len += buf_size; + return buf_size; +} + +// (called on both filter destruction _and_ if lavf fails to init) +static void destroy(struct mp_filter *da) +{ 
+ struct spdifContext *spdif_ctx = da->priv; + AVFormatContext *lavf_ctx = spdif_ctx->lavf_ctx; + + if (lavf_ctx) { + if (spdif_ctx->need_close) + av_write_trailer(lavf_ctx); + if (lavf_ctx->pb) + av_freep(&lavf_ctx->pb->buffer); + av_freep(&lavf_ctx->pb); + avformat_free_context(lavf_ctx); + spdif_ctx->lavf_ctx = NULL; + } + mp_free_av_packet(&spdif_ctx->avpkt); +} + +static void determine_codec_params(struct mp_filter *da, AVPacket *pkt, + int *out_profile, int *out_rate) +{ + struct spdifContext *spdif_ctx = da->priv; + int profile = FF_PROFILE_UNKNOWN; + AVCodecContext *ctx = NULL; + AVFrame *frame = NULL; + + AVCodecParserContext *parser = av_parser_init(spdif_ctx->codec_id); + if (parser) { + // Don't make it wait for the next frame. + parser->flags |= PARSER_FLAG_COMPLETE_FRAMES; + + ctx = avcodec_alloc_context3(NULL); + if (!ctx) { + av_parser_close(parser); + goto done; + } + + uint8_t *d = NULL; + int s = 0; + av_parser_parse2(parser, ctx, &d, &s, pkt->data, pkt->size, 0, 0, 0); + *out_profile = profile = ctx->profile; + *out_rate = ctx->sample_rate; + + avcodec_free_context(&ctx); + av_parser_close(parser); + } + + if (profile != FF_PROFILE_UNKNOWN || spdif_ctx->codec_id != AV_CODEC_ID_DTS) + return; + + const AVCodec *codec = avcodec_find_decoder(spdif_ctx->codec_id); + if (!codec) + goto done; + + frame = av_frame_alloc(); + if (!frame) + goto done; + + ctx = avcodec_alloc_context3(codec); + if (!ctx) + goto done; + + if (avcodec_open2(ctx, codec, NULL) < 0) + goto done; + + if (avcodec_send_packet(ctx, pkt) < 0) + goto done; + if (avcodec_receive_frame(ctx, frame) < 0) + goto done; + + *out_profile = profile = ctx->profile; + *out_rate = ctx->sample_rate; + +done: + av_frame_free(&frame); + avcodec_free_context(&ctx); + + if (profile == FF_PROFILE_UNKNOWN) + MP_WARN(da, "Failed to parse codec profile.\n"); +} + +static int init_filter(struct mp_filter *da) +{ + struct spdifContext *spdif_ctx = da->priv; + + AVPacket *pkt = spdif_ctx->avpkt; + + int 
profile = FF_PROFILE_UNKNOWN; + int c_rate = 0; + determine_codec_params(da, pkt, &profile, &c_rate); + MP_VERBOSE(da, "In: profile=%d samplerate=%d\n", profile, c_rate); + + AVFormatContext *lavf_ctx = avformat_alloc_context(); + if (!lavf_ctx) + goto fail; + + spdif_ctx->lavf_ctx = lavf_ctx; + + lavf_ctx->oformat = av_guess_format("spdif", NULL, NULL); + if (!lavf_ctx->oformat) + goto fail; + + void *buffer = av_mallocz(OUTBUF_SIZE); + MP_HANDLE_OOM(buffer); + lavf_ctx->pb = avio_alloc_context(buffer, OUTBUF_SIZE, 1, spdif_ctx, NULL, + write_packet, NULL); + if (!lavf_ctx->pb) { + av_free(buffer); + goto fail; + } + + // Request minimal buffering + lavf_ctx->pb->direct = 1; + + AVStream *stream = avformat_new_stream(lavf_ctx, 0); + if (!stream) + goto fail; + + stream->codecpar->codec_id = spdif_ctx->codec_id; + + AVDictionary *format_opts = NULL; + + spdif_ctx->fmt = mp_aframe_create(); + talloc_steal(spdif_ctx, spdif_ctx->fmt); + + int num_channels = 0; + int sample_format = 0; + int samplerate = 0; + switch (spdif_ctx->codec_id) { + case AV_CODEC_ID_AAC: + sample_format = AF_FORMAT_S_AAC; + samplerate = 48000; + num_channels = 2; + break; + case AV_CODEC_ID_AC3: + sample_format = AF_FORMAT_S_AC3; + samplerate = c_rate > 0 ? c_rate : 48000; + num_channels = 2; + break; + case AV_CODEC_ID_DTS: { + bool is_hd = profile == FF_PROFILE_DTS_HD_HRA || + profile == FF_PROFILE_DTS_HD_MA || + profile == FF_PROFILE_UNKNOWN; + + // Apparently, DTS-HD over SPDIF is specified to be 7.1 (8 channels) + // for DTS-HD MA, and stereo (2 channels) for DTS-HD HRA. The bit + // streaming rate as well as the signaled channel count are defined + // based on this value. + int dts_hd_spdif_channel_count = profile == FF_PROFILE_DTS_HD_HRA ? 
+ 2 : 8; + if (spdif_ctx->use_dts_hd && is_hd) { + av_dict_set_int(&format_opts, "dtshd_rate", + dts_hd_spdif_channel_count * 96000, 0); + sample_format = AF_FORMAT_S_DTSHD; + samplerate = 192000; + num_channels = dts_hd_spdif_channel_count; + } else { + sample_format = AF_FORMAT_S_DTS; + samplerate = 48000; + num_channels = 2; + } + break; + } + case AV_CODEC_ID_EAC3: + sample_format = AF_FORMAT_S_EAC3; + samplerate = 192000; + num_channels = 2; + break; + case AV_CODEC_ID_MP3: + sample_format = AF_FORMAT_S_MP3; + samplerate = 48000; + num_channels = 2; + break; + case AV_CODEC_ID_TRUEHD: + sample_format = AF_FORMAT_S_TRUEHD; + samplerate = 192000; + num_channels = 8; + break; + default: + abort(); + } + + struct mp_chmap chmap; + mp_chmap_from_channels(&chmap, num_channels); + mp_aframe_set_chmap(spdif_ctx->fmt, &chmap); + mp_aframe_set_format(spdif_ctx->fmt, sample_format); + mp_aframe_set_rate(spdif_ctx->fmt, samplerate); + + spdif_ctx->sstride = mp_aframe_get_sstride(spdif_ctx->fmt); + + if (avformat_write_header(lavf_ctx, &format_opts) < 0) { + MP_FATAL(da, "libavformat spdif initialization failed.\n"); + av_dict_free(&format_opts); + goto fail; + } + av_dict_free(&format_opts); + + spdif_ctx->need_close = true; + + return 0; + +fail: + destroy(da); + mp_filter_internal_mark_failed(da); + return -1; +} + +static void process(struct mp_filter *da) +{ + struct spdifContext *spdif_ctx = da->priv; + + if (!mp_pin_can_transfer_data(da->ppins[1], da->ppins[0])) + return; + + struct mp_frame inframe = mp_pin_out_read(da->ppins[0]); + if (inframe.type == MP_FRAME_EOF) { + mp_pin_in_write(da->ppins[1], inframe); + return; + } else if (inframe.type != MP_FRAME_PACKET) { + if (inframe.type) { + MP_ERR(da, "unknown frame type\n"); + mp_filter_internal_mark_failed(da); + } + return; + } + + struct demux_packet *mpkt = inframe.data; + struct mp_aframe *out = NULL; + double pts = mpkt->pts; + + if (!spdif_ctx->avpkt) { + spdif_ctx->avpkt = av_packet_alloc(); + 
        MP_HANDLE_OOM(spdif_ctx->avpkt);
    }
    mp_set_av_packet(spdif_ctx->avpkt, mpkt, NULL);
    // The muxer output is raw IEC 61937 data; timestamps are tracked by us
    // (taken from the source packet and applied to the output frame below).
    spdif_ctx->avpkt->pts = spdif_ctx->avpkt->dts = 0;
    if (!spdif_ctx->lavf_ctx) {
        // Lazy init: needs the first packet to probe profile/samplerate.
        if (init_filter(da) < 0)
            goto done;
        assert(spdif_ctx->avpkt);
    }

    spdif_ctx->out_buffer_len = 0;
    int ret = av_write_frame(spdif_ctx->lavf_ctx, spdif_ctx->avpkt);
    avio_flush(spdif_ctx->lavf_ctx->pb);
    if (ret < 0) {
        MP_ERR(da, "spdif mux error: '%s'\n", mp_strerror(AVUNERROR(ret)));
        goto done;
    }

    // Wrap the muxed bytes (collected by write_packet()) into an audio frame.
    out = mp_aframe_new_ref(spdif_ctx->fmt);
    int samples = spdif_ctx->out_buffer_len / spdif_ctx->sstride;
    if (mp_aframe_pool_allocate(spdif_ctx->pool, out, samples) < 0) {
        TA_FREEP(&out);
        goto done;
    }

    uint8_t **data = mp_aframe_get_data_rw(out);
    if (!data) {
        TA_FREEP(&out);
        goto done;
    }

    memcpy(data[0], spdif_ctx->out_buffer, spdif_ctx->out_buffer_len);
    mp_aframe_set_pts(out, pts);

done:
    talloc_free(mpkt);
    if (out) {
        mp_pin_in_write(da->ppins[1], MAKE_FRAME(MP_FRAME_AUDIO, out));
    } else {
        mp_filter_internal_mark_failed(da);
    }
}

// Codec IDs this pass-through decoder supports; AV_CODEC_ID_NONE terminated.
static const int codecs[] = {
    AV_CODEC_ID_AAC,
    AV_CODEC_ID_AC3,
    AV_CODEC_ID_DTS,
    AV_CODEC_ID_EAC3,
    AV_CODEC_ID_MP3,
    AV_CODEC_ID_TRUEHD,
    AV_CODEC_ID_NONE
};

// Return true if the mpv codec name 'name' maps to one of the IDs above.
static bool find_codec(const char *name)
{
    for (int n = 0; codecs[n] != AV_CODEC_ID_NONE; n++) {
        const char *format = mp_codec_from_av_codec_id(codecs[n]);
        if (format && name && strcmp(format, name) == 0)
            return true;
    }
    return false;
}

// codec is the libavcodec name of the source audio codec.
// pref is a ","-separated list of names, some of which do not match with
// libavcodec names (like dts-hd).
+struct mp_decoder_list *select_spdif_codec(const char *codec, const char *pref) +{ + struct mp_decoder_list *list = talloc_zero(NULL, struct mp_decoder_list); + + if (!find_codec(codec)) + return list; + + bool spdif_allowed = false, dts_hd_allowed = false; + bstr sel = bstr0(pref); + while (sel.len) { + bstr decoder; + bstr_split_tok(sel, ",", &decoder, &sel); + if (decoder.len) { + if (bstr_equals0(decoder, codec)) + spdif_allowed = true; + if (bstr_equals0(decoder, "dts-hd") && strcmp(codec, "dts") == 0) + spdif_allowed = dts_hd_allowed = true; + } + } + + if (!spdif_allowed) + return list; + + const char *suffix_name = dts_hd_allowed ? "dts_hd" : codec; + char name[80]; + snprintf(name, sizeof(name), "spdif_%s", suffix_name); + mp_add_decoder(list, codec, name, + "libavformat/spdifenc audio pass-through decoder"); + return list; +} + +static const struct mp_filter_info ad_spdif_filter = { + .name = "ad_spdif", + .priv_size = sizeof(struct spdifContext), + .process = process, + .destroy = destroy, +}; + +static struct mp_decoder *create(struct mp_filter *parent, + struct mp_codec_params *codec, + const char *decoder) +{ + struct mp_filter *da = mp_filter_create(parent, &ad_spdif_filter); + if (!da) + return NULL; + + mp_filter_add_pin(da, MP_PIN_IN, "in"); + mp_filter_add_pin(da, MP_PIN_OUT, "out"); + + da->log = mp_log_new(da, parent->log, NULL); + + struct spdifContext *spdif_ctx = da->priv; + spdif_ctx->log = da->log; + spdif_ctx->pool = mp_aframe_pool_create(spdif_ctx); + spdif_ctx->public.f = da; + + if (strcmp(decoder, "spdif_dts_hd") == 0) + spdif_ctx->use_dts_hd = true; + + spdif_ctx->codec_id = mp_codec_to_av_codec_id(codec->codec); + + + if (spdif_ctx->codec_id == AV_CODEC_ID_NONE) { + talloc_free(da); + return NULL; + } + + return &spdif_ctx->public; +} + +const struct mp_decoder_fns ad_spdif = { + .create = create, +}; diff --git a/audio/filter/af_drop.c b/audio/filter/af_drop.c new file mode 100644 index 0000000..724c482 --- /dev/null +++ 
b/audio/filter/af_drop.c @@ -0,0 +1,114 @@
#include "audio/aframe.h"
#include "audio/format.h"
#include "common/common.h"
#include "filters/f_autoconvert.h"
#include "filters/filter_internal.h"
#include "filters/user_filters.h"

struct priv {
    double speed;  // target playback speed factor (set via filter command)
    double diff;   // amount of too many additional samples in normal speed
    struct mp_aframe *last; // for repeating
};

// Simulate playback at p->speed by dropping or repeating whole frames.
// p->diff tracks the drift (in seconds) between what has been output and
// what normal-speed playback would need; once the drift exceeds half of a
// frame duration in either direction, a frame is dropped or repeated.
static void process(struct mp_filter *f)
{
    struct priv *p = f->priv;

    if (!mp_pin_in_needs_data(f->ppins[1]))
        return;

    struct mp_frame frame = {0};

    double last_dur = p->last ? mp_aframe_duration(p->last) : 0;
    if (p->last && p->diff < 0 && -p->diff > last_dur / 2) {
        // Output is behind by more than half a frame: repeat the last one.
        MP_VERBOSE(f, "repeat\n");
        frame = MAKE_FRAME(MP_FRAME_AUDIO, p->last);
        p->last = NULL;
    } else {
        frame = mp_pin_out_read(f->ppins[0]);

        if (frame.type == MP_FRAME_AUDIO) {
            last_dur = mp_aframe_duration(frame.data);
            p->diff -= last_dur;
            if (p->diff > last_dur / 2) {
                // Output is ahead by more than half a frame: drop this one.
                MP_VERBOSE(f, "drop\n");
                mp_frame_unref(&frame);
                mp_filter_internal_mark_progress(f);
            }
        }
    }

    if (frame.type == MP_FRAME_AUDIO) {
        struct mp_aframe *fr = frame.data;
        // Keep a reference for possible repetition, retimed to start where
        // the frame being output ends.
        talloc_free(p->last);
        p->last = mp_aframe_new_ref(fr);
        mp_aframe_mul_speed(fr, p->speed);
        p->diff += mp_aframe_duration(fr);
        mp_aframe_set_pts(p->last, mp_aframe_end_pts(fr));
    } else if (frame.type == MP_FRAME_EOF) {
        TA_FREEP(&p->last);
    }
    mp_pin_in_write(f->ppins[1], frame);
}

// Handle runtime speed changes from the player core.
static bool command(struct mp_filter *f, struct mp_filter_command *cmd)
{
    struct priv *p = f->priv;

    switch (cmd->type) {
    case MP_FILTER_COMMAND_SET_SPEED:
        p->speed = cmd->speed;
        return true;
    }

    return false;
}

// Discard the repeat buffer and accumulated drift (e.g. on seek).
static void reset(struct mp_filter *f)
{
    struct priv *p = f->priv;

    TA_FREEP(&p->last);
    p->diff = 0;
}

static void destroy(struct mp_filter *f)
{
    // reset() already frees all owned state.
    reset(f);
}

static const struct mp_filter_info af_drop_filter = {
    .name = "drop",
    .priv_size = sizeof(struct priv),
    .process = process,
    .command = command,
+ .reset = reset, + .destroy = destroy, +}; + +static struct mp_filter *af_drop_create(struct mp_filter *parent, void *options) +{ + struct mp_filter *f = mp_filter_create(parent, &af_drop_filter); + if (!f) { + talloc_free(options); + return NULL; + } + + mp_filter_add_pin(f, MP_PIN_IN, "in"); + mp_filter_add_pin(f, MP_PIN_OUT, "out"); + + struct priv *p = f->priv; + p->speed = 1.0; + + return f; +} + +const struct mp_user_filter_entry af_drop = { + .desc = { + .description = "Change audio speed by dropping/repeating frames", + .name = "drop", + .priv_size = sizeof(struct priv), + }, + .create = af_drop_create, +}; diff --git a/audio/filter/af_format.c b/audio/filter/af_format.c new file mode 100644 index 0000000..2d1c1cc --- /dev/null +++ b/audio/filter/af_format.c @@ -0,0 +1,143 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include "audio/aframe.h" +#include "audio/format.h" +#include "filters/f_autoconvert.h" +#include "filters/filter_internal.h" +#include "filters/user_filters.h" +#include "options/m_option.h" + +struct f_opts { + int in_format; + int in_srate; + struct m_channels in_channels; + int out_format; + int out_srate; + struct m_channels out_channels; + + bool fail; +}; + +struct priv { + struct f_opts *opts; + struct mp_pin *in_pin; +}; + +static void process(struct mp_filter *f) +{ + struct priv *p = f->priv; + + if (!mp_pin_can_transfer_data(f->ppins[1], p->in_pin)) + return; + + struct mp_frame frame = mp_pin_out_read(p->in_pin); + + if (p->opts->fail) { + MP_ERR(f, "Failing on purpose.\n"); + goto error; + } + + if (frame.type == MP_FRAME_EOF) { + mp_pin_in_write(f->ppins[1], frame); + return; + } + + if (frame.type != MP_FRAME_AUDIO) { + MP_ERR(f, "audio frame expected\n"); + goto error; + } + + struct mp_aframe *in = frame.data; + + if (p->opts->out_channels.num_chmaps > 0) { + if (!mp_aframe_set_chmap(in, &p->opts->out_channels.chmaps[0])) { + MP_ERR(f, "could not force output channels\n"); + goto error; + } + } + + if (p->opts->out_srate) + mp_aframe_set_rate(in, p->opts->out_srate); + + mp_pin_in_write(f->ppins[1], frame); + return; + +error: + mp_frame_unref(&frame); + mp_filter_internal_mark_failed(f); +} + +static const struct mp_filter_info af_format_filter = { + .name = "format", + .priv_size = sizeof(struct priv), + .process = process, +}; + +static struct mp_filter *af_format_create(struct mp_filter *parent, + void *options) +{ + struct mp_filter *f = mp_filter_create(parent, &af_format_filter); + if (!f) { + talloc_free(options); + return NULL; + } + + struct priv *p = f->priv; + p->opts = talloc_steal(p, options); + + mp_filter_add_pin(f, MP_PIN_IN, "in"); + mp_filter_add_pin(f, MP_PIN_OUT, "out"); + + struct mp_autoconvert *conv = mp_autoconvert_create(f); + if (!conv) + abort(); + + if (p->opts->in_format) + mp_autoconvert_add_afmt(conv, 
p->opts->in_format); + if (p->opts->in_srate) + mp_autoconvert_add_srate(conv, p->opts->in_srate); + if (p->opts->in_channels.num_chmaps > 0) + mp_autoconvert_add_chmap(conv, &p->opts->in_channels.chmaps[0]); + + mp_pin_connect(conv->f->pins[0], f->ppins[0]); + p->in_pin = conv->f->pins[1]; + + return f; +} + +#define OPT_BASE_STRUCT struct f_opts + +const struct mp_user_filter_entry af_format = { + .desc = { + .name = "format", + .description = "Force audio format", + .priv_size = sizeof(struct f_opts), + .options = (const struct m_option[]) { + {"format", OPT_AUDIOFORMAT(in_format)}, + {"srate", OPT_INT(in_srate), M_RANGE(1000, 8*48000)}, + {"channels", OPT_CHANNELS(in_channels), + .flags = M_OPT_CHANNELS_LIMITED}, + {"out-srate", OPT_INT(out_srate), M_RANGE(1000, 8*48000)}, + {"out-channels", OPT_CHANNELS(out_channels), + .flags = M_OPT_CHANNELS_LIMITED}, + {"fail", OPT_BOOL(fail)}, + {0} + }, + }, + .create = af_format_create, +}; diff --git a/audio/filter/af_lavcac3enc.c b/audio/filter/af_lavcac3enc.c new file mode 100644 index 0000000..b4a1d59 --- /dev/null +++ b/audio/filter/af_lavcac3enc.c @@ -0,0 +1,437 @@ +/* + * audio filter for runtime AC-3 encoding with libavcodec. + * + * Copyright (C) 2007 Ulion <ulion A gmail P com> + * + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <inttypes.h> +#include <assert.h> + +#include <libavcodec/avcodec.h> +#include <libavutil/intreadwrite.h> +#include <libavutil/common.h> +#include <libavutil/bswap.h> +#include <libavutil/mem.h> + +#include "config.h" + +#include "audio/aframe.h" +#include "audio/chmap_avchannel.h" +#include "audio/chmap_sel.h" +#include "audio/fmt-conversion.h" +#include "audio/format.h" +#include "common/av_common.h" +#include "common/common.h" +#include "filters/f_autoconvert.h" +#include "filters/f_utils.h" +#include "filters/filter_internal.h" +#include "filters/user_filters.h" +#include "options/m_option.h" + + +#define AC3_MAX_CHANNELS 6 +#define AC3_MAX_CODED_FRAME_SIZE 3840 +#define AC3_FRAME_SIZE (6 * 256) +const static uint16_t ac3_bitrate_tab[19] = { + 32, 40, 48, 56, 64, 80, 96, 112, 128, + 160, 192, 224, 256, 320, 384, 448, 512, 576, 640 +}; + +struct f_opts { + bool add_iec61937_header; + int bit_rate; + int min_channel_num; + char *encoder; + char **avopts; +}; + +struct priv { + struct f_opts *opts; + + struct mp_pin *in_pin; + struct mp_aframe *cur_format; + struct mp_aframe *in_frame; + struct mp_aframe_pool *out_pool; + + const struct AVCodec *lavc_acodec; + struct AVCodecContext *lavc_actx; + AVPacket *lavc_pkt; + int bit_rate; + int out_samples; // upper bound on encoded output per AC3 frame +}; + +static bool reinit(struct mp_filter *f) +{ + struct priv *s = f->priv; + + mp_aframe_reset(s->cur_format); + + static const int default_bit_rate[AC3_MAX_CHANNELS+1] = \ + {0, 96000, 192000, 256000, 384000, 448000, 448000}; + + if (s->opts->add_iec61937_header) { + s->out_samples = AC3_FRAME_SIZE; + } else { + s->out_samples = AC3_MAX_CODED_FRAME_SIZE / + mp_aframe_get_sstride(s->in_frame); + } + + int format = mp_aframe_get_format(s->in_frame); + int rate = mp_aframe_get_rate(s->in_frame); + struct mp_chmap chmap = {0}; + mp_aframe_get_chmap(s->in_frame, &chmap); + + int bit_rate = 
s->bit_rate; + if (!bit_rate && chmap.num < AC3_MAX_CHANNELS + 1) + bit_rate = default_bit_rate[chmap.num]; + + avcodec_close(s->lavc_actx); + + // Put sample parameters + s->lavc_actx->sample_fmt = af_to_avformat(format); + +#if !HAVE_AV_CHANNEL_LAYOUT + s->lavc_actx->channels = chmap.num; + s->lavc_actx->channel_layout = mp_chmap_to_lavc(&chmap); +#else + mp_chmap_to_av_layout(&s->lavc_actx->ch_layout, &chmap); +#endif + s->lavc_actx->sample_rate = rate; + s->lavc_actx->bit_rate = bit_rate; + + if (avcodec_open2(s->lavc_actx, s->lavc_acodec, NULL) < 0) { + MP_ERR(f, "Couldn't open codec %s, br=%d.\n", "ac3", bit_rate); + return false; + } + + if (s->lavc_actx->frame_size < 1) { + MP_ERR(f, "encoder didn't specify input frame size\n"); + return false; + } + + mp_aframe_config_copy(s->cur_format, s->in_frame); + return true; +} + +static void reset(struct mp_filter *f) +{ + struct priv *s = f->priv; + + TA_FREEP(&s->in_frame); +} + +static void destroy(struct mp_filter *f) +{ + struct priv *s = f->priv; + + reset(f); + av_packet_free(&s->lavc_pkt); + avcodec_free_context(&s->lavc_actx); +} + +static void swap_16(uint16_t *ptr, size_t size) +{ + for (size_t n = 0; n < size; n++) + ptr[n] = av_bswap16(ptr[n]); +} + +static void process(struct mp_filter *f) +{ + struct priv *s = f->priv; + + if (!mp_pin_in_needs_data(f->ppins[1])) + return; + + bool err = true; + struct mp_aframe *out = NULL; + AVPacket *pkt = s->lavc_pkt; + + // Send input as long as it wants. + while (1) { + if (avcodec_is_open(s->lavc_actx)) { + int lavc_ret = avcodec_receive_packet(s->lavc_actx, pkt); + if (lavc_ret >= 0) + break; + if (lavc_ret < 0 && lavc_ret != AVERROR(EAGAIN)) { + MP_FATAL(f, "Encode failed (receive).\n"); + goto error; + } + } + AVFrame *frame = NULL; + struct mp_frame input = mp_pin_out_read(s->in_pin); + // The following code assumes no sample data buffering in the encoder. 
+ switch (input.type) { + case MP_FRAME_NONE: + goto done; // no data yet + case MP_FRAME_EOF: + mp_pin_in_write(f->ppins[1], input); + goto done; + case MP_FRAME_AUDIO: + TA_FREEP(&s->in_frame); + s->in_frame = input.data; + frame = mp_frame_to_av(input, NULL); + if (!frame) + goto error; + if (mp_aframe_get_channels(s->in_frame) < s->opts->min_channel_num) { + // Just pass it through. + s->in_frame = NULL; + mp_pin_in_write(f->ppins[1], input); + goto done; + } + if (!mp_aframe_config_equals(s->in_frame, s->cur_format)) { + if (!reinit(f)) + goto error; + } + break; + default: goto error; // unexpected packet type + } + int lavc_ret = avcodec_send_frame(s->lavc_actx, frame); + av_frame_free(&frame); + if (lavc_ret < 0 && lavc_ret != AVERROR(EAGAIN)) { + MP_FATAL(f, "Encode failed (send).\n"); + goto error; + } + } + + if (!s->in_frame) + goto error; + + out = mp_aframe_create(); + mp_aframe_set_format(out, AF_FORMAT_S_AC3); + mp_aframe_set_chmap(out, &(struct mp_chmap)MP_CHMAP_INIT_STEREO); + mp_aframe_set_rate(out, 48000); + + if (mp_aframe_pool_allocate(s->out_pool, out, s->out_samples) < 0) + goto error; + + int sstride = mp_aframe_get_sstride(out); + + mp_aframe_copy_attributes(out, s->in_frame); + + int frame_size = pkt->size; + int header_len = 0; + char hdr[8]; + + if (s->opts->add_iec61937_header && pkt->size > 5) { + int bsmod = pkt->data[5] & 0x7; + int len = frame_size; + + frame_size = AC3_FRAME_SIZE * 2 * 2; + header_len = 8; + + AV_WL16(hdr, 0xF872); // iec 61937 syncword 1 + AV_WL16(hdr + 2, 0x4E1F); // iec 61937 syncword 2 + hdr[5] = bsmod; // bsmod + hdr[4] = 0x01; // data-type ac3 + AV_WL16(hdr + 6, len << 3); // number of bits in payload + } + + if (frame_size > s->out_samples * sstride) + abort(); + + uint8_t **planes = mp_aframe_get_data_rw(out); + if (!planes) + goto error; + char *buf = planes[0]; + memcpy(buf, hdr, header_len); + memcpy(buf + header_len, pkt->data, pkt->size); + memset(buf + header_len + pkt->size, 0, + frame_size - 
(header_len + pkt->size)); + swap_16((uint16_t *)(buf + header_len), pkt->size / 2); + mp_aframe_set_size(out, frame_size / sstride); + mp_pin_in_write(f->ppins[1], MAKE_FRAME(MP_FRAME_AUDIO, out)); + out = NULL; + +done: + err = false; + // fall through +error: + av_packet_unref(pkt); + talloc_free(out); + if (err) + mp_filter_internal_mark_failed(f); +} + +static const struct mp_filter_info af_lavcac3enc_filter = { + .name = "lavcac3enc", + .priv_size = sizeof(struct priv), + .process = process, + .reset = reset, + .destroy = destroy, +}; + +static void add_chmaps_to_autoconv(struct mp_filter *f, + struct mp_autoconvert *conv, + const struct AVCodec *codec) +{ +#if !HAVE_AV_CHANNEL_LAYOUT + const uint64_t *lch = codec->channel_layouts; + for (int n = 0; lch && lch[n]; n++) { + struct mp_chmap chmap = {0}; + mp_chmap_from_lavc(&chmap, lch[n]); + if (mp_chmap_is_valid(&chmap)) + mp_autoconvert_add_chmap(conv, &chmap); + } +#else + const AVChannelLayout *lch = codec->ch_layouts; + for (int n = 0; lch && lch[n].nb_channels; n++) { + struct mp_chmap chmap = {0}; + + if (!mp_chmap_from_av_layout(&chmap, &lch[n])) { + char layout[128] = {0}; + MP_VERBOSE(f, "Skipping unsupported channel layout: %s\n", + av_channel_layout_describe(&lch[n], + layout, 128) < 0 ? 
+ "undefined" : layout); + continue; + } + + if (mp_chmap_is_valid(&chmap)) + mp_autoconvert_add_chmap(conv, &chmap); + } +#endif +} + +static struct mp_filter *af_lavcac3enc_create(struct mp_filter *parent, + void *options) +{ + struct mp_filter *f = mp_filter_create(parent, &af_lavcac3enc_filter); + if (!f) { + talloc_free(options); + return NULL; + } + + mp_filter_add_pin(f, MP_PIN_IN, "in"); + mp_filter_add_pin(f, MP_PIN_OUT, "out"); + + struct priv *s = f->priv; + s->opts = talloc_steal(s, options); + s->cur_format = talloc_steal(s, mp_aframe_create()); + s->out_pool = mp_aframe_pool_create(s); + + s->lavc_acodec = avcodec_find_encoder_by_name(s->opts->encoder); + if (!s->lavc_acodec) { + MP_ERR(f, "Couldn't find encoder %s.\n", s->opts->encoder); + goto error; + } + + s->lavc_actx = avcodec_alloc_context3(s->lavc_acodec); + if (!s->lavc_actx) { + MP_ERR(f, "Audio LAVC, couldn't allocate context!\n"); + goto error; + } + + s->lavc_pkt = av_packet_alloc(); + if (!s->lavc_pkt) + goto error; + + if (mp_set_avopts(f->log, s->lavc_actx, s->opts->avopts) < 0) + goto error; + + // For this one, we require the decoder to export lists of all supported + // parameters. (Not all decoders do that, but the ones we're interested + // in do.) 
+ if (!s->lavc_acodec->sample_fmts || +#if !HAVE_AV_CHANNEL_LAYOUT + !s->lavc_acodec->channel_layouts +#else + !s->lavc_acodec->ch_layouts +#endif + ) + { + MP_ERR(f, "Audio encoder doesn't list supported parameters.\n"); + goto error; + } + + if (s->opts->bit_rate) { + int i; + for (i = 0; i < 19; i++) { + if (ac3_bitrate_tab[i] == s->opts->bit_rate) { + s->bit_rate = ac3_bitrate_tab[i] * 1000; + break; + } + } + if (i >= 19) { + MP_WARN(f, "unable set unsupported bitrate %d, using default " + "bitrate (check manpage to see supported bitrates).\n", + s->opts->bit_rate); + } + } + + struct mp_autoconvert *conv = mp_autoconvert_create(f); + if (!conv) + abort(); + + const enum AVSampleFormat *lf = s->lavc_acodec->sample_fmts; + for (int i = 0; lf && lf[i] != AV_SAMPLE_FMT_NONE; i++) { + int mpfmt = af_from_avformat(lf[i]); + if (mpfmt) + mp_autoconvert_add_afmt(conv, mpfmt); + } + + add_chmaps_to_autoconv(f, conv, s->lavc_acodec); + + // At least currently, the AC3 encoder doesn't export sample rates. 
+ mp_autoconvert_add_srate(conv, 48000); + + mp_pin_connect(conv->f->pins[0], f->ppins[0]); + + struct mp_filter *fs = mp_fixed_aframe_size_create(f, AC3_FRAME_SIZE, true); + if (!fs) + abort(); + + mp_pin_connect(fs->pins[0], conv->f->pins[1]); + s->in_pin = fs->pins[1]; + + return f; + +error: + av_packet_free(&s->lavc_pkt); + avcodec_free_context(&s->lavc_actx); + talloc_free(f); + return NULL; +} + +#define OPT_BASE_STRUCT struct f_opts + +const struct mp_user_filter_entry af_lavcac3enc = { + .desc = { + .description = "runtime encode to ac3 using libavcodec", + .name = "lavcac3enc", + .priv_size = sizeof(OPT_BASE_STRUCT), + .priv_defaults = &(const OPT_BASE_STRUCT) { + .add_iec61937_header = true, + .bit_rate = 640, + .min_channel_num = 3, + .encoder = "ac3", + }, + .options = (const struct m_option[]) { + {"tospdif", OPT_BOOL(add_iec61937_header)}, + {"bitrate", OPT_CHOICE(bit_rate, + {"auto", 0}, {"default", 0}), M_RANGE(32, 640)}, + {"minch", OPT_INT(min_channel_num), M_RANGE(2, 6)}, + {"encoder", OPT_STRING(encoder)}, + {"o", OPT_KEYVALUELIST(avopts)}, + {0} + }, + }, + .create = af_lavcac3enc_create, +}; diff --git a/audio/filter/af_rubberband.c b/audio/filter/af_rubberband.c new file mode 100644 index 0000000..48e5cc1 --- /dev/null +++ b/audio/filter/af_rubberband.c @@ -0,0 +1,382 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. 
If not, see <http://www.gnu.org/licenses/>. + */ + +#include <stdlib.h> +#include <assert.h> + +#include <rubberband/rubberband-c.h> + +#include "config.h" + +#include "audio/aframe.h" +#include "audio/format.h" +#include "common/common.h" +#include "filters/f_autoconvert.h" +#include "filters/filter_internal.h" +#include "filters/user_filters.h" +#include "options/m_option.h" + +// command line options +struct f_opts { + int transients, detector, phase, window, + smoothing, formant, pitch, channels, engine; + double scale; +}; + +struct priv { + struct f_opts *opts; + + struct mp_pin *in_pin; + struct mp_aframe *cur_format; + struct mp_aframe_pool *out_pool; + bool sent_final; + RubberBandState rubber; + double speed; + double pitch; + struct mp_aframe *pending; + // Estimate how much librubberband has buffered internally. + // I could not find a way to do this with the librubberband API. + double rubber_delay; +}; + +static void update_speed(struct priv *p, double new_speed) +{ + p->speed = new_speed; + if (p->rubber) + rubberband_set_time_ratio(p->rubber, 1.0 / p->speed); +} + +static bool update_pitch(struct priv *p, double new_pitch) +{ + if (new_pitch < 0.01 || new_pitch > 100.0) + return false; + + p->pitch = new_pitch; + if (p->rubber) + rubberband_set_pitch_scale(p->rubber, p->pitch); + return true; +} + +static bool init_rubberband(struct mp_filter *f) +{ + struct priv *p = f->priv; + + assert(!p->rubber); + assert(p->pending); + + int opts = p->opts->transients | p->opts->detector | p->opts->phase | + p->opts->window | p->opts->smoothing | p->opts->formant | + p->opts->pitch | p->opts->channels | +#if HAVE_RUBBERBAND_3 + p->opts->engine | +#endif + RubberBandOptionProcessRealTime; + + int rate = mp_aframe_get_rate(p->pending); + int channels = mp_aframe_get_channels(p->pending); + if (mp_aframe_get_format(p->pending) != AF_FORMAT_FLOATP) + return false; + + p->rubber = rubberband_new(rate, channels, opts, 1.0, 1.0); + if (!p->rubber) { + MP_FATAL(f, 
"librubberband initialization failed.\n"); + return false; + } + + mp_aframe_config_copy(p->cur_format, p->pending); + + update_speed(p, p->speed); + update_pitch(p, p->pitch); + + return true; +} + +static void process(struct mp_filter *f) +{ + struct priv *p = f->priv; + + if (!mp_pin_in_needs_data(f->ppins[1])) + return; + + while (!p->rubber || !p->pending || rubberband_available(p->rubber) <= 0) { + const float *dummy[MP_NUM_CHANNELS] = {0}; + const float **in_data = dummy; + size_t in_samples = 0; + + bool eof = false; + if (!p->pending || !mp_aframe_get_size(p->pending)) { + struct mp_frame frame = mp_pin_out_read(p->in_pin); + if (frame.type == MP_FRAME_AUDIO) { + TA_FREEP(&p->pending); + p->pending = frame.data; + } else if (frame.type == MP_FRAME_EOF) { + eof = true; + } else if (frame.type) { + MP_ERR(f, "unexpected frame type\n"); + goto error; + } else { + return; // no new data yet + } + } + assert(p->pending || eof); + + if (!p->rubber) { + if (!p->pending) { + mp_pin_in_write(f->ppins[1], MP_EOF_FRAME); + return; + } + if (!init_rubberband(f)) + goto error; + } + + bool format_change = + p->pending && !mp_aframe_config_equals(p->pending, p->cur_format); + + if (p->pending && !format_change) { + size_t needs = rubberband_get_samples_required(p->rubber); + uint8_t **planes = mp_aframe_get_data_ro(p->pending); + int num_planes = mp_aframe_get_planes(p->pending); + for (int n = 0; n < num_planes; n++) + in_data[n] = (void *)planes[n]; + in_samples = MPMIN(mp_aframe_get_size(p->pending), needs); + } + + bool final = format_change || eof; + if (!p->sent_final) + rubberband_process(p->rubber, in_data, in_samples, final); + p->sent_final |= final; + + p->rubber_delay += in_samples; + + if (p->pending && !format_change) + mp_aframe_skip_samples(p->pending, in_samples); + + if (rubberband_available(p->rubber) > 0) { + if (eof) + mp_pin_out_repeat_eof(p->in_pin); // drain more next time + } else { + if (eof) { + mp_pin_in_write(f->ppins[1], MP_EOF_FRAME); + 
rubberband_reset(p->rubber); + p->rubber_delay = 0; + TA_FREEP(&p->pending); + p->sent_final = false; + return; + } else if (format_change) { + // go on with proper reinit on the next iteration + rubberband_delete(p->rubber); + p->sent_final = false; + p->rubber = NULL; + } + } + } + + assert(p->pending); + + int out_samples = rubberband_available(p->rubber); + if (out_samples > 0) { + struct mp_aframe *out = mp_aframe_new_ref(p->cur_format); + if (mp_aframe_pool_allocate(p->out_pool, out, out_samples) < 0) { + talloc_free(out); + goto error; + } + + mp_aframe_copy_attributes(out, p->pending); + + float *out_data[MP_NUM_CHANNELS] = {0}; + uint8_t **planes = mp_aframe_get_data_rw(out); + assert(planes); + int num_planes = mp_aframe_get_planes(out); + for (int n = 0; n < num_planes; n++) + out_data[n] = (void *)planes[n]; + + out_samples = rubberband_retrieve(p->rubber, out_data, out_samples); + + if (!out_samples) { + mp_filter_internal_mark_progress(f); // unexpected, just try again + talloc_free(out); + return; + } + + mp_aframe_set_size(out, out_samples); + + p->rubber_delay -= out_samples * p->speed; + + double pts = mp_aframe_get_pts(p->pending); + if (pts != MP_NOPTS_VALUE) { + // Note: rubberband_get_latency() does not do what you'd expect. 
+ double delay = p->rubber_delay / mp_aframe_get_effective_rate(out); + mp_aframe_set_pts(out, pts - delay); + } + + mp_aframe_mul_speed(out, p->speed); + + mp_pin_in_write(f->ppins[1], MAKE_FRAME(MP_FRAME_AUDIO, out)); + } + + return; +error: + mp_filter_internal_mark_failed(f); +} + +static bool command(struct mp_filter *f, struct mp_filter_command *cmd) +{ + struct priv *p = f->priv; + + switch (cmd->type) { + case MP_FILTER_COMMAND_TEXT: { + char *endptr = NULL; + double pitch = p->pitch; + if (!strcmp(cmd->cmd, "set-pitch")) { + pitch = strtod(cmd->arg, &endptr); + if (*endptr) + return false; + return update_pitch(p, pitch); + } else if (!strcmp(cmd->cmd, "multiply-pitch")) { + double mult = strtod(cmd->arg, &endptr); + if (*endptr || mult <= 0) + return false; + pitch *= mult; + return update_pitch(p, pitch); + } + return false; + } + case MP_FILTER_COMMAND_SET_SPEED: + update_speed(p, cmd->speed); + return true; + } + + return false; +} + +static void reset(struct mp_filter *f) +{ + struct priv *p = f->priv; + + if (p->rubber) + rubberband_reset(p->rubber); + p->rubber_delay = 0; + p->sent_final = false; + TA_FREEP(&p->pending); +} + +static void destroy(struct mp_filter *f) +{ + struct priv *p = f->priv; + + if (p->rubber) + rubberband_delete(p->rubber); + talloc_free(p->pending); +} + +static const struct mp_filter_info af_rubberband_filter = { + .name = "rubberband", + .priv_size = sizeof(struct priv), + .process = process, + .command = command, + .reset = reset, + .destroy = destroy, +}; + +static struct mp_filter *af_rubberband_create(struct mp_filter *parent, + void *options) +{ + struct mp_filter *f = mp_filter_create(parent, &af_rubberband_filter); + if (!f) { + talloc_free(options); + return NULL; + } + + mp_filter_add_pin(f, MP_PIN_IN, "in"); + mp_filter_add_pin(f, MP_PIN_OUT, "out"); + + struct priv *p = f->priv; + p->opts = talloc_steal(p, options); + p->speed = 1.0; + p->pitch = p->opts->scale; + p->cur_format = talloc_steal(p, 
mp_aframe_create()); + p->out_pool = mp_aframe_pool_create(p); + + struct mp_autoconvert *conv = mp_autoconvert_create(f); + if (!conv) + abort(); + + mp_autoconvert_add_afmt(conv, AF_FORMAT_FLOATP); + + mp_pin_connect(conv->f->pins[0], f->ppins[0]); + p->in_pin = conv->f->pins[1]; + + return f; +} + +#define OPT_BASE_STRUCT struct f_opts + +const struct mp_user_filter_entry af_rubberband = { + .desc = { + .description = "Pitch conversion with librubberband", + .name = "rubberband", + .priv_size = sizeof(OPT_BASE_STRUCT), + .priv_defaults = &(const OPT_BASE_STRUCT) { + .scale = 1.0, + .pitch = RubberBandOptionPitchHighConsistency, + .transients = RubberBandOptionTransientsMixed, + .formant = RubberBandOptionFormantPreserved, + .channels = RubberBandOptionChannelsTogether, +#if HAVE_RUBBERBAND_3 + .engine = RubberBandOptionEngineFiner, +#endif + }, + .options = (const struct m_option[]) { + {"transients", OPT_CHOICE(transients, + {"crisp", RubberBandOptionTransientsCrisp}, + {"mixed", RubberBandOptionTransientsMixed}, + {"smooth", RubberBandOptionTransientsSmooth})}, + {"detector", OPT_CHOICE(detector, + {"compound", RubberBandOptionDetectorCompound}, + {"percussive", RubberBandOptionDetectorPercussive}, + {"soft", RubberBandOptionDetectorSoft})}, + {"phase", OPT_CHOICE(phase, + {"laminar", RubberBandOptionPhaseLaminar}, + {"independent", RubberBandOptionPhaseIndependent})}, + {"window", OPT_CHOICE(window, + {"standard", RubberBandOptionWindowStandard}, + {"short", RubberBandOptionWindowShort}, + {"long", RubberBandOptionWindowLong})}, + {"smoothing", OPT_CHOICE(smoothing, + {"off", RubberBandOptionSmoothingOff}, + {"on", RubberBandOptionSmoothingOn})}, + {"formant", OPT_CHOICE(formant, + {"shifted", RubberBandOptionFormantShifted}, + {"preserved", RubberBandOptionFormantPreserved})}, + {"pitch", OPT_CHOICE(pitch, + {"quality", RubberBandOptionPitchHighQuality}, + {"speed", RubberBandOptionPitchHighSpeed}, + {"consistency", RubberBandOptionPitchHighConsistency})}, + 
{"channels", OPT_CHOICE(channels, + {"apart", RubberBandOptionChannelsApart}, + {"together", RubberBandOptionChannelsTogether})}, +#if HAVE_RUBBERBAND_3 + {"engine", OPT_CHOICE(engine, + {"finer", RubberBandOptionEngineFiner}, + {"faster", RubberBandOptionEngineFaster})}, +#endif + {"pitch-scale", OPT_DOUBLE(scale), M_RANGE(0.01, 100)}, + {0} + }, + }, + .create = af_rubberband_create, +}; diff --git a/audio/filter/af_scaletempo.c b/audio/filter/af_scaletempo.c new file mode 100644 index 0000000..f06478f --- /dev/null +++ b/audio/filter/af_scaletempo.c @@ -0,0 +1,626 @@ +/* + * scaletempo audio filter + * + * scale tempo while maintaining pitch + * (WSOLA technique with cross correlation) + * inspired by SoundTouch library by Olli Parviainen + * + * basic algorithm + * - produce 'stride' output samples per loop + * - consume stride*scale input samples per loop + * + * to produce smoother transitions between strides, blend next overlap + * samples from last stride with correlated samples of current input + * + * Copyright (c) 2007 Robert Juliano + * + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <float.h> +#include <stdlib.h> +#include <string.h> +#include <limits.h> +#include <assert.h> + +#include "audio/aframe.h" +#include "audio/format.h" +#include "common/common.h" +#include "filters/f_autoconvert.h" +#include "filters/filter_internal.h" +#include "filters/user_filters.h" +#include "options/m_option.h" + +struct f_opts { + float scale_nominal; + float ms_stride; + float ms_search; + float factor_overlap; +#define SCALE_TEMPO 1 +#define SCALE_PITCH 2 + int speed_opt; +}; + +struct priv { + struct f_opts *opts; + + struct mp_pin *in_pin; + struct mp_aframe *cur_format; + struct mp_aframe_pool *out_pool; + double current_pts; + struct mp_aframe *in; + + // stride + float scale; + float speed; + int frames_stride; + float frames_stride_scaled; + float frames_stride_error; + int bytes_per_frame; + int bytes_stride; + int bytes_queue; + int bytes_queued; + int bytes_to_slide; + int8_t *buf_queue; + // overlap + int samples_overlap; + int samples_standing; + int bytes_overlap; + int bytes_standing; + void *buf_overlap; + void *table_blend; + void (*output_overlap)(struct priv *s, void *out_buf, + int bytes_off); + // best overlap + int frames_search; + int num_channels; + void *buf_pre_corr; + void *table_window; + int (*best_overlap_offset)(struct priv *s); +}; + +static bool reinit(struct mp_filter *f); + +// Return whether it got enough data for filtering. +static bool fill_queue(struct priv *s) +{ + int bytes_in = s->in ? 
mp_aframe_get_size(s->in) * s->bytes_per_frame : 0; + int offset = 0; + + if (s->bytes_to_slide > 0) { + if (s->bytes_to_slide < s->bytes_queued) { + int bytes_move = s->bytes_queued - s->bytes_to_slide; + memmove(s->buf_queue, s->buf_queue + s->bytes_to_slide, bytes_move); + s->bytes_to_slide = 0; + s->bytes_queued = bytes_move; + } else { + int bytes_skip; + s->bytes_to_slide -= s->bytes_queued; + bytes_skip = MPMIN(s->bytes_to_slide, bytes_in); + s->bytes_queued = 0; + s->bytes_to_slide -= bytes_skip; + offset += bytes_skip; + bytes_in -= bytes_skip; + } + } + + int bytes_needed = s->bytes_queue - s->bytes_queued; + assert(bytes_needed >= 0); + + int bytes_copy = MPMIN(bytes_needed, bytes_in); + if (bytes_copy > 0) { + uint8_t **planes = mp_aframe_get_data_ro(s->in); + memcpy(s->buf_queue + s->bytes_queued, planes[0] + offset, bytes_copy); + s->bytes_queued += bytes_copy; + offset += bytes_copy; + bytes_needed -= bytes_copy; + } + + if (s->in) + mp_aframe_skip_samples(s->in, offset / s->bytes_per_frame); + + return bytes_needed == 0; +} + +#define UNROLL_PADDING (4 * 4) + +static int best_overlap_offset_float(struct priv *s) +{ + float best_corr = INT_MIN; + int best_off = 0; + + float *pw = s->table_window; + float *po = s->buf_overlap; + po += s->num_channels; + float *ppc = s->buf_pre_corr; + for (int i = s->num_channels; i < s->samples_overlap; i++) + *ppc++ = *pw++ **po++; + + float *search_start = (float *)s->buf_queue + s->num_channels; + for (int off = 0; off < s->frames_search; off++) { + float corr = 0; + float *ps = search_start; + ppc = s->buf_pre_corr; + for (int i = s->num_channels; i < s->samples_overlap; i++) + corr += *ppc++ **ps++; + if (corr > best_corr) { + best_corr = corr; + best_off = off; + } + search_start += s->num_channels; + } + + return best_off * 4 * s->num_channels; +} + +static int best_overlap_offset_s16(struct priv *s) +{ + int64_t best_corr = INT64_MIN; + int best_off = 0; + + int32_t *pw = s->table_window; + int16_t *po = 
s->buf_overlap; + po += s->num_channels; + int32_t *ppc = s->buf_pre_corr; + for (long i = s->num_channels; i < s->samples_overlap; i++) + *ppc++ = (*pw++ **po++) >> 15; + + int16_t *search_start = (int16_t *)s->buf_queue + s->num_channels; + for (int off = 0; off < s->frames_search; off++) { + int64_t corr = 0; + int16_t *ps = search_start; + ppc = s->buf_pre_corr; + ppc += s->samples_overlap - s->num_channels; + ps += s->samples_overlap - s->num_channels; + long i = -(s->samples_overlap - s->num_channels); + do { + corr += ppc[i + 0] * (int64_t)ps[i + 0]; + corr += ppc[i + 1] * (int64_t)ps[i + 1]; + corr += ppc[i + 2] * (int64_t)ps[i + 2]; + corr += ppc[i + 3] * (int64_t)ps[i + 3]; + i += 4; + } while (i < 0); + if (corr > best_corr) { + best_corr = corr; + best_off = off; + } + search_start += s->num_channels; + } + + return best_off * 2 * s->num_channels; +} + +static void output_overlap_float(struct priv *s, void *buf_out, + int bytes_off) +{ + float *pout = buf_out; + float *pb = s->table_blend; + float *po = s->buf_overlap; + float *pin = (float *)(s->buf_queue + bytes_off); + for (int i = 0; i < s->samples_overlap; i++) { + *pout++ = *po - *pb++ *(*po - *pin++); + po++; + } +} + +static void output_overlap_s16(struct priv *s, void *buf_out, + int bytes_off) +{ + int16_t *pout = buf_out; + int32_t *pb = s->table_blend; + int16_t *po = s->buf_overlap; + int16_t *pin = (int16_t *)(s->buf_queue + bytes_off); + for (int i = 0; i < s->samples_overlap; i++) { + *pout++ = *po - ((*pb++ *(*po - *pin++)) >> 16); + po++; + } +} + +static void process(struct mp_filter *f) +{ + struct priv *s = f->priv; + + if (!mp_pin_in_needs_data(f->ppins[1])) + return; + + struct mp_aframe *out = NULL; + + bool drain = false; + bool is_eof = false; + if (!s->in) { + struct mp_frame frame = mp_pin_out_read(s->in_pin); + if (!frame.type) + return; // no input yet + if (frame.type != MP_FRAME_AUDIO && frame.type != MP_FRAME_EOF) { + MP_ERR(f, "unexpected frame type\n"); + goto error; + 
} + + s->in = frame.type == MP_FRAME_AUDIO ? frame.data : NULL; + is_eof = drain = !s->in; + + // EOF before it was even initialized once. + if (is_eof && !mp_aframe_config_is_valid(s->cur_format)) { + mp_pin_in_write(f->ppins[1], MP_EOF_FRAME); + return; + } + + if (s->in && !mp_aframe_config_equals(s->in, s->cur_format)) { + if (s->bytes_queued) { + // Drain remaining data before executing the format change. + MP_VERBOSE(f, "draining\n"); + mp_pin_out_unread(s->in_pin, frame); + s->in = NULL; + drain = true; + } else { + if (!reinit(f)) { + MP_ERR(f, "initialization failed\n"); + goto error; + } + } + } + + if (s->in) + s->current_pts = mp_aframe_end_pts(s->in); + } + + if (!fill_queue(s) && !drain) { + TA_FREEP(&s->in); + mp_pin_out_request_data_next(s->in_pin); + return; + } + + int max_out_samples = s->bytes_stride / s->bytes_per_frame; + if (drain) + max_out_samples += s->bytes_queued; + + out = mp_aframe_new_ref(s->cur_format); + if (mp_aframe_pool_allocate(s->out_pool, out, max_out_samples) < 0) + goto error; + + if (s->in) + mp_aframe_copy_attributes(out, s->in); + + uint8_t **out_planes = mp_aframe_get_data_rw(out); + if (!out_planes) + goto error; + int8_t *pout = out_planes[0]; + int out_offset = 0; + if (s->bytes_queued >= s->bytes_queue) { + int ti; + float tf; + int bytes_off = 0; + + // output stride + if (s->output_overlap) { + if (s->best_overlap_offset) + bytes_off = s->best_overlap_offset(s); + s->output_overlap(s, pout + out_offset, bytes_off); + } + memcpy(pout + out_offset + s->bytes_overlap, + s->buf_queue + bytes_off + s->bytes_overlap, + s->bytes_standing); + out_offset += s->bytes_stride; + + // input stride + memcpy(s->buf_overlap, + s->buf_queue + bytes_off + s->bytes_stride, + s->bytes_overlap); + tf = s->frames_stride_scaled + s->frames_stride_error; + ti = (int)tf; + s->frames_stride_error = tf - ti; + s->bytes_to_slide = ti * s->bytes_per_frame; + } + // Drain remaining buffered data. 
+ if (drain && s->bytes_queued) { + memcpy(pout + out_offset, s->buf_queue, s->bytes_queued); + out_offset += s->bytes_queued; + s->bytes_queued = 0; + } + mp_aframe_set_size(out, out_offset / s->bytes_per_frame); + + // This filter can have a negative delay when scale > 1: + // output corresponding to some length of input can be decided and written + // after receiving only a part of that input. + float delay = (out_offset * s->speed + s->bytes_queued - s->bytes_to_slide) / + s->bytes_per_frame / mp_aframe_get_effective_rate(out) + + (s->in ? mp_aframe_duration(s->in) : 0); + + if (s->current_pts != MP_NOPTS_VALUE) + mp_aframe_set_pts(out, s->current_pts - delay); + + mp_aframe_mul_speed(out, s->speed); + + if (!mp_aframe_get_size(out)) + TA_FREEP(&out); + + if (is_eof && out) { + mp_pin_out_repeat_eof(s->in_pin); + } else if (is_eof && !out) { + mp_pin_in_write(f->ppins[1], MP_EOF_FRAME); + } else if (!is_eof && !out) { + mp_pin_out_request_data_next(s->in_pin); + } + + if (out) + mp_pin_in_write(f->ppins[1], MAKE_FRAME(MP_FRAME_AUDIO, out)); + + return; + +error: + TA_FREEP(&s->in); + talloc_free(out); + mp_filter_internal_mark_failed(f); +} + +static void update_speed(struct priv *s, float speed) +{ + s->speed = speed; + + double factor = (s->opts->speed_opt & SCALE_PITCH) ? 1.0 / s->speed : s->speed; + s->scale = factor * s->opts->scale_nominal; + + s->frames_stride_scaled = s->scale * s->frames_stride; + s->frames_stride_error = MPMIN(s->frames_stride_error, s->frames_stride_scaled); +} + +static bool reinit(struct mp_filter *f) +{ + struct priv *s = f->priv; + + mp_aframe_reset(s->cur_format); + + float srate = mp_aframe_get_rate(s->in) / 1000.0; + int nch = mp_aframe_get_channels(s->in); + int format = mp_aframe_get_format(s->in); + + int use_int = 0; + if (format == AF_FORMAT_S16) { + use_int = 1; + } else if (format != AF_FORMAT_FLOAT) { + return false; + } + int bps = use_int ? 
2 : 4; + + s->frames_stride = srate * s->opts->ms_stride; + s->bytes_stride = s->frames_stride * bps * nch; + + update_speed(s, s->speed); + + int frames_overlap = s->frames_stride * s->opts->factor_overlap; + if (frames_overlap <= 0) { + s->bytes_standing = s->bytes_stride; + s->samples_standing = s->bytes_standing / bps; + s->output_overlap = NULL; + s->bytes_overlap = 0; + } else { + s->samples_overlap = frames_overlap * nch; + s->bytes_overlap = frames_overlap * nch * bps; + s->bytes_standing = s->bytes_stride - s->bytes_overlap; + s->samples_standing = s->bytes_standing / bps; + s->buf_overlap = realloc(s->buf_overlap, s->bytes_overlap); + s->table_blend = realloc(s->table_blend, s->bytes_overlap * 4); + if (!s->buf_overlap || !s->table_blend) { + MP_FATAL(f, "Out of memory\n"); + return false; + } + memset(s->buf_overlap, 0, s->bytes_overlap); + if (use_int) { + int32_t *pb = s->table_blend; + int64_t blend = 0; + for (int i = 0; i < frames_overlap; i++) { + int32_t v = blend / frames_overlap; + for (int j = 0; j < nch; j++) + *pb++ = v; + blend += 65536; // 2^16 + } + s->output_overlap = output_overlap_s16; + } else { + float *pb = s->table_blend; + for (int i = 0; i < frames_overlap; i++) { + float v = i / (float)frames_overlap; + for (int j = 0; j < nch; j++) + *pb++ = v; + } + s->output_overlap = output_overlap_float; + } + } + + s->frames_search = (frames_overlap > 1) ? 
srate * s->opts->ms_search : 0; + if (s->frames_search <= 0) + s->best_overlap_offset = NULL; + else { + if (use_int) { + int64_t t = frames_overlap; + int32_t n = 8589934588LL / (t * t); // 4 * (2^31 - 1) / t^2 + s->buf_pre_corr = realloc(s->buf_pre_corr, + s->bytes_overlap * 2 + UNROLL_PADDING); + s->table_window = realloc(s->table_window, + s->bytes_overlap * 2 - nch * bps * 2); + if (!s->buf_pre_corr || !s->table_window) { + MP_FATAL(f, "Out of memory\n"); + return false; + } + memset((char *)s->buf_pre_corr + s->bytes_overlap * 2, 0, + UNROLL_PADDING); + int32_t *pw = s->table_window; + for (int i = 1; i < frames_overlap; i++) { + int32_t v = (i * (t - i) * n) >> 15; + for (int j = 0; j < nch; j++) + *pw++ = v; + } + s->best_overlap_offset = best_overlap_offset_s16; + } else { + s->buf_pre_corr = realloc(s->buf_pre_corr, s->bytes_overlap); + s->table_window = realloc(s->table_window, + s->bytes_overlap - nch * bps); + if (!s->buf_pre_corr || !s->table_window) { + MP_FATAL(f, "Out of memory\n"); + return false; + } + float *pw = s->table_window; + for (int i = 1; i < frames_overlap; i++) { + float v = i * (frames_overlap - i); + for (int j = 0; j < nch; j++) + *pw++ = v; + } + s->best_overlap_offset = best_overlap_offset_float; + } + } + + s->bytes_per_frame = bps * nch; + s->num_channels = nch; + + s->bytes_queue = (s->frames_search + s->frames_stride + frames_overlap) + * bps * nch; + s->buf_queue = realloc(s->buf_queue, s->bytes_queue + UNROLL_PADDING); + if (!s->buf_queue) { + MP_FATAL(f, "Out of memory\n"); + return false; + } + + s->bytes_queued = 0; + s->bytes_to_slide = 0; + + MP_DBG(f, "" + "%.2f stride_in, %i stride_out, %i standing, " + "%i overlap, %i search, %i queue, %s mode\n", + s->frames_stride_scaled, + (int)(s->bytes_stride / nch / bps), + (int)(s->bytes_standing / nch / bps), + (int)(s->bytes_overlap / nch / bps), + s->frames_search, + (int)(s->bytes_queue / nch / bps), + (use_int ? 
"s16" : "float")); + + mp_aframe_config_copy(s->cur_format, s->in); + + return true; +} + +static bool command(struct mp_filter *f, struct mp_filter_command *cmd) +{ + struct priv *s = f->priv; + + if (cmd->type == MP_FILTER_COMMAND_SET_SPEED) { + if (s->opts->speed_opt & SCALE_TEMPO) { + if (s->opts->speed_opt & SCALE_PITCH) + return false; + update_speed(s, cmd->speed); + return true; + } else if (s->opts->speed_opt & SCALE_PITCH) { + update_speed(s, cmd->speed); + return false; // do not signal OK + } + } + + return false; +} + +static void reset(struct mp_filter *f) +{ + struct priv *s = f->priv; + + s->current_pts = MP_NOPTS_VALUE; + s->bytes_queued = 0; + s->bytes_to_slide = 0; + s->frames_stride_error = 0; + if (s->buf_overlap && s->bytes_overlap) + memset(s->buf_overlap, 0, s->bytes_overlap); + TA_FREEP(&s->in); +} + +static void destroy(struct mp_filter *f) +{ + struct priv *s = f->priv; + free(s->buf_queue); + free(s->buf_overlap); + free(s->buf_pre_corr); + free(s->table_blend); + free(s->table_window); + TA_FREEP(&s->in); + mp_filter_free_children(f); +} + +static const struct mp_filter_info af_scaletempo_filter = { + .name = "scaletempo", + .priv_size = sizeof(struct priv), + .process = process, + .command = command, + .reset = reset, + .destroy = destroy, +}; + +static struct mp_filter *af_scaletempo_create(struct mp_filter *parent, + void *options) +{ + struct mp_filter *f = mp_filter_create(parent, &af_scaletempo_filter); + if (!f) { + talloc_free(options); + return NULL; + } + + mp_filter_add_pin(f, MP_PIN_IN, "in"); + mp_filter_add_pin(f, MP_PIN_OUT, "out"); + + struct priv *s = f->priv; + s->opts = talloc_steal(s, options); + s->speed = 1.0; + s->cur_format = talloc_steal(s, mp_aframe_create()); + s->out_pool = mp_aframe_pool_create(s); + + struct mp_autoconvert *conv = mp_autoconvert_create(f); + if (!conv) + abort(); + + mp_autoconvert_add_afmt(conv, AF_FORMAT_S16); + mp_autoconvert_add_afmt(conv, AF_FORMAT_FLOAT); + + 
mp_pin_connect(conv->f->pins[0], f->ppins[0]); + s->in_pin = conv->f->pins[1]; + + return f; +} + +#define OPT_BASE_STRUCT struct f_opts + +const struct mp_user_filter_entry af_scaletempo = { + .desc = { + .description = "Scale audio tempo while maintaining pitch", + .name = "scaletempo", + .priv_size = sizeof(OPT_BASE_STRUCT), + .priv_defaults = &(const OPT_BASE_STRUCT) { + .ms_stride = 60, + .factor_overlap = .20, + .ms_search = 14, + .speed_opt = SCALE_TEMPO, + .scale_nominal = 1.0, + }, + .options = (const struct m_option[]) { + {"scale", OPT_FLOAT(scale_nominal), M_RANGE(0.01, DBL_MAX)}, + {"stride", OPT_FLOAT(ms_stride), M_RANGE(0.01, DBL_MAX)}, + {"overlap", OPT_FLOAT(factor_overlap), M_RANGE(0, 1)}, + {"search", OPT_FLOAT(ms_search), M_RANGE(0, DBL_MAX)}, + {"speed", OPT_CHOICE(speed_opt, + {"pitch", SCALE_PITCH}, + {"tempo", SCALE_TEMPO}, + {"none", 0}, + {"both", SCALE_TEMPO | SCALE_PITCH})}, + {0} + }, + }, + .create = af_scaletempo_create, +}; diff --git a/audio/filter/af_scaletempo2.c b/audio/filter/af_scaletempo2.c new file mode 100644 index 0000000..7ad8e35 --- /dev/null +++ b/audio/filter/af_scaletempo2.c @@ -0,0 +1,254 @@ +#include "audio/aframe.h" +#include "audio/filter/af_scaletempo2_internals.h" +#include "audio/format.h" +#include "common/common.h" +#include "filters/f_autoconvert.h" +#include "filters/filter_internal.h" +#include "filters/user_filters.h" +#include "options/m_option.h" + +struct priv { + struct mp_scaletempo2 data; + struct mp_pin *in_pin; + struct mp_aframe *cur_format; + struct mp_aframe_pool *out_pool; + bool sent_final; + struct mp_aframe *pending; + bool initialized; + float speed; +}; + +static bool init_scaletempo2(struct mp_filter *f); +static void reset(struct mp_filter *f); + +static void process(struct mp_filter *f) +{ + struct priv *p = f->priv; + + if (!mp_pin_in_needs_data(f->ppins[1])) + return; + + while (!p->initialized || !p->pending || + !mp_scaletempo2_frames_available(&p->data, p->speed)) + { + bool eof = 
false; + if (!p->pending || !mp_aframe_get_size(p->pending)) { + struct mp_frame frame = mp_pin_out_read(p->in_pin); + if (frame.type == MP_FRAME_AUDIO) { + TA_FREEP(&p->pending); + p->pending = frame.data; + } else if (frame.type == MP_FRAME_EOF) { + eof = true; + } else if (frame.type) { + MP_ERR(f, "unexpected frame type\n"); + goto error; + } else { + return; // no new data yet + } + } + assert(p->pending || eof); + + if (!p->initialized) { + if (!p->pending) { + mp_pin_in_write(f->ppins[1], MP_EOF_FRAME); + return; + } + if (!init_scaletempo2(f)) + goto error; + } + + bool format_change = + p->pending && !mp_aframe_config_equals(p->pending, p->cur_format); + + bool final = format_change || eof; + if (p->pending && !format_change && !p->sent_final) { + int frame_size = mp_aframe_get_size(p->pending); + uint8_t **planes = mp_aframe_get_data_ro(p->pending); + int read = mp_scaletempo2_fill_input_buffer(&p->data, + planes, frame_size, p->speed); + mp_aframe_skip_samples(p->pending, read); + } + if (final && p->pending && !p->sent_final) { + mp_scaletempo2_set_final(&p->data); + p->sent_final = true; + } + + if (mp_scaletempo2_frames_available(&p->data, p->speed)) { + if (eof) { + mp_pin_out_repeat_eof(p->in_pin); // drain more next time + } + } else if (final) { + p->initialized = false; + p->sent_final = false; + if (eof) { + mp_pin_in_write(f->ppins[1], MP_EOF_FRAME); + return; + } + // for format change go on with proper reinit on the next iteration + } + } + + assert(p->pending); + if (mp_scaletempo2_frames_available(&p->data, p->speed)) { + struct mp_aframe *out = mp_aframe_new_ref(p->cur_format); + int out_samples = p->data.ola_hop_size; + if (mp_aframe_pool_allocate(p->out_pool, out, out_samples) < 0) { + talloc_free(out); + goto error; + } + + mp_aframe_copy_attributes(out, p->pending); + + uint8_t **planes = mp_aframe_get_data_rw(out); + assert(planes); + assert(mp_aframe_get_planes(out) == p->data.channels); + + out_samples = 
mp_scaletempo2_fill_buffer(&p->data, + (float**)planes, out_samples, p->speed); + + double pts = mp_aframe_get_pts(p->pending); + if (pts != MP_NOPTS_VALUE) { + double frame_delay = mp_scaletempo2_get_latency(&p->data, p->speed) + + out_samples * p->speed; + mp_aframe_set_pts(out, pts - frame_delay / mp_aframe_get_effective_rate(out)); + + if (p->sent_final) { + double remain_pts = pts - mp_aframe_get_pts(out); + double rate = mp_aframe_get_effective_rate(out) / p->speed; + int max_samples = MPMAX(0, (int) (remain_pts * rate)); + // truncate final packet to expected length + if (out_samples >= max_samples) { + out_samples = max_samples; + + // reset the filter to ensure it stops generating audio + // and mp_scaletempo2_frames_available returns false + mp_scaletempo2_reset(&p->data); + } + } + } + + mp_aframe_set_size(out, out_samples); + mp_aframe_mul_speed(out, p->speed); + mp_pin_in_write(f->ppins[1], MAKE_FRAME(MP_FRAME_AUDIO, out)); + } + + return; +error: + mp_filter_internal_mark_failed(f); +} + +static bool init_scaletempo2(struct mp_filter *f) +{ + struct priv *p = f->priv; + assert(p->pending); + + if (mp_aframe_get_format(p->pending) != AF_FORMAT_FLOATP) + return false; + + mp_aframe_reset(p->cur_format); + p->initialized = true; + p->sent_final = false; + mp_aframe_config_copy(p->cur_format, p->pending); + + mp_scaletempo2_init(&p->data, mp_aframe_get_channels(p->pending), + mp_aframe_get_rate(p->pending)); + + return true; +} + +static bool command(struct mp_filter *f, struct mp_filter_command *cmd) +{ + struct priv *p = f->priv; + + switch (cmd->type) { + case MP_FILTER_COMMAND_SET_SPEED: + p->speed = cmd->speed; + return true; + } + + return false; +} + +static void reset(struct mp_filter *f) +{ + struct priv *p = f->priv; + mp_scaletempo2_reset(&p->data); + p->initialized = false; + TA_FREEP(&p->pending); +} + +static void destroy(struct mp_filter *f) +{ + struct priv *p = f->priv; + mp_scaletempo2_destroy(&p->data); + talloc_free(p->pending); +} + 
+static const struct mp_filter_info af_scaletempo2_filter = { + .name = "scaletempo2", + .priv_size = sizeof(struct priv), + .process = process, + .command = command, + .reset = reset, + .destroy = destroy, +}; + +static struct mp_filter *af_scaletempo2_create( + struct mp_filter *parent, void *options) +{ + struct mp_filter *f = mp_filter_create(parent, &af_scaletempo2_filter); + if (!f) { + talloc_free(options); + return NULL; + } + + mp_filter_add_pin(f, MP_PIN_IN, "in"); + mp_filter_add_pin(f, MP_PIN_OUT, "out"); + + struct priv *p = f->priv; + p->data.opts = talloc_steal(p, options); + p->speed = 1.0; + p->cur_format = talloc_steal(p, mp_aframe_create()); + p->out_pool = mp_aframe_pool_create(p); + p->pending = NULL; + p->initialized = false; + + struct mp_autoconvert *conv = mp_autoconvert_create(f); + if (!conv) + abort(); + + mp_autoconvert_add_afmt(conv, AF_FORMAT_FLOATP); + + mp_pin_connect(conv->f->pins[0], f->ppins[0]); + p->in_pin = conv->f->pins[1]; + + return f; +} + +#define OPT_BASE_STRUCT struct mp_scaletempo2_opts +const struct mp_user_filter_entry af_scaletempo2 = { + .desc = { + .description = "Scale audio tempo while maintaining pitch" + " (filter ported from chromium)", + .name = "scaletempo2", + .priv_size = sizeof(OPT_BASE_STRUCT), + .priv_defaults = &(const OPT_BASE_STRUCT) { + .min_playback_rate = 0.25, + .max_playback_rate = 8.0, + .ola_window_size_ms = 12, + .wsola_search_interval_ms = 40, + }, + .options = (const struct m_option[]) { + {"search-interval", + OPT_FLOAT(wsola_search_interval_ms), M_RANGE(1, 1000)}, + {"window-size", + OPT_FLOAT(ola_window_size_ms), M_RANGE(1, 1000)}, + {"min-speed", + OPT_FLOAT(min_playback_rate), M_RANGE(0, FLT_MAX)}, + {"max-speed", + OPT_FLOAT(max_playback_rate), M_RANGE(0, FLT_MAX)}, + {0} + } + }, + .create = af_scaletempo2_create, +}; diff --git a/audio/filter/af_scaletempo2_internals.c b/audio/filter/af_scaletempo2_internals.c new file mode 100644 index 0000000..534f4f6 --- /dev/null +++ 
b/audio/filter/af_scaletempo2_internals.c @@ -0,0 +1,873 @@ +#include <float.h> +#include <math.h> + +#include "audio/chmap.h" +#include "audio/filter/af_scaletempo2_internals.h" + +#include "config.h" + +// Algorithm overview (from chromium): +// Waveform Similarity Overlap-and-add (WSOLA). +// +// One WSOLA iteration +// +// 1) Extract |target_block| as input frames at indices +// [|target_block_index|, |target_block_index| + |ola_window_size|). +// Note that |target_block| is the "natural" continuation of the output. +// +// 2) Extract |search_block| as input frames at indices +// [|search_block_index|, +// |search_block_index| + |num_candidate_blocks| + |ola_window_size|). +// +// 3) Find a block within the |search_block| that is most similar +// to |target_block|. Let |optimal_index| be the index of such block and +// write it to |optimal_block|. +// +// 4) Update: +// |optimal_block| = |transition_window| * |target_block| + +// (1 - |transition_window|) * |optimal_block|. +// +// 5) Overlap-and-add |optimal_block| to the |wsola_output|. +// +// 6) Update:write + +struct interval { + int lo; + int hi; +}; + +static bool in_interval(int n, struct interval q) +{ + return n >= q.lo && n <= q.hi; +} + +static float **realloc_2d(float **p, int x, int y) +{ + float **array = realloc(p, sizeof(float*) * x + sizeof(float) * x * y); + float* data = (float*) (array + x); + for (int i = 0; i < x; ++i) { + array[i] = data + i * y; + } + return array; +} + +static void zero_2d(float **a, int x, int y) +{ + memset(a + x, 0, sizeof(float) * x * y); +} + +static void zero_2d_partial(float **a, int x, int y) +{ + for (int i = 0; i < x; ++i) { + memset(a[i], 0, sizeof(float) * y); + } +} + +// Energies of sliding windows of channels are interleaved. +// The number windows is |input_frames| - (|frames_per_window| - 1), hence, +// the method assumes |energy| must be, at least, of size +// (|input_frames| - (|frames_per_window| - 1)) * |channels|. 
+static void multi_channel_moving_block_energies( + float **input, int input_frames, int channels, + int frames_per_block, float *energy) +{ + int num_blocks = input_frames - (frames_per_block - 1); + + for (int k = 0; k < channels; ++k) { + const float* input_channel = input[k]; + + energy[k] = 0; + + // First block of channel |k|. + for (int m = 0; m < frames_per_block; ++m) { + energy[k] += input_channel[m] * input_channel[m]; + } + + const float* slide_out = input_channel; + const float* slide_in = input_channel + frames_per_block; + for (int n = 1; n < num_blocks; ++n, ++slide_in, ++slide_out) { + energy[k + n * channels] = energy[k + (n - 1) * channels] + - *slide_out * *slide_out + *slide_in * *slide_in; + } + } +} + +static float multi_channel_similarity_measure( + const float* dot_prod_a_b, + const float* energy_a, const float* energy_b, + int channels) +{ + const float epsilon = 1e-12f; + float similarity_measure = 0.0f; + for (int n = 0; n < channels; ++n) { + similarity_measure += dot_prod_a_b[n] + / sqrtf(energy_a[n] * energy_b[n] + epsilon); + } + return similarity_measure; +} + +#if HAVE_VECTOR + +typedef float v8sf __attribute__ ((vector_size (32), aligned (1))); + +// Dot-product of channels of two AudioBus. For each AudioBus an offset is +// given. |dot_product[k]| is the dot-product of channel |k|. The caller should +// allocate sufficient space for |dot_product|. 
+static void multi_channel_dot_product( + float **a, int frame_offset_a, + float **b, int frame_offset_b, + int channels, + int num_frames, float *dot_product) +{ + assert(frame_offset_a >= 0); + assert(frame_offset_b >= 0); + + for (int k = 0; k < channels; ++k) { + const float* ch_a = a[k] + frame_offset_a; + const float* ch_b = b[k] + frame_offset_b; + float sum = 0.0; + if (num_frames < 32) + goto rest; + + const v8sf *va = (const v8sf *) ch_a; + const v8sf *vb = (const v8sf *) ch_b; + v8sf vsum[4] = { + // Initialize to product of first 32 floats + va[0] * vb[0], + va[1] * vb[1], + va[2] * vb[2], + va[3] * vb[3], + }; + va += 4; + vb += 4; + + // Process `va` and `vb` across four vertical stripes + for (int n = 1; n < num_frames / 32; n++) { + vsum[0] += va[0] * vb[0]; + vsum[1] += va[1] * vb[1]; + vsum[2] += va[2] * vb[2]; + vsum[3] += va[3] * vb[3]; + va += 4; + vb += 4; + } + + // Vertical sum across `vsum` entries + vsum[0] += vsum[1]; + vsum[2] += vsum[3]; + vsum[0] += vsum[2]; + + // Horizontal sum across `vsum[0]`, could probably be done better but + // this section is not super performance critical + float *vf = (float *) &vsum[0]; + sum = vf[0] + vf[1] + vf[2] + vf[3] + vf[4] + vf[5] + vf[6] + vf[7]; + ch_a = (const float *) va; + ch_b = (const float *) vb; + +rest: + // Process the remainder + for (int n = 0; n < num_frames % 32; n++) + sum += *ch_a++ * *ch_b++; + + dot_product[k] = sum; + } +} + +#else // !HAVE_VECTOR + +static void multi_channel_dot_product( + float **a, int frame_offset_a, + float **b, int frame_offset_b, + int channels, + int num_frames, float *dot_product) +{ + assert(frame_offset_a >= 0); + assert(frame_offset_b >= 0); + + for (int k = 0; k < channels; ++k) { + const float* ch_a = a[k] + frame_offset_a; + const float* ch_b = b[k] + frame_offset_b; + float sum = 0.0; + for (int n = 0; n < num_frames; n++) + sum += *ch_a++ * *ch_b++; + dot_product[k] = sum; + } +} + +#endif // HAVE_VECTOR + +// Fit the curve f(x) = a * x^2 + b * 
x + c such that +// f(-1) = y[0] +// f(0) = y[1] +// f(1) = y[2] +// and return the maximum, assuming that y[0] <= y[1] >= y[2]. +static void quadratic_interpolation( + const float* y_values, float* extremum, float* extremum_value) +{ + float a = 0.5f * (y_values[2] + y_values[0]) - y_values[1]; + float b = 0.5f * (y_values[2] - y_values[0]); + float c = y_values[1]; + + if (a == 0.f) { + // The coordinates are colinear (within floating-point error). + *extremum = 0; + *extremum_value = y_values[1]; + } else { + *extremum = -b / (2.f * a); + *extremum_value = a * (*extremum) * (*extremum) + b * (*extremum) + c; + } +} + +// Search a subset of all candid blocks. The search is performed every +// |decimation| frames. This reduces complexity by a factor of about +// 1 / |decimation|. A cubic interpolation is used to have a better estimate of +// the best match. +static int decimated_search( + int decimation, struct interval exclude_interval, + float **target_block, int target_block_frames, + float **search_segment, int search_segment_frames, + int channels, + const float *energy_target_block, const float *energy_candidate_blocks) +{ + int num_candidate_blocks = search_segment_frames - (target_block_frames - 1); + float dot_prod [MP_NUM_CHANNELS]; + float similarity[3]; // Three elements for cubic interpolation. + + int n = 0; + multi_channel_dot_product( + target_block, 0, + search_segment, n, + channels, + target_block_frames, dot_prod); + similarity[0] = multi_channel_similarity_measure( + dot_prod, energy_target_block, + &energy_candidate_blocks[n * channels], channels); + + // Set the starting point as optimal point. 
+ float best_similarity = similarity[0]; + int optimal_index = 0; + + n += decimation; + if (n >= num_candidate_blocks) { + return 0; + } + + multi_channel_dot_product( + target_block, 0, + search_segment, n, + channels, + target_block_frames, dot_prod); + similarity[1] = multi_channel_similarity_measure( + dot_prod, energy_target_block, + &energy_candidate_blocks[n * channels], channels); + + n += decimation; + if (n >= num_candidate_blocks) { + // We cannot do any more sampling. Compare these two values and return the + // optimal index. + return similarity[1] > similarity[0] ? decimation : 0; + } + + for (; n < num_candidate_blocks; n += decimation) { + multi_channel_dot_product( + target_block, 0, + search_segment, n, + channels, + target_block_frames, dot_prod); + + similarity[2] = multi_channel_similarity_measure( + dot_prod, energy_target_block, + &energy_candidate_blocks[n * channels], channels); + + if ((similarity[1] > similarity[0] && similarity[1] >= similarity[2]) || + (similarity[1] >= similarity[0] && similarity[1] > similarity[2])) + { + // A local maximum is found. Do a cubic interpolation for a better + // estimate of candidate maximum. + float normalized_candidate_index; + float candidate_similarity; + quadratic_interpolation(similarity, &normalized_candidate_index, + &candidate_similarity); + + int candidate_index = n - decimation + + (int)(normalized_candidate_index * decimation + 0.5f); + if (candidate_similarity > best_similarity + && !in_interval(candidate_index, exclude_interval)) { + optimal_index = candidate_index; + best_similarity = candidate_similarity; + } + } else if (n + decimation >= num_candidate_blocks && + similarity[2] > best_similarity && + !in_interval(n, exclude_interval)) + { + // If this is the end-point and has a better similarity-measure than + // optimal, then we accept it as optimal point. 
+ optimal_index = n; + best_similarity = similarity[2]; + } + memmove(similarity, &similarity[1], 2 * sizeof(*similarity)); + } + return optimal_index; +} + +// Search [|low_limit|, |high_limit|] of |search_segment| to find a block that +// is most similar to |target_block|. |energy_target_block| is the energy of the +// |target_block|. |energy_candidate_blocks| is the energy of all blocks within +// |search_block|. +static int full_search( + int low_limit, int high_limit, + struct interval exclude_interval, + float **target_block, int target_block_frames, + float **search_block, int search_block_frames, + int channels, + const float* energy_target_block, + const float* energy_candidate_blocks) +{ + // int block_size = target_block->frames; + float dot_prod [sizeof(float) * MP_NUM_CHANNELS]; + + float best_similarity = -FLT_MAX;//FLT_MIN; + int optimal_index = 0; + + for (int n = low_limit; n <= high_limit; ++n) { + if (in_interval(n, exclude_interval)) { + continue; + } + multi_channel_dot_product(target_block, 0, search_block, n, channels, + target_block_frames, dot_prod); + + float similarity = multi_channel_similarity_measure( + dot_prod, energy_target_block, + &energy_candidate_blocks[n * channels], channels); + + if (similarity > best_similarity) { + best_similarity = similarity; + optimal_index = n; + } + } + + return optimal_index; +} + +// Find the index of the block, within |search_block|, that is most similar +// to |target_block|. Obviously, the returned index is w.r.t. |search_block|. +// |exclude_interval| is an interval that is excluded from the search. +static int compute_optimal_index( + float **search_block, int search_block_frames, + float **target_block, int target_block_frames, + float *energy_candidate_blocks, + int channels, + struct interval exclude_interval) +{ + int num_candidate_blocks = search_block_frames - (target_block_frames - 1); + + // This is a compromise between complexity reduction and search accuracy. 
I + // don't have a proof that down sample of order 5 is optimal. + // One can compute a decimation factor that minimizes complexity given + // the size of |search_block| and |target_block|. However, my experiments + // show the rate of missing the optimal index is significant. + // This value is chosen heuristically based on experiments. + const int search_decimation = 5; + + float energy_target_block [MP_NUM_CHANNELS]; + // energy_candidate_blocks must have at least size + // sizeof(float) * channels * num_candidate_blocks + + // Energy of all candid frames. + multi_channel_moving_block_energies( + search_block, + search_block_frames, + channels, + target_block_frames, + energy_candidate_blocks); + + // Energy of target frame. + multi_channel_dot_product( + target_block, 0, + target_block, 0, + channels, + target_block_frames, energy_target_block); + + int optimal_index = decimated_search( + search_decimation, exclude_interval, + target_block, target_block_frames, + search_block, search_block_frames, + channels, + energy_target_block, + energy_candidate_blocks); + + int lim_low = MPMAX(0, optimal_index - search_decimation); + int lim_high = MPMIN(num_candidate_blocks - 1, + optimal_index + search_decimation); + return full_search( + lim_low, lim_high, exclude_interval, + target_block, target_block_frames, + search_block, search_block_frames, + channels, + energy_target_block, energy_candidate_blocks); +} + +static void peek_buffer(struct mp_scaletempo2 *p, + int frames, int read_offset, int write_offset, float **dest) +{ + assert(p->input_buffer_frames >= frames); + for (int i = 0; i < p->channels; ++i) { + memcpy(dest[i] + write_offset, + p->input_buffer[i] + read_offset, + frames * sizeof(float)); + } +} + +static void seek_buffer(struct mp_scaletempo2 *p, int frames) +{ + assert(p->input_buffer_frames >= frames); + p->input_buffer_frames -= frames; + if (p->input_buffer_final_frames > 0) { + p->input_buffer_final_frames = MPMAX(0, p->input_buffer_final_frames 
- frames); + } + for (int i = 0; i < p->channels; ++i) { + memmove(p->input_buffer[i], p->input_buffer[i] + frames, + p->input_buffer_frames * sizeof(float)); + } +} + +static int write_completed_frames_to(struct mp_scaletempo2 *p, + int requested_frames, int dest_offset, float **dest) +{ + int rendered_frames = MPMIN(p->num_complete_frames, requested_frames); + + if (rendered_frames == 0) + return 0; // There is nothing to read from |wsola_output|, return. + + for (int i = 0; i < p->channels; ++i) { + memcpy(dest[i] + dest_offset, p->wsola_output[i], + rendered_frames * sizeof(float)); + } + + // Remove the frames which are read. + int frames_to_move = p->wsola_output_size - rendered_frames; + for (int k = 0; k < p->channels; ++k) { + float *ch = p->wsola_output[k]; + memmove(ch, &ch[rendered_frames], sizeof(*ch) * frames_to_move); + } + p->num_complete_frames -= rendered_frames; + return rendered_frames; +} + +// next output_time for the given playback_rate +static double get_updated_time(struct mp_scaletempo2 *p, double playback_rate) +{ + return p->output_time + p->ola_hop_size * playback_rate; +} + +// search_block_index for the given output_time +static int get_search_block_index(struct mp_scaletempo2 *p, double output_time) +{ + return (int)(output_time - p->search_block_center_offset + 0.5); +} + +// number of frames needed until a wsola iteration can be performed +static int frames_needed(struct mp_scaletempo2 *p, double playback_rate) +{ + int search_block_index = + get_search_block_index(p, get_updated_time(p, playback_rate)); + return MPMAX(0, MPMAX( + p->target_block_index + p->ola_window_size - p->input_buffer_frames, + search_block_index + p->search_block_size - p->input_buffer_frames)); +} + +static bool can_perform_wsola(struct mp_scaletempo2 *p, double playback_rate) +{ + return frames_needed(p, playback_rate) <= 0; +} + +static void resize_input_buffer(struct mp_scaletempo2 *p, int size) +{ + p->input_buffer_size = size; + p->input_buffer = 
realloc_2d(p->input_buffer, p->channels, size); +} + +// pad end with silence until a wsola iteration can be performed +static void add_input_buffer_final_silence(struct mp_scaletempo2 *p, double playback_rate) +{ + int needed = frames_needed(p, playback_rate); + if (needed <= 0) + return; // no silence needed for iteration + + int required_size = needed + p->input_buffer_frames; + if (required_size > p->input_buffer_size) + resize_input_buffer(p, required_size); + + for (int i = 0; i < p->channels; ++i) { + float *ch_input = p->input_buffer[i]; + for (int j = 0; j < needed; ++j) { + ch_input[p->input_buffer_frames + j] = 0.0f; + } + } + + p->input_buffer_added_silence += needed; + p->input_buffer_frames += needed; +} + +void mp_scaletempo2_set_final(struct mp_scaletempo2 *p) +{ + if (p->input_buffer_final_frames <= 0) { + p->input_buffer_final_frames = p->input_buffer_frames; + } +} + +int mp_scaletempo2_fill_input_buffer(struct mp_scaletempo2 *p, + uint8_t **planes, int frame_size, double playback_rate) +{ + int needed = frames_needed(p, playback_rate); + int read = MPMIN(needed, frame_size); + if (read == 0) + return 0; + + int required_size = read + p->input_buffer_frames; + if (required_size > p->input_buffer_size) + resize_input_buffer(p, required_size); + + for (int i = 0; i < p->channels; ++i) { + memcpy(p->input_buffer[i] + p->input_buffer_frames, + planes[i], read * sizeof(float)); + } + + p->input_buffer_frames += read; + return read; +} + +static bool target_is_within_search_region(struct mp_scaletempo2 *p) +{ + return p->target_block_index >= p->search_block_index + && p->target_block_index + p->ola_window_size + <= p->search_block_index + p->search_block_size; +} + + +static void peek_audio_with_zero_prepend(struct mp_scaletempo2 *p, + int read_offset_frames, float **dest, int dest_frames) +{ + assert(read_offset_frames + dest_frames <= p->input_buffer_frames); + + int write_offset = 0; + int num_frames_to_read = dest_frames; + if (read_offset_frames 
< 0) { + int num_zero_frames_appended = MPMIN( + -read_offset_frames, num_frames_to_read); + read_offset_frames = 0; + num_frames_to_read -= num_zero_frames_appended; + write_offset = num_zero_frames_appended; + zero_2d_partial(dest, p->channels, num_zero_frames_appended); + } + peek_buffer(p, num_frames_to_read, read_offset_frames, write_offset, dest); +} + +static void get_optimal_block(struct mp_scaletempo2 *p) +{ + int optimal_index = 0; + + // An interval around last optimal block which is excluded from the search. + // This is to reduce the buzzy sound. The number 160 is rather arbitrary and + // derived heuristically. + const int exclude_interval_length_frames = 160; + if (target_is_within_search_region(p)) { + optimal_index = p->target_block_index; + peek_audio_with_zero_prepend(p, + optimal_index, p->optimal_block, p->ola_window_size); + } else { + peek_audio_with_zero_prepend(p, + p->target_block_index, p->target_block, p->ola_window_size); + peek_audio_with_zero_prepend(p, + p->search_block_index, p->search_block, p->search_block_size); + int last_optimal = p->target_block_index + - p->ola_hop_size - p->search_block_index; + struct interval exclude_iterval = { + .lo = last_optimal - exclude_interval_length_frames / 2, + .hi = last_optimal + exclude_interval_length_frames / 2 + }; + + // |optimal_index| is in frames and it is relative to the beginning of the + // |search_block|. + optimal_index = compute_optimal_index( + p->search_block, p->search_block_size, + p->target_block, p->ola_window_size, + p->energy_candidate_blocks, + p->channels, + exclude_iterval); + + // Translate |index| w.r.t. the beginning of |audio_buffer| and extract the + // optimal block. + optimal_index += p->search_block_index; + peek_audio_with_zero_prepend(p, + optimal_index, p->optimal_block, p->ola_window_size); + + // Make a transition from target block to the optimal block if different. + // Target block has the best continuation to the current output. 
+ // Optimal block is the most similar block to the target, however, it might + // introduce some discontinuity when over-lap-added. Therefore, we combine + // them for a smoother transition. The length of transition window is twice + // as that of the optimal-block which makes it like a weighting function + // where target-block has higher weight close to zero (weight of 1 at index + // 0) and lower weight close the end. + for (int k = 0; k < p->channels; ++k) { + float* ch_opt = p->optimal_block[k]; + float* ch_target = p->target_block[k]; + for (int n = 0; n < p->ola_window_size; ++n) { + ch_opt[n] = ch_opt[n] * p->transition_window[n] + + ch_target[n] * p->transition_window[p->ola_window_size + n]; + } + } + } + + // Next target is one hop ahead of the current optimal. + p->target_block_index = optimal_index + p->ola_hop_size; +} + +static void set_output_time(struct mp_scaletempo2 *p, double output_time) +{ + p->output_time = output_time; + p->search_block_index = get_search_block_index(p, output_time); +} + +static void remove_old_input_frames(struct mp_scaletempo2 *p) +{ + const int earliest_used_index = MPMIN( + p->target_block_index, p->search_block_index); + if (earliest_used_index <= 0) + return; // Nothing to remove. + + // Remove frames from input and adjust indices accordingly. + seek_buffer(p, earliest_used_index); + p->target_block_index -= earliest_used_index; + p->output_time -= earliest_used_index; + p->search_block_index -= earliest_used_index; +} + +static bool run_one_wsola_iteration(struct mp_scaletempo2 *p, double playback_rate) +{ + if (!can_perform_wsola(p, playback_rate)) { + return false; + } + + set_output_time(p, get_updated_time(p, playback_rate)); + remove_old_input_frames(p); + + assert(p->search_block_index + p->search_block_size <= p->input_buffer_frames); + + get_optimal_block(p); + + // Overlap-and-add. 
+ for (int k = 0; k < p->channels; ++k) { + float* ch_opt_frame = p->optimal_block[k]; + float* ch_output = p->wsola_output[k] + p->num_complete_frames; + if (p->wsola_output_started) { + for (int n = 0; n < p->ola_hop_size; ++n) { + ch_output[n] = ch_output[n] * p->ola_window[p->ola_hop_size + n] + + ch_opt_frame[n] * p->ola_window[n]; + } + + // Copy the second half to the output. + memcpy(&ch_output[p->ola_hop_size], &ch_opt_frame[p->ola_hop_size], + sizeof(*ch_opt_frame) * p->ola_hop_size); + } else { + // No overlap for the first iteration. + memcpy(ch_output, ch_opt_frame, + sizeof(*ch_opt_frame) * p->ola_window_size); + } + } + + p->num_complete_frames += p->ola_hop_size; + p->wsola_output_started = true; + return true; +} + +static int read_input_buffer(struct mp_scaletempo2 *p, int dest_size, float **dest) +{ + int frames_to_copy = MPMIN(dest_size, p->input_buffer_frames - p->target_block_index); + + if (frames_to_copy <= 0) + return 0; // There is nothing to read from input buffer; return. + + peek_buffer(p, frames_to_copy, p->target_block_index, 0, dest); + seek_buffer(p, frames_to_copy); + return frames_to_copy; +} + +int mp_scaletempo2_fill_buffer(struct mp_scaletempo2 *p, + float **dest, int dest_size, double playback_rate) +{ + if (playback_rate == 0) return 0; + + if (p->input_buffer_final_frames > 0) { + add_input_buffer_final_silence(p, playback_rate); + } + + // Optimize the muted case to issue a single clear instead of performing + // the full crossfade and clearing each crossfaded frame. + if (playback_rate < p->opts->min_playback_rate + || (playback_rate > p->opts->max_playback_rate + && p->opts->max_playback_rate > 0)) + { + int frames_to_render = MPMIN(dest_size, + (int) (p->input_buffer_frames / playback_rate)); + + // Compute accurate number of frames to actually skip in the source data. + // Includes the leftover partial frame from last request. 
However, we can + // only skip over complete frames, so a partial frame may remain for next + // time. + p->muted_partial_frame += frames_to_render * playback_rate; + int seek_frames = (int) (p->muted_partial_frame); + zero_2d_partial(dest, p->channels, frames_to_render); + seek_buffer(p, seek_frames); + + // Determine the partial frame that remains to be skipped for next call. If + // the user switches back to playing, it may be off time by this partial + // frame, which would be undetectable. If they subsequently switch to + // another playback rate that mutes, the code will attempt to line up the + // frames again. + p->muted_partial_frame -= seek_frames; + return frames_to_render; + } + + int slower_step = (int) ceilf(p->ola_window_size * playback_rate); + int faster_step = (int) ceilf(p->ola_window_size / playback_rate); + + // Optimize the most common |playback_rate| ~= 1 case to use a single copy + // instead of copying frame by frame. + if (p->ola_window_size <= faster_step && slower_step >= p->ola_window_size) { + + if (p->wsola_output_started) { + p->wsola_output_started = false; + + // sync audio precisely again + set_output_time(p, p->target_block_index); + remove_old_input_frames(p); + } + + return read_input_buffer(p, dest_size, dest); + } + + int rendered_frames = 0; + do { + rendered_frames += write_completed_frames_to(p, + dest_size - rendered_frames, rendered_frames, dest); + } while (rendered_frames < dest_size + && run_one_wsola_iteration(p, playback_rate)); + return rendered_frames; +} + +double mp_scaletempo2_get_latency(struct mp_scaletempo2 *p, double playback_rate) +{ + return p->input_buffer_frames - p->output_time + - p->input_buffer_added_silence + + p->num_complete_frames * playback_rate; +} + +bool mp_scaletempo2_frames_available(struct mp_scaletempo2 *p, double playback_rate) +{ + return p->input_buffer_final_frames > p->target_block_index + || can_perform_wsola(p, playback_rate) + || p->num_complete_frames > 0; +} + +void 
mp_scaletempo2_destroy(struct mp_scaletempo2 *p) +{ + free(p->ola_window); + free(p->transition_window); + free(p->wsola_output); + free(p->optimal_block); + free(p->search_block); + free(p->target_block); + free(p->input_buffer); + free(p->energy_candidate_blocks); +} + +void mp_scaletempo2_reset(struct mp_scaletempo2 *p) +{ + p->input_buffer_frames = 0; + p->input_buffer_final_frames = 0; + p->input_buffer_added_silence = 0; + p->output_time = 0.0; + p->search_block_index = 0; + p->target_block_index = 0; + // Clear the queue of decoded packets. + zero_2d(p->wsola_output, p->channels, p->wsola_output_size); + p->num_complete_frames = 0; + p->wsola_output_started = false; +} + +// Return a "periodic" Hann window. This is the first L samples of an L+1 +// Hann window. It is perfect reconstruction for overlap-and-add. +static void get_symmetric_hanning_window(int window_length, float* window) +{ + const float scale = 2.0f * M_PI / window_length; + for (int n = 0; n < window_length; ++n) + window[n] = 0.5f * (1.0f - cosf(n * scale)); +} + + +void mp_scaletempo2_init(struct mp_scaletempo2 *p, int channels, int rate) +{ + p->muted_partial_frame = 0; + p->output_time = 0; + p->search_block_index = 0; + p->target_block_index = 0; + p->num_complete_frames = 0; + p->wsola_output_started = false; + p->channels = channels; + + p->samples_per_second = rate; + p->num_candidate_blocks = (int)(p->opts->wsola_search_interval_ms + * p->samples_per_second / 1000); + p->ola_window_size = (int)(p->opts->ola_window_size_ms + * p->samples_per_second / 1000); + // Make sure window size in an even number. + p->ola_window_size += p->ola_window_size & 1; + p->ola_hop_size = p->ola_window_size / 2; + // |num_candidate_blocks| / 2 is the offset of the center of the search + // block to the center of the first (left most) candidate block. The offset + // of the center of a candidate block to its left most point is + // |ola_window_size| / 2 - 1. 
Note that |ola_window_size| is even and in + // our convention the center belongs to the left half, so we need to subtract + // one frame to get the correct offset. + // + // Search Block + // <-------------------------------------------> + // + // |ola_window_size| / 2 - 1 + // <---- + // + // |num_candidate_blocks| / 2 + // <---------------- + // center + // X----X----------------X---------------X-----X + // <----------> <----------> + // Candidate ... Candidate + // 1, ... |num_candidate_blocks| + p->search_block_center_offset = p->num_candidate_blocks / 2 + + (p->ola_window_size / 2 - 1); + p->ola_window = realloc(p->ola_window, sizeof(float) * p->ola_window_size); + get_symmetric_hanning_window(p->ola_window_size, p->ola_window); + p->transition_window = realloc(p->transition_window, + sizeof(float) * p->ola_window_size * 2); + get_symmetric_hanning_window(2 * p->ola_window_size, p->transition_window); + + p->wsola_output_size = p->ola_window_size + p->ola_hop_size; + p->wsola_output = realloc_2d(p->wsola_output, p->channels, p->wsola_output_size); + // Initialize for overlap-and-add of the first block. + zero_2d(p->wsola_output, p->channels, p->wsola_output_size); + + // Auxiliary containers. 
+ p->optimal_block = realloc_2d(p->optimal_block, p->channels, p->ola_window_size); + p->search_block_size = p->num_candidate_blocks + (p->ola_window_size - 1); + p->search_block = realloc_2d(p->search_block, p->channels, p->search_block_size); + p->target_block = realloc_2d(p->target_block, p->channels, p->ola_window_size); + + resize_input_buffer(p, 4 * MPMAX(p->ola_window_size, p->search_block_size)); + p->input_buffer_frames = 0; + p->input_buffer_final_frames = 0; + p->input_buffer_added_silence = 0; + + p->energy_candidate_blocks = realloc(p->energy_candidate_blocks, + sizeof(float) * p->channels * p->num_candidate_blocks); +} diff --git a/audio/filter/af_scaletempo2_internals.h b/audio/filter/af_scaletempo2_internals.h new file mode 100644 index 0000000..6c3c94c --- /dev/null +++ b/audio/filter/af_scaletempo2_internals.h @@ -0,0 +1,134 @@ +// This filter was ported from Chromium +// (https://chromium.googlesource.com/chromium/chromium/+/51ed77e3f37a9a9b80d6d0a8259e84a8ca635259/media/filters/audio_renderer_algorithm.cc) +// +// Copyright 2015 The Chromium Authors. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "common/common.h" + +struct mp_scaletempo2_opts { + // Max/min supported playback rates for fast/slow audio. Audio outside of these + // ranges are muted. + // Audio at these speeds would sound better under a frequency domain algorithm. + float min_playback_rate; + float max_playback_rate; + // Overlap-and-add window size in milliseconds. + float ola_window_size_ms; + // Size of search interval in milliseconds. The search interval is + // [-delta delta] around |output_index| * |playback_rate|. So the search + // interval is 2 * delta. + float wsola_search_interval_ms; +}; + +struct mp_scaletempo2 { + struct mp_scaletempo2_opts *opts; + // Number of channels in audio stream. + int channels; + // Sample rate of audio stream. + int samples_per_second; + // If muted, keep track of partial frames that should have been skipped over. + double muted_partial_frame; + // Book keeping of the current time of generated audio, in frames. + // Corresponds to the center of |search_block|. This is increased in + // intervals of |ola_hop_size| multiplied by the current playback_rate, + // for every WSOLA iteration. 
This tracks the number of advanced frames as + // a double to achieve accurate playback rates beyond the integer precision + // of |search_block_index|. + // Needs to be adjusted like any other index when frames are evicted from + // |input_buffer|. + double output_time; + // The offset of the center frame of |search_block| w.r.t. its first frame. + int search_block_center_offset; + // Index of the beginning of the |search_block|, in frames. This may be + // negative, which is handled by |peek_audio_with_zero_prepend|. + int search_block_index; + // Number of Blocks to search to find the most similar one to the target + // frame. + int num_candidate_blocks; + // Index of the beginning of the target block, counted in frames. + int target_block_index; + // Overlap-and-add window size in frames. + int ola_window_size; + // The hop size of overlap-and-add in frames. This implementation assumes 50% + // overlap-and-add. + int ola_hop_size; + // Number of frames in |wsola_output| that overlap-and-add is completed for + // them and can be copied to output if fill_buffer() is called. It also + // specifies the index where the next WSOLA window has to overlap-and-add. + int num_complete_frames; + // Whether |wsola_output| contains an additional |ola_hop_size| of overlap + // frames for the next iteration. + bool wsola_output_started; + // Overlap-and-add window. + float *ola_window; + // Transition window, used to update |optimal_block| by a weighted sum of + // |optimal_block| and |target_block|. + float *transition_window; + // This stores a part of the output that is created but couldn't be rendered. + // Output is generated frame-by-frame which at some point might exceed the + // number of requested samples. Furthermore, due to overlap-and-add, + // the last half-window of the output is incomplete, which is stored in this + // buffer. + float **wsola_output; + int wsola_output_size; + // Auxiliary variables to avoid allocation in every iteration. 
+ // Stores the optimal block in every iteration. This is the most + // similar block to |target_block| within |search_block| and it is + // overlap-and-added to |wsola_output|. + float **optimal_block; + // A block of data that search is performed over to find the |optimal_block|. + float **search_block; + int search_block_size; + // Stores the target block, denoted as |target| above. |search_block| is + // searched for a block (|optimal_block|) that is most similar to + // |target_block|. + float **target_block; + // Buffered audio data. + float **input_buffer; + int input_buffer_size; + int input_buffer_frames; + // How many frames in |input_buffer| need to be flushed by padding with + // silence to process the final packet. While this is nonzero, the filter + // appends silence to |input_buffer| until these frames are processed. + int input_buffer_final_frames; + // How many additional frames of silence have been added to |input_buffer| + // for padding after the final packet. + int input_buffer_added_silence; + float *energy_candidate_blocks; +}; + +void mp_scaletempo2_destroy(struct mp_scaletempo2 *p); +void mp_scaletempo2_reset(struct mp_scaletempo2 *p); +void mp_scaletempo2_init(struct mp_scaletempo2 *p, int channels, int rate); +double mp_scaletempo2_get_latency(struct mp_scaletempo2 *p, double playback_rate); +int mp_scaletempo2_fill_input_buffer(struct mp_scaletempo2 *p, + uint8_t **planes, int frame_size, double playback_rate); +void mp_scaletempo2_set_final(struct mp_scaletempo2 *p); +int mp_scaletempo2_fill_buffer(struct mp_scaletempo2 *p, + float **dest, int dest_size, double playback_rate); +bool mp_scaletempo2_frames_available(struct mp_scaletempo2 *p, double playback_rate); diff --git a/audio/fmt-conversion.c b/audio/fmt-conversion.c new file mode 100644 index 0000000..d72a50d --- /dev/null +++ b/audio/fmt-conversion.c @@ -0,0 +1,60 @@ +/* + * This file is part of mpv. 
+ * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <libavutil/avutil.h> +#include <libavutil/samplefmt.h> +#include "format.h" +#include "fmt-conversion.h" + +static const struct { + enum AVSampleFormat sample_fmt; + int fmt; +} audio_conversion_map[] = { + {AV_SAMPLE_FMT_U8, AF_FORMAT_U8}, + {AV_SAMPLE_FMT_S16, AF_FORMAT_S16}, + {AV_SAMPLE_FMT_S32, AF_FORMAT_S32}, + {AV_SAMPLE_FMT_S64, AF_FORMAT_S64}, + {AV_SAMPLE_FMT_FLT, AF_FORMAT_FLOAT}, + {AV_SAMPLE_FMT_DBL, AF_FORMAT_DOUBLE}, + + {AV_SAMPLE_FMT_U8P, AF_FORMAT_U8P}, + {AV_SAMPLE_FMT_S16P, AF_FORMAT_S16P}, + {AV_SAMPLE_FMT_S32P, AF_FORMAT_S32P}, + {AV_SAMPLE_FMT_S64P, AF_FORMAT_S64P}, + {AV_SAMPLE_FMT_FLTP, AF_FORMAT_FLOATP}, + {AV_SAMPLE_FMT_DBLP, AF_FORMAT_DOUBLEP}, + + {AV_SAMPLE_FMT_NONE, 0}, +}; + +enum AVSampleFormat af_to_avformat(int fmt) +{ + for (int i = 0; audio_conversion_map[i].fmt; i++) { + if (audio_conversion_map[i].fmt == fmt) + return audio_conversion_map[i].sample_fmt; + } + return AV_SAMPLE_FMT_NONE; +} + +int af_from_avformat(enum AVSampleFormat sample_fmt) +{ + for (int i = 0; audio_conversion_map[i].fmt; i++) { + if (audio_conversion_map[i].sample_fmt == sample_fmt) + return audio_conversion_map[i].fmt; + } + return 0; +} diff --git a/audio/fmt-conversion.h b/audio/fmt-conversion.h new file mode 100644 index 0000000..63c315b --- /dev/null +++ b/audio/fmt-conversion.h @@ -0,0 
+1,24 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef MPLAYER_SAMPLE_FMT_CONVERSION_H +#define MPLAYER_SAMPLE_FMT_CONVERSION_H + +enum AVSampleFormat af_to_avformat(int fmt); +int af_from_avformat(enum AVSampleFormat sample_fmt); + +#endif /* MPLAYER_SAMPLE_FMT_CONVERSION_H */ diff --git a/audio/format.c b/audio/format.c new file mode 100644 index 0000000..4441456 --- /dev/null +++ b/audio/format.c @@ -0,0 +1,267 @@ +/* + * Copyright (C) 2005 Alex Beregszaszi + * + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
 */

#include <limits.h>

#include "common/common.h"
#include "format.h"

// number of bytes per sample, 0 if invalid/unknown
int af_fmt_to_bytes(int format)
{
    switch (af_fmt_from_planar(format)) {
    case AF_FORMAT_U8:     return 1;
    case AF_FORMAT_S16:    return 2;
    case AF_FORMAT_S32:    return 4;
    case AF_FORMAT_S64:    return 8;
    case AF_FORMAT_FLOAT:  return 4;
    case AF_FORMAT_DOUBLE: return 8;
    }
    // spdif formats are carried as 16 bit sample streams (see aframe.c).
    if (af_fmt_is_spdif(format))
        return 2;
    return 0;
}

// All formats are considered signed, except explicitly unsigned int formats.
bool af_fmt_is_unsigned(int format)
{
    return format == AF_FORMAT_U8 || format == AF_FORMAT_U8P;
}

bool af_fmt_is_float(int format)
{
    // Map planar variants to their interleaved equivalent first.
    format = af_fmt_from_planar(format);
    return format == AF_FORMAT_FLOAT || format == AF_FORMAT_DOUBLE;
}

// true for both unsigned and signed ints
bool af_fmt_is_int(int format)
{
    return format && !af_fmt_is_spdif(format) && !af_fmt_is_float(format);
}

// spdif formats are exactly those with a frame alignment larger than 1 sample.
bool af_fmt_is_spdif(int format)
{
    return af_format_sample_alignment(format) > 1;
}

bool af_fmt_is_pcm(int format)
{
    return af_fmt_is_valid(format) && !af_fmt_is_spdif(format);
}

// Pairs of {planar format, equivalent interleaved format}.
static const int planar_formats[][2] = {
    {AF_FORMAT_U8P,     AF_FORMAT_U8},
    {AF_FORMAT_S16P,    AF_FORMAT_S16},
    {AF_FORMAT_S32P,    AF_FORMAT_S32},
    {AF_FORMAT_S64P,    AF_FORMAT_S64},
    {AF_FORMAT_FLOATP,  AF_FORMAT_FLOAT},
    {AF_FORMAT_DOUBLEP, AF_FORMAT_DOUBLE},
};

bool af_fmt_is_planar(int format)
{
    for (int n = 0; n < MP_ARRAY_SIZE(planar_formats); n++) {
        if (planar_formats[n][0] == format)
            return true;
    }
    return false;
}

// Return the planar format corresponding to the given format.
// If the format is already planar or if there's no equivalent,
// return it.
int af_fmt_to_planar(int format)
{
    for (int n = 0; n < MP_ARRAY_SIZE(planar_formats); n++) {
        if (planar_formats[n][1] == format)
            return planar_formats[n][0];
    }
    return format;
}

// Return the interleaved format corresponding to the given format.
// If the format is already interleaved or if there's no equivalent,
// return it.
int af_fmt_from_planar(int format)
{
    for (int n = 0; n < MP_ARRAY_SIZE(planar_formats); n++) {
        if (planar_formats[n][0] == format)
            return planar_formats[n][1];
    }
    return format;
}

bool af_fmt_is_valid(int format)
{
    return format > 0 && format < AF_FORMAT_COUNT;
}

const char *af_fmt_to_str(int format)
{
    switch (format) {
    case AF_FORMAT_U8:       return "u8";
    case AF_FORMAT_S16:      return "s16";
    case AF_FORMAT_S32:      return "s32";
    case AF_FORMAT_S64:      return "s64";
    case AF_FORMAT_FLOAT:    return "float";
    case AF_FORMAT_DOUBLE:   return "double";
    case AF_FORMAT_U8P:      return "u8p";
    case AF_FORMAT_S16P:     return "s16p";
    case AF_FORMAT_S32P:     return "s32p";
    case AF_FORMAT_S64P:     return "s64p";
    case AF_FORMAT_FLOATP:   return "floatp";
    case AF_FORMAT_DOUBLEP:  return "doublep";
    case AF_FORMAT_S_AAC:    return "spdif-aac";
    case AF_FORMAT_S_AC3:    return "spdif-ac3";
    case AF_FORMAT_S_DTS:    return "spdif-dts";
    case AF_FORMAT_S_DTSHD:  return "spdif-dtshd";
    case AF_FORMAT_S_EAC3:   return "spdif-eac3";
    case AF_FORMAT_S_MP3:    return "spdif-mp3";
    case AF_FORMAT_S_TRUEHD: return "spdif-truehd";
    }
    return "??";
}

// Write silence: unsigned formats center at 0x80 per byte, everything else
// (signed int, float, spdif) uses zero bytes.
void af_fill_silence(void *dst, size_t bytes, int format)
{
    memset(dst, af_fmt_is_unsigned(format) ? 0x80 : 0, bytes);
}

// Returns a "score" that serves as heuristic how lossy or hard a conversion is.
// If the formats are equal, 1024 is returned. If they are gravely incompatible
// (like s16<->ac3), INT_MIN is returned. If there is implied loss of precision
// (like s16->s8), a value <0 is returned.
+int af_format_conversion_score(int dst_format, int src_format) +{ + if (dst_format == AF_FORMAT_UNKNOWN || src_format == AF_FORMAT_UNKNOWN) + return INT_MIN; + if (dst_format == src_format) + return 1024; + // Can't be normally converted + if (!af_fmt_is_pcm(dst_format) || !af_fmt_is_pcm(src_format)) + return INT_MIN; + int score = 1024; + if (af_fmt_is_planar(dst_format) != af_fmt_is_planar(src_format)) + score -= 1; // has to (de-)planarize + if (af_fmt_is_float(dst_format) != af_fmt_is_float(src_format)) { + int dst_bytes = af_fmt_to_bytes(dst_format); + if (af_fmt_is_float(dst_format)) { + // For int->float, consider a lower bound on the precision difference. + int bytes = (dst_bytes == 4 ? 3 : 6) - af_fmt_to_bytes(src_format); + if (bytes >= 0) { + score -= 8 * bytes; // excess precision + } else { + score += 1024 * (bytes - 1); // precision is lost (i.e. s32 -> float) + } + } else { + // float->int is the worst case. Penalize heavily and + // prefer highest bit depth int. + score -= 1048576 * (8 - dst_bytes); + } + score -= 512; // penalty for any float <-> int conversion + } else { + int bytes = af_fmt_to_bytes(dst_format) - af_fmt_to_bytes(src_format); + if (bytes > 0) { + score -= 8 * bytes; // has to add padding + } else if (bytes < 0) { + score += 1024 * (bytes - 1); // has to reduce bit depth + } + } + return score; +} + +struct mp_entry { + int fmt; + int score; +}; + +static int cmp_entry(const void *a, const void *b) +{ +#define CMP_INT(a, b) (a > b ? 1 : (a < b ? -1 : 0)) + return -CMP_INT(((struct mp_entry *)a)->score, ((struct mp_entry *)b)->score); +} + +// Return a list of sample format compatible to src_format, sorted by order +// of preference. out_formats[0] will be src_format (as long as it's valid), +// and the list is terminated with 0 (AF_FORMAT_UNKNOWN). +// Keep in mind that this also returns formats with flipped interleaving +// (e.g. for s16, it returns [s16, s16p, ...]). +// out_formats must be an int[AF_FORMAT_COUNT + 1] array. 
+void af_get_best_sample_formats(int src_format, int *out_formats) +{ + int num = 0; + struct mp_entry e[AF_FORMAT_COUNT + 1]; + for (int fmt = 1; fmt < AF_FORMAT_COUNT; fmt++) { + int score = af_format_conversion_score(fmt, src_format); + if (score > INT_MIN) + e[num++] = (struct mp_entry){fmt, score}; + } + qsort(e, num, sizeof(e[0]), cmp_entry); + for (int n = 0; n < num; n++) + out_formats[n] = e[n].fmt; + out_formats[num] = 0; +} + +// Return the best match to src_samplerate from the list provided in the array +// *available, which must be terminated by 0, or itself NULL. If *available is +// empty or NULL, return a negative value. Exact match to src_samplerate is +// most preferred, followed by the lowest integer multiple, followed by the +// maximum of *available. +int af_select_best_samplerate(int src_samplerate, const int *available) +{ + if (!available) + return -1; + + int min_mult_rate = INT_MAX; + int max_rate = INT_MIN; + for (int i = 0; available[i]; i++) { + if (available[i] == src_samplerate) + return available[i]; + + if (!(available[i] % src_samplerate)) + min_mult_rate = MPMIN(min_mult_rate, available[i]); + + max_rate = MPMAX(max_rate, available[i]); + } + + if (min_mult_rate < INT_MAX) + return min_mult_rate; + + if (max_rate > INT_MIN) + return max_rate; + + return -1; +} + +// Return the number of samples that make up one frame in this format. +// You get the byte size by multiplying them with sample size and channel count. 
+int af_format_sample_alignment(int format) +{ + switch (format) { + case AF_FORMAT_S_AAC: return 16384 / 4; + case AF_FORMAT_S_AC3: return 6144 / 4; + case AF_FORMAT_S_DTSHD: return 32768 / 16; + case AF_FORMAT_S_DTS: return 2048 / 4; + case AF_FORMAT_S_EAC3: return 24576 / 4; + case AF_FORMAT_S_MP3: return 4608 / 4; + case AF_FORMAT_S_TRUEHD: return 61440 / 16; + default: return 1; + } +} diff --git a/audio/format.h b/audio/format.h new file mode 100644 index 0000000..bdd4744 --- /dev/null +++ b/audio/format.h @@ -0,0 +1,77 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef MPLAYER_AF_FORMAT_H +#define MPLAYER_AF_FORMAT_H + +#include <stddef.h> +#include <stdbool.h> + +enum af_format { + AF_FORMAT_UNKNOWN = 0, + + AF_FORMAT_U8, + AF_FORMAT_S16, + AF_FORMAT_S32, + AF_FORMAT_S64, + AF_FORMAT_FLOAT, + AF_FORMAT_DOUBLE, + + // Planar variants + AF_FORMAT_U8P, + AF_FORMAT_S16P, + AF_FORMAT_S32P, + AF_FORMAT_S64P, + AF_FORMAT_FLOATP, + AF_FORMAT_DOUBLEP, + + // All of these use IEC61937 framing, and otherwise pretend to be like PCM. 
+ AF_FORMAT_S_AAC, + AF_FORMAT_S_AC3, + AF_FORMAT_S_DTS, + AF_FORMAT_S_DTSHD, + AF_FORMAT_S_EAC3, + AF_FORMAT_S_MP3, + AF_FORMAT_S_TRUEHD, + + AF_FORMAT_COUNT +}; + +const char *af_fmt_to_str(int format); + +int af_fmt_to_bytes(int format); + +bool af_fmt_is_valid(int format); +bool af_fmt_is_unsigned(int format); +bool af_fmt_is_float(int format); +bool af_fmt_is_int(int format); +bool af_fmt_is_planar(int format); +bool af_fmt_is_spdif(int format); +bool af_fmt_is_pcm(int format); + +int af_fmt_to_planar(int format); +int af_fmt_from_planar(int format); + +void af_fill_silence(void *dst, size_t bytes, int format); + +void af_get_best_sample_formats(int src_format, int *out_formats); +int af_format_conversion_score(int dst_format, int src_format); +int af_select_best_samplerate(int src_sampelrate, const int *available); + +int af_format_sample_alignment(int format); + +#endif /* MPLAYER_AF_FORMAT_H */ diff --git a/audio/out/ao.c b/audio/out/ao.c new file mode 100644 index 0000000..a5aa3a9 --- /dev/null +++ b/audio/out/ao.c @@ -0,0 +1,719 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <math.h> +#include <assert.h> + +#include "mpv_talloc.h" + +#include "config.h" +#include "ao.h" +#include "internal.h" +#include "audio/format.h" + +#include "options/options.h" +#include "options/m_config_frontend.h" +#include "osdep/endian.h" +#include "common/msg.h" +#include "common/common.h" +#include "common/global.h" + +extern const struct ao_driver audio_out_oss; +extern const struct ao_driver audio_out_audiotrack; +extern const struct ao_driver audio_out_audiounit; +extern const struct ao_driver audio_out_coreaudio; +extern const struct ao_driver audio_out_coreaudio_exclusive; +extern const struct ao_driver audio_out_rsound; +extern const struct ao_driver audio_out_pipewire; +extern const struct ao_driver audio_out_sndio; +extern const struct ao_driver audio_out_pulse; +extern const struct ao_driver audio_out_jack; +extern const struct ao_driver audio_out_openal; +extern const struct ao_driver audio_out_opensles; +extern const struct ao_driver audio_out_null; +extern const struct ao_driver audio_out_alsa; +extern const struct ao_driver audio_out_wasapi; +extern const struct ao_driver audio_out_pcm; +extern const struct ao_driver audio_out_lavc; +extern const struct ao_driver audio_out_sdl; + +static const struct ao_driver * const audio_out_drivers[] = { +// native: +#if HAVE_ANDROID + &audio_out_audiotrack, +#endif +#if HAVE_AUDIOUNIT + &audio_out_audiounit, +#endif +#if HAVE_COREAUDIO + &audio_out_coreaudio, +#endif +#if HAVE_PIPEWIRE + &audio_out_pipewire, +#endif +#if HAVE_PULSE + &audio_out_pulse, +#endif +#if HAVE_ALSA + &audio_out_alsa, +#endif +#if HAVE_WASAPI + &audio_out_wasapi, +#endif +#if HAVE_OSS_AUDIO + &audio_out_oss, +#endif + // wrappers: +#if HAVE_JACK + &audio_out_jack, +#endif +#if HAVE_OPENAL + &audio_out_openal, +#endif +#if HAVE_OPENSLES + &audio_out_opensles, +#endif +#if HAVE_SDL2_AUDIO + &audio_out_sdl, +#endif +#if HAVE_SNDIO + &audio_out_sndio, 
+#endif + &audio_out_null, +#if HAVE_COREAUDIO + &audio_out_coreaudio_exclusive, +#endif + &audio_out_pcm, + &audio_out_lavc, +}; + +static bool get_desc(struct m_obj_desc *dst, int index) +{ + if (index >= MP_ARRAY_SIZE(audio_out_drivers)) + return false; + const struct ao_driver *ao = audio_out_drivers[index]; + *dst = (struct m_obj_desc) { + .name = ao->name, + .description = ao->description, + .priv_size = ao->priv_size, + .priv_defaults = ao->priv_defaults, + .options = ao->options, + .options_prefix = ao->options_prefix, + .global_opts = ao->global_opts, + .hidden = ao->encode, + .p = ao, + }; + return true; +} + +// For the ao option +static const struct m_obj_list ao_obj_list = { + .get_desc = get_desc, + .description = "audio outputs", + .allow_trailer = true, + .disallow_positional_parameters = true, + .use_global_options = true, +}; + +#define OPT_BASE_STRUCT struct ao_opts +const struct m_sub_options ao_conf = { + .opts = (const struct m_option[]) { + {"ao", OPT_SETTINGSLIST(audio_driver_list, &ao_obj_list), + .flags = UPDATE_AUDIO}, + {"audio-device", OPT_STRING(audio_device), .flags = UPDATE_AUDIO}, + {"audio-client-name", OPT_STRING(audio_client_name), .flags = UPDATE_AUDIO}, + {"audio-buffer", OPT_DOUBLE(audio_buffer), + .flags = UPDATE_AUDIO, M_RANGE(0, 10)}, + {0} + }, + .size = sizeof(OPT_BASE_STRUCT), + .defaults = &(const OPT_BASE_STRUCT){ + .audio_buffer = 0.2, + .audio_device = "auto", + .audio_client_name = "mpv", + }, +}; + +static struct ao *ao_alloc(bool probing, struct mpv_global *global, + void (*wakeup_cb)(void *ctx), void *wakeup_ctx, + char *name) +{ + assert(wakeup_cb); + + struct mp_log *log = mp_log_new(NULL, global->log, "ao"); + struct m_obj_desc desc; + if (!m_obj_list_find(&desc, &ao_obj_list, bstr0(name))) { + mp_msg(log, MSGL_ERR, "Audio output %s not found!\n", name); + talloc_free(log); + return NULL; + }; + struct ao_opts *opts = mp_get_config_group(NULL, global, &ao_conf); + struct ao *ao = talloc_ptrtype(NULL, ao); + 
talloc_steal(ao, log); + *ao = (struct ao) { + .driver = desc.p, + .probing = probing, + .global = global, + .wakeup_cb = wakeup_cb, + .wakeup_ctx = wakeup_ctx, + .log = mp_log_new(ao, log, name), + .def_buffer = opts->audio_buffer, + .client_name = talloc_strdup(ao, opts->audio_client_name), + }; + talloc_free(opts); + ao->priv = m_config_group_from_desc(ao, ao->log, global, &desc, name); + if (!ao->priv) + goto error; + ao_set_gain(ao, 1.0f); + return ao; +error: + talloc_free(ao); + return NULL; +} + +static struct ao *ao_init(bool probing, struct mpv_global *global, + void (*wakeup_cb)(void *ctx), void *wakeup_ctx, + struct encode_lavc_context *encode_lavc_ctx, int flags, + int samplerate, int format, struct mp_chmap channels, + char *dev, char *name) +{ + struct ao *ao = ao_alloc(probing, global, wakeup_cb, wakeup_ctx, name); + if (!ao) + return NULL; + ao->samplerate = samplerate; + ao->channels = channels; + ao->format = format; + ao->encode_lavc_ctx = encode_lavc_ctx; + ao->init_flags = flags; + if (ao->driver->encode != !!ao->encode_lavc_ctx) + goto fail; + + MP_VERBOSE(ao, "requested format: %d Hz, %s channels, %s\n", + ao->samplerate, mp_chmap_to_str(&ao->channels), + af_fmt_to_str(ao->format)); + + ao->device = talloc_strdup(ao, dev); + ao->stream_silence = flags & AO_INIT_STREAM_SILENCE; + + init_buffer_pre(ao); + + int r = ao->driver->init(ao); + if (r < 0) { + // Silly exception for coreaudio spdif redirection + if (ao->redirect) { + char redirect[80], rdevice[80]; + snprintf(redirect, sizeof(redirect), "%s", ao->redirect); + snprintf(rdevice, sizeof(rdevice), "%s", ao->device ? 
ao->device : ""); + ao_uninit(ao); + return ao_init(probing, global, wakeup_cb, wakeup_ctx, + encode_lavc_ctx, flags, samplerate, format, channels, + rdevice, redirect); + } + goto fail; + } + ao->driver_initialized = true; + + ao->sstride = af_fmt_to_bytes(ao->format); + ao->num_planes = 1; + if (af_fmt_is_planar(ao->format)) { + ao->num_planes = ao->channels.num; + } else { + ao->sstride *= ao->channels.num; + } + ao->bps = ao->samplerate * ao->sstride; + + if (ao->device_buffer <= 0 && ao->driver->write) { + MP_ERR(ao, "Device buffer size not set.\n"); + goto fail; + } + if (ao->device_buffer) + MP_VERBOSE(ao, "device buffer: %d samples.\n", ao->device_buffer); + ao->buffer = MPMAX(ao->device_buffer, ao->def_buffer * ao->samplerate); + ao->buffer = MPMAX(ao->buffer, 1); + + int align = af_format_sample_alignment(ao->format); + ao->buffer = (ao->buffer + align - 1) / align * align; + MP_VERBOSE(ao, "using soft-buffer of %d samples.\n", ao->buffer); + + if (!init_buffer_post(ao)) + goto fail; + return ao; + +fail: + ao_uninit(ao); + return NULL; +} + +static void split_ao_device(void *tmp, char *opt, char **out_ao, char **out_dev) +{ + *out_ao = NULL; + *out_dev = NULL; + if (!opt) + return; + if (!opt[0] || strcmp(opt, "auto") == 0) + return; + // Split on "/". If "/" is the final character, or absent, out_dev is NULL. 
+ bstr b_dev, b_ao; + bstr_split_tok(bstr0(opt), "/", &b_ao, &b_dev); + if (b_dev.len > 0) + *out_dev = bstrto0(tmp, b_dev); + *out_ao = bstrto0(tmp, b_ao); +} + +struct ao *ao_init_best(struct mpv_global *global, + int init_flags, + void (*wakeup_cb)(void *ctx), void *wakeup_ctx, + struct encode_lavc_context *encode_lavc_ctx, + int samplerate, int format, struct mp_chmap channels) +{ + void *tmp = talloc_new(NULL); + struct ao_opts *opts = mp_get_config_group(tmp, global, &ao_conf); + struct mp_log *log = mp_log_new(tmp, global->log, "ao"); + struct ao *ao = NULL; + struct m_obj_settings *ao_list = NULL; + int ao_num = 0; + + for (int n = 0; opts->audio_driver_list && opts->audio_driver_list[n].name; n++) + MP_TARRAY_APPEND(tmp, ao_list, ao_num, opts->audio_driver_list[n]); + + bool forced_dev = false; + char *pref_ao, *pref_dev; + split_ao_device(tmp, opts->audio_device, &pref_ao, &pref_dev); + if (!ao_num && pref_ao) { + // Reuse the autoselection code + MP_TARRAY_APPEND(tmp, ao_list, ao_num, + (struct m_obj_settings){.name = pref_ao}); + forced_dev = true; + } + + bool autoprobe = ao_num == 0; + + // Something like "--ao=a,b," means do autoprobing after a and b fail. 
+ if (ao_num && strlen(ao_list[ao_num - 1].name) == 0) { + ao_num -= 1; + autoprobe = true; + } + + if (autoprobe) { + for (int n = 0; n < MP_ARRAY_SIZE(audio_out_drivers); n++) { + const struct ao_driver *driver = audio_out_drivers[n]; + if (driver == &audio_out_null) + break; + MP_TARRAY_APPEND(tmp, ao_list, ao_num, + (struct m_obj_settings){.name = (char *)driver->name}); + } + } + + if (init_flags & AO_INIT_NULL_FALLBACK) { + MP_TARRAY_APPEND(tmp, ao_list, ao_num, + (struct m_obj_settings){.name = "null"}); + } + + for (int n = 0; n < ao_num; n++) { + struct m_obj_settings *entry = &ao_list[n]; + bool probing = n + 1 != ao_num; + mp_verbose(log, "Trying audio driver '%s'\n", entry->name); + char *dev = NULL; + if (pref_ao && pref_dev && strcmp(entry->name, pref_ao) == 0) { + dev = pref_dev; + mp_verbose(log, "Using preferred device '%s'\n", dev); + } + ao = ao_init(probing, global, wakeup_cb, wakeup_ctx, encode_lavc_ctx, + init_flags, samplerate, format, channels, dev, entry->name); + if (ao) + break; + if (!probing) + mp_err(log, "Failed to initialize audio driver '%s'\n", entry->name); + if (dev && forced_dev) { + mp_err(log, "This audio driver/device was forced with the " + "--audio-device option.\nTry unsetting it.\n"); + } + } + + talloc_free(tmp); + return ao; +} + +// Query the AO_EVENT_*s as requested by the events parameter, and return them. +int ao_query_and_reset_events(struct ao *ao, int events) +{ + return atomic_fetch_and(&ao->events_, ~(unsigned)events) & events; +} + +// Returns events that were set by this calls. +int ao_add_events(struct ao *ao, int events) +{ + unsigned prev_events = atomic_fetch_or(&ao->events_, events); + unsigned new = events & ~prev_events; + if (new) + ao->wakeup_cb(ao->wakeup_ctx); + return new; +} + +// Request that the player core destroys and recreates the AO. Fully thread-safe. +void ao_request_reload(struct ao *ao) +{ + ao_add_events(ao, AO_EVENT_RELOAD); +} + +// Notify the player that the device list changed. 
Fully thread-safe. +void ao_hotplug_event(struct ao *ao) +{ + ao_add_events(ao, AO_EVENT_HOTPLUG); +} + +bool ao_chmap_sel_adjust(struct ao *ao, const struct mp_chmap_sel *s, + struct mp_chmap *map) +{ + MP_VERBOSE(ao, "Channel layouts:\n"); + mp_chmal_sel_log(s, ao->log, MSGL_V); + bool r = mp_chmap_sel_adjust(s, map); + if (r) + MP_VERBOSE(ao, "result: %s\n", mp_chmap_to_str(map)); + return r; +} + +// safe_multichannel=true behaves like ao_chmap_sel_adjust. +// safe_multichannel=false is a helper for callers which do not support safe +// handling of arbitrary channel layouts. If the multichannel layouts are not +// considered "always safe" (e.g. HDMI), then allow only stereo or mono, if +// they are part of the list in *s. +bool ao_chmap_sel_adjust2(struct ao *ao, const struct mp_chmap_sel *s, + struct mp_chmap *map, bool safe_multichannel) +{ + if (!safe_multichannel && (ao->init_flags & AO_INIT_SAFE_MULTICHANNEL_ONLY)) { + struct mp_chmap res = *map; + if (mp_chmap_sel_adjust(s, &res)) { + if (!mp_chmap_equals(&res, &(struct mp_chmap)MP_CHMAP_INIT_MONO) && + !mp_chmap_equals(&res, &(struct mp_chmap)MP_CHMAP_INIT_STEREO)) + { + MP_VERBOSE(ao, "Disabling multichannel output.\n"); + *map = (struct mp_chmap)MP_CHMAP_INIT_STEREO; + } + } + } + + return ao_chmap_sel_adjust(ao, s, map); +} + +bool ao_chmap_sel_get_def(struct ao *ao, const struct mp_chmap_sel *s, + struct mp_chmap *map, int num) +{ + return mp_chmap_sel_get_def(s, map, num); +} + +// --- The following functions just return immutable information. 

// Return the sample rate, sample format and channel layout the AO is using.
void ao_get_format(struct ao *ao,
                   int *samplerate, int *format, struct mp_chmap *channels)
{
    *samplerate = ao->samplerate;
    *format = ao->format;
    *channels = ao->channels;
}

// Name of the driver backing this AO (e.g. "alsa", "pulse").
const char *ao_get_name(struct ao *ao)
{
    return ao->driver->name;
}

// Human-readable description of the driver backing this AO.
const char *ao_get_description(struct ao *ao)
{
    return ao->driver->description;
}

// Report the AO's untimed flag (set by the driver; see struct ao).
bool ao_untimed(struct ao *ao)
{
    return ao->untimed;
}

// ---

struct ao_hotplug {
    struct mpv_global *global;
    void (*wakeup_cb)(void *ctx);
    void *wakeup_ctx;
    // A single AO instance is used to listen to hotplug events. It wouldn't
    // make much sense to allow multiple AO drivers; all sane platforms have
    // a single audio API providing all events.
    // This is _not_ necessarily the same AO instance as used for playing
    // audio.
    struct ao *ao;
    // cached
    struct ao_device_list *list;
    bool needs_update;
};

// Create a hotplug listener context. wakeup_cb/wakeup_ctx are stored and used
// to notify the caller of events. Free the result with ao_hotplug_destroy().
struct ao_hotplug *ao_hotplug_create(struct mpv_global *global,
                                     void (*wakeup_cb)(void *ctx),
                                     void *wakeup_ctx)
{
    struct ao_hotplug *hp = talloc_ptrtype(NULL, hp);
    *hp = (struct ao_hotplug){
        .global = global,
        .wakeup_cb = wakeup_cb,
        .wakeup_ctx = wakeup_ctx,
        .needs_update = true,
    };
    return hp;
}

// Let the driver enumerate its devices into *list. Drivers without a
// list_devs callback contribute a single unnamed default entry.
static void get_devices(struct ao *ao, struct ao_device_list *list)
{
    if (ao->driver->list_devs) {
        ao->driver->list_devs(ao, list);
    } else {
        ao_device_list_add(list, ao, &(struct ao_device_desc){"", ""});
    }
}

// Returns true if a hotplug event arrived since the last call; in that case
// the cached device list is marked stale so it gets rebuilt on next query.
bool ao_hotplug_check_update(struct ao_hotplug *hp)
{
    if (hp->ao && ao_query_and_reset_events(hp->ao, AO_EVENT_HOTPLUG)) {
        hp->needs_update = true;
        return true;
    }
    return false;
}

// The return value is valid until the next call to this API.
// Build (or return the cached) list of playback devices across all drivers.
// The returned list is talloc-parented to hp and valid until the next call.
// Side effect: may create and keep a hotplug-listener AO in hp->ao.
struct ao_device_list *ao_hotplug_get_device_list(struct ao_hotplug *hp,
                                                  struct ao *playback_ao)
{
    if (hp->list && !hp->needs_update)
        return hp->list;

    talloc_free(hp->list);
    struct ao_device_list *list = talloc_zero(hp, struct ao_device_list);
    hp->list = list;

    // Synthetic first entry: let the player pick.
    MP_TARRAY_APPEND(list, list->devices, list->num_devices,
                     (struct ao_device_desc){"auto", "Autoselect device"});

    // Try to use the same AO for hotplug handling as for playback.
    // Different AOs may not agree and the playback one is the only one the
    // user knows about and may even have configured explicitly.
    if (!hp->ao && playback_ao && playback_ao->driver->hotplug_init) {
        struct ao *ao = ao_alloc(true, hp->global, hp->wakeup_cb, hp->wakeup_ctx,
                                 (char *)playback_ao->driver->name);
        if (playback_ao->driver->hotplug_init(ao) >= 0) {
            hp->ao = ao;
        } else {
            talloc_free(ao);
        }
    }

    for (int n = 0; n < MP_ARRAY_SIZE(audio_out_drivers); n++) {
        const struct ao_driver *d = audio_out_drivers[n];
        if (d == &audio_out_null)
            break; // don't add unsafe/special entries

        struct ao *ao = ao_alloc(true, hp->global, hp->wakeup_cb, hp->wakeup_ctx,
                                 (char *)d->name);
        if (!ao)
            continue;

        if (ao->driver->hotplug_init) {
            if (ao->driver->hotplug_init(ao) >= 0) {
                get_devices(ao, list);
                // Only one AO may stay alive as the hotplug listener; any
                // further successfully-initialized AO is uninitialized again.
                if (hp->ao)
                    ao->driver->hotplug_uninit(ao);
                else
                    hp->ao = ao; // keep this one
            }
        } else {
            get_devices(ao, list);
        }
        // Free every temporary AO except the one adopted into hp->ao.
        if (ao != hp->ao)
            talloc_free(ao);
    }
    hp->needs_update = false;
    return list;
}

// Append a device entry to the list. The stored name is prefixed with the
// driver name ("driver/device"); desc is filled with a fallback if empty.
// Both strings are duplicated onto the list's talloc context.
void ao_device_list_add(struct ao_device_list *list, struct ao *ao,
                        struct ao_device_desc *e)
{
    struct ao_device_desc c = *e;
    const char *dname = ao->driver->name;
    char buf[80];
    if (!c.desc || !c.desc[0]) {
        if (c.name && c.name[0]) {
            c.desc = c.name;
        } else if (list->num_devices) {
            // Assume this is the default device.
            snprintf(buf, sizeof(buf), "Default (%s)", dname);
            c.desc = buf;
        } else {
            // First default device (and maybe the only one).
            c.desc = "Default";
        }
    }
    c.name = (c.name && c.name[0]) ? talloc_asprintf(list, "%s/%s", dname, c.name)
                                   : talloc_strdup(list, dname);
    c.desc = talloc_strdup(list, c.desc);
    MP_TARRAY_APPEND(list, list->devices, list->num_devices, c);
}

// Tear down the hotplug context, including the listener AO (if any).
void ao_hotplug_destroy(struct ao_hotplug *hp)
{
    if (!hp)
        return;
    if (hp->ao && hp->ao->driver->hotplug_uninit)
        hp->ao->driver->hotplug_uninit(hp->ao);
    talloc_free(hp->ao);
    talloc_free(hp);
}

// No-op wakeup callback for the one-shot device listing below.
static void dummy_wakeup(void *ctx)
{
}

// Enumerate all devices once and print them to the log (used for
// e.g. --audio-device=help style listings).
void ao_print_devices(struct mpv_global *global, struct mp_log *log,
                      struct ao *playback_ao)
{
    struct ao_hotplug *hp = ao_hotplug_create(global, dummy_wakeup, NULL);
    struct ao_device_list *list = ao_hotplug_get_device_list(hp, playback_ao);
    mp_info(log, "List of detected audio devices:\n");
    for (int n = 0; n < list->num_devices; n++) {
        struct ao_device_desc *desc = &list->devices[n];
        mp_info(log, "  '%s' (%s)\n", desc->name, desc->desc);
    }
    ao_hotplug_destroy(hp);
}

// Set the softvol-style gain applied in ao_post_process_data().
// Atomic store: may be called from a different thread than the audio callback.
void ao_set_gain(struct ao *ao, float gain)
{
    atomic_store(&ao->gain, gain);
}

// Integer gain: fixed-point multiply around (center) with 8 fractional bits,
// +128 for rounding, clamped to the sample type's range.
#define MUL_GAIN_i(d, num_samples, gain, low, center, high)                     \
    for (int n = 0; n < (num_samples); n++)                                     \
        (d)[n] = MPCLAMP(                                                       \
            ((((int64_t)((d)[n]) - (center)) * (gain) + 128) >> 8) + (center),  \
            (low), (high))

// Float gain: plain multiply, clamped to [-1, 1].
#define MUL_GAIN_f(d, num_samples, gain)                   \
    for (int n = 0; n < (num_samples); n++)                \
        (d)[n] = MPCLAMP(((d)[n]) * (gain), -1.0, 1.0)

// Apply the current gain in-place to one plane of samples.
static void process_plane(struct ao *ao, void *data, int num_samples)
{
    float gain = atomic_load_explicit(&ao->gain, memory_order_relaxed);
    int gi = lrint(256.0 * gain);
    if (gi == 256)
        return; // unity gain in 8.8 fixed point -> nothing to do
    switch (af_fmt_from_planar(ao->format)) {
    case AF_FORMAT_U8:
        MUL_GAIN_i((uint8_t *)data, num_samples, gi, 0, 128, 255);
        break;
    case AF_FORMAT_S16:
        MUL_GAIN_i((int16_t *)data, num_samples, gi, INT16_MIN, 0, INT16_MAX);
        break;
    case AF_FORMAT_S32:
        MUL_GAIN_i((int32_t *)data, num_samples, gi, INT32_MIN, 0, INT32_MAX);
        break;
    case AF_FORMAT_FLOAT:
        MUL_GAIN_f((float *)data, num_samples, gain);
        break;
    case AF_FORMAT_DOUBLE:
        MUL_GAIN_f((double *)data, num_samples, gain);
        break;
    default:;
        // all other sample formats are simply not supported
    }
}

// Apply gain to all planes. For interleaved formats there is one plane
// containing num_samples * channels values.
void ao_post_process_data(struct ao *ao, void **data, int num_samples)
{
    bool planar = af_fmt_is_planar(ao->format);
    int planes = planar ? ao->channels.num : 1;
    int plane_samples = num_samples * (planar ? 1: ao->channels.num);
    for (int n = 0; n < planes; n++)
        process_plane(ao, data[n], plane_samples);
}

// Classify the requested conversion:
//   0  = passthrough, 1 = S32 -> packed 24-bit, 2 = S32 -> 24-in-32 (MSB pad),
//   -1 = unsupported.
static int get_conv_type(struct ao_convert_fmt *fmt)
{
    if (af_fmt_to_bytes(fmt->src_fmt) * 8 == fmt->dst_bits && !fmt->pad_msb)
        return 0; // passthrough
    if (fmt->src_fmt == AF_FORMAT_S32 && fmt->dst_bits == 24 && !fmt->pad_msb)
        return 1; // simple 32->24 bit conversion
    if (fmt->src_fmt == AF_FORMAT_S32 && fmt->dst_bits == 32 && fmt->pad_msb == 8)
        return 2; // simple 32->24 bit conversion, with MSB padding
    return -1; // unsupported
}

// Check whether ao_convert_inplace() can be called. As an exception, the
// planar-ness of the sample format and the number of channels is ignored.
// All other parameters must be as passed to ao_convert_inplace().
bool ao_can_convert_inplace(struct ao_convert_fmt *fmt)
{
    return get_conv_type(fmt) >= 0;
}

// True if the conversion is anything other than a no-op passthrough.
bool ao_need_conversion(struct ao_convert_fmt *fmt)
{
    return get_conv_type(fmt) != 0;
}

// Byte index -> shift amount for extracting the top 3 bytes of an S32 sample.
// The LSB is always ignored.
#if BYTE_ORDER == BIG_ENDIAN
#define SHIFT24(x) ((3-(x))*8)
#else
#define SHIFT24(x) (((x)+1)*8)
#endif

// In-place narrowing of one plane of S32 samples to 24-bit (type 1) or
// 24-in-32 with a zero pad byte (type 2). Safe forwards because the write
// offset (s*bytes) never exceeds the read offset (s*4).
static void convert_plane(int type, void *data, int num_samples)
{
    switch (type) {
    case 0:
        break;
    case 1: /* fall through */
    case 2: {
        int bytes = type == 1 ? 3 : 4;
        for (int s = 0; s < num_samples; s++) {
            uint32_t val = *((uint32_t *)data + s);
            uint8_t *ptr = (uint8_t *)data + s * bytes;
            ptr[0] = val >> SHIFT24(0);
            ptr[1] = val >> SHIFT24(1);
            ptr[2] = val >> SHIFT24(2);
            if (type == 2)
                ptr[3] = 0;
        }
        break;
    }
    default:
        MP_ASSERT_UNREACHABLE();
    }
}

// data[n] contains the pointer to the first sample of the n-th plane, in the
// format implied by fmt->src_fmt. src_fmt also controls whether the data is
// all in one plane, or if there is a plane per channel.
void ao_convert_inplace(struct ao_convert_fmt *fmt, void **data, int num_samples)
{
    int type = get_conv_type(fmt);
    bool planar = af_fmt_is_planar(fmt->src_fmt);
    int planes = planar ? fmt->channels : 1;
    int plane_samples = num_samples * (planar ? 1: fmt->channels);
    for (int n = 0; n < planes; n++)
        convert_plane(type, data[n], plane_samples);
}
diff --git a/audio/out/ao.h b/audio/out/ao.h
new file mode 100644
index 0000000..18c7cdc
--- /dev/null
+++ b/audio/out/ao.h
@@ -0,0 +1,122 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv. If not, see <http://www.gnu.org/licenses/>.
+ */ + +#ifndef MPLAYER_AUDIO_OUT_H +#define MPLAYER_AUDIO_OUT_H + +#include <stdbool.h> + +#include "misc/bstr.h" +#include "common/common.h" +#include "audio/chmap.h" +#include "audio/chmap_sel.h" + +enum aocontrol { + // _VOLUME commands take a pointer to float for input/output. + AOCONTROL_GET_VOLUME, + AOCONTROL_SET_VOLUME, + // _MUTE commands take a pointer to bool + AOCONTROL_GET_MUTE, + AOCONTROL_SET_MUTE, + // Has char* as argument, which contains the desired stream title. + AOCONTROL_UPDATE_STREAM_TITLE, +}; + +// If set, then the queued audio data is the last. Note that after a while, new +// data might be written again, instead of closing the AO. +#define PLAYER_FINAL_CHUNK 1 + +enum { + AO_EVENT_RELOAD = 1, + AO_EVENT_HOTPLUG = 2, + AO_EVENT_INITIAL_UNBLOCK = 4, +}; + +enum { + // Allow falling back to ao_null if nothing else works. + AO_INIT_NULL_FALLBACK = 1 << 0, + // Only accept multichannel configurations that are guaranteed to work + // (i.e. not sending arbitrary layouts over HDMI). + AO_INIT_SAFE_MULTICHANNEL_ONLY = 1 << 1, + // Stream silence as long as no audio is playing. + AO_INIT_STREAM_SILENCE = 1 << 2, + // Force exclusive mode, i.e. lock out the system mixer. + AO_INIT_EXCLUSIVE = 1 << 3, + // Initialize with music role. 
+ AO_INIT_MEDIA_ROLE_MUSIC = 1 << 4, +}; + +struct ao_device_desc { + const char *name; // symbolic name; will be set on ao->device + const char *desc; // verbose human readable name +}; + +struct ao_device_list { + struct ao_device_desc *devices; + int num_devices; +}; + +struct ao; +struct mpv_global; +struct input_ctx; +struct encode_lavc_context; + +struct ao_opts { + struct m_obj_settings *audio_driver_list; + char *audio_device; + char *audio_client_name; + double audio_buffer; +}; + +struct ao *ao_init_best(struct mpv_global *global, + int init_flags, + void (*wakeup_cb)(void *ctx), void *wakeup_ctx, + struct encode_lavc_context *encode_lavc_ctx, + int samplerate, int format, struct mp_chmap channels); +void ao_uninit(struct ao *ao); +void ao_get_format(struct ao *ao, + int *samplerate, int *format, struct mp_chmap *channels); +const char *ao_get_name(struct ao *ao); +const char *ao_get_description(struct ao *ao); +bool ao_untimed(struct ao *ao); +int ao_control(struct ao *ao, enum aocontrol cmd, void *arg); +void ao_set_gain(struct ao *ao, float gain); +double ao_get_delay(struct ao *ao); +void ao_reset(struct ao *ao); +void ao_start(struct ao *ao); +void ao_set_paused(struct ao *ao, bool paused, bool eof); +void ao_drain(struct ao *ao); +bool ao_is_playing(struct ao *ao); +struct mp_async_queue; +struct mp_async_queue *ao_get_queue(struct ao *ao); +int ao_query_and_reset_events(struct ao *ao, int events); +int ao_add_events(struct ao *ao, int events); +void ao_unblock(struct ao *ao); +void ao_request_reload(struct ao *ao); +void ao_hotplug_event(struct ao *ao); + +struct ao_hotplug; +struct ao_hotplug *ao_hotplug_create(struct mpv_global *global, + void (*wakeup_cb)(void *ctx), + void *wakeup_ctx); +void ao_hotplug_destroy(struct ao_hotplug *hp); +bool ao_hotplug_check_update(struct ao_hotplug *hp); +struct ao_device_list *ao_hotplug_get_device_list(struct ao_hotplug *hp, struct ao *playback_ao); + +void ao_print_devices(struct mpv_global *global, struct 
mp_log *log, struct ao *playback_ao); + +#endif /* MPLAYER_AUDIO_OUT_H */ diff --git a/audio/out/ao_alsa.c b/audio/out/ao_alsa.c new file mode 100644 index 0000000..75eda3b --- /dev/null +++ b/audio/out/ao_alsa.c @@ -0,0 +1,1161 @@ +/* + * ALSA 0.9.x-1.x audio output driver + * + * Copyright (C) 2004 Alex Beregszaszi + * Zsolt Barat <joy@streamminister.de> + * + * modified for real ALSA 0.9.0 support by Zsolt Barat <joy@streamminister.de> + * additional AC-3 passthrough support by Andy Lo A Foe <andy@alsaplayer.org> + * 08/22/2002 iec958-init rewritten and merged with common init, zsolt + * 04/13/2004 merged with ao_alsa1.x, fixes provided by Jindrich Makovicka + * 04/25/2004 printfs converted to mp_msg, Zsolt. + * + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
 */

#include <errno.h>
#include <sys/time.h>
#include <stdlib.h>
#include <stdarg.h>
#include <limits.h>
#include <math.h>
#include <string.h>

#include "options/options.h"
#include "options/m_config.h"
#include "options/m_option.h"
#include "common/msg.h"
#include "osdep/endian.h"

#include <alsa/asoundlib.h>

// The channel-map API appeared in alsa-lib 1.0.27 (chmap API version 1.0.0).
#if defined(SND_CHMAP_API_VERSION) && SND_CHMAP_API_VERSION >= (1 << 16)
#define HAVE_CHMAP_API 1
#else
#define HAVE_CHMAP_API 0
#endif

#include "ao.h"
#include "internal.h"
#include "audio/format.h"

// User-visible --alsa-* options (see ao_alsa_conf below for defaults).
struct ao_alsa_opts {
    char *mixer_device;
    char *mixer_name;
    int mixer_index;
    bool resample;
    bool ni;            // non-interleaved access
    bool ignore_chmap;
    int buffer_time;    // microseconds; 0 disables the request
    int frags;          // number of periods; 0 disables the request
};

#define OPT_BASE_STRUCT struct ao_alsa_opts
static const struct m_sub_options ao_alsa_conf = {
    .opts = (const struct m_option[]) {
        {"alsa-resample", OPT_BOOL(resample)},
        {"alsa-mixer-device", OPT_STRING(mixer_device)},
        {"alsa-mixer-name", OPT_STRING(mixer_name)},
        {"alsa-mixer-index", OPT_INT(mixer_index), M_RANGE(0, 99)},
        {"alsa-non-interleaved", OPT_BOOL(ni)},
        {"alsa-ignore-chmap", OPT_BOOL(ignore_chmap)},
        {"alsa-buffer-time", OPT_INT(buffer_time), M_RANGE(0, INT_MAX)},
        {"alsa-periods", OPT_INT(frags), M_RANGE(0, INT_MAX)},
        {0}
    },
    .defaults = &(const struct ao_alsa_opts) {
        .mixer_device = "default",
        .mixer_name = "Master",
        .buffer_time = 100000,
        .frags = 4,
    },
    .size = sizeof(struct ao_alsa_opts),
};

// Per-AO private state.
struct priv {
    snd_pcm_t *alsa;                // PCM handle; NULL when closed
    bool device_lost;
    snd_pcm_format_t alsa_fmt;      // negotiated ALSA sample format
    bool can_pause;
    snd_pcm_uframes_t buffersize;   // HW buffer size in frames
    snd_pcm_uframes_t outburst;     // HW period size in frames

    snd_output_t *output;           // buffer for ALSA debug dumps

    struct ao_convert_fmt convert;  // in-place conversion (e.g. S32->24bit)

    struct ao_alsa_opts *opts;
};

// Both macros rely on a local `int err` and (for ERROR) an `alsa_error` label
// being in scope at the expansion site.
#define CHECK_ALSA_ERROR(message) \
    do { \
        if (err < 0) { \
            MP_ERR(ao, "%s: %s\n", (message), snd_strerror(err)); \
            goto alsa_error; \
        } \
    } while (0)

#define CHECK_ALSA_WARN(message) \
    do { \
        if (err < 0) \
            MP_WARN(ao, "%s: %s\n", (message), snd_strerror(err)); \
    } while (0)

// AO control entry point: volume/mute via the ALSA simple mixer API.
// Opens and closes a fresh mixer handle on every call.
static int control(struct ao *ao, enum aocontrol cmd, void *arg)
{
    struct priv *p = ao->priv;
    snd_mixer_t *handle = NULL;
    switch (cmd) {
    case AOCONTROL_GET_MUTE:
    case AOCONTROL_SET_MUTE:
    case AOCONTROL_GET_VOLUME:
    case AOCONTROL_SET_VOLUME:
    {
        int err;
        snd_mixer_elem_t *elem;
        snd_mixer_selem_id_t *sid;

        long pmin, pmax;
        long get_vol, set_vol;
        float f_multi;

        // Hardware mixer controls make no sense for spdif passthrough.
        if (!af_fmt_is_pcm(ao->format))
            return CONTROL_FALSE;

        snd_mixer_selem_id_alloca(&sid);

        snd_mixer_selem_id_set_index(sid, p->opts->mixer_index);
        snd_mixer_selem_id_set_name(sid, p->opts->mixer_name);

        err = snd_mixer_open(&handle, 0);
        CHECK_ALSA_ERROR("Mixer open error");

        err = snd_mixer_attach(handle, p->opts->mixer_device);
        CHECK_ALSA_ERROR("Mixer attach error");

        err = snd_mixer_selem_register(handle, NULL, NULL);
        CHECK_ALSA_ERROR("Mixer register error");

        err = snd_mixer_load(handle);
        CHECK_ALSA_ERROR("Mixer load error");

        elem = snd_mixer_find_selem(handle, sid);
        if (!elem) {
            MP_VERBOSE(ao, "Unable to find simple control '%s',%i.\n",
                       snd_mixer_selem_id_get_name(sid),
                       snd_mixer_selem_id_get_index(sid));
            goto alsa_error;
        }

        // Map the element's raw volume range to a 0..100 scale.
        snd_mixer_selem_get_playback_volume_range(elem, &pmin, &pmax);
        f_multi = (100 / (float)(pmax - pmin));

        switch (cmd) {
        case AOCONTROL_SET_VOLUME: {
            float *vol = arg;
            // +0.5 rounds to the nearest raw step.
            set_vol = *vol / f_multi + pmin + 0.5;

            // Channel 0 = left, channel 1 = right.
            err = snd_mixer_selem_set_playback_volume(elem, 0, set_vol);
            CHECK_ALSA_ERROR("Error setting left channel");
            MP_DBG(ao, "left=%li, ", set_vol);

            err = snd_mixer_selem_set_playback_volume(elem, 1, set_vol);
            CHECK_ALSA_ERROR("Error setting right channel");
            MP_DBG(ao, "right=%li, pmin=%li, pmax=%li, mult=%f\n",
                   set_vol, pmin, pmax, f_multi);
            break;
        }
        case AOCONTROL_GET_VOLUME: {
            float *vol = arg;
            float left, right;
            snd_mixer_selem_get_playback_volume(elem, 0, &get_vol);
            left = (get_vol - pmin) * f_multi;
            snd_mixer_selem_get_playback_volume(elem, 1, &get_vol);
            right = (get_vol - pmin) * f_multi;
            // Report the average of both channels.
            *vol = (left + right) / 2.0;
            MP_DBG(ao, "vol=%f\n", *vol);
            break;
        }
        case AOCONTROL_SET_MUTE: {
            bool *mute = arg;
            if (!snd_mixer_selem_has_playback_switch(elem))
                goto alsa_error;
            // If left/right switches are separate, set both.
            if (!snd_mixer_selem_has_playback_switch_joined(elem)) {
                snd_mixer_selem_set_playback_switch(elem, 1, !*mute);
            }
            snd_mixer_selem_set_playback_switch(elem, 0, !*mute);
            break;
        }
        case AOCONTROL_GET_MUTE: {
            bool *mute = arg;
            if (!snd_mixer_selem_has_playback_switch(elem))
                goto alsa_error;
            int tmp = 1;
            snd_mixer_selem_get_playback_switch(elem, 0, &tmp);
            *mute = !tmp;
            // Muted only if both channels are switched off.
            if (!snd_mixer_selem_has_playback_switch_joined(elem)) {
                snd_mixer_selem_get_playback_switch(elem, 1, &tmp);
                *mute &= !tmp;
            }
            break;
        }
        }
        snd_mixer_close(handle);
        return CONTROL_OK;
    }

    } //end switch
    return CONTROL_UNKNOWN;

alsa_error:
    if (handle)
        snd_mixer_close(handle);
    return CONTROL_ERROR;
}

// One row of the mpv-format -> ALSA-format mapping table.
struct alsa_fmt {
    int mp_format;
    int alsa_format;
    int bits; // alsa format full sample size (optional)
    int pad_msb; // how many MSB bits are 0 (optional)
};

// Entries that have the same mp_format must be:
// 1. consecutive
// 2. sorted by preferred format (worst comes last)
static const struct alsa_fmt mp_alsa_formats[] = {
    {AF_FORMAT_U8,      SND_PCM_FORMAT_U8},
    {AF_FORMAT_S16,     SND_PCM_FORMAT_S16},
    {AF_FORMAT_S32,     SND_PCM_FORMAT_S32},
    {AF_FORMAT_S32,     SND_PCM_FORMAT_S24, .bits = 32, .pad_msb = 8},
    {AF_FORMAT_S32,
     MP_SELECT_LE_BE(SND_PCM_FORMAT_S24_3LE, SND_PCM_FORMAT_S24_3BE),
     .bits = 24, .pad_msb = 0},
    {AF_FORMAT_FLOAT,   SND_PCM_FORMAT_FLOAT},
    {AF_FORMAT_DOUBLE,  SND_PCM_FORMAT_FLOAT64},
    {0},
};

// Return the first (most preferred) table row for mp_format, or NULL.
static const struct alsa_fmt *find_alsa_format(int mp_format)
{
    for (int n = 0; mp_alsa_formats[n].mp_format; n++) {
        if (mp_alsa_formats[n].mp_format == mp_format)
            return &mp_alsa_formats[n];
    }
    return NULL;
}

#if HAVE_CHMAP_API

// ALSA speaker position -> mpv speaker ID; terminated by the
// SND_CHMAP_LAST/MP_SPEAKER_ID_COUNT sentinel row.
static const int alsa_to_mp_channels[][2] = {
    {SND_CHMAP_FL,      MP_SP(FL)},
    {SND_CHMAP_FR,      MP_SP(FR)},
    {SND_CHMAP_RL,      MP_SP(BL)},
    {SND_CHMAP_RR,      MP_SP(BR)},
    {SND_CHMAP_FC,      MP_SP(FC)},
    {SND_CHMAP_LFE,     MP_SP(LFE)},
    {SND_CHMAP_SL,      MP_SP(SL)},
    {SND_CHMAP_SR,      MP_SP(SR)},
    {SND_CHMAP_RC,      MP_SP(BC)},
    {SND_CHMAP_FLC,     MP_SP(FLC)},
    {SND_CHMAP_FRC,     MP_SP(FRC)},
    {SND_CHMAP_FLW,     MP_SP(WL)},
    {SND_CHMAP_FRW,     MP_SP(WR)},
    {SND_CHMAP_TC,      MP_SP(TC)},
    {SND_CHMAP_TFL,     MP_SP(TFL)},
    {SND_CHMAP_TFR,     MP_SP(TFR)},
    {SND_CHMAP_TFC,     MP_SP(TFC)},
    {SND_CHMAP_TRL,     MP_SP(TBL)},
    {SND_CHMAP_TRR,     MP_SP(TBR)},
    {SND_CHMAP_TRC,     MP_SP(TBC)},
    {SND_CHMAP_RRC,     MP_SP(SDR)},
    {SND_CHMAP_RLC,     MP_SP(SDL)},
    {SND_CHMAP_MONO,    MP_SP(FC)},
    {SND_CHMAP_NA,      MP_SPEAKER_ID_NA},
    {SND_CHMAP_UNKNOWN, MP_SPEAKER_ID_NA},
    {SND_CHMAP_LAST,    MP_SPEAKER_ID_COUNT}
};

// Translate one ALSA channel position; MP_SPEAKER_ID_COUNT means "unknown".
static int find_mp_channel(int alsa_channel)
{
    for (int i = 0; alsa_to_mp_channels[i][1] != MP_SPEAKER_ID_COUNT; i++) {
        if (alsa_to_mp_channels[i][0] == alsa_channel)
            return alsa_to_mp_channels[i][1];
    }

    return MP_SPEAKER_ID_COUNT;
}

// Shorthand for an inline mp_chmap literal with n named speakers.
#define CHMAP(n, ...) &(struct mp_chmap) MP_CONCAT(MP_CHMAP, n) (__VA_ARGS__)

// Replace each channel in a with b (a->num == b->num)
static void replace_submap(struct mp_chmap *dst, struct mp_chmap *a,
                           struct mp_chmap *b)
{
    struct mp_chmap t = *dst;
    // Only rewrite if dst actually contains all of a's speakers.
    if (!mp_chmap_is_valid(&t) || mp_chmap_diffn(a, &t) != 0)
        return;
    assert(a->num == b->num);
    for (int n = 0; n < t.num; n++) {
        for (int i = 0; i < a->num; i++) {
            if (t.speaker[n] == a->speaker[i]) {
                t.speaker[n] = b->speaker[i];
                break;
            }
        }
    }
    if (mp_chmap_is_valid(&t))
        *dst = t;
}

// Convert an ALSA chmap to an mpv chmap. Returns false if the result would
// be invalid (unknown speakers, too many channels).
static bool mp_chmap_from_alsa(struct mp_chmap *dst, snd_pcm_chmap_t *src)
{
    *dst = (struct mp_chmap) {0};

    if (src->channels > MP_NUM_CHANNELS)
        return false;

    dst->num = src->channels;
    for (int c = 0; c < dst->num; c++)
        dst->speaker[c] = find_mp_channel(src->pos[c]);

    // Assume anything with 1 channel is mono.
    if (dst->num == 1)
        dst->speaker[0] = MP_SP(FC);

    // Remap weird Intel HDA HDMI 7.1 layouts correctly.
    replace_submap(dst, CHMAP(6, FL, FR, BL, BR, SDL, SDR),
                        CHMAP(6, FL, FR, SL, SR, BL, BR));

    return mp_chmap_is_valid(dst);
}

// Query the device's supported channel maps and adjust *chmap to the best
// supported layout. Returns false if nothing usable was found.
static bool query_chmaps(struct ao *ao, struct mp_chmap *chmap)
{
    struct priv *p = ao->priv;
    struct mp_chmap_sel chmap_sel = {.tmp = p};

    snd_pcm_chmap_query_t **maps = snd_pcm_query_chmaps(p->alsa);
    if (!maps) {
        MP_VERBOSE(ao, "snd_pcm_query_chmaps() returned NULL\n");
        return false;
    }

    for (int i = 0; maps[i] != NULL; i++) {
        char aname[128];
        if (snd_pcm_chmap_print(&maps[i]->map, sizeof(aname), aname) <= 0)
            aname[0] = '\0';

        struct mp_chmap entry;
        if (mp_chmap_from_alsa(&entry, &maps[i]->map)) {
            struct mp_chmap reorder = entry;
            mp_chmap_reorder_norm(&reorder);

            MP_DBG(ao, "got ALSA chmap: %s (%s) -> %s", aname,
                   snd_pcm_chmap_type_name(maps[i]->type),
                   mp_chmap_to_str(&entry));
            if (!mp_chmap_equals(&entry, &reorder))
                MP_DBG(ao, " -> %s", mp_chmap_to_str(&reorder));
            MP_DBG(ao, "\n");

            // VAR maps allow any channel order, so prefer the normalized one.
            struct mp_chmap final =
                maps[i]->type == SND_CHMAP_TYPE_VAR ? reorder : entry;
            mp_chmap_sel_add_map(&chmap_sel, &final);
        } else {
            MP_VERBOSE(ao, "skipping unknown ALSA channel map: %s\n", aname);
        }
    }

    snd_pcm_free_chmaps(maps);

    return ao_chmap_sel_adjust2(ao, &chmap_sel, chmap, false);
}

// Map back our selected channel layout to an ALSA one. This is done this way so
// that our ALSA->mp_chmap mapping function only has to go one way.
// The return value is to be freed with free().
static snd_pcm_chmap_t *map_back_chmap(struct ao *ao, struct mp_chmap *chmap)
{
    struct priv *p = ao->priv;
    if (!mp_chmap_is_valid(chmap))
        return NULL;

    snd_pcm_chmap_query_t **maps = snd_pcm_query_chmaps(p->alsa);
    if (!maps)
        return NULL;

    snd_pcm_chmap_t *alsa_chmap = NULL;

    for (int i = 0; maps[i] != NULL; i++) {
        struct mp_chmap entry;
        if (!mp_chmap_from_alsa(&entry, &maps[i]->map))
            continue;

        if (mp_chmap_equals(chmap, &entry) ||
            (mp_chmap_equals_reordered(chmap, &entry) &&
             maps[i]->type == SND_CHMAP_TYPE_VAR))
        {
            // Plain malloc/free here: ALSA's API, not talloc.
            alsa_chmap = calloc(1, sizeof(*alsa_chmap) +
                                   sizeof(alsa_chmap->pos[0]) * entry.num);
            if (!alsa_chmap)
                break;
            alsa_chmap->channels = entry.num;

            // Undo if mp_chmap_reorder() was called on the result.
            int reorder[MP_NUM_CHANNELS];
            mp_chmap_get_reorder(reorder, chmap, &entry);
            for (int n = 0; n < entry.num; n++)
                alsa_chmap->pos[n] = maps[i]->map.pos[reorder[n]];
            break;
        }
    }

    snd_pcm_free_chmaps(maps);
    return alsa_chmap;
}


// Tell the device which channel map we selected, then read back what the
// device actually uses and possibly adopt it into ao->channels.
// Always returns 0 (kept for symmetry with the no-chmap-API stub).
static int set_chmap(struct ao *ao, struct mp_chmap *dev_chmap, int num_channels)
{
    struct priv *p = ao->priv;
    int err;

    snd_pcm_chmap_t *alsa_chmap = map_back_chmap(ao, dev_chmap);
    if (alsa_chmap) {
        char tmp[128];
        if (snd_pcm_chmap_print(alsa_chmap, sizeof(tmp), tmp) > 0)
            MP_VERBOSE(ao, "trying to set ALSA channel map: %s\n", tmp);

        err = snd_pcm_set_chmap(p->alsa, alsa_chmap);
        if (err == -ENXIO) {
            // A device may not be able to set any channel map, even channel maps
            // that were reported as supported. This is either because the ALSA
            // device is broken (dmix), or because the driver has only 1
            // channel map per channel count, and setting the map is not needed.
            MP_VERBOSE(ao, "device returned ENXIO when setting channel map %s\n",
                       mp_chmap_to_str(dev_chmap));
        } else {
            CHECK_ALSA_WARN("Channel map setup failed");
        }

        free(alsa_chmap);
    }

    alsa_chmap = snd_pcm_get_chmap(p->alsa);
    if (alsa_chmap) {
        char tmp[128];
        if (snd_pcm_chmap_print(alsa_chmap, sizeof(tmp), tmp) > 0)
            MP_VERBOSE(ao, "channel map reported by ALSA: %s\n", tmp);

        struct mp_chmap chmap;
        mp_chmap_from_alsa(&chmap, alsa_chmap);

        MP_VERBOSE(ao, "which we understand as: %s\n", mp_chmap_to_str(&chmap));

        if (p->opts->ignore_chmap) {
            MP_VERBOSE(ao, "user set ignore-chmap; ignoring the channel map.\n");
        } else if (af_fmt_is_spdif(ao->format)) {
            MP_VERBOSE(ao, "using spdif passthrough; ignoring the channel map.\n");
        } else if (!mp_chmap_is_valid(&chmap)) {
            MP_WARN(ao, "Got unknown channel map from ALSA.\n");
        } else if (chmap.num != num_channels) {
            MP_WARN(ao, "ALSA channel map conflicts with channel count!\n");
        } else {
            if (mp_chmap_equals(&chmap, &ao->channels)) {
                MP_VERBOSE(ao, "which is what we requested.\n");
            } else if (!mp_chmap_is_valid(dev_chmap)) {
                MP_VERBOSE(ao, "ignoring the ALSA channel map.\n");
            } else {
                MP_VERBOSE(ao, "using the ALSA channel map.\n");
                ao->channels = chmap;
            }
        }

        free(alsa_chmap);
    }

    return 0;
}

#else /* HAVE_CHMAP_API */

// Stubs for alsa-lib versions without the chmap API.
static bool query_chmaps(struct ao *ao, struct mp_chmap *chmap)
{
    return false;
}

static int set_chmap(struct ao *ao, struct mp_chmap *dev_chmap, int num_channels)
{
    return 0;
}

#endif /* else HAVE_CHMAP_API */

// Log a dump of the given HW params at debug level.
static void dump_hw_params(struct ao *ao, const char *msg,
                           snd_pcm_hw_params_t *hw_params)
{
    struct priv *p = ao->priv;
    int err;

    err = snd_pcm_hw_params_dump(hw_params, p->output);
    CHECK_ALSA_WARN("Dump hwparams error");

    char *tmp = NULL;
    size_t tmp_s = snd_output_buffer_string(p->output, &tmp);
    if (tmp)
        mp_msg(ao->log, MSGL_DEBUG, "%s---\n%.*s---\n", msg, (int)tmp_s, tmp);
    snd_output_flush(p->output);
}

// Sample rate -> IEC958 AES3 consumer frequency code for spdif setup.
static int map_iec958_srate(int srate)
{
    switch (srate) {
    case 44100:     return IEC958_AES3_CON_FS_44100;
    case 48000:     return IEC958_AES3_CON_FS_48000;
    case 32000:     return IEC958_AES3_CON_FS_32000;
    case 22050:     return IEC958_AES3_CON_FS_22050;
    case 24000:     return IEC958_AES3_CON_FS_24000;
    case 88200:     return IEC958_AES3_CON_FS_88200;
    case 768000:    return IEC958_AES3_CON_FS_768000;
    case 96000:     return IEC958_AES3_CON_FS_96000;
    case 176400:    return IEC958_AES3_CON_FS_176400;
    case 192000:    return IEC958_AES3_CON_FS_192000;
    default:        return IEC958_AES3_CON_FS_NOTID;
    }
}

// ALSA device strings can have parameters. They are usually appended to the
// device name. There can be various forms, and we (sometimes) want to append
// them to unknown device strings, which possibly already include params.
+static char *append_params(void *ta_parent, const char *device, const char *p) +{ + if (!p || !p[0]) + return talloc_strdup(ta_parent, device); + + int len = strlen(device); + char *end = strchr(device, ':'); + if (!end) { + /* no existing parameters: add it behind device name */ + return talloc_asprintf(ta_parent, "%s:%s", device, p); + } else if (end[1] == '\0') { + /* ":" but no parameters */ + return talloc_asprintf(ta_parent, "%s%s", device, p); + } else if (end[1] == '{' && device[len - 1] == '}') { + /* parameters in config syntax: add it inside the { } block */ + return talloc_asprintf(ta_parent, "%.*s %s}", len - 1, device, p); + } else { + /* a simple list of parameters: add it at the end of the list */ + return talloc_asprintf(ta_parent, "%s,%s", device, p); + } + MP_ASSERT_UNREACHABLE(); +} + +static int try_open_device(struct ao *ao, const char *device, int mode) +{ + struct priv *p = ao->priv; + int err; + + if (af_fmt_is_spdif(ao->format)) { + void *tmp = talloc_new(NULL); + char *params = talloc_asprintf(tmp, + "AES0=%d,AES1=%d,AES2=0,AES3=%d", + IEC958_AES0_NONAUDIO | IEC958_AES0_PRO_EMPHASIS_NONE, + IEC958_AES1_CON_ORIGINAL | IEC958_AES1_CON_PCM_CODER, + map_iec958_srate(ao->samplerate)); + const char *ac3_device = append_params(tmp, device, params); + MP_VERBOSE(ao, "opening device '%s' => '%s'\n", device, ac3_device); + err = snd_pcm_open(&p->alsa, ac3_device, SND_PCM_STREAM_PLAYBACK, mode); + if (err < 0) { + // Some spdif-capable devices do not accept the AES0 parameter, + // and instead require the iec958 pseudo-device (they will play + // noise otherwise). Unfortunately, ALSA gives us no way to map + // these devices, so try it for the default device only. 
+ bstr dev; + bstr_split_tok(bstr0(device), ":", &dev, &(bstr){0}); + if (bstr_equals0(dev, "default")) { + const char *const fallbacks[] = {"hdmi", "iec958", NULL}; + for (int n = 0; fallbacks[n]; n++) { + char *ndev = append_params(tmp, fallbacks[n], params); + MP_VERBOSE(ao, "got error '%s'; opening iec fallback " + "device '%s'\n", snd_strerror(err), ndev); + err = snd_pcm_open + (&p->alsa, ndev, SND_PCM_STREAM_PLAYBACK, mode); + if (err >= 0) + break; + } + } + } + talloc_free(tmp); + } else { + MP_VERBOSE(ao, "opening device '%s'\n", device); + err = snd_pcm_open(&p->alsa, device, SND_PCM_STREAM_PLAYBACK, mode); + } + + return err; +} + +static void uninit(struct ao *ao) +{ + struct priv *p = ao->priv; + + if (p->output) + snd_output_close(p->output); + p->output = NULL; + + if (p->alsa) { + int err; + + err = snd_pcm_close(p->alsa); + p->alsa = NULL; + CHECK_ALSA_ERROR("pcm close error"); + } + +alsa_error: ; +} + +#define INIT_DEVICE_ERR_GENERIC -1 +#define INIT_DEVICE_ERR_HWPARAMS -2 +static int init_device(struct ao *ao, int mode) +{ + struct priv *p = ao->priv; + struct ao_alsa_opts *opts = p->opts; + int ret = INIT_DEVICE_ERR_GENERIC; + char *tmp; + size_t tmp_s; + int err; + + p->alsa_fmt = SND_PCM_FORMAT_UNKNOWN; + + err = snd_output_buffer_open(&p->output); + CHECK_ALSA_ERROR("Unable to create output buffer"); + + const char *device = "default"; + if (ao->device) + device = ao->device; + + err = try_open_device(ao, device, mode); + CHECK_ALSA_ERROR("Playback open error"); + + err = snd_pcm_dump(p->alsa, p->output); + CHECK_ALSA_WARN("Dump PCM error"); + tmp_s = snd_output_buffer_string(p->output, &tmp); + if (tmp) + MP_DBG(ao, "PCM setup:\n---\n%.*s---\n", (int)tmp_s, tmp); + snd_output_flush(p->output); + + err = snd_pcm_nonblock(p->alsa, 0); + CHECK_ALSA_WARN("Unable to set blocking mode"); + + snd_pcm_hw_params_t *alsa_hwparams; + snd_pcm_hw_params_alloca(&alsa_hwparams); + + err = snd_pcm_hw_params_any(p->alsa, alsa_hwparams); + 
CHECK_ALSA_ERROR("Unable to get initial parameters"); + + dump_hw_params(ao, "Start HW params:\n", alsa_hwparams); + + // Some ALSA drivers have broken delay reporting, so disable the ALSA + // resampling plugin by default. + if (!p->opts->resample) { + err = snd_pcm_hw_params_set_rate_resample(p->alsa, alsa_hwparams, 0); + CHECK_ALSA_ERROR("Unable to disable resampling"); + } + dump_hw_params(ao, "HW params after rate:\n", alsa_hwparams); + + snd_pcm_access_t access = af_fmt_is_planar(ao->format) + ? SND_PCM_ACCESS_RW_NONINTERLEAVED + : SND_PCM_ACCESS_RW_INTERLEAVED; + err = snd_pcm_hw_params_set_access(p->alsa, alsa_hwparams, access); + if (err < 0 && af_fmt_is_planar(ao->format)) { + ao->format = af_fmt_from_planar(ao->format); + access = SND_PCM_ACCESS_RW_INTERLEAVED; + err = snd_pcm_hw_params_set_access(p->alsa, alsa_hwparams, access); + } + CHECK_ALSA_ERROR("Unable to set access type"); + dump_hw_params(ao, "HW params after access:\n", alsa_hwparams); + + bool found_format = false; + int try_formats[AF_FORMAT_COUNT + 1]; + af_get_best_sample_formats(ao->format, try_formats); + for (int n = 0; try_formats[n] && !found_format; n++) { + int mp_format = try_formats[n]; + if (af_fmt_is_planar(ao->format) != af_fmt_is_planar(mp_format)) + continue; // implied SND_PCM_ACCESS mismatches + int mp_pformat = af_fmt_from_planar(mp_format); + if (af_fmt_is_spdif(mp_pformat)) + mp_pformat = AF_FORMAT_S16; + const struct alsa_fmt *fmt = find_alsa_format(mp_pformat); + if (!fmt) + continue; + for (; fmt->mp_format == mp_pformat; fmt++) { + p->alsa_fmt = fmt->alsa_format; + p->convert = (struct ao_convert_fmt){ + .src_fmt = mp_format, + .dst_bits = fmt->bits ? 
fmt->bits : af_fmt_to_bytes(mp_format) * 8, + .pad_msb = fmt->pad_msb, + }; + if (!ao_can_convert_inplace(&p->convert)) + continue; + MP_VERBOSE(ao, "trying format %s/%d\n", af_fmt_to_str(mp_pformat), + p->alsa_fmt); + if (snd_pcm_hw_params_test_format(p->alsa, alsa_hwparams, + p->alsa_fmt) >= 0) + { + ao->format = mp_format; + found_format = true; + break; + } + } + } + + if (!found_format) { + MP_ERR(ao, "Can't find appropriate sample format.\n"); + goto alsa_error; + } + + err = snd_pcm_hw_params_set_format(p->alsa, alsa_hwparams, p->alsa_fmt); + CHECK_ALSA_ERROR("Unable to set format"); + dump_hw_params(ao, "HW params after format:\n", alsa_hwparams); + + // Stereo, or mono if input is 1 channel. + struct mp_chmap reduced; + mp_chmap_from_channels(&reduced, MPMIN(2, ao->channels.num)); + + struct mp_chmap dev_chmap = {0}; + if (!af_fmt_is_spdif(ao->format) && !p->opts->ignore_chmap && + !mp_chmap_equals(&ao->channels, &reduced)) + { + struct mp_chmap res = ao->channels; + if (query_chmaps(ao, &res)) + dev_chmap = res; + + // Whatever it is, we dumb it down to mono or stereo. Some drivers may + // return things like bl-br, but the user (probably) still wants stereo. + // This also handles the failure case (dev_chmap.num==0). 
+ if (dev_chmap.num <= 2) { + dev_chmap.num = 0; + ao->channels = reduced; + } else if (dev_chmap.num) { + ao->channels = dev_chmap; + } + } + + int num_channels = ao->channels.num; + err = snd_pcm_hw_params_set_channels_near + (p->alsa, alsa_hwparams, &num_channels); + CHECK_ALSA_ERROR("Unable to set channels"); + dump_hw_params(ao, "HW params after channels:\n", alsa_hwparams); + + if (num_channels > MP_NUM_CHANNELS) { + MP_FATAL(ao, "Too many audio channels (%d).\n", num_channels); + goto alsa_error; + } + + err = snd_pcm_hw_params_set_rate_near + (p->alsa, alsa_hwparams, &ao->samplerate, NULL); + CHECK_ALSA_ERROR("Unable to set samplerate-2"); + dump_hw_params(ao, "HW params after rate-2:\n", alsa_hwparams); + + snd_pcm_hw_params_t *hwparams_backup; + snd_pcm_hw_params_alloca(&hwparams_backup); + snd_pcm_hw_params_copy(hwparams_backup, alsa_hwparams); + + // Cargo-culted buffer settings; might still be useful for PulseAudio. + err = 0; + if (opts->buffer_time) { + err = snd_pcm_hw_params_set_buffer_time_near + (p->alsa, alsa_hwparams, &(unsigned int){opts->buffer_time}, NULL); + CHECK_ALSA_WARN("Unable to set buffer time near"); + } + if (err >= 0 && opts->frags) { + err = snd_pcm_hw_params_set_periods_near + (p->alsa, alsa_hwparams, &(unsigned int){opts->frags}, NULL); + CHECK_ALSA_WARN("Unable to set periods"); + } + if (err < 0) + snd_pcm_hw_params_copy(alsa_hwparams, hwparams_backup); + + dump_hw_params(ao, "Going to set final HW params:\n", alsa_hwparams); + + /* finally install hardware parameters */ + err = snd_pcm_hw_params(p->alsa, alsa_hwparams); + ret = INIT_DEVICE_ERR_HWPARAMS; + CHECK_ALSA_ERROR("Unable to set hw-parameters"); + ret = INIT_DEVICE_ERR_GENERIC; + dump_hw_params(ao, "Final HW params:\n", alsa_hwparams); + + if (set_chmap(ao, &dev_chmap, num_channels) < 0) + goto alsa_error; + + if (num_channels != ao->channels.num) { + int req = ao->channels.num; + mp_chmap_from_channels(&ao->channels, MPMIN(2, num_channels)); + 
mp_chmap_fill_na(&ao->channels, num_channels); + MP_ERR(ao, "Asked for %d channels, got %d - fallback to %s.\n", req, + num_channels, mp_chmap_to_str(&ao->channels)); + if (num_channels != ao->channels.num) { + MP_FATAL(ao, "mismatching channel counts.\n"); + goto alsa_error; + } + } + + err = snd_pcm_hw_params_get_buffer_size(alsa_hwparams, &p->buffersize); + CHECK_ALSA_ERROR("Unable to get buffersize"); + + err = snd_pcm_hw_params_get_period_size(alsa_hwparams, &p->outburst, NULL); + CHECK_ALSA_ERROR("Unable to get period size"); + + p->can_pause = snd_pcm_hw_params_can_pause(alsa_hwparams); + + snd_pcm_sw_params_t *alsa_swparams; + snd_pcm_sw_params_alloca(&alsa_swparams); + + err = snd_pcm_sw_params_current(p->alsa, alsa_swparams); + CHECK_ALSA_ERROR("Unable to get sw-parameters"); + + snd_pcm_uframes_t boundary; + err = snd_pcm_sw_params_get_boundary(alsa_swparams, &boundary); + CHECK_ALSA_ERROR("Unable to get boundary"); + + // Manual trigger; INT_MAX as suggested by ALSA doxygen (they call it MAXINT). + err = snd_pcm_sw_params_set_start_threshold(p->alsa, alsa_swparams, INT_MAX); + CHECK_ALSA_ERROR("Unable to set start threshold"); + + /* play silence when there is an underrun */ + err = snd_pcm_sw_params_set_silence_size + (p->alsa, alsa_swparams, boundary); + CHECK_ALSA_ERROR("Unable to set silence size"); + + err = snd_pcm_sw_params(p->alsa, alsa_swparams); + CHECK_ALSA_ERROR("Unable to set sw-parameters"); + + MP_VERBOSE(ao, "hw pausing supported: %s\n", p->can_pause ? 
"yes" : "no"); + MP_VERBOSE(ao, "buffersize: %d samples\n", (int)p->buffersize); + MP_VERBOSE(ao, "period size: %d samples\n", (int)p->outburst); + + ao->device_buffer = p->buffersize; + + p->convert.channels = ao->channels.num; + + err = snd_pcm_prepare(p->alsa); + CHECK_ALSA_ERROR("pcm prepare error"); + + return 0; + +alsa_error: + uninit(ao); + return ret; +} + +static int init(struct ao *ao) +{ + struct priv *p = ao->priv; + p->opts = mp_get_config_group(ao, ao->global, &ao_alsa_conf); + + if (!p->opts->ni) + ao->format = af_fmt_from_planar(ao->format); + + MP_VERBOSE(ao, "using ALSA version: %s\n", snd_asoundlib_version()); + + int mode = 0; + int r = init_device(ao, mode); + if (r == INIT_DEVICE_ERR_HWPARAMS) { + // With some drivers, ALSA appears to be unable to set valid hwparams, + // but they work if at least SND_PCM_NO_AUTO_FORMAT is set. Also, it + // appears you can set this flag only on opening a device, thus there + // is the need to retry opening the device. + MP_WARN(ao, "Attempting to work around even more ALSA bugs...\n"); + mode |= SND_PCM_NO_AUTO_CHANNELS | SND_PCM_NO_AUTO_FORMAT | + SND_PCM_NO_AUTO_RESAMPLE; + r = init_device(ao, mode); + } + + // Sometimes, ALSA will advertise certain chmaps, but it's not possible to + // set them. This can happen with dmix: as of alsa 1.0.29, dmix can do + // stereo only, but advertises the surround chmaps of the underlying device. + // In this case, e.g. setting 6 channels will succeed, but requesting 5.1 + // afterwards will fail. Then it will return something like "FL FR NA NA NA NA" + // as channel map. This means we would have to pad stereo output to 6 + // channels with silence, which would require lots of extra processing. You + // can't change the number of channels to 2 either, because the hw params + // are already set! So just fuck it and reopen the device with the chmap + // "cleaned out" of NA entries. 
+ if (r >= 0) { + struct mp_chmap without_na = ao->channels; + mp_chmap_remove_na(&without_na); + + if (mp_chmap_is_valid(&without_na) && without_na.num <= 2 && + ao->channels.num > 2) + { + MP_VERBOSE(ao, "Working around braindead dmix multichannel behavior.\n"); + uninit(ao); + ao->channels = without_na; + r = init_device(ao, mode); + } + } + + return r; +} + +// Function for dealing with playback state. This attempts to recover the ALSA +// state (bring it into SND_PCM_STATE_{PREPARED,RUNNING,PAUSED,UNDERRUN}). If +// state!=NULL, fill it after recovery is attempted. +// Returns true if PCM is in one the expected states. +static bool recover_and_get_state(struct ao *ao, struct mp_pcm_state *state) +{ + struct priv *p = ao->priv; + int err; + + snd_pcm_status_t *st; + snd_pcm_status_alloca(&st); + + bool state_ok = false; + snd_pcm_state_t pcmst = SND_PCM_STATE_DISCONNECTED; + + // Give it a number of chances to recover. This tries to deal with the fact + // that the API is asynchronous, and to account for some past cargo-cult + // (where things were retried in a loop). + for (int n = 0; n < 10; n++) { + err = snd_pcm_status(p->alsa, st); + if (err == -EPIPE) { + // ALSA APIs can return -EPIPE when an XRUN happens, + // we skip right to handling it by setting pcmst + // manually. + pcmst = SND_PCM_STATE_XRUN; + } else { + // Otherwise do error checking and query the PCM state properly. + CHECK_ALSA_ERROR("snd_pcm_status"); + + pcmst = snd_pcm_status_get_state(st); + } + + if (pcmst == SND_PCM_STATE_PREPARED || + pcmst == SND_PCM_STATE_RUNNING || + pcmst == SND_PCM_STATE_PAUSED) + { + state_ok = true; + break; + } + + MP_VERBOSE(ao, "attempt %d to recover from state '%s'...\n", + n + 1, snd_pcm_state_name(pcmst)); + + switch (pcmst) { + // Underrun; recover. (We never use draining.) + case SND_PCM_STATE_XRUN: + case SND_PCM_STATE_DRAINING: + err = snd_pcm_prepare(p->alsa); + CHECK_ALSA_ERROR("pcm prepare error"); + continue; + // Hardware suspend. 
+ case SND_PCM_STATE_SUSPENDED: + MP_INFO(ao, "PCM in suspend mode, trying to resume.\n"); + err = snd_pcm_resume(p->alsa); + if (err == -EAGAIN) { + // Cargo-cult from decades ago, with a cargo cult timeout. + MP_INFO(ao, "PCM resume EAGAIN - retrying.\n"); + sleep(1); + continue; + } + if (err == -ENOSYS) { + // As suggested by ALSA doxygen. + MP_VERBOSE(ao, "ENOSYS, retrying with snd_pcm_prepare().\n"); + err = snd_pcm_prepare(p->alsa); + } + if (err < 0) + MP_ERR(ao, "resuming from SUSPENDED: %s\n", snd_strerror(err)); + continue; + // Device lost. OPEN/SETUP are states we never enter after init, so + // treat them like DISCONNECTED. + case SND_PCM_STATE_DISCONNECTED: + case SND_PCM_STATE_OPEN: + case SND_PCM_STATE_SETUP: + default: + if (!p->device_lost) { + MP_WARN(ao, "Device lost, trying to recover...\n"); + ao_request_reload(ao); + p->device_lost = true; + } + goto alsa_error; + } + } + + if (!state_ok) { + MP_ERR(ao, "could not recover\n"); + } + +alsa_error: + + if (state) { + snd_pcm_sframes_t del = state_ok ? snd_pcm_status_get_delay(st) : 0; + state->delay = MPMAX(del, 0) / (double)ao->samplerate; + state->free_samples = state_ok ? snd_pcm_status_get_avail(st) : 0; + state->free_samples = MPCLAMP(state->free_samples, 0, ao->device_buffer); + // Align to period size. 
+ state->free_samples = state->free_samples / p->outburst * p->outburst; + state->queued_samples = ao->device_buffer - state->free_samples; + state->playing = pcmst == SND_PCM_STATE_RUNNING || + pcmst == SND_PCM_STATE_PAUSED; + } + + return state_ok; +} + +static void audio_get_state(struct ao *ao, struct mp_pcm_state *state) +{ + recover_and_get_state(ao, state); +} + +static void audio_start(struct ao *ao) +{ + struct priv *p = ao->priv; + int err; + + recover_and_get_state(ao, NULL); + + err = snd_pcm_start(p->alsa); + CHECK_ALSA_ERROR("pcm start error"); + +alsa_error: ; +} + +static void audio_reset(struct ao *ao) +{ + struct priv *p = ao->priv; + int err; + + err = snd_pcm_drop(p->alsa); + CHECK_ALSA_ERROR("pcm drop error"); + err = snd_pcm_prepare(p->alsa); + CHECK_ALSA_ERROR("pcm prepare error"); + + recover_and_get_state(ao, NULL); + +alsa_error: ; +} + +static bool audio_set_paused(struct ao *ao, bool paused) +{ + struct priv *p = ao->priv; + int err; + + recover_and_get_state(ao, NULL); + + if (!p->can_pause) + return false; + + snd_pcm_state_t pcmst = snd_pcm_state(p->alsa); + if (pcmst == SND_PCM_STATE_RUNNING && paused) { + err = snd_pcm_pause(p->alsa, 1); + CHECK_ALSA_ERROR("pcm pause error"); + } else if (pcmst == SND_PCM_STATE_PAUSED && !paused) { + err = snd_pcm_pause(p->alsa, 0); + CHECK_ALSA_ERROR("pcm resume error"); + } + + return true; + +alsa_error: + return false; +} + +static bool audio_write(struct ao *ao, void **data, int samples) +{ + struct priv *p = ao->priv; + + ao_convert_inplace(&p->convert, data, samples); + + if (!recover_and_get_state(ao, NULL)) + return false; + + snd_pcm_sframes_t err = 0; + if (af_fmt_is_planar(ao->format)) { + err = snd_pcm_writen(p->alsa, data, samples); + } else { + err = snd_pcm_writei(p->alsa, data[0], samples); + } + + CHECK_ALSA_ERROR("pcm write error"); + if (err >= 0 && err != samples) { + MP_ERR(ao, "unexpected partial write (%d of %d frames), dropping audio\n", + (int)err, samples); + } + + return 
true; + +alsa_error: + return false; +} + +static bool is_useless_device(char *name) +{ + char *crap[] = {"rear", "center_lfe", "side", "pulse", "null", "dsnoop", "hw"}; + for (int i = 0; i < MP_ARRAY_SIZE(crap); i++) { + int l = strlen(crap[i]); + if (name && strncmp(name, crap[i], l) == 0 && + (!name[l] || name[l] == ':')) + return true; + } + // The standard default entry will achieve exactly the same. + if (name && strcmp(name, "default") == 0) + return true; + return false; +} + +static void list_devs(struct ao *ao, struct ao_device_list *list) +{ + void **hints; + if (snd_device_name_hint(-1, "pcm", &hints) < 0) + return; + + ao_device_list_add(list, ao, &(struct ao_device_desc){"", ""}); + + for (int n = 0; hints[n]; n++) { + char *name = snd_device_name_get_hint(hints[n], "NAME"); + char *desc = snd_device_name_get_hint(hints[n], "DESC"); + char *io = snd_device_name_get_hint(hints[n], "IOID"); + if (!is_useless_device(name) && (!io || strcmp(io, "Output") == 0)) { + char desc2[1024]; + snprintf(desc2, sizeof(desc2), "%s", desc ? desc : ""); + for (int i = 0; desc2[i]; i++) { + if (desc2[i] == '\n') + desc2[i] = '/'; + } + ao_device_list_add(list, ao, &(struct ao_device_desc){name, desc2}); + } + free(name); + free(desc); + free(io); + } + + snd_device_name_free_hint(hints); +} + +const struct ao_driver audio_out_alsa = { + .description = "ALSA audio output", + .name = "alsa", + .init = init, + .uninit = uninit, + .control = control, + .get_state = audio_get_state, + .write = audio_write, + .start = audio_start, + .set_pause = audio_set_paused, + .reset = audio_reset, + .list_devs = list_devs, + .priv_size = sizeof(struct priv), + .global_opts = &ao_alsa_conf, +}; diff --git a/audio/out/ao_audiotrack.c b/audio/out/ao_audiotrack.c new file mode 100644 index 0000000..1392699 --- /dev/null +++ b/audio/out/ao_audiotrack.c @@ -0,0 +1,852 @@ +/* + * Android AudioTrack audio output driver. 
+ * Copyright (C) 2018 Aman Gupta <aman@tmm1.net> + * Copyright (C) 2012-2015 VLC authors and VideoLAN, VideoLabs + * Authors: Thomas Guillem <thomas@gllm.fr> + * Ming Hu <tewilove@gmail.com> + * + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "ao.h" +#include "internal.h" +#include "common/msg.h" +#include "audio/format.h" +#include "options/m_option.h" +#include "osdep/threads.h" +#include "osdep/timer.h" +#include "misc/jni.h" + +struct priv { + jobject audiotrack; + jint samplerate; + jint channel_config; + jint format; + jint size; + + jobject timestamp; + int64_t timestamp_fetched; + bool timestamp_set; + int timestamp_stable; + + uint32_t written_frames; /* requires uint32_t rollover semantics */ + uint32_t playhead_pos; + uint32_t playhead_offset; + bool reset_pending; + + void *chunk; + int chunksize; + jbyteArray bytearray; + jshortArray shortarray; + jfloatArray floatarray; + jobject bbuf; + + bool cfg_pcm_float; + int cfg_session_id; + + bool needs_timestamp_offset; + int64_t timestamp_offset; + + bool thread_terminate; + bool thread_created; + mp_thread thread; + mp_mutex lock; + mp_cond wakeup; +}; + +struct JNIByteBuffer { + jclass clazz; + jmethodID clear; + struct MPJniField mapping[]; +} ByteBuffer = {.mapping = { + #define OFFSET(member) offsetof(struct JNIByteBuffer, member) + {"java/nio/ByteBuffer", 
NULL, NULL, MP_JNI_CLASS, OFFSET(clazz), 1}, + {"java/nio/ByteBuffer", "clear", "()Ljava/nio/Buffer;", MP_JNI_METHOD, OFFSET(clear), 1}, + {0}, + #undef OFFSET +}}; + +struct JNIAudioTrack { + jclass clazz; + jmethodID ctor; + jmethodID ctorV21; + jmethodID release; + jmethodID getState; + jmethodID getPlayState; + jmethodID play; + jmethodID stop; + jmethodID flush; + jmethodID pause; + jmethodID write; + jmethodID writeFloat; + jmethodID writeShortV23; + jmethodID writeBufferV21; + jmethodID getBufferSizeInFramesV23; + jmethodID getPlaybackHeadPosition; + jmethodID getTimestamp; + jmethodID getLatency; + jmethodID getMinBufferSize; + jmethodID getNativeOutputSampleRate; + jint STATE_INITIALIZED; + jint PLAYSTATE_STOPPED; + jint PLAYSTATE_PAUSED; + jint PLAYSTATE_PLAYING; + jint MODE_STREAM; + jint ERROR; + jint ERROR_BAD_VALUE; + jint ERROR_INVALID_OPERATION; + jint WRITE_BLOCKING; + jint WRITE_NON_BLOCKING; + struct MPJniField mapping[]; +} AudioTrack = {.mapping = { + #define OFFSET(member) offsetof(struct JNIAudioTrack, member) + {"android/media/AudioTrack", NULL, NULL, MP_JNI_CLASS, OFFSET(clazz), 1}, + {"android/media/AudioTrack", "<init>", "(IIIIIII)V", MP_JNI_METHOD, OFFSET(ctor), 1}, + {"android/media/AudioTrack", "<init>", "(Landroid/media/AudioAttributes;Landroid/media/AudioFormat;III)V", MP_JNI_METHOD, OFFSET(ctorV21), 0}, + {"android/media/AudioTrack", "release", "()V", MP_JNI_METHOD, OFFSET(release), 1}, + {"android/media/AudioTrack", "getState", "()I", MP_JNI_METHOD, OFFSET(getState), 1}, + {"android/media/AudioTrack", "getPlayState", "()I", MP_JNI_METHOD, OFFSET(getPlayState), 1}, + {"android/media/AudioTrack", "play", "()V", MP_JNI_METHOD, OFFSET(play), 1}, + {"android/media/AudioTrack", "stop", "()V", MP_JNI_METHOD, OFFSET(stop), 1}, + {"android/media/AudioTrack", "flush", "()V", MP_JNI_METHOD, OFFSET(flush), 1}, + {"android/media/AudioTrack", "pause", "()V", MP_JNI_METHOD, OFFSET(pause), 1}, + {"android/media/AudioTrack", "write", "([BII)I", 
MP_JNI_METHOD, OFFSET(write), 1}, + {"android/media/AudioTrack", "write", "([FIII)I", MP_JNI_METHOD, OFFSET(writeFloat), 1}, + {"android/media/AudioTrack", "write", "([SIII)I", MP_JNI_METHOD, OFFSET(writeShortV23), 0}, + {"android/media/AudioTrack", "write", "(Ljava/nio/ByteBuffer;II)I", MP_JNI_METHOD, OFFSET(writeBufferV21), 1}, + {"android/media/AudioTrack", "getBufferSizeInFrames", "()I", MP_JNI_METHOD, OFFSET(getBufferSizeInFramesV23), 0}, + {"android/media/AudioTrack", "getTimestamp", "(Landroid/media/AudioTimestamp;)Z", MP_JNI_METHOD, OFFSET(getTimestamp), 1}, + {"android/media/AudioTrack", "getPlaybackHeadPosition", "()I", MP_JNI_METHOD, OFFSET(getPlaybackHeadPosition), 1}, + {"android/media/AudioTrack", "getLatency", "()I", MP_JNI_METHOD, OFFSET(getLatency), 1}, + {"android/media/AudioTrack", "getMinBufferSize", "(III)I", MP_JNI_STATIC_METHOD, OFFSET(getMinBufferSize), 1}, + {"android/media/AudioTrack", "getNativeOutputSampleRate", "(I)I", MP_JNI_STATIC_METHOD, OFFSET(getNativeOutputSampleRate), 1}, + {"android/media/AudioTrack", "WRITE_BLOCKING", "I", MP_JNI_STATIC_FIELD_AS_INT, OFFSET(WRITE_BLOCKING), 0}, + {"android/media/AudioTrack", "WRITE_NON_BLOCKING", "I", MP_JNI_STATIC_FIELD_AS_INT, OFFSET(WRITE_NON_BLOCKING), 0}, + {"android/media/AudioTrack", "STATE_INITIALIZED", "I", MP_JNI_STATIC_FIELD_AS_INT, OFFSET(STATE_INITIALIZED), 1}, + {"android/media/AudioTrack", "PLAYSTATE_STOPPED", "I", MP_JNI_STATIC_FIELD_AS_INT, OFFSET(PLAYSTATE_STOPPED), 1}, + {"android/media/AudioTrack", "PLAYSTATE_PAUSED", "I", MP_JNI_STATIC_FIELD_AS_INT, OFFSET(PLAYSTATE_PAUSED), 1}, + {"android/media/AudioTrack", "PLAYSTATE_PLAYING", "I", MP_JNI_STATIC_FIELD_AS_INT, OFFSET(PLAYSTATE_PLAYING), 1}, + {"android/media/AudioTrack", "MODE_STREAM", "I", MP_JNI_STATIC_FIELD_AS_INT, OFFSET(MODE_STREAM), 1}, + {"android/media/AudioTrack", "ERROR", "I", MP_JNI_STATIC_FIELD_AS_INT, OFFSET(ERROR), 1}, + {"android/media/AudioTrack", "ERROR_BAD_VALUE", "I", MP_JNI_STATIC_FIELD_AS_INT, 
OFFSET(ERROR_BAD_VALUE), 1}, + {"android/media/AudioTrack", "ERROR_INVALID_OPERATION", "I", MP_JNI_STATIC_FIELD_AS_INT, OFFSET(ERROR_INVALID_OPERATION), 1}, + {0} + #undef OFFSET +}}; + +struct JNIAudioAttributes { + jclass clazz; + jint CONTENT_TYPE_MOVIE; + jint CONTENT_TYPE_MUSIC; + jint USAGE_MEDIA; + struct MPJniField mapping[]; +} AudioAttributes = {.mapping = { + #define OFFSET(member) offsetof(struct JNIAudioAttributes, member) + {"android/media/AudioAttributes", NULL, NULL, MP_JNI_CLASS, OFFSET(clazz), 0}, + {"android/media/AudioAttributes", "CONTENT_TYPE_MOVIE", "I", MP_JNI_STATIC_FIELD_AS_INT, OFFSET(CONTENT_TYPE_MOVIE), 0}, + {"android/media/AudioAttributes", "CONTENT_TYPE_MUSIC", "I", MP_JNI_STATIC_FIELD_AS_INT, OFFSET(CONTENT_TYPE_MUSIC), 0}, + {"android/media/AudioAttributes", "USAGE_MEDIA", "I", MP_JNI_STATIC_FIELD_AS_INT, OFFSET(USAGE_MEDIA), 0}, + {0} + #undef OFFSET +}}; + +struct JNIAudioAttributesBuilder { + jclass clazz; + jmethodID ctor; + jmethodID setUsage; + jmethodID setContentType; + jmethodID build; + struct MPJniField mapping[]; +} AudioAttributesBuilder = {.mapping = { + #define OFFSET(member) offsetof(struct JNIAudioAttributesBuilder, member) + {"android/media/AudioAttributes$Builder", NULL, NULL, MP_JNI_CLASS, OFFSET(clazz), 0}, + {"android/media/AudioAttributes$Builder", "<init>", "()V", MP_JNI_METHOD, OFFSET(ctor), 0}, + {"android/media/AudioAttributes$Builder", "setUsage", "(I)Landroid/media/AudioAttributes$Builder;", MP_JNI_METHOD, OFFSET(setUsage), 0}, + {"android/media/AudioAttributes$Builder", "setContentType", "(I)Landroid/media/AudioAttributes$Builder;", MP_JNI_METHOD, OFFSET(setContentType), 0}, + {"android/media/AudioAttributes$Builder", "build", "()Landroid/media/AudioAttributes;", MP_JNI_METHOD, OFFSET(build), 0}, + {0} + #undef OFFSET +}}; + +struct JNIAudioFormat { + jclass clazz; + jint ENCODING_PCM_8BIT; + jint ENCODING_PCM_16BIT; + jint ENCODING_PCM_FLOAT; + jint ENCODING_IEC61937; + jint CHANNEL_OUT_MONO; + jint 
CHANNEL_OUT_STEREO; + jint CHANNEL_OUT_FRONT_CENTER; + jint CHANNEL_OUT_QUAD; + jint CHANNEL_OUT_5POINT1; + jint CHANNEL_OUT_BACK_CENTER; + jint CHANNEL_OUT_7POINT1_SURROUND; + struct MPJniField mapping[]; +} AudioFormat = {.mapping = { + #define OFFSET(member) offsetof(struct JNIAudioFormat, member) + {"android/media/AudioFormat", NULL, NULL, MP_JNI_CLASS, OFFSET(clazz), 1}, + {"android/media/AudioFormat", "ENCODING_PCM_8BIT", "I", MP_JNI_STATIC_FIELD_AS_INT, OFFSET(ENCODING_PCM_8BIT), 1}, + {"android/media/AudioFormat", "ENCODING_PCM_16BIT", "I", MP_JNI_STATIC_FIELD_AS_INT, OFFSET(ENCODING_PCM_16BIT), 1}, + {"android/media/AudioFormat", "ENCODING_PCM_FLOAT", "I", MP_JNI_STATIC_FIELD_AS_INT, OFFSET(ENCODING_PCM_FLOAT), 1}, + {"android/media/AudioFormat", "ENCODING_IEC61937", "I", MP_JNI_STATIC_FIELD_AS_INT, OFFSET(ENCODING_IEC61937), 0}, + {"android/media/AudioFormat", "CHANNEL_OUT_MONO", "I", MP_JNI_STATIC_FIELD_AS_INT, OFFSET(CHANNEL_OUT_MONO), 1}, + {"android/media/AudioFormat", "CHANNEL_OUT_STEREO", "I", MP_JNI_STATIC_FIELD_AS_INT, OFFSET(CHANNEL_OUT_STEREO), 1}, + {"android/media/AudioFormat", "CHANNEL_OUT_FRONT_CENTER", "I", MP_JNI_STATIC_FIELD_AS_INT, OFFSET(CHANNEL_OUT_FRONT_CENTER), 1}, + {"android/media/AudioFormat", "CHANNEL_OUT_QUAD", "I", MP_JNI_STATIC_FIELD_AS_INT, OFFSET(CHANNEL_OUT_QUAD), 1}, + {"android/media/AudioFormat", "CHANNEL_OUT_5POINT1", "I", MP_JNI_STATIC_FIELD_AS_INT, OFFSET(CHANNEL_OUT_5POINT1), 1}, + {"android/media/AudioFormat", "CHANNEL_OUT_BACK_CENTER", "I", MP_JNI_STATIC_FIELD_AS_INT, OFFSET(CHANNEL_OUT_BACK_CENTER), 1}, + {"android/media/AudioFormat", "CHANNEL_OUT_7POINT1_SURROUND", "I", MP_JNI_STATIC_FIELD_AS_INT, OFFSET(CHANNEL_OUT_7POINT1_SURROUND), 0}, + {0} + #undef OFFSET +}}; + +struct JNIAudioFormatBuilder { + jclass clazz; + jmethodID ctor; + jmethodID setEncoding; + jmethodID setSampleRate; + jmethodID setChannelMask; + jmethodID build; + struct MPJniField mapping[]; +} AudioFormatBuilder = {.mapping = { + #define 
OFFSET(member) offsetof(struct JNIAudioFormatBuilder, member) + {"android/media/AudioFormat$Builder", NULL, NULL, MP_JNI_CLASS, OFFSET(clazz), 0}, + {"android/media/AudioFormat$Builder", "<init>", "()V", MP_JNI_METHOD, OFFSET(ctor), 0}, + {"android/media/AudioFormat$Builder", "setEncoding", "(I)Landroid/media/AudioFormat$Builder;", MP_JNI_METHOD, OFFSET(setEncoding), 0}, + {"android/media/AudioFormat$Builder", "setSampleRate", "(I)Landroid/media/AudioFormat$Builder;", MP_JNI_METHOD, OFFSET(setSampleRate), 0}, + {"android/media/AudioFormat$Builder", "setChannelMask", "(I)Landroid/media/AudioFormat$Builder;", MP_JNI_METHOD, OFFSET(setChannelMask), 0}, + {"android/media/AudioFormat$Builder", "build", "()Landroid/media/AudioFormat;", MP_JNI_METHOD, OFFSET(build), 0}, + {0} + #undef OFFSET +}}; + + +struct JNIAudioManager { + jclass clazz; + jint ERROR_DEAD_OBJECT; + jint STREAM_MUSIC; + struct MPJniField mapping[]; +} AudioManager = {.mapping = { + #define OFFSET(member) offsetof(struct JNIAudioManager, member) + {"android/media/AudioManager", NULL, NULL, MP_JNI_CLASS, OFFSET(clazz), 1}, + {"android/media/AudioManager", "STREAM_MUSIC", "I", MP_JNI_STATIC_FIELD_AS_INT, OFFSET(STREAM_MUSIC), 1}, + {"android/media/AudioManager", "ERROR_DEAD_OBJECT", "I", MP_JNI_STATIC_FIELD_AS_INT, OFFSET(ERROR_DEAD_OBJECT), 0}, + {0} + #undef OFFSET +}}; + +struct JNIAudioTimestamp { + jclass clazz; + jmethodID ctor; + jfieldID framePosition; + jfieldID nanoTime; + struct MPJniField mapping[]; +} AudioTimestamp = {.mapping = { + #define OFFSET(member) offsetof(struct JNIAudioTimestamp, member) + {"android/media/AudioTimestamp", NULL, NULL, MP_JNI_CLASS, OFFSET(clazz), 1}, + {"android/media/AudioTimestamp", "<init>", "()V", MP_JNI_METHOD, OFFSET(ctor), 1}, + {"android/media/AudioTimestamp", "framePosition", "J", MP_JNI_FIELD, OFFSET(framePosition), 1}, + {"android/media/AudioTimestamp", "nanoTime", "J", MP_JNI_FIELD, OFFSET(nanoTime), 1}, + {0} + #undef OFFSET +}}; + +#define 
MP_JNI_DELETELOCAL(o) (*env)->DeleteLocalRef(env, o) + +static int AudioTrack_New(struct ao *ao) +{ + struct priv *p = ao->priv; + JNIEnv *env = MP_JNI_GET_ENV(ao); + jobject audiotrack = NULL; + + if (AudioTrack.ctorV21) { + MP_VERBOSE(ao, "Using API21 initializer\n"); + jobject tmp = NULL; + + jobject format_builder = MP_JNI_NEW(AudioFormatBuilder.clazz, AudioFormatBuilder.ctor); + MP_JNI_EXCEPTION_LOG(ao); + tmp = MP_JNI_CALL_OBJECT(format_builder, AudioFormatBuilder.setEncoding, p->format); + MP_JNI_DELETELOCAL(tmp); + tmp = MP_JNI_CALL_OBJECT(format_builder, AudioFormatBuilder.setSampleRate, p->samplerate); + MP_JNI_DELETELOCAL(tmp); + tmp = MP_JNI_CALL_OBJECT(format_builder, AudioFormatBuilder.setChannelMask, p->channel_config); + MP_JNI_DELETELOCAL(tmp); + jobject format = MP_JNI_CALL_OBJECT(format_builder, AudioFormatBuilder.build); + MP_JNI_DELETELOCAL(format_builder); + + jobject attr_builder = MP_JNI_NEW(AudioAttributesBuilder.clazz, AudioAttributesBuilder.ctor); + MP_JNI_EXCEPTION_LOG(ao); + tmp = MP_JNI_CALL_OBJECT(attr_builder, AudioAttributesBuilder.setUsage, AudioAttributes.USAGE_MEDIA); + MP_JNI_DELETELOCAL(tmp); + jint content_type = (ao->init_flags & AO_INIT_MEDIA_ROLE_MUSIC) ? 
+ AudioAttributes.CONTENT_TYPE_MUSIC : AudioAttributes.CONTENT_TYPE_MOVIE; + tmp = MP_JNI_CALL_OBJECT(attr_builder, AudioAttributesBuilder.setContentType, content_type); + MP_JNI_DELETELOCAL(tmp); + jobject attr = MP_JNI_CALL_OBJECT(attr_builder, AudioAttributesBuilder.build); + MP_JNI_DELETELOCAL(attr_builder); + + audiotrack = MP_JNI_NEW( + AudioTrack.clazz, + AudioTrack.ctorV21, + attr, + format, + p->size, + AudioTrack.MODE_STREAM, + p->cfg_session_id + ); + + MP_JNI_DELETELOCAL(format); + MP_JNI_DELETELOCAL(attr); + } else { + MP_VERBOSE(ao, "Using legacy initializer\n"); + audiotrack = MP_JNI_NEW( + AudioTrack.clazz, + AudioTrack.ctor, + AudioManager.STREAM_MUSIC, + p->samplerate, + p->channel_config, + p->format, + p->size, + AudioTrack.MODE_STREAM, + p->cfg_session_id + ); + } + if (MP_JNI_EXCEPTION_LOG(ao) < 0 || !audiotrack) { + MP_FATAL(ao, "AudioTrack Init failed\n"); + return -1; + } + + if (MP_JNI_CALL_INT(audiotrack, AudioTrack.getState) != AudioTrack.STATE_INITIALIZED) { + MP_JNI_CALL_VOID(audiotrack, AudioTrack.release); + MP_JNI_EXCEPTION_LOG(ao); + (*env)->DeleteLocalRef(env, audiotrack); + MP_ERR(ao, "AudioTrack.getState failed\n"); + return -1; + } + + if (AudioTrack.getBufferSizeInFramesV23) { + int bufferSize = MP_JNI_CALL_INT(audiotrack, AudioTrack.getBufferSizeInFramesV23); + if (bufferSize > 0) { + MP_VERBOSE(ao, "AudioTrack.getBufferSizeInFrames = %d\n", bufferSize); + ao->device_buffer = bufferSize; + } + } + + p->audiotrack = (*env)->NewGlobalRef(env, audiotrack); + (*env)->DeleteLocalRef(env, audiotrack); + if (!p->audiotrack) + return -1; + + return 0; +} + +static int AudioTrack_Recreate(struct ao *ao) +{ + struct priv *p = ao->priv; + JNIEnv *env = MP_JNI_GET_ENV(ao); + + MP_JNI_CALL_VOID(p->audiotrack, AudioTrack.release); + MP_JNI_EXCEPTION_LOG(ao); + (*env)->DeleteGlobalRef(env, p->audiotrack); + p->audiotrack = NULL; + return AudioTrack_New(ao); +} + +static uint32_t AudioTrack_getPlaybackHeadPosition(struct ao *ao) +{ + struct 
priv *p = ao->priv; + if (!p->audiotrack) + return 0; + JNIEnv *env = MP_JNI_GET_ENV(ao); + uint32_t pos = 0; + int64_t now = mp_raw_time_ns(); + int state = MP_JNI_CALL_INT(p->audiotrack, AudioTrack.getPlayState); + + int stable_count = 20; + int64_t wait = p->timestamp_stable < stable_count ? 50000000 : 3000000000; + + if (state == AudioTrack.PLAYSTATE_PLAYING && p->format != AudioFormat.ENCODING_IEC61937 && + (p->timestamp_fetched == 0 || now - p->timestamp_fetched >= wait)) { + if (!p->timestamp_fetched) + p->timestamp_stable = 0; + + int64_t time1 = MP_JNI_GET_LONG(p->timestamp, AudioTimestamp.nanoTime); + if (MP_JNI_CALL_BOOL(p->audiotrack, AudioTrack.getTimestamp, p->timestamp)) { + p->timestamp_set = true; + p->timestamp_fetched = now; + if (p->timestamp_stable < stable_count) { + uint32_t fpos = 0xFFFFFFFFL & MP_JNI_GET_LONG(p->timestamp, AudioTimestamp.framePosition); + int64_t time2 = MP_JNI_GET_LONG(p->timestamp, AudioTimestamp.nanoTime); + //MP_VERBOSE(ao, "getTimestamp: fpos= %u / time= %"PRId64" / now= %"PRId64" / stable= %d\n", fpos, time2, now, p->timestamp_stable); + if (time1 != time2 && time2 != 0 && fpos != 0) { + p->timestamp_stable++; + } + } + } + } + + /* AudioTrack's framePosition and playbackHeadPosition return a signed integer, + * but documentation states it should be interpreted as a 32-bit unsigned integer. 
+ */ + if (p->timestamp_set) { + pos = 0xFFFFFFFFL & MP_JNI_GET_LONG(p->timestamp, AudioTimestamp.framePosition); + uint32_t fpos = pos; + int64_t time = MP_JNI_GET_LONG(p->timestamp, AudioTimestamp.nanoTime); + if (time == 0) + fpos = pos = 0; + if (p->needs_timestamp_offset) { + if (time != 0 && !p->timestamp_offset) + p->timestamp_offset = now - time; + time += p->timestamp_offset; + } + if (fpos != 0 && time != 0 && state == AudioTrack.PLAYSTATE_PLAYING) { + double diff = (double)(now - time) / 1e9; + pos += diff * ao->samplerate; + } + //MP_VERBOSE(ao, "position = %u via getTimestamp (state = %d / fpos= %u / time= %"PRId64")\n", pos, state, fpos, time); + } else { + pos = 0xFFFFFFFFL & MP_JNI_CALL_INT(p->audiotrack, AudioTrack.getPlaybackHeadPosition); + //MP_VERBOSE(ao, "playbackHeadPosition = %u (reset_pending=%d)\n", pos, p->reset_pending); + } + + + if (p->format == AudioFormat.ENCODING_IEC61937) { + if (p->reset_pending) { + // after a flush(), playbackHeadPosition will not reset to 0 right away. + // sometimes, it will never reset at all. + // save the initial offset after the reset, to subtract it going forward. + if (p->playhead_offset == 0) + p->playhead_offset = pos; + p->reset_pending = false; + MP_VERBOSE(ao, "IEC/playbackHead offset = %d\n", pos); + } + + // usually shortly after a flush(), playbackHeadPosition will reset to 0. + // clear out the position and offset to avoid regular "rollover" below + if (pos == 0 && p->playhead_offset != 0) { + MP_VERBOSE(ao, "IEC/playbackHeadPosition %d -> %d (flush)\n", p->playhead_pos, pos); + p->playhead_offset = 0; + p->playhead_pos = 0; + } + + // sometimes on a new AudioTrack instance, playbackHeadPosition will reset + // to 0 shortly after playback starts for no reason. 
+ if (pos == 0 && p->playhead_pos != 0) { + MP_VERBOSE(ao, "IEC/playbackHeadPosition %d -> %d (reset)\n", p->playhead_pos, pos); + p->playhead_offset = 0; + p->playhead_pos = 0; + p->written_frames = 0; + } + } + + p->playhead_pos = pos; + return p->playhead_pos - p->playhead_offset; +} + +static double AudioTrack_getLatency(struct ao *ao) +{ + JNIEnv *env = MP_JNI_GET_ENV(ao); + struct priv *p = ao->priv; + if (!p->audiotrack) + return 0; + + uint32_t playhead = AudioTrack_getPlaybackHeadPosition(ao); + uint32_t diff = p->written_frames - playhead; + double delay = diff / (double)(ao->samplerate); + if (!p->timestamp_set && + p->format != AudioFormat.ENCODING_IEC61937) + delay += (double)MP_JNI_CALL_INT(p->audiotrack, AudioTrack.getLatency)/1000.0; + if (delay > 2.0) { + //MP_WARN(ao, "getLatency: written=%u playhead=%u diff=%u delay=%f\n", p->written_frames, playhead, diff, delay); + p->timestamp_fetched = 0; + return 0; + } + return MPCLAMP(delay, 0.0, 2.0); +} + +static int AudioTrack_write(struct ao *ao, int len) +{ + struct priv *p = ao->priv; + if (!p->audiotrack) + return -1; + JNIEnv *env = MP_JNI_GET_ENV(ao); + void *buf = p->chunk; + + jint ret; + if (p->format == AudioFormat.ENCODING_IEC61937) { + (*env)->SetShortArrayRegion(env, p->shortarray, 0, len / 2, buf); + if (MP_JNI_EXCEPTION_LOG(ao) < 0) return -1; + ret = MP_JNI_CALL_INT(p->audiotrack, AudioTrack.writeShortV23, p->shortarray, 0, len / 2, AudioTrack.WRITE_BLOCKING); + if (MP_JNI_EXCEPTION_LOG(ao) < 0) return -1; + if (ret > 0) ret *= 2; + + } else if (AudioTrack.writeBufferV21) { + // reset positions for reading + jobject bbuf = MP_JNI_CALL_OBJECT(p->bbuf, ByteBuffer.clear); + if (MP_JNI_EXCEPTION_LOG(ao) < 0) return -1; + (*env)->DeleteLocalRef(env, bbuf); + ret = MP_JNI_CALL_INT(p->audiotrack, AudioTrack.writeBufferV21, p->bbuf, len, AudioTrack.WRITE_BLOCKING); + if (MP_JNI_EXCEPTION_LOG(ao) < 0) return -1; + + } else if (p->format == AudioFormat.ENCODING_PCM_FLOAT) { + 
(*env)->SetFloatArrayRegion(env, p->floatarray, 0, len / sizeof(float), buf); + if (MP_JNI_EXCEPTION_LOG(ao) < 0) return -1; + ret = MP_JNI_CALL_INT(p->audiotrack, AudioTrack.writeFloat, p->floatarray, 0, len / sizeof(float), AudioTrack.WRITE_BLOCKING); + if (MP_JNI_EXCEPTION_LOG(ao) < 0) return -1; + if (ret > 0) ret *= sizeof(float); + + } else { + (*env)->SetByteArrayRegion(env, p->bytearray, 0, len, buf); + if (MP_JNI_EXCEPTION_LOG(ao) < 0) return -1; + ret = MP_JNI_CALL_INT(p->audiotrack, AudioTrack.write, p->bytearray, 0, len); + if (MP_JNI_EXCEPTION_LOG(ao) < 0) return -1; + } + + return ret; +} + +static void uninit_jni(struct ao *ao) +{ + JNIEnv *env = MP_JNI_GET_ENV(ao); + mp_jni_reset_jfields(env, &AudioTrack, AudioTrack.mapping, 1, ao->log); + mp_jni_reset_jfields(env, &AudioTimestamp, AudioTimestamp.mapping, 1, ao->log); + mp_jni_reset_jfields(env, &AudioManager, AudioManager.mapping, 1, ao->log); + mp_jni_reset_jfields(env, &AudioFormat, AudioFormat.mapping, 1, ao->log); + mp_jni_reset_jfields(env, &AudioFormatBuilder, AudioFormatBuilder.mapping, 1, ao->log); + mp_jni_reset_jfields(env, &AudioAttributes, AudioAttributes.mapping, 1, ao->log); + mp_jni_reset_jfields(env, &AudioAttributesBuilder, AudioAttributesBuilder.mapping, 1, ao->log); + mp_jni_reset_jfields(env, &ByteBuffer, ByteBuffer.mapping, 1, ao->log); +} + +static int init_jni(struct ao *ao) +{ + JNIEnv *env = MP_JNI_GET_ENV(ao); + if (mp_jni_init_jfields(env, &AudioTrack, AudioTrack.mapping, 1, ao->log) < 0 || + mp_jni_init_jfields(env, &ByteBuffer, ByteBuffer.mapping, 1, ao->log) < 0 || + mp_jni_init_jfields(env, &AudioTimestamp, AudioTimestamp.mapping, 1, ao->log) < 0 || + mp_jni_init_jfields(env, &AudioManager, AudioManager.mapping, 1, ao->log) < 0 || + mp_jni_init_jfields(env, &AudioAttributes, AudioAttributes.mapping, 1, ao->log) < 0 || + mp_jni_init_jfields(env, &AudioAttributesBuilder, AudioAttributesBuilder.mapping, 1, ao->log) < 0 || + mp_jni_init_jfields(env, &AudioFormatBuilder, 
AudioFormatBuilder.mapping, 1, ao->log) < 0 || + mp_jni_init_jfields(env, &AudioFormat, AudioFormat.mapping, 1, ao->log) < 0) { + uninit_jni(ao); + return -1; + } + + return 0; +} + +static MP_THREAD_VOID playthread(void *arg) +{ + struct ao *ao = arg; + struct priv *p = ao->priv; + JNIEnv *env = MP_JNI_GET_ENV(ao); + mp_thread_set_name("ao/audiotrack"); + mp_mutex_lock(&p->lock); + while (!p->thread_terminate) { + int state = AudioTrack.PLAYSTATE_PAUSED; + if (p->audiotrack) { + state = MP_JNI_CALL_INT(p->audiotrack, AudioTrack.getPlayState); + } + if (state == AudioTrack.PLAYSTATE_PLAYING) { + int read_samples = p->chunksize / ao->sstride; + int64_t ts = mp_time_ns(); + ts += MP_TIME_S_TO_NS(read_samples / (double)(ao->samplerate)); + ts += MP_TIME_S_TO_NS(AudioTrack_getLatency(ao)); + int samples = ao_read_data_nonblocking(ao, &p->chunk, read_samples, ts); + int ret = AudioTrack_write(ao, samples * ao->sstride); + if (ret >= 0) { + p->written_frames += ret / ao->sstride; + } else if (ret == AudioManager.ERROR_DEAD_OBJECT) { + MP_WARN(ao, "AudioTrack.write failed with ERROR_DEAD_OBJECT. 
Recreating AudioTrack...\n"); + if (AudioTrack_Recreate(ao) < 0) { + MP_ERR(ao, "AudioTrack_Recreate failed\n"); + } + } else { + MP_ERR(ao, "AudioTrack.write failed with %d\n", ret); + } + } else { + mp_cond_timedwait(&p->wakeup, &p->lock, MP_TIME_MS_TO_NS(300)); + } + } + mp_mutex_unlock(&p->lock); + MP_THREAD_RETURN(); +} + +static void uninit(struct ao *ao) +{ + struct priv *p = ao->priv; + JNIEnv *env = MP_JNI_GET_ENV(ao); + if (p->audiotrack) { + MP_JNI_CALL_VOID(p->audiotrack, AudioTrack.stop); + MP_JNI_EXCEPTION_LOG(ao); + MP_JNI_CALL_VOID(p->audiotrack, AudioTrack.flush); + MP_JNI_EXCEPTION_LOG(ao); + } + + mp_mutex_lock(&p->lock); + p->thread_terminate = true; + mp_cond_signal(&p->wakeup); + mp_mutex_unlock(&p->lock); + + if (p->thread_created) + mp_thread_join(p->thread); + + if (p->audiotrack) { + MP_JNI_CALL_VOID(p->audiotrack, AudioTrack.release); + MP_JNI_EXCEPTION_LOG(ao); + (*env)->DeleteGlobalRef(env, p->audiotrack); + p->audiotrack = NULL; + } + + if (p->bytearray) { + (*env)->DeleteGlobalRef(env, p->bytearray); + p->bytearray = NULL; + } + + if (p->shortarray) { + (*env)->DeleteGlobalRef(env, p->shortarray); + p->shortarray = NULL; + } + + if (p->floatarray) { + (*env)->DeleteGlobalRef(env, p->floatarray); + p->floatarray = NULL; + } + + if (p->bbuf) { + (*env)->DeleteGlobalRef(env, p->bbuf); + p->bbuf = NULL; + } + + if (p->timestamp) { + (*env)->DeleteGlobalRef(env, p->timestamp); + p->timestamp = NULL; + } + + mp_cond_destroy(&p->wakeup); + mp_mutex_destroy(&p->lock); + + uninit_jni(ao); +} + +static int init(struct ao *ao) +{ + struct priv *p = ao->priv; + JNIEnv *env = MP_JNI_GET_ENV(ao); + if (!env) + return -1; + + mp_mutex_init(&p->lock); + mp_cond_init(&p->wakeup); + + if (init_jni(ao) < 0) + return -1; + + if (af_fmt_is_spdif(ao->format)) { + p->format = AudioFormat.ENCODING_IEC61937; + } else if (ao->format == AF_FORMAT_U8) { + p->format = AudioFormat.ENCODING_PCM_8BIT; + } else if (p->cfg_pcm_float && af_fmt_is_float(ao->format)) { + 
ao->format = AF_FORMAT_FLOAT; + p->format = AudioFormat.ENCODING_PCM_FLOAT; + } else { + ao->format = AF_FORMAT_S16; + p->format = AudioFormat.ENCODING_PCM_16BIT; + } + + if (AudioTrack.getNativeOutputSampleRate) { + jint samplerate = MP_JNI_CALL_STATIC_INT( + AudioTrack.clazz, + AudioTrack.getNativeOutputSampleRate, + AudioManager.STREAM_MUSIC + ); + if (MP_JNI_EXCEPTION_LOG(ao) == 0) { + MP_VERBOSE(ao, "AudioTrack.nativeOutputSampleRate = %d\n", samplerate); + ao->samplerate = MPMIN(samplerate, ao->samplerate); + } + } + p->samplerate = ao->samplerate; + + /* https://developer.android.com/reference/android/media/AudioFormat#channelPositionMask */ + static const struct mp_chmap layouts[] = { + {0}, // empty + MP_CHMAP_INIT_MONO, // mono + MP_CHMAP_INIT_STEREO, // stereo + MP_CHMAP3(FL, FR, FC), // 3.0 + MP_CHMAP4(FL, FR, BL, BR), // quad + MP_CHMAP5(FL, FR, FC, BL, BR), // 5.0 + MP_CHMAP6(FL, FR, FC, LFE, BL, BR), // 5.1 + MP_CHMAP7(FL, FR, FC, LFE, BL, BR, BC), // 6.1 + MP_CHMAP8(FL, FR, FC, LFE, BL, BR, SL, SR), // 7.1 + }; + const jint layout_map[] = { + 0, + AudioFormat.CHANNEL_OUT_MONO, + AudioFormat.CHANNEL_OUT_STEREO, + AudioFormat.CHANNEL_OUT_STEREO | AudioFormat.CHANNEL_OUT_FRONT_CENTER, + AudioFormat.CHANNEL_OUT_QUAD, + AudioFormat.CHANNEL_OUT_QUAD | AudioFormat.CHANNEL_OUT_FRONT_CENTER, + AudioFormat.CHANNEL_OUT_5POINT1, + AudioFormat.CHANNEL_OUT_5POINT1 | AudioFormat.CHANNEL_OUT_BACK_CENTER, + AudioFormat.CHANNEL_OUT_7POINT1_SURROUND, + }; + static_assert(MP_ARRAY_SIZE(layout_map) == MP_ARRAY_SIZE(layouts), ""); + if (p->format == AudioFormat.ENCODING_IEC61937) { + p->channel_config = AudioFormat.CHANNEL_OUT_STEREO; + } else { + struct mp_chmap_sel sel = {0}; + for (int i = 0; i < MP_ARRAY_SIZE(layouts); i++) { + if (layout_map[i]) + mp_chmap_sel_add_map(&sel, &layouts[i]); + } + if (!ao_chmap_sel_adjust(ao, &sel, &ao->channels)) + goto error; + p->channel_config = layout_map[ao->channels.num]; + assert(p->channel_config); + } + + jint buffer_size = 
MP_JNI_CALL_STATIC_INT( + AudioTrack.clazz, + AudioTrack.getMinBufferSize, + p->samplerate, + p->channel_config, + p->format + ); + if (MP_JNI_EXCEPTION_LOG(ao) < 0 || buffer_size <= 0) { + MP_FATAL(ao, "AudioTrack.getMinBufferSize returned an invalid size: %d", buffer_size); + return -1; + } + + // Choose double of the minimum buffer size suggested by the driver, but not + // less than 75ms or more than 150ms. + const int bps = af_fmt_to_bytes(ao->format); + int min = 0.075 * p->samplerate * bps * ao->channels.num; + int max = min * 2; + min = MP_ALIGN_UP(min, bps); + max = MP_ALIGN_UP(max, bps); + p->size = MPCLAMP(buffer_size * 2, min, max); + MP_VERBOSE(ao, "Setting bufferSize = %d (driver=%d, min=%d, max=%d)\n", p->size, buffer_size, min, max); + assert(p->size % bps == 0); + ao->device_buffer = p->size / bps; + + p->chunksize = p->size; + p->chunk = talloc_size(ao, p->size); + + jobject timestamp = MP_JNI_NEW(AudioTimestamp.clazz, AudioTimestamp.ctor); + if (MP_JNI_EXCEPTION_LOG(ao) < 0 || !timestamp) { + MP_FATAL(ao, "AudioTimestamp could not be created\n"); + return -1; + } + p->timestamp = (*env)->NewGlobalRef(env, timestamp); + (*env)->DeleteLocalRef(env, timestamp); + + // decide and create buffer of right type + if (p->format == AudioFormat.ENCODING_IEC61937) { + jshortArray shortarray = (*env)->NewShortArray(env, p->chunksize / 2); + p->shortarray = (*env)->NewGlobalRef(env, shortarray); + (*env)->DeleteLocalRef(env, shortarray); + } else if (AudioTrack.writeBufferV21) { + MP_VERBOSE(ao, "Using NIO ByteBuffer\n"); + jobject bbuf = (*env)->NewDirectByteBuffer(env, p->chunk, p->chunksize); + p->bbuf = (*env)->NewGlobalRef(env, bbuf); + (*env)->DeleteLocalRef(env, bbuf); + } else if (p->format == AudioFormat.ENCODING_PCM_FLOAT) { + jfloatArray floatarray = (*env)->NewFloatArray(env, p->chunksize / sizeof(float)); + p->floatarray = (*env)->NewGlobalRef(env, floatarray); + (*env)->DeleteLocalRef(env, floatarray); + } else { + jbyteArray bytearray = 
(*env)->NewByteArray(env, p->chunksize); + p->bytearray = (*env)->NewGlobalRef(env, bytearray); + (*env)->DeleteLocalRef(env, bytearray); + } + + /* create AudioTrack object */ + if (AudioTrack_New(ao) != 0) { + MP_FATAL(ao, "Failed to create AudioTrack\n"); + goto error; + } + + if (mp_thread_create(&p->thread, playthread, ao)) { + MP_ERR(ao, "pthread creation failed\n"); + goto error; + } + p->thread_created = true; + + return 1; + +error: + uninit(ao); + return -1; +} + +static void stop(struct ao *ao) +{ + struct priv *p = ao->priv; + if (!p->audiotrack) { + MP_ERR(ao, "AudioTrack does not exist to stop!\n"); + return; + } + + JNIEnv *env = MP_JNI_GET_ENV(ao); + MP_JNI_CALL_VOID(p->audiotrack, AudioTrack.pause); + MP_JNI_EXCEPTION_LOG(ao); + MP_JNI_CALL_VOID(p->audiotrack, AudioTrack.flush); + MP_JNI_EXCEPTION_LOG(ao); + + p->playhead_offset = 0; + p->reset_pending = true; + p->written_frames = 0; + p->timestamp_fetched = 0; + p->timestamp_set = false; + p->timestamp_offset = 0; +} + +static void start(struct ao *ao) +{ + struct priv *p = ao->priv; + if (!p->audiotrack) { + MP_ERR(ao, "AudioTrack does not exist to start!\n"); + return; + } + + JNIEnv *env = MP_JNI_GET_ENV(ao); + MP_JNI_CALL_VOID(p->audiotrack, AudioTrack.play); + MP_JNI_EXCEPTION_LOG(ao); + + mp_cond_signal(&p->wakeup); +} + +#define OPT_BASE_STRUCT struct priv + +const struct ao_driver audio_out_audiotrack = { + .description = "Android AudioTrack audio output", + .name = "audiotrack", + .init = init, + .uninit = uninit, + .reset = stop, + .start = start, + .priv_size = sizeof(struct priv), + .priv_defaults = &(const OPT_BASE_STRUCT) { + .cfg_pcm_float = 1, + }, + .options = (const struct m_option[]) { + {"pcm-float", OPT_BOOL(cfg_pcm_float)}, + {"session-id", OPT_INT(cfg_session_id)}, + {0} + }, + .options_prefix = "audiotrack", +}; diff --git a/audio/out/ao_audiounit.m b/audio/out/ao_audiounit.m new file mode 100644 index 0000000..85b1226 --- /dev/null +++ b/audio/out/ao_audiounit.m @@ -0,0 
+1,260 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "ao.h" +#include "internal.h" +#include "audio/format.h" +#include "osdep/timer.h" +#include "options/m_option.h" +#include "common/msg.h" +#include "ao_coreaudio_utils.h" +#include "ao_coreaudio_chmap.h" + +#import <AudioUnit/AudioUnit.h> +#import <CoreAudio/CoreAudioTypes.h> +#import <AudioToolbox/AudioToolbox.h> +#import <AVFoundation/AVFoundation.h> +#import <mach/mach_time.h> + +struct priv { + AudioUnit audio_unit; + double device_latency; +}; + +static OSStatus au_get_ary(AudioUnit unit, AudioUnitPropertyID inID, AudioUnitScope inScope, AudioUnitElement inElement, void **data, UInt32 *outDataSize) +{ + OSStatus err; + + err = AudioUnitGetPropertyInfo(unit, inID, inScope, inElement, outDataSize, NULL); + CHECK_CA_ERROR_SILENT_L(coreaudio_error); + + *data = talloc_zero_size(NULL, *outDataSize); + + err = AudioUnitGetProperty(unit, inID, inScope, inElement, *data, outDataSize); + CHECK_CA_ERROR_SILENT_L(coreaudio_error_free); + + return err; +coreaudio_error_free: + talloc_free(*data); +coreaudio_error: + return err; +} + +static AudioChannelLayout *convert_layout(AudioChannelLayout *layout, UInt32* size) +{ + AudioChannelLayoutTag tag = layout->mChannelLayoutTag; + AudioChannelLayout *new_layout; + if (tag == kAudioChannelLayoutTag_UseChannelDescriptions) 
+ return layout; + else if (tag == kAudioChannelLayoutTag_UseChannelBitmap) + AudioFormatGetPropertyInfo(kAudioFormatProperty_ChannelLayoutForBitmap, + sizeof(UInt32), &layout->mChannelBitmap, size); + else + AudioFormatGetPropertyInfo(kAudioFormatProperty_ChannelLayoutForTag, + sizeof(AudioChannelLayoutTag), &tag, size); + new_layout = talloc_zero_size(NULL, *size); + if (!new_layout) { + talloc_free(layout); + return NULL; + } + if (tag == kAudioChannelLayoutTag_UseChannelBitmap) + AudioFormatGetProperty(kAudioFormatProperty_ChannelLayoutForBitmap, + sizeof(UInt32), &layout->mChannelBitmap, size, new_layout); + else + AudioFormatGetProperty(kAudioFormatProperty_ChannelLayoutForTag, + sizeof(AudioChannelLayoutTag), &tag, size, new_layout); + new_layout->mChannelLayoutTag = kAudioChannelLayoutTag_UseChannelDescriptions; + talloc_free(layout); + return new_layout; +} + + +static OSStatus render_cb_lpcm(void *ctx, AudioUnitRenderActionFlags *aflags, + const AudioTimeStamp *ts, UInt32 bus, + UInt32 frames, AudioBufferList *buffer_list) +{ + struct ao *ao = ctx; + struct priv *p = ao->priv; + void *planes[MP_NUM_CHANNELS] = {0}; + + for (int n = 0; n < ao->num_planes; n++) + planes[n] = buffer_list->mBuffers[n].mData; + + int64_t end = mp_time_ns(); + end += MP_TIME_S_TO_NS(p->device_latency); + end += ca_get_latency(ts) + ca_frames_to_ns(ao, frames); + ao_read_data(ao, planes, frames, end); + return noErr; +} + +static bool init_audiounit(struct ao *ao) +{ + AudioStreamBasicDescription asbd; + OSStatus err; + uint32_t size; + AudioChannelLayout *layout = NULL; + struct priv *p = ao->priv; + AVAudioSession *instance = AVAudioSession.sharedInstance; + AVAudioSessionPortDescription *port = nil; + NSInteger maxChannels = instance.maximumOutputNumberOfChannels; + NSInteger prefChannels = MIN(maxChannels, ao->channels.num); + + MP_VERBOSE(ao, "max channels: %ld, requested: %d\n", maxChannels, (int)ao->channels.num); + + [instance setCategory:AVAudioSessionCategoryPlayback 
error:nil]; + [instance setMode:AVAudioSessionModeMoviePlayback error:nil]; + [instance setActive:YES error:nil]; + [instance setPreferredOutputNumberOfChannels:prefChannels error:nil]; + + AudioComponentDescription desc = (AudioComponentDescription) { + .componentType = kAudioUnitType_Output, + .componentSubType = kAudioUnitSubType_RemoteIO, + .componentManufacturer = kAudioUnitManufacturer_Apple, + .componentFlags = 0, + .componentFlagsMask = 0, + }; + + AudioComponent comp = AudioComponentFindNext(NULL, &desc); + if (comp == NULL) { + MP_ERR(ao, "unable to find audio component\n"); + goto coreaudio_error; + } + + err = AudioComponentInstanceNew(comp, &(p->audio_unit)); + CHECK_CA_ERROR("unable to open audio component"); + + err = AudioUnitInitialize(p->audio_unit); + CHECK_CA_ERROR_L(coreaudio_error_component, + "unable to initialize audio unit"); + + err = au_get_ary(p->audio_unit, kAudioUnitProperty_AudioChannelLayout, kAudioUnitScope_Output, + 0, (void**)&layout, &size); + CHECK_CA_ERROR_L(coreaudio_error_audiounit, + "unable to retrieve audio unit channel layout"); + + MP_VERBOSE(ao, "AU channel layout tag: %x (%x)\n", layout->mChannelLayoutTag, layout->mChannelBitmap); + + layout = convert_layout(layout, &size); + if (!layout) { + MP_ERR(ao, "unable to convert channel layout to list format\n"); + goto coreaudio_error_audiounit; + } + + for (UInt32 i = 0; i < layout->mNumberChannelDescriptions; i++) { + MP_VERBOSE(ao, "channel map: %i: %u\n", i, layout->mChannelDescriptions[i].mChannelLabel); + } + + if (af_fmt_is_spdif(ao->format) || instance.outputNumberOfChannels <= 2) { + ao->channels = (struct mp_chmap)MP_CHMAP_INIT_STEREO; + MP_VERBOSE(ao, "using stereo output\n"); + } else { + ao->channels.num = (uint8_t)layout->mNumberChannelDescriptions; + for (UInt32 i = 0; i < layout->mNumberChannelDescriptions; i++) { + ao->channels.speaker[i] = + ca_label_to_mp_speaker_id(layout->mChannelDescriptions[i].mChannelLabel); + } + MP_VERBOSE(ao, "using standard 
channel mapping\n"); + } + + ca_fill_asbd(ao, &asbd); + size = sizeof(AudioStreamBasicDescription); + err = AudioUnitSetProperty(p->audio_unit, + kAudioUnitProperty_StreamFormat, + kAudioUnitScope_Input, 0, &asbd, size); + + CHECK_CA_ERROR_L(coreaudio_error_audiounit, + "unable to set the input format on the audio unit"); + + AURenderCallbackStruct render_cb = (AURenderCallbackStruct) { + .inputProc = render_cb_lpcm, + .inputProcRefCon = ao, + }; + + err = AudioUnitSetProperty(p->audio_unit, + kAudioUnitProperty_SetRenderCallback, + kAudioUnitScope_Input, 0, &render_cb, + sizeof(AURenderCallbackStruct)); + + CHECK_CA_ERROR_L(coreaudio_error_audiounit, + "unable to set render callback on audio unit"); + + talloc_free(layout); + + return true; + +coreaudio_error_audiounit: + AudioUnitUninitialize(p->audio_unit); +coreaudio_error_component: + AudioComponentInstanceDispose(p->audio_unit); +coreaudio_error: + talloc_free(layout); + return false; +} + +static void stop(struct ao *ao) +{ + struct priv *p = ao->priv; + OSStatus err = AudioOutputUnitStop(p->audio_unit); + CHECK_CA_WARN("can't stop audio unit"); +} + +static void start(struct ao *ao) +{ + struct priv *p = ao->priv; + AVAudioSession *instance = AVAudioSession.sharedInstance; + + p->device_latency = [instance outputLatency]; + + OSStatus err = AudioOutputUnitStart(p->audio_unit); + CHECK_CA_WARN("can't start audio unit"); +} + +static void uninit(struct ao *ao) +{ + struct priv *p = ao->priv; + AudioOutputUnitStop(p->audio_unit); + AudioUnitUninitialize(p->audio_unit); + AudioComponentInstanceDispose(p->audio_unit); + + [AVAudioSession.sharedInstance + setActive:NO + withOptions:AVAudioSessionSetActiveOptionNotifyOthersOnDeactivation + error:nil]; +} + +static int init(struct ao *ao) +{ + if (!init_audiounit(ao)) + goto coreaudio_error; + + return CONTROL_OK; + +coreaudio_error: + return CONTROL_ERROR; +} + +#define OPT_BASE_STRUCT struct priv + +const struct ao_driver audio_out_audiounit = { + .description = 
"AudioUnit (iOS)", + .name = "audiounit", + .uninit = uninit, + .init = init, + .reset = stop, + .start = start, + .priv_size = sizeof(struct priv), +}; diff --git a/audio/out/ao_coreaudio.c b/audio/out/ao_coreaudio.c new file mode 100644 index 0000000..37f1313 --- /dev/null +++ b/audio/out/ao_coreaudio.c @@ -0,0 +1,435 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <CoreAudio/HostTime.h> + +#include "ao.h" +#include "internal.h" +#include "audio/format.h" +#include "osdep/timer.h" +#include "options/m_option.h" +#include "common/msg.h" +#include "ao_coreaudio_chmap.h" +#include "ao_coreaudio_properties.h" +#include "ao_coreaudio_utils.h" + +struct priv { + AudioDeviceID device; + AudioUnit audio_unit; + + uint64_t hw_latency_ns; + + AudioStreamBasicDescription original_asbd; + AudioStreamID original_asbd_stream; + + bool change_physical_format; +}; + +static int64_t ca_get_hardware_latency(struct ao *ao) { + struct priv *p = ao->priv; + + double audiounit_latency_sec = 0.0; + uint32_t size = sizeof(audiounit_latency_sec); + OSStatus err = AudioUnitGetProperty( + p->audio_unit, + kAudioUnitProperty_Latency, + kAudioUnitScope_Global, + 0, + &audiounit_latency_sec, + &size); + CHECK_CA_ERROR("cannot get audio unit latency"); + + uint64_t audiounit_latency_ns = MP_TIME_S_TO_NS(audiounit_latency_sec); + uint64_t device_latency_ns = ca_get_device_latency_ns(ao, p->device); + + MP_VERBOSE(ao, "audiounit latency [ns]: %lld\n", audiounit_latency_ns); + MP_VERBOSE(ao, "device latency [ns]: %lld\n", device_latency_ns); + + return audiounit_latency_ns + device_latency_ns; + +coreaudio_error: + return 0; +} + +static OSStatus render_cb_lpcm(void *ctx, AudioUnitRenderActionFlags *aflags, + const AudioTimeStamp *ts, UInt32 bus, + UInt32 frames, AudioBufferList *buffer_list) +{ + struct ao *ao = ctx; + struct priv *p = ao->priv; + void *planes[MP_NUM_CHANNELS] = {0}; + + for (int n = 0; n < ao->num_planes; n++) + planes[n] = buffer_list->mBuffers[n].mData; + + int64_t end = mp_time_ns(); + end += p->hw_latency_ns + ca_get_latency(ts) + ca_frames_to_ns(ao, frames); + int samples = ao_read_data_nonblocking(ao, planes, frames, end); + + if (samples == 0) + *aflags |= kAudioUnitRenderAction_OutputIsSilence; + + for (int n = 0; n < buffer_list->mNumberBuffers; n++) + buffer_list->mBuffers[n].mDataByteSize = samples * ao->sstride; 
+ + return noErr; +} + +static int get_volume(struct ao *ao, float *vol) { + struct priv *p = ao->priv; + float auvol; + OSStatus err = + AudioUnitGetParameter(p->audio_unit, kHALOutputParam_Volume, + kAudioUnitScope_Global, 0, &auvol); + + CHECK_CA_ERROR("could not get HAL output volume"); + *vol = auvol * 100.0; + return CONTROL_TRUE; +coreaudio_error: + return CONTROL_ERROR; +} + +static int set_volume(struct ao *ao, float *vol) { + struct priv *p = ao->priv; + float auvol = *vol / 100.0; + OSStatus err = + AudioUnitSetParameter(p->audio_unit, kHALOutputParam_Volume, + kAudioUnitScope_Global, 0, auvol, 0); + CHECK_CA_ERROR("could not set HAL output volume"); + return CONTROL_TRUE; +coreaudio_error: + return CONTROL_ERROR; +} + +static int control(struct ao *ao, enum aocontrol cmd, void *arg) +{ + switch (cmd) { + case AOCONTROL_GET_VOLUME: + return get_volume(ao, arg); + case AOCONTROL_SET_VOLUME: + return set_volume(ao, arg); + } + return CONTROL_UNKNOWN; +} + +static bool init_audiounit(struct ao *ao, AudioStreamBasicDescription asbd); +static void init_physical_format(struct ao *ao); + +static bool reinit_device(struct ao *ao) { + struct priv *p = ao->priv; + + OSStatus err = ca_select_device(ao, ao->device, &p->device); + CHECK_CA_ERROR("failed to select device"); + + return true; + +coreaudio_error: + return false; +} + +static int init(struct ao *ao) +{ + struct priv *p = ao->priv; + + if (!af_fmt_is_pcm(ao->format) || (ao->init_flags & AO_INIT_EXCLUSIVE)) { + MP_VERBOSE(ao, "redirecting to coreaudio_exclusive\n"); + ao->redirect = "coreaudio_exclusive"; + return CONTROL_ERROR; + } + + if (!reinit_device(ao)) + goto coreaudio_error; + + if (p->change_physical_format) + init_physical_format(ao); + + if (!ca_init_chmap(ao, p->device)) + goto coreaudio_error; + + AudioStreamBasicDescription asbd; + ca_fill_asbd(ao, &asbd); + + if (!init_audiounit(ao, asbd)) + goto coreaudio_error; + + return CONTROL_OK; + +coreaudio_error: + return CONTROL_ERROR; +} + +static 
void init_physical_format(struct ao *ao) +{ + struct priv *p = ao->priv; + OSErr err; + + void *tmp = talloc_new(NULL); + + AudioStreamBasicDescription asbd; + ca_fill_asbd(ao, &asbd); + + AudioStreamID *streams; + size_t n_streams; + + err = CA_GET_ARY_O(p->device, kAudioDevicePropertyStreams, + &streams, &n_streams); + CHECK_CA_ERROR("could not get number of streams"); + + talloc_steal(tmp, streams); + + MP_VERBOSE(ao, "Found %zd substream(s).\n", n_streams); + + for (int i = 0; i < n_streams; i++) { + AudioStreamRangedDescription *formats; + size_t n_formats; + + MP_VERBOSE(ao, "Looking at formats in substream %d...\n", i); + + err = CA_GET_ARY(streams[i], kAudioStreamPropertyAvailablePhysicalFormats, + &formats, &n_formats); + + if (!CHECK_CA_WARN("could not get number of stream formats")) + continue; // try next one + + talloc_steal(tmp, formats); + + uint32_t direction; + err = CA_GET(streams[i], kAudioStreamPropertyDirection, &direction); + CHECK_CA_ERROR("could not get stream direction"); + if (direction != 0) { + MP_VERBOSE(ao, "Not an output stream.\n"); + continue; + } + + AudioStreamBasicDescription best_asbd = {0}; + + for (int j = 0; j < n_formats; j++) { + AudioStreamBasicDescription *stream_asbd = &formats[j].mFormat; + + ca_print_asbd(ao, "- ", stream_asbd); + + if (!best_asbd.mFormatID || ca_asbd_is_better(&asbd, &best_asbd, + stream_asbd)) + best_asbd = *stream_asbd; + } + + if (best_asbd.mFormatID) { + p->original_asbd_stream = streams[i]; + err = CA_GET(p->original_asbd_stream, + kAudioStreamPropertyPhysicalFormat, + &p->original_asbd); + CHECK_CA_WARN("could not get current physical stream format"); + + if (ca_asbd_equals(&p->original_asbd, &best_asbd)) { + MP_VERBOSE(ao, "Requested format already set, not changing.\n"); + p->original_asbd.mFormatID = 0; + break; + } + + if (!ca_change_physical_format_sync(ao, streams[i], best_asbd)) + p->original_asbd = (AudioStreamBasicDescription){0}; + break; + } + } + +coreaudio_error: + talloc_free(tmp); 
+ return; +} + +static bool init_audiounit(struct ao *ao, AudioStreamBasicDescription asbd) +{ + OSStatus err; + uint32_t size; + struct priv *p = ao->priv; + + AudioComponentDescription desc = (AudioComponentDescription) { + .componentType = kAudioUnitType_Output, + .componentSubType = (ao->device) ? + kAudioUnitSubType_HALOutput : + kAudioUnitSubType_DefaultOutput, + .componentManufacturer = kAudioUnitManufacturer_Apple, + .componentFlags = 0, + .componentFlagsMask = 0, + }; + + AudioComponent comp = AudioComponentFindNext(NULL, &desc); + if (comp == NULL) { + MP_ERR(ao, "unable to find audio component\n"); + goto coreaudio_error; + } + + err = AudioComponentInstanceNew(comp, &(p->audio_unit)); + CHECK_CA_ERROR("unable to open audio component"); + + err = AudioUnitInitialize(p->audio_unit); + CHECK_CA_ERROR_L(coreaudio_error_component, + "unable to initialize audio unit"); + + size = sizeof(AudioStreamBasicDescription); + err = AudioUnitSetProperty(p->audio_unit, + kAudioUnitProperty_StreamFormat, + kAudioUnitScope_Input, 0, &asbd, size); + + CHECK_CA_ERROR_L(coreaudio_error_audiounit, + "unable to set the input format on the audio unit"); + + err = AudioUnitSetProperty(p->audio_unit, + kAudioOutputUnitProperty_CurrentDevice, + kAudioUnitScope_Global, 0, &p->device, + sizeof(p->device)); + CHECK_CA_ERROR_L(coreaudio_error_audiounit, + "can't link audio unit to selected device"); + + p->hw_latency_ns = ca_get_hardware_latency(ao); + + AURenderCallbackStruct render_cb = (AURenderCallbackStruct) { + .inputProc = render_cb_lpcm, + .inputProcRefCon = ao, + }; + + err = AudioUnitSetProperty(p->audio_unit, + kAudioUnitProperty_SetRenderCallback, + kAudioUnitScope_Input, 0, &render_cb, + sizeof(AURenderCallbackStruct)); + + CHECK_CA_ERROR_L(coreaudio_error_audiounit, + "unable to set render callback on audio unit"); + + return true; + +coreaudio_error_audiounit: + AudioUnitUninitialize(p->audio_unit); +coreaudio_error_component: + 
AudioComponentInstanceDispose(p->audio_unit); +coreaudio_error: + return false; +} + +static void reset(struct ao *ao) +{ + struct priv *p = ao->priv; + OSStatus err = AudioUnitReset(p->audio_unit, kAudioUnitScope_Global, 0); + CHECK_CA_WARN("can't reset audio unit"); +} + +static void start(struct ao *ao) +{ + struct priv *p = ao->priv; + OSStatus err = AudioOutputUnitStart(p->audio_unit); + CHECK_CA_WARN("can't start audio unit"); +} + + +static void uninit(struct ao *ao) +{ + struct priv *p = ao->priv; + AudioOutputUnitStop(p->audio_unit); + AudioUnitUninitialize(p->audio_unit); + AudioComponentInstanceDispose(p->audio_unit); + + if (p->original_asbd.mFormatID) { + OSStatus err = CA_SET(p->original_asbd_stream, + kAudioStreamPropertyPhysicalFormat, + &p->original_asbd); + CHECK_CA_WARN("could not restore physical stream format"); + } +} + +static OSStatus hotplug_cb(AudioObjectID id, UInt32 naddr, + const AudioObjectPropertyAddress addr[], + void *ctx) +{ + struct ao *ao = ctx; + MP_VERBOSE(ao, "Handling potential hotplug event...\n"); + reinit_device(ao); + ao_hotplug_event(ao); + return noErr; +} + +static uint32_t hotplug_properties[] = { + kAudioHardwarePropertyDevices, + kAudioHardwarePropertyDefaultOutputDevice +}; + +static int hotplug_init(struct ao *ao) +{ + if (!reinit_device(ao)) + goto coreaudio_error; + + OSStatus err = noErr; + for (int i = 0; i < MP_ARRAY_SIZE(hotplug_properties); i++) { + AudioObjectPropertyAddress addr = { + hotplug_properties[i], + kAudioObjectPropertyScopeGlobal, + kAudioObjectPropertyElementMaster + }; + err = AudioObjectAddPropertyListener( + kAudioObjectSystemObject, &addr, hotplug_cb, (void *)ao); + if (err != noErr) { + char *c1 = mp_tag_str(hotplug_properties[i]); + char *c2 = mp_tag_str(err); + MP_ERR(ao, "failed to set device listener %s (%s)", c1, c2); + goto coreaudio_error; + } + } + + return 0; + +coreaudio_error: + return -1; +} + +static void hotplug_uninit(struct ao *ao) +{ + OSStatus err = noErr; + for (int i = 
0; i < MP_ARRAY_SIZE(hotplug_properties); i++) { + AudioObjectPropertyAddress addr = { + hotplug_properties[i], + kAudioObjectPropertyScopeGlobal, + kAudioObjectPropertyElementMaster + }; + err = AudioObjectRemovePropertyListener( + kAudioObjectSystemObject, &addr, hotplug_cb, (void *)ao); + if (err != noErr) { + char *c1 = mp_tag_str(hotplug_properties[i]); + char *c2 = mp_tag_str(err); + MP_ERR(ao, "failed to set device listener %s (%s)", c1, c2); + } + } +} + +#define OPT_BASE_STRUCT struct priv + +const struct ao_driver audio_out_coreaudio = { + .description = "CoreAudio AudioUnit", + .name = "coreaudio", + .uninit = uninit, + .init = init, + .control = control, + .reset = reset, + .start = start, + .hotplug_init = hotplug_init, + .hotplug_uninit = hotplug_uninit, + .list_devs = ca_get_device_list, + .priv_size = sizeof(struct priv), + .options = (const struct m_option[]){ + {"change-physical-format", OPT_BOOL(change_physical_format)}, + {0} + }, + .options_prefix = "coreaudio", +}; diff --git a/audio/out/ao_coreaudio_chmap.c b/audio/out/ao_coreaudio_chmap.c new file mode 100644 index 0000000..3fd9550 --- /dev/null +++ b/audio/out/ao_coreaudio_chmap.c @@ -0,0 +1,340 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <Availability.h> + +#include "common/common.h" + +#include "ao_coreaudio_utils.h" + +#include "ao_coreaudio_chmap.h" + +static const int speaker_map[][2] = { + { kAudioChannelLabel_Left, MP_SPEAKER_ID_FL }, + { kAudioChannelLabel_Right, MP_SPEAKER_ID_FR }, + { kAudioChannelLabel_Center, MP_SPEAKER_ID_FC }, + { kAudioChannelLabel_LFEScreen, MP_SPEAKER_ID_LFE }, + { kAudioChannelLabel_LeftSurround, MP_SPEAKER_ID_BL }, + { kAudioChannelLabel_RightSurround, MP_SPEAKER_ID_BR }, + { kAudioChannelLabel_LeftCenter, MP_SPEAKER_ID_FLC }, + { kAudioChannelLabel_RightCenter, MP_SPEAKER_ID_FRC }, + { kAudioChannelLabel_CenterSurround, MP_SPEAKER_ID_BC }, + { kAudioChannelLabel_LeftSurroundDirect, MP_SPEAKER_ID_SL }, + { kAudioChannelLabel_RightSurroundDirect, MP_SPEAKER_ID_SR }, + { kAudioChannelLabel_TopCenterSurround, MP_SPEAKER_ID_TC }, + { kAudioChannelLabel_VerticalHeightLeft, MP_SPEAKER_ID_TFL }, + { kAudioChannelLabel_VerticalHeightCenter, MP_SPEAKER_ID_TFC }, + { kAudioChannelLabel_VerticalHeightRight, MP_SPEAKER_ID_TFR }, + { kAudioChannelLabel_TopBackLeft, MP_SPEAKER_ID_TBL }, + { kAudioChannelLabel_TopBackCenter, MP_SPEAKER_ID_TBC }, + { kAudioChannelLabel_TopBackRight, MP_SPEAKER_ID_TBR }, + + // unofficial extensions + { kAudioChannelLabel_RearSurroundLeft, MP_SPEAKER_ID_SDL }, + { kAudioChannelLabel_RearSurroundRight, MP_SPEAKER_ID_SDR }, + { kAudioChannelLabel_LeftWide, MP_SPEAKER_ID_WL }, + { kAudioChannelLabel_RightWide, MP_SPEAKER_ID_WR }, + { kAudioChannelLabel_LFE2, MP_SPEAKER_ID_LFE2 }, +#if __MAC_OS_X_VERSION_MAX_ALLOWED >= 130000 + { kAudioChannelLabel_LeftTopSurround, MP_SPEAKER_ID_TSL }, + { kAudioChannelLabel_RightTopSurround, MP_SPEAKER_ID_TSR }, + { kAudioChannelLabel_CenterBottom, MP_SPEAKER_ID_BFC }, + { kAudioChannelLabel_LeftBottom, MP_SPEAKER_ID_BFL }, + { kAudioChannelLabel_RightBottom, MP_SPEAKER_ID_BFR }, +#endif + + { kAudioChannelLabel_HeadphonesLeft, MP_SPEAKER_ID_DL }, + { kAudioChannelLabel_HeadphonesRight, 
MP_SPEAKER_ID_DR }, + + { kAudioChannelLabel_Unknown, MP_SPEAKER_ID_NA }, + + { 0, -1 }, +}; + +int ca_label_to_mp_speaker_id(AudioChannelLabel label) +{ + for (int i = 0; speaker_map[i][1] >= 0; i++) + if (speaker_map[i][0] == label) + return speaker_map[i][1]; + return -1; +} + +#if HAVE_COREAUDIO +static void ca_log_layout(struct ao *ao, int l, AudioChannelLayout *layout) +{ + if (!mp_msg_test(ao->log, l)) + return; + + AudioChannelDescription *descs = layout->mChannelDescriptions; + + mp_msg(ao->log, l, "layout: tag: <%u>, bitmap: <%u>, " + "descriptions <%u>\n", + (unsigned) layout->mChannelLayoutTag, + (unsigned) layout->mChannelBitmap, + (unsigned) layout->mNumberChannelDescriptions); + + for (int i = 0; i < layout->mNumberChannelDescriptions; i++) { + AudioChannelDescription d = descs[i]; + mp_msg(ao->log, l, " - description %d: label <%u, %u>, " + " flags: <%u>, coords: <%f, %f, %f>\n", i, + (unsigned) d.mChannelLabel, + (unsigned) ca_label_to_mp_speaker_id(d.mChannelLabel), + (unsigned) d.mChannelFlags, + d.mCoordinates[0], + d.mCoordinates[1], + d.mCoordinates[2]); + } +} + +static AudioChannelLayout *ca_layout_to_custom_layout(struct ao *ao, + void *talloc_ctx, + AudioChannelLayout *l) +{ + AudioChannelLayoutTag tag = l->mChannelLayoutTag; + AudioChannelLayout *r; + OSStatus err; + + if (tag == kAudioChannelLayoutTag_UseChannelDescriptions) + return l; + + if (tag == kAudioChannelLayoutTag_UseChannelBitmap) { + uint32_t psize; + err = AudioFormatGetPropertyInfo( + kAudioFormatProperty_ChannelLayoutForBitmap, + sizeof(uint32_t), &l->mChannelBitmap, &psize); + CHECK_CA_ERROR("failed to convert channel bitmap to descriptions (info)"); + r = talloc_size(NULL, psize); + err = AudioFormatGetProperty( + kAudioFormatProperty_ChannelLayoutForBitmap, + sizeof(uint32_t), &l->mChannelBitmap, &psize, r); + CHECK_CA_ERROR("failed to convert channel bitmap to descriptions (get)"); + } else { + uint32_t psize; + err = AudioFormatGetPropertyInfo( + 
kAudioFormatProperty_ChannelLayoutForTag, + sizeof(AudioChannelLayoutTag), &l->mChannelLayoutTag, &psize); + r = talloc_size(NULL, psize); + CHECK_CA_ERROR("failed to convert channel tag to descriptions (info)"); + err = AudioFormatGetProperty( + kAudioFormatProperty_ChannelLayoutForTag, + sizeof(AudioChannelLayoutTag), &l->mChannelLayoutTag, &psize, r); + CHECK_CA_ERROR("failed to convert channel tag to descriptions (get)"); + } + + MP_VERBOSE(ao, "converted input channel layout:\n"); + ca_log_layout(ao, MSGL_V, l); + + return r; +coreaudio_error: + return NULL; +} + + +#define CHMAP(n, ...) &(struct mp_chmap) MP_CONCAT(MP_CHMAP, n) (__VA_ARGS__) + +// Replace each channel in a with b (a->num == b->num) +static void replace_submap(struct mp_chmap *dst, struct mp_chmap *a, + struct mp_chmap *b) +{ + struct mp_chmap t = *dst; + if (!mp_chmap_is_valid(&t) || mp_chmap_diffn(a, &t) != 0) + return; + assert(a->num == b->num); + for (int n = 0; n < t.num; n++) { + for (int i = 0; i < a->num; i++) { + if (t.speaker[n] == a->speaker[i]) { + t.speaker[n] = b->speaker[i]; + break; + } + } + } + if (mp_chmap_is_valid(&t)) + *dst = t; +} + +static bool ca_layout_to_mp_chmap(struct ao *ao, AudioChannelLayout *layout, + struct mp_chmap *chmap) +{ + void *talloc_ctx = talloc_new(NULL); + + MP_VERBOSE(ao, "input channel layout:\n"); + ca_log_layout(ao, MSGL_V, layout); + + AudioChannelLayout *l = ca_layout_to_custom_layout(ao, talloc_ctx, layout); + if (!l) + goto coreaudio_error; + + if (l->mNumberChannelDescriptions > MP_NUM_CHANNELS) { + MP_VERBOSE(ao, "layout has too many descriptions (%u, max: %d)\n", + (unsigned) l->mNumberChannelDescriptions, MP_NUM_CHANNELS); + return false; + } + + chmap->num = l->mNumberChannelDescriptions; + for (int n = 0; n < l->mNumberChannelDescriptions; n++) { + AudioChannelLabel label = l->mChannelDescriptions[n].mChannelLabel; + int speaker = ca_label_to_mp_speaker_id(label); + if (speaker < 0) { + MP_VERBOSE(ao, "channel label=%u unusable to 
build channel " + "bitmap, skipping layout\n", (unsigned) label); + goto coreaudio_error; + } + chmap->speaker[n] = speaker; + } + + // Remap weird 7.1(rear) layouts correctly. + replace_submap(chmap, CHMAP(6, FL, FR, BL, BR, SDL, SDR), + CHMAP(6, FL, FR, SL, SR, BL, BR)); + + talloc_free(talloc_ctx); + MP_VERBOSE(ao, "mp chmap: %s\n", mp_chmap_to_str(chmap)); + return mp_chmap_is_valid(chmap) && !mp_chmap_is_unknown(chmap); +coreaudio_error: + MP_VERBOSE(ao, "converted input channel layout (failed):\n"); + ca_log_layout(ao, MSGL_V, layout); + talloc_free(talloc_ctx); + return false; +} + +static AudioChannelLayout* ca_query_layout(struct ao *ao, + AudioDeviceID device, + void *talloc_ctx) +{ + OSStatus err; + uint32_t psize; + AudioChannelLayout *r = NULL; + + AudioObjectPropertyAddress p_addr = (AudioObjectPropertyAddress) { + .mSelector = kAudioDevicePropertyPreferredChannelLayout, + .mScope = kAudioDevicePropertyScopeOutput, + .mElement = kAudioObjectPropertyElementWildcard, + }; + + err = AudioObjectGetPropertyDataSize(device, &p_addr, 0, NULL, &psize); + CHECK_CA_ERROR("could not get device preferred layout (size)"); + + r = talloc_size(talloc_ctx, psize); + + err = AudioObjectGetPropertyData(device, &p_addr, 0, NULL, &psize, r); + CHECK_CA_ERROR("could not get device preferred layout (get)"); + +coreaudio_error: + return r; +} + +static AudioChannelLayout* ca_query_stereo_layout(struct ao *ao, + AudioDeviceID device, + void *talloc_ctx) +{ + OSStatus err; + const int nch = 2; + uint32_t channels[nch]; + AudioChannelLayout *r = NULL; + + AudioObjectPropertyAddress p_addr = (AudioObjectPropertyAddress) { + .mSelector = kAudioDevicePropertyPreferredChannelsForStereo, + .mScope = kAudioDevicePropertyScopeOutput, + .mElement = kAudioObjectPropertyElementWildcard, + }; + + uint32_t psize = sizeof(channels); + err = AudioObjectGetPropertyData(device, &p_addr, 0, NULL, &psize, channels); + CHECK_CA_ERROR("could not get device preferred stereo layout"); + + psize = 
sizeof(AudioChannelLayout) + nch * sizeof(AudioChannelDescription); + r = talloc_zero_size(talloc_ctx, psize); + r->mChannelLayoutTag = kAudioChannelLayoutTag_UseChannelDescriptions; + r->mNumberChannelDescriptions = nch; + + AudioChannelDescription desc = {0}; + desc.mChannelFlags = kAudioChannelFlags_AllOff; + + for(int i = 0; i < nch; i++) { + desc.mChannelLabel = channels[i]; + r->mChannelDescriptions[i] = desc; + } + +coreaudio_error: + return r; +} + +static void ca_retrieve_layouts(struct ao *ao, struct mp_chmap_sel *s, + AudioDeviceID device) +{ + void *ta_ctx = talloc_new(NULL); + struct mp_chmap chmap; + + AudioChannelLayout *ml = ca_query_layout(ao, device, ta_ctx); + if (ml && ca_layout_to_mp_chmap(ao, ml, &chmap)) + mp_chmap_sel_add_map(s, &chmap); + + AudioChannelLayout *sl = ca_query_stereo_layout(ao, device, ta_ctx); + if (sl && ca_layout_to_mp_chmap(ao, sl, &chmap)) + mp_chmap_sel_add_map(s, &chmap); + + talloc_free(ta_ctx); +} + +bool ca_init_chmap(struct ao *ao, AudioDeviceID device) +{ + struct mp_chmap_sel chmap_sel = {0}; + ca_retrieve_layouts(ao, &chmap_sel, device); + + if (!chmap_sel.num_chmaps) + mp_chmap_sel_add_map(&chmap_sel, &(struct mp_chmap)MP_CHMAP_INIT_STEREO); + + mp_chmap_sel_add_map(&chmap_sel, &(struct mp_chmap)MP_CHMAP_INIT_MONO); + + if (!ao_chmap_sel_adjust(ao, &chmap_sel, &ao->channels)) { + MP_ERR(ao, "could not select a suitable channel map among the " + "hardware supported ones. Make sure to configure your " + "output device correctly in 'Audio MIDI Setup.app'\n"); + return false; + } + return true; +} + +void ca_get_active_chmap(struct ao *ao, AudioDeviceID device, int channel_count, + struct mp_chmap *out_map) +{ + // Apparently, we have to guess by looking back at the supported layouts, + // and I haven't found a property that retrieves the actual currently + // active channel layout. + + struct mp_chmap_sel chmap_sel = {0}; + ca_retrieve_layouts(ao, &chmap_sel, device); + + // Use any exact match. 
+ for (int n = 0; n < chmap_sel.num_chmaps; n++) { + if (chmap_sel.chmaps[n].num == channel_count) { + MP_VERBOSE(ao, "mismatching channels - fallback #%d\n", n); + *out_map = chmap_sel.chmaps[n]; + return; + } + } + + // Fall back to stereo or mono, and fill the rest with silence. (We don't + // know what the device expects. We could use a larger default layout here, + // but let's not.) + mp_chmap_from_channels(out_map, MPMIN(2, channel_count)); + out_map->num = channel_count; + for (int n = 2; n < out_map->num; n++) + out_map->speaker[n] = MP_SPEAKER_ID_NA; + MP_WARN(ao, "mismatching channels - falling back to %s\n", + mp_chmap_to_str(out_map)); +} +#endif diff --git a/audio/out/ao_coreaudio_chmap.h b/audio/out/ao_coreaudio_chmap.h new file mode 100644 index 0000000..b6d160c --- /dev/null +++ b/audio/out/ao_coreaudio_chmap.h @@ -0,0 +1,35 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#ifndef MPV_COREAUDIO_CHMAP_H +#define MPV_COREAUDIO_CHMAP_H + +#include <AudioToolbox/AudioToolbox.h> + +#include "config.h" + +struct mp_chmap; + +int ca_label_to_mp_speaker_id(AudioChannelLabel label); + +#if HAVE_COREAUDIO +bool ca_init_chmap(struct ao *ao, AudioDeviceID device); +void ca_get_active_chmap(struct ao *ao, AudioDeviceID device, int channel_count, + struct mp_chmap *out_map); +#endif + +#endif diff --git a/audio/out/ao_coreaudio_exclusive.c b/audio/out/ao_coreaudio_exclusive.c new file mode 100644 index 0000000..e24f791 --- /dev/null +++ b/audio/out/ao_coreaudio_exclusive.c @@ -0,0 +1,472 @@ +/* + * CoreAudio audio output driver for Mac OS X + * + * original copyright (C) Timothy J. Wood - Aug 2000 + * ported to MPlayer libao2 by Dan Christiansen + * + * Chris Roccati + * Stefano Pigozzi + * + * The S/PDIF part of the code is based on the auhal audio output + * module from VideoLAN: + * Copyright (c) 2006 Derk-Jan Hartman <hartman at videolan dot org> + * + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +/* + * The MacOS X CoreAudio framework doesn't mesh as simply as some + * simpler frameworks do. This is due to the fact that CoreAudio pulls + * audio samples rather than having them pushed at it (which is nice + * when you are wanting to do good buffering of audio). 
+ */ + +#include <stdatomic.h> + +#include <CoreAudio/HostTime.h> + +#include <libavutil/intreadwrite.h> +#include <libavutil/intfloat.h> + +#include "ao.h" +#include "internal.h" +#include "audio/format.h" +#include "osdep/timer.h" +#include "options/m_option.h" +#include "common/msg.h" +#include "audio/out/ao_coreaudio_chmap.h" +#include "audio/out/ao_coreaudio_properties.h" +#include "audio/out/ao_coreaudio_utils.h" + +struct priv { + AudioDeviceID device; // selected device + + bool paused; + + // audio render callback + AudioDeviceIOProcID render_cb; + + // pid set for hog mode, (-1) means that hog mode on the device was + // released. hog mode is exclusive access to a device + pid_t hog_pid; + + AudioStreamID stream; + + // stream index in an AudioBufferList + int stream_idx; + + // format we changed the stream to, and the original format to restore + AudioStreamBasicDescription stream_asbd; + AudioStreamBasicDescription original_asbd; + + // Output s16 physical format, float32 virtual format, ac3/dts mpv format + bool spdif_hack; + + bool changed_mixing; + + atomic_bool reload_requested; + + uint64_t hw_latency_ns; +}; + +static OSStatus property_listener_cb( + AudioObjectID object, uint32_t n_addresses, + const AudioObjectPropertyAddress addresses[], + void *data) +{ + struct ao *ao = data; + struct priv *p = ao->priv; + + // Check whether we need to reset the compressed output stream. + AudioStreamBasicDescription f; + OSErr err = CA_GET(p->stream, kAudioStreamPropertyVirtualFormat, &f); + CHECK_CA_WARN("could not get stream format"); + if (err != noErr || !ca_asbd_equals(&p->stream_asbd, &f)) { + if (atomic_compare_exchange_strong(&p->reload_requested, + &(bool){false}, true)) + { + ao_request_reload(ao); + MP_INFO(ao, "Stream format changed! 
Reloading.\n"); + } + } + + return noErr; +} + +static OSStatus enable_property_listener(struct ao *ao, bool enabled) +{ + struct priv *p = ao->priv; + + uint32_t selectors[] = {kAudioDevicePropertyDeviceHasChanged, + kAudioHardwarePropertyDevices}; + AudioDeviceID devs[] = {p->device, + kAudioObjectSystemObject}; + assert(MP_ARRAY_SIZE(selectors) == MP_ARRAY_SIZE(devs)); + + OSStatus status = noErr; + for (int n = 0; n < MP_ARRAY_SIZE(devs); n++) { + AudioObjectPropertyAddress addr = { + .mScope = kAudioObjectPropertyScopeGlobal, + .mElement = kAudioObjectPropertyElementMaster, + .mSelector = selectors[n], + }; + AudioDeviceID device = devs[n]; + + OSStatus status2; + if (enabled) { + status2 = AudioObjectAddPropertyListener( + device, &addr, property_listener_cb, ao); + } else { + status2 = AudioObjectRemovePropertyListener( + device, &addr, property_listener_cb, ao); + } + if (status == noErr) + status = status2; + } + + return status; +} + +// This is a hack for passing through AC3/DTS on drivers which don't support it. +// The goal is to have the driver output the AC3 data bitexact, so basically we +// feed it float data by converting the AC3 data to float in the reverse way we +// assume the driver outputs it. +// Input: data_as_int16[0..samples] +// Output: data_as_float[0..samples] +// The conversion is done in-place. +static void bad_hack_mygodwhy(char *data, int samples) +{ + // In reverse, so we can do it in-place. 
+ for (int n = samples - 1; n >= 0; n--) { + int16_t val = AV_RN16(data + n * 2); + float fval = val / (float)(1 << 15); + uint32_t ival = av_float2int(fval); + AV_WN32(data + n * 4, ival); + } +} + +static OSStatus render_cb_compressed( + AudioDeviceID device, const AudioTimeStamp *ts, + const void *in_data, const AudioTimeStamp *in_ts, + AudioBufferList *out_data, const AudioTimeStamp *out_ts, void *ctx) +{ + struct ao *ao = ctx; + struct priv *p = ao->priv; + AudioBuffer buf = out_data->mBuffers[p->stream_idx]; + int requested = buf.mDataByteSize; + int sstride = p->spdif_hack ? 4 * ao->channels.num : ao->sstride; + + int pseudo_frames = requested / sstride; + + // we expect the callback to read full frames, which are aligned accordingly + if (pseudo_frames * sstride != requested) { + MP_ERR(ao, "Unsupported unaligned read of %d bytes.\n", requested); + return kAudioHardwareUnspecifiedError; + } + + int64_t end = mp_time_ns(); + end += p->hw_latency_ns + ca_get_latency(ts) + + ca_frames_to_ns(ao, pseudo_frames); + + ao_read_data(ao, &buf.mData, pseudo_frames, end); + + if (p->spdif_hack) + bad_hack_mygodwhy(buf.mData, pseudo_frames * ao->channels.num); + + return noErr; +} + +// Apparently, audio devices can have multiple sub-streams. It's not clear to +// me what devices with multiple streams actually do. So only select the first +// one that fulfills some minimum requirements. +// If this is not sufficient, we could duplicate the device list entries for +// each sub-stream, and make it explicit. +static int select_stream(struct ao *ao) +{ + struct priv *p = ao->priv; + + AudioStreamID *streams; + size_t n_streams; + OSStatus err; + + /* Get a list of all the streams on this device. 
*/ + err = CA_GET_ARY_O(p->device, kAudioDevicePropertyStreams, + &streams, &n_streams); + CHECK_CA_ERROR("could not get number of streams"); + for (int i = 0; i < n_streams; i++) { + uint32_t direction; + err = CA_GET(streams[i], kAudioStreamPropertyDirection, &direction); + CHECK_CA_WARN("could not get stream direction"); + if (err == noErr && direction != 0) { + MP_VERBOSE(ao, "Substream %d is not an output stream.\n", i); + continue; + } + + if (af_fmt_is_pcm(ao->format) || p->spdif_hack || + ca_stream_supports_compressed(ao, streams[i])) + { + MP_VERBOSE(ao, "Using substream %d/%zd.\n", i, n_streams); + p->stream = streams[i]; + p->stream_idx = i; + break; + } + } + + talloc_free(streams); + + if (p->stream_idx < 0) { + MP_ERR(ao, "No useable substream found.\n"); + goto coreaudio_error; + } + + return 0; + +coreaudio_error: + return -1; +} + +static int find_best_format(struct ao *ao, AudioStreamBasicDescription *out_fmt) +{ + struct priv *p = ao->priv; + + // Build ASBD for the input format + AudioStreamBasicDescription asbd; + ca_fill_asbd(ao, &asbd); + ca_print_asbd(ao, "our format:", &asbd); + + *out_fmt = (AudioStreamBasicDescription){0}; + + AudioStreamRangedDescription *formats; + size_t n_formats; + OSStatus err; + + err = CA_GET_ARY(p->stream, kAudioStreamPropertyAvailablePhysicalFormats, + &formats, &n_formats); + CHECK_CA_ERROR("could not get number of stream formats"); + + for (int j = 0; j < n_formats; j++) { + AudioStreamBasicDescription *stream_asbd = &formats[j].mFormat; + + ca_print_asbd(ao, "- ", stream_asbd); + + if (!out_fmt->mFormatID || ca_asbd_is_better(&asbd, out_fmt, stream_asbd)) + *out_fmt = *stream_asbd; + } + + talloc_free(formats); + + if (!out_fmt->mFormatID) { + MP_ERR(ao, "no format found\n"); + return -1; + } + + return 0; +coreaudio_error: + return -1; +} + +static int init(struct ao *ao) +{ + struct priv *p = ao->priv; + int original_format = ao->format; + + OSStatus err = ca_select_device(ao, ao->device, &p->device); + 
CHECK_CA_ERROR_L(coreaudio_error_nounlock, "failed to select device"); + + ao->format = af_fmt_from_planar(ao->format); + + if (!af_fmt_is_pcm(ao->format) && !af_fmt_is_spdif(ao->format)) { + MP_ERR(ao, "Unsupported format.\n"); + goto coreaudio_error_nounlock; + } + + if (af_fmt_is_pcm(ao->format)) + p->spdif_hack = false; + + if (p->spdif_hack) { + if (af_fmt_to_bytes(ao->format) != 2) { + MP_ERR(ao, "HD formats not supported with spdif hack.\n"); + goto coreaudio_error_nounlock; + } + // Let the pure evil begin! + ao->format = AF_FORMAT_S16; + } + + uint32_t is_alive = 1; + err = CA_GET(p->device, kAudioDevicePropertyDeviceIsAlive, &is_alive); + CHECK_CA_WARN("could not check whether device is alive"); + + if (!is_alive) + MP_WARN(ao, "device is not alive\n"); + + err = ca_lock_device(p->device, &p->hog_pid); + CHECK_CA_WARN("failed to set hogmode"); + + err = ca_disable_mixing(ao, p->device, &p->changed_mixing); + CHECK_CA_WARN("failed to disable mixing"); + + if (select_stream(ao) < 0) + goto coreaudio_error; + + AudioStreamBasicDescription hwfmt; + if (find_best_format(ao, &hwfmt) < 0) + goto coreaudio_error; + + err = CA_GET(p->stream, kAudioStreamPropertyPhysicalFormat, + &p->original_asbd); + CHECK_CA_ERROR("could not get stream's original physical format"); + + // Even if changing the physical format fails, we can try using the current + // virtual format. 
+ ca_change_physical_format_sync(ao, p->stream, hwfmt); + + if (!ca_init_chmap(ao, p->device)) + goto coreaudio_error; + + err = CA_GET(p->stream, kAudioStreamPropertyVirtualFormat, &p->stream_asbd); + CHECK_CA_ERROR("could not get stream's virtual format"); + + ca_print_asbd(ao, "virtual format", &p->stream_asbd); + + if (p->stream_asbd.mChannelsPerFrame > MP_NUM_CHANNELS) { + MP_ERR(ao, "unsupported number of channels: %d > %d.\n", + p->stream_asbd.mChannelsPerFrame, MP_NUM_CHANNELS); + goto coreaudio_error; + } + + int new_format = ca_asbd_to_mp_format(&p->stream_asbd); + + // If both old and new formats are spdif, avoid changing it due to the + // imperfect mapping between mp and CA formats. + if (!(af_fmt_is_spdif(ao->format) && af_fmt_is_spdif(new_format))) + ao->format = new_format; + + if (!ao->format || af_fmt_is_planar(ao->format)) { + MP_ERR(ao, "hardware format not supported\n"); + goto coreaudio_error; + } + + ao->samplerate = p->stream_asbd.mSampleRate; + + if (ao->channels.num != p->stream_asbd.mChannelsPerFrame) { + ca_get_active_chmap(ao, p->device, p->stream_asbd.mChannelsPerFrame, + &ao->channels); + } + if (!ao->channels.num) { + MP_ERR(ao, "number of channels changed, and unknown channel layout!\n"); + goto coreaudio_error; + } + + if (p->spdif_hack) { + AudioStreamBasicDescription physical_format = {0}; + err = CA_GET(p->stream, kAudioStreamPropertyPhysicalFormat, + &physical_format); + CHECK_CA_ERROR("could not get stream's physical format"); + int ph_format = ca_asbd_to_mp_format(&physical_format); + if (ao->format != AF_FORMAT_FLOAT || ph_format != AF_FORMAT_S16) { + MP_ERR(ao, "Wrong parameters for spdif hack (%d / %d)\n", + ao->format, ph_format); + } + ao->format = original_format; // pretend AC3 or DTS *evil laughter* + MP_WARN(ao, "Using spdif passthrough hack. 
This could produce noise.\n"); + } + + p->hw_latency_ns = ca_get_device_latency_ns(ao, p->device); + MP_VERBOSE(ao, "base latency: %lld nanoseconds\n", p->hw_latency_ns); + + err = enable_property_listener(ao, true); + CHECK_CA_ERROR("cannot install format change listener during init"); + + err = AudioDeviceCreateIOProcID(p->device, + (AudioDeviceIOProc)render_cb_compressed, + (void *)ao, + &p->render_cb); + CHECK_CA_ERROR("failed to register audio render callback"); + + return CONTROL_TRUE; + +coreaudio_error: + err = enable_property_listener(ao, false); + CHECK_CA_WARN("can't remove format change listener"); + err = ca_unlock_device(p->device, &p->hog_pid); + CHECK_CA_WARN("can't release hog mode"); +coreaudio_error_nounlock: + return CONTROL_ERROR; +} + +static void uninit(struct ao *ao) +{ + struct priv *p = ao->priv; + OSStatus err = noErr; + + err = enable_property_listener(ao, false); + CHECK_CA_WARN("can't remove device listener, this may cause a crash"); + + err = AudioDeviceStop(p->device, p->render_cb); + CHECK_CA_WARN("failed to stop audio device"); + + err = AudioDeviceDestroyIOProcID(p->device, p->render_cb); + CHECK_CA_WARN("failed to remove device render callback"); + + if (!ca_change_physical_format_sync(ao, p->stream, p->original_asbd)) + MP_WARN(ao, "can't revert to original device format\n"); + + err = ca_enable_mixing(ao, p->device, p->changed_mixing); + CHECK_CA_WARN("can't re-enable mixing"); + + err = ca_unlock_device(p->device, &p->hog_pid); + CHECK_CA_WARN("can't release hog mode"); +} + +static void audio_pause(struct ao *ao) +{ + struct priv *p = ao->priv; + + OSStatus err = AudioDeviceStop(p->device, p->render_cb); + CHECK_CA_WARN("can't stop audio device"); +} + +static void audio_resume(struct ao *ao) +{ + struct priv *p = ao->priv; + + OSStatus err = AudioDeviceStart(p->device, p->render_cb); + CHECK_CA_WARN("can't start audio device"); +} + +#define OPT_BASE_STRUCT struct priv + +const struct ao_driver audio_out_coreaudio_exclusive 
= { + .description = "CoreAudio Exclusive Mode", + .name = "coreaudio_exclusive", + .uninit = uninit, + .init = init, + .reset = audio_pause, + .start = audio_resume, + .list_devs = ca_get_device_list, + .priv_size = sizeof(struct priv), + .priv_defaults = &(const struct priv){ + .hog_pid = -1, + .stream = 0, + .stream_idx = -1, + .changed_mixing = false, + }, + .options = (const struct m_option[]){ + {"spdif-hack", OPT_BOOL(spdif_hack)}, + {0} + }, + .options_prefix = "coreaudio", +}; diff --git a/audio/out/ao_coreaudio_properties.c b/audio/out/ao_coreaudio_properties.c new file mode 100644 index 0000000..e25170a --- /dev/null +++ b/audio/out/ao_coreaudio_properties.c @@ -0,0 +1,103 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +/* + * Abstractions on the CoreAudio API to make property setting/getting suck less +*/ + +#include "audio/out/ao_coreaudio_properties.h" +#include "audio/out/ao_coreaudio_utils.h" +#include "mpv_talloc.h" + +OSStatus ca_get(AudioObjectID id, ca_scope scope, ca_sel selector, + uint32_t size, void *data) +{ + AudioObjectPropertyAddress p_addr = (AudioObjectPropertyAddress) { + .mSelector = selector, + .mScope = scope, + .mElement = kAudioObjectPropertyElementMaster, + }; + + return AudioObjectGetPropertyData(id, &p_addr, 0, NULL, &size, data); +} + +OSStatus ca_set(AudioObjectID id, ca_scope scope, ca_sel selector, + uint32_t size, void *data) +{ + AudioObjectPropertyAddress p_addr = (AudioObjectPropertyAddress) { + .mSelector = selector, + .mScope = scope, + .mElement = kAudioObjectPropertyElementMaster, + }; + + return AudioObjectSetPropertyData(id, &p_addr, 0, NULL, size, data); +} + +OSStatus ca_get_ary(AudioObjectID id, ca_scope scope, ca_sel selector, + uint32_t element_size, void **data, size_t *elements) +{ + OSStatus err; + uint32_t p_size; + + AudioObjectPropertyAddress p_addr = (AudioObjectPropertyAddress) { + .mSelector = selector, + .mScope = scope, + .mElement = kAudioObjectPropertyElementMaster, + }; + + err = AudioObjectGetPropertyDataSize(id, &p_addr, 0, NULL, &p_size); + CHECK_CA_ERROR_SILENT_L(coreaudio_error); + + *data = talloc_zero_size(NULL, p_size); + *elements = p_size / element_size; + + err = ca_get(id, scope, selector, p_size, *data); + CHECK_CA_ERROR_SILENT_L(coreaudio_error_free); + + return err; +coreaudio_error_free: + talloc_free(*data); +coreaudio_error: + return err; +} + +OSStatus ca_get_str(AudioObjectID id, ca_scope scope, ca_sel selector, + char **data) +{ + CFStringRef string; + OSStatus err = + ca_get(id, scope, selector, sizeof(CFStringRef), (void **)&string); + CHECK_CA_ERROR_SILENT_L(coreaudio_error); + + *data = cfstr_get_cstr(string); + CFRelease(string); +coreaudio_error: + return err; +} + +Boolean 
ca_settable(AudioObjectID id, ca_scope scope, ca_sel selector, + Boolean *data) +{ + AudioObjectPropertyAddress p_addr = (AudioObjectPropertyAddress) { + .mSelector = selector, + .mScope = kAudioObjectPropertyScopeGlobal, + .mElement = kAudioObjectPropertyElementMaster, + }; + + return AudioObjectIsPropertySettable(id, &p_addr, data); +} + diff --git a/audio/out/ao_coreaudio_properties.h b/audio/out/ao_coreaudio_properties.h new file mode 100644 index 0000000..f293968 --- /dev/null +++ b/audio/out/ao_coreaudio_properties.h @@ -0,0 +1,61 @@ +/* + * This file is part of mpv. + * Copyright (c) 2013 Stefano Pigozzi <stefano.pigozzi@gmail.com> + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#ifndef MPV_COREAUDIO_PROPERTIES_H +#define MPV_COREAUDIO_PROPERTIES_H + +#include <AudioToolbox/AudioToolbox.h> + +#include "internal.h" + +// CoreAudio names are way too verbose +#define ca_sel AudioObjectPropertySelector +#define ca_scope AudioObjectPropertyScope +#define CA_GLOBAL kAudioObjectPropertyScopeGlobal +#define CA_OUTPUT kAudioDevicePropertyScopeOutput + +OSStatus ca_get(AudioObjectID id, ca_scope scope, ca_sel selector, + uint32_t size, void *data); + +OSStatus ca_set(AudioObjectID id, ca_scope scope, ca_sel selector, + uint32_t size, void *data); + +#define CA_GET(id, sel, data) ca_get(id, CA_GLOBAL, sel, sizeof(*(data)), data) +#define CA_SET(id, sel, data) ca_set(id, CA_GLOBAL, sel, sizeof(*(data)), data) +#define CA_GET_O(id, sel, data) ca_get(id, CA_OUTPUT, sel, sizeof(*(data)), data) + +OSStatus ca_get_ary(AudioObjectID id, ca_scope scope, ca_sel selector, + uint32_t element_size, void **data, size_t *elements); + +#define CA_GET_ARY(id, sel, data, elements) \ + ca_get_ary(id, CA_GLOBAL, sel, sizeof(**(data)), (void **)data, elements) + +#define CA_GET_ARY_O(id, sel, data, elements) \ + ca_get_ary(id, CA_OUTPUT, sel, sizeof(**(data)), (void **)data, elements) + +OSStatus ca_get_str(AudioObjectID id, ca_scope scope,ca_sel selector, + char **data); + +#define CA_GET_STR(id, sel, data) ca_get_str(id, CA_GLOBAL, sel, data) + +Boolean ca_settable(AudioObjectID id, ca_scope scope, ca_sel selector, + Boolean *data); + +#define CA_SETTABLE(id, sel, data) ca_settable(id, CA_GLOBAL, sel, data) + +#endif /* MPV_COREAUDIO_PROPERTIES_H */ diff --git a/audio/out/ao_coreaudio_utils.c b/audio/out/ao_coreaudio_utils.c new file mode 100644 index 0000000..14db8e3 --- /dev/null +++ b/audio/out/ao_coreaudio_utils.c @@ -0,0 +1,539 @@ +/* + * This file is part of mpv. 
+ * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +/* + * This file contains functions interacting with the CoreAudio framework + * that are not specific to the AUHAL. These are split in a separate file for + * the sake of readability. In the future the could be used by other AOs based + * on CoreAudio but not the AUHAL (such as using AudioQueue services). + */ + +#include "audio/out/ao_coreaudio_utils.h" +#include "osdep/timer.h" +#include "osdep/endian.h" +#include "osdep/semaphore.h" +#include "audio/format.h" + +#if HAVE_COREAUDIO +#include "audio/out/ao_coreaudio_properties.h" +#include <CoreAudio/HostTime.h> +#else +#include <mach/mach_time.h> +#endif + +#if HAVE_COREAUDIO +static bool ca_is_output_device(struct ao *ao, AudioDeviceID dev) +{ + size_t n_buffers; + AudioBufferList *buffers; + const ca_scope scope = kAudioDevicePropertyStreamConfiguration; + OSStatus err = CA_GET_ARY_O(dev, scope, &buffers, &n_buffers); + if (err != noErr) + return false; + talloc_free(buffers); + return n_buffers > 0; +} + +void ca_get_device_list(struct ao *ao, struct ao_device_list *list) +{ + AudioDeviceID *devs; + size_t n_devs; + OSStatus err = + CA_GET_ARY(kAudioObjectSystemObject, kAudioHardwarePropertyDevices, + &devs, &n_devs); + CHECK_CA_ERROR("Failed to get list of output devices."); + for (int i = 0; i < n_devs; i++) { + if (!ca_is_output_device(ao, devs[i])) + 
continue; + void *ta_ctx = talloc_new(NULL); + char *name; + char *desc; + err = CA_GET_STR(devs[i], kAudioDevicePropertyDeviceUID, &name); + if (err != noErr) { + MP_VERBOSE(ao, "skipping device %d, which has no UID\n", i); + talloc_free(ta_ctx); + continue; + } + talloc_steal(ta_ctx, name); + err = CA_GET_STR(devs[i], kAudioObjectPropertyName, &desc); + if (err != noErr) + desc = talloc_strdup(NULL, "Unknown"); + talloc_steal(ta_ctx, desc); + ao_device_list_add(list, ao, &(struct ao_device_desc){name, desc}); + talloc_free(ta_ctx); + } + talloc_free(devs); +coreaudio_error: + return; +} + +OSStatus ca_select_device(struct ao *ao, char* name, AudioDeviceID *device) +{ + OSStatus err = noErr; + *device = kAudioObjectUnknown; + + if (name && name[0]) { + CFStringRef uid = cfstr_from_cstr(name); + AudioValueTranslation v = (AudioValueTranslation) { + .mInputData = &uid, + .mInputDataSize = sizeof(CFStringRef), + .mOutputData = device, + .mOutputDataSize = sizeof(*device), + }; + uint32_t size = sizeof(AudioValueTranslation); + AudioObjectPropertyAddress p_addr = (AudioObjectPropertyAddress) { + .mSelector = kAudioHardwarePropertyDeviceForUID, + .mScope = kAudioObjectPropertyScopeGlobal, + .mElement = kAudioObjectPropertyElementMaster, + }; + err = AudioObjectGetPropertyData( + kAudioObjectSystemObject, &p_addr, 0, 0, &size, &v); + CFRelease(uid); + CHECK_CA_ERROR("unable to query for device UID"); + + uint32_t is_alive = 1; + err = CA_GET(*device, kAudioDevicePropertyDeviceIsAlive, &is_alive); + CHECK_CA_ERROR("could not check whether device is alive (invalid device?)"); + + if (!is_alive) + MP_WARN(ao, "device is not alive!\n"); + } else { + // device not set by user, get the default one + err = CA_GET(kAudioObjectSystemObject, + kAudioHardwarePropertyDefaultOutputDevice, + device); + CHECK_CA_ERROR("could not get default audio device"); + } + + if (mp_msg_test(ao->log, MSGL_V)) { + char *desc; + OSStatus err2 = CA_GET_STR(*device, kAudioObjectPropertyName, &desc); 
+ if (err2 == noErr) { + MP_VERBOSE(ao, "selected audio output device: %s (%" PRIu32 ")\n", + desc, *device); + talloc_free(desc); + } + } + +coreaudio_error: + return err; +} +#endif + +bool check_ca_st(struct ao *ao, int level, OSStatus code, const char *message) +{ + if (code == noErr) return true; + + mp_msg(ao->log, level, "%s (%s/%d)\n", message, mp_tag_str(code), (int)code); + + return false; +} + +static void ca_fill_asbd_raw(AudioStreamBasicDescription *asbd, int mp_format, + int samplerate, int num_channels) +{ + asbd->mSampleRate = samplerate; + // Set "AC3" for other spdif formats too - unknown if that works. + asbd->mFormatID = af_fmt_is_spdif(mp_format) ? + kAudioFormat60958AC3 : + kAudioFormatLinearPCM; + asbd->mChannelsPerFrame = num_channels; + asbd->mBitsPerChannel = af_fmt_to_bytes(mp_format) * 8; + asbd->mFormatFlags = kAudioFormatFlagIsPacked; + + int channels_per_buffer = num_channels; + if (af_fmt_is_planar(mp_format)) { + asbd->mFormatFlags |= kAudioFormatFlagIsNonInterleaved; + channels_per_buffer = 1; + } + + if (af_fmt_is_float(mp_format)) { + asbd->mFormatFlags |= kAudioFormatFlagIsFloat; + } else if (!af_fmt_is_unsigned(mp_format)) { + asbd->mFormatFlags |= kAudioFormatFlagIsSignedInteger; + } + + if (BYTE_ORDER == BIG_ENDIAN) + asbd->mFormatFlags |= kAudioFormatFlagIsBigEndian; + + asbd->mFramesPerPacket = 1; + asbd->mBytesPerPacket = asbd->mBytesPerFrame = + asbd->mFramesPerPacket * channels_per_buffer * + (asbd->mBitsPerChannel / 8); +} + +void ca_fill_asbd(struct ao *ao, AudioStreamBasicDescription *asbd) +{ + ca_fill_asbd_raw(asbd, ao->format, ao->samplerate, ao->channels.num); +} + +bool ca_formatid_is_compressed(uint32_t formatid) +{ + switch (formatid) + case 'IAC3': + case 'iac3': + case kAudioFormat60958AC3: + case kAudioFormatAC3: + return true; + return false; +} + +// This might be wrong, but for now it's sufficient for us. 
+static uint32_t ca_normalize_formatid(uint32_t formatID) +{ + return ca_formatid_is_compressed(formatID) ? kAudioFormat60958AC3 : formatID; +} + +bool ca_asbd_equals(const AudioStreamBasicDescription *a, + const AudioStreamBasicDescription *b) +{ + int flags = kAudioFormatFlagIsPacked | kAudioFormatFlagIsFloat | + kAudioFormatFlagIsSignedInteger | kAudioFormatFlagIsBigEndian; + bool spdif = ca_formatid_is_compressed(a->mFormatID) && + ca_formatid_is_compressed(b->mFormatID); + + return (a->mFormatFlags & flags) == (b->mFormatFlags & flags) && + a->mBitsPerChannel == b->mBitsPerChannel && + ca_normalize_formatid(a->mFormatID) == + ca_normalize_formatid(b->mFormatID) && + (spdif || a->mBytesPerPacket == b->mBytesPerPacket) && + (spdif || a->mChannelsPerFrame == b->mChannelsPerFrame) && + a->mSampleRate == b->mSampleRate; +} + +// Return the AF_FORMAT_* (AF_FORMAT_S16 etc.) corresponding to the asbd. +int ca_asbd_to_mp_format(const AudioStreamBasicDescription *asbd) +{ + for (int fmt = 1; fmt < AF_FORMAT_COUNT; fmt++) { + AudioStreamBasicDescription mp_asbd = {0}; + ca_fill_asbd_raw(&mp_asbd, fmt, asbd->mSampleRate, asbd->mChannelsPerFrame); + if (ca_asbd_equals(&mp_asbd, asbd)) + return af_fmt_is_spdif(fmt) ? AF_FORMAT_S_AC3 : fmt; + } + return 0; +} + +void ca_print_asbd(struct ao *ao, const char *description, + const AudioStreamBasicDescription *asbd) +{ + uint32_t flags = asbd->mFormatFlags; + char *format = mp_tag_str(asbd->mFormatID); + int mpfmt = ca_asbd_to_mp_format(asbd); + + MP_VERBOSE(ao, + "%s %7.1fHz %" PRIu32 "bit %s " + "[%" PRIu32 "][%" PRIu32 "bpp][%" PRIu32 "fbp]" + "[%" PRIu32 "bpf][%" PRIu32 "ch] " + "%s %s %s%s%s%s (%s)\n", + description, asbd->mSampleRate, asbd->mBitsPerChannel, format, + asbd->mFormatFlags, asbd->mBytesPerPacket, asbd->mFramesPerPacket, + asbd->mBytesPerFrame, asbd->mChannelsPerFrame, + (flags & kAudioFormatFlagIsFloat) ? "float" : "int", + (flags & kAudioFormatFlagIsBigEndian) ? 
"BE" : "LE", + (flags & kAudioFormatFlagIsSignedInteger) ? "S" : "U", + (flags & kAudioFormatFlagIsPacked) ? " packed" : "", + (flags & kAudioFormatFlagIsAlignedHigh) ? " aligned" : "", + (flags & kAudioFormatFlagIsNonInterleaved) ? " P" : "", + mpfmt ? af_fmt_to_str(mpfmt) : "-"); +} + +// Return whether new is an improvement over old. Assume a higher value means +// better quality, and we always prefer the value closest to the requested one, +// which is still larger than the requested one. +// Equal values prefer the new one (so ca_asbd_is_better() checks other params). +static bool value_is_better(double req, double old, double new) +{ + if (new >= req) { + return old < req || new <= old; + } else { + return old < req && new >= old; + } +} + +// Return whether new is an improvement over old (req is the requested format). +bool ca_asbd_is_better(AudioStreamBasicDescription *req, + AudioStreamBasicDescription *old, + AudioStreamBasicDescription *new) +{ + if (new->mChannelsPerFrame > MP_NUM_CHANNELS) + return false; + if (old->mChannelsPerFrame > MP_NUM_CHANNELS) + return true; + if (req->mFormatID != new->mFormatID) + return false; + if (req->mFormatID != old->mFormatID) + return true; + + if (!value_is_better(req->mBitsPerChannel, old->mBitsPerChannel, + new->mBitsPerChannel)) + return false; + + if (!value_is_better(req->mSampleRate, old->mSampleRate, new->mSampleRate)) + return false; + + if (!value_is_better(req->mChannelsPerFrame, old->mChannelsPerFrame, + new->mChannelsPerFrame)) + return false; + + return true; +} + +int64_t ca_frames_to_ns(struct ao *ao, uint32_t frames) +{ + return MP_TIME_S_TO_NS(frames / (double)ao->samplerate); +} + +int64_t ca_get_latency(const AudioTimeStamp *ts) +{ +#if HAVE_COREAUDIO + uint64_t out = AudioConvertHostTimeToNanos(ts->mHostTime); + uint64_t now = AudioConvertHostTimeToNanos(AudioGetCurrentHostTime()); + + if (now > out) + return 0; + + return out - now; +#else + static mach_timebase_info_data_t timebase; + if 
(timebase.denom == 0) + mach_timebase_info(&timebase); + + uint64_t out = ts->mHostTime; + uint64_t now = mach_absolute_time(); + + if (now > out) + return 0; + + return (out - now) * timebase.numer / timebase.denom; +#endif +} + +#if HAVE_COREAUDIO +bool ca_stream_supports_compressed(struct ao *ao, AudioStreamID stream) +{ + AudioStreamRangedDescription *formats = NULL; + size_t n_formats; + + OSStatus err = + CA_GET_ARY(stream, kAudioStreamPropertyAvailablePhysicalFormats, + &formats, &n_formats); + + CHECK_CA_ERROR("Could not get number of stream formats."); + + for (int i = 0; i < n_formats; i++) { + AudioStreamBasicDescription asbd = formats[i].mFormat; + + ca_print_asbd(ao, "- ", &asbd); + + if (ca_formatid_is_compressed(asbd.mFormatID)) { + talloc_free(formats); + return true; + } + } + + talloc_free(formats); +coreaudio_error: + return false; +} + +OSStatus ca_lock_device(AudioDeviceID device, pid_t *pid) +{ + *pid = getpid(); + OSStatus err = CA_SET(device, kAudioDevicePropertyHogMode, pid); + if (err != noErr) + *pid = -1; + + return err; +} + +OSStatus ca_unlock_device(AudioDeviceID device, pid_t *pid) +{ + if (*pid == getpid()) { + *pid = -1; + return CA_SET(device, kAudioDevicePropertyHogMode, &pid); + } + return noErr; +} + +static OSStatus ca_change_mixing(struct ao *ao, AudioDeviceID device, + uint32_t val, bool *changed) +{ + *changed = false; + + AudioObjectPropertyAddress p_addr = (AudioObjectPropertyAddress) { + .mSelector = kAudioDevicePropertySupportsMixing, + .mScope = kAudioObjectPropertyScopeGlobal, + .mElement = kAudioObjectPropertyElementMaster, + }; + + if (AudioObjectHasProperty(device, &p_addr)) { + OSStatus err; + Boolean writeable = 0; + err = CA_SETTABLE(device, kAudioDevicePropertySupportsMixing, + &writeable); + + if (!CHECK_CA_WARN("can't tell if mixing property is settable")) { + return err; + } + + if (!writeable) + return noErr; + + err = CA_SET(device, kAudioDevicePropertySupportsMixing, &val); + if (err != noErr) + return 
err; + + if (!CHECK_CA_WARN("can't set mix mode")) { + return err; + } + + *changed = true; + } + + return noErr; +} + +OSStatus ca_disable_mixing(struct ao *ao, AudioDeviceID device, bool *changed) +{ + return ca_change_mixing(ao, device, 0, changed); +} + +OSStatus ca_enable_mixing(struct ao *ao, AudioDeviceID device, bool changed) +{ + if (changed) { + bool dont_care = false; + return ca_change_mixing(ao, device, 1, &dont_care); + } + + return noErr; +} + +int64_t ca_get_device_latency_ns(struct ao *ao, AudioDeviceID device) +{ + uint32_t latency_frames = 0; + uint32_t latency_properties[] = { + kAudioDevicePropertyLatency, + kAudioDevicePropertyBufferFrameSize, + kAudioDevicePropertySafetyOffset, + }; + for (int n = 0; n < MP_ARRAY_SIZE(latency_properties); n++) { + uint32_t temp; + OSStatus err = CA_GET_O(device, latency_properties[n], &temp); + CHECK_CA_WARN("cannot get device latency"); + if (err == noErr) { + latency_frames += temp; + MP_VERBOSE(ao, "Latency property %s: %d frames\n", + mp_tag_str(latency_properties[n]), (int)temp); + } + } + + double sample_rate = ao->samplerate; + OSStatus err = CA_GET_O(device, kAudioDevicePropertyNominalSampleRate, + &sample_rate); + CHECK_CA_WARN("cannot get device sample rate, falling back to AO sample rate!"); + if (err == noErr) { + MP_VERBOSE(ao, "Device sample rate: %f\n", sample_rate); + } + + return MP_TIME_S_TO_NS(latency_frames / sample_rate); +} + +static OSStatus ca_change_format_listener( + AudioObjectID object, uint32_t n_addresses, + const AudioObjectPropertyAddress addresses[], + void *data) +{ + mp_sem_t *sem = data; + mp_sem_post(sem); + return noErr; +} + +bool ca_change_physical_format_sync(struct ao *ao, AudioStreamID stream, + AudioStreamBasicDescription change_format) +{ + OSStatus err = noErr; + bool format_set = false; + + ca_print_asbd(ao, "setting stream physical format:", &change_format); + + sem_t wakeup; + if (mp_sem_init(&wakeup, 0, 0)) { + MP_WARN(ao, "OOM\n"); + return false; + } + + 
AudioStreamBasicDescription prev_format; + err = CA_GET(stream, kAudioStreamPropertyPhysicalFormat, &prev_format); + CHECK_CA_ERROR("can't get current physical format"); + + ca_print_asbd(ao, "format in use before switching:", &prev_format); + + /* Install the callback. */ + AudioObjectPropertyAddress p_addr = { + .mSelector = kAudioStreamPropertyPhysicalFormat, + .mScope = kAudioObjectPropertyScopeGlobal, + .mElement = kAudioObjectPropertyElementMaster, + }; + + err = AudioObjectAddPropertyListener(stream, &p_addr, + ca_change_format_listener, + &wakeup); + CHECK_CA_ERROR("can't add property listener during format change"); + + /* Change the format. */ + err = CA_SET(stream, kAudioStreamPropertyPhysicalFormat, &change_format); + CHECK_CA_WARN("error changing physical format"); + + /* The AudioStreamSetProperty is not only asynchronous, + * it is also not Atomic, in its behaviour. */ + int64_t wait_until = mp_time_ns() + MP_TIME_S_TO_NS(2); + AudioStreamBasicDescription actual_format = {0}; + while (1) { + err = CA_GET(stream, kAudioStreamPropertyPhysicalFormat, &actual_format); + if (!CHECK_CA_WARN("could not retrieve physical format")) + break; + + format_set = ca_asbd_equals(&change_format, &actual_format); + if (format_set) + break; + + if (mp_sem_timedwait(&wakeup, wait_until)) { + MP_VERBOSE(ao, "reached timeout\n"); + break; + } + } + + ca_print_asbd(ao, "actual format in use:", &actual_format); + + if (!format_set) { + MP_WARN(ao, "changing physical format failed\n"); + // Some drivers just fuck up and get into a broken state. Restore the + // old format in this case. 
+ err = CA_SET(stream, kAudioStreamPropertyPhysicalFormat, &prev_format); + CHECK_CA_WARN("error restoring physical format"); + } + + err = AudioObjectRemovePropertyListener(stream, &p_addr, + ca_change_format_listener, + &wakeup); + CHECK_CA_ERROR("can't remove property listener"); + +coreaudio_error: + mp_sem_destroy(&wakeup); + return format_set; +} +#endif diff --git a/audio/out/ao_coreaudio_utils.h b/audio/out/ao_coreaudio_utils.h new file mode 100644 index 0000000..0e2b8b1 --- /dev/null +++ b/audio/out/ao_coreaudio_utils.h @@ -0,0 +1,79 @@ +/* + * This file is part of mpv. + * Copyright (c) 2013 Stefano Pigozzi <stefano.pigozzi@gmail.com> + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#ifndef MPV_COREAUDIO_UTILS_H +#define MPV_COREAUDIO_UTILS_H + +#include <AudioToolbox/AudioToolbox.h> +#include <inttypes.h> +#include <stdbool.h> + +#include "config.h" +#include "common/msg.h" +#include "audio/out/ao.h" +#include "internal.h" +#include "osdep/apple_utils.h" + +bool check_ca_st(struct ao *ao, int level, OSStatus code, const char *message); + +#define CHECK_CA_ERROR_L(label, message) \ + do { \ + if (!check_ca_st(ao, MSGL_ERR, err, message)) { \ + goto label; \ + } \ + } while (0) + +#define CHECK_CA_ERROR(message) CHECK_CA_ERROR_L(coreaudio_error, message) +#define CHECK_CA_WARN(message) check_ca_st(ao, MSGL_WARN, err, message) + +#define CHECK_CA_ERROR_SILENT_L(label) \ + do { \ + if (err != noErr) goto label; \ + } while (0) + +void ca_get_device_list(struct ao *ao, struct ao_device_list *list); +#if HAVE_COREAUDIO +OSStatus ca_select_device(struct ao *ao, char* name, AudioDeviceID *device); +#endif + +bool ca_formatid_is_compressed(uint32_t formatid); +void ca_fill_asbd(struct ao *ao, AudioStreamBasicDescription *asbd); +void ca_print_asbd(struct ao *ao, const char *description, + const AudioStreamBasicDescription *asbd); +bool ca_asbd_equals(const AudioStreamBasicDescription *a, + const AudioStreamBasicDescription *b); +int ca_asbd_to_mp_format(const AudioStreamBasicDescription *asbd); +bool ca_asbd_is_better(AudioStreamBasicDescription *req, + AudioStreamBasicDescription *old, + AudioStreamBasicDescription *new); + +int64_t ca_frames_to_ns(struct ao *ao, uint32_t frames); +int64_t ca_get_latency(const AudioTimeStamp *ts); + +#if HAVE_COREAUDIO +bool ca_stream_supports_compressed(struct ao *ao, AudioStreamID stream); +OSStatus ca_lock_device(AudioDeviceID device, pid_t *pid); +OSStatus ca_unlock_device(AudioDeviceID device, pid_t *pid); +OSStatus ca_disable_mixing(struct ao *ao, AudioDeviceID device, bool *changed); +OSStatus ca_enable_mixing(struct ao *ao, AudioDeviceID device, bool changed); +int64_t ca_get_device_latency_ns(struct 
ao *ao, AudioDeviceID device); +bool ca_change_physical_format_sync(struct ao *ao, AudioStreamID stream, + AudioStreamBasicDescription change_format); +#endif + +#endif /* MPV_COREAUDIO_UTILS_H */ diff --git a/audio/out/ao_jack.c b/audio/out/ao_jack.c new file mode 100644 index 0000000..412e91d --- /dev/null +++ b/audio/out/ao_jack.c @@ -0,0 +1,284 @@ +/* + * JACK audio output driver for MPlayer + * + * Copyleft 2001 by Felix Bünemann (atmosfear@users.sf.net) + * and Reimar Döffinger (Reimar.Doeffinger@stud.uni-karlsruhe.de) + * + * Copyleft 2013 by William Light <wrl@illest.net> for the mpv project + * + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <stdatomic.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "config.h" +#include "common/msg.h" + +#include "ao.h" +#include "internal.h" +#include "audio/format.h" +#include "osdep/timer.h" +#include "options/m_config.h" +#include "options/m_option.h" + +#include <jack/jack.h> + +#if !HAVE_GPL +#error GPL only +#endif + +struct jack_opts { + char *port; + char *client_name; + bool connect; + bool autostart; + int stdlayout; +}; + +#define OPT_BASE_STRUCT struct jack_opts +static const struct m_sub_options ao_jack_conf = { + .opts = (const struct m_option[]){ + {"jack-port", OPT_STRING(port)}, + {"jack-name", OPT_STRING(client_name)}, + {"jack-autostart", OPT_BOOL(autostart)}, + {"jack-connect", OPT_BOOL(connect)}, + {"jack-std-channel-layout", OPT_CHOICE(stdlayout, + {"waveext", 0}, {"any", 1})}, + {0} + }, + .defaults = &(const struct jack_opts) { + .client_name = "mpv", + .connect = true, + }, + .size = sizeof(struct jack_opts), +}; + +struct priv { + jack_client_t *client; + + atomic_uint graph_latency_max; + atomic_uint buffer_size; + + int last_chunk; + + int num_ports; + jack_port_t *ports[MP_NUM_CHANNELS]; + + int activated; + + struct jack_opts *opts; +}; + +static int graph_order_cb(void *arg) +{ + struct ao *ao = arg; + struct priv *p = ao->priv; + + jack_latency_range_t jack_latency_range; + jack_port_get_latency_range(p->ports[0], JackPlaybackLatency, + &jack_latency_range); + atomic_store(&p->graph_latency_max, jack_latency_range.max); + + return 0; +} + +static int buffer_size_cb(jack_nframes_t nframes, void *arg) +{ + struct ao *ao = arg; + struct priv *p = ao->priv; + + atomic_store(&p->buffer_size, nframes); + + return 0; +} + +static int process(jack_nframes_t nframes, void *arg) +{ + struct ao *ao = arg; + struct priv *p = ao->priv; + + void *buffers[MP_NUM_CHANNELS]; + + for (int i = 0; i < p->num_ports; i++) + buffers[i] = jack_port_get_buffer(p->ports[i], nframes); + + 
jack_nframes_t jack_latency = + atomic_load(&p->graph_latency_max) + atomic_load(&p->buffer_size); + + int64_t end_time = mp_time_ns(); + end_time += MP_TIME_S_TO_NS((jack_latency + nframes) / (double)ao->samplerate); + + ao_read_data(ao, buffers, nframes, end_time); + + return 0; +} + +static int +connect_to_outports(struct ao *ao) +{ + struct priv *p = ao->priv; + + char *port_name = (p->opts->port && p->opts->port[0]) ? p->opts->port : NULL; + const char **matching_ports = NULL; + int port_flags = JackPortIsInput; + int i; + + if (!port_name) + port_flags |= JackPortIsPhysical; + + const char *port_type = JACK_DEFAULT_AUDIO_TYPE; // exclude MIDI ports + matching_ports = jack_get_ports(p->client, port_name, port_type, port_flags); + + if (!matching_ports || !matching_ports[0]) { + MP_FATAL(ao, "no ports to connect to\n"); + goto err_get_ports; + } + + for (i = 0; i < p->num_ports && matching_ports[i]; i++) { + if (jack_connect(p->client, jack_port_name(p->ports[i]), + matching_ports[i])) + { + MP_FATAL(ao, "connecting failed\n"); + goto err_connect; + } + } + + free(matching_ports); + return 0; + +err_connect: + free(matching_ports); +err_get_ports: + return -1; +} + +static int +create_ports(struct ao *ao, int nports) +{ + struct priv *p = ao->priv; + char pname[30]; + int i; + + for (i = 0; i < nports; i++) { + snprintf(pname, sizeof(pname), "out_%d", i); + p->ports[i] = jack_port_register(p->client, pname, JACK_DEFAULT_AUDIO_TYPE, + JackPortIsOutput, 0); + + if (!p->ports[i]) { + MP_FATAL(ao, "not enough ports available\n"); + goto err_port_register; + } + } + + p->num_ports = nports; + return 0; + +err_port_register: + return -1; +} + +static void start(struct ao *ao) +{ + struct priv *p = ao->priv; + if (!p->activated) { + p->activated = true; + + if (jack_activate(p->client)) + MP_FATAL(ao, "activate failed\n"); + + if (p->opts->connect) + connect_to_outports(ao); + } +} + +static int init(struct ao *ao) +{ + struct priv *p = ao->priv; + struct mp_chmap_sel 
sel = {0}; + jack_options_t open_options; + + p->opts = mp_get_config_group(ao, ao->global, &ao_jack_conf); + + ao->format = AF_FORMAT_FLOATP; + + switch (p->opts->stdlayout) { + case 0: + mp_chmap_sel_add_waveext(&sel); + break; + + default: + mp_chmap_sel_add_any(&sel); + } + + if (!ao_chmap_sel_adjust(ao, &sel, &ao->channels)) + goto err_chmap; + + open_options = JackNullOption; + if (!p->opts->autostart) + open_options |= JackNoStartServer; + + p->client = jack_client_open(p->opts->client_name, open_options, NULL); + if (!p->client) { + MP_FATAL(ao, "cannot open server\n"); + goto err_client_open; + } + + if (create_ports(ao, ao->channels.num)) + goto err_create_ports; + + jack_set_process_callback(p->client, process, ao); + + ao->samplerate = jack_get_sample_rate(p->client); + // The actual device buffer can change, but this is enough for pre-buffer + ao->device_buffer = jack_get_buffer_size(p->client); + + jack_set_buffer_size_callback(p->client, buffer_size_cb, ao); + jack_set_graph_order_callback(p->client, graph_order_cb, ao); + + if (!ao_chmap_sel_get_def(ao, &sel, &ao->channels, p->num_ports)) + goto err_chmap_sel_get_def; + + return 0; + +err_chmap_sel_get_def: +err_create_ports: + jack_client_close(p->client); +err_client_open: +err_chmap: + return -1; +} + +// close audio device +static void uninit(struct ao *ao) +{ + struct priv *p = ao->priv; + + jack_client_close(p->client); +} + +const struct ao_driver audio_out_jack = { + .description = "JACK audio output", + .name = "jack", + .init = init, + .uninit = uninit, + .start = start, + .priv_size = sizeof(struct priv), + .global_opts = &ao_jack_conf, +}; diff --git a/audio/out/ao_lavc.c b/audio/out/ao_lavc.c new file mode 100644 index 0000000..163fdca --- /dev/null +++ b/audio/out/ao_lavc.c @@ -0,0 +1,337 @@ +/* + * audio encoding using libavformat + * + * Copyright (C) 2011-2012 Rudolf Polzer <divVerent@xonotic.org> + * NOTE: this file is partially based on ao_pcm.c by Atmosfear + * + * This file is 
part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <assert.h> +#include <limits.h> + +#include <libavutil/common.h> + +#include "config.h" +#include "options/options.h" +#include "common/common.h" +#include "audio/aframe.h" +#include "audio/chmap_avchannel.h" +#include "audio/format.h" +#include "audio/fmt-conversion.h" +#include "filters/filter_internal.h" +#include "filters/f_utils.h" +#include "mpv_talloc.h" +#include "ao.h" +#include "internal.h" +#include "common/msg.h" + +#include "common/encode_lavc.h" + +struct priv { + struct encoder_context *enc; + + int pcmhack; + int aframesize; + int framecount; + int64_t lastpts; + int sample_size; + double expected_next_pts; + struct mp_filter *filter_root; + struct mp_filter *fix_frame_size; + + AVRational worst_time_base; + + bool shutdown; +}; + +static bool write_frame(struct ao *ao, struct mp_frame frame); + +static bool supports_format(const AVCodec *codec, int format) +{ + for (const enum AVSampleFormat *sampleformat = codec->sample_fmts; + sampleformat && *sampleformat != AV_SAMPLE_FMT_NONE; + sampleformat++) + { + if (af_from_avformat(*sampleformat) == format) + return true; + } + return false; +} + +static void select_format(struct ao *ao, const AVCodec *codec) +{ + int formats[AF_FORMAT_COUNT + 1]; + af_get_best_sample_formats(ao->format, 
formats); + + for (int n = 0; formats[n]; n++) { + if (supports_format(codec, formats[n])) { + ao->format = formats[n]; + break; + } + } +} + +static void on_ready(void *ptr) +{ + struct ao *ao = ptr; + struct priv *ac = ao->priv; + + ac->worst_time_base = encoder_get_mux_timebase_unlocked(ac->enc); + + ao_add_events(ao, AO_EVENT_INITIAL_UNBLOCK); +} + +// open & setup audio device +static int init(struct ao *ao) +{ + struct priv *ac = ao->priv; + + ac->enc = encoder_context_alloc(ao->encode_lavc_ctx, STREAM_AUDIO, ao->log); + if (!ac->enc) + return -1; + talloc_steal(ac, ac->enc); + + AVCodecContext *encoder = ac->enc->encoder; + const AVCodec *codec = encoder->codec; + + int samplerate = af_select_best_samplerate(ao->samplerate, + codec->supported_samplerates); + if (samplerate > 0) + ao->samplerate = samplerate; + + encoder->time_base.num = 1; + encoder->time_base.den = ao->samplerate; + + encoder->sample_rate = ao->samplerate; + + struct mp_chmap_sel sel = {0}; + mp_chmap_sel_add_any(&sel); + if (!ao_chmap_sel_adjust2(ao, &sel, &ao->channels, false)) + goto fail; + mp_chmap_reorder_to_lavc(&ao->channels); + +#if !HAVE_AV_CHANNEL_LAYOUT + encoder->channels = ao->channels.num; + encoder->channel_layout = mp_chmap_to_lavc(&ao->channels); +#else + mp_chmap_to_av_layout(&encoder->ch_layout, &ao->channels); +#endif + + encoder->sample_fmt = AV_SAMPLE_FMT_NONE; + + select_format(ao, codec); + + ac->sample_size = af_fmt_to_bytes(ao->format); + encoder->sample_fmt = af_to_avformat(ao->format); + encoder->bits_per_raw_sample = ac->sample_size * 8; + + if (!encoder_init_codec_and_muxer(ac->enc, on_ready, ao)) + goto fail; + + ac->pcmhack = 0; + if (encoder->frame_size <= 1) + ac->pcmhack = av_get_bits_per_sample(encoder->codec_id) / 8; + + if (ac->pcmhack) { + ac->aframesize = 16384; // "enough" + } else { + ac->aframesize = encoder->frame_size; + } + + // enough frames for at least 0.25 seconds + ac->framecount = ceil(ao->samplerate * 0.25 / ac->aframesize); + // but at 
least one! + ac->framecount = MPMAX(ac->framecount, 1); + + ac->lastpts = AV_NOPTS_VALUE; + + ao->untimed = true; + + ao->device_buffer = ac->aframesize * ac->framecount; + + ac->filter_root = mp_filter_create_root(ao->global); + ac->fix_frame_size = mp_fixed_aframe_size_create(ac->filter_root, + ac->aframesize, true); + MP_HANDLE_OOM(ac->fix_frame_size); + + return 0; + +fail: + mp_mutex_unlock(&ao->encode_lavc_ctx->lock); + ac->shutdown = true; + return -1; +} + +// close audio device +static void uninit(struct ao *ao) +{ + struct priv *ac = ao->priv; + + if (!ac->shutdown) { + if (!write_frame(ao, MP_EOF_FRAME)) + MP_WARN(ao, "could not flush last frame\n"); + encoder_encode(ac->enc, NULL); + } + + talloc_free(ac->filter_root); +} + +// must get exactly ac->aframesize amount of data +static void encode(struct ao *ao, struct mp_aframe *af) +{ + struct priv *ac = ao->priv; + AVCodecContext *encoder = ac->enc->encoder; + double outpts = mp_aframe_get_pts(af); + + AVFrame *frame = mp_aframe_to_avframe(af); + MP_HANDLE_OOM(frame); + + frame->pts = rint(outpts * av_q2d(av_inv_q(encoder->time_base))); + + int64_t frame_pts = av_rescale_q(frame->pts, encoder->time_base, + ac->worst_time_base); + if (ac->lastpts != AV_NOPTS_VALUE && frame_pts <= ac->lastpts) { + // whatever the fuck this code does? + MP_WARN(ao, "audio frame pts went backwards (%d <- %d), autofixed\n", + (int)frame->pts, (int)ac->lastpts); + frame_pts = ac->lastpts + 1; + ac->lastpts = frame_pts; + frame->pts = av_rescale_q(frame_pts, ac->worst_time_base, + encoder->time_base); + frame_pts = av_rescale_q(frame->pts, encoder->time_base, + ac->worst_time_base); + } + ac->lastpts = frame_pts; + + frame->quality = encoder->global_quality; + encoder_encode(ac->enc, frame); + av_frame_free(&frame); +} + +static bool write_frame(struct ao *ao, struct mp_frame frame) +{ + struct priv *ac = ao->priv; + + // Can't push in frame if it doesn't want it output one. 
+ mp_pin_out_request_data(ac->fix_frame_size->pins[1]); + + if (!mp_pin_in_write(ac->fix_frame_size->pins[0], frame)) + return false; // shouldn't happenâ„¢ + + while (1) { + struct mp_frame fr = mp_pin_out_read(ac->fix_frame_size->pins[1]); + if (!fr.type) + break; + if (fr.type != MP_FRAME_AUDIO) + continue; + struct mp_aframe *af = fr.data; + encode(ao, af); + mp_frame_unref(&fr); + } + + return true; +} + +static bool audio_write(struct ao *ao, void **data, int samples) +{ + struct priv *ac = ao->priv; + struct encode_lavc_context *ectx = ao->encode_lavc_ctx; + + // See ao_driver.write_frames. + struct mp_aframe *af = mp_aframe_new_ref(*(struct mp_aframe **)data); + + double nextpts; + double pts = mp_aframe_get_pts(af); + double outpts = pts; + + // for ectx PTS fields + mp_mutex_lock(&ectx->lock); + + if (!ectx->options->rawts) { + // Fix and apply the discontinuity pts offset. + nextpts = pts; + if (ectx->discontinuity_pts_offset == MP_NOPTS_VALUE) { + ectx->discontinuity_pts_offset = ectx->next_in_pts - nextpts; + } else if (fabs(nextpts + ectx->discontinuity_pts_offset - + ectx->next_in_pts) > 30) + { + MP_WARN(ao, "detected an unexpected discontinuity (pts jumped by " + "%f seconds)\n", + nextpts + ectx->discontinuity_pts_offset - ectx->next_in_pts); + ectx->discontinuity_pts_offset = ectx->next_in_pts - nextpts; + } + + outpts = pts + ectx->discontinuity_pts_offset; + } + + // Calculate expected pts of next audio frame (input side). + ac->expected_next_pts = pts + mp_aframe_get_size(af) / (double) ao->samplerate; + + // Set next allowed input pts value (input side). 
+ if (!ectx->options->rawts) { + nextpts = ac->expected_next_pts + ectx->discontinuity_pts_offset; + if (nextpts > ectx->next_in_pts) + ectx->next_in_pts = nextpts; + } + + mp_mutex_unlock(&ectx->lock); + + mp_aframe_set_pts(af, outpts); + + return write_frame(ao, MAKE_FRAME(MP_FRAME_AUDIO, af)); +} + +static void get_state(struct ao *ao, struct mp_pcm_state *state) +{ + state->free_samples = 1; + state->queued_samples = 0; + state->delay = 0; +} + +static bool set_pause(struct ao *ao, bool paused) +{ + return true; // signal support so common code doesn't write silence +} + +static void start(struct ao *ao) +{ + // we use data immediately +} + +static void reset(struct ao *ao) +{ +} + +const struct ao_driver audio_out_lavc = { + .encode = true, + .description = "audio encoding using libavcodec", + .name = "lavc", + .initially_blocked = true, + .write_frames = true, + .priv_size = sizeof(struct priv), + .init = init, + .uninit = uninit, + .get_state = get_state, + .set_pause = set_pause, + .write = audio_write, + .start = start, + .reset = reset, +}; + +// vim: sw=4 ts=4 et tw=80 diff --git a/audio/out/ao_null.c b/audio/out/ao_null.c new file mode 100644 index 0000000..fcb61d2 --- /dev/null +++ b/audio/out/ao_null.c @@ -0,0 +1,230 @@ +/* + * null audio output driver + * + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +/* + * Note: this does much more than just ignoring audio output. It simulates + * (to some degree) an ideal AO. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <math.h> + +#include "mpv_talloc.h" + +#include "osdep/timer.h" +#include "options/m_option.h" +#include "common/common.h" +#include "common/msg.h" +#include "audio/format.h" +#include "ao.h" +#include "internal.h" + +struct priv { + bool paused; + double last_time; + float buffered; // samples + int buffersize; // samples + bool playing; + + bool untimed; + float bufferlen; // seconds + float speed; // multiplier + float latency_sec; // seconds + float latency; // samples + bool broken_eof; + bool broken_delay; + + // Minimal unit of audio samples that can be written at once. If play() is + // called with sizes not aligned to this, a rounded size will be returned. + // (This is not needed by the AO API, but many AOs behave this way.) + int outburst; // samples + + struct m_channels channel_layouts; + int format; +}; + +static void drain(struct ao *ao) +{ + struct priv *priv = ao->priv; + + if (ao->untimed) { + priv->buffered = 0; + return; + } + + if (priv->paused) + return; + + double now = mp_time_sec(); + if (priv->buffered > 0) { + priv->buffered -= (now - priv->last_time) * ao->samplerate * priv->speed; + if (priv->buffered < 0) + priv->buffered = 0; + } + priv->last_time = now; +} + +static int init(struct ao *ao) +{ + struct priv *priv = ao->priv; + + if (priv->format) + ao->format = priv->format; + + ao->untimed = priv->untimed; + + struct mp_chmap_sel sel = {.tmp = ao}; + if (priv->channel_layouts.num_chmaps) { + for (int n = 0; n < priv->channel_layouts.num_chmaps; n++) + mp_chmap_sel_add_map(&sel, &priv->channel_layouts.chmaps[n]); + } else { + mp_chmap_sel_add_any(&sel); + } + if (!ao_chmap_sel_adjust(ao, &sel, &ao->channels)) + mp_chmap_from_channels(&ao->channels, 2); + + priv->latency = priv->latency_sec * ao->samplerate; + + // A "buffer" for this many seconds of audio + 
int bursts = (int)(ao->samplerate * priv->bufferlen + 1) / priv->outburst;
+ // Buffer is a whole number of outbursts plus the simulated fixed latency.
+ ao->device_buffer = priv->outburst * bursts + priv->latency;
+
+ priv->last_time = mp_time_sec();
+
+ return 0;
+}
+
+// close audio device
+static void uninit(struct ao *ao)
+{
+}
+
+// stop playing and empty buffers (for seeking/pause)
+static void reset(struct ao *ao)
+{
+ struct priv *priv = ao->priv;
+ priv->buffered = 0;
+ priv->playing = false;
+}
+
+// Enter playback state; resets the simulated clock reference so that drain()
+// starts consuming buffered samples from "now".
+static void start(struct ao *ao)
+{
+ struct priv *priv = ao->priv;
+
+ if (priv->paused)
+ MP_ERR(ao, "illegal state: start() while paused\n");
+
+ drain(ao);
+ priv->paused = false;
+ priv->last_time = mp_time_sec();
+ priv->playing = true;
+}
+
+// Pause/unpause the simulated device; returns true to signal that pausing is
+// supported (so the common AO code does not fall back to writing silence).
+static bool set_pause(struct ao *ao, bool paused)
+{
+ struct priv *priv = ao->priv;
+
+ if (!priv->playing)
+ MP_ERR(ao, "illegal state: set_pause() while not playing\n");
+
+ if (priv->paused != paused) {
+
+ // Account for audio consumed up to this state change before
+ // freezing/unfreezing the clock.
+ drain(ao);
+ priv->paused = paused;
+ if (!priv->paused)
+ priv->last_time = mp_time_sec();
+ }
+
+ return true;
+}
+
+// Accept samples into the simulated buffer; cannot fail.
+static bool audio_write(struct ao *ao, void **data, int samples)
+{
+ struct priv *priv = ao->priv;
+
+ if (priv->buffered <= 0)
+ priv->buffered = priv->latency; // emulate fixed latency
+
+ priv->buffered += samples;
+ return true;
+}
+
+static void get_state(struct ao *ao, struct mp_pcm_state *state)
+{
+ struct priv *priv = ao->priv;
+
+ drain(ao);
+
+ // Round free space down to whole outbursts, mimicking real drivers.
+ state->free_samples = ao->device_buffer - priv->latency - priv->buffered;
+ state->free_samples = state->free_samples / priv->outburst * priv->outburst;
+ state->queued_samples = priv->buffered;
+
+ // Note how get_state returns the delay in audio device time (instead of
+ // adjusting for speed), since most AOs seem to also do that.
+ state->delay = priv->buffered;
+
+ // Drivers with broken EOF handling usually always report the same device-
+ // level delay that is additional to the buffer time.
+ if (priv->broken_eof && priv->buffered < priv->latency) + state->delay = priv->latency; + + state->delay /= ao->samplerate; + + if (priv->broken_delay) { // Report only multiples of outburst + double q = priv->outburst / (double)ao->samplerate; + if (state->delay > 0) + state->delay = (int)(state->delay / q) * q; + } + + state->playing = priv->playing && priv->buffered > 0; +} + +#define OPT_BASE_STRUCT struct priv + +const struct ao_driver audio_out_null = { + .description = "Null audio output", + .name = "null", + .init = init, + .uninit = uninit, + .reset = reset, + .get_state = get_state, + .set_pause = set_pause, + .write = audio_write, + .start = start, + .priv_size = sizeof(struct priv), + .priv_defaults = &(const struct priv) { + .bufferlen = 0.2, + .outburst = 256, + .speed = 1, + }, + .options = (const struct m_option[]) { + {"untimed", OPT_BOOL(untimed)}, + {"buffer", OPT_FLOAT(bufferlen), M_RANGE(0, 100)}, + {"outburst", OPT_INT(outburst), M_RANGE(1, 100000)}, + {"speed", OPT_FLOAT(speed), M_RANGE(0, 10000)}, + {"latency", OPT_FLOAT(latency_sec), M_RANGE(0, 100)}, + {"broken-eof", OPT_BOOL(broken_eof)}, + {"broken-delay", OPT_BOOL(broken_delay)}, + {"channel-layouts", OPT_CHANNELS(channel_layouts)}, + {"format", OPT_AUDIOFORMAT(format)}, + {0} + }, + .options_prefix = "ao-null", +}; diff --git a/audio/out/ao_openal.c b/audio/out/ao_openal.c new file mode 100644 index 0000000..7172908 --- /dev/null +++ b/audio/out/ao_openal.c @@ -0,0 +1,401 @@ +/* + * OpenAL audio output driver for MPlayer + * + * Copyleft 2006 by Reimar Döffinger (Reimar.Doeffinger@stud.uni-karlsruhe.de) + * + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <stdlib.h> +#include <stdio.h> +#include <inttypes.h> +#ifdef OPENAL_AL_H +#include <OpenAL/alc.h> +#include <OpenAL/al.h> +#include <OpenAL/alext.h> +#else +#include <AL/alc.h> +#include <AL/al.h> +#include <AL/alext.h> +#endif + +#include "common/msg.h" + +#include "ao.h" +#include "internal.h" +#include "audio/format.h" +#include "osdep/timer.h" +#include "options/m_option.h" + +#define MAX_CHANS MP_NUM_CHANNELS +#define MAX_BUF 128 +#define MAX_SAMPLES 32768 +static ALuint buffers[MAX_BUF]; +static ALuint buffer_size[MAX_BUF]; +static ALuint source; + +static int cur_buf; +static int unqueue_buf; + +static struct ao *ao_data; + +struct priv { + ALenum al_format; + int num_buffers; + int num_samples; + bool direct_channels; +}; + +static int control(struct ao *ao, enum aocontrol cmd, void *arg) +{ + switch (cmd) { + case AOCONTROL_GET_VOLUME: + case AOCONTROL_SET_VOLUME: { + ALfloat volume; + float *vol = arg; + if (cmd == AOCONTROL_SET_VOLUME) { + volume = *vol / 100.0; + alListenerf(AL_GAIN, volume); + } + alGetListenerf(AL_GAIN, &volume); + *vol = volume * 100; + return CONTROL_TRUE; + } + case AOCONTROL_GET_MUTE: + case AOCONTROL_SET_MUTE: { + bool mute = *(bool *)arg; + + // openal has no mute control, only gain. 
+ // Thus reverse the muted state to get required gain + ALfloat al_mute = (ALfloat)(!mute); + if (cmd == AOCONTROL_SET_MUTE) { + alSourcef(source, AL_GAIN, al_mute); + } + alGetSourcef(source, AL_GAIN, &al_mute); + *(bool *)arg = !((bool)al_mute); + return CONTROL_TRUE; + } + + } + return CONTROL_UNKNOWN; +} + +static enum af_format get_supported_format(int format) +{ + switch (format) { + case AF_FORMAT_U8: + if (alGetEnumValue((ALchar*)"AL_FORMAT_MONO8")) + return AF_FORMAT_U8; + break; + + case AF_FORMAT_S16: + if (alGetEnumValue((ALchar*)"AL_FORMAT_MONO16")) + return AF_FORMAT_S16; + break; + + case AF_FORMAT_S32: + if (strstr(alGetString(AL_RENDERER), "X-Fi") != NULL) + return AF_FORMAT_S32; + break; + + case AF_FORMAT_FLOAT: + if (alIsExtensionPresent((ALchar*)"AL_EXT_float32") == AL_TRUE) + return AF_FORMAT_FLOAT; + break; + } + return AL_FALSE; +} + +static ALenum get_supported_layout(int format, int channels) +{ + const char *channel_str[] = { + [1] = "MONO", + [2] = "STEREO", + [4] = "QUAD", + [6] = "51CHN", + [7] = "61CHN", + [8] = "71CHN", + }; + const char *format_str[] = { + [AF_FORMAT_U8] = "8", + [AF_FORMAT_S16] = "16", + [AF_FORMAT_S32] = "32", + [AF_FORMAT_FLOAT] = "_FLOAT32", + }; + if (channel_str[channels] == NULL || format_str[format] == NULL) + return AL_FALSE; + + char enum_name[32]; + // AF_FORMAT_FLOAT uses same enum name as AF_FORMAT_S32 for multichannel + // playback, while it is different for mono and stereo. + // OpenAL Soft does not support AF_FORMAT_S32 and seems to reuse the names. 
+ if (channels > 2 && format == AF_FORMAT_FLOAT) + format = AF_FORMAT_S32; + snprintf(enum_name, sizeof(enum_name), "AL_FORMAT_%s%s", channel_str[channels], + format_str[format]); + + if (alGetEnumValue((ALchar*)enum_name)) { + return alGetEnumValue((ALchar*)enum_name); + } + return AL_FALSE; +} + +// close audio device +static void uninit(struct ao *ao) +{ + struct priv *p = ao->priv; + alSourceStop(source); + alSourcei(source, AL_BUFFER, 0); + + alDeleteBuffers(p->num_buffers, buffers); + alDeleteSources(1, &source); + + ALCcontext *ctx = alcGetCurrentContext(); + ALCdevice *dev = alcGetContextsDevice(ctx); + alcMakeContextCurrent(NULL); + alcDestroyContext(ctx); + alcCloseDevice(dev); + ao_data = NULL; +} + +static int init(struct ao *ao) +{ + float position[3] = {0, 0, 0}; + float direction[6] = {0, 0, -1, 0, 1, 0}; + ALCdevice *dev = NULL; + ALCcontext *ctx = NULL; + ALCint freq = 0; + ALCint attribs[] = {ALC_FREQUENCY, ao->samplerate, 0, 0}; + struct priv *p = ao->priv; + if (ao_data) { + MP_FATAL(ao, "Not reentrant!\n"); + return -1; + } + ao_data = ao; + char *dev_name = ao->device; + dev = alcOpenDevice(dev_name && dev_name[0] ? 
dev_name : NULL); + if (!dev) { + MP_FATAL(ao, "could not open device\n"); + goto err_out; + } + ctx = alcCreateContext(dev, attribs); + alcMakeContextCurrent(ctx); + alListenerfv(AL_POSITION, position); + alListenerfv(AL_ORIENTATION, direction); + + alGenSources(1, &source); + if (p->direct_channels) { + if (alIsExtensionPresent("AL_SOFT_direct_channels_remix")) { + alSourcei(source, + alGetEnumValue((ALchar*)"AL_DIRECT_CHANNELS_SOFT"), + alcGetEnumValue(dev, "AL_REMIX_UNMATCHED_SOFT")); + } else { + MP_WARN(ao, "Direct channels aren't supported by this version of OpenAL\n"); + } + } + + cur_buf = 0; + unqueue_buf = 0; + for (int i = 0; i < p->num_buffers; ++i) { + buffer_size[i] = 0; + } + + alGenBuffers(p->num_buffers, buffers); + + alcGetIntegerv(dev, ALC_FREQUENCY, 1, &freq); + if (alcGetError(dev) == ALC_NO_ERROR && freq) + ao->samplerate = freq; + + // Check sample format + int try_formats[AF_FORMAT_COUNT + 1]; + enum af_format sample_format = 0; + af_get_best_sample_formats(ao->format, try_formats); + for (int n = 0; try_formats[n]; n++) { + sample_format = get_supported_format(try_formats[n]); + if (sample_format != AF_FORMAT_UNKNOWN) { + ao->format = try_formats[n]; + break; + } + } + + if (sample_format == AF_FORMAT_UNKNOWN) { + MP_FATAL(ao, "Can't find appropriate sample format.\n"); + uninit(ao); + goto err_out; + } + + // Check if OpenAL driver supports the desired number of channels. + int num_channels = ao->channels.num; + do { + p->al_format = get_supported_layout(sample_format, num_channels); + if (p->al_format == AL_FALSE) { + num_channels = num_channels - 1; + } + } while (p->al_format == AL_FALSE && num_channels > 1); + + // Request number of speakers for output from ao. 
+ const struct mp_chmap possible_layouts[] = {
+ {0}, // empty
+ MP_CHMAP_INIT_MONO, // mono
+ MP_CHMAP_INIT_STEREO, // stereo
+ {0}, // 2.1
+ MP_CHMAP4(FL, FR, BL, BR), // 4.0
+ {0}, // 5.0
+ MP_CHMAP6(FL, FR, FC, LFE, BL, BR), // 5.1
+ MP_CHMAP7(FL, FR, FC, LFE, SL, SR, BC), // 6.1
+ MP_CHMAP8(FL, FR, FC, LFE, BL, BR, SL, SR), // 7.1
+ };
+ ao->channels = possible_layouts[num_channels];
+ if (!ao->channels.num)
+ mp_chmap_set_unknown(&ao->channels, num_channels);
+
+ if (p->al_format == AL_FALSE || !mp_chmap_is_valid(&ao->channels)) {
+ MP_FATAL(ao, "Can't find appropriate channel layout.\n");
+ uninit(ao);
+ goto err_out;
+ }
+
+ ao->device_buffer = p->num_buffers * p->num_samples;
+ return 0;
+
+err_out:
+ ao_data = NULL;
+ return -1;
+}
+
+// Reclaim buffers the source has finished playing, handling wrap-around of
+// the circular unqueue index (buffers are queued/unqueued in ring order).
+static void unqueue_buffers(struct ao *ao)
+{
+ struct priv *q = ao->priv;
+ ALint p;
+ int till_wrap = q->num_buffers - unqueue_buf;
+ alGetSourcei(source, AL_BUFFERS_PROCESSED, &p);
+ if (p >= till_wrap) {
+ alSourceUnqueueBuffers(source, till_wrap, &buffers[unqueue_buf]);
+ unqueue_buf = 0;
+ p -= till_wrap;
+ }
+ if (p) {
+ alSourceUnqueueBuffers(source, p, &buffers[unqueue_buf]);
+ unqueue_buf += p;
+ }
+}
+
+// Stop playback and drop everything still queued on the source.
+static void reset(struct ao *ao)
+{
+ alSourceStop(source);
+ unqueue_buffers(ao);
+}
+
+// Pause/resume by pausing or (re)starting the OpenAL source.
+static bool audio_set_pause(struct ao *ao, bool pause)
+{
+ if (pause) {
+ alSourcePause(source);
+ } else {
+ alSourcePlay(source);
+ }
+ return true;
+}
+
+// Split the incoming samples into num_samples-sized chunks, upload each into
+// the next ring buffer, and queue it on the source.
+static bool audio_write(struct ao *ao, void **data, int samples)
+{
+ struct priv *p = ao->priv;
+
+ int num = (samples + p->num_samples - 1) / p->num_samples;
+
+ for (int i = 0; i < num; i++) {
+ char *d = *data;
+ buffer_size[cur_buf] =
+ MPMIN(samples - i * p->num_samples, p->num_samples);
+ // NOTE(review): the source offset uses buffer_size[cur_buf], which for
+ // the final (partial) chunk is smaller than p->num_samples; the offset
+ // for chunk i would normally be i * p->num_samples -- confirm behavior
+ // for writes that are not a multiple of num_samples.
+ d += i * buffer_size[cur_buf] * ao->sstride;
+ alBufferData(buffers[cur_buf], p->al_format, d,
+ buffer_size[cur_buf] * ao->sstride, ao->samplerate);
+ alSourceQueueBuffers(source, 1, &buffers[cur_buf]);
+ cur_buf = (cur_buf + 1) % p->num_buffers;
+ }
+
+ return true;
+}
+
+static
void audio_start(struct ao *ao) +{ + alSourcePlay(source); +} + +static void get_state(struct ao *ao, struct mp_pcm_state *state) +{ + struct priv *p = ao->priv; + + ALint queued; + unqueue_buffers(ao); + alGetSourcei(source, AL_BUFFERS_QUEUED, &queued); + + double source_offset = 0; + if(alIsExtensionPresent("AL_SOFT_source_latency")) { + ALdouble offsets[2]; + LPALGETSOURCEDVSOFT alGetSourcedvSOFT = alGetProcAddress("alGetSourcedvSOFT"); + alGetSourcedvSOFT(source, AL_SEC_OFFSET_LATENCY_SOFT, offsets); + // Additional latency to the play buffer, the remaining seconds to be + // played minus the offset (seconds already played) + source_offset = offsets[1] - offsets[0]; + } else { + float offset = 0; + alGetSourcef(source, AL_SEC_OFFSET, &offset); + source_offset = -offset; + } + + int queued_samples = 0; + for (int i = 0, index = cur_buf; i < queued; ++i) { + queued_samples += buffer_size[index]; + index = (index + 1) % p->num_buffers; + } + + state->delay = queued_samples / (double)ao->samplerate + source_offset; + + state->queued_samples = queued_samples; + state->free_samples = MPMAX(p->num_buffers - queued, 0) * p->num_samples; + + ALint source_state = 0; + alGetSourcei(source, AL_SOURCE_STATE, &source_state); + state->playing = source_state == AL_PLAYING; +} + +#define OPT_BASE_STRUCT struct priv + +const struct ao_driver audio_out_openal = { + .description = "OpenAL audio output", + .name = "openal", + .init = init, + .uninit = uninit, + .control = control, + .get_state = get_state, + .write = audio_write, + .start = audio_start, + .set_pause = audio_set_pause, + .reset = reset, + .priv_size = sizeof(struct priv), + .priv_defaults = &(const struct priv) { + .num_buffers = 4, + .num_samples = 8192, + .direct_channels = true, + }, + .options = (const struct m_option[]) { + {"num-buffers", OPT_INT(num_buffers), M_RANGE(2, MAX_BUF)}, + {"num-samples", OPT_INT(num_samples), M_RANGE(256, MAX_SAMPLES)}, + {"direct-channels", OPT_BOOL(direct_channels)}, + {0} + }, + 
.options_prefix = "openal", +}; diff --git a/audio/out/ao_opensles.c b/audio/out/ao_opensles.c new file mode 100644 index 0000000..ddcff19 --- /dev/null +++ b/audio/out/ao_opensles.c @@ -0,0 +1,265 @@ +/* + * OpenSL ES audio output driver. + * Copyright (C) 2016 Ilya Zhuravlev <whatever@xyz.is> + * + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "ao.h" +#include "internal.h" +#include "common/msg.h" +#include "audio/format.h" +#include "options/m_option.h" +#include "osdep/threads.h" +#include "osdep/timer.h" + +#include <SLES/OpenSLES.h> +#include <SLES/OpenSLES_Android.h> + +struct priv { + SLObjectItf sl, output_mix, player; + SLBufferQueueItf buffer_queue; + SLEngineItf engine; + SLPlayItf play; + void *buf; + int bytes_per_enqueue; + mp_mutex buffer_lock; + double audio_latency; + + int frames_per_enqueue; + int buffer_size_in_ms; +}; + +#define DESTROY(thing) \ + if (p->thing) { \ + (*p->thing)->Destroy(p->thing); \ + p->thing = NULL; \ + } + +static void uninit(struct ao *ao) +{ + struct priv *p = ao->priv; + + DESTROY(player); + DESTROY(output_mix); + DESTROY(sl); + + p->buffer_queue = NULL; + p->engine = NULL; + p->play = NULL; + + mp_mutex_destroy(&p->buffer_lock); + + free(p->buf); + p->buf = NULL; +} + +#undef DESTROY + +static void buffer_callback(SLBufferQueueItf buffer_queue, void *context) +{ + struct ao 
*ao = context; + struct priv *p = ao->priv; + SLresult res; + double delay; + + mp_mutex_lock(&p->buffer_lock); + + delay = p->frames_per_enqueue / (double)ao->samplerate; + delay += p->audio_latency; + ao_read_data(ao, &p->buf, p->frames_per_enqueue, + mp_time_ns() + MP_TIME_S_TO_NS(delay)); + + res = (*buffer_queue)->Enqueue(buffer_queue, p->buf, p->bytes_per_enqueue); + if (res != SL_RESULT_SUCCESS) + MP_ERR(ao, "Failed to Enqueue: %d\n", res); + + mp_mutex_unlock(&p->buffer_lock); +} + +#define CHK(stmt) \ + { \ + SLresult res = stmt; \ + if (res != SL_RESULT_SUCCESS) { \ + MP_ERR(ao, "%s: %d\n", #stmt, res); \ + goto error; \ + } \ + } + +static int init(struct ao *ao) +{ + struct priv *p = ao->priv; + SLDataLocator_BufferQueue locator_buffer_queue; + SLDataLocator_OutputMix locator_output_mix; + SLAndroidDataFormat_PCM_EX pcm; + SLDataSource audio_source; + SLDataSink audio_sink; + + // This AO only supports two channels at the moment + mp_chmap_from_channels(&ao->channels, 2); + // Upstream "Wilhelm" supports only 8000 <= rate <= 192000 + ao->samplerate = MPCLAMP(ao->samplerate, 8000, 192000); + + CHK(slCreateEngine(&p->sl, 0, NULL, 0, NULL, NULL)); + CHK((*p->sl)->Realize(p->sl, SL_BOOLEAN_FALSE)); + CHK((*p->sl)->GetInterface(p->sl, SL_IID_ENGINE, (void*)&p->engine)); + CHK((*p->engine)->CreateOutputMix(p->engine, &p->output_mix, 0, NULL, NULL)); + CHK((*p->output_mix)->Realize(p->output_mix, SL_BOOLEAN_FALSE)); + + locator_buffer_queue.locatorType = SL_DATALOCATOR_BUFFERQUEUE; + locator_buffer_queue.numBuffers = 8; + + if (af_fmt_is_int(ao->format)) { + // Be future-proof + if (af_fmt_to_bytes(ao->format) > 2) + ao->format = AF_FORMAT_S32; + else + ao->format = af_fmt_from_planar(ao->format); + pcm.formatType = SL_DATAFORMAT_PCM; + } else { + ao->format = AF_FORMAT_FLOAT; + pcm.formatType = SL_ANDROID_DATAFORMAT_PCM_EX; + pcm.representation = SL_ANDROID_PCM_REPRESENTATION_FLOAT; + } + pcm.numChannels = ao->channels.num; + pcm.containerSize = 
pcm.bitsPerSample = 8 * af_fmt_to_bytes(ao->format); + pcm.channelMask = SL_SPEAKER_FRONT_LEFT | SL_SPEAKER_FRONT_RIGHT; + pcm.endianness = SL_BYTEORDER_LITTLEENDIAN; + pcm.sampleRate = ao->samplerate * 1000; + + if (p->buffer_size_in_ms) { + ao->device_buffer = ao->samplerate * p->buffer_size_in_ms / 1000; + // As the purpose of buffer_size_in_ms is to request a specific + // soft buffer size: + ao->def_buffer = 0; + } + + // But it does not make sense if it is smaller than the enqueue size: + if (p->frames_per_enqueue) { + ao->device_buffer = MPMAX(ao->device_buffer, p->frames_per_enqueue); + } else { + if (ao->device_buffer) { + p->frames_per_enqueue = ao->device_buffer; + } else if (ao->def_buffer) { + p->frames_per_enqueue = ao->def_buffer * ao->samplerate; + } else { + MP_ERR(ao, "Enqueue size is not set and can neither be derived\n"); + goto error; + } + } + + p->bytes_per_enqueue = p->frames_per_enqueue * ao->channels.num * + af_fmt_to_bytes(ao->format); + p->buf = calloc(1, p->bytes_per_enqueue); + if (!p->buf) { + MP_ERR(ao, "Failed to allocate device buffer\n"); + goto error; + } + + int r = mp_mutex_init(&p->buffer_lock); + if (r) { + MP_ERR(ao, "Failed to initialize the mutex: %d\n", r); + goto error; + } + + audio_source.pFormat = (void*)&pcm; + audio_source.pLocator = (void*)&locator_buffer_queue; + + locator_output_mix.locatorType = SL_DATALOCATOR_OUTPUTMIX; + locator_output_mix.outputMix = p->output_mix; + + audio_sink.pLocator = (void*)&locator_output_mix; + audio_sink.pFormat = NULL; + + SLInterfaceID iid_array[] = { SL_IID_BUFFERQUEUE, SL_IID_ANDROIDCONFIGURATION }; + SLboolean required[] = { SL_BOOLEAN_TRUE, SL_BOOLEAN_FALSE }; + CHK((*p->engine)->CreateAudioPlayer(p->engine, &p->player, &audio_source, + &audio_sink, 2, iid_array, required)); + + CHK((*p->player)->Realize(p->player, SL_BOOLEAN_FALSE)); + CHK((*p->player)->GetInterface(p->player, SL_IID_PLAY, (void*)&p->play)); + CHK((*p->player)->GetInterface(p->player, SL_IID_BUFFERQUEUE, + 
(void*)&p->buffer_queue)); + CHK((*p->buffer_queue)->RegisterCallback(p->buffer_queue, + buffer_callback, ao)); + CHK((*p->play)->SetPlayState(p->play, SL_PLAYSTATE_PLAYING)); + + SLAndroidConfigurationItf android_config; + SLuint32 audio_latency = 0, value_size = sizeof(SLuint32); + + SLint32 get_interface_result = (*p->player)->GetInterface( + p->player, + SL_IID_ANDROIDCONFIGURATION, + &android_config + ); + + if (get_interface_result == SL_RESULT_SUCCESS) { + SLint32 get_configuration_result = (*android_config)->GetConfiguration( + android_config, + (const SLchar *)"androidGetAudioLatency", + &value_size, + &audio_latency + ); + + if (get_configuration_result == SL_RESULT_SUCCESS) { + p->audio_latency = (double)audio_latency / 1000.0; + MP_INFO(ao, "Device latency is %f\n", p->audio_latency); + } + } + + return 1; +error: + uninit(ao); + return -1; +} + +#undef CHK + +static void reset(struct ao *ao) +{ + struct priv *p = ao->priv; + (*p->buffer_queue)->Clear(p->buffer_queue); +} + +static void resume(struct ao *ao) +{ + struct priv *p = ao->priv; + buffer_callback(p->buffer_queue, ao); +} + +#define OPT_BASE_STRUCT struct priv + +const struct ao_driver audio_out_opensles = { + .description = "OpenSL ES audio output", + .name = "opensles", + .init = init, + .uninit = uninit, + .reset = reset, + .start = resume, + + .priv_size = sizeof(struct priv), + .priv_defaults = &(const struct priv) { + .buffer_size_in_ms = 250, + }, + .options = (const struct m_option[]) { + {"frames-per-enqueue", OPT_INT(frames_per_enqueue), + M_RANGE(1, 96000)}, + {"buffer-size-in-ms", OPT_INT(buffer_size_in_ms), + M_RANGE(0, 500)}, + {0} + }, + .options_prefix = "opensles", +}; diff --git a/audio/out/ao_oss.c b/audio/out/ao_oss.c new file mode 100644 index 0000000..5c0b8c9 --- /dev/null +++ b/audio/out/ao_oss.c @@ -0,0 +1,400 @@ +/* + * OSS audio output driver + * + * Original author: A'rpi + * Support for >2 output channels added 2001-11-25 + * - Steve Davies <steve@daviesfam.org> + * 
Rozhuk Ivan <rozhuk.im@gmail.com> 2020-2023 + * + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <errno.h> +#include <fcntl.h> +#include <stdio.h> +#include <unistd.h> + +#include <sys/ioctl.h> +#include <sys/soundcard.h> +#include <sys/stat.h> +#if defined(__DragonFly__) || defined(__FreeBSD__) +#include <sys/sysctl.h> +#endif +#include <sys/types.h> + +#include "audio/format.h" +#include "common/msg.h" +#include "options/options.h" +#include "osdep/endian.h" +#include "osdep/io.h" +#include "ao.h" +#include "internal.h" + +#ifndef AFMT_AC3 +#define AFMT_AC3 -1 +#endif + +#define PATH_DEV_DSP "/dev/dsp" +#define PATH_DEV_MIXER "/dev/mixer" + +struct priv { + int dsp_fd; + double bps; /* Bytes per second. 
*/ +};
+
+/* like alsa except for 6.1 and 7.1, from pcm/matrix_map.h */
+static const struct mp_chmap oss_layouts[MP_NUM_CHANNELS + 1] = {
+ {0}, /* empty */
+ MP_CHMAP_INIT_MONO, /* mono */
+ MP_CHMAP2(FL, FR), /* stereo */
+ MP_CHMAP3(FL, FR, LFE), /* 2.1 */
+ MP_CHMAP4(FL, FR, BL, BR), /* 4.0 */
+ MP_CHMAP5(FL, FR, BL, BR, FC), /* 5.0 */
+ MP_CHMAP6(FL, FR, BL, BR, FC, LFE), /* 5.1 */
+ MP_CHMAP7(FL, FR, BL, BR, FC, LFE, BC), /* 6.1 */
+ MP_CHMAP8(FL, FR, BL, BR, FC, LFE, SL, SR), /* 7.1 */
+};
+
+#if !defined(AFMT_S32_NE) && defined(AFMT_S32_LE) && defined(AFMT_S32_BE)
+/* NOTE(review): 'AFMT_S32MP_SELECT_LE_BE' looks like a transcription artifact;
+ * upstream defines AFMT_S32_NE as MP_SELECT_LE_BE(AFMT_S32_LE, AFMT_S32_BE)
+ * -- confirm against the original file. */
+#define AFMT_S32_NE AFMT_S32MP_SELECT_LE_BE(AFMT_S32_LE, AFMT_S32_BE)
+#endif
+
+/* Mapping between OSS AFMT_* constants and mpv AF_FORMAT_* values,
+ * terminated by {-1, -1}. */
+static const int format_table[][2] = {
+ {AFMT_U8, AF_FORMAT_U8},
+ {AFMT_S16_NE, AF_FORMAT_S16},
+#ifdef AFMT_S32_NE
+ {AFMT_S32_NE, AF_FORMAT_S32},
+#endif
+#ifdef AFMT_FLOAT
+ {AFMT_FLOAT, AF_FORMAT_FLOAT},
+#endif
+#ifdef AFMT_MPEG
+ {AFMT_MPEG, AF_FORMAT_S_MP3},
+#endif
+ {-1, -1}
+};
+
+/* Log an ioctl() failure including the calling function name and errno. */
+#define MP_WARN_IOCTL_ERR(__ao) \
+ MP_WARN((__ao), "%s: ioctl() fail, err = %i: %s\n", \
+ __FUNCTION__, errno, strerror(errno))
+
+
+static void uninit(struct ao *ao);
+
+
+/* Fill buf (always NUL-terminated) with a human-readable description of the
+ * OSS device with index dev_idx; leaves an empty string when unavailable. */
+static void device_descr_get(size_t dev_idx, char *buf, size_t buf_size)
+{
+#if defined(__DragonFly__) || defined(__FreeBSD__)
+ char dev_path[32];
+ size_t tmp = (buf_size - 1);
+
+ snprintf(dev_path, sizeof(dev_path), "dev.pcm.%zu.%%desc", dev_idx);
+ if (sysctlbyname(dev_path, buf, &tmp, NULL, 0) != 0) {
+ tmp = 0;
+ }
+ buf[tmp] = 0x00;
+#elif defined(SOUND_MIXER_INFO)
+ size_t tmp = 0;
+ char dev_path[32];
+ mixer_info mi;
+
+ snprintf(dev_path, sizeof(dev_path), PATH_DEV_MIXER"%zu", dev_idx);
+ /* NOTE(review): fd is not checked; if open() fails, the ioctl on -1
+ * fails with EBADF and buf stays empty. Appears to be deliberate
+ * best-effort behavior -- confirm. */
+ int fd = open(dev_path, O_RDONLY);
+ if (ioctl(fd, SOUND_MIXER_INFO, &mi) == 0) {
+ strncpy(buf, mi.name, buf_size - 1);
+ tmp = (buf_size - 1);
+ }
+ close(fd);
+ buf[tmp] = 0x00;
+#else
+ buf[0] = 0x00;
+#endif
+}
+
+/* Translate an mpv AF_FORMAT_* value to the OSS AFMT_* constant, or -1 if
+ * there is no mapping. */
+static int format2oss(int format)
+{
+ for (size_t i = 0; format_table[i][0] != -1; i++) {
+ if (format_table[i][1] == format)
+ return
format_table[i][0]; + } + return -1; +} + +static bool try_format(struct ao *ao, int *format) +{ + struct priv *p = ao->priv; + int oss_format = format2oss(*format); + + if (oss_format == -1 && af_fmt_is_spdif(*format)) + oss_format = AFMT_AC3; + + if (oss_format == -1) { + MP_VERBOSE(ao, "Unknown/not supported internal format: %s\n", + af_fmt_to_str(*format)); + *format = 0; + return false; + } + + return (ioctl(p->dsp_fd, SNDCTL_DSP_SETFMT, &oss_format) != -1); +} + +static int init(struct ao *ao) +{ + struct priv *p = ao->priv; + struct mp_chmap channels = ao->channels; + audio_buf_info info; + size_t i; + int format, samplerate, nchannels, reqchannels, trig = 0; + int best_sample_formats[AF_FORMAT_COUNT + 1]; + const char *device = ((ao->device) ? ao->device : PATH_DEV_DSP); + + /* Opening device. */ + MP_VERBOSE(ao, "Using '%s' audio device.\n", device); + p->dsp_fd = open(device, (O_WRONLY | O_CLOEXEC)); + if (p->dsp_fd < 0) { + MP_ERR(ao, "Can't open audio device %s: %s.\n", + device, mp_strerror(errno)); + goto err_out; + } + + /* Selecting sound format. */ + format = af_fmt_from_planar(ao->format); + af_get_best_sample_formats(format, best_sample_formats); + for (i = 0; best_sample_formats[i]; i++) { + format = best_sample_formats[i]; + if (try_format(ao, &format)) + break; + } + if (!format) { + MP_ERR(ao, "Can't set sample format.\n"); + goto err_out; + } + MP_VERBOSE(ao, "Sample format: %s\n", af_fmt_to_str(format)); + + /* Channels count. 
*/ + if (af_fmt_is_spdif(format)) { + nchannels = reqchannels = channels.num; + if (ioctl(p->dsp_fd, SNDCTL_DSP_CHANNELS, &nchannels) == -1) { + MP_ERR(ao, "Failed to set audio device to %d channels.\n", + reqchannels); + goto err_out_ioctl; + } + } else { + struct mp_chmap_sel sel = {0}; + for (i = 0; i < MP_ARRAY_SIZE(oss_layouts); i++) { + mp_chmap_sel_add_map(&sel, &oss_layouts[i]); + } + if (!ao_chmap_sel_adjust(ao, &sel, &channels)) + goto err_out; + nchannels = reqchannels = channels.num; + if (ioctl(p->dsp_fd, SNDCTL_DSP_CHANNELS, &nchannels) == -1) { + MP_ERR(ao, "Failed to set audio device to %d channels.\n", + reqchannels); + goto err_out_ioctl; + } + if (nchannels != reqchannels) { + /* Update number of channels to OSS suggested value. */ + if (!ao_chmap_sel_get_def(ao, &sel, &channels, nchannels)) + goto err_out; + } + MP_VERBOSE(ao, "Using %d channels (requested: %d).\n", + channels.num, reqchannels); + } + + /* Sample rate. */ + samplerate = ao->samplerate; + if (ioctl(p->dsp_fd, SNDCTL_DSP_SPEED, &samplerate) == -1) + goto err_out_ioctl; + MP_VERBOSE(ao, "Using %d Hz samplerate.\n", samplerate); + + /* Get buffer size. */ + if (ioctl(p->dsp_fd, SNDCTL_DSP_GETOSPACE, &info) == -1) + goto err_out_ioctl; + /* See ao.c ao->sstride initializations and get_state(). */ + ao->device_buffer = ((info.fragstotal * info.fragsize) / + af_fmt_to_bytes(format)); + if (!af_fmt_is_planar(format)) { + ao->device_buffer /= channels.num; + } + + /* Do not start playback after data written. */ + if (ioctl(p->dsp_fd, SNDCTL_DSP_SETTRIGGER, &trig) == -1) + goto err_out_ioctl; + + /* Update sound params. 
*/ + ao->format = format; + ao->samplerate = samplerate; + ao->channels = channels; + p->bps = (channels.num * samplerate * af_fmt_to_bytes(format)); + + return 0; + +err_out_ioctl: + MP_WARN_IOCTL_ERR(ao); +err_out: + uninit(ao); + return -1; +} + +static void uninit(struct ao *ao) +{ + struct priv *p = ao->priv; + + if (p->dsp_fd == -1) + return; + ioctl(p->dsp_fd, SNDCTL_DSP_HALT, NULL); + close(p->dsp_fd); + p->dsp_fd = -1; +} + +static int control(struct ao *ao, enum aocontrol cmd, void *arg) +{ + struct priv *p = ao->priv; + float *vol = arg; + int v; + + if (p->dsp_fd < 0) + return CONTROL_ERROR; + + switch (cmd) { + case AOCONTROL_GET_VOLUME: + if (ioctl(p->dsp_fd, SNDCTL_DSP_GETPLAYVOL, &v) == -1) { + MP_WARN_IOCTL_ERR(ao); + return CONTROL_ERROR; + } + *vol = ((v & 0x00ff) + ((v & 0xff00) >> 8)) / 2.0; + return CONTROL_OK; + case AOCONTROL_SET_VOLUME: + v = ((int)*vol << 8) | (int)*vol; + if (ioctl(p->dsp_fd, SNDCTL_DSP_SETPLAYVOL, &v) == -1) { + MP_WARN_IOCTL_ERR(ao); + return CONTROL_ERROR; + } + return CONTROL_OK; + } + + return CONTROL_UNKNOWN; +} + +static void reset(struct ao *ao) +{ + struct priv *p = ao->priv; + int trig = 0; + + /* Clear buf and do not start playback after data written. 
*/
+ if (ioctl(p->dsp_fd, SNDCTL_DSP_HALT, NULL) == -1 ||
+ ioctl(p->dsp_fd, SNDCTL_DSP_SETTRIGGER, &trig) == -1)
+ {
+ MP_WARN_IOCTL_ERR(ao);
+ MP_WARN(ao, "Force reinitialize audio device.\n");
+ /* Reopen from scratch as a fallback when HALT/SETTRIGGER fails. */
+ uninit(ao);
+ init(ao);
+ }
+}
+
+/* Start consuming previously written data; playback was held off in init()
+ * and reset() via SETTRIGGER with trigger bits cleared. */
+static void start(struct ao *ao)
+{
+ struct priv *p = ao->priv;
+ int trig = PCM_ENABLE_OUTPUT;
+
+ if (ioctl(p->dsp_fd, SNDCTL_DSP_SETTRIGGER, &trig) == -1) {
+ MP_WARN_IOCTL_ERR(ao);
+ return;
+ }
+}
+
+/* Write one interleaved block of samples to the device. Returns false on
+ * write errors or unexpected partial writes. */
+static bool audio_write(struct ao *ao, void **data, int samples)
+{
+ struct priv *p = ao->priv;
+ ssize_t rc;
+ const size_t size = (samples * ao->sstride);
+
+ if (size == 0)
+ return true;
+
+ /* Retry writes interrupted by signals (EINTR); any other error aborts. */
+ while ((rc = write(p->dsp_fd, data[0], size)) == -1) {
+ if (errno == EINTR)
+ continue;
+ MP_WARN(ao, "audio_write: write() fail, err = %i: %s.\n",
+ errno, strerror(errno));
+ return false;
+ }
+ if ((size_t)rc != size) {
+ MP_WARN(ao, "audio_write: unexpected partial write: required: %zu, written: %zu.\n",
+ size, (size_t)rc);
+ return false;
+ }
+
+ return true;
+}
+
+static void get_state(struct ao *ao, struct mp_pcm_state *state)
+{
+ struct priv *p = ao->priv;
+ audio_buf_info info;
+ int odelay;
+
+ if (ioctl(p->dsp_fd, SNDCTL_DSP_GETOSPACE, &info) == -1 ||
+ ioctl(p->dsp_fd, SNDCTL_DSP_GETODELAY, &odelay) == -1)
+ {
+ MP_WARN_IOCTL_ERR(ao);
+ /* On query failure report an empty, idle state. */
+ memset(state, 0x00, sizeof(struct mp_pcm_state));
+ /* (delay is already zeroed by the memset; kept explicit) */
+ state->delay = 0.0;
+ return;
+ }
+ state->free_samples = (info.bytes / ao->sstride);
+ state->queued_samples = (ao->device_buffer - state->free_samples);
+ state->delay = (odelay / p->bps);
+ state->playing = (state->queued_samples != 0);
+}
+
+/* Publish the default OSS device, then probe numbered device nodes. */
+static void list_devs(struct ao *ao, struct ao_device_list *list)
+{
+ struct stat st;
+ char dev_path[32] = PATH_DEV_DSP, dev_descr[256] = "Default";
+ struct ao_device_desc dev = {.name = dev_path, .desc = dev_descr};
+
+ if (stat(PATH_DEV_DSP, &st) == 0) {
+ ao_device_list_add(list, ao, &dev);
+ }
+
+ /* Auto detect.
*/ + for (size_t i = 0, fail_cnt = 0; fail_cnt < 8; i ++, fail_cnt ++) { + snprintf(dev_path, sizeof(dev_path), PATH_DEV_DSP"%zu", i); + if (stat(dev_path, &st) != 0) + continue; + device_descr_get(i, dev_descr, sizeof(dev_descr)); + ao_device_list_add(list, ao, &dev); + fail_cnt = 0; /* Reset fail counter. */ + } +} + +const struct ao_driver audio_out_oss = { + .name = "oss", + .description = "OSS/ioctl audio output", + .init = init, + .uninit = uninit, + .control = control, + .reset = reset, + .start = start, + .write = audio_write, + .get_state = get_state, + .list_devs = list_devs, + .priv_size = sizeof(struct priv), + .priv_defaults = &(const struct priv) { + .dsp_fd = -1, + }, +}; diff --git a/audio/out/ao_pcm.c b/audio/out/ao_pcm.c new file mode 100644 index 0000000..4097aa3 --- /dev/null +++ b/audio/out/ao_pcm.c @@ -0,0 +1,248 @@ +/* + * PCM audio output driver + * + * Original author: Atmosfear + * + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include <libavutil/common.h> + +#include "mpv_talloc.h" + +#include "options/m_option.h" +#include "audio/format.h" +#include "ao.h" +#include "internal.h" +#include "common/msg.h" +#include "osdep/endian.h" + +#ifdef __MINGW32__ +// for GetFileType to detect pipes +#include <windows.h> +#include <io.h> +#endif + +struct priv { + char *outputfilename; + bool waveheader; + bool append; + uint64_t data_length; + FILE *fp; +}; + +#define WAV_ID_RIFF 0x46464952 /* "RIFF" */ +#define WAV_ID_WAVE 0x45564157 /* "WAVE" */ +#define WAV_ID_FMT 0x20746d66 /* "fmt " */ +#define WAV_ID_DATA 0x61746164 /* "data" */ +#define WAV_ID_PCM 0x0001 +#define WAV_ID_FLOAT_PCM 0x0003 +#define WAV_ID_FORMAT_EXTENSIBLE 0xfffe + +static void fput16le(uint16_t val, FILE *fp) +{ + uint8_t bytes[2] = {val, val >> 8}; + fwrite(bytes, 1, 2, fp); +} + +static void fput32le(uint32_t val, FILE *fp) +{ + uint8_t bytes[4] = {val, val >> 8, val >> 16, val >> 24}; + fwrite(bytes, 1, 4, fp); +} + +static void write_wave_header(struct ao *ao, FILE *fp, uint64_t data_length) +{ + uint16_t fmt = ao->format == AF_FORMAT_FLOAT ? 
WAV_ID_FLOAT_PCM : WAV_ID_PCM; + int bits = af_fmt_to_bytes(ao->format) * 8; + + // Master RIFF chunk + fput32le(WAV_ID_RIFF, fp); + // RIFF chunk size: 'WAVE' + 'fmt ' + 4 + 40 + + // data chunk hdr (8) + data length + fput32le(12 + 40 + 8 + data_length, fp); + fput32le(WAV_ID_WAVE, fp); + + // Format chunk + fput32le(WAV_ID_FMT, fp); + fput32le(40, fp); + fput16le(WAV_ID_FORMAT_EXTENSIBLE, fp); + fput16le(ao->channels.num, fp); + fput32le(ao->samplerate, fp); + fput32le(ao->bps, fp); + fput16le(ao->channels.num * (bits / 8), fp); + fput16le(bits, fp); + + // Extension chunk + fput16le(22, fp); + fput16le(bits, fp); + fput32le(mp_chmap_to_waveext(&ao->channels), fp); + // 2 bytes format + 14 bytes guid + fput32le(fmt, fp); + fput32le(0x00100000, fp); + fput32le(0xAA000080, fp); + fput32le(0x719B3800, fp); + + // Data chunk + fput32le(WAV_ID_DATA, fp); + fput32le(data_length, fp); +} + +static int init(struct ao *ao) +{ + struct priv *priv = ao->priv; + + char *outputfilename = priv->outputfilename; + if (!outputfilename) { + outputfilename = talloc_strdup(priv, priv->waveheader ? "audiodump.wav" + : "audiodump.pcm"); + } + + ao->format = af_fmt_from_planar(ao->format); + + if (priv->waveheader) { + // WAV files must have one of the following formats + + // And they don't work in big endian; fixing it would be simple, but + // nobody cares. 
+ if (BYTE_ORDER == BIG_ENDIAN) { + MP_FATAL(ao, "Not supported on big endian.\n"); + return -1; + } + + switch (ao->format) { + case AF_FORMAT_U8: + case AF_FORMAT_S16: + case AF_FORMAT_S32: + case AF_FORMAT_FLOAT: + break; + default: + if (!af_fmt_is_spdif(ao->format)) + ao->format = AF_FORMAT_S16; + break; + } + } + + struct mp_chmap_sel sel = {0}; + mp_chmap_sel_add_waveext(&sel); + if (!ao_chmap_sel_adjust(ao, &sel, &ao->channels)) + return -1; + + ao->bps = ao->channels.num * ao->samplerate * af_fmt_to_bytes(ao->format); + + MP_INFO(ao, "File: %s (%s)\nPCM: Samplerate: %d Hz Channels: %d Format: %s\n", + outputfilename, + priv->waveheader ? "WAVE" : "RAW PCM", ao->samplerate, + ao->channels.num, af_fmt_to_str(ao->format)); + + priv->fp = fopen(outputfilename, priv->append ? "ab" : "wb"); + if (!priv->fp) { + MP_ERR(ao, "Failed to open %s for writing!\n", outputfilename); + return -1; + } + if (priv->waveheader) // Reserve space for wave header + write_wave_header(ao, priv->fp, 0x7ffff000); + ao->untimed = true; + ao->device_buffer = 1 << 16; + + return 0; +} + +// close audio device +static void uninit(struct ao *ao) +{ + struct priv *priv = ao->priv; + + if (priv->waveheader) { // Rewrite wave header + bool broken_seek = false; +#ifdef __MINGW32__ + // Windows, in its usual idiocy "emulates" seeks on pipes so it always + // looks like they work. So we have to detect them brute-force. 
+ broken_seek = FILE_TYPE_DISK != + GetFileType((HANDLE)_get_osfhandle(_fileno(priv->fp))); +#endif + if (broken_seek || fseek(priv->fp, 0, SEEK_SET) != 0) + MP_ERR(ao, "Could not seek to start, WAV size headers not updated!\n"); + else { + if (priv->data_length > 0xfffff000) { + MP_ERR(ao, "File larger than allowed for " + "WAV files, may play truncated!\n"); + priv->data_length = 0xfffff000; + } + write_wave_header(ao, priv->fp, priv->data_length); + } + } + fclose(priv->fp); +} + +static bool audio_write(struct ao *ao, void **data, int samples) +{ + struct priv *priv = ao->priv; + int len = samples * ao->sstride; + + fwrite(data[0], len, 1, priv->fp); + priv->data_length += len; + + return true; +} + +static void get_state(struct ao *ao, struct mp_pcm_state *state) +{ + state->free_samples = ao->device_buffer; + state->queued_samples = 0; + state->delay = 0; +} + +static bool set_pause(struct ao *ao, bool paused) +{ + return true; // signal support so common code doesn't write silence +} + +static void start(struct ao *ao) +{ + // we use data immediately +} + +static void reset(struct ao *ao) +{ +} + +#define OPT_BASE_STRUCT struct priv + +const struct ao_driver audio_out_pcm = { + .description = "RAW PCM/WAVE file writer audio output", + .name = "pcm", + .init = init, + .uninit = uninit, + .get_state = get_state, + .set_pause = set_pause, + .write = audio_write, + .start = start, + .reset = reset, + .priv_size = sizeof(struct priv), + .priv_defaults = &(const struct priv) { .waveheader = true }, + .options = (const struct m_option[]) { + {"file", OPT_STRING(outputfilename), .flags = M_OPT_FILE}, + {"waveheader", OPT_BOOL(waveheader)}, + {"append", OPT_BOOL(append)}, + {0} + }, + .options_prefix = "ao-pcm", +}; diff --git a/audio/out/ao_pipewire.c b/audio/out/ao_pipewire.c new file mode 100644 index 0000000..3fbcbf6 --- /dev/null +++ b/audio/out/ao_pipewire.c @@ -0,0 +1,883 @@ +/* + * PipeWire audio output driver. 
+ * Copyright (C) 2021 Thomas Weißschuh <thomas@t-8ch.de> + * Copyright (C) 2021 Oschowa <oschowa@web.de> + * Copyright (C) 2020 Andreas Kempf <aakempf@gmail.com> + * + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <pipewire/pipewire.h> +#include <pipewire/global.h> +#include <spa/param/audio/format-utils.h> +#include <spa/param/props.h> +#include <spa/utils/result.h> +#include <math.h> + +#include "common/msg.h" +#include "options/m_config.h" +#include "options/m_option.h" +#include "ao.h" +#include "audio/format.h" +#include "internal.h" +#include "osdep/timer.h" + +#if !PW_CHECK_VERSION(0, 3, 50) +static inline int pw_stream_get_time_n(struct pw_stream *stream, struct pw_time *time, size_t size) { + return pw_stream_get_time(stream, time); +} +#endif + +#if !PW_CHECK_VERSION(0, 3, 57) +// Earlier versions segfault on zeroed hooks +#define spa_hook_remove(hook) if ((hook)->link.prev) spa_hook_remove(hook) +#endif + +enum init_state { + INIT_STATE_NONE, + INIT_STATE_SUCCESS, + INIT_STATE_ERROR, +}; + +enum { + VOLUME_MODE_CHANNEL, + VOLUME_MODE_GLOBAL, +}; + +struct priv { + struct pw_thread_loop *loop; + struct pw_stream *stream; + struct pw_core *core; + struct spa_hook stream_listener; + struct spa_hook core_listener; + enum init_state init_state; + + bool muted; + float volume; + + struct { + int buffer_msec; + char 
*remote; + int volume_mode; + } options; + + struct { + struct pw_registry *registry; + struct spa_hook registry_listener; + struct spa_list sinks; + } hotplug; +}; + +struct id_list { + uint32_t id; + struct spa_list node; +}; + +static enum spa_audio_format af_fmt_to_pw(struct ao *ao, enum af_format format) +{ + switch (format) { + case AF_FORMAT_U8: return SPA_AUDIO_FORMAT_U8; + case AF_FORMAT_S16: return SPA_AUDIO_FORMAT_S16; + case AF_FORMAT_S32: return SPA_AUDIO_FORMAT_S32; + case AF_FORMAT_FLOAT: return SPA_AUDIO_FORMAT_F32; + case AF_FORMAT_DOUBLE: return SPA_AUDIO_FORMAT_F64; + case AF_FORMAT_U8P: return SPA_AUDIO_FORMAT_U8P; + case AF_FORMAT_S16P: return SPA_AUDIO_FORMAT_S16P; + case AF_FORMAT_S32P: return SPA_AUDIO_FORMAT_S32P; + case AF_FORMAT_FLOATP: return SPA_AUDIO_FORMAT_F32P; + case AF_FORMAT_DOUBLEP: return SPA_AUDIO_FORMAT_F64P; + default: + MP_WARN(ao, "Unhandled format %d\n", format); + return SPA_AUDIO_FORMAT_UNKNOWN; + } +} + +static enum spa_audio_channel mp_speaker_id_to_spa(struct ao *ao, enum mp_speaker_id mp_speaker_id) +{ + switch (mp_speaker_id) { + case MP_SPEAKER_ID_FL: return SPA_AUDIO_CHANNEL_FL; + case MP_SPEAKER_ID_FR: return SPA_AUDIO_CHANNEL_FR; + case MP_SPEAKER_ID_FC: return SPA_AUDIO_CHANNEL_FC; + case MP_SPEAKER_ID_LFE: return SPA_AUDIO_CHANNEL_LFE; + case MP_SPEAKER_ID_BL: return SPA_AUDIO_CHANNEL_RL; + case MP_SPEAKER_ID_BR: return SPA_AUDIO_CHANNEL_RR; + case MP_SPEAKER_ID_FLC: return SPA_AUDIO_CHANNEL_FLC; + case MP_SPEAKER_ID_FRC: return SPA_AUDIO_CHANNEL_FRC; + case MP_SPEAKER_ID_BC: return SPA_AUDIO_CHANNEL_RC; + case MP_SPEAKER_ID_SL: return SPA_AUDIO_CHANNEL_SL; + case MP_SPEAKER_ID_SR: return SPA_AUDIO_CHANNEL_SR; + case MP_SPEAKER_ID_TC: return SPA_AUDIO_CHANNEL_TC; + case MP_SPEAKER_ID_TFL: return SPA_AUDIO_CHANNEL_TFL; + case MP_SPEAKER_ID_TFC: return SPA_AUDIO_CHANNEL_TFC; + case MP_SPEAKER_ID_TFR: return SPA_AUDIO_CHANNEL_TFR; + case MP_SPEAKER_ID_TBL: return SPA_AUDIO_CHANNEL_TRL; + case MP_SPEAKER_ID_TBC: 
return SPA_AUDIO_CHANNEL_TRC; + case MP_SPEAKER_ID_TBR: return SPA_AUDIO_CHANNEL_TRR; + case MP_SPEAKER_ID_DL: return SPA_AUDIO_CHANNEL_FL; + case MP_SPEAKER_ID_DR: return SPA_AUDIO_CHANNEL_FR; + case MP_SPEAKER_ID_WL: return SPA_AUDIO_CHANNEL_FL; + case MP_SPEAKER_ID_WR: return SPA_AUDIO_CHANNEL_FR; + case MP_SPEAKER_ID_SDL: return SPA_AUDIO_CHANNEL_SL; + case MP_SPEAKER_ID_SDR: return SPA_AUDIO_CHANNEL_SR; + case MP_SPEAKER_ID_LFE2: return SPA_AUDIO_CHANNEL_LFE2; + case MP_SPEAKER_ID_TSL: return SPA_AUDIO_CHANNEL_TSL; + case MP_SPEAKER_ID_TSR: return SPA_AUDIO_CHANNEL_TSR; + case MP_SPEAKER_ID_BFC: return SPA_AUDIO_CHANNEL_BC; + case MP_SPEAKER_ID_BFL: return SPA_AUDIO_CHANNEL_BLC; + case MP_SPEAKER_ID_BFR: return SPA_AUDIO_CHANNEL_BRC; + case MP_SPEAKER_ID_NA: return SPA_AUDIO_CHANNEL_NA; + default: + MP_WARN(ao, "Unhandled channel %d\n", mp_speaker_id); + return SPA_AUDIO_CHANNEL_UNKNOWN; + }; +} + +static void on_process(void *userdata) +{ + struct ao *ao = userdata; + struct priv *p = ao->priv; + struct pw_time time; + struct pw_buffer *b; + void *data[MP_NUM_CHANNELS]; + + if ((b = pw_stream_dequeue_buffer(p->stream)) == NULL) { + MP_WARN(ao, "out of buffers: %s\n", strerror(errno)); + return; + } + + struct spa_buffer *buf = b->buffer; + + int bytes_per_channel = buf->datas[0].maxsize / ao->channels.num; + int nframes = bytes_per_channel / ao->sstride; +#if PW_CHECK_VERSION(0, 3, 49) + if (b->requested != 0) + nframes = MPMIN(b->requested, nframes); +#endif + + for (int i = 0; i < buf->n_datas; i++) + data[i] = buf->datas[i].data; + + pw_stream_get_time_n(p->stream, &time, sizeof(time)); + if (time.rate.denom == 0) + time.rate.denom = ao->samplerate; + if (time.rate.num == 0) + time.rate.num = 1; + + int64_t end_time = mp_time_ns(); + /* time.queued is always going to be 0, so we don't need to care */ + end_time += (nframes * 1e9 / ao->samplerate) + + ((double) time.delay * SPA_NSEC_PER_SEC * time.rate.num / time.rate.denom); + + int samples = 
ao_read_data_nonblocking(ao, data, nframes, end_time); + b->size = samples; + + for (int i = 0; i < buf->n_datas; i++) { + buf->datas[i].chunk->size = samples * ao->sstride; + buf->datas[i].chunk->offset = 0; + buf->datas[i].chunk->stride = ao->sstride; + } + + pw_stream_queue_buffer(p->stream, b); + + MP_TRACE(ao, "queued %d of %d samples\n", samples, nframes); +} + +static void on_param_changed(void *userdata, uint32_t id, const struct spa_pod *param) +{ + struct ao *ao = userdata; + struct priv *p = ao->priv; + const struct spa_pod *params[1]; + uint8_t buffer[1024]; + struct spa_pod_builder b = SPA_POD_BUILDER_INIT(buffer, sizeof(buffer)); + + /* We want to know when our node is linked. + * As there is no proper callback for this we use the Latency param for this + */ + if (id == SPA_PARAM_Latency) { + p->init_state = INIT_STATE_SUCCESS; + pw_thread_loop_signal(p->loop, false); + } + + if (param == NULL || id != SPA_PARAM_Format) + return; + + int buffer_size = ao->device_buffer * af_fmt_to_bytes(ao->format) * ao->channels.num; + + params[0] = spa_pod_builder_add_object(&b, + SPA_TYPE_OBJECT_ParamBuffers, SPA_PARAM_Buffers, + SPA_PARAM_BUFFERS_blocks, SPA_POD_Int(ao->num_planes), + SPA_PARAM_BUFFERS_size, SPA_POD_CHOICE_RANGE_Int( + buffer_size, 0, INT32_MAX), + SPA_PARAM_BUFFERS_stride, SPA_POD_Int(ao->sstride)); + if (!params[0]) { + MP_ERR(ao, "Could not build parameter pod\n"); + return; + } + + if (pw_stream_update_params(p->stream, params, 1) < 0) { + MP_ERR(ao, "Could not update stream parameters\n"); + return; + } +} + +static void on_state_changed(void *userdata, enum pw_stream_state old, enum pw_stream_state state, const char *error) +{ + struct ao *ao = userdata; + struct priv *p = ao->priv; + MP_DBG(ao, "Stream state changed: old_state=%s state=%s error=%s\n", + pw_stream_state_as_string(old), pw_stream_state_as_string(state), error); + + if (state == PW_STREAM_STATE_ERROR) { + MP_WARN(ao, "Stream in error state, trying to reload...\n"); + 
p->init_state = INIT_STATE_ERROR; + pw_thread_loop_signal(p->loop, false); + ao_request_reload(ao); + } + + if (state == PW_STREAM_STATE_UNCONNECTED && old != PW_STREAM_STATE_UNCONNECTED) { + MP_WARN(ao, "Stream disconnected, trying to reload...\n"); + ao_request_reload(ao); + } +} + +static float spa_volume_to_mp_volume(float vol) +{ + return vol * 100; +} + +static float mp_volume_to_spa_volume(float vol) +{ + return vol / 100; +} + +static float volume_avg(float* vols, uint32_t n) +{ + float sum = 0.0; + for (int i = 0; i < n; i++) + sum += vols[i]; + return sum / n; +} + +static void on_control_info(void *userdata, uint32_t id, + const struct pw_stream_control *control) +{ + struct ao *ao = userdata; + struct priv *p = ao->priv; + + switch (id) { + case SPA_PROP_mute: + if (control->n_values == 1) + p->muted = control->values[0] >= 0.5; + break; + case SPA_PROP_channelVolumes: + if (p->options.volume_mode != VOLUME_MODE_CHANNEL) + break; + if (control->n_values > 0) + p->volume = volume_avg(control->values, control->n_values); + break; + case SPA_PROP_volume: + if (p->options.volume_mode != VOLUME_MODE_GLOBAL) + break; + if (control->n_values > 0) + p->volume = control->values[0]; + break; + } +} + +static const struct pw_stream_events stream_events = { + .version = PW_VERSION_STREAM_EVENTS, + .param_changed = on_param_changed, + .process = on_process, + .state_changed = on_state_changed, + .control_info = on_control_info, +}; + +static void uninit(struct ao *ao) +{ + struct priv *p = ao->priv; + if (p->loop) + pw_thread_loop_stop(p->loop); + spa_hook_remove(&p->stream_listener); + spa_zero(p->stream_listener); + if (p->stream) + pw_stream_destroy(p->stream); + p->stream = NULL; + if (p->core) + pw_context_destroy(pw_core_get_context(p->core)); + p->core = NULL; + if (p->loop) + pw_thread_loop_destroy(p->loop); + p->loop = NULL; + pw_deinit(); +} + +struct registry_event_global_ctx { + struct ao *ao; + void (*sink_cb) (struct ao *ao, uint32_t id, const struct 
spa_dict *props, void *sink_cb_ctx); + void *sink_cb_ctx; +}; + +static bool is_sink_node(const char *type, const struct spa_dict *props) +{ + if (strcmp(type, PW_TYPE_INTERFACE_Node) != 0) + return false; + + if (!props) + return false; + + const char *class = spa_dict_lookup(props, PW_KEY_MEDIA_CLASS); + if (!class || strcmp(class, "Audio/Sink") != 0) + return false; + + return true; +} + +static void for_each_sink_registry_event_global(void *data, uint32_t id, + uint32_t permissions, const + char *type, uint32_t version, + const struct spa_dict *props) +{ + struct registry_event_global_ctx *ctx = data; + + if (!is_sink_node(type, props)) + return; + + ctx->sink_cb(ctx->ao, id, props, ctx->sink_cb_ctx); +} + + +struct for_each_done_ctx { + struct pw_thread_loop *loop; + bool done; +}; + +static const struct pw_registry_events for_each_sink_registry_events = { + .version = PW_VERSION_REGISTRY_EVENTS, + .global = for_each_sink_registry_event_global, +}; + +static void for_each_sink_done(void *data, uint32_t it, int seq) +{ + struct for_each_done_ctx *ctx = data; + ctx->done = true; + pw_thread_loop_signal(ctx->loop, false); +} + +static const struct pw_core_events for_each_sink_core_events = { + .version = PW_VERSION_CORE_EVENTS, + .done = for_each_sink_done, +}; + +static int for_each_sink(struct ao *ao, void (cb) (struct ao *ao, uint32_t id, + const struct spa_dict *props, void *ctx), void *cb_ctx) +{ + struct priv *priv = ao->priv; + struct pw_registry *registry; + struct spa_hook core_listener; + struct for_each_done_ctx done_ctx = { + .loop = priv->loop, + .done = false, + }; + int ret = -1; + + pw_thread_loop_lock(priv->loop); + + spa_zero(core_listener); + if (pw_core_add_listener(priv->core, &core_listener, &for_each_sink_core_events, &done_ctx) < 0) + goto unlock_loop; + + registry = pw_core_get_registry(priv->core, PW_VERSION_REGISTRY, 0); + if (!registry) + goto remove_core_listener; + + pw_core_sync(priv->core, 0, 0); + + struct spa_hook 
registry_listener; + struct registry_event_global_ctx revents_ctx = { + .ao = ao, + .sink_cb = cb, + .sink_cb_ctx = cb_ctx, + }; + spa_zero(registry_listener); + if (pw_registry_add_listener(registry, &registry_listener, &for_each_sink_registry_events, &revents_ctx) < 0) + goto destroy_registry; + + while (!done_ctx.done) + pw_thread_loop_wait(priv->loop); + + spa_hook_remove(&registry_listener); + + ret = 0; + +destroy_registry: + pw_proxy_destroy((struct pw_proxy *)registry); + +remove_core_listener: + spa_hook_remove(&core_listener); + +unlock_loop: + pw_thread_loop_unlock(priv->loop); + + return ret; +} + +static void have_sink(struct ao *ao, uint32_t id, const struct spa_dict *props, void *ctx) +{ + bool *b = ctx; + *b = true; +} + +static bool session_has_sinks(struct ao *ao) +{ + bool b = false; + + if (for_each_sink(ao, have_sink, &b) < 0) + MP_WARN(ao, "Could not list devices, sink detection may be wrong\n"); + + return b; +} + +static void on_error(void *data, uint32_t id, int seq, int res, const char *message) +{ + struct ao *ao = data; + + MP_WARN(ao, "Error during playback: %s, %s\n", spa_strerror(res), message); +} + +static void on_core_info(void *data, const struct pw_core_info *info) +{ + struct ao *ao = data; + + MP_VERBOSE(ao, "Core user: %s\n", info->user_name); + MP_VERBOSE(ao, "Core host: %s\n", info->host_name); + MP_VERBOSE(ao, "Core version: %s\n", info->version); + MP_VERBOSE(ao, "Core name: %s\n", info->name); +} + +static const struct pw_core_events core_events = { + .version = PW_VERSION_CORE_EVENTS, + .error = on_error, + .info = on_core_info, +}; + +static int pipewire_init_boilerplate(struct ao *ao) +{ + struct priv *p = ao->priv; + struct pw_context *context; + + pw_init(NULL, NULL); + + MP_VERBOSE(ao, "Headers version: %s\n", pw_get_headers_version()); + MP_VERBOSE(ao, "Library version: %s\n", pw_get_library_version()); + + p->loop = pw_thread_loop_new("mpv/ao/pipewire", NULL); + if (p->loop == NULL) + return -1; + + 
pw_thread_loop_lock(p->loop); + + if (pw_thread_loop_start(p->loop) < 0) + goto error; + + context = pw_context_new( + pw_thread_loop_get_loop(p->loop), + pw_properties_new(PW_KEY_CONFIG_NAME, "client-rt.conf", NULL), + 0); + if (!context) + goto error; + + p->core = pw_context_connect( + context, + pw_properties_new(PW_KEY_REMOTE_NAME, p->options.remote, NULL), + 0); + if (!p->core) { + MP_MSG(ao, ao->probing ? MSGL_V : MSGL_ERR, + "Could not connect to context '%s': %s\n", + p->options.remote, strerror(errno)); + pw_context_destroy(context); + goto error; + } + + if (pw_core_add_listener(p->core, &p->core_listener, &core_events, ao) < 0) + goto error; + + pw_thread_loop_unlock(p->loop); + + if (!session_has_sinks(ao)) { + MP_VERBOSE(ao, "PipeWire does not have any audio sinks, skipping\n"); + return -1; + } + + return 0; + +error: + pw_thread_loop_unlock(p->loop); + return -1; +} + +static void wait_for_init_done(struct ao *ao) +{ + struct priv *p = ao->priv; + struct timespec abstime; + int r; + + r = pw_thread_loop_get_time(p->loop, &abstime, 50 * SPA_NSEC_PER_MSEC); + if (r < 0) { + MP_WARN(ao, "Could not get timeout for initialization: %s\n", spa_strerror(r)); + return; + } + + while (p->init_state == INIT_STATE_NONE) { + r = pw_thread_loop_timed_wait_full(p->loop, &abstime); + if (r < 0) { + MP_WARN(ao, "Could not wait for initialization: %s\n", spa_strerror(r)); + return; + } + } +} + +static int init(struct ao *ao) +{ + struct priv *p = ao->priv; + uint8_t buffer[1024]; + struct spa_pod_builder b = SPA_POD_BUILDER_INIT(buffer, sizeof(buffer)); + const struct spa_pod *params[1]; + struct pw_properties *props = pw_properties_new( + PW_KEY_MEDIA_TYPE, "Audio", + PW_KEY_MEDIA_CATEGORY, "Playback", + PW_KEY_MEDIA_ROLE, ao->init_flags & AO_INIT_MEDIA_ROLE_MUSIC ? 
"Music" : "Movie", + PW_KEY_NODE_NAME, ao->client_name, + PW_KEY_NODE_DESCRIPTION, ao->client_name, + PW_KEY_APP_NAME, ao->client_name, + PW_KEY_APP_ID, ao->client_name, + PW_KEY_APP_ICON_NAME, ao->client_name, + PW_KEY_NODE_ALWAYS_PROCESS, "true", + PW_KEY_TARGET_OBJECT, ao->device, + NULL + ); + + if (pipewire_init_boilerplate(ao) < 0) + goto error_props; + + if (p->options.buffer_msec) { + ao->device_buffer = p->options.buffer_msec * ao->samplerate / 1000; + + pw_properties_setf(props, PW_KEY_NODE_LATENCY, "%d/%d", ao->device_buffer, ao->samplerate); + } + + pw_properties_setf(props, PW_KEY_NODE_RATE, "1/%d", ao->samplerate); + + enum spa_audio_format spa_format = af_fmt_to_pw(ao, ao->format); + if (spa_format == SPA_AUDIO_FORMAT_UNKNOWN) { + ao->format = AF_FORMAT_FLOATP; + spa_format = SPA_AUDIO_FORMAT_F32P; + } + + struct spa_audio_info_raw audio_info = { + .format = spa_format, + .rate = ao->samplerate, + .channels = ao->channels.num, + }; + + for (int i = 0; i < ao->channels.num; i++) + audio_info.position[i] = mp_speaker_id_to_spa(ao, ao->channels.speaker[i]); + + params[0] = spa_format_audio_raw_build(&b, SPA_PARAM_EnumFormat, &audio_info); + if (!params[0]) + goto error_props; + + if (af_fmt_is_planar(ao->format)) { + ao->num_planes = ao->channels.num; + ao->sstride = af_fmt_to_bytes(ao->format); + } else { + ao->num_planes = 1; + ao->sstride = ao->channels.num * af_fmt_to_bytes(ao->format); + } + + pw_thread_loop_lock(p->loop); + + p->stream = pw_stream_new(p->core, "audio-src", props); + if (p->stream == NULL) { + pw_thread_loop_unlock(p->loop); + goto error; + } + + pw_stream_add_listener(p->stream, &p->stream_listener, &stream_events, ao); + + enum pw_stream_flags flags = PW_STREAM_FLAG_AUTOCONNECT | + PW_STREAM_FLAG_INACTIVE | + PW_STREAM_FLAG_MAP_BUFFERS | + PW_STREAM_FLAG_RT_PROCESS; + + if (ao->init_flags & AO_INIT_EXCLUSIVE) + flags |= PW_STREAM_FLAG_EXCLUSIVE; + + if (pw_stream_connect(p->stream, + PW_DIRECTION_OUTPUT, PW_ID_ANY, flags, params, 
1) < 0) { + pw_thread_loop_unlock(p->loop); + goto error; + } + + wait_for_init_done(ao); + + pw_thread_loop_unlock(p->loop); + + if (p->init_state == INIT_STATE_ERROR) + goto error; + + return 0; + +error_props: + pw_properties_free(props); +error: + uninit(ao); + return -1; +} + +static void reset(struct ao *ao) +{ + struct priv *p = ao->priv; + pw_thread_loop_lock(p->loop); + pw_stream_set_active(p->stream, false); + pw_stream_flush(p->stream, false); + pw_thread_loop_unlock(p->loop); +} + +static void start(struct ao *ao) +{ + struct priv *p = ao->priv; + pw_thread_loop_lock(p->loop); + pw_stream_set_active(p->stream, true); + pw_thread_loop_unlock(p->loop); +} + +#define CONTROL_RET(r) (!r ? CONTROL_OK : CONTROL_ERROR) + +static int control(struct ao *ao, enum aocontrol cmd, void *arg) +{ + struct priv *p = ao->priv; + + switch (cmd) { + case AOCONTROL_GET_VOLUME: { + float *vol = arg; + *vol = spa_volume_to_mp_volume(p->volume); + return CONTROL_OK; + } + case AOCONTROL_GET_MUTE: { + bool *muted = arg; + *muted = p->muted; + return CONTROL_OK; + } + case AOCONTROL_SET_VOLUME: + case AOCONTROL_SET_MUTE: + case AOCONTROL_UPDATE_STREAM_TITLE: { + int ret; + + pw_thread_loop_lock(p->loop); + switch (cmd) { + case AOCONTROL_SET_VOLUME: { + float *vol = arg; + uint8_t n = ao->channels.num; + if (p->options.volume_mode == VOLUME_MODE_CHANNEL) { + float values[MP_NUM_CHANNELS] = {0}; + for (int i = 0; i < n; i++) + values[i] = mp_volume_to_spa_volume(*vol); + ret = CONTROL_RET(pw_stream_set_control( + p->stream, SPA_PROP_channelVolumes, n, values, 0)); + } else { + float value = mp_volume_to_spa_volume(*vol); + ret = CONTROL_RET(pw_stream_set_control( + p->stream, SPA_PROP_volume, 1, &value, 0)); + } + break; + } + case AOCONTROL_SET_MUTE: { + bool *muted = arg; + float value = *muted ? 
1.f : 0.f; + ret = CONTROL_RET(pw_stream_set_control(p->stream, SPA_PROP_mute, 1, &value, 0)); + break; + } + case AOCONTROL_UPDATE_STREAM_TITLE: { + char *title = arg; + struct spa_dict_item items[1]; + items[0] = SPA_DICT_ITEM_INIT(PW_KEY_MEDIA_NAME, title); + ret = CONTROL_RET(pw_stream_update_properties(p->stream, &SPA_DICT_INIT(items, MP_ARRAY_SIZE(items)))); + break; + } + default: + ret = CONTROL_NA; + } + pw_thread_loop_unlock(p->loop); + return ret; + } + default: + return CONTROL_UNKNOWN; + } +} + +static void add_device_to_list(struct ao *ao, uint32_t id, const struct spa_dict *props, void *ctx) +{ + struct ao_device_list *list = ctx; + const char *name = spa_dict_lookup(props, PW_KEY_NODE_NAME); + + if (!name) + return; + + const char *description = spa_dict_lookup(props, PW_KEY_NODE_DESCRIPTION); + + ao_device_list_add(list, ao, &(struct ao_device_desc){name, description}); +} + +static void hotplug_registry_global_cb(void *data, uint32_t id, + uint32_t permissions, const char *type, + uint32_t version, const struct spa_dict *props) +{ + struct ao *ao = data; + struct priv *priv = ao->priv; + + if (!is_sink_node(type, props)) + return; + + pw_thread_loop_lock(priv->loop); + struct id_list *item = talloc(ao, struct id_list); + item->id = id; + spa_list_init(&item->node); + spa_list_append(&priv->hotplug.sinks, &item->node); + pw_thread_loop_unlock(priv->loop); + + ao_hotplug_event(ao); +} + +static void hotplug_registry_global_remove_cb(void *data, uint32_t id) +{ + struct ao *ao = data; + struct priv *priv = ao->priv; + bool removed_sink = false; + + struct id_list *e; + + pw_thread_loop_lock(priv->loop); + spa_list_for_each(e, &priv->hotplug.sinks, node) { + if (e->id == id) { + removed_sink = true; + spa_list_remove(&e->node); + talloc_free(e); + break; + } + } + + pw_thread_loop_unlock(priv->loop); + + if (removed_sink) + ao_hotplug_event(ao); +} + +static const struct pw_registry_events hotplug_registry_events = { + .version = 
PW_VERSION_REGISTRY_EVENTS, + .global = hotplug_registry_global_cb, + .global_remove = hotplug_registry_global_remove_cb, +}; + +static int hotplug_init(struct ao *ao) +{ + struct priv *priv = ao->priv; + + int res = pipewire_init_boilerplate(ao); + if (res) + goto error_no_unlock; + + pw_thread_loop_lock(priv->loop); + + spa_zero(priv->hotplug); + spa_list_init(&priv->hotplug.sinks); + + priv->hotplug.registry = pw_core_get_registry(priv->core, PW_VERSION_REGISTRY, 0); + if (!priv->hotplug.registry) + goto error; + + if (pw_registry_add_listener(priv->hotplug.registry, &priv->hotplug.registry_listener, &hotplug_registry_events, ao) < 0) { + pw_proxy_destroy((struct pw_proxy *)priv->hotplug.registry); + goto error; + } + + pw_thread_loop_unlock(priv->loop); + + return res; + +error: + pw_thread_loop_unlock(priv->loop); +error_no_unlock: + uninit(ao); + return -1; +} + +static void hotplug_uninit(struct ao *ao) +{ + struct priv *priv = ao->priv; + + pw_thread_loop_lock(priv->loop); + + spa_hook_remove(&priv->hotplug.registry_listener); + pw_proxy_destroy((struct pw_proxy *)priv->hotplug.registry); + + pw_thread_loop_unlock(priv->loop); + uninit(ao); +} + +static void list_devs(struct ao *ao, struct ao_device_list *list) +{ + ao_device_list_add(list, ao, &(struct ao_device_desc){}); + + if (for_each_sink(ao, add_device_to_list, list) < 0) + MP_WARN(ao, "Could not list devices, list may be incomplete\n"); +} + +#define OPT_BASE_STRUCT struct priv + +const struct ao_driver audio_out_pipewire = { + .description = "PipeWire audio output", + .name = "pipewire", + + .init = init, + .uninit = uninit, + .reset = reset, + .start = start, + + .control = control, + + .hotplug_init = hotplug_init, + .hotplug_uninit = hotplug_uninit, + .list_devs = list_devs, + + .priv_size = sizeof(struct priv), + .priv_defaults = &(const struct priv) + { + .loop = NULL, + .stream = NULL, + .init_state = INIT_STATE_NONE, + .options.buffer_msec = 0, + .options.volume_mode = VOLUME_MODE_CHANNEL, + 
}, + .options_prefix = "pipewire", + .options = (const struct m_option[]) { + {"buffer", OPT_CHOICE(options.buffer_msec, {"native", 0}), + M_RANGE(1, 2000)}, + {"remote", OPT_STRING(options.remote) }, + {"volume-mode", OPT_CHOICE(options.volume_mode, + {"channel", VOLUME_MODE_CHANNEL}, {"global", VOLUME_MODE_GLOBAL})}, + {0} + }, +}; diff --git a/audio/out/ao_pulse.c b/audio/out/ao_pulse.c new file mode 100644 index 0000000..3b29b1a --- /dev/null +++ b/audio/out/ao_pulse.c @@ -0,0 +1,817 @@ +/* + * PulseAudio audio output driver. + * Copyright (C) 2006 Lennart Poettering + * Copyright (C) 2007 Reimar Doeffinger + * + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */
+
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <stdint.h>
+#include <math.h>
+
+#include <pulse/pulseaudio.h>
+
+#include "audio/format.h"
+#include "common/msg.h"
+#include "options/m_option.h"
+#include "ao.h"
+#include "internal.h"
+
+// Conversions between PulseAudio volume units and mpv's 0..100 scale.
+#define VOL_PA2MP(v) ((v) * 100.0 / PA_VOLUME_NORM)
+#define VOL_MP2PA(v) lrint((v) * PA_VOLUME_NORM / 100)
+
+struct priv {
+    // PulseAudio playback stream object
+    struct pa_stream *stream;
+
+    // PulseAudio connection context
+    struct pa_context *context;
+
+    // Main event loop object
+    struct pa_threaded_mainloop *mainloop;
+
+    // temporary during control()
+    struct pa_sink_input_info pi;
+
+    int retval;                 // result reported by the success callbacks
+    bool playing;               // stream uncorked and no underrun seen
+    bool underrun_signalled;    // underflow_cb fired since last reset
+
+    char *cfg_host;
+    int cfg_buffer;
+    bool cfg_latency_hacks;
+    bool cfg_allow_suspended;
+};
+
+#define GENERIC_ERR_MSG(str) \
+    MP_ERR(ao, str": %s\n", \
+    pa_strerror(pa_context_errno(((struct priv *)ao->priv)->context)))
+
+// Wake anyone blocked in pa_threaded_mainloop_wait() once the context
+// reaches a final (good or bad) state.
+static void context_state_cb(pa_context *c, void *userdata)
+{
+    struct ao *ao = userdata;
+    struct priv *priv = ao->priv;
+    switch (pa_context_get_state(c)) {
+    case PA_CONTEXT_READY:
+    case PA_CONTEXT_TERMINATED:
+    case PA_CONTEXT_FAILED:
+        pa_threaded_mainloop_signal(priv->mainloop, 0);
+        break;
+    }
+}
+
+// Server subscription callback: forward sink add/remove as hotplug events.
+static void subscribe_cb(pa_context *c, pa_subscription_event_type_t t,
+                         uint32_t idx, void *userdata)
+{
+    struct ao *ao = userdata;
+    // Fix: the event type must be extracted with PA_SUBSCRIPTION_EVENT_TYPE_MASK.
+    // The previous code masked with PA_SUBSCRIPTION_MASK_SINK, which belongs to
+    // the pa_subscription_mask_t enum used when *subscribing*; masking the event
+    // type with it yields meaningless values, so NEW/REMOVE detection was broken.
+    int type = t & PA_SUBSCRIPTION_EVENT_TYPE_MASK;
+    int fac = t & PA_SUBSCRIPTION_EVENT_FACILITY_MASK;
+    if ((type == PA_SUBSCRIPTION_EVENT_NEW || type == PA_SUBSCRIPTION_EVENT_REMOVE)
+        && fac == PA_SUBSCRIPTION_EVENT_SINK)
+    {
+        ao_hotplug_event(ao);
+    }
+}
+
+// Generic success callback for context operations; stores the result in
+// priv->retval and wakes the waiting thread.
+static void context_success_cb(pa_context *c, int success, void *userdata)
+{
+    struct ao *ao = userdata;
+    struct priv *priv = ao->priv;
+    priv->retval = success;
+    pa_threaded_mainloop_signal(priv->mainloop, 0);
+}
+
+static void stream_state_cb(pa_stream *s, void *userdata)
+{
+    struct ao *ao = userdata;
+    struct priv *priv = ao->priv;
+    switch (pa_stream_get_state(s)) {
+    case PA_STREAM_FAILED:
+        MP_VERBOSE(ao, "Stream failed.\n");
+        ao_request_reload(ao);
+        pa_threaded_mainloop_signal(priv->mainloop, 0);
+        break;
+    case PA_STREAM_READY:
+    case PA_STREAM_TERMINATED:
+        pa_threaded_mainloop_signal(priv->mainloop, 0);
+        break;
+    }
+}
+
+// The server wants more data: wake the play thread so it can write.
+static void stream_request_cb(pa_stream *s, size_t length, void *userdata)
+{
+    struct ao *ao = userdata;
+    struct priv *priv = ao->priv;
+    ao_wakeup_playthread(ao);
+    pa_threaded_mainloop_signal(priv->mainloop, 0);
+}
+
+static void stream_latency_update_cb(pa_stream *s, void *userdata)
+{
+    struct ao *ao = userdata;
+    struct priv *priv = ao->priv;
+    pa_threaded_mainloop_signal(priv->mainloop, 0);
+}
+
+// Underrun: mark playback stopped so get_state() can report it and the
+// play thread can recover.
+static void underflow_cb(pa_stream *s, void *userdata)
+{
+    struct ao *ao = userdata;
+    struct priv *priv = ao->priv;
+    priv->playing = false;
+    priv->underrun_signalled = true;
+    ao_wakeup_playthread(ao);
+    pa_threaded_mainloop_signal(priv->mainloop, 0);
+}
+
+// Generic success callback for stream operations.
+static void success_cb(pa_stream *s, int success, void *userdata)
+{
+    struct ao *ao = userdata;
+    struct priv *priv = ao->priv;
+    priv->retval = success;
+    pa_threaded_mainloop_signal(priv->mainloop, 0);
+}
+
+// Like waitop(), but keep the lock (even if it may unlock temporarily).
+static bool waitop_no_unlock(struct priv *priv, pa_operation *op) +{ + if (!op) + return false; + pa_operation_state_t state = pa_operation_get_state(op); + while (state == PA_OPERATION_RUNNING) { + pa_threaded_mainloop_wait(priv->mainloop); + state = pa_operation_get_state(op); + } + pa_operation_unref(op); + return state == PA_OPERATION_DONE; +} + +/** + * \brief waits for a pulseaudio operation to finish, frees it and + * unlocks the mainloop + * \param op operation to wait for + * \return 1 if operation has finished normally (DONE state), 0 otherwise + */ +static bool waitop(struct priv *priv, pa_operation *op) +{ + bool r = waitop_no_unlock(priv, op); + pa_threaded_mainloop_unlock(priv->mainloop); + return r; +} + +static const struct format_map { + int mp_format; + pa_sample_format_t pa_format; +} format_maps[] = { + {AF_FORMAT_FLOAT, PA_SAMPLE_FLOAT32NE}, + {AF_FORMAT_S32, PA_SAMPLE_S32NE}, + {AF_FORMAT_S16, PA_SAMPLE_S16NE}, + {AF_FORMAT_U8, PA_SAMPLE_U8}, + {AF_FORMAT_UNKNOWN, 0} +}; + +static pa_encoding_t map_digital_format(int format) +{ + switch (format) { + case AF_FORMAT_S_AC3: return PA_ENCODING_AC3_IEC61937; + case AF_FORMAT_S_EAC3: return PA_ENCODING_EAC3_IEC61937; + case AF_FORMAT_S_MP3: return PA_ENCODING_MPEG_IEC61937; + case AF_FORMAT_S_DTS: return PA_ENCODING_DTS_IEC61937; +#ifdef PA_ENCODING_DTSHD_IEC61937 + case AF_FORMAT_S_DTSHD: return PA_ENCODING_DTSHD_IEC61937; +#endif +#ifdef PA_ENCODING_MPEG2_AAC_IEC61937 + case AF_FORMAT_S_AAC: return PA_ENCODING_MPEG2_AAC_IEC61937; +#endif +#ifdef PA_ENCODING_TRUEHD_IEC61937 + case AF_FORMAT_S_TRUEHD: return PA_ENCODING_TRUEHD_IEC61937; +#endif + default: + if (af_fmt_is_spdif(format)) + return PA_ENCODING_ANY; + return PA_ENCODING_PCM; + } +} + +static const int speaker_map[][2] = { + {PA_CHANNEL_POSITION_FRONT_LEFT, MP_SPEAKER_ID_FL}, + {PA_CHANNEL_POSITION_FRONT_RIGHT, MP_SPEAKER_ID_FR}, + {PA_CHANNEL_POSITION_FRONT_CENTER, MP_SPEAKER_ID_FC}, + {PA_CHANNEL_POSITION_REAR_CENTER, 
MP_SPEAKER_ID_BC}, + {PA_CHANNEL_POSITION_REAR_LEFT, MP_SPEAKER_ID_BL}, + {PA_CHANNEL_POSITION_REAR_RIGHT, MP_SPEAKER_ID_BR}, + {PA_CHANNEL_POSITION_LFE, MP_SPEAKER_ID_LFE}, + {PA_CHANNEL_POSITION_FRONT_LEFT_OF_CENTER, MP_SPEAKER_ID_FLC}, + {PA_CHANNEL_POSITION_FRONT_RIGHT_OF_CENTER, MP_SPEAKER_ID_FRC}, + {PA_CHANNEL_POSITION_SIDE_LEFT, MP_SPEAKER_ID_SL}, + {PA_CHANNEL_POSITION_SIDE_RIGHT, MP_SPEAKER_ID_SR}, + {PA_CHANNEL_POSITION_TOP_CENTER, MP_SPEAKER_ID_TC}, + {PA_CHANNEL_POSITION_TOP_FRONT_LEFT, MP_SPEAKER_ID_TFL}, + {PA_CHANNEL_POSITION_TOP_FRONT_RIGHT, MP_SPEAKER_ID_TFR}, + {PA_CHANNEL_POSITION_TOP_FRONT_CENTER, MP_SPEAKER_ID_TFC}, + {PA_CHANNEL_POSITION_TOP_REAR_LEFT, MP_SPEAKER_ID_TBL}, + {PA_CHANNEL_POSITION_TOP_REAR_RIGHT, MP_SPEAKER_ID_TBR}, + {PA_CHANNEL_POSITION_TOP_REAR_CENTER, MP_SPEAKER_ID_TBC}, + {PA_CHANNEL_POSITION_INVALID, -1} +}; + +static bool chmap_pa_from_mp(pa_channel_map *dst, struct mp_chmap *src) +{ + if (src->num > PA_CHANNELS_MAX) + return false; + dst->channels = src->num; + if (mp_chmap_equals(src, &(const struct mp_chmap)MP_CHMAP_INIT_MONO)) { + dst->map[0] = PA_CHANNEL_POSITION_MONO; + return true; + } + for (int n = 0; n < src->num; n++) { + int mp_speaker = src->speaker[n]; + int pa_speaker = PA_CHANNEL_POSITION_INVALID; + for (int i = 0; speaker_map[i][1] != -1; i++) { + if (speaker_map[i][1] == mp_speaker) { + pa_speaker = speaker_map[i][0]; + break; + } + } + if (pa_speaker == PA_CHANNEL_POSITION_INVALID) + return false; + dst->map[n] = pa_speaker; + } + return true; +} + +static bool select_chmap(struct ao *ao, pa_channel_map *dst) +{ + struct mp_chmap_sel sel = {0}; + for (int n = 0; speaker_map[n][1] != -1; n++) + mp_chmap_sel_add_speaker(&sel, speaker_map[n][1]); + return ao_chmap_sel_adjust(ao, &sel, &ao->channels) && + chmap_pa_from_mp(dst, &ao->channels); +} + +static void uninit(struct ao *ao) +{ + struct priv *priv = ao->priv; + + if (priv->mainloop) + pa_threaded_mainloop_stop(priv->mainloop); + + if (priv->stream) { 
+ pa_stream_disconnect(priv->stream); + pa_stream_unref(priv->stream); + priv->stream = NULL; + } + + if (priv->context) { + pa_context_disconnect(priv->context); + pa_context_unref(priv->context); + priv->context = NULL; + } + + if (priv->mainloop) { + pa_threaded_mainloop_free(priv->mainloop); + priv->mainloop = NULL; + } +} + +static int pa_init_boilerplate(struct ao *ao) +{ + struct priv *priv = ao->priv; + char *host = priv->cfg_host && priv->cfg_host[0] ? priv->cfg_host : NULL; + bool locked = false; + + if (!(priv->mainloop = pa_threaded_mainloop_new())) { + MP_ERR(ao, "Failed to allocate main loop\n"); + goto fail; + } + + if (pa_threaded_mainloop_start(priv->mainloop) < 0) + goto fail; + + pa_threaded_mainloop_lock(priv->mainloop); + locked = true; + + if (!(priv->context = pa_context_new(pa_threaded_mainloop_get_api( + priv->mainloop), ao->client_name))) + { + MP_ERR(ao, "Failed to allocate context\n"); + goto fail; + } + + MP_VERBOSE(ao, "Library version: %s\n", pa_get_library_version()); + MP_VERBOSE(ao, "Proto: %lu\n", + (long)pa_context_get_protocol_version(priv->context)); + MP_VERBOSE(ao, "Server proto: %lu\n", + (long)pa_context_get_server_protocol_version(priv->context)); + + pa_context_set_state_callback(priv->context, context_state_cb, ao); + pa_context_set_subscribe_callback(priv->context, subscribe_cb, ao); + + if (pa_context_connect(priv->context, host, 0, NULL) < 0) + goto fail; + + /* Wait until the context is ready */ + while (1) { + int state = pa_context_get_state(priv->context); + if (state == PA_CONTEXT_READY) + break; + if (!PA_CONTEXT_IS_GOOD(state)) + goto fail; + pa_threaded_mainloop_wait(priv->mainloop); + } + + pa_threaded_mainloop_unlock(priv->mainloop); + return 0; + +fail: + if (locked) + pa_threaded_mainloop_unlock(priv->mainloop); + + if (priv->context) { + pa_threaded_mainloop_lock(priv->mainloop); + if (!(pa_context_errno(priv->context) == PA_ERR_CONNECTIONREFUSED + && ao->probing)) + GENERIC_ERR_MSG("Init failed"); + 
pa_threaded_mainloop_unlock(priv->mainloop); + } + uninit(ao); + return -1; +} + +static bool set_format(struct ao *ao, pa_format_info *format) +{ + ao->format = af_fmt_from_planar(ao->format); + + format->encoding = map_digital_format(ao->format); + if (format->encoding == PA_ENCODING_PCM) { + const struct format_map *fmt_map = format_maps; + + while (fmt_map->mp_format != ao->format) { + if (fmt_map->mp_format == AF_FORMAT_UNKNOWN) { + MP_VERBOSE(ao, "Unsupported format, using default\n"); + fmt_map = format_maps; + break; + } + fmt_map++; + } + ao->format = fmt_map->mp_format; + + pa_format_info_set_sample_format(format, fmt_map->pa_format); + } + + struct pa_channel_map map; + + if (!select_chmap(ao, &map)) + return false; + + pa_format_info_set_rate(format, ao->samplerate); + pa_format_info_set_channels(format, ao->channels.num); + pa_format_info_set_channel_map(format, &map); + + return ao->samplerate < PA_RATE_MAX && pa_format_info_valid(format); +} + +static int init(struct ao *ao) +{ + pa_proplist *proplist = NULL; + pa_format_info *format = NULL; + struct priv *priv = ao->priv; + char *sink = ao->device; + + if (pa_init_boilerplate(ao) < 0) + return -1; + + pa_threaded_mainloop_lock(priv->mainloop); + + if (!(proplist = pa_proplist_new())) { + MP_ERR(ao, "Failed to allocate proplist\n"); + goto unlock_and_fail; + } + (void)pa_proplist_sets(proplist, PA_PROP_MEDIA_ICON_NAME, ao->client_name); + + if (!(format = pa_format_info_new())) + goto unlock_and_fail; + + if (!set_format(ao, format)) { + ao->channels = (struct mp_chmap) MP_CHMAP_INIT_STEREO; + ao->samplerate = 48000; + ao->format = AF_FORMAT_FLOAT; + if (!set_format(ao, format)) { + MP_ERR(ao, "Invalid audio format\n"); + goto unlock_and_fail; + } + } + + if (!(priv->stream = pa_stream_new_extended(priv->context, "audio stream", + &format, 1, proplist))) + goto unlock_and_fail; + + pa_format_info_free(format); + format = NULL; + + pa_proplist_free(proplist); + proplist = NULL; + + 
pa_stream_set_state_callback(priv->stream, stream_state_cb, ao); + pa_stream_set_write_callback(priv->stream, stream_request_cb, ao); + pa_stream_set_latency_update_callback(priv->stream, + stream_latency_update_cb, ao); + pa_stream_set_underflow_callback(priv->stream, underflow_cb, ao); + uint32_t buf_size = ao->samplerate * (priv->cfg_buffer / 1000.0) * + af_fmt_to_bytes(ao->format) * ao->channels.num; + pa_buffer_attr bufattr = { + .maxlength = -1, + .tlength = buf_size > 0 ? buf_size : -1, + .prebuf = 0, + .minreq = -1, + .fragsize = -1, + }; + + int flags = PA_STREAM_NOT_MONOTONIC | PA_STREAM_START_CORKED; + if (!priv->cfg_latency_hacks) + flags |= PA_STREAM_INTERPOLATE_TIMING|PA_STREAM_AUTO_TIMING_UPDATE; + + if (pa_stream_connect_playback(priv->stream, sink, &bufattr, + flags, NULL, NULL) < 0) + goto unlock_and_fail; + + /* Wait until the stream is ready */ + while (1) { + int state = pa_stream_get_state(priv->stream); + if (state == PA_STREAM_READY) + break; + if (!PA_STREAM_IS_GOOD(state)) + goto unlock_and_fail; + pa_threaded_mainloop_wait(priv->mainloop); + } + + if (pa_stream_is_suspended(priv->stream) && !priv->cfg_allow_suspended) { + MP_ERR(ao, "The stream is suspended. Bailing out.\n"); + goto unlock_and_fail; + } + + const pa_buffer_attr* final_bufattr = pa_stream_get_buffer_attr(priv->stream); + if(!final_bufattr) { + MP_ERR(ao, "PulseAudio didn't tell us what buffer sizes it set. 
Bailing out.\n"); + goto unlock_and_fail; + } + ao->device_buffer = final_bufattr->tlength / + af_fmt_to_bytes(ao->format) / ao->channels.num; + + pa_threaded_mainloop_unlock(priv->mainloop); + return 0; + +unlock_and_fail: + pa_threaded_mainloop_unlock(priv->mainloop); + + if (format) + pa_format_info_free(format); + + if (proplist) + pa_proplist_free(proplist); + + uninit(ao); + return -1; +} + +static void cork(struct ao *ao, bool pause) +{ + struct priv *priv = ao->priv; + pa_threaded_mainloop_lock(priv->mainloop); + priv->retval = 0; + if (waitop_no_unlock(priv, pa_stream_cork(priv->stream, pause, success_cb, ao)) + && priv->retval) + { + if (!pause) + priv->playing = true; + } else { + GENERIC_ERR_MSG("pa_stream_cork() failed"); + priv->playing = false; + } + pa_threaded_mainloop_unlock(priv->mainloop); +} + +// Play the specified data to the pulseaudio server +static bool audio_write(struct ao *ao, void **data, int samples) +{ + struct priv *priv = ao->priv; + bool res = true; + pa_threaded_mainloop_lock(priv->mainloop); + if (pa_stream_write(priv->stream, data[0], samples * ao->sstride, NULL, 0, + PA_SEEK_RELATIVE) < 0) { + GENERIC_ERR_MSG("pa_stream_write() failed"); + res = false; + } + pa_threaded_mainloop_unlock(priv->mainloop); + return res; +} + +static void start(struct ao *ao) +{ + cork(ao, false); +} + +// Reset the audio stream, i.e. 
flush the playback buffer on the server side +static void reset(struct ao *ao) +{ + // pa_stream_flush() works badly if not corked + cork(ao, true); + struct priv *priv = ao->priv; + pa_threaded_mainloop_lock(priv->mainloop); + priv->playing = false; + priv->retval = 0; + if (!waitop(priv, pa_stream_flush(priv->stream, success_cb, ao)) || + !priv->retval) + GENERIC_ERR_MSG("pa_stream_flush() failed"); +} + +static bool set_pause(struct ao *ao, bool paused) +{ + cork(ao, paused); + return true; +} + +static double get_delay_hackfixed(struct ao *ao) +{ + /* This code basically does what pa_stream_get_latency() _should_ + * do, but doesn't due to multiple known bugs in PulseAudio (at + * PulseAudio version 2.1). In particular, the timing interpolation + * mode (PA_STREAM_INTERPOLATE_TIMING) can return completely bogus + * values, and the non-interpolating code has a bug causing too + * large results at end of stream (so a stream never seems to finish). + * This code can still return wrong values in some cases due to known + * PulseAudio bugs that can not be worked around on the client side. + * + * We always query the server for latest timing info. This may take + * too long to work well with remote audio servers, but at least + * this should be enough to fix the normal local playback case. 
+ */ + struct priv *priv = ao->priv; + if (!waitop_no_unlock(priv, pa_stream_update_timing_info(priv->stream, + NULL, NULL))) + { + GENERIC_ERR_MSG("pa_stream_update_timing_info() failed"); + return 0; + } + const pa_timing_info *ti = pa_stream_get_timing_info(priv->stream); + if (!ti) { + GENERIC_ERR_MSG("pa_stream_get_timing_info() failed"); + return 0; + } + const struct pa_sample_spec *ss = pa_stream_get_sample_spec(priv->stream); + if (!ss) { + GENERIC_ERR_MSG("pa_stream_get_sample_spec() failed"); + return 0; + } + // data left in PulseAudio's main buffers (not written to sink yet) + int64_t latency = pa_bytes_to_usec(ti->write_index - ti->read_index, ss); + // since this info may be from a while ago, playback has progressed since + latency -= ti->transport_usec; + // data already moved from buffers to sink, but not played yet + int64_t sink_latency = ti->sink_usec; + if (!ti->playing) + /* At the end of a stream, part of the data "left" in the sink may + * be padding silence after the end; that should be subtracted to + * get the amount of real audio from our stream. This adjustment + * is missing from Pulseaudio's own get_latency calculations + * (as of PulseAudio 2.1). */ + sink_latency -= pa_bytes_to_usec(ti->since_underrun, ss); + if (sink_latency > 0) + latency += sink_latency; + if (latency < 0) + latency = 0; + return latency / 1e6; +} + +static double get_delay_pulse(struct ao *ao) +{ + struct priv *priv = ao->priv; + pa_usec_t latency = (pa_usec_t) -1; + while (pa_stream_get_latency(priv->stream, &latency, NULL) < 0) { + if (pa_context_errno(priv->context) != PA_ERR_NODATA) { + GENERIC_ERR_MSG("pa_stream_get_latency() failed"); + break; + } + /* Wait until latency data is available again */ + pa_threaded_mainloop_wait(priv->mainloop); + } + return latency == (pa_usec_t) -1 ? 
0 : latency / 1000000.0; +} + +static void audio_get_state(struct ao *ao, struct mp_pcm_state *state) +{ + struct priv *priv = ao->priv; + + pa_threaded_mainloop_lock(priv->mainloop); + + size_t space = pa_stream_writable_size(priv->stream); + state->free_samples = space == (size_t)-1 ? 0 : space / ao->sstride; + + state->queued_samples = ao->device_buffer - state->free_samples; // dunno + + if (priv->cfg_latency_hacks) { + state->delay = get_delay_hackfixed(ao); + } else { + state->delay = get_delay_pulse(ao); + } + + state->playing = priv->playing; + + pa_threaded_mainloop_unlock(priv->mainloop); + + // Otherwise, PA will keep hammering us for underruns (which it does instead + // of stopping the stream automatically). + if (!state->playing && priv->underrun_signalled) { + reset(ao); + priv->underrun_signalled = false; + } +} + +/* A callback function that is called when the + * pa_context_get_sink_input_info() operation completes. Saves the + * volume field of the specified structure to the global variable volume. + */ +static void info_func(struct pa_context *c, const struct pa_sink_input_info *i, + int is_last, void *userdata) +{ + struct ao *ao = userdata; + struct priv *priv = ao->priv; + if (is_last < 0) { + GENERIC_ERR_MSG("Failed to get sink input info"); + return; + } + if (!i) + return; + priv->pi = *i; + pa_threaded_mainloop_signal(priv->mainloop, 0); +} + +static int control(struct ao *ao, enum aocontrol cmd, void *arg) +{ + struct priv *priv = ao->priv; + switch (cmd) { + case AOCONTROL_GET_MUTE: + case AOCONTROL_GET_VOLUME: { + uint32_t devidx = pa_stream_get_index(priv->stream); + pa_threaded_mainloop_lock(priv->mainloop); + if (!waitop(priv, pa_context_get_sink_input_info(priv->context, devidx, + info_func, ao))) { + GENERIC_ERR_MSG("pa_context_get_sink_input_info() failed"); + return CONTROL_ERROR; + } + // Warning: some information in pi might be unaccessible, because + // we naively copied the struct, without updating pointers etc. 
+ // Pointers might point to invalid data, accessors might fail. + if (cmd == AOCONTROL_GET_VOLUME) { + float *vol = arg; + *vol = VOL_PA2MP(pa_cvolume_avg(&priv->pi.volume)); + } else if (cmd == AOCONTROL_GET_MUTE) { + bool *mute = arg; + *mute = priv->pi.mute; + } + return CONTROL_OK; + } + + case AOCONTROL_SET_MUTE: + case AOCONTROL_SET_VOLUME: { + pa_threaded_mainloop_lock(priv->mainloop); + priv->retval = 0; + uint32_t stream_index = pa_stream_get_index(priv->stream); + if (cmd == AOCONTROL_SET_VOLUME) { + const float *vol = arg; + struct pa_cvolume volume; + + pa_cvolume_reset(&volume, ao->channels.num); + pa_cvolume_set(&volume, volume.channels, VOL_MP2PA(*vol)); + if (!waitop(priv, pa_context_set_sink_input_volume(priv->context, + stream_index, + &volume, + context_success_cb, ao)) || + !priv->retval) { + GENERIC_ERR_MSG("pa_context_set_sink_input_volume() failed"); + return CONTROL_ERROR; + } + } else if (cmd == AOCONTROL_SET_MUTE) { + const bool *mute = arg; + if (!waitop(priv, pa_context_set_sink_input_mute(priv->context, + stream_index, + *mute, + context_success_cb, ao)) || + !priv->retval) { + GENERIC_ERR_MSG("pa_context_set_sink_input_mute() failed"); + return CONTROL_ERROR; + } + } else { + MP_ASSERT_UNREACHABLE(); + } + return CONTROL_OK; + } + + case AOCONTROL_UPDATE_STREAM_TITLE: { + char *title = (char *)arg; + pa_threaded_mainloop_lock(priv->mainloop); + if (!waitop(priv, pa_stream_set_name(priv->stream, title, + success_cb, ao))) + { + GENERIC_ERR_MSG("pa_stream_set_name() failed"); + return CONTROL_ERROR; + } + return CONTROL_OK; + } + + default: + return CONTROL_UNKNOWN; + } +} + +struct sink_cb_ctx { + struct ao *ao; + struct ao_device_list *list; +}; + +static void sink_info_cb(pa_context *c, const pa_sink_info *i, int eol, void *ud) +{ + struct sink_cb_ctx *ctx = ud; + struct priv *priv = ctx->ao->priv; + + if (eol) { + pa_threaded_mainloop_signal(priv->mainloop, 0); // wakeup waitop() + return; + } + + struct ao_device_desc entry = 
{.name = i->name, .desc = i->description}; + ao_device_list_add(ctx->list, ctx->ao, &entry); +} + +static int hotplug_init(struct ao *ao) +{ + struct priv *priv = ao->priv; + if (pa_init_boilerplate(ao) < 0) + return -1; + + pa_threaded_mainloop_lock(priv->mainloop); + waitop(priv, pa_context_subscribe(priv->context, PA_SUBSCRIPTION_MASK_SINK, + context_success_cb, ao)); + + return 0; +} + +static void list_devs(struct ao *ao, struct ao_device_list *list) +{ + struct priv *priv = ao->priv; + struct sink_cb_ctx ctx = {ao, list}; + + pa_threaded_mainloop_lock(priv->mainloop); + waitop(priv, pa_context_get_sink_info_list(priv->context, sink_info_cb, &ctx)); +} + +static void hotplug_uninit(struct ao *ao) +{ + uninit(ao); +} + +#define OPT_BASE_STRUCT struct priv + +const struct ao_driver audio_out_pulse = { + .description = "PulseAudio audio output", + .name = "pulse", + .control = control, + .init = init, + .uninit = uninit, + .reset = reset, + .get_state = audio_get_state, + .write = audio_write, + .start = start, + .set_pause = set_pause, + .hotplug_init = hotplug_init, + .hotplug_uninit = hotplug_uninit, + .list_devs = list_devs, + .priv_size = sizeof(struct priv), + .priv_defaults = &(const struct priv) { + .cfg_buffer = 100, + }, + .options = (const struct m_option[]) { + {"host", OPT_STRING(cfg_host)}, + {"buffer", OPT_CHOICE(cfg_buffer, {"native", 0}), + M_RANGE(1, 2000)}, + {"latency-hacks", OPT_BOOL(cfg_latency_hacks)}, + {"allow-suspended", OPT_BOOL(cfg_allow_suspended)}, + {0} + }, + .options_prefix = "pulse", +}; diff --git a/audio/out/ao_sdl.c b/audio/out/ao_sdl.c new file mode 100644 index 0000000..5a6a58b --- /dev/null +++ b/audio/out/ao_sdl.c @@ -0,0 +1,216 @@ +/* + * audio output driver for SDL 1.2+ + * Copyright (C) 2012 Rudolf Polzer <divVerent@xonotic.org> + * + * This file is part of mpv. 
+ * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "audio/format.h" +#include "mpv_talloc.h" +#include "ao.h" +#include "internal.h" +#include "common/common.h" +#include "common/msg.h" +#include "options/m_option.h" +#include "osdep/timer.h" + +#include <SDL.h> + +struct priv +{ + bool paused; + + float buflen; +}; + +static const int fmtmap[][2] = { + {AF_FORMAT_U8, AUDIO_U8}, + {AF_FORMAT_S16, AUDIO_S16SYS}, +#ifdef AUDIO_S32SYS + {AF_FORMAT_S32, AUDIO_S32SYS}, +#endif +#ifdef AUDIO_F32SYS + {AF_FORMAT_FLOAT, AUDIO_F32SYS}, +#endif + {0} +}; + +static void audio_callback(void *userdata, Uint8 *stream, int len) +{ + struct ao *ao = userdata; + + void *data[1] = {stream}; + + if (len % ao->sstride) + MP_ERR(ao, "SDL audio callback not sample aligned"); + + // Time this buffer will take, plus assume 1 period (1 callback invocation) + // fixed latency. 
+    double delay = 2 * len / (double)ao->bps;
+
+    ao_read_data(ao, data, len / ao->sstride, mp_time_ns() + MP_TIME_S_TO_NS(delay));
+}
+
+static void uninit(struct ao *ao)
+{
+    struct priv *priv = ao->priv;
+    if (!priv)
+        return;
+
+    if (SDL_WasInit(SDL_INIT_AUDIO)) {
+        // make sure the callback exits
+        SDL_LockAudio();
+
+        // close audio device
+        SDL_QuitSubSystem(SDL_INIT_AUDIO);
+    }
+}
+
+// Round x up to the next power of two (returns x unchanged if it already
+// is one; returns 1 for x == 0).
+static unsigned int ceil_power_of_two(unsigned int x)
+{
+    // Fix: the accumulator was a signed int, making "y < x" a signed/unsigned
+    // comparison and "y *= 2" signed-overflow UB for large x. Use unsigned
+    // arithmetic and stop before the shift would wrap to 0.
+    unsigned int y = 1;
+    while (y < x && (y << 1) != 0)
+        y <<= 1;
+    return y;
+}
+
+static int init(struct ao *ao)
+{
+    if (SDL_WasInit(SDL_INIT_AUDIO)) {
+        MP_ERR(ao, "already initialized\n");
+        return -1;
+    }
+
+    struct priv *priv = ao->priv;
+
+    if (SDL_InitSubSystem(SDL_INIT_AUDIO)) {
+        if (!ao->probing)
+            MP_ERR(ao, "SDL_Init failed\n");
+        uninit(ao);
+        return -1;
+    }
+
+    struct mp_chmap_sel sel = {0};
+    mp_chmap_sel_add_waveext_def(&sel);
+    if (!ao_chmap_sel_adjust(ao, &sel, &ao->channels)) {
+        uninit(ao);
+        return -1;
+    }
+
+    ao->format = af_fmt_from_planar(ao->format);
+
+    SDL_AudioSpec desired = {0};
+    desired.format = AUDIO_S16SYS;
+    for (int n = 0; fmtmap[n][0]; n++) {
+        if (ao->format == fmtmap[n][0]) {
+            desired.format = fmtmap[n][1];
+            break;
+        }
+    }
+    desired.freq = ao->samplerate;
+    desired.channels = ao->channels.num;
+    if (priv->buflen) {
+        desired.samples = MPMIN(32768, ceil_power_of_two(ao->samplerate *
+                                                         priv->buflen));
+    }
+    desired.callback = audio_callback;
+    desired.userdata = ao;
+
+    MP_VERBOSE(ao, "requested format: %d Hz, %d channels, %x, "
+               "buffer size: %d samples\n",
+               (int) desired.freq, (int) desired.channels,
+               (int) desired.format, (int) desired.samples);
+
+    SDL_AudioSpec obtained = desired;
+    if (SDL_OpenAudio(&desired, &obtained)) {
+        if (!ao->probing)
+            MP_ERR(ao, "could not open audio: %s\n", SDL_GetError());
+        uninit(ao);
+        return -1;
+    }
+
+    MP_VERBOSE(ao, "obtained format: %d Hz, %d channels, %x, "
+               "buffer size: %d samples\n",
+               (int) obtained.freq, (int) obtained.channels,
+               (int) obtained.format, (int) 
obtained.samples); + + // The sample count is usually the number of samples the callback requests, + // which we assume is the period size. Normally, ao.c will allocate a large + // enough buffer. But in case the period size should be pathologically + // large, this will help. + ao->device_buffer = 3 * obtained.samples; + + ao->format = 0; + for (int n = 0; fmtmap[n][0]; n++) { + if (obtained.format == fmtmap[n][1]) { + ao->format = fmtmap[n][0]; + break; + } + } + if (!ao->format) { + if (!ao->probing) + MP_ERR(ao, "could not find matching format\n"); + uninit(ao); + return -1; + } + + if (!ao_chmap_sel_get_def(ao, &sel, &ao->channels, obtained.channels)) { + uninit(ao); + return -1; + } + + ao->samplerate = obtained.freq; + + priv->paused = 1; + + return 1; +} + +static void reset(struct ao *ao) +{ + struct priv *priv = ao->priv; + if (!priv->paused) + SDL_PauseAudio(SDL_TRUE); + priv->paused = 1; +} + +static void start(struct ao *ao) +{ + struct priv *priv = ao->priv; + if (priv->paused) + SDL_PauseAudio(SDL_FALSE); + priv->paused = 0; +} + +#define OPT_BASE_STRUCT struct priv + +const struct ao_driver audio_out_sdl = { + .description = "SDL Audio", + .name = "sdl", + .init = init, + .uninit = uninit, + .reset = reset, + .start = start, + .priv_size = sizeof(struct priv), + .priv_defaults = &(const struct priv) { + .buflen = 0, // use SDL default + }, + .options = (const struct m_option[]) { + {"buflen", OPT_FLOAT(buflen)}, + {0} + }, + .options_prefix = "sdl", +}; diff --git a/audio/out/ao_sndio.c b/audio/out/ao_sndio.c new file mode 100644 index 0000000..fce7139 --- /dev/null +++ b/audio/out/ao_sndio.c @@ -0,0 +1,321 @@ +/* + * Copyright (c) 2008 Alexandre Ratchov <alex@caoua.org> + * Copyright (c) 2013 Christian Neukirchen <chneukirchen@gmail.com> + * Copyright (c) 2020 Rozhuk Ivan <rozhuk.im@gmail.com> + * Copyright (c) 2021 Andrew Krasavin <noiseless-ak@yandex.ru> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose 
with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include <sys/types.h> +#include <poll.h> +#include <errno.h> +#include <sndio.h> + +#include "options/m_option.h" +#include "common/msg.h" + +#include "audio/format.h" +#include "ao.h" +#include "internal.h" + +struct priv { + struct sio_hdl *hdl; + struct sio_par par; + int delay; + bool playing; + int vol; + int havevol; + struct pollfd *pfd; +}; + + +static const struct mp_chmap sndio_layouts[] = { + {0}, /* empty */ + {1, {MP_SPEAKER_ID_FL}}, /* mono */ + MP_CHMAP2(FL, FR), /* stereo */ + {0}, /* 2.1 */ + MP_CHMAP4(FL, FR, BL, BR), /* 4.0 */ + {0}, /* 5.0 */ + MP_CHMAP6(FL, FR, BL, BR, FC, LFE), /* 5.1 */ + {0}, /* 6.1 */ + MP_CHMAP8(FL, FR, BL, BR, FC, LFE, SL, SR), /* 7.1 */ + /* Above is the fixed channel assignment for sndio, since we need to + * fill all channels and cannot insert silence, not all layouts are + * supported. + * NOTE: MP_SPEAKER_ID_NA could be used to add padding channels. */ +}; + +static void uninit(struct ao *ao); + + +/* Make libsndio call movecb(). */ +static void process_events(struct ao *ao) +{ + struct priv *p = ao->priv; + + int n = sio_pollfd(p->hdl, p->pfd, POLLOUT); + while (poll(p->pfd, n, 0) < 0 && errno == EINTR); + + sio_revents(p->hdl, p->pfd); +} + +/* Call-back invoked to notify of the hardware position. 
*/ +static void movecb(void *addr, int delta) +{ + struct ao *ao = addr; + struct priv *p = ao->priv; + + p->delay -= delta; +} + +/* Call-back invoked to notify about volume changes. */ +static void volcb(void *addr, unsigned newvol) +{ + struct ao *ao = addr; + struct priv *p = ao->priv; + + p->vol = newvol; +} + +static int init(struct ao *ao) +{ + struct priv *p = ao->priv; + struct mp_chmap_sel sel = {0}; + size_t i; + struct af_to_par { + int format, bits, sig; + }; + static const struct af_to_par af_to_par[] = { + {AF_FORMAT_U8, 8, 0}, + {AF_FORMAT_S16, 16, 1}, + {AF_FORMAT_S32, 32, 1}, + }; + const struct af_to_par *ap; + const char *device = ((ao->device) ? ao->device : SIO_DEVANY); + + /* Opening device. */ + MP_VERBOSE(ao, "Using '%s' audio device.\n", device); + p->hdl = sio_open(device, SIO_PLAY, 0); + if (p->hdl == NULL) { + MP_ERR(ao, "Can't open audio device %s.\n", device); + goto err_out; + } + + sio_initpar(&p->par); + + /* Selecting sound format. */ + ao->format = af_fmt_from_planar(ao->format); + + p->par.bits = 16; + p->par.sig = 1; + p->par.le = SIO_LE_NATIVE; + for (i = 0; i < MP_ARRAY_SIZE(af_to_par); i++) { + ap = &af_to_par[i]; + if (ap->format == ao->format) { + p->par.bits = ap->bits; + p->par.sig = ap->sig; + break; + } + } + + p->par.rate = ao->samplerate; + + /* Channels count. */ + for (i = 0; i < MP_ARRAY_SIZE(sndio_layouts); i++) { + mp_chmap_sel_add_map(&sel, &sndio_layouts[i]); + } + if (!ao_chmap_sel_adjust(ao, &sel, &ao->channels)) + goto err_out; + + p->par.pchan = ao->channels.num; + p->par.appbufsz = p->par.rate * 250 / 1000; /* 250ms buffer */ + p->par.round = p->par.rate * 10 / 1000; /* 10ms block size */ + + if (!sio_setpar(p->hdl, &p->par)) { + MP_ERR(ao, "couldn't set params\n"); + goto err_out; + } + + /* Get current sound params. 
*/ + if (!sio_getpar(p->hdl, &p->par)) { + MP_ERR(ao, "couldn't get params\n"); + goto err_out; + } + if (p->par.bps > 1 && p->par.le != SIO_LE_NATIVE) { + MP_ERR(ao, "swapped endian output not supported\n"); + goto err_out; + } + + /* Update sound params. */ + if (p->par.bits == 8 && p->par.bps == 1 && !p->par.sig) { + ao->format = AF_FORMAT_U8; + } else if (p->par.bits == 16 && p->par.bps == 2 && p->par.sig) { + ao->format = AF_FORMAT_S16; + } else if ((p->par.bits == 32 || p->par.msb) && p->par.bps == 4 && p->par.sig) { + ao->format = AF_FORMAT_S32; + } else { + MP_ERR(ao, "couldn't set format\n"); + goto err_out; + } + + p->havevol = sio_onvol(p->hdl, volcb, ao); + sio_onmove(p->hdl, movecb, ao); + + p->pfd = talloc_array_ptrtype(p, p->pfd, sio_nfds(p->hdl)); + if (!p->pfd) + goto err_out; + + ao->device_buffer = p->par.bufsz; + MP_VERBOSE(ao, "bufsz = %i, appbufsz = %i, round = %i\n", + p->par.bufsz, p->par.appbufsz, p->par.round); + + p->delay = 0; + p->playing = false; + if (!sio_start(p->hdl)) { + MP_ERR(ao, "start: sio_start() fail.\n"); + goto err_out; + } + + return 0; + +err_out: + uninit(ao); + return -1; +} + +static void uninit(struct ao *ao) +{ + struct priv *p = ao->priv; + + if (p->hdl) { + sio_close(p->hdl); + p->hdl = NULL; + } + p->pfd = NULL; + p->playing = false; +} + +static int control(struct ao *ao, enum aocontrol cmd, void *arg) +{ + struct priv *p = ao->priv; + float *vol = arg; + + switch (cmd) { + case AOCONTROL_GET_VOLUME: + if (!p->havevol) + return CONTROL_FALSE; + *vol = p->vol * 100 / SIO_MAXVOL; + break; + case AOCONTROL_SET_VOLUME: + if (!p->havevol) + return CONTROL_FALSE; + sio_setvol(p->hdl, *vol * SIO_MAXVOL / 100); + break; + default: + return CONTROL_UNKNOWN; + } + return CONTROL_OK; +} + +static void reset(struct ao *ao) +{ + struct priv *p = ao->priv; + + if (p->playing) { + p->playing = false; + +#if HAVE_SNDIO_1_9 + if (!sio_flush(p->hdl)) { + MP_ERR(ao, "reset: couldn't sio_flush()\n"); +#else + if (!sio_stop(p->hdl)) 
{ + MP_ERR(ao, "reset: couldn't sio_stop()\n"); +#endif + } + p->delay = 0; + if (!sio_start(p->hdl)) { + MP_ERR(ao, "reset: sio_start() fail.\n"); + } + } +} + +static void start(struct ao *ao) +{ + struct priv *p = ao->priv; + + p->playing = true; + process_events(ao); +} + +static bool audio_write(struct ao *ao, void **data, int samples) +{ + struct priv *p = ao->priv; + const size_t size = (samples * ao->sstride); + size_t rc; + + rc = sio_write(p->hdl, data[0], size); + if (rc != size) { + MP_WARN(ao, "audio_write: unexpected partial write: required: %zu, written: %zu.\n", + size, rc); + reset(ao); + p->playing = false; + return false; + } + p->delay += samples; + + return true; +} + +static void get_state(struct ao *ao, struct mp_pcm_state *state) +{ + struct priv *p = ao->priv; + + process_events(ao); + + /* how many samples we can play without blocking */ + state->free_samples = ao->device_buffer - p->delay; + state->free_samples = state->free_samples / p->par.round * p->par.round; + /* how many samples are already in the buffer to be played */ + state->queued_samples = p->delay; + /* delay in seconds between first and last sample in buffer */ + state->delay = p->delay / (double)p->par.rate; + + /* report unexpected EOF / underrun */ + if ((state->queued_samples && state->queued_samples && + (state->queued_samples < state->free_samples) && + p->playing) || sio_eof(p->hdl)) + { + MP_VERBOSE(ao, "get_state: EOF/underrun detected.\n"); + MP_VERBOSE(ao, "get_state: free: %d, queued: %d, delay: %lf\n", \ + state->free_samples, state->queued_samples, state->delay); + p->playing = false; + state->playing = p->playing; + ao_wakeup_playthread(ao); + } else { + state->playing = p->playing; + } +} + +const struct ao_driver audio_out_sndio = { + .name = "sndio", + .description = "sndio audio output", + .init = init, + .uninit = uninit, + .control = control, + .reset = reset, + .start = start, + .write = audio_write, + .get_state = get_state, + .priv_size = 
sizeof(struct priv), +}; diff --git a/audio/out/ao_wasapi.c b/audio/out/ao_wasapi.c new file mode 100644 index 0000000..b201f26 --- /dev/null +++ b/audio/out/ao_wasapi.c @@ -0,0 +1,504 @@ +/* + * This file is part of mpv. + * + * Original author: Jonathan Yong <10walls@gmail.com> + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <math.h> +#include <inttypes.h> +#include <libavutil/mathematics.h> + +#include "options/m_option.h" +#include "osdep/threads.h" +#include "osdep/timer.h" +#include "osdep/io.h" +#include "misc/dispatch.h" +#include "ao_wasapi.h" + +// naive av_rescale for unsigned +static UINT64 uint64_scale(UINT64 x, UINT64 num, UINT64 den) +{ + return (x / den) * num + + ((x % den) * (num / den)) + + ((x % den) * (num % den)) / den; +} + +static HRESULT get_device_delay(struct wasapi_state *state, double *delay_ns) +{ + UINT64 sample_count = atomic_load(&state->sample_count); + UINT64 position, qpc_position; + HRESULT hr; + + hr = IAudioClock_GetPosition(state->pAudioClock, &position, &qpc_position); + EXIT_ON_ERROR(hr); + // GetPosition succeeded, but the result may be + // inaccurate due to the length of the call + // http://msdn.microsoft.com/en-us/library/windows/desktop/dd370889%28v=vs.85%29.aspx + if (hr == S_FALSE) + MP_VERBOSE(state, "Possibly inaccurate device position.\n"); + + // convert position to number of samples careful to 
avoid overflow + UINT64 sample_position = uint64_scale(position, + state->format.Format.nSamplesPerSec, + state->clock_frequency); + INT64 diff = sample_count - sample_position; + *delay_ns = diff * 1e9 / state->format.Format.nSamplesPerSec; + + // Correct for any delay in IAudioClock_GetPosition above. + // This should normally be very small (<1 us), but just in case. . . + LARGE_INTEGER qpc; + QueryPerformanceCounter(&qpc); + INT64 qpc_diff = av_rescale(qpc.QuadPart, 10000000, state->qpc_frequency.QuadPart) + - qpc_position; + // ignore the above calculation if it yields more than 10 seconds (due to + // possible overflow inside IAudioClock_GetPosition) + if (qpc_diff < 10 * 10000000) { + *delay_ns -= qpc_diff * 100.0; // convert to ns + } else { + MP_VERBOSE(state, "Insane qpc delay correction of %g seconds. " + "Ignoring it.\n", qpc_diff / 10000000.0); + } + + if (sample_count > 0 && *delay_ns <= 0) { + MP_WARN(state, "Under-run: Device delay: %g ns\n", *delay_ns); + } else { + MP_TRACE(state, "Device delay: %g ns\n", *delay_ns); + } + + return S_OK; +exit_label: + MP_ERR(state, "Error getting device delay: %s\n", mp_HRESULT_to_str(hr)); + return hr; +} + +static bool thread_feed(struct ao *ao) +{ + struct wasapi_state *state = ao->priv; + HRESULT hr; + + UINT32 frame_count = state->bufferFrameCount; + UINT32 padding; + hr = IAudioClient_GetCurrentPadding(state->pAudioClient, &padding); + EXIT_ON_ERROR(hr); + bool refill = false; + if (state->share_mode == AUDCLNT_SHAREMODE_SHARED) { + // Return if there's nothing to do. + if (frame_count <= padding) + return false; + // In shared mode, there is only one buffer of size bufferFrameCount. + // We must therefore take care not to overwrite the samples that have + // yet to play. + frame_count -= padding; + } else if (padding >= 2 * frame_count) { + // In exclusive mode, we exchange entire buffers of size + // bufferFrameCount with the device. 
If there are already two such + // full buffers waiting to play, there is no work to do. + return false; + } else if (padding < frame_count) { + // If there is not at least one full buffer of audio queued to play in + // exclusive mode, call this function again immediately to try and catch + // up and avoid a cascade of under-runs. WASAPI doesn't seem to be smart + // enough to send more feed events when it gets behind. + refill = true; + } + MP_TRACE(ao, "Frame to fill: %"PRIu32". Padding: %"PRIu32"\n", + frame_count, padding); + + double delay_ns; + hr = get_device_delay(state, &delay_ns); + EXIT_ON_ERROR(hr); + // add the buffer delay + delay_ns += frame_count * 1e9 / state->format.Format.nSamplesPerSec; + + BYTE *pData; + hr = IAudioRenderClient_GetBuffer(state->pRenderClient, + frame_count, &pData); + EXIT_ON_ERROR(hr); + + BYTE *data[1] = {pData}; + + ao_read_data_converted(ao, &state->convert_format, + (void **)data, frame_count, + mp_time_ns() + (int64_t)llrint(delay_ns)); + + // note, we can't use ao_read_data return value here since we already + // committed to frame_count above in the GetBuffer call + hr = IAudioRenderClient_ReleaseBuffer(state->pRenderClient, + frame_count, 0); + EXIT_ON_ERROR(hr); + + atomic_fetch_add(&state->sample_count, frame_count); + + return refill; +exit_label: + MP_ERR(state, "Error feeding audio: %s\n", mp_HRESULT_to_str(hr)); + MP_VERBOSE(ao, "Requesting ao reload\n"); + ao_request_reload(ao); + return false; +} + +static void thread_reset(struct ao *ao) +{ + struct wasapi_state *state = ao->priv; + HRESULT hr; + MP_DBG(state, "Thread Reset\n"); + hr = IAudioClient_Stop(state->pAudioClient); + if (FAILED(hr)) + MP_ERR(state, "IAudioClient_Stop returned: %s\n", mp_HRESULT_to_str(hr)); + + hr = IAudioClient_Reset(state->pAudioClient); + if (FAILED(hr)) + MP_ERR(state, "IAudioClient_Reset returned: %s\n", mp_HRESULT_to_str(hr)); + + atomic_store(&state->sample_count, 0); +} + +static void thread_resume(struct ao *ao) +{ + struct 
wasapi_state *state = ao->priv; + MP_DBG(state, "Thread Resume\n"); + thread_reset(ao); + thread_feed(ao); + + HRESULT hr = IAudioClient_Start(state->pAudioClient); + if (FAILED(hr)) { + MP_ERR(state, "IAudioClient_Start returned %s\n", + mp_HRESULT_to_str(hr)); + } +} + +static void thread_wakeup(void *ptr) +{ + struct ao *ao = ptr; + struct wasapi_state *state = ao->priv; + SetEvent(state->hWake); +} + +static void set_thread_state(struct ao *ao, + enum wasapi_thread_state thread_state) +{ + struct wasapi_state *state = ao->priv; + atomic_store(&state->thread_state, thread_state); + thread_wakeup(ao); +} + +static DWORD __stdcall AudioThread(void *lpParameter) +{ + struct ao *ao = lpParameter; + struct wasapi_state *state = ao->priv; + mp_thread_set_name("ao/wasapi"); + CoInitializeEx(NULL, COINIT_APARTMENTTHREADED); + + state->init_ok = wasapi_thread_init(ao); + SetEvent(state->hInitDone); + if (!state->init_ok) + goto exit_label; + + MP_DBG(ao, "Entering dispatch loop\n"); + while (true) { + if (WaitForSingleObject(state->hWake, INFINITE) != WAIT_OBJECT_0) + MP_ERR(ao, "Unexpected return value from WaitForSingleObject\n"); + + mp_dispatch_queue_process(state->dispatch, 0); + + int thread_state = atomic_load(&state->thread_state); + switch (thread_state) { + case WASAPI_THREAD_FEED: + // fill twice on under-full buffer (see comment in thread_feed) + if (thread_feed(ao) && thread_feed(ao)) + MP_ERR(ao, "Unable to fill buffer fast enough\n"); + break; + case WASAPI_THREAD_RESET: + thread_reset(ao); + break; + case WASAPI_THREAD_RESUME: + thread_resume(ao); + break; + case WASAPI_THREAD_SHUTDOWN: + thread_reset(ao); + goto exit_label; + default: + MP_ERR(ao, "Unhandled thread state: %d\n", thread_state); + } + // the default is to feed unless something else is requested + atomic_compare_exchange_strong(&state->thread_state, &thread_state, + WASAPI_THREAD_FEED); + } +exit_label: + wasapi_thread_uninit(ao); + + CoUninitialize(); + MP_DBG(ao, "Thread return\n"); + 
return 0; +} + +static void uninit(struct ao *ao) +{ + MP_DBG(ao, "Uninit wasapi\n"); + struct wasapi_state *state = ao->priv; + if (state->hWake) + set_thread_state(ao, WASAPI_THREAD_SHUTDOWN); + + if (state->hAudioThread && + WaitForSingleObject(state->hAudioThread, INFINITE) != WAIT_OBJECT_0) + { + MP_ERR(ao, "Unexpected return value from WaitForSingleObject " + "while waiting for audio thread to terminate\n"); + } + + SAFE_DESTROY(state->hInitDone, CloseHandle(state->hInitDone)); + SAFE_DESTROY(state->hWake, CloseHandle(state->hWake)); + SAFE_DESTROY(state->hAudioThread,CloseHandle(state->hAudioThread)); + + wasapi_change_uninit(ao); + + talloc_free(state->deviceID); + + CoUninitialize(); + MP_DBG(ao, "Uninit wasapi done\n"); +} + +static int init(struct ao *ao) +{ + MP_DBG(ao, "Init wasapi\n"); + CoInitializeEx(NULL, COINIT_MULTITHREADED); + + struct wasapi_state *state = ao->priv; + state->log = ao->log; + + state->opt_exclusive |= ao->init_flags & AO_INIT_EXCLUSIVE; + +#if !HAVE_UWP + state->deviceID = wasapi_find_deviceID(ao); + if (!state->deviceID) { + uninit(ao); + return -1; + } +#endif + + if (state->deviceID) + wasapi_change_init(ao, false); + + state->hInitDone = CreateEventW(NULL, FALSE, FALSE, NULL); + state->hWake = CreateEventW(NULL, FALSE, FALSE, NULL); + if (!state->hInitDone || !state->hWake) { + MP_FATAL(ao, "Error creating events\n"); + uninit(ao); + return -1; + } + + state->dispatch = mp_dispatch_create(state); + mp_dispatch_set_wakeup_fn(state->dispatch, thread_wakeup, ao); + + state->init_ok = false; + state->hAudioThread = CreateThread(NULL, 0, &AudioThread, ao, 0, NULL); + if (!state->hAudioThread) { + MP_FATAL(ao, "Failed to create audio thread\n"); + uninit(ao); + return -1; + } + + WaitForSingleObject(state->hInitDone, INFINITE); // wait on init complete + SAFE_DESTROY(state->hInitDone,CloseHandle(state->hInitDone)); + if (!state->init_ok) { + if (!ao->probing) + MP_FATAL(ao, "Received failure from audio thread\n"); + uninit(ao); + 
return -1; + } + + MP_DBG(ao, "Init wasapi done\n"); + return 0; +} + +static int thread_control_exclusive(struct ao *ao, enum aocontrol cmd, void *arg) +{ + struct wasapi_state *state = ao->priv; + if (!state->pEndpointVolume) + return CONTROL_UNKNOWN; + + switch (cmd) { + case AOCONTROL_GET_VOLUME: + case AOCONTROL_SET_VOLUME: + if (!(state->vol_hw_support & ENDPOINT_HARDWARE_SUPPORT_VOLUME)) + return CONTROL_FALSE; + break; + case AOCONTROL_GET_MUTE: + case AOCONTROL_SET_MUTE: + if (!(state->vol_hw_support & ENDPOINT_HARDWARE_SUPPORT_MUTE)) + return CONTROL_FALSE; + break; + } + + float volume; + BOOL mute; + switch (cmd) { + case AOCONTROL_GET_VOLUME: + IAudioEndpointVolume_GetMasterVolumeLevelScalar( + state->pEndpointVolume, &volume); + *(float *)arg = volume; + return CONTROL_OK; + case AOCONTROL_SET_VOLUME: + volume = (*(float *)arg) / 100.f; + IAudioEndpointVolume_SetMasterVolumeLevelScalar( + state->pEndpointVolume, volume, NULL); + return CONTROL_OK; + case AOCONTROL_GET_MUTE: + IAudioEndpointVolume_GetMute(state->pEndpointVolume, &mute); + *(bool *)arg = mute; + return CONTROL_OK; + case AOCONTROL_SET_MUTE: + mute = *(bool *)arg; + IAudioEndpointVolume_SetMute(state->pEndpointVolume, mute, NULL); + return CONTROL_OK; + } + return CONTROL_UNKNOWN; +} + +static int thread_control_shared(struct ao *ao, enum aocontrol cmd, void *arg) +{ + struct wasapi_state *state = ao->priv; + if (!state->pAudioVolume) + return CONTROL_UNKNOWN; + + float volume; + BOOL mute; + switch(cmd) { + case AOCONTROL_GET_VOLUME: + ISimpleAudioVolume_GetMasterVolume(state->pAudioVolume, &volume); + *(float *)arg = volume; + return CONTROL_OK; + case AOCONTROL_SET_VOLUME: + volume = (*(float *)arg) / 100.f; + ISimpleAudioVolume_SetMasterVolume(state->pAudioVolume, volume, NULL); + return CONTROL_OK; + case AOCONTROL_GET_MUTE: + ISimpleAudioVolume_GetMute(state->pAudioVolume, &mute); + *(bool *)arg = mute; + return CONTROL_OK; + case AOCONTROL_SET_MUTE: + mute = *(bool *)arg; + 
ISimpleAudioVolume_SetMute(state->pAudioVolume, mute, NULL); + return CONTROL_OK; + } + return CONTROL_UNKNOWN; +} + +static int thread_control(struct ao *ao, enum aocontrol cmd, void *arg) +{ + struct wasapi_state *state = ao->priv; + + // common to exclusive and shared + switch (cmd) { + case AOCONTROL_UPDATE_STREAM_TITLE: + if (!state->pSessionControl) + return CONTROL_FALSE; + + wchar_t *title = mp_from_utf8(NULL, (const char *)arg); + HRESULT hr = IAudioSessionControl_SetDisplayName(state->pSessionControl, + title,NULL); + talloc_free(title); + + if (SUCCEEDED(hr)) + return CONTROL_OK; + + MP_WARN(ao, "Error setting audio session name: %s\n", + mp_HRESULT_to_str(hr)); + + assert(ao->client_name); + if (!ao->client_name) + return CONTROL_ERROR; + + // Fallback to client name + title = mp_from_utf8(NULL, ao->client_name); + IAudioSessionControl_SetDisplayName(state->pSessionControl, + title, NULL); + talloc_free(title); + + return CONTROL_ERROR; + } + + return state->share_mode == AUDCLNT_SHAREMODE_EXCLUSIVE ? 
+ thread_control_exclusive(ao, cmd, arg) : + thread_control_shared(ao, cmd, arg); +} + +static void run_control(void *p) +{ + void **pp = p; + struct ao *ao = pp[0]; + enum aocontrol cmd = *(enum aocontrol *)pp[1]; + void *arg = pp[2]; + *(int *)pp[3] = thread_control(ao, cmd, arg); +} + +static int control(struct ao *ao, enum aocontrol cmd, void *arg) +{ + struct wasapi_state *state = ao->priv; + int ret; + void *p[] = {ao, &cmd, arg, &ret}; + mp_dispatch_run(state->dispatch, run_control, p); + return ret; +} + +static void audio_reset(struct ao *ao) +{ + set_thread_state(ao, WASAPI_THREAD_RESET); +} + +static void audio_resume(struct ao *ao) +{ + set_thread_state(ao, WASAPI_THREAD_RESUME); +} + +static void hotplug_uninit(struct ao *ao) +{ + MP_DBG(ao, "Hotplug uninit\n"); + wasapi_change_uninit(ao); + CoUninitialize(); +} + +static int hotplug_init(struct ao *ao) +{ + MP_DBG(ao, "Hotplug init\n"); + struct wasapi_state *state = ao->priv; + state->log = ao->log; + CoInitializeEx(NULL, COINIT_MULTITHREADED); + HRESULT hr = wasapi_change_init(ao, true); + EXIT_ON_ERROR(hr); + + return 0; + exit_label: + MP_FATAL(state, "Error setting up audio hotplug: %s\n", mp_HRESULT_to_str(hr)); + hotplug_uninit(ao); + return -1; +} + +#define OPT_BASE_STRUCT struct wasapi_state + +const struct ao_driver audio_out_wasapi = { + .description = "Windows WASAPI audio output (event mode)", + .name = "wasapi", + .init = init, + .uninit = uninit, + .control = control, + .reset = audio_reset, + .start = audio_resume, + .list_devs = wasapi_list_devs, + .hotplug_init = hotplug_init, + .hotplug_uninit = hotplug_uninit, + .priv_size = sizeof(wasapi_state), +}; diff --git a/audio/out/ao_wasapi.h b/audio/out/ao_wasapi.h new file mode 100644 index 0000000..17b8f7a --- /dev/null +++ b/audio/out/ao_wasapi.h @@ -0,0 +1,116 @@ +/* + * This file is part of mpv. 
+ * + * Original author: Jonathan Yong <10walls@gmail.com> + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef MP_AO_WASAPI_H_ +#define MP_AO_WASAPI_H_ + +#include <stdatomic.h> +#include <stdlib.h> +#include <stdbool.h> + +#include <windows.h> +#include <mmdeviceapi.h> +#include <audioclient.h> +#include <audiopolicy.h> +#include <endpointvolume.h> + +#include "common/msg.h" +#include "osdep/windows_utils.h" +#include "internal.h" +#include "ao.h" + +typedef struct change_notify { + IMMNotificationClient client; // this must be first in the structure! 
+ IMMDeviceEnumerator *pEnumerator; // object where client is registered + LPWSTR monitored; // Monitored device + bool is_hotplug; + struct ao *ao; +} change_notify; + +HRESULT wasapi_change_init(struct ao* ao, bool is_hotplug); +void wasapi_change_uninit(struct ao* ao); + +enum wasapi_thread_state { + WASAPI_THREAD_FEED = 0, + WASAPI_THREAD_RESUME, + WASAPI_THREAD_RESET, + WASAPI_THREAD_SHUTDOWN +}; + +typedef struct wasapi_state { + struct mp_log *log; + + bool init_ok; // status of init phase + // Thread handles + HANDLE hInitDone; // set when init is complete in audio thread + HANDLE hAudioThread; // the audio thread itself + HANDLE hWake; // thread wakeup event + atomic_int thread_state; // enum wasapi_thread_state (what to do on wakeup) + struct mp_dispatch_queue *dispatch; // for volume/mute/session display + + // for setting the audio thread priority + HANDLE hTask; + + // ID of the device to use + LPWSTR deviceID; + // WASAPI object handles owned and used by audio thread + IMMDevice *pDevice; + IAudioClient *pAudioClient; + IAudioRenderClient *pRenderClient; + + // WASAPI internal clock information, for estimating delay + IAudioClock *pAudioClock; + atomic_ullong sample_count; // samples per channel written by GetBuffer + UINT64 clock_frequency; // scale for position returned by GetPosition + LARGE_INTEGER qpc_frequency; // frequency of Windows' high resolution timer + + // WASAPI control + IAudioSessionControl *pSessionControl; // setting the stream title + IAudioEndpointVolume *pEndpointVolume; // exclusive mode volume/mute + ISimpleAudioVolume *pAudioVolume; // shared mode volume/mute + DWORD vol_hw_support; // is hardware volume supported for exclusive-mode? 
+ + // ao options + int opt_exclusive; + + // format info + WAVEFORMATEXTENSIBLE format; + AUDCLNT_SHAREMODE share_mode; // AUDCLNT_SHAREMODE_EXCLUSIVE / SHARED + UINT32 bufferFrameCount; // number of frames in buffer + struct ao_convert_fmt convert_format; + + change_notify change; +} wasapi_state; + +char *mp_PKEY_to_str_buf(char *buf, size_t buf_size, const PROPERTYKEY *pkey); +#define mp_PKEY_to_str(pkey) mp_PKEY_to_str_buf((char[42]){0}, 42, (pkey)) + +void wasapi_list_devs(struct ao *ao, struct ao_device_list *list); +bstr wasapi_get_specified_device_string(struct ao *ao); +LPWSTR wasapi_find_deviceID(struct ao *ao); + +bool wasapi_thread_init(struct ao *ao); +void wasapi_thread_uninit(struct ao *ao); + +#define EXIT_ON_ERROR(hres) \ + do { if (FAILED(hres)) { goto exit_label; } } while(0) +#define SAFE_DESTROY(unk, release) \ + do { if ((unk) != NULL) { release; (unk) = NULL; } } while(0) + +#endif diff --git a/audio/out/ao_wasapi_changenotify.c b/audio/out/ao_wasapi_changenotify.c new file mode 100644 index 0000000..f0e1895 --- /dev/null +++ b/audio/out/ao_wasapi_changenotify.c @@ -0,0 +1,246 @@ +/* + * This file is part of mpv. + * + * Original author: Jonathan Yong <10walls@gmail.com> + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <wchar.h> + +#include "ao_wasapi.h" + +static HRESULT STDMETHODCALLTYPE sIMMNotificationClient_QueryInterface( + IMMNotificationClient* This, REFIID riid, void **ppvObject) +{ + // Compatible with IMMNotificationClient and IUnknown + if (IsEqualGUID(&IID_IMMNotificationClient, riid) || + IsEqualGUID(&IID_IUnknown, riid)) + { + *ppvObject = (void *)This; + return S_OK; + } else { + *ppvObject = NULL; + return E_NOINTERFACE; + } +} + +// these are required, but not actually used +static ULONG STDMETHODCALLTYPE sIMMNotificationClient_AddRef( + IMMNotificationClient *This) +{ + return 1; +} + +// MSDN says it should free itself, but we're static +static ULONG STDMETHODCALLTYPE sIMMNotificationClient_Release( + IMMNotificationClient *This) +{ + return 1; +} + +static HRESULT STDMETHODCALLTYPE sIMMNotificationClient_OnDeviceStateChanged( + IMMNotificationClient *This, + LPCWSTR pwstrDeviceId, + DWORD dwNewState) +{ + change_notify *change = (change_notify *)This; + struct ao *ao = change->ao; + + if (change->is_hotplug) { + MP_VERBOSE(ao, + "OnDeviceStateChanged triggered: sending hotplug event\n"); + ao_hotplug_event(ao); + } else if (pwstrDeviceId && !wcscmp(pwstrDeviceId, change->monitored)) { + switch (dwNewState) { + case DEVICE_STATE_DISABLED: + case DEVICE_STATE_NOTPRESENT: + case DEVICE_STATE_UNPLUGGED: + MP_VERBOSE(ao, "OnDeviceStateChanged triggered on device %ls: " + "requesting ao reload\n", pwstrDeviceId); + ao_request_reload(ao); + break; + case DEVICE_STATE_ACTIVE: + break; + } + } + + return S_OK; +} + +static HRESULT STDMETHODCALLTYPE sIMMNotificationClient_OnDeviceAdded( + IMMNotificationClient *This, + LPCWSTR pwstrDeviceId) +{ + change_notify *change = (change_notify *)This; + struct ao *ao = change->ao; + + if (change->is_hotplug) { + MP_VERBOSE(ao, "OnDeviceAdded triggered: sending hotplug event\n"); + ao_hotplug_event(ao); + } + + return S_OK; +} + +// maybe MPV can go over to the preferred device once it is plugged in? 
+static HRESULT STDMETHODCALLTYPE sIMMNotificationClient_OnDeviceRemoved( + IMMNotificationClient *This, + LPCWSTR pwstrDeviceId) +{ + change_notify *change = (change_notify *)This; + struct ao *ao = change->ao; + + if (change->is_hotplug) { + MP_VERBOSE(ao, "OnDeviceRemoved triggered: sending hotplug event\n"); + ao_hotplug_event(ao); + } else if (pwstrDeviceId && !wcscmp(pwstrDeviceId, change->monitored)) { + MP_VERBOSE(ao, "OnDeviceRemoved triggered for device %ls: " + "requesting ao reload\n", pwstrDeviceId); + ao_request_reload(ao); + } + + return S_OK; +} + +static HRESULT STDMETHODCALLTYPE sIMMNotificationClient_OnDefaultDeviceChanged( + IMMNotificationClient *This, + EDataFlow flow, + ERole role, + LPCWSTR pwstrDeviceId) +{ + change_notify *change = (change_notify *)This; + struct ao *ao = change->ao; + + // don't care about "eCapture" or non-"eMultimedia" roles + if (flow == eCapture || role != eMultimedia) return S_OK; + + if (change->is_hotplug) { + MP_VERBOSE(ao, + "OnDefaultDeviceChanged triggered: sending hotplug event\n"); + ao_hotplug_event(ao); + } else { + // stay on the device the user specified + bstr device = wasapi_get_specified_device_string(ao); + if (device.len) { + MP_VERBOSE(ao, "OnDefaultDeviceChanged triggered: " + "staying on specified device %.*s\n", BSTR_P(device)); + return S_OK; + } + + // don't reload if already on the new default + if (pwstrDeviceId && !wcscmp(pwstrDeviceId, change->monitored)) { + MP_VERBOSE(ao, "OnDefaultDeviceChanged triggered: " + "already using default device, no reload required\n"); + return S_OK; + } + + // if we got here, we need to reload + MP_VERBOSE(ao, + "OnDefaultDeviceChanged triggered: requesting ao reload\n"); + ao_request_reload(ao); + } + + return S_OK; +} + +static HRESULT STDMETHODCALLTYPE sIMMNotificationClient_OnPropertyValueChanged( + IMMNotificationClient *This, + LPCWSTR pwstrDeviceId, + const PROPERTYKEY key) +{ + change_notify *change = (change_notify *)This; + struct ao *ao = 
change->ao; + + if (!change->is_hotplug && pwstrDeviceId && + !wcscmp(pwstrDeviceId, change->monitored)) + { + MP_VERBOSE(ao, "OnPropertyValueChanged triggered on device %ls\n", + pwstrDeviceId); + if (IsEqualPropertyKey(PKEY_AudioEngine_DeviceFormat, key)) { + MP_VERBOSE(change->ao, + "Changed property: PKEY_AudioEngine_DeviceFormat " + "- requesting ao reload\n"); + ao_request_reload(change->ao); + } else { + MP_VERBOSE(ao, "Changed property: %s\n", mp_PKEY_to_str(&key)); + } + } + + return S_OK; +} + +static CONST_VTBL IMMNotificationClientVtbl sIMMNotificationClientVtbl = { + .QueryInterface = sIMMNotificationClient_QueryInterface, + .AddRef = sIMMNotificationClient_AddRef, + .Release = sIMMNotificationClient_Release, + .OnDeviceStateChanged = sIMMNotificationClient_OnDeviceStateChanged, + .OnDeviceAdded = sIMMNotificationClient_OnDeviceAdded, + .OnDeviceRemoved = sIMMNotificationClient_OnDeviceRemoved, + .OnDefaultDeviceChanged = sIMMNotificationClient_OnDefaultDeviceChanged, + .OnPropertyValueChanged = sIMMNotificationClient_OnPropertyValueChanged, +}; + + +HRESULT wasapi_change_init(struct ao *ao, bool is_hotplug) +{ + struct wasapi_state *state = ao->priv; + struct change_notify *change = &state->change; + HRESULT hr = CoCreateInstance(&CLSID_MMDeviceEnumerator, NULL, CLSCTX_ALL, + &IID_IMMDeviceEnumerator, + (void **)&change->pEnumerator); + EXIT_ON_ERROR(hr); + + // so the callbacks can access the ao + change->ao = ao; + + // whether or not this is the hotplug instance + change->is_hotplug = is_hotplug; + + if (is_hotplug) { + MP_DBG(ao, "Monitoring for hotplug events\n"); + } else { + // Get the device string to compare with the pwstrDeviceId + change->monitored = state->deviceID; + MP_VERBOSE(ao, "Monitoring changes in device %ls\n", change->monitored); + } + + // COM voodoo to emulate c++ class + change->client.lpVtbl = &sIMMNotificationClientVtbl; + + // register the change notification client + hr = 
IMMDeviceEnumerator_RegisterEndpointNotificationCallback( + change->pEnumerator, (IMMNotificationClient *)change); + EXIT_ON_ERROR(hr); + + return hr; +exit_label: + MP_ERR(state, "Error setting up device change monitoring: %s\n", + mp_HRESULT_to_str(hr)); + wasapi_change_uninit(ao); + return hr; +} + +void wasapi_change_uninit(struct ao *ao) +{ + struct wasapi_state *state = ao->priv; + struct change_notify *change = &state->change; + + if (change->pEnumerator && change->client.lpVtbl) { + IMMDeviceEnumerator_UnregisterEndpointNotificationCallback( + change->pEnumerator, (IMMNotificationClient *)change); + } + + SAFE_RELEASE(change->pEnumerator); +} diff --git a/audio/out/ao_wasapi_utils.c b/audio/out/ao_wasapi_utils.c new file mode 100644 index 0000000..731fe8a --- /dev/null +++ b/audio/out/ao_wasapi_utils.c @@ -0,0 +1,1063 @@ +/* + * This file is part of mpv. + * + * Original author: Jonathan Yong <10walls@gmail.com> + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <math.h> +#include <wchar.h> +#include <windows.h> +#include <errors.h> +#include <ksguid.h> +#include <ksmedia.h> +#include <avrt.h> + +#include "audio/format.h" +#include "osdep/timer.h" +#include "osdep/io.h" +#include "osdep/strnlen.h" +#include "ao_wasapi.h" + +DEFINE_PROPERTYKEY(mp_PKEY_Device_FriendlyName, + 0xa45c254e, 0xdf1c, 0x4efd, 0x80, 0x20, + 0x67, 0xd1, 0x46, 0xa8, 0x50, 0xe0, 14); +DEFINE_PROPERTYKEY(mp_PKEY_Device_DeviceDesc, + 0xa45c254e, 0xdf1c, 0x4efd, 0x80, 0x20, + 0x67, 0xd1, 0x46, 0xa8, 0x50, 0xe0, 2); +// CEA 861 subformats +// should work on vista +DEFINE_GUID(mp_KSDATAFORMAT_SUBTYPE_IEC61937_DTS, + 0x00000008, 0x0000, 0x0010, 0x80, 0x00, + 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71); +DEFINE_GUID(mp_KSDATAFORMAT_SUBTYPE_IEC61937_DOLBY_DIGITAL, + 0x00000092, 0x0000, 0x0010, 0x80, 0x00, + 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71); +// might require 7+ +DEFINE_GUID(mp_KSDATAFORMAT_SUBTYPE_IEC61937_AAC, + 0x00000006, 0x0cea, 0x0010, 0x80, 0x00, + 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71); +DEFINE_GUID(mp_KSDATAFORMAT_SUBTYPE_IEC61937_MPEG3, + 0x00000004, 0x0cea, 0x0010, 0x80, 0x00, + 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71); +DEFINE_GUID(mp_KSDATAFORMAT_SUBTYPE_IEC61937_DOLBY_DIGITAL_PLUS, + 0x0000000a, 0x0cea, 0x0010, 0x80, 0x00, + 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71); +DEFINE_GUID(mp_KSDATAFORMAT_SUBTYPE_IEC61937_DTS_HD, + 0x0000000b, 0x0cea, 0x0010, 0x80, 0x00, + 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71); +DEFINE_GUID(mp_KSDATAFORMAT_SUBTYPE_IEC61937_DOLBY_MLP, + 0x0000000c, 0x0cea, 0x0010, 0x80, 0x00, + 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71); + +struct wasapi_sample_fmt { + int mp_format; // AF_FORMAT_* + int bits; // aka wBitsPerSample + int used_msb; // aka wValidBitsPerSample + const GUID *subtype; +}; + +// some common bit depths / container sizes (requests welcome) +// Entries that have the same mp_format must be: +// 1. consecutive +// 2. 
// Supported sample formats, sorted by preferred format (worst comes last).
static const struct wasapi_sample_fmt wasapi_formats[] = {
    {AF_FORMAT_U8,  8,  8, &KSDATAFORMAT_SUBTYPE_PCM},
    {AF_FORMAT_S16, 16, 16, &KSDATAFORMAT_SUBTYPE_PCM},
    {AF_FORMAT_S32, 32, 32, &KSDATAFORMAT_SUBTYPE_PCM},
    // compatible, assume LSBs are ignored
    {AF_FORMAT_S32, 32, 24, &KSDATAFORMAT_SUBTYPE_PCM},
    // aka S24 (with conversion on output)
    {AF_FORMAT_S32, 24, 24, &KSDATAFORMAT_SUBTYPE_PCM},
    {AF_FORMAT_FLOAT, 32, 32, &KSDATAFORMAT_SUBTYPE_IEEE_FLOAT},
    {AF_FORMAT_S_AC3, 16, 16, &mp_KSDATAFORMAT_SUBTYPE_IEC61937_DOLBY_DIGITAL},
    {AF_FORMAT_S_DTS, 16, 16, &mp_KSDATAFORMAT_SUBTYPE_IEC61937_DTS},
    {AF_FORMAT_S_AAC, 16, 16, &mp_KSDATAFORMAT_SUBTYPE_IEC61937_AAC},
    {AF_FORMAT_S_MP3, 16, 16, &mp_KSDATAFORMAT_SUBTYPE_IEC61937_MPEG3},
    {AF_FORMAT_S_TRUEHD, 16, 16, &mp_KSDATAFORMAT_SUBTYPE_IEC61937_DOLBY_MLP},
    {AF_FORMAT_S_EAC3, 16, 16, &mp_KSDATAFORMAT_SUBTYPE_IEC61937_DOLBY_DIGITAL_PLUS},
    {AF_FORMAT_S_DTSHD, 16, 16, &mp_KSDATAFORMAT_SUBTYPE_IEC61937_DTS_HD},
    {0},
};

// Copy the wasapi_formats entries usable for src_format into out_formats,
// best first. The list is terminated by a zeroed entry, so out_formats must
// have room for all table entries plus one.
static void wasapi_get_best_sample_formats(
    int src_format, struct wasapi_sample_fmt *out_formats)
{
    int mp_formats[AF_FORMAT_COUNT + 1];
    af_get_best_sample_formats(src_format, mp_formats);
    for (int n = 0; mp_formats[n]; n++) {
        for (int i = 0; wasapi_formats[i].mp_format; i++) {
            if (wasapi_formats[i].mp_format == mp_formats[n])
                *out_formats++ = wasapi_formats[i];
        }
    }
    *out_formats = (struct wasapi_sample_fmt) {0};
}

// Map an mpv sample format to its WAVEFORMAT subtype GUID, or
// KSDATAFORMAT_SPECIFIER_NONE if the format is not in wasapi_formats.
static const GUID *format_to_subtype(int format)
{
    for (int i = 0; wasapi_formats[i].mp_format; i++) {
        if (format == wasapi_formats[i].mp_format)
            return wasapi_formats[i].subtype;
    }
    return &KSDATAFORMAT_SPECIFIER_NONE;
}

// Format a PROPERTYKEY as "<fmtid GUID>,<pid>" into buf; returns buf.
char *mp_PKEY_to_str_buf(char *buf, size_t buf_size, const PROPERTYKEY *pkey)
{
    buf = mp_GUID_to_str_buf(buf, buf_size, &pkey->fmtid);
    size_t guid_len = strnlen(buf, buf_size);
    snprintf(buf + guid_len, buf_size - guid_len, ",%"PRIu32,
             (uint32_t) pkey->pid);

    return buf;
}

// Recompute nBlockAlign/nAvgBytesPerSec from the other wformat fields.
static void update_waveformat_datarate(WAVEFORMATEXTENSIBLE *wformat)
{
    WAVEFORMATEX *wf = &wformat->Format;
    wf->nBlockAlign = wf->nChannels * wf->wBitsPerSample / 8;
    wf->nAvgBytesPerSec = wf->nSamplesPerSec * wf->nBlockAlign;
}

// Fill wformat from the given wasapi sample format, samplerate and channel
// map. Channel maps that cannot be represented (over 8 channels, or with
// speakers outside the 32-bit WAVEEXT mask) are remixed to 7.1.
static void set_waveformat(WAVEFORMATEXTENSIBLE *wformat,
                           struct wasapi_sample_fmt *format,
                           DWORD samplerate, struct mp_chmap *channels)
{
    wformat->Format.wFormatTag = WAVE_FORMAT_EXTENSIBLE;
    wformat->Format.nChannels = channels->num;
    wformat->Format.nSamplesPerSec = samplerate;
    wformat->Format.wBitsPerSample = format->bits;
    wformat->Format.cbSize = sizeof(WAVEFORMATEXTENSIBLE) - sizeof(WAVEFORMATEX);

    wformat->SubFormat = *format_to_subtype(format->mp_format);
    wformat->Samples.wValidBitsPerSample = format->used_msb;

    uint64_t chans = mp_chmap_to_waveext(channels);
    // dwChannelMask is 32 bits; the comparison below detects truncation.
    wformat->dwChannelMask = chans;

    if (wformat->Format.nChannels > 8 || wformat->dwChannelMask != chans) {
        // IAudioClient::IsFormatSupported tend to fallback to stereo for closest
        // format match when there are more channels. Remix to standard layout.
        // Also if input channel mask has channels outside 32-bits override it
        // and hope for the best...
        wformat->dwChannelMask = KSAUDIO_SPEAKER_7POINT1_SURROUND;
        wformat->Format.nChannels = 8;
    }

    update_waveformat_datarate(wformat);
}

// other wformat parameters must already be set with set_waveformat
static void change_waveformat_samplerate(WAVEFORMATEXTENSIBLE *wformat,
                                         DWORD samplerate)
{
    wformat->Format.nSamplesPerSec = samplerate;
    update_waveformat_datarate(wformat);
}

// other wformat parameters must already be set with set_waveformat
static void change_waveformat_channels(WAVEFORMATEXTENSIBLE *wformat,
                                       struct mp_chmap *channels)
{
    wformat->Format.nChannels = channels->num;
    wformat->dwChannelMask = mp_chmap_to_waveext(channels);
    update_waveformat_datarate(wformat);
}

// Reverse of set_waveformat: find the wasapi_formats entry matching wf.
// Returns a zeroed struct (mp_format == 0) if nothing matches. For PCM
// extensible formats, a smaller wValidBitsPerSample narrows used_msb.
static struct wasapi_sample_fmt format_from_waveformat(WAVEFORMATEX *wf)
{
    struct wasapi_sample_fmt res = {0};

    for (int n = 0; wasapi_formats[n].mp_format; n++) {
        const struct wasapi_sample_fmt *fmt = &wasapi_formats[n];
        int valid_bits = 0;

        if (wf->wBitsPerSample != fmt->bits)
            continue;

        const GUID *wf_guid = NULL;

        switch (wf->wFormatTag) {
        case WAVE_FORMAT_EXTENSIBLE: {
            WAVEFORMATEXTENSIBLE *wformat = (WAVEFORMATEXTENSIBLE *)wf;
            wf_guid = &wformat->SubFormat;
            if (IsEqualGUID(wf_guid, &KSDATAFORMAT_SUBTYPE_PCM))
                valid_bits = wformat->Samples.wValidBitsPerSample;
            break;
        }
        case WAVE_FORMAT_PCM:
            wf_guid = &KSDATAFORMAT_SUBTYPE_PCM;
            break;
        case WAVE_FORMAT_IEEE_FLOAT:
            wf_guid = &KSDATAFORMAT_SUBTYPE_IEEE_FLOAT;
            break;
        }

        if (!wf_guid || !IsEqualGUID(wf_guid, fmt->subtype))
            continue;

        res = *fmt;
        if (valid_bits > 0 && valid_bits < fmt->bits)
            res.used_msb = valid_bits;
        break;
    }

    return res;
}

// Derive an mp_chmap from wf. Returns false (and sets *channels to "empty")
// if the derived map's channel count disagrees with wf->nChannels.
static bool chmap_from_waveformat(struct mp_chmap *channels,
                                  const WAVEFORMATEX *wf)
{
    if (wf->wFormatTag == WAVE_FORMAT_EXTENSIBLE) {
        WAVEFORMATEXTENSIBLE *wformat = (WAVEFORMATEXTENSIBLE *)wf;
        mp_chmap_from_waveext(channels, wformat->dwChannelMask);
    } else {
        mp_chmap_from_channels(channels, wf->nChannels);
    }

    if (channels->num != wf->nChannels) {
        mp_chmap_from_str(channels, bstr0("empty"));
        return false;
    }

    return true;
}

// Human-readable description of wf for log output.
static char *waveformat_to_str_buf(char *buf, size_t buf_size, WAVEFORMATEX *wf)
{
    struct mp_chmap channels;
    chmap_from_waveformat(&channels, wf);

    struct wasapi_sample_fmt format = format_from_waveformat(wf);

    snprintf(buf, buf_size, "%s %s (%d/%d bits) @ %uhz",
             mp_chmap_to_str(&channels),
             af_fmt_to_str(format.mp_format), format.bits, format.used_msb,
             (unsigned) wf->nSamplesPerSec);
    return buf;
}
#define waveformat_to_str_(wf, sz) waveformat_to_str_buf((char[sz]){0}, sz, (wf))
#define waveformat_to_str(wf) waveformat_to_str_(wf, MP_NUM_CHANNELS * 4 + 42)

// Copy src into dst; also handles plain (non-extensible) WAVEFORMATEX
// sources, which only fill dst->Format.
static void waveformat_copy(WAVEFORMATEXTENSIBLE* dst, WAVEFORMATEX* src)
{
    if (src->wFormatTag == WAVE_FORMAT_EXTENSIBLE) {
        *dst = *(WAVEFORMATEXTENSIBLE *)src;
    } else {
        dst->Format = *src;
    }
}

// Apply the negotiated device format wf to the ao, and remember it in
// state->format / state->share_mode. Returns false if wf cannot be mapped
// back to an mpv sample format or channel map.
static bool set_ao_format(struct ao *ao, WAVEFORMATEX *wf,
                          AUDCLNT_SHAREMODE share_mode)
{
    struct wasapi_state *state = ao->priv;
    struct wasapi_sample_fmt format = format_from_waveformat(wf);
    if (!format.mp_format) {
        MP_ERR(ao, "Unable to construct sample format from WAVEFORMAT %s\n",
               waveformat_to_str(wf));
        return false;
    }

    // Do not touch the ao for passthrough, just assume that we set WAVEFORMATEX
    // correctly.
    if (af_fmt_is_pcm(format.mp_format)) {
        struct mp_chmap channels;
        if (!chmap_from_waveformat(&channels, wf)) {
            MP_ERR(ao, "Unable to construct channel map from WAVEFORMAT %s\n",
                   waveformat_to_str(wf));
            return false;
        }

        // Describe how samples must be padded/converted for the device.
        struct ao_convert_fmt conv = {
            .src_fmt = format.mp_format,
            .channels = channels.num,
            .dst_bits = format.bits,
            .pad_lsb = format.bits - format.used_msb,
        };
        if (!ao_can_convert_inplace(&conv)) {
            MP_ERR(ao, "Unable to convert to %s\n", waveformat_to_str(wf));
            return false;
        }

        state->convert_format = conv;
        ao->samplerate = wf->nSamplesPerSec;
        ao->format = format.mp_format;
        ao->channels = channels;
    }
    waveformat_copy(&state->format, wf);
    state->share_mode = share_mode;

    MP_VERBOSE(ao, "Accepted as %s %s @ %dhz -> %s (%s)\n",
               mp_chmap_to_str(&ao->channels),
               af_fmt_to_str(ao->format), ao->samplerate,
               waveformat_to_str(wf),
               state->share_mode == AUDCLNT_SHAREMODE_EXCLUSIVE
               ? "exclusive" : "shared");
    return true;
}

// Log-friendly result string for an IsFormatSupported HRESULT.
#define mp_format_res_str(hres) \
    (SUCCEEDED(hres) ? ((hres) == S_OK) ? "ok" : "close" \
                     : ((hres) == AUDCLNT_E_UNSUPPORTED_FORMAT) \
                       ? "unsupported" : mp_HRESULT_to_str(hres))

// Probe whether the device accepts wformat in exclusive mode.
static bool try_format_exclusive(struct ao *ao, WAVEFORMATEXTENSIBLE *wformat)
{
    struct wasapi_state *state = ao->priv;
    HRESULT hr = IAudioClient_IsFormatSupported(state->pAudioClient,
                                                AUDCLNT_SHAREMODE_EXCLUSIVE,
                                                &wformat->Format, NULL);
    MP_VERBOSE(ao, "Trying %s (exclusive) -> %s\n",
               waveformat_to_str(&wformat->Format), mp_format_res_str(hr));
    return SUCCEEDED(hr);
}

// Try all sample formats compatible with ao->format at the given rate and
// channel map. On success wformat holds the accepted format; on failure
// wBitsPerSample is zeroed as a "nothing found yet" marker for the caller.
static bool search_sample_formats(struct ao *ao, WAVEFORMATEXTENSIBLE *wformat,
                                  int samplerate, struct mp_chmap *channels)
{
    struct wasapi_sample_fmt alt_formats[MP_ARRAY_SIZE(wasapi_formats)];
    wasapi_get_best_sample_formats(ao->format, alt_formats);
    for (int n = 0; alt_formats[n].mp_format; n++) {
        set_waveformat(wformat, &alt_formats[n], samplerate, channels);
        if (try_format_exclusive(ao, wformat))
            return true;
    }

    wformat->Format.wBitsPerSample = 0;
    return false;
}

// Find a supported samplerate for the given channel map, preferring rates
// close to ao->samplerate. On failure nSamplesPerSec is zeroed as a marker.
static bool search_samplerates(struct ao *ao, WAVEFORMATEXTENSIBLE *wformat,
                               struct mp_chmap *channels)
{
    // put common samplerates first so that we find format early
    int try[] = {48000, 44100, 96000, 88200, 192000, 176400,
                 32000, 22050, 11025, 8000, 16000, 352800, 384000, 0};

    // get a list of supported rates
    int n = 0;
    int supported[MP_ARRAY_SIZE(try)] = {0};

    wformat->Format.wBitsPerSample = 0;
    for (int i = 0; try[i]; i++) {
        if (!wformat->Format.wBitsPerSample) {
            // no working sample format found yet: full search at this rate
            if (search_sample_formats(ao, wformat, try[i], channels))
                supported[n++] = try[i];
        } else {
            // sample format already known; only vary the rate
            change_waveformat_samplerate(wformat, try[i]);
            if (try_format_exclusive(ao, wformat))
                supported[n++] = try[i];
        }
    }

    int samplerate = af_select_best_samplerate(ao->samplerate, supported);
    if (samplerate > 0) {
        change_waveformat_samplerate(wformat, samplerate);
        return true;
    }

    // otherwise, this is probably an unsupported channel map
    wformat->Format.nSamplesPerSec = 0;
    return false;
}

// Probe channel layouts (and transitively rates/sample formats), then pick
// the best channel map via mp_chmap_sel. On success wformat is fully set up.
static bool search_channels(struct ao *ao, WAVEFORMATEXTENSIBLE *wformat)
{
    struct wasapi_state *state = ao->priv;
    struct mp_chmap_sel chmap_sel = {.tmp = state};
    struct mp_chmap entry;
    // put common layouts first so that we find sample rate/format early
    char *channel_layouts[] =
        {"stereo", "5.1", "7.1", "6.1", "mono", "2.1", "4.0", "5.0",
         "3.0", "3.0(back)",
         "quad", "quad(side)", "3.1",
         "5.0(side)", "4.1",
         "5.1(side)", "6.0", "6.0(front)", "hexagonal",
         "6.1(back)", "6.1(front)", "7.0", "7.0(front)",
         "7.1(wide)", "7.1(wide-side)", "7.1(rear)", "octagonal", NULL};

    wformat->Format.nSamplesPerSec = 0;
    for (int j = 0; channel_layouts[j]; j++) {
        mp_chmap_from_str(&entry, bstr0(channel_layouts[j]));
        if (!wformat->Format.nSamplesPerSec) {
            // no working rate/format yet: full search for this layout
            if (search_samplerates(ao, wformat, &entry))
                mp_chmap_sel_add_map(&chmap_sel, &entry);
        } else {
            // rate/format known; only vary the channel map
            change_waveformat_channels(wformat, &entry);
            if (try_format_exclusive(ao, wformat))
                mp_chmap_sel_add_map(&chmap_sel, &entry);
        }
    }

    entry = ao->channels;
    if (ao_chmap_sel_adjust2(ao, &chmap_sel, &entry, !state->opt_exclusive)) {
        change_waveformat_channels(wformat, &entry);
        return true;
    }

    MP_ERR(ao, "No suitable audio format found\n");
    return false;
}

// Exclusive-mode format negotiation.
static bool find_formats_exclusive(struct ao *ao, WAVEFORMATEXTENSIBLE *wformat)
{
    // Try the specified format as is
    if (try_format_exclusive(ao, wformat))
        return true;

    if (af_fmt_is_spdif(ao->format)) {
        if (ao->format != AF_FORMAT_S_AC3) {
            // If the requested format failed and it is passthrough, but not
            // AC3, try lying and saying it is.
+ MP_VERBOSE(ao, "Retrying as AC3.\n"); + wformat->SubFormat = *format_to_subtype(AF_FORMAT_S_AC3); + if (try_format_exclusive(ao, wformat)) + return true; + } + return false; + } + + // Fallback on the PCM format search + return search_channels(ao, wformat); +} + +static bool find_formats_shared(struct ao *ao, WAVEFORMATEXTENSIBLE *wformat) +{ + struct wasapi_state *state = ao->priv; + + struct mp_chmap channels; + if (!chmap_from_waveformat(&channels, &wformat->Format)) { + MP_ERR(ao, "Error converting channel map\n"); + return false; + } + + HRESULT hr; + WAVEFORMATEX *mix_format; + hr = IAudioClient_GetMixFormat(state->pAudioClient, &mix_format); + EXIT_ON_ERROR(hr); + + // WASAPI doesn't do any sample rate conversion on its own and + // will typically only accept the mix format samplerate. Although + // it will accept any PCM sample format, everything gets converted + // to the mix format anyway (pretty much always float32), so just + // use that. + WAVEFORMATEXTENSIBLE try_format; + waveformat_copy(&try_format, mix_format); + CoTaskMemFree(mix_format); + + // WASAPI may accept channel maps other than the mix format + // if a surround emulator is enabled. 
+ change_waveformat_channels(&try_format, &channels); + + hr = IAudioClient_IsFormatSupported(state->pAudioClient, + AUDCLNT_SHAREMODE_SHARED, + &try_format.Format, + &mix_format); + MP_VERBOSE(ao, "Trying %s (shared) -> %s\n", + waveformat_to_str(&try_format.Format), mp_format_res_str(hr)); + if (hr != AUDCLNT_E_UNSUPPORTED_FORMAT) + EXIT_ON_ERROR(hr); + + switch (hr) { + case S_OK: + waveformat_copy(wformat, &try_format.Format); + break; + case S_FALSE: + waveformat_copy(wformat, mix_format); + CoTaskMemFree(mix_format); + MP_VERBOSE(ao, "Closest match is %s\n", + waveformat_to_str(&wformat->Format)); + break; + default: + hr = IAudioClient_GetMixFormat(state->pAudioClient, &mix_format); + EXIT_ON_ERROR(hr); + waveformat_copy(wformat, mix_format); + CoTaskMemFree(mix_format); + MP_VERBOSE(ao, "Fallback to mix format %s\n", + waveformat_to_str(&wformat->Format)); + + } + + return true; +exit_label: + MP_ERR(state, "Error finding shared mode format: %s\n", + mp_HRESULT_to_str(hr)); + return false; +} + +static bool find_formats(struct ao *ao) +{ + struct wasapi_state *state = ao->priv; + struct mp_chmap channels = ao->channels; + + if (mp_chmap_is_unknown(&channels)) + mp_chmap_from_channels(&channels, channels.num); + mp_chmap_reorder_to_waveext(&channels); + if (!mp_chmap_is_valid(&channels)) + mp_chmap_from_channels(&channels, 2); + + struct wasapi_sample_fmt alt_formats[MP_ARRAY_SIZE(wasapi_formats)]; + wasapi_get_best_sample_formats(ao->format, alt_formats); + struct wasapi_sample_fmt wasapi_format = + {AF_FORMAT_S16, 16, 16, &KSDATAFORMAT_SUBTYPE_PCM};; + if (alt_formats[0].mp_format) + wasapi_format = alt_formats[0]; + + AUDCLNT_SHAREMODE share_mode; + WAVEFORMATEXTENSIBLE wformat; + set_waveformat(&wformat, &wasapi_format, ao->samplerate, &channels); + + if (state->opt_exclusive || af_fmt_is_spdif(ao->format)) { + share_mode = AUDCLNT_SHAREMODE_EXCLUSIVE; + if(!find_formats_exclusive(ao, &wformat)) + return false; + } else { + share_mode = 
AUDCLNT_SHAREMODE_SHARED; + if(!find_formats_shared(ao, &wformat)) + return false; + } + + return set_ao_format(ao, &wformat.Format, share_mode); +} + +static HRESULT init_clock(struct wasapi_state *state) { + HRESULT hr = IAudioClient_GetService(state->pAudioClient, + &IID_IAudioClock, + (void **)&state->pAudioClock); + EXIT_ON_ERROR(hr); + hr = IAudioClock_GetFrequency(state->pAudioClock, &state->clock_frequency); + EXIT_ON_ERROR(hr); + + QueryPerformanceFrequency(&state->qpc_frequency); + + atomic_store(&state->sample_count, 0); + + MP_VERBOSE(state, + "IAudioClock::GetFrequency gave a frequency of %"PRIu64".\n", + (uint64_t) state->clock_frequency); + + return S_OK; +exit_label: + MP_ERR(state, "Error obtaining the audio device's timing: %s\n", + mp_HRESULT_to_str(hr)); + return hr; +} + +static void init_session_display(struct wasapi_state *state, const char *name) { + HRESULT hr = IAudioClient_GetService(state->pAudioClient, + &IID_IAudioSessionControl, + (void **)&state->pSessionControl); + EXIT_ON_ERROR(hr); + + wchar_t path[MAX_PATH] = {0}; + GetModuleFileNameW(NULL, path, MAX_PATH); + hr = IAudioSessionControl_SetIconPath(state->pSessionControl, path, NULL); + if (FAILED(hr)) { + // don't goto exit_label here since SetDisplayName might still work + MP_WARN(state, "Error setting audio session icon: %s\n", + mp_HRESULT_to_str(hr)); + } + + assert(name); + if (!name) + return; + + wchar_t *title = mp_from_utf8(NULL, name); + hr = IAudioSessionControl_SetDisplayName(state->pSessionControl, title, NULL); + talloc_free(title); + + EXIT_ON_ERROR(hr); + return; +exit_label: + // if we got here then the session control is useless - release it + SAFE_RELEASE(state->pSessionControl); + MP_WARN(state, "Error setting audio session name: %s\n", + mp_HRESULT_to_str(hr)); + return; +} + +static void init_volume_control(struct wasapi_state *state) +{ + HRESULT hr; + if (state->share_mode == AUDCLNT_SHAREMODE_EXCLUSIVE) { + MP_DBG(state, "Activating pEndpointVolume 
interface\n");
        hr = IMMDeviceActivator_Activate(state->pDevice,
                                         &IID_IAudioEndpointVolume,
                                         CLSCTX_ALL, NULL,
                                         (void **)&state->pEndpointVolume);
        EXIT_ON_ERROR(hr);

        MP_DBG(state, "IAudioEndpointVolume::QueryHardwareSupport\n");
        hr = IAudioEndpointVolume_QueryHardwareSupport(state->pEndpointVolume,
                                                       &state->vol_hw_support);
        EXIT_ON_ERROR(hr);
    } else {
        MP_DBG(state, "IAudioClient::Initialize pAudioVolume\n");
        hr = IAudioClient_GetService(state->pAudioClient,
                                     &IID_ISimpleAudioVolume,
                                     (void **)&state->pAudioVolume);
        EXIT_ON_ERROR(hr);
    }
    return;
exit_label:
    // volume control is optional: drop whatever was partially acquired
    state->vol_hw_support = 0;
    SAFE_RELEASE(state->pEndpointVolume);
    SAFE_RELEASE(state->pAudioVolume);
    MP_WARN(state, "Error setting up volume control: %s\n",
            mp_HRESULT_to_str(hr));
}

// Initialize the audio client with the negotiated format and create the
// render client, wakeup event, clock, session display and volume control.
// align_hack: reinitialize with a buffer duration recomputed from the
// previously reported buffer size (workaround for
// AUDCLNT_E_BUFFER_SIZE_NOT_ALIGNED on Windows 7; see link below).
static HRESULT fix_format(struct ao *ao, bool align_hack)
{
    struct wasapi_state *state = ao->priv;

    MP_DBG(state, "IAudioClient::GetDevicePeriod\n");
    REFERENCE_TIME devicePeriod;
    // NOTE(review): hr is not checked before devicePeriod is used below; on
    // failure devicePeriod would be uninitialized — confirm this is intended.
    HRESULT hr = IAudioClient_GetDevicePeriod(state->pAudioClient,&devicePeriod,
                                              NULL);
    MP_VERBOSE(state, "Device period: %.2g ms\n",
               (double) devicePeriod / 10000.0 );

    REFERENCE_TIME bufferDuration = devicePeriod;
    if (state->share_mode == AUDCLNT_SHAREMODE_SHARED) {
        // for shared mode, use integer multiple of device period close to 50ms
        bufferDuration = devicePeriod * ceil(50.0 * 10000.0 / devicePeriod);
    }

    // handle unsupported buffer size if AUDCLNT_E_BUFFER_SIZE_NOT_ALIGNED was
    // returned in a previous attempt. hopefully this shouldn't happen because
    // of the above integer device period
    // http://msdn.microsoft.com/en-us/library/windows/desktop/dd370875%28v=vs.85%29.aspx
    if (align_hack) {
        bufferDuration = (REFERENCE_TIME) (0.5 +
            (10000.0 * 1000 / state->format.Format.nSamplesPerSec
             * state->bufferFrameCount));
    }

    // exclusive mode requires periodicity == buffer duration
    REFERENCE_TIME bufferPeriod =
        state->share_mode == AUDCLNT_SHAREMODE_EXCLUSIVE ? bufferDuration : 0;

    MP_DBG(state, "IAudioClient::Initialize\n");
    hr = IAudioClient_Initialize(state->pAudioClient,
                                 state->share_mode,
                                 AUDCLNT_STREAMFLAGS_EVENTCALLBACK,
                                 bufferDuration,
                                 bufferPeriod,
                                 &(state->format.Format),
                                 NULL);
    EXIT_ON_ERROR(hr);

    MP_DBG(state, "IAudioClient::Initialize pRenderClient\n");
    hr = IAudioClient_GetService(state->pAudioClient,
                                 &IID_IAudioRenderClient,
                                 (void **)&state->pRenderClient);
    EXIT_ON_ERROR(hr);

    MP_DBG(state, "IAudioClient::Initialize IAudioClient_SetEventHandle\n");
    hr = IAudioClient_SetEventHandle(state->pAudioClient, state->hWake);
    EXIT_ON_ERROR(hr);

    MP_DBG(state, "IAudioClient::Initialize IAudioClient_GetBufferSize\n");
    hr = IAudioClient_GetBufferSize(state->pAudioClient,
                                    &state->bufferFrameCount);
    EXIT_ON_ERROR(hr);

    ao->device_buffer = state->bufferFrameCount;
    // recompute the actual duration for logging
    bufferDuration = (REFERENCE_TIME) (0.5 +
        (10000.0 * 1000 / state->format.Format.nSamplesPerSec
         * state->bufferFrameCount));
    MP_VERBOSE(state, "Buffer frame count: %"PRIu32" (%.2g ms)\n",
               state->bufferFrameCount, (double) bufferDuration / 10000.0 );

    hr = init_clock(state);
    EXIT_ON_ERROR(hr);

    init_session_display(state, ao->client_name);
    init_volume_control(state);

#if !HAVE_UWP
    state->hTask = AvSetMmThreadCharacteristics(L"Pro Audio", &(DWORD){0});
    if (!state->hTask) {
        MP_WARN(state, "Failed to set AV thread to Pro Audio: %s\n",
                mp_LastError_to_str());
    }
#endif

    return S_OK;
exit_label:
    MP_ERR(state, "Error initializing device: %s\n", mp_HRESULT_to_str(hr));
    return hr;
}

// A playback device as returned by the enumerator: the windows device ID
// string, an mpv-friendly id (common GUID prefix stripped), and the
// human-readable name. All fields talloc'd under the struct itself.
struct device_desc {
    LPWSTR deviceID;
    char *id;
    char *name;
};

// Return the device's friendly name (talloc'd under talloc_ctx); returns an
// empty string on failure (with a warning).
static char* get_device_name(struct mp_log *l, void *talloc_ctx, IMMDevice *pDevice)
{
    char *namestr = NULL;
    IPropertyStore *pProps = NULL;
    PROPVARIANT devname;
    PropVariantInit(&devname);

    HRESULT hr = IMMDevice_OpenPropertyStore(pDevice, STGM_READ, &pProps);
    EXIT_ON_ERROR(hr);

    hr =
IPropertyStore_GetValue(pProps, &mp_PKEY_Device_FriendlyName, + &devname); + EXIT_ON_ERROR(hr); + + namestr = mp_to_utf8(talloc_ctx, devname.pwszVal); + +exit_label: + if (FAILED(hr)) + mp_warn(l, "Failed getting device name: %s\n", mp_HRESULT_to_str(hr)); + PropVariantClear(&devname); + SAFE_RELEASE(pProps); + return namestr ? namestr : talloc_strdup(talloc_ctx, ""); +} + +static struct device_desc *get_device_desc(struct mp_log *l, IMMDevice *pDevice) +{ + LPWSTR deviceID; + HRESULT hr = IMMDevice_GetId(pDevice, &deviceID); + if (FAILED(hr)) { + mp_err(l, "Failed getting device id: %s\n", mp_HRESULT_to_str(hr)); + return NULL; + } + struct device_desc *d = talloc_zero(NULL, struct device_desc); + d->deviceID = talloc_memdup(d, deviceID, + (wcslen(deviceID) + 1) * sizeof(wchar_t)); + SAFE_DESTROY(deviceID, CoTaskMemFree(deviceID)); + + char *full_id = mp_to_utf8(NULL, d->deviceID); + bstr id = bstr0(full_id); + bstr_eatstart0(&id, "{0.0.0.00000000}."); + d->id = bstrdup0(d, id); + talloc_free(full_id); + + d->name = get_device_name(l, d, pDevice); + return d; +} + +struct enumerator { + struct mp_log *log; + IMMDeviceEnumerator *pEnumerator; + IMMDeviceCollection *pDevices; + UINT count; +}; + +static void destroy_enumerator(struct enumerator *e) +{ + if (!e) + return; + SAFE_RELEASE(e->pDevices); + SAFE_RELEASE(e->pEnumerator); + talloc_free(e); +} + +static struct enumerator *create_enumerator(struct mp_log *log) +{ + struct enumerator *e = talloc_zero(NULL, struct enumerator); + e->log = log; + HRESULT hr = CoCreateInstance( + &CLSID_MMDeviceEnumerator, NULL, CLSCTX_ALL, &IID_IMMDeviceEnumerator, + (void **)&e->pEnumerator); + EXIT_ON_ERROR(hr); + + hr = IMMDeviceEnumerator_EnumAudioEndpoints( + e->pEnumerator, eRender, DEVICE_STATE_ACTIVE, &e->pDevices); + EXIT_ON_ERROR(hr); + + hr = IMMDeviceCollection_GetCount(e->pDevices, &e->count); + EXIT_ON_ERROR(hr); + + return e; +exit_label: + mp_err(log, "Error getting device enumerator: %s\n", 
mp_HRESULT_to_str(hr)); + destroy_enumerator(e); + return NULL; +} + +static struct device_desc *device_desc_for_num(struct enumerator *e, UINT i) +{ + IMMDevice *pDevice = NULL; + HRESULT hr = IMMDeviceCollection_Item(e->pDevices, i, &pDevice); + if (FAILED(hr)) { + MP_ERR(e, "Failed getting device #%d: %s\n", i, mp_HRESULT_to_str(hr)); + return NULL; + } + struct device_desc *d = get_device_desc(e->log, pDevice); + SAFE_RELEASE(pDevice); + return d; +} + +static struct device_desc *default_device_desc(struct enumerator *e) +{ + IMMDevice *pDevice = NULL; + HRESULT hr = IMMDeviceEnumerator_GetDefaultAudioEndpoint( + e->pEnumerator, eRender, eMultimedia, &pDevice); + if (FAILED(hr)) { + MP_ERR(e, "Error from GetDefaultAudioEndpoint: %s\n", + mp_HRESULT_to_str(hr)); + return NULL; + } + struct device_desc *d = get_device_desc(e->log, pDevice); + SAFE_RELEASE(pDevice); + return d; +} + +void wasapi_list_devs(struct ao *ao, struct ao_device_list *list) +{ + struct enumerator *enumerator = create_enumerator(ao->log); + if (!enumerator) + return; + + for (UINT i = 0; i < enumerator->count; i++) { + struct device_desc *d = device_desc_for_num(enumerator, i); + if (!d) + goto exit_label; + ao_device_list_add(list, ao, &(struct ao_device_desc){d->id, d->name}); + talloc_free(d); + } + +exit_label: + destroy_enumerator(enumerator); +} + +static bool load_device(struct mp_log *l, + IMMDevice **ppDevice, LPWSTR deviceID) +{ + IMMDeviceEnumerator *pEnumerator = NULL; + HRESULT hr = CoCreateInstance(&CLSID_MMDeviceEnumerator, NULL, CLSCTX_ALL, + &IID_IMMDeviceEnumerator, + (void **)&pEnumerator); + EXIT_ON_ERROR(hr); + + hr = IMMDeviceEnumerator_GetDevice(pEnumerator, deviceID, ppDevice); + EXIT_ON_ERROR(hr); + +exit_label: + if (FAILED(hr)) + mp_err(l, "Error loading selected device: %s\n", mp_HRESULT_to_str(hr)); + SAFE_RELEASE(pEnumerator); + return SUCCEEDED(hr); +} + +static LPWSTR select_device(struct mp_log *l, struct device_desc *d) +{ + if (!d) + return NULL; + 
mp_verbose(l, "Selecting device \'%s\' (%s)\n", d->id, d->name); + return talloc_memdup(NULL, d->deviceID, + (wcslen(d->deviceID) + 1) * sizeof(wchar_t)); +} + +bstr wasapi_get_specified_device_string(struct ao *ao) +{ + return bstr_strip(bstr0(ao->device)); +} + +LPWSTR wasapi_find_deviceID(struct ao *ao) +{ + LPWSTR deviceID = NULL; + bstr device = wasapi_get_specified_device_string(ao); + MP_DBG(ao, "Find device \'%.*s\'\n", BSTR_P(device)); + + struct device_desc *d = NULL; + struct enumerator *enumerator = create_enumerator(ao->log); + if (!enumerator) + goto exit_label; + + if (!enumerator->count) { + MP_ERR(ao, "There are no playback devices available\n"); + goto exit_label; + } + + if (!device.len) { + MP_VERBOSE(ao, "No device specified. Selecting default.\n"); + d = default_device_desc(enumerator); + deviceID = select_device(ao->log, d); + goto exit_label; + } + + // try selecting by number + bstr rest; + long long devno = bstrtoll(device, &rest, 10); + if (!rest.len && 0 <= devno && devno < (long long)enumerator->count) { + MP_VERBOSE(ao, "Selecting device by number: #%lld\n", devno); + d = device_desc_for_num(enumerator, devno); + deviceID = select_device(ao->log, d); + goto exit_label; + } + + // select by id or name + bstr_eatstart0(&device, "{0.0.0.00000000}."); + for (UINT i = 0; i < enumerator->count; i++) { + d = device_desc_for_num(enumerator, i); + if (!d) + goto exit_label; + + if (bstrcmp(device, bstr_strip(bstr0(d->id))) == 0) { + MP_VERBOSE(ao, "Selecting device by id: \'%.*s\'\n", BSTR_P(device)); + deviceID = select_device(ao->log, d); + goto exit_label; + } + + if (bstrcmp(device, bstr_strip(bstr0(d->name))) == 0) { + if (!deviceID) { + MP_VERBOSE(ao, "Selecting device by name: \'%.*s\'\n", BSTR_P(device)); + deviceID = select_device(ao->log, d); + } else { + MP_WARN(ao, "Multiple devices matched \'%.*s\'." 
+ "Ignoring device \'%s\' (%s).\n", + BSTR_P(device), d->id, d->name); + } + } + SAFE_DESTROY(d, talloc_free(d)); + } + + if (!deviceID) + MP_ERR(ao, "Failed to find device \'%.*s\'\n", BSTR_P(device)); + +exit_label: + talloc_free(d); + destroy_enumerator(enumerator); + return deviceID; +} + +bool wasapi_thread_init(struct ao *ao) +{ + struct wasapi_state *state = ao->priv; + MP_DBG(ao, "Init wasapi thread\n"); + int64_t retry_wait = MP_TIME_US_TO_NS(1); + bool align_hack = false; + HRESULT hr; + + ao->format = af_fmt_from_planar(ao->format); + +retry: + if (state->deviceID) { + if (!load_device(ao->log, &state->pDevice, state->deviceID)) + return false; + + MP_DBG(ao, "Activating pAudioClient interface\n"); + hr = IMMDeviceActivator_Activate(state->pDevice, &IID_IAudioClient, + CLSCTX_ALL, NULL, + (void **)&state->pAudioClient); + if (FAILED(hr)) { + MP_FATAL(ao, "Error activating device: %s\n", + mp_HRESULT_to_str(hr)); + return false; + } + } else { + MP_VERBOSE(ao, "Trying UWP wrapper.\n"); + + HRESULT (*wuCreateDefaultAudioRenderer)(IUnknown **res) = NULL; + HANDLE lib = LoadLibraryW(L"wasapiuwp2.dll"); + if (!lib) { + MP_ERR(ao, "Wrapper not found: %d\n", (int)GetLastError()); + return false; + } + + wuCreateDefaultAudioRenderer = + (void*)GetProcAddress(lib, "wuCreateDefaultAudioRenderer"); + if (!wuCreateDefaultAudioRenderer) { + MP_ERR(ao, "Function not found.\n"); + return false; + } + IUnknown *res = NULL; + hr = wuCreateDefaultAudioRenderer(&res); + MP_VERBOSE(ao, "Device: %s %p\n", mp_HRESULT_to_str(hr), res); + if (FAILED(hr)) { + MP_FATAL(ao, "Error activating device: %s\n", + mp_HRESULT_to_str(hr)); + return false; + } + hr = IUnknown_QueryInterface(res, &IID_IAudioClient, + (void **)&state->pAudioClient); + IUnknown_Release(res); + if (FAILED(hr)) { + MP_FATAL(ao, "Failed to get UWP audio client: %s\n", + mp_HRESULT_to_str(hr)); + return false; + } + } + + // In the event of an align hack, we've already done this. 
+ if (!align_hack) { + MP_DBG(ao, "Probing formats\n"); + if (!find_formats(ao)) + return false; + } + + MP_DBG(ao, "Fixing format\n"); + hr = fix_format(ao, align_hack); + switch (hr) { + case AUDCLNT_E_BUFFER_SIZE_NOT_ALIGNED: + if (align_hack) { + MP_FATAL(ao, "Align hack failed\n"); + break; + } + // According to MSDN, we must use this as base after the failure. + hr = IAudioClient_GetBufferSize(state->pAudioClient, + &state->bufferFrameCount); + if (FAILED(hr)) { + MP_FATAL(ao, "Error getting buffer size for align hack: %s\n", + mp_HRESULT_to_str(hr)); + return false; + } + wasapi_thread_uninit(ao); + align_hack = true; + MP_WARN(ao, "This appears to require a weird Windows 7 hack. Retrying.\n"); + goto retry; + case AUDCLNT_E_DEVICE_IN_USE: + case AUDCLNT_E_DEVICE_INVALIDATED: + if (retry_wait > MP_TIME_US_TO_NS(8)) { + MP_FATAL(ao, "Bad device retry failed\n"); + return false; + } + wasapi_thread_uninit(ao); + MP_WARN(ao, "Retrying in %"PRId64" ns\n", retry_wait); + mp_sleep_ns(retry_wait); + retry_wait *= 2; + goto retry; + } + return SUCCEEDED(hr); +} + +void wasapi_thread_uninit(struct ao *ao) +{ + struct wasapi_state *state = ao->priv; + MP_DBG(ao, "Thread shutdown\n"); + + if (state->pAudioClient) + IAudioClient_Stop(state->pAudioClient); + + SAFE_RELEASE(state->pRenderClient); + SAFE_RELEASE(state->pAudioClock); + SAFE_RELEASE(state->pAudioVolume); + SAFE_RELEASE(state->pEndpointVolume); + SAFE_RELEASE(state->pSessionControl); + SAFE_RELEASE(state->pAudioClient); + SAFE_RELEASE(state->pDevice); +#if !HAVE_UWP + SAFE_DESTROY(state->hTask, AvRevertMmThreadCharacteristics(state->hTask)); +#endif + MP_DBG(ao, "Thread uninit done\n"); +} diff --git a/audio/out/buffer.c b/audio/out/buffer.c new file mode 100644 index 0000000..5b8b523 --- /dev/null +++ b/audio/out/buffer.c @@ -0,0 +1,736 @@ +/* + * This file is part of mpv. 
 *
 * mpv is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * mpv is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with mpv. If not, see <http://www.gnu.org/licenses/>.
 */

#include <stddef.h>
#include <inttypes.h>
#include <math.h>
#include <unistd.h>
#include <errno.h>
#include <assert.h>

#include "ao.h"
#include "internal.h"
#include "audio/aframe.h"
#include "audio/format.h"

#include "common/msg.h"
#include "common/common.h"

#include "filters/f_async_queue.h"
#include "filters/filter_internal.h"

#include "osdep/timer.h"
#include "osdep/threads.h"

// Per-AO buffering state shared between the mpv side (async queue/filters)
// and the AO driver callbacks.
struct buffer_state {
    // Buffer and AO
    mp_mutex lock;
    mp_cond wakeup;

    // Playthread sleep
    mp_mutex pt_lock;
    mp_cond pt_wakeup;

    // Access from AO driver's thread only.
    char *convert_buffer;

    // Immutable.
    struct mp_async_queue *queue;

    // --- protected by lock

    struct mp_filter *filter_root;
    struct mp_filter *input;   // connected to queue
    struct mp_aframe *pending; // last, not fully consumed output

    bool streaming;            // AO streaming active
    bool playing;              // logically playing audio from buffer
    bool paused;               // logically paused

    int64_t end_time_ns;       // absolute output time of last played sample

    bool initial_unblocked;

    // "Push" AOs only (AOs with driver->write).
    bool hw_paused;            // driver->set_pause() was used successfully
    bool recover_pause;        // non-hw_paused: needs to recover delay
    struct mp_pcm_state prepause_state;
    mp_thread thread;          // thread shoveling data to AO
    bool thread_valid;         // thread is running
    struct mp_aframe *temp_buf;

    // --- protected by pt_lock
    bool need_wakeup;
    bool terminate;            // exit thread
};

static MP_THREAD_VOID playthread(void *arg);

// Wake up the push-AO feeder thread (playthread).
void ao_wakeup_playthread(struct ao *ao)
{
    struct buffer_state *p = ao->buffer_state;
    mp_mutex_lock(&p->pt_lock);
    p->need_wakeup = true;
    mp_cond_broadcast(&p->pt_wakeup);
    mp_mutex_unlock(&p->pt_lock);
}

// called locked
// Query the driver's PCM state; while logically paused (and not streaming
// silence), report the state captured just before pausing instead.
static void get_dev_state(struct ao *ao, struct mp_pcm_state *state)
{
    struct buffer_state *p = ao->buffer_state;

    if (p->paused && p->playing && !ao->stream_silence) {
        *state = p->prepause_state;
        return;
    }

    // -1 means "unknown" unless the driver overwrites the field
    *state = (struct mp_pcm_state){
        .free_samples = -1,
        .queued_samples = -1,
        .delay = -1,
    };
    ao->driver->get_state(ao, state);
}

// The queue the decoder/filter side pushes audio frames into.
struct mp_async_queue *ao_get_queue(struct ao *ao)
{
    struct buffer_state *p = ao->buffer_state;
    return p->queue;
}

// Special behavior with data==NULL: caller uses p->pending.
// Copy up to "samples" samples from the filter/queue input into "data"
// (planar pointers, one per plane). Returns the number of samples copied.
// *eof is set if an EOF frame was reached and no audio followed it.
// Special behavior with data==NULL: only dequeue the next pending frame into
// p->pending (post-processed in place); returns 1 if a frame is pending,
// 0 otherwise. Must be called with p->lock held (uses p->pending/p->input).
static int read_buffer(struct ao *ao, void **data, int samples, bool *eof,
                       bool pad_silence)
{
    struct buffer_state *p = ao->buffer_state;
    int pos = 0;
    *eof = false;

    while (p->playing && !p->paused && pos < samples) {
        if (!p->pending || !mp_aframe_get_size(p->pending)) {
            TA_FREEP(&p->pending);
            struct mp_frame frame = mp_pin_out_read(p->input->pins[0]);
            if (!frame.type)
                break; // we can't/don't want to block
            if (frame.type != MP_FRAME_AUDIO) {
                if (frame.type == MP_FRAME_EOF)
                    *eof = true;
                mp_frame_unref(&frame);
                continue;
            }
            p->pending = frame.data;
        }

        if (!data)
            break;

        int copy = mp_aframe_get_size(p->pending);
        uint8_t **fdata = mp_aframe_get_data_ro(p->pending);
        copy = MPMIN(copy, samples - pos);
        for (int n = 0; n < ao->num_planes; n++) {
            memcpy((char *)data[n] + pos * ao->sstride,
                   fdata[n], copy * ao->sstride);
        }
        mp_aframe_skip_samples(p->pending, copy);
        pos += copy;
        // Audio arrived after an EOF frame => it wasn't the end after all.
        *eof = false;
    }

    if (!data) {
        if (!p->pending)
            return 0;
        void **pd = (void *)mp_aframe_get_data_rw(p->pending);
        if (pd)
            ao_post_process_data(ao, pd, mp_aframe_get_size(p->pending));
        return 1;
    }

    // pad with silence (underflow/paused/eof)
    if (pad_silence) {
        for (int n = 0; n < ao->num_planes; n++) {
            af_fill_silence((char *)data[n] + pos * ao->sstride,
                            (samples - pos) * ao->sstride,
                            ao->format);
        }
    }

    ao_post_process_data(ao, data, pos);
    return pos;
}

// Pull-mode read path; p->lock must be held by the caller. On short reads
// (underrun/EOF), flips p->playing to false and notifies the core and any
// ao_drain() waiter.
static int ao_read_data_unlocked(struct ao *ao, void **data, int samples,
                                 int64_t out_time_ns, bool pad_silence)
{
    struct buffer_state *p = ao->buffer_state;
    assert(!ao->driver->write); // pull-mode only

    int pos = read_buffer(ao, data, samples, &(bool){0}, pad_silence);

    if (pos > 0)
        p->end_time_ns = out_time_ns;

    if (pos < samples && p->playing && !p->paused) {
        p->playing = false;
        ao->wakeup_cb(ao->wakeup_ctx);
        // For ao_drain().
        mp_cond_broadcast(&p->wakeup);
    }

    return pos;
}

// Read the given amount of samples in the user-provided data buffer. Returns
// the number of samples copied. If there is not enough data (buffer underrun
// or EOF), return the number of samples that could be copied, and fill the
// rest of the user-provided buffer with silence.
// This basically assumes that the audio device doesn't care about underruns.
// If this is called in paused mode, it will always return 0.
// The caller should set out_time_ns to the expected delay until the last sample
// reaches the speakers, in nanoseconds, using mp_time_ns() as reference.
int ao_read_data(struct ao *ao, void **data, int samples, int64_t out_time_ns)
{
    struct buffer_state *p = ao->buffer_state;

    mp_mutex_lock(&p->lock);

    int pos = ao_read_data_unlocked(ao, data, samples, out_time_ns, true);

    mp_mutex_unlock(&p->lock);

    return pos;
}

// Like ao_read_data() but does not block and also may return partial data.
// Callers have to check the return value. Uses trylock so a real-time audio
// callback never stalls on the buffer lock; returns 0 if the lock is busy.
int ao_read_data_nonblocking(struct ao *ao, void **data, int samples, int64_t out_time_ns)
{
    struct buffer_state *p = ao->buffer_state;

    if (mp_mutex_trylock(&p->lock))
        return 0;

    int pos = ao_read_data_unlocked(ao, data, samples, out_time_ns, false);

    mp_mutex_unlock(&p->lock);

    return pos;
}

// Same as ao_read_data(), but convert data according to *fmt.
// fmt->src_fmt and fmt->channels must be the same as the AO parameters.
int ao_read_data_converted(struct ao *ao, struct ao_convert_fmt *fmt,
                           void **data, int samples, int64_t out_time_ns)
{
    struct buffer_state *p = ao->buffer_state;
    void *ndata[MP_NUM_CHANNELS] = {0};

    if (!ao_need_conversion(fmt))
        return ao_read_data(ao, data, samples, out_time_ns);

    assert(ao->format == fmt->src_fmt);
    assert(ao->channels.num == fmt->channels);

    bool planar = af_fmt_is_planar(fmt->src_fmt);
    int planes = planar ? fmt->channels : 1;
    int plane_samples = samples * (planar ? 1: fmt->channels);
    int src_plane_size = plane_samples * af_fmt_to_bytes(fmt->src_fmt);
    int dst_plane_size = plane_samples * fmt->dst_bits / 8;

    // Grow-only scratch buffer, sized for the source format. NOTE(review):
    // the in-place conversion + memcpy below presumably requires
    // dst_plane_size <= src_plane_size — confirm ao_convert_inplace()'s
    // contract.
    int needed = src_plane_size * planes;
    if (needed > talloc_get_size(p->convert_buffer) || !p->convert_buffer) {
        talloc_free(p->convert_buffer);
        p->convert_buffer = talloc_size(NULL, needed);
    }

    for (int n = 0; n < planes; n++)
        ndata[n] = p->convert_buffer + n * src_plane_size;

    int res = ao_read_data(ao, ndata, samples, out_time_ns);

    ao_convert_inplace(fmt, ndata, samples);
    for (int n = 0; n < planes; n++)
        memcpy(data[n], ndata[n], dst_plane_size);

    return res;
}

// Forward a control command to the driver, if it implements one.
// Returns CONTROL_UNKNOWN for drivers without a control callback.
int ao_control(struct ao *ao, enum aocontrol cmd, void *arg)
{
    struct buffer_state *p = ao->buffer_state;
    int r = CONTROL_UNKNOWN;
    if (ao->driver->control) {
        // Only need to lock in push mode.
        if (ao->driver->write)
            mp_mutex_lock(&p->lock);

        r = ao->driver->control(ao, cmd, arg);

        if (ao->driver->write)
            mp_mutex_unlock(&p->lock);
    }
    return r;
}

// Return the estimated time (seconds) until the last buffered sample reaches
// the speakers: driver/device latency plus everything still queued in the
// core-side buffers.
double ao_get_delay(struct ao *ao)
{
    struct buffer_state *p = ao->buffer_state;

    mp_mutex_lock(&p->lock);

    double driver_delay;
    if (ao->driver->write) {
        struct mp_pcm_state state;
        get_dev_state(ao, &state);
        driver_delay = state.delay;
    } else {
        // Pull mode: extrapolate from the timestamp of the last read.
        int64_t end = p->end_time_ns;
        int64_t now = mp_time_ns();
        driver_delay = MPMAX(0, MP_TIME_NS_TO_S(end - now));
    }

    // NOTE(review): queue sample counts are accumulated in an int here —
    // presumably fine for realistic buffer sizes; verify against
    // mp_async_queue_get_samples()'s return type.
    int pending = mp_async_queue_get_samples(p->queue);
    if (p->pending)
        pending += mp_aframe_get_size(p->pending);

    mp_mutex_unlock(&p->lock);
    return driver_delay + pending / (double)ao->samplerate;
}

// Fully stop playback; clear buffers, including queue.
void ao_reset(struct ao *ao)
{
    struct buffer_state *p = ao->buffer_state;
    bool wakeup = false;
    bool do_reset = false;

    mp_mutex_lock(&p->lock);

    TA_FREEP(&p->pending);
    mp_async_queue_reset(p->queue);
    mp_filter_reset(p->filter_root);
    mp_async_queue_resume_reading(p->queue);

    if (!ao->stream_silence && ao->driver->reset) {
        if (ao->driver->write) {
            ao->driver->reset(ao);
        } else {
            // Pull AOs may wait for ao_read_data() to return.
            // That would deadlock if called from within the lock.
            do_reset = true;
        }
        p->streaming = false;
    }
    wakeup = p->playing;
    p->playing = false;
    p->recover_pause = false;
    p->hw_paused = false;
    p->end_time_ns = 0;

    mp_mutex_unlock(&p->lock);

    if (do_reset)
        ao->driver->reset(ao);

    if (wakeup)
        ao_wakeup_playthread(ao);
}

// Initiate playback. This moves from the stop/underrun state to actually
// playing (orthogonally taking the paused state into account). Plays all
// data in the queue, and goes into underrun state if no more data available.
// No-op if already running.
void ao_start(struct ao *ao)
{
    struct buffer_state *p = ao->buffer_state;
    bool do_start = false;

    mp_mutex_lock(&p->lock);

    p->playing = true;

    // Pull mode starts the device here; push mode starts it lazily from
    // ao_play_data() once there is data written.
    if (!ao->driver->write && !p->paused && !p->streaming) {
        p->streaming = true;
        do_start = true;
    }

    mp_mutex_unlock(&p->lock);

    // Pull AOs might call ao_read_data() so do this outside the lock.
    if (do_start)
        ao->driver->start(ao);

    ao_wakeup_playthread(ao);
}

// Set the logical pause state. Uses hardware pause (driver->set_pause) when
// available; otherwise stops the device and remembers the device state so the
// lost buffered audio can be recovered as silence on resume (recover_pause).
void ao_set_paused(struct ao *ao, bool paused, bool eof)
{
    struct buffer_state *p = ao->buffer_state;
    bool wakeup = false;
    bool do_reset = false, do_start = false;

    // If we are going to pause on eof and ao is still playing,
    // be sure to drain the ao first for gapless.
    if (eof && paused && ao_is_playing(ao))
        ao_drain(ao);

    mp_mutex_lock(&p->lock);

    if ((p->playing || !ao->driver->write) && !p->paused && paused) {
        // Entering pause.
        if (p->streaming && !ao->stream_silence) {
            if (ao->driver->write) {
                if (!p->recover_pause)
                    get_dev_state(ao, &p->prepause_state);
                if (ao->driver->set_pause && ao->driver->set_pause(ao, true)) {
                    p->hw_paused = true;
                } else {
                    // No (working) hw pause: hard-stop and replay the lost
                    // delay as silence later (except for untimed AOs).
                    ao->driver->reset(ao);
                    p->streaming = false;
                    p->recover_pause = !ao->untimed;
                }
            } else if (ao->driver->reset) {
                // See ao_reset() why this is done outside of the lock.
                do_reset = true;
                p->streaming = false;
            }
        }
        wakeup = true;
    } else if (p->playing && p->paused && !paused) {
        // Leaving pause.
        if (ao->driver->write) {
            if (p->hw_paused)
                ao->driver->set_pause(ao, false);
            p->hw_paused = false;
        } else {
            if (!p->streaming)
                do_start = true;
            p->streaming = true;
        }
        wakeup = true;
    }
    p->paused = paused;

    mp_mutex_unlock(&p->lock);

    if (do_reset)
        ao->driver->reset(ao);
    if (do_start)
        ao->driver->start(ao);

    if (wakeup)
        ao_wakeup_playthread(ao);
}

// Whether audio is playing. This means that there is still data in the buffers,
// and ao_start() was called. This returns true even if playback was logically
// paused. On false, EOF was reached, or an underrun happened, or ao_reset()
// was called.
bool ao_is_playing(struct ao *ao)
{
    struct buffer_state *p = ao->buffer_state;

    mp_mutex_lock(&p->lock);
    bool playing = p->playing;
    mp_mutex_unlock(&p->lock);

    return playing;
}

// Block until the current audio buffer has played completely.
void ao_drain(struct ao *ao)
{
    struct buffer_state *p = ao->buffer_state;

    mp_mutex_lock(&p->lock);
    while (!p->paused && p->playing) {
        // ao_get_delay() takes p->lock itself, so drop it around the call.
        mp_mutex_unlock(&p->lock);
        double delay = ao_get_delay(ao);
        mp_mutex_lock(&p->lock);

        // Limit to buffer + arbitrary ~250ms max. waiting for robustness.
        delay += mp_async_queue_get_samples(p->queue) / (double)ao->samplerate;

        // Wait for EOF signal from AO.
        if (mp_cond_timedwait(&p->wakeup, &p->lock,
                              MP_TIME_S_TO_NS(MPMAX(delay, 0) + 0.25)))
        {
            MP_VERBOSE(ao, "drain timeout\n");
            break;
        }

        if (!p->playing && mp_async_queue_get_samples(p->queue)) {
            // Stopped with data still queued: restart so the remainder plays.
            MP_WARN(ao, "underrun during draining\n");
            mp_mutex_unlock(&p->lock);
            ao_start(ao);
            mp_mutex_lock(&p->lock);
        }
    }
    mp_mutex_unlock(&p->lock);

    ao_reset(ao);
}

// Filter-graph wakeup callback: new input may be available, poke the
// playthread.
static void wakeup_filters(void *ctx)
{
    struct ao *ao = ctx;
    ao_wakeup_playthread(ao);
}

// Tear down the AO: stop the playthread (if any), uninit the driver, free all
// buffer state. Safe to call after a partially failed init (p may be NULL-ish
// fields; driver_initialized gates the driver uninit).
void ao_uninit(struct ao *ao)
{
    struct buffer_state *p = ao->buffer_state;

    if (p && p->thread_valid) {
        mp_mutex_lock(&p->pt_lock);
        p->terminate = true;
        mp_cond_broadcast(&p->pt_wakeup);
        mp_mutex_unlock(&p->pt_lock);

        mp_thread_join(p->thread);
        p->thread_valid = false;
    }

    if (ao->driver_initialized)
        ao->driver->uninit(ao);

    if (p) {
        talloc_free(p->filter_root);
        talloc_free(p->queue);
        talloc_free(p->pending);
        talloc_free(p->convert_buffer);
        talloc_free(p->temp_buf);

        mp_cond_destroy(&p->wakeup);
        mp_mutex_destroy(&p->lock);

        mp_cond_destroy(&p->pt_wakeup);
        mp_mutex_destroy(&p->pt_lock);
    }

    talloc_free(ao);
}

// Allocate buffer_state before driver init (so drivers can rely on it).
void init_buffer_pre(struct ao *ao)
{
    ao->buffer_state = talloc_zero(ao, struct buffer_state);
}

// Finish buffer setup after driver init: create locks, queue and filter
// graph, and (push mode only) spawn the playthread. Returns false on failure;
// caller is expected to uninit.
bool init_buffer_post(struct ao *ao)
{
    struct buffer_state *p = ao->buffer_state;

    assert(ao->driver->start);
    if (ao->driver->write) {
        assert(ao->driver->reset);
        assert(ao->driver->get_state);
    }

    mp_mutex_init(&p->lock);
    mp_cond_init(&p->wakeup);

    mp_mutex_init(&p->pt_lock);
    mp_cond_init(&p->pt_wakeup);

    p->queue = mp_async_queue_create();
    p->filter_root = mp_filter_create_root(ao->global);
    p->input = mp_async_queue_create_filter(p->filter_root, MP_PIN_OUT, p->queue);

    mp_async_queue_resume_reading(p->queue);

    struct mp_async_queue_config cfg = {
        .sample_unit = AQUEUE_UNIT_SAMPLES,
        .max_samples = ao->buffer,
        .max_bytes = INT64_MAX,
    };
    mp_async_queue_set_config(p->queue, cfg);

    if (ao->driver->write) {
        mp_filter_graph_set_wakeup_cb(p->filter_root, wakeup_filters, ao);

        p->thread_valid = true;
        if (mp_thread_create(&p->thread, playthread, ao)) {
            p->thread_valid = false;
            return false;
        }
    } else {
        if (ao->stream_silence) {
            ao->driver->start(ao);
            p->streaming = true;
        }
    }

    if (ao->stream_silence) {
        MP_WARN(ao, "The --audio-stream-silence option is set. This will break "
                "certain player behavior.\n");
    }

    return true;
}

// Ensure p->temp_buf can hold at least "samples" samples in the AO's output
// format; (re)allocates only when growing. Returns false on allocation error.
static bool realloc_buf(struct ao *ao, int samples)
{
    struct buffer_state *p = ao->buffer_state;

    samples = MPMAX(1, samples);

    if (!p->temp_buf || samples > mp_aframe_get_size(p->temp_buf)) {
        TA_FREEP(&p->temp_buf);
        p->temp_buf = mp_aframe_create();
        if (!mp_aframe_set_format(p->temp_buf, ao->format) ||
            !mp_aframe_set_chmap(p->temp_buf, &ao->channels) ||
            !mp_aframe_set_rate(p->temp_buf, ao->samplerate) ||
            !mp_aframe_alloc_data(p->temp_buf, samples))
        {
            TA_FREEP(&p->temp_buf);
            return false;
        }
    }

    return true;
}

// called locked
// One push-mode feed iteration: query device space, fill it from the input
// queue (or with recovery/pad silence), and lazily start the device on first
// write. Returns true if the playthread should retry immediately instead of
// sleeping.
static bool ao_play_data(struct ao *ao)
{
    struct buffer_state *p = ao->buffer_state;

    if ((!p->playing || p->paused) && !ao->stream_silence)
        return false;

    struct mp_pcm_state state;
    get_dev_state(ao, &state);

    if (p->streaming && !state.playing && !ao->untimed)
        goto eof;

    void **planes = NULL;
    int space = state.free_samples;
    if (!space)
        return false;
    assert(space >= 0);

    int samples = 0;
    bool got_eof = false;
    if (ao->driver->write_frames) {
        // Frame-wise writing (encode mode): hand over p->pending itself.
        TA_FREEP(&p->pending);
        samples = read_buffer(ao, NULL, 1, &got_eof, false);
        planes = (void **)&p->pending;
    } else {
        if (!realloc_buf(ao, space)) {
            MP_ERR(ao, "Failed to allocate buffer.\n");
            return false;
        }
        planes = (void **)mp_aframe_get_data_rw(p->temp_buf);
        assert(planes);

        if (p->recover_pause) {
            // Replay the delay lost by a non-hw pause as silence.
            samples = MPCLAMP(p->prepause_state.delay * ao->samplerate, 0, space);
            p->recover_pause = false;
            mp_aframe_set_silence(p->temp_buf, 0, space);
        }

        if (!samples) {
            samples = read_buffer(ao, planes, space, &got_eof, true);
            if (p->paused || (ao->stream_silence && !p->playing))
                samples = space; // read_buffer() sets remainder to silent
        }
    }

    if (samples) {
        MP_STATS(ao, "start ao fill");
        if (!ao->driver->write(ao, planes, samples))
            MP_ERR(ao, "Error writing audio to device.\n");
        MP_STATS(ao, "end ao fill");

        if (!p->streaming) {
            MP_VERBOSE(ao, "starting AO\n");
            ao->driver->start(ao);
            p->streaming = true;
            state.playing = true;
        }
    }

    MP_TRACE(ao, "in=%d space=%d(%d) pl=%d, eof=%d\n",
             samples, space, state.free_samples, p->playing, got_eof);

    if (got_eof)
        goto eof;

    // Retry without sleeping if the device buffer wasn't filled completely.
    return samples > 0 && (samples < space || ao->untimed);

eof:
    MP_VERBOSE(ao, "audio end or underrun\n");
    // Normal AOs signal EOF on underrun, untimed AOs never signal underruns.
    if (ao->untimed || !state.playing || ao->stream_silence) {
        p->streaming = state.playing && !ao->untimed;
        p->playing = false;
    }
    ao->wakeup_cb(ao->wakeup_ctx);
    // For ao_drain().
    mp_cond_broadcast(&p->wakeup);
    return true;
}

// Push-mode worker thread: repeatedly feed the device via ao_play_data(),
// then sleep on pt_wakeup until woken or a fraction of the device buffer has
// presumably drained. Exits when p->terminate is set (see ao_uninit()).
static MP_THREAD_VOID playthread(void *arg)
{
    struct ao *ao = arg;
    struct buffer_state *p = ao->buffer_state;
    mp_thread_set_name("ao");
    while (1) {
        mp_mutex_lock(&p->lock);

        bool retry = false;
        if (!ao->driver->initially_blocked || p->initial_unblocked)
            retry = ao_play_data(ao);

        // Wait until the device wants us to write more data to it.
        // Fallback to guessing.
        int64_t timeout = INT64_MAX;
        if (p->streaming && !retry && (!p->paused || ao->stream_silence)) {
            // Wake up again if half of the audio buffer has been played.
            // Since audio could play at a faster or slower pace, wake up twice
            // as often as ideally needed.
            timeout = MP_TIME_S_TO_NS(ao->device_buffer / (double)ao->samplerate * 0.25);
        }

        mp_mutex_unlock(&p->lock);

        mp_mutex_lock(&p->pt_lock);
        if (p->terminate) {
            mp_mutex_unlock(&p->pt_lock);
            break;
        }
        if (!p->need_wakeup && !retry) {
            MP_STATS(ao, "start audio wait");
            mp_cond_timedwait(&p->pt_wakeup, &p->pt_lock, timeout);
            MP_STATS(ao, "end audio wait");
        }
        p->need_wakeup = false;
        mp_mutex_unlock(&p->pt_lock);
    }
    MP_THREAD_RETURN();
}

// Release an initially_blocked push AO so the playthread may start writing
// (see ao_driver.initially_blocked). No-op for pull AOs.
void ao_unblock(struct ao *ao)
{
    if (ao->driver->write) {
        struct buffer_state *p = ao->buffer_state;
        mp_mutex_lock(&p->lock);
        p->initial_unblocked = true;
        mp_mutex_unlock(&p->lock);
        ao_wakeup_playthread(ao);
    }
}
diff --git a/audio/out/internal.h b/audio/out/internal.h
new file mode 100644
index 0000000..7951b38
--- /dev/null
+++ b/audio/out/internal.h
@@ -0,0 +1,237 @@
/*
 * This file is part of mpv.
 *
 * mpv is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * mpv is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef MP_AO_INTERNAL_H_
#define MP_AO_INTERNAL_H_

#include <stdatomic.h>
#include <stdbool.h>

#include "audio/out/ao.h"

/* global data used by ao.c and ao drivers */
struct ao {
    int samplerate;
    struct mp_chmap channels;
    int format;                 // one of AF_FORMAT_...
    int bps;                    // bytes per second (per plane)
    int sstride;                // size of a sample on each plane
                                // (format_size*num_channels/num_planes)
    int num_planes;
    bool probing;               // if true, don't fail loudly on init
    bool untimed;               // don't assume realtime playback
    int device_buffer;          // device buffer in samples (guessed by
                                // common init code if not set by driver)
    const struct ao_driver *driver;
    bool driver_initialized;
    void *priv;
    struct mpv_global *global;
    struct encode_lavc_context *encode_lavc_ctx;
    void (*wakeup_cb)(void *ctx);
    void *wakeup_ctx;
    struct mp_log *log;         // Using e.g. "[ao/coreaudio]" as prefix
    int init_flags;             // AO_INIT_* flags
    bool stream_silence;        // if audio inactive, just play silence

    // The device as selected by the user, usually using ao_device_desc.name
    // from an entry from the list returned by driver->list_devices. If the
    // default device should be used, this is set to NULL.
    char *device;

    // Application name to report to the audio API.
    char *client_name;

    // Used during init: if init fails, redirect to this ao
    char *redirect;

    // Internal events (use ao_request_reload(), ao_hotplug_event())
    atomic_uint events_;

    // Float gain multiplicator
    _Atomic float gain;

    int buffer;                 // core-side buffer size in samples
    double def_buffer;          // default buffer duration in seconds
    struct buffer_state *buffer_state; // owned by buffer.c
};

void init_buffer_pre(struct ao *ao);
bool init_buffer_post(struct ao *ao);

struct mp_pcm_state {
    // Note: free_samples+queued_samples <= ao->device_buffer; the sum may be
    // less if the audio API can report partial periods played, while
    // free_samples should be period-size aligned. If free_samples is not
    // period-size aligned, the AO thread might get into a situation where
    // it writes a very small number of samples in each iteration, leading
    // to extremely inefficient behavior.
    // Keep in mind that write() may write less than free_samples (or your
    // period size alignment) anyway.
    int free_samples;       // number of free space in ring buffer
    int queued_samples;     // number of samples to play in ring buffer
    double delay;           // total latency in seconds (includes queued_samples)
    bool playing;           // set if underlying API is actually playing audio;
                            // the AO must unset it on underrun (accidental
                            // underrun and EOF are indistinguishable; the upper
                            // layers decide what it was)
                            // real pausing may assume playing=true
};

/* Note:
 *
 * In general, there are two types of audio drivers:
 *  a) push based (the user queues data that should be played)
 *  b) pull callback based (the audio API calls a callback to get audio)
 *
 * The ao.c code can handle both. It basically implements two audio paths
 * and provides a uniform API for them. If ao_driver->write is NULL, it assumes
 * that the driver uses a callback based audio API, otherwise push based.
 *
 * Requirements:
 *  a+b) Mandatory for both types:
 *          init
 *          uninit
 *          start
 *       Optional for both types:
 *          control
 *  a) ->write is called to queue audio. push.c creates a thread to regularly
 *     refill audio device buffers with ->write, but all driver functions are
 *     always called under an exclusive lock.
 *     Mandatory:
 *          reset
 *          write
 *          get_state
 *     Optional:
 *          set_pause
 *  b) ->write must be NULL. ->start must be provided, and should make the
 *     audio API start calling the audio callback. Your audio callback should
 *     in turn call ao_read_data() to get audio data. Most functions are
 *     optional and will be emulated if missing (e.g. pausing is emulated as
 *     silence).
 *     Also, the following optional callbacks can be provided:
 *          reset       (stops the audio callback, start() restarts it)
 */
struct ao_driver {
    // If true, use with encoding only.
    bool encode;
    // Name used for --ao.
    const char *name;
    // Description shown with --ao=help.
    const char *description;
    // This requires waiting for a AO_EVENT_INITIAL_UNBLOCK event before the
    // first write() call is done. Encode mode uses this, and push mode
    // respects it automatically (don't use with pull mode).
    bool initially_blocked;
    // If true, write units of entire frames. The write() call is modified to
    // use data==mp_aframe. Useful for encoding AO only.
    bool write_frames;
    // Init the device using ao->format/ao->channels/ao->samplerate. If the
    // device doesn't accept these parameters, you can attempt to negotiate
    // fallback parameters, and set the ao format fields accordingly.
    int (*init)(struct ao *ao);
    // Optional. See ao_control() etc. in ao.c
    int (*control)(struct ao *ao, enum aocontrol cmd, void *arg);
    void (*uninit)(struct ao *ao);
    // Stop all audio playback, clear buffers, back to state after init().
    // Optional for pull AOs.
    void (*reset)(struct ao *ao);
    // push based: set pause state. Only called after start() and before reset().
    //             returns success (this is intended for paused=true; if it
    //             returns false, playback continues, and the core emulates via
    //             reset(); unpausing always works)
    //             The pausing state is also cleared by reset().
    bool (*set_pause)(struct ao *ao, bool paused);
    // pull based: start the audio callback
    // push based: start playing queued data
    //             AO should call ao_wakeup_playthread() if a period boundary
    //             is crossed, or playback stops due to external reasons
    //             (including underruns or device removal)
    //             must set mp_pcm_state.playing; unset on error/underrun/end
    void (*start)(struct ao *ao);
    // push based: queue new data. This won't try to write more data than the
    // reported free space (samples <= mp_pcm_state.free_samples).
    // This must NOT start playback. start() does that, and write() may be
    // called multiple times before start() is called. It may also happen that
    // reset() is called to discard the buffer. start() without write() will
    // immediately reported an underrun.
    // Return false on failure.
    bool (*write)(struct ao *ao, void **data, int samples);
    // push based: return mandatory stream information
    void (*get_state)(struct ao *ao, struct mp_pcm_state *state);

    // Return the list of devices currently available in the system. Use
    // ao_device_list_add() to add entries. The selected device will be set as
    // ao->device (using ao_device_desc.name).
    // Warning: the ao struct passed is not initialized with ao_driver->init().
    //          Instead, hotplug_init/hotplug_uninit is called. If these
    //          callbacks are not set, no driver initialization call is done
    //          on the ao struct.
    void (*list_devs)(struct ao *ao, struct ao_device_list *list);

    // If set, these are called before/after ao_driver->list_devs is called.
    // It is also assumed that the driver can do hotplugging - which means
    // it is expected to call ao_hotplug_event(ao) whenever the system's
    // audio device list changes. The player will then call list_devs() again.
    int (*hotplug_init)(struct ao *ao);
    void (*hotplug_uninit)(struct ao *ao);

    // For option parsing (see vo.h)
    int priv_size;
    const void *priv_defaults;
    const struct m_option *options;
    const char *options_prefix;
    const struct m_sub_options *global_opts;
};

// These functions can be called by AOs.

int ao_read_data(struct ao *ao, void **data, int samples, int64_t out_time_ns);
MP_WARN_UNUSED_RESULT
int ao_read_data_nonblocking(struct ao *ao, void **data, int samples, int64_t out_time_ns);

bool ao_chmap_sel_adjust(struct ao *ao, const struct mp_chmap_sel *s,
                         struct mp_chmap *map);
bool ao_chmap_sel_adjust2(struct ao *ao, const struct mp_chmap_sel *s,
                          struct mp_chmap *map, bool safe_multichannel);
bool ao_chmap_sel_get_def(struct ao *ao, const struct mp_chmap_sel *s,
                          struct mp_chmap *map, int num);

// Add a deep copy of e to the list.
// Call from ao_driver->list_devs callback only.
void ao_device_list_add(struct ao_device_list *list, struct ao *ao,
                        struct ao_device_desc *e);

void ao_post_process_data(struct ao *ao, void **data, int num_samples);

// Describes a sample-format conversion for ao_convert_inplace().
struct ao_convert_fmt {
    int src_fmt;        // source AF_FORMAT_*
    int channels;       // number of channels
    int dst_bits;       // total target data sample size
    int pad_msb;        // padding in the MSB (i.e. required shifting)
    int pad_lsb;        // padding in LSB (required 0 bits) (ignored)
};

bool ao_can_convert_inplace(struct ao_convert_fmt *fmt);
bool ao_need_conversion(struct ao_convert_fmt *fmt);
void ao_convert_inplace(struct ao_convert_fmt *fmt, void **data, int num_samples);

void ao_wakeup_playthread(struct ao *ao);

int ao_read_data_converted(struct ao *ao, struct ao_convert_fmt *fmt,
                           void **data, int samples, int64_t out_time_ns);

#endif