diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 00:47:55 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 00:47:55 +0000 |
commit | 26a029d407be480d791972afb5975cf62c9360a6 (patch) | |
tree | f435a8308119effd964b339f76abb83a57c29483 /third_party/aom/examples | |
parent | Initial commit. (diff) | |
download | firefox-26a029d407be480d791972afb5975cf62c9360a6.tar.xz firefox-26a029d407be480d791972afb5975cf62c9360a6.zip |
Adding upstream version 124.0.1. (tag: upstream/124.0.1)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/aom/examples')
24 files changed, 8602 insertions, 0 deletions
diff --git a/third_party/aom/examples/analyzer.cc b/third_party/aom/examples/analyzer.cc new file mode 100644 index 0000000000..501f5024db --- /dev/null +++ b/third_party/aom/examples/analyzer.cc @@ -0,0 +1,722 @@ +/* + * Copyright (c) 2017, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ +#include <wx/wx.h> +#include <wx/aboutdlg.h> +#include <wx/cmdline.h> +#include <wx/dcbuffer.h> + +#include "aom/aom_decoder.h" +#include "aom/aomdx.h" +#include "av1/common/av1_common_int.h" +#include "av1/decoder/accounting.h" +#include "av1/decoder/inspection.h" +#include "common/tools_common.h" +#include "common/video_reader.h" + +#define OD_SIGNMASK(a) (-((a) < 0)) +#define OD_FLIPSIGNI(a, b) (((a) + OD_SIGNMASK(b)) ^ OD_SIGNMASK(b)) +#define OD_DIV_ROUND(x, y) (((x) + OD_FLIPSIGNI((y) >> 1, x)) / (y)) + +enum { + OD_LUMA_MASK = 1 << 0, + OD_CB_MASK = 1 << 1, + OD_CR_MASK = 1 << 2, + OD_ALL_MASK = OD_LUMA_MASK | OD_CB_MASK | OD_CR_MASK +}; + +class AV1Decoder { + private: + FILE *input; + wxString path; + + AvxVideoReader *reader; + const AvxVideoInfo *info; + + insp_frame_data frame_data; + + aom_codec_ctx_t codec; + bool show_padding; + + public: + aom_image_t *image; + int frame; + + int plane_mask; + + AV1Decoder(); + ~AV1Decoder(); + + bool open(const wxString &path); + void close(); + bool step(); + + int getWidthPadding() const; + int getHeightPadding() const; + void togglePadding(); + int getWidth() const; + int getHeight() const; + + bool getAccountingStruct(Accounting **acct); + bool setInspectionCallback(); + 
+ static void inspect(void *decoder, void *data); +}; + +AV1Decoder::AV1Decoder() + : reader(NULL), info(NULL), decoder(NULL), show_padding(false), image(NULL), + frame(0) {} + +AV1Decoder::~AV1Decoder() {} + +void AV1Decoder::togglePadding() { show_padding = !show_padding; } + +bool AV1Decoder::open(const wxString &path) { + reader = aom_video_reader_open(path.mb_str()); + if (!reader) { + fprintf(stderr, "Failed to open %s for reading.", path.mb_str().data()); + return false; + } + this->path = path; + info = aom_video_reader_get_info(reader); + decoder = get_aom_decoder_by_fourcc(info->codec_fourcc); + if (!decoder) { + fprintf(stderr, "Unknown input codec."); + return false; + } + printf("Using %s\n", aom_codec_iface_name(decoder)); + if (aom_codec_dec_init(&codec, decoder, NULL, 0)) { + fprintf(stderr, "Failed to initialize decoder."); + return false; + } + ifd_init(&frame_data, info->frame_width, info->frame_height); + setInspectionCallback(); + return true; +} + +void AV1Decoder::close() {} + +bool AV1Decoder::step() { + if (aom_video_reader_read_frame(reader)) { + size_t frame_size; + const unsigned char *frame_data; + frame_data = aom_video_reader_get_frame(reader, &frame_size); + if (aom_codec_decode(&codec, frame_data, frame_size, NULL)) { + fprintf(stderr, "Failed to decode frame."); + return false; + } else { + aom_codec_iter_t iter = NULL; + image = aom_codec_get_frame(&codec, &iter); + if (image != NULL) { + frame++; + return true; + } + return false; + } + } + return false; +} + +int AV1Decoder::getWidth() const { + return info->frame_width + 2 * getWidthPadding(); +} + +int AV1Decoder::getWidthPadding() const { + return show_padding ? AOMMAX(info->frame_width + 16, + ALIGN_POWER_OF_TWO(info->frame_width, 6)) - + info->frame_width + : 0; +} + +int AV1Decoder::getHeight() const { + return info->frame_height + 2 * getHeightPadding(); +} + +int AV1Decoder::getHeightPadding() const { + return show_padding ? 
AOMMAX(info->frame_height + 16, + ALIGN_POWER_OF_TWO(info->frame_height, 6)) - + info->frame_height + : 0; +} + +bool AV1Decoder::getAccountingStruct(Accounting **accounting) { + return aom_codec_control(&codec, AV1_GET_ACCOUNTING, accounting) == + AOM_CODEC_OK; +} + +bool AV1Decoder::setInspectionCallback() { + aom_inspect_init ii; + ii.inspect_cb = AV1Decoder::inspect; + ii.inspect_ctx = (void *)this; + return aom_codec_control(&codec, AV1_SET_INSPECTION_CALLBACK, &ii) == + AOM_CODEC_OK; +} + +void AV1Decoder::inspect(void *pbi, void *data) { + AV1Decoder *decoder = (AV1Decoder *)data; + ifd_inspect(&decoder->frame_data, pbi, 0); +} + +#define MIN_ZOOM (1) +#define MAX_ZOOM (4) + +class AnalyzerPanel : public wxPanel { + DECLARE_EVENT_TABLE() + + private: + AV1Decoder decoder; + const wxString path; + + int zoom; + unsigned char *pixels; + + const bool bit_accounting; + double *bpp_q3; + + int plane_mask; + + // The display size is the decode size, scaled by the zoom. + int getDisplayWidth() const; + int getDisplayHeight() const; + + bool updateDisplaySize(); + + void computeBitsPerPixel(); + + public: + AnalyzerPanel(wxWindow *parent, const wxString &path, + const bool bit_accounting); + ~AnalyzerPanel(); + + bool open(const wxString &path); + void close(); + void render(); + void togglePadding(); + bool nextFrame(); + void refresh(); + + int getZoom() const; + bool setZoom(int zoom); + + void setShowPlane(bool show_plane, int mask); + + void onPaint(wxPaintEvent &event); // NOLINT +}; + +BEGIN_EVENT_TABLE(AnalyzerPanel, wxPanel) +EVT_PAINT(AnalyzerPanel::onPaint) +END_EVENT_TABLE() + +AnalyzerPanel::AnalyzerPanel(wxWindow *parent, const wxString &path, + const bool bit_accounting) + : wxPanel(parent), path(path), zoom(0), pixels(NULL), + bit_accounting(bit_accounting), bpp_q3(NULL), plane_mask(OD_ALL_MASK) {} + +AnalyzerPanel::~AnalyzerPanel() { close(); } + +void AnalyzerPanel::setShowPlane(bool show_plane, int mask) { + if (show_plane) { + plane_mask |= mask; 
+ } else { + plane_mask &= ~mask; + } +} + +void AnalyzerPanel::render() { + aom_image_t *img = decoder.image; + const int hbd = !!(img->fmt & AOM_IMG_FMT_HIGHBITDEPTH); + int y_stride = img->stride[0] >> hbd; + int cb_stride = img->stride[1] >> hbd; + int cr_stride = img->stride[2] >> hbd; + int p_stride = 3 * getDisplayWidth(); + unsigned char *y_row = img->planes[0]; + unsigned char *cb_row = img->planes[1]; + unsigned char *cr_row = img->planes[2]; + uint16_t *y_row16 = reinterpret_cast<uint16_t *>(y_row); + uint16_t *cb_row16 = reinterpret_cast<uint16_t *>(cb_row); + uint16_t *cr_row16 = reinterpret_cast<uint16_t *>(cr_row); + unsigned char *p_row = pixels; + int y_width_padding = decoder.getWidthPadding(); + int cb_width_padding = y_width_padding >> 1; + int cr_width_padding = y_width_padding >> 1; + int y_height_padding = decoder.getHeightPadding(); + int cb_height_padding = y_height_padding >> 1; + int cr_height_padding = y_height_padding >> 1; + for (int j = 0; j < decoder.getHeight(); j++) { + unsigned char *y = y_row - y_stride * y_height_padding; + unsigned char *cb = cb_row - cb_stride * cb_height_padding; + unsigned char *cr = cr_row - cr_stride * cr_height_padding; + uint16_t *y16 = y_row16 - y_stride * y_height_padding; + uint16_t *cb16 = cb_row16 - cb_stride * cb_height_padding; + uint16_t *cr16 = cr_row16 - cr_stride * cr_height_padding; + unsigned char *p = p_row; + for (int i = 0; i < decoder.getWidth(); i++) { + int64_t yval; + int64_t cbval; + int64_t crval; + int pmask; + unsigned rval; + unsigned gval; + unsigned bval; + if (hbd) { + yval = *(y16 - y_width_padding); + cbval = *(cb16 - cb_width_padding); + crval = *(cr16 - cr_width_padding); + } else { + yval = *(y - y_width_padding); + cbval = *(cb - cb_width_padding); + crval = *(cr - cr_width_padding); + } + pmask = plane_mask; + if (pmask & OD_LUMA_MASK) { + yval -= 16; + } else { + yval = 128; + } + cbval = ((pmask & OD_CB_MASK) >> 1) * (cbval - 128); + crval = ((pmask & OD_CR_MASK) >> 
2) * (crval - 128); + /*This is intentionally slow and very accurate.*/ + rval = OD_CLAMPI( + 0, + (int32_t)OD_DIV_ROUND( + 2916394880000LL * yval + 4490222169144LL * crval, 9745792000LL), + 65535); + gval = OD_CLAMPI(0, + (int32_t)OD_DIV_ROUND(2916394880000LL * yval - + 534117096223LL * cbval - + 1334761232047LL * crval, + 9745792000LL), + 65535); + bval = OD_CLAMPI( + 0, + (int32_t)OD_DIV_ROUND( + 2916394880000LL * yval + 5290866304968LL * cbval, 9745792000LL), + 65535); + unsigned char *px_row = p; + for (int v = 0; v < zoom; v++) { + unsigned char *px = px_row; + for (int u = 0; u < zoom; u++) { + *(px + 0) = (unsigned char)(rval >> 8); + *(px + 1) = (unsigned char)(gval >> 8); + *(px + 2) = (unsigned char)(bval >> 8); + px += 3; + } + px_row += p_stride; + } + if (hbd) { + int dc = ((y16 - y_row16) & 1) | (1 - img->x_chroma_shift); + y16++; + cb16 += dc; + cr16 += dc; + } else { + int dc = ((y - y_row) & 1) | (1 - img->x_chroma_shift); + y++; + cb += dc; + cr += dc; + } + p += zoom * 3; + } + int dc = -((j & 1) | (1 - img->y_chroma_shift)); + if (hbd) { + y_row16 += y_stride; + cb_row16 += dc & cb_stride; + cr_row16 += dc & cr_stride; + } else { + y_row += y_stride; + cb_row += dc & cb_stride; + cr_row += dc & cr_stride; + } + p_row += zoom * p_stride; + } +} + +void AnalyzerPanel::computeBitsPerPixel() { + Accounting *acct; + double bpp_total; + int totals_q3[MAX_SYMBOL_TYPES] = { 0 }; + int sym_count[MAX_SYMBOL_TYPES] = { 0 }; + decoder.getAccountingStruct(&acct); + for (int j = 0; j < decoder.getHeight(); j++) { + for (int i = 0; i < decoder.getWidth(); i++) { + bpp_q3[j * decoder.getWidth() + i] = 0.0; + } + } + bpp_total = 0; + for (int i = 0; i < acct->syms.num_syms; i++) { + AccountingSymbol *s; + s = &acct->syms.syms[i]; + totals_q3[s->id] += s->bits; + sym_count[s->id] += s->samples; + } + printf("=== Frame: %-3i ===\n", decoder.frame - 1); + for (int i = 0; i < acct->syms.dictionary.num_strs; i++) { + if (totals_q3[i]) { + printf("%30s = %10.3f (%f 
bit/symbol)\n", acct->syms.dictionary.strs[i], + (float)totals_q3[i] / 8, (float)totals_q3[i] / 8 / sym_count[i]); + } + } + printf("\n"); +} + +void AnalyzerPanel::togglePadding() { + decoder.togglePadding(); + updateDisplaySize(); +} + +bool AnalyzerPanel::nextFrame() { + if (decoder.step()) { + refresh(); + return true; + } + return false; +} + +void AnalyzerPanel::refresh() { + if (bit_accounting) { + computeBitsPerPixel(); + } + render(); +} + +int AnalyzerPanel::getDisplayWidth() const { return zoom * decoder.getWidth(); } + +int AnalyzerPanel::getDisplayHeight() const { + return zoom * decoder.getHeight(); +} + +bool AnalyzerPanel::updateDisplaySize() { + unsigned char *p = (unsigned char *)malloc( + sizeof(*p) * 3 * getDisplayWidth() * getDisplayHeight()); + if (p == NULL) { + return false; + } + free(pixels); + pixels = p; + SetSize(getDisplayWidth(), getDisplayHeight()); + return true; +} + +bool AnalyzerPanel::open(const wxString &path) { + if (!decoder.open(path)) { + return false; + } + if (!setZoom(MIN_ZOOM)) { + return false; + } + if (bit_accounting) { + bpp_q3 = (double *)malloc(sizeof(*bpp_q3) * decoder.getWidth() * + decoder.getHeight()); + if (bpp_q3 == NULL) { + fprintf(stderr, "Could not allocate memory for bit accounting\n"); + close(); + return false; + } + } + if (!nextFrame()) { + close(); + return false; + } + SetFocus(); + return true; +} + +void AnalyzerPanel::close() { + decoder.close(); + free(pixels); + pixels = NULL; + free(bpp_q3); + bpp_q3 = NULL; +} + +int AnalyzerPanel::getZoom() const { return zoom; } + +bool AnalyzerPanel::setZoom(int z) { + if (z <= MAX_ZOOM && z >= MIN_ZOOM && zoom != z) { + int old_zoom = zoom; + zoom = z; + if (!updateDisplaySize()) { + zoom = old_zoom; + return false; + } + return true; + } + return false; +} + +void AnalyzerPanel::onPaint(wxPaintEvent &) { + wxBitmap bmp(wxImage(getDisplayWidth(), getDisplayHeight(), pixels, true)); + wxBufferedPaintDC dc(this, bmp); +} + +class AnalyzerFrame : public 
wxFrame { + DECLARE_EVENT_TABLE() + + private: + AnalyzerPanel *panel; + const bool bit_accounting; + + wxMenu *fileMenu; + wxMenu *viewMenu; + wxMenu *playbackMenu; + + public: + AnalyzerFrame(const bool bit_accounting); // NOLINT + + void onOpen(wxCommandEvent &event); // NOLINT + void onClose(wxCommandEvent &event); // NOLINT + void onQuit(wxCommandEvent &event); // NOLINT + + void onTogglePadding(wxCommandEvent &event); // NOLINT + void onZoomIn(wxCommandEvent &event); // NOLINT + void onZoomOut(wxCommandEvent &event); // NOLINT + void onActualSize(wxCommandEvent &event); // NOLINT + + void onToggleViewMenuCheckBox(wxCommandEvent &event); // NOLINT + void onResetAndToggleViewMenuCheckBox(wxCommandEvent &event); // NOLINT + + void onNextFrame(wxCommandEvent &event); // NOLINT + void onGotoFrame(wxCommandEvent &event); // NOLINT + void onRestart(wxCommandEvent &event); // NOLINT + + void onAbout(wxCommandEvent &event); // NOLINT + + bool open(const wxString &path); + bool setZoom(int zoom); + void updateViewMenu(); +}; + +enum { + wxID_NEXT_FRAME = 6000, + wxID_SHOW_Y, + wxID_SHOW_U, + wxID_SHOW_V, + wxID_GOTO_FRAME, + wxID_RESTART, + wxID_ACTUAL_SIZE, + wxID_PADDING +}; + +BEGIN_EVENT_TABLE(AnalyzerFrame, wxFrame) +EVT_MENU(wxID_OPEN, AnalyzerFrame::onOpen) +EVT_MENU(wxID_CLOSE, AnalyzerFrame::onClose) +EVT_MENU(wxID_EXIT, AnalyzerFrame::onQuit) +EVT_MENU(wxID_PADDING, AnalyzerFrame::onTogglePadding) +EVT_MENU(wxID_ZOOM_IN, AnalyzerFrame::onZoomIn) +EVT_MENU(wxID_ZOOM_OUT, AnalyzerFrame::onZoomOut) +EVT_MENU(wxID_ACTUAL_SIZE, AnalyzerFrame::onActualSize) +EVT_MENU(wxID_SHOW_Y, AnalyzerFrame::onResetAndToggleViewMenuCheckBox) +EVT_MENU(wxID_SHOW_U, AnalyzerFrame::onResetAndToggleViewMenuCheckBox) +EVT_MENU(wxID_SHOW_V, AnalyzerFrame::onResetAndToggleViewMenuCheckBox) +EVT_MENU(wxID_NEXT_FRAME, AnalyzerFrame::onNextFrame) +EVT_MENU(wxID_GOTO_FRAME, AnalyzerFrame::onGotoFrame) +EVT_MENU(wxID_RESTART, AnalyzerFrame::onRestart) +EVT_MENU(wxID_ABOUT, 
AnalyzerFrame::onAbout) +END_EVENT_TABLE() + +AnalyzerFrame::AnalyzerFrame(const bool bit_accounting) + : wxFrame(NULL, wxID_ANY, _("AV1 Stream Analyzer"), wxDefaultPosition, + wxDefaultSize, wxDEFAULT_FRAME_STYLE), + panel(NULL), bit_accounting(bit_accounting) { + wxMenuBar *mb = new wxMenuBar(); + + fileMenu = new wxMenu(); + fileMenu->Append(wxID_OPEN, _("&Open...\tCtrl-O"), _("Open AV1 file")); + fileMenu->Append(wxID_CLOSE, _("&Close\tCtrl-W"), _("Close AV1 file")); + fileMenu->Enable(wxID_CLOSE, false); + fileMenu->Append(wxID_EXIT, _("E&xit\tCtrl-Q"), _("Quit this program")); + mb->Append(fileMenu, _("&File")); + + wxAcceleratorEntry entries[2]; + entries[0].Set(wxACCEL_CTRL, (int)'=', wxID_ZOOM_IN); + entries[1].Set(wxACCEL_CTRL | wxACCEL_SHIFT, (int)'-', wxID_ZOOM_OUT); + wxAcceleratorTable accel(2, entries); + this->SetAcceleratorTable(accel); + + viewMenu = new wxMenu(); + +viewMenu->Append(wxID_PADDING, _("Toggle padding\tCtrl-p"), + _("Show padding")); + viewMenu->Append(wxID_ZOOM_IN, _("Zoom-In\tCtrl-+"), _("Double image size")); + viewMenu->Append(wxID_ZOOM_OUT, _("Zoom-Out\tCtrl--"), _("Half image size")); + viewMenu->Append(wxID_ACTUAL_SIZE, _("Actual size\tCtrl-0"), + _("Actual size of the frame")); + viewMenu->AppendSeparator(); + viewMenu->AppendCheckItem(wxID_SHOW_Y, _("&Y plane\tCtrl-Y"), + _("Show Y plane")); + viewMenu->AppendCheckItem(wxID_SHOW_U, _("&U plane\tCtrl-U"), + _("Show U plane")); + viewMenu->AppendCheckItem(wxID_SHOW_V, _("&V plane\tCtrl-V"), + _("Show V plane")); + mb->Append(viewMenu, _("&View")); + + playbackMenu = new wxMenu(); + playbackMenu->Append(wxID_NEXT_FRAME, _("Next frame\tCtrl-."), + _("Go to next frame")); + /*playbackMenu->Append(wxID_RESTART, _("&Restart\tCtrl-R"), + _("Set video to frame 0")); + playbackMenu->Append(wxID_GOTO_FRAME, _("Jump to Frame\tCtrl-J"), + _("Go to frame number"));*/ + mb->Append(playbackMenu, _("&Playback")); + + wxMenu *helpMenu = new wxMenu(); + helpMenu->Append(wxID_ABOUT, 
_("&About...\tF1"), _("Show about dialog")); + mb->Append(helpMenu, _("&Help")); + + SetMenuBar(mb); + + CreateStatusBar(1); +} + +void AnalyzerFrame::onOpen(wxCommandEvent &WXUNUSED(event)) { + wxFileDialog openFileDialog(this, _("Open file"), wxEmptyString, + wxEmptyString, _("AV1 files (*.ivf)|*.ivf"), + wxFD_OPEN | wxFD_FILE_MUST_EXIST); + if (openFileDialog.ShowModal() != wxID_CANCEL) { + open(openFileDialog.GetPath()); + } +} + +void AnalyzerFrame::onClose(wxCommandEvent &WXUNUSED(event)) {} + +void AnalyzerFrame::onQuit(wxCommandEvent &WXUNUSED(event)) { Close(true); } + +void AnalyzerFrame::onTogglePadding(wxCommandEvent &WXUNUSED(event)) { + panel->togglePadding(); + SetClientSize(panel->GetSize()); + panel->render(); + panel->Refresh(); +} + +void AnalyzerFrame::onZoomIn(wxCommandEvent &WXUNUSED(event)) { + setZoom(panel->getZoom() + 1); +} + +void AnalyzerFrame::onZoomOut(wxCommandEvent &WXUNUSED(event)) { + setZoom(panel->getZoom() - 1); +} + +void AnalyzerFrame::onActualSize(wxCommandEvent &WXUNUSED(event)) { + setZoom(MIN_ZOOM); +} + +void AnalyzerFrame::onToggleViewMenuCheckBox(wxCommandEvent &event) { // NOLINT + GetMenuBar()->Check(event.GetId(), event.IsChecked()); + updateViewMenu(); +} + +void AnalyzerFrame::onResetAndToggleViewMenuCheckBox( + wxCommandEvent &event) { // NOLINT + int id = event.GetId(); + if (id != wxID_SHOW_Y && id != wxID_SHOW_U && id != wxID_SHOW_V) { + GetMenuBar()->Check(wxID_SHOW_Y, true); + GetMenuBar()->Check(wxID_SHOW_U, true); + GetMenuBar()->Check(wxID_SHOW_V, true); + } + onToggleViewMenuCheckBox(event); +} + +void AnalyzerFrame::onNextFrame(wxCommandEvent &WXUNUSED(event)) { + panel->nextFrame(); + panel->Refresh(false); +} + +void AnalyzerFrame::onGotoFrame(wxCommandEvent &WXUNUSED(event)) {} + +void AnalyzerFrame::onRestart(wxCommandEvent &WXUNUSED(event)) {} + +void AnalyzerFrame::onAbout(wxCommandEvent &WXUNUSED(event)) { + wxAboutDialogInfo info; + info.SetName(_("AV1 Bitstream Analyzer")); + 
info.SetVersion(_("0.1-beta")); + info.SetDescription( + _("This program implements a bitstream analyzer for AV1")); + info.SetCopyright( + wxT("(C) 2017 Alliance for Open Media <negge@mozilla.com>")); + wxAboutBox(info); +} + +bool AnalyzerFrame::open(const wxString &path) { + panel = new AnalyzerPanel(this, path, bit_accounting); + if (panel->open(path)) { + SetClientSize(panel->GetSize()); + return true; + } else { + delete panel; + return false; + } +} + +bool AnalyzerFrame::setZoom(int zoom) { + if (panel->setZoom(zoom)) { + GetMenuBar()->Enable(wxID_ACTUAL_SIZE, zoom != MIN_ZOOM); + GetMenuBar()->Enable(wxID_ZOOM_IN, zoom != MAX_ZOOM); + GetMenuBar()->Enable(wxID_ZOOM_OUT, zoom != MIN_ZOOM); + SetClientSize(panel->GetSize()); + panel->render(); + panel->Refresh(); + return true; + } + return false; +} + +void AnalyzerFrame::updateViewMenu() { + panel->setShowPlane(GetMenuBar()->IsChecked(wxID_SHOW_Y), OD_LUMA_MASK); + panel->setShowPlane(GetMenuBar()->IsChecked(wxID_SHOW_U), OD_CB_MASK); + panel->setShowPlane(GetMenuBar()->IsChecked(wxID_SHOW_V), OD_CR_MASK); + SetClientSize(panel->GetSize()); + panel->render(); + panel->Refresh(false); +} + +class Analyzer : public wxApp { + private: + AnalyzerFrame *frame; + + public: + void OnInitCmdLine(wxCmdLineParser &parser); // NOLINT + bool OnCmdLineParsed(wxCmdLineParser &parser); // NOLINT +}; + +static const wxCmdLineEntryDesc CMD_LINE_DESC[] = { + { wxCMD_LINE_SWITCH, _("h"), _("help"), _("Display this help and exit."), + wxCMD_LINE_VAL_NONE, wxCMD_LINE_OPTION_HELP }, + { wxCMD_LINE_SWITCH, _("a"), _("bit-accounting"), _("Enable bit accounting"), + wxCMD_LINE_VAL_NONE, wxCMD_LINE_PARAM_OPTIONAL }, + { wxCMD_LINE_PARAM, NULL, NULL, _("input.ivf"), wxCMD_LINE_VAL_STRING, + wxCMD_LINE_PARAM_OPTIONAL }, + { wxCMD_LINE_NONE } +}; + +void Analyzer::OnInitCmdLine(wxCmdLineParser &parser) { // NOLINT + parser.SetDesc(CMD_LINE_DESC); + parser.SetSwitchChars(_("-")); +} + +bool Analyzer::OnCmdLineParsed(wxCmdLineParser 
&parser) { // NOLINT + bool bit_accounting = parser.Found(_("a")); + if (bit_accounting && !CONFIG_ACCOUNTING) { + fprintf(stderr, + "Bit accounting support not found. " + "Recompile with:\n./cmake -DCONFIG_ACCOUNTING=1\n"); + return false; + } + frame = new AnalyzerFrame(parser.Found(_("a"))); + frame->Show(); + if (parser.GetParamCount() > 0) { + return frame->open(parser.GetParam(0)); + } + return true; +} + +void usage_exit(void) { + fprintf(stderr, "uhh\n"); + exit(EXIT_FAILURE); +} + +IMPLEMENT_APP(Analyzer) diff --git a/third_party/aom/examples/aom_cx_set_ref.c b/third_party/aom/examples/aom_cx_set_ref.c new file mode 100644 index 0000000000..b7fb7bce45 --- /dev/null +++ b/third_party/aom/examples/aom_cx_set_ref.c @@ -0,0 +1,392 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +// AV1 Set Reference Frame +// ============================ +// +// This is an example demonstrating how to overwrite the AV1 encoder's +// internal reference frame. In the sample we set the last frame to the +// current frame. This technique could be used to bounce between two cameras. +// +// The decoder would also have to set the reference frame to the same value +// on the same frame, or the video will become corrupt. The 'test_decode' +// variable is set to 1 in this example that tests if the encoder and decoder +// results are matching. +// +// Usage +// ----- +// This example encodes a raw video. And the last argument passed in specifies +// the frame number to update the reference frame on. 
For example, run +// examples/aom_cx_set_ref av1 352 288 in.yuv out.ivf 4 30 +// The parameter is parsed as follows: +// +// +// Extra Variables +// --------------- +// This example maintains the frame number passed on the command line +// in the `update_frame_num` variable. +// +// +// Configuration +// ------------- +// +// The reference frame is updated on the frame specified on the command +// line. +// +// Observing The Effects +// --------------------- +// The encoder and decoder results should be matching when the same reference +// frame setting operation is done in both encoder and decoder. Otherwise, +// the encoder/decoder mismatch would be seen. + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "aom/aom_decoder.h" +#include "aom/aom_encoder.h" +#include "aom/aomcx.h" +#include "aom_scale/yv12config.h" +#include "common/tools_common.h" +#include "common/video_writer.h" +#include "examples/encoder_util.h" + +static const char *exec_name; + +void usage_exit(void) { + fprintf(stderr, + "Usage: %s <codec> <width> <height> <infile> <outfile> " + "<frame> <limit(optional)>\n", + exec_name); + exit(EXIT_FAILURE); +} + +static void testing_decode(aom_codec_ctx_t *encoder, aom_codec_ctx_t *decoder, + unsigned int frame_out, int *mismatch_seen) { + aom_image_t enc_img, dec_img; + + if (*mismatch_seen) return; + + /* Get the internal reference frame */ + if (aom_codec_control(encoder, AV1_GET_NEW_FRAME_IMAGE, &enc_img)) + die_codec(encoder, "Failed to get encoder reference frame"); + if (aom_codec_control(decoder, AV1_GET_NEW_FRAME_IMAGE, &dec_img)) + die_codec(decoder, "Failed to get decoder reference frame"); + + if ((enc_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) != + (dec_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH)) { + if (enc_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) { + aom_image_t enc_hbd_img; + aom_img_alloc(&enc_hbd_img, enc_img.fmt - AOM_IMG_FMT_HIGHBITDEPTH, + enc_img.d_w, enc_img.d_h, 16); + aom_img_truncate_16_to_8(&enc_hbd_img, &enc_img); + 
enc_img = enc_hbd_img; + } + if (dec_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) { + aom_image_t dec_hbd_img; + aom_img_alloc(&dec_hbd_img, dec_img.fmt - AOM_IMG_FMT_HIGHBITDEPTH, + dec_img.d_w, dec_img.d_h, 16); + aom_img_truncate_16_to_8(&dec_hbd_img, &dec_img); + dec_img = dec_hbd_img; + } + } + + if (!aom_compare_img(&enc_img, &dec_img)) { + int y[4], u[4], v[4]; + if (enc_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) { + aom_find_mismatch_high(&enc_img, &dec_img, y, u, v); + } else { + aom_find_mismatch(&enc_img, &dec_img, y, u, v); + } + + printf( + "Encode/decode mismatch on frame %u at" + " Y[%d, %d] {%d/%d}," + " U[%d, %d] {%d/%d}," + " V[%d, %d] {%d/%d}", + frame_out, y[0], y[1], y[2], y[3], u[0], u[1], u[2], u[3], v[0], v[1], + v[2], v[3]); + *mismatch_seen = 1; + } + + aom_img_free(&enc_img); + aom_img_free(&dec_img); +} + +static int encode_frame(aom_codec_ctx_t *ecodec, aom_image_t *img, + unsigned int frame_in, AvxVideoWriter *writer, + int test_decode, aom_codec_ctx_t *dcodec, + unsigned int *frame_out, int *mismatch_seen, + aom_image_t *ext_ref) { + int got_pkts = 0; + aom_codec_iter_t iter = NULL; + const aom_codec_cx_pkt_t *pkt = NULL; + int got_data; + const aom_codec_err_t res = aom_codec_encode(ecodec, img, frame_in, 1, 0); + if (res != AOM_CODEC_OK) die_codec(ecodec, "Failed to encode frame"); + + got_data = 0; + + while ((pkt = aom_codec_get_cx_data(ecodec, &iter)) != NULL) { + got_pkts = 1; + + if (pkt->kind == AOM_CODEC_CX_FRAME_PKT) { + const int keyframe = (pkt->data.frame.flags & AOM_FRAME_IS_KEY) != 0; + + ++*frame_out; + + if (!aom_video_writer_write_frame(writer, pkt->data.frame.buf, + pkt->data.frame.sz, + pkt->data.frame.pts)) { + die_codec(ecodec, "Failed to write compressed frame"); + } + printf(keyframe ? "K" : "."); + fflush(stdout); + got_data = 1; + + // Decode 1 frame. 
+ if (test_decode) { + if (aom_codec_decode(dcodec, pkt->data.frame.buf, + (unsigned int)pkt->data.frame.sz, NULL)) + die_codec(dcodec, "Failed to decode frame."); + + // Copy out first decoded frame, and use it as reference later. + if (*frame_out == 1 && ext_ref != NULL) + if (aom_codec_control(dcodec, AV1_COPY_NEW_FRAME_IMAGE, ext_ref)) + die_codec(dcodec, "Failed to get decoder new frame"); + } + } + } + + // Mismatch checking + if (got_data && test_decode) { + testing_decode(ecodec, dcodec, *frame_out, mismatch_seen); + } + + return got_pkts; +} + +int main(int argc, char **argv) { + FILE *infile = NULL; + // Encoder + aom_codec_ctx_t ecodec; + aom_codec_enc_cfg_t cfg; + unsigned int frame_in = 0; + aom_image_t raw; + aom_image_t raw_shift; + aom_image_t ext_ref; + aom_codec_err_t res; + AvxVideoInfo info; + AvxVideoWriter *writer = NULL; + int flags = 0; + int allocated_raw_shift = 0; + aom_img_fmt_t raw_fmt = AOM_IMG_FMT_I420; + aom_img_fmt_t ref_fmt = AOM_IMG_FMT_I420; + + // Test encoder/decoder mismatch. + int test_decode = 1; + // Decoder + aom_codec_ctx_t dcodec; + unsigned int frame_out = 0; + + // The frame number to set reference frame on + unsigned int update_frame_num = 0; + int mismatch_seen = 0; + + const int fps = 30; + const int bitrate = 500; + + const char *codec_arg = NULL; + const char *width_arg = NULL; + const char *height_arg = NULL; + const char *infile_arg = NULL; + const char *outfile_arg = NULL; + const char *update_frame_num_arg = NULL; + unsigned int limit = 0; + exec_name = argv[0]; + + // Clear explicitly, as simply assigning "{ 0 }" generates + // "missing-field-initializers" warning in some compilers. 
+ memset(&ecodec, 0, sizeof(ecodec)); + memset(&cfg, 0, sizeof(cfg)); + memset(&info, 0, sizeof(info)); + + if (argc < 7) die("Invalid number of arguments"); + + codec_arg = argv[1]; + width_arg = argv[2]; + height_arg = argv[3]; + infile_arg = argv[4]; + outfile_arg = argv[5]; + update_frame_num_arg = argv[6]; + + aom_codec_iface_t *encoder = get_aom_encoder_by_short_name(codec_arg); + if (!encoder) die("Unsupported codec."); + + update_frame_num = (unsigned int)strtoul(update_frame_num_arg, NULL, 0); + // In AV1, the reference buffers (cm->buffer_pool->frame_bufs[i].buf) are + // allocated while calling aom_codec_encode(), thus, setting reference for + // 1st frame isn't supported. + if (update_frame_num <= 1) { + die("Couldn't parse frame number '%s'\n", update_frame_num_arg); + } + + if (argc > 7) { + limit = (unsigned int)strtoul(argv[7], NULL, 0); + if (update_frame_num > limit) + die("Update frame number couldn't larger than limit\n"); + } + + info.codec_fourcc = get_fourcc_by_aom_encoder(encoder); + info.frame_width = (int)strtol(width_arg, NULL, 0); + info.frame_height = (int)strtol(height_arg, NULL, 0); + info.time_base.numerator = 1; + info.time_base.denominator = fps; + + if (info.frame_width <= 0 || info.frame_height <= 0) { + die("Invalid frame size: %dx%d", info.frame_width, info.frame_height); + } + + // In this test, the bit depth of input video is 8-bit, and the input format + // is AOM_IMG_FMT_I420. + if (!aom_img_alloc(&raw, raw_fmt, info.frame_width, info.frame_height, 32)) { + die("Failed to allocate image."); + } + + if (FORCE_HIGHBITDEPTH_DECODING) ref_fmt |= AOM_IMG_FMT_HIGHBITDEPTH; + // Allocate memory with the border so that it can be used as a reference. 
+ if (!aom_img_alloc_with_border(&ext_ref, ref_fmt, info.frame_width, + info.frame_height, 32, 8, + AOM_DEC_BORDER_IN_PIXELS)) { + die("Failed to allocate image."); + } + + printf("Using %s\n", aom_codec_iface_name(encoder)); + +#if CONFIG_REALTIME_ONLY + res = aom_codec_enc_config_default(encoder, &cfg, 1); +#else + res = aom_codec_enc_config_default(encoder, &cfg, 0); +#endif + if (res) die_codec(&ecodec, "Failed to get default codec config."); + + cfg.g_w = info.frame_width; + cfg.g_h = info.frame_height; + cfg.g_timebase.num = info.time_base.numerator; + cfg.g_timebase.den = info.time_base.denominator; + cfg.rc_target_bitrate = bitrate; + cfg.g_lag_in_frames = 3; + cfg.g_bit_depth = AOM_BITS_8; + + flags |= (cfg.g_bit_depth > AOM_BITS_8 || FORCE_HIGHBITDEPTH_DECODING) + ? AOM_CODEC_USE_HIGHBITDEPTH + : 0; + + writer = aom_video_writer_open(outfile_arg, kContainerIVF, &info); + if (!writer) die("Failed to open %s for writing.", outfile_arg); + + if (!(infile = fopen(infile_arg, "rb"))) + die("Failed to open %s for reading.", infile_arg); + + if (aom_codec_enc_init(&ecodec, encoder, &cfg, flags)) + die("Failed to initialize encoder"); + + // Disable alt_ref. + if (aom_codec_control(&ecodec, AOME_SET_ENABLEAUTOALTREF, 0)) + die_codec(&ecodec, "Failed to set enable auto alt ref"); + + if (test_decode) { + aom_codec_iface_t *decoder = get_aom_decoder_by_short_name(codec_arg); + if (aom_codec_dec_init(&dcodec, decoder, NULL, 0)) + die("Failed to initialize decoder."); + } + + // Encode frames. + while (aom_img_read(&raw, infile)) { + if (limit && frame_in >= limit) break; + aom_image_t *frame_to_encode; + + if (FORCE_HIGHBITDEPTH_DECODING) { + // Need to allocate larger buffer to use hbd internal. 
+ int input_shift = 0; + if (!allocated_raw_shift) { + aom_img_alloc(&raw_shift, raw_fmt | AOM_IMG_FMT_HIGHBITDEPTH, + info.frame_width, info.frame_height, 32); + allocated_raw_shift = 1; + } + aom_img_upshift(&raw_shift, &raw, input_shift); + frame_to_encode = &raw_shift; + } else { + frame_to_encode = &raw; + } + + if (update_frame_num > 1 && frame_out + 1 == update_frame_num) { + av1_ref_frame_t ref; + ref.idx = 0; + ref.use_external_ref = 0; + ref.img = ext_ref; + // Set reference frame in encoder. + if (aom_codec_control(&ecodec, AV1_SET_REFERENCE, &ref)) + die_codec(&ecodec, "Failed to set encoder reference frame"); + printf(" <SET_REF>"); + +#if CONFIG_REALTIME_ONLY + // Set cpu speed in encoder. + if (aom_codec_control(&ecodec, AOME_SET_CPUUSED, 7)) + die_codec(&ecodec, "Failed to set cpu speed"); +#endif + + // If set_reference in decoder is commented out, the enc/dec mismatch + // would be seen. + if (test_decode) { + ref.use_external_ref = 1; + if (aom_codec_control(&dcodec, AV1_SET_REFERENCE, &ref)) + die_codec(&dcodec, "Failed to set decoder reference frame"); + } + } + + encode_frame(&ecodec, frame_to_encode, frame_in, writer, test_decode, + &dcodec, &frame_out, &mismatch_seen, &ext_ref); + frame_in++; + if (mismatch_seen) break; + } + + // Flush encoder. 
+ if (!mismatch_seen) + while (encode_frame(&ecodec, NULL, frame_in, writer, test_decode, &dcodec, + &frame_out, &mismatch_seen, NULL)) { + } + + printf("\n"); + fclose(infile); + printf("Processed %u frames.\n", frame_out); + + if (test_decode) { + if (!mismatch_seen) + printf("Encoder/decoder results are matching.\n"); + else + printf("Encoder/decoder results are NOT matching.\n"); + } + + if (test_decode) + if (aom_codec_destroy(&dcodec)) + die_codec(&dcodec, "Failed to destroy decoder"); + + if (allocated_raw_shift) aom_img_free(&raw_shift); + aom_img_free(&ext_ref); + aom_img_free(&raw); + if (aom_codec_destroy(&ecodec)) + die_codec(&ecodec, "Failed to destroy encoder."); + + aom_video_writer_close(writer); + + return EXIT_SUCCESS; +} diff --git a/third_party/aom/examples/av1_dec_fuzzer.cc b/third_party/aom/examples/av1_dec_fuzzer.cc new file mode 100644 index 0000000000..9b9a0b9cb6 --- /dev/null +++ b/third_party/aom/examples/av1_dec_fuzzer.cc @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2019, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +/* + * See build_av1_dec_fuzzer.sh for building instructions. 
+ */ + +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <algorithm> +#include <memory> +#include "config/aom_config.h" +#include "aom/aom_decoder.h" +#include "aom/aomdx.h" +#include "aom_ports/mem_ops.h" + +#define IVF_FRAME_HDR_SZ (4 + 8) /* 4 byte size + 8 byte timestamp */ +#define IVF_FILE_HDR_SZ 32 + +extern "C" void usage_exit(void) { exit(EXIT_FAILURE); } + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { + if (size <= IVF_FILE_HDR_SZ) { + return 0; + } + + aom_codec_iface_t *codec_interface = aom_codec_av1_dx(); + aom_codec_ctx_t codec; + // Set thread count in the range [1, 64]. + const unsigned int threads = (data[IVF_FILE_HDR_SZ] & 0x3f) + 1; + aom_codec_dec_cfg_t cfg = { threads, 0, 0, !FORCE_HIGHBITDEPTH_DECODING }; + if (aom_codec_dec_init(&codec, codec_interface, &cfg, 0)) { + return 0; + } + + data += IVF_FILE_HDR_SZ; + size -= IVF_FILE_HDR_SZ; + + while (size > IVF_FRAME_HDR_SZ) { + size_t frame_size = mem_get_le32(data); + size -= IVF_FRAME_HDR_SZ; + data += IVF_FRAME_HDR_SZ; + frame_size = std::min(size, frame_size); + + const aom_codec_err_t err = + aom_codec_decode(&codec, data, frame_size, nullptr); + static_cast<void>(err); + aom_codec_iter_t iter = nullptr; + aom_image_t *img = nullptr; + while ((img = aom_codec_get_frame(&codec, &iter)) != nullptr) { + } + data += frame_size; + size -= frame_size; + } + aom_codec_destroy(&codec); + return 0; +} diff --git a/third_party/aom/examples/av1_dec_fuzzer.dict b/third_party/aom/examples/av1_dec_fuzzer.dict new file mode 100644 index 0000000000..fb1638864c --- /dev/null +++ b/third_party/aom/examples/av1_dec_fuzzer.dict @@ -0,0 +1,5 @@ +# IVF Signature + version (bytes 0-5) +kw1="DKIF\x00\x00" + +# AV1 codec fourCC (bytes 8-11) +kw2="AV01" diff --git a/third_party/aom/examples/build_av1_dec_fuzzer.sh b/third_party/aom/examples/build_av1_dec_fuzzer.sh new file mode 100755 index 0000000000..40355ea133 --- /dev/null +++ 
b/third_party/aom/examples/build_av1_dec_fuzzer.sh @@ -0,0 +1,70 @@ +#!/bin/bash +# +# Copyright (c) 2019, Alliance for Open Media. All rights reserved +# +# This source code is subject to the terms of the BSD 2 Clause License and +# the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License +# was not distributed with this source code in the LICENSE file, you can +# obtain it at www.aomedia.org/license/software. If the Alliance for Open +# Media Patent License 1.0 was not distributed with this source code in the +# PATENTS file, you can obtain it at www.aomedia.org/license/patent. +# +############################################################################### +# Fuzzer for libaom decoder. +# ========================== +# Requirements +# --------------------- +# Clang6.0 or above (must support -fsanitize=fuzzer -fsanitize=fuzzer-no-link) +# +# References: +# --------------------- +# http://llvm.org/docs/LibFuzzer.html +# https://github.com/google/oss-fuzz +# +# Steps to build / run +# --------------------- + +set -eu + +# Have a copy of AOM and a build directory ready. +if [[ $# -ne 2 ]]; then + echo "Pass in the AOM source tree as first argument, and a build directory " + echo "as the second argument. The AOM source tree can be obtained via: " + echo " git clone https://aomedia.googlesource.com/aom" + exit 2 +fi +if [[ -z "${CC:-}" ]]; then + echo "Set the CC environment variable to point to your C compiler." + exit 2 +fi +if [[ -z "${CXX:-}" ]]; then + echo "Set the CXX environment variable to point to your C++ compiler." + exit 2 +fi + +AOM_DIR=$1 +BUILD_DIR=$2 +# Run CMake with address sanitizer enabled and build the codec. +# Enable DO_RANGE_CHECK_CLAMP to suppress the noise of integer overflows +# in the transform functions. Also set memory limits. 
+EXTRA_C_FLAGS='-UNDEBUG -DDO_RANGE_CHECK_CLAMP=1 -DAOM_MAX_ALLOCABLE_MEMORY=1073741824' +cd "${BUILD_DIR}" +cmake "${AOM_DIR}" -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCONFIG_PIC=1 \ + -DFORCE_HIGHBITDEPTH_DECODING=0 \ + -DCONFIG_AV1_ENCODER=0 -DENABLE_EXAMPLES=0 -DENABLE_DOCS=0 -DENABLE_TESTS=0 \ + -DCONFIG_SIZE_LIMIT=1 -DDECODE_HEIGHT_LIMIT=12288 -DDECODE_WIDTH_LIMIT=12288 \ + -DAOM_EXTRA_C_FLAGS="${EXTRA_C_FLAGS}" \ + -DAOM_EXTRA_CXX_FLAGS="${EXTRA_C_FLAGS}" -DSANITIZE=fuzzer-no-link,address + +# Build the codec. +make -j$(nproc) + +# Build the av1 fuzzer +$CXX -std=c++11 -I${AOM_DIR} -I${BUILD_DIR} \ + -g -fsanitize=fuzzer,address \ + ${AOM_DIR}/examples/av1_dec_fuzzer.cc -o ${BUILD_DIR}/av1_dec_fuzzer \ + ${BUILD_DIR}/libaom.a + +echo "Fuzzer built at ${BUILD_DIR}/av1_dec_fuzzer." +echo "Create a corpus directory, copy IVF files in there, and run:" +echo " av1_dec_fuzzer CORPUS_DIR" diff --git a/third_party/aom/examples/decode_to_md5.c b/third_party/aom/examples/decode_to_md5.c new file mode 100644 index 0000000000..07f788ff97 --- /dev/null +++ b/third_party/aom/examples/decode_to_md5.c @@ -0,0 +1,130 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +// Frame-by-frame MD5 Checksum +// =========================== +// +// This example builds upon the simple decoder loop to show how checksums +// of the decoded output can be generated. These are used for validating +// decoder implementations against the reference implementation, for example. 
+// +// MD5 algorithm +// ------------- +// The Message-Digest 5 (MD5) is a well known hash function. We have provided +// an implementation derived from the RSA Data Security, Inc. MD5 Message-Digest +// Algorithm for your use. Our implmentation only changes the interface of this +// reference code. You must include the `md5_utils.h` header for access to these +// functions. +// +// Processing The Decoded Data +// --------------------------- +// Each row of the image is passed to the MD5 accumulator. First the Y plane +// is processed, then U, then V. It is important to honor the image's `stride` +// values. + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "aom/aom_decoder.h" +#include "aom/aomdx.h" +#include "common/md5_utils.h" +#include "common/tools_common.h" +#include "common/video_reader.h" + +static void get_image_md5(const aom_image_t *img, unsigned char digest[16]) { + int plane, y; + MD5Context md5; + + MD5Init(&md5); + + for (plane = 0; plane < 3; ++plane) { + const unsigned char *buf = img->planes[plane]; + const int stride = img->stride[plane]; + const int w = plane ? (img->d_w + 1) >> 1 : img->d_w; + const int h = plane ? 
(img->d_h + 1) >> 1 : img->d_h; + + for (y = 0; y < h; ++y) { + MD5Update(&md5, buf, w); + buf += stride; + } + } + + MD5Final(digest, &md5); +} + +static void print_md5(FILE *stream, unsigned char digest[16]) { + int i; + + for (i = 0; i < 16; ++i) fprintf(stream, "%02x", digest[i]); +} + +static const char *exec_name; + +void usage_exit(void) { + fprintf(stderr, "Usage: %s <infile> <outfile>\n", exec_name); + exit(EXIT_FAILURE); +} + +int main(int argc, char **argv) { + int frame_cnt = 0; + FILE *outfile = NULL; + AvxVideoReader *reader = NULL; + const AvxVideoInfo *info = NULL; + + exec_name = argv[0]; + + if (argc != 3) die("Invalid number of arguments."); + + reader = aom_video_reader_open(argv[1]); + if (!reader) die("Failed to open %s for reading.", argv[1]); + + if (!(outfile = fopen(argv[2], "wb"))) + die("Failed to open %s for writing.", argv[2]); + + info = aom_video_reader_get_info(reader); + + aom_codec_iface_t *decoder = get_aom_decoder_by_fourcc(info->codec_fourcc); + if (!decoder) die("Unknown input codec."); + + printf("Using %s\n", aom_codec_iface_name(decoder)); + + aom_codec_ctx_t codec; + if (aom_codec_dec_init(&codec, decoder, NULL, 0)) + die("Failed to initialize decoder"); + + while (aom_video_reader_read_frame(reader)) { + aom_codec_iter_t iter = NULL; + aom_image_t *img = NULL; + size_t frame_size = 0; + const unsigned char *frame = + aom_video_reader_get_frame(reader, &frame_size); + if (aom_codec_decode(&codec, frame, frame_size, NULL)) + die_codec(&codec, "Failed to decode frame"); + + while ((img = aom_codec_get_frame(&codec, &iter)) != NULL) { + unsigned char digest[16]; + + get_image_md5(img, digest); + print_md5(outfile, digest); + fprintf(outfile, " img-%ux%u-%04d.i420\n", img->d_w, img->d_h, + ++frame_cnt); + } + } + + printf("Processed %d frames.\n", frame_cnt); + if (aom_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec."); + + aom_video_reader_close(reader); + + fclose(outfile); + return EXIT_SUCCESS; +} diff 
--git a/third_party/aom/examples/decode_with_drops.c b/third_party/aom/examples/decode_with_drops.c new file mode 100644 index 0000000000..9bec6ee2df --- /dev/null +++ b/third_party/aom/examples/decode_with_drops.c @@ -0,0 +1,144 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +// Decode With Drops Example +// ========================= +// +// This is an example utility which drops a series of frames, as specified +// on the command line. This is useful for observing the error recovery +// features of the codec. +// +// Usage +// ----- +// This example adds a single argument to the `simple_decoder` example, +// which specifies the range or pattern of frames to drop. The parameter is +// parsed as follows: +// +// Dropping A Range Of Frames +// -------------------------- +// To drop a range of frames, specify the starting frame and the ending +// frame to drop, separated by a dash. The following command will drop +// frames 5 through 10 (base 1). +// +// $ ./decode_with_drops in.ivf out.i420 5-10 +// +// +// Dropping A Pattern Of Frames +// ---------------------------- +// To drop a pattern of frames, specify the number of frames to drop and +// the number of frames after which to repeat the pattern, separated by +// a forward-slash. The following command will drop 3 of 7 frames. +// Specifically, it will decode 4 frames, then drop 3 frames, and then +// repeat. 
+// +// $ ./decode_with_drops in.ivf out.i420 3/7 +// +// +// Extra Variables +// --------------- +// This example maintains the pattern passed on the command line in the +// `n`, `m`, and `is_range` variables: +// +// +// Making The Drop Decision +// ------------------------ +// The example decides whether to drop the frame based on the current +// frame number, immediately before decoding the frame. + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "aom/aom_decoder.h" +#include "aom/aomdx.h" +#include "common/tools_common.h" +#include "common/video_reader.h" + +static const char *exec_name; + +void usage_exit(void) { + fprintf(stderr, "Usage: %s <infile> <outfile> <N-M|N/M>\n", exec_name); + exit(EXIT_FAILURE); +} + +int main(int argc, char **argv) { + int frame_cnt = 0; + FILE *outfile = NULL; + AvxVideoReader *reader = NULL; + const AvxVideoInfo *info = NULL; + int n = 0; + int m = 0; + int is_range = 0; + char *nptr = NULL; + + exec_name = argv[0]; + + if (argc != 4) die("Invalid number of arguments."); + + reader = aom_video_reader_open(argv[1]); + if (!reader) die("Failed to open %s for reading.", argv[1]); + + if (!(outfile = fopen(argv[2], "wb"))) + die("Failed to open %s for writing.", argv[2]); + + n = (int)strtol(argv[3], &nptr, 0); + m = (int)strtol(nptr + 1, NULL, 0); + is_range = (*nptr == '-'); + if (!n || !m || (*nptr != '-' && *nptr != '/')) + die("Couldn't parse pattern %s.\n", argv[3]); + + info = aom_video_reader_get_info(reader); + + aom_codec_iface_t *decoder = get_aom_decoder_by_fourcc(info->codec_fourcc); + if (!decoder) die("Unknown input codec."); + + printf("Using %s\n", aom_codec_iface_name(decoder)); + aom_codec_ctx_t codec; + if (aom_codec_dec_init(&codec, decoder, NULL, 0)) + die("Failed to initialize decoder."); + + while (aom_video_reader_read_frame(reader)) { + aom_codec_iter_t iter = NULL; + aom_image_t *img = NULL; + size_t frame_size = 0; + int skip; + const unsigned char *frame = + 
aom_video_reader_get_frame(reader, &frame_size); + ++frame_cnt; + + skip = (is_range && frame_cnt >= n && frame_cnt <= m) || + (!is_range && m - (frame_cnt - 1) % m <= n); + + if (!skip) { + putc('.', stdout); + if (aom_codec_decode(&codec, frame, frame_size, NULL)) + die_codec(&codec, "Failed to decode frame."); + + while ((img = aom_codec_get_frame(&codec, &iter)) != NULL) + aom_img_write(img, outfile); + } else { + putc('X', stdout); + } + + fflush(stdout); + } + + printf("Processed %d frames.\n", frame_cnt); + if (aom_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec."); + + printf("Play: ffplay -f rawvideo -pix_fmt yuv420p -s %dx%d %s\n", + info->frame_width, info->frame_height, argv[2]); + + aom_video_reader_close(reader); + fclose(outfile); + + return EXIT_SUCCESS; +} diff --git a/third_party/aom/examples/encoder_util.c b/third_party/aom/examples/encoder_util.c new file mode 100644 index 0000000000..e43b372506 --- /dev/null +++ b/third_party/aom/examples/encoder_util.c @@ -0,0 +1,136 @@ +/* + * Copyright (c) 2017, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +// Utility functions used by encoder binaries. + +#include "examples/encoder_util.h" + +#include <assert.h> +#include <string.h> + +#include "aom/aom_integer.h" + +#define mmin(a, b) ((a) < (b) ? 
(a) : (b)) + +static void find_mismatch_plane(const aom_image_t *const img1, + const aom_image_t *const img2, int plane, + int use_highbitdepth, int loc[4]) { + const unsigned char *const p1 = img1->planes[plane]; + const int p1_stride = img1->stride[plane] >> use_highbitdepth; + const unsigned char *const p2 = img2->planes[plane]; + const int p2_stride = img2->stride[plane] >> use_highbitdepth; + const uint32_t bsize = 64; + const int is_y_plane = (plane == AOM_PLANE_Y); + const uint32_t bsizex = is_y_plane ? bsize : bsize >> img1->x_chroma_shift; + const uint32_t bsizey = is_y_plane ? bsize : bsize >> img1->y_chroma_shift; + const uint32_t c_w = + is_y_plane ? img1->d_w + : (img1->d_w + img1->x_chroma_shift) >> img1->x_chroma_shift; + const uint32_t c_h = + is_y_plane ? img1->d_h + : (img1->d_h + img1->y_chroma_shift) >> img1->y_chroma_shift; + assert(img1->d_w == img2->d_w && img1->d_h == img2->d_h); + assert(img1->x_chroma_shift == img2->x_chroma_shift && + img1->y_chroma_shift == img2->y_chroma_shift); + loc[0] = loc[1] = loc[2] = loc[3] = -1; + if (img1->monochrome && img2->monochrome && plane) return; + int match = 1; + uint32_t i, j; + for (i = 0; match && i < c_h; i += bsizey) { + for (j = 0; match && j < c_w; j += bsizex) { + const int si = + is_y_plane ? mmin(i + bsizey, c_h) - i : mmin(i + bsizey, c_h - i); + const int sj = + is_y_plane ? mmin(j + bsizex, c_w) - j : mmin(j + bsizex, c_w - j); + int k, l; + for (k = 0; match && k < si; ++k) { + for (l = 0; match && l < sj; ++l) { + const int row = i + k; + const int col = j + l; + const int offset1 = row * p1_stride + col; + const int offset2 = row * p2_stride + col; + const int val1 = use_highbitdepth + ? p1[2 * offset1] | (p1[2 * offset1 + 1] << 8) + : p1[offset1]; + const int val2 = use_highbitdepth + ? 
p2[2 * offset2] | (p2[2 * offset2 + 1] << 8) + : p2[offset2]; + if (val1 != val2) { + loc[0] = row; + loc[1] = col; + loc[2] = val1; + loc[3] = val2; + match = 0; + break; + } + } + } + } + } +} + +static void find_mismatch_helper(const aom_image_t *const img1, + const aom_image_t *const img2, + int use_highbitdepth, int yloc[4], int uloc[4], + int vloc[4]) { + find_mismatch_plane(img1, img2, AOM_PLANE_Y, use_highbitdepth, yloc); + find_mismatch_plane(img1, img2, AOM_PLANE_U, use_highbitdepth, uloc); + find_mismatch_plane(img1, img2, AOM_PLANE_V, use_highbitdepth, vloc); +} + +void aom_find_mismatch_high(const aom_image_t *const img1, + const aom_image_t *const img2, int yloc[4], + int uloc[4], int vloc[4]) { + find_mismatch_helper(img1, img2, 1, yloc, uloc, vloc); +} + +void aom_find_mismatch(const aom_image_t *const img1, + const aom_image_t *const img2, int yloc[4], int uloc[4], + int vloc[4]) { + find_mismatch_helper(img1, img2, 0, yloc, uloc, vloc); +} + +int aom_compare_img(const aom_image_t *const img1, + const aom_image_t *const img2) { + assert(img1->cp == img2->cp); + assert(img1->tc == img2->tc); + assert(img1->mc == img2->mc); + assert(img1->monochrome == img2->monochrome); + + int num_planes = img1->monochrome ? 1 : 3; + + uint32_t l_w = img1->d_w; + uint32_t c_w = (img1->d_w + img1->x_chroma_shift) >> img1->x_chroma_shift; + const uint32_t c_h = + (img1->d_h + img1->y_chroma_shift) >> img1->y_chroma_shift; + int match = 1; + + match &= (img1->fmt == img2->fmt); + match &= (img1->d_w == img2->d_w); + match &= (img1->d_h == img2->d_h); + if (img1->fmt & AOM_IMG_FMT_HIGHBITDEPTH) { + l_w *= 2; + c_w *= 2; + } + + for (int plane = 0; plane < num_planes; ++plane) { + uint32_t height = plane ? c_h : img1->d_h; + uint32_t width = plane ? 
c_w : l_w; + + for (uint32_t i = 0; i < height; ++i) { + match &= + (memcmp(img1->planes[plane] + i * img1->stride[plane], + img2->planes[plane] + i * img2->stride[plane], width) == 0); + } + } + + return match; +} diff --git a/third_party/aom/examples/encoder_util.h b/third_party/aom/examples/encoder_util.h new file mode 100644 index 0000000000..fa0e7d1880 --- /dev/null +++ b/third_party/aom/examples/encoder_util.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2017, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +// Utility functions used by encoder binaries. + +#ifndef AOM_EXAMPLES_ENCODER_UTIL_H_ +#define AOM_EXAMPLES_ENCODER_UTIL_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "aom/aom_image.h" + +// Returns mismatch location (?loc[0],?loc[1]) and the values at that location +// in img1 (?loc[2]) and img2 (?loc[3]). +void aom_find_mismatch_high(const aom_image_t *const img1, + const aom_image_t *const img2, int yloc[4], + int uloc[4], int vloc[4]); + +void aom_find_mismatch(const aom_image_t *const img1, + const aom_image_t *const img2, int yloc[4], int uloc[4], + int vloc[4]); + +// Returns 1 if the two images match. 
+int aom_compare_img(const aom_image_t *const img1, + const aom_image_t *const img2); + +#ifdef __cplusplus +} +#endif +#endif // AOM_EXAMPLES_ENCODER_UTIL_H_ diff --git a/third_party/aom/examples/inspect.c b/third_party/aom/examples/inspect.c new file mode 100644 index 0000000000..e285be0209 --- /dev/null +++ b/third_party/aom/examples/inspect.c @@ -0,0 +1,963 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +// Inspect Decoder +// ================ +// +// This is a simple decoder loop that writes JSON stats to stdout. This tool +// can also be compiled with Emscripten and used as a library. + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#ifdef __EMSCRIPTEN__ +#include <emscripten.h> +#else +#define EMSCRIPTEN_KEEPALIVE +#endif + +#include "config/aom_config.h" + +#include "aom/aom_decoder.h" +#include "aom/aomdx.h" +#include "av1/common/av1_common_int.h" + +#if CONFIG_ACCOUNTING +#include "av1/decoder/accounting.h" +#endif + +#include "av1/decoder/inspection.h" +#include "common/args.h" +#include "common/tools_common.h" +#include "common/video_common.h" +#include "common/video_reader.h" + +// Max JSON buffer size. 
+const int MAX_BUFFER = 1024 * 1024 * 256; + +typedef enum { + ACCOUNTING_LAYER = 1, + BLOCK_SIZE_LAYER = 1 << 1, + TRANSFORM_SIZE_LAYER = 1 << 2, + TRANSFORM_TYPE_LAYER = 1 << 3, + MODE_LAYER = 1 << 4, + SKIP_LAYER = 1 << 5, + FILTER_LAYER = 1 << 6, + CDEF_LAYER = 1 << 7, + REFERENCE_FRAME_LAYER = 1 << 8, + MOTION_VECTORS_LAYER = 1 << 9, + UV_MODE_LAYER = 1 << 10, + CFL_LAYER = 1 << 11, + DUAL_FILTER_LAYER = 1 << 12, + Q_INDEX_LAYER = 1 << 13, + SEGMENT_ID_LAYER = 1 << 14, + MOTION_MODE_LAYER = 1 << 15, + COMPOUND_TYPE_LAYER = 1 << 16, + INTRABC_LAYER = 1 << 17, + PALETTE_LAYER = 1 << 18, + UV_PALETTE_LAYER = 1 << 19, + ALL_LAYERS = (1 << 20) - 1 +} LayerType; + +static LayerType layers = 0; + +static int stop_after = 0; +static int compress = 0; + +static const arg_def_t limit_arg = + ARG_DEF(NULL, "limit", 1, "Stop decoding after n frames"); +static const arg_def_t dump_all_arg = ARG_DEF("A", "all", 0, "Dump All"); +static const arg_def_t compress_arg = + ARG_DEF("x", "compress", 0, "Compress JSON using RLE"); +static const arg_def_t dump_accounting_arg = + ARG_DEF("a", "accounting", 0, "Dump Accounting"); +static const arg_def_t dump_block_size_arg = + ARG_DEF("bs", "blockSize", 0, "Dump Block Size"); +static const arg_def_t dump_motion_vectors_arg = + ARG_DEF("mv", "motionVectors", 0, "Dump Motion Vectors"); +static const arg_def_t dump_transform_size_arg = + ARG_DEF("ts", "transformSize", 0, "Dump Transform Size"); +static const arg_def_t dump_transform_type_arg = + ARG_DEF("tt", "transformType", 0, "Dump Transform Type"); +static const arg_def_t dump_mode_arg = ARG_DEF("m", "mode", 0, "Dump Mode"); +static const arg_def_t dump_motion_mode_arg = + ARG_DEF("mm", "motion_mode", 0, "Dump Motion Modes"); +static const arg_def_t dump_compound_type_arg = + ARG_DEF("ct", "compound_type", 0, "Dump Compound Types"); +static const arg_def_t dump_uv_mode_arg = + ARG_DEF("uvm", "uv_mode", 0, "Dump UV Intra Prediction Modes"); +static const arg_def_t dump_skip_arg = 
ARG_DEF("s", "skip", 0, "Dump Skip"); +static const arg_def_t dump_filter_arg = + ARG_DEF("f", "filter", 0, "Dump Filter"); +static const arg_def_t dump_cdef_arg = ARG_DEF("c", "cdef", 0, "Dump CDEF"); +static const arg_def_t dump_cfl_arg = + ARG_DEF("cfl", "chroma_from_luma", 0, "Dump Chroma from Luma Alphas"); +static const arg_def_t dump_dual_filter_type_arg = + ARG_DEF("df", "dualFilterType", 0, "Dump Dual Filter Type"); +static const arg_def_t dump_reference_frame_arg = + ARG_DEF("r", "referenceFrame", 0, "Dump Reference Frame"); +static const arg_def_t dump_delta_q_arg = + ARG_DEF("dq", "delta_q", 0, "Dump QIndex"); +static const arg_def_t dump_seg_id_arg = + ARG_DEF("si", "seg_id", 0, "Dump Segment ID"); +static const arg_def_t dump_intrabc_arg = + ARG_DEF("ibc", "intrabc", 0, "Dump If IntraBC Is Used"); +static const arg_def_t dump_palette_arg = + ARG_DEF("plt", "palette", 0, "Dump Palette Size"); +static const arg_def_t dump_uv_palette_arg = + ARG_DEF("uvp", "uv_palette", 0, "Dump UV Palette Size"); +static const arg_def_t usage_arg = ARG_DEF("h", "help", 0, "Help"); +static const arg_def_t skip_non_transform_arg = ARG_DEF( + "snt", "skip_non_transform", 1, "Skip is counted as a non transform."); +static const arg_def_t combined_arg = + ARG_DEF("comb", "combined", 1, "combinining parameters into one output."); + +int combined_parm_list[15]; +int combined_parm_count = 0; + +static const arg_def_t *main_args[] = { &limit_arg, + &dump_all_arg, + &compress_arg, +#if CONFIG_ACCOUNTING + &dump_accounting_arg, +#endif + &dump_block_size_arg, + &dump_transform_size_arg, + &dump_transform_type_arg, + &dump_mode_arg, + &dump_uv_mode_arg, + &dump_motion_mode_arg, + &dump_compound_type_arg, + &dump_skip_arg, + &dump_filter_arg, + &dump_cdef_arg, + &dump_dual_filter_type_arg, + &dump_cfl_arg, + &dump_reference_frame_arg, + &dump_motion_vectors_arg, + &dump_delta_q_arg, + &dump_seg_id_arg, + &dump_intrabc_arg, + &dump_palette_arg, + &dump_uv_palette_arg, + &usage_arg, + 
&skip_non_transform_arg, + &combined_arg, + NULL }; +#define ENUM(name) \ + { #name, name } +#define LAST_ENUM \ + { NULL, 0 } +typedef struct map_entry { + const char *name; + int value; +} map_entry; + +const map_entry refs_map[] = { + ENUM(INTRA_FRAME), ENUM(LAST_FRAME), ENUM(LAST2_FRAME), + ENUM(LAST3_FRAME), ENUM(GOLDEN_FRAME), ENUM(BWDREF_FRAME), + ENUM(ALTREF2_FRAME), ENUM(ALTREF_FRAME), LAST_ENUM +}; + +const map_entry block_size_map[] = { + ENUM(BLOCK_4X4), ENUM(BLOCK_4X8), ENUM(BLOCK_8X4), + ENUM(BLOCK_8X8), ENUM(BLOCK_8X16), ENUM(BLOCK_16X8), + ENUM(BLOCK_16X16), ENUM(BLOCK_16X32), ENUM(BLOCK_32X16), + ENUM(BLOCK_32X32), ENUM(BLOCK_32X64), ENUM(BLOCK_64X32), + ENUM(BLOCK_64X64), ENUM(BLOCK_64X128), ENUM(BLOCK_128X64), + ENUM(BLOCK_128X128), ENUM(BLOCK_4X16), ENUM(BLOCK_16X4), + ENUM(BLOCK_8X32), ENUM(BLOCK_32X8), ENUM(BLOCK_16X64), + ENUM(BLOCK_64X16), LAST_ENUM +}; + +#define TX_SKIP -1 + +const map_entry tx_size_map[] = { + ENUM(TX_4X4), ENUM(TX_8X8), ENUM(TX_16X16), ENUM(TX_32X32), + ENUM(TX_64X64), ENUM(TX_4X8), ENUM(TX_8X4), ENUM(TX_8X16), + ENUM(TX_16X8), ENUM(TX_16X32), ENUM(TX_32X16), ENUM(TX_32X64), + ENUM(TX_64X32), ENUM(TX_4X16), ENUM(TX_16X4), ENUM(TX_8X32), + ENUM(TX_32X8), ENUM(TX_16X64), ENUM(TX_64X16), LAST_ENUM +}; + +const map_entry tx_type_map[] = { ENUM(DCT_DCT), + ENUM(ADST_DCT), + ENUM(DCT_ADST), + ENUM(ADST_ADST), + ENUM(FLIPADST_DCT), + ENUM(DCT_FLIPADST), + ENUM(FLIPADST_FLIPADST), + ENUM(ADST_FLIPADST), + ENUM(FLIPADST_ADST), + ENUM(IDTX), + ENUM(V_DCT), + ENUM(H_DCT), + ENUM(V_ADST), + ENUM(H_ADST), + ENUM(V_FLIPADST), + ENUM(H_FLIPADST), + LAST_ENUM }; +const map_entry dual_filter_map[] = { ENUM(REG_REG), ENUM(REG_SMOOTH), + ENUM(REG_SHARP), ENUM(SMOOTH_REG), + ENUM(SMOOTH_SMOOTH), ENUM(SMOOTH_SHARP), + ENUM(SHARP_REG), ENUM(SHARP_SMOOTH), + ENUM(SHARP_SHARP), LAST_ENUM }; + +const map_entry prediction_mode_map[] = { + ENUM(DC_PRED), ENUM(V_PRED), ENUM(H_PRED), + ENUM(D45_PRED), ENUM(D135_PRED), ENUM(D113_PRED), + 
ENUM(D157_PRED), ENUM(D203_PRED), ENUM(D67_PRED), + ENUM(SMOOTH_PRED), ENUM(SMOOTH_V_PRED), ENUM(SMOOTH_H_PRED), + ENUM(PAETH_PRED), ENUM(NEARESTMV), ENUM(NEARMV), + ENUM(GLOBALMV), ENUM(NEWMV), ENUM(NEAREST_NEARESTMV), + ENUM(NEAR_NEARMV), ENUM(NEAREST_NEWMV), ENUM(NEW_NEARESTMV), + ENUM(NEAR_NEWMV), ENUM(NEW_NEARMV), ENUM(GLOBAL_GLOBALMV), + ENUM(NEW_NEWMV), ENUM(INTRA_INVALID), LAST_ENUM +}; + +const map_entry motion_mode_map[] = { ENUM(SIMPLE_TRANSLATION), + ENUM(OBMC_CAUSAL), // 2-sided OBMC + ENUM(WARPED_CAUSAL), // 2-sided WARPED + LAST_ENUM }; + +const map_entry compound_type_map[] = { ENUM(COMPOUND_AVERAGE), + ENUM(COMPOUND_WEDGE), + ENUM(COMPOUND_DIFFWTD), LAST_ENUM }; + +const map_entry uv_prediction_mode_map[] = { + ENUM(UV_DC_PRED), ENUM(UV_V_PRED), + ENUM(UV_H_PRED), ENUM(UV_D45_PRED), + ENUM(UV_D135_PRED), ENUM(UV_D113_PRED), + ENUM(UV_D157_PRED), ENUM(UV_D203_PRED), + ENUM(UV_D67_PRED), ENUM(UV_SMOOTH_PRED), + ENUM(UV_SMOOTH_V_PRED), ENUM(UV_SMOOTH_H_PRED), + ENUM(UV_PAETH_PRED), ENUM(UV_CFL_PRED), + ENUM(UV_MODE_INVALID), LAST_ENUM +}; +#define NO_SKIP 0 +#define SKIP 1 + +const map_entry skip_map[] = { ENUM(SKIP), ENUM(NO_SKIP), LAST_ENUM }; + +const map_entry intrabc_map[] = { { "INTRABC", 1 }, + { "NO_INTRABC", 0 }, + LAST_ENUM }; + +const map_entry palette_map[] = { + { "ZERO_COLORS", 0 }, { "TWO_COLORS", 2 }, { "THREE_COLORS", 3 }, + { "FOUR_COLORS", 4 }, { "FIVE_COLORS", 5 }, { "SIX_COLORS", 6 }, + { "SEVEN_COLORS", 7 }, { "EIGHT_COLORS", 8 }, LAST_ENUM +}; + +const map_entry config_map[] = { ENUM(MI_SIZE), LAST_ENUM }; + +static const char *exec_name; + +struct parm_offset { + char parm[60]; + char offset; +}; +struct parm_offset parm_offsets[] = { + { "blockSize", offsetof(insp_mi_data, bsize) }, + { "transformSize", offsetof(insp_mi_data, tx_size) }, + { "transformType", offsetof(insp_mi_data, tx_type) }, + { "dualFilterType", offsetof(insp_mi_data, dual_filter_type) }, + { "mode", offsetof(insp_mi_data, mode) }, + { "uv_mode", 
offsetof(insp_mi_data, uv_mode) }, + { "motion_mode", offsetof(insp_mi_data, motion_mode) }, + { "compound_type", offsetof(insp_mi_data, compound_type) }, + { "referenceFrame", offsetof(insp_mi_data, ref_frame) }, + { "skip", offsetof(insp_mi_data, skip) }, +}; +int parm_count = sizeof(parm_offsets) / sizeof(parm_offsets[0]); + +int convert_to_indices(char *str, int *indices, int maxCount, int *count) { + *count = 0; + do { + char *comma = strchr(str, ','); + int length = (comma ? (int)(comma - str) : (int)strlen(str)); + int i; + for (i = 0; i < parm_count; ++i) { + if (!strncmp(str, parm_offsets[i].parm, length)) { + break; + } + } + if (i == parm_count) return 0; + indices[(*count)++] = i; + if (*count > maxCount) return 0; + str += length + 1; + } while (strlen(str) > 0); + return 1; +} + +insp_frame_data frame_data; +int frame_count = 0; +int decoded_frame_count = 0; +aom_codec_ctx_t codec; +AvxVideoReader *reader = NULL; +const AvxVideoInfo *info = NULL; +aom_image_t *img = NULL; + +void on_frame_decoded_dump(char *json) { +#ifdef __EMSCRIPTEN__ + EM_ASM_({ Module.on_frame_decoded_json($0); }, json); +#else + printf("%s", json); +#endif +} + +// Writing out the JSON buffer using snprintf is very slow, especially when +// compiled with emscripten, these functions speed things up quite a bit. 
+int put_str(char *buffer, const char *str) { + int i; + for (i = 0; str[i] != '\0'; i++) { + buffer[i] = str[i]; + } + return i; +} + +int put_str_with_escape(char *buffer, const char *str) { + int i; + int j = 0; + for (i = 0; str[i] != '\0'; i++) { + if (str[i] < ' ') { + continue; + } else if (str[i] == '"' || str[i] == '\\') { + buffer[j++] = '\\'; + } + buffer[j++] = str[i]; + } + return j; +} + +int put_num(char *buffer, char prefix, int num, char suffix) { + int i = 0; + char *buf = buffer; + int is_neg = 0; + if (prefix) { + buf[i++] = prefix; + } + if (num == 0) { + buf[i++] = '0'; + } else { + if (num < 0) { + num = -num; + is_neg = 1; + } + int s = i; + while (num != 0) { + buf[i++] = '0' + (num % 10); + num = num / 10; + } + if (is_neg) { + buf[i++] = '-'; + } + int e = i - 1; + while (s < e) { + int t = buf[s]; + buf[s] = buf[e]; + buf[e] = t; + s++; + e--; + } + } + if (suffix) { + buf[i++] = suffix; + } + return i; +} + +int put_map(char *buffer, const map_entry *map) { + char *buf = buffer; + const map_entry *entry = map; + while (entry->name != NULL) { + *(buf++) = '"'; + buf += put_str(buf, entry->name); + *(buf++) = '"'; + buf += put_num(buf, ':', entry->value, 0); + entry++; + if (entry->name != NULL) { + *(buf++) = ','; + } + } + return (int)(buf - buffer); +} + +int put_reference_frame(char *buffer) { + const int mi_rows = frame_data.mi_rows; + const int mi_cols = frame_data.mi_cols; + char *buf = buffer; + int r, c, t; + buf += put_str(buf, " \"referenceFrameMap\": {"); + buf += put_map(buf, refs_map); + buf += put_str(buf, "},\n"); + buf += put_str(buf, " \"referenceFrame\": ["); + for (r = 0; r < mi_rows; ++r) { + *(buf++) = '['; + for (c = 0; c < mi_cols; ++c) { + insp_mi_data *mi = &frame_data.mi_grid[r * mi_cols + c]; + buf += put_num(buf, '[', mi->ref_frame[0], 0); + buf += put_num(buf, ',', mi->ref_frame[1], ']'); + if (compress) { // RLE + for (t = c + 1; t < mi_cols; ++t) { + insp_mi_data *next_mi = &frame_data.mi_grid[r * mi_cols + 
t]; + if (mi->ref_frame[0] != next_mi->ref_frame[0] || + mi->ref_frame[1] != next_mi->ref_frame[1]) { + break; + } + } + if (t - c > 1) { + *(buf++) = ','; + buf += put_num(buf, '[', t - c - 1, ']'); + c = t - 1; + } + } + if (c < mi_cols - 1) *(buf++) = ','; + } + *(buf++) = ']'; + if (r < mi_rows - 1) *(buf++) = ','; + } + buf += put_str(buf, "],\n"); + return (int)(buf - buffer); +} + +int put_motion_vectors(char *buffer) { + const int mi_rows = frame_data.mi_rows; + const int mi_cols = frame_data.mi_cols; + char *buf = buffer; + int r, c, t; + buf += put_str(buf, " \"motionVectors\": ["); + for (r = 0; r < mi_rows; ++r) { + *(buf++) = '['; + for (c = 0; c < mi_cols; ++c) { + insp_mi_data *mi = &frame_data.mi_grid[r * mi_cols + c]; + buf += put_num(buf, '[', mi->mv[0].col, 0); + buf += put_num(buf, ',', mi->mv[0].row, 0); + buf += put_num(buf, ',', mi->mv[1].col, 0); + buf += put_num(buf, ',', mi->mv[1].row, ']'); + if (compress) { // RLE + for (t = c + 1; t < mi_cols; ++t) { + insp_mi_data *next_mi = &frame_data.mi_grid[r * mi_cols + t]; + if (mi->mv[0].col != next_mi->mv[0].col || + mi->mv[0].row != next_mi->mv[0].row || + mi->mv[1].col != next_mi->mv[1].col || + mi->mv[1].row != next_mi->mv[1].row) { + break; + } + } + if (t - c > 1) { + *(buf++) = ','; + buf += put_num(buf, '[', t - c - 1, ']'); + c = t - 1; + } + } + if (c < mi_cols - 1) *(buf++) = ','; + } + *(buf++) = ']'; + if (r < mi_rows - 1) *(buf++) = ','; + } + buf += put_str(buf, "],\n"); + return (int)(buf - buffer); +} + +int put_combined(char *buffer) { + const int mi_rows = frame_data.mi_rows; + const int mi_cols = frame_data.mi_cols; + char *buf = buffer; + int r, c, p; + buf += put_str(buf, " \""); + for (p = 0; p < combined_parm_count; ++p) { + if (p) buf += put_str(buf, "&"); + buf += put_str(buf, parm_offsets[combined_parm_list[p]].parm); + } + buf += put_str(buf, "\": ["); + for (r = 0; r < mi_rows; ++r) { + *(buf++) = '['; + for (c = 0; c < mi_cols; ++c) { + insp_mi_data *mi = 
&frame_data.mi_grid[r * mi_cols + c]; + *(buf++) = '['; + for (p = 0; p < combined_parm_count; ++p) { + if (p) *(buf++) = ','; + int16_t *v = (int16_t *)(((int8_t *)mi) + + parm_offsets[combined_parm_list[p]].offset); + buf += put_num(buf, 0, v[0], 0); + } + *(buf++) = ']'; + if (c < mi_cols - 1) *(buf++) = ','; + } + *(buf++) = ']'; + if (r < mi_rows - 1) *(buf++) = ','; + } + buf += put_str(buf, "],\n"); + return (int)(buf - buffer); +} + +int put_block_info(char *buffer, const map_entry *map, const char *name, + size_t offset, int len) { + const int mi_rows = frame_data.mi_rows; + const int mi_cols = frame_data.mi_cols; + char *buf = buffer; + int r, c, t, i; + if (compress && len == 1) { + die("Can't encode scalars as arrays when RLE compression is enabled."); + } + if (map) { + buf += snprintf(buf, MAX_BUFFER, " \"%sMap\": {", name); + buf += put_map(buf, map); + buf += put_str(buf, "},\n"); + } + buf += snprintf(buf, MAX_BUFFER, " \"%s\": [", name); + for (r = 0; r < mi_rows; ++r) { + *(buf++) = '['; + for (c = 0; c < mi_cols; ++c) { + insp_mi_data *mi = &frame_data.mi_grid[r * mi_cols + c]; + int16_t *v = (int16_t *)(((int8_t *)mi) + offset); + if (len == 0) { + buf += put_num(buf, 0, v[0], 0); + } else { + buf += put_str(buf, "["); + for (i = 0; i < len; i++) { + buf += put_num(buf, 0, v[i], 0); + if (i < len - 1) { + buf += put_str(buf, ","); + } + } + buf += put_str(buf, "]"); + } + if (compress) { // RLE + for (t = c + 1; t < mi_cols; ++t) { + insp_mi_data *next_mi = &frame_data.mi_grid[r * mi_cols + t]; + int16_t *nv = (int16_t *)(((int8_t *)next_mi) + offset); + int same = 0; + if (len == 0) { + same = v[0] == nv[0]; + } else { + for (i = 0; i < len; i++) { + same = v[i] == nv[i]; + if (!same) { + break; + } + } + } + if (!same) { + break; + } + } + if (t - c > 1) { + *(buf++) = ','; + buf += put_num(buf, '[', t - c - 1, ']'); + c = t - 1; + } + } + if (c < mi_cols - 1) *(buf++) = ','; + } + *(buf++) = ']'; + if (r < mi_rows - 1) *(buf++) = ','; + } + 
buf += put_str(buf, "],\n"); + return (int)(buf - buffer); +} + +#if CONFIG_ACCOUNTING +int put_accounting(char *buffer) { + char *buf = buffer; + int i; + const Accounting *accounting = frame_data.accounting; + if (accounting == NULL) { + printf("XXX\n"); + return 0; + } + const int num_syms = accounting->syms.num_syms; + const int num_strs = accounting->syms.dictionary.num_strs; + buf += put_str(buf, " \"symbolsMap\": ["); + for (i = 0; i < num_strs; i++) { + buf += snprintf(buf, MAX_BUFFER, "\"%s\"", + accounting->syms.dictionary.strs[i]); + if (i < num_strs - 1) *(buf++) = ','; + } + buf += put_str(buf, "],\n"); + buf += put_str(buf, " \"symbols\": [\n "); + AccountingSymbolContext context; + context.x = -2; + context.y = -2; + AccountingSymbol *sym; + for (i = 0; i < num_syms; i++) { + sym = &accounting->syms.syms[i]; + if (memcmp(&context, &sym->context, sizeof(AccountingSymbolContext)) != 0) { + buf += put_num(buf, '[', sym->context.x, 0); + buf += put_num(buf, ',', sym->context.y, ']'); + } else { + buf += put_num(buf, '[', sym->id, 0); + buf += put_num(buf, ',', sym->bits, 0); + buf += put_num(buf, ',', sym->samples, ']'); + } + context = sym->context; + if (i < num_syms - 1) *(buf++) = ','; + } + buf += put_str(buf, "],\n"); + return (int)(buf - buffer); +} +#endif + +int skip_non_transform = 0; + +void inspect(void *pbi, void *data) { + /* Fetch frame data. */ + ifd_inspect(&frame_data, pbi, skip_non_transform); + + // Show existing frames just show a reference buffer we've already decoded. + // There's no information to show. + if (frame_data.show_existing_frame) return; + + (void)data; + // We allocate enough space and hope we don't write out of bounds. Totally + // unsafe but this speeds things up, especially when compiled to Javascript. 
+ char *buffer = aom_malloc(MAX_BUFFER); + if (!buffer) { + fprintf(stderr, "Error allocating inspect info buffer\n"); + abort(); + } + char *buf = buffer; + buf += put_str(buf, "{\n"); + if (layers & BLOCK_SIZE_LAYER) { + buf += put_block_info(buf, block_size_map, "blockSize", + offsetof(insp_mi_data, bsize), 0); + } + if (layers & TRANSFORM_SIZE_LAYER) { + buf += put_block_info(buf, tx_size_map, "transformSize", + offsetof(insp_mi_data, tx_size), 0); + } + if (layers & TRANSFORM_TYPE_LAYER) { + buf += put_block_info(buf, tx_type_map, "transformType", + offsetof(insp_mi_data, tx_type), 0); + } + if (layers & DUAL_FILTER_LAYER) { + buf += put_block_info(buf, dual_filter_map, "dualFilterType", + offsetof(insp_mi_data, dual_filter_type), 0); + } + if (layers & MODE_LAYER) { + buf += put_block_info(buf, prediction_mode_map, "mode", + offsetof(insp_mi_data, mode), 0); + } + if (layers & UV_MODE_LAYER) { + buf += put_block_info(buf, uv_prediction_mode_map, "uv_mode", + offsetof(insp_mi_data, uv_mode), 0); + } + if (layers & MOTION_MODE_LAYER) { + buf += put_block_info(buf, motion_mode_map, "motion_mode", + offsetof(insp_mi_data, motion_mode), 0); + } + if (layers & COMPOUND_TYPE_LAYER) { + buf += put_block_info(buf, compound_type_map, "compound_type", + offsetof(insp_mi_data, compound_type), 0); + } + if (layers & SKIP_LAYER) { + buf += + put_block_info(buf, skip_map, "skip", offsetof(insp_mi_data, skip), 0); + } + if (layers & FILTER_LAYER) { + buf += + put_block_info(buf, NULL, "filter", offsetof(insp_mi_data, filter), 2); + } + if (layers & CDEF_LAYER) { + buf += put_block_info(buf, NULL, "cdef_level", + offsetof(insp_mi_data, cdef_level), 0); + buf += put_block_info(buf, NULL, "cdef_strength", + offsetof(insp_mi_data, cdef_strength), 0); + } + if (layers & CFL_LAYER) { + buf += put_block_info(buf, NULL, "cfl_alpha_idx", + offsetof(insp_mi_data, cfl_alpha_idx), 0); + buf += put_block_info(buf, NULL, "cfl_alpha_sign", + offsetof(insp_mi_data, cfl_alpha_sign), 0); + } 
+ if (layers & Q_INDEX_LAYER) { + buf += put_block_info(buf, NULL, "delta_q", + offsetof(insp_mi_data, current_qindex), 0); + } + if (layers & SEGMENT_ID_LAYER) { + buf += put_block_info(buf, NULL, "seg_id", + offsetof(insp_mi_data, segment_id), 0); + } + if (layers & MOTION_VECTORS_LAYER) { + buf += put_motion_vectors(buf); + } + if (layers & INTRABC_LAYER) { + buf += put_block_info(buf, intrabc_map, "intrabc", + offsetof(insp_mi_data, intrabc), 0); + } + if (layers & PALETTE_LAYER) { + buf += put_block_info(buf, palette_map, "palette", + offsetof(insp_mi_data, palette), 0); + } + if (layers & UV_PALETTE_LAYER) { + buf += put_block_info(buf, palette_map, "uv_palette", + offsetof(insp_mi_data, uv_palette), 0); + } + if (combined_parm_count > 0) buf += put_combined(buf); + if (layers & REFERENCE_FRAME_LAYER) { + buf += put_block_info(buf, refs_map, "referenceFrame", + offsetof(insp_mi_data, ref_frame), 2); + } +#if CONFIG_ACCOUNTING + if (layers & ACCOUNTING_LAYER) { + buf += put_accounting(buf); + } +#endif + buf += + snprintf(buf, MAX_BUFFER, " \"frame\": %d,\n", frame_data.frame_number); + buf += snprintf(buf, MAX_BUFFER, " \"showFrame\": %d,\n", + frame_data.show_frame); + buf += snprintf(buf, MAX_BUFFER, " \"frameType\": %d,\n", + frame_data.frame_type); + buf += snprintf(buf, MAX_BUFFER, " \"baseQIndex\": %d,\n", + frame_data.base_qindex); + buf += snprintf(buf, MAX_BUFFER, " \"tileCols\": %d,\n", + frame_data.tile_mi_cols); + buf += snprintf(buf, MAX_BUFFER, " \"tileRows\": %d,\n", + frame_data.tile_mi_rows); + buf += snprintf(buf, MAX_BUFFER, " \"deltaQPresentFlag\": %d,\n", + frame_data.delta_q_present_flag); + buf += snprintf(buf, MAX_BUFFER, " \"deltaQRes\": %d,\n", + frame_data.delta_q_res); + buf += put_str(buf, " \"config\": {"); + buf += put_map(buf, config_map); + buf += put_str(buf, "},\n"); + buf += put_str(buf, " \"configString\": \""); + buf += put_str_with_escape(buf, aom_codec_build_config()); + buf += put_str(buf, "\"\n"); + 
decoded_frame_count++; + buf += put_str(buf, "},\n"); + *(buf++) = 0; + on_frame_decoded_dump(buffer); + aom_free(buffer); +} + +void ifd_init_cb(void) { + aom_inspect_init ii; + ii.inspect_cb = inspect; + ii.inspect_ctx = NULL; + aom_codec_control(&codec, AV1_SET_INSPECTION_CALLBACK, &ii); +} + +EMSCRIPTEN_KEEPALIVE +int open_file(char *file) { + if (file == NULL) { + // The JS analyzer puts the .ivf file at this location. + file = "/tmp/input.ivf"; + } + reader = aom_video_reader_open(file); + if (!reader) die("Failed to open %s for reading.", file); + info = aom_video_reader_get_info(reader); + aom_codec_iface_t *decoder = get_aom_decoder_by_fourcc(info->codec_fourcc); + if (!decoder) die("Unknown input codec."); + fprintf(stderr, "Using %s\n", aom_codec_iface_name(decoder)); + if (aom_codec_dec_init(&codec, decoder, NULL, 0)) + die("Failed to initialize decoder."); + ifd_init(&frame_data, info->frame_width, info->frame_height); + ifd_init_cb(); + return EXIT_SUCCESS; +} + +Av1DecodeReturn adr; +int have_frame = 0; +const unsigned char *frame; +const unsigned char *end_frame; +size_t frame_size = 0; + +EMSCRIPTEN_KEEPALIVE +int read_frame(void) { + img = NULL; + + // This loop skips over any frames that are show_existing_frames, as + // there is nothing to analyze. 
+ do { + if (!have_frame) { + if (!aom_video_reader_read_frame(reader)) return EXIT_FAILURE; + frame = aom_video_reader_get_frame(reader, &frame_size); + + have_frame = 1; + end_frame = frame + frame_size; + } + + if (aom_codec_decode(&codec, frame, (unsigned int)frame_size, &adr) != + AOM_CODEC_OK) { + die_codec(&codec, "Failed to decode frame."); + } + + frame = adr.buf; + frame_size = end_frame - frame; + if (frame == end_frame) have_frame = 0; + } while (adr.show_existing); + + int got_any_frames = 0; + aom_image_t *frame_img; + struct av1_ref_frame ref_dec; + ref_dec.idx = adr.idx; + + // ref_dec.idx is the index to the reference buffer idx to AV1_GET_REFERENCE + // if its -1 the decoder didn't update any reference buffer and the only + // way to see the frame is aom_codec_get_frame. + if (ref_dec.idx == -1) { + aom_codec_iter_t iter = NULL; + img = frame_img = aom_codec_get_frame(&codec, &iter); + ++frame_count; + got_any_frames = 1; + } else if (!aom_codec_control(&codec, AV1_GET_REFERENCE, &ref_dec)) { + img = frame_img = &ref_dec.img; + ++frame_count; + got_any_frames = 1; + } + if (!got_any_frames) { + return EXIT_FAILURE; + } + return EXIT_SUCCESS; +} + +EMSCRIPTEN_KEEPALIVE +const char *get_aom_codec_build_config(void) { + return aom_codec_build_config(); +} + +EMSCRIPTEN_KEEPALIVE +int get_bit_depth(void) { return img->bit_depth; } + +EMSCRIPTEN_KEEPALIVE +int get_bits_per_sample(void) { return img->bps; } + +EMSCRIPTEN_KEEPALIVE +int get_image_format(void) { return img->fmt; } + +EMSCRIPTEN_KEEPALIVE +unsigned char *get_plane(int plane) { return img->planes[plane]; } + +EMSCRIPTEN_KEEPALIVE +int get_plane_stride(int plane) { return img->stride[plane]; } + +EMSCRIPTEN_KEEPALIVE +int get_plane_width(int plane) { return aom_img_plane_width(img, plane); } + +EMSCRIPTEN_KEEPALIVE +int get_plane_height(int plane) { return aom_img_plane_height(img, plane); } + +EMSCRIPTEN_KEEPALIVE +int get_frame_width(void) { return info->frame_width; } + 
+EMSCRIPTEN_KEEPALIVE +int get_frame_height(void) { return info->frame_height; } + +static void parse_args(char **argv) { + char **argi, **argj; + struct arg arg; + (void)dump_accounting_arg; + (void)dump_cdef_arg; + for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step) { + arg.argv_step = 1; + if (arg_match(&arg, &dump_block_size_arg, argi)) layers |= BLOCK_SIZE_LAYER; +#if CONFIG_ACCOUNTING + else if (arg_match(&arg, &dump_accounting_arg, argi)) + layers |= ACCOUNTING_LAYER; +#endif + else if (arg_match(&arg, &dump_transform_size_arg, argi)) + layers |= TRANSFORM_SIZE_LAYER; + else if (arg_match(&arg, &dump_transform_type_arg, argi)) + layers |= TRANSFORM_TYPE_LAYER; + else if (arg_match(&arg, &dump_mode_arg, argi)) + layers |= MODE_LAYER; + else if (arg_match(&arg, &dump_uv_mode_arg, argi)) + layers |= UV_MODE_LAYER; + else if (arg_match(&arg, &dump_motion_mode_arg, argi)) + layers |= MOTION_MODE_LAYER; + else if (arg_match(&arg, &dump_compound_type_arg, argi)) + layers |= COMPOUND_TYPE_LAYER; + else if (arg_match(&arg, &dump_skip_arg, argi)) + layers |= SKIP_LAYER; + else if (arg_match(&arg, &dump_filter_arg, argi)) + layers |= FILTER_LAYER; + else if (arg_match(&arg, &dump_cdef_arg, argi)) + layers |= CDEF_LAYER; + else if (arg_match(&arg, &dump_cfl_arg, argi)) + layers |= CFL_LAYER; + else if (arg_match(&arg, &dump_reference_frame_arg, argi)) + layers |= REFERENCE_FRAME_LAYER; + else if (arg_match(&arg, &dump_motion_vectors_arg, argi)) + layers |= MOTION_VECTORS_LAYER; + else if (arg_match(&arg, &dump_dual_filter_type_arg, argi)) + layers |= DUAL_FILTER_LAYER; + else if (arg_match(&arg, &dump_delta_q_arg, argi)) + layers |= Q_INDEX_LAYER; + else if (arg_match(&arg, &dump_seg_id_arg, argi)) + layers |= SEGMENT_ID_LAYER; + else if (arg_match(&arg, &dump_intrabc_arg, argi)) + layers |= INTRABC_LAYER; + else if (arg_match(&arg, &dump_palette_arg, argi)) + layers |= PALETTE_LAYER; + else if (arg_match(&arg, &dump_uv_palette_arg, argi)) + layers |= 
UV_PALETTE_LAYER; + else if (arg_match(&arg, &dump_all_arg, argi)) + layers |= ALL_LAYERS; + else if (arg_match(&arg, &compress_arg, argi)) + compress = 1; + else if (arg_match(&arg, &usage_arg, argi)) + usage_exit(); + else if (arg_match(&arg, &limit_arg, argi)) + stop_after = arg_parse_uint(&arg); + else if (arg_match(&arg, &skip_non_transform_arg, argi)) + skip_non_transform = arg_parse_uint(&arg); + else if (arg_match(&arg, &combined_arg, argi)) + convert_to_indices( + (char *)arg.val, combined_parm_list, + sizeof(combined_parm_list) / sizeof(combined_parm_list[0]), + &combined_parm_count); + else + argj++; + } +} + +static const char *exec_name; + +void usage_exit(void) { + fprintf(stderr, "Usage: %s src_filename <options>\n", exec_name); + fprintf(stderr, "\nOptions:\n"); + arg_show_usage(stderr, main_args); + exit(EXIT_FAILURE); +} + +EMSCRIPTEN_KEEPALIVE +int main(int argc, char **argv) { + exec_name = argv[0]; + parse_args(argv); + if (argc >= 2) { + open_file(argv[1]); + printf("[\n"); + while (1) { + if (stop_after && (decoded_frame_count >= stop_after)) break; + if (read_frame()) break; + } + printf("null\n"); + printf("]"); + } else { + usage_exit(); + } +} + +EMSCRIPTEN_KEEPALIVE +void quit(void) { + if (aom_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec"); + aom_video_reader_close(reader); +} + +EMSCRIPTEN_KEEPALIVE +void set_layers(LayerType v) { layers = v; } + +EMSCRIPTEN_KEEPALIVE +void set_compress(int v) { compress = v; } diff --git a/third_party/aom/examples/lightfield_bitstream_parsing.c b/third_party/aom/examples/lightfield_bitstream_parsing.c new file mode 100644 index 0000000000..05272bafa3 --- /dev/null +++ b/third_party/aom/examples/lightfield_bitstream_parsing.c @@ -0,0 +1,415 @@ +/* + * Copyright (c) 2018, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. 
If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +// Lightfield Bitstream Parsing +// ============================ +// +// This is a lightfield bitstream parsing example. It takes an input file +// containing the whole compressed lightfield bitstream (ivf file) and a text +// file containing a stream of tiles to decode and then constructs and outputs +// a new bitstream that can be decoded by an AV1 decoder. The output bitstream +// contains reference frames (i.e. anchor frames), camera frame header, and +// tile list OBUs. num_references is the number of anchor frames coded at the +// beginning of the light field file. After running the lightfield encoder, +// run lightfield bitstream parsing: +// examples/lightfield_bitstream_parsing vase10x10.ivf vase_tile_list.ivf 4 +// tile_list.txt +// +// The tile_list.txt is expected to be of the form: +// Frame <frame_index0> +// <image_index0> <anchor_index0> <tile_col0> <tile_row0> +// <image_index1> <anchor_index1> <tile_col1> <tile_row1> +// ... +// Frame <frame_index1> +// ... +// +// The "Frame" markers indicate a new render frame and thus a new tile list +// will be started and the old one flushed. The image_indexN, anchor_indexN, +// tile_colN, and tile_rowN identify an individual tile to be decoded and +// to use anchor_indexN anchor image for MCP. 
+ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "aom/aom_decoder.h" +#include "aom/aom_encoder.h" +#include "aom/aom_integer.h" +#include "aom/aomdx.h" +#include "aom_dsp/bitwriter_buffer.h" +#include "common/tools_common.h" +#include "common/video_reader.h" +#include "common/video_writer.h" + +#define MAX_TILES 512 + +static const char *exec_name; + +void usage_exit(void) { + fprintf(stderr, "Usage: %s <infile> <outfile> <num_references> <tile_list>\n", + exec_name); + exit(EXIT_FAILURE); +} + +#define ALIGN_POWER_OF_TWO(value, n) \ + (((value) + ((1 << (n)) - 1)) & ~((1 << (n)) - 1)) + +const int output_frame_width = 512; +const int output_frame_height = 512; + +// Spec: +// typedef struct { +// uint8_t anchor_frame_idx; +// uint8_t tile_row; +// uint8_t tile_col; +// uint16_t coded_tile_data_size_minus_1; +// uint8_t *coded_tile_data; +// } TILE_LIST_ENTRY; + +// Tile list entry provided by the application +typedef struct { + int image_idx; + int reference_idx; + int tile_col; + int tile_row; +} TILE_LIST_INFO; + +static int get_image_bps(aom_img_fmt_t fmt) { + switch (fmt) { + case AOM_IMG_FMT_I420: return 12; + case AOM_IMG_FMT_I422: return 16; + case AOM_IMG_FMT_I444: return 24; + case AOM_IMG_FMT_I42016: return 24; + case AOM_IMG_FMT_I42216: return 32; + case AOM_IMG_FMT_I44416: return 48; + default: die("Invalid image format"); + } +} + +static void process_tile_list(const TILE_LIST_INFO *tiles, int num_tiles, + aom_codec_pts_t tl_pts, unsigned char **frames, + const size_t *frame_sizes, aom_codec_ctx_t *codec, + unsigned char *tl_buf, AvxVideoWriter *writer, + uint8_t output_frame_width_in_tiles_minus_1, + uint8_t output_frame_height_in_tiles_minus_1) { + unsigned char *tl = tl_buf; + struct aom_write_bit_buffer wb = { tl, 0 }; + unsigned char *saved_obu_size_loc = NULL; + uint32_t tile_list_obu_header_size = 0; + uint32_t tile_list_obu_size = 0; + int num_tiles_minus_1 = num_tiles - 1; + int i; + + // Write the tile list OBU 
header that is 1 byte long. + aom_wb_write_literal(&wb, 0, 1); // forbidden bit. + aom_wb_write_literal(&wb, 8, 4); // tile list OBU: "1000" + aom_wb_write_literal(&wb, 0, 1); // obu_extension = 0 + aom_wb_write_literal(&wb, 1, 1); // obu_has_size_field + aom_wb_write_literal(&wb, 0, 1); // reserved + tl++; + tile_list_obu_header_size++; + + // Write the OBU size using a fixed length_field_size of 4 bytes. + saved_obu_size_loc = tl; + // aom_wb_write_unsigned_literal(&wb, data, bits) requires that bits <= 32. + aom_wb_write_unsigned_literal(&wb, 0, 32); + tl += 4; + tile_list_obu_header_size += 4; + + // write_tile_list_obu() + aom_wb_write_literal(&wb, output_frame_width_in_tiles_minus_1, 8); + aom_wb_write_literal(&wb, output_frame_height_in_tiles_minus_1, 8); + aom_wb_write_literal(&wb, num_tiles_minus_1, 16); + tl += 4; + tile_list_obu_size += 4; + + // Write each tile's data + for (i = 0; i <= num_tiles_minus_1; i++) { + aom_tile_data tile_data = { 0, NULL, 0 }; + + int image_idx = tiles[i].image_idx; + int ref_idx = tiles[i].reference_idx; + int tc = tiles[i].tile_col; + int tr = tiles[i].tile_row; + + // Reset bit writer to the right location. + wb.bit_buffer = tl; + wb.bit_offset = 0; + + size_t frame_size = frame_sizes[image_idx]; + const unsigned char *frame = frames[image_idx]; + + AOM_CODEC_CONTROL_TYPECHECKED(codec, AV1_SET_DECODE_TILE_ROW, tr); + AOM_CODEC_CONTROL_TYPECHECKED(codec, AV1_SET_DECODE_TILE_COL, tc); + + aom_codec_err_t aom_status = + aom_codec_decode(codec, frame, frame_size, NULL); + if (aom_status) die_codec(codec, "Failed to decode tile."); + + AOM_CODEC_CONTROL_TYPECHECKED(codec, AV1D_GET_TILE_DATA, &tile_data); + + // Copy over tile info. 
+ // uint8_t anchor_frame_idx; + // uint8_t tile_row; + // uint8_t tile_col; + // uint16_t coded_tile_data_size_minus_1; + // uint8_t *coded_tile_data; + uint32_t tile_info_bytes = 5; + aom_wb_write_literal(&wb, ref_idx, 8); + aom_wb_write_literal(&wb, tr, 8); + aom_wb_write_literal(&wb, tc, 8); + aom_wb_write_literal(&wb, (int)tile_data.coded_tile_data_size - 1, 16); + tl += tile_info_bytes; + + memcpy(tl, (uint8_t *)tile_data.coded_tile_data, + tile_data.coded_tile_data_size); + tl += tile_data.coded_tile_data_size; + + tile_list_obu_size += + tile_info_bytes + (uint32_t)tile_data.coded_tile_data_size; + } + + // Write tile list OBU size. + size_t bytes_written = 0; + if (aom_uleb_encode_fixed_size(tile_list_obu_size, 4, 4, saved_obu_size_loc, + &bytes_written)) + die_codec(codec, "Failed to encode the tile list obu size."); + + // Copy the tile list. + if (!aom_video_writer_write_frame( + writer, tl_buf, tile_list_obu_header_size + tile_list_obu_size, + tl_pts)) + die_codec(codec, "Failed to copy compressed tile list."); +} + +int main(int argc, char **argv) { + AvxVideoReader *reader = NULL; + AvxVideoWriter *writer = NULL; + const AvxVideoInfo *info = NULL; + int num_references; + int i; + aom_codec_pts_t pts; + const char *tile_list_file = NULL; + + exec_name = argv[0]; + if (argc != 5) die("Invalid number of arguments."); + + reader = aom_video_reader_open(argv[1]); + if (!reader) die("Failed to open %s for reading.", argv[1]); + + num_references = (int)strtol(argv[3], NULL, 0); + info = aom_video_reader_get_info(reader); + + aom_video_reader_set_fourcc(reader, AV1_FOURCC); + + // The writer to write out ivf file in tile list OBU, which can be decoded by + // AV1 decoder. 
+ writer = aom_video_writer_open(argv[2], kContainerIVF, info); + if (!writer) die("Failed to open %s for writing", argv[2]); + + tile_list_file = argv[4]; + + aom_codec_iface_t *decoder = get_aom_decoder_by_fourcc(info->codec_fourcc); + if (!decoder) die("Unknown input codec."); + printf("Using %s\n", aom_codec_iface_name(decoder)); + + aom_codec_ctx_t codec; + if (aom_codec_dec_init(&codec, decoder, NULL, 0)) + die("Failed to initialize decoder."); + + // Decode anchor frames. + AOM_CODEC_CONTROL_TYPECHECKED(&codec, AV1_SET_TILE_MODE, 0); + + printf("Reading %d reference images.\n", num_references); + for (i = 0; i < num_references; ++i) { + aom_video_reader_read_frame(reader); + + size_t frame_size = 0; + const unsigned char *frame = + aom_video_reader_get_frame(reader, &frame_size); + pts = (aom_codec_pts_t)aom_video_reader_get_frame_pts(reader); + + // Copy references bitstream directly. + if (!aom_video_writer_write_frame(writer, frame, frame_size, pts)) + die_codec(&codec, "Failed to copy compressed anchor frame."); + + if (aom_codec_decode(&codec, frame, frame_size, NULL)) + die_codec(&codec, "Failed to decode frame."); + } + + // Decode camera frames. + AOM_CODEC_CONTROL_TYPECHECKED(&codec, AV1_SET_TILE_MODE, 1); + AOM_CODEC_CONTROL_TYPECHECKED(&codec, AV1D_EXT_TILE_DEBUG, 1); + + FILE *infile = aom_video_reader_get_file(reader); + // Record the offset of the first camera image. + const FileOffset camera_frame_pos = ftello(infile); + + printf("Loading compressed frames into memory.\n"); + + // Count the frames in the lightfield. + int num_frames = 0; + while (aom_video_reader_read_frame(reader)) { + ++num_frames; + } + if (num_frames < 1) die("Input light field has no frames."); + + // Read all of the lightfield frames into memory. 
+ unsigned char **frames = + (unsigned char **)malloc(num_frames * sizeof(unsigned char *)); + size_t *frame_sizes = (size_t *)malloc(num_frames * sizeof(size_t)); + if (!(frames && frame_sizes)) die("Failed to allocate frame data."); + + // Seek to the first camera image. + fseeko(infile, camera_frame_pos, SEEK_SET); + for (int f = 0; f < num_frames; ++f) { + aom_video_reader_read_frame(reader); + size_t frame_size = 0; + const unsigned char *frame = + aom_video_reader_get_frame(reader, &frame_size); + frames[f] = (unsigned char *)malloc(frame_size * sizeof(unsigned char)); + if (!frames[f]) die("Failed to allocate frame data."); + memcpy(frames[f], frame, frame_size); + frame_sizes[f] = frame_size; + } + printf("Read %d frames.\n", num_frames); + + // Copy first camera frame for getting camera frame header. This is done + // only once. + { + size_t frame_size = frame_sizes[0]; + const unsigned char *frame = frames[0]; + pts = num_references; + aom_tile_data frame_header_info = { 0, NULL, 0 }; + + // Need to decode frame header to get camera frame header info. So, here + // decoding 1 tile is enough. + AOM_CODEC_CONTROL_TYPECHECKED(&codec, AV1_SET_DECODE_TILE_ROW, 0); + AOM_CODEC_CONTROL_TYPECHECKED(&codec, AV1_SET_DECODE_TILE_COL, 0); + + aom_codec_err_t aom_status = + aom_codec_decode(&codec, frame, frame_size, NULL); + if (aom_status) die_codec(&codec, "Failed to decode tile."); + + AOM_CODEC_CONTROL_TYPECHECKED(&codec, AV1D_GET_FRAME_HEADER_INFO, + &frame_header_info); + + size_t obu_size_offset = + (uint8_t *)frame_header_info.coded_tile_data - frame; + size_t length_field_size = frame_header_info.coded_tile_data_size; + // Remove ext-tile tile info. 
+ uint32_t frame_header_size = (uint32_t)frame_header_info.extra_size - 1; + size_t bytes_to_copy = + obu_size_offset + length_field_size + frame_header_size; + + unsigned char *frame_hdr_buf = (unsigned char *)malloc(bytes_to_copy); + if (frame_hdr_buf == NULL) + die_codec(&codec, "Failed to allocate frame header buffer."); + + memcpy(frame_hdr_buf, frame, bytes_to_copy); + + // Update frame header OBU size. + size_t bytes_written = 0; + if (aom_uleb_encode_fixed_size( + frame_header_size, length_field_size, length_field_size, + frame_hdr_buf + obu_size_offset, &bytes_written)) + die_codec(&codec, "Failed to encode the tile list obu size."); + + // Copy camera frame header bitstream. + if (!aom_video_writer_write_frame(writer, frame_hdr_buf, bytes_to_copy, + pts)) + die_codec(&codec, "Failed to copy compressed camera frame header."); + free(frame_hdr_buf); + } + + // Read out the image format. + aom_img_fmt_t ref_fmt = 0; + if (AOM_CODEC_CONTROL_TYPECHECKED(&codec, AV1D_GET_IMG_FORMAT, &ref_fmt)) + die_codec(&codec, "Failed to get the image format"); + const int bps = get_image_bps(ref_fmt); + if (!bps) die_codec(&codec, "Invalid image format."); + // read out the tile size. + unsigned int tile_size = 0; + if (AOM_CODEC_CONTROL_TYPECHECKED(&codec, AV1D_GET_TILE_SIZE, &tile_size)) + die_codec(&codec, "Failed to get the tile size"); + const unsigned int tile_width = tile_size >> 16; + const unsigned int tile_height = tile_size & 65535; + // Allocate a buffer to store tile list bitstream. 
+ const size_t data_sz = MAX_TILES * ALIGN_POWER_OF_TWO(tile_width, 5) * + ALIGN_POWER_OF_TWO(tile_height, 5) * bps / 8; + + unsigned char *tl_buf = (unsigned char *)malloc(data_sz); + if (tl_buf == NULL) die_codec(&codec, "Failed to allocate tile list buffer."); + + aom_codec_pts_t tl_pts = num_references; + const uint8_t output_frame_width_in_tiles_minus_1 = + output_frame_width / tile_width - 1; + const uint8_t output_frame_height_in_tiles_minus_1 = + output_frame_height / tile_height - 1; + + printf("Reading tile list from file.\n"); + char line[1024]; + FILE *tile_list_fptr = fopen(tile_list_file, "r"); + if (!tile_list_fptr) die_codec(&codec, "Failed to open tile list file."); + int num_tiles = 0; + TILE_LIST_INFO tiles[MAX_TILES]; + while ((fgets(line, 1024, tile_list_fptr)) != NULL) { + if (line[0] == 'F' || num_tiles >= MAX_TILES) { + // Flush existing tile list and start another, either because we hit a + // new render frame or because we've hit our max number of tiles per list. + if (num_tiles > 0) { + process_tile_list(tiles, num_tiles, tl_pts, frames, frame_sizes, &codec, + tl_buf, writer, output_frame_width_in_tiles_minus_1, + output_frame_height_in_tiles_minus_1); + ++tl_pts; + } + num_tiles = 0; + } + if (line[0] == 'F') { + continue; + } + if (sscanf(line, "%d %d %d %d", &tiles[num_tiles].image_idx, + &tiles[num_tiles].reference_idx, &tiles[num_tiles].tile_col, + &tiles[num_tiles].tile_row) == 4) { + if (tiles[num_tiles].image_idx >= num_frames) { + die("Tile list image_idx out of bounds: %d >= %d.", + tiles[num_tiles].image_idx, num_frames); + } + if (tiles[num_tiles].reference_idx >= num_references) { + die("Tile list reference_idx out of bounds: %d >= %d.", + tiles[num_tiles].reference_idx, num_references); + } + ++num_tiles; + } + } + if (num_tiles > 0) { + // Flush out the last tile list. 
+ process_tile_list(tiles, num_tiles, tl_pts, frames, frame_sizes, &codec, + tl_buf, writer, output_frame_width_in_tiles_minus_1, + output_frame_height_in_tiles_minus_1); + ++tl_pts; + } + + const int num_tile_lists = (int)(tl_pts - pts); + printf("Finished processing tile lists. Num tile lists: %d.\n", + num_tile_lists); + free(tl_buf); + for (int f = 0; f < num_frames; ++f) { + free(frames[f]); + } + free(frame_sizes); + free(frames); + if (aom_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec"); + aom_video_writer_close(writer); + aom_video_reader_close(reader); + + return EXIT_SUCCESS; +} diff --git a/third_party/aom/examples/lightfield_decoder.c b/third_party/aom/examples/lightfield_decoder.c new file mode 100644 index 0000000000..65b13efa1a --- /dev/null +++ b/third_party/aom/examples/lightfield_decoder.c @@ -0,0 +1,381 @@ +/* + * Copyright (c) 2017, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +// Lightfield Decoder +// ================== +// +// This is an example of a simple lightfield decoder. It builds upon the +// simple_decoder.c example. It takes an input file containing the compressed +// data (in ivf format), treating it as a lightfield instead of a video; and a +// text file with a list of tiles to decode. There is an optional parameter +// allowing to choose the output format, and the supported formats are +// YUV1D(default), YUV, and NV12. 
+// After running the lightfield encoder, run lightfield decoder to decode a +// batch of tiles: +// examples/lightfield_decoder vase10x10.ivf vase_reference.yuv 4 tile_list.txt +// 0(optional) +// The tile_list.txt is expected to be of the form: +// Frame <frame_index0> +// <image_index0> <anchor_index0> <tile_col0> <tile_row0> +// <image_index1> <anchor_index1> <tile_col1> <tile_row1> +// ... +// Frame <frame_index1) +// ... +// +// The "Frame" markers indicate a new render frame and thus a new tile list +// will be started and the old one flushed. The image_indexN, anchor_indexN, +// tile_colN, and tile_rowN identify an individual tile to be decoded and +// to use anchor_indexN anchor image for MCP. + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "aom/aom_decoder.h" +#include "aom/aomdx.h" +#include "aom_scale/yv12config.h" +#include "av1/common/enums.h" +#include "common/tools_common.h" +#include "common/video_reader.h" + +enum { + YUV1D, // 1D tile output for conformance test. + YUV, // Tile output in YUV format. + NV12, // Tile output in NV12 format. +} UENUM1BYTE(OUTPUT_FORMAT); + +static const char *exec_name; + +void usage_exit(void) { + fprintf(stderr, + "Usage: %s <infile> <outfile> <num_references> <tile_list> <output " + "format(optional)>\n", + exec_name); + exit(EXIT_FAILURE); +} + +// Output frame size +static const int output_frame_width = 512; +static const int output_frame_height = 512; + +static void aom_img_copy_tile(const aom_image_t *src, const aom_image_t *dst, + int dst_row_offset, int dst_col_offset) { + const int shift = (src->fmt & AOM_IMG_FMT_HIGHBITDEPTH) ? 1 : 0; + int plane; + + for (plane = 0; plane < 3; ++plane) { + const unsigned char *src_buf = src->planes[plane]; + const int src_stride = src->stride[plane]; + unsigned char *dst_buf = dst->planes[plane]; + const int dst_stride = dst->stride[plane]; + const int roffset = + (plane > 0) ? 
dst_row_offset >> dst->y_chroma_shift : dst_row_offset; + const int coffset = + (plane > 0) ? dst_col_offset >> dst->x_chroma_shift : dst_col_offset; + + // col offset needs to be adjusted for HBD. + dst_buf += roffset * dst_stride + (coffset << shift); + + const int w = (aom_img_plane_width(src, plane) << shift); + const int h = aom_img_plane_height(src, plane); + int y; + + for (y = 0; y < h; ++y) { + memcpy(dst_buf, src_buf, w); + src_buf += src_stride; + dst_buf += dst_stride; + } + } +} + +static void decode_tile(aom_codec_ctx_t *codec, const unsigned char *frame, + size_t frame_size, int tr, int tc, int ref_idx, + aom_image_t *reference_images, aom_image_t *output, + int *tile_idx, unsigned int *output_bit_depth, + aom_image_t **img_ptr, int output_format) { + AOM_CODEC_CONTROL_TYPECHECKED(codec, AV1_SET_TILE_MODE, 1); + AOM_CODEC_CONTROL_TYPECHECKED(codec, AV1D_EXT_TILE_DEBUG, 1); + AOM_CODEC_CONTROL_TYPECHECKED(codec, AV1_SET_DECODE_TILE_ROW, tr); + AOM_CODEC_CONTROL_TYPECHECKED(codec, AV1_SET_DECODE_TILE_COL, tc); + + av1_ref_frame_t ref; + ref.idx = 0; + ref.use_external_ref = 1; + ref.img = reference_images[ref_idx]; + if (AOM_CODEC_CONTROL_TYPECHECKED(codec, AV1_SET_REFERENCE, &ref)) { + die_codec(codec, "Failed to set reference frame."); + } + + aom_codec_err_t aom_status = aom_codec_decode(codec, frame, frame_size, NULL); + if (aom_status) die_codec(codec, "Failed to decode tile."); + + aom_codec_iter_t iter = NULL; + aom_image_t *img = aom_codec_get_frame(codec, &iter); + if (!img) die_codec(codec, "Failed to get frame."); + *img_ptr = img; + + // aom_img_alloc() sets bit_depth as follows: + // output->bit_depth = (fmt & AOM_IMG_FMT_HIGHBITDEPTH) ? 16 : 8; + // Use img->bit_depth(read from bitstream), so that aom_shift_img() + // works as expected. + output->bit_depth = img->bit_depth; + *output_bit_depth = img->bit_depth; + + if (output_format != YUV1D) { + // read out the tile size. 
+ unsigned int tile_size = 0; + if (AOM_CODEC_CONTROL_TYPECHECKED(codec, AV1D_GET_TILE_SIZE, &tile_size)) + die_codec(codec, "Failed to get the tile size"); + const unsigned int tile_width = tile_size >> 16; + const unsigned int tile_height = tile_size & 65535; + const uint32_t output_frame_width_in_tiles = + output_frame_width / tile_width; + + // Copy the tile to the output frame. + const int row_offset = + (*tile_idx / output_frame_width_in_tiles) * tile_height; + const int col_offset = + (*tile_idx % output_frame_width_in_tiles) * tile_width; + + aom_img_copy_tile(img, output, row_offset, col_offset); + (*tile_idx)++; + } +} + +static void img_write_to_file(const aom_image_t *img, FILE *file, + int output_format) { + if (output_format == YUV) + aom_img_write(img, file); + else if (output_format == NV12) + aom_img_write_nv12(img, file); + else + die("Invalid output format"); +} + +int main(int argc, char **argv) { + FILE *outfile = NULL; + AvxVideoReader *reader = NULL; + const AvxVideoInfo *info = NULL; + int num_references; + aom_img_fmt_t ref_fmt = 0; + aom_image_t reference_images[MAX_EXTERNAL_REFERENCES]; + aom_image_t output; + aom_image_t *output_shifted = NULL; + size_t frame_size = 0; + const unsigned char *frame = NULL; + int i, j; + const char *tile_list_file = NULL; + int output_format = YUV1D; + exec_name = argv[0]; + + if (argc < 5) die("Invalid number of arguments."); + + reader = aom_video_reader_open(argv[1]); + if (!reader) die("Failed to open %s for reading.", argv[1]); + + if (!(outfile = fopen(argv[2], "wb"))) + die("Failed to open %s for writing.", argv[2]); + + num_references = (int)strtol(argv[3], NULL, 0); + tile_list_file = argv[4]; + + if (argc > 5) output_format = (int)strtol(argv[5], NULL, 0); + if (output_format < YUV1D || output_format > NV12) + die("Output format out of range [0, 2]"); + + info = aom_video_reader_get_info(reader); + + aom_codec_iface_t *decoder; + if (info->codec_fourcc == LST_FOURCC) + decoder = 
get_aom_decoder_by_fourcc(AV1_FOURCC); + else + die("Unknown input codec."); + printf("Using %s\n", aom_codec_iface_name(decoder)); + + aom_codec_ctx_t codec; + if (aom_codec_dec_init(&codec, decoder, NULL, 0)) + die_codec(&codec, "Failed to initialize decoder."); + + if (AOM_CODEC_CONTROL_TYPECHECKED(&codec, AV1D_SET_IS_ANNEXB, + info->is_annexb)) { + die("Failed to set annex b status"); + } + + // Decode anchor frames. + AOM_CODEC_CONTROL_TYPECHECKED(&codec, AV1_SET_TILE_MODE, 0); + for (i = 0; i < num_references; ++i) { + aom_video_reader_read_frame(reader); + frame = aom_video_reader_get_frame(reader, &frame_size); + if (aom_codec_decode(&codec, frame, frame_size, NULL)) + die_codec(&codec, "Failed to decode frame."); + + if (i == 0) { + if (AOM_CODEC_CONTROL_TYPECHECKED(&codec, AV1D_GET_IMG_FORMAT, &ref_fmt)) + die_codec(&codec, "Failed to get the image format"); + + int frame_res[2]; + if (AOM_CODEC_CONTROL_TYPECHECKED(&codec, AV1D_GET_FRAME_SIZE, frame_res)) + die_codec(&codec, "Failed to get the image frame size"); + + // Allocate memory to store decoded references. Allocate memory with the + // border so that it can be used as a reference. 
+ for (j = 0; j < num_references; j++) { + unsigned int border = AOM_DEC_BORDER_IN_PIXELS; + if (!aom_img_alloc_with_border(&reference_images[j], ref_fmt, + frame_res[0], frame_res[1], 32, 8, + border)) { + die("Failed to allocate references."); + } + } + } + + if (AOM_CODEC_CONTROL_TYPECHECKED(&codec, AV1_COPY_NEW_FRAME_IMAGE, + &reference_images[i])) + die_codec(&codec, "Failed to copy decoded reference frame"); + + aom_codec_iter_t iter = NULL; + aom_image_t *img = NULL; + while ((img = aom_codec_get_frame(&codec, &iter)) != NULL) { + char name[1024]; + snprintf(name, sizeof(name), "ref_%d.yuv", i); + printf("writing ref image to %s, %u, %u\n", name, img->d_w, img->d_h); + FILE *ref_file = fopen(name, "wb"); + aom_img_write(img, ref_file); + fclose(ref_file); + } + } + + FILE *infile = aom_video_reader_get_file(reader); + // Record the offset of the first camera image. + const FileOffset camera_frame_pos = ftello(infile); + + printf("Loading compressed frames into memory.\n"); + + // Count the frames in the lightfield. + int num_frames = 0; + while (aom_video_reader_read_frame(reader)) { + ++num_frames; + } + if (num_frames < 1) die("Input light field has no frames."); + + // Read all of the lightfield frames into memory. + unsigned char **frames = + (unsigned char **)malloc(num_frames * sizeof(unsigned char *)); + size_t *frame_sizes = (size_t *)malloc(num_frames * sizeof(size_t)); + if (!(frames && frame_sizes)) die("Failed to allocate frame data."); + // Seek to the first camera image. 
+ fseeko(infile, camera_frame_pos, SEEK_SET); + for (int f = 0; f < num_frames; ++f) { + aom_video_reader_read_frame(reader); + frame = aom_video_reader_get_frame(reader, &frame_size); + frames[f] = (unsigned char *)malloc(frame_size * sizeof(unsigned char)); + if (!frames[f]) die("Failed to allocate frame data."); + memcpy(frames[f], frame, frame_size); + frame_sizes[f] = frame_size; + } + printf("Read %d frames.\n", num_frames); + + if (output_format != YUV1D) { + // Allocate the output frame. + aom_img_fmt_t out_fmt = ref_fmt; + if (FORCE_HIGHBITDEPTH_DECODING) out_fmt |= AOM_IMG_FMT_HIGHBITDEPTH; + if (!aom_img_alloc(&output, out_fmt, output_frame_width, + output_frame_height, 32)) + die("Failed to allocate output image."); + } + + printf("Decoding tile list from file.\n"); + char line[1024]; + FILE *tile_list_fptr = fopen(tile_list_file, "r"); + if (!tile_list_fptr) die_codec(&codec, "Failed to open tile list file."); + int tile_list_cnt = 0; + int tile_list_writes = 0; + int tile_idx = 0; + aom_image_t *out = NULL; + unsigned int output_bit_depth = 0; + + while ((fgets(line, 1024, tile_list_fptr)) != NULL) { + if (line[0] == 'F') { + if (output_format != YUV1D) { + // Write out the tile list. + if (tile_list_cnt) { + out = &output; + if (output_bit_depth != 0) { + if (!aom_shift_img(output_bit_depth, &out, &output_shifted)) { + die("Error allocating image"); + } + } + img_write_to_file(out, outfile, output_format); + tile_list_writes++; + } + + tile_list_cnt++; + tile_idx = 0; + // Then memset the frame. 
+ memset(output.img_data, 0, output.sz); + } + continue; + } + + int image_idx, ref_idx, tc, tr; + sscanf(line, "%d %d %d %d", &image_idx, &ref_idx, &tc, &tr); + if (image_idx >= num_frames) { + die("Tile list image_idx out of bounds: %d >= %d.", image_idx, + num_frames); + } + if (ref_idx >= num_references) { + die("Tile list ref_idx out of bounds: %d >= %d.", ref_idx, + num_references); + } + frame = frames[image_idx]; + frame_size = frame_sizes[image_idx]; + + aom_image_t *img = NULL; + decode_tile(&codec, frame, frame_size, tr, tc, ref_idx, reference_images, + &output, &tile_idx, &output_bit_depth, &img, output_format); + if (output_format == YUV1D) { + out = img; + if (output_bit_depth != 0) { + if (!aom_shift_img(output_bit_depth, &out, &output_shifted)) { + die("Error allocating image"); + } + } + aom_img_write(out, outfile); + } + } + + if (output_format != YUV1D) { + // Write out the last tile list. + if (tile_list_writes < tile_list_cnt) { + out = &output; + if (output_bit_depth != 0) { + if (!aom_shift_img(output_bit_depth, &out, &output_shifted)) { + die("Error allocating image"); + } + } + img_write_to_file(out, outfile, output_format); + } + } + + if (output_shifted) aom_img_free(output_shifted); + if (output_format != YUV1D) aom_img_free(&output); + for (i = 0; i < num_references; i++) aom_img_free(&reference_images[i]); + for (int f = 0; f < num_frames; ++f) { + free(frames[f]); + } + free(frame_sizes); + free(frames); + if (aom_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec"); + aom_video_reader_close(reader); + fclose(outfile); + + return EXIT_SUCCESS; +} diff --git a/third_party/aom/examples/lightfield_encoder.c b/third_party/aom/examples/lightfield_encoder.c new file mode 100644 index 0000000000..9aef836ac2 --- /dev/null +++ b/third_party/aom/examples/lightfield_encoder.c @@ -0,0 +1,525 @@ +/* + * Copyright (c) 2017, Alliance for Open Media. 
All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +// Lightfield Encoder +// ================== +// +// This is an example of a simple lightfield encoder. It builds upon the +// twopass_encoder.c example. It takes an input file in YV12 format, +// treating it as a planar lightfield instead of a video. The img_width +// and img_height arguments are the dimensions of the lightfield images, +// while the lf_width and lf_height arguments are the number of +// lightfield images in each dimension. The lf_blocksize determines the +// number of reference images used for MCP. For example, 5 means that there +// is a reference image for every 5x5 lightfield image block. All images +// within a block will use the center image in that block as the reference +// image for MCP. +// Run "make test" to download lightfield test data: vase10x10.yuv. +// Run lightfield encoder to encode whole lightfield: +// examples/lightfield_encoder 1024 1024 vase10x10.yuv vase10x10.ivf 10 10 5 + +// Note: In bitstream.c and encoder.c, define EXT_TILE_DEBUG as 1 will print +// out the uncompressed header and the frame contexts, which can be used to +// test the bit exactness of the headers and the frame contexts for large scale +// tile coded frames. 
+ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "aom/aom_encoder.h" +#include "aom/aomcx.h" +#include "aom_scale/yv12config.h" +#include "av1/common/enums.h" +#include "av1/encoder/encoder_utils.h" +#include "common/tools_common.h" +#include "common/video_writer.h" + +static const char *exec_name; + +void usage_exit(void) { + fprintf(stderr, + "Usage: %s <img_width> <img_height> <infile> <outfile> " + "<lf_width> <lf_height> <lf_blocksize>\n", + exec_name); + exit(EXIT_FAILURE); +} + +static int img_size_bytes(aom_image_t *img) { + int image_size_bytes = 0; + int plane; + for (plane = 0; plane < 3; ++plane) { + const int w = aom_img_plane_width(img, plane) * + ((img->fmt & AOM_IMG_FMT_HIGHBITDEPTH) ? 2 : 1); + const int h = aom_img_plane_height(img, plane); + image_size_bytes += w * h; + } + return image_size_bytes; +} + +static int get_frame_stats(aom_codec_ctx_t *ctx, const aom_image_t *img, + aom_codec_pts_t pts, unsigned int duration, + aom_enc_frame_flags_t flags, + aom_fixed_buf_t *stats) { + int got_pkts = 0; + aom_codec_iter_t iter = NULL; + const aom_codec_cx_pkt_t *pkt = NULL; + const aom_codec_err_t res = aom_codec_encode(ctx, img, pts, duration, flags); + if (res != AOM_CODEC_OK) die_codec(ctx, "Failed to get frame stats."); + + while ((pkt = aom_codec_get_cx_data(ctx, &iter)) != NULL) { + got_pkts = 1; + + if (pkt->kind == AOM_CODEC_STATS_PKT) { + const uint8_t *const pkt_buf = pkt->data.twopass_stats.buf; + const size_t pkt_size = pkt->data.twopass_stats.sz; + stats->buf = realloc(stats->buf, stats->sz + pkt_size); + if (!stats->buf) die("Failed to allocate frame stats buffer."); + memcpy((uint8_t *)stats->buf + stats->sz, pkt_buf, pkt_size); + stats->sz += pkt_size; + } + } + + return got_pkts; +} + +static int encode_frame(aom_codec_ctx_t *ctx, const aom_image_t *img, + aom_codec_pts_t pts, unsigned int duration, + aom_enc_frame_flags_t flags, AvxVideoWriter *writer) { + int got_pkts = 0; + aom_codec_iter_t iter = NULL; + 
const aom_codec_cx_pkt_t *pkt = NULL; + const aom_codec_err_t res = aom_codec_encode(ctx, img, pts, duration, flags); + if (res != AOM_CODEC_OK) die_codec(ctx, "Failed to encode frame."); + + while ((pkt = aom_codec_get_cx_data(ctx, &iter)) != NULL) { + got_pkts = 1; + if (pkt->kind == AOM_CODEC_CX_FRAME_PKT) { + const int keyframe = (pkt->data.frame.flags & AOM_FRAME_IS_KEY) != 0; + + if (!aom_video_writer_write_frame(writer, pkt->data.frame.buf, + pkt->data.frame.sz, + pkt->data.frame.pts)) + die_codec(ctx, "Failed to write compressed frame."); + printf(keyframe ? "K" : "."); + fflush(stdout); + } + } + + return got_pkts; +} + +static void get_raw_image(aom_image_t **frame_to_encode, aom_image_t *raw, + aom_image_t *raw_shift) { + if (FORCE_HIGHBITDEPTH_DECODING) { + // Need to allocate larger buffer to use hbd internal. + int input_shift = 0; + aom_img_upshift(raw_shift, raw, input_shift); + *frame_to_encode = raw_shift; + } else { + *frame_to_encode = raw; + } +} + +static aom_fixed_buf_t pass0(aom_image_t *raw, FILE *infile, + aom_codec_iface_t *encoder, + const aom_codec_enc_cfg_t *cfg, int lf_width, + int lf_height, int lf_blocksize, int flags, + aom_image_t *raw_shift) { + aom_codec_ctx_t codec; + int frame_count = 0; + int image_size_bytes = img_size_bytes(raw); + int u_blocks, v_blocks; + int bu, bv; + aom_fixed_buf_t stats = { NULL, 0 }; + aom_image_t *frame_to_encode; + + if (aom_codec_enc_init(&codec, encoder, cfg, flags)) + die("Failed to initialize encoder"); + if (aom_codec_control(&codec, AOME_SET_ENABLEAUTOALTREF, 0)) + die_codec(&codec, "Failed to turn off auto altref"); + if (aom_codec_control(&codec, AV1E_SET_FRAME_PARALLEL_DECODING, 0)) + die_codec(&codec, "Failed to set frame parallel decoding"); + + // How many reference images we need to encode. 
+ u_blocks = (lf_width + lf_blocksize - 1) / lf_blocksize; + v_blocks = (lf_height + lf_blocksize - 1) / lf_blocksize; + + printf("\n First pass: "); + + for (bv = 0; bv < v_blocks; ++bv) { + for (bu = 0; bu < u_blocks; ++bu) { + const int block_u_min = bu * lf_blocksize; + const int block_v_min = bv * lf_blocksize; + int block_u_end = (bu + 1) * lf_blocksize; + int block_v_end = (bv + 1) * lf_blocksize; + int u_block_size, v_block_size; + int block_ref_u, block_ref_v; + + block_u_end = block_u_end < lf_width ? block_u_end : lf_width; + block_v_end = block_v_end < lf_height ? block_v_end : lf_height; + u_block_size = block_u_end - block_u_min; + v_block_size = block_v_end - block_v_min; + block_ref_u = block_u_min + u_block_size / 2; + block_ref_v = block_v_min + v_block_size / 2; + + printf("A%d, ", (block_ref_u + block_ref_v * lf_width)); + fseek(infile, (block_ref_u + block_ref_v * lf_width) * image_size_bytes, + SEEK_SET); + aom_img_read(raw, infile); + get_raw_image(&frame_to_encode, raw, raw_shift); + + // Reference frames can be encoded encoded without tiles. + ++frame_count; + get_frame_stats(&codec, frame_to_encode, frame_count, 1, + AOM_EFLAG_NO_REF_LAST2 | AOM_EFLAG_NO_REF_LAST3 | + AOM_EFLAG_NO_REF_GF | AOM_EFLAG_NO_REF_ARF | + AOM_EFLAG_NO_REF_BWD | AOM_EFLAG_NO_REF_ARF2 | + AOM_EFLAG_NO_UPD_LAST | AOM_EFLAG_NO_UPD_GF | + AOM_EFLAG_NO_UPD_ARF, + &stats); + } + } + + if (aom_codec_control(&codec, AV1E_SET_FRAME_PARALLEL_DECODING, 1)) + die_codec(&codec, "Failed to set frame parallel decoding"); + + for (bv = 0; bv < v_blocks; ++bv) { + for (bu = 0; bu < u_blocks; ++bu) { + const int block_u_min = bu * lf_blocksize; + const int block_v_min = bv * lf_blocksize; + int block_u_end = (bu + 1) * lf_blocksize; + int block_v_end = (bv + 1) * lf_blocksize; + int u, v; + block_u_end = block_u_end < lf_width ? block_u_end : lf_width; + block_v_end = block_v_end < lf_height ? 
block_v_end : lf_height; + for (v = block_v_min; v < block_v_end; ++v) { + for (u = block_u_min; u < block_u_end; ++u) { + printf("C%d, ", (u + v * lf_width)); + fseek(infile, (u + v * lf_width) * image_size_bytes, SEEK_SET); + aom_img_read(raw, infile); + get_raw_image(&frame_to_encode, raw, raw_shift); + + ++frame_count; + get_frame_stats(&codec, frame_to_encode, frame_count, 1, + AOM_EFLAG_NO_REF_LAST2 | AOM_EFLAG_NO_REF_LAST3 | + AOM_EFLAG_NO_REF_GF | AOM_EFLAG_NO_REF_ARF | + AOM_EFLAG_NO_REF_BWD | AOM_EFLAG_NO_REF_ARF2 | + AOM_EFLAG_NO_UPD_LAST | AOM_EFLAG_NO_UPD_GF | + AOM_EFLAG_NO_UPD_ARF | AOM_EFLAG_NO_UPD_ENTROPY, + &stats); + } + } + } + } + // Flush encoder. + // No ARF, this should not be needed. + while (get_frame_stats(&codec, NULL, frame_count, 1, 0, &stats)) { + } + + if (aom_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec."); + + printf("\nFirst pass complete. Processed %d frames.\n", frame_count); + + return stats; +} + +static void pass1(aom_image_t *raw, FILE *infile, const char *outfile_name, + aom_codec_iface_t *encoder, aom_codec_enc_cfg_t *cfg, + int lf_width, int lf_height, int lf_blocksize, int flags, + aom_image_t *raw_shift) { + AvxVideoInfo info = { get_fourcc_by_aom_encoder(encoder), + cfg->g_w, + cfg->g_h, + { cfg->g_timebase.num, cfg->g_timebase.den }, + 0 }; + AvxVideoWriter *writer = NULL; + aom_codec_ctx_t codec; + int frame_count = 0; + int image_size_bytes = img_size_bytes(raw); + int bu, bv; + int u_blocks, v_blocks; + aom_image_t *frame_to_encode; + aom_image_t reference_images[MAX_EXTERNAL_REFERENCES]; + int reference_image_num = 0; + int i; + + writer = aom_video_writer_open(outfile_name, kContainerIVF, &info); + if (!writer) die("Failed to open %s for writing", outfile_name); + + if (aom_codec_enc_init(&codec, encoder, cfg, flags)) + die("Failed to initialize encoder"); + if (aom_codec_control(&codec, AOME_SET_ENABLEAUTOALTREF, 0)) + die_codec(&codec, "Failed to turn off auto altref"); + if 
(aom_codec_control(&codec, AV1E_SET_FRAME_PARALLEL_DECODING, 0)) + die_codec(&codec, "Failed to set frame parallel decoding"); + if (aom_codec_control(&codec, AV1E_ENABLE_EXT_TILE_DEBUG, 1)) + die_codec(&codec, "Failed to enable encoder ext_tile debug"); + if (aom_codec_control(&codec, AOME_SET_CPUUSED, 3)) + die_codec(&codec, "Failed to set cpu-used"); + + // Note: The superblock is a sequence parameter and has to be the same for 1 + // sequence. In lightfield application, must choose the superblock size(either + // 64x64 or 128x128) before the encoding starts. Otherwise, the default is + // AOM_SUPERBLOCK_SIZE_DYNAMIC, and the superblock size will be set to 64x64 + // internally. + if (aom_codec_control(&codec, AV1E_SET_SUPERBLOCK_SIZE, + AOM_SUPERBLOCK_SIZE_64X64)) + die_codec(&codec, "Failed to set SB size"); + + u_blocks = (lf_width + lf_blocksize - 1) / lf_blocksize; + v_blocks = (lf_height + lf_blocksize - 1) / lf_blocksize; + + reference_image_num = u_blocks * v_blocks; + // Set the max gf group length so the references are guaranteed to be in + // a different gf group than any of the regular frames. This avoids using + // both vbr and constant quality mode in a single group. The number of + // references now cannot surpass 17 because of the enforced MAX_GF_INTERVAL of + // 16. If it is necessary to exceed this reference frame limit, one will have + // to do some additional handling to ensure references are in separate gf + // groups from the regular frames. + if (aom_codec_control(&codec, AV1E_SET_MAX_GF_INTERVAL, + reference_image_num - 1)) + die_codec(&codec, "Failed to set max gf interval"); + aom_img_fmt_t ref_fmt = AOM_IMG_FMT_I420; + if (FORCE_HIGHBITDEPTH_DECODING) ref_fmt |= AOM_IMG_FMT_HIGHBITDEPTH; + // Allocate memory with the border so that it can be used as a reference. 
+ const bool resize = + codec.config.enc->rc_resize_mode || codec.config.enc->rc_superres_mode; + const bool all_intra = reference_image_num - 1 == 0; + int border_in_pixels = + av1_get_enc_border_size(resize, all_intra, BLOCK_64X64); + + for (i = 0; i < reference_image_num; i++) { + if (!aom_img_alloc_with_border(&reference_images[i], ref_fmt, cfg->g_w, + cfg->g_h, 32, 8, border_in_pixels)) { + die("Failed to allocate image."); + } + } + + printf("\n Second pass: "); + + // Encode reference images first. + printf("Encoding Reference Images\n"); + for (bv = 0; bv < v_blocks; ++bv) { + for (bu = 0; bu < u_blocks; ++bu) { + const int block_u_min = bu * lf_blocksize; + const int block_v_min = bv * lf_blocksize; + int block_u_end = (bu + 1) * lf_blocksize; + int block_v_end = (bv + 1) * lf_blocksize; + int u_block_size, v_block_size; + int block_ref_u, block_ref_v; + + block_u_end = block_u_end < lf_width ? block_u_end : lf_width; + block_v_end = block_v_end < lf_height ? block_v_end : lf_height; + u_block_size = block_u_end - block_u_min; + v_block_size = block_v_end - block_v_min; + block_ref_u = block_u_min + u_block_size / 2; + block_ref_v = block_v_min + v_block_size / 2; + + printf("A%d, ", (block_ref_u + block_ref_v * lf_width)); + fseek(infile, (block_ref_u + block_ref_v * lf_width) * image_size_bytes, + SEEK_SET); + aom_img_read(raw, infile); + + get_raw_image(&frame_to_encode, raw, raw_shift); + + // Reference frames may be encoded without tiles. 
+ ++frame_count; + printf("Encoding reference image %d of %d\n", bv * u_blocks + bu, + u_blocks * v_blocks); + encode_frame(&codec, frame_to_encode, frame_count, 1, + AOM_EFLAG_NO_REF_LAST2 | AOM_EFLAG_NO_REF_LAST3 | + AOM_EFLAG_NO_REF_GF | AOM_EFLAG_NO_REF_ARF | + AOM_EFLAG_NO_REF_BWD | AOM_EFLAG_NO_REF_ARF2 | + AOM_EFLAG_NO_UPD_LAST | AOM_EFLAG_NO_UPD_GF | + AOM_EFLAG_NO_UPD_ARF | AOM_EFLAG_NO_UPD_ENTROPY, + writer); + + if (aom_codec_control(&codec, AV1_COPY_NEW_FRAME_IMAGE, + &reference_images[frame_count - 1])) + die_codec(&codec, "Failed to copy decoder reference frame"); + } + } + + cfg->large_scale_tile = 1; + // Fixed q encoding for camera frames. + cfg->rc_end_usage = AOM_Q; + if (aom_codec_enc_config_set(&codec, cfg)) + die_codec(&codec, "Failed to configure encoder"); + + // The fixed q value used in encoding. + if (aom_codec_control(&codec, AOME_SET_CQ_LEVEL, 36)) + die_codec(&codec, "Failed to set cq level"); + if (aom_codec_control(&codec, AV1E_SET_FRAME_PARALLEL_DECODING, 1)) + die_codec(&codec, "Failed to set frame parallel decoding"); + if (aom_codec_control(&codec, AV1E_SET_SINGLE_TILE_DECODING, 1)) + die_codec(&codec, "Failed to turn on single tile decoding"); + // Set tile_columns and tile_rows to MAX values, which guarantees the tile + // size of 64 x 64 pixels(i.e. 1 SB) for <= 4k resolution. + if (aom_codec_control(&codec, AV1E_SET_TILE_COLUMNS, 6)) + die_codec(&codec, "Failed to set tile width"); + if (aom_codec_control(&codec, AV1E_SET_TILE_ROWS, 6)) + die_codec(&codec, "Failed to set tile height"); + + for (bv = 0; bv < v_blocks; ++bv) { + for (bu = 0; bu < u_blocks; ++bu) { + const int block_u_min = bu * lf_blocksize; + const int block_v_min = bv * lf_blocksize; + int block_u_end = (bu + 1) * lf_blocksize; + int block_v_end = (bv + 1) * lf_blocksize; + int u, v; + block_u_end = block_u_end < lf_width ? block_u_end : lf_width; + block_v_end = block_v_end < lf_height ? 
block_v_end : lf_height; + for (v = block_v_min; v < block_v_end; ++v) { + for (u = block_u_min; u < block_u_end; ++u) { + av1_ref_frame_t ref; + ref.idx = 0; + ref.use_external_ref = 1; + ref.img = reference_images[bv * u_blocks + bu]; + if (aom_codec_control(&codec, AV1_SET_REFERENCE, &ref)) + die_codec(&codec, "Failed to set reference frame"); + + printf("C%d, ", (u + v * lf_width)); + fseek(infile, (u + v * lf_width) * image_size_bytes, SEEK_SET); + aom_img_read(raw, infile); + get_raw_image(&frame_to_encode, raw, raw_shift); + + ++frame_count; + printf("Encoding image %d of %d\n", + frame_count - (u_blocks * v_blocks), lf_width * lf_height); + encode_frame(&codec, frame_to_encode, frame_count, 1, + AOM_EFLAG_NO_REF_LAST2 | AOM_EFLAG_NO_REF_LAST3 | + AOM_EFLAG_NO_REF_GF | AOM_EFLAG_NO_REF_ARF | + AOM_EFLAG_NO_REF_BWD | AOM_EFLAG_NO_REF_ARF2 | + AOM_EFLAG_NO_UPD_LAST | AOM_EFLAG_NO_UPD_GF | + AOM_EFLAG_NO_UPD_ARF | AOM_EFLAG_NO_UPD_ENTROPY, + writer); + } + } + } + } + + // Flush encoder. + // No ARF, this should not be needed. + while (encode_frame(&codec, NULL, -1, 1, 0, writer)) { + } + + for (i = 0; i < reference_image_num; i++) aom_img_free(&reference_images[i]); + + if (aom_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec."); + + // Modify large_scale_file fourcc. + if (cfg->large_scale_tile == 1) + aom_video_writer_set_fourcc(writer, LST_FOURCC); + aom_video_writer_close(writer); + + printf("\nSecond pass complete. Processed %d frames.\n", frame_count); +} + +int main(int argc, char **argv) { + FILE *infile = NULL; + int w, h; + // The number of lightfield images in the u and v dimensions. + int lf_width, lf_height; + // Defines how many images refer to the same reference image for MCP. + // lf_blocksize X lf_blocksize images will all use the reference image + // in the middle of the block of images. 
+ int lf_blocksize; + aom_codec_ctx_t codec; + aom_codec_enc_cfg_t cfg; + aom_image_t raw; + aom_image_t raw_shift; + aom_codec_err_t res; + aom_fixed_buf_t stats; + int flags = 0; + + const int fps = 30; + const int bitrate = 200; // kbit/s + const char *const width_arg = argv[1]; + const char *const height_arg = argv[2]; + const char *const infile_arg = argv[3]; + const char *const outfile_arg = argv[4]; + const char *const lf_width_arg = argv[5]; + const char *const lf_height_arg = argv[6]; + const char *lf_blocksize_arg = argv[7]; + exec_name = argv[0]; + + if (argc < 8) die("Invalid number of arguments"); + + aom_codec_iface_t *encoder = get_aom_encoder_by_short_name("av1"); + if (!encoder) die("Unsupported codec."); + + w = (int)strtol(width_arg, NULL, 0); + h = (int)strtol(height_arg, NULL, 0); + lf_width = (int)strtol(lf_width_arg, NULL, 0); + lf_height = (int)strtol(lf_height_arg, NULL, 0); + lf_blocksize = (int)strtol(lf_blocksize_arg, NULL, 0); + lf_blocksize = lf_blocksize < lf_width ? lf_blocksize : lf_width; + lf_blocksize = lf_blocksize < lf_height ? lf_blocksize : lf_height; + + if (w <= 0 || h <= 0 || (w % 2) != 0 || (h % 2) != 0) + die("Invalid frame size: %dx%d", w, h); + if (lf_width <= 0 || lf_height <= 0) + die("Invalid lf_width and/or lf_height: %dx%d", lf_width, lf_height); + if (lf_blocksize <= 0) die("Invalid lf_blocksize: %d", lf_blocksize); + + if (!aom_img_alloc(&raw, AOM_IMG_FMT_I420, w, h, 32)) { + die("Failed to allocate image."); + } + if (FORCE_HIGHBITDEPTH_DECODING) { + // Need to allocate larger buffer to use hbd internal. 
+ aom_img_alloc(&raw_shift, AOM_IMG_FMT_I420 | AOM_IMG_FMT_HIGHBITDEPTH, w, h, + 32); + } + + printf("Using %s\n", aom_codec_iface_name(encoder)); + + // Configuration + res = aom_codec_enc_config_default(encoder, &cfg, 0); + if (res) die_codec(&codec, "Failed to get default codec config."); + + cfg.g_w = w; + cfg.g_h = h; + cfg.g_timebase.num = 1; + cfg.g_timebase.den = fps; + cfg.rc_target_bitrate = bitrate; + cfg.g_error_resilient = 0; // This is required. + cfg.g_lag_in_frames = 0; // need to set this since default is 19. + cfg.kf_mode = AOM_KF_DISABLED; + cfg.large_scale_tile = 0; // Only set it to 1 for camera frame encoding. + cfg.g_bit_depth = AOM_BITS_8; + flags |= (cfg.g_bit_depth > AOM_BITS_8 || FORCE_HIGHBITDEPTH_DECODING) + ? AOM_CODEC_USE_HIGHBITDEPTH + : 0; + + if (!(infile = fopen(infile_arg, "rb"))) + die("Failed to open %s for reading", infile_arg); + + // Pass 0 + cfg.g_pass = AOM_RC_FIRST_PASS; + stats = pass0(&raw, infile, encoder, &cfg, lf_width, lf_height, lf_blocksize, + flags, &raw_shift); + + // Pass 1 + rewind(infile); + cfg.g_pass = AOM_RC_LAST_PASS; + cfg.rc_twopass_stats_in = stats; + pass1(&raw, infile, outfile_arg, encoder, &cfg, lf_width, lf_height, + lf_blocksize, flags, &raw_shift); + free(stats.buf); + + if (FORCE_HIGHBITDEPTH_DECODING) aom_img_free(&raw_shift); + aom_img_free(&raw); + fclose(infile); + + return EXIT_SUCCESS; +} diff --git a/third_party/aom/examples/lightfield_tile_list_decoder.c b/third_party/aom/examples/lightfield_tile_list_decoder.c new file mode 100644 index 0000000000..d71ff5b387 --- /dev/null +++ b/third_party/aom/examples/lightfield_tile_list_decoder.c @@ -0,0 +1,232 @@ +/* + * Copyright (c) 2018, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. 
If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +// Lightfield Tile List Decoder +// ============================ +// +// This is a lightfield tile list decoder example. It takes an input file that +// contains the anchor frames that are references of the coded tiles, the camera +// frame header, and tile list OBUs that include the tile information and the +// compressed tile data. This input file is reconstructed from the encoded +// lightfield ivf file, and is decodable by an AV1 decoder. num_references is +// the number of anchor frames coded at the beginning of the light field file. +// num_tile_lists is the number of tile lists that need to be decoded. An +// optional parameter selects the output format; the supported +// formats are YUV1D (default), YUV, and NV12. +// Run lightfield tile list decoder to decode an AV1 tile list file: +// examples/lightfield_tile_list_decoder vase_tile_list.ivf vase_tile_list.yuv +// 4 2 0(optional) + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <assert.h> + +#include "aom/aom_decoder.h" +#include "aom/aomdx.h" +#include "aom_scale/yv12config.h" +#include "av1/common/enums.h" +#include "common/tools_common.h" +#include "common/video_reader.h" + +enum { + YUV1D, // 1D tile output for conformance test. + YUV, // Tile output in YUV format. + NV12, // Tile output in NV12 format. 
+} UENUM1BYTE(OUTPUT_FORMAT); + +static const char *exec_name; + +void usage_exit(void) { + fprintf(stderr, + "Usage: %s <infile> <outfile> <num_references> <num_tile_lists> " + "<output format(optional)>\n", + exec_name); + exit(EXIT_FAILURE); +} + +static void write_tile_yuv1d(aom_codec_ctx_t *codec, const aom_image_t *img, + FILE *file) { + // read out the tile size. + unsigned int tile_size = 0; + if (AOM_CODEC_CONTROL_TYPECHECKED(codec, AV1D_GET_TILE_SIZE, &tile_size)) + die_codec(codec, "Failed to get the tile size"); + const unsigned int tile_width = tile_size >> 16; + const unsigned int tile_height = tile_size & 65535; + const uint32_t output_frame_width_in_tiles = img->d_w / tile_width; + + unsigned int tile_count = 0; + if (AOM_CODEC_CONTROL_TYPECHECKED(codec, AV1D_GET_TILE_COUNT, &tile_count)) + die_codec(codec, "Failed to get the tile size"); + + // Write tile to file. + const int shift = (img->fmt & AOM_IMG_FMT_HIGHBITDEPTH) ? 1 : 0; + unsigned int tile_idx; + + for (tile_idx = 0; tile_idx < tile_count; ++tile_idx) { + const int row_offset = + (tile_idx / output_frame_width_in_tiles) * tile_height; + const int col_offset = + (tile_idx % output_frame_width_in_tiles) * tile_width; + int plane; + + for (plane = 0; plane < 3; ++plane) { + const unsigned char *buf = img->planes[plane]; + const int stride = img->stride[plane]; + const int roffset = + (plane > 0) ? row_offset >> img->y_chroma_shift : row_offset; + const int coffset = + (plane > 0) ? col_offset >> img->x_chroma_shift : col_offset; + const int w = (plane > 0) ? ((tile_width >> img->x_chroma_shift) << shift) + : (tile_width << shift); + const int h = + (plane > 0) ? (tile_height >> img->y_chroma_shift) : tile_height; + int y; + + // col offset needs to be adjusted for HBD. 
+ buf += roffset * stride + (coffset << shift); + + for (y = 0; y < h; ++y) { + fwrite(buf, 1, w, file); + buf += stride; + } + } + } +} + +int main(int argc, char **argv) { + FILE *outfile = NULL; + AvxVideoReader *reader = NULL; + const AvxVideoInfo *info = NULL; + int num_references; + int num_tile_lists; + aom_image_t reference_images[MAX_EXTERNAL_REFERENCES]; + size_t frame_size = 0; + const unsigned char *frame = NULL; + int output_format = YUV1D; + int i, j, n; + + exec_name = argv[0]; + + if (argc < 5) die("Invalid number of arguments."); + + reader = aom_video_reader_open(argv[1]); + if (!reader) die("Failed to open %s for reading.", argv[1]); + + if (!(outfile = fopen(argv[2], "wb"))) + die("Failed to open %s for writing.", argv[2]); + + num_references = (int)strtol(argv[3], NULL, 0); + num_tile_lists = (int)strtol(argv[4], NULL, 0); + + if (argc > 5) output_format = (int)strtol(argv[5], NULL, 0); + if (output_format < YUV1D || output_format > NV12) + die("Output format out of range [0, 2]"); + + info = aom_video_reader_get_info(reader); + + aom_codec_iface_t *decoder = get_aom_decoder_by_fourcc(info->codec_fourcc); + if (!decoder) die("Unknown input codec."); + printf("Using %s\n", aom_codec_iface_name(decoder)); + + aom_codec_ctx_t codec; + if (aom_codec_dec_init(&codec, decoder, NULL, 0)) + die("Failed to initialize decoder."); + + if (AOM_CODEC_CONTROL_TYPECHECKED(&codec, AV1D_SET_IS_ANNEXB, + info->is_annexb)) { + die_codec(&codec, "Failed to set annex b status"); + } + + // Decode anchor frames. 
+ AOM_CODEC_CONTROL_TYPECHECKED(&codec, AV1_SET_TILE_MODE, 0); + for (i = 0; i < num_references; ++i) { + aom_video_reader_read_frame(reader); + frame = aom_video_reader_get_frame(reader, &frame_size); + if (aom_codec_decode(&codec, frame, frame_size, NULL)) + die_codec(&codec, "Failed to decode frame."); + + if (i == 0) { + aom_img_fmt_t ref_fmt = 0; + if (AOM_CODEC_CONTROL_TYPECHECKED(&codec, AV1D_GET_IMG_FORMAT, &ref_fmt)) + die_codec(&codec, "Failed to get the image format"); + + int frame_res[2]; + if (AOM_CODEC_CONTROL_TYPECHECKED(&codec, AV1D_GET_FRAME_SIZE, frame_res)) + die_codec(&codec, "Failed to get the image frame size"); + + // Allocate memory to store decoded references. Allocate memory with the + // border so that it can be used as a reference. + for (j = 0; j < num_references; j++) { + unsigned int border = AOM_DEC_BORDER_IN_PIXELS; + if (!aom_img_alloc_with_border(&reference_images[j], ref_fmt, + frame_res[0], frame_res[1], 32, 8, + border)) { + fatal("Failed to allocate references."); + } + } + } + + if (AOM_CODEC_CONTROL_TYPECHECKED(&codec, AV1_COPY_NEW_FRAME_IMAGE, + &reference_images[i])) + die_codec(&codec, "Failed to copy decoded reference frame"); + + aom_codec_iter_t iter = NULL; + aom_image_t *img = NULL; + while ((img = aom_codec_get_frame(&codec, &iter)) != NULL) { + char name[1024]; + snprintf(name, sizeof(name), "ref_%d.yuv", i); + printf("writing ref image to %s, %u, %u\n", name, img->d_w, img->d_h); + FILE *ref_file = fopen(name, "wb"); + aom_img_write(img, ref_file); + fclose(ref_file); + } + } + + // Decode the lightfield. + AOM_CODEC_CONTROL_TYPECHECKED(&codec, AV1_SET_TILE_MODE, 1); + + // Set external references. + av1_ext_ref_frame_t set_ext_ref = { &reference_images[0], num_references }; + AOM_CODEC_CONTROL_TYPECHECKED(&codec, AV1D_SET_EXT_REF_PTR, &set_ext_ref); + // Must decode the camera frame header first. 
+ aom_video_reader_read_frame(reader); + frame = aom_video_reader_get_frame(reader, &frame_size); + if (aom_codec_decode(&codec, frame, frame_size, NULL)) + die_codec(&codec, "Failed to decode the frame."); + // Decode tile lists one by one. + for (n = 0; n < num_tile_lists; n++) { + aom_video_reader_read_frame(reader); + frame = aom_video_reader_get_frame(reader, &frame_size); + + if (aom_codec_decode(&codec, frame, frame_size, NULL)) + die_codec(&codec, "Failed to decode the tile list."); + aom_codec_iter_t iter = NULL; + aom_image_t *img = aom_codec_get_frame(&codec, &iter); + if (!img) die_codec(&codec, "Failed to get frame."); + + if (output_format == YUV1D) + // write the tile to the output file in 1D format. + write_tile_yuv1d(&codec, img, outfile); + else if (output_format == YUV) + aom_img_write(img, outfile); + else + // NV12 output format + aom_img_write_nv12(img, outfile); + } + + for (i = 0; i < num_references; i++) aom_img_free(&reference_images[i]); + if (aom_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec"); + aom_video_reader_close(reader); + fclose(outfile); + + return EXIT_SUCCESS; +} diff --git a/third_party/aom/examples/lossless_encoder.c b/third_party/aom/examples/lossless_encoder.c new file mode 100644 index 0000000000..1971b9c9df --- /dev/null +++ b/third_party/aom/examples/lossless_encoder.c @@ -0,0 +1,137 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "aom/aom_encoder.h" +#include "aom/aomcx.h" +#include "common/tools_common.h" +#include "common/video_writer.h" + +static const char *exec_name; + +void usage_exit(void) { + fprintf(stderr, + "lossless_encoder: Example demonstrating lossless " + "encoding feature. Supports raw input only.\n"); + fprintf(stderr, "Usage: %s <width> <height> <infile> <outfile>\n", exec_name); + exit(EXIT_FAILURE); +} + +static int encode_frame(aom_codec_ctx_t *codec, aom_image_t *img, + int frame_index, int flags, AvxVideoWriter *writer) { + int got_pkts = 0; + aom_codec_iter_t iter = NULL; + const aom_codec_cx_pkt_t *pkt = NULL; + const aom_codec_err_t res = + aom_codec_encode(codec, img, frame_index, 1, flags); + if (res != AOM_CODEC_OK) die_codec(codec, "Failed to encode frame"); + + while ((pkt = aom_codec_get_cx_data(codec, &iter)) != NULL) { + got_pkts = 1; + + if (pkt->kind == AOM_CODEC_CX_FRAME_PKT) { + const int keyframe = (pkt->data.frame.flags & AOM_FRAME_IS_KEY) != 0; + if (!aom_video_writer_write_frame(writer, pkt->data.frame.buf, + pkt->data.frame.sz, + pkt->data.frame.pts)) { + die_codec(codec, "Failed to write compressed frame"); + } + printf(keyframe ? "K" : "."); + fflush(stdout); + } + } + + return got_pkts; +} + +int main(int argc, char **argv) { + FILE *infile = NULL; + aom_codec_enc_cfg_t cfg; + int frame_count = 0; + aom_image_t raw; + aom_codec_err_t res; + AvxVideoInfo info; + AvxVideoWriter *writer = NULL; + const int fps = 30; + + exec_name = argv[0]; + + // Clear explicitly, as simply assigning "{ 0 }" generates + // "missing-field-initializers" warning in some compilers. 
+ memset(&info, 0, sizeof(info)); + + if (argc < 5) die("Invalid number of arguments"); + + aom_codec_iface_t *encoder = get_aom_encoder_by_short_name("av1"); + if (!encoder) die("Unsupported codec."); + + info.codec_fourcc = get_fourcc_by_aom_encoder(encoder); + info.frame_width = (int)strtol(argv[1], NULL, 0); + info.frame_height = (int)strtol(argv[2], NULL, 0); + info.time_base.numerator = 1; + info.time_base.denominator = fps; + + if (info.frame_width <= 0 || info.frame_height <= 0 || + (info.frame_width % 2) != 0 || (info.frame_height % 2) != 0) { + die("Invalid frame size: %dx%d", info.frame_width, info.frame_height); + } + + if (!aom_img_alloc(&raw, AOM_IMG_FMT_I420, info.frame_width, + info.frame_height, 1)) { + die("Failed to allocate image."); + } + + printf("Using %s\n", aom_codec_iface_name(encoder)); + + aom_codec_ctx_t codec; + res = aom_codec_enc_config_default(encoder, &cfg, 0); + if (res) die_codec(&codec, "Failed to get default codec config."); + + cfg.g_w = info.frame_width; + cfg.g_h = info.frame_height; + cfg.g_timebase.num = info.time_base.numerator; + cfg.g_timebase.den = info.time_base.denominator; + + writer = aom_video_writer_open(argv[4], kContainerIVF, &info); + if (!writer) die("Failed to open %s for writing.", argv[4]); + + if (!(infile = fopen(argv[3], "rb"))) + die("Failed to open %s for reading.", argv[3]); + + if (aom_codec_enc_init(&codec, encoder, &cfg, 0)) + die("Failed to initialize encoder"); + + if (AOM_CODEC_CONTROL_TYPECHECKED(&codec, AV1E_SET_LOSSLESS, 1)) + die_codec(&codec, "Failed to use lossless mode"); + + // Encode frames. + while (aom_img_read(&raw, infile)) { + encode_frame(&codec, &raw, frame_count++, 0, writer); + } + + // Flush encoder. 
+ while (encode_frame(&codec, NULL, -1, 0, writer)) { + } + + printf("\n"); + fclose(infile); + printf("Processed %d frames.\n", frame_count); + + aom_img_free(&raw); + if (aom_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec."); + + aom_video_writer_close(writer); + + return EXIT_SUCCESS; +} diff --git a/third_party/aom/examples/noise_model.c b/third_party/aom/examples/noise_model.c new file mode 100644 index 0000000000..1de13267fc --- /dev/null +++ b/third_party/aom/examples/noise_model.c @@ -0,0 +1,434 @@ +/* + * Copyright (c) 2018, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +/*!\file + * \brief This is a sample binary to create noise params from input video. + * + * To allow for external denoising applications, this sample binary illustrates + * how to create a film grain table (film grain params as a function of time) + * from an input video and its corresponding denoised source. + * + * The --output-grain-table file can be passed as input to the encoder (in + * aomenc this is done through the "--film-grain-table" parameter). 
+ * + * As an example, where the input source is an 854x480 yuv420p 8-bit video + * named "input.854_480.yuv" you would use steps similar to the following: + * + * # Run your denoiser (e.g., using the hqdn3d filter): + * ffmpeg -vcodec rawvideo -video_size 854x480 -i input.854_480.yuv \ + * -vf hqdn3d=5:5:5:5 -vcodec rawvideo -an -f rawvideo \ + * denoised.854_480.yuv + * + * # Model the noise between the denoised version and original source: + * ./examples/noise_model --fps=25/1 --width=854 --height=480 --i420 \ + * --input-denoised=denoised.854_480.yuv --input=input.854_480.yuv \ + * --output-grain-table=film_grain.tbl + * + * # Encode with your favorite settings (including the grain table): + * aomenc --limit=100 --cpu-used=4 --input-bit-depth=8 \ + * --i420 -w 854 -h 480 --end-usage=q --cq-level=25 --lag-in-frames=25 \ + * --auto-alt-ref=2 --bit-depth=8 --film-grain-table=film_grain.tbl \ + * -o denoised_with_grain_params.ivf denoised.854_480.yuv + */ +#include <math.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "aom/aom_encoder.h" +#include "aom_dsp/aom_dsp_common.h" + +#if CONFIG_AV1_DECODER +#include "av1/decoder/grain_synthesis.h" +#endif + +#include "aom_dsp/grain_table.h" +#include "aom_dsp/noise_model.h" +#include "aom_dsp/noise_util.h" +#include "aom_mem/aom_mem.h" +#include "common/args.h" +#include "common/tools_common.h" +#include "common/video_writer.h" + +static const char *exec_name; + +void usage_exit(void) { + fprintf(stderr, + "Usage: %s --input=<input> --input-denoised=<denoised> " + "--output-grain-table=<outfile> " + "See comments in noise_model.c for more information.\n", + exec_name); + exit(EXIT_FAILURE); +} + +static const arg_def_t help = + ARG_DEF(NULL, "help", 0, "Show usage options and exit"); +static const arg_def_t width_arg = + ARG_DEF("w", "width", 1, "Input width (if rawvideo)"); +static const arg_def_t height_arg = + ARG_DEF("h", "height", 1, "Input height (if rawvideo)"); +static const arg_def_t 
skip_frames_arg = + ARG_DEF("s", "skip-frames", 1, "Number of frames to skip (default = 1)"); +static const arg_def_t fps_arg = ARG_DEF(NULL, "fps", 1, "Frame rate"); +static const arg_def_t input_arg = ARG_DEF("-i", "input", 1, "Input filename"); +static const arg_def_t output_grain_table_arg = + ARG_DEF("n", "output-grain-table", 1, "Output noise file"); +static const arg_def_t input_denoised_arg = + ARG_DEF("d", "input-denoised", 1, "Input denoised filename (YUV) only"); +static const arg_def_t flat_block_finder_arg = + ARG_DEF("b", "flat-block-finder", 1, "Run the flat block finder"); +static const arg_def_t block_size_arg = + ARG_DEF("b", "block-size", 1, "Block size"); +static const arg_def_t bit_depth_arg = + ARG_DEF(NULL, "bit-depth", 1, "Bit depth of input"); +static const arg_def_t use_i420 = + ARG_DEF(NULL, "i420", 0, "Input file (and denoised) is I420 (default)"); +static const arg_def_t use_i422 = + ARG_DEF(NULL, "i422", 0, "Input file (and denoised) is I422"); +static const arg_def_t use_i444 = + ARG_DEF(NULL, "i444", 0, "Input file (and denoised) is I444"); +static const arg_def_t debug_file_arg = + ARG_DEF(NULL, "debug-file", 1, "File to output debug info"); + +typedef struct { + int width; + int height; + struct aom_rational fps; + const char *input; + const char *input_denoised; + const char *output_grain_table; + int img_fmt; + int block_size; + int bit_depth; + int run_flat_block_finder; + int force_flat_psd; + int skip_frames; + const char *debug_file; +} noise_model_args_t; + +static void parse_args(noise_model_args_t *noise_args, char **argv) { + struct arg arg; + static const arg_def_t *main_args[] = { &help, + &input_arg, + &fps_arg, + &width_arg, + &height_arg, + &block_size_arg, + &output_grain_table_arg, + &input_denoised_arg, + &use_i420, + &use_i422, + &use_i444, + &debug_file_arg, + NULL }; + for (; *argv; argv++) { + if (arg_match(&arg, &help, argv)) { + fprintf(stdout, "\nOptions:\n"); + arg_show_usage(stdout, main_args); + exit(0); 
+ } else if (arg_match(&arg, &width_arg, argv)) { + noise_args->width = atoi(arg.val); + } else if (arg_match(&arg, &height_arg, argv)) { + noise_args->height = atoi(arg.val); + } else if (arg_match(&arg, &input_arg, argv)) { + noise_args->input = arg.val; + } else if (arg_match(&arg, &input_denoised_arg, argv)) { + noise_args->input_denoised = arg.val; + } else if (arg_match(&arg, &output_grain_table_arg, argv)) { + noise_args->output_grain_table = arg.val; + } else if (arg_match(&arg, &block_size_arg, argv)) { + noise_args->block_size = atoi(arg.val); + } else if (arg_match(&arg, &bit_depth_arg, argv)) { + noise_args->bit_depth = atoi(arg.val); + } else if (arg_match(&arg, &flat_block_finder_arg, argv)) { + noise_args->run_flat_block_finder = atoi(arg.val); + } else if (arg_match(&arg, &fps_arg, argv)) { + noise_args->fps = arg_parse_rational(&arg); + } else if (arg_match(&arg, &use_i420, argv)) { + noise_args->img_fmt = AOM_IMG_FMT_I420; + } else if (arg_match(&arg, &use_i422, argv)) { + noise_args->img_fmt = AOM_IMG_FMT_I422; + } else if (arg_match(&arg, &use_i444, argv)) { + noise_args->img_fmt = AOM_IMG_FMT_I444; + } else if (arg_match(&arg, &skip_frames_arg, argv)) { + noise_args->skip_frames = atoi(arg.val); + } else if (arg_match(&arg, &debug_file_arg, argv)) { + noise_args->debug_file = arg.val; + } else { + fprintf(stdout, "Unknown arg: %s\n\nUsage:\n", *argv); + arg_show_usage(stdout, main_args); + exit(0); + } + } + if (noise_args->bit_depth > 8) { + noise_args->img_fmt |= AOM_IMG_FMT_HIGHBITDEPTH; + } +} + +#if CONFIG_AV1_DECODER +static void print_variance_y(FILE *debug_file, aom_image_t *raw, + aom_image_t *denoised, const uint8_t *flat_blocks, + int block_size, aom_film_grain_t *grain) { + aom_image_t renoised; + grain->apply_grain = 1; + grain->random_seed = 7391; + grain->bit_depth = raw->bit_depth; + aom_img_alloc(&renoised, raw->fmt, raw->w, raw->h, 1); + + if (av1_add_film_grain(grain, denoised, &renoised)) { + fprintf(stderr, "Internal 
failure in av1_add_film_grain().\n"); + aom_img_free(&renoised); + return; + } + + const int num_blocks_w = (raw->w + block_size - 1) / block_size; + const int num_blocks_h = (raw->h + block_size - 1) / block_size; + fprintf(debug_file, "x = ["); + for (int by = 0; by < num_blocks_h; by++) { + for (int bx = 0; bx < num_blocks_w; bx++) { + double block_mean = 0; + double noise_std = 0, noise_mean = 0; + double renoise_std = 0, renoise_mean = 0; + for (int yi = 0; yi < block_size; ++yi) { + const int y = by * block_size + yi; + for (int xi = 0; xi < block_size; ++xi) { + const int x = bx * block_size + xi; + const double noise_v = (raw->planes[0][y * raw->stride[0] + x] - + denoised->planes[0][y * raw->stride[0] + x]); + noise_mean += noise_v; + noise_std += noise_v * noise_v; + + block_mean += raw->planes[0][y * raw->stride[0] + x]; + + const double renoise_v = + (renoised.planes[0][y * raw->stride[0] + x] - + denoised->planes[0][y * raw->stride[0] + x]); + renoise_mean += renoise_v; + renoise_std += renoise_v * renoise_v; + } + } + int n = (block_size * block_size); + block_mean /= n; + noise_mean /= n; + renoise_mean /= n; + noise_std = sqrt(noise_std / n - noise_mean * noise_mean); + renoise_std = sqrt(renoise_std / n - renoise_mean * renoise_mean); + fprintf(debug_file, "%d %3.2lf %3.2lf %3.2lf ", + flat_blocks[by * num_blocks_w + bx], block_mean, noise_std, + renoise_std); + } + fprintf(debug_file, "\n"); + } + fprintf(debug_file, "];\n"); + + if (raw->fmt & AOM_IMG_FMT_HIGHBITDEPTH) { + fprintf(stderr, + "Detailed debug info not supported for high bit" + "depth formats\n"); + } else { + fprintf(debug_file, "figure(2); clf;\n"); + fprintf(debug_file, + "scatter(x(:, 2:4:end), x(:, 3:4:end), 'r'); hold on;\n"); + fprintf(debug_file, "scatter(x(:, 2:4:end), x(:, 4:4:end), 'b');\n"); + fprintf(debug_file, + "plot(linspace(0, 255, length(noise_strength_0)), " + "noise_strength_0, 'b');\n"); + fprintf(debug_file, + "title('Scatter plot of intensity vs noise 
strength');\n"); + fprintf(debug_file, + "legend('Actual', 'Estimated', 'Estimated strength');\n"); + fprintf(debug_file, "figure(3); clf;\n"); + fprintf(debug_file, "scatter(x(:, 3:4:end), x(:, 4:4:end), 'k');\n"); + fprintf(debug_file, "title('Actual vs Estimated');\n"); + fprintf(debug_file, "pause(3);\n"); + } + aom_img_free(&renoised); +} +#endif + +static void print_debug_info(FILE *debug_file, aom_image_t *raw, + aom_image_t *denoised, uint8_t *flat_blocks, + int block_size, aom_noise_model_t *noise_model) { + (void)raw; + (void)denoised; + (void)flat_blocks; + (void)block_size; + fprintf(debug_file, "figure(3); clf;\n"); + fprintf(debug_file, "figure(2); clf;\n"); + fprintf(debug_file, "figure(1); clf;\n"); + for (int c = 0; c < 3; ++c) { + fprintf(debug_file, "noise_strength_%d = [\n", c); + const aom_equation_system_t *eqns = + &noise_model->combined_state[c].strength_solver.eqns; + for (int k = 0; k < eqns->n; ++k) { + fprintf(debug_file, "%lf ", eqns->x[k]); + } + fprintf(debug_file, "];\n"); + fprintf(debug_file, "plot(noise_strength_%d); hold on;\n", c); + } + fprintf(debug_file, "legend('Y', 'cb', 'cr');\n"); + fprintf(debug_file, "title('Noise strength function');\n"); + +#if CONFIG_AV1_DECODER + aom_film_grain_t grain; + aom_noise_model_get_grain_parameters(noise_model, &grain); + print_variance_y(debug_file, raw, denoised, flat_blocks, block_size, &grain); +#endif + fflush(debug_file); +} + +int main(int argc, char *argv[]) { + noise_model_args_t args = { 0, 0, { 25, 1 }, 0, 0, 0, AOM_IMG_FMT_I420, + 32, 8, 1, 0, 1, NULL }; + aom_image_t raw, denoised; + FILE *infile = NULL; + AvxVideoInfo info; + + memset(&info, 0, sizeof(info)); + + (void)argc; + exec_name = argv[0]; + parse_args(&args, argv + 1); + + info.frame_width = args.width; + info.frame_height = args.height; + info.time_base.numerator = args.fps.den; + info.time_base.denominator = args.fps.num; + + if (info.frame_width <= 0 || info.frame_height <= 0 || + (info.frame_width % 2) != 0 || 
(info.frame_height % 2) != 0) { + die("Invalid frame size: %dx%d", info.frame_width, info.frame_height); + } + if (!aom_img_alloc(&raw, args.img_fmt, info.frame_width, info.frame_height, + 1)) { + die("Failed to allocate image."); + } + if (!aom_img_alloc(&denoised, args.img_fmt, info.frame_width, + info.frame_height, 1)) { + die("Failed to allocate image."); + } + infile = fopen(args.input, "rb"); + if (!infile) { + die("Failed to open input file: %s", args.input); + } + fprintf(stderr, "Bit depth: %d stride:%d\n", args.bit_depth, raw.stride[0]); + + const int high_bd = args.bit_depth > 8; + const int block_size = args.block_size; + aom_flat_block_finder_t block_finder; + aom_flat_block_finder_init(&block_finder, block_size, args.bit_depth, + high_bd); + + const int num_blocks_w = (info.frame_width + block_size - 1) / block_size; + const int num_blocks_h = (info.frame_height + block_size - 1) / block_size; + uint8_t *flat_blocks = (uint8_t *)aom_malloc(num_blocks_w * num_blocks_h); + if (!flat_blocks) die("Failed to allocate block data."); + // Sets the random seed on the first entry in the output table + int16_t random_seed = 7391; + aom_noise_model_t noise_model; + aom_noise_model_params_t params = { AOM_NOISE_SHAPE_SQUARE, 3, args.bit_depth, + high_bd }; + aom_noise_model_init(&noise_model, params); + + FILE *denoised_file = 0; + if (args.input_denoised) { + denoised_file = fopen(args.input_denoised, "rb"); + if (!denoised_file) + die("Unable to open input_denoised: %s", args.input_denoised); + } else { + die("--input-denoised file must be specified"); + } + FILE *debug_file = 0; + if (args.debug_file) { + debug_file = fopen(args.debug_file, "w"); + } + aom_film_grain_table_t grain_table = { 0, 0 }; + + int64_t prev_timestamp = 0; + int frame_count = 0; + while (aom_img_read(&raw, infile)) { + if (args.input_denoised) { + if (!aom_img_read(&denoised, denoised_file)) { + die("Unable to read input denoised file"); + } + } + if (frame_count % args.skip_frames == 
0) { + int num_flat_blocks = num_blocks_w * num_blocks_h; + memset(flat_blocks, 1, num_flat_blocks); + if (args.run_flat_block_finder) { + memset(flat_blocks, 0, num_flat_blocks); + num_flat_blocks = aom_flat_block_finder_run( + &block_finder, raw.planes[0], info.frame_width, info.frame_height, + info.frame_width, flat_blocks); + fprintf(stdout, "Num flat blocks %d\n", num_flat_blocks); + } + + const uint8_t *planes[3] = { raw.planes[0], raw.planes[1], + raw.planes[2] }; + uint8_t *denoised_planes[3] = { denoised.planes[0], denoised.planes[1], + denoised.planes[2] }; + int strides[3] = { raw.stride[0] >> high_bd, raw.stride[1] >> high_bd, + raw.stride[2] >> high_bd }; + int chroma_sub[3] = { raw.x_chroma_shift, raw.y_chroma_shift, 0 }; + + fprintf(stdout, "Updating noise model...\n"); + aom_noise_status_t status = aom_noise_model_update( + &noise_model, (const uint8_t *const *)planes, + (const uint8_t *const *)denoised_planes, info.frame_width, + info.frame_height, strides, chroma_sub, flat_blocks, block_size); + + int64_t cur_timestamp = + frame_count * 10000000ULL * args.fps.den / args.fps.num; + if (status == AOM_NOISE_STATUS_DIFFERENT_NOISE_TYPE) { + fprintf(stdout, + "Noise type is different, updating parameters for time " + "[ %" PRId64 ", %" PRId64 ")\n", + prev_timestamp, cur_timestamp); + aom_film_grain_t grain; + aom_noise_model_get_grain_parameters(&noise_model, &grain); + grain.random_seed = random_seed; + random_seed = 0; + aom_film_grain_table_append(&grain_table, prev_timestamp, cur_timestamp, + &grain); + aom_noise_model_save_latest(&noise_model); + prev_timestamp = cur_timestamp; + } + if (debug_file) { + print_debug_info(debug_file, &raw, &denoised, flat_blocks, block_size, + &noise_model); + } + fprintf(stdout, "Done noise model update, status = %d\n", status); + } + frame_count++; + } + + aom_film_grain_t grain; + aom_noise_model_get_grain_parameters(&noise_model, &grain); + grain.random_seed = random_seed; + 
aom_film_grain_table_append(&grain_table, prev_timestamp, INT64_MAX, &grain); + if (args.output_grain_table) { + struct aom_internal_error_info error_info; + if (AOM_CODEC_OK != aom_film_grain_table_write(&grain_table, + args.output_grain_table, + &error_info)) { + die("Unable to write output film grain table"); + } + } + aom_film_grain_table_free(&grain_table); + + if (infile) fclose(infile); + if (denoised_file) fclose(denoised_file); + if (debug_file) fclose(debug_file); + aom_img_free(&raw); + aom_img_free(&denoised); + + return EXIT_SUCCESS; +} diff --git a/third_party/aom/examples/photon_noise_table.c b/third_party/aom/examples/photon_noise_table.c new file mode 100644 index 0000000000..d3a21a48ee --- /dev/null +++ b/third_party/aom/examples/photon_noise_table.c @@ -0,0 +1,398 @@ +/* + * Copyright (c) 2021, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +// This tool creates a film grain table, for use in stills and videos, +// representing the noise that one would get by shooting with a digital camera +// at a given light level. Much of the noise in digital images is photon shot +// noise, which is due to the characteristics of photon arrival and grows in +// standard deviation as the square root of the expected number of photons +// captured. 
+// https://www.photonstophotos.net/Emil%20Martinec/noise.html#shotnoise +// +// The proxy used by this tool for the amount of light captured is the ISO value +// such that the focal plane exposure at the time of capture would have been +// mapped by a 35mm camera to the output lightness observed in the image. That +// is, if one were to shoot on a 35mm camera (36×24mm sensor) at the nominal +// exposure for that ISO setting, the resulting image should contain noise of +// the same order of magnitude as generated by this tool. +// +// Example usage: +// +// ./photon_noise_table --width=3840 --height=2160 --iso=25600 -o noise.tbl +// # Then, for example: +// aomenc --film-grain-table=noise.tbl ... +// # Or: +// avifenc -c aom -a film-grain-table=noise.tbl ... +// +// The (mostly) square-root relationship between light intensity and noise +// amplitude holds in linear light, but AV1 streams are most often encoded +// non-linearly, and the film grain is applied to those non-linear values. +// Therefore, this tool must account for the non-linearity, and this is +// controlled by the optional `--transfer-function` (or `-t`) parameter, which +// specifies the tone response curve that will be used when encoding the actual +// image. The default for this tool is sRGB, which is approximately similar to +// an encoding gamma of 1/2.2 (i.e. a decoding gamma of 2.2) though not quite +// identical. +// +// As alluded to above, the tool assumes that the image is taken from the +// entirety of a 36×24mm (“35mm format”) sensor. If that assumption does not +// hold, then a “35mm-equivalent ISO value” that can be passed to the tool can +// be obtained by multiplying the true ISO value by the ratio of 36×24mm to the +// area that was actually used. For formats that approximately share the same +// aspect ratio, this is often expressed as the square of the “equivalence +// ratio” which is the ratio of their diagonals. 
For example, APS-C (often +// ~24×16mm) is said to have an equivalence ratio of 1.5 relative to the 35mm +// format, and therefore ISO 1000 on APS-C and ISO 1000×1.5² = 2250 on 35mm +// produce an image of the same lightness from the same amount of light spread +// onto their respective surface areas (resulting in different focal plane +// exposures), and those images will thus have similar amounts of noise if the +// cameras are of similar technology. https://doi.org/10.1117/1.OE.57.11.110801 +// +// The tool needs to know the resolution of the images to which its grain tables +// will be applied so that it can know how the light on the sensor was shared +// between its pixels. As a general rule, while a higher pixel count will lead +// to more noise per pixel, when the final image is viewed at the same physical +// size, that noise will tend to “average out” to the same amount over a given +// area, since there will be more pixels in it which, in aggregate, will have +// received essentially as much light. Put differently, the amount of noise +// depends on the scale at which it is measured, and the decision for this tool +// was to make that scale relative to the image instead of its constituent +// samples. 
For more on this, see: +// +// https://www.photonstophotos.net/Emil%20Martinec/noise-p3.html#pixelsize +// https://www.dpreview.com/articles/5365920428/the-effect-of-pixel-and-sensor-sizes-on-noise/2 +// https://www.dpreview.com/videos/7940373140/dpreview-tv-why-lower-resolution-sensors-are-not-better-in-low-light + +#include <math.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "aom_dsp/grain_table.h" +#include "common/args.h" +#include "common/tools_common.h" + +static const char *exec_name; + +static const struct arg_enum_list transfer_functions[] = { + { "bt470m", AOM_CICP_TC_BT_470_M }, { "bt470bg", AOM_CICP_TC_BT_470_B_G }, + { "srgb", AOM_CICP_TC_SRGB }, { "smpte2084", AOM_CICP_TC_SMPTE_2084 }, + { "hlg", AOM_CICP_TC_HLG }, ARG_ENUM_LIST_END +}; + +static arg_def_t help_arg = + ARG_DEF("h", "help", 0, "Show the available options"); +static arg_def_t width_arg = + ARG_DEF("w", "width", 1, "Width of the image in pixels (required)"); +static arg_def_t height_arg = + ARG_DEF("l", "height", 1, "Height of the image in pixels (required)"); +static arg_def_t iso_arg = ARG_DEF( + "i", "iso", 1, "ISO setting indicative of the light level (required)"); +static arg_def_t output_arg = + ARG_DEF("o", "output", 1, + "Output file to which to write the film grain table (required)"); +static arg_def_t transfer_function_arg = + ARG_DEF_ENUM("t", "transfer-function", 1, + "Transfer function used by the encoded image (default = sRGB)", + transfer_functions); + +void usage_exit(void) { + fprintf(stderr, + "Usage: %s [--transfer-function=<tf>] --width=<width> " + "--height=<height> --iso=<iso> --output=<output.tbl>\n", + exec_name); + exit(EXIT_FAILURE); +} + +typedef struct { + float (*to_linear)(float); + float (*from_linear)(float); + // In linear output light. 
This would typically be 0.18 for SDR (this matches + // the definition of Standard Output Sensitivity from ISO 12232:2019), but in + // HDR, we certainly do not want to consider 18% of the maximum output a + // “mid-tone”, as it would be e.g. 1800 cd/m² for SMPTE ST 2084 (PQ). + float mid_tone; +} transfer_function_t; + +static const transfer_function_t *find_transfer_function( + aom_transfer_characteristics_t tc); + +typedef struct { + int width; + int height; + int iso_setting; + + const transfer_function_t *transfer_function; + + const char *output_filename; +} photon_noise_args_t; + +static void parse_args(int argc, char **argv, + photon_noise_args_t *photon_noise_args) { + static const arg_def_t *args[] = { &help_arg, &width_arg, + &height_arg, &iso_arg, + &output_arg, &transfer_function_arg, + NULL }; + struct arg arg; + int width_set = 0, height_set = 0, iso_set = 0, output_set = 0, i; + + photon_noise_args->transfer_function = + find_transfer_function(AOM_CICP_TC_SRGB); + + for (i = 1; i < argc; i += arg.argv_step) { + arg.argv_step = 1; + if (arg_match(&arg, &help_arg, argv + i)) { + arg_show_usage(stdout, args); + exit(EXIT_SUCCESS); + } else if (arg_match(&arg, &width_arg, argv + i)) { + photon_noise_args->width = arg_parse_int(&arg); + width_set = 1; + } else if (arg_match(&arg, &height_arg, argv + i)) { + photon_noise_args->height = arg_parse_int(&arg); + height_set = 1; + } else if (arg_match(&arg, &iso_arg, argv + i)) { + photon_noise_args->iso_setting = arg_parse_int(&arg); + iso_set = 1; + } else if (arg_match(&arg, &output_arg, argv + i)) { + photon_noise_args->output_filename = arg.val; + output_set = 1; + } else if (arg_match(&arg, &transfer_function_arg, argv + i)) { + const aom_transfer_characteristics_t tc = arg_parse_enum(&arg); + photon_noise_args->transfer_function = find_transfer_function(tc); + } else { + fatal("unrecognized argument \"%s\", see --help for available options", + argv[i]); + } + } + + if (!width_set) { + fprintf(stderr, 
"Missing required parameter --width\n"); + exit(EXIT_FAILURE); + } + + if (!height_set) { + fprintf(stderr, "Missing required parameter --height\n"); + exit(EXIT_FAILURE); + } + + if (!iso_set) { + fprintf(stderr, "Missing required parameter --iso\n"); + exit(EXIT_FAILURE); + } + + if (!output_set) { + fprintf(stderr, "Missing required parameter --output\n"); + exit(EXIT_FAILURE); + } +} + +static float maxf(float a, float b) { return a > b ? a : b; } +static float minf(float a, float b) { return a < b ? a : b; } + +static float gamma22_to_linear(float g) { return powf(g, 2.2f); } +static float gamma22_from_linear(float l) { return powf(l, 1 / 2.2f); } +static float gamma28_to_linear(float g) { return powf(g, 2.8f); } +static float gamma28_from_linear(float l) { return powf(l, 1 / 2.8f); } + +static float srgb_to_linear(float srgb) { + return srgb <= 0.04045f ? srgb / 12.92f + : powf((srgb + 0.055f) / 1.055f, 2.4f); +} +static float srgb_from_linear(float linear) { + return linear <= 0.0031308f ? 12.92f * linear + : 1.055f * powf(linear, 1 / 2.4f) - 0.055f; +} + +static const float kPqM1 = 2610.f / 16384; +static const float kPqM2 = 128 * 2523.f / 4096; +static const float kPqC1 = 3424.f / 4096; +static const float kPqC2 = 32 * 2413.f / 4096; +static const float kPqC3 = 32 * 2392.f / 4096; +static float pq_to_linear(float pq) { + const float pq_pow_inv_m2 = powf(pq, 1.f / kPqM2); + return powf(maxf(0, pq_pow_inv_m2 - kPqC1) / (kPqC2 - kPqC3 * pq_pow_inv_m2), + 1.f / kPqM1); +} +static float pq_from_linear(float linear) { + const float linear_pow_m1 = powf(linear, kPqM1); + return powf((kPqC1 + kPqC2 * linear_pow_m1) / (1 + kPqC3 * linear_pow_m1), + kPqM2); +} + +// Note: it is perhaps debatable whether “linear” for HLG should be scene light +// or display light. Here, it is implemented in terms of display light assuming +// a nominal peak display luminance of 1000 cd/m², hence the system γ of 1.2. 
To +// make it scene light instead, the OOTF (powf(x, 1.2f)) and its inverse should +// be removed from the functions below, and the .mid_tone should be replaced +// with powf(26.f / 1000, 1 / 1.2f). +static const float kHlgA = 0.17883277f; +static const float kHlgB = 0.28466892f; +static const float kHlgC = 0.55991073f; +static float hlg_to_linear(float hlg) { + // EOTF = OOTF ∘ OETF⁻¹ + const float linear = + hlg <= 0.5f ? hlg * hlg / 3 : (expf((hlg - kHlgC) / kHlgA) + kHlgB) / 12; + return powf(linear, 1.2f); +} +static float hlg_from_linear(float linear) { + // EOTF⁻¹ = OETF ∘ OOTF⁻¹ + linear = powf(linear, 1.f / 1.2f); + return linear <= 1.f / 12 ? sqrtf(3 * linear) + : kHlgA * logf(12 * linear - kHlgB) + kHlgC; +} + +static const transfer_function_t *find_transfer_function( + aom_transfer_characteristics_t tc) { + static const transfer_function_t + kGamma22TransferFunction = { .to_linear = &gamma22_to_linear, + .from_linear = &gamma22_from_linear, + .mid_tone = 0.18f }, + kGamma28TransferFunction = { .to_linear = &gamma28_to_linear, + .from_linear = &gamma28_from_linear, + .mid_tone = 0.18f }, + kSRgbTransferFunction = { .to_linear = &srgb_to_linear, + .from_linear = &srgb_from_linear, + .mid_tone = 0.18f }, + kPqTransferFunction = { .to_linear = &pq_to_linear, + .from_linear = &pq_from_linear, + // https://www.itu.int/pub/R-REP-BT.2408-4-2021 + // page 6 (PDF page 8) + .mid_tone = 26.f / 10000 }, + kHlgTransferFunction = { .to_linear = &hlg_to_linear, + .from_linear = &hlg_from_linear, + .mid_tone = 26.f / 1000 }; + + switch (tc) { + case AOM_CICP_TC_BT_470_M: return &kGamma22TransferFunction; + case AOM_CICP_TC_BT_470_B_G: return &kGamma28TransferFunction; + case AOM_CICP_TC_SRGB: return &kSRgbTransferFunction; + case AOM_CICP_TC_SMPTE_2084: return &kPqTransferFunction; + case AOM_CICP_TC_HLG: return &kHlgTransferFunction; + + default: fatal("unimplemented transfer function %d", tc); + } +} + +static void generate_photon_noise(const photon_noise_args_t 
*photon_noise_args, + aom_film_grain_t *film_grain) { + // Assumes a daylight-like spectrum. + // https://www.strollswithmydog.com/effective-quantum-efficiency-of-sensor/#:~:text=11%2C260%20photons/um%5E2/lx-s + static const float kPhotonsPerLxSPerUm2 = 11260; + + // Order of magnitude for cameras in the 2010-2020 decade, taking the CFA into + // account. + static const float kEffectiveQuantumEfficiency = 0.20f; + + // Also reasonable values for current cameras. The read noise is typically + // higher than this at low ISO settings but it matters less there. + static const float kPhotoResponseNonUniformity = 0.005f; + static const float kInputReferredReadNoise = 1.5f; + + // Focal plane exposure for a mid-tone (typically a 18% reflectance card), in + // lx·s. + const float mid_tone_exposure = 10.f / photon_noise_args->iso_setting; + + // In microns. Assumes a 35mm sensor (36mm × 24mm). + const float pixel_area_um2 = (36000 * 24000.f) / (photon_noise_args->width * + photon_noise_args->height); + + const float mid_tone_electrons_per_pixel = kEffectiveQuantumEfficiency * + kPhotonsPerLxSPerUm2 * + mid_tone_exposure * pixel_area_um2; + const float max_electrons_per_pixel = + mid_tone_electrons_per_pixel / + photon_noise_args->transfer_function->mid_tone; + + int i; + + film_grain->num_y_points = 14; + for (i = 0; i < film_grain->num_y_points; ++i) { + float x = i / (film_grain->num_y_points - 1.f); + const float linear = photon_noise_args->transfer_function->to_linear(x); + const float electrons_per_pixel = max_electrons_per_pixel * linear; + // Quadrature sum of the relevant sources of noise, in electrons rms. Photon + // shot noise is sqrt(electrons) so we can skip the square root and the + // squaring. 
+ // https://en.wikipedia.org/wiki/Addition_in_quadrature + // https://doi.org/10.1117/3.725073 + const float noise_in_electrons = + sqrtf(kInputReferredReadNoise * kInputReferredReadNoise + + electrons_per_pixel + + (kPhotoResponseNonUniformity * kPhotoResponseNonUniformity * + electrons_per_pixel * electrons_per_pixel)); + const float linear_noise = noise_in_electrons / max_electrons_per_pixel; + const float linear_range_start = maxf(0.f, linear - 2 * linear_noise); + const float linear_range_end = minf(1.f, linear + 2 * linear_noise); + const float tf_slope = + (photon_noise_args->transfer_function->from_linear(linear_range_end) - + photon_noise_args->transfer_function->from_linear( + linear_range_start)) / + (linear_range_end - linear_range_start); + float encoded_noise = linear_noise * tf_slope; + + x = roundf(255 * x); + encoded_noise = minf(255.f, roundf(255 * 7.88f * encoded_noise)); + + film_grain->scaling_points_y[i][0] = (int)x; + film_grain->scaling_points_y[i][1] = (int)encoded_noise; + } + + film_grain->apply_grain = 1; + film_grain->update_parameters = 1; + film_grain->num_cb_points = 0; + film_grain->num_cr_points = 0; + film_grain->scaling_shift = 8; + film_grain->ar_coeff_lag = 0; + film_grain->ar_coeffs_cb[0] = 0; + film_grain->ar_coeffs_cr[0] = 0; + film_grain->ar_coeff_shift = 6; + film_grain->cb_mult = 0; + film_grain->cb_luma_mult = 0; + film_grain->cb_offset = 0; + film_grain->cr_mult = 0; + film_grain->cr_luma_mult = 0; + film_grain->cr_offset = 0; + film_grain->overlap_flag = 1; + film_grain->random_seed = 7391; + film_grain->chroma_scaling_from_luma = 0; +} + +int main(int argc, char **argv) { + photon_noise_args_t photon_noise_args; + aom_film_grain_table_t film_grain_table; + aom_film_grain_t film_grain; + struct aom_internal_error_info error_info; + memset(&photon_noise_args, 0, sizeof(photon_noise_args)); + memset(&film_grain_table, 0, sizeof(film_grain_table)); + memset(&film_grain, 0, sizeof(film_grain)); + memset(&error_info, 0, 
sizeof(error_info)); + + exec_name = argv[0]; + parse_args(argc, argv, &photon_noise_args); + + generate_photon_noise(&photon_noise_args, &film_grain); + aom_film_grain_table_append(&film_grain_table, 0, 9223372036854775807ull, + &film_grain); + if (aom_film_grain_table_write(&film_grain_table, + photon_noise_args.output_filename, + &error_info) != AOM_CODEC_OK) { + aom_film_grain_table_free(&film_grain_table); + fprintf(stderr, "Failed to write film grain table"); + if (error_info.has_detail) { + fprintf(stderr, ": %s", error_info.detail); + } + fprintf(stderr, "\n"); + return EXIT_FAILURE; + } + aom_film_grain_table_free(&film_grain_table); + + return EXIT_SUCCESS; +} diff --git a/third_party/aom/examples/scalable_decoder.c b/third_party/aom/examples/scalable_decoder.c new file mode 100644 index 0000000000..00fe820fd5 --- /dev/null +++ b/third_party/aom/examples/scalable_decoder.c @@ -0,0 +1,184 @@ +/* + * Copyright (c) 2018, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +// Scalable Decoder +// ============== +// +// This is an example of a scalable decoder loop. It takes a 2-spatial-layer +// input file +// containing the compressed data (in OBU format), passes it through the +// decoder, and writes the decompressed frames to disk. The base layer and +// enhancement layers are stored as separate files, out_lyr0.yuv and +// out_lyr1.yuv, respectively. 
+// +// Standard Includes +// ----------------- +// For decoders, you only have to include `aom_decoder.h` and then any +// header files for the specific codecs you use. In this case, we're using +// av1. +// +// Initializing The Codec +// ---------------------- +// The libaom decoder is initialized by the call to aom_codec_dec_init(). +// Determining the codec interface to use is handled by AvxVideoReader and the +// functions prefixed with aom_video_reader_. Discussion of those functions is +// beyond the scope of this example, but the main gist is to open the input file +// and parse just enough of it to determine if it's an AVx file and which AVx +// codec is contained within the file. +// Note the NULL pointer passed to aom_codec_dec_init(). We do that in this +// example because we want the algorithm to determine the stream configuration +// (width/height) and allocate memory automatically. +// +// Decoding A Frame +// ---------------- +// Once the frame has been read into memory, it is decoded using the +// `aom_codec_decode` function. The call takes a pointer to the data +// (`buf`) and the length of the data (`bytes_in_buffer`). No application data +// is associated with the frame in this example, so the `user_priv` +// parameter is NULL. The `deadline` parameter is left at zero for this +// example. This parameter is generally only used when doing adaptive post +// processing. +// +// Codecs may produce a variable number of output frames for every call to +// `aom_codec_decode`. These frames are retrieved by the +// `aom_codec_get_frame` iterator function. The iterator variable `iter` is +// initialized to NULL each time `aom_codec_decode` is called. +// `aom_codec_get_frame` is called in a loop, returning a pointer to a +// decoded image or NULL to indicate the end of list. +// +// Processing The Decoded Data +// --------------------------- +// In this example, we simply write the decoded data to disk. 
It is +// important to honor the image's `stride` values. +// +// Cleanup +// ------- +// The `aom_codec_destroy` call frees any memory allocated by the codec. +// +// Error Handling +// -------------- +// This example does not special case any error return codes. If there was +// an error, a descriptive message is printed and the program exits. With +// few exceptions, aom_codec functions return an enumerated error status, +// with the value `0` indicating success. + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "aom/aom_decoder.h" +#include "aom/aomdx.h" +#include "common/obudec.h" +#include "common/tools_common.h" +#include "common/video_reader.h" + +static const char *exec_name; + +#define MAX_LAYERS 5 + +void usage_exit(void) { + fprintf(stderr, "Usage: %s <infile>\n", exec_name); + exit(EXIT_FAILURE); +} + +int main(int argc, char **argv) { + int frame_cnt = 0; + FILE *outfile[MAX_LAYERS]; + char filename[80]; + FILE *inputfile = NULL; + uint8_t *buf = NULL; + size_t bytes_in_buffer = 0; + size_t buffer_size = 0; + struct AvxInputContext aom_input_ctx; + struct ObuDecInputContext obu_ctx = { &aom_input_ctx, NULL, 0, 0, 0 }; + aom_codec_stream_info_t si; + uint8_t tmpbuf[32]; + unsigned int i; + + exec_name = argv[0]; + + if (argc != 2) die("Invalid number of arguments."); + + if (!(inputfile = fopen(argv[1], "rb"))) + die("Failed to open %s for read.", argv[1]); + obu_ctx.avx_ctx->file = inputfile; + obu_ctx.avx_ctx->filename = argv[1]; + + aom_codec_iface_t *decoder = get_aom_decoder_by_index(0); + printf("Using %s\n", aom_codec_iface_name(decoder)); + + aom_codec_ctx_t codec; + if (aom_codec_dec_init(&codec, decoder, NULL, 0)) + die("Failed to initialize decoder."); + + if (aom_codec_control(&codec, AV1D_SET_OUTPUT_ALL_LAYERS, 1)) { + die_codec(&codec, "Failed to set output_all_layers control."); + } + + // peak sequence header OBU to get number of spatial layers + const size_t ret = fread(tmpbuf, 1, 32, inputfile); + if (ret != 
32) die_codec(&codec, "Input is not a valid obu file"); + si.is_annexb = 0; + if (aom_codec_peek_stream_info(decoder, tmpbuf, 32, &si)) { + die_codec(&codec, "Input is not a valid obu file"); + } + fseek(inputfile, -32, SEEK_CUR); + + if (!file_is_obu(&obu_ctx)) + die_codec(&codec, "Input is not a valid obu file"); + + // open base layer output yuv file + snprintf(filename, sizeof(filename), "out_lyr%d.yuv", 0); + if (!(outfile[0] = fopen(filename, "wb"))) + die("Failed to open output for writing."); + + // open any enhancement layer output yuv files + for (i = 1; i < si.number_spatial_layers; i++) { + snprintf(filename, sizeof(filename), "out_lyr%u.yuv", i); + if (!(outfile[i] = fopen(filename, "wb"))) + die("Failed to open output for writing."); + } + + while (!obudec_read_temporal_unit(&obu_ctx, &buf, &bytes_in_buffer, + &buffer_size)) { + aom_codec_iter_t iter = NULL; + aom_image_t *img = NULL; + if (aom_codec_decode(&codec, buf, bytes_in_buffer, NULL)) + die_codec(&codec, "Failed to decode frame."); + + while ((img = aom_codec_get_frame(&codec, &iter)) != NULL) { + // Temporary 8-bit copy of the decoded frame. It is released right after + // it has been written so that it is not leaked on every frame. + aom_image_t *img_shifted = + aom_img_alloc(NULL, AOM_IMG_FMT_I420, img->d_w, img->d_h, 16); + if (!img_shifted) die("Failed to allocate image."); + img_shifted->bit_depth = 8; + aom_img_downshift(img_shifted, img, + img->bit_depth - img_shifted->bit_depth); + if (img->spatial_id == 0) { + printf("Writing base layer 0 %d\n", frame_cnt); + aom_img_write(img_shifted, outfile[0]); + aom_img_free(img_shifted); + } else if (img->spatial_id <= (int)(si.number_spatial_layers - 1)) { + printf("Writing enhancement layer %d %d\n", img->spatial_id, frame_cnt); + aom_img_write(img_shifted, outfile[img->spatial_id]); + aom_img_free(img_shifted); + } else { + die_codec(&codec, "Invalid bitstream. 
Layer id exceeds layer count"); + } + if (img->spatial_id == (int)(si.number_spatial_layers - 1)) ++frame_cnt; + } + } + + printf("Processed %d frames.\n", frame_cnt); + if (aom_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec"); + + for (i = 0; i < si.number_spatial_layers; i++) fclose(outfile[i]); + + fclose(inputfile); + + return EXIT_SUCCESS; +} diff --git a/third_party/aom/examples/scalable_encoder.c b/third_party/aom/examples/scalable_encoder.c new file mode 100644 index 0000000000..5bfd1840b2 --- /dev/null +++ b/third_party/aom/examples/scalable_encoder.c @@ -0,0 +1,288 @@ +/* + * Copyright (c) 2018, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +// Scalable Encoder +// ============== +// +// This is an example of a scalable encoder loop. It takes two input files in +// YV12 format, passes it through the encoder, and writes the compressed +// frames to disk in OBU format. +// +// Getting The Default Configuration +// --------------------------------- +// Encoders have the notion of "usage profiles." For example, an encoder +// may want to publish default configurations for both a video +// conferencing application and a best quality offline encoder. These +// obviously have very different default settings. Consult the +// documentation for your codec to see if it provides any default +// configurations. All codecs provide a default configuration, number 0, +// which is valid for material in the vacinity of QCIF/QVGA. 
+// +// Updating The Configuration +// --------------------------------- +// Almost all applications will want to update the default configuration +// with settings specific to their usage. Here we set the width and height +// of the video file to that specified on the command line. We also scale +// the default bitrate based on the ratio between the default resolution +// and the resolution specified on the command line. +// +// Encoding A Frame +// ---------------- +// The frame is read as a continuous block (size = width * height * 3 / 2) +// from the input file. If a frame was read (the input file has not hit +// EOF) then the frame is passed to the encoder. Otherwise, a NULL +// is passed, indicating the End-Of-Stream condition to the encoder. The +// `frame_count` is reused as the presentation time stamp (PTS) and each +// frame is shown for one frame-time in duration. The flags parameter +// selects the allowed reference frames and forces keyframes (see below). + +// Forced Keyframes +// ---------------- +// Keyframes can be forced by setting the AOM_EFLAG_FORCE_KF bit of the +// flags passed to `aom_codec_encode()`. In this example, we force a +// keyframe every <keyframe-interval> frames. Note, the output stream can +// contain additional keyframes beyond those that have been forced using the +// AOM_EFLAG_FORCE_KF flag because of automatic keyframe placement by the +// encoder. +// +// Processing The Encoded Data +// --------------------------- +// Each packet of type `AOM_CODEC_CX_FRAME_PKT` contains the encoded data +// for this frame. We write the raw data of each packet to the output file. +// +// Cleanup +// ------- +// The `aom_codec_destroy` call frees any memory allocated by the codec. +// +// Error Handling +// -------------- +// This example does not special case any error return codes. If there was +// an error, a descriptive message is printed and the program exits. With +// few exceptions, aom_codec functions return an enumerated error status, +// with the value `0` indicating success. 
+ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "aom/aom_encoder.h" +#include "aom/aomcx.h" +#include "av1/common/enums.h" +#include "common/tools_common.h" +#include "common/video_writer.h" + +static const char *exec_name; + +void usage_exit(void) { + fprintf(stderr, + "Usage: %s <codec> <width> <height> <infile0> <infile1> " + "<outfile> <frames to encode>\n" + "See comments in scalable_encoder.c for more information.\n", + exec_name); + exit(EXIT_FAILURE); +} + +static int encode_frame(aom_codec_ctx_t *codec, aom_image_t *img, + int frame_index, int flags, FILE *outfile) { + int got_pkts = 0; + aom_codec_iter_t iter = NULL; + const aom_codec_cx_pkt_t *pkt = NULL; + const aom_codec_err_t res = + aom_codec_encode(codec, img, frame_index, 1, flags); + if (res != AOM_CODEC_OK) die_codec(codec, "Failed to encode frame"); + + while ((pkt = aom_codec_get_cx_data(codec, &iter)) != NULL) { + got_pkts = 1; + + if (pkt->kind == AOM_CODEC_CX_FRAME_PKT) { + const int keyframe = (pkt->data.frame.flags & AOM_FRAME_IS_KEY) != 0; + if (fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz, outfile) != + pkt->data.frame.sz) { + die_codec(codec, "Failed to write compressed frame"); + } + printf(keyframe ? 
"K" : "."); + printf(" %6d\n", (int)pkt->data.frame.sz); + fflush(stdout); + } + } + + return got_pkts; +} + +int main(int argc, char **argv) { + FILE *infile0 = NULL; + FILE *infile1 = NULL; + aom_codec_enc_cfg_t cfg; + int frame_count = 0; + aom_image_t raw0, raw1; + aom_codec_err_t res; + AvxVideoInfo info; + const int fps = 30; + const int bitrate = 200; + int keyframe_interval = 0; + int max_frames = 0; + int frames_encoded = 0; + const char *codec_arg = NULL; + const char *width_arg = NULL; + const char *height_arg = NULL; + const char *infile0_arg = NULL; + const char *infile1_arg = NULL; + const char *outfile_arg = NULL; + // const char *keyframe_interval_arg = NULL; + FILE *outfile = NULL; + + exec_name = argv[0]; + + // Clear explicitly, as simply assigning "{ 0 }" generates + // "missing-field-initializers" warning in some compilers. + memset(&info, 0, sizeof(info)); + + if (argc != 8) die("Invalid number of arguments"); + + codec_arg = argv[1]; + width_arg = argv[2]; + height_arg = argv[3]; + infile0_arg = argv[4]; + infile1_arg = argv[5]; + outfile_arg = argv[6]; + max_frames = (int)strtol(argv[7], NULL, 0); + + aom_codec_iface_t *encoder = get_aom_encoder_by_short_name(codec_arg); + if (!encoder) die("Unsupported codec."); + + info.codec_fourcc = get_fourcc_by_aom_encoder(encoder); + info.frame_width = (int)strtol(width_arg, NULL, 0); + info.frame_height = (int)strtol(height_arg, NULL, 0); + info.time_base.numerator = 1; + info.time_base.denominator = fps; + + if (info.frame_width <= 0 || info.frame_height <= 0 || + (info.frame_width % 2) != 0 || (info.frame_height % 2) != 0) { + die("Invalid frame size: %dx%d", info.frame_width, info.frame_height); + } + + if (!aom_img_alloc(&raw0, AOM_IMG_FMT_I420, info.frame_width, + info.frame_height, 1)) { + die("Failed to allocate image for layer 0."); + } + if (!aom_img_alloc(&raw1, AOM_IMG_FMT_I420, info.frame_width, + info.frame_height, 1)) { + die("Failed to allocate image for layer 1."); + } + + // 
keyframe_interval = (int)strtol(keyframe_interval_arg, NULL, 0); + keyframe_interval = 100; + if (keyframe_interval < 0) die("Invalid keyframe interval value."); + + printf("Using %s\n", aom_codec_iface_name(encoder)); + + aom_codec_ctx_t codec; + res = aom_codec_enc_config_default(encoder, &cfg, 0); + // The codec context is not initialized yet, so report with die() rather + // than die_codec(), which would read the uninitialized context. + if (res) die("Failed to get default codec config."); + + cfg.g_w = info.frame_width; + cfg.g_h = info.frame_height; + cfg.g_timebase.num = info.time_base.numerator; + cfg.g_timebase.den = info.time_base.denominator; + cfg.rc_target_bitrate = bitrate; + cfg.g_error_resilient = 0; + cfg.g_lag_in_frames = 0; + cfg.rc_end_usage = AOM_Q; + cfg.save_as_annexb = 0; + + outfile = fopen(outfile_arg, "wb"); + if (!outfile) die("Failed to open %s for writing.", outfile_arg); + + if (!(infile0 = fopen(infile0_arg, "rb"))) + die("Failed to open %s for reading.", infile0_arg); + if (!(infile1 = fopen(infile1_arg, "rb"))) + die("Failed to open %s for reading.", infile1_arg); + + if (aom_codec_enc_init(&codec, encoder, &cfg, 0)) + die("Failed to initialize encoder"); + if (aom_codec_control(&codec, AOME_SET_CPUUSED, 8)) + die_codec(&codec, "Failed to set cpu to 8"); + + if (aom_codec_control(&codec, AV1E_SET_TILE_COLUMNS, 2)) + die_codec(&codec, "Failed to set tile columns to 2"); + if (aom_codec_control(&codec, AV1E_SET_NUM_TG, 3)) + die_codec(&codec, "Failed to set num of tile groups to 3"); + + if (aom_codec_control(&codec, AOME_SET_NUMBER_SPATIAL_LAYERS, 2)) + die_codec(&codec, "Failed to set number of spatial layers to 2"); + + // Encode frames. 
+ // Both layers are read up front so that encoding stops cleanly when either + // input runs out, instead of re-encoding a stale enhancement frame. + while (aom_img_read(&raw0, infile0) && aom_img_read(&raw1, infile1)) { + int flags = 0; + + // configure and encode base layer + + if (keyframe_interval > 0 && frames_encoded % keyframe_interval == 0) + flags |= AOM_EFLAG_FORCE_KF; + else + // use previous base layer (LAST) as sole reference + // save this frame as LAST to be used as reference by enhancement layer + // and next base layer + flags |= AOM_EFLAG_NO_REF_LAST2 | AOM_EFLAG_NO_REF_LAST3 | + AOM_EFLAG_NO_REF_GF | AOM_EFLAG_NO_REF_ARF | + AOM_EFLAG_NO_REF_BWD | AOM_EFLAG_NO_REF_ARF2 | + AOM_EFLAG_NO_UPD_GF | AOM_EFLAG_NO_UPD_ARF | + AOM_EFLAG_NO_UPD_ENTROPY; + cfg.g_w = info.frame_width; + cfg.g_h = info.frame_height; + if (aom_codec_enc_config_set(&codec, &cfg)) + die_codec(&codec, "Failed to set enc cfg for layer 0"); + if (aom_codec_control(&codec, AOME_SET_SPATIAL_LAYER_ID, 0)) + die_codec(&codec, "Failed to set layer id to 0"); + if (aom_codec_control(&codec, AOME_SET_CQ_LEVEL, 62)) + die_codec(&codec, "Failed to set cq level"); + encode_frame(&codec, &raw0, frame_count++, flags, outfile); + + // configure and encode enhancement layer + + // use LAST (base layer) as sole reference + flags = AOM_EFLAG_NO_REF_LAST2 | AOM_EFLAG_NO_REF_LAST3 | + AOM_EFLAG_NO_REF_GF | AOM_EFLAG_NO_REF_ARF | AOM_EFLAG_NO_REF_BWD | + AOM_EFLAG_NO_REF_ARF2 | AOM_EFLAG_NO_UPD_LAST | + AOM_EFLAG_NO_UPD_GF | AOM_EFLAG_NO_UPD_ARF | + AOM_EFLAG_NO_UPD_ENTROPY; + cfg.g_w = info.frame_width; + cfg.g_h = info.frame_height; + if (aom_codec_enc_config_set(&codec, &cfg)) + die_codec(&codec, "Failed to set enc cfg for layer 1"); + if (aom_codec_control(&codec, AOME_SET_SPATIAL_LAYER_ID, 1)) + die_codec(&codec, "Failed to set layer id to 1"); + if (aom_codec_control(&codec, AOME_SET_CQ_LEVEL, 10)) + die_codec(&codec, "Failed to set cq level"); + encode_frame(&codec, &raw1, frame_count++, flags, outfile); + + frames_encoded++; + + if (max_frames > 0 && frames_encoded >= max_frames) break; + } + + // Flush encoder. 
+ while (encode_frame(&codec, NULL, -1, 0, outfile)) continue; + + printf("\n"); + fclose(infile0); + fclose(infile1); + printf("Processed %d frames.\n", frame_count / 2); + + aom_img_free(&raw0); + aom_img_free(&raw1); + if (aom_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec."); + + fclose(outfile); + + return EXIT_SUCCESS; +} diff --git a/third_party/aom/examples/set_maps.c b/third_party/aom/examples/set_maps.c new file mode 100644 index 0000000000..2593faba34 --- /dev/null +++ b/third_party/aom/examples/set_maps.c @@ -0,0 +1,219 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +// AOM Set Active and ROI Maps +// =========================== +// +// This is an example demonstrating how to control the AOM encoder's +// ROI and Active maps. +// +// ROI (Region of Interest) maps are a way for the application to assign +// each macroblock in the image to a region, and then set quantizer and +// filtering parameters on that image. +// +// Active maps are a way for the application to specify on a +// macroblock-by-macroblock basis whether there is any activity in that +// macroblock. +// +// +// Configuration +// ------------- +// An ROI map is set on frame 22. If the width of the image in macroblocks +// is evenly divisible by 4, then the output will appear to have distinct +// columns, where the quantizer, loopfilter, and static threshold differ +// from column to column. +// +// An active map is set on frame 33. 
If the width of the image in macroblocks +// is evenly divisible by 4, then the output will appear to have distinct +// columns, where one column will have motion and the next will not. +// +// The active map is cleared on frame 44. +// +// Observing The Effects +// --------------------- +// Use the `simple_decoder` example to decode this sample, and observe +// the change in the image at frames 22, 33, and 44. + +#include <assert.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "aom/aom_encoder.h" +#include "aom/aomcx.h" +#include "common/tools_common.h" +#include "common/video_writer.h" + +static const char *exec_name; + +void usage_exit(void) { + fprintf(stderr, "Usage: %s <codec> <width> <height> <infile> <outfile>\n", + exec_name); + exit(EXIT_FAILURE); +} + +static void set_active_map(const aom_codec_enc_cfg_t *cfg, + aom_codec_ctx_t *codec) { + unsigned int i; + aom_active_map_t map = { 0, 0, 0 }; + + map.rows = (cfg->g_h + 15) / 16; + map.cols = (cfg->g_w + 15) / 16; + + map.active_map = (uint8_t *)malloc(map.rows * map.cols); + if (!map.active_map) die("Failed to allocate active map"); + for (i = 0; i < map.rows * map.cols; ++i) map.active_map[i] = i % 2; + + if (aom_codec_control(codec, AOME_SET_ACTIVEMAP, &map)) + die_codec(codec, "Failed to set active map"); + + free(map.active_map); +} + +static void unset_active_map(const aom_codec_enc_cfg_t *cfg, + aom_codec_ctx_t *codec) { + aom_active_map_t map = { 0, 0, 0 }; + + map.rows = (cfg->g_h + 15) / 16; + map.cols = (cfg->g_w + 15) / 16; + map.active_map = NULL; + + if (aom_codec_control(codec, AOME_SET_ACTIVEMAP, &map)) + die_codec(codec, "Failed to set active map"); +} + +static int encode_frame(aom_codec_ctx_t *codec, aom_image_t *img, + int frame_index, AvxVideoWriter *writer) { + int got_pkts = 0; + aom_codec_iter_t iter = NULL; + const aom_codec_cx_pkt_t *pkt = NULL; + const aom_codec_err_t res = aom_codec_encode(codec, img, frame_index, 1, 0); + if (res != AOM_CODEC_OK) 
die_codec(codec, "Failed to encode frame"); + + while ((pkt = aom_codec_get_cx_data(codec, &iter)) != NULL) { + got_pkts = 1; + + if (pkt->kind == AOM_CODEC_CX_FRAME_PKT) { + const int keyframe = (pkt->data.frame.flags & AOM_FRAME_IS_KEY) != 0; + if (!aom_video_writer_write_frame(writer, pkt->data.frame.buf, + pkt->data.frame.sz, + pkt->data.frame.pts)) { + die_codec(codec, "Failed to write compressed frame"); + } + + printf(keyframe ? "K" : "."); + fflush(stdout); + } + } + + return got_pkts; +} + +int main(int argc, char **argv) { + FILE *infile = NULL; + aom_codec_ctx_t codec; + aom_codec_enc_cfg_t cfg; + int frame_count = 0; + const int limit = 10; + aom_image_t raw; + aom_codec_err_t res; + AvxVideoInfo info; + AvxVideoWriter *writer = NULL; + const int fps = 2; // TODO(dkovalev) add command line argument + const double bits_per_pixel_per_frame = 0.067; + +#if CONFIG_REALTIME_ONLY + const int usage = 1; + const int speed = 7; +#else + const int usage = 0; + const int speed = 2; +#endif + + exec_name = argv[0]; + if (argc != 6) die("Invalid number of arguments"); + + memset(&info, 0, sizeof(info)); + + aom_codec_iface_t *encoder = get_aom_encoder_by_short_name(argv[1]); + if (encoder == NULL) { + die("Unsupported codec."); + } + assert(encoder != NULL); + info.codec_fourcc = get_fourcc_by_aom_encoder(encoder); + info.frame_width = (int)strtol(argv[2], NULL, 0); + info.frame_height = (int)strtol(argv[3], NULL, 0); + info.time_base.numerator = 1; + info.time_base.denominator = fps; + + if (info.frame_width <= 0 || info.frame_height <= 0 || + (info.frame_width % 2) != 0 || (info.frame_height % 2) != 0) { + die("Invalid frame size: %dx%d", info.frame_width, info.frame_height); + } + + if (!aom_img_alloc(&raw, AOM_IMG_FMT_I420, info.frame_width, + info.frame_height, 1)) { + die("Failed to allocate image."); + } + + printf("Using %s\n", aom_codec_iface_name(encoder)); + + res = aom_codec_enc_config_default(encoder, &cfg, usage); + if (res) die_codec(&codec, "Failed 
to get default codec config."); + + cfg.g_w = info.frame_width; + cfg.g_h = info.frame_height; + cfg.g_timebase.num = info.time_base.numerator; + cfg.g_timebase.den = info.time_base.denominator; + cfg.rc_target_bitrate = + (unsigned int)(bits_per_pixel_per_frame * cfg.g_w * cfg.g_h * fps / 1000); + cfg.g_lag_in_frames = 0; + + writer = aom_video_writer_open(argv[5], kContainerIVF, &info); + if (!writer) die("Failed to open %s for writing.", argv[5]); + + if (!(infile = fopen(argv[4], "rb"))) + die("Failed to open %s for reading.", argv[4]); + + if (aom_codec_enc_init(&codec, encoder, &cfg, 0)) + die("Failed to initialize encoder"); + + if (aom_codec_control(&codec, AOME_SET_CPUUSED, speed)) + die_codec(&codec, "Failed to set cpu-used"); + + // Encode frames. + while (aom_img_read(&raw, infile) && frame_count < limit) { + ++frame_count; + + if (frame_count == 5) { + set_active_map(&cfg, &codec); + } else if (frame_count == 9) { + unset_active_map(&cfg, &codec); + } + + encode_frame(&codec, &raw, frame_count, writer); + } + + // Flush encoder. + while (encode_frame(&codec, NULL, -1, writer)) { + } + + printf("\n"); + fclose(infile); + printf("Processed %d frames.\n", frame_count); + + aom_img_free(&raw); + if (aom_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec."); + + aom_video_writer_close(writer); + + return EXIT_SUCCESS; +} diff --git a/third_party/aom/examples/simple_decoder.c b/third_party/aom/examples/simple_decoder.c new file mode 100644 index 0000000000..b6891dcbba --- /dev/null +++ b/third_party/aom/examples/simple_decoder.c @@ -0,0 +1,145 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. 
If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +// Simple Decoder +// ============== +// +// This is an example of a simple decoder loop. It takes an input file +// containing the compressed data (in IVF format), passes it through the +// decoder, and writes the decompressed frames to disk. Other decoder +// examples build upon this one. +// +// The details of the IVF format have been elided from this example for +// simplicity of presentation, as IVF files will not generally be used by +// your application. In general, an IVF file consists of a file header, +// followed by a variable number of frames. Each frame consists of a frame +// header followed by a variable length payload. The length of the payload +// is specified in the first four bytes of the frame header. The payload is +// the raw compressed data. +// +// Standard Includes +// ----------------- +// For decoders, you only have to include `aom_decoder.h` and then any +// header files for the specific codecs you use. In this case, we're using +// aom. +// +// Initializing The Codec +// ---------------------- +// The libaom decoder is initialized by the call to aom_codec_dec_init(). +// Determining the codec interface to use is handled by AvxVideoReader and the +// functions prefixed with aom_video_reader_. Discussion of those functions is +// beyond the scope of this example, but the main gist is to open the input file +// and parse just enough of it to determine if it's a AVx file and which AVx +// codec is contained within the file. +// Note the NULL pointer passed to aom_codec_dec_init(). We do that in this +// example because we want the algorithm to determine the stream configuration +// (width/height) and allocate memory automatically. 
+// +// Decoding A Frame +// ---------------- +// Once the frame has been read into memory, it is decoded using the +// `aom_codec_decode` function. The call takes a pointer to the data +// (`frame`) and the length of the data (`frame_size`). No application data +// is associated with the frame in this example, so the `user_priv` +// parameter is NULL. +// +// Codecs may produce a variable number of output frames for every call to +// `aom_codec_decode`. These frames are retrieved by the +// `aom_codec_get_frame` iterator function. The iterator variable `iter` is +// initialized to NULL each time `aom_codec_decode` is called. +// `aom_codec_get_frame` is called in a loop, returning a pointer to a +// decoded image or NULL to indicate the end of list. +// +// Processing The Decoded Data +// --------------------------- +// In this example, we simply write the decoded data to disk. It is +// important to honor the image's `stride` values. +// +// Cleanup +// ------- +// The `aom_codec_destroy` call frees any memory allocated by the codec. +// +// Error Handling +// -------------- +// This example does not special case any error return codes. If there was +// an error, a descriptive message is printed and the program exits. With +// few exceptions, aom_codec functions return an enumerated error status, +// with the value `0` indicating success. 
+ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "aom/aom_decoder.h" +#include "common/tools_common.h" +#include "common/video_reader.h" + +static const char *exec_name; + +void usage_exit(void) { + fprintf(stderr, "Usage: %s <infile> <outfile>\n", exec_name); + exit(EXIT_FAILURE); +} + +int main(int argc, char **argv) { + int frame_cnt = 0; + FILE *outfile = NULL; + AvxVideoReader *reader = NULL; + const AvxVideoInfo *info = NULL; + + exec_name = argv[0]; + + if (argc != 3) die("Invalid number of arguments."); + + reader = aom_video_reader_open(argv[1]); + if (!reader) die("Failed to open %s for reading.", argv[1]); + + if (!(outfile = fopen(argv[2], "wb"))) + die("Failed to open %s for writing.", argv[2]); + + info = aom_video_reader_get_info(reader); + + aom_codec_iface_t *decoder = get_aom_decoder_by_fourcc(info->codec_fourcc); + if (!decoder) die("Unknown input codec."); + + printf("Using %s\n", aom_codec_iface_name(decoder)); + + aom_codec_ctx_t codec; + if (aom_codec_dec_init(&codec, decoder, NULL, 0)) + die("Failed to initialize decoder."); + + while (aom_video_reader_read_frame(reader)) { + aom_codec_iter_t iter = NULL; + aom_image_t *img = NULL; + size_t frame_size = 0; + const unsigned char *frame = + aom_video_reader_get_frame(reader, &frame_size); + if (aom_codec_decode(&codec, frame, frame_size, NULL)) + die_codec(&codec, "Failed to decode frame."); + + while ((img = aom_codec_get_frame(&codec, &iter)) != NULL) { + aom_img_write(img, outfile); + ++frame_cnt; + } + } + + printf("Processed %d frames.\n", frame_cnt); + if (aom_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec"); + + printf("Play: ffplay -f rawvideo -pix_fmt yuv420p -s %dx%d %s\n", + info->frame_width, info->frame_height, argv[2]); + + aom_video_reader_close(reader); + + fclose(outfile); + + return EXIT_SUCCESS; +} diff --git a/third_party/aom/examples/simple_encoder.c b/third_party/aom/examples/simple_encoder.c new file mode 100644 index 
0000000000..c026706555 --- /dev/null +++ b/third_party/aom/examples/simple_encoder.c @@ -0,0 +1,259 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +// Simple Encoder +// ============== +// +// This is an example of a simple encoder loop. It takes an input file in +// I420 format, passes it through the encoder, and writes the compressed +// frames to disk in IVF format. Other encoder examples build upon this +// one. +// +// The details of the IVF format have been elided from this example for +// simplicity of presentation, as IVF files will not generally be used by +// your application. In general, an IVF file consists of a file header, +// followed by a variable number of frames. Each frame consists of a frame +// header followed by a variable length payload. The length of the payload +// is specified in the first four bytes of the frame header. The payload is +// the raw compressed data. +// +// Standard Includes +// ----------------- +// For encoders, you only have to include `aom_encoder.h` and then any +// header files for the specific codecs you use. In this case, we're using +// aom. +// +// Getting The Default Configuration +// --------------------------------- +// Encoders have the notion of "usage profiles." For example, an encoder +// may want to publish default configurations for both a video +// conferencing application and a best quality offline encoder. These +// obviously have very different default settings. 
Consult the +// documentation for your codec to see if it provides any default +// configurations. All codecs provide a default configuration, number 0, +// which is valid for material in the vicinity of QCIF/QVGA. +// +// Updating The Configuration +// --------------------------------- +// Almost all applications will want to update the default configuration +// with settings specific to their usage. Here we set the width and height +// of the video file to that specified on the command line. We also scale +// the default bitrate based on the ratio between the default resolution +// and the resolution specified on the command line. +// +// Initializing The Codec +// ---------------------- +// The encoder is initialized by the following code. +// +// Encoding A Frame +// ---------------- +// The frame is read as a continuous block (size width * height * 3 / 2) +// from the input file. If a frame was read (the input file has not hit +// EOF) then the frame is passed to the encoder. Otherwise, a NULL +// is passed, indicating the End-Of-Stream condition to the encoder. The +// `frame_cnt` is reused as the presentation time stamp (PTS) and each +// frame is shown for one frame-time in duration. The flags parameter is +// unused in this example. + +// Forced Keyframes +// ---------------- +// Keyframes can be forced by setting the AOM_EFLAG_FORCE_KF bit of the +// flags passed to `aom_codec_encode()`. In this example, we force a +// keyframe every <keyframe-interval> frames. Note, the output stream can +// contain additional keyframes beyond those that have been forced using the +// AOM_EFLAG_FORCE_KF flag because of automatic keyframe placement by the +// encoder. +// +// Processing The Encoded Data +// --------------------------- +// Each packet of type `AOM_CODEC_CX_FRAME_PKT` contains the encoded data +// for this frame. We write an IVF frame header, followed by the raw data. 
+// +// Cleanup +// ------- +// The `aom_codec_destroy` call frees any memory allocated by the codec. +// +// Error Handling +// -------------- +// This example does not special case any error return codes. If there was +// an error, a descriptive message is printed and the program exits. With +// few exeptions, aom_codec functions return an enumerated error status, +// with the value `0` indicating success. +// +// Error Resiliency Features +// ------------------------- +// Error resiliency is controlled by the g_error_resilient member of the +// configuration structure. Use the `decode_with_drops` example to decode with +// frames 5-10 dropped. Compare the output for a file encoded with this example +// versus one encoded with the `simple_encoder` example. + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "aom/aom_encoder.h" +#include "aom/aomcx.h" +#include "common/tools_common.h" +#include "common/video_writer.h" + +static const char *exec_name; + +void usage_exit(void) { + fprintf(stderr, + "Usage: %s <codec> <width> <height> <infile> <outfile> " + "<keyframe-interval> <error-resilient> <frames to encode>\n" + "See comments in simple_encoder.c for more information.\n", + exec_name); + exit(EXIT_FAILURE); +} + +static int encode_frame(aom_codec_ctx_t *codec, aom_image_t *img, + int frame_index, int flags, AvxVideoWriter *writer) { + int got_pkts = 0; + aom_codec_iter_t iter = NULL; + const aom_codec_cx_pkt_t *pkt = NULL; + const aom_codec_err_t res = + aom_codec_encode(codec, img, frame_index, 1, flags); + if (res != AOM_CODEC_OK) die_codec(codec, "Failed to encode frame"); + + while ((pkt = aom_codec_get_cx_data(codec, &iter)) != NULL) { + got_pkts = 1; + + if (pkt->kind == AOM_CODEC_CX_FRAME_PKT) { + const int keyframe = (pkt->data.frame.flags & AOM_FRAME_IS_KEY) != 0; + if (!aom_video_writer_write_frame(writer, pkt->data.frame.buf, + pkt->data.frame.sz, + pkt->data.frame.pts)) { + die_codec(codec, "Failed to write compressed 
frame"); + } + printf(keyframe ? "K" : "."); + fflush(stdout); + } + } + + return got_pkts; +} + +// TODO(tomfinegan): Improve command line parsing and add args for bitrate/fps. +int main(int argc, char **argv) { + FILE *infile = NULL; + aom_codec_ctx_t codec; + aom_codec_enc_cfg_t cfg; + int frame_count = 0; + aom_image_t raw; + aom_codec_err_t res; + AvxVideoInfo info; + AvxVideoWriter *writer = NULL; + const int fps = 30; + const int bitrate = 200; + int keyframe_interval = 0; + int max_frames = 0; + int frames_encoded = 0; + const char *codec_arg = NULL; + const char *width_arg = NULL; + const char *height_arg = NULL; + const char *infile_arg = NULL; + const char *outfile_arg = NULL; + const char *keyframe_interval_arg = NULL; +#if CONFIG_REALTIME_ONLY + const int usage = 1; + const int speed = 7; +#else + const int usage = 0; + const int speed = 2; +#endif + + exec_name = argv[0]; + + // Clear explicitly, as simply assigning "{ 0 }" generates + // "missing-field-initializers" warning in some compilers. 
+ memset(&info, 0, sizeof(info)); + + if (argc != 9) die("Invalid number of arguments"); + + codec_arg = argv[1]; + width_arg = argv[2]; + height_arg = argv[3]; + infile_arg = argv[4]; + outfile_arg = argv[5]; + keyframe_interval_arg = argv[6]; + max_frames = (int)strtol(argv[8], NULL, 0); + + aom_codec_iface_t *encoder = get_aom_encoder_by_short_name(codec_arg); + if (!encoder) die("Unsupported codec."); + + info.codec_fourcc = get_fourcc_by_aom_encoder(encoder); + info.frame_width = (int)strtol(width_arg, NULL, 0); + info.frame_height = (int)strtol(height_arg, NULL, 0); + info.time_base.numerator = 1; + info.time_base.denominator = fps; + + if (info.frame_width <= 0 || info.frame_height <= 0 || + (info.frame_width % 2) != 0 || (info.frame_height % 2) != 0) { + die("Invalid frame size: %dx%d", info.frame_width, info.frame_height); + } + + if (!aom_img_alloc(&raw, AOM_IMG_FMT_I420, info.frame_width, + info.frame_height, 1)) { + die("Failed to allocate image."); + } + + keyframe_interval = (int)strtol(keyframe_interval_arg, NULL, 0); + if (keyframe_interval < 0) die("Invalid keyframe interval value."); + + printf("Using %s\n", aom_codec_iface_name(encoder)); + + res = aom_codec_enc_config_default(encoder, &cfg, usage); + if (res) die_codec(&codec, "Failed to get default codec config."); + + cfg.g_w = info.frame_width; + cfg.g_h = info.frame_height; + cfg.g_timebase.num = info.time_base.numerator; + cfg.g_timebase.den = info.time_base.denominator; + cfg.rc_target_bitrate = bitrate; + cfg.g_error_resilient = (aom_codec_er_flags_t)strtoul(argv[7], NULL, 0); + + writer = aom_video_writer_open(outfile_arg, kContainerIVF, &info); + if (!writer) die("Failed to open %s for writing.", outfile_arg); + + if (!(infile = fopen(infile_arg, "rb"))) + die("Failed to open %s for reading.", infile_arg); + + if (aom_codec_enc_init(&codec, encoder, &cfg, 0)) + die("Failed to initialize encoder"); + + if (aom_codec_control(&codec, AOME_SET_CPUUSED, speed)) + die_codec(&codec, "Failed to 
set cpu-used"); + + // Encode frames. + while (aom_img_read(&raw, infile)) { + int flags = 0; + if (keyframe_interval > 0 && frame_count % keyframe_interval == 0) + flags |= AOM_EFLAG_FORCE_KF; + encode_frame(&codec, &raw, frame_count++, flags, writer); + frames_encoded++; + if (max_frames > 0 && frames_encoded >= max_frames) break; + } + + // Flush encoder. + while (encode_frame(&codec, NULL, -1, 0, writer)) continue; + + printf("\n"); + fclose(infile); + printf("Processed %d frames.\n", frame_count); + + aom_img_free(&raw); + if (aom_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec."); + + aom_video_writer_close(writer); + + return EXIT_SUCCESS; +} diff --git a/third_party/aom/examples/svc_encoder_rtc.cc b/third_party/aom/examples/svc_encoder_rtc.cc new file mode 100644 index 0000000000..2c041081e5 --- /dev/null +++ b/third_party/aom/examples/svc_encoder_rtc.cc @@ -0,0 +1,2062 @@ +/* + * Copyright (c) 2019, Alliance for Open Media. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// This is an example demonstrating how to implement a multi-layer AOM +// encoding scheme for RTC video applications. 
+ +#include <assert.h> +#include <limits.h> +#include <math.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include <memory> + +#include "config/aom_config.h" + +#if CONFIG_AV1_DECODER +#include "aom/aom_decoder.h" +#endif +#include "aom/aom_encoder.h" +#include "aom/aomcx.h" +#include "common/args.h" +#include "common/tools_common.h" +#include "common/video_writer.h" +#include "examples/encoder_util.h" +#include "aom_ports/aom_timer.h" +#include "av1/ratectrl_rtc.h" + +#define OPTION_BUFFER_SIZE 1024 + +typedef struct { + const char *output_filename; + char options[OPTION_BUFFER_SIZE]; + struct AvxInputContext input_ctx; + int speed; + int aq_mode; + int layering_mode; + int output_obu; + int decode; + int tune_content; + int show_psnr; + bool use_external_rc; +} AppInput; + +typedef enum { + QUANTIZER = 0, + BITRATE, + SCALE_FACTOR, + AUTO_ALT_REF, + ALL_OPTION_TYPES +} LAYER_OPTION_TYPE; + +static const arg_def_t outputfile = + ARG_DEF("o", "output", 1, "Output filename"); +static const arg_def_t frames_arg = + ARG_DEF("f", "frames", 1, "Number of frames to encode"); +static const arg_def_t threads_arg = + ARG_DEF("th", "threads", 1, "Number of threads to use"); +static const arg_def_t width_arg = ARG_DEF("w", "width", 1, "Source width"); +static const arg_def_t height_arg = ARG_DEF("h", "height", 1, "Source height"); +static const arg_def_t timebase_arg = + ARG_DEF("t", "timebase", 1, "Timebase (num/den)"); +static const arg_def_t bitrate_arg = ARG_DEF( + "b", "target-bitrate", 1, "Encoding bitrate, in kilobits per second"); +static const arg_def_t spatial_layers_arg = + ARG_DEF("sl", "spatial-layers", 1, "Number of spatial SVC layers"); +static const arg_def_t temporal_layers_arg = + ARG_DEF("tl", "temporal-layers", 1, "Number of temporal SVC layers"); +static const arg_def_t layering_mode_arg = + ARG_DEF("lm", "layering-mode", 1, "Temporal layering scheme."); +static const arg_def_t kf_dist_arg = + ARG_DEF("k", "kf-dist", 1, "Number of 
frames between keyframes"); +static const arg_def_t scale_factors_arg = + ARG_DEF("r", "scale-factors", 1, "Scale factors (lowest to highest layer)"); +static const arg_def_t min_q_arg = + ARG_DEF(NULL, "min-q", 1, "Minimum quantizer"); +static const arg_def_t max_q_arg = + ARG_DEF(NULL, "max-q", 1, "Maximum quantizer"); +static const arg_def_t speed_arg = + ARG_DEF("sp", "speed", 1, "Speed configuration"); +static const arg_def_t aqmode_arg = + ARG_DEF("aq", "aqmode", 1, "AQ mode off/on"); +static const arg_def_t bitrates_arg = + ARG_DEF("bl", "bitrates", 1, + "Bitrates[spatial_layer * num_temporal_layer + temporal_layer]"); +static const arg_def_t dropframe_thresh_arg = + ARG_DEF(NULL, "drop-frame", 1, "Temporal resampling threshold (buf %)"); +static const arg_def_t error_resilient_arg = + ARG_DEF(NULL, "error-resilient", 1, "Error resilient flag"); +static const arg_def_t output_obu_arg = + ARG_DEF(NULL, "output-obu", 1, + "Write OBUs when set to 1. Otherwise write IVF files."); +static const arg_def_t test_decode_arg = + ARG_DEF(NULL, "test-decode", 1, + "Attempt to test decoding the output when set to 1. Default is 1."); +static const arg_def_t psnr_arg = + ARG_DEF(NULL, "psnr", -1, "Show PSNR in status line."); +static const arg_def_t ext_rc_arg = + ARG_DEF(NULL, "use-ext-rc", 0, "Use external rate control."); +static const struct arg_enum_list tune_content_enum[] = { + { "default", AOM_CONTENT_DEFAULT }, + { "screen", AOM_CONTENT_SCREEN }, + { "film", AOM_CONTENT_FILM }, + { NULL, 0 } +}; +static const arg_def_t tune_content_arg = ARG_DEF_ENUM( + NULL, "tune-content", 1, "Tune content type", tune_content_enum); + +#if CONFIG_AV1_HIGHBITDEPTH +static const struct arg_enum_list bitdepth_enum[] = { { "8", AOM_BITS_8 }, + { "10", AOM_BITS_10 }, + { NULL, 0 } }; + +static const arg_def_t bitdepth_arg = ARG_DEF_ENUM( + "d", "bit-depth", 1, "Bit depth for codec 8 or 10. 
", bitdepth_enum); +#endif // CONFIG_AV1_HIGHBITDEPTH + +static const arg_def_t *svc_args[] = { + &frames_arg, &outputfile, &width_arg, + &height_arg, &timebase_arg, &bitrate_arg, + &spatial_layers_arg, &kf_dist_arg, &scale_factors_arg, + &min_q_arg, &max_q_arg, &temporal_layers_arg, + &layering_mode_arg, &threads_arg, &aqmode_arg, +#if CONFIG_AV1_HIGHBITDEPTH + &bitdepth_arg, +#endif + &speed_arg, &bitrates_arg, &dropframe_thresh_arg, + &error_resilient_arg, &output_obu_arg, &test_decode_arg, + &tune_content_arg, &psnr_arg, NULL, +}; + +#define zero(Dest) memset(&(Dest), 0, sizeof(Dest)) + +static const char *exec_name; + +void usage_exit(void) { + fprintf(stderr, "Usage: %s <options> input_filename -o output_filename\n", + exec_name); + fprintf(stderr, "Options:\n"); + arg_show_usage(stderr, svc_args); + exit(EXIT_FAILURE); +} + +static int file_is_y4m(const char detect[4]) { + return memcmp(detect, "YUV4", 4) == 0; +} + +static int fourcc_is_ivf(const char detect[4]) { + if (memcmp(detect, "DKIF", 4) == 0) { + return 1; + } + return 0; +} + +static const int option_max_values[ALL_OPTION_TYPES] = { 63, INT_MAX, INT_MAX, + 1 }; + +static const int option_min_values[ALL_OPTION_TYPES] = { 0, 0, 1, 0 }; + +static void open_input_file(struct AvxInputContext *input, + aom_chroma_sample_position_t csp) { + /* Parse certain options from the input file, if possible */ + input->file = strcmp(input->filename, "-") ? fopen(input->filename, "rb") + : set_binary_mode(stdin); + + if (!input->file) fatal("Failed to open input file"); + + if (!fseeko(input->file, 0, SEEK_END)) { + /* Input file is seekable. Figure out how long it is, so we can get + * progress info. + */ + input->length = ftello(input->file); + rewind(input->file); + } + + /* Default to 1:1 pixel aspect ratio. */ + input->pixel_aspect_ratio.numerator = 1; + input->pixel_aspect_ratio.denominator = 1; + + /* For RAW input sources, these bytes will applied on the first frame + * in read_frame(). 
+ */ + input->detect.buf_read = fread(input->detect.buf, 1, 4, input->file); + input->detect.position = 0; + + if (input->detect.buf_read == 4 && file_is_y4m(input->detect.buf)) { + if (y4m_input_open(&input->y4m, input->file, input->detect.buf, 4, csp, + input->only_i420) >= 0) { + input->file_type = FILE_TYPE_Y4M; + input->width = input->y4m.pic_w; + input->height = input->y4m.pic_h; + input->pixel_aspect_ratio.numerator = input->y4m.par_n; + input->pixel_aspect_ratio.denominator = input->y4m.par_d; + input->framerate.numerator = input->y4m.fps_n; + input->framerate.denominator = input->y4m.fps_d; + input->fmt = input->y4m.aom_fmt; + input->bit_depth = static_cast<aom_bit_depth_t>(input->y4m.bit_depth); + } else { + fatal("Unsupported Y4M stream."); + } + } else if (input->detect.buf_read == 4 && fourcc_is_ivf(input->detect.buf)) { + fatal("IVF is not supported as input."); + } else { + input->file_type = FILE_TYPE_RAW; + } +} + +static aom_codec_err_t extract_option(LAYER_OPTION_TYPE type, char *input, + int *value0, int *value1) { + if (type == SCALE_FACTOR) { + *value0 = (int)strtol(input, &input, 10); + if (*input++ != '/') return AOM_CODEC_INVALID_PARAM; + *value1 = (int)strtol(input, &input, 10); + + if (*value0 < option_min_values[SCALE_FACTOR] || + *value1 < option_min_values[SCALE_FACTOR] || + *value0 > option_max_values[SCALE_FACTOR] || + *value1 > option_max_values[SCALE_FACTOR] || + *value0 > *value1) // num shouldn't be greater than den + return AOM_CODEC_INVALID_PARAM; + } else { + *value0 = atoi(input); + if (*value0 < option_min_values[type] || *value0 > option_max_values[type]) + return AOM_CODEC_INVALID_PARAM; + } + return AOM_CODEC_OK; +} + +static aom_codec_err_t parse_layer_options_from_string( + aom_svc_params_t *svc_params, LAYER_OPTION_TYPE type, const char *input, + int *option0, int *option1) { + aom_codec_err_t res = AOM_CODEC_OK; + char *input_string; + char *token; + const char *delim = ","; + int num_layers = 
svc_params->number_spatial_layers; + int i = 0; + + if (type == BITRATE) + num_layers = + svc_params->number_spatial_layers * svc_params->number_temporal_layers; + + if (input == NULL || option0 == NULL || + (option1 == NULL && type == SCALE_FACTOR)) + return AOM_CODEC_INVALID_PARAM; + + const size_t input_length = strlen(input); + input_string = reinterpret_cast<char *>(malloc(input_length + 1)); + if (input_string == NULL) return AOM_CODEC_MEM_ERROR; + memcpy(input_string, input, input_length + 1); + token = strtok(input_string, delim); // NOLINT + for (i = 0; i < num_layers; ++i) { + if (token != NULL) { + res = extract_option(type, token, option0 + i, option1 + i); + if (res != AOM_CODEC_OK) break; + token = strtok(NULL, delim); // NOLINT + } else { + res = AOM_CODEC_INVALID_PARAM; + break; + } + } + free(input_string); + return res; +} + +static void parse_command_line(int argc, const char **argv_, + AppInput *app_input, + aom_svc_params_t *svc_params, + aom_codec_enc_cfg_t *enc_cfg) { + struct arg arg; + char **argv = NULL; + char **argi = NULL; + char **argj = NULL; + char string_options[1024] = { 0 }; + + // Default settings + svc_params->number_spatial_layers = 1; + svc_params->number_temporal_layers = 1; + app_input->layering_mode = 0; + app_input->output_obu = 0; + app_input->decode = 1; + enc_cfg->g_threads = 1; + enc_cfg->rc_end_usage = AOM_CBR; + + // process command line options + argv = argv_dup(argc - 1, argv_ + 1); + if (!argv) { + fprintf(stderr, "Error allocating argument list\n"); + exit(EXIT_FAILURE); + } + for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step) { + arg.argv_step = 1; + + if (arg_match(&arg, &outputfile, argi)) { + app_input->output_filename = arg.val; + } else if (arg_match(&arg, &width_arg, argi)) { + enc_cfg->g_w = arg_parse_uint(&arg); + } else if (arg_match(&arg, &height_arg, argi)) { + enc_cfg->g_h = arg_parse_uint(&arg); + } else if (arg_match(&arg, &timebase_arg, argi)) { + enc_cfg->g_timebase = 
arg_parse_rational(&arg); + } else if (arg_match(&arg, &bitrate_arg, argi)) { + enc_cfg->rc_target_bitrate = arg_parse_uint(&arg); + } else if (arg_match(&arg, &spatial_layers_arg, argi)) { + svc_params->number_spatial_layers = arg_parse_uint(&arg); + } else if (arg_match(&arg, &temporal_layers_arg, argi)) { + svc_params->number_temporal_layers = arg_parse_uint(&arg); + } else if (arg_match(&arg, &speed_arg, argi)) { + app_input->speed = arg_parse_uint(&arg); + if (app_input->speed > 11) { + aom_tools_warn("Mapping speed %d to speed 11.\n", app_input->speed); + } + } else if (arg_match(&arg, &aqmode_arg, argi)) { + app_input->aq_mode = arg_parse_uint(&arg); + } else if (arg_match(&arg, &threads_arg, argi)) { + enc_cfg->g_threads = arg_parse_uint(&arg); + } else if (arg_match(&arg, &layering_mode_arg, argi)) { + app_input->layering_mode = arg_parse_int(&arg); + } else if (arg_match(&arg, &kf_dist_arg, argi)) { + enc_cfg->kf_min_dist = arg_parse_uint(&arg); + enc_cfg->kf_max_dist = enc_cfg->kf_min_dist; + } else if (arg_match(&arg, &scale_factors_arg, argi)) { + aom_codec_err_t res = parse_layer_options_from_string( + svc_params, SCALE_FACTOR, arg.val, svc_params->scaling_factor_num, + svc_params->scaling_factor_den); + if (res != AOM_CODEC_OK) { + die("Failed to parse scale factors: %s\n", + aom_codec_err_to_string(res)); + } + } else if (arg_match(&arg, &min_q_arg, argi)) { + enc_cfg->rc_min_quantizer = arg_parse_uint(&arg); + } else if (arg_match(&arg, &max_q_arg, argi)) { + enc_cfg->rc_max_quantizer = arg_parse_uint(&arg); +#if CONFIG_AV1_HIGHBITDEPTH + } else if (arg_match(&arg, &bitdepth_arg, argi)) { + enc_cfg->g_bit_depth = + static_cast<aom_bit_depth_t>(arg_parse_enum_or_int(&arg)); + switch (enc_cfg->g_bit_depth) { + case AOM_BITS_8: + enc_cfg->g_input_bit_depth = 8; + enc_cfg->g_profile = 0; + break; + case AOM_BITS_10: + enc_cfg->g_input_bit_depth = 10; + enc_cfg->g_profile = 0; + break; + default: + die("Error: Invalid bit depth selected (%d)\n", 
enc_cfg->g_bit_depth); + } +#endif // CONFIG_VP9_HIGHBITDEPTH + } else if (arg_match(&arg, &dropframe_thresh_arg, argi)) { + enc_cfg->rc_dropframe_thresh = arg_parse_uint(&arg); + } else if (arg_match(&arg, &error_resilient_arg, argi)) { + enc_cfg->g_error_resilient = arg_parse_uint(&arg); + if (enc_cfg->g_error_resilient != 0 && enc_cfg->g_error_resilient != 1) + die("Invalid value for error resilient (0, 1): %d.", + enc_cfg->g_error_resilient); + } else if (arg_match(&arg, &output_obu_arg, argi)) { + app_input->output_obu = arg_parse_uint(&arg); + if (app_input->output_obu != 0 && app_input->output_obu != 1) + die("Invalid value for obu output flag (0, 1): %d.", + app_input->output_obu); + } else if (arg_match(&arg, &test_decode_arg, argi)) { + app_input->decode = arg_parse_uint(&arg); + if (app_input->decode != 0 && app_input->decode != 1) + die("Invalid value for test decode flag (0, 1): %d.", + app_input->decode); + } else if (arg_match(&arg, &tune_content_arg, argi)) { + app_input->tune_content = arg_parse_enum_or_int(&arg); + printf("tune content %d\n", app_input->tune_content); + } else if (arg_match(&arg, &psnr_arg, argi)) { + app_input->show_psnr = 1; + } else if (arg_match(&arg, &ext_rc_arg, argi)) { + app_input->use_external_rc = true; + } else { + ++argj; + } + } + + // Total bitrate needs to be parsed after the number of layers. 
+ for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step) { + arg.argv_step = 1; + if (arg_match(&arg, &bitrates_arg, argi)) { + aom_codec_err_t res = parse_layer_options_from_string( + svc_params, BITRATE, arg.val, svc_params->layer_target_bitrate, NULL); + if (res != AOM_CODEC_OK) { + die("Failed to parse bitrates: %s\n", aom_codec_err_to_string(res)); + } + } else { + ++argj; + } + } + + // There will be a space in front of the string options + if (strlen(string_options) > 0) + strncpy(app_input->options, string_options, OPTION_BUFFER_SIZE); + + // Check for unrecognized options + for (argi = argv; *argi; ++argi) + if (argi[0][0] == '-' && strlen(argi[0]) > 1) + die("Error: Unrecognized option %s\n", *argi); + + if (argv[0] == NULL) { + usage_exit(); + } + + app_input->input_ctx.filename = argv[0]; + free(argv); + + open_input_file(&app_input->input_ctx, AOM_CSP_UNKNOWN); + if (app_input->input_ctx.file_type == FILE_TYPE_Y4M) { + enc_cfg->g_w = app_input->input_ctx.width; + enc_cfg->g_h = app_input->input_ctx.height; + } + + if (enc_cfg->g_w < 16 || enc_cfg->g_w % 2 || enc_cfg->g_h < 16 || + enc_cfg->g_h % 2) + die("Invalid resolution: %d x %d\n", enc_cfg->g_w, enc_cfg->g_h); + + printf( + "Codec %s\n" + "layers: %d\n" + "width %u, height: %u\n" + "num: %d, den: %d, bitrate: %u\n" + "gop size: %u\n", + aom_codec_iface_name(aom_codec_av1_cx()), + svc_params->number_spatial_layers, enc_cfg->g_w, enc_cfg->g_h, + enc_cfg->g_timebase.num, enc_cfg->g_timebase.den, + enc_cfg->rc_target_bitrate, enc_cfg->kf_max_dist); +} + +static int mode_to_num_temporal_layers[12] = { + 1, 2, 3, 3, 2, 1, 1, 3, 3, 3, 3, 3, +}; +static int mode_to_num_spatial_layers[12] = { + 1, 1, 1, 1, 1, 2, 3, 2, 3, 3, 3, 3, +}; + +// For rate control encoding stats. +struct RateControlMetrics { + // Number of input frames per layer. + int layer_input_frames[AOM_MAX_TS_LAYERS]; + // Number of encoded non-key frames per layer. 
+ int layer_enc_frames[AOM_MAX_TS_LAYERS]; + // Framerate per layer layer (cumulative). + double layer_framerate[AOM_MAX_TS_LAYERS]; + // Target average frame size per layer (per-frame-bandwidth per layer). + double layer_pfb[AOM_MAX_LAYERS]; + // Actual average frame size per layer. + double layer_avg_frame_size[AOM_MAX_LAYERS]; + // Average rate mismatch per layer (|target - actual| / target). + double layer_avg_rate_mismatch[AOM_MAX_LAYERS]; + // Actual encoding bitrate per layer (cumulative across temporal layers). + double layer_encoding_bitrate[AOM_MAX_LAYERS]; + // Average of the short-time encoder actual bitrate. + // TODO(marpan): Should we add these short-time stats for each layer? + double avg_st_encoding_bitrate; + // Variance of the short-time encoder actual bitrate. + double variance_st_encoding_bitrate; + // Window (number of frames) for computing short-timee encoding bitrate. + int window_size; + // Number of window measurements. + int window_count; + int layer_target_bitrate[AOM_MAX_LAYERS]; +}; + +static const int REF_FRAMES = 8; + +static const int INTER_REFS_PER_FRAME = 7; + +// Reference frames used in this example encoder. +enum { + SVC_LAST_FRAME = 0, + SVC_LAST2_FRAME, + SVC_LAST3_FRAME, + SVC_GOLDEN_FRAME, + SVC_BWDREF_FRAME, + SVC_ALTREF2_FRAME, + SVC_ALTREF_FRAME +}; + +static int read_frame(struct AvxInputContext *input_ctx, aom_image_t *img) { + FILE *f = input_ctx->file; + y4m_input *y4m = &input_ctx->y4m; + int shortread = 0; + + if (input_ctx->file_type == FILE_TYPE_Y4M) { + if (y4m_input_fetch_frame(y4m, f, img) < 1) return 0; + } else { + shortread = read_yuv_frame(input_ctx, img); + } + + return !shortread; +} + +static void close_input_file(struct AvxInputContext *input) { + fclose(input->file); + if (input->file_type == FILE_TYPE_Y4M) y4m_input_close(&input->y4m); +} + +// Note: these rate control metrics assume only 1 key frame in the +// sequence (i.e., first frame only). 
So for temporal pattern# 7 +// (which has key frame for every frame on base layer), the metrics +// computation will be off/wrong. +// TODO(marpan): Update these metrics to account for multiple key frames +// in the stream. +static void set_rate_control_metrics(struct RateControlMetrics *rc, + double framerate, int ss_number_layers, + int ts_number_layers) { + int ts_rate_decimator[AOM_MAX_TS_LAYERS] = { 1 }; + ts_rate_decimator[0] = 1; + if (ts_number_layers == 2) { + ts_rate_decimator[0] = 2; + ts_rate_decimator[1] = 1; + } + if (ts_number_layers == 3) { + ts_rate_decimator[0] = 4; + ts_rate_decimator[1] = 2; + ts_rate_decimator[2] = 1; + } + // Set the layer (cumulative) framerate and the target layer (non-cumulative) + // per-frame-bandwidth, for the rate control encoding stats below. + for (int sl = 0; sl < ss_number_layers; ++sl) { + int i = sl * ts_number_layers; + rc->layer_framerate[0] = framerate / ts_rate_decimator[0]; + rc->layer_pfb[i] = + 1000.0 * rc->layer_target_bitrate[i] / rc->layer_framerate[0]; + for (int tl = 0; tl < ts_number_layers; ++tl) { + i = sl * ts_number_layers + tl; + if (tl > 0) { + rc->layer_framerate[tl] = framerate / ts_rate_decimator[tl]; + rc->layer_pfb[i] = + 1000.0 * + (rc->layer_target_bitrate[i] - rc->layer_target_bitrate[i - 1]) / + (rc->layer_framerate[tl] - rc->layer_framerate[tl - 1]); + } + rc->layer_input_frames[tl] = 0; + rc->layer_enc_frames[tl] = 0; + rc->layer_encoding_bitrate[i] = 0.0; + rc->layer_avg_frame_size[i] = 0.0; + rc->layer_avg_rate_mismatch[i] = 0.0; + } + } + rc->window_count = 0; + rc->window_size = 15; + rc->avg_st_encoding_bitrate = 0.0; + rc->variance_st_encoding_bitrate = 0.0; +} + +static void printout_rate_control_summary(struct RateControlMetrics *rc, + int frame_cnt, int ss_number_layers, + int ts_number_layers) { + int tot_num_frames = 0; + double perc_fluctuation = 0.0; + printf("Total number of processed frames: %d\n\n", frame_cnt - 1); + printf("Rate control layer stats for %d 
layer(s):\n\n", ts_number_layers); + for (int sl = 0; sl < ss_number_layers; ++sl) { + tot_num_frames = 0; + for (int tl = 0; tl < ts_number_layers; ++tl) { + int i = sl * ts_number_layers + tl; + const int num_dropped = + tl > 0 ? rc->layer_input_frames[tl] - rc->layer_enc_frames[tl] + : rc->layer_input_frames[tl] - rc->layer_enc_frames[tl] - 1; + tot_num_frames += rc->layer_input_frames[tl]; + rc->layer_encoding_bitrate[i] = 0.001 * rc->layer_framerate[tl] * + rc->layer_encoding_bitrate[i] / + tot_num_frames; + rc->layer_avg_frame_size[i] = + rc->layer_avg_frame_size[i] / rc->layer_enc_frames[tl]; + rc->layer_avg_rate_mismatch[i] = + 100.0 * rc->layer_avg_rate_mismatch[i] / rc->layer_enc_frames[tl]; + printf("For layer#: %d %d \n", sl, tl); + printf("Bitrate (target vs actual): %d %f\n", rc->layer_target_bitrate[i], + rc->layer_encoding_bitrate[i]); + printf("Average frame size (target vs actual): %f %f\n", rc->layer_pfb[i], + rc->layer_avg_frame_size[i]); + printf("Average rate_mismatch: %f\n", rc->layer_avg_rate_mismatch[i]); + printf( + "Number of input frames, encoded (non-key) frames, " + "and perc dropped frames: %d %d %f\n", + rc->layer_input_frames[tl], rc->layer_enc_frames[tl], + 100.0 * num_dropped / rc->layer_input_frames[tl]); + printf("\n"); + } + } + rc->avg_st_encoding_bitrate = rc->avg_st_encoding_bitrate / rc->window_count; + rc->variance_st_encoding_bitrate = + rc->variance_st_encoding_bitrate / rc->window_count - + (rc->avg_st_encoding_bitrate * rc->avg_st_encoding_bitrate); + perc_fluctuation = 100.0 * sqrt(rc->variance_st_encoding_bitrate) / + rc->avg_st_encoding_bitrate; + printf("Short-time stats, for window of %d frames:\n", rc->window_size); + printf("Average, rms-variance, and percent-fluct: %f %f %f\n", + rc->avg_st_encoding_bitrate, sqrt(rc->variance_st_encoding_bitrate), + perc_fluctuation); + if (frame_cnt - 1 != tot_num_frames) + die("Error: Number of input frames not equal to output!\n"); +} + +// Layer pattern configuration. 
+static void set_layer_pattern( + int layering_mode, int superframe_cnt, aom_svc_layer_id_t *layer_id, + aom_svc_ref_frame_config_t *ref_frame_config, + aom_svc_ref_frame_comp_pred_t *ref_frame_comp_pred, int *use_svc_control, + int spatial_layer_id, int is_key_frame, int ksvc_mode, int speed) { + // Setting this flag to 1 enables simplex example of + // RPS (Reference Picture Selection) for 1 layer. + int use_rps_example = 0; + int i; + int enable_longterm_temporal_ref = 1; + int shift = (layering_mode == 8) ? 2 : 0; + int simulcast_mode = (layering_mode == 11); + *use_svc_control = 1; + layer_id->spatial_layer_id = spatial_layer_id; + int lag_index = 0; + int base_count = superframe_cnt >> 2; + ref_frame_comp_pred->use_comp_pred[0] = 0; // GOLDEN_LAST + ref_frame_comp_pred->use_comp_pred[1] = 0; // LAST2_LAST + ref_frame_comp_pred->use_comp_pred[2] = 0; // ALTREF_LAST + // Set the reference map buffer idx for the 7 references: + // LAST_FRAME (0), LAST2_FRAME(1), LAST3_FRAME(2), GOLDEN_FRAME(3), + // BWDREF_FRAME(4), ALTREF2_FRAME(5), ALTREF_FRAME(6). + for (i = 0; i < INTER_REFS_PER_FRAME; i++) ref_frame_config->ref_idx[i] = i; + for (i = 0; i < INTER_REFS_PER_FRAME; i++) ref_frame_config->reference[i] = 0; + for (i = 0; i < REF_FRAMES; i++) ref_frame_config->refresh[i] = 0; + + if (ksvc_mode) { + // Same pattern as case 9, but the reference strucutre will be constrained + // below. + layering_mode = 9; + } + switch (layering_mode) { + case 0: + if (use_rps_example == 0) { + // 1-layer: update LAST on every frame, reference LAST. + layer_id->temporal_layer_id = 0; + layer_id->spatial_layer_id = 0; + ref_frame_config->refresh[0] = 1; + ref_frame_config->reference[SVC_LAST_FRAME] = 1; + } else { + // Pattern of 2 references (ALTREF and GOLDEN) trailing + // LAST by 4 and 8 frames, with some switching logic to + // sometimes only predict from the longer-term reference + //(golden here). This is simple example to test RPS + // (reference picture selection). 
+ int last_idx = 0; + int last_idx_refresh = 0; + int gld_idx = 0; + int alt_ref_idx = 0; + int lag_alt = 4; + int lag_gld = 8; + layer_id->temporal_layer_id = 0; + layer_id->spatial_layer_id = 0; + int sh = 8; // slots 0 - 7. + // Moving index slot for last: 0 - (sh - 1) + if (superframe_cnt > 1) last_idx = (superframe_cnt - 1) % sh; + // Moving index for refresh of last: one ahead for next frame. + last_idx_refresh = superframe_cnt % sh; + // Moving index for gld_ref, lag behind current by lag_gld + if (superframe_cnt > lag_gld) gld_idx = (superframe_cnt - lag_gld) % sh; + // Moving index for alt_ref, lag behind LAST by lag_alt frames. + if (superframe_cnt > lag_alt) + alt_ref_idx = (superframe_cnt - lag_alt) % sh; + // Set the ref_idx. + // Default all references to slot for last. + for (i = 0; i < INTER_REFS_PER_FRAME; i++) + ref_frame_config->ref_idx[i] = last_idx; + // Set the ref_idx for the relevant references. + ref_frame_config->ref_idx[SVC_LAST_FRAME] = last_idx; + ref_frame_config->ref_idx[SVC_LAST2_FRAME] = last_idx_refresh; + ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = gld_idx; + ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = alt_ref_idx; + // Refresh this slot, which will become LAST on next frame. + ref_frame_config->refresh[last_idx_refresh] = 1; + // Reference LAST, ALTREF, and GOLDEN + ref_frame_config->reference[SVC_LAST_FRAME] = 1; + ref_frame_config->reference[SVC_ALTREF_FRAME] = 1; + ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1; + // Switch to only GOLDEN every 300 frames. + if (superframe_cnt % 200 == 0 && superframe_cnt > 0) { + ref_frame_config->reference[SVC_LAST_FRAME] = 0; + ref_frame_config->reference[SVC_ALTREF_FRAME] = 0; + ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1; + // Test if the long-term is LAST instead, this is just a renaming + // but its tests if encoder behaves the same, whether its + // LAST or GOLDEN. 
+ if (superframe_cnt % 400 == 0 && superframe_cnt > 0) { + ref_frame_config->ref_idx[SVC_LAST_FRAME] = gld_idx; + ref_frame_config->reference[SVC_LAST_FRAME] = 1; + ref_frame_config->reference[SVC_ALTREF_FRAME] = 0; + ref_frame_config->reference[SVC_GOLDEN_FRAME] = 0; + } + } + } + break; + case 1: + // 2-temporal layer. + // 1 3 5 + // 0 2 4 + // Keep golden fixed at slot 3. + base_count = superframe_cnt >> 1; + ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3; + // Cyclically refresh slots 5, 6, 7, for lag alt ref. + lag_index = 5; + if (base_count > 0) { + lag_index = 5 + (base_count % 3); + if (superframe_cnt % 2 != 0) lag_index = 5 + ((base_count + 1) % 3); + } + // Set the altref slot to lag_index. + ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = lag_index; + if (superframe_cnt % 2 == 0) { + layer_id->temporal_layer_id = 0; + // Update LAST on layer 0, reference LAST. + ref_frame_config->refresh[0] = 1; + ref_frame_config->reference[SVC_LAST_FRAME] = 1; + // Refresh lag_index slot, needed for lagging golen. + ref_frame_config->refresh[lag_index] = 1; + // Refresh GOLDEN every x base layer frames. + if (base_count % 32 == 0) ref_frame_config->refresh[3] = 1; + } else { + layer_id->temporal_layer_id = 1; + // No updates on layer 1, reference LAST (TL0). + ref_frame_config->reference[SVC_LAST_FRAME] = 1; + } + // Always reference golden and altref on TL0. + if (layer_id->temporal_layer_id == 0) { + ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1; + ref_frame_config->reference[SVC_ALTREF_FRAME] = 1; + } + break; + case 2: + // 3-temporal layer: + // 1 3 5 7 + // 2 6 + // 0 4 8 + if (superframe_cnt % 4 == 0) { + // Base layer. + layer_id->temporal_layer_id = 0; + // Update LAST on layer 0, reference LAST. + ref_frame_config->refresh[0] = 1; + ref_frame_config->reference[SVC_LAST_FRAME] = 1; + } else if ((superframe_cnt - 1) % 4 == 0) { + layer_id->temporal_layer_id = 2; + // First top layer: no updates, only reference LAST (TL0). 
+ ref_frame_config->reference[SVC_LAST_FRAME] = 1; + } else if ((superframe_cnt - 2) % 4 == 0) { + layer_id->temporal_layer_id = 1; + // Middle layer (TL1): update LAST2, only reference LAST (TL0). + ref_frame_config->refresh[1] = 1; + ref_frame_config->reference[SVC_LAST_FRAME] = 1; + } else if ((superframe_cnt - 3) % 4 == 0) { + layer_id->temporal_layer_id = 2; + // Second top layer: no updates, only reference LAST. + // Set buffer idx for LAST to slot 1, since that was the slot + // updated in previous frame. So LAST is TL1 frame. + ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1; + ref_frame_config->ref_idx[SVC_LAST2_FRAME] = 0; + ref_frame_config->reference[SVC_LAST_FRAME] = 1; + } + break; + case 3: + // 3 TL, same as above, except allow for predicting + // off 2 more references (GOLDEN and ALTREF), with + // GOLDEN updated periodically, and ALTREF lagging from + // LAST from ~4 frames. Both GOLDEN and ALTREF + // can only be updated on base temporal layer. + + // Keep golden fixed at slot 3. + ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3; + // Cyclically refresh slots 5, 6, 7, for lag altref. + lag_index = 5; + if (base_count > 0) { + lag_index = 5 + (base_count % 3); + if (superframe_cnt % 4 != 0) lag_index = 5 + ((base_count + 1) % 3); + } + // Set the altref slot to lag_index. + ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = lag_index; + if (superframe_cnt % 4 == 0) { + // Base layer. + layer_id->temporal_layer_id = 0; + // Update LAST on layer 0, reference LAST. + ref_frame_config->refresh[0] = 1; + ref_frame_config->reference[SVC_LAST_FRAME] = 1; + // Refresh GOLDEN every x ~10 base layer frames. + if (base_count % 10 == 0) ref_frame_config->refresh[3] = 1; + // Refresh lag_index slot, needed for lagging altref. + ref_frame_config->refresh[lag_index] = 1; + } else if ((superframe_cnt - 1) % 4 == 0) { + layer_id->temporal_layer_id = 2; + // First top layer: no updates, only reference LAST (TL0). 
+ ref_frame_config->reference[SVC_LAST_FRAME] = 1; + } else if ((superframe_cnt - 2) % 4 == 0) { + layer_id->temporal_layer_id = 1; + // Middle layer (TL1): update LAST2, only reference LAST (TL0). + ref_frame_config->refresh[1] = 1; + ref_frame_config->reference[SVC_LAST_FRAME] = 1; + } else if ((superframe_cnt - 3) % 4 == 0) { + layer_id->temporal_layer_id = 2; + // Second top layer: no updates, only reference LAST. + // Set buffer idx for LAST to slot 1, since that was the slot + // updated in previous frame. So LAST is TL1 frame. + ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1; + ref_frame_config->ref_idx[SVC_LAST2_FRAME] = 0; + ref_frame_config->reference[SVC_LAST_FRAME] = 1; + } + // Every frame can reference GOLDEN AND ALTREF. + ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1; + ref_frame_config->reference[SVC_ALTREF_FRAME] = 1; + // Allow for compound prediction for LAST-ALTREF and LAST-GOLDEN. + if (speed >= 7) { + ref_frame_comp_pred->use_comp_pred[2] = 1; + ref_frame_comp_pred->use_comp_pred[0] = 1; + } + break; + case 4: + // 3-temporal layer: but middle layer updates GF, so 2nd TL2 will + // only reference GF (not LAST). Other frames only reference LAST. + // 1 3 5 7 + // 2 6 + // 0 4 8 + if (superframe_cnt % 4 == 0) { + // Base layer. + layer_id->temporal_layer_id = 0; + // Update LAST on layer 0, only reference LAST. + ref_frame_config->refresh[0] = 1; + ref_frame_config->reference[SVC_LAST_FRAME] = 1; + } else if ((superframe_cnt - 1) % 4 == 0) { + layer_id->temporal_layer_id = 2; + // First top layer: no updates, only reference LAST (TL0). + ref_frame_config->reference[SVC_LAST_FRAME] = 1; + } else if ((superframe_cnt - 2) % 4 == 0) { + layer_id->temporal_layer_id = 1; + // Middle layer (TL1): update GF, only reference LAST (TL0). 
+ ref_frame_config->refresh[3] = 1; + ref_frame_config->reference[SVC_LAST_FRAME] = 1; + } else if ((superframe_cnt - 3) % 4 == 0) { + layer_id->temporal_layer_id = 2; + // Second top layer: no updates, only reference GF. + ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1; + } + break; + case 5: + // 2 spatial layers, 1 temporal. + layer_id->temporal_layer_id = 0; + if (layer_id->spatial_layer_id == 0) { + // Reference LAST, update LAST. + ref_frame_config->refresh[0] = 1; + ref_frame_config->reference[SVC_LAST_FRAME] = 1; + } else if (layer_id->spatial_layer_id == 1) { + // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1 + // and GOLDEN to slot 0. Update slot 1 (LAST). + ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1; + ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 0; + ref_frame_config->refresh[1] = 1; + ref_frame_config->reference[SVC_LAST_FRAME] = 1; + ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1; + } + break; + case 6: + // 3 spatial layers, 1 temporal. + // Note for this case, we set the buffer idx for all references to be + // either LAST or GOLDEN, which are always valid references, since decoder + // will check if any of the 7 references is valid scale in + // valid_ref_frame_size(). + layer_id->temporal_layer_id = 0; + if (layer_id->spatial_layer_id == 0) { + // Reference LAST, update LAST. Set all buffer_idx to 0. + for (i = 0; i < INTER_REFS_PER_FRAME; i++) + ref_frame_config->ref_idx[i] = 0; + ref_frame_config->refresh[0] = 1; + ref_frame_config->reference[SVC_LAST_FRAME] = 1; + } else if (layer_id->spatial_layer_id == 1) { + // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1 + // and GOLDEN (and all other refs) to slot 0. + // Update slot 1 (LAST). 
+ for (i = 0; i < INTER_REFS_PER_FRAME; i++) + ref_frame_config->ref_idx[i] = 0; + ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1; + ref_frame_config->refresh[1] = 1; + ref_frame_config->reference[SVC_LAST_FRAME] = 1; + ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1; + } else if (layer_id->spatial_layer_id == 2) { + // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 2 + // and GOLDEN (and all other refs) to slot 1. + // Update slot 2 (LAST). + for (i = 0; i < INTER_REFS_PER_FRAME; i++) + ref_frame_config->ref_idx[i] = 1; + ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2; + ref_frame_config->refresh[2] = 1; + ref_frame_config->reference[SVC_LAST_FRAME] = 1; + ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1; + // For 3 spatial layer case: allow for top spatial layer to use + // additional temporal reference. Update every 10 frames. + if (enable_longterm_temporal_ref) { + ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = REF_FRAMES - 1; + ref_frame_config->reference[SVC_ALTREF_FRAME] = 1; + if (base_count % 10 == 0) + ref_frame_config->refresh[REF_FRAMES - 1] = 1; + } + } + break; + case 7: + // 2 spatial and 3 temporal layer. + ref_frame_config->reference[SVC_LAST_FRAME] = 1; + if (superframe_cnt % 4 == 0) { + // Base temporal layer + layer_id->temporal_layer_id = 0; + if (layer_id->spatial_layer_id == 0) { + // Reference LAST, update LAST + // Set all buffer_idx to 0 + for (i = 0; i < INTER_REFS_PER_FRAME; i++) + ref_frame_config->ref_idx[i] = 0; + ref_frame_config->refresh[0] = 1; + } else if (layer_id->spatial_layer_id == 1) { + // Reference LAST and GOLDEN. + for (i = 0; i < INTER_REFS_PER_FRAME; i++) + ref_frame_config->ref_idx[i] = 0; + ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1; + ref_frame_config->refresh[1] = 1; + } + } else if ((superframe_cnt - 1) % 4 == 0) { + // First top temporal enhancement layer. 
+ layer_id->temporal_layer_id = 2; + if (layer_id->spatial_layer_id == 0) { + for (i = 0; i < INTER_REFS_PER_FRAME; i++) + ref_frame_config->ref_idx[i] = 0; + ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3; + ref_frame_config->refresh[3] = 1; + } else if (layer_id->spatial_layer_id == 1) { + // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1, + // GOLDEN (and all other refs) to slot 3. + // No update. + for (i = 0; i < INTER_REFS_PER_FRAME; i++) + ref_frame_config->ref_idx[i] = 3; + ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1; + } + } else if ((superframe_cnt - 2) % 4 == 0) { + // Middle temporal enhancement layer. + layer_id->temporal_layer_id = 1; + if (layer_id->spatial_layer_id == 0) { + // Reference LAST. + // Set all buffer_idx to 0. + // Set GOLDEN to slot 5 and update slot 5. + for (i = 0; i < INTER_REFS_PER_FRAME; i++) + ref_frame_config->ref_idx[i] = 0; + ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 5 - shift; + ref_frame_config->refresh[5 - shift] = 1; + } else if (layer_id->spatial_layer_id == 1) { + // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1, + // GOLDEN (and all other refs) to slot 5. + // Set LAST3 to slot 6 and update slot 6. + for (i = 0; i < INTER_REFS_PER_FRAME; i++) + ref_frame_config->ref_idx[i] = 5 - shift; + ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1; + ref_frame_config->ref_idx[SVC_LAST3_FRAME] = 6 - shift; + ref_frame_config->refresh[6 - shift] = 1; + } + } else if ((superframe_cnt - 3) % 4 == 0) { + // Second top temporal enhancement layer. + layer_id->temporal_layer_id = 2; + if (layer_id->spatial_layer_id == 0) { + // Set LAST to slot 5 and reference LAST. + // Set GOLDEN to slot 3 and update slot 3. + // Set all other buffer_idx to 0. 
+ for (i = 0; i < INTER_REFS_PER_FRAME; i++) + ref_frame_config->ref_idx[i] = 0; + ref_frame_config->ref_idx[SVC_LAST_FRAME] = 5 - shift; + ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3; + ref_frame_config->refresh[3] = 1; + } else if (layer_id->spatial_layer_id == 1) { + // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 6, + // GOLDEN to slot 3. No update. + for (i = 0; i < INTER_REFS_PER_FRAME; i++) + ref_frame_config->ref_idx[i] = 0; + ref_frame_config->ref_idx[SVC_LAST_FRAME] = 6 - shift; + ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3; + } + } + break; + case 8: + // 3 spatial and 3 temporal layer. + // Same as case 9 but overalap in the buffer slot updates. + // (shift = 2). The slots 3 and 4 updated by first TL2 are + // reused for update in TL1 superframe. + // Note for this case, frame order hint must be disabled for + // lower resolutios (operating points > 0) to be decoedable. + case 9: + // 3 spatial and 3 temporal layer. + // No overlap in buffer updates between TL2 and TL1. + // TL2 updates slot 3 and 4, TL1 updates 5, 6, 7. + // Set the references via the svc_ref_frame_config control. + // Always reference LAST. + ref_frame_config->reference[SVC_LAST_FRAME] = 1; + if (superframe_cnt % 4 == 0) { + // Base temporal layer. + layer_id->temporal_layer_id = 0; + if (layer_id->spatial_layer_id == 0) { + // Reference LAST, update LAST. + // Set all buffer_idx to 0. + for (i = 0; i < INTER_REFS_PER_FRAME; i++) + ref_frame_config->ref_idx[i] = 0; + ref_frame_config->refresh[0] = 1; + } else if (layer_id->spatial_layer_id == 1) { + // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1, + // GOLDEN (and all other refs) to slot 0. + // Update slot 1 (LAST). + for (i = 0; i < INTER_REFS_PER_FRAME; i++) + ref_frame_config->ref_idx[i] = 0; + ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1; + ref_frame_config->refresh[1] = 1; + } else if (layer_id->spatial_layer_id == 2) { + // Reference LAST and GOLDEN. 
Set buffer_idx for LAST to slot 2, + // GOLDEN (and all other refs) to slot 1. + // Update slot 2 (LAST). + for (i = 0; i < INTER_REFS_PER_FRAME; i++) + ref_frame_config->ref_idx[i] = 1; + ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2; + ref_frame_config->refresh[2] = 1; + } + } else if ((superframe_cnt - 1) % 4 == 0) { + // First top temporal enhancement layer. + layer_id->temporal_layer_id = 2; + if (layer_id->spatial_layer_id == 0) { + // Reference LAST (slot 0). + // Set GOLDEN to slot 3 and update slot 3. + // Set all other buffer_idx to slot 0. + for (i = 0; i < INTER_REFS_PER_FRAME; i++) + ref_frame_config->ref_idx[i] = 0; + ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3; + ref_frame_config->refresh[3] = 1; + } else if (layer_id->spatial_layer_id == 1) { + // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1, + // GOLDEN (and all other refs) to slot 3. + // Set LAST2 to slot 4 and Update slot 4. + for (i = 0; i < INTER_REFS_PER_FRAME; i++) + ref_frame_config->ref_idx[i] = 3; + ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1; + ref_frame_config->ref_idx[SVC_LAST2_FRAME] = 4; + ref_frame_config->refresh[4] = 1; + } else if (layer_id->spatial_layer_id == 2) { + // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 2, + // GOLDEN (and all other refs) to slot 4. + // No update. + for (i = 0; i < INTER_REFS_PER_FRAME; i++) + ref_frame_config->ref_idx[i] = 4; + ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2; + } + } else if ((superframe_cnt - 2) % 4 == 0) { + // Middle temporal enhancement layer. + layer_id->temporal_layer_id = 1; + if (layer_id->spatial_layer_id == 0) { + // Reference LAST. + // Set all buffer_idx to 0. + // Set GOLDEN to slot 5 and update slot 5. + for (i = 0; i < INTER_REFS_PER_FRAME; i++) + ref_frame_config->ref_idx[i] = 0; + ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 5 - shift; + ref_frame_config->refresh[5 - shift] = 1; + } else if (layer_id->spatial_layer_id == 1) { + // Reference LAST and GOLDEN. 
Set buffer_idx for LAST to slot 1, + // GOLDEN (and all other refs) to slot 5. + // Set LAST3 to slot 6 and update slot 6. + for (i = 0; i < INTER_REFS_PER_FRAME; i++) + ref_frame_config->ref_idx[i] = 5 - shift; + ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1; + ref_frame_config->ref_idx[SVC_LAST3_FRAME] = 6 - shift; + ref_frame_config->refresh[6 - shift] = 1; + } else if (layer_id->spatial_layer_id == 2) { + // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 2, + // GOLDEN (and all other refs) to slot 6. + // Set LAST3 to slot 7 and update slot 7. + for (i = 0; i < INTER_REFS_PER_FRAME; i++) + ref_frame_config->ref_idx[i] = 6 - shift; + ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2; + ref_frame_config->ref_idx[SVC_LAST3_FRAME] = 7 - shift; + ref_frame_config->refresh[7 - shift] = 1; + } + } else if ((superframe_cnt - 3) % 4 == 0) { + // Second top temporal enhancement layer. + layer_id->temporal_layer_id = 2; + if (layer_id->spatial_layer_id == 0) { + // Set LAST to slot 5 and reference LAST. + // Set GOLDEN to slot 3 and update slot 3. + // Set all other buffer_idx to 0. + for (i = 0; i < INTER_REFS_PER_FRAME; i++) + ref_frame_config->ref_idx[i] = 0; + ref_frame_config->ref_idx[SVC_LAST_FRAME] = 5 - shift; + ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3; + ref_frame_config->refresh[3] = 1; + } else if (layer_id->spatial_layer_id == 1) { + // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 6, + // GOLDEN to slot 3. Set LAST2 to slot 4 and update slot 4. + for (i = 0; i < INTER_REFS_PER_FRAME; i++) + ref_frame_config->ref_idx[i] = 0; + ref_frame_config->ref_idx[SVC_LAST_FRAME] = 6 - shift; + ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3; + ref_frame_config->ref_idx[SVC_LAST2_FRAME] = 4; + ref_frame_config->refresh[4] = 1; + } else if (layer_id->spatial_layer_id == 2) { + // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 7, + // GOLDEN to slot 4. No update. 
+ for (i = 0; i < INTER_REFS_PER_FRAME; i++) + ref_frame_config->ref_idx[i] = 0; + ref_frame_config->ref_idx[SVC_LAST_FRAME] = 7 - shift; + ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 4; + } + } + break; + case 11: + // Simulcast mode for 3 spatial and 3 temporal layers. + // No inter-layer predicton, only prediction is temporal and single + // reference (LAST). + // No overlap in buffer slots between spatial layers. So for example, + // SL0 only uses slots 0 and 1. + // SL1 only uses slots 2 and 3. + // SL2 only uses slots 4 and 5. + // All 7 references for each inter-frame must only access buffer slots + // for that spatial layer. + // On key (super)frames: SL1 and SL2 must have no references set + // and must refresh all the slots for that layer only (so 2 and 3 + // for SL1, 4 and 5 for SL2). The base SL0 will be labelled internally + // as a Key frame (refresh all slots). SL1/SL2 will be labelled + // internally as Intra-only frames that allow that stream to be decoded. + // These conditions will allow for each spatial stream to be + // independently decodeable. + + // Initialize all references to 0 (don't use reference). + for (i = 0; i < INTER_REFS_PER_FRAME; i++) + ref_frame_config->reference[i] = 0; + // Initialize as no refresh/update for all slots. + for (i = 0; i < REF_FRAMES; i++) ref_frame_config->refresh[i] = 0; + for (i = 0; i < INTER_REFS_PER_FRAME; i++) + ref_frame_config->ref_idx[i] = 0; + + if (is_key_frame) { + if (layer_id->spatial_layer_id == 0) { + // Assign LAST/GOLDEN to slot 0/1. + // Refesh slots 0 and 1 for SL0. + // SL0: this will get set to KEY frame internally. + ref_frame_config->ref_idx[SVC_LAST_FRAME] = 0; + ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 1; + ref_frame_config->refresh[0] = 1; + ref_frame_config->refresh[1] = 1; + } else if (layer_id->spatial_layer_id == 1) { + // Assign LAST/GOLDEN to slot 2/3. + // Refesh slots 2 and 3 for SL1. + // This will get set to Intra-only frame internally. 
+ ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2; + ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3; + ref_frame_config->refresh[2] = 1; + ref_frame_config->refresh[3] = 1; + } else if (layer_id->spatial_layer_id == 2) { + // Assign LAST/GOLDEN to slot 4/5. + // Refresh slots 4 and 5 for SL2. + // This will get set to Intra-only frame internally. + ref_frame_config->ref_idx[SVC_LAST_FRAME] = 4; + ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 5; + ref_frame_config->refresh[4] = 1; + ref_frame_config->refresh[5] = 1; + } + } else if (superframe_cnt % 4 == 0) { + // Base temporal layer: TL0 + layer_id->temporal_layer_id = 0; + if (layer_id->spatial_layer_id == 0) { // SL0 + // Reference LAST. Assign all references to either slot + // 0 or 1. Here we assign LAST to slot 0, all others to 1. + // Update slot 0 (LAST). + ref_frame_config->reference[SVC_LAST_FRAME] = 1; + for (i = 0; i < INTER_REFS_PER_FRAME; i++) + ref_frame_config->ref_idx[i] = 1; + ref_frame_config->ref_idx[SVC_LAST_FRAME] = 0; + ref_frame_config->refresh[0] = 1; + } else if (layer_id->spatial_layer_id == 1) { // SL1 + // Reference LAST. Assign all references to either slot + // 2 or 3. Here we assign LAST to slot 2, all others to 3. + // Update slot 2 (LAST). + ref_frame_config->reference[SVC_LAST_FRAME] = 1; + for (i = 0; i < INTER_REFS_PER_FRAME; i++) + ref_frame_config->ref_idx[i] = 3; + ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2; + ref_frame_config->refresh[2] = 1; + } else if (layer_id->spatial_layer_id == 2) { // SL2 + // Reference LAST. Assign all references to either slot + // 4 or 5. Here we assign LAST to slot 4, all others to 5. + // Update slot 4 (LAST). 
+ ref_frame_config->reference[SVC_LAST_FRAME] = 1; + for (i = 0; i < INTER_REFS_PER_FRAME; i++) + ref_frame_config->ref_idx[i] = 5; + ref_frame_config->ref_idx[SVC_LAST_FRAME] = 4; + ref_frame_config->refresh[4] = 1; + } + } else if ((superframe_cnt - 1) % 4 == 0) { + // First top temporal enhancement layer: TL2 + layer_id->temporal_layer_id = 2; + if (layer_id->spatial_layer_id == 0) { // SL0 + // Reference LAST (slot 0). Assign other references to slot 1. + // No update/refresh on any slots. + ref_frame_config->reference[SVC_LAST_FRAME] = 1; + for (i = 0; i < INTER_REFS_PER_FRAME; i++) + ref_frame_config->ref_idx[i] = 1; + ref_frame_config->ref_idx[SVC_LAST_FRAME] = 0; + } else if (layer_id->spatial_layer_id == 1) { // SL1 + // Reference LAST (slot 2). Assign other references to slot 3. + // No update/refresh on any slots. + ref_frame_config->reference[SVC_LAST_FRAME] = 1; + for (i = 0; i < INTER_REFS_PER_FRAME; i++) + ref_frame_config->ref_idx[i] = 3; + ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2; + } else if (layer_id->spatial_layer_id == 2) { // SL2 + // Reference LAST (slot 4). Assign other references to slot 4. + // No update/refresh on any slots. + ref_frame_config->reference[SVC_LAST_FRAME] = 1; + for (i = 0; i < INTER_REFS_PER_FRAME; i++) + ref_frame_config->ref_idx[i] = 5; + ref_frame_config->ref_idx[SVC_LAST_FRAME] = 4; + } + } else if ((superframe_cnt - 2) % 4 == 0) { + // Middle temporal enhancement layer: TL1 + layer_id->temporal_layer_id = 1; + if (layer_id->spatial_layer_id == 0) { // SL0 + // Reference LAST (slot 0). + // Set GOLDEN to slot 1 and update slot 1. + // This will be used as reference for next TL2. + ref_frame_config->reference[SVC_LAST_FRAME] = 1; + for (i = 0; i < INTER_REFS_PER_FRAME; i++) + ref_frame_config->ref_idx[i] = 1; + ref_frame_config->ref_idx[SVC_LAST_FRAME] = 0; + ref_frame_config->refresh[1] = 1; + } else if (layer_id->spatial_layer_id == 1) { // SL1 + // Reference LAST (slot 2). 
+ // Set GOLDEN to slot 3 and update slot 3. + // This will be used as reference for next TL2. + ref_frame_config->reference[SVC_LAST_FRAME] = 1; + for (i = 0; i < INTER_REFS_PER_FRAME; i++) + ref_frame_config->ref_idx[i] = 3; + ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2; + ref_frame_config->refresh[3] = 1; + } else if (layer_id->spatial_layer_id == 2) { // SL2 + // Reference LAST (slot 4). + // Set GOLDEN to slot 5 and update slot 5. + // This will be used as reference for next TL2. + ref_frame_config->reference[SVC_LAST_FRAME] = 1; + for (i = 0; i < INTER_REFS_PER_FRAME; i++) + ref_frame_config->ref_idx[i] = 5; + ref_frame_config->ref_idx[SVC_LAST_FRAME] = 4; + ref_frame_config->refresh[5] = 1; + } + } else if ((superframe_cnt - 3) % 4 == 0) { + // Second top temporal enhancement layer: TL2 + layer_id->temporal_layer_id = 2; + if (layer_id->spatial_layer_id == 0) { // SL0 + // Reference LAST (slot 1). Assign other references to slot 0. + // No update/refresh on any slots. + ref_frame_config->reference[SVC_LAST_FRAME] = 1; + for (i = 0; i < INTER_REFS_PER_FRAME; i++) + ref_frame_config->ref_idx[i] = 0; + ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1; + } else if (layer_id->spatial_layer_id == 1) { // SL1 + // Reference LAST (slot 3). Assign other references to slot 2. + // No update/refresh on any slots. + ref_frame_config->reference[SVC_LAST_FRAME] = 1; + for (i = 0; i < INTER_REFS_PER_FRAME; i++) + ref_frame_config->ref_idx[i] = 2; + ref_frame_config->ref_idx[SVC_LAST_FRAME] = 3; + } else if (layer_id->spatial_layer_id == 2) { // SL2 + // Reference LAST (slot 5). Assign other references to slot 4. + // No update/refresh on any slots. + ref_frame_config->reference[SVC_LAST_FRAME] = 1; + for (i = 0; i < INTER_REFS_PER_FRAME; i++) + ref_frame_config->ref_idx[i] = 4; + ref_frame_config->ref_idx[SVC_LAST_FRAME] = 5; + } + } + if (!simulcast_mode && layer_id->spatial_layer_id > 0) { + // Always reference GOLDEN (inter-layer prediction). 
+ ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1; + if (ksvc_mode) { + // KSVC: only keep the inter-layer reference (GOLDEN) for + // superframes whose base is key. + if (!is_key_frame) ref_frame_config->reference[SVC_GOLDEN_FRAME] = 0; + } + if (is_key_frame && layer_id->spatial_layer_id > 1) { + // On superframes whose base is key: remove LAST to avoid prediction + // off layer two levels below. + ref_frame_config->reference[SVC_LAST_FRAME] = 0; + } + } + // For 3 spatial layer case 8 (where there is free buffer slot): + // allow for top spatial layer to use additional temporal reference. + // Additional reference is only updated on base temporal layer, every + // 10 TL0 frames here. + if (!simulcast_mode && enable_longterm_temporal_ref && + layer_id->spatial_layer_id == 2 && layering_mode == 8) { + ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = REF_FRAMES - 1; + if (!is_key_frame) ref_frame_config->reference[SVC_ALTREF_FRAME] = 1; + if (base_count % 10 == 0 && layer_id->temporal_layer_id == 0) + ref_frame_config->refresh[REF_FRAMES - 1] = 1; + } + break; + default: assert(0); die("Error: Unsupported temporal layering mode!\n"); + } +} + +#if CONFIG_AV1_DECODER +// Returns whether there is a mismatch between the encoder's new frame and the +// decoder's new frame. 
+static int test_decode(aom_codec_ctx_t *encoder, aom_codec_ctx_t *decoder, + const int frames_out) { + aom_image_t enc_img, dec_img; + int mismatch = 0; + + /* Get the internal new frame */ + AOM_CODEC_CONTROL_TYPECHECKED(encoder, AV1_GET_NEW_FRAME_IMAGE, &enc_img); + AOM_CODEC_CONTROL_TYPECHECKED(decoder, AV1_GET_NEW_FRAME_IMAGE, &dec_img); + +#if CONFIG_AV1_HIGHBITDEPTH + if ((enc_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) != + (dec_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH)) { + if (enc_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) { + aom_image_t enc_hbd_img; + aom_img_alloc( + &enc_hbd_img, + static_cast<aom_img_fmt_t>(enc_img.fmt - AOM_IMG_FMT_HIGHBITDEPTH), + enc_img.d_w, enc_img.d_h, 16); + aom_img_truncate_16_to_8(&enc_hbd_img, &enc_img); + enc_img = enc_hbd_img; + } + if (dec_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) { + aom_image_t dec_hbd_img; + aom_img_alloc( + &dec_hbd_img, + static_cast<aom_img_fmt_t>(dec_img.fmt - AOM_IMG_FMT_HIGHBITDEPTH), + dec_img.d_w, dec_img.d_h, 16); + aom_img_truncate_16_to_8(&dec_hbd_img, &dec_img); + dec_img = dec_hbd_img; + } + } +#endif + + if (!aom_compare_img(&enc_img, &dec_img)) { + int y[4], u[4], v[4]; +#if CONFIG_AV1_HIGHBITDEPTH + if (enc_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) { + aom_find_mismatch_high(&enc_img, &dec_img, y, u, v); + } else { + aom_find_mismatch(&enc_img, &dec_img, y, u, v); + } +#else + aom_find_mismatch(&enc_img, &dec_img, y, u, v); +#endif + fprintf(stderr, + "Encode/decode mismatch on frame %d at" + " Y[%d, %d] {%d/%d}," + " U[%d, %d] {%d/%d}," + " V[%d, %d] {%d/%d}\n", + frames_out, y[0], y[1], y[2], y[3], u[0], u[1], u[2], u[3], v[0], + v[1], v[2], v[3]); + mismatch = 1; + } + + aom_img_free(&enc_img); + aom_img_free(&dec_img); + return mismatch; +} +#endif // CONFIG_AV1_DECODER + +struct psnr_stats { + // The second element of these arrays is reserved for high bitdepth. 
+ uint64_t psnr_sse_total[2]; + uint64_t psnr_samples_total[2]; + double psnr_totals[2][4]; + int psnr_count[2]; +}; + +static void show_psnr(struct psnr_stats *psnr_stream, double peak) { + double ovpsnr; + + if (!psnr_stream->psnr_count[0]) return; + + fprintf(stderr, "\nPSNR (Overall/Avg/Y/U/V)"); + ovpsnr = sse_to_psnr((double)psnr_stream->psnr_samples_total[0], peak, + (double)psnr_stream->psnr_sse_total[0]); + fprintf(stderr, " %.3f", ovpsnr); + + for (int i = 0; i < 4; i++) { + fprintf(stderr, " %.3f", + psnr_stream->psnr_totals[0][i] / psnr_stream->psnr_count[0]); + } + fprintf(stderr, "\n"); +} + +static aom::AV1RateControlRtcConfig create_rtc_rc_config( + const aom_codec_enc_cfg_t &cfg, const AppInput &app_input) { + aom::AV1RateControlRtcConfig rc_cfg; + rc_cfg.width = cfg.g_w; + rc_cfg.height = cfg.g_h; + rc_cfg.max_quantizer = cfg.rc_max_quantizer; + rc_cfg.min_quantizer = cfg.rc_min_quantizer; + rc_cfg.target_bandwidth = cfg.rc_target_bitrate; + rc_cfg.buf_initial_sz = cfg.rc_buf_initial_sz; + rc_cfg.buf_optimal_sz = cfg.rc_buf_optimal_sz; + rc_cfg.buf_sz = cfg.rc_buf_sz; + rc_cfg.overshoot_pct = cfg.rc_overshoot_pct; + rc_cfg.undershoot_pct = cfg.rc_undershoot_pct; + // This is hardcoded as AOME_SET_MAX_INTRA_BITRATE_PCT + rc_cfg.max_intra_bitrate_pct = 300; + rc_cfg.framerate = cfg.g_timebase.den; + // TODO(jianj): Add suppor for SVC. + rc_cfg.ss_number_layers = 1; + rc_cfg.ts_number_layers = 1; + rc_cfg.scaling_factor_num[0] = 1; + rc_cfg.scaling_factor_den[0] = 1; + rc_cfg.layer_target_bitrate[0] = static_cast<int>(rc_cfg.target_bandwidth); + rc_cfg.max_quantizers[0] = rc_cfg.max_quantizer; + rc_cfg.min_quantizers[0] = rc_cfg.min_quantizer; + rc_cfg.aq_mode = app_input.aq_mode; + + return rc_cfg; +} + +static int qindex_to_quantizer(int qindex) { + // Table that converts 0-63 range Q values passed in outside to the 0-255 + // range Qindex used internally. 
+ static const int quantizer_to_qindex[] = { + 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, + 52, 56, 60, 64, 68, 72, 76, 80, 84, 88, 92, 96, 100, + 104, 108, 112, 116, 120, 124, 128, 132, 136, 140, 144, 148, 152, + 156, 160, 164, 168, 172, 176, 180, 184, 188, 192, 196, 200, 204, + 208, 212, 216, 220, 224, 228, 232, 236, 240, 244, 249, 255, + }; + for (int quantizer = 0; quantizer < 64; ++quantizer) + if (quantizer_to_qindex[quantizer] >= qindex) return quantizer; + + return 63; +} + +int main(int argc, const char **argv) { + AppInput app_input; + AvxVideoWriter *outfile[AOM_MAX_LAYERS] = { NULL }; + FILE *obu_files[AOM_MAX_LAYERS] = { NULL }; + AvxVideoWriter *total_layer_file = NULL; + FILE *total_layer_obu_file = NULL; + aom_codec_enc_cfg_t cfg; + int frame_cnt = 0; + aom_image_t raw; + int frame_avail; + int got_data = 0; + int flags = 0; + int i; + int pts = 0; // PTS starts at 0. + int frame_duration = 1; // 1 timebase tick per frame. + aom_svc_layer_id_t layer_id; + aom_svc_params_t svc_params; + aom_svc_ref_frame_config_t ref_frame_config; + aom_svc_ref_frame_comp_pred_t ref_frame_comp_pred; + +#if CONFIG_INTERNAL_STATS + FILE *stats_file = fopen("opsnr.stt", "a"); + if (stats_file == NULL) { + die("Cannot open opsnr.stt\n"); + } +#endif +#if CONFIG_AV1_DECODER + aom_codec_ctx_t decoder; +#endif + + struct RateControlMetrics rc; + int64_t cx_time = 0; + int64_t cx_time_layer[AOM_MAX_LAYERS]; // max number of layers. + int frame_cnt_layer[AOM_MAX_LAYERS]; + double sum_bitrate = 0.0; + double sum_bitrate2 = 0.0; + double framerate = 30.0; + int use_svc_control = 1; + int set_err_resil_frame = 0; + int test_changing_bitrate = 0; + zero(rc.layer_target_bitrate); + memset(&layer_id, 0, sizeof(aom_svc_layer_id_t)); + memset(&app_input, 0, sizeof(AppInput)); + memset(&svc_params, 0, sizeof(svc_params)); + + // Flag to test dynamic scaling of source frames for single + // spatial stream, using the scaling_mode control. 
+ const int test_dynamic_scaling_single_layer = 0; + + // Flag to test setting speed per layer. + const int test_speed_per_layer = 0; + + /* Setup default input stream settings */ + app_input.input_ctx.framerate.numerator = 30; + app_input.input_ctx.framerate.denominator = 1; + app_input.input_ctx.only_i420 = 0; + app_input.input_ctx.bit_depth = AOM_BITS_8; + app_input.speed = 7; + exec_name = argv[0]; + + // start with default encoder configuration + aom_codec_err_t res = aom_codec_enc_config_default(aom_codec_av1_cx(), &cfg, + AOM_USAGE_REALTIME); + if (res != AOM_CODEC_OK) { + die("Failed to get config: %s\n", aom_codec_err_to_string(res)); + } + + // Real time parameters. + cfg.g_usage = AOM_USAGE_REALTIME; + + cfg.rc_end_usage = AOM_CBR; + cfg.rc_min_quantizer = 2; + cfg.rc_max_quantizer = 52; + cfg.rc_undershoot_pct = 50; + cfg.rc_overshoot_pct = 50; + cfg.rc_buf_initial_sz = 600; + cfg.rc_buf_optimal_sz = 600; + cfg.rc_buf_sz = 1000; + cfg.rc_resize_mode = 0; // Set to RESIZE_DYNAMIC for dynamic resize. + cfg.g_lag_in_frames = 0; + cfg.kf_mode = AOM_KF_AUTO; + + parse_command_line(argc, argv, &app_input, &svc_params, &cfg); + + int ts_number_layers = svc_params.number_temporal_layers; + int ss_number_layers = svc_params.number_spatial_layers; + + unsigned int width = cfg.g_w; + unsigned int height = cfg.g_h; + + if (app_input.layering_mode >= 0) { + if (ts_number_layers != + mode_to_num_temporal_layers[app_input.layering_mode] || + ss_number_layers != + mode_to_num_spatial_layers[app_input.layering_mode]) { + die("Number of layers doesn't match layering mode."); + } + } + + // Y4M reader has its own allocation. 
+ if (app_input.input_ctx.file_type != FILE_TYPE_Y4M) { + if (!aom_img_alloc(&raw, AOM_IMG_FMT_I420, width, height, 32)) { + die("Failed to allocate image (%dx%d)", width, height); + } + } + + aom_codec_iface_t *encoder = aom_codec_av1_cx(); + + memcpy(&rc.layer_target_bitrate[0], &svc_params.layer_target_bitrate[0], + sizeof(svc_params.layer_target_bitrate)); + + unsigned int total_rate = 0; + for (i = 0; i < ss_number_layers; i++) { + total_rate += + svc_params + .layer_target_bitrate[i * ts_number_layers + ts_number_layers - 1]; + } + if (total_rate != cfg.rc_target_bitrate) { + die("Incorrect total target bitrate"); + } + + svc_params.framerate_factor[0] = 1; + if (ts_number_layers == 2) { + svc_params.framerate_factor[0] = 2; + svc_params.framerate_factor[1] = 1; + } else if (ts_number_layers == 3) { + svc_params.framerate_factor[0] = 4; + svc_params.framerate_factor[1] = 2; + svc_params.framerate_factor[2] = 1; + } + + if (app_input.input_ctx.file_type == FILE_TYPE_Y4M) { + // Override these settings with the info from Y4M file. + cfg.g_w = app_input.input_ctx.width; + cfg.g_h = app_input.input_ctx.height; + // g_timebase is the reciprocal of frame rate. + cfg.g_timebase.num = app_input.input_ctx.framerate.denominator; + cfg.g_timebase.den = app_input.input_ctx.framerate.numerator; + } + framerate = cfg.g_timebase.den / cfg.g_timebase.num; + set_rate_control_metrics(&rc, framerate, ss_number_layers, ts_number_layers); + + AvxVideoInfo info; + info.codec_fourcc = get_fourcc_by_aom_encoder(encoder); + info.frame_width = cfg.g_w; + info.frame_height = cfg.g_h; + info.time_base.numerator = cfg.g_timebase.num; + info.time_base.denominator = cfg.g_timebase.den; + // Open an output file for each stream. 
+ for (int sl = 0; sl < ss_number_layers; ++sl) { + for (int tl = 0; tl < ts_number_layers; ++tl) { + i = sl * ts_number_layers + tl; + char file_name[PATH_MAX]; + snprintf(file_name, sizeof(file_name), "%s_%d.av1", + app_input.output_filename, i); + if (app_input.output_obu) { + obu_files[i] = fopen(file_name, "wb"); + if (!obu_files[i]) die("Failed to open %s for writing", file_name); + } else { + outfile[i] = aom_video_writer_open(file_name, kContainerIVF, &info); + if (!outfile[i]) die("Failed to open %s for writing", file_name); + } + } + } + if (app_input.output_obu) { + total_layer_obu_file = fopen(app_input.output_filename, "wb"); + if (!total_layer_obu_file) + die("Failed to open %s for writing", app_input.output_filename); + } else { + total_layer_file = + aom_video_writer_open(app_input.output_filename, kContainerIVF, &info); + if (!total_layer_file) + die("Failed to open %s for writing", app_input.output_filename); + } + + // Initialize codec. + aom_codec_ctx_t codec; + aom_codec_flags_t flag = 0; + flag |= cfg.g_input_bit_depth == AOM_BITS_8 ? 0 : AOM_CODEC_USE_HIGHBITDEPTH; + flag |= app_input.show_psnr ? AOM_CODEC_USE_PSNR : 0; + if (aom_codec_enc_init(&codec, encoder, &cfg, flag)) + die_codec(&codec, "Failed to initialize encoder"); + +#if CONFIG_AV1_DECODER + if (app_input.decode) { + if (aom_codec_dec_init(&decoder, get_aom_decoder_by_index(0), NULL, 0)) + die_codec(&decoder, "Failed to initialize decoder"); + } +#endif + + aom_codec_control(&codec, AOME_SET_CPUUSED, app_input.speed); + aom_codec_control(&codec, AV1E_SET_AQ_MODE, app_input.aq_mode ? 
3 : 0); + aom_codec_control(&codec, AV1E_SET_GF_CBR_BOOST_PCT, 0); + aom_codec_control(&codec, AV1E_SET_ENABLE_CDEF, 1); + aom_codec_control(&codec, AV1E_SET_LOOPFILTER_CONTROL, 1); + aom_codec_control(&codec, AV1E_SET_ENABLE_WARPED_MOTION, 0); + aom_codec_control(&codec, AV1E_SET_ENABLE_OBMC, 0); + aom_codec_control(&codec, AV1E_SET_ENABLE_GLOBAL_MOTION, 0); + aom_codec_control(&codec, AV1E_SET_ENABLE_ORDER_HINT, 0); + aom_codec_control(&codec, AV1E_SET_ENABLE_TPL_MODEL, 0); + aom_codec_control(&codec, AV1E_SET_DELTAQ_MODE, 0); + aom_codec_control(&codec, AV1E_SET_COEFF_COST_UPD_FREQ, 3); + aom_codec_control(&codec, AV1E_SET_MODE_COST_UPD_FREQ, 3); + aom_codec_control(&codec, AV1E_SET_MV_COST_UPD_FREQ, 3); + aom_codec_control(&codec, AV1E_SET_DV_COST_UPD_FREQ, 3); + aom_codec_control(&codec, AV1E_SET_CDF_UPDATE_MODE, 1); + + // Settings to reduce key frame encoding time. + aom_codec_control(&codec, AV1E_SET_ENABLE_CFL_INTRA, 0); + aom_codec_control(&codec, AV1E_SET_ENABLE_SMOOTH_INTRA, 0); + aom_codec_control(&codec, AV1E_SET_ENABLE_ANGLE_DELTA, 0); + aom_codec_control(&codec, AV1E_SET_ENABLE_FILTER_INTRA, 0); + aom_codec_control(&codec, AV1E_SET_INTRA_DEFAULT_TX_ONLY, 1); + + if (cfg.g_threads > 1) { + aom_codec_control(&codec, AV1E_SET_TILE_COLUMNS, + (unsigned int)log2(cfg.g_threads)); + } + + aom_codec_control(&codec, AV1E_SET_TUNE_CONTENT, app_input.tune_content); + if (app_input.tune_content == AOM_CONTENT_SCREEN) { + aom_codec_control(&codec, AV1E_SET_ENABLE_PALETTE, 1); + aom_codec_control(&codec, AV1E_SET_ENABLE_CFL_INTRA, 1); + // INTRABC is currently disabled for rt mode, as it's too slow. 
+ aom_codec_control(&codec, AV1E_SET_ENABLE_INTRABC, 0); + } + + if (app_input.use_external_rc) { + aom_codec_control(&codec, AV1E_SET_RTC_EXTERNAL_RC, 1); + } + + aom_codec_control(&codec, AV1E_SET_MAX_CONSEC_FRAME_DROP_CBR, INT_MAX); + + aom_codec_control(&codec, AV1E_SET_SVC_FRAME_DROP_MODE, + AOM_FULL_SUPERFRAME_DROP); + + svc_params.number_spatial_layers = ss_number_layers; + svc_params.number_temporal_layers = ts_number_layers; + for (i = 0; i < ss_number_layers * ts_number_layers; ++i) { + svc_params.max_quantizers[i] = cfg.rc_max_quantizer; + svc_params.min_quantizers[i] = cfg.rc_min_quantizer; + } + for (i = 0; i < ss_number_layers; ++i) { + svc_params.scaling_factor_num[i] = 1; + svc_params.scaling_factor_den[i] = 1; + } + if (ss_number_layers == 2) { + svc_params.scaling_factor_num[0] = 1; + svc_params.scaling_factor_den[0] = 2; + } else if (ss_number_layers == 3) { + svc_params.scaling_factor_num[0] = 1; + svc_params.scaling_factor_den[0] = 4; + svc_params.scaling_factor_num[1] = 1; + svc_params.scaling_factor_den[1] = 2; + } + aom_codec_control(&codec, AV1E_SET_SVC_PARAMS, &svc_params); + // TODO(aomedia:3032): Configure KSVC in fixed mode. + + // This controls the maximum target size of the key frame. + // For generating smaller key frames, use a smaller max_intra_size_pct + // value, like 100 or 200. 
+ { + const int max_intra_size_pct = 300; + aom_codec_control(&codec, AOME_SET_MAX_INTRA_BITRATE_PCT, + max_intra_size_pct); + } + + for (int lx = 0; lx < ts_number_layers * ss_number_layers; lx++) { + cx_time_layer[lx] = 0; + frame_cnt_layer[lx] = 0; + } + + std::unique_ptr<aom::AV1RateControlRTC> rc_api; + if (app_input.use_external_rc) { + const aom::AV1RateControlRtcConfig rc_cfg = + create_rtc_rc_config(cfg, app_input); + rc_api = aom::AV1RateControlRTC::Create(rc_cfg); + } + + frame_avail = 1; + struct psnr_stats psnr_stream; + memset(&psnr_stream, 0, sizeof(psnr_stream)); + while (frame_avail || got_data) { + struct aom_usec_timer timer; + frame_avail = read_frame(&(app_input.input_ctx), &raw); + // Loop over spatial layers. + for (int slx = 0; slx < ss_number_layers; slx++) { + aom_codec_iter_t iter = NULL; + const aom_codec_cx_pkt_t *pkt; + int layer = 0; + // Flag for superframe whose base is key. + int is_key_frame = (frame_cnt % cfg.kf_max_dist) == 0; + // For flexible mode: + if (app_input.layering_mode >= 0) { + // Set the reference/update flags, layer_id, and reference_map + // buffer index. + set_layer_pattern(app_input.layering_mode, frame_cnt, &layer_id, + &ref_frame_config, &ref_frame_comp_pred, + &use_svc_control, slx, is_key_frame, + (app_input.layering_mode == 10), app_input.speed); + aom_codec_control(&codec, AV1E_SET_SVC_LAYER_ID, &layer_id); + if (use_svc_control) { + aom_codec_control(&codec, AV1E_SET_SVC_REF_FRAME_CONFIG, + &ref_frame_config); + aom_codec_control(&codec, AV1E_SET_SVC_REF_FRAME_COMP_PRED, + &ref_frame_comp_pred); + } + // Set the speed per layer. 
+ if (test_speed_per_layer) { + int speed_per_layer = 10; + if (layer_id.spatial_layer_id == 0) { + if (layer_id.temporal_layer_id == 0) speed_per_layer = 6; + if (layer_id.temporal_layer_id == 1) speed_per_layer = 7; + if (layer_id.temporal_layer_id == 2) speed_per_layer = 8; + } else if (layer_id.spatial_layer_id == 1) { + if (layer_id.temporal_layer_id == 0) speed_per_layer = 7; + if (layer_id.temporal_layer_id == 1) speed_per_layer = 8; + if (layer_id.temporal_layer_id == 2) speed_per_layer = 9; + } else if (layer_id.spatial_layer_id == 2) { + if (layer_id.temporal_layer_id == 0) speed_per_layer = 8; + if (layer_id.temporal_layer_id == 1) speed_per_layer = 9; + if (layer_id.temporal_layer_id == 2) speed_per_layer = 10; + } + aom_codec_control(&codec, AOME_SET_CPUUSED, speed_per_layer); + } + } else { + // Only up to 3 temporal layers supported in fixed mode. + // Only need to set spatial and temporal layer_id: reference + // prediction, refresh, and buffer_idx are set internally. + layer_id.spatial_layer_id = slx; + layer_id.temporal_layer_id = 0; + if (ts_number_layers == 2) { + layer_id.temporal_layer_id = (frame_cnt % 2) != 0; + } else if (ts_number_layers == 3) { + if (frame_cnt % 2 != 0) + layer_id.temporal_layer_id = 2; + else if ((frame_cnt > 1) && ((frame_cnt - 2) % 4 == 0)) + layer_id.temporal_layer_id = 1; + } + aom_codec_control(&codec, AV1E_SET_SVC_LAYER_ID, &layer_id); + } + + if (set_err_resil_frame && cfg.g_error_resilient == 0) { + // Set error_resilient per frame: off/0 for base layer and + // on/1 for enhancement layer frames. + // Note that this is can only be done on the fly/per-frame/layer + // if the config error_resilience is off/0. 
See the logic for updating + // in set_encoder_config(): + // tool_cfg->error_resilient_mode = + // cfg->g_error_resilient | extra_cfg->error_resilient_mode; + const int err_resil_mode = + layer_id.spatial_layer_id > 0 || layer_id.temporal_layer_id > 0; + aom_codec_control(&codec, AV1E_SET_ERROR_RESILIENT_MODE, + err_resil_mode); + } + + layer = slx * ts_number_layers + layer_id.temporal_layer_id; + if (frame_avail && slx == 0) ++rc.layer_input_frames[layer]; + + if (test_dynamic_scaling_single_layer) { + // Example to scale source down by 2x2, then 4x4, and then back up to + // 2x2, and then back to original. + int frame_2x2 = 200; + int frame_4x4 = 400; + int frame_2x2up = 600; + int frame_orig = 800; + if (frame_cnt >= frame_2x2 && frame_cnt < frame_4x4) { + // Scale source down by 2x2. + struct aom_scaling_mode mode = { AOME_ONETWO, AOME_ONETWO }; + aom_codec_control(&codec, AOME_SET_SCALEMODE, &mode); + } else if (frame_cnt >= frame_4x4 && frame_cnt < frame_2x2up) { + // Scale source down by 4x4. + struct aom_scaling_mode mode = { AOME_ONEFOUR, AOME_ONEFOUR }; + aom_codec_control(&codec, AOME_SET_SCALEMODE, &mode); + } else if (frame_cnt >= frame_2x2up && frame_cnt < frame_orig) { + // Source back up to 2x2. + struct aom_scaling_mode mode = { AOME_ONETWO, AOME_ONETWO }; + aom_codec_control(&codec, AOME_SET_SCALEMODE, &mode); + } else if (frame_cnt >= frame_orig) { + // Source back up to original resolution (no scaling). + struct aom_scaling_mode mode = { AOME_NORMAL, AOME_NORMAL }; + aom_codec_control(&codec, AOME_SET_SCALEMODE, &mode); + } + if (frame_cnt == frame_2x2 || frame_cnt == frame_4x4 || + frame_cnt == frame_2x2up || frame_cnt == frame_orig) { + // For dynamic resize testing on single layer: refresh all references + // on the resized frame: this is to avoid decode error: + // if resize goes down by >= 4x4 then libaom decoder will throw an + // error that some reference (even though not used) is beyond the + // limit size (must be smaller than 4x4). 
+ for (i = 0; i < REF_FRAMES; i++) ref_frame_config.refresh[i] = 1; + if (use_svc_control) { + aom_codec_control(&codec, AV1E_SET_SVC_REF_FRAME_CONFIG, + &ref_frame_config); + aom_codec_control(&codec, AV1E_SET_SVC_REF_FRAME_COMP_PRED, + &ref_frame_comp_pred); + } + } + } + + // Change target_bitrate every other frame. + if (test_changing_bitrate && frame_cnt % 2 == 0) { + if (frame_cnt < 500) + cfg.rc_target_bitrate += 10; + else + cfg.rc_target_bitrate -= 10; + // Do big increase and decrease. + if (frame_cnt == 100) cfg.rc_target_bitrate <<= 1; + if (frame_cnt == 600) cfg.rc_target_bitrate >>= 1; + if (cfg.rc_target_bitrate < 100) cfg.rc_target_bitrate = 100; + // Call change_config, or bypass with new control. + // res = aom_codec_enc_config_set(&codec, &cfg); + if (aom_codec_control(&codec, AV1E_SET_BITRATE_ONE_PASS_CBR, + cfg.rc_target_bitrate)) + die_codec(&codec, "Failed to SET_BITRATE_ONE_PASS_CBR"); + } + + if (rc_api) { + aom::AV1FrameParamsRTC frame_params; + // TODO(jianj): Add support for SVC. + frame_params.spatial_layer_id = 0; + frame_params.temporal_layer_id = 0; + frame_params.frame_type = + is_key_frame ? aom::kKeyFrame : aom::kInterFrame; + rc_api->ComputeQP(frame_params); + const int current_qp = rc_api->GetQP(); + if (aom_codec_control(&codec, AV1E_SET_QUANTIZER_ONE_PASS, + qindex_to_quantizer(current_qp))) { + die_codec(&codec, "Failed to SET_QUANTIZER_ONE_PASS"); + } + } + + // Do the layer encode. + aom_usec_timer_start(&timer); + if (aom_codec_encode(&codec, frame_avail ? &raw : NULL, pts, 1, flags)) + die_codec(&codec, "Failed to encode frame"); + aom_usec_timer_mark(&timer); + cx_time += aom_usec_timer_elapsed(&timer); + cx_time_layer[layer] += aom_usec_timer_elapsed(&timer); + frame_cnt_layer[layer] += 1; + + got_data = 0; + // For simulcast (mode 11): write out each spatial layer to the file. + int ss_layers_write = (app_input.layering_mode == 11) + ? 
layer_id.spatial_layer_id + 1 + : ss_number_layers; + while ((pkt = aom_codec_get_cx_data(&codec, &iter))) { + switch (pkt->kind) { + case AOM_CODEC_CX_FRAME_PKT: + for (int sl = layer_id.spatial_layer_id; sl < ss_layers_write; + ++sl) { + for (int tl = layer_id.temporal_layer_id; tl < ts_number_layers; + ++tl) { + int j = sl * ts_number_layers + tl; + if (app_input.output_obu) { + fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz, + obu_files[j]); + } else { + aom_video_writer_write_frame( + outfile[j], + reinterpret_cast<const uint8_t *>(pkt->data.frame.buf), + pkt->data.frame.sz, pts); + } + if (sl == layer_id.spatial_layer_id) + rc.layer_encoding_bitrate[j] += 8.0 * pkt->data.frame.sz; + } + } + got_data = 1; + // Write everything into the top layer. + if (app_input.output_obu) { + fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz, + total_layer_obu_file); + } else { + aom_video_writer_write_frame( + total_layer_file, + reinterpret_cast<const uint8_t *>(pkt->data.frame.buf), + pkt->data.frame.sz, pts); + } + // Keep count of rate control stats per layer (for non-key). + if (!(pkt->data.frame.flags & AOM_FRAME_IS_KEY)) { + int j = layer_id.spatial_layer_id * ts_number_layers + + layer_id.temporal_layer_id; + assert(j >= 0); + rc.layer_avg_frame_size[j] += 8.0 * pkt->data.frame.sz; + rc.layer_avg_rate_mismatch[j] += + fabs(8.0 * pkt->data.frame.sz - rc.layer_pfb[j]) / + rc.layer_pfb[j]; + if (slx == 0) ++rc.layer_enc_frames[layer_id.temporal_layer_id]; + } + + if (rc_api) { + rc_api->PostEncodeUpdate(pkt->data.frame.sz); + } + // Update for short-time encoding bitrate states, for moving window + // of size rc->window, shifted by rc->window / 2. + // Ignore first window segment, due to key frame. + // For spatial layers: only do this for top/highest SL. + if (frame_cnt > rc.window_size && slx == ss_number_layers - 1) { + sum_bitrate += 0.001 * 8.0 * pkt->data.frame.sz * framerate; + rc.window_size = (rc.window_size <= 0) ? 
1 : rc.window_size; + if (frame_cnt % rc.window_size == 0) { + rc.window_count += 1; + rc.avg_st_encoding_bitrate += sum_bitrate / rc.window_size; + rc.variance_st_encoding_bitrate += + (sum_bitrate / rc.window_size) * + (sum_bitrate / rc.window_size); + sum_bitrate = 0.0; + } + } + // Second shifted window. + if (frame_cnt > rc.window_size + rc.window_size / 2 && + slx == ss_number_layers - 1) { + sum_bitrate2 += 0.001 * 8.0 * pkt->data.frame.sz * framerate; + if (frame_cnt > 2 * rc.window_size && + frame_cnt % rc.window_size == 0) { + rc.window_count += 1; + rc.avg_st_encoding_bitrate += sum_bitrate2 / rc.window_size; + rc.variance_st_encoding_bitrate += + (sum_bitrate2 / rc.window_size) * + (sum_bitrate2 / rc.window_size); + sum_bitrate2 = 0.0; + } + } + +#if CONFIG_AV1_DECODER + if (app_input.decode) { + if (aom_codec_decode( + &decoder, + reinterpret_cast<const uint8_t *>(pkt->data.frame.buf), + pkt->data.frame.sz, NULL)) + die_codec(&decoder, "Failed to decode frame"); + } +#endif + + break; + case AOM_CODEC_PSNR_PKT: + if (app_input.show_psnr) { + psnr_stream.psnr_sse_total[0] += pkt->data.psnr.sse[0]; + psnr_stream.psnr_samples_total[0] += pkt->data.psnr.samples[0]; + for (int plane = 0; plane < 4; plane++) { + psnr_stream.psnr_totals[0][plane] += pkt->data.psnr.psnr[plane]; + } + psnr_stream.psnr_count[0]++; + } + break; + default: break; + } + } +#if CONFIG_AV1_DECODER + if (got_data && app_input.decode) { + // Don't look for mismatch on top spatial and top temporal layers as + // they are non reference frames. 
+ if ((ss_number_layers > 1 || ts_number_layers > 1) && + !(layer_id.temporal_layer_id > 0 && + layer_id.temporal_layer_id == ts_number_layers - 1)) { + if (test_decode(&codec, &decoder, frame_cnt)) { +#if CONFIG_INTERNAL_STATS + fprintf(stats_file, "First mismatch occurred in frame %d\n", + frame_cnt); + fclose(stats_file); +#endif + fatal("Mismatch seen"); + } + } + } +#endif + } // loop over spatial layers + ++frame_cnt; + pts += frame_duration; + } + + close_input_file(&(app_input.input_ctx)); + printout_rate_control_summary(&rc, frame_cnt, ss_number_layers, + ts_number_layers); + + printf("\n"); + for (int slx = 0; slx < ss_number_layers; slx++) + for (int tlx = 0; tlx < ts_number_layers; tlx++) { + int lx = slx * ts_number_layers + tlx; + printf("Per layer encoding time/FPS stats for encoder: %d %d %d %f %f \n", + slx, tlx, frame_cnt_layer[lx], + (float)cx_time_layer[lx] / (double)(frame_cnt_layer[lx] * 1000), + 1000000 * (double)frame_cnt_layer[lx] / (double)cx_time_layer[lx]); + } + + printf("\n"); + printf("Frame cnt and encoding time/FPS stats for encoding: %d %f %f\n", + frame_cnt, 1000 * (float)cx_time / (double)(frame_cnt * 1000000), + 1000000 * (double)frame_cnt / (double)cx_time); + + if (app_input.show_psnr) { + show_psnr(&psnr_stream, 255.0); + } + + if (aom_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy encoder"); + +#if CONFIG_AV1_DECODER + if (app_input.decode) { + if (aom_codec_destroy(&decoder)) + die_codec(&decoder, "Failed to destroy decoder"); + } +#endif + +#if CONFIG_INTERNAL_STATS + fprintf(stats_file, "No mismatch detected in recon buffers\n"); + fclose(stats_file); +#endif + + // Try to rewrite the output file headers with the actual frame count. 
+ for (i = 0; i < ss_number_layers * ts_number_layers; ++i) + aom_video_writer_close(outfile[i]); + aom_video_writer_close(total_layer_file); + + if (app_input.input_ctx.file_type != FILE_TYPE_Y4M) { + aom_img_free(&raw); + } + return EXIT_SUCCESS; +} diff --git a/third_party/aom/examples/twopass_encoder.c b/third_party/aom/examples/twopass_encoder.c new file mode 100644 index 0000000000..388f68bd4d --- /dev/null +++ b/third_party/aom/examples/twopass_encoder.c @@ -0,0 +1,254 @@ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +// Two Pass Encoder +// ================ +// +// This is an example of a two pass encoder loop. It takes an input file in +// YV12 format, passes it through the encoder twice, and writes the compressed +// frames to disk in IVF format. It builds upon the simple_encoder example. +// +// Twopass Variables +// ----------------- +// Twopass mode needs to track the current pass number and the buffer of +// statistics packets. +// +// Updating The Configuration +// --------------------------------- +// In two pass mode, the configuration has to be updated on each pass. The +// statistics buffer is passed on the last pass. +// +// Encoding A Frame +// ---------------- +// Encoding a frame in two pass mode is identical to the simple encoder +// example. +// +// Processing Statistics Packets +// ----------------------------- +// Each packet of type `AOM_CODEC_CX_FRAME_PKT` contains the encoded data +// for this frame. 
We write a IVF frame header, followed by the raw data. +// +// +// Pass Progress Reporting +// ----------------------------- +// It's sometimes helpful to see when each pass completes. +// +// +// Clean-up +// ----------------------------- +// Destruction of the encoder instance must be done on each pass. The +// raw image should be destroyed at the end as usual. + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "aom/aom_encoder.h" +#include "aom/aomcx.h" +#include "common/tools_common.h" +#include "common/video_writer.h" + +static const char *exec_name; + +void usage_exit(void) { + fprintf(stderr, + "Usage: %s <codec> <width> <height> <infile> <outfile> " + "<limit(optional)>\n", + exec_name); + exit(EXIT_FAILURE); +} + +static int get_frame_stats(aom_codec_ctx_t *ctx, const aom_image_t *img, + aom_codec_pts_t pts, unsigned int duration, + aom_enc_frame_flags_t flags, + aom_fixed_buf_t *stats) { + int got_pkts = 0; + aom_codec_iter_t iter = NULL; + const aom_codec_cx_pkt_t *pkt = NULL; + const aom_codec_err_t res = aom_codec_encode(ctx, img, pts, duration, flags); + if (res != AOM_CODEC_OK) die_codec(ctx, "Failed to get frame stats."); + + while ((pkt = aom_codec_get_cx_data(ctx, &iter)) != NULL) { + got_pkts = 1; + + if (pkt->kind == AOM_CODEC_STATS_PKT) { + const uint8_t *const pkt_buf = pkt->data.twopass_stats.buf; + const size_t pkt_size = pkt->data.twopass_stats.sz; + stats->buf = realloc(stats->buf, stats->sz + pkt_size); + if (!stats->buf) die("Failed to allocate frame stats buffer."); + memcpy((uint8_t *)stats->buf + stats->sz, pkt_buf, pkt_size); + stats->sz += pkt_size; + } + } + + return got_pkts; +} + +static int encode_frame(aom_codec_ctx_t *ctx, const aom_image_t *img, + aom_codec_pts_t pts, unsigned int duration, + aom_enc_frame_flags_t flags, AvxVideoWriter *writer) { + int got_pkts = 0; + aom_codec_iter_t iter = NULL; + const aom_codec_cx_pkt_t *pkt = NULL; + const aom_codec_err_t res = aom_codec_encode(ctx, img, pts, 
duration, flags); + if (res != AOM_CODEC_OK) die_codec(ctx, "Failed to encode frame."); + + while ((pkt = aom_codec_get_cx_data(ctx, &iter)) != NULL) { + got_pkts = 1; + if (pkt->kind == AOM_CODEC_CX_FRAME_PKT) { + const int keyframe = (pkt->data.frame.flags & AOM_FRAME_IS_KEY) != 0; + + if (!aom_video_writer_write_frame(writer, pkt->data.frame.buf, + pkt->data.frame.sz, + pkt->data.frame.pts)) + die_codec(ctx, "Failed to write compressed frame."); + printf(keyframe ? "K" : "."); + fflush(stdout); + } + } + + return got_pkts; +} + +static aom_fixed_buf_t pass0(aom_image_t *raw, FILE *infile, + aom_codec_iface_t *encoder, + const aom_codec_enc_cfg_t *cfg, int limit) { + aom_codec_ctx_t codec; + int frame_count = 0; + aom_fixed_buf_t stats = { NULL, 0 }; + + if (aom_codec_enc_init(&codec, encoder, cfg, 0)) + die("Failed to initialize encoder"); + + // Calculate frame statistics. + while (aom_img_read(raw, infile) && frame_count < limit) { + ++frame_count; + get_frame_stats(&codec, raw, frame_count, 1, 0, &stats); + } + + // Flush encoder. + while (get_frame_stats(&codec, NULL, frame_count, 1, 0, &stats)) { + } + + printf("Pass 0 complete. 
Processed %d frames.\n", frame_count); + if (aom_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec."); + + return stats; +} + +static void pass1(aom_image_t *raw, FILE *infile, const char *outfile_name, + aom_codec_iface_t *encoder, const aom_codec_enc_cfg_t *cfg, + int limit) { + AvxVideoInfo info = { get_fourcc_by_aom_encoder(encoder), + cfg->g_w, + cfg->g_h, + { cfg->g_timebase.num, cfg->g_timebase.den }, + 0 }; + AvxVideoWriter *writer = NULL; + aom_codec_ctx_t codec; + int frame_count = 0; + + writer = aom_video_writer_open(outfile_name, kContainerIVF, &info); + if (!writer) die("Failed to open %s for writing", outfile_name); + + if (aom_codec_enc_init(&codec, encoder, cfg, 0)) + die("Failed to initialize encoder"); + + if (aom_codec_control(&codec, AOME_SET_CPUUSED, 2)) + die_codec(&codec, "Failed to set cpu-used"); + + // Encode frames. + while (aom_img_read(raw, infile) && frame_count < limit) { + ++frame_count; + encode_frame(&codec, raw, frame_count, 1, 0, writer); + } + + // Flush encoder. + while (encode_frame(&codec, NULL, -1, 1, 0, writer)) { + } + + printf("\n"); + + if (aom_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec."); + + aom_video_writer_close(writer); + + printf("Pass 1 complete. 
Processed %d frames.\n", frame_count); +} + +int main(int argc, char **argv) { + FILE *infile = NULL; + int w, h; + aom_codec_ctx_t codec; + aom_codec_enc_cfg_t cfg; + aom_image_t raw; + aom_codec_err_t res; + aom_fixed_buf_t stats; + + const int fps = 30; // TODO(dkovalev) add command line argument + const int bitrate = 200; // kbit/s TODO(dkovalev) add command line argument + const char *const codec_arg = argv[1]; + const char *const width_arg = argv[2]; + const char *const height_arg = argv[3]; + const char *const infile_arg = argv[4]; + const char *const outfile_arg = argv[5]; + int limit = 0; + exec_name = argv[0]; + + if (argc < 6) die("Invalid number of arguments"); + + if (argc > 6) limit = (int)strtol(argv[6], NULL, 0); + + if (limit == 0) limit = 100; + + aom_codec_iface_t *encoder = get_aom_encoder_by_short_name(codec_arg); + if (!encoder) die("Unsupported codec."); + + w = (int)strtol(width_arg, NULL, 0); + h = (int)strtol(height_arg, NULL, 0); + + if (w <= 0 || h <= 0 || (w % 2) != 0 || (h % 2) != 0) + die("Invalid frame size: %dx%d", w, h); + + if (!aom_img_alloc(&raw, AOM_IMG_FMT_I420, w, h, 1)) + die("Failed to allocate image (%dx%d)", w, h); + + printf("Using %s\n", aom_codec_iface_name(encoder)); + + // Configuration + res = aom_codec_enc_config_default(encoder, &cfg, 0); + if (res) die_codec(&codec, "Failed to get default codec config."); + + cfg.g_w = w; + cfg.g_h = h; + cfg.g_timebase.num = 1; + cfg.g_timebase.den = fps; + cfg.rc_target_bitrate = bitrate; + + if (!(infile = fopen(infile_arg, "rb"))) + die("Failed to open %s for reading", infile_arg); + + // Pass 0 + cfg.g_pass = AOM_RC_FIRST_PASS; + stats = pass0(&raw, infile, encoder, &cfg, limit); + + // Pass 1 + rewind(infile); + cfg.g_pass = AOM_RC_LAST_PASS; + cfg.rc_twopass_stats_in = stats; + pass1(&raw, infile, outfile_arg, encoder, &cfg, limit); + free(stats.buf); + + aom_img_free(&raw); + fclose(infile); + + return EXIT_SUCCESS; +} |