diff options
Diffstat (limited to 'media/libvpx/libvpx/examples')
17 files changed, 5899 insertions, 0 deletions
diff --git a/media/libvpx/libvpx/examples/decode_to_md5.c b/media/libvpx/libvpx/examples/decode_to_md5.c new file mode 100644 index 0000000000..51959f37df --- /dev/null +++ b/media/libvpx/libvpx/examples/decode_to_md5.c @@ -0,0 +1,132 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// Frame-by-frame MD5 Checksum +// =========================== +// +// This example builds upon the simple decoder loop to show how checksums +// of the decoded output can be generated. These are used for validating +// decoder implementations against the reference implementation, for example. +// +// MD5 algorithm +// ------------- +// The Message-Digest 5 (MD5) is a well known hash function. We have provided +// an implementation derived from the RSA Data Security, Inc. MD5 Message-Digest +// Algorithm for your use. Our implementation only changes the interface of this +// reference code. You must include the `md5_utils.h` header for access to these +// functions. +// +// Processing The Decoded Data +// --------------------------- +// Each row of the image is passed to the MD5 accumulator. First the Y plane +// is processed, then U, then V. It is important to honor the image's `stride` +// values. 
+ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "vpx/vp8dx.h" +#include "vpx/vpx_decoder.h" + +#include "../md5_utils.h" +#include "../tools_common.h" +#include "../video_reader.h" +#include "./vpx_config.h" + +static void get_image_md5(const vpx_image_t *img, unsigned char digest[16]) { + int plane, y; + MD5Context md5; + + MD5Init(&md5); + + for (plane = 0; plane < 3; ++plane) { + const unsigned char *buf = img->planes[plane]; + const int stride = img->stride[plane]; + const int w = plane ? (img->d_w + 1) >> 1 : img->d_w; + const int h = plane ? (img->d_h + 1) >> 1 : img->d_h; + + for (y = 0; y < h; ++y) { + MD5Update(&md5, buf, w); + buf += stride; + } + } + + MD5Final(digest, &md5); +} + +static void print_md5(FILE *stream, unsigned char digest[16]) { + int i; + + for (i = 0; i < 16; ++i) fprintf(stream, "%02x", digest[i]); +} + +static const char *exec_name; + +void usage_exit(void) { + fprintf(stderr, "Usage: %s <infile> <outfile>\n", exec_name); + exit(EXIT_FAILURE); +} + +int main(int argc, char **argv) { + int frame_cnt = 0; + FILE *outfile = NULL; + vpx_codec_ctx_t codec; + VpxVideoReader *reader = NULL; + const VpxVideoInfo *info = NULL; + const VpxInterface *decoder = NULL; + + exec_name = argv[0]; + + if (argc != 3) die("Invalid number of arguments."); + + reader = vpx_video_reader_open(argv[1]); + if (!reader) die("Failed to open %s for reading.", argv[1]); + + if (!(outfile = fopen(argv[2], "wb"))) + die("Failed to open %s for writing.", argv[2]); + + info = vpx_video_reader_get_info(reader); + + decoder = get_vpx_decoder_by_fourcc(info->codec_fourcc); + if (!decoder) die("Unknown input codec."); + + printf("Using %s\n", vpx_codec_iface_name(decoder->codec_interface())); + + if (vpx_codec_dec_init(&codec, decoder->codec_interface(), NULL, 0)) + die_codec(&codec, "Failed to initialize decoder"); + + while (vpx_video_reader_read_frame(reader)) { + vpx_codec_iter_t iter = NULL; + vpx_image_t *img = NULL; + size_t frame_size = 
0; + const unsigned char *frame = + vpx_video_reader_get_frame(reader, &frame_size); + if (vpx_codec_decode(&codec, frame, (unsigned int)frame_size, NULL, 0)) + die_codec(&codec, "Failed to decode frame"); + + while ((img = vpx_codec_get_frame(&codec, &iter)) != NULL) { + unsigned char digest[16]; + + get_image_md5(img, digest); + print_md5(outfile, digest); + fprintf(outfile, " img-%dx%d-%04d.i420\n", img->d_w, img->d_h, + ++frame_cnt); + } + } + + printf("Processed %d frames.\n", frame_cnt); + if (vpx_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec."); + + vpx_video_reader_close(reader); + + fclose(outfile); + return EXIT_SUCCESS; +} diff --git a/media/libvpx/libvpx/examples/decode_with_drops.c b/media/libvpx/libvpx/examples/decode_with_drops.c new file mode 100644 index 0000000000..03c79a4561 --- /dev/null +++ b/media/libvpx/libvpx/examples/decode_with_drops.c @@ -0,0 +1,148 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// Decode With Drops Example +// ========================= +// +// This is an example utility which drops a series of frames, as specified +// on the command line. This is useful for observing the error recovery +// features of the codec. +// +// Usage +// ----- +// This example adds a single argument to the `simple_decoder` example, +// which specifies the range or pattern of frames to drop. The parameter is +// parsed as follows: +// +// Dropping A Range Of Frames +// -------------------------- +// To drop a range of frames, specify the starting frame and the ending +// frame to drop, separated by a dash. 
The following command will drop +// frames 5 through 10 (base 1). +// +// $ ./decode_with_drops in.ivf out.i420 5-10 +// +// +// Dropping A Pattern Of Frames +// ---------------------------- +// To drop a pattern of frames, specify the number of frames to drop and +// the number of frames after which to repeat the pattern, separated by +// a forward-slash. The following command will drop 3 of 7 frames. +// Specifically, it will decode 4 frames, then drop 3 frames, and then +// repeat. +// +// $ ./decode_with_drops in.ivf out.i420 3/7 +// +// +// Extra Variables +// --------------- +// This example maintains the pattern passed on the command line in the +// `n`, `m`, and `is_range` variables: +// +// +// Making The Drop Decision +// ------------------------ +// The example decides whether to drop the frame based on the current +// frame number, immediately before decoding the frame. + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "vpx/vp8dx.h" +#include "vpx/vpx_decoder.h" + +#include "../tools_common.h" +#include "../video_reader.h" +#include "./vpx_config.h" + +static const char *exec_name; + +void usage_exit(void) { + fprintf(stderr, "Usage: %s <infile> <outfile> <N-M|N/M>\n", exec_name); + exit(EXIT_FAILURE); +} + +int main(int argc, char **argv) { + int frame_cnt = 0; + FILE *outfile = NULL; + vpx_codec_ctx_t codec; + const VpxInterface *decoder = NULL; + VpxVideoReader *reader = NULL; + const VpxVideoInfo *info = NULL; + int n = 0; + int m = 0; + int is_range = 0; + char *nptr = NULL; + + exec_name = argv[0]; + + if (argc != 4) die("Invalid number of arguments."); + + reader = vpx_video_reader_open(argv[1]); + if (!reader) die("Failed to open %s for reading.", argv[1]); + + if (!(outfile = fopen(argv[2], "wb"))) + die("Failed to open %s for writing.", argv[2]); + + n = (int)strtol(argv[3], &nptr, 0); + m = (int)strtol(nptr + 1, NULL, 0); + is_range = (*nptr == '-'); + if (!n || !m || (*nptr != '-' && *nptr != '/')) + die("Couldn't 
parse pattern %s.\n", argv[3]); + + info = vpx_video_reader_get_info(reader); + + decoder = get_vpx_decoder_by_fourcc(info->codec_fourcc); + if (!decoder) die("Unknown input codec."); + + printf("Using %s\n", vpx_codec_iface_name(decoder->codec_interface())); + + if (vpx_codec_dec_init(&codec, decoder->codec_interface(), NULL, 0)) + die("Failed to initialize decoder."); + + while (vpx_video_reader_read_frame(reader)) { + vpx_codec_iter_t iter = NULL; + vpx_image_t *img = NULL; + size_t frame_size = 0; + int skip; + const unsigned char *frame = + vpx_video_reader_get_frame(reader, &frame_size); + if (vpx_codec_decode(&codec, frame, (unsigned int)frame_size, NULL, 0)) + die_codec(&codec, "Failed to decode frame."); + + ++frame_cnt; + + skip = (is_range && frame_cnt >= n && frame_cnt <= m) || + (!is_range && m - (frame_cnt - 1) % m <= n); + + if (!skip) { + putc('.', stdout); + + while ((img = vpx_codec_get_frame(&codec, &iter)) != NULL) + vpx_img_write(img, outfile); + } else { + putc('X', stdout); + } + + fflush(stdout); + } + + printf("Processed %d frames.\n", frame_cnt); + if (vpx_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec."); + + printf("Play: ffplay -f rawvideo -pix_fmt yuv420p -s %dx%d %s\n", + info->frame_width, info->frame_height, argv[2]); + + vpx_video_reader_close(reader); + fclose(outfile); + + return EXIT_SUCCESS; +} diff --git a/media/libvpx/libvpx/examples/postproc.c b/media/libvpx/libvpx/examples/postproc.c new file mode 100644 index 0000000000..b53c15ea15 --- /dev/null +++ b/media/libvpx/libvpx/examples/postproc.c @@ -0,0 +1,133 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// Postprocessing Decoder +// ====================== +// +// This example adds postprocessing to the simple decoder loop. +// +// Initializing Postprocessing +// --------------------------- +// You must inform the codec that you might request postprocessing at +// initialization time. This is done by passing the VPX_CODEC_USE_POSTPROC +// flag to `vpx_codec_dec_init`. If the codec does not support +// postprocessing, this call will return VPX_CODEC_INCAPABLE. For +// demonstration purposes, we also fall back to default initialization if +// the codec does not provide support. +// +// Using Adaptive Postprocessing +// ----------------------------- +// VP6 provides "adaptive postprocessing." It will automatically select the +// best postprocessing filter on a frame by frame basis based on the amount +// of time remaining before the user's specified deadline expires. The +// special value 0 indicates that the codec should take as long as +// necessary to provide the best quality frame. This example gives the +// codec 15ms (15000us) to return a frame. Remember that this is a soft +// deadline, and the codec may exceed it doing its regular processing. In +// these cases, no additional postprocessing will be done. +// +// Codec Specific Postprocessing Controls +// -------------------------------------- +// Some codecs provide fine grained controls over their built-in +// postprocessors. VP8 is one example. The following sample code toggles +// postprocessing on and off every 15 frames. 
+ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "vpx/vp8dx.h" +#include "vpx/vpx_decoder.h" + +#include "../tools_common.h" +#include "../video_reader.h" +#include "./vpx_config.h" + +static const char *exec_name; + +void usage_exit(void) { + fprintf(stderr, "Usage: %s <infile> <outfile>\n", exec_name); + exit(EXIT_FAILURE); +} + +int main(int argc, char **argv) { + int frame_cnt = 0; + FILE *outfile = NULL; + vpx_codec_ctx_t codec; + vpx_codec_err_t res; + VpxVideoReader *reader = NULL; + const VpxInterface *decoder = NULL; + const VpxVideoInfo *info = NULL; + + exec_name = argv[0]; + + if (argc != 3) die("Invalid number of arguments."); + + reader = vpx_video_reader_open(argv[1]); + if (!reader) die("Failed to open %s for reading.", argv[1]); + + if (!(outfile = fopen(argv[2], "wb"))) + die("Failed to open %s for writing", argv[2]); + + info = vpx_video_reader_get_info(reader); + + decoder = get_vpx_decoder_by_fourcc(info->codec_fourcc); + if (!decoder) die("Unknown input codec."); + + printf("Using %s\n", vpx_codec_iface_name(decoder->codec_interface())); + + res = vpx_codec_dec_init(&codec, decoder->codec_interface(), NULL, + VPX_CODEC_USE_POSTPROC); + if (res == VPX_CODEC_INCAPABLE) + die("Postproc not supported by this decoder."); + + if (res) die("Failed to initialize decoder."); + + while (vpx_video_reader_read_frame(reader)) { + vpx_codec_iter_t iter = NULL; + vpx_image_t *img = NULL; + size_t frame_size = 0; + const unsigned char *frame = + vpx_video_reader_get_frame(reader, &frame_size); + + ++frame_cnt; + + if (frame_cnt % 30 == 1) { + vp8_postproc_cfg_t pp = { 0, 0, 0 }; + + if (vpx_codec_control(&codec, VP8_SET_POSTPROC, &pp)) + die_codec(&codec, "Failed to turn off postproc."); + } else if (frame_cnt % 30 == 16) { + vp8_postproc_cfg_t pp = { VP8_DEBLOCK | VP8_DEMACROBLOCK | VP8_MFQE, 4, + 0 }; + if (vpx_codec_control(&codec, VP8_SET_POSTPROC, &pp)) + die_codec(&codec, "Failed to turn on postproc."); + } + + // Decode the 
frame with 15ms deadline + if (vpx_codec_decode(&codec, frame, (unsigned int)frame_size, NULL, 15000)) + die_codec(&codec, "Failed to decode frame"); + + while ((img = vpx_codec_get_frame(&codec, &iter)) != NULL) { + vpx_img_write(img, outfile); + } + } + + printf("Processed %d frames.\n", frame_cnt); + if (vpx_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec"); + + printf("Play: ffplay -f rawvideo -pix_fmt yuv420p -s %dx%d %s\n", + info->frame_width, info->frame_height, argv[2]); + + vpx_video_reader_close(reader); + + fclose(outfile); + return EXIT_SUCCESS; +} diff --git a/media/libvpx/libvpx/examples/resize_util.c b/media/libvpx/libvpx/examples/resize_util.c new file mode 100644 index 0000000000..7e529b2e20 --- /dev/null +++ b/media/libvpx/libvpx/examples/resize_util.c @@ -0,0 +1,123 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include <assert.h> +#include <limits.h> +#include <math.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "../tools_common.h" +#include "../vp9/encoder/vp9_resize.h" + +static const char *exec_name = NULL; + +static void usage() { + printf("Usage:\n"); + printf("%s <input_yuv> <width>x<height> <target_width>x<target_height> ", + exec_name); + printf("<output_yuv> [<frames>]\n"); +} + +void usage_exit(void) { + usage(); + exit(EXIT_FAILURE); +} + +static int parse_dim(char *v, int *width, int *height) { + char *x = strchr(v, 'x'); + if (x == NULL) x = strchr(v, 'X'); + if (x == NULL) return 0; + *width = atoi(v); + *height = atoi(&x[1]); + if (*width <= 0 || *height <= 0) + return 0; + else + return 1; +} + +int main(int argc, char *argv[]) { + char *fin, *fout; + FILE *fpin, *fpout; + uint8_t *inbuf, *outbuf; + uint8_t *inbuf_u, *outbuf_u; + uint8_t *inbuf_v, *outbuf_v; + int f, frames; + int width, height, target_width, target_height; + + exec_name = argv[0]; + + if (argc < 5) { + printf("Incorrect parameters:\n"); + usage(); + return 1; + } + + fin = argv[1]; + fout = argv[4]; + if (!parse_dim(argv[2], &width, &height)) { + printf("Incorrect parameters: %s\n", argv[2]); + usage(); + return 1; + } + if (!parse_dim(argv[3], &target_width, &target_height)) { + printf("Incorrect parameters: %s\n", argv[3]); + usage(); + return 1; + } + + fpin = fopen(fin, "rb"); + if (fpin == NULL) { + printf("Can't open file %s to read\n", fin); + usage(); + return 1; + } + fpout = fopen(fout, "wb"); + if (fpout == NULL) { + printf("Can't open file %s to write\n", fout); + usage(); + return 1; + } + if (argc >= 6) + frames = atoi(argv[5]); + else + frames = INT_MAX; + + printf("Input size: %dx%d\n", width, height); + printf("Target size: %dx%d, Frames: ", target_width, target_height); + if (frames == INT_MAX) + printf("All\n"); + else + printf("%d\n", frames); + + inbuf = (uint8_t *)malloc(width * height * 3 / 2); + outbuf = (uint8_t 
*)malloc(target_width * target_height * 3 / 2); + inbuf_u = inbuf + width * height; + inbuf_v = inbuf_u + width * height / 4; + outbuf_u = outbuf + target_width * target_height; + outbuf_v = outbuf_u + target_width * target_height / 4; + f = 0; + while (f < frames) { + if (fread(inbuf, width * height * 3 / 2, 1, fpin) != 1) break; + vp9_resize_frame420(inbuf, width, inbuf_u, inbuf_v, width / 2, height, + width, outbuf, target_width, outbuf_u, outbuf_v, + target_width / 2, target_height, target_width); + fwrite(outbuf, target_width * target_height * 3 / 2, 1, fpout); + f++; + } + printf("%d frames processed\n", f); + fclose(fpin); + fclose(fpout); + + free(inbuf); + free(outbuf); + return 0; +} diff --git a/media/libvpx/libvpx/examples/set_maps.c b/media/libvpx/libvpx/examples/set_maps.c new file mode 100644 index 0000000000..867e473aea --- /dev/null +++ b/media/libvpx/libvpx/examples/set_maps.c @@ -0,0 +1,243 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// VP8 Set Active and ROI Maps +// =========================== +// +// This is an example demonstrating how to control the VP8 encoder's +// ROI and Active maps. +// +// ROI (Region of Interest) maps are a way for the application to assign +// each macroblock in the image to a region, and then set quantizer and +// filtering parameters on that image. +// +// Active maps are a way for the application to specify on a +// macroblock-by-macroblock basis whether there is any activity in that +// macroblock. +// +// +// Configuration +// ------------- +// An ROI map is set on frame 22. 
If the width of the image in macroblocks +// is evenly divisible by 4, then the output will appear to have distinct +// columns, where the quantizer, loopfilter, and static threshold differ +// from column to column. +// +// An active map is set on frame 33. If the width of the image in macroblocks +// is evenly divisible by 4, then the output will appear to have distinct +// columns, where one column will have motion and the next will not. +// +// The active map is cleared on frame 44. +// +// Observing The Effects +// --------------------- +// Use the `simple_decoder` example to decode this sample, and observe +// the change in the image at frames 22, 33, and 44. + +#include <assert.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "vpx/vp8cx.h" +#include "vpx/vpx_encoder.h" + +#include "../tools_common.h" +#include "../video_writer.h" + +static const char *exec_name; + +void usage_exit(void) { + fprintf(stderr, "Usage: %s <codec> <width> <height> <infile> <outfile>\n", + exec_name); + exit(EXIT_FAILURE); +} + +static void set_roi_map(const vpx_codec_enc_cfg_t *cfg, + vpx_codec_ctx_t *codec) { + unsigned int i; + vpx_roi_map_t roi; + memset(&roi, 0, sizeof(roi)); + + roi.rows = (cfg->g_h + 15) / 16; + roi.cols = (cfg->g_w + 15) / 16; + + roi.delta_q[0] = 0; + roi.delta_q[1] = -2; + roi.delta_q[2] = -4; + roi.delta_q[3] = -6; + + roi.delta_lf[0] = 0; + roi.delta_lf[1] = 1; + roi.delta_lf[2] = 2; + roi.delta_lf[3] = 3; + + roi.static_threshold[0] = 1500; + roi.static_threshold[1] = 1000; + roi.static_threshold[2] = 500; + roi.static_threshold[3] = 0; + + roi.roi_map = (uint8_t *)malloc(roi.rows * roi.cols); + for (i = 0; i < roi.rows * roi.cols; ++i) roi.roi_map[i] = i % 4; + + if (vpx_codec_control(codec, VP8E_SET_ROI_MAP, &roi)) + die_codec(codec, "Failed to set ROI map"); + + free(roi.roi_map); +} + +static void set_active_map(const vpx_codec_enc_cfg_t *cfg, + vpx_codec_ctx_t *codec) { + unsigned int i; + vpx_active_map_t map = { 0, 0, 0 
}; + + map.rows = (cfg->g_h + 15) / 16; + map.cols = (cfg->g_w + 15) / 16; + + map.active_map = (uint8_t *)malloc(map.rows * map.cols); + for (i = 0; i < map.rows * map.cols; ++i) map.active_map[i] = i % 2; + + if (vpx_codec_control(codec, VP8E_SET_ACTIVEMAP, &map)) + die_codec(codec, "Failed to set active map"); + + free(map.active_map); +} + +static void unset_active_map(const vpx_codec_enc_cfg_t *cfg, + vpx_codec_ctx_t *codec) { + vpx_active_map_t map = { 0, 0, 0 }; + + map.rows = (cfg->g_h + 15) / 16; + map.cols = (cfg->g_w + 15) / 16; + map.active_map = NULL; + + if (vpx_codec_control(codec, VP8E_SET_ACTIVEMAP, &map)) + die_codec(codec, "Failed to set active map"); +} + +static int encode_frame(vpx_codec_ctx_t *codec, vpx_image_t *img, + int frame_index, VpxVideoWriter *writer) { + int got_pkts = 0; + vpx_codec_iter_t iter = NULL; + const vpx_codec_cx_pkt_t *pkt = NULL; + const vpx_codec_err_t res = + vpx_codec_encode(codec, img, frame_index, 1, 0, VPX_DL_GOOD_QUALITY); + if (res != VPX_CODEC_OK) die_codec(codec, "Failed to encode frame"); + + while ((pkt = vpx_codec_get_cx_data(codec, &iter)) != NULL) { + got_pkts = 1; + + if (pkt->kind == VPX_CODEC_CX_FRAME_PKT) { + const int keyframe = (pkt->data.frame.flags & VPX_FRAME_IS_KEY) != 0; + if (!vpx_video_writer_write_frame(writer, pkt->data.frame.buf, + pkt->data.frame.sz, + pkt->data.frame.pts)) { + die_codec(codec, "Failed to write compressed frame"); + } + + printf(keyframe ? 
"K" : "."); + fflush(stdout); + } + } + + return got_pkts; +} + +int main(int argc, char **argv) { + FILE *infile = NULL; + vpx_codec_ctx_t codec; + vpx_codec_enc_cfg_t cfg; + int frame_count = 0; + vpx_image_t raw; + vpx_codec_err_t res; + VpxVideoInfo info; + VpxVideoWriter *writer = NULL; + const VpxInterface *encoder = NULL; + const int fps = 2; // TODO(dkovalev) add command line argument + const double bits_per_pixel_per_frame = 0.067; + + exec_name = argv[0]; + if (argc != 6) die("Invalid number of arguments"); + + memset(&info, 0, sizeof(info)); + + encoder = get_vpx_encoder_by_name(argv[1]); + if (encoder == NULL) { + die("Unsupported codec."); + } + assert(encoder != NULL); + info.codec_fourcc = encoder->fourcc; + info.frame_width = (int)strtol(argv[2], NULL, 0); + info.frame_height = (int)strtol(argv[3], NULL, 0); + info.time_base.numerator = 1; + info.time_base.denominator = fps; + + if (info.frame_width <= 0 || info.frame_height <= 0 || + (info.frame_width % 2) != 0 || (info.frame_height % 2) != 0) { + die("Invalid frame size: %dx%d", info.frame_width, info.frame_height); + } + + if (!vpx_img_alloc(&raw, VPX_IMG_FMT_I420, info.frame_width, + info.frame_height, 1)) { + die("Failed to allocate image."); + } + + printf("Using %s\n", vpx_codec_iface_name(encoder->codec_interface())); + + res = vpx_codec_enc_config_default(encoder->codec_interface(), &cfg, 0); + if (res) die_codec(&codec, "Failed to get default codec config."); + + cfg.g_w = info.frame_width; + cfg.g_h = info.frame_height; + cfg.g_timebase.num = info.time_base.numerator; + cfg.g_timebase.den = info.time_base.denominator; + cfg.rc_target_bitrate = + (unsigned int)(bits_per_pixel_per_frame * cfg.g_w * cfg.g_h * fps / 1000); + cfg.g_lag_in_frames = 0; + + writer = vpx_video_writer_open(argv[5], kContainerIVF, &info); + if (!writer) die("Failed to open %s for writing.", argv[5]); + + if (!(infile = fopen(argv[4], "rb"))) + die("Failed to open %s for reading.", argv[4]); + + if 
(vpx_codec_enc_init(&codec, encoder->codec_interface(), &cfg, 0)) + die("Failed to initialize encoder"); + + // Encode frames. + while (vpx_img_read(&raw, infile)) { + ++frame_count; + + if (frame_count == 22 && encoder->fourcc == VP8_FOURCC) { + set_roi_map(&cfg, &codec); + } else if (frame_count == 33) { + set_active_map(&cfg, &codec); + } else if (frame_count == 44) { + unset_active_map(&cfg, &codec); + } + + encode_frame(&codec, &raw, frame_count, writer); + } + + // Flush encoder. + while (encode_frame(&codec, NULL, -1, writer)) { + } + + printf("\n"); + fclose(infile); + printf("Processed %d frames.\n", frame_count); + + vpx_img_free(&raw); + if (vpx_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec."); + + vpx_video_writer_close(writer); + + return EXIT_SUCCESS; +} diff --git a/media/libvpx/libvpx/examples/simple_decoder.c b/media/libvpx/libvpx/examples/simple_decoder.c new file mode 100644 index 0000000000..d089e826d5 --- /dev/null +++ b/media/libvpx/libvpx/examples/simple_decoder.c @@ -0,0 +1,149 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// Simple Decoder +// ============== +// +// This is an example of a simple decoder loop. It takes an input file +// containing the compressed data (in IVF format), passes it through the +// decoder, and writes the decompressed frames to disk. Other decoder +// examples build upon this one. +// +// The details of the IVF format have been elided from this example for +// simplicity of presentation, as IVF files will not generally be used by +// your application. 
In general, an IVF file consists of a file header, +// followed by a variable number of frames. Each frame consists of a frame +// header followed by a variable length payload. The length of the payload +// is specified in the first four bytes of the frame header. The payload is +// the raw compressed data. +// +// Standard Includes +// ----------------- +// For decoders, you only have to include `vpx_decoder.h` and then any +// header files for the specific codecs you use. In this case, we're using +// vp8. +// +// Initializing The Codec +// ---------------------- +// The libvpx decoder is initialized by the call to vpx_codec_dec_init(). +// Determining the codec interface to use is handled by VpxVideoReader and the +// functions prefixed with vpx_video_reader_. Discussion of those functions is +// beyond the scope of this example, but the main gist is to open the input file +// and parse just enough of it to determine if it's a VPx file and which VPx +// codec is contained within the file. +// Note the NULL pointer passed to vpx_codec_dec_init(). We do that in this +// example because we want the algorithm to determine the stream configuration +// (width/height) and allocate memory automatically. +// +// Decoding A Frame +// ---------------- +// Once the frame has been read into memory, it is decoded using the +// `vpx_codec_decode` function. The call takes a pointer to the data +// (`frame`) and the length of the data (`frame_size`). No application data +// is associated with the frame in this example, so the `user_priv` +// parameter is NULL. The `deadline` parameter is left at zero for this +// example. This parameter is generally only used when doing adaptive post +// processing. +// +// Codecs may produce a variable number of output frames for every call to +// `vpx_codec_decode`. These frames are retrieved by the +// `vpx_codec_get_frame` iterator function. The iterator variable `iter` is +// initialized to NULL each time `vpx_codec_decode` is called. 
+// `vpx_codec_get_frame` is called in a loop, returning a pointer to a +// decoded image or NULL to indicate the end of list. +// +// Processing The Decoded Data +// --------------------------- +// In this example, we simply write the encoded data to disk. It is +// important to honor the image's `stride` values. +// +// Cleanup +// ------- +// The `vpx_codec_destroy` call frees any memory allocated by the codec. +// +// Error Handling +// -------------- +// This example does not special case any error return codes. If there was +// an error, a descriptive message is printed and the program exits. With +// few exceptions, vpx_codec functions return an enumerated error status, +// with the value `0` indicating success. + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "vpx/vpx_decoder.h" + +#include "../tools_common.h" +#include "../video_reader.h" +#include "./vpx_config.h" + +static const char *exec_name; + +void usage_exit(void) { + fprintf(stderr, "Usage: %s <infile> <outfile>\n", exec_name); + exit(EXIT_FAILURE); +} + +int main(int argc, char **argv) { + int frame_cnt = 0; + FILE *outfile = NULL; + vpx_codec_ctx_t codec; + VpxVideoReader *reader = NULL; + const VpxInterface *decoder = NULL; + const VpxVideoInfo *info = NULL; + + exec_name = argv[0]; + + if (argc != 3) die("Invalid number of arguments."); + + reader = vpx_video_reader_open(argv[1]); + if (!reader) die("Failed to open %s for reading.", argv[1]); + + if (!(outfile = fopen(argv[2], "wb"))) + die("Failed to open %s for writing.", argv[2]); + + info = vpx_video_reader_get_info(reader); + + decoder = get_vpx_decoder_by_fourcc(info->codec_fourcc); + if (!decoder) die("Unknown input codec."); + + printf("Using %s\n", vpx_codec_iface_name(decoder->codec_interface())); + + if (vpx_codec_dec_init(&codec, decoder->codec_interface(), NULL, 0)) + die("Failed to initialize decoder."); + + while (vpx_video_reader_read_frame(reader)) { + vpx_codec_iter_t iter = NULL; + vpx_image_t *img 
= NULL; + size_t frame_size = 0; + const unsigned char *frame = + vpx_video_reader_get_frame(reader, &frame_size); + if (vpx_codec_decode(&codec, frame, (unsigned int)frame_size, NULL, 0)) + die_codec(&codec, "Failed to decode frame."); + + while ((img = vpx_codec_get_frame(&codec, &iter)) != NULL) { + vpx_img_write(img, outfile); + ++frame_cnt; + } + } + + printf("Processed %d frames.\n", frame_cnt); + if (vpx_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec"); + + printf("Play: ffplay -f rawvideo -pix_fmt yuv420p -s %dx%d %s\n", + info->frame_width, info->frame_height, argv[2]); + + vpx_video_reader_close(reader); + + fclose(outfile); + + return EXIT_SUCCESS; +} diff --git a/media/libvpx/libvpx/examples/simple_encoder.c b/media/libvpx/libvpx/examples/simple_encoder.c new file mode 100644 index 0000000000..dffdd6d7da --- /dev/null +++ b/media/libvpx/libvpx/examples/simple_encoder.c @@ -0,0 +1,247 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// Simple Encoder +// ============== +// +// This is an example of a simple encoder loop. It takes an input file in +// YV12 format, passes it through the encoder, and writes the compressed +// frames to disk in IVF format. Other decoder examples build upon this +// one. +// +// The details of the IVF format have been elided from this example for +// simplicity of presentation, as IVF files will not generally be used by +// your application. In general, an IVF file consists of a file header, +// followed by a variable number of frames. Each frame consists of a frame +// header followed by a variable length payload. 
The length of the payload +// is specified in the first four bytes of the frame header. The payload is +// the raw compressed data. +// +// Standard Includes +// ----------------- +// For encoders, you only have to include `vpx_encoder.h` and then any +// header files for the specific codecs you use. In this case, we're using +// vp8. +// +// Getting The Default Configuration +// --------------------------------- +// Encoders have the notion of "usage profiles." For example, an encoder +// may want to publish default configurations for both a video +// conferencing application and a best quality offline encoder. These +// obviously have very different default settings. Consult the +// documentation for your codec to see if it provides any default +// configurations. All codecs provide a default configuration, number 0, +// which is valid for material in the vicinity of QCIF/QVGA. +// +// Updating The Configuration +// --------------------------------- +// Almost all applications will want to update the default configuration +// with settings specific to their usage. Here we set the width and height +// of the video file to that specified on the command line. We also scale +// the default bitrate based on the ratio between the default resolution +// and the resolution specified on the command line. +// +// Initializing The Codec +// ---------------------- +// The encoder is initialized by the following code. +// +// Encoding A Frame +// ---------------- +// The frame is read as a continuous block (size width * height * 3 / 2) +// from the input file. If a frame was read (the input file has not hit +// EOF) then the frame is passed to the encoder. Otherwise, a NULL +// is passed, indicating the End-Of-Stream condition to the encoder. The +// `frame_cnt` is reused as the presentation time stamp (PTS) and each +// frame is shown for one frame-time in duration. The flags parameter is +// unused in this example. 
The deadline is set to VPX_DL_GOOD_QUALITY to +// balance encoding speed against quality. + +// Forced Keyframes +// ---------------- +// Keyframes can be forced by setting the VPX_EFLAG_FORCE_KF bit of the +// flags passed to `vpx_codec_encode()`. In this example, we force a +// keyframe every <keyframe-interval> frames. Note, the output stream can +// contain additional keyframes beyond those that have been forced using the +// VPX_EFLAG_FORCE_KF flag because of automatic keyframe placement by the +// encoder. +// +// Processing The Encoded Data +// --------------------------- +// Each packet of type `VPX_CODEC_CX_FRAME_PKT` contains the encoded data +// for this frame. We write an IVF frame header, followed by the raw data. +// +// Cleanup +// ------- +// The `vpx_codec_destroy` call frees any memory allocated by the codec. +// +// Error Handling +// -------------- +// This example does not special case any error return codes. If there was +// an error, a descriptive message is printed and the program exits. With +// few exceptions, vpx_codec functions return an enumerated error status, +// with the value `0` indicating success. +// +// Error Resiliency Features +// ------------------------- +// Error resiliency is controlled by the g_error_resilient member of the +// configuration structure. Use the `decode_with_drops` example to decode with +// frames 5-10 dropped. Compare the output for a file encoded with this example +// versus one encoded with the `simple_encoder` example. 
+ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "vpx/vpx_encoder.h" + +#include "../tools_common.h" +#include "../video_writer.h" + +static const char *exec_name; + +void usage_exit(void) { + fprintf(stderr, + "Usage: %s <codec> <width> <height> <infile> <outfile> " + "<keyframe-interval> <error-resilient> <frames to encode>\n" + "See comments in simple_encoder.c for more information.\n", + exec_name); + exit(EXIT_FAILURE); +} + +static int encode_frame(vpx_codec_ctx_t *codec, vpx_image_t *img, + int frame_index, int flags, VpxVideoWriter *writer) { + int got_pkts = 0; + vpx_codec_iter_t iter = NULL; + const vpx_codec_cx_pkt_t *pkt = NULL; + const vpx_codec_err_t res = + vpx_codec_encode(codec, img, frame_index, 1, flags, VPX_DL_GOOD_QUALITY); + if (res != VPX_CODEC_OK) die_codec(codec, "Failed to encode frame"); + + while ((pkt = vpx_codec_get_cx_data(codec, &iter)) != NULL) { + got_pkts = 1; + + if (pkt->kind == VPX_CODEC_CX_FRAME_PKT) { + const int keyframe = (pkt->data.frame.flags & VPX_FRAME_IS_KEY) != 0; + if (!vpx_video_writer_write_frame(writer, pkt->data.frame.buf, + pkt->data.frame.sz, + pkt->data.frame.pts)) { + die_codec(codec, "Failed to write compressed frame"); + } + printf(keyframe ? "K" : "."); + fflush(stdout); + } + } + + return got_pkts; +} + +// TODO(tomfinegan): Improve command line parsing and add args for bitrate/fps. 
+int main(int argc, char **argv) { + FILE *infile = NULL; + vpx_codec_ctx_t codec; + vpx_codec_enc_cfg_t cfg; + int frame_count = 0; + vpx_image_t raw; + vpx_codec_err_t res; + VpxVideoInfo info = { 0, 0, 0, { 0, 0 } }; + VpxVideoWriter *writer = NULL; + const VpxInterface *encoder = NULL; + const int fps = 30; + const int bitrate = 200; + int keyframe_interval = 0; + int max_frames = 0; + int frames_encoded = 0; + const char *codec_arg = NULL; + const char *width_arg = NULL; + const char *height_arg = NULL; + const char *infile_arg = NULL; + const char *outfile_arg = NULL; + const char *keyframe_interval_arg = NULL; + + exec_name = argv[0]; + + if (argc != 9) die("Invalid number of arguments"); + + codec_arg = argv[1]; + width_arg = argv[2]; + height_arg = argv[3]; + infile_arg = argv[4]; + outfile_arg = argv[5]; + keyframe_interval_arg = argv[6]; + max_frames = (int)strtol(argv[8], NULL, 0); + + encoder = get_vpx_encoder_by_name(codec_arg); + if (!encoder) die("Unsupported codec."); + + info.codec_fourcc = encoder->fourcc; + info.frame_width = (int)strtol(width_arg, NULL, 0); + info.frame_height = (int)strtol(height_arg, NULL, 0); + info.time_base.numerator = 1; + info.time_base.denominator = fps; + + if (info.frame_width <= 0 || info.frame_height <= 0 || + (info.frame_width % 2) != 0 || (info.frame_height % 2) != 0) { + die("Invalid frame size: %dx%d", info.frame_width, info.frame_height); + } + + if (!vpx_img_alloc(&raw, VPX_IMG_FMT_I420, info.frame_width, + info.frame_height, 1)) { + die("Failed to allocate image."); + } + + keyframe_interval = (int)strtol(keyframe_interval_arg, NULL, 0); + if (keyframe_interval < 0) die("Invalid keyframe interval value."); + + printf("Using %s\n", vpx_codec_iface_name(encoder->codec_interface())); + + res = vpx_codec_enc_config_default(encoder->codec_interface(), &cfg, 0); + if (res) die_codec(&codec, "Failed to get default codec config."); + + cfg.g_w = info.frame_width; + cfg.g_h = info.frame_height; + cfg.g_timebase.num = 
info.time_base.numerator; + cfg.g_timebase.den = info.time_base.denominator; + cfg.rc_target_bitrate = bitrate; + cfg.g_error_resilient = (vpx_codec_er_flags_t)strtoul(argv[7], NULL, 0); + + writer = vpx_video_writer_open(outfile_arg, kContainerIVF, &info); + if (!writer) die("Failed to open %s for writing.", outfile_arg); + + if (!(infile = fopen(infile_arg, "rb"))) + die("Failed to open %s for reading.", infile_arg); + + if (vpx_codec_enc_init(&codec, encoder->codec_interface(), &cfg, 0)) + die("Failed to initialize encoder"); + + // Encode frames. + while (vpx_img_read(&raw, infile)) { + int flags = 0; + if (keyframe_interval > 0 && frame_count % keyframe_interval == 0) + flags |= VPX_EFLAG_FORCE_KF; + encode_frame(&codec, &raw, frame_count++, flags, writer); + frames_encoded++; + if (max_frames > 0 && frames_encoded >= max_frames) break; + } + + // Flush encoder. + while (encode_frame(&codec, NULL, -1, 0, writer)) { + } + + printf("\n"); + fclose(infile); + printf("Processed %d frames.\n", frame_count); + + vpx_img_free(&raw); + if (vpx_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec."); + + vpx_video_writer_close(writer); + + return EXIT_SUCCESS; +} diff --git a/media/libvpx/libvpx/examples/svc_context.h b/media/libvpx/libvpx/examples/svc_context.h new file mode 100644 index 0000000000..c5779ce8a9 --- /dev/null +++ b/media/libvpx/libvpx/examples/svc_context.h @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2013 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +/** + * SvcContext - input parameters and state to encode a multi-layered + * spatial SVC frame + */ + +#ifndef VPX_EXAMPLES_SVC_CONTEXT_H_ +#define VPX_EXAMPLES_SVC_CONTEXT_H_ + +#include "vpx/vp8cx.h" +#include "vpx/vpx_encoder.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef enum SVC_LOG_LEVEL { + SVC_LOG_ERROR, + SVC_LOG_INFO, + SVC_LOG_DEBUG +} SVC_LOG_LEVEL; + +typedef struct { + // public interface to svc_command options + int spatial_layers; // number of spatial layers + int temporal_layers; // number of temporal layers + int temporal_layering_mode; + SVC_LOG_LEVEL log_level; // amount of information to display + int output_rc_stat; // for outputting rc stats + int speed; // speed setting for codec + int threads; + int aqmode; // turns on aq-mode=3 (cyclic_refresh): 0=off, 1=on. + // private storage for vpx_svc_encode + void *internal; +} SvcContext; + +#define OPTION_BUFFER_SIZE 1024 +#define COMPONENTS 4 // psnr & sse statistics maintained for total, y, u, v + +typedef struct SvcInternal { + char options[OPTION_BUFFER_SIZE]; // set by vpx_svc_set_options + + // values extracted from option, quantizers + vpx_svc_extra_cfg_t svc_params; + int enable_auto_alt_ref[VPX_SS_MAX_LAYERS]; + int bitrates[VPX_MAX_LAYERS]; + + // accumulated statistics + double psnr_sum[VPX_SS_MAX_LAYERS][COMPONENTS]; // total/Y/U/V + uint64_t sse_sum[VPX_SS_MAX_LAYERS][COMPONENTS]; + uint32_t bytes_sum[VPX_SS_MAX_LAYERS]; + + // codec encoding values + int width; // width of highest layer + int height; // height of highest layer + int kf_dist; // distance between keyframes + + // state variables + int psnr_pkt_received; + int layer; + int use_multiple_frame_contexts; + + vpx_codec_ctx_t *codec_ctx; +} SvcInternal_t; + +/** + * Set SVC options + * options are supplied as a single string separated by spaces + * Format: encoding-mode=<i|ip|alt-ip|gf> + * layers=<layer_count> + * scaling-factors=<n1>/<d1>,<n2>/<d2>,... + * quantizers=<q1>,<q2>,... 
+ */ +vpx_codec_err_t vpx_svc_set_options(SvcContext *svc_ctx, const char *options); + +/** + * initialize SVC encoding + */ +vpx_codec_err_t vpx_svc_init(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx, + vpx_codec_iface_t *iface, + vpx_codec_enc_cfg_t *cfg); +/** + * encode a frame of video with multiple layers + */ +vpx_codec_err_t vpx_svc_encode(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx, + struct vpx_image *rawimg, vpx_codec_pts_t pts, + int64_t duration, int deadline); + +/** + * finished with svc encoding, release allocated resources + */ +void vpx_svc_release(SvcContext *svc_ctx); + +/** + * dump accumulated statistics and reset accumulated values + */ +void vpx_svc_dump_statistics(SvcContext *svc_ctx); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VPX_EXAMPLES_SVC_CONTEXT_H_ diff --git a/media/libvpx/libvpx/examples/svc_encodeframe.c b/media/libvpx/libvpx/examples/svc_encodeframe.c new file mode 100644 index 0000000000..1dd731765c --- /dev/null +++ b/media/libvpx/libvpx/examples/svc_encodeframe.c @@ -0,0 +1,634 @@ +/* + * Copyright (c) 2013 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +/** + * @file + * VP9 SVC encoding support via libvpx + */ + +#include <assert.h> +#include <math.h> +#include <limits.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#define VPX_DISABLE_CTRL_TYPECHECKS 1 +#include "../tools_common.h" +#include "./vpx_config.h" +#include "./svc_context.h" +#include "vpx/vp8cx.h" +#include "vpx/vpx_encoder.h" +#include "vpx_mem/vpx_mem.h" +#include "vp9/common/vp9_onyxc_int.h" + +#ifdef __MINGW32__ +#define strtok_r strtok_s +#ifndef MINGW_HAS_SECURE_API +// proto from /usr/x86_64-w64-mingw32/include/sec_api/string_s.h +_CRTIMP char *__cdecl strtok_s(char *str, const char *delim, char **context); +#endif /* MINGW_HAS_SECURE_API */ +#endif /* __MINGW32__ */ + +#ifdef _MSC_VER +#define strdup _strdup +#define strtok_r strtok_s +#endif + +#define SVC_REFERENCE_FRAMES 8 +#define SUPERFRAME_SLOTS (8) +#define SUPERFRAME_BUFFER_SIZE (SUPERFRAME_SLOTS * sizeof(uint32_t) + 2) + +#define MAX_QUANTIZER 63 + +static const int DEFAULT_SCALE_FACTORS_NUM[VPX_SS_MAX_LAYERS] = { 4, 5, 7, 11, + 16 }; + +static const int DEFAULT_SCALE_FACTORS_DEN[VPX_SS_MAX_LAYERS] = { 16, 16, 16, + 16, 16 }; + +static const int DEFAULT_SCALE_FACTORS_NUM_2x[VPX_SS_MAX_LAYERS] = { 1, 2, 4 }; + +static const int DEFAULT_SCALE_FACTORS_DEN_2x[VPX_SS_MAX_LAYERS] = { 4, 4, 4 }; + +typedef enum { + QUANTIZER = 0, + BITRATE, + SCALE_FACTOR, + AUTO_ALT_REF, + ALL_OPTION_TYPES +} LAYER_OPTION_TYPE; + +static const int option_max_values[ALL_OPTION_TYPES] = { 63, INT_MAX, INT_MAX, + 1 }; + +static const int option_min_values[ALL_OPTION_TYPES] = { 0, 0, 1, 0 }; + +// One encoded frame +typedef struct FrameData { + void *buf; // compressed data buffer + size_t size; // length of compressed data + vpx_codec_frame_flags_t flags; /**< flags for this frame */ + struct FrameData *next; +} FrameData; + +static SvcInternal_t *get_svc_internal(SvcContext *svc_ctx) { + if (svc_ctx == NULL) return NULL; + if (svc_ctx->internal == NULL) { + 
SvcInternal_t *const si = (SvcInternal_t *)malloc(sizeof(*si)); + if (si != NULL) { + memset(si, 0, sizeof(*si)); + } + svc_ctx->internal = si; + } + return (SvcInternal_t *)svc_ctx->internal; +} + +static const SvcInternal_t *get_const_svc_internal(const SvcContext *svc_ctx) { + if (svc_ctx == NULL) return NULL; + return (const SvcInternal_t *)svc_ctx->internal; +} + +static VPX_TOOLS_FORMAT_PRINTF(3, 4) int svc_log(SvcContext *svc_ctx, + SVC_LOG_LEVEL level, + const char *fmt, ...) { + char buf[512]; + int retval = 0; + va_list ap; + + if (level > svc_ctx->log_level) { + return retval; + } + + va_start(ap, fmt); + retval = vsnprintf(buf, sizeof(buf), fmt, ap); + va_end(ap); + + printf("%s", buf); + + return retval; +} + +static vpx_codec_err_t extract_option(LAYER_OPTION_TYPE type, char *input, + int *value0, int *value1) { + if (type == SCALE_FACTOR) { + *value0 = (int)strtol(input, &input, 10); + if (*input++ != '/') return VPX_CODEC_INVALID_PARAM; + *value1 = (int)strtol(input, &input, 10); + + if (*value0 < option_min_values[SCALE_FACTOR] || + *value1 < option_min_values[SCALE_FACTOR] || + *value0 > option_max_values[SCALE_FACTOR] || + *value1 > option_max_values[SCALE_FACTOR] || + *value0 > *value1) // num shouldn't be greater than den + return VPX_CODEC_INVALID_PARAM; + } else { + *value0 = atoi(input); + if (*value0 < option_min_values[type] || *value0 > option_max_values[type]) + return VPX_CODEC_INVALID_PARAM; + } + return VPX_CODEC_OK; +} + +static vpx_codec_err_t parse_layer_options_from_string(SvcContext *svc_ctx, + LAYER_OPTION_TYPE type, + const char *input, + int *option0, + int *option1) { + int i; + vpx_codec_err_t res = VPX_CODEC_OK; + char *input_string; + char *token; + const char *delim = ","; + char *save_ptr; + int num_layers = svc_ctx->spatial_layers; + if (type == BITRATE) + num_layers = svc_ctx->spatial_layers * svc_ctx->temporal_layers; + + if (input == NULL || option0 == NULL || + (option1 == NULL && type == SCALE_FACTOR)) + return 
VPX_CODEC_INVALID_PARAM; + + input_string = strdup(input); + if (input_string == NULL) return VPX_CODEC_MEM_ERROR; + token = strtok_r(input_string, delim, &save_ptr); + for (i = 0; i < num_layers; ++i) { + if (token != NULL) { + res = extract_option(type, token, option0 + i, option1 + i); + if (res != VPX_CODEC_OK) break; + token = strtok_r(NULL, delim, &save_ptr); + } else { + break; + } + } + if (res == VPX_CODEC_OK && i != num_layers) { + svc_log(svc_ctx, SVC_LOG_ERROR, + "svc: layer params type: %d %d values required, " + "but only %d specified\n", + type, num_layers, i); + res = VPX_CODEC_INVALID_PARAM; + } + free(input_string); + return res; +} + +/** + * Parse SVC encoding options + * Format: encoding-mode=<svc_mode>,layers=<layer_count> + * scale-factors=<n1>/<d1>,<n2>/<d2>,... + * quantizers=<q1>,<q2>,... + * svc_mode = [i|ip|alt_ip|gf] + */ +static vpx_codec_err_t parse_options(SvcContext *svc_ctx, const char *options) { + char *input_string; + char *option_name; + char *option_value; + char *input_ptr = NULL; + SvcInternal_t *const si = get_svc_internal(svc_ctx); + vpx_codec_err_t res = VPX_CODEC_OK; + int i, alt_ref_enabled = 0; + + if (options == NULL) return VPX_CODEC_OK; + input_string = strdup(options); + if (input_string == NULL) return VPX_CODEC_MEM_ERROR; + + // parse option name + option_name = strtok_r(input_string, "=", &input_ptr); + while (option_name != NULL) { + // parse option value + option_value = strtok_r(NULL, " ", &input_ptr); + if (option_value == NULL) { + svc_log(svc_ctx, SVC_LOG_ERROR, "option missing value: %s\n", + option_name); + res = VPX_CODEC_INVALID_PARAM; + break; + } + if (strcmp("spatial-layers", option_name) == 0) { + svc_ctx->spatial_layers = atoi(option_value); + } else if (strcmp("temporal-layers", option_name) == 0) { + svc_ctx->temporal_layers = atoi(option_value); + } else if (strcmp("scale-factors", option_name) == 0) { + res = parse_layer_options_from_string(svc_ctx, SCALE_FACTOR, option_value, + 
si->svc_params.scaling_factor_num, + si->svc_params.scaling_factor_den); + if (res != VPX_CODEC_OK) break; + } else if (strcmp("max-quantizers", option_name) == 0) { + res = + parse_layer_options_from_string(svc_ctx, QUANTIZER, option_value, + si->svc_params.max_quantizers, NULL); + if (res != VPX_CODEC_OK) break; + } else if (strcmp("min-quantizers", option_name) == 0) { + res = + parse_layer_options_from_string(svc_ctx, QUANTIZER, option_value, + si->svc_params.min_quantizers, NULL); + if (res != VPX_CODEC_OK) break; + } else if (strcmp("auto-alt-refs", option_name) == 0) { + res = parse_layer_options_from_string(svc_ctx, AUTO_ALT_REF, option_value, + si->enable_auto_alt_ref, NULL); + if (res != VPX_CODEC_OK) break; + } else if (strcmp("bitrates", option_name) == 0) { + res = parse_layer_options_from_string(svc_ctx, BITRATE, option_value, + si->bitrates, NULL); + if (res != VPX_CODEC_OK) break; + } else if (strcmp("multi-frame-contexts", option_name) == 0) { + si->use_multiple_frame_contexts = atoi(option_value); + } else { + svc_log(svc_ctx, SVC_LOG_ERROR, "invalid option: %s\n", option_name); + res = VPX_CODEC_INVALID_PARAM; + break; + } + option_name = strtok_r(NULL, "=", &input_ptr); + } + free(input_string); + + for (i = 0; i < svc_ctx->spatial_layers; ++i) { + if (si->svc_params.max_quantizers[i] > MAX_QUANTIZER || + si->svc_params.max_quantizers[i] < 0 || + si->svc_params.min_quantizers[i] > si->svc_params.max_quantizers[i] || + si->svc_params.min_quantizers[i] < 0) + res = VPX_CODEC_INVALID_PARAM; + } + + if (si->use_multiple_frame_contexts && + (svc_ctx->spatial_layers > 3 || + svc_ctx->spatial_layers * svc_ctx->temporal_layers > 4)) + res = VPX_CODEC_INVALID_PARAM; + + for (i = 0; i < svc_ctx->spatial_layers; ++i) + alt_ref_enabled += si->enable_auto_alt_ref[i]; + if (alt_ref_enabled > REF_FRAMES - svc_ctx->spatial_layers) { + svc_log(svc_ctx, SVC_LOG_ERROR, + "svc: auto alt ref: Maxinum %d(REF_FRAMES - layers) layers could" + "enabled auto alt 
reference frame, but %d layers are enabled\n", + REF_FRAMES - svc_ctx->spatial_layers, alt_ref_enabled); + res = VPX_CODEC_INVALID_PARAM; + } + + return res; +} + +vpx_codec_err_t vpx_svc_set_options(SvcContext *svc_ctx, const char *options) { + SvcInternal_t *const si = get_svc_internal(svc_ctx); + if (svc_ctx == NULL || options == NULL || si == NULL) { + return VPX_CODEC_INVALID_PARAM; + } + strncpy(si->options, options, sizeof(si->options) - 1); + si->options[sizeof(si->options) - 1] = '\0'; + return VPX_CODEC_OK; +} + +static vpx_codec_err_t assign_layer_bitrates( + const SvcContext *svc_ctx, vpx_codec_enc_cfg_t *const enc_cfg) { + int i; + const SvcInternal_t *const si = get_const_svc_internal(svc_ctx); + int sl, tl, spatial_layer_target; + + if (svc_ctx->temporal_layering_mode != 0) { + if (si->bitrates[0] != 0) { + unsigned int total_bitrate = 0; + for (sl = 0; sl < svc_ctx->spatial_layers; ++sl) { + total_bitrate += si->bitrates[sl * svc_ctx->temporal_layers + + svc_ctx->temporal_layers - 1]; + for (tl = 0; tl < svc_ctx->temporal_layers; ++tl) { + enc_cfg->ss_target_bitrate[sl * svc_ctx->temporal_layers] += + (unsigned int)si->bitrates[sl * svc_ctx->temporal_layers + tl]; + enc_cfg->layer_target_bitrate[sl * svc_ctx->temporal_layers + tl] = + si->bitrates[sl * svc_ctx->temporal_layers + tl]; + if (tl > 0 && (si->bitrates[sl * svc_ctx->temporal_layers + tl] <= + si->bitrates[sl * svc_ctx->temporal_layers + tl - 1])) + return VPX_CODEC_INVALID_PARAM; + } + } + if (total_bitrate != enc_cfg->rc_target_bitrate) + return VPX_CODEC_INVALID_PARAM; + } else { + float total = 0; + float alloc_ratio[VPX_MAX_LAYERS] = { 0 }; + + for (sl = 0; sl < svc_ctx->spatial_layers; ++sl) { + if (si->svc_params.scaling_factor_den[sl] > 0) { + alloc_ratio[sl] = (float)(pow(2, sl)); + total += alloc_ratio[sl]; + } + } + + for (sl = 0; sl < svc_ctx->spatial_layers; ++sl) { + enc_cfg->ss_target_bitrate[sl] = spatial_layer_target = + (unsigned int)(enc_cfg->rc_target_bitrate * 
alloc_ratio[sl] / + total); + if (svc_ctx->temporal_layering_mode == 3) { + enc_cfg->layer_target_bitrate[sl * svc_ctx->temporal_layers] = + (spatial_layer_target * 6) / 10; // 60% + enc_cfg->layer_target_bitrate[sl * svc_ctx->temporal_layers + 1] = + (spatial_layer_target * 8) / 10; // 80% + enc_cfg->layer_target_bitrate[sl * svc_ctx->temporal_layers + 2] = + spatial_layer_target; + } else if (svc_ctx->temporal_layering_mode == 2 || + svc_ctx->temporal_layering_mode == 1) { + enc_cfg->layer_target_bitrate[sl * svc_ctx->temporal_layers] = + spatial_layer_target * 2 / 3; + enc_cfg->layer_target_bitrate[sl * svc_ctx->temporal_layers + 1] = + spatial_layer_target; + } else { + // User should explicitly assign bitrates in this case. + assert(0); + } + } + } + } else { + if (si->bitrates[0] != 0) { + unsigned int total_bitrate = 0; + for (i = 0; i < svc_ctx->spatial_layers; ++i) { + enc_cfg->ss_target_bitrate[i] = (unsigned int)si->bitrates[i]; + enc_cfg->layer_target_bitrate[i] = (unsigned int)si->bitrates[i]; + total_bitrate += si->bitrates[i]; + } + if (total_bitrate != enc_cfg->rc_target_bitrate) + return VPX_CODEC_INVALID_PARAM; + } else { + float total = 0; + float alloc_ratio[VPX_MAX_LAYERS] = { 0 }; + + for (i = 0; i < svc_ctx->spatial_layers; ++i) { + if (si->svc_params.scaling_factor_den[i] > 0) { + alloc_ratio[i] = (float)(si->svc_params.scaling_factor_num[i] * 1.0 / + si->svc_params.scaling_factor_den[i]); + + alloc_ratio[i] *= alloc_ratio[i]; + total += alloc_ratio[i]; + } + } + for (i = 0; i < VPX_SS_MAX_LAYERS; ++i) { + if (total > 0) { + enc_cfg->layer_target_bitrate[i] = + (unsigned int)(enc_cfg->rc_target_bitrate * alloc_ratio[i] / + total); + } + } + } + } + return VPX_CODEC_OK; +} + +vpx_codec_err_t vpx_svc_init(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx, + vpx_codec_iface_t *iface, + vpx_codec_enc_cfg_t *enc_cfg) { + vpx_codec_err_t res; + int sl, tl; + SvcInternal_t *const si = get_svc_internal(svc_ctx); + if (svc_ctx == NULL || codec_ctx == 
NULL || iface == NULL || + enc_cfg == NULL) { + return VPX_CODEC_INVALID_PARAM; + } + if (si == NULL) return VPX_CODEC_MEM_ERROR; + + si->codec_ctx = codec_ctx; + + si->width = enc_cfg->g_w; + si->height = enc_cfg->g_h; + + si->kf_dist = enc_cfg->kf_max_dist; + + if (svc_ctx->spatial_layers == 0) + svc_ctx->spatial_layers = VPX_SS_DEFAULT_LAYERS; + if (svc_ctx->spatial_layers < 1 || + svc_ctx->spatial_layers > VPX_SS_MAX_LAYERS) { + svc_log(svc_ctx, SVC_LOG_ERROR, "spatial layers: invalid value: %d\n", + svc_ctx->spatial_layers); + return VPX_CODEC_INVALID_PARAM; + } + + // Note: temporal_layering_mode only applies to one-pass CBR + // si->svc_params.temporal_layering_mode = svc_ctx->temporal_layering_mode; + if (svc_ctx->temporal_layering_mode == 3) { + svc_ctx->temporal_layers = 3; + } else if (svc_ctx->temporal_layering_mode == 2 || + svc_ctx->temporal_layering_mode == 1) { + svc_ctx->temporal_layers = 2; + } + + for (sl = 0; sl < VPX_SS_MAX_LAYERS; ++sl) { + si->svc_params.scaling_factor_num[sl] = DEFAULT_SCALE_FACTORS_NUM[sl]; + si->svc_params.scaling_factor_den[sl] = DEFAULT_SCALE_FACTORS_DEN[sl]; + si->svc_params.speed_per_layer[sl] = svc_ctx->speed; + } + if (enc_cfg->rc_end_usage == VPX_CBR && enc_cfg->g_pass == VPX_RC_ONE_PASS && + svc_ctx->spatial_layers <= 3) { + for (sl = 0; sl < svc_ctx->spatial_layers; ++sl) { + int sl2 = (svc_ctx->spatial_layers == 2) ? 
sl + 1 : sl; + si->svc_params.scaling_factor_num[sl] = DEFAULT_SCALE_FACTORS_NUM_2x[sl2]; + si->svc_params.scaling_factor_den[sl] = DEFAULT_SCALE_FACTORS_DEN_2x[sl2]; + } + if (svc_ctx->spatial_layers == 1) { + si->svc_params.scaling_factor_num[0] = 1; + si->svc_params.scaling_factor_den[0] = 1; + } + } + for (tl = 0; tl < svc_ctx->temporal_layers; ++tl) { + for (sl = 0; sl < svc_ctx->spatial_layers; ++sl) { + const int i = sl * svc_ctx->temporal_layers + tl; + si->svc_params.max_quantizers[i] = MAX_QUANTIZER; + si->svc_params.min_quantizers[i] = 0; + if (enc_cfg->rc_end_usage == VPX_CBR && + enc_cfg->g_pass == VPX_RC_ONE_PASS) { + si->svc_params.max_quantizers[i] = 56; + si->svc_params.min_quantizers[i] = 2; + } + } + } + + // Parse aggregate command line options. Options must start with + // "layers=xx" then followed by other options + res = parse_options(svc_ctx, si->options); + if (res != VPX_CODEC_OK) return res; + + if (svc_ctx->spatial_layers < 1) svc_ctx->spatial_layers = 1; + if (svc_ctx->spatial_layers > VPX_SS_MAX_LAYERS) + svc_ctx->spatial_layers = VPX_SS_MAX_LAYERS; + + if (svc_ctx->temporal_layers < 1) svc_ctx->temporal_layers = 1; + if (svc_ctx->temporal_layers > VPX_TS_MAX_LAYERS) + svc_ctx->temporal_layers = VPX_TS_MAX_LAYERS; + + if (svc_ctx->temporal_layers * svc_ctx->spatial_layers > VPX_MAX_LAYERS) { + svc_log( + svc_ctx, SVC_LOG_ERROR, + "spatial layers * temporal layers (%d) exceeds the maximum number of " + "allowed layers of %d\n", + svc_ctx->spatial_layers * svc_ctx->temporal_layers, VPX_MAX_LAYERS); + return VPX_CODEC_INVALID_PARAM; + } + res = assign_layer_bitrates(svc_ctx, enc_cfg); + if (res != VPX_CODEC_OK) { + svc_log(svc_ctx, SVC_LOG_ERROR, + "layer bitrates incorrect: \n" + "1) spatial layer bitrates should sum up to target \n" + "2) temporal layer bitrates should be increasing within \n" + "a spatial layer \n"); + return VPX_CODEC_INVALID_PARAM; + } + + if (svc_ctx->temporal_layers > 1) { + int i; + for (i = 0; i < 
svc_ctx->temporal_layers; ++i) { + enc_cfg->ts_target_bitrate[i] = + enc_cfg->rc_target_bitrate / svc_ctx->temporal_layers; + enc_cfg->ts_rate_decimator[i] = 1 << (svc_ctx->temporal_layers - 1 - i); + } + } + + if (svc_ctx->threads) enc_cfg->g_threads = svc_ctx->threads; + + // Modify encoder configuration + enc_cfg->ss_number_layers = svc_ctx->spatial_layers; + enc_cfg->ts_number_layers = svc_ctx->temporal_layers; + + if (enc_cfg->rc_end_usage == VPX_CBR) { + enc_cfg->rc_resize_allowed = 0; + enc_cfg->rc_min_quantizer = 2; + enc_cfg->rc_max_quantizer = 56; + enc_cfg->rc_undershoot_pct = 50; + enc_cfg->rc_overshoot_pct = 50; + enc_cfg->rc_buf_initial_sz = 500; + enc_cfg->rc_buf_optimal_sz = 600; + enc_cfg->rc_buf_sz = 1000; + } + + for (tl = 0; tl < svc_ctx->temporal_layers; ++tl) { + for (sl = 0; sl < svc_ctx->spatial_layers; ++sl) { + const int i = sl * svc_ctx->temporal_layers + tl; + if (enc_cfg->rc_end_usage == VPX_CBR && + enc_cfg->g_pass == VPX_RC_ONE_PASS) { + si->svc_params.max_quantizers[i] = enc_cfg->rc_max_quantizer; + si->svc_params.min_quantizers[i] = enc_cfg->rc_min_quantizer; + } + } + } + + if (enc_cfg->g_error_resilient == 0 && si->use_multiple_frame_contexts == 0) + enc_cfg->g_error_resilient = 1; + + // Initialize codec + res = vpx_codec_enc_init(codec_ctx, iface, enc_cfg, VPX_CODEC_USE_PSNR); + if (res != VPX_CODEC_OK) { + svc_log(svc_ctx, SVC_LOG_ERROR, "svc_enc_init error\n"); + return res; + } + if (svc_ctx->spatial_layers > 1 || svc_ctx->temporal_layers > 1) { + vpx_codec_control(codec_ctx, VP9E_SET_SVC, 1); + vpx_codec_control(codec_ctx, VP9E_SET_SVC_PARAMETERS, &si->svc_params); + } + return VPX_CODEC_OK; +} + +/** + * Encode a frame into multiple layers + * Create a superframe containing the individual layers + */ +vpx_codec_err_t vpx_svc_encode(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx, + struct vpx_image *rawimg, vpx_codec_pts_t pts, + int64_t duration, int deadline) { + vpx_codec_err_t res; + vpx_codec_iter_t iter; + const 
vpx_codec_cx_pkt_t *cx_pkt; + SvcInternal_t *const si = get_svc_internal(svc_ctx); + if (svc_ctx == NULL || codec_ctx == NULL || si == NULL) { + return VPX_CODEC_INVALID_PARAM; + } + + res = + vpx_codec_encode(codec_ctx, rawimg, pts, (uint32_t)duration, 0, deadline); + if (res != VPX_CODEC_OK) { + return res; + } + // save compressed data + iter = NULL; + while ((cx_pkt = vpx_codec_get_cx_data(codec_ctx, &iter))) { + switch (cx_pkt->kind) { + case VPX_CODEC_PSNR_PKT: ++si->psnr_pkt_received; break; + default: break; + } + } + + return VPX_CODEC_OK; +} + +static double calc_psnr(double d) { + if (d == 0) return 100; + return -10.0 * log(d) / log(10.0); +} + +// dump accumulated statistics and reset accumulated values +void vpx_svc_dump_statistics(SvcContext *svc_ctx) { + int number_of_frames; + int i, j; + uint32_t bytes_total = 0; + double scale[COMPONENTS]; + double psnr[COMPONENTS]; + double mse[COMPONENTS]; + double y_scale; + + SvcInternal_t *const si = get_svc_internal(svc_ctx); + if (svc_ctx == NULL || si == NULL) return; + + number_of_frames = si->psnr_pkt_received; + if (number_of_frames <= 0) return; + + svc_log(svc_ctx, SVC_LOG_INFO, "\n"); + for (i = 0; i < svc_ctx->spatial_layers; ++i) { + svc_log(svc_ctx, SVC_LOG_INFO, + "Layer %d Average PSNR=[%2.3f, %2.3f, %2.3f, %2.3f], Bytes=[%u]\n", + i, si->psnr_sum[i][0] / number_of_frames, + si->psnr_sum[i][1] / number_of_frames, + si->psnr_sum[i][2] / number_of_frames, + si->psnr_sum[i][3] / number_of_frames, si->bytes_sum[i]); + // the following psnr calculation is deduced from ffmpeg.c#print_report + y_scale = si->width * si->height * 255.0 * 255.0 * number_of_frames; + scale[1] = y_scale; + scale[2] = scale[3] = y_scale / 4; // U or V + scale[0] = y_scale * 1.5; // total + + for (j = 0; j < COMPONENTS; j++) { + psnr[j] = calc_psnr(si->sse_sum[i][j] / scale[j]); + mse[j] = si->sse_sum[i][j] * 255.0 * 255.0 / scale[j]; + } + svc_log(svc_ctx, SVC_LOG_INFO, + "Layer %d Overall PSNR=[%2.3f, %2.3f, %2.3f, 
%2.3f]\n", i, psnr[0], + psnr[1], psnr[2], psnr[3]); + svc_log(svc_ctx, SVC_LOG_INFO, + "Layer %d Overall MSE=[%2.3f, %2.3f, %2.3f, %2.3f]\n", i, mse[0], + mse[1], mse[2], mse[3]); + + bytes_total += si->bytes_sum[i]; + // Clear sums for next time. + si->bytes_sum[i] = 0; + for (j = 0; j < COMPONENTS; ++j) { + si->psnr_sum[i][j] = 0; + si->sse_sum[i][j] = 0; + } + } + + // only display statistics once + si->psnr_pkt_received = 0; + + svc_log(svc_ctx, SVC_LOG_INFO, "Total Bytes=[%u]\n", bytes_total); +} + +void vpx_svc_release(SvcContext *svc_ctx) { + SvcInternal_t *si; + if (svc_ctx == NULL) return; + // do not use get_svc_internal as it will unnecessarily allocate an + // SvcInternal_t if it was not already allocated + si = (SvcInternal_t *)svc_ctx->internal; + if (si != NULL) { + free(si); + svc_ctx->internal = NULL; + } +} diff --git a/media/libvpx/libvpx/examples/twopass_encoder.c b/media/libvpx/libvpx/examples/twopass_encoder.c new file mode 100644 index 0000000000..07a10d9cf3 --- /dev/null +++ b/media/libvpx/libvpx/examples/twopass_encoder.c @@ -0,0 +1,257 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// Two Pass Encoder +// ================ +// +// This is an example of a two pass encoder loop. It takes an input file in +// YV12 format, passes it through the encoder twice, and writes the compressed +// frames to disk in IVF format. It builds upon the simple_encoder example. +// +// Twopass Variables +// ----------------- +// Twopass mode needs to track the current pass number and the buffer of +// statistics packets. 
+// +// Updating The Configuration +// --------------------------------- +// In two pass mode, the configuration has to be updated on each pass. The +// statistics buffer is passed on the last pass. +// +// Encoding A Frame +// ---------------- +// Encoding a frame in two pass mode is identical to the simple encoder +// example. To increase the quality while sacrificing encoding speed, +// VPX_DL_BEST_QUALITY can be used in place of VPX_DL_GOOD_QUALITY. +// +// Processing Statistics Packets +// ----------------------------- +// Each packet of type `VPX_CODEC_CX_FRAME_PKT` contains the encoded data +// for this frame. We write a IVF frame header, followed by the raw data. +// +// +// Pass Progress Reporting +// ----------------------------- +// It's sometimes helpful to see when each pass completes. +// +// +// Clean-up +// ----------------------------- +// Destruction of the encoder instance must be done on each pass. The +// raw image should be destroyed at the end as usual. + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "vpx/vpx_encoder.h" + +#include "../tools_common.h" +#include "../video_writer.h" + +static const char *exec_name; + +void usage_exit(void) { + fprintf(stderr, + "Usage: %s <codec> <width> <height> <infile> <outfile> " + "<frame limit>\n", + exec_name); + exit(EXIT_FAILURE); +} + +static int get_frame_stats(vpx_codec_ctx_t *ctx, const vpx_image_t *img, + vpx_codec_pts_t pts, unsigned int duration, + vpx_enc_frame_flags_t flags, unsigned int deadline, + vpx_fixed_buf_t *stats) { + int got_pkts = 0; + vpx_codec_iter_t iter = NULL; + const vpx_codec_cx_pkt_t *pkt = NULL; + const vpx_codec_err_t res = + vpx_codec_encode(ctx, img, pts, duration, flags, deadline); + if (res != VPX_CODEC_OK) die_codec(ctx, "Failed to get frame stats."); + + while ((pkt = vpx_codec_get_cx_data(ctx, &iter)) != NULL) { + got_pkts = 1; + + if (pkt->kind == VPX_CODEC_STATS_PKT) { + const uint8_t *const pkt_buf = pkt->data.twopass_stats.buf; + 
const size_t pkt_size = pkt->data.twopass_stats.sz; + stats->buf = realloc(stats->buf, stats->sz + pkt_size); + if (!stats->buf) die("Failed to reallocate stats buffer."); + memcpy((uint8_t *)stats->buf + stats->sz, pkt_buf, pkt_size); + stats->sz += pkt_size; + } + } + + return got_pkts; +} + +static int encode_frame(vpx_codec_ctx_t *ctx, const vpx_image_t *img, + vpx_codec_pts_t pts, unsigned int duration, + vpx_enc_frame_flags_t flags, unsigned int deadline, + VpxVideoWriter *writer) { + int got_pkts = 0; + vpx_codec_iter_t iter = NULL; + const vpx_codec_cx_pkt_t *pkt = NULL; + const vpx_codec_err_t res = + vpx_codec_encode(ctx, img, pts, duration, flags, deadline); + if (res != VPX_CODEC_OK) die_codec(ctx, "Failed to encode frame."); + + while ((pkt = vpx_codec_get_cx_data(ctx, &iter)) != NULL) { + got_pkts = 1; + if (pkt->kind == VPX_CODEC_CX_FRAME_PKT) { + const int keyframe = (pkt->data.frame.flags & VPX_FRAME_IS_KEY) != 0; + + if (!vpx_video_writer_write_frame(writer, pkt->data.frame.buf, + pkt->data.frame.sz, + pkt->data.frame.pts)) + die_codec(ctx, "Failed to write compressed frame."); + printf(keyframe ? "K" : "."); + fflush(stdout); + } + } + + return got_pkts; +} + +static vpx_fixed_buf_t pass0(vpx_image_t *raw, FILE *infile, + const VpxInterface *encoder, + const vpx_codec_enc_cfg_t *cfg, int max_frames) { + vpx_codec_ctx_t codec; + int frame_count = 0; + vpx_fixed_buf_t stats = { NULL, 0 }; + + if (vpx_codec_enc_init(&codec, encoder->codec_interface(), cfg, 0)) + die("Failed to initialize encoder"); + + // Calculate frame statistics. + while (vpx_img_read(raw, infile)) { + ++frame_count; + get_frame_stats(&codec, raw, frame_count, 1, 0, VPX_DL_GOOD_QUALITY, + &stats); + if (max_frames > 0 && frame_count >= max_frames) break; + } + + // Flush encoder. + while (get_frame_stats(&codec, NULL, frame_count, 1, 0, VPX_DL_GOOD_QUALITY, + &stats)) { + } + + printf("Pass 0 complete. 
Processed %d frames.\n", frame_count); + if (vpx_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec."); + + return stats; +} + +static void pass1(vpx_image_t *raw, FILE *infile, const char *outfile_name, + const VpxInterface *encoder, const vpx_codec_enc_cfg_t *cfg, + int max_frames) { + VpxVideoInfo info = { encoder->fourcc, + cfg->g_w, + cfg->g_h, + { cfg->g_timebase.num, cfg->g_timebase.den } }; + VpxVideoWriter *writer = NULL; + vpx_codec_ctx_t codec; + int frame_count = 0; + + writer = vpx_video_writer_open(outfile_name, kContainerIVF, &info); + if (!writer) die("Failed to open %s for writing", outfile_name); + + if (vpx_codec_enc_init(&codec, encoder->codec_interface(), cfg, 0)) + die("Failed to initialize encoder"); + + // Encode frames. + while (vpx_img_read(raw, infile)) { + ++frame_count; + encode_frame(&codec, raw, frame_count, 1, 0, VPX_DL_GOOD_QUALITY, writer); + + if (max_frames > 0 && frame_count >= max_frames) break; + } + + // Flush encoder. + while (encode_frame(&codec, NULL, -1, 1, 0, VPX_DL_GOOD_QUALITY, writer)) { + } + + printf("\n"); + + if (vpx_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec."); + + vpx_video_writer_close(writer); + + printf("Pass 1 complete. 
Processed %d frames.\n", frame_count); +} + +int main(int argc, char **argv) { + FILE *infile = NULL; + int w, h; + vpx_codec_ctx_t codec; + vpx_codec_enc_cfg_t cfg; + vpx_image_t raw; + vpx_codec_err_t res; + vpx_fixed_buf_t stats; + + const VpxInterface *encoder = NULL; + const int fps = 30; // TODO(dkovalev) add command line argument + const int bitrate = 200; // kbit/s TODO(dkovalev) add command line argument + const char *const codec_arg = argv[1]; + const char *const width_arg = argv[2]; + const char *const height_arg = argv[3]; + const char *const infile_arg = argv[4]; + const char *const outfile_arg = argv[5]; + int max_frames = 0; + exec_name = argv[0]; + + if (argc != 7) die("Invalid number of arguments."); + + max_frames = (int)strtol(argv[6], NULL, 0); + + encoder = get_vpx_encoder_by_name(codec_arg); + if (!encoder) die("Unsupported codec."); + + w = (int)strtol(width_arg, NULL, 0); + h = (int)strtol(height_arg, NULL, 0); + + if (w <= 0 || h <= 0 || (w % 2) != 0 || (h % 2) != 0) + die("Invalid frame size: %dx%d", w, h); + + if (!vpx_img_alloc(&raw, VPX_IMG_FMT_I420, w, h, 1)) + die("Failed to allocate image (%dx%d)", w, h); + + printf("Using %s\n", vpx_codec_iface_name(encoder->codec_interface())); + + // Configuration + res = vpx_codec_enc_config_default(encoder->codec_interface(), &cfg, 0); + if (res) die_codec(&codec, "Failed to get default codec config."); + + cfg.g_w = w; + cfg.g_h = h; + cfg.g_timebase.num = 1; + cfg.g_timebase.den = fps; + cfg.rc_target_bitrate = bitrate; + + if (!(infile = fopen(infile_arg, "rb"))) + die("Failed to open %s for reading", infile_arg); + + // Pass 0 + cfg.g_pass = VPX_RC_FIRST_PASS; + stats = pass0(&raw, infile, encoder, &cfg, max_frames); + + // Pass 1 + rewind(infile); + cfg.g_pass = VPX_RC_LAST_PASS; + cfg.rc_twopass_stats_in = stats; + pass1(&raw, infile, outfile_arg, encoder, &cfg, max_frames); + free(stats.buf); + + vpx_img_free(&raw); + fclose(infile); + + return EXIT_SUCCESS; +} diff --git 
a/media/libvpx/libvpx/examples/vp8_multi_resolution_encoder.c b/media/libvpx/libvpx/examples/vp8_multi_resolution_encoder.c new file mode 100644 index 0000000000..62d96de557 --- /dev/null +++ b/media/libvpx/libvpx/examples/vp8_multi_resolution_encoder.c @@ -0,0 +1,666 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/* + * This is an example demonstrating multi-resolution encoding in VP8. + * High-resolution input video is down-sampled to lower-resolutions. The + * encoder then encodes the video and outputs multiple bitstreams with + * different resolutions. + * + * This test also allows for settings temporal layers for each spatial layer. + * Different number of temporal layers per spatial stream may be used. + * Currently up to 3 temporal layers per spatial stream (encoder) are supported + * in this test. + */ + +#include "./vpx_config.h" + +#include <stdio.h> +#include <stdlib.h> +#include <stdarg.h> +#include <string.h> +#include <math.h> +#include <assert.h> +#include <sys/time.h> +#include "vpx_ports/vpx_timer.h" +#include "vpx/vpx_encoder.h" +#include "vpx/vp8cx.h" +#include "vpx_ports/mem_ops.h" +#include "../tools_common.h" +#define interface (vpx_codec_vp8_cx()) +#define fourcc 0x30385056 + +void usage_exit(void) { exit(EXIT_FAILURE); } + +/* + * The input video frame is downsampled several times to generate a multi-level + * hierarchical structure. NUM_ENCODERS is defined as the number of encoding + * levels required. 
For example, if the size of input video is 1280x720, + * NUM_ENCODERS is 3, and down-sampling factor is 2, the encoder outputs 3 + * bitstreams with resolution of 1280x720(level 0), 640x360(level 1), and + * 320x180(level 2) respectively. + */ + +/* Number of encoders (spatial resolutions) used in this test. */ +#define NUM_ENCODERS 3 + +/* Maximum number of temporal layers allowed for this test. */ +#define MAX_NUM_TEMPORAL_LAYERS 3 + +/* This example uses the scaler function in libyuv. */ +#include "third_party/libyuv/include/libyuv/basic_types.h" +#include "third_party/libyuv/include/libyuv/scale.h" +#include "third_party/libyuv/include/libyuv/cpu_id.h" + +int (*read_frame_p)(FILE *f, vpx_image_t *img); + +static int mulres_read_frame(FILE *f, vpx_image_t *img) { + size_t nbytes, to_read; + int res = 1; + + to_read = img->w * img->h * 3 / 2; + nbytes = fread(img->planes[0], 1, to_read, f); + if (nbytes != to_read) { + res = 0; + if (nbytes > 0) + printf("Warning: Read partial frame. Check your width & height!\n"); + } + return res; +} + +static int mulres_read_frame_by_row(FILE *f, vpx_image_t *img) { + size_t nbytes, to_read; + int res = 1; + int plane; + + for (plane = 0; plane < 3; plane++) { + unsigned char *ptr; + int w = (plane ? (1 + img->d_w) / 2 : img->d_w); + int h = (plane ? (1 + img->d_h) / 2 : img->d_h); + int r; + + /* Determine the correct plane based on the image format. The for-loop + * always counts in Y,U,V order, but this may not match the order of + * the data on disk. + */ + switch (plane) { + case 1: + ptr = img->planes[img->fmt == VPX_IMG_FMT_YV12 ? VPX_PLANE_V + : VPX_PLANE_U]; + break; + case 2: + ptr = img->planes[img->fmt == VPX_IMG_FMT_YV12 ? VPX_PLANE_U + : VPX_PLANE_V]; + break; + default: ptr = img->planes[plane]; + } + + for (r = 0; r < h; r++) { + to_read = w; + + nbytes = fread(ptr, 1, to_read, f); + if (nbytes != to_read) { + res = 0; + if (nbytes > 0) + printf("Warning: Read partial frame. 
Check your width & height!\n"); + break; + } + + ptr += img->stride[plane]; + } + if (!res) break; + } + + return res; +} + +static void write_ivf_file_header(FILE *outfile, const vpx_codec_enc_cfg_t *cfg, + int frame_cnt) { + char header[32]; + + if (cfg->g_pass != VPX_RC_ONE_PASS && cfg->g_pass != VPX_RC_LAST_PASS) return; + header[0] = 'D'; + header[1] = 'K'; + header[2] = 'I'; + header[3] = 'F'; + mem_put_le16(header + 4, 0); /* version */ + mem_put_le16(header + 6, 32); /* headersize */ + mem_put_le32(header + 8, fourcc); /* headersize */ + mem_put_le16(header + 12, cfg->g_w); /* width */ + mem_put_le16(header + 14, cfg->g_h); /* height */ + mem_put_le32(header + 16, cfg->g_timebase.den); /* rate */ + mem_put_le32(header + 20, cfg->g_timebase.num); /* scale */ + mem_put_le32(header + 24, frame_cnt); /* length */ + mem_put_le32(header + 28, 0); /* unused */ + + (void)fwrite(header, 1, 32, outfile); +} + +static void write_ivf_frame_header(FILE *outfile, + const vpx_codec_cx_pkt_t *pkt) { + char header[12]; + vpx_codec_pts_t pts; + + if (pkt->kind != VPX_CODEC_CX_FRAME_PKT) return; + + pts = pkt->data.frame.pts; + mem_put_le32(header, (int)pkt->data.frame.sz); + mem_put_le32(header + 4, pts & 0xFFFFFFFF); + mem_put_le32(header + 8, pts >> 32); + + (void)fwrite(header, 1, 12, outfile); +} + +/* Temporal scaling parameters */ +/* This sets all the temporal layer parameters given |num_temporal_layers|, + * including the target bit allocation across temporal layers. Bit allocation + * parameters will be passed in as user parameters in another version. 
+ */ +static void set_temporal_layer_pattern(int num_temporal_layers, + vpx_codec_enc_cfg_t *cfg, int bitrate, + int *layer_flags) { + assert(num_temporal_layers <= MAX_NUM_TEMPORAL_LAYERS); + switch (num_temporal_layers) { + case 1: { + /* 1-layer */ + cfg->ts_number_layers = 1; + cfg->ts_periodicity = 1; + cfg->ts_rate_decimator[0] = 1; + cfg->ts_layer_id[0] = 0; + cfg->ts_target_bitrate[0] = bitrate; + + // Update L only. + layer_flags[0] = VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; + break; + } + + case 2: { + /* 2-layers, with sync point at first frame of layer 1. */ + cfg->ts_number_layers = 2; + cfg->ts_periodicity = 2; + cfg->ts_rate_decimator[0] = 2; + cfg->ts_rate_decimator[1] = 1; + cfg->ts_layer_id[0] = 0; + cfg->ts_layer_id[1] = 1; + // Use 60/40 bit allocation as example. + cfg->ts_target_bitrate[0] = (int)(0.6f * bitrate); + cfg->ts_target_bitrate[1] = bitrate; + + /* 0=L, 1=GF */ + // ARF is used as predictor for all frames, and is only updated on + // key frame. Sync point every 8 frames. + + // Layer 0: predict from L and ARF, update L and G. + layer_flags[0] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_UPD_ARF; + + // Layer 1: sync point: predict from L and ARF, and update G. + layer_flags[1] = + VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ARF; + + // Layer 0, predict from L and ARF, update L. + layer_flags[2] = + VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; + + // Layer 1: predict from L, G and ARF, and update G. + layer_flags[3] = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST | + VP8_EFLAG_NO_UPD_ENTROPY; + + // Layer 0 + layer_flags[4] = layer_flags[2]; + + // Layer 1 + layer_flags[5] = layer_flags[3]; + + // Layer 0 + layer_flags[6] = layer_flags[4]; + + // Layer 1 + layer_flags[7] = layer_flags[5]; + break; + } + + case 3: + default: { + // 3-layers structure where ARF is used as predictor for all frames, + // and is only updated on key frame. + // Sync points for layer 1 and 2 every 8 frames. 
+ cfg->ts_number_layers = 3; + cfg->ts_periodicity = 4; + cfg->ts_rate_decimator[0] = 4; + cfg->ts_rate_decimator[1] = 2; + cfg->ts_rate_decimator[2] = 1; + cfg->ts_layer_id[0] = 0; + cfg->ts_layer_id[1] = 2; + cfg->ts_layer_id[2] = 1; + cfg->ts_layer_id[3] = 2; + // Use 45/20/35 bit allocation as example. + cfg->ts_target_bitrate[0] = (int)(0.45f * bitrate); + cfg->ts_target_bitrate[1] = (int)(0.65f * bitrate); + cfg->ts_target_bitrate[2] = bitrate; + + /* 0=L, 1=GF, 2=ARF */ + + // Layer 0: predict from L and ARF; update L and G. + layer_flags[0] = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_REF_GF; + + // Layer 2: sync point: predict from L and ARF; update none. + layer_flags[1] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_UPD_GF | + VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST | + VP8_EFLAG_NO_UPD_ENTROPY; + + // Layer 1: sync point: predict from L and ARF; update G. + layer_flags[2] = + VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST; + + // Layer 2: predict from L, G, ARF; update none. + layer_flags[3] = VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | + VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ENTROPY; + + // Layer 0: predict from L and ARF; update L. + layer_flags[4] = + VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_REF_GF; + + // Layer 2: predict from L, G, ARF; update none. + layer_flags[5] = layer_flags[3]; + + // Layer 1: predict from L, G, ARF; update G. + layer_flags[6] = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST; + + // Layer 2: predict from L, G, ARF; update none. + layer_flags[7] = layer_flags[3]; + break; + } + } +} + +/* The periodicity of the pattern given the number of temporal layers. 
*/ +static int periodicity_to_num_layers[MAX_NUM_TEMPORAL_LAYERS] = { 1, 8, 8 }; + +int main(int argc, char **argv) { + FILE *infile, *outfile[NUM_ENCODERS]; + FILE *downsampled_input[NUM_ENCODERS - 1]; + char filename[50]; + vpx_codec_ctx_t codec[NUM_ENCODERS]; + vpx_codec_enc_cfg_t cfg[NUM_ENCODERS]; + int frame_cnt = 0; + vpx_image_t raw[NUM_ENCODERS]; + vpx_codec_err_t res[NUM_ENCODERS]; + + int i; + int width; + int height; + int length_frame; + int frame_avail; + int got_data; + int flags = 0; + int layer_id = 0; + + int layer_flags[VPX_TS_MAX_PERIODICITY * NUM_ENCODERS] = { 0 }; + int flag_periodicity; + + /*Currently, only realtime mode is supported in multi-resolution encoding.*/ + int arg_deadline = VPX_DL_REALTIME; + + /* Set show_psnr to 1/0 to show/not show PSNR. Choose show_psnr=0 if you + don't need to know PSNR, which will skip PSNR calculation and save + encoding time. */ + int show_psnr = 0; + int key_frame_insert = 0; + uint64_t psnr_sse_total[NUM_ENCODERS] = { 0 }; + uint64_t psnr_samples_total[NUM_ENCODERS] = { 0 }; + double psnr_totals[NUM_ENCODERS][4] = { { 0, 0 } }; + int psnr_count[NUM_ENCODERS] = { 0 }; + + int64_t cx_time = 0; + + /* Set the required target bitrates for each resolution level. + * If target bitrate for highest-resolution level is set to 0, + * (i.e. target_bitrate[0]=0), we skip encoding at that level. + */ + unsigned int target_bitrate[NUM_ENCODERS] = { 1000, 500, 100 }; + + /* Enter the frame rate of the input video */ + int framerate = 30; + + /* Set down-sampling factor for each resolution level. + dsf[0] controls down sampling from level 0 to level 1; + dsf[1] controls down sampling from level 1 to level 2; + dsf[2] is not used. */ + vpx_rational_t dsf[NUM_ENCODERS] = { { 2, 1 }, { 2, 1 }, { 1, 1 } }; + + /* Set the number of temporal layers for each encoder/resolution level, + * starting from highest resoln down to lowest resoln. 
*/ + unsigned int num_temporal_layers[NUM_ENCODERS] = { 3, 3, 3 }; + + if (argc != (7 + 3 * NUM_ENCODERS)) + die("Usage: %s <width> <height> <frame_rate> <infile> <outfile(s)> " + "<rate_encoder(s)> <temporal_layer(s)> <key_frame_insert> <output " + "psnr?> \n", + argv[0]); + + printf("Using %s\n", vpx_codec_iface_name(interface)); + + width = (int)strtol(argv[1], NULL, 0); + height = (int)strtol(argv[2], NULL, 0); + framerate = (int)strtol(argv[3], NULL, 0); + + if (width < 16 || width % 2 || height < 16 || height % 2) + die("Invalid resolution: %dx%d", width, height); + + /* Open input video file for encoding */ + if (!(infile = fopen(argv[4], "rb"))) + die("Failed to open %s for reading", argv[4]); + + /* Open output file for each encoder to output bitstreams */ + for (i = 0; i < NUM_ENCODERS; i++) { + if (!target_bitrate[i]) { + outfile[i] = NULL; + continue; + } + + if (!(outfile[i] = fopen(argv[i + 5], "wb"))) + die("Failed to open %s for writing", argv[i + 4]); + } + + // Bitrates per spatial layer: overwrite default rates above. + for (i = 0; i < NUM_ENCODERS; i++) { + target_bitrate[i] = (int)strtol(argv[NUM_ENCODERS + 5 + i], NULL, 0); + } + + // Temporal layers per spatial layers: overwrite default settings above. + for (i = 0; i < NUM_ENCODERS; i++) { + num_temporal_layers[i] = + (int)strtol(argv[2 * NUM_ENCODERS + 5 + i], NULL, 0); + if (num_temporal_layers[i] < 1 || num_temporal_layers[i] > 3) + die("Invalid temporal layers: %d, Must be 1, 2, or 3. \n", + num_temporal_layers[i]); + } + + /* Open file to write out each spatially downsampled input stream. */ + for (i = 0; i < NUM_ENCODERS - 1; i++) { + // Highest resoln is encoder 0. 
+ if (sprintf(filename, "ds%d.yuv", NUM_ENCODERS - i) < 0) { + return EXIT_FAILURE; + } + downsampled_input[i] = fopen(filename, "wb"); + } + + key_frame_insert = (int)strtol(argv[3 * NUM_ENCODERS + 5], NULL, 0); + + show_psnr = (int)strtol(argv[3 * NUM_ENCODERS + 6], NULL, 0); + + /* Populate default encoder configuration */ + for (i = 0; i < NUM_ENCODERS; i++) { + res[i] = vpx_codec_enc_config_default(interface, &cfg[i], 0); + if (res[i]) { + printf("Failed to get config: %s\n", vpx_codec_err_to_string(res[i])); + return EXIT_FAILURE; + } + } + + /* + * Update the default configuration according to needs of the application. + */ + /* Highest-resolution encoder settings */ + cfg[0].g_w = width; + cfg[0].g_h = height; + cfg[0].rc_dropframe_thresh = 0; + cfg[0].rc_end_usage = VPX_CBR; + cfg[0].rc_resize_allowed = 0; + cfg[0].rc_min_quantizer = 2; + cfg[0].rc_max_quantizer = 56; + cfg[0].rc_undershoot_pct = 100; + cfg[0].rc_overshoot_pct = 15; + cfg[0].rc_buf_initial_sz = 500; + cfg[0].rc_buf_optimal_sz = 600; + cfg[0].rc_buf_sz = 1000; + cfg[0].g_error_resilient = 1; /* Enable error resilient mode */ + cfg[0].g_lag_in_frames = 0; + + /* Disable automatic keyframe placement */ + /* Note: These 3 settings are copied to all levels. But, except the lowest + * resolution level, all other levels are set to VPX_KF_DISABLED internally. + */ + cfg[0].kf_mode = VPX_KF_AUTO; + cfg[0].kf_min_dist = 3000; + cfg[0].kf_max_dist = 3000; + + cfg[0].rc_target_bitrate = target_bitrate[0]; /* Set target bitrate */ + cfg[0].g_timebase.num = 1; /* Set fps */ + cfg[0].g_timebase.den = framerate; + + /* Other-resolution encoder settings */ + for (i = 1; i < NUM_ENCODERS; i++) { + memcpy(&cfg[i], &cfg[0], sizeof(vpx_codec_enc_cfg_t)); + + cfg[i].rc_target_bitrate = target_bitrate[i]; + + /* Note: Width & height of other-resolution encoders are calculated + * from the highest-resolution encoder's size and the corresponding + * down_sampling_factor. 
+ */ + { + unsigned int iw = cfg[i - 1].g_w * dsf[i - 1].den + dsf[i - 1].num - 1; + unsigned int ih = cfg[i - 1].g_h * dsf[i - 1].den + dsf[i - 1].num - 1; + cfg[i].g_w = iw / dsf[i - 1].num; + cfg[i].g_h = ih / dsf[i - 1].num; + } + + /* Make width & height to be multiplier of 2. */ + // Should support odd size ??? + if ((cfg[i].g_w) % 2) cfg[i].g_w++; + if ((cfg[i].g_h) % 2) cfg[i].g_h++; + } + + // Set the number of threads per encode/spatial layer. + // (1, 1, 1) means no encoder threading. + cfg[0].g_threads = 1; + cfg[1].g_threads = 1; + cfg[2].g_threads = 1; + + /* Allocate image for each encoder */ + for (i = 0; i < NUM_ENCODERS; i++) + if (!vpx_img_alloc(&raw[i], VPX_IMG_FMT_I420, cfg[i].g_w, cfg[i].g_h, 32)) + die("Failed to allocate image (%dx%d)", cfg[i].g_w, cfg[i].g_h); + + if (raw[0].stride[VPX_PLANE_Y] == (int)raw[0].d_w) + read_frame_p = mulres_read_frame; + else + read_frame_p = mulres_read_frame_by_row; + + for (i = 0; i < NUM_ENCODERS; i++) + if (outfile[i]) write_ivf_file_header(outfile[i], &cfg[i], 0); + + /* Temporal layers settings */ + for (i = 0; i < NUM_ENCODERS; i++) { + set_temporal_layer_pattern(num_temporal_layers[i], &cfg[i], + cfg[i].rc_target_bitrate, + &layer_flags[i * VPX_TS_MAX_PERIODICITY]); + } + + /* Initialize multi-encoder */ + if (vpx_codec_enc_init_multi(&codec[0], interface, &cfg[0], NUM_ENCODERS, + (show_psnr ? VPX_CODEC_USE_PSNR : 0), &dsf[0])) + die_codec(&codec[0], "Failed to initialize encoder"); + + /* The extra encoding configuration parameters can be set as follows. */ + /* Set encoding speed */ + for (i = 0; i < NUM_ENCODERS; i++) { + int speed = -6; + /* Lower speed for the lowest resolution. 
*/ + if (i == NUM_ENCODERS - 1) speed = -4; + if (vpx_codec_control(&codec[i], VP8E_SET_CPUUSED, speed)) + die_codec(&codec[i], "Failed to set cpu_used"); + } + + /* Set static threshold = 1 for all encoders */ + for (i = 0; i < NUM_ENCODERS; i++) { + if (vpx_codec_control(&codec[i], VP8E_SET_STATIC_THRESHOLD, 1)) + die_codec(&codec[i], "Failed to set static threshold"); + } + + /* Set NOISE_SENSITIVITY to do TEMPORAL_DENOISING */ + /* Enable denoising for the highest-resolution encoder. */ + if (vpx_codec_control(&codec[0], VP8E_SET_NOISE_SENSITIVITY, 1)) + die_codec(&codec[0], "Failed to set noise_sensitivity"); + if (vpx_codec_control(&codec[1], VP8E_SET_NOISE_SENSITIVITY, 1)) + die_codec(&codec[1], "Failed to set noise_sensitivity"); + for (i = 2; i < NUM_ENCODERS; i++) { + if (vpx_codec_control(&codec[i], VP8E_SET_NOISE_SENSITIVITY, 0)) + die_codec(&codec[i], "Failed to set noise_sensitivity"); + } + + /* Set the number of token partitions */ + for (i = 0; i < NUM_ENCODERS; i++) { + if (vpx_codec_control(&codec[i], VP8E_SET_TOKEN_PARTITIONS, 1)) + die_codec(&codec[i], "Failed to set static threshold"); + } + + /* Set the max intra target bitrate */ + for (i = 0; i < NUM_ENCODERS; i++) { + unsigned int max_intra_size_pct = + (int)(((double)cfg[0].rc_buf_optimal_sz * 0.5) * framerate / 10); + if (vpx_codec_control(&codec[i], VP8E_SET_MAX_INTRA_BITRATE_PCT, + max_intra_size_pct)) + die_codec(&codec[i], "Failed to set static threshold"); + // printf("%d %d \n",i,max_intra_size_pct); + } + + frame_avail = 1; + got_data = 0; + + while (frame_avail || got_data) { + struct vpx_usec_timer timer; + vpx_codec_iter_t iter[NUM_ENCODERS] = { NULL }; + const vpx_codec_cx_pkt_t *pkt[NUM_ENCODERS]; + + flags = 0; + frame_avail = read_frame_p(infile, &raw[0]); + + if (frame_avail) { + for (i = 1; i < NUM_ENCODERS; i++) { + /*Scale the image down a number of times by downsampling factor*/ + /* FilterMode 1 or 2 give better psnr than FilterMode 0. 
*/ + I420Scale( + raw[i - 1].planes[VPX_PLANE_Y], raw[i - 1].stride[VPX_PLANE_Y], + raw[i - 1].planes[VPX_PLANE_U], raw[i - 1].stride[VPX_PLANE_U], + raw[i - 1].planes[VPX_PLANE_V], raw[i - 1].stride[VPX_PLANE_V], + raw[i - 1].d_w, raw[i - 1].d_h, raw[i].planes[VPX_PLANE_Y], + raw[i].stride[VPX_PLANE_Y], raw[i].planes[VPX_PLANE_U], + raw[i].stride[VPX_PLANE_U], raw[i].planes[VPX_PLANE_V], + raw[i].stride[VPX_PLANE_V], raw[i].d_w, raw[i].d_h, 1); + /* Write out down-sampled input. */ + length_frame = cfg[i].g_w * cfg[i].g_h * 3 / 2; + if (fwrite(raw[i].planes[0], 1, length_frame, + downsampled_input[NUM_ENCODERS - i - 1]) != + (unsigned int)length_frame) { + return EXIT_FAILURE; + } + } + } + + /* Set the flags (reference and update) for all the encoders.*/ + for (i = 0; i < NUM_ENCODERS; i++) { + layer_id = cfg[i].ts_layer_id[frame_cnt % cfg[i].ts_periodicity]; + flags = 0; + flag_periodicity = periodicity_to_num_layers[num_temporal_layers[i] - 1]; + flags = layer_flags[i * VPX_TS_MAX_PERIODICITY + + frame_cnt % flag_periodicity]; + // Key frame flag for first frame. + if (frame_cnt == 0) { + flags |= VPX_EFLAG_FORCE_KF; + } + if (frame_cnt > 0 && frame_cnt == key_frame_insert) { + flags = VPX_EFLAG_FORCE_KF; + } + + vpx_codec_control(&codec[i], VP8E_SET_FRAME_FLAGS, flags); + vpx_codec_control(&codec[i], VP8E_SET_TEMPORAL_LAYER_ID, layer_id); + } + + /* Encode each frame at multi-levels */ + /* Note the flags must be set to 0 in the encode call if they are set + for each frame with the vpx_codec_control(), as done above. */ + vpx_usec_timer_start(&timer); + if (vpx_codec_encode(&codec[0], frame_avail ? 
&raw[0] : NULL, frame_cnt, 1, + 0, arg_deadline)) { + die_codec(&codec[0], "Failed to encode frame"); + } + vpx_usec_timer_mark(&timer); + cx_time += vpx_usec_timer_elapsed(&timer); + + for (i = NUM_ENCODERS - 1; i >= 0; i--) { + got_data = 0; + while ((pkt[i] = vpx_codec_get_cx_data(&codec[i], &iter[i]))) { + got_data = 1; + switch (pkt[i]->kind) { + case VPX_CODEC_CX_FRAME_PKT: + write_ivf_frame_header(outfile[i], pkt[i]); + (void)fwrite(pkt[i]->data.frame.buf, 1, pkt[i]->data.frame.sz, + outfile[i]); + break; + case VPX_CODEC_PSNR_PKT: + if (show_psnr) { + int j; + + psnr_sse_total[i] += pkt[i]->data.psnr.sse[0]; + psnr_samples_total[i] += pkt[i]->data.psnr.samples[0]; + for (j = 0; j < 4; j++) { + psnr_totals[i][j] += pkt[i]->data.psnr.psnr[j]; + } + psnr_count[i]++; + } + + break; + default: break; + } + fflush(stdout); + } + } + frame_cnt++; + } + printf("\n"); + printf("Frame cnt and encoding time/FPS stats for encoding: %d %f %f \n", + frame_cnt, 1000 * (float)cx_time / (double)(frame_cnt * 1000000), + 1000000 * (double)frame_cnt / (double)cx_time); + + fclose(infile); + + printf("Processed %ld frames.\n", (long int)frame_cnt - 1); + for (i = 0; i < NUM_ENCODERS; i++) { + /* Calculate PSNR and print it out */ + if ((show_psnr) && (psnr_count[i] > 0)) { + int j; + double ovpsnr = + sse_to_psnr(psnr_samples_total[i], 255.0, psnr_sse_total[i]); + + fprintf(stderr, "\n ENC%d PSNR (Overall/Avg/Y/U/V)", i); + + fprintf(stderr, " %.3lf", ovpsnr); + for (j = 0; j < 4; j++) { + fprintf(stderr, " %.3lf", psnr_totals[i][j] / psnr_count[i]); + } + } + + if (vpx_codec_destroy(&codec[i])) + die_codec(&codec[i], "Failed to destroy codec"); + + vpx_img_free(&raw[i]); + + if (!outfile[i]) continue; + + /* Try to rewrite the file header with the actual frame count */ + if (!fseek(outfile[i], 0, SEEK_SET)) + write_ivf_file_header(outfile[i], &cfg[i], frame_cnt - 1); + fclose(outfile[i]); + } + + return EXIT_SUCCESS; +} diff --git a/media/libvpx/libvpx/examples/vp8cx_set_ref.c 
b/media/libvpx/libvpx/examples/vp8cx_set_ref.c new file mode 100644 index 0000000000..ca528f9e90 --- /dev/null +++ b/media/libvpx/libvpx/examples/vp8cx_set_ref.c @@ -0,0 +1,187 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// VP8 Set Reference Frame +// ======================= +// +// This is an example demonstrating how to overwrite the VP8 encoder's +// internal reference frame. In the sample we set the last frame to the +// current frame. If this is done at a cut scene it will avoid a keyframe. +// This technique could be used to bounce between two cameras. +// +// Note that the decoder would also have to set the reference frame to the +// same value on the same frame, or the video will become corrupt. +// +// Usage +// ----- +// This example adds a single argument to the `simple_encoder` example, +// which specifies the frame number to update the reference frame on. +// The parameter is parsed as follows: +// +// +// Extra Variables +// --------------- +// This example maintains the frame number passed on the command line +// in the `update_frame_num` variable. +// +// +// Configuration +// ------------- +// +// The reference frame is updated on the frame specified on the command +// line. +// +// Observing The Effects +// --------------------- +// Use the `simple_encoder` example to encode a sample with a cut scene. +// Determine the frame number of the cut scene by looking for a generated +// key-frame (indicated by a 'K'). Supply that frame number as an argument +// to this example, and observe that no key-frame is generated. 
+ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "vpx/vp8cx.h" +#include "vpx/vpx_encoder.h" +#include "vp8/common/common.h" + +#include "../tools_common.h" +#include "../video_writer.h" + +static const char *exec_name; + +void usage_exit(void) { + fprintf(stderr, "Usage: %s <width> <height> <infile> <outfile> <frame>\n", + exec_name); + exit(EXIT_FAILURE); +} + +static int encode_frame(vpx_codec_ctx_t *codec, vpx_image_t *img, + int frame_index, VpxVideoWriter *writer) { + int got_pkts = 0; + vpx_codec_iter_t iter = NULL; + const vpx_codec_cx_pkt_t *pkt = NULL; + const vpx_codec_err_t res = + vpx_codec_encode(codec, img, frame_index, 1, 0, VPX_DL_GOOD_QUALITY); + if (res != VPX_CODEC_OK) die_codec(codec, "Failed to encode frame"); + + while ((pkt = vpx_codec_get_cx_data(codec, &iter)) != NULL) { + got_pkts = 1; + + if (pkt->kind == VPX_CODEC_CX_FRAME_PKT) { + const int keyframe = (pkt->data.frame.flags & VPX_FRAME_IS_KEY) != 0; + if (!vpx_video_writer_write_frame(writer, pkt->data.frame.buf, + pkt->data.frame.sz, + pkt->data.frame.pts)) { + die_codec(codec, "Failed to write compressed frame"); + } + + printf(keyframe ? 
"K" : "."); + fflush(stdout); + } + } + + return got_pkts; +} + +int main(int argc, char **argv) { + FILE *infile = NULL; + vpx_codec_ctx_t codec; + vpx_codec_enc_cfg_t cfg; + int frame_count = 0; + vpx_image_t raw; + vpx_codec_err_t res; + VpxVideoInfo info; + VpxVideoWriter *writer = NULL; + const VpxInterface *encoder = NULL; + int update_frame_num = 0; + const int fps = 30; // TODO(dkovalev) add command line argument + const int bitrate = 200; // kbit/s TODO(dkovalev) add command line argument + + vp8_zero(codec); + vp8_zero(cfg); + vp8_zero(info); + + exec_name = argv[0]; + + if (argc != 6) die("Invalid number of arguments"); + + // TODO(dkovalev): add vp9 support and rename the file accordingly + encoder = get_vpx_encoder_by_name("vp8"); + if (!encoder) die("Unsupported codec."); + + update_frame_num = atoi(argv[5]); + if (!update_frame_num) die("Couldn't parse frame number '%s'\n", argv[5]); + + info.codec_fourcc = encoder->fourcc; + info.frame_width = (int)strtol(argv[1], NULL, 0); + info.frame_height = (int)strtol(argv[2], NULL, 0); + info.time_base.numerator = 1; + info.time_base.denominator = fps; + + if (info.frame_width <= 0 || info.frame_height <= 0 || + (info.frame_width % 2) != 0 || (info.frame_height % 2) != 0) { + die("Invalid frame size: %dx%d", info.frame_width, info.frame_height); + } + + if (!vpx_img_alloc(&raw, VPX_IMG_FMT_I420, info.frame_width, + info.frame_height, 1)) { + die("Failed to allocate image."); + } + + printf("Using %s\n", vpx_codec_iface_name(encoder->codec_interface())); + + res = vpx_codec_enc_config_default(encoder->codec_interface(), &cfg, 0); + if (res) die_codec(&codec, "Failed to get default codec config."); + + cfg.g_w = info.frame_width; + cfg.g_h = info.frame_height; + cfg.g_timebase.num = info.time_base.numerator; + cfg.g_timebase.den = info.time_base.denominator; + cfg.rc_target_bitrate = bitrate; + + writer = vpx_video_writer_open(argv[4], kContainerIVF, &info); + if (!writer) die("Failed to open %s for writing.", 
argv[4]); + + if (!(infile = fopen(argv[3], "rb"))) + die("Failed to open %s for reading.", argv[3]); + + if (vpx_codec_enc_init(&codec, encoder->codec_interface(), &cfg, 0)) + die("Failed to initialize encoder"); + + // Encode frames. + while (vpx_img_read(&raw, infile)) { + if (frame_count + 1 == update_frame_num) { + vpx_ref_frame_t ref; + ref.frame_type = VP8_LAST_FRAME; + ref.img = raw; + if (vpx_codec_control(&codec, VP8_SET_REFERENCE, &ref)) + die_codec(&codec, "Failed to set reference frame"); + } + + encode_frame(&codec, &raw, frame_count++, writer); + } + + // Flush encoder. + while (encode_frame(&codec, NULL, -1, writer)) { + } + + printf("\n"); + fclose(infile); + printf("Processed %d frames.\n", frame_count); + + vpx_img_free(&raw); + if (vpx_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec."); + + vpx_video_writer_close(writer); + + return EXIT_SUCCESS; +} diff --git a/media/libvpx/libvpx/examples/vp9_lossless_encoder.c b/media/libvpx/libvpx/examples/vp9_lossless_encoder.c new file mode 100644 index 0000000000..c4eb3a8b17 --- /dev/null +++ b/media/libvpx/libvpx/examples/vp9_lossless_encoder.c @@ -0,0 +1,137 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "vpx/vpx_encoder.h" +#include "vpx/vp8cx.h" +#include "vp9/common/vp9_common.h" + +#include "../tools_common.h" +#include "../video_writer.h" + +static const char *exec_name; + +void usage_exit(void) { + fprintf(stderr, + "vp9_lossless_encoder: Example demonstrating VP9 lossless " + "encoding feature. 
Supports raw input only.\n"); + fprintf(stderr, "Usage: %s <width> <height> <infile> <outfile>\n", exec_name); + exit(EXIT_FAILURE); +} + +static int encode_frame(vpx_codec_ctx_t *codec, vpx_image_t *img, + int frame_index, int flags, VpxVideoWriter *writer) { + int got_pkts = 0; + vpx_codec_iter_t iter = NULL; + const vpx_codec_cx_pkt_t *pkt = NULL; + const vpx_codec_err_t res = + vpx_codec_encode(codec, img, frame_index, 1, flags, VPX_DL_GOOD_QUALITY); + if (res != VPX_CODEC_OK) die_codec(codec, "Failed to encode frame"); + + while ((pkt = vpx_codec_get_cx_data(codec, &iter)) != NULL) { + got_pkts = 1; + + if (pkt->kind == VPX_CODEC_CX_FRAME_PKT) { + const int keyframe = (pkt->data.frame.flags & VPX_FRAME_IS_KEY) != 0; + if (!vpx_video_writer_write_frame(writer, pkt->data.frame.buf, + pkt->data.frame.sz, + pkt->data.frame.pts)) { + die_codec(codec, "Failed to write compressed frame"); + } + printf(keyframe ? "K" : "."); + fflush(stdout); + } + } + + return got_pkts; +} + +int main(int argc, char **argv) { + FILE *infile = NULL; + vpx_codec_ctx_t codec; + vpx_codec_enc_cfg_t cfg; + int frame_count = 0; + vpx_image_t raw; + vpx_codec_err_t res; + VpxVideoInfo info; + VpxVideoWriter *writer = NULL; + const VpxInterface *encoder = NULL; + const int fps = 30; + + vp9_zero(info); + + exec_name = argv[0]; + + if (argc < 5) die("Invalid number of arguments"); + + encoder = get_vpx_encoder_by_name("vp9"); + if (!encoder) die("Unsupported codec."); + + info.codec_fourcc = encoder->fourcc; + info.frame_width = (int)strtol(argv[1], NULL, 0); + info.frame_height = (int)strtol(argv[2], NULL, 0); + info.time_base.numerator = 1; + info.time_base.denominator = fps; + + if (info.frame_width <= 0 || info.frame_height <= 0 || + (info.frame_width % 2) != 0 || (info.frame_height % 2) != 0) { + die("Invalid frame size: %dx%d", info.frame_width, info.frame_height); + } + + if (!vpx_img_alloc(&raw, VPX_IMG_FMT_I420, info.frame_width, + info.frame_height, 1)) { + die("Failed to allocate 
image."); + } + + printf("Using %s\n", vpx_codec_iface_name(encoder->codec_interface())); + + res = vpx_codec_enc_config_default(encoder->codec_interface(), &cfg, 0); + if (res) die_codec(&codec, "Failed to get default codec config."); + + cfg.g_w = info.frame_width; + cfg.g_h = info.frame_height; + cfg.g_timebase.num = info.time_base.numerator; + cfg.g_timebase.den = info.time_base.denominator; + + writer = vpx_video_writer_open(argv[4], kContainerIVF, &info); + if (!writer) die("Failed to open %s for writing.", argv[4]); + + if (!(infile = fopen(argv[3], "rb"))) + die("Failed to open %s for reading.", argv[3]); + + if (vpx_codec_enc_init(&codec, encoder->codec_interface(), &cfg, 0)) + die("Failed to initialize encoder"); + + if (vpx_codec_control_(&codec, VP9E_SET_LOSSLESS, 1)) + die_codec(&codec, "Failed to use lossless mode"); + + // Encode frames. + while (vpx_img_read(&raw, infile)) { + encode_frame(&codec, &raw, frame_count++, 0, writer); + } + + // Flush encoder. + while (encode_frame(&codec, NULL, -1, 0, writer)) { + } + + printf("\n"); + fclose(infile); + printf("Processed %d frames.\n", frame_count); + + vpx_img_free(&raw); + if (vpx_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec."); + + vpx_video_writer_close(writer); + + return EXIT_SUCCESS; +} diff --git a/media/libvpx/libvpx/examples/vp9_spatial_svc_encoder.c b/media/libvpx/libvpx/examples/vp9_spatial_svc_encoder.c new file mode 100644 index 0000000000..9d37ed0244 --- /dev/null +++ b/media/libvpx/libvpx/examples/vp9_spatial_svc_encoder.c @@ -0,0 +1,1217 @@ +/* + * Copyright (c) 2012 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +/* + * This is an example demonstrating how to implement a multi-layer + * VP9 encoding scheme based on spatial scalability for video applications + * that benefit from a scalable bitstream. + */ + +#include <math.h> +#include <stdarg.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> + +#include "../args.h" +#include "../tools_common.h" +#include "../video_writer.h" + +#include "../vpx_ports/vpx_timer.h" +#include "./svc_context.h" +#include "vpx/vp8cx.h" +#include "vpx/vpx_encoder.h" +#include "../vpxstats.h" +#include "vp9/encoder/vp9_encoder.h" +#include "./y4minput.h" + +#define OUTPUT_FRAME_STATS 0 +#define OUTPUT_RC_STATS 1 + +#define SIMULCAST_MODE 0 + +static const arg_def_t outputfile = + ARG_DEF("o", "output", 1, "Output filename"); +static const arg_def_t skip_frames_arg = + ARG_DEF("s", "skip-frames", 1, "input frames to skip"); +static const arg_def_t frames_arg = + ARG_DEF("f", "frames", 1, "number of frames to encode"); +static const arg_def_t threads_arg = + ARG_DEF("th", "threads", 1, "number of threads to use"); +#if OUTPUT_RC_STATS +static const arg_def_t output_rc_stats_arg = + ARG_DEF("rcstat", "output_rc_stats", 1, "output rc stats"); +#endif +static const arg_def_t width_arg = ARG_DEF("w", "width", 1, "source width"); +static const arg_def_t height_arg = ARG_DEF("h", "height", 1, "source height"); +static const arg_def_t timebase_arg = + ARG_DEF("t", "timebase", 1, "timebase (num/den)"); +static const arg_def_t bitrate_arg = ARG_DEF( + "b", "target-bitrate", 1, "encoding bitrate, in kilobits per second"); +static const arg_def_t spatial_layers_arg = + ARG_DEF("sl", "spatial-layers", 1, "number of spatial SVC layers"); +static const arg_def_t temporal_layers_arg = + ARG_DEF("tl", "temporal-layers", 1, "number of temporal SVC layers"); +static const arg_def_t temporal_layering_mode_arg = + ARG_DEF("tlm", "temporal-layering-mode", 1, + "temporal layering scheme." 
+ "VP9E_TEMPORAL_LAYERING_MODE"); +static const arg_def_t kf_dist_arg = + ARG_DEF("k", "kf-dist", 1, "number of frames between keyframes"); +static const arg_def_t scale_factors_arg = + ARG_DEF("r", "scale-factors", 1, "scale factors (lowest to highest layer)"); +static const arg_def_t min_q_arg = + ARG_DEF(NULL, "min-q", 1, "Minimum quantizer"); +static const arg_def_t max_q_arg = + ARG_DEF(NULL, "max-q", 1, "Maximum quantizer"); +static const arg_def_t min_bitrate_arg = + ARG_DEF(NULL, "min-bitrate", 1, "Minimum bitrate"); +static const arg_def_t max_bitrate_arg = + ARG_DEF(NULL, "max-bitrate", 1, "Maximum bitrate"); +static const arg_def_t lag_in_frame_arg = + ARG_DEF(NULL, "lag-in-frames", 1, + "Number of frame to input before " + "generating any outputs"); +static const arg_def_t rc_end_usage_arg = + ARG_DEF(NULL, "rc-end-usage", 1, "0 - 3: VBR, CBR, CQ, Q"); +static const arg_def_t speed_arg = + ARG_DEF("sp", "speed", 1, "speed configuration"); +static const arg_def_t aqmode_arg = + ARG_DEF("aq", "aqmode", 1, "aq-mode off/on"); +static const arg_def_t bitrates_arg = + ARG_DEF("bl", "bitrates", 1, "bitrates[sl * num_tl + tl]"); +static const arg_def_t dropframe_thresh_arg = + ARG_DEF(NULL, "drop-frame", 1, "Temporal resampling threshold (buf %)"); +static const struct arg_enum_list tune_content_enum[] = { + { "default", VP9E_CONTENT_DEFAULT }, + { "screen", VP9E_CONTENT_SCREEN }, + { "film", VP9E_CONTENT_FILM }, + { NULL, 0 } +}; + +static const arg_def_t tune_content_arg = ARG_DEF_ENUM( + NULL, "tune-content", 1, "Tune content type", tune_content_enum); +static const arg_def_t inter_layer_pred_arg = ARG_DEF( + NULL, "inter-layer-pred", 1, "0 - 3: On, Off, Key-frames, Constrained"); + +#if CONFIG_VP9_HIGHBITDEPTH +static const struct arg_enum_list bitdepth_enum[] = { + { "8", VPX_BITS_8 }, { "10", VPX_BITS_10 }, { "12", VPX_BITS_12 }, { NULL, 0 } +}; + +static const arg_def_t bitdepth_arg = ARG_DEF_ENUM( + "d", "bit-depth", 1, "Bit depth for codec 8, 10 or 12. 
", bitdepth_enum); +#endif // CONFIG_VP9_HIGHBITDEPTH + +static const arg_def_t *svc_args[] = { &frames_arg, + &outputfile, + &width_arg, + &height_arg, + &timebase_arg, + &bitrate_arg, + &skip_frames_arg, + &spatial_layers_arg, + &kf_dist_arg, + &scale_factors_arg, + &min_q_arg, + &max_q_arg, + &min_bitrate_arg, + &max_bitrate_arg, + &temporal_layers_arg, + &temporal_layering_mode_arg, + &lag_in_frame_arg, + &threads_arg, + &aqmode_arg, +#if OUTPUT_RC_STATS + &output_rc_stats_arg, +#endif + +#if CONFIG_VP9_HIGHBITDEPTH + &bitdepth_arg, +#endif + &speed_arg, + &rc_end_usage_arg, + &bitrates_arg, + &dropframe_thresh_arg, + &tune_content_arg, + &inter_layer_pred_arg, + NULL }; + +static const uint32_t default_frames_to_skip = 0; +static const uint32_t default_frames_to_code = 60 * 60; +static const uint32_t default_width = 1920; +static const uint32_t default_height = 1080; +static const uint32_t default_timebase_num = 1; +static const uint32_t default_timebase_den = 60; +static const uint32_t default_bitrate = 1000; +static const uint32_t default_spatial_layers = 5; +static const uint32_t default_temporal_layers = 1; +static const uint32_t default_kf_dist = 100; +static const uint32_t default_temporal_layering_mode = 0; +static const uint32_t default_output_rc_stats = 0; +static const int32_t default_speed = -1; // -1 means use library default. +static const uint32_t default_threads = 0; // zero means use library default. 
+ +typedef struct { + const char *output_filename; + uint32_t frames_to_code; + uint32_t frames_to_skip; + struct VpxInputContext input_ctx; + stats_io_t rc_stats; + int tune_content; + int inter_layer_pred; +} AppInput; + +static const char *exec_name; + +void usage_exit(void) { + fprintf(stderr, "Usage: %s <options> input_filename -o output_filename\n", + exec_name); + fprintf(stderr, "Options:\n"); + arg_show_usage(stderr, svc_args); + exit(EXIT_FAILURE); +} + +static void parse_command_line(int argc, const char **argv_, + AppInput *app_input, SvcContext *svc_ctx, + vpx_codec_enc_cfg_t *enc_cfg) { + struct arg arg; + char **argv = NULL; + char **argi = NULL; + char **argj = NULL; + vpx_codec_err_t res; + unsigned int min_bitrate = 0; + unsigned int max_bitrate = 0; + char string_options[1024] = { 0 }; + + // initialize SvcContext with parameters that will be passed to vpx_svc_init + svc_ctx->log_level = SVC_LOG_DEBUG; + svc_ctx->spatial_layers = default_spatial_layers; + svc_ctx->temporal_layers = default_temporal_layers; + svc_ctx->temporal_layering_mode = default_temporal_layering_mode; +#if OUTPUT_RC_STATS + svc_ctx->output_rc_stat = default_output_rc_stats; +#endif + svc_ctx->speed = default_speed; + svc_ctx->threads = default_threads; + + // start with default encoder configuration + res = vpx_codec_enc_config_default(vpx_codec_vp9_cx(), enc_cfg, 0); + if (res) { + die("Failed to get config: %s\n", vpx_codec_err_to_string(res)); + } + // update enc_cfg with app default values + enc_cfg->g_w = default_width; + enc_cfg->g_h = default_height; + enc_cfg->g_timebase.num = default_timebase_num; + enc_cfg->g_timebase.den = default_timebase_den; + enc_cfg->rc_target_bitrate = default_bitrate; + enc_cfg->kf_min_dist = default_kf_dist; + enc_cfg->kf_max_dist = default_kf_dist; + enc_cfg->rc_end_usage = VPX_CQ; + + // initialize AppInput with default values + app_input->frames_to_code = default_frames_to_code; + app_input->frames_to_skip = default_frames_to_skip; + + 
// process command line options + argv = argv_dup(argc - 1, argv_ + 1); + if (!argv) { + fprintf(stderr, "Error allocating argument list\n"); + exit(EXIT_FAILURE); + } + for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step) { + arg.argv_step = 1; + + if (arg_match(&arg, &frames_arg, argi)) { + app_input->frames_to_code = arg_parse_uint(&arg); + } else if (arg_match(&arg, &outputfile, argi)) { + app_input->output_filename = arg.val; + } else if (arg_match(&arg, &width_arg, argi)) { + enc_cfg->g_w = arg_parse_uint(&arg); + } else if (arg_match(&arg, &height_arg, argi)) { + enc_cfg->g_h = arg_parse_uint(&arg); + } else if (arg_match(&arg, &timebase_arg, argi)) { + enc_cfg->g_timebase = arg_parse_rational(&arg); + } else if (arg_match(&arg, &bitrate_arg, argi)) { + enc_cfg->rc_target_bitrate = arg_parse_uint(&arg); + } else if (arg_match(&arg, &skip_frames_arg, argi)) { + app_input->frames_to_skip = arg_parse_uint(&arg); + } else if (arg_match(&arg, &spatial_layers_arg, argi)) { + svc_ctx->spatial_layers = arg_parse_uint(&arg); + } else if (arg_match(&arg, &temporal_layers_arg, argi)) { + svc_ctx->temporal_layers = arg_parse_uint(&arg); +#if OUTPUT_RC_STATS + } else if (arg_match(&arg, &output_rc_stats_arg, argi)) { + svc_ctx->output_rc_stat = arg_parse_uint(&arg); +#endif + } else if (arg_match(&arg, &speed_arg, argi)) { + svc_ctx->speed = arg_parse_uint(&arg); + if (svc_ctx->speed > 9) { + warn("Mapping speed %d to speed 9.\n", svc_ctx->speed); + } + } else if (arg_match(&arg, &aqmode_arg, argi)) { + svc_ctx->aqmode = arg_parse_uint(&arg); + } else if (arg_match(&arg, &threads_arg, argi)) { + svc_ctx->threads = arg_parse_uint(&arg); + } else if (arg_match(&arg, &temporal_layering_mode_arg, argi)) { + svc_ctx->temporal_layering_mode = enc_cfg->temporal_layering_mode = + arg_parse_int(&arg); + if (svc_ctx->temporal_layering_mode) { + enc_cfg->g_error_resilient = 1; + } + } else if (arg_match(&arg, &kf_dist_arg, argi)) { + enc_cfg->kf_min_dist = 
arg_parse_uint(&arg); + enc_cfg->kf_max_dist = enc_cfg->kf_min_dist; + } else if (arg_match(&arg, &scale_factors_arg, argi)) { + strncat(string_options, " scale-factors=", + sizeof(string_options) - strlen(string_options) - 1); + strncat(string_options, arg.val, + sizeof(string_options) - strlen(string_options) - 1); + } else if (arg_match(&arg, &bitrates_arg, argi)) { + strncat(string_options, " bitrates=", + sizeof(string_options) - strlen(string_options) - 1); + strncat(string_options, arg.val, + sizeof(string_options) - strlen(string_options) - 1); + } else if (arg_match(&arg, &min_q_arg, argi)) { + strncat(string_options, " min-quantizers=", + sizeof(string_options) - strlen(string_options) - 1); + strncat(string_options, arg.val, + sizeof(string_options) - strlen(string_options) - 1); + } else if (arg_match(&arg, &max_q_arg, argi)) { + strncat(string_options, " max-quantizers=", + sizeof(string_options) - strlen(string_options) - 1); + strncat(string_options, arg.val, + sizeof(string_options) - strlen(string_options) - 1); + } else if (arg_match(&arg, &min_bitrate_arg, argi)) { + min_bitrate = arg_parse_uint(&arg); + } else if (arg_match(&arg, &max_bitrate_arg, argi)) { + max_bitrate = arg_parse_uint(&arg); + } else if (arg_match(&arg, &lag_in_frame_arg, argi)) { + enc_cfg->g_lag_in_frames = arg_parse_uint(&arg); + } else if (arg_match(&arg, &rc_end_usage_arg, argi)) { + enc_cfg->rc_end_usage = arg_parse_uint(&arg); +#if CONFIG_VP9_HIGHBITDEPTH + } else if (arg_match(&arg, &bitdepth_arg, argi)) { + enc_cfg->g_bit_depth = arg_parse_enum_or_int(&arg); + switch (enc_cfg->g_bit_depth) { + case VPX_BITS_8: + enc_cfg->g_input_bit_depth = 8; + enc_cfg->g_profile = 0; + break; + case VPX_BITS_10: + enc_cfg->g_input_bit_depth = 10; + enc_cfg->g_profile = 2; + break; + case VPX_BITS_12: + enc_cfg->g_input_bit_depth = 12; + enc_cfg->g_profile = 2; + break; + default: + die("Error: Invalid bit depth selected (%d)\n", enc_cfg->g_bit_depth); + break; + } +#endif // 
CONFIG_VP9_HIGHBITDEPTH + } else if (arg_match(&arg, &dropframe_thresh_arg, argi)) { + enc_cfg->rc_dropframe_thresh = arg_parse_uint(&arg); + } else if (arg_match(&arg, &tune_content_arg, argi)) { + app_input->tune_content = arg_parse_uint(&arg); + } else if (arg_match(&arg, &inter_layer_pred_arg, argi)) { + app_input->inter_layer_pred = arg_parse_uint(&arg); + } else { + ++argj; + } + } + + // There will be a space in front of the string options + if (strlen(string_options) > 0) + vpx_svc_set_options(svc_ctx, string_options + 1); + + enc_cfg->g_pass = VPX_RC_ONE_PASS; + + if (enc_cfg->rc_target_bitrate > 0) { + if (min_bitrate > 0) { + enc_cfg->rc_2pass_vbr_minsection_pct = + min_bitrate * 100 / enc_cfg->rc_target_bitrate; + } + if (max_bitrate > 0) { + enc_cfg->rc_2pass_vbr_maxsection_pct = + max_bitrate * 100 / enc_cfg->rc_target_bitrate; + } + } + + // Check for unrecognized options + for (argi = argv; *argi; ++argi) + if (argi[0][0] == '-' && strlen(argi[0]) > 1) + die("Error: Unrecognized option %s\n", *argi); + + if (argv[0] == NULL) { + usage_exit(); + } + app_input->input_ctx.filename = argv[0]; + free(argv); + + open_input_file(&app_input->input_ctx); + if (app_input->input_ctx.file_type == FILE_TYPE_Y4M) { + enc_cfg->g_w = app_input->input_ctx.width; + enc_cfg->g_h = app_input->input_ctx.height; + enc_cfg->g_timebase.den = app_input->input_ctx.framerate.numerator; + enc_cfg->g_timebase.num = app_input->input_ctx.framerate.denominator; + } + + if (enc_cfg->g_w < 16 || enc_cfg->g_w % 2 || enc_cfg->g_h < 16 || + enc_cfg->g_h % 2) + die("Invalid resolution: %d x %d\n", enc_cfg->g_w, enc_cfg->g_h); + + printf( + "Codec %s\nframes: %d, skip: %d\n" + "layers: %d\n" + "width %d, height: %d,\n" + "num: %d, den: %d, bitrate: %d,\n" + "gop size: %d\n", + vpx_codec_iface_name(vpx_codec_vp9_cx()), app_input->frames_to_code, + app_input->frames_to_skip, svc_ctx->spatial_layers, enc_cfg->g_w, + enc_cfg->g_h, enc_cfg->g_timebase.num, enc_cfg->g_timebase.den, + 
enc_cfg->rc_target_bitrate, enc_cfg->kf_max_dist); +} + +#if OUTPUT_RC_STATS +// For rate control encoding stats. +struct RateControlStats { + // Number of input frames per layer. + int layer_input_frames[VPX_MAX_LAYERS]; + // Total (cumulative) number of encoded frames per layer. + int layer_tot_enc_frames[VPX_MAX_LAYERS]; + // Number of encoded non-key frames per layer. + int layer_enc_frames[VPX_MAX_LAYERS]; + // Framerate per layer (cumulative). + double layer_framerate[VPX_MAX_LAYERS]; + // Target average frame size per layer (per-frame-bandwidth per layer). + double layer_pfb[VPX_MAX_LAYERS]; + // Actual average frame size per layer. + double layer_avg_frame_size[VPX_MAX_LAYERS]; + // Average rate mismatch per layer (|target - actual| / target). + double layer_avg_rate_mismatch[VPX_MAX_LAYERS]; + // Actual encoding bitrate per layer (cumulative). + double layer_encoding_bitrate[VPX_MAX_LAYERS]; + // Average of the short-time encoder actual bitrate. + // TODO(marpan): Should we add these short-time stats for each layer? + double avg_st_encoding_bitrate; + // Variance of the short-time encoder actual bitrate. + double variance_st_encoding_bitrate; + // Window (number of frames) for computing short-time encoding bitrate. + int window_size; + // Number of window measurements. + int window_count; +}; + +// Note: these rate control stats assume only 1 key frame in the +// sequence (i.e., first frame only). +static void set_rate_control_stats(struct RateControlStats *rc, + vpx_codec_enc_cfg_t *cfg) { + unsigned int sl, tl; + // Set the layer (cumulative) framerate and the target layer (non-cumulative) + // per-frame-bandwidth, for the rate control encoding stats below. 
+ const double framerate = cfg->g_timebase.den / cfg->g_timebase.num; + + for (sl = 0; sl < cfg->ss_number_layers; ++sl) { + for (tl = 0; tl < cfg->ts_number_layers; ++tl) { + const int layer = sl * cfg->ts_number_layers + tl; + if (cfg->ts_number_layers == 1) + rc->layer_framerate[layer] = framerate; + else + rc->layer_framerate[layer] = framerate / cfg->ts_rate_decimator[tl]; + if (tl > 0) { + rc->layer_pfb[layer] = + 1000.0 * + (cfg->layer_target_bitrate[layer] - + cfg->layer_target_bitrate[layer - 1]) / + (rc->layer_framerate[layer] - rc->layer_framerate[layer - 1]); + } else { + rc->layer_pfb[layer] = 1000.0 * cfg->layer_target_bitrate[layer] / + rc->layer_framerate[layer]; + } + rc->layer_input_frames[layer] = 0; + rc->layer_enc_frames[layer] = 0; + rc->layer_tot_enc_frames[layer] = 0; + rc->layer_encoding_bitrate[layer] = 0.0; + rc->layer_avg_frame_size[layer] = 0.0; + rc->layer_avg_rate_mismatch[layer] = 0.0; + } + } + rc->window_count = 0; + rc->window_size = 15; + rc->avg_st_encoding_bitrate = 0.0; + rc->variance_st_encoding_bitrate = 0.0; +} + +static void printout_rate_control_summary(struct RateControlStats *rc, + vpx_codec_enc_cfg_t *cfg, + int frame_cnt) { + unsigned int sl, tl; + double perc_fluctuation = 0.0; + int tot_num_frames = 0; + printf("Total number of processed frames: %d\n\n", frame_cnt - 1); + printf("Rate control layer stats for sl%d tl%d layer(s):\n\n", + cfg->ss_number_layers, cfg->ts_number_layers); + for (sl = 0; sl < cfg->ss_number_layers; ++sl) { + tot_num_frames = 0; + for (tl = 0; tl < cfg->ts_number_layers; ++tl) { + const int layer = sl * cfg->ts_number_layers + tl; + const int num_dropped = + (tl > 0) + ? 
(rc->layer_input_frames[layer] - rc->layer_enc_frames[layer]) + : (rc->layer_input_frames[layer] - rc->layer_enc_frames[layer] - + 1); + tot_num_frames += rc->layer_input_frames[layer]; + rc->layer_encoding_bitrate[layer] = 0.001 * rc->layer_framerate[layer] * + rc->layer_encoding_bitrate[layer] / + tot_num_frames; + rc->layer_avg_frame_size[layer] = + rc->layer_avg_frame_size[layer] / rc->layer_enc_frames[layer]; + rc->layer_avg_rate_mismatch[layer] = 100.0 * + rc->layer_avg_rate_mismatch[layer] / + rc->layer_enc_frames[layer]; + printf("For layer#: sl%d tl%d \n", sl, tl); + printf("Bitrate (target vs actual): %d %f.0 kbps\n", + cfg->layer_target_bitrate[layer], + rc->layer_encoding_bitrate[layer]); + printf("Average frame size (target vs actual): %f %f bits\n", + rc->layer_pfb[layer], rc->layer_avg_frame_size[layer]); + printf("Average rate_mismatch: %f\n", rc->layer_avg_rate_mismatch[layer]); + printf( + "Number of input frames, encoded (non-key) frames, " + "and percent dropped frames: %d %d %f.0 \n", + rc->layer_input_frames[layer], rc->layer_enc_frames[layer], + 100.0 * num_dropped / rc->layer_input_frames[layer]); + printf("\n"); + } + } + rc->avg_st_encoding_bitrate = rc->avg_st_encoding_bitrate / rc->window_count; + rc->variance_st_encoding_bitrate = + rc->variance_st_encoding_bitrate / rc->window_count - + (rc->avg_st_encoding_bitrate * rc->avg_st_encoding_bitrate); + perc_fluctuation = 100.0 * sqrt(rc->variance_st_encoding_bitrate) / + rc->avg_st_encoding_bitrate; + printf("Short-time stats, for window of %d frames: \n", rc->window_size); + printf("Average, rms-variance, and percent-fluct: %f %f %f \n", + rc->avg_st_encoding_bitrate, sqrt(rc->variance_st_encoding_bitrate), + perc_fluctuation); + printf("Num of input, num of encoded (super) frames: %d %d \n", frame_cnt, + tot_num_frames); +} + +static vpx_codec_err_t parse_superframe_index(const uint8_t *data, + size_t data_sz, uint64_t sizes[8], + int *count) { + // A chunk ending with a byte matching 
0xc0 is an invalid chunk unless + // it is a super frame index. If the last byte of real video compression + // data is 0xc0 the encoder must add a 0 byte. If we have the marker but + // not the associated matching marker byte at the front of the index we have + // an invalid bitstream and need to return an error. + + uint8_t marker; + + marker = *(data + data_sz - 1); + *count = 0; + + if ((marker & 0xe0) == 0xc0) { + const uint32_t frames = (marker & 0x7) + 1; + const uint32_t mag = ((marker >> 3) & 0x3) + 1; + const size_t index_sz = 2 + mag * frames; + + // This chunk is marked as having a superframe index but doesn't have + // enough data for it, thus it's an invalid superframe index. + if (data_sz < index_sz) return VPX_CODEC_CORRUPT_FRAME; + + { + const uint8_t marker2 = *(data + data_sz - index_sz); + + // This chunk is marked as having a superframe index but doesn't have + // the matching marker byte at the front of the index therefore it's an + // invalid chunk. + if (marker != marker2) return VPX_CODEC_CORRUPT_FRAME; + } + + { + // Found a valid superframe index. + uint32_t i, j; + const uint8_t *x = &data[data_sz - index_sz + 1]; + + for (i = 0; i < frames; ++i) { + uint32_t this_sz = 0; + + for (j = 0; j < mag; ++j) this_sz |= (*x++) << (j * 8); + sizes[i] = this_sz; + } + *count = frames; + } + } + return VPX_CODEC_OK; +} +#endif + +// Example pattern for spatial layers and 2 temporal layers used in the +// bypass/flexible mode. The pattern corresponds to the pattern +// VP9E_TEMPORAL_LAYERING_MODE_0101 (temporal_layering_mode == 2) used in +// non-flexible mode. +static void set_frame_flags_bypass_mode_ex0( + int tl, int num_spatial_layers, int is_key_frame, + vpx_svc_ref_frame_config_t *ref_frame_config) { + int sl; + for (sl = 0; sl < num_spatial_layers; ++sl) + ref_frame_config->update_buffer_slot[sl] = 0; + + for (sl = 0; sl < num_spatial_layers; ++sl) { + // Set the buffer idx. 
+ if (tl == 0) { + ref_frame_config->lst_fb_idx[sl] = sl; + if (sl) { + if (is_key_frame) { + ref_frame_config->lst_fb_idx[sl] = sl - 1; + ref_frame_config->gld_fb_idx[sl] = sl; + } else { + ref_frame_config->gld_fb_idx[sl] = sl - 1; + } + } else { + ref_frame_config->gld_fb_idx[sl] = 0; + } + ref_frame_config->alt_fb_idx[sl] = 0; + } else if (tl == 1) { + ref_frame_config->lst_fb_idx[sl] = sl; + ref_frame_config->gld_fb_idx[sl] = + (sl == 0) ? 0 : num_spatial_layers + sl - 1; + ref_frame_config->alt_fb_idx[sl] = num_spatial_layers + sl; + } + // Set the reference and update flags. + if (!tl) { + if (!sl) { + // Base spatial and base temporal (sl = 0, tl = 0) + ref_frame_config->reference_last[sl] = 1; + ref_frame_config->reference_golden[sl] = 0; + ref_frame_config->reference_alt_ref[sl] = 0; + ref_frame_config->update_buffer_slot[sl] |= + 1 << ref_frame_config->lst_fb_idx[sl]; + } else { + if (is_key_frame) { + ref_frame_config->reference_last[sl] = 1; + ref_frame_config->reference_golden[sl] = 0; + ref_frame_config->reference_alt_ref[sl] = 0; + ref_frame_config->update_buffer_slot[sl] |= + 1 << ref_frame_config->gld_fb_idx[sl]; + } else { + // Non-zero spatiall layer. + ref_frame_config->reference_last[sl] = 1; + ref_frame_config->reference_golden[sl] = 1; + ref_frame_config->reference_alt_ref[sl] = 1; + ref_frame_config->update_buffer_slot[sl] |= + 1 << ref_frame_config->lst_fb_idx[sl]; + } + } + } else if (tl == 1) { + if (!sl) { + // Base spatial and top temporal (tl = 1) + ref_frame_config->reference_last[sl] = 1; + ref_frame_config->reference_golden[sl] = 0; + ref_frame_config->reference_alt_ref[sl] = 0; + ref_frame_config->update_buffer_slot[sl] |= + 1 << ref_frame_config->alt_fb_idx[sl]; + } else { + // Non-zero spatial. 
+ if (sl < num_spatial_layers - 1) { + ref_frame_config->reference_last[sl] = 1; + ref_frame_config->reference_golden[sl] = 1; + ref_frame_config->reference_alt_ref[sl] = 0; + ref_frame_config->update_buffer_slot[sl] |= + 1 << ref_frame_config->alt_fb_idx[sl]; + } else if (sl == num_spatial_layers - 1) { + // Top spatial and top temporal (non-reference -- doesn't update any + // reference buffers) + ref_frame_config->reference_last[sl] = 1; + ref_frame_config->reference_golden[sl] = 1; + ref_frame_config->reference_alt_ref[sl] = 0; + } + } + } + } +} + +// Example pattern for 2 spatial layers and 2 temporal layers used in the +// bypass/flexible mode, except only 1 spatial layer when temporal_layer_id = 1. +static void set_frame_flags_bypass_mode_ex1( + int tl, int num_spatial_layers, int is_key_frame, + vpx_svc_ref_frame_config_t *ref_frame_config) { + int sl; + for (sl = 0; sl < num_spatial_layers; ++sl) + ref_frame_config->update_buffer_slot[sl] = 0; + + if (tl == 0) { + if (is_key_frame) { + ref_frame_config->lst_fb_idx[1] = 0; + ref_frame_config->gld_fb_idx[1] = 1; + } else { + ref_frame_config->lst_fb_idx[1] = 1; + ref_frame_config->gld_fb_idx[1] = 0; + } + ref_frame_config->alt_fb_idx[1] = 0; + + ref_frame_config->lst_fb_idx[0] = 0; + ref_frame_config->gld_fb_idx[0] = 0; + ref_frame_config->alt_fb_idx[0] = 0; + } + if (tl == 1) { + ref_frame_config->lst_fb_idx[0] = 0; + ref_frame_config->gld_fb_idx[0] = 1; + ref_frame_config->alt_fb_idx[0] = 2; + + ref_frame_config->lst_fb_idx[1] = 1; + ref_frame_config->gld_fb_idx[1] = 2; + ref_frame_config->alt_fb_idx[1] = 3; + } + // Set the reference and update flags. 
+ if (tl == 0) { + // Base spatial and base temporal (sl = 0, tl = 0) + ref_frame_config->reference_last[0] = 1; + ref_frame_config->reference_golden[0] = 0; + ref_frame_config->reference_alt_ref[0] = 0; + ref_frame_config->update_buffer_slot[0] |= + 1 << ref_frame_config->lst_fb_idx[0]; + + if (is_key_frame) { + ref_frame_config->reference_last[1] = 1; + ref_frame_config->reference_golden[1] = 0; + ref_frame_config->reference_alt_ref[1] = 0; + ref_frame_config->update_buffer_slot[1] |= + 1 << ref_frame_config->gld_fb_idx[1]; + } else { + // Non-zero spatiall layer. + ref_frame_config->reference_last[1] = 1; + ref_frame_config->reference_golden[1] = 1; + ref_frame_config->reference_alt_ref[1] = 1; + ref_frame_config->update_buffer_slot[1] |= + 1 << ref_frame_config->lst_fb_idx[1]; + } + } + if (tl == 1) { + // Top spatial and top temporal (non-reference -- doesn't update any + // reference buffers) + ref_frame_config->reference_last[1] = 1; + ref_frame_config->reference_golden[1] = 0; + ref_frame_config->reference_alt_ref[1] = 0; + } +} + +#if CONFIG_VP9_DECODER && !SIMULCAST_MODE +static void test_decode(vpx_codec_ctx_t *encoder, vpx_codec_ctx_t *decoder, + const int frames_out, int *mismatch_seen) { + vpx_image_t enc_img, dec_img; + struct vp9_ref_frame ref_enc, ref_dec; + if (*mismatch_seen) return; + /* Get the internal reference frame */ + ref_enc.idx = 0; + ref_dec.idx = 0; + vpx_codec_control(encoder, VP9_GET_REFERENCE, &ref_enc); + enc_img = ref_enc.img; + vpx_codec_control(decoder, VP9_GET_REFERENCE, &ref_dec); + dec_img = ref_dec.img; +#if CONFIG_VP9_HIGHBITDEPTH + if ((enc_img.fmt & VPX_IMG_FMT_HIGHBITDEPTH) != + (dec_img.fmt & VPX_IMG_FMT_HIGHBITDEPTH)) { + if (enc_img.fmt & VPX_IMG_FMT_HIGHBITDEPTH) { + vpx_img_alloc(&enc_img, enc_img.fmt - VPX_IMG_FMT_HIGHBITDEPTH, + enc_img.d_w, enc_img.d_h, 16); + vpx_img_truncate_16_to_8(&enc_img, &ref_enc.img); + } + if (dec_img.fmt & VPX_IMG_FMT_HIGHBITDEPTH) { + vpx_img_alloc(&dec_img, dec_img.fmt - 
VPX_IMG_FMT_HIGHBITDEPTH, + dec_img.d_w, dec_img.d_h, 16); + vpx_img_truncate_16_to_8(&dec_img, &ref_dec.img); + } + } +#endif + + if (!compare_img(&enc_img, &dec_img)) { + int y[4], u[4], v[4]; +#if CONFIG_VP9_HIGHBITDEPTH + if (enc_img.fmt & VPX_IMG_FMT_HIGHBITDEPTH) { + find_mismatch_high(&enc_img, &dec_img, y, u, v); + } else { + find_mismatch(&enc_img, &dec_img, y, u, v); + } +#else + find_mismatch(&enc_img, &dec_img, y, u, v); +#endif + decoder->err = 1; + printf( + "Encode/decode mismatch on frame %d at" + " Y[%d, %d] {%d/%d}," + " U[%d, %d] {%d/%d}," + " V[%d, %d] {%d/%d}\n", + frames_out, y[0], y[1], y[2], y[3], u[0], u[1], u[2], u[3], v[0], v[1], + v[2], v[3]); + *mismatch_seen = frames_out; + } + + vpx_img_free(&enc_img); + vpx_img_free(&dec_img); +} +#endif + +#if OUTPUT_RC_STATS +static void svc_output_rc_stats( + vpx_codec_ctx_t *codec, vpx_codec_enc_cfg_t *enc_cfg, + vpx_svc_layer_id_t *layer_id, const vpx_codec_cx_pkt_t *cx_pkt, + struct RateControlStats *rc, VpxVideoWriter **outfile, + const uint32_t frame_cnt, const double framerate) { + int num_layers_encoded = 0; + unsigned int sl, tl; + uint64_t sizes[8]; + uint64_t sizes_parsed[8]; + int count = 0; + double sum_bitrate = 0.0; + double sum_bitrate2 = 0.0; + vp9_zero(sizes); + vp9_zero(sizes_parsed); + vpx_codec_control(codec, VP9E_GET_SVC_LAYER_ID, layer_id); + parse_superframe_index(cx_pkt->data.frame.buf, cx_pkt->data.frame.sz, + sizes_parsed, &count); + if (enc_cfg->ss_number_layers == 1) { + sizes[0] = cx_pkt->data.frame.sz; + } else { + for (sl = 0; sl < enc_cfg->ss_number_layers; ++sl) { + sizes[sl] = 0; + if (cx_pkt->data.frame.spatial_layer_encoded[sl]) { + sizes[sl] = sizes_parsed[num_layers_encoded]; + num_layers_encoded++; + } + } + } + for (sl = 0; sl < enc_cfg->ss_number_layers; ++sl) { + unsigned int sl2; + uint64_t tot_size = 0; +#if SIMULCAST_MODE + for (sl2 = 0; sl2 < sl; ++sl2) { + if (cx_pkt->data.frame.spatial_layer_encoded[sl2]) tot_size += sizes[sl2]; + } + 
#if OUTPUT_RC_STATS
// Accumulates per-layer rate-control statistics for one compressed
// (super)frame packet and writes each spatial layer's bytes to its own
// per-layer IVF file. Also maintains two staggered short-time bitrate
// windows used for the average/variance summary at the end of the run.
static void svc_output_rc_stats(
    vpx_codec_ctx_t *codec, vpx_codec_enc_cfg_t *enc_cfg,
    vpx_svc_layer_id_t *layer_id, const vpx_codec_cx_pkt_t *cx_pkt,
    struct RateControlStats *rc, VpxVideoWriter **outfile,
    const uint32_t frame_cnt, const double framerate) {
  int num_layers_encoded = 0;
  unsigned int sl, tl;
  uint64_t sizes[8];
  uint64_t sizes_parsed[8];
  int count = 0;
  double sum_bitrate = 0.0;
  double sum_bitrate2 = 0.0;
  vp9_zero(sizes);
  vp9_zero(sizes_parsed);
  vpx_codec_control(codec, VP9E_GET_SVC_LAYER_ID, layer_id);
  // Split the superframe into per-spatial-layer sizes.
  parse_superframe_index(cx_pkt->data.frame.buf, cx_pkt->data.frame.sz,
                         sizes_parsed, &count);
  if (enc_cfg->ss_number_layers == 1) {
    sizes[0] = cx_pkt->data.frame.sz;
  } else {
    // parse_superframe_index only reports layers actually encoded, so map
    // its compacted output back onto the full spatial-layer index space.
    for (sl = 0; sl < enc_cfg->ss_number_layers; ++sl) {
      sizes[sl] = 0;
      if (cx_pkt->data.frame.spatial_layer_encoded[sl]) {
        sizes[sl] = sizes_parsed[num_layers_encoded];
        num_layers_encoded++;
      }
    }
  }
  for (sl = 0; sl < enc_cfg->ss_number_layers; ++sl) {
    unsigned int sl2;
    uint64_t tot_size = 0;
#if SIMULCAST_MODE
    // Simulcast: each layer stream is independent; write only layer sl's
    // bytes, which start after the preceding encoded layers in the buffer.
    for (sl2 = 0; sl2 < sl; ++sl2) {
      if (cx_pkt->data.frame.spatial_layer_encoded[sl2]) tot_size += sizes[sl2];
    }
    vpx_video_writer_write_frame(outfile[sl],
                                 (uint8_t *)(cx_pkt->data.frame.buf) + tot_size,
                                 (size_t)(sizes[sl]), cx_pkt->data.frame.pts);
#else
    // Scalable mode: layer sl's decodable stream is the prefix containing
    // layers 0..sl, so write the cumulative size from the buffer start.
    for (sl2 = 0; sl2 <= sl; ++sl2) {
      if (cx_pkt->data.frame.spatial_layer_encoded[sl2]) tot_size += sizes[sl2];
    }
    if (tot_size > 0)
      vpx_video_writer_write_frame(outfile[sl], cx_pkt->data.frame.buf,
                                   (size_t)(tot_size), cx_pkt->data.frame.pts);
#endif  // SIMULCAST_MODE
  }
  for (sl = 0; sl < enc_cfg->ss_number_layers; ++sl) {
    if (cx_pkt->data.frame.spatial_layer_encoded[sl]) {
      // A frame at temporal layer T contributes to layers T and above
      // (higher temporal layers include lower ones).
      for (tl = layer_id->temporal_layer_id; tl < enc_cfg->ts_number_layers;
           ++tl) {
        const int layer = sl * enc_cfg->ts_number_layers + tl;
        ++rc->layer_tot_enc_frames[layer];
        rc->layer_encoding_bitrate[layer] += 8.0 * sizes[sl];
        // Keep count of rate control stats per layer, for non-key
        // frames.
        if (tl == (unsigned int)layer_id->temporal_layer_id &&
            !(cx_pkt->data.frame.flags & VPX_FRAME_IS_KEY)) {
          rc->layer_avg_frame_size[layer] += 8.0 * sizes[sl];
          rc->layer_avg_rate_mismatch[layer] +=
              fabs(8.0 * sizes[sl] - rc->layer_pfb[layer]) /
              rc->layer_pfb[layer];
          ++rc->layer_enc_frames[layer];
        }
      }
    }
  }

  // Update for short-time encoding bitrate states, for moving
  // window of size rc->window, shifted by rc->window / 2.
  // Ignore first window segment, due to key frame.
  if (frame_cnt > (unsigned int)rc->window_size) {
    for (sl = 0; sl < enc_cfg->ss_number_layers; ++sl) {
      if (cx_pkt->data.frame.spatial_layer_encoded[sl])
        sum_bitrate += 0.001 * 8.0 * sizes[sl] * framerate;
    }
    if (frame_cnt % rc->window_size == 0) {
      rc->window_count += 1;
      rc->avg_st_encoding_bitrate += sum_bitrate / rc->window_size;
      rc->variance_st_encoding_bitrate +=
          (sum_bitrate / rc->window_size) * (sum_bitrate / rc->window_size);
    }
  }

  // Second shifted window.
  // NOTE(review): unlike the first window above, this loop does not guard
  // on spatial_layer_encoded[sl]; sizes[sl] is zero for skipped layers so
  // the sum is the same, but confirm the asymmetry is intentional.
  if (frame_cnt > (unsigned int)(rc->window_size + rc->window_size / 2)) {
    for (sl = 0; sl < enc_cfg->ss_number_layers; ++sl) {
      sum_bitrate2 += 0.001 * 8.0 * sizes[sl] * framerate;
    }

    if (frame_cnt > (unsigned int)(2 * rc->window_size) &&
        frame_cnt % rc->window_size == 0) {
      rc->window_count += 1;
      rc->avg_st_encoding_bitrate += sum_bitrate2 / rc->window_size;
      rc->variance_st_encoding_bitrate +=
          (sum_bitrate2 / rc->window_size) * (sum_bitrate2 / rc->window_size);
    }
  }
}
#endif
// Entry point of the spatial SVC encoder example: parses the command line,
// allocates the raw input image, initializes the SVC encoder (and, when
// built with a decoder, a verification decoder), encodes every input frame
// while tracking rate-control statistics, and verifies encoder/decoder
// reconstruction matches.
int main(int argc, const char **argv) {
  AppInput app_input;
  VpxVideoWriter *writer = NULL;
  VpxVideoInfo info;
  vpx_codec_ctx_t encoder;
  vpx_codec_enc_cfg_t enc_cfg;
  SvcContext svc_ctx;
  vpx_svc_frame_drop_t svc_drop_frame;
  uint32_t i;
  uint32_t frame_cnt = 0;
  vpx_image_t raw;
  vpx_codec_err_t res;
  int pts = 0;            /* PTS starts at 0 */
  int frame_duration = 1; /* 1 timebase tick per frame */
  int end_of_stream = 0;
#if OUTPUT_FRAME_STATS
  int frames_received = 0;
#endif
#if OUTPUT_RC_STATS
  VpxVideoWriter *outfile[VPX_SS_MAX_LAYERS] = { NULL };
  struct RateControlStats rc;
  vpx_svc_layer_id_t layer_id;
  vpx_svc_ref_frame_config_t ref_frame_config;
  unsigned int sl;
  double framerate = 30.0;
#endif
  struct vpx_usec_timer timer;
  int64_t cx_time = 0;
#if CONFIG_INTERNAL_STATS
  // NOTE(review): fopen result is not checked; a NULL stream would crash at
  // the fprintf calls below. Also, the mismatch_seen report under
  // CONFIG_INTERNAL_STATS relies on mismatch_seen, which is only declared
  // when CONFIG_VP9_DECODER && !SIMULCAST_MODE — confirm build combinations.
  FILE *f = fopen("opsnr.stt", "a");
#endif
#if CONFIG_VP9_DECODER && !SIMULCAST_MODE
  int mismatch_seen = 0;
  vpx_codec_ctx_t decoder;
#endif
  memset(&svc_ctx, 0, sizeof(svc_ctx));
  memset(&app_input, 0, sizeof(AppInput));
  memset(&info, 0, sizeof(VpxVideoInfo));
  memset(&layer_id, 0, sizeof(vpx_svc_layer_id_t));
  memset(&rc, 0, sizeof(struct RateControlStats));
  exec_name = argv[0];

  /* Setup default input stream settings */
  app_input.input_ctx.framerate.numerator = 30;
  app_input.input_ctx.framerate.denominator = 1;
  app_input.input_ctx.only_i420 = 1;
  app_input.input_ctx.bit_depth = 0;

  parse_command_line(argc, argv, &app_input, &svc_ctx, &enc_cfg);

  // Y4M reader handles its own allocation.
  if (app_input.input_ctx.file_type != FILE_TYPE_Y4M) {
// Allocate image buffer
#if CONFIG_VP9_HIGHBITDEPTH
    if (!vpx_img_alloc(&raw,
                       enc_cfg.g_input_bit_depth == 8 ? VPX_IMG_FMT_I420
                                                      : VPX_IMG_FMT_I42016,
                       enc_cfg.g_w, enc_cfg.g_h, 32)) {
      die("Failed to allocate image %dx%d\n", enc_cfg.g_w, enc_cfg.g_h);
    }
#else
    if (!vpx_img_alloc(&raw, VPX_IMG_FMT_I420, enc_cfg.g_w, enc_cfg.g_h, 32)) {
      die("Failed to allocate image %dx%d\n", enc_cfg.g_w, enc_cfg.g_h);
    }
#endif  // CONFIG_VP9_HIGHBITDEPTH
  }

  // Initialize codec
  if (vpx_svc_init(&svc_ctx, &encoder, vpx_codec_vp9_cx(), &enc_cfg) !=
      VPX_CODEC_OK)
    die("Failed to initialize encoder\n");
#if CONFIG_VP9_DECODER && !SIMULCAST_MODE
  if (vpx_codec_dec_init(
          &decoder, get_vpx_decoder_by_name("vp9")->codec_interface(), NULL, 0))
    die("Failed to initialize decoder\n");
#endif

#if OUTPUT_RC_STATS
  rc.window_count = 1;
  rc.window_size = 15;  // Silence a static analysis warning.
  rc.avg_st_encoding_bitrate = 0.0;
  rc.variance_st_encoding_bitrate = 0.0;
  if (svc_ctx.output_rc_stat) {
    set_rate_control_stats(&rc, &enc_cfg);
    // NOTE(review): integer division — fractional timebases (e.g.
    // 30000/1001) truncate before the assignment to double.
    framerate = enc_cfg.g_timebase.den / enc_cfg.g_timebase.num;
  }
#endif

  info.codec_fourcc = VP9_FOURCC;
  info.frame_width = enc_cfg.g_w;
  info.frame_height = enc_cfg.g_h;
  info.time_base.numerator = enc_cfg.g_timebase.num;
  info.time_base.denominator = enc_cfg.g_timebase.den;

  writer =
      vpx_video_writer_open(app_input.output_filename, kContainerIVF, &info);
  if (!writer)
    die("Failed to open %s for writing\n", app_input.output_filename);

#if OUTPUT_RC_STATS
  // Write out spatial layer stream.
  // TODO(marpan/jianj): allow for writing each spatial and temporal stream.
  if (svc_ctx.output_rc_stat) {
    for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {
      char file_name[PATH_MAX];

      snprintf(file_name, sizeof(file_name), "%s_s%d.ivf",
               app_input.output_filename, sl);
      outfile[sl] = vpx_video_writer_open(file_name, kContainerIVF, &info);
      if (!outfile[sl]) die("Failed to open %s for writing", file_name);
    }
  }
#endif

  // skip initial frames
  for (i = 0; i < app_input.frames_to_skip; ++i)
    read_frame(&app_input.input_ctx, &raw);

  if (svc_ctx.speed != -1)
    vpx_codec_control(&encoder, VP8E_SET_CPUUSED, svc_ctx.speed);
  if (svc_ctx.threads) {
    vpx_codec_control(&encoder, VP9E_SET_TILE_COLUMNS,
                      get_msb(svc_ctx.threads));
    if (svc_ctx.threads > 1)
      vpx_codec_control(&encoder, VP9E_SET_ROW_MT, 1);
    else
      vpx_codec_control(&encoder, VP9E_SET_ROW_MT, 0);
  }
  if (svc_ctx.speed >= 5 && svc_ctx.aqmode == 1)
    vpx_codec_control(&encoder, VP9E_SET_AQ_MODE, 3);
  if (svc_ctx.speed >= 5)
    vpx_codec_control(&encoder, VP8E_SET_STATIC_THRESHOLD, 1);
  vpx_codec_control(&encoder, VP8E_SET_MAX_INTRA_BITRATE_PCT, 900);

  vpx_codec_control(&encoder, VP9E_SET_SVC_INTER_LAYER_PRED,
                    app_input.inter_layer_pred);

  vpx_codec_control(&encoder, VP9E_SET_NOISE_SENSITIVITY, 0);

  vpx_codec_control(&encoder, VP9E_SET_TUNE_CONTENT, app_input.tune_content);

  vpx_codec_control(&encoder, VP9E_SET_DISABLE_OVERSHOOT_MAXQ_CBR, 0);
  vpx_codec_control(&encoder, VP9E_SET_DISABLE_LOOPFILTER, 0);

  // Whole-superframe dropping with a per-layer threshold and no cap on
  // consecutive drops.
  svc_drop_frame.framedrop_mode = FULL_SUPERFRAME_DROP;
  for (sl = 0; sl < (unsigned int)svc_ctx.spatial_layers; ++sl)
    svc_drop_frame.framedrop_thresh[sl] = enc_cfg.rc_dropframe_thresh;
  svc_drop_frame.max_consec_drop = INT_MAX;
  vpx_codec_control(&encoder, VP9E_SET_SVC_FRAME_DROP_LAYER, &svc_drop_frame);

  // Encode frames
  while (!end_of_stream) {
    vpx_codec_iter_t iter = NULL;
    const vpx_codec_cx_pkt_t *cx_pkt;
    // Example patterns for bypass/flexible mode:
    // example_pattern = 0: 2 temporal layers, and spatial_layers = 1,2,3. Exact
    // to fixed SVC patterns. example_pattern = 1: 2 spatial and 2 temporal
    // layers, with SL0 only has TL0, and SL1 has both TL0 and TL1. This example
    // uses the extended API.
    int example_pattern = 0;
    if (frame_cnt >= app_input.frames_to_code ||
        !read_frame(&app_input.input_ctx, &raw)) {
      // We need one extra vpx_svc_encode call at end of stream to flush
      // encoder and get remaining data
      end_of_stream = 1;
    }

    // For BYPASS/FLEXIBLE mode, set the frame flags (reference and updates)
    // and the buffer indices for each spatial layer of the current
    // (super)frame to be encoded. The spatial and temporal layer_id for the
    // current frame also needs to be set.
    // TODO(marpan): Should rename the "VP9E_TEMPORAL_LAYERING_MODE_BYPASS"
    // mode to "VP9E_LAYERING_MODE_BYPASS".
    if (svc_ctx.temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_BYPASS) {
      layer_id.spatial_layer_id = 0;
      // Example for 2 temporal layers.
      if (frame_cnt % 2 == 0) {
        layer_id.temporal_layer_id = 0;
        for (i = 0; i < VPX_SS_MAX_LAYERS; i++)
          layer_id.temporal_layer_id_per_spatial[i] = 0;
      } else {
        layer_id.temporal_layer_id = 1;
        for (i = 0; i < VPX_SS_MAX_LAYERS; i++)
          layer_id.temporal_layer_id_per_spatial[i] = 1;
      }
      if (example_pattern == 1) {
        // example_pattern 1 is hard-coded for 2 spatial and 2 temporal layers.
        assert(svc_ctx.spatial_layers == 2);
        assert(svc_ctx.temporal_layers == 2);
        if (frame_cnt % 2 == 0) {
          // Spatial layer 0 and 1 are encoded.
          layer_id.temporal_layer_id_per_spatial[0] = 0;
          layer_id.temporal_layer_id_per_spatial[1] = 0;
          layer_id.spatial_layer_id = 0;
        } else {
          // Only spatial layer 1 is encoded here.
          layer_id.temporal_layer_id_per_spatial[1] = 1;
          layer_id.spatial_layer_id = 1;
        }
      }
      vpx_codec_control(&encoder, VP9E_SET_SVC_LAYER_ID, &layer_id);
      // TODO(jianj): Fix the parameter passing for "is_key_frame" in
      // set_frame_flags_bypass_model() for case of periodic key frames.
      if (example_pattern == 0) {
        set_frame_flags_bypass_mode_ex0(layer_id.temporal_layer_id,
                                        svc_ctx.spatial_layers, frame_cnt == 0,
                                        &ref_frame_config);
      } else if (example_pattern == 1) {
        set_frame_flags_bypass_mode_ex1(layer_id.temporal_layer_id,
                                        svc_ctx.spatial_layers, frame_cnt == 0,
                                        &ref_frame_config);
      }
      ref_frame_config.duration[0] = frame_duration * 1;
      ref_frame_config.duration[1] = frame_duration * 1;

      vpx_codec_control(&encoder, VP9E_SET_SVC_REF_FRAME_CONFIG,
                        &ref_frame_config);
      // Keep track of input frames, to account for frame drops in rate control
      // stats/metrics.
      for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {
        ++rc.layer_input_frames[sl * enc_cfg.ts_number_layers +
                                layer_id.temporal_layer_id];
      }
    } else {
      // For the fixed pattern SVC, temporal layer is given by superframe count.
      unsigned int tl = 0;
      if (enc_cfg.ts_number_layers == 2)
        tl = (frame_cnt % 2 != 0);
      else if (enc_cfg.ts_number_layers == 3) {
        if (frame_cnt % 2 != 0) tl = 2;
        if ((frame_cnt > 1) && ((frame_cnt - 2) % 4 == 0)) tl = 1;
      }
      for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl)
        ++rc.layer_input_frames[sl * enc_cfg.ts_number_layers + tl];
    }

    vpx_usec_timer_start(&timer);
    // NULL image on the final iteration flushes the encoder.
    res = vpx_svc_encode(
        &svc_ctx, &encoder, (end_of_stream ? NULL : &raw), pts, frame_duration,
        svc_ctx.speed >= 5 ? VPX_DL_REALTIME : VPX_DL_GOOD_QUALITY);
    vpx_usec_timer_mark(&timer);
    cx_time += vpx_usec_timer_elapsed(&timer);

    fflush(stdout);
    if (res != VPX_CODEC_OK) {
      die_codec(&encoder, "Failed to encode frame");
    }

    // Drain all packets produced by this encode call.
    while ((cx_pkt = vpx_codec_get_cx_data(&encoder, &iter)) != NULL) {
      switch (cx_pkt->kind) {
        case VPX_CODEC_CX_FRAME_PKT: {
          SvcInternal_t *const si = (SvcInternal_t *)svc_ctx.internal;
          if (cx_pkt->data.frame.sz > 0) {
            vpx_video_writer_write_frame(writer, cx_pkt->data.frame.buf,
                                         cx_pkt->data.frame.sz,
                                         cx_pkt->data.frame.pts);
#if OUTPUT_RC_STATS
            if (svc_ctx.output_rc_stat) {
              svc_output_rc_stats(&encoder, &enc_cfg, &layer_id, cx_pkt, &rc,
                                  outfile, frame_cnt, framerate);
            }
#endif
          }
#if OUTPUT_FRAME_STATS
          printf("SVC frame: %d, kf: %d, size: %d, pts: %d\n", frames_received,
                 !!(cx_pkt->data.frame.flags & VPX_FRAME_IS_KEY),
                 (int)cx_pkt->data.frame.sz, (int)cx_pkt->data.frame.pts);
          ++frames_received;
#endif
          if (enc_cfg.ss_number_layers == 1 && enc_cfg.ts_number_layers == 1)
            si->bytes_sum[0] += (int)cx_pkt->data.frame.sz;
#if CONFIG_VP9_DECODER && !SIMULCAST_MODE
          if (vpx_codec_decode(&decoder, cx_pkt->data.frame.buf,
                               (unsigned int)cx_pkt->data.frame.sz, NULL, 0))
            die_codec(&decoder, "Failed to decode frame.");
#endif
          break;
        }
        case VPX_CODEC_STATS_PKT: {
          stats_write(&app_input.rc_stats, cx_pkt->data.twopass_stats.buf,
                      cx_pkt->data.twopass_stats.sz);
          break;
        }
        default: { break; }
      }

#if CONFIG_VP9_DECODER && !SIMULCAST_MODE
      vpx_codec_control(&encoder, VP9E_GET_SVC_LAYER_ID, &layer_id);
      // Don't look for mismatch on top spatial and top temporal layers as they
      // are non reference frames.
      if ((enc_cfg.ss_number_layers > 1 || enc_cfg.ts_number_layers > 1) &&
          !(layer_id.temporal_layer_id > 0 &&
            layer_id.temporal_layer_id == (int)enc_cfg.ts_number_layers - 1 &&
            cx_pkt->data.frame
                .spatial_layer_encoded[enc_cfg.ss_number_layers - 1])) {
        test_decode(&encoder, &decoder, frame_cnt, &mismatch_seen);
      }
#endif
    }

    if (!end_of_stream) {
      ++frame_cnt;
      pts += frame_duration;
    }
  }

  printf("Processed %d frames\n", frame_cnt);

  close_input_file(&app_input.input_ctx);

#if OUTPUT_RC_STATS
  if (svc_ctx.output_rc_stat) {
    printout_rate_control_summary(&rc, &enc_cfg, frame_cnt);
    printf("\n");
  }
#endif
  if (vpx_codec_destroy(&encoder))
    die_codec(&encoder, "Failed to destroy codec");
  if (writer) {
    vpx_video_writer_close(writer);
  }
#if OUTPUT_RC_STATS
  if (svc_ctx.output_rc_stat) {
    for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {
      vpx_video_writer_close(outfile[sl]);
    }
  }
#endif
#if CONFIG_INTERNAL_STATS
  if (mismatch_seen) {
    fprintf(f, "First mismatch occurred in frame %d\n", mismatch_seen);
  } else {
    fprintf(f, "No mismatch detected in recon buffers\n");
  }
  fclose(f);
#endif
  printf("Frame cnt and encoding time/FPS stats for encoding: %d %f %f \n",
         frame_cnt, 1000 * (float)cx_time / (double)(frame_cnt * 1000000),
         1000000 * (double)frame_cnt / (double)cx_time);
  if (app_input.input_ctx.file_type != FILE_TYPE_Y4M) {
    vpx_img_free(&raw);
  }
  // display average size, psnr
  vpx_svc_dump_statistics(&svc_ctx);
  vpx_svc_release(&svc_ctx);
  return EXIT_SUCCESS;
}
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// VP9 Set Reference Frame +// ============================ +// +// This is an example demonstrating how to overwrite the VP9 encoder's +// internal reference frame. In the sample we set the last frame to the +// current frame. This technique could be used to bounce between two cameras. +// +// The decoder would also have to set the reference frame to the same value +// on the same frame, or the video will become corrupt. The 'test_decode' +// variable is set to 1 in this example that tests if the encoder and decoder +// results are matching. +// +// Usage +// ----- +// This example encodes a raw video. And the last argument passed in specifies +// the frame number to update the reference frame on. For example, run +// examples/vp9cx_set_ref 352 288 in.yuv out.ivf 4 30 +// The parameter is parsed as follows: +// +// +// Extra Variables +// --------------- +// This example maintains the frame number passed on the command line +// in the `update_frame_num` variable. +// +// +// Configuration +// ------------- +// +// The reference frame is updated on the frame specified on the command +// line. +// +// Observing The Effects +// --------------------- +// The encoder and decoder results should be matching when the same reference +// frame setting operation is done in both encoder and decoder. Otherwise, +// the encoder/decoder mismatch would be seen. 
+ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "vpx/vp8cx.h" +#include "vpx/vpx_decoder.h" +#include "vpx/vpx_encoder.h" +#include "vp9/common/vp9_common.h" + +#include "./tools_common.h" +#include "./video_writer.h" + +static const char *exec_name; + +void usage_exit() { + fprintf(stderr, + "Usage: %s <width> <height> <infile> <outfile> " + "<frame> <limit(optional)>\n", + exec_name); + exit(EXIT_FAILURE); +} + +static void testing_decode(vpx_codec_ctx_t *encoder, vpx_codec_ctx_t *decoder, + unsigned int frame_out, int *mismatch_seen) { + vpx_image_t enc_img, dec_img; + struct vp9_ref_frame ref_enc, ref_dec; + + if (*mismatch_seen) return; + + ref_enc.idx = 0; + ref_dec.idx = 0; + if (vpx_codec_control(encoder, VP9_GET_REFERENCE, &ref_enc)) + die_codec(encoder, "Failed to get encoder reference frame"); + enc_img = ref_enc.img; + if (vpx_codec_control(decoder, VP9_GET_REFERENCE, &ref_dec)) + die_codec(decoder, "Failed to get decoder reference frame"); + dec_img = ref_dec.img; + + if (!compare_img(&enc_img, &dec_img)) { + int y[4], u[4], v[4]; + + *mismatch_seen = 1; + + find_mismatch(&enc_img, &dec_img, y, u, v); + printf( + "Encode/decode mismatch on frame %d at" + " Y[%d, %d] {%d/%d}," + " U[%d, %d] {%d/%d}," + " V[%d, %d] {%d/%d}", + frame_out, y[0], y[1], y[2], y[3], u[0], u[1], u[2], u[3], v[0], v[1], + v[2], v[3]); + } + + vpx_img_free(&enc_img); + vpx_img_free(&dec_img); +} + +static int encode_frame(vpx_codec_ctx_t *ecodec, vpx_image_t *img, + unsigned int frame_in, VpxVideoWriter *writer, + int test_decode, vpx_codec_ctx_t *dcodec, + unsigned int *frame_out, int *mismatch_seen) { + int got_pkts = 0; + vpx_codec_iter_t iter = NULL; + const vpx_codec_cx_pkt_t *pkt = NULL; + int got_data; + const vpx_codec_err_t res = + vpx_codec_encode(ecodec, img, frame_in, 1, 0, VPX_DL_GOOD_QUALITY); + if (res != VPX_CODEC_OK) die_codec(ecodec, "Failed to encode frame"); + + got_data = 0; + + while ((pkt = vpx_codec_get_cx_data(ecodec, &iter)) 
!= NULL) { + got_pkts = 1; + + if (pkt->kind == VPX_CODEC_CX_FRAME_PKT) { + const int keyframe = (pkt->data.frame.flags & VPX_FRAME_IS_KEY) != 0; + + if (!(pkt->data.frame.flags & VPX_FRAME_IS_FRAGMENT)) { + *frame_out += 1; + } + + if (!vpx_video_writer_write_frame(writer, pkt->data.frame.buf, + pkt->data.frame.sz, + pkt->data.frame.pts)) { + die_codec(ecodec, "Failed to write compressed frame"); + } + printf(keyframe ? "K" : "."); + fflush(stdout); + got_data = 1; + + // Decode 1 frame. + if (test_decode) { + if (vpx_codec_decode(dcodec, pkt->data.frame.buf, + (unsigned int)pkt->data.frame.sz, NULL, 0)) + die_codec(dcodec, "Failed to decode frame."); + } + } + } + + // Mismatch checking + if (got_data && test_decode) { + testing_decode(ecodec, dcodec, *frame_out, mismatch_seen); + } + + return got_pkts; +} + +int main(int argc, char **argv) { + FILE *infile = NULL; + // Encoder + vpx_codec_ctx_t ecodec; + vpx_codec_enc_cfg_t cfg; + unsigned int frame_in = 0; + vpx_image_t raw; + vpx_codec_err_t res; + VpxVideoInfo info; + VpxVideoWriter *writer = NULL; + const VpxInterface *encoder = NULL; + + // Test encoder/decoder mismatch. 
// Entry point of the set-reference example: parses
//   <width> <height> <infile> <outfile> <frame> [limit]
// encodes the raw I420 input with VP9, and on frame number `frame` installs
// the current raw image as the LAST reference in both encoder and decoder,
// then verifies their reconstructions stay in sync.
int main(int argc, char **argv) {
  FILE *infile = NULL;
  // Encoder
  vpx_codec_ctx_t ecodec;
  vpx_codec_enc_cfg_t cfg;
  unsigned int frame_in = 0;
  vpx_image_t raw;
  vpx_codec_err_t res;
  VpxVideoInfo info;
  VpxVideoWriter *writer = NULL;
  const VpxInterface *encoder = NULL;

  // Test encoder/decoder mismatch.
  int test_decode = 1;
  // Decoder
  vpx_codec_ctx_t dcodec;
  unsigned int frame_out = 0;

  // The frame number to set reference frame on
  unsigned int update_frame_num = 0;
  int mismatch_seen = 0;

  const int fps = 30;
  const int bitrate = 500;  // kbps (vpx rc_target_bitrate units)

  const char *width_arg = NULL;
  const char *height_arg = NULL;
  const char *infile_arg = NULL;
  const char *outfile_arg = NULL;
  const char *update_frame_num_arg = NULL;
  unsigned int limit = 0;

  vp9_zero(ecodec);
  vp9_zero(cfg);
  vp9_zero(info);

  exec_name = argv[0];

  if (argc < 6) die("Invalid number of arguments");

  width_arg = argv[1];
  height_arg = argv[2];
  infile_arg = argv[3];
  outfile_arg = argv[4];
  update_frame_num_arg = argv[5];

  encoder = get_vpx_encoder_by_name("vp9");
  if (!encoder) die("Unsupported codec.");

  update_frame_num = (unsigned int)strtoul(update_frame_num_arg, NULL, 0);
  // In VP9, the reference buffers (cm->buffer_pool->frame_bufs[i].buf) are
  // allocated while calling vpx_codec_encode(), thus, setting reference for
  // 1st frame isn't supported.
  // NOTE(review): this also rejects unparsable input (strtoul returns 0),
  // which is why the error text talks about parsing.
  if (update_frame_num <= 1) {
    die("Couldn't parse frame number '%s'\n", update_frame_num_arg);
  }

  if (argc > 6) {
    limit = (unsigned int)strtoul(argv[6], NULL, 0);
    if (update_frame_num > limit)
      die("Update frame number couldn't larger than limit\n");
  }

  info.codec_fourcc = encoder->fourcc;
  info.frame_width = (int)strtol(width_arg, NULL, 0);
  info.frame_height = (int)strtol(height_arg, NULL, 0);
  info.time_base.numerator = 1;
  info.time_base.denominator = fps;

  // I420 requires even dimensions (chroma planes are subsampled 2x2).
  if (info.frame_width <= 0 || info.frame_height <= 0 ||
      (info.frame_width % 2) != 0 || (info.frame_height % 2) != 0) {
    die("Invalid frame size: %dx%d", info.frame_width, info.frame_height);
  }

  if (!vpx_img_alloc(&raw, VPX_IMG_FMT_I420, info.frame_width,
                     info.frame_height, 1)) {
    die("Failed to allocate image.");
  }

  printf("Using %s\n", vpx_codec_iface_name(encoder->codec_interface()));

  res = vpx_codec_enc_config_default(encoder->codec_interface(), &cfg, 0);
  if (res) die_codec(&ecodec, "Failed to get default codec config.");

  cfg.g_w = info.frame_width;
  cfg.g_h = info.frame_height;
  cfg.g_timebase.num = info.time_base.numerator;
  cfg.g_timebase.den = info.time_base.denominator;
  cfg.rc_target_bitrate = bitrate;
  cfg.g_lag_in_frames = 3;

  writer = vpx_video_writer_open(outfile_arg, kContainerIVF, &info);
  if (!writer) die("Failed to open %s for writing.", outfile_arg);

  if (!(infile = fopen(infile_arg, "rb")))
    die("Failed to open %s for reading.", infile_arg);

  if (vpx_codec_enc_init(&ecodec, encoder->codec_interface(), &cfg, 0))
    die("Failed to initialize encoder");

  // Disable alt_ref.
  if (vpx_codec_control(&ecodec, VP8E_SET_ENABLEAUTOALTREF, 0))
    die_codec(&ecodec, "Failed to set enable auto alt ref");

  if (test_decode) {
    const VpxInterface *decoder = get_vpx_decoder_by_name("vp9");
    if (vpx_codec_dec_init(&dcodec, decoder->codec_interface(), NULL, 0))
      die_codec(&dcodec, "Failed to initialize decoder.");
  }

  // Encode frames.
  while (vpx_img_read(&raw, infile)) {
    if (limit && frame_in >= limit) break;
    // frame_out lags frame_in by g_lag_in_frames, so trigger the reference
    // update when the *output* frame counter is about to reach the target.
    if (update_frame_num > 1 && frame_out + 1 == update_frame_num) {
      vpx_ref_frame_t ref;
      ref.frame_type = VP8_LAST_FRAME;
      ref.img = raw;
      // Set reference frame in encoder.
      if (vpx_codec_control(&ecodec, VP8_SET_REFERENCE, &ref))
        die_codec(&ecodec, "Failed to set reference frame");
      printf(" <SET_REF>");

      // If set_reference in decoder is commented out, the enc/dec mismatch
      // would be seen.
      if (test_decode) {
        if (vpx_codec_control(&dcodec, VP8_SET_REFERENCE, &ref))
          die_codec(&dcodec, "Failed to set reference frame");
      }
    }

    encode_frame(&ecodec, &raw, frame_in, writer, test_decode, &dcodec,
                 &frame_out, &mismatch_seen);
    frame_in++;
    if (mismatch_seen) break;
  }

  // Flush encoder.
  if (!mismatch_seen)
    while (encode_frame(&ecodec, NULL, frame_in, writer, test_decode, &dcodec,
                        &frame_out, &mismatch_seen)) {
    }

  printf("\n");
  fclose(infile);
  printf("Processed %d frames.\n", frame_out);

  if (test_decode) {
    if (!mismatch_seen)
      printf("Encoder/decoder results are matching.\n");
    else
      printf("Encoder/decoder results are NOT matching.\n");
  }

  if (test_decode)
    if (vpx_codec_destroy(&dcodec))
      die_codec(&dcodec, "Failed to destroy decoder");

  vpx_img_free(&raw);
  if (vpx_codec_destroy(&ecodec))
    die_codec(&ecodec, "Failed to destroy encoder.");

  vpx_video_writer_close(writer);

  return EXIT_SUCCESS;
}
+ * Note --size-limit and VPX_MAX_ALLOCABLE_MEMORY are defined to avoid + * Out of memory errors when running generated fuzzer binary + $../libvpx/configure --disable-unit-tests --size-limit=12288x12288 \ + --extra-cflags="-fsanitize=fuzzer-no-link \ + -DVPX_MAX_ALLOCABLE_MEMORY=1073741824" \ + --disable-webm-io --enable-debug --disable-vp8-encoder \ + --disable-vp9-encoder --disable-examples + + * Build libvpx + $make -j32 + + * Build vp9 fuzzer + $ $CXX $CXXFLAGS -std=gnu++11 -DDECODER=vp9 \ + -fsanitize=fuzzer -I../libvpx -I. -Wl,--start-group \ + ../libvpx/examples/vpx_dec_fuzzer.cc -o ./vpx_dec_fuzzer_vp9 \ + ./libvpx.a -Wl,--end-group + + * DECODER should be defined as vp9 or vp8 to enable vp9/vp8 + * + * create a corpus directory and copy some ivf files there. + * Based on which codec (vp8/vp9) is being tested, it is recommended to + * have corresponding ivf files in corpus directory + * Empty corpus directoy also is acceptable, though not recommended + $mkdir CORPUS && cp some-files CORPUS + + * Run fuzzing: + $./vpx_dec_fuzzer_vp9 CORPUS + + * References: + * http://llvm.org/docs/LibFuzzer.html + * https://github.com/google/oss-fuzz + */ + +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <algorithm> +#include <memory> + +#include "vpx/vp8dx.h" +#include "vpx/vpx_decoder.h" +#include "vpx_ports/mem_ops.h" + +#define IVF_FRAME_HDR_SZ (4 + 8) /* 4 byte size + 8 byte timestamp */ +#define IVF_FILE_HDR_SZ 32 + +#define VPXD_INTERFACE(name) VPXD_INTERFACE_(name) +#define VPXD_INTERFACE_(name) vpx_codec_##name##_dx() + +extern "C" void usage_exit(void) { exit(EXIT_FAILURE); } + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { + if (size <= IVF_FILE_HDR_SZ) { + return 0; + } + + vpx_codec_ctx_t codec; + // Set thread count in the range [1, 64]. 
// libFuzzer entry point: treats the input as an IVF stream, skipping the
// 32-byte file header and walking 12-byte frame headers, and feeds each
// frame payload to the decoder. Decode/control errors are deliberately
// ignored — the fuzzer is looking for crashes and sanitizer reports, not
// valid output.
extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
  if (size <= IVF_FILE_HDR_SZ) {
    return 0;
  }

  vpx_codec_ctx_t codec;
  // Set thread count in the range [1, 64].
  // The first byte after the file header doubles as a fuzzed config knob.
  const unsigned int threads = (data[IVF_FILE_HDR_SZ] & 0x3f) + 1;
  // Width/height left 0; presumably the decoder sizes itself from the
  // bitstream — TODO confirm against vpx_codec_dec_cfg_t docs.
  vpx_codec_dec_cfg_t cfg = { threads, 0, 0 };
  if (vpx_codec_dec_init(&codec, VPXD_INTERFACE(DECODER), &cfg, 0)) {
    return 0;
  }

  if (threads > 1) {
    // Derive the loop-filter-optimization toggle from the same fuzzed byte
    // (bits not used for the thread count).
    const int enable = (data[IVF_FILE_HDR_SZ] & 0xa0) != 0;
    const vpx_codec_err_t err =
        vpx_codec_control(&codec, VP9D_SET_LOOP_FILTER_OPT, enable);
    static_cast<void>(err);
  }

  data += IVF_FILE_HDR_SZ;
  size -= IVF_FILE_HDR_SZ;

  while (size > IVF_FRAME_HDR_SZ) {
    // First 4 bytes of the frame header carry the payload size (LE32).
    size_t frame_size = mem_get_le32(data);
    size -= IVF_FRAME_HDR_SZ;
    data += IVF_FRAME_HDR_SZ;
    // Clamp a (possibly lying) header size to the bytes actually remaining.
    frame_size = std::min(size, frame_size);

    const vpx_codec_err_t err =
        vpx_codec_decode(&codec, data, frame_size, nullptr, 0);
    static_cast<void>(err);
    vpx_codec_iter_t iter = nullptr;
    vpx_image_t *img = nullptr;
    // Drain all decoded images so internal frame buffers get recycled.
    while ((img = vpx_codec_get_frame(&codec, &iter)) != nullptr) {
    }
    data += frame_size;
    size -= frame_size;
  }
  vpx_codec_destroy(&codec);
  return 0;
}
#define ROI_MAP 0

// Zero out an lvalue of any type.
#define zero(Dest) memset(&(Dest), 0, sizeof(Dest))

static const char *exec_name;

void usage_exit(void) { exit(EXIT_FAILURE); }

// Denoiser states for vp8, for temporal denoising.
enum denoiserStateVp8 {
  kVp8DenoiserOff,
  kVp8DenoiserOnYOnly,
  kVp8DenoiserOnYUV,
  kVp8DenoiserOnYUVAggressive,
  kVp8DenoiserOnAdaptive
};

// Denoiser states for vp9, for temporal denoising.
enum denoiserStateVp9 {
  kVp9DenoiserOff,
  kVp9DenoiserOnYOnly,
  // For SVC: denoise the top two spatial layers.
  kVp9DenoiserOnYTwoSpatialLayers
};

// Number of temporal layers used by each of the 13 supported layering
// patterns (indexed by the command-line "mode" argument).
static int mode_to_num_layers[13] = { 1, 2, 2, 3, 3, 3, 3, 5, 2, 3, 3, 3, 3 };

// For rate control encoding stats.
struct RateControlMetrics {
  // Number of input frames per layer.
  int layer_input_frames[VPX_TS_MAX_LAYERS];
  // Total (cumulative) number of encoded frames per layer.
  int layer_tot_enc_frames[VPX_TS_MAX_LAYERS];
  // Number of encoded non-key frames per layer.
  int layer_enc_frames[VPX_TS_MAX_LAYERS];
  // Framerate per layer (cumulative).
  double layer_framerate[VPX_TS_MAX_LAYERS];
  // Target average frame size per layer (per-frame-bandwidth per layer).
  double layer_pfb[VPX_TS_MAX_LAYERS];
  // Actual average frame size per layer.
  double layer_avg_frame_size[VPX_TS_MAX_LAYERS];
  // Average rate mismatch per layer (|target - actual| / target).
  double layer_avg_rate_mismatch[VPX_TS_MAX_LAYERS];
  // Actual encoding bitrate per layer (cumulative).
  double layer_encoding_bitrate[VPX_TS_MAX_LAYERS];
  // Average of the short-time encoder actual bitrate.
  // TODO(marpan): Should we add these short-time stats for each layer?
  double avg_st_encoding_bitrate;
  // Variance of the short-time encoder actual bitrate.
  double variance_st_encoding_bitrate;
  // Window (number of frames) for computing short-time encoding bitrate.
  int window_size;
  // Number of window measurements.
  int window_count;
  int layer_target_bitrate[VPX_MAX_LAYERS];
};
+ double avg_st_encoding_bitrate; + // Variance of the short-time encoder actual bitrate. + double variance_st_encoding_bitrate; + // Window (number of frames) for computing short-timee encoding bitrate. + int window_size; + // Number of window measurements. + int window_count; + int layer_target_bitrate[VPX_MAX_LAYERS]; +}; + +// Note: these rate control metrics assume only 1 key frame in the +// sequence (i.e., first frame only). So for temporal pattern# 7 +// (which has key frame for every frame on base layer), the metrics +// computation will be off/wrong. +// TODO(marpan): Update these metrics to account for multiple key frames +// in the stream. +static void set_rate_control_metrics(struct RateControlMetrics *rc, + vpx_codec_enc_cfg_t *cfg) { + int i = 0; + // Set the layer (cumulative) framerate and the target layer (non-cumulative) + // per-frame-bandwidth, for the rate control encoding stats below. + const double framerate = cfg->g_timebase.den / cfg->g_timebase.num; + const int ts_number_layers = cfg->ts_number_layers; + rc->layer_framerate[0] = framerate / cfg->ts_rate_decimator[0]; + rc->layer_pfb[0] = + 1000.0 * rc->layer_target_bitrate[0] / rc->layer_framerate[0]; + for (i = 0; i < ts_number_layers; ++i) { + if (i > 0) { + rc->layer_framerate[i] = framerate / cfg->ts_rate_decimator[i]; + rc->layer_pfb[i] = + 1000.0 * + (rc->layer_target_bitrate[i] - rc->layer_target_bitrate[i - 1]) / + (rc->layer_framerate[i] - rc->layer_framerate[i - 1]); + } + rc->layer_input_frames[i] = 0; + rc->layer_enc_frames[i] = 0; + rc->layer_tot_enc_frames[i] = 0; + rc->layer_encoding_bitrate[i] = 0.0; + rc->layer_avg_frame_size[i] = 0.0; + rc->layer_avg_rate_mismatch[i] = 0.0; + } + rc->window_count = 0; + rc->window_size = 15; + rc->avg_st_encoding_bitrate = 0.0; + rc->variance_st_encoding_bitrate = 0.0; + // Target bandwidth for the whole stream. + // Set to layer_target_bitrate for highest layer (total bitrate). 
+ cfg->rc_target_bitrate = rc->layer_target_bitrate[ts_number_layers - 1]; +} + +static void printout_rate_control_summary(struct RateControlMetrics *rc, + vpx_codec_enc_cfg_t *cfg, + int frame_cnt) { + unsigned int i = 0; + int tot_num_frames = 0; + double perc_fluctuation = 0.0; + printf("Total number of processed frames: %d\n\n", frame_cnt - 1); + printf("Rate control layer stats for %d layer(s):\n\n", + cfg->ts_number_layers); + for (i = 0; i < cfg->ts_number_layers; ++i) { + const int num_dropped = + (i > 0) ? (rc->layer_input_frames[i] - rc->layer_enc_frames[i]) + : (rc->layer_input_frames[i] - rc->layer_enc_frames[i] - 1); + tot_num_frames += rc->layer_input_frames[i]; + rc->layer_encoding_bitrate[i] = 0.001 * rc->layer_framerate[i] * + rc->layer_encoding_bitrate[i] / + tot_num_frames; + rc->layer_avg_frame_size[i] = + rc->layer_avg_frame_size[i] / rc->layer_enc_frames[i]; + rc->layer_avg_rate_mismatch[i] = + 100.0 * rc->layer_avg_rate_mismatch[i] / rc->layer_enc_frames[i]; + printf("For layer#: %d \n", i); + printf("Bitrate (target vs actual): %d %f \n", rc->layer_target_bitrate[i], + rc->layer_encoding_bitrate[i]); + printf("Average frame size (target vs actual): %f %f \n", rc->layer_pfb[i], + rc->layer_avg_frame_size[i]); + printf("Average rate_mismatch: %f \n", rc->layer_avg_rate_mismatch[i]); + printf( + "Number of input frames, encoded (non-key) frames, " + "and perc dropped frames: %d %d %f \n", + rc->layer_input_frames[i], rc->layer_enc_frames[i], + 100.0 * num_dropped / rc->layer_input_frames[i]); + printf("\n"); + } + rc->avg_st_encoding_bitrate = rc->avg_st_encoding_bitrate / rc->window_count; + rc->variance_st_encoding_bitrate = + rc->variance_st_encoding_bitrate / rc->window_count - + (rc->avg_st_encoding_bitrate * rc->avg_st_encoding_bitrate); + perc_fluctuation = 100.0 * sqrt(rc->variance_st_encoding_bitrate) / + rc->avg_st_encoding_bitrate; + printf("Short-time stats, for window of %d frames: \n", rc->window_size); + printf("Average, 
rms-variance, and percent-fluct: %f %f %f \n",
+         rc->avg_st_encoding_bitrate, sqrt(rc->variance_st_encoding_bitrate),
+         perc_fluctuation);
+  if ((frame_cnt - 1) != tot_num_frames)
+    die("Error: Number of input frames not equal to output! \n");
+}
+
+#if ROI_MAP
+static void set_roi_map(const char *enc_name, vpx_codec_enc_cfg_t *cfg,
+                        vpx_roi_map_t *roi) {
+  unsigned int i, j;
+  int block_size = 0;
+  uint8_t is_vp8 = strncmp(enc_name, "vp8", 3) == 0 ? 1 : 0;
+  uint8_t is_vp9 = strncmp(enc_name, "vp9", 3) == 0 ? 1 : 0;
+  if (!is_vp8 && !is_vp9) {
+    die("unsupported codec.");
+  }
+  zero(*roi);
+
+  block_size = is_vp9 && !is_vp8 ? 8 : 16;
+
+  // ROI is based on the segments (4 for vp8, 8 for vp9), smallest unit for
+  // segment is 16x16 for vp8, 8x8 for vp9.
+  roi->rows = (cfg->g_h + block_size - 1) / block_size;
+  roi->cols = (cfg->g_w + block_size - 1) / block_size;
+
+  // Applies delta QP on the segment blocks, varies from -63 to 63.
+  // Setting to negative means lower QP (better quality).
+  // Below we set delta_q to the extreme (-63) to show strong effect.
+  // VP8 uses the first 4 segments. VP9 uses all 8 segments.
+  zero(roi->delta_q);
+  roi->delta_q[1] = -63;
+
+  // Applies delta loopfilter strength on the segment blocks, varies from -63 to
+  // 63. Setting to positive means stronger loopfilter. VP8 uses the first 4
+  // segments. VP9 uses all 8 segments.
+  zero(roi->delta_lf);
+
+  if (is_vp8) {
+    // Applies skip encoding threshold on the segment blocks, varies from 0 to
+    // UINT_MAX. Larger value means more skipping of encoding is possible.
+    // This skip threshold only applies on delta frames.
+    zero(roi->static_threshold);
+  }
+
+  if (is_vp9) {
+    // Apply skip segment. Setting to 1 means this block will be copied from
+    // previous frame.
+    zero(roi->skip);
+  }
+
+  if (is_vp9) {
+    // Apply ref frame segment.
+    // -1 : Do not apply this segment.
+    //  0 : Force using intra.
+    //  1 : Force using last.
+    //  2 : Force using golden.
+    //  3 : Force using altref but not used in non-rd pickmode for 0 lag.
+    memset(roi->ref_frame, -1, sizeof(roi->ref_frame));
+    roi->ref_frame[1] = 1;
+  }
+
+  // Use 2 states: 1 is center square, 0 is the rest.
+  roi->roi_map =
+      (uint8_t *)calloc(roi->rows * roi->cols, sizeof(*roi->roi_map));
+  for (i = 0; i < roi->rows; ++i) {
+    for (j = 0; j < roi->cols; ++j) {
+      if (i > (roi->rows >> 2) && i < ((roi->rows * 3) >> 2) &&
+          j > (roi->cols >> 2) && j < ((roi->cols * 3) >> 2)) {
+        roi->roi_map[i * roi->cols + j] = 1;
+      }
+    }
+  }
+}
+
+static void set_roi_skip_map(vpx_codec_enc_cfg_t *cfg, vpx_roi_map_t *roi,
+                             int *skip_map, int *prev_mask_map, int frame_num) {
+  const int block_size = 8;
+  unsigned int i, j;
+  roi->rows = (cfg->g_h + block_size - 1) / block_size;
+  roi->cols = (cfg->g_w + block_size - 1) / block_size;
+  zero(roi->skip);
+  zero(roi->delta_q);
+  zero(roi->delta_lf);
+  memset(roi->ref_frame, -1, sizeof(roi->ref_frame));
+  roi->ref_frame[1] = 1;
+  // Use segment 3 for skip.
+  roi->skip[3] = 1;
+  roi->roi_map =
+      (uint8_t *)calloc(roi->rows * roi->cols, sizeof(*roi->roi_map));
+  for (i = 0; i < roi->rows; ++i) {
+    for (j = 0; j < roi->cols; ++j) {
+      const int idx = i * roi->cols + j;
+      // Use segment 3 for skip.
+      // prev_mask_map keeps track of blocks that have been stably on segment 3
+      // for the past 10 frames. Only skip when the block is on segment 3 in
+      // both current map and prev_mask_map.
+      if (skip_map[idx] == 1 && prev_mask_map[idx] == 1) roi->roi_map[idx] = 3;
+      // Reset it every 10 frames so it doesn't propagate for too many frames.
+      if (frame_num % 10 == 0)
+        prev_mask_map[idx] = skip_map[idx];
+      else if (prev_mask_map[idx] == 1 && skip_map[idx] == 0)
+        prev_mask_map[idx] = 0;
+    }
+  }
+}
+#endif
+
+// Temporal scaling parameters:
+// NOTE: The 3 prediction frames cannot be used interchangeably due to
+// differences in the way they are handled throughout the code. The
+// frames should be allocated to layers in the order LAST, GF, ARF.
+// Other combinations work, but may produce slightly inferior results. +static void set_temporal_layer_pattern(int layering_mode, + vpx_codec_enc_cfg_t *cfg, + int *layer_flags, + int *flag_periodicity) { + switch (layering_mode) { + case 0: { + // 1-layer. + int ids[1] = { 0 }; + cfg->ts_periodicity = 1; + *flag_periodicity = 1; + cfg->ts_number_layers = 1; + cfg->ts_rate_decimator[0] = 1; + memcpy(cfg->ts_layer_id, ids, sizeof(ids)); + // Update L only. + layer_flags[0] = + VPX_EFLAG_FORCE_KF | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; + break; + } + case 1: { + // 2-layers, 2-frame period. + int ids[2] = { 0, 1 }; + cfg->ts_periodicity = 2; + *flag_periodicity = 2; + cfg->ts_number_layers = 2; + cfg->ts_rate_decimator[0] = 2; + cfg->ts_rate_decimator[1] = 1; + memcpy(cfg->ts_layer_id, ids, sizeof(ids)); +#if 1 + // 0=L, 1=GF, Intra-layer prediction enabled. + layer_flags[0] = VPX_EFLAG_FORCE_KF | VP8_EFLAG_NO_UPD_GF | + VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_REF_GF | + VP8_EFLAG_NO_REF_ARF; + layer_flags[1] = + VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_REF_ARF; +#else + // 0=L, 1=GF, Intra-layer prediction disabled. + layer_flags[0] = VPX_EFLAG_FORCE_KF | VP8_EFLAG_NO_UPD_GF | + VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_REF_GF | + VP8_EFLAG_NO_REF_ARF; + layer_flags[1] = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST | + VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_REF_LAST; +#endif + break; + } + case 2: { + // 2-layers, 3-frame period. + int ids[3] = { 0, 1, 1 }; + cfg->ts_periodicity = 3; + *flag_periodicity = 3; + cfg->ts_number_layers = 2; + cfg->ts_rate_decimator[0] = 3; + cfg->ts_rate_decimator[1] = 1; + memcpy(cfg->ts_layer_id, ids, sizeof(ids)); + // 0=L, 1=GF, Intra-layer prediction enabled. 
+ layer_flags[0] = VPX_EFLAG_FORCE_KF | VP8_EFLAG_NO_REF_GF | + VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_GF | + VP8_EFLAG_NO_UPD_ARF; + layer_flags[1] = layer_flags[2] = + VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_ARF | + VP8_EFLAG_NO_UPD_LAST; + break; + } + case 3: { + // 3-layers, 6-frame period. + int ids[6] = { 0, 2, 2, 1, 2, 2 }; + cfg->ts_periodicity = 6; + *flag_periodicity = 6; + cfg->ts_number_layers = 3; + cfg->ts_rate_decimator[0] = 6; + cfg->ts_rate_decimator[1] = 3; + cfg->ts_rate_decimator[2] = 1; + memcpy(cfg->ts_layer_id, ids, sizeof(ids)); + // 0=L, 1=GF, 2=ARF, Intra-layer prediction enabled. + layer_flags[0] = VPX_EFLAG_FORCE_KF | VP8_EFLAG_NO_REF_GF | + VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_GF | + VP8_EFLAG_NO_UPD_ARF; + layer_flags[3] = + VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST; + layer_flags[1] = layer_flags[2] = layer_flags[4] = layer_flags[5] = + VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_LAST; + break; + } + case 4: { + // 3-layers, 4-frame period. + int ids[4] = { 0, 2, 1, 2 }; + cfg->ts_periodicity = 4; + *flag_periodicity = 4; + cfg->ts_number_layers = 3; + cfg->ts_rate_decimator[0] = 4; + cfg->ts_rate_decimator[1] = 2; + cfg->ts_rate_decimator[2] = 1; + memcpy(cfg->ts_layer_id, ids, sizeof(ids)); + // 0=L, 1=GF, 2=ARF, Intra-layer prediction disabled. + layer_flags[0] = VPX_EFLAG_FORCE_KF | VP8_EFLAG_NO_REF_GF | + VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_GF | + VP8_EFLAG_NO_UPD_ARF; + layer_flags[2] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | + VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST; + layer_flags[1] = layer_flags[3] = + VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF | + VP8_EFLAG_NO_UPD_ARF; + break; + } + case 5: { + // 3-layers, 4-frame period. 
+ int ids[4] = { 0, 2, 1, 2 }; + cfg->ts_periodicity = 4; + *flag_periodicity = 4; + cfg->ts_number_layers = 3; + cfg->ts_rate_decimator[0] = 4; + cfg->ts_rate_decimator[1] = 2; + cfg->ts_rate_decimator[2] = 1; + memcpy(cfg->ts_layer_id, ids, sizeof(ids)); + // 0=L, 1=GF, 2=ARF, Intra-layer prediction enabled in layer 1, disabled + // in layer 2. + layer_flags[0] = VPX_EFLAG_FORCE_KF | VP8_EFLAG_NO_REF_GF | + VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_GF | + VP8_EFLAG_NO_UPD_ARF; + layer_flags[2] = + VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ARF; + layer_flags[1] = layer_flags[3] = + VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF | + VP8_EFLAG_NO_UPD_ARF; + break; + } + case 6: { + // 3-layers, 4-frame period. + int ids[4] = { 0, 2, 1, 2 }; + cfg->ts_periodicity = 4; + *flag_periodicity = 4; + cfg->ts_number_layers = 3; + cfg->ts_rate_decimator[0] = 4; + cfg->ts_rate_decimator[1] = 2; + cfg->ts_rate_decimator[2] = 1; + memcpy(cfg->ts_layer_id, ids, sizeof(ids)); + // 0=L, 1=GF, 2=ARF, Intra-layer prediction enabled. + layer_flags[0] = VPX_EFLAG_FORCE_KF | VP8_EFLAG_NO_REF_GF | + VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_GF | + VP8_EFLAG_NO_UPD_ARF; + layer_flags[2] = + VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ARF; + layer_flags[1] = layer_flags[3] = + VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF; + break; + } + case 7: { + // NOTE: Probably of academic interest only. + // 5-layers, 16-frame period. 
+ int ids[16] = { 0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4 }; + cfg->ts_periodicity = 16; + *flag_periodicity = 16; + cfg->ts_number_layers = 5; + cfg->ts_rate_decimator[0] = 16; + cfg->ts_rate_decimator[1] = 8; + cfg->ts_rate_decimator[2] = 4; + cfg->ts_rate_decimator[3] = 2; + cfg->ts_rate_decimator[4] = 1; + memcpy(cfg->ts_layer_id, ids, sizeof(ids)); + layer_flags[0] = VPX_EFLAG_FORCE_KF; + layer_flags[1] = layer_flags[3] = layer_flags[5] = layer_flags[7] = + layer_flags[9] = layer_flags[11] = layer_flags[13] = layer_flags[15] = + VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF | + VP8_EFLAG_NO_UPD_ARF; + layer_flags[2] = layer_flags[6] = layer_flags[10] = layer_flags[14] = + VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_GF; + layer_flags[4] = layer_flags[12] = + VP8_EFLAG_NO_REF_LAST | VP8_EFLAG_NO_UPD_ARF; + layer_flags[8] = VP8_EFLAG_NO_REF_LAST | VP8_EFLAG_NO_REF_GF; + break; + } + case 8: { + // 2-layers, with sync point at first frame of layer 1. + int ids[2] = { 0, 1 }; + cfg->ts_periodicity = 2; + *flag_periodicity = 8; + cfg->ts_number_layers = 2; + cfg->ts_rate_decimator[0] = 2; + cfg->ts_rate_decimator[1] = 1; + memcpy(cfg->ts_layer_id, ids, sizeof(ids)); + // 0=L, 1=GF. + // ARF is used as predictor for all frames, and is only updated on + // key frame. Sync point every 8 frames. + + // Layer 0: predict from L and ARF, update L and G. + layer_flags[0] = + VPX_EFLAG_FORCE_KF | VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_UPD_ARF; + // Layer 1: sync point: predict from L and ARF, and update G. + layer_flags[1] = + VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ARF; + // Layer 0, predict from L and ARF, update L. + layer_flags[2] = + VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; + // Layer 1: predict from L, G and ARF, and update G. + layer_flags[3] = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST | + VP8_EFLAG_NO_UPD_ENTROPY; + // Layer 0. + layer_flags[4] = layer_flags[2]; + // Layer 1. 
+ layer_flags[5] = layer_flags[3]; + // Layer 0. + layer_flags[6] = layer_flags[4]; + // Layer 1. + layer_flags[7] = layer_flags[5]; + break; + } + case 9: { + // 3-layers: Sync points for layer 1 and 2 every 8 frames. + int ids[4] = { 0, 2, 1, 2 }; + cfg->ts_periodicity = 4; + *flag_periodicity = 8; + cfg->ts_number_layers = 3; + cfg->ts_rate_decimator[0] = 4; + cfg->ts_rate_decimator[1] = 2; + cfg->ts_rate_decimator[2] = 1; + memcpy(cfg->ts_layer_id, ids, sizeof(ids)); + // 0=L, 1=GF, 2=ARF. + layer_flags[0] = VPX_EFLAG_FORCE_KF | VP8_EFLAG_NO_REF_GF | + VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_GF | + VP8_EFLAG_NO_UPD_ARF; + layer_flags[1] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | + VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF; + layer_flags[2] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | + VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ARF; + layer_flags[3] = layer_flags[5] = + VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF; + layer_flags[4] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | + VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; + layer_flags[6] = + VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ARF; + layer_flags[7] = VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF | + VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_ENTROPY; + break; + } + case 10: { + // 3-layers structure where ARF is used as predictor for all frames, + // and is only updated on key frame. + // Sync points for layer 1 and 2 every 8 frames. + + int ids[4] = { 0, 2, 1, 2 }; + cfg->ts_periodicity = 4; + *flag_periodicity = 8; + cfg->ts_number_layers = 3; + cfg->ts_rate_decimator[0] = 4; + cfg->ts_rate_decimator[1] = 2; + cfg->ts_rate_decimator[2] = 1; + memcpy(cfg->ts_layer_id, ids, sizeof(ids)); + // 0=L, 1=GF, 2=ARF. + // Layer 0: predict from L and ARF; update L and G. + layer_flags[0] = + VPX_EFLAG_FORCE_KF | VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_REF_GF; + // Layer 2: sync point: predict from L and ARF; update none. 
+ layer_flags[1] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_UPD_GF | + VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST | + VP8_EFLAG_NO_UPD_ENTROPY; + // Layer 1: sync point: predict from L and ARF; update G. + layer_flags[2] = + VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST; + // Layer 2: predict from L, G, ARF; update none. + layer_flags[3] = VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | + VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ENTROPY; + // Layer 0: predict from L and ARF; update L. + layer_flags[4] = + VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_REF_GF; + // Layer 2: predict from L, G, ARF; update none. + layer_flags[5] = layer_flags[3]; + // Layer 1: predict from L, G, ARF; update G. + layer_flags[6] = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST; + // Layer 2: predict from L, G, ARF; update none. + layer_flags[7] = layer_flags[3]; + break; + } + case 11: { + // 3-layers structure with one reference frame. + // This works same as temporal_layering_mode 3. + // This was added to compare with vp9_spatial_svc_encoder. + + // 3-layers, 4-frame period. + int ids[4] = { 0, 2, 1, 2 }; + cfg->ts_periodicity = 4; + *flag_periodicity = 4; + cfg->ts_number_layers = 3; + cfg->ts_rate_decimator[0] = 4; + cfg->ts_rate_decimator[1] = 2; + cfg->ts_rate_decimator[2] = 1; + memcpy(cfg->ts_layer_id, ids, sizeof(ids)); + // 0=L, 1=GF, 2=ARF, Intra-layer prediction disabled. + layer_flags[0] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | + VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; + layer_flags[2] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | + VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST; + layer_flags[1] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | + VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF; + layer_flags[3] = VP8_EFLAG_NO_REF_LAST | VP8_EFLAG_NO_REF_ARF | + VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF; + break; + } + case 12: + default: { + // 3-layers structure as in case 10, but no sync/refresh points for + // layer 1 and 2. 
+ int ids[4] = { 0, 2, 1, 2 }; + cfg->ts_periodicity = 4; + *flag_periodicity = 8; + cfg->ts_number_layers = 3; + cfg->ts_rate_decimator[0] = 4; + cfg->ts_rate_decimator[1] = 2; + cfg->ts_rate_decimator[2] = 1; + memcpy(cfg->ts_layer_id, ids, sizeof(ids)); + // 0=L, 1=GF, 2=ARF. + // Layer 0: predict from L and ARF; update L. + layer_flags[0] = + VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_REF_GF; + layer_flags[4] = layer_flags[0]; + // Layer 1: predict from L, G, ARF; update G. + layer_flags[2] = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST; + layer_flags[6] = layer_flags[2]; + // Layer 2: predict from L, G, ARF; update none. + layer_flags[1] = VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | + VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ENTROPY; + layer_flags[3] = layer_flags[1]; + layer_flags[5] = layer_flags[1]; + layer_flags[7] = layer_flags[1]; + break; + } + } +} + +#if ROI_MAP +static void read_mask(FILE *mask_file, int *seg_map) { + int mask_rows, mask_cols, i, j; + int *map_start = seg_map; + fscanf(mask_file, "%d %d\n", &mask_cols, &mask_rows); + for (i = 0; i < mask_rows; i++) { + for (j = 0; j < mask_cols; j++) { + fscanf(mask_file, "%d ", &seg_map[j]); + // reverse the bit + seg_map[j] = 1 - seg_map[j]; + } + seg_map += mask_cols; + } + seg_map = map_start; +} +#endif + +int main(int argc, char **argv) { + VpxVideoWriter *outfile[VPX_TS_MAX_LAYERS] = { NULL }; + vpx_codec_ctx_t codec; + vpx_codec_enc_cfg_t cfg; + int frame_cnt = 0; + vpx_image_t raw; + vpx_codec_err_t res; + unsigned int width; + unsigned int height; + uint32_t error_resilient = 0; + int speed; + int frame_avail; + int got_data; + int flags = 0; + unsigned int i; + int pts = 0; // PTS starts at 0. + int frame_duration = 1; // 1 timebase tick per frame. 
+ int layering_mode = 0; + int layer_flags[VPX_TS_MAX_PERIODICITY] = { 0 }; + int flag_periodicity = 1; +#if ROI_MAP + vpx_roi_map_t roi; +#endif + vpx_svc_layer_id_t layer_id; + const VpxInterface *encoder = NULL; + struct VpxInputContext input_ctx; + struct RateControlMetrics rc; + int64_t cx_time = 0; + const int min_args_base = 13; +#if CONFIG_VP9_HIGHBITDEPTH + vpx_bit_depth_t bit_depth = VPX_BITS_8; + int input_bit_depth = 8; + const int min_args = min_args_base + 1; +#else + const int min_args = min_args_base; +#endif // CONFIG_VP9_HIGHBITDEPTH + double sum_bitrate = 0.0; + double sum_bitrate2 = 0.0; + double framerate = 30.0; +#if ROI_MAP + FILE *mask_file = NULL; + int block_size = 8; + int mask_rows = 0; + int mask_cols = 0; + int *mask_map; + int *prev_mask_map; +#endif + zero(rc.layer_target_bitrate); + memset(&layer_id, 0, sizeof(vpx_svc_layer_id_t)); + memset(&input_ctx, 0, sizeof(input_ctx)); + /* Setup default input stream settings */ + input_ctx.framerate.numerator = 30; + input_ctx.framerate.denominator = 1; + input_ctx.only_i420 = 1; + input_ctx.bit_depth = 0; + + exec_name = argv[0]; + // Check usage and arguments. + if (argc < min_args) { +#if CONFIG_VP9_HIGHBITDEPTH + die("Usage: %s <infile> <outfile> <codec_type(vp8/vp9)> <width> <height> " + "<rate_num> <rate_den> <speed> <frame_drop_threshold> " + "<error_resilient> <threads> <mode> " + "<Rate_0> ... <Rate_nlayers-1> <bit-depth> \n", + argv[0]); +#else + die("Usage: %s <infile> <outfile> <codec_type(vp8/vp9)> <width> <height> " + "<rate_num> <rate_den> <speed> <frame_drop_threshold> " + "<error_resilient> <threads> <mode> " + "<Rate_0> ... 
<Rate_nlayers-1> \n", + argv[0]); +#endif // CONFIG_VP9_HIGHBITDEPTH + } + + encoder = get_vpx_encoder_by_name(argv[3]); + if (!encoder) die("Unsupported codec."); + + printf("Using %s\n", vpx_codec_iface_name(encoder->codec_interface())); + + width = (unsigned int)strtoul(argv[4], NULL, 0); + height = (unsigned int)strtoul(argv[5], NULL, 0); + if (width < 16 || width % 2 || height < 16 || height % 2) { + die("Invalid resolution: %d x %d", width, height); + } + + layering_mode = (int)strtol(argv[12], NULL, 0); + if (layering_mode < 0 || layering_mode > 13) { + die("Invalid layering mode (0..12) %s", argv[12]); + } + +#if ROI_MAP + if (argc != min_args + mode_to_num_layers[layering_mode] + 1) { + die("Invalid number of arguments"); + } +#else + if (argc != min_args + mode_to_num_layers[layering_mode]) { + die("Invalid number of arguments"); + } +#endif + + input_ctx.filename = argv[1]; + open_input_file(&input_ctx); + +#if CONFIG_VP9_HIGHBITDEPTH + switch (strtol(argv[argc - 1], NULL, 0)) { + case 8: + bit_depth = VPX_BITS_8; + input_bit_depth = 8; + break; + case 10: + bit_depth = VPX_BITS_10; + input_bit_depth = 10; + break; + case 12: + bit_depth = VPX_BITS_12; + input_bit_depth = 12; + break; + default: die("Invalid bit depth (8, 10, 12) %s", argv[argc - 1]); + } + + // Y4M reader has its own allocation. + if (input_ctx.file_type != FILE_TYPE_Y4M) { + if (!vpx_img_alloc( + &raw, + bit_depth == VPX_BITS_8 ? VPX_IMG_FMT_I420 : VPX_IMG_FMT_I42016, + width, height, 32)) { + die("Failed to allocate image (%dx%d)", width, height); + } + } +#else + // Y4M reader has its own allocation. + if (input_ctx.file_type != FILE_TYPE_Y4M) { + if (!vpx_img_alloc(&raw, VPX_IMG_FMT_I420, width, height, 32)) { + die("Failed to allocate image (%dx%d)", width, height); + } + } +#endif // CONFIG_VP9_HIGHBITDEPTH + + // Populate encoder configuration. 
+  res = vpx_codec_enc_config_default(encoder->codec_interface(), &cfg, 0);
+  if (res) {
+    printf("Failed to get config: %s\n", vpx_codec_err_to_string(res));
+    return EXIT_FAILURE;
+  }
+
+  // Update the default configuration with our settings.
+  cfg.g_w = width;
+  cfg.g_h = height;
+
+#if CONFIG_VP9_HIGHBITDEPTH
+  if (bit_depth != VPX_BITS_8) {
+    cfg.g_bit_depth = bit_depth;
+    cfg.g_input_bit_depth = input_bit_depth;
+    cfg.g_profile = 2;
+  }
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+  // Timebase format e.g. 30fps: numerator=1, denominator = 30.
+  cfg.g_timebase.num = (int)strtol(argv[6], NULL, 0);
+  cfg.g_timebase.den = (int)strtol(argv[7], NULL, 0);
+
+  speed = (int)strtol(argv[8], NULL, 0);
+  if (speed < 0) {
+    die("Invalid speed setting: must be positive");
+  }
+  if (strncmp(encoder->name, "vp9", 3) == 0 && speed > 9) {
+    warn("Mapping speed %d to speed 9.\n", speed);
+  }
+
+  for (i = min_args_base;
+       (int)i < min_args_base + mode_to_num_layers[layering_mode]; ++i) {
+    rc.layer_target_bitrate[i - 13] = (int)strtol(argv[i], NULL, 0);
+    if (strncmp(encoder->name, "vp8", 3) == 0)
+      cfg.ts_target_bitrate[i - 13] = rc.layer_target_bitrate[i - 13];
+    else if (strncmp(encoder->name, "vp9", 3) == 0)
+      cfg.layer_target_bitrate[i - 13] = rc.layer_target_bitrate[i - 13];
+  }
+
+  // Real time parameters.
+  cfg.rc_dropframe_thresh = (unsigned int)strtoul(argv[9], NULL, 0);
+  cfg.rc_end_usage = VPX_CBR;
+  cfg.rc_min_quantizer = 2;
+  cfg.rc_max_quantizer = 56;
+  if (strncmp(encoder->name, "vp9", 3) == 0) cfg.rc_max_quantizer = 52;
+  cfg.rc_undershoot_pct = 50;
+  cfg.rc_overshoot_pct = 50;
+  cfg.rc_buf_initial_sz = 600;
+  cfg.rc_buf_optimal_sz = 600;
+  cfg.rc_buf_sz = 1000;
+
+  // Disable dynamic resizing by default.
+  cfg.rc_resize_allowed = 0;
+
+  // Use 1 thread as default.
+ cfg.g_threads = (unsigned int)strtoul(argv[11], NULL, 0); + + error_resilient = (uint32_t)strtoul(argv[10], NULL, 0); + if (error_resilient != 0 && error_resilient != 1) { + die("Invalid value for error resilient (0, 1): %d.", error_resilient); + } + // Enable error resilient mode. + cfg.g_error_resilient = error_resilient; + cfg.g_lag_in_frames = 0; + cfg.kf_mode = VPX_KF_AUTO; + + // Disable automatic keyframe placement. + cfg.kf_min_dist = cfg.kf_max_dist = 3000; + + cfg.temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_BYPASS; + + set_temporal_layer_pattern(layering_mode, &cfg, layer_flags, + &flag_periodicity); + + set_rate_control_metrics(&rc, &cfg); + + if (input_ctx.file_type == FILE_TYPE_Y4M) { + if (input_ctx.width != cfg.g_w || input_ctx.height != cfg.g_h) { + die("Incorrect width or height: %d x %d", cfg.g_w, cfg.g_h); + } + if (input_ctx.framerate.numerator != cfg.g_timebase.den || + input_ctx.framerate.denominator != cfg.g_timebase.num) { + die("Incorrect framerate: numerator %d denominator %d", + cfg.g_timebase.num, cfg.g_timebase.den); + } + } + + framerate = cfg.g_timebase.den / cfg.g_timebase.num; + // Open an output file for each stream. + for (i = 0; i < cfg.ts_number_layers; ++i) { + char file_name[PATH_MAX]; + VpxVideoInfo info; + info.codec_fourcc = encoder->fourcc; + info.frame_width = cfg.g_w; + info.frame_height = cfg.g_h; + info.time_base.numerator = cfg.g_timebase.num; + info.time_base.denominator = cfg.g_timebase.den; + + snprintf(file_name, sizeof(file_name), "%s_%d.ivf", argv[2], i); + outfile[i] = vpx_video_writer_open(file_name, kContainerIVF, &info); + if (!outfile[i]) die("Failed to open %s for writing", file_name); + + assert(outfile[i] != NULL); + } + // No spatial layers in this encoder. + cfg.ss_number_layers = 1; + +// Initialize codec. +#if CONFIG_VP9_HIGHBITDEPTH + if (vpx_codec_enc_init( + &codec, encoder->codec_interface(), &cfg, + bit_depth == VPX_BITS_8 ? 
0 : VPX_CODEC_USE_HIGHBITDEPTH)) +#else + if (vpx_codec_enc_init(&codec, encoder->codec_interface(), &cfg, 0)) +#endif // CONFIG_VP9_HIGHBITDEPTH + die("Failed to initialize encoder"); + +#if ROI_MAP + mask_rows = (cfg.g_h + block_size - 1) / block_size; + mask_cols = (cfg.g_w + block_size - 1) / block_size; + mask_map = (int *)calloc(mask_rows * mask_cols, sizeof(*mask_map)); + prev_mask_map = (int *)calloc(mask_rows * mask_cols, sizeof(*mask_map)); +#endif + + if (strncmp(encoder->name, "vp8", 3) == 0) { + vpx_codec_control(&codec, VP8E_SET_CPUUSED, -speed); + vpx_codec_control(&codec, VP8E_SET_NOISE_SENSITIVITY, kVp8DenoiserOff); + vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 1); + vpx_codec_control(&codec, VP8E_SET_GF_CBR_BOOST_PCT, 0); +#if ROI_MAP + set_roi_map(encoder->name, &cfg, &roi); + if (vpx_codec_control(&codec, VP8E_SET_ROI_MAP, &roi)) + die_codec(&codec, "Failed to set ROI map"); +#endif + } else if (strncmp(encoder->name, "vp9", 3) == 0) { + vpx_svc_extra_cfg_t svc_params; + memset(&svc_params, 0, sizeof(svc_params)); + vpx_codec_control(&codec, VP9E_SET_POSTENCODE_DROP, 0); + vpx_codec_control(&codec, VP9E_SET_DISABLE_OVERSHOOT_MAXQ_CBR, 0); + vpx_codec_control(&codec, VP8E_SET_CPUUSED, speed); + vpx_codec_control(&codec, VP9E_SET_AQ_MODE, 3); + vpx_codec_control(&codec, VP9E_SET_GF_CBR_BOOST_PCT, 0); + vpx_codec_control(&codec, VP9E_SET_FRAME_PARALLEL_DECODING, 0); + vpx_codec_control(&codec, VP9E_SET_FRAME_PERIODIC_BOOST, 0); + vpx_codec_control(&codec, VP9E_SET_NOISE_SENSITIVITY, kVp9DenoiserOff); + vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 1); + vpx_codec_control(&codec, VP9E_SET_TUNE_CONTENT, 0); + vpx_codec_control(&codec, VP9E_SET_TILE_COLUMNS, get_msb(cfg.g_threads)); + vpx_codec_control(&codec, VP9E_SET_DISABLE_LOOPFILTER, 0); + + if (cfg.g_threads > 1) + vpx_codec_control(&codec, VP9E_SET_ROW_MT, 1); + else + vpx_codec_control(&codec, VP9E_SET_ROW_MT, 0); + if (vpx_codec_control(&codec, VP9E_SET_SVC, layering_mode > 0 
? 1 : 0)) + die_codec(&codec, "Failed to set SVC"); + for (i = 0; i < cfg.ts_number_layers; ++i) { + svc_params.max_quantizers[i] = cfg.rc_max_quantizer; + svc_params.min_quantizers[i] = cfg.rc_min_quantizer; + } + svc_params.scaling_factor_num[0] = cfg.g_h; + svc_params.scaling_factor_den[0] = cfg.g_h; + vpx_codec_control(&codec, VP9E_SET_SVC_PARAMETERS, &svc_params); + } + if (strncmp(encoder->name, "vp8", 3) == 0) { + vpx_codec_control(&codec, VP8E_SET_SCREEN_CONTENT_MODE, 0); + } + vpx_codec_control(&codec, VP8E_SET_TOKEN_PARTITIONS, 1); + // This controls the maximum target size of the key frame. + // For generating smaller key frames, use a smaller max_intra_size_pct + // value, like 100 or 200. + { + const int max_intra_size_pct = 1000; + vpx_codec_control(&codec, VP8E_SET_MAX_INTRA_BITRATE_PCT, + max_intra_size_pct); + } + + frame_avail = 1; + while (frame_avail || got_data) { + struct vpx_usec_timer timer; + vpx_codec_iter_t iter = NULL; + const vpx_codec_cx_pkt_t *pkt; +#if ROI_MAP + char mask_file_name[255]; +#endif + // Update the temporal layer_id. No spatial layers in this test. 
+ layer_id.spatial_layer_id = 0; + layer_id.temporal_layer_id = + cfg.ts_layer_id[frame_cnt % cfg.ts_periodicity]; + layer_id.temporal_layer_id_per_spatial[0] = layer_id.temporal_layer_id; + if (strncmp(encoder->name, "vp9", 3) == 0) { + vpx_codec_control(&codec, VP9E_SET_SVC_LAYER_ID, &layer_id); + } else if (strncmp(encoder->name, "vp8", 3) == 0) { + vpx_codec_control(&codec, VP8E_SET_TEMPORAL_LAYER_ID, + layer_id.temporal_layer_id); + } + flags = layer_flags[frame_cnt % flag_periodicity]; + if (layering_mode == 0) flags = 0; +#if ROI_MAP + snprintf(mask_file_name, sizeof(mask_file_name), "%s%05d.txt", + argv[argc - 1], frame_cnt); + mask_file = fopen(mask_file_name, "r"); + if (mask_file != NULL) { + read_mask(mask_file, mask_map); + fclose(mask_file); + // set_roi_map(encoder->name, &cfg, &roi); + set_roi_skip_map(&cfg, &roi, mask_map, prev_mask_map, frame_cnt); + if (vpx_codec_control(&codec, VP9E_SET_ROI_MAP, &roi)) + die_codec(&codec, "Failed to set ROI map"); + } +#endif + frame_avail = read_frame(&input_ctx, &raw); + if (frame_avail) ++rc.layer_input_frames[layer_id.temporal_layer_id]; + vpx_usec_timer_start(&timer); + if (vpx_codec_encode(&codec, frame_avail ? &raw : NULL, pts, 1, flags, + VPX_DL_REALTIME)) { + die_codec(&codec, "Failed to encode frame"); + } + vpx_usec_timer_mark(&timer); + cx_time += vpx_usec_timer_elapsed(&timer); + // Reset KF flag. + if (layering_mode != 7) { + layer_flags[0] &= ~VPX_EFLAG_FORCE_KF; + } + got_data = 0; + while ((pkt = vpx_codec_get_cx_data(&codec, &iter))) { + got_data = 1; + switch (pkt->kind) { + case VPX_CODEC_CX_FRAME_PKT: + for (i = cfg.ts_layer_id[frame_cnt % cfg.ts_periodicity]; + i < cfg.ts_number_layers; ++i) { + vpx_video_writer_write_frame(outfile[i], pkt->data.frame.buf, + pkt->data.frame.sz, pts); + ++rc.layer_tot_enc_frames[i]; + rc.layer_encoding_bitrate[i] += 8.0 * pkt->data.frame.sz; + // Keep count of rate control stats per layer (for non-key frames). 
+ if (i == cfg.ts_layer_id[frame_cnt % cfg.ts_periodicity] && + !(pkt->data.frame.flags & VPX_FRAME_IS_KEY)) { + rc.layer_avg_frame_size[i] += 8.0 * pkt->data.frame.sz; + rc.layer_avg_rate_mismatch[i] += + fabs(8.0 * pkt->data.frame.sz - rc.layer_pfb[i]) / + rc.layer_pfb[i]; + ++rc.layer_enc_frames[i]; + } + } + // Update for short-time encoding bitrate states, for moving window + // of size rc->window, shifted by rc->window / 2. + // Ignore first window segment, due to key frame. + if (rc.window_size == 0) rc.window_size = 15; + if (frame_cnt > rc.window_size) { + sum_bitrate += 0.001 * 8.0 * pkt->data.frame.sz * framerate; + if (frame_cnt % rc.window_size == 0) { + rc.window_count += 1; + rc.avg_st_encoding_bitrate += sum_bitrate / rc.window_size; + rc.variance_st_encoding_bitrate += + (sum_bitrate / rc.window_size) * + (sum_bitrate / rc.window_size); + sum_bitrate = 0.0; + } + } + // Second shifted window. + if (frame_cnt > rc.window_size + rc.window_size / 2) { + sum_bitrate2 += 0.001 * 8.0 * pkt->data.frame.sz * framerate; + if (frame_cnt > 2 * rc.window_size && + frame_cnt % rc.window_size == 0) { + rc.window_count += 1; + rc.avg_st_encoding_bitrate += sum_bitrate2 / rc.window_size; + rc.variance_st_encoding_bitrate += + (sum_bitrate2 / rc.window_size) * + (sum_bitrate2 / rc.window_size); + sum_bitrate2 = 0.0; + } + } + break; + default: break; + } + } + ++frame_cnt; + pts += frame_duration; + } +#if ROI_MAP + free(mask_map); + free(prev_mask_map); +#endif + close_input_file(&input_ctx); + printout_rate_control_summary(&rc, &cfg, frame_cnt); + printf("\n"); + printf("Frame cnt and encoding time/FPS stats for encoding: %d %f %f \n", + frame_cnt, 1000 * (float)cx_time / (double)(frame_cnt * 1000000), + 1000000 * (double)frame_cnt / (double)cx_time); + + if (vpx_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec"); + + // Try to rewrite the output file headers with the actual frame count. 
+ for (i = 0; i < cfg.ts_number_layers; ++i) vpx_video_writer_close(outfile[i]); + + if (input_ctx.file_type != FILE_TYPE_Y4M) { + vpx_img_free(&raw); + } + +#if ROI_MAP + free(roi.roi_map); +#endif + return EXIT_SUCCESS; +} |