1
0
Fork 0
firefox/third_party/aom/examples/photon_noise_table.c
Daniel Baumann 5e9a113729
Adding upstream version 140.0.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
2025-06-25 09:37:52 +02:00

398 lines
17 KiB
C
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/*
* Copyright (c) 2021, Alliance for Open Media. All rights reserved.
*
* This source code is subject to the terms of the BSD 2 Clause License and
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
* was not distributed with this source code in the LICENSE file, you can
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
// This tool creates a film grain table, for use in stills and videos,
// representing the noise that one would get by shooting with a digital camera
// at a given light level. Much of the noise in digital images is photon shot
// noise, which is due to the characteristics of photon arrival and grows in
// standard deviation as the square root of the expected number of photons
// captured.
// https://www.photonstophotos.net/Emil%20Martinec/noise.html#shotnoise
//
// The proxy used by this tool for the amount of light captured is the ISO value
// such that the focal plane exposure at the time of capture would have been
// mapped by a 35mm camera to the output lightness observed in the image. That
// is, if one were to shoot on a 35mm camera (36×24mm sensor) at the nominal
// exposure for that ISO setting, the resulting image should contain noise of
// the same order of magnitude as generated by this tool.
//
// Example usage:
//
// ./photon_noise_table --width=3840 --height=2160 --iso=25600 -o noise.tbl
// # Then, for example:
// aomenc --film-grain-table=noise.tbl ...
// # Or:
// avifenc -c aom -a film-grain-table=noise.tbl ...
//
// The (mostly) square-root relationship between light intensity and noise
// amplitude holds in linear light, but AV1 streams are most often encoded
// non-linearly, and the film grain is applied to those non-linear values.
// Therefore, this tool must account for the non-linearity, and this is
// controlled by the optional `--transfer-function` (or `-t`) parameter, which
// specifies the tone response curve that will be used when encoding the actual
// image. The default for this tool is sRGB, which is approximately similar to
// an encoding gamma of 1/2.2 (i.e. a decoding gamma of 2.2) though not quite
// identical.
//
// As alluded to above, the tool assumes that the image is taken from the
// entirety of a 36×24mm (“35mm format”) sensor. If that assumption does not
// hold, then a “35mm-equivalent ISO value” that can be passed to the tool can
// be obtained by multiplying the true ISO value by the ratio of 36×24mm to the
// area that was actually used. For formats that approximately share the same
// aspect ratio, this is often expressed as the square of the “equivalence
// ratio” which is the ratio of their diagonals. For example, APS-C (often
// ~24×16mm) is said to have an equivalence ratio of 1.5 relative to the 35mm
// format, and therefore ISO 1000 on APS-C and ISO 1000×1.5² = 2250 on 35mm
// produce an image of the same lightness from the same amount of light spread
// onto their respective surface areas (resulting in different focal plane
// exposures), and those images will thus have similar amounts of noise if the
// cameras are of similar technology. https://doi.org/10.1117/1.OE.57.11.110801
//
// The tool needs to know the resolution of the images to which its grain tables
// will be applied so that it can know how the light on the sensor was shared
// between its pixels. As a general rule, while a higher pixel count will lead
// to more noise per pixel, when the final image is viewed at the same physical
// size, that noise will tend to “average out” to the same amount over a given
// area, since there will be more pixels in it which, in aggregate, will have
// received essentially as much light. Put differently, the amount of noise
// depends on the scale at which it is measured, and the decision for this tool
// was to make that scale relative to the image instead of its constituent
// samples. For more on this, see:
//
// https://www.photonstophotos.net/Emil%20Martinec/noise-p3.html#pixelsize
// https://www.dpreview.com/articles/5365920428/the-effect-of-pixel-and-sensor-sizes-on-noise/2
// https://www.dpreview.com/videos/7940373140/dpreview-tv-why-lower-resolution-sensors-are-not-better-in-low-light
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "aom_dsp/grain_table.h"
#include "common/args.h"
#include "common/tools_common.h"
static const char *exec_name;
static const struct arg_enum_list transfer_functions[] = {
{ "bt470m", AOM_CICP_TC_BT_470_M }, { "bt470bg", AOM_CICP_TC_BT_470_B_G },
{ "srgb", AOM_CICP_TC_SRGB }, { "smpte2084", AOM_CICP_TC_SMPTE_2084 },
{ "hlg", AOM_CICP_TC_HLG }, ARG_ENUM_LIST_END
};
static arg_def_t help_arg =
ARG_DEF("h", "help", 0, "Show the available options");
static arg_def_t width_arg =
ARG_DEF("w", "width", 1, "Width of the image in pixels (required)");
static arg_def_t height_arg =
ARG_DEF("l", "height", 1, "Height of the image in pixels (required)");
static arg_def_t iso_arg = ARG_DEF(
"i", "iso", 1, "ISO setting indicative of the light level (required)");
static arg_def_t output_arg =
ARG_DEF("o", "output", 1,
"Output file to which to write the film grain table (required)");
static arg_def_t transfer_function_arg =
ARG_DEF_ENUM("t", "transfer-function", 1,
"Transfer function used by the encoded image (default = sRGB)",
transfer_functions);
void usage_exit(void) {
fprintf(stderr,
"Usage: %s [--transfer-function=<tf>] --width=<width> "
"--height=<height> --iso=<iso> --output=<output.tbl>\n",
exec_name);
exit(EXIT_FAILURE);
}
typedef struct {
float (*to_linear)(float);
float (*from_linear)(float);
// In linear output light. This would typically be 0.18 for SDR (this matches
// the definition of Standard Output Sensitivity from ISO 12232:2019), but in
// HDR, we certainly do not want to consider 18% of the maximum output a
// “mid-tone”, as it would be e.g. 1800 cd/m² for SMPTE ST 2084 (PQ).
float mid_tone;
} transfer_function_t;
static const transfer_function_t *find_transfer_function(
aom_transfer_characteristics_t tc);
typedef struct {
int width;
int height;
int iso_setting;
const transfer_function_t *transfer_function;
const char *output_filename;
} photon_noise_args_t;
static void parse_args(int argc, char **argv,
photon_noise_args_t *photon_noise_args) {
static const arg_def_t *args[] = { &help_arg, &width_arg,
&height_arg, &iso_arg,
&output_arg, &transfer_function_arg,
NULL };
struct arg arg;
int width_set = 0, height_set = 0, iso_set = 0, output_set = 0, i;
photon_noise_args->transfer_function =
find_transfer_function(AOM_CICP_TC_SRGB);
for (i = 1; i < argc; i += arg.argv_step) {
arg.argv_step = 1;
if (arg_match(&arg, &help_arg, argv + i)) {
arg_show_usage(stdout, args);
exit(EXIT_SUCCESS);
} else if (arg_match(&arg, &width_arg, argv + i)) {
photon_noise_args->width = arg_parse_int(&arg);
width_set = 1;
} else if (arg_match(&arg, &height_arg, argv + i)) {
photon_noise_args->height = arg_parse_int(&arg);
height_set = 1;
} else if (arg_match(&arg, &iso_arg, argv + i)) {
photon_noise_args->iso_setting = arg_parse_int(&arg);
iso_set = 1;
} else if (arg_match(&arg, &output_arg, argv + i)) {
photon_noise_args->output_filename = arg.val;
output_set = 1;
} else if (arg_match(&arg, &transfer_function_arg, argv + i)) {
const aom_transfer_characteristics_t tc = arg_parse_enum(&arg);
photon_noise_args->transfer_function = find_transfer_function(tc);
} else {
fatal("unrecognized argument \"%s\", see --help for available options",
argv[i]);
}
}
if (!width_set) {
fprintf(stderr, "Missing required parameter --width\n");
exit(EXIT_FAILURE);
}
if (!height_set) {
fprintf(stderr, "Missing required parameter --height\n");
exit(EXIT_FAILURE);
}
if (!iso_set) {
fprintf(stderr, "Missing required parameter --iso\n");
exit(EXIT_FAILURE);
}
if (!output_set) {
fprintf(stderr, "Missing required parameter --output\n");
exit(EXIT_FAILURE);
}
}
static float maxf(float a, float b) { return a > b ? a : b; }
static float minf(float a, float b) { return a < b ? a : b; }
static float gamma22_to_linear(float g) { return powf(g, 2.2f); }
static float gamma22_from_linear(float l) { return powf(l, 1 / 2.2f); }
static float gamma28_to_linear(float g) { return powf(g, 2.8f); }
static float gamma28_from_linear(float l) { return powf(l, 1 / 2.8f); }
static float srgb_to_linear(float srgb) {
return srgb <= 0.04045f ? srgb / 12.92f
: powf((srgb + 0.055f) / 1.055f, 2.4f);
}
static float srgb_from_linear(float linear) {
return linear <= 0.0031308f ? 12.92f * linear
: 1.055f * powf(linear, 1 / 2.4f) - 0.055f;
}
static const float kPqM1 = 2610.f / 16384;
static const float kPqM2 = 128 * 2523.f / 4096;
static const float kPqC1 = 3424.f / 4096;
static const float kPqC2 = 32 * 2413.f / 4096;
static const float kPqC3 = 32 * 2392.f / 4096;
static float pq_to_linear(float pq) {
const float pq_pow_inv_m2 = powf(pq, 1.f / kPqM2);
return powf(maxf(0, pq_pow_inv_m2 - kPqC1) / (kPqC2 - kPqC3 * pq_pow_inv_m2),
1.f / kPqM1);
}
static float pq_from_linear(float linear) {
const float linear_pow_m1 = powf(linear, kPqM1);
return powf((kPqC1 + kPqC2 * linear_pow_m1) / (1 + kPqC3 * linear_pow_m1),
kPqM2);
}
// Note: it is perhaps debatable whether “linear” for HLG should be scene light
// or display light. Here, it is implemented in terms of display light assuming
// a nominal peak display luminance of 1000 cd/m², hence the system γ of 1.2. To
// make it scene light instead, the OOTF (powf(x, 1.2f)) and its inverse should
// be removed from the functions below, and the .mid_tone should be replaced
// with powf(26.f / 1000, 1 / 1.2f).
static const float kHlgA = 0.17883277f;
static const float kHlgB = 0.28466892f;
static const float kHlgC = 0.55991073f;
static float hlg_to_linear(float hlg) {
// EOTF = OOTF ∘ OETF⁻¹
const float linear =
hlg <= 0.5f ? hlg * hlg / 3 : (expf((hlg - kHlgC) / kHlgA) + kHlgB) / 12;
return powf(linear, 1.2f);
}
static float hlg_from_linear(float linear) {
// EOTF⁻¹ = OETF ∘ OOTF⁻¹
linear = powf(linear, 1.f / 1.2f);
return linear <= 1.f / 12 ? sqrtf(3 * linear)
: kHlgA * logf(12 * linear - kHlgB) + kHlgC;
}
static const transfer_function_t *find_transfer_function(
aom_transfer_characteristics_t tc) {
static const transfer_function_t
kGamma22TransferFunction = { .to_linear = &gamma22_to_linear,
.from_linear = &gamma22_from_linear,
.mid_tone = 0.18f },
kGamma28TransferFunction = { .to_linear = &gamma28_to_linear,
.from_linear = &gamma28_from_linear,
.mid_tone = 0.18f },
kSRgbTransferFunction = { .to_linear = &srgb_to_linear,
.from_linear = &srgb_from_linear,
.mid_tone = 0.18f },
kPqTransferFunction = { .to_linear = &pq_to_linear,
.from_linear = &pq_from_linear,
// https://www.itu.int/pub/R-REP-BT.2408-4-2021
// page 6 (PDF page 8)
.mid_tone = 26.f / 10000 },
kHlgTransferFunction = { .to_linear = &hlg_to_linear,
.from_linear = &hlg_from_linear,
.mid_tone = 26.f / 1000 };
switch (tc) {
case AOM_CICP_TC_BT_470_M: return &kGamma22TransferFunction;
case AOM_CICP_TC_BT_470_B_G: return &kGamma28TransferFunction;
case AOM_CICP_TC_SRGB: return &kSRgbTransferFunction;
case AOM_CICP_TC_SMPTE_2084: return &kPqTransferFunction;
case AOM_CICP_TC_HLG: return &kHlgTransferFunction;
default: fatal("unimplemented transfer function %d", tc);
}
}
static void generate_photon_noise(const photon_noise_args_t *photon_noise_args,
aom_film_grain_t *film_grain) {
// Assumes a daylight-like spectrum.
// https://www.strollswithmydog.com/effective-quantum-efficiency-of-sensor/#:~:text=11%2C260%20photons/um%5E2/lx-s
static const float kPhotonsPerLxSPerUm2 = 11260;
// Order of magnitude for cameras in the 2010-2020 decade, taking the CFA into
// account.
static const float kEffectiveQuantumEfficiency = 0.20f;
// Also reasonable values for current cameras. The read noise is typically
// higher than this at low ISO settings but it matters less there.
static const float kPhotoResponseNonUniformity = 0.005f;
static const float kInputReferredReadNoise = 1.5f;
// Focal plane exposure for a mid-tone (typically a 18% reflectance card), in
// lx·s.
const float mid_tone_exposure = 10.f / photon_noise_args->iso_setting;
// In microns. Assumes a 35mm sensor (36mm × 24mm).
const float pixel_area_um2 = (36000 * 24000.f) / (photon_noise_args->width *
photon_noise_args->height);
const float mid_tone_electrons_per_pixel = kEffectiveQuantumEfficiency *
kPhotonsPerLxSPerUm2 *
mid_tone_exposure * pixel_area_um2;
const float max_electrons_per_pixel =
mid_tone_electrons_per_pixel /
photon_noise_args->transfer_function->mid_tone;
int i;
film_grain->num_y_points = 14;
for (i = 0; i < film_grain->num_y_points; ++i) {
float x = i / (film_grain->num_y_points - 1.f);
const float linear = photon_noise_args->transfer_function->to_linear(x);
const float electrons_per_pixel = max_electrons_per_pixel * linear;
// Quadrature sum of the relevant sources of noise, in electrons rms. Photon
// shot noise is sqrt(electrons) so we can skip the square root and the
// squaring.
// https://en.wikipedia.org/wiki/Addition_in_quadrature
// https://doi.org/10.1117/3.725073
const float noise_in_electrons =
sqrtf(kInputReferredReadNoise * kInputReferredReadNoise +
electrons_per_pixel +
(kPhotoResponseNonUniformity * kPhotoResponseNonUniformity *
electrons_per_pixel * electrons_per_pixel));
const float linear_noise = noise_in_electrons / max_electrons_per_pixel;
const float linear_range_start = maxf(0.f, linear - 2 * linear_noise);
const float linear_range_end = minf(1.f, linear + 2 * linear_noise);
const float tf_slope =
(photon_noise_args->transfer_function->from_linear(linear_range_end) -
photon_noise_args->transfer_function->from_linear(
linear_range_start)) /
(linear_range_end - linear_range_start);
float encoded_noise = linear_noise * tf_slope;
x = roundf(255 * x);
encoded_noise = minf(255.f, roundf(255 * 7.88f * encoded_noise));
film_grain->scaling_points_y[i][0] = (int)x;
film_grain->scaling_points_y[i][1] = (int)encoded_noise;
}
film_grain->apply_grain = 1;
film_grain->update_parameters = 1;
film_grain->num_cb_points = 0;
film_grain->num_cr_points = 0;
film_grain->scaling_shift = 8;
film_grain->ar_coeff_lag = 0;
film_grain->ar_coeffs_cb[0] = 0;
film_grain->ar_coeffs_cr[0] = 0;
film_grain->ar_coeff_shift = 6;
film_grain->cb_mult = 0;
film_grain->cb_luma_mult = 0;
film_grain->cb_offset = 0;
film_grain->cr_mult = 0;
film_grain->cr_luma_mult = 0;
film_grain->cr_offset = 0;
film_grain->overlap_flag = 1;
film_grain->random_seed = 7391;
film_grain->chroma_scaling_from_luma = 0;
}
int main(int argc, char **argv) {
photon_noise_args_t photon_noise_args;
aom_film_grain_table_t film_grain_table;
aom_film_grain_t film_grain;
struct aom_internal_error_info error_info;
memset(&photon_noise_args, 0, sizeof(photon_noise_args));
memset(&film_grain_table, 0, sizeof(film_grain_table));
memset(&film_grain, 0, sizeof(film_grain));
memset(&error_info, 0, sizeof(error_info));
exec_name = argv[0];
parse_args(argc, argv, &photon_noise_args);
generate_photon_noise(&photon_noise_args, &film_grain);
aom_film_grain_table_append(&film_grain_table, 0, 9223372036854775807ull,
&film_grain);
if (aom_film_grain_table_write(&film_grain_table,
photon_noise_args.output_filename,
&error_info) != AOM_CODEC_OK) {
aom_film_grain_table_free(&film_grain_table);
fprintf(stderr, "Failed to write film grain table");
if (error_info.has_detail) {
fprintf(stderr, ": %s", error_info.detail);
}
fprintf(stderr, "\n");
return EXIT_FAILURE;
}
aom_film_grain_table_free(&film_grain_table);
return EXIT_SUCCESS;
}