#!/usr/bin/python
#
# Copyright (c) 2016, Alliance for Open Media. All rights reserved.
#
# This source code is subject to the terms of the BSD 2 Clause License and
# the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
# was not distributed with this source code in the LICENSE file, you can
# obtain it at www.aomedia.org/license/software. If the Alliance for Open
# Media Patent License 1.0 was not distributed with this source code in the
# PATENTS file, you can obtain it at www.aomedia.org/license/patent.
#

"""Converts video encoding result data from text files to visualization
data source."""

__author__ = "jzern@google.com (James Zern),"
__author__ += "jimbankoski@google.com (Jim Bankoski)"

import fnmatch
import numpy as np
import scipy as sp
import scipy.interpolate
import os
import re
import string
import sys
import math
import warnings

import gviz_api

from os.path import basename
from os.path import splitext

warnings.simplefilter('ignore', np.RankWarning)
warnings.simplefilter('ignore', RuntimeWarning)

def bdsnr2(metric_set1, metric_set2):
  """
  BJONTEGAARD    Bjontegaard metric calculation adapted
  Bjontegaard's SNR metric computes the average difference in decibels
  between two rate-distortion curves [1]. This adaptation fixes
  inconsistencies that appear when the original polynomial curve fit goes
  awry: the curve fit is replaced with a Piecewise Cubic Hermite
  Interpolating Polynomial, which is then integrated by evaluating it at
  small intervals and applying the trapezoid method.

  metric_set1 - list of tuples (bitrate, metric) for the first graph
  metric_set2 - list of tuples (bitrate, metric) for the second graph
  """

  if not metric_set1 or not metric_set2:
    return 0.0

  try:

    # pchip_interpolate requires keys sorted by the x axis. For BD-SNR the
    # x axis is the (log) bitrate, so sort by bitrate, the first tuple element.
    metric_set1.sort()
    metric_set2.sort()

    # Pull the log of the rate and clamped psnr from metric_sets.
    log_rate1 = [math.log(x[0]) for x in metric_set1]
    metric1 = [100.0 if x[1] == float('inf') else x[1] for x in metric_set1]
    log_rate2 = [math.log(x[0]) for x in metric_set2]
    metric2 = [100.0 if x[1] == float('inf') else x[1] for x in metric_set2]

    # Integration interval. This metric only works on the area that's
    # overlapping. Extrapolation of these things is sketchy so we avoid it.
    min_int = max([min(log_rate1), min(log_rate2)])
    max_int = min([max(log_rate1), max(log_rate2)])

    # No overlap means no sensible metric is possible.
    if max_int <= min_int:
      return 0.0

    # Use Piecewise Cubic Hermite Interpolating Polynomial interpolation to
    # create 100 new sample points separated by a fixed interval.
    lin = np.linspace(min_int, max_int, num=100, retstep=True)
    interval = lin[1]
    samples = lin[0]
    v1 = scipy.interpolate.pchip_interpolate(log_rate1, metric1, samples)
    v2 = scipy.interpolate.pchip_interpolate(log_rate2, metric2, samples)

    # Calculate the integral using the trapezoid method on the samples.
    int_v1 = np.trapz(v1, dx=interval)
    int_v2 = np.trapz(v2, dx=interval)

    # Calculate the average improvement.
    avg_exp_diff = (int_v2 - int_v1) / (max_int - min_int)

  except (TypeError, ZeroDivisionError, ValueError, np.RankWarning):
    return 0.0

  return avg_exp_diff

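# Illustrative sketch (hypothetical data, not part of the original script):
# if the second curve reaches the same bitrates at a uniformly higher metric,
# bdsnr2 reports roughly that gain.  For example:
#
#   curve_a = [(100.0, 38.0), (200.0, 41.0), (400.0, 44.0), (800.0, 47.0)]
#   curve_b = [(100.0, 39.0), (200.0, 42.0), (400.0, 45.0), (800.0, 48.0)]
#   bdsnr2(curve_a, curve_b)   # -> approximately 1.0 (dB)
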
def bdrate2(metric_set1, metric_set2):
  """
  BJONTEGAARD    Bjontegaard metric calculation adapted
  Bjontegaard's rate metric computes the average % saving in bitrate between
  two rate-distortion curves [1]. This adaptation fixes inconsistencies that
  appear when the original polynomial curve fit goes awry: the curve fit is
  replaced with a Piecewise Cubic Hermite Interpolating Polynomial, which is
  then integrated by evaluating it at small intervals and applying the
  trapezoid method.

  metric_set1 - list of tuples (bitrate, metric) for the first graph
  metric_set2 - list of tuples (bitrate, metric) for the second graph
  """

  if not metric_set1 or not metric_set2:
    return 0.0

  try:

    # pchip_interpolate requires keys sorted by the x axis. For BD-rate the
    # x axis is the metric, not the bitrate, so sort by metric.
    metric_set1.sort(key=lambda tup: tup[1])
    metric_set2.sort(key=lambda tup: tup[1])

    # Pull the log of the rate and clamped psnr from metric_sets.
    log_rate1 = [math.log(x[0]) for x in metric_set1]
    metric1 = [100.0 if x[1] == float('inf') else x[1] for x in metric_set1]
    log_rate2 = [math.log(x[0]) for x in metric_set2]
    metric2 = [100.0 if x[1] == float('inf') else x[1] for x in metric_set2]

    # Integration interval. This metric only works on the area that's
    # overlapping. Extrapolation of these things is sketchy so we avoid it.
    min_int = max([min(metric1), min(metric2)])
    max_int = min([max(metric1), max(metric2)])

    # No overlap means no sensible metric is possible.
    if max_int <= min_int:
      return 0.0

    # Use Piecewise Cubic Hermite Interpolating Polynomial interpolation to
    # create 100 new sample points separated by a fixed interval.
    lin = np.linspace(min_int, max_int, num=100, retstep=True)
    interval = lin[1]
    samples = lin[0]
    v1 = scipy.interpolate.pchip_interpolate(metric1, log_rate1, samples)
    v2 = scipy.interpolate.pchip_interpolate(metric2, log_rate2, samples)

    # Calculate the integral using the trapezoid method on the samples.
    int_v1 = np.trapz(v1, dx=interval)
    int_v2 = np.trapz(v2, dx=interval)

    # Calculate the average improvement.
    avg_exp_diff = (int_v2 - int_v1) / (max_int - min_int)

  except (TypeError, ZeroDivisionError, ValueError, np.RankWarning):
    return 0.0

  # Convert to a percentage.
  avg_diff = (math.exp(avg_exp_diff) - 1) * 100

  return avg_diff

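# Illustrative sketch (hypothetical data, not part of the original script):
# if the second curve hits the same quality points at 10% lower bitrate,
# bdrate2 reports roughly a 10% saving; a negative value means the second
# curve is cheaper.  For example:
#
#   curve_a = [(100.0, 38.0), (200.0, 41.0), (400.0, 44.0), (800.0, 47.0)]
#   curve_b = [(90.0, 38.0), (180.0, 41.0), (360.0, 44.0), (720.0, 47.0)]
#   bdrate2(curve_a, curve_b)   # -> approximately -10.0 (percent)
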
def FillForm(string_for_substitution, dictionary_of_vars):
  """
  Substitutes every //%% ... %%// marker in the command string with the value
  of the variable named between the markers.
  """
  return_string = string_for_substitution
  for i in re.findall("//%%(.*)%%//", string_for_substitution):
    return_string = re.sub("//%%" + i + "%%//", dictionary_of_vars[i],
                           return_string)
  return return_string

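# Illustrative sketch (hypothetical template text, not from the original
# script): FillForm("Report for //%%baseline_dir%%//", {"baseline_dir": "aom"})
# returns "Report for aom".  The html template read in HandleFiles() below
# presumably carries markers of this form for each variable it needs.
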
def HasMetrics(line):
  """
  Returns whether the line holds numeric metric data.  In the metrics files
  produced by aomenc, header lines begin with "Bitrate" while data lines
  begin with a digit.
  """
  # True only if the first character of the first word on the line is a digit.
  if len(line) == 0:
    return False
  if len(line.split()) == 0:
    return False
  if line.split()[0][0:1].isdigit():
    return True
  return False

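# Illustrative sketch (using the sample stats lines from the HandleFiles
# docstring below):
#
#   HasMetrics("Bitrate AVGPsnr GLBPsnr ...")  # -> False (header line)
#   HasMetrics("25.911 38.242 38.104 ...")     # -> True  (data line)
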
def GetMetrics(file_name):
  metric_file = open(file_name, "r")
  return metric_file.readline().split()

def ParseMetricFile(file_name, metric_column):
  metric_set1 = set([])
  metric_file = open(file_name, "r")
  for line in metric_file:
    metrics = line.split()
    if HasMetrics(line):
      if metric_column < len(metrics):
        try:
          tuple = float(metrics[0]), float(metrics[metric_column])
        except ValueError:
          tuple = float(metrics[0]), 0
      else:
        tuple = float(metrics[0]), 0
      metric_set1.add(tuple)
  metric_set1_sorted = sorted(metric_set1)
  return metric_set1_sorted

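# Illustrative sketch (file name hypothetical; column numbers follow the
# sample stats file in the HandleFiles docstring below):
# ParseMetricFile("aom/foo.stt", 1) would return a bitrate-sorted list such as
# [(25.911, 38.242), (49.982, 41.264), ...], i.e. (bitrate, AVGPsnr) pairs
# ready for FileBetter() and the BD functions above.
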
def FileBetter(file_name_1, file_name_2, metric_column, method):
  """
  Compares two data files and determines which is better and by how much,
  using the comparison given by method ('avg', 'dsnr' or 'drate').
  metric_column selects the metric to compare.
  """
  # Store and parse our two files into lists of unique tuples.

  # Read the two files, parsing out lines starting with bitrate.
  metric_set1_sorted = ParseMetricFile(file_name_1, metric_column)
  metric_set2_sorted = ParseMetricFile(file_name_2, metric_column)


  def GraphBetter(metric_set1_sorted, metric_set2_sorted, base_is_set_2):
    """
    Search through the sorted metric set for metrics on either side of
    the metric from file 1.  Since both lists are sorted we really
    should not have to search through the entire range, but these
    are small files."""
    total_bitrate_difference_ratio = 0.0
    count = 0
    for bitrate, metric in metric_set1_sorted:
      if bitrate == 0:
        continue
      for i in range(len(metric_set2_sorted) - 1):
        s2_bitrate_0, s2_metric_0 = metric_set2_sorted[i]
        s2_bitrate_1, s2_metric_1 = metric_set2_sorted[i + 1]
        # We have a point on either side of our metric range.
        if metric > s2_metric_0 and metric <= s2_metric_1:

          # Calculate a slope.
          if s2_metric_1 - s2_metric_0 != 0:
            metric_slope = ((s2_bitrate_1 - s2_bitrate_0) /
                            (s2_metric_1 - s2_metric_0))
          else:
            metric_slope = 0

          estimated_s2_bitrate = (s2_bitrate_0 + (metric - s2_metric_0) *
                                  metric_slope)

          if estimated_s2_bitrate == 0:
            continue
          # Calculate percentage difference as given by base.
          if base_is_set_2 == 0:
            bitrate_difference_ratio = ((bitrate - estimated_s2_bitrate) /
                                        bitrate)
          else:
            bitrate_difference_ratio = ((bitrate - estimated_s2_bitrate) /
                                        estimated_s2_bitrate)

          total_bitrate_difference_ratio += bitrate_difference_ratio
          count += 1
          break

    # Calculate the average improvement between graphs.
    if count != 0:
      avg = total_bitrate_difference_ratio / count
    else:
      avg = 0.0

    return avg

  # Be fair to both graphs by testing all the points in each.
  if method == 'avg':
    avg_improvement = 50 * (
        GraphBetter(metric_set1_sorted, metric_set2_sorted, 1) -
        GraphBetter(metric_set2_sorted, metric_set1_sorted, 0))
  elif method == 'dsnr':
    avg_improvement = bdsnr2(metric_set1_sorted, metric_set2_sorted)
  else:
    avg_improvement = bdrate2(metric_set2_sorted, metric_set1_sorted)

  return avg_improvement

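# Summary of the three comparison methods used below (a reading of the code
# above, not text from the original script): 'avg' runs GraphBetter in both
# directions and averages the per-point bitrate difference at matched quality
# into a percentage (the 50 is 100% over the two directions); 'dsnr' is the
# Bjontegaard delta SNR in dB via bdsnr2; 'drate' is the Bjontegaard delta
# rate in percent via bdrate2, apparently with the argument order swapped so
# that, like the other methods, a positive value means the comparison file
# improves on the baseline.
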
def HandleFiles(variables):
  """
  This script creates html for displaying metric data produced from data
  in a video stats file, as created by the AOM project when enable_psnr
  is turned on:

  Usage: visual_metrics.py template.html pattern base_dir sub_dir [ sub_dir2 ..]

  The script parses each metrics file [see below] that matches the
  statfile_pattern in the baseline directory and looks for the file that
  matches that same file in each of the sub_dirs, and compares the resultant
  metrics bitrate, avg psnr, glb psnr, and ssim.

  It provides a table in which each row is a file in the baseline directory,
  and a column for each subdir, with the cells representing how that clip
  compares to baseline for that subdir.  A graph is given for each which
  compares file size to that metric.  If you click on a point in the graph it
  zooms in on that point.

  a SAMPLE metrics file:

  Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM  Time(us)
   25.911   38.242   38.104   38.258   38.121   75.790     14103
  Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM  Time(us)
   49.982   41.264   41.129   41.255   41.122   83.993     19817
  Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM  Time(us)
   74.967   42.911   42.767   42.899   42.756   87.928     17332
  Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM  Time(us)
  100.012   43.983   43.838   43.881   43.738   89.695     25389
  Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM  Time(us)
  149.980   45.338   45.203   45.184   45.043   91.591     25438
  Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM  Time(us)
  199.852   46.225   46.123   46.113   45.999   92.679     28302
  Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM  Time(us)
  249.922   46.864   46.773   46.777   46.673   93.334     27244
  Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM  Time(us)
  299.998   47.366   47.281   47.317   47.220   93.844     27137
  Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM  Time(us)
  349.769   47.746   47.677   47.722   47.648   94.178     32226
  Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM  Time(us)
  399.773   48.032   47.971   48.013   47.946   94.362     36203

  sample use:
  visual_metrics.py template.html "*stt" aom aom_b aom_c > metrics.html
  """

  # The template file is the html file into which we will write the
  # data from the stats file, formatted correctly for the gviz_api.
  template_file = open(variables[1], "r")
  page_template = template_file.read()
  template_file.close()

  # This is the path match pattern for finding stats files amongst
  # all the other files it could be. eg: *.stt
  file_pattern = variables[2]

  # This is the directory with files that we will use to do the comparison
  # against.
  baseline_dir = variables[3]
  snrs = ''
  filestable = {}

  filestable['dsnr'] = ''
  filestable['drate'] = ''
  filestable['avg'] = ''

  # Dirs is directories after the baseline to compare to the base.
  dirs = variables[4:len(variables)]

  # Find the metric files in the baseline directory.
  dir_list = sorted(fnmatch.filter(os.listdir(baseline_dir), file_pattern))

  metrics = GetMetrics(baseline_dir + "/" + dir_list[0])

  metrics_js = 'metrics = ["' + '", "'.join(metrics) + '"];'

  for column in range(1, len(metrics)):

    for metric in ['avg', 'dsnr', 'drate']:
      description = {"file": ("string", "File")}

      # Go through each directory and add a column header to our description.
      countoverall = {}
      sumoverall = {}

      for directory in dirs:
        description[directory] = ("number", directory)
        countoverall[directory] = 0
        sumoverall[directory] = 0

      # Data holds the data for the visualization, name given comes from
      # gviz_api sample code.
      data = []
      for filename in dir_list:
        row = {'file': splitext(basename(filename))[0]}
        baseline_file_name = baseline_dir + "/" + filename

        # Read the metric file from each of the directories in our list.
        for directory in dirs:
          metric_file_name = directory + "/" + filename

          # If there is a metric file in the current directory, open it
          # and calculate its overall difference between it and the baseline
          # directory's metric file.
          if os.path.isfile(metric_file_name):
            overall = FileBetter(baseline_file_name, metric_file_name,
                                 column, metric)
            row[directory] = overall

            sumoverall[directory] += overall
            countoverall[directory] += 1

        data.append(row)

      # Add the overall numbers.
      row = {"file": "OVERALL"}
      for directory in dirs:
        # Guard against directories with no matching files.
        if countoverall[directory]:
          row[directory] = sumoverall[directory] / countoverall[directory]
      data.append(row)

      # Write the tables out.
      data_table = gviz_api.DataTable(description)
      data_table.LoadData(data)

      filestable[metric] = (filestable[metric] + "filestable_" + metric +
                            "[" + str(column) + "]=" +
                            data_table.ToJSon(columns_order=["file"] + dirs) +
                            "\n")

    filestable_avg = filestable['avg']
    filestable_dpsnr = filestable['dsnr']
    filestable_drate = filestable['drate']

    # Now we collect all the data for all the graphs. First the column
    # headers, which will be Datarate and then each directory.
    columns = ("datarate", baseline_dir)
    description = {"datarate": ("number", "Datarate")}
    for directory in dirs:
      description[directory] = ("number", directory)

    description[baseline_dir] = ("number", baseline_dir)

    snrs = snrs + "snrs[" + str(column) + "] = ["

    # Now collect the data for the graphs, file by file.
    for filename in dir_list:

      data = []

      # Collect the file in each directory and store all of its metrics
      # in the associated gviz metrics table.
      all_dirs = dirs + [baseline_dir]
      for directory in all_dirs:

        metric_file_name = directory + "/" + filename
        if not os.path.isfile(metric_file_name):
          continue

        # Read and parse the metrics file, storing it to the data we'll
        # use for the gviz_api.DataTable.
        metrics = ParseMetricFile(metric_file_name, column)
        for bitrate, metric in metrics:
          data.append({"datarate": bitrate, directory: metric})

      data_table = gviz_api.DataTable(description)
      data_table.LoadData(data)
      snrs = snrs + "'" + data_table.ToJSon(
          columns_order=tuple(["datarate", baseline_dir] + dirs)) + "',"

    snrs = snrs + "]\n"

    formatters = ""
    for i in range(len(dirs)):
      formatters = "%s formatter.format(better, %d);" % (formatters, i + 1)

  print FillForm(page_template, vars())
  return

if len(sys.argv) < 3:
  print HandleFiles.__doc__
else:
  HandleFiles(sys.argv)