diff options
Diffstat (limited to '')
-rwxr-xr-x | TOOLS/matroska.py | 479 |
1 files changed, 479 insertions, 0 deletions
diff --git a/TOOLS/matroska.py b/TOOLS/matroska.py new file mode 100755 index 0000000..52bac48 --- /dev/null +++ b/TOOLS/matroska.py @@ -0,0 +1,479 @@ +#!/usr/bin/env python3 +""" +Generate C definitions for parsing Matroska files. +Can also be used to directly parse Matroska files and display their contents. +""" + +# +# This file is part of mpv. +# +# mpv is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# mpv is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with mpv. If not, see <http://www.gnu.org/licenses/>. +# + + +elements_ebml = ( + 'EBML, 1a45dfa3, sub', ( + 'EBMLVersion, 4286, uint', + 'EBMLReadVersion, 42f7, uint', + 'EBMLMaxIDLength, 42f2, uint', + 'EBMLMaxSizeLength, 42f3, uint', + 'DocType, 4282, str', + 'DocTypeVersion, 4287, uint', + 'DocTypeReadVersion, 4285, uint', + ), + + 'CRC32, bf, binary', + 'Void, ec, binary', +) + +elements_matroska = ( + 'Segment, 18538067, sub', ( + + 'SeekHead*, 114d9b74, sub', ( + 'Seek*, 4dbb, sub', ( + 'SeekID, 53ab, ebml_id', + 'SeekPosition, 53ac, uint', + ), + ), + + 'Info*, 1549a966, sub', ( + 'SegmentUID, 73a4, binary', + 'PrevUID, 3cb923, binary', + 'NextUID, 3eb923, binary', + 'TimecodeScale, 2ad7b1, uint', + 'DateUTC, 4461, sint', + 'Title, 7ba9, str', + 'MuxingApp, 4d80, str', + 'WritingApp, 5741, str', + 'Duration, 4489, float', + ), + + 'Cluster*, 1f43b675, sub', ( + 'Timecode, e7, uint', + 'BlockGroup*, a0, sub', ( + 'Block, a1, binary', + 'BlockDuration, 9b, uint', + 'ReferenceBlock*, fb, sint', + 'DiscardPadding, 75A2, sint', + 'BlockAdditions, 75A1, sub', ( + 'BlockMore*, A6, sub', ( + 'BlockAddID, EE, uint', + 'BlockAdditional, A5, binary', + ), + ), + ), + 'SimpleBlock*, a3, binary', + ), + + 'Tracks*, 1654ae6b, sub', ( + 'TrackEntry*, ae, sub', ( + 'TrackNumber, d7, uint', + 'TrackUID, 73c5, uint', + 'TrackType, 83, uint', + 'FlagEnabled, b9, uint', + 'FlagDefault, 88, uint', + 'FlagForced, 55aa, uint', + 'FlagLacing, 9c, uint', + 'MinCache, 6de7, uint', + 'MaxCache, 6df8, uint', + 'DefaultDuration, 23e383, uint', + 'TrackTimecodeScale, 23314f, float', + 'MaxBlockAdditionID, 55ee, uint', + 'Name, 536e, str', + 'Language, 22b59c, str', + 'CodecID, 86, str', + 'CodecPrivate, 63a2, binary', + 'CodecName, 258688, str', + 'CodecDecodeAll, aa, uint', + 'CodecDelay, 56aa, uint', + 'SeekPreRoll, 56bb, uint', + 'Video, e0, sub', ( + 'FlagInterlaced, 9a, uint', + 'PixelWidth, b0, uint', + 'PixelHeight, ba, uint', + 'DisplayWidth, 54b0, uint', + 'DisplayHeight, 54ba, uint', + 'DisplayUnit, 54b2, uint', + 'PixelCropTop, 54bb, uint', + 'PixelCropLeft, 54cc, uint', + 'PixelCropRight, 54dd, uint', + 'PixelCropBottom, 54aa, uint', + 'FrameRate, 2383e3, float', + 'ColourSpace, 2eb524, binary', + 'StereoMode, 53b8, uint', + 'Colour, 55b0, sub', ( + 'MatrixCoefficients, 55B1, uint', + 'BitsPerChannel, 55B2, uint', + 'ChromaSubsamplingHorz, 55B3, uint', + 'ChromaSubsamplingVert, 55B4, uint', + 'CbSubsamplingHorz, 55B5, uint', + 'CbSubsamplingVert, 55B6, uint', + 'ChromaSitingHorz, 55B7, uint', + 'ChromaSitingVert, 55B8, uint', + 'Range, 55B9, uint', + 'TransferCharacteristics, 55BA, uint', + 'Primaries, 55BB, uint', + 'MaxCLL, 55BC, uint', + 'MaxFALL, 55BD, uint', + 'MasteringMetadata, 55D0, sub', ( + 'PrimaryRChromaticityX, 55D1, float', + 'PrimaryRChromaticityY, 55D2, float', + 'PrimaryGChromaticityX, 55D3, float', + 'PrimaryGChromaticityY, 55D4, float', + 'PrimaryBChromaticityX, 55D5, float', + 'PrimaryBChromaticityY, 55D6, float', + 'WhitePointChromaticityX, 55D7, float', + 'WhitePointChromaticityY, 55D8, float', + 'LuminanceMax, 55D9, float', + 'LuminanceMin, 55DA, float', + ), + ), + 'Projection, 7670, sub', ( + 'ProjectionType, 7671, uint', + 'ProjectionPrivate, 7672, binary', + 'ProjectionPoseYaw, 7673, float', + 'ProjectionPosePitch, 7674, float', + 'ProjectionPoseRoll, 7675, float', + ), + ), + 'Audio, e1, sub', ( + 'SamplingFrequency, b5, float', + 'OutputSamplingFrequency, 78b5, float', + 'Channels, 9f, uint', + 'BitDepth, 6264, uint', + ), + 'ContentEncodings, 6d80, sub', ( + 'ContentEncoding*, 6240, sub', ( + 'ContentEncodingOrder, 5031, uint', + 'ContentEncodingScope, 5032, uint', + 'ContentEncodingType, 5033, uint', + 'ContentCompression, 5034, sub', ( + 'ContentCompAlgo, 4254, uint', + 'ContentCompSettings, 4255, binary', + ), + ), + ), + ), + ), + + 'Cues, 1c53bb6b, sub', ( + 'CuePoint*, bb, sub', ( + 'CueTime, b3, uint', + 'CueTrackPositions*, b7, sub', ( + 'CueTrack, f7, uint', + 'CueClusterPosition, f1, uint', + 'CueRelativePosition, f0, uint', + 'CueDuration, b2, uint', + ), + ), + ), + + 'Attachments, 1941a469, sub', ( + 'AttachedFile*, 61a7, sub', ( + 'FileDescription, 467e, str', + 'FileName, 466e, str', + 'FileMimeType, 4660, str', + 'FileData, 465c, binary', + 'FileUID, 46ae, uint', + ), + ), + + 'Chapters, 1043a770, sub', ( + 'EditionEntry*, 45b9, sub', ( + 'EditionUID, 45bc, uint', + 'EditionFlagHidden, 45bd, uint', + 'EditionFlagDefault, 45db, uint', + 'EditionFlagOrdered, 45dd, uint', + 'ChapterAtom*, b6, sub', ( + 'ChapterUID, 73c4, uint', + 'ChapterTimeStart, 91, uint', + 'ChapterTimeEnd, 92, uint', + 'ChapterFlagHidden, 98, uint', + 'ChapterFlagEnabled, 4598, uint', + 'ChapterSegmentUID, 6e67, binary', + 'ChapterSegmentEditionUID, 6ebc, uint', + 'ChapterDisplay*, 80, sub', ( + 'ChapString, 85, str', + 'ChapLanguage*, 437c, str', + 'ChapCountry*, 437e, str', + ), + ), + ), + ), + 'Tags*, 1254c367, sub', ( + 'Tag*, 7373, sub', ( + 'Targets, 63c0, sub', ( + 'TargetTypeValue, 68ca, uint', + 'TargetType, 63ca, str', + 'TargetTrackUID, 63c5, uint', + 'TargetEditionUID, 63c9, uint', + 'TargetChapterUID, 63c4, uint', + 'TargetAttachmentUID, 63c6, uint', + ), + 'SimpleTag*, 67c8, sub', ( + 'TagName, 45a3, str', + 'TagLanguage, 447a, str', + 'TagString, 4487, str', + 'TagDefault, 4484, uint', + ), + ), + ), + ), +) + + +import sys +from math import ldexp +from binascii import hexlify + +def byte2num(s): + return int(hexlify(s), 16) + +class EOF(Exception): pass + +def camelcase_to_words(name): + parts = [] + start = 0 + for i in range(1, len(name)): + if name[i].isupper() and (name[i-1].islower() or + name[i+1:i+2].islower()): + parts.append(name[start:i]) + start = i + parts.append(name[start:]) + return '_'.join(parts).lower() + +class MatroskaElement(object): + + def __init__(self, name, elid, valtype, namespace): + self.name = name + self.definename = '{0}_ID_{1}'.format(namespace, name.upper()) + self.fieldname = camelcase_to_words(name) + self.structname = 'ebml_' + self.fieldname + self.elid = elid + self.valtype = valtype + if valtype == 'sub': + self.ebmltype = 'EBML_TYPE_SUBELEMENTS' + self.valname = 'struct ' + self.structname + else: + self.ebmltype = 'EBML_TYPE_' + valtype.upper() + try: + self.valname = {'uint': 'uint64_t', 'str': 'char *', + 'binary': 'bstr', 'ebml_id': 'uint32_t', + 'float': 'double', 'sint': 'int64_t', + }[valtype] + except KeyError: + raise SyntaxError('Unrecognized value type ' + valtype) + self.subelements = () + + def add_subelements(self, subelements): + self.subelements = subelements + self.subids = {x[0].elid for x in subelements} + +elementd = {} +elementlist = [] +def parse_elems(l, namespace): + subelements = [] + for el in l: + if isinstance(el, str): + name, hexid, eltype = [x.strip() for x in el.split(',')] + hexid = hexid.lower() + multiple = name.endswith('*') + name = name.strip('*') + new = MatroskaElement(name, hexid, eltype, namespace) + elementd[hexid] = new + elementlist.append(new) + subelements.append((new, multiple)) + else: + new.add_subelements(parse_elems(el, namespace)) + return subelements + +parse_elems(elements_ebml, 'EBML') +parse_elems(elements_matroska, 'MATROSKA') + +def printf(out, *args): + out.write(' '.join(str(x) for x in args)) + out.write('\n') + +def generate_C_header(out): + printf(out, '// Generated by TOOLS/matroska.py, do not edit manually') + printf(out) + + for el in elementlist: + printf(out, '#define {0.definename:40} 0x{0.elid}'.format(el)) + + printf(out) + + for el in reversed(elementlist): + if not el.subelements: + continue + printf(out) + printf(out, 'struct {0.structname} {{'.format(el)) + l = max(len(subel.valname) for subel, multiple in el.subelements)+1 + for subel, multiple in el.subelements: + printf(out, ' {e.valname:{l}} {star}{e.fieldname};'.format( + e=subel, l=l, star=' *'[multiple])) + printf(out) + for subel, multiple in el.subelements: + printf(out, ' int n_{0.fieldname};'.format(subel)) + printf(out, '};') + + for el in elementlist: + if not el.subelements: + continue + printf(out, 'extern const struct ebml_elem_desc {0.structname}_desc;'.format(el)) + + printf(out) + printf(out, '#define MAX_EBML_SUBELEMENTS', max(len(el.subelements) + for el in elementlist)) + + +def generate_C_definitions(out): + printf(out, '// Generated by TOOLS/matroska.py, do not edit manually') + printf(out) + for el in reversed(elementlist): + printf(out) + if el.subelements: + printf(out, '#define N', el.fieldname) + printf(out, 'E_S("{0}", {1})'.format(el.name, len(el.subelements))) + for subel, multiple in el.subelements: + printf(out, 'F({0.definename}, {0.fieldname}, {1})'.format( + subel, int(multiple))) + printf(out, '}};') + printf(out, '#undef N') + else: + printf(out, 'E("{0.name}", {0.fieldname}, {0.ebmltype})'.format(el)) + +def read(s, length): + t = s.read(length) + if len(t) != length: + raise EOF + return t + +def read_id(s): + t = read(s, 1) + i = 0 + mask = 128 + if ord(t) == 0: + raise SyntaxError + while not ord(t) & mask: + i += 1 + mask >>= 1 + t += read(s, i) + return t + +def read_vint(s): + t = read(s, 1) + i = 0 + mask = 128 + if ord(t) == 0: + raise SyntaxError + while not ord(t) & mask: + i += 1 + mask >>= 1 + t = bytes((ord(t) & (mask - 1),)) + t += read(s, i) + return i+1, byte2num(t) + +def read_str(s, length): + return read(s, length) + +def read_uint(s, length): + t = read(s, length) + return byte2num(t) + +def read_sint(s, length): + i = read_uint(s, length) + mask = 1 << (length * 8 - 1) + if i & mask: + i -= 2 * mask + return i + +def read_float(s, length): + t = read(s, length) + i = byte2num(t) + if length == 4: + f = ldexp((i & 0x7fffff) + (1 << 23), (i >> 23 & 0xff) - 150) + if i & (1 << 31): + f = -f + elif length == 8: + f = ldexp((i & ((1 << 52) - 1)) + (1 << 52), (i >> 52 & 0x7ff) - 1075) + if i & (1 << 63): + f = -f + else: + raise SyntaxError + return f + +def parse_one(s, depth, parent, maxlen): + elid = hexlify(read_id(s)).decode('ascii') + elem = elementd.get(elid) + size, length = read_vint(s) + this_length = len(elid) / 2 + size + length + if elem is not None: + if elem.valtype != 'skip': + print(" " * depth, '[' + elid + ']', elem.name, 'size:', length, 'value:', end=' ') + if elem.valtype == 'sub': + print('subelements:') + while length > 0: + length -= parse_one(s, depth + 1, elem, length) + if length < 0: + raise SyntaxError + elif elem.valtype == 'str': + print('string', repr(read_str(s, length).decode('utf8', 'replace'))) + elif elem.valtype in ('binary', 'ebml_id'): + t = read_str(s, length) + dec = '' + if elem.valtype == 'ebml_id': + idelem = elementd.get(hexlify(t).decode('ascii')) + if idelem is None: + dec = '(UNKNOWN)' + else: + dec = '({0.name})'.format(idelem) + if len(t) < 20: + t = hexlify(t).decode('ascii') + else: + t = '<{0} bytes>'.format(len(t)) + print('binary', t, dec) + elif elem.valtype == 'uint': + print('uint', read_uint(s, length)) + elif elem.valtype == 'sint': + print('sint', read_sint(s, length)) + elif elem.valtype == 'float': + print('float', read_float(s, length)) + elif elem.valtype == 'skip': + read(s, length) + else: + raise NotImplementedError + else: + print(" " * depth, '[' + elid + '] Unknown element! size:', length) + read(s, length) + return this_length + +if __name__ == "__main__": + def parse_toplevel(s): + parse_one(s, 0, None, 1 << 63) + + if sys.argv[1] == '--generate-header': + generate_C_header(open(sys.argv[2], "w")) + elif sys.argv[1] == '--generate-definitions': + generate_C_definitions(open(sys.argv[2], "w")) + else: + s = open(sys.argv[1], "rb") + while 1: + start = s.tell() + try: + parse_toplevel(s) + except EOF: + if s.tell() != start: + raise Exception("Unexpected end of file") + break |