diff options
Diffstat (limited to 'yt_dlp/postprocessor/metadataparser.py')
-rw-r--r-- | yt_dlp/postprocessor/metadataparser.py | 125 |
1 files changed, 125 insertions, 0 deletions
diff --git a/yt_dlp/postprocessor/metadataparser.py b/yt_dlp/postprocessor/metadataparser.py new file mode 100644 index 0000000..1d60542 --- /dev/null +++ b/yt_dlp/postprocessor/metadataparser.py @@ -0,0 +1,125 @@ +import re + +from .common import PostProcessor +from ..utils import Namespace, filter_dict, function_with_repr + + +class MetadataParserPP(PostProcessor): + def __init__(self, downloader, actions): + super().__init__(downloader) + self._actions = [] + for f in actions: + action, *args = f + assert action in self.Actions + self._actions.append(action(self, *args)) + + @classmethod + def validate_action(cls, action, *data): + """Each action can be: + (Actions.INTERPRET, from, to) OR + (Actions.REPLACE, field, search, replace) + """ + if action not in cls.Actions: + raise ValueError(f'{action!r} is not a valid action') + action(cls, *data) # So this can raise error to validate + + @staticmethod + def field_to_template(tmpl): + if re.match(r'[a-zA-Z_]+$', tmpl): + return f'%({tmpl})s' + + from ..YoutubeDL import YoutubeDL + err = YoutubeDL.validate_outtmpl(tmpl) + if err: + raise err + return tmpl + + @staticmethod + def format_to_regex(fmt): + r""" + Converts a string like + '%(title)s - %(artist)s' + to a regex like + '(?P<title>.+)\ \-\ (?P<artist>.+)' + """ + if not re.search(r'%\(\w+\)s', fmt): + return fmt + lastpos = 0 + regex = '' + # replace %(..)s with regex group and escape other string parts + for match in re.finditer(r'%\((\w+)\)s', fmt): + regex += re.escape(fmt[lastpos:match.start()]) + regex += rf'(?P<{match.group(1)}>.+)' + lastpos = match.end() + if lastpos < len(fmt): + regex += re.escape(fmt[lastpos:]) + return regex + + def run(self, info): + for f in self._actions: + f(info) + return [], info + + @function_with_repr + def interpretter(self, inp, out): + def f(info): + data_to_parse = self._downloader.evaluate_outtmpl(template, info) + self.write_debug(f'Searching for {out_re.pattern!r} in {template!r}') + match = out_re.search(data_to_parse) + if match is None: + self.to_screen(f'Could not interpret {inp!r} as {out!r}') + return + for attribute, value in filter_dict(match.groupdict()).items(): + info[attribute] = value + self.to_screen(f'Parsed {attribute} from {template!r}: {value!r}') + + template = self.field_to_template(inp) + out_re = re.compile(self.format_to_regex(out)) + return f + + @function_with_repr + def replacer(self, field, search, replace): + def f(info): + val = info.get(field) + if val is None: + self.to_screen(f'Video does not have a {field}') + return + elif not isinstance(val, str): + self.report_warning(f'Cannot replace in field {field} since it is a {type(val).__name__}') + return + self.write_debug(f'Replacing all {search!r} in {field} with {replace!r}') + info[field], n = search_re.subn(replace, val) + if n: + self.to_screen(f'Changed {field} to: {info[field]}') + else: + self.to_screen(f'Did not find {search!r} in {field}') + + search_re = re.compile(search) + return f + + Actions = Namespace(INTERPRET=interpretter, REPLACE=replacer) + + +class MetadataFromFieldPP(MetadataParserPP): + @classmethod + def to_action(cls, f): + match = re.match(r'(?s)(?P<in>.*?)(?<!\\):(?P<out>.+)$', f) + if match is None: + raise ValueError(f'it should be FROM:TO, not {f!r}') + return ( + cls.Actions.INTERPRET, + match.group('in').replace('\\:', ':'), + match.group('out'), + ) + + def __init__(self, downloader, formats): + super().__init__(downloader, [self.to_action(f) for f in formats]) + + +# Deprecated +class MetadataFromTitlePP(MetadataParserPP): + def __init__(self, downloader, titleformat): + super().__init__(downloader, [(self.Actions.INTERPRET, 'title', titleformat)]) + self.deprecation_warning( + 'yt_dlp.postprocessor.MetadataFromTitlePP is deprecated ' + 'and may be removed in a future version. Use yt_dlp.postprocessor.MetadataFromFieldPP instead') |