diff options
Diffstat (limited to 'yt_dlp/postprocessor/sponsorblock.py')
-rw-r--r-- | yt_dlp/postprocessor/sponsorblock.py | 104 |
1 files changed, 104 insertions, 0 deletions
diff --git a/yt_dlp/postprocessor/sponsorblock.py b/yt_dlp/postprocessor/sponsorblock.py new file mode 100644 index 0000000..6ba87cd --- /dev/null +++ b/yt_dlp/postprocessor/sponsorblock.py @@ -0,0 +1,104 @@ +import hashlib +import json +import re +import urllib.parse + +from .ffmpeg import FFmpegPostProcessor + + +class SponsorBlockPP(FFmpegPostProcessor): + # https://wiki.sponsor.ajay.app/w/Types + EXTRACTORS = { + 'Youtube': 'YouTube', + } + POI_CATEGORIES = { + 'poi_highlight': 'Highlight', + } + NON_SKIPPABLE_CATEGORIES = { + **POI_CATEGORIES, + 'chapter': 'Chapter', + } + CATEGORIES = { + 'sponsor': 'Sponsor', + 'intro': 'Intermission/Intro Animation', + 'outro': 'Endcards/Credits', + 'selfpromo': 'Unpaid/Self Promotion', + 'preview': 'Preview/Recap', + 'filler': 'Filler Tangent', + 'interaction': 'Interaction Reminder', + 'music_offtopic': 'Non-Music Section', + **NON_SKIPPABLE_CATEGORIES + } + + def __init__(self, downloader, categories=None, api='https://sponsor.ajay.app'): + FFmpegPostProcessor.__init__(self, downloader) + self._categories = tuple(categories or self.CATEGORIES.keys()) + self._API_URL = api if re.match('^https?://', api) else 'https://' + api + + def run(self, info): + extractor = info['extractor_key'] + if extractor not in self.EXTRACTORS: + self.to_screen(f'SponsorBlock is not supported for {extractor}') + return [], info + + self.to_screen('Fetching SponsorBlock segments') + info['sponsorblock_chapters'] = self._get_sponsor_chapters(info, info.get('duration')) + return [], info + + def _get_sponsor_chapters(self, info, duration): + segments = self._get_sponsor_segments(info['id'], self.EXTRACTORS[info['extractor_key']]) + + def duration_filter(s): + start_end = s['segment'] + # Ignore entire video segments (https://wiki.sponsor.ajay.app/w/Types). + if start_end == (0, 0): + return False + # Ignore milliseconds difference at the start. + if start_end[0] <= 1: + start_end[0] = 0 + # Make POI chapters 1 sec so that we can properly mark them + if s['category'] in self.POI_CATEGORIES.keys(): + start_end[1] += 1 + # Ignore milliseconds difference at the end. + # Never allow the segment to exceed the video. + if duration and duration - start_end[1] <= 1: + start_end[1] = duration + # SponsorBlock duration may be absent or it may deviate from the real one. + diff = abs(duration - s['videoDuration']) if s['videoDuration'] else 0 + return diff < 1 or (diff < 5 and diff / (start_end[1] - start_end[0]) < 0.05) + + duration_match = [s for s in segments if duration_filter(s)] + if len(duration_match) != len(segments): + self.report_warning('Some SponsorBlock segments are from a video of different duration, maybe from an old version of this video') + + def to_chapter(s): + (start, end), cat = s['segment'], s['category'] + title = s['description'] if cat == 'chapter' else self.CATEGORIES[cat] + return { + 'start_time': start, + 'end_time': end, + 'category': cat, + 'title': title, + 'type': s['actionType'], + '_categories': [(cat, start, end, title)], + } + + sponsor_chapters = [to_chapter(s) for s in duration_match] + if not sponsor_chapters: + self.to_screen('No matching segments were found in the SponsorBlock database') + else: + self.to_screen(f'Found {len(sponsor_chapters)} segments in the SponsorBlock database') + return sponsor_chapters + + def _get_sponsor_segments(self, video_id, service): + hash = hashlib.sha256(video_id.encode('ascii')).hexdigest() + # SponsorBlock API recommends using first 4 hash characters. + url = f'{self._API_URL}/api/skipSegments/{hash[:4]}?' + urllib.parse.urlencode({ + 'service': service, + 'categories': json.dumps(self._categories), + 'actionTypes': json.dumps(['skip', 'poi', 'chapter']) + }) + for d in self._download_json(url) or []: + if d['videoID'] == video_id: + return d['segments'] + return [] |