1 files changed, 143 insertions, 0 deletions
diff --git a/yt_dlp/postprocessor/sponsorblock.py b/yt_dlp/postprocessor/sponsorblock.py
new file mode 100644
index 0000000..6ba87cd
--- /dev/null
+++ b/yt_dlp/postprocessor/sponsorblock.py
@@ -0,0 +1,143 @@
+import hashlib
+import json
+import re
+import urllib.parse
+
+from .ffmpeg import FFmpegPostProcessor
+
+
+class SponsorBlockPP(FFmpegPostProcessor):
+    """Fetch SponsorBlock segments for a video and record them as chapters.
+
+    The segments are requested from the SponsorBlock HTTP API and stored under
+    info['sponsorblock_chapters'] by run().
+    """
+
+    # https://wiki.sponsor.ajay.app/w/Types
+    # Extractor key -> service name sent to the SponsorBlock API
+    EXTRACTORS = {
+        'Youtube': 'YouTube',
+    }
+    # Category id -> chapter title. POI segments get stretched to 1 second.
+    POI_CATEGORIES = {
+        'poi_highlight': 'Highlight',
+    }
+    NON_SKIPPABLE_CATEGORIES = {
+        **POI_CATEGORIES,
+        'chapter': 'Chapter',
+    }
+    CATEGORIES = {
+        'sponsor': 'Sponsor',
+        'intro': 'Intermission/Intro Animation',
+        'outro': 'Endcards/Credits',
+        'selfpromo': 'Unpaid/Self Promotion',
+        'preview': 'Preview/Recap',
+        'filler': 'Filler Tangent',
+        'interaction': 'Interaction Reminder',
+        'music_offtopic': 'Non-Music Section',
+        **NON_SKIPPABLE_CATEGORIES,
+    }
+
+    def __init__(self, downloader, categories=None, api='https://sponsor.ajay.app'):
+        """Configure which categories to request and which API server to use.
+
+        categories -- iterable of category ids; all CATEGORIES when empty/None
+        api        -- API base URL; 'https://' is prepended if no http(s) scheme
+        """
+        FFmpegPostProcessor.__init__(self, downloader)
+        self._categories = tuple(categories or self.CATEGORIES)
+        self._API_URL = api if re.match('^https?://', api) else 'https://' + api
+
+    def run(self, info):
+        """Store the video's SponsorBlock chapters in info['sponsorblock_chapters'].
+
+        Videos whose info['extractor_key'] is not in EXTRACTORS are left as is.
+        Always returns ([], info).
+        """
+        extractor = info['extractor_key']
+        if extractor not in self.EXTRACTORS:
+            self.to_screen(f'SponsorBlock is not supported for {extractor}')
+            return [], info
+
+        self.to_screen('Fetching SponsorBlock segments')
+        info['sponsorblock_chapters'] = self._get_sponsor_chapters(info, info.get('duration'))
+        return [], info
+
+    def _get_sponsor_chapters(self, info, duration):
+        """Return the video's SponsorBlock segments as a list of chapter dicts.
+
+        duration is the video duration in seconds; when it is None or 0 the
+        duration-based adjustments and checks are skipped. Segments whose
+        submitted 'videoDuration' deviates too much from it are dropped with a
+        warning.
+        """
+        segments = self._get_sponsor_segments(info['id'], self.EXTRACTORS[info['extractor_key']])
+        # Ignore entire video segments (https://wiki.sponsor.ajay.app/w/Types).
+        # The segment is compared as a tuple since a list never equals (0, 0).
+        segments = [s for s in segments if tuple(s['segment']) != (0, 0)]
+
+        def duration_filter(s):
+            # NOTE: adjusts s['segment'] in place; to_chapter() reads the result
+            start_end = s['segment']
+            # Ignore milliseconds difference at the start.
+            if start_end[0] <= 1:
+                start_end[0] = 0
+            # Make POI chapters 1 sec so that we can properly mark them
+            if s['category'] in self.POI_CATEGORIES:
+                start_end[1] += 1
+            # Ignore milliseconds difference at the end.
+            # Never allow the segment to exceed the video.
+            if duration and duration - start_end[1] <= 1:
+                start_end[1] = duration
+            # SponsorBlock duration may be absent or it may deviate from the real one.
+            # With either duration unknown there is nothing to compare.
+            submitted = s.get('videoDuration')
+            diff = abs(duration - submitted) if duration and submitted else 0
+            if diff < 1:
+                return True
+            # A larger deviation is tolerated only if it is small relative to the
+            # segment length; empty segments would otherwise divide by zero.
+            length = start_end[1] - start_end[0]
+            return diff < 5 and length > 0 and diff / length < 0.05
+
+        duration_match = [s for s in segments if duration_filter(s)]
+        if len(duration_match) != len(segments):
+            self.report_warning('Some SponsorBlock segments are from a video of different duration, maybe from an old version of this video')
+
+        def to_chapter(s):
+            (start, end), cat = s['segment'], s['category']
+            # Chapter segments carry their own title in 'description'
+            title = s['description'] if cat == 'chapter' else self.CATEGORIES[cat]
+            return {
+                'start_time': start,
+                'end_time': end,
+                'category': cat,
+                'title': title,
+                'type': s['actionType'],
+                '_categories': [(cat, start, end, title)],
+            }
+
+        sponsor_chapters = [to_chapter(s) for s in duration_match]
+        if not sponsor_chapters:
+            self.to_screen('No matching segments were found in the SponsorBlock database')
+        else:
+            self.to_screen(f'Found {len(sponsor_chapters)} segments in the SponsorBlock database')
+        return sponsor_chapters
+
+    def _get_sponsor_segments(self, video_id, service):
+        """Return the segment list the API reports for video_id, or [] if none.
+
+        Only a prefix of the id's SHA-256 hash is sent; the matching entry is
+        then picked from the response by its 'videoID'.
+        """
+        video_hash = hashlib.sha256(video_id.encode('ascii')).hexdigest()
+        # SponsorBlock API recommends using first 4 hash characters.
+        url = f'{self._API_URL}/api/skipSegments/{video_hash[:4]}?' + urllib.parse.urlencode({
+            'service': service,
+            'categories': json.dumps(self._categories),
+            'actionTypes': json.dumps(['skip', 'poi', 'chapter']),
+        })
+        for d in self._download_json(url) or []:
+            if d['videoID'] == video_id:
+                return d['segments']
+        return []