summary refs log tree commit diff stats
path: root/yt_dlp/extractor/zenporn.py
blob: 084c1fc4f3a385c4d78f5574f05fc06fa68154c4 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
import base64
import binascii

from .common import InfoExtractor
from ..utils import ExtractorError, determine_ext, unified_strdate, url_or_none
from ..utils.traversal import traverse_obj


# Extractor for zenporn.com video pages. The page embeds a player hosted on an
# external domain; both the metadata JSON and the (obfuscated) media URL are
# requested from that external domain.
class ZenPornIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?zenporn\.com/video/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://zenporn.com/video/15627016/desi-bhabi-ki-chudai',
        'md5': '07bd576b5920714d74975c054ca28dee',
        'info_dict': {
            'id': '9563799',
            'display_id': '15627016',
            'ext': 'mp4',
            'title': 'md5:669eafd3bbc688aa29770553b738ada2',
            'description': '',
            'thumbnail': 'md5:2fc044a19bab450fef8f1931e7920a18',
            'upload_date': '20230925',
            'uploader': 'md5:9fae59847f1f58d1da8f2772016c12f3',
            'age_limit': 18,
        },
    }, {
        'url': 'https://zenporn.com/video/15570701',
        'md5': 'acba0d080d692664fcc8c4e5502b1a67',
        'info_dict': {
            'id': '2297875',
            'display_id': '15570701',
            'ext': 'mp4',
            'title': 'md5:47aebdf87644ec91e8b1a844bc832451',
            'description': '',
            'thumbnail': 'https://mstn.nv7s.com/contents/videos_screenshots/2297000/2297875/480x270/1.jpg',
            'upload_date': '20230921',
            'uploader': 'Lois Clarke',
            'age_limit': 18,
        },
    }, {
        'url': 'https://zenporn.com/video/8531117/amateur-students-having-a-fuck-fest-at-club/',
        'md5': '67411256aa9451449e4d29f3be525541',
        'info_dict': {
            'id': '12791908',
            'display_id': '8531117',
            'ext': 'mp4',
            'title': 'Amateur students having a fuck fest at club',
            'description': '',
            'thumbnail': 'https://tn.txxx.tube/contents/videos_screenshots/12791000/12791908/288x162/1.jpg',
            'upload_date': '20191005',
            'uploader': 'Jackopenass',
            'age_limit': 18,
        },
    }, {
        'url': 'https://zenporn.com/video/15872038/glad-you-came/',
        'md5': '296ccab437f5bac6099433768449d8e1',
        'info_dict': {
            'id': '111585',
            'display_id': '15872038',
            'ext': 'mp4',
            'title': 'Glad You Came',
            'description': '',
            'thumbnail': 'https://vpim.m3pd.com/contents/videos_screenshots/111000/111585/480x270/1.jpg',
            'upload_date': '20231024',
            'uploader': 'Martin Rudenko',
            'age_limit': 18,
        },
    }]

    def _gen_info_url(self, ext_domain, extr_id, lifetime=86400):
        """
        Build the metadata JSON URL (reverse engineered from the website javascript)

        The path consists of the numeric id rounded down to a multiple of
        1,000,000, then to a multiple of 1,000, then the id itself,
        e.g. 9563799 -> '9000000/9563000/9563799'.
        """
        result = '/'.join(str(int(extr_id) // i * i) for i in (1_000_000, 1_000, 1))
        return f'https://{ext_domain}/api/json/video/{lifetime}/{result}.json'

    @staticmethod
    def _decode_video_url(encoded_url):
        """
        Decode the obfuscated video URL (reverse engineered from the website javascript)

        Returns the decoded string, or None if the input is not a string or
        does not decode to valid base64/UTF-8 after the character substitution.
        """
        # A missing or non-string field cannot be decoded; report it like any other bad input
        if not isinstance(encoded_url, str):
            return None
        # Replace lookalike characters and standardize map
        translation = str.maketrans('АВСЕМ.,~', 'ABCEM+/=')
        try:
            return base64.b64decode(encoded_url.translate(translation), validate=True).decode()
        except (binascii.Error, ValueError):
            # UnicodeDecodeError is a ValueError subclass, so invalid UTF-8 ends up here as well
            return None

    def _real_extract(self, url):
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # The embed URL in the page carries the external host and the id used by that host
        ext_domain, video_id = self._search_regex(
            r'https://(?P<ext_domain>[\w.-]+\.\w{3})/embed/(?P<extr_id>\d+)/',
            webpage, 'embed info', group=('ext_domain', 'extr_id'))

        # Metadata is best-effort: a failed download must not abort the extraction
        info_json = self._download_json(
            self._gen_info_url(ext_domain, video_id), video_id, fatal=False)

        video_json = self._download_json(
            f'https://{ext_domain}/api/videofile.php', video_id, query={
                'video_id': video_id,
                'lifetime': 8640000,
            }, note='Downloading video file JSON', errnote='Failed to download video file JSON')

        # Safe lookup: an empty list or an unexpected response shape yields None
        # here instead of an IndexError/KeyError/TypeError
        encoded_url = traverse_obj(video_json, (0, 'video_url', {str}))
        if not encoded_url:
            raise ExtractorError('Unable to find the video url in the video file JSON')

        decoded_url = self._decode_video_url(encoded_url)
        if not decoded_url:
            raise ExtractorError('Unable to decode the video url')

        return {
            'id': video_id,
            'display_id': display_id,
            'ext': traverse_obj(video_json, (0, 'format', {determine_ext})),
            # The decoded value is appended directly to the external host
            'url': f'https://{ext_domain}{decoded_url}',
            'age_limit': 18,
            **traverse_obj(info_json, ('video', {
                'title': ('title', {str}),
                'description': ('description', {str}),
                'thumbnail': ('thumb', {url_or_none}),
                'upload_date': ('post_date', {unified_strdate}),
                'uploader': ('user', 'username', {str}),
            })),
        }