summaryrefslogtreecommitdiffstats
path: root/yt_dlp/extractor/markiza.py
blob: 088b60d554043d69d1549e47389f4331457fcebe (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
import re

from .common import InfoExtractor
from ..utils import (
    orderedSet,
    parse_duration,
    try_get,
)


class MarkizaIE(InfoExtractor):
    _WORKING = False
    _VALID_URL = r'https?://(?:www\.)?videoarchiv\.markiza\.sk/(?:video/(?:[^/]+/)*|embed/)(?P<id>\d+)(?:[_/]|$)'
    _TESTS = [{
        'url': 'http://videoarchiv.markiza.sk/video/oteckovia/84723_oteckovia-109',
        'md5': 'ada4e9fad038abeed971843aa028c7b0',
        'info_dict': {
            'id': '139078',
            'ext': 'mp4',
            'title': 'Oteckovia 109',
            'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 2760,
        },
    }, {
        'url': 'http://videoarchiv.markiza.sk/video/televizne-noviny/televizne-noviny/85430_televizne-noviny',
        'info_dict': {
            'id': '85430',
            'title': 'Televízne noviny',
        },
        'playlist_count': 23,
    }, {
        'url': 'http://videoarchiv.markiza.sk/video/oteckovia/84723',
        'only_matching': True,
    }, {
        'url': 'http://videoarchiv.markiza.sk/video/84723',
        'only_matching': True,
    }, {
        'url': 'http://videoarchiv.markiza.sk/video/filmy/85190_kamenak',
        'only_matching': True,
    }, {
        'url': 'http://videoarchiv.markiza.sk/video/reflex/zo-zakulisia/84651_pribeh-alzbetky',
        'only_matching': True,
    }, {
        'url': 'http://videoarchiv.markiza.sk/embed/85295',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)

        data = self._download_json(
            'http://videoarchiv.markiza.sk/json/video_jwplayer7.json',
            video_id, query={'id': video_id})

        info = self._parse_jwplayer_data(data, m3u8_id='hls', mpd_id='dash')

        if info.get('_type') == 'playlist':
            info.update({
                'id': video_id,
                'title': try_get(
                    data, lambda x: x['details']['name'], str),
            })
        else:
            info['duration'] = parse_duration(
                try_get(data, lambda x: x['details']['duration'], str))
        return info


class MarkizaPageIE(InfoExtractor):
    _WORKING = False
    _VALID_URL = r'https?://(?:www\.)?(?:(?:[^/]+\.)?markiza|tvnoviny)\.sk/(?:[^/]+/)*(?P<id>\d+)_'
    _TESTS = [{
        'url': 'http://www.markiza.sk/soubiz/zahranicny/1923705_oteckovia-maju-svoj-den-ti-slavni-nie-su-o-nic-menej-rozkosni',
        'md5': 'ada4e9fad038abeed971843aa028c7b0',
        'info_dict': {
            'id': '139355',
            'ext': 'mp4',
            'title': 'Oteckovia 110',
            'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 2604,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://dajto.markiza.sk/filmy-a-serialy/1774695_frajeri-vo-vegas',
        'only_matching': True,
    }, {
        'url': 'http://superstar.markiza.sk/aktualne/1923870_to-je-ale-telo-spevacka-ukazala-sexy-postavicku-v-bikinach',
        'only_matching': True,
    }, {
        'url': 'http://hybsa.markiza.sk/aktualne/1923790_uzasna-atmosfera-na-hybsa-v-poprade-superstaristi-si-prve-koncerty-pred-davom-ludi-poriadne-uzili',
        'only_matching': True,
    }, {
        'url': 'http://doma.markiza.sk/filmy/1885250_moja-vysnivana-svadba',
        'only_matching': True,
    }, {
        'url': 'http://www.tvnoviny.sk/domace/1923887_po-smrti-manzela-ju-cakalo-poriadne-prekvapenie',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        return False if MarkizaIE.suitable(url) else super().suitable(url)

    def _real_extract(self, url):
        playlist_id = self._match_id(url)

        webpage = self._download_webpage(
            # Downloading for some hosts (e.g. dajto, doma) fails with 500
            # although everything seems to be OK, so considering 500
            # status code to be expected.
            url, playlist_id, expected_status=500)

        entries = [
            self.url_result(f'http://videoarchiv.markiza.sk/video/{video_id}')
            for video_id in orderedSet(re.findall(
                r'(?:initPlayer_|data-entity=["\']|id=["\']player_)(\d+)',
                webpage))]

        return self.playlist_result(entries, playlist_id)