yt_dlp/extractor/musicdex.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172

from .common import InfoExtractor
from ..utils import (
    date_from_str,
    format_field,
    try_get,
    unified_strdate,
)


class MusicdexBaseIE(InfoExtractor):
    """Common base for the Musicdex extractors; provides the per-track info builder."""

    def _return_info(self, track_json, album_json, id):
        """Assemble the info dict describing a single track.

        Args:
            track_json: Mapping holding the track fields read below ('name',
                'description', 'number', 'url', 'duration', 'genres',
                'likes_count', 'plays', 'artists').
            album_json: Mapping holding the album fields read below
                ('artists', 'image', 'name', 'release_date'). It may be
                empty, in which case the album-derived values fall back to
                their defaults.
            id: Track identifier; it is stored in its string form.

        Returns:
            A dict of track metadata, tagged with the MusicdexSong extractor.
        """
        site_template = 'https://www.musicdex.org/%s'
        track_name = track_json.get('name')

        def _names(items):
            # Pull the 'name' out of every item; a missing or empty list gives [].
            return [item.get('name') for item in items or []]

        def _year(album):
            # Any lookup/parsing failure here is left to try_get() below.
            return date_from_str(unified_strdate(album['release_date'])).year

        info = {
            'id': str(id),
            # The track name doubles as the title.
            'title': track_name,
            'track': track_name,
            'description': track_json.get('description'),
            'track_number': track_json.get('number'),
            'url': format_field(track_json, 'url', site_template),
            # NOTE(review): the song test in this file expects 266000 here, so
            # the value is presumably not expressed in seconds - confirm the unit.
            'duration': track_json.get('duration'),
            'genres': _names(track_json.get('genres')),
            'like_count': track_json.get('likes_count'),
            'view_count': track_json.get('plays'),
            'artists': _names(track_json.get('artists')),
            'album_artists': _names(album_json.get('artists')),
            'thumbnail': format_field(album_json, 'image', site_template),
            'album': album_json.get('name'),
            'release_year': try_get(album_json, _year),
            # Every track is attributed to the song extractor, regardless of
            # which extractor produced the entry.
            'extractor_key': MusicdexSongIE.ie_key(),
            'extractor': 'MusicdexSong',
        }
        return info


class MusicdexSongIE(MusicdexBaseIE):
    """Extractor for single musicdex.org track pages (``/track/<id>``)."""

    _VALID_URL = r'https?://(?:www\.)?musicdex\.org/track/(?P<id>\d+)'

    _TESTS = [{
        'url': 'https://www.musicdex.org/track/306/dual-existence',
        'info_dict': {
            'id': '306',
            'ext': 'mp3',
            'title': 'dual existence',
            'description': '#NIPPONSEI @ IRC.RIZON.NET',
            'track': 'dual existence',
            'track_number': 1,
            'duration': 266000,
            'genres': ['Anime'],
            'like_count': int,
            'view_count': int,
            'artists': ['fripSide'],
            'album_artists': ['fripSide'],
            'thumbnail': 'https://www.musicdex.org/storage/album/9iDIam1DHTVqUG4UclFIEq1WAFGXfPW4y0TtZa91.png',
            'album': 'To Aru Kagaku no Railgun T OP2 Single - dual existence',
            'release_year': 2020,
        },
        'params': {'skip_download': True},
    }]

    def _real_extract(self, url):
        """Download the track JSON for *url* and return its info dict."""
        track_id = self._match_id(url)
        api_url = f'https://www.musicdex.org/secure/tracks/{track_id}?defaultRelations=true'
        track = self._download_json(api_url, track_id)['track']
        # The album is read from the track payload; use {} when it is absent.
        album = track.get('album') or {}
        return self._return_info(track, album, track_id)


class MusicdexAlbumIE(MusicdexBaseIE):
    """Extractor for musicdex.org album pages (``/album/<id>``), returned as a playlist."""

    _VALID_URL = r'https?://(?:www\.)?musicdex\.org/album/(?P<id>\d+)'

    _TESTS = [{
        'url': 'https://www.musicdex.org/album/56/tenmon-and-eiichiro-yanagi-minori/ef-a-tale-of-memories-original-soundtrack-2-fortissimo',
        'playlist_mincount': 28,
        'info_dict': {
            'id': '56',
            'genres': ['OST'],
            'view_count': int,
            'artists': ['TENMON & Eiichiro Yanagi / minori'],
            'title': 'ef - a tale of memories Original Soundtrack 2 ~fortissimo~',
            'release_year': 2008,
            'thumbnail': 'https://www.musicdex.org/storage/album/2rSHkyYBYfB7sbvElpEyTMcUn6toY7AohOgJuDlE.jpg',
        },
    }]

    def _real_extract(self, url):
        """Download the album JSON for *url* and return it as a playlist of its tracks."""
        album_id = self._match_id(url)
        api_url = f'https://www.musicdex.org/secure/albums/{album_id}?defaultRelations=true'
        album = self._download_json(api_url, album_id)['album']

        # One entry per track; tracks without a usable 'id' are skipped.
        entries = []
        for track in album.get('tracks') or []:
            if not track.get('id'):
                continue
            entries.append(self._return_info(track, album, track['id']))

        genre_names = [genre.get('name') for genre in album.get('genres') or []]
        artist_names = [artist.get('name') for artist in album.get('artists') or []]
        # try_get() absorbs a missing or unparsable release date.
        year = try_get(album, lambda x: date_from_str(unified_strdate(x['release_date'])).year)

        return {
            '_type': 'playlist',
            'id': album_id,
            'title': album.get('name'),
            'description': album.get('description'),
            'genres': genre_names,
            'view_count': album.get('plays'),
            'artists': artist_names,
            'thumbnail': format_field(album, 'image', 'https://www.musicdex.org/%s'),
            'release_year': year,
            'entries': entries,
        }


class MusicdexPageIE(MusicdexBaseIE):  # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor
    """Base for extractors that read a paginated Musicdex API listing.

    Subclasses provide ``_API_URL``, which is %-formatted with the id to
    obtain the URL of the first page.
    """

    def _entries(self, id):
        """Yield every item of the paginated listing for *id*, page after page."""
        page_url = self._API_URL % id
        # Follow 'next_page_url' until the API stops providing one.
        while page_url:
            pagination = self._download_json(page_url, id)['pagination']
            yield from pagination.get('data') or []
            page_url = pagination.get('next_page_url')


class MusicdexArtistIE(MusicdexPageIE):
    """Extractor for musicdex.org artist pages (``/artist/<id>``), returned as a playlist."""

    _VALID_URL = r'https?://(?:www\.)?musicdex\.org/artist/(?P<id>\d+)'
    _API_URL = 'https://www.musicdex.org/secure/artists/%s/albums?page=1'

    _TESTS = [{
        'url': 'https://www.musicdex.org/artist/11/fripside',
        'playlist_mincount': 28,
        'info_dict': {
            'id': '11',
            'view_count': int,
            'title': 'fripSide',
            'thumbnail': 'https://www.musicdex.org/storage/artist/ZmOz0lN2vsweegB660em3xWffCjLPmTQHqJls5Xx.jpg',
        },
    }]

    def _real_extract(self, url):
        """Download the artist JSON for *url* and return a playlist of the tracks of all listed albums."""
        artist_id = self._match_id(url)
        artist = self._download_json(
            f'https://www.musicdex.org/secure/artists/{artist_id}', artist_id)['artist']

        # Flatten album -> tracks into one list; tracks without a usable 'id'
        # are skipped, and each track is paired with the album that lists it.
        entries = [
            self._return_info(track, album, track['id'])
            for album in self._entries(artist_id)
            for track in album.get('tracks') or []
            if track.get('id')
        ]

        return {
            '_type': 'playlist',
            'id': artist_id,
            'title': artist.get('name'),
            'view_count': artist.get('plays'),
            'thumbnail': format_field(artist, 'image_small', 'https://www.musicdex.org/%s'),
            'entries': entries,
        }


class MusicdexPlaylistIE(MusicdexPageIE):
    """Extractor for musicdex.org playlist pages (``/playlist/<id>``)."""

    _VALID_URL = r'https?://(?:www\.)?musicdex\.org/playlist/(?P<id>\d+)'
    _API_URL = 'https://www.musicdex.org/secure/playlists/%s/tracks?perPage=10000&page=1'

    _TESTS = [{
        'url': 'https://www.musicdex.org/playlist/9/test',
        'playlist_mincount': 73,
        'info_dict': {
            'id': '9',
            'view_count': int,
            'title': 'Test',
            'thumbnail': 'https://www.musicdex.org/storage/album/jXATI79f0IbQ2sgsKYOYRCW3zRwF3XsfHhzITCuJ.jpg',
            'description': 'Test 123 123 21312 32121321321321312',
        },
    }]

    def _real_extract(self, url):
        """Download the playlist JSON for *url* and return it as a playlist of its tracks.

        The playlist metadata comes from one request; the tracks come from
        the paginated listing walked by ``_entries``.
        """
        playlist_id = self._match_id(url)
        data_json = self._download_json(
            f'https://www.musicdex.org/secure/playlists/{playlist_id}', playlist_id)['playlist']

        # _entries() is a generator function, so its result is always truthy
        # and needs no ``or []`` fallback. Tracks without a usable 'id' are
        # skipped; the album is read from each track, with {} when absent.
        entries = [
            self._return_info(track, track.get('album') or {}, track['id'])
            for track in self._entries(playlist_id)
            if track.get('id')
        ]

        return {
            '_type': 'playlist',
            'id': playlist_id,
            'title': data_json.get('name'),
            'description': data_json.get('description'),
            'view_count': data_json.get('plays'),
            'thumbnail': format_field(data_json, 'image', 'https://www.musicdex.org/%s'),
            'entries': entries,
        }