From 2415e66f889f38503b73e8ebc5f43ca342390e5c Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Mon, 15 Apr 2024 18:49:24 +0200 Subject: Adding upstream version 2024.03.10. Signed-off-by: Daniel Baumann --- yt_dlp/extractor/piapro.py | 121 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 121 insertions(+) create mode 100644 yt_dlp/extractor/piapro.py (limited to 'yt_dlp/extractor/piapro.py') diff --git a/yt_dlp/extractor/piapro.py b/yt_dlp/extractor/piapro.py new file mode 100644 index 0000000..3ae985d --- /dev/null +++ b/yt_dlp/extractor/piapro.py @@ -0,0 +1,121 @@ +from .common import InfoExtractor +from ..compat import compat_urlparse +from ..utils import ( + ExtractorError, + parse_duration, + parse_filesize, + str_to_int, + unified_timestamp, + urlencode_postdata, +) + + +class PiaproIE(InfoExtractor): + _NETRC_MACHINE = 'piapro' + _VALID_URL = r'https?://piapro\.jp/(?:t|content)/(?P[\w-]+)/?' + _TESTS = [{ + 'url': 'https://piapro.jp/t/NXYR', + 'md5': 'f7c0f760913fb1d44a1c45a4af793909', + 'info_dict': { + 'id': 'NXYR', + 'ext': 'mp3', + 'uploader': 'wowaka', + 'uploader_id': 'wowaka', + 'title': '裏表ラバーズ', + 'description': 'http://www.nicovideo.jp/watch/sm8082467', + 'duration': 189.0, + 'timestamp': 1251785475, + 'thumbnail': r're:^https?://.*\.(?:png|jpg)$', + 'upload_date': '20090901', + 'view_count': int, + } + }, { + 'note': 'There are break lines in description, mandating (?s) flag', + 'url': 'https://piapro.jp/t/9cSd', + 'md5': '952bb6d1e8de95050206408a87790676', + 'info_dict': { + 'id': '9cSd', + 'ext': 'mp3', + 'title': '青に溶けた風船 / 初音ミク', + 'description': 'md5:d395a9bd151447631a5a1460bc7f9132', + 'uploader': 'シアン・キノ', + 'duration': 229.0, + 'timestamp': 1644030039, + 'upload_date': '20220205', + 'view_count': int, + 'thumbnail': r're:^https?://.*\.(?:png|jpg)$', + 'uploader_id': 'cyankino', + } + }, { + 'url': 'https://piapro.jp/content/hcw0z3a169wtemz6', + 'only_matching': True + }, { + 'url': 'https://piapro.jp/t/-SO-', + 'only_matching': True + }] + + _login_status = False + + def _perform_login(self, username, password): + login_ok = True + login_form_strs = { + '_username': username, + '_password': password, + '_remember_me': 'on', + 'login': 'ログイン' + } + self._request_webpage('https://piapro.jp/login/', None) + urlh = self._request_webpage( + 'https://piapro.jp/login/exe', None, + note='Logging in', errnote='Unable to log in', + data=urlencode_postdata(login_form_strs)) + if urlh is False: + login_ok = False + else: + parts = compat_urlparse.urlparse(urlh.url) + if parts.path != '/': + login_ok = False + if not login_ok: + self.report_warning( + 'unable to log in: bad username or password') + self._login_status = login_ok + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + category_id = self._search_regex(r'categoryId=(.+)">', webpage, 'category ID') + if category_id not in ('1', '2', '21', '22', '23', '24', '25'): + raise ExtractorError('The URL does not contain audio.', expected=True) + + str_duration, str_filesize = self._search_regex( + r'サイズ:(.+?)/\(([0-9,]+?[KMG]?B))', webpage, 'duration and size', + group=(1, 2), default=(None, None)) + str_viewcount = self._search_regex(r'閲覧数:([0-9,]+)\s+', webpage, 'view count', fatal=False) + + uploader_id, uploader = self._search_regex( + r'([^<]+)さん<', webpage, 'uploader', + group=(1, 2), default=(None, None)) + content_id = self._search_regex(r'contentId\:\'(.+)\'', webpage, 'content ID') + create_date = self._search_regex(r'createDate\:\'(.+)\'', webpage, 'timestamp') + + player_webpage = self._download_webpage( + f'https://piapro.jp/html5_player_popup/?id={content_id}&cdate={create_date}', + video_id, note='Downloading player webpage') + + return { + 'id': video_id, + 'title': self._html_search_regex(r'(.+?)', webpage, 'title', fatal=False), + 'description': self._html_search_regex(r'(?s)(.+?)

\s*