summary refs log tree commit diff stats
path: root/yt_dlp/extractor/brightcove.py
diff options
context:
space:
mode:
author Daniel Baumann <daniel.baumann@progress-linux.org> 2024-08-05 09:06:10 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org> 2024-08-05 09:06:10 +0000
commit 137f07c27a88b07df57df5b2cdebebe3c7722bac (patch)
tree 4ebdcbea6b540f69c40ba0be6ac766008030d149 /yt_dlp/extractor/brightcove.py
parent Adding debian version 2024.05.27-1. (diff)
download yt-dlp-137f07c27a88b07df57df5b2cdebebe3c7722bac.tar.xz
yt-dlp-137f07c27a88b07df57df5b2cdebebe3c7722bac.zip
Merging upstream version 2024.07.01.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'yt_dlp/extractor/brightcove.py')
-rw-r--r-- yt_dlp/extractor/brightcove.py 84
1 files changed, 38 insertions, 46 deletions
diff --git a/yt_dlp/extractor/brightcove.py b/yt_dlp/extractor/brightcove.py
index 4190e1a..2526f25 100644
--- a/yt_dlp/extractor/brightcove.py
+++ b/yt_dlp/extractor/brightcove.py
@@ -1,15 +1,12 @@
import base64
import re
import struct
+import urllib.parse
import xml.etree.ElementTree
from .adobepass import AdobePassIE
from .common import InfoExtractor
-from ..compat import (
- compat_etree_fromstring,
- compat_parse_qs,
- compat_urlparse,
-)
+from ..compat import compat_etree_fromstring
from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
@@ -21,6 +18,7 @@ from ..utils import (
fix_xml_ampersands,
float_or_none,
int_or_none,
+ join_nonempty,
js_to_json,
mimetype2ext,
parse_iso8601,
@@ -142,7 +140,7 @@ class BrightcoveLegacyIE(InfoExtractor):
# from http://www.un.org/chinese/News/story.asp?NewsID=27724
'url': 'https://link.brightcove.com/services/player/bcpid1722935254001/?bctid=5360463607001&autoStart=false&secureConnections=true&width=650&height=350',
'only_matching': True, # Tested in GenericIE
- }
+ },
]
_WEBPAGE_TESTS = [{
@@ -315,7 +313,7 @@ class BrightcoveLegacyIE(InfoExtractor):
object_str = fix_xml_ampersands(object_str)
try:
- object_doc = compat_etree_fromstring(object_str.encode('utf-8'))
+ object_doc = compat_etree_fromstring(object_str.encode())
except xml.etree.ElementTree.ParseError:
return
@@ -323,7 +321,7 @@ class BrightcoveLegacyIE(InfoExtractor):
if fv_el is not None:
flashvars = dict(
(k, v[0])
- for k, v in compat_parse_qs(fv_el.attrib['value']).items())
+ for k, v in urllib.parse.parse_qs(fv_el.attrib['value']).items())
else:
flashvars = {}
@@ -340,32 +338,32 @@ class BrightcoveLegacyIE(InfoExtractor):
params = {}
- playerID = find_param('playerID') or find_param('playerId')
- if playerID is None:
+ player_id = find_param('playerID') or find_param('playerId')
+ if player_id is None:
raise ExtractorError('Cannot find player ID')
- params['playerID'] = playerID
+ params['playerID'] = player_id
- playerKey = find_param('playerKey')
+ player_key = find_param('playerKey')
# Not all pages define this value
- if playerKey is not None:
- params['playerKey'] = playerKey
+ if player_key is not None:
+ params['playerKey'] = player_key
# These fields hold the id of the video
- videoPlayer = find_param('@videoPlayer') or find_param('videoId') or find_param('videoID') or find_param('@videoList')
- if videoPlayer is not None:
- if isinstance(videoPlayer, list):
- videoPlayer = videoPlayer[0]
- videoPlayer = videoPlayer.strip()
+ video_player = find_param('@videoPlayer') or find_param('videoId') or find_param('videoID') or find_param('@videoList')
+ if video_player is not None:
+ if isinstance(video_player, list):
+ video_player = video_player[0]
+ video_player = video_player.strip()
# UUID is also possible for videoPlayer (e.g.
# http://www.popcornflix.com/hoodies-vs-hooligans/7f2d2b87-bbf2-4623-acfb-ea942b4f01dd
# or http://www8.hp.com/cn/zh/home.html)
if not (re.match(
r'^(?:\d+|[\da-fA-F]{8}-?[\da-fA-F]{4}-?[\da-fA-F]{4}-?[\da-fA-F]{4}-?[\da-fA-F]{12})$',
- videoPlayer) or videoPlayer.startswith('ref:')):
+ video_player) or video_player.startswith('ref:')):
return None
- params['@videoPlayer'] = videoPlayer
- linkBase = find_param('linkBaseURL')
- if linkBase is not None:
- params['linkBaseURL'] = linkBase
+ params['@videoPlayer'] = video_player
+ link_base = find_param('linkBaseURL')
+ if link_base is not None:
+ params['linkBaseURL'] = link_base
return cls._make_brightcove_url(params)
@classmethod
@@ -389,7 +387,7 @@ class BrightcoveLegacyIE(InfoExtractor):
@classmethod
def _make_brightcove_url(cls, params):
return update_url_query(
- 'http://c.brightcove.com/services/viewer/htmlFederated', params)
+ 'https://c.brightcove.com/services/viewer/htmlFederated', params)
@classmethod
def _extract_brightcove_url(cls, webpage):
@@ -448,13 +446,13 @@ class BrightcoveLegacyIE(InfoExtractor):
url = re.sub(r'(?<=[?&])bckey', 'playerKey', url)
mobj = self._match_valid_url(url)
query_str = mobj.group('query')
- query = compat_urlparse.parse_qs(query_str)
+ query = urllib.parse.parse_qs(query_str)
- videoPlayer = query.get('@videoPlayer')
- if videoPlayer:
+ video_player = query.get('@videoPlayer')
+ if video_player:
# We set the original url as the default 'Referer' header
referer = query.get('linkBaseURL', [None])[0] or smuggled_data.get('Referer', url)
- video_id = videoPlayer[0]
+ video_id = video_player[0]
if 'playerID' not in query:
mobj = re.search(r'/bcpid(\d+)', url)
if mobj is not None:
@@ -473,7 +471,7 @@ class BrightcoveLegacyIE(InfoExtractor):
if referer:
headers['Referer'] = referer
player_page = self._download_webpage(
- 'http://link.brightcove.com/services/player/bcpid' + player_id[0],
+ 'https://link.brightcove.com/services/player/bcpid' + player_id[0],
video_id, headers=headers, fatal=False)
if player_page:
player_key = self._search_regex(
@@ -483,7 +481,7 @@ class BrightcoveLegacyIE(InfoExtractor):
enc_pub_id = player_key.split(',')[1].replace('~', '=')
publisher_id = struct.unpack('>Q', base64.urlsafe_b64decode(enc_pub_id))[0]
if publisher_id:
- brightcove_new_url = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s' % (publisher_id, video_id)
+ brightcove_new_url = f'https://players.brightcove.net/{publisher_id}/default_default/index.html?videoId={video_id}'
if referer:
brightcove_new_url = smuggle_url(brightcove_new_url, {'referrer': referer})
return self.url_result(brightcove_new_url, BrightcoveNewIE.ie_key(), video_id)
@@ -541,12 +539,7 @@ class BrightcoveNewBaseIE(AdobePassIE):
})
def build_format_id(kind):
- format_id = kind
- if tbr:
- format_id += '-%dk' % int(tbr)
- if height:
- format_id += '-%dp' % height
- return format_id
+ return join_nonempty(kind, tbr and f'{int(tbr)}k', height and f'{height}p')
if src or streaming_src:
f.update({
@@ -654,7 +647,7 @@ class BrightcoveNewIE(BrightcoveNewBaseIE):
'params': {
# m3u8 download
'skip_download': True,
- }
+ },
}, {
# playlist stream
'url': 'https://players.brightcove.net/1752604059001/S13cJdUBz_default/index.html?playlistId=5718313430001',
@@ -666,7 +659,7 @@ class BrightcoveNewIE(BrightcoveNewBaseIE):
'params': {
# m3u8 download
'skip_download': True,
- }
+ },
}, {
'url': 'http://players.brightcove.net/5690807595001/HyZNerRl7_default/index.html?playlistId=5743160747001',
'only_matching': True,
@@ -804,7 +797,7 @@ class BrightcoveNewIE(BrightcoveNewBaseIE):
# Look for iframe embeds [1]
for _, url in re.findall(
r'<iframe[^>]+src=(["\'])((?:https?:)?//players\.brightcove\.net/\d+/[^/]+/index\.html.+?)\1', webpage):
- entries.append(url if url.startswith('http') else 'http:' + url)
+ entries.append(url if url.startswith(('http:', 'https:')) else 'https:' + url)
# Look for <video> tags [2] and embed_in_page embeds [3]
# [2] looks like:
@@ -833,8 +826,7 @@ class BrightcoveNewIE(BrightcoveNewBaseIE):
player_id = player_id or attrs.get('data-player') or 'default'
embed = embed or attrs.get('data-embed') or 'default'
- bc_url = 'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s' % (
- account_id, player_id, embed, video_id)
+ bc_url = f'https://players.brightcove.net/{account_id}/{player_id}_{embed}/index.html?videoId={video_id}'
# Some brightcove videos may be embedded with video tag only and
# without script tag or any mentioning of brightcove at all. Such
@@ -865,13 +857,13 @@ class BrightcoveNewIE(BrightcoveNewBaseIE):
account_id, player_id, embed, content_type, video_id = self._match_valid_url(url).groups()
- policy_key_id = '%s_%s' % (account_id, player_id)
+ policy_key_id = f'{account_id}_{player_id}'
policy_key = self.cache.load('brightcove', policy_key_id)
policy_key_extracted = False
store_pk = lambda x: self.cache.store('brightcove', policy_key_id, x)
def extract_policy_key():
- base_url = 'http://players.brightcove.net/%s/%s_%s/' % (account_id, player_id, embed)
+ base_url = f'https://players.brightcove.net/{account_id}/{player_id}_{embed}/'
config = self._download_json(
base_url + 'config.json', video_id, fatal=False) or {}
policy_key = try_get(
@@ -910,7 +902,7 @@ class BrightcoveNewIE(BrightcoveNewBaseIE):
if not policy_key:
policy_key = extract_policy_key()
policy_key_extracted = True
- headers['Accept'] = 'application/json;pk=%s' % policy_key
+ headers['Accept'] = f'application/json;pk={policy_key}'
try:
json_data = self._download_json(api_url, video_id, headers=headers)
break
@@ -936,7 +928,7 @@ class BrightcoveNewIE(BrightcoveNewBaseIE):
custom_fields['bcadobepassresourceid'])
json_data = self._download_json(
api_url, video_id, headers={
- 'Accept': 'application/json;pk=%s' % policy_key
+ 'Accept': f'application/json;pk={policy_key}',
}, query={
'tveToken': tve_token,
})