Extract captions base_url using a different method when missing

The base url is sometimes randomly missing from the player response.

Take one of the listed caption urls, which already carries the &lang
and automatic-caption specifiers, then strip those specifiers off to
recover the base url.
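
For illustration only (the video id and parameter values here are made
up), the same stripping step with urllib looks roughly like:

    import urllib.parse

    # a hypothetical captionTracks baseUrl carrying the specifiers
    track_url = ('https://www.youtube.com/api/timedtext'
                 '?v=dQw4w9WgXcQ&lang=en&kind=asr&fmt=srv3')
    parts = urllib.parse.urlparse(track_url)
    qs = urllib.parse.parse_qs(parts.query)
    for key in ('tlang', 'lang', 'name', 'kind', 'fmt'):
        qs.pop(key, None)  # drop the per-track specifiers if present
    print(urllib.parse.urlunparse(parts._replace(
        query=urllib.parse.urlencode(qs, doseq=True))))
    # -> https://www.youtube.com/api/timedtext?v=dQw4w9WgXcQ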

Signed-off-by: Jesús <heckyel@hyperbola.info>
James Taylor 2022-03-25 22:02:05 -07:00 committed by Jesús
parent dcd4b0f0ae
commit 79fd2966cd

@@ -561,6 +561,25 @@ def extract_watch_info(polymer_json):
    info['translation_languages'] = []
    captions_info = player_response.get('captions', {})
    info['_captions_base_url'] = normalize_url(deep_get(captions_info, 'playerCaptionsRenderer', 'baseUrl'))
    # Sometimes the above playerCaptionsRenderer is randomly missing
    # Extract base_url from one of the captions by removing lang specifiers
    if not info['_captions_base_url']:
        base_url = normalize_url(deep_get(
            captions_info,
            'playerCaptionsTracklistRenderer',
            'captionTracks',
            0,
            'baseUrl'
        ))
        if base_url:
            url_parts = urllib.parse.urlparse(base_url)
            qs = urllib.parse.parse_qs(url_parts.query)
            for key in ('tlang', 'lang', 'name', 'kind', 'fmt'):
                if key in qs:
                    del qs[key]
            base_url = urllib.parse.urlunparse(url_parts._replace(
                query=urllib.parse.urlencode(qs, doseq=True)))
            info['_captions_base_url'] = base_url
    for caption_track in deep_get(captions_info, 'playerCaptionsTracklistRenderer', 'captionTracks', default=()):
        lang_code = caption_track.get('languageCode')
        if not lang_code: