Retrieve base.js url from html watch page when it's missing
Fixes failure mode 3 in #22
This commit is contained in:
@@ -9,4 +9,5 @@ from .everything_else import (extract_channel_info, extract_search_info,
|
||||
from .watch_extraction import (extract_watch_info, get_caption_url,
|
||||
update_with_age_restricted_info, requires_decryption,
|
||||
extract_decryption_function, decrypt_signatures, _formats,
|
||||
update_format_with_type_info, extract_hls_formats)
|
||||
update_format_with_type_info, extract_hls_formats,
|
||||
update_with_missing_base_js)
|
||||
|
||||
@@ -602,6 +602,19 @@ def update_with_age_restricted_info(info, video_info_page):
|
||||
_extract_formats(info, player_response)
|
||||
_extract_playability_error(info, player_response, error_prefix=ERROR_PREFIX)
|
||||
|
||||
# Matches the "jsUrl" entry embedded in the html watch page and captures the
# path of the player's base.js. The dot in "base.js" is escaped so that only
# a literal "base.js" filename is accepted.
html_watch_page_base_js_re = re.compile(r'jsUrl":\s*"([\w\-\./]+/base\.js)"')


def update_with_missing_base_js(info, html_watch_page):
    '''Extract the base_js url and player_name from the html watch page.

    Use when base_js is missing from the json page.

    On success, sets info['base_js'] and info['player_name'] and returns
    False. If no base.js url is found in html_watch_page, info is left
    untouched and an error message string is returned instead.'''
    match = html_watch_page_base_js_re.search(html_watch_page)
    if not match:
        return 'Could not find base_js url in watch page html'

    info['base_js'] = normalize_url(match.group(1))
    # must uniquely identify url
    info['player_name'] = urllib.parse.urlparse(info['base_js']).path
    return False
|
||||
|
||||
def requires_decryption(info):
    '''Return the 's' entry of the first format, or a falsy value.

    Returns False when info has no 'formats' key, and the (falsy) formats
    value itself when it is empty or None. Otherwise returns whatever is
    stored under 's' in the first format (presumably its encrypted
    signature -- confirm against the format extraction code).'''
    if 'formats' not in info:
        return False

    formats = info['formats']
    if not formats:
        return formats

    return formats[0]['s']
|
||||
|
||||
|
||||
Reference in New Issue
Block a user