Check for 403 errors and fall back on Invidious

403 errors on the video URLs typically happen when a video has copyrighted content or was originally livestreamed. However, they appear not to happen (or at least happen less frequently) if the Tor exit node uses IPv6.
This commit is contained in:
James Taylor 2020-02-01 15:09:37 -08:00
parent e364927f83
commit 7c2736aa26
3 changed files with 63 additions and 1 deletions

View File

@ -195,6 +195,14 @@ def fetch_url(url, headers=(), timeout=15, report_text=None, data=None, cookieja
return content, response
return content
def head(url, use_tor=False, report_text=None):
    """Send an HTTP HEAD request to ``url`` and return the response.

    Args:
        url: The URL to request.
        use_tor: Request the Tor-routed pool. The value handed to
            ``get_pool`` is ``use_tor and settings.route_tor``, so this
            only takes effect when ``settings.route_tor`` is also truthy.
        report_text: If truthy, this text is printed along with the
            measured request latency in seconds (rounded to 3 places).

    Returns:
        Whatever ``pool.request('HEAD', url)`` returns (presumably a
        urllib3 response object exposing ``.status`` -- confirm against
        ``get_pool``).
    """
    pool = get_pool(use_tor and settings.route_tor)

    # Use the monotonic clock for the elapsed-time measurement: unlike
    # time.time(), it cannot jump if the system clock is adjusted while
    # the request is in flight.
    start_time = time.monotonic()
    response = pool.request('HEAD', url)
    if report_text:
        latency = round(time.monotonic() - start_time, 3)
        print(report_text, ' Latency:', latency)
    return response
mobile_user_agent = 'Mozilla/5.0 (Linux; Android 7.0; Redmi Note 4 Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Mobile Safari/537.36'
mobile_ua = (('User-Agent', mobile_user_agent),)
desktop_user_agent = 'Mozilla/5.0 (Windows NT 6.1; rv:52.0) Gecko/20100101 Firefox/52.0'

View File

@ -12,6 +12,8 @@ import os
import math
import traceback
import urllib
import re
import urllib3.exceptions
try:
with open(os.path.join(settings.data_dir, 'decrypt_function_cache.json'), 'r') as f:
@ -232,6 +234,57 @@ def extract_info(video_id):
decryption_error = 'Error decrypting url signatures: ' + decryption_error
info['playability_error'] = decryption_error
# check for 403
if settings.route_tor and info['formats'] and info['formats'][0]['url']:
response = util.head(info['formats'][0]['url'],
report_text='Checked for URL access')
if response.status == 403:
print(('Access denied (403) for video urls.'
' Retrieving urls from Invidious...'))
try:
video_info = util.fetch_url(
'https://invidio.us/api/v1/videos/'
+ video_id
+ '?fields=adaptiveFormats,formatStreams',
report_text='Retrieved urls from Invidious',
debug_name='invidious_urls')
except (urllib3.exceptions.HTTPError) as e:
traceback.print_exc()
playability_error = ('Access denied (403) for video urls.'
+ ' Failed to use Invidious to get the urls: '
+ str(e))
if info['playability_error']:
info['playability_error'] += '\n' + playability_error
else:
info['playability_error'] = playability_error
return info
video_info = json.loads(video_info.decode('utf-8'))
info['formats'] = []
for fmt in (video_info['adaptiveFormats']
+ video_info['formatStreams']):
# adjust keys to match our conventions
fmt['file_size'] = fmt.get('clen')
fmt['ext'] = fmt.get('container')
if 'resolution' in fmt:
fmt['height'] = int(fmt['resolution'].rstrip('p'))
# update with information from _formats table such as ext
itag = fmt.get('itag')
fmt.update(yt_data_extract._formats.get(itag, {}))
# extract acodec, vcodec, and ext
# (need for 'ext' because 'container' not always present)
yt_data_extract.update_format_with_type_info(fmt, fmt)
# ensure keys are present
for key in ('ext', 'audio_bitrate', 'acodec', 'vcodec',
'width', 'height', 'audio_sample_rate', 'fps'):
if key not in fmt:
fmt[key] = None
info['formats'].append(fmt)
return info
def video_quality_string(format):

View File

@ -8,4 +8,5 @@ from .everything_else import (extract_channel_info, extract_search_info,
from .watch_extraction import (extract_watch_info, get_caption_url,
update_with_age_restricted_info, requires_decryption,
extract_decryption_function, decrypt_signatures)
extract_decryption_function, decrypt_signatures, _formats,
update_format_with_type_info)