Extraction: Add signature decryption
This commit is contained in:
parent
4c07546e7a
commit
70b56d6eef
149
youtube/watch.py
149
youtube/watch.py
@ -11,8 +11,14 @@ import gevent
|
|||||||
import os
|
import os
|
||||||
import math
|
import math
|
||||||
import traceback
|
import traceback
|
||||||
|
import re
|
||||||
|
import urllib
|
||||||
|
|
||||||
|
# Load the cache of previously parsed decryption functions, keyed by player
# name. A missing, corrupt or wrongly shaped cache file is not fatal: start
# with an empty cache instead of failing at import time.
try:
    with open(os.path.join(settings.data_dir, 'decrypt_function_cache.json'), 'r') as f:
        decrypt_cache = json.load(f)['decrypt_cache']
    if not isinstance(decrypt_cache, dict):
        raise TypeError('decrypt_cache is not a JSON object')
except FileNotFoundError:
    decrypt_cache = {}
except (ValueError, KeyError, TypeError):
    # json.JSONDecodeError is a subclass of ValueError
    print('Warning: could not read decrypt_function_cache.json, ignoring it')
    decrypt_cache = {}
|
||||||
|
|
||||||
|
|
||||||
def get_video_sources(info):
|
def get_video_sources(info):
|
||||||
@ -22,9 +28,9 @@ def get_video_sources(info):
|
|||||||
else:
|
else:
|
||||||
max_resolution = settings.default_resolution
|
max_resolution = settings.default_resolution
|
||||||
for format in info['formats']:
|
for format in info['formats']:
|
||||||
if not all(attr in format for attr in ('height', 'width', 'ext', 'url')):
|
if not all(format[attr] for attr in ('height', 'width', 'ext', 'url')):
|
||||||
continue
|
continue
|
||||||
if 'acodec' in format and 'vcodec' in format and format['height'] <= max_resolution:
|
if format['acodec'] and format['vcodec'] and format['height'] <= max_resolution:
|
||||||
video_sources.append({
|
video_sources.append({
|
||||||
'src': format['url'],
|
'src': format['url'],
|
||||||
'type': 'video/' + format['ext'],
|
'type': 'video/' + format['ext'],
|
||||||
@ -101,6 +107,112 @@ def get_ordered_music_list_attributes(music_list):
|
|||||||
|
|
||||||
return ordered_attributes
|
return ordered_attributes
|
||||||
|
|
||||||
|
def save_decrypt_cache():
    '''Write decrypt_cache to decrypt_function_cache.json in settings.data_dir.

    If opening the file fails with FileNotFoundError, settings.data_dir is
    created and the open is retried once.
    '''
    cache_path = os.path.join(settings.data_dir, 'decrypt_function_cache.json')

    # Serialize before opening: opening with 'w' truncates the file, so a
    # serialization error afterwards would destroy the existing cache.
    serialized = json.dumps({'version': 1, 'decrypt_cache': decrypt_cache}, indent=4, sort_keys=True)

    try:
        f = open(cache_path, 'w')
    except FileNotFoundError:
        os.makedirs(settings.data_dir)
        f = open(cache_path, 'w')

    # the with block closes the file even if the write raises
    with f:
        f.write(serialized)
|
||||||
|
|
||||||
|
# adapted from youtube-dl and invidious:
# https://github.com/omarroth/invidious/blob/master/src/invidious/helpers/signatures.cr
decrypt_function_re = re.compile(r'function\(a\)\{(a=a\.split\(""\)[^\}]+)\}')
op_with_arg_re = re.compile(r'[^\.]+\.([^\(]+)\(a,(\d+)\)')

# Operation codes as stored in decrypt_cache (and so in the cache file written
# by save_decrypt_cache); the numeric values must not change.
_OP_REVERSE = 0  # a.reverse()
_OP_SPLICE = 1   # a.splice(0,b): drop the first b characters
_OP_SWAP = 2     # swap a[0] with a[b % a.length]


def _parse_decryption_function(base_js):
    '''Parse the text of base.js into a list of [op_code, argument] pairs.

    Returns (decryption_function, None) on success, or (None, error_string)
    if the expected javascript could not be found or understood.
    '''
    decrypt_function_match = decrypt_function_re.search(base_js)
    if decrypt_function_match is None:
        return None, 'Could not find decryption function in base.js'

    # The first statement is the a=a.split("") required by the regex; the last
    # is presumably the return statement. Neither is an operation call.
    function_body = decrypt_function_match.group(1).split(';')[1:-1]
    if not function_body:
        return None, 'Empty decryption function body'

    # Operation calls are matched as <var_name>.<op_name>(a,<number>); the
    # operations themselves are looked up in "var <var_name>={...};"
    var_name = yt_data_extract.default_get(function_body[0].split('.'), 0)
    if var_name is None:
        return None, 'Could not find var_name'

    var_body_match = re.search(r'var ' + re.escape(var_name) + r'=\{(.*?)\};', base_js, flags=re.DOTALL)
    if var_body_match is None:
        return None, 'Could not find var_body'

    # str.split with a separator always returns at least one element, so
    # operations cannot be empty here; an empty var body is reported as
    # 'Could not parse operation' by the loop below.
    operations = var_body_match.group(1).replace('\n', '').split('},')
    operations[-1] = operations[-1][:-1]    # remove the trailing '}' since we split by '},' on the others

    operation_definitions = {}
    for op in operations:
        colon_index = op.find(':')
        opening_brace_index = op.find('{')
        if colon_index == -1 or opening_brace_index == -1:
            return None, 'Could not parse operation'

        op_name = op[:colon_index]
        op_body = op[opening_brace_index+1:]
        if op_body == 'a.reverse()':
            operation_definitions[op_name] = _OP_REVERSE
        elif op_body == 'a.splice(0,b)':
            operation_definitions[op_name] = _OP_SPLICE
        elif op_body.startswith('var c=a[0]'):
            operation_definitions[op_name] = _OP_SWAP
        else:
            return None, 'Unknown op_body: ' + op_body

    decryption_function = []
    for op_with_arg in function_body:
        match = op_with_arg_re.fullmatch(op_with_arg)
        if match is None:
            return None, 'Could not parse operation with arg'
        op_name = match.group(1)
        if op_name not in operation_definitions:
            return None, 'Unknown op_name: ' + op_name
        op_argument = match.group(2)
        decryption_function.append([operation_definitions[op_name], int(op_argument)])

    return decryption_function, None


def decrypt_signatures(info):
    '''return error string, or False if no errors

    If the first entry of info['formats'] has an 's' value, decrypts the 's'
    value of every format and appends it to that format's 'url' as the query
    parameter named by 'sp' (modifies the format dicts in place). The
    decryption function is taken from decrypt_cache if the player is in
    there; otherwise base.js is fetched and parsed, and the result is added
    to the cache and saved.'''
    if not info['formats'] or not info['formats'][0]['s']:
        return False    # No decryption needed
    if not info['base_js']:
        return 'Failed to find base.js'

    # The second-to-last component of the base.js url is used as the cache key
    player_name = yt_data_extract.default_get(info['base_js'].split('/'), -2)
    if not player_name:
        return 'Could not find player name'

    if player_name in decrypt_cache:
        print('Using cached decryption function for: ' + player_name)
        decryption_function = decrypt_cache[player_name]
    else:
        base_js = util.fetch_url(info['base_js'], debug_name='base.js', report_text='Fetched player ' + player_name)
        base_js = base_js.decode('utf-8')

        decryption_function, error = _parse_decryption_function(base_js)
        if error is not None:
            return error

        decrypt_cache[player_name] = decryption_function
        save_decrypt_cache()

    for fmt in info['formats']:
        if not fmt['s'] or not fmt['sp'] or not fmt['url']:
            print('Warning: s, sp, or url not in format')
            continue

        a = list(fmt['s'])
        for op, argument in decryption_function:
            if op == _OP_REVERSE:
                a.reverse()
            elif op == _OP_SPLICE:
                a = a[argument:]
            else:
                operation_2(a, argument)

        signature = ''.join(a)
        fmt['url'] += '&' + fmt['sp'] + '=' + signature
    return False
|
||||||
|
|
||||||
|
def operation_2(a, b):
    '''Swap a[0] with a[b % len(a)], modifying the list a in place.

    Does nothing if a is empty, since the modulo by len(a) would otherwise
    raise ZeroDivisionError.'''
    if not a:
        return
    index = b % len(a)
    a[0], a[index] = a[index], a[0]
|
||||||
|
|
||||||
headers = (
|
headers = (
|
||||||
('Accept', '*/*'),
|
('Accept', '*/*'),
|
||||||
('Accept-Language', 'en-US,en;q=0.5'),
|
('Accept-Language', 'en-US,en;q=0.5'),
|
||||||
@ -115,26 +227,31 @@ def extract_info(video_id):
|
|||||||
except json.decoder.JSONDecodeError:
|
except json.decoder.JSONDecodeError:
|
||||||
traceback.print_exc()
|
traceback.print_exc()
|
||||||
return {'error': 'Failed to parse json response'}
|
return {'error': 'Failed to parse json response'}
|
||||||
return yt_data_extract.extract_watch_info(polymer_json)
|
info = yt_data_extract.extract_watch_info(polymer_json)
|
||||||
|
error = decrypt_signatures(info)
|
||||||
|
if error:
|
||||||
|
print('Error decrypting url signatures: ' + error)
|
||||||
|
info['playability_error'] = error
|
||||||
|
return info
|
||||||
|
|
||||||
def video_quality_string(format):
|
def video_quality_string(format):
|
||||||
if 'vcodec' in format:
|
if format['vcodec']:
|
||||||
result =str(format.get('width', '?')) + 'x' + str(format.get('height', '?'))
|
result =str(format['width'] or '?') + 'x' + str(format['height'] or '?')
|
||||||
if 'fps' in format:
|
if format['fps']:
|
||||||
result += ' ' + format['fps'] + 'fps'
|
result += ' ' + str(format['fps']) + 'fps'
|
||||||
return result
|
return result
|
||||||
elif 'acodec' in format:
|
elif format['acodec']:
|
||||||
return 'audio only'
|
return 'audio only'
|
||||||
|
|
||||||
return '?'
|
return '?'
|
||||||
|
|
||||||
def audio_quality_string(format):
|
def audio_quality_string(format):
|
||||||
if 'acodec' in format:
|
if format['acodec']:
|
||||||
result = str(format.get('abr', '?')) + 'k'
|
result = str(format['audio_bitrate'] or '?') + 'k'
|
||||||
if 'audio_sample_rate' in format:
|
if format['audio_sample_rate']:
|
||||||
result += ' ' + str(format['audio_sample_rate']) + ' Hz'
|
result += ' ' + str(format['audio_sample_rate']) + ' Hz'
|
||||||
return result
|
return result
|
||||||
elif 'vcodec' in format:
|
elif format['vcodec']:
|
||||||
return 'video only'
|
return 'video only'
|
||||||
|
|
||||||
return '?'
|
return '?'
|
||||||
@ -193,13 +310,13 @@ def get_watch_page():
|
|||||||
download_formats = []
|
download_formats = []
|
||||||
|
|
||||||
for format in info['formats']:
|
for format in info['formats']:
|
||||||
if 'acodec' in format and 'vcodec' in format:
|
if format['acodec'] and format['vcodec']:
|
||||||
codecs_string = format['acodec'] + ', ' + format['vcodec']
|
codecs_string = format['acodec'] + ', ' + format['vcodec']
|
||||||
else:
|
else:
|
||||||
codecs_string = format.get('acodec') or format.get('vcodec') or '?'
|
codecs_string = format['acodec'] or format['vcodec'] or '?'
|
||||||
download_formats.append({
|
download_formats.append({
|
||||||
'url': format['url'],
|
'url': format['url'],
|
||||||
'ext': format.get('ext', '?'),
|
'ext': format['ext'] or '?',
|
||||||
'audio_quality': audio_quality_string(format),
|
'audio_quality': audio_quality_string(format),
|
||||||
'video_quality': video_quality_string(format),
|
'video_quality': video_quality_string(format),
|
||||||
'file_size': format_bytes(format['file_size']),
|
'file_size': format_bytes(format['file_size']),
|
||||||
|
@ -39,44 +39,44 @@ import traceback
|
|||||||
|
|
||||||
# from https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/youtube.py
|
# from https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/youtube.py
|
||||||
_formats = {
|
_formats = {
|
||||||
'5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
|
'5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'audio_bitrate': 64, 'vcodec': 'h263'},
|
||||||
'6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
|
'6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'audio_bitrate': 64, 'vcodec': 'h263'},
|
||||||
'13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
|
'13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
|
||||||
'17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
|
'17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'audio_bitrate': 24, 'vcodec': 'mp4v'},
|
||||||
'18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
|
'18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'audio_bitrate': 96, 'vcodec': 'h264'},
|
||||||
'22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
|
'22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'audio_bitrate': 192, 'vcodec': 'h264'},
|
||||||
'34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
|
'34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'audio_bitrate': 128, 'vcodec': 'h264'},
|
||||||
'35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
|
'35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'audio_bitrate': 128, 'vcodec': 'h264'},
|
||||||
# itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
|
# itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), audio_bitrate varies as well
|
||||||
'36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
|
'36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
|
||||||
'37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
|
'37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'audio_bitrate': 192, 'vcodec': 'h264'},
|
||||||
'38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
|
'38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'audio_bitrate': 192, 'vcodec': 'h264'},
|
||||||
'43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
|
'43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'audio_bitrate': 128, 'vcodec': 'vp8'},
|
||||||
'44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
|
'44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'audio_bitrate': 128, 'vcodec': 'vp8'},
|
||||||
'45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
|
'45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'audio_bitrate': 192, 'vcodec': 'vp8'},
|
||||||
'46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
|
'46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'audio_bitrate': 192, 'vcodec': 'vp8'},
|
||||||
'59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
|
'59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'audio_bitrate': 128, 'vcodec': 'h264'},
|
||||||
'78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
|
'78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'audio_bitrate': 128, 'vcodec': 'h264'},
|
||||||
|
|
||||||
|
|
||||||
# 3D videos
|
# 3D videos
|
||||||
'82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
|
'82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'audio_bitrate': 128, 'vcodec': 'h264'},
|
||||||
'83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
|
'83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'audio_bitrate': 128, 'vcodec': 'h264'},
|
||||||
'84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
|
'84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'audio_bitrate': 192, 'vcodec': 'h264'},
|
||||||
'85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
|
'85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'audio_bitrate': 192, 'vcodec': 'h264'},
|
||||||
'100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
|
'100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'audio_bitrate': 128, 'vcodec': 'vp8'},
|
||||||
'101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
|
'101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'audio_bitrate': 192, 'vcodec': 'vp8'},
|
||||||
'102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
|
'102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'audio_bitrate': 192, 'vcodec': 'vp8'},
|
||||||
|
|
||||||
# Apple HTTP Live Streaming
|
# Apple HTTP Live Streaming
|
||||||
'91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264'},
|
'91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'audio_bitrate': 48, 'vcodec': 'h264'},
|
||||||
'92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264'},
|
'92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'audio_bitrate': 48, 'vcodec': 'h264'},
|
||||||
'93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
|
'93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'audio_bitrate': 128, 'vcodec': 'h264'},
|
||||||
'94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
|
'94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'audio_bitrate': 128, 'vcodec': 'h264'},
|
||||||
'95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264'},
|
'95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'audio_bitrate': 256, 'vcodec': 'h264'},
|
||||||
'96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264'},
|
'96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'audio_bitrate': 256, 'vcodec': 'h264'},
|
||||||
'132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264'},
|
'132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'audio_bitrate': 48, 'vcodec': 'h264'},
|
||||||
'151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264'},
|
'151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'audio_bitrate': 24, 'vcodec': 'h264'},
|
||||||
|
|
||||||
# DASH mp4 video
|
# DASH mp4 video
|
||||||
'133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
|
'133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
|
||||||
@ -93,9 +93,9 @@ _formats = {
|
|||||||
'266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
|
'266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
|
||||||
|
|
||||||
# Dash mp4 audio
|
# Dash mp4 audio
|
||||||
'139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
|
'139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'audio_bitrate': 48, 'container': 'm4a_dash'},
|
||||||
'140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
|
'140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'audio_bitrate': 128, 'container': 'm4a_dash'},
|
||||||
'141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
|
'141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'audio_bitrate': 256, 'container': 'm4a_dash'},
|
||||||
'256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
|
'256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
|
||||||
'258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
|
'258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
|
||||||
'325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
|
'325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
|
||||||
@ -126,13 +126,13 @@ _formats = {
|
|||||||
'315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
|
'315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
|
||||||
|
|
||||||
# Dash webm audio
|
# Dash webm audio
|
||||||
'171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
|
'171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'audio_bitrate': 128},
|
||||||
'172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
|
'172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'audio_bitrate': 256},
|
||||||
|
|
||||||
# Dash webm audio with opus inside
|
# Dash webm audio with opus inside
|
||||||
'249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
|
'249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'audio_bitrate': 50},
|
||||||
'250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
|
'250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'audio_bitrate': 70},
|
||||||
'251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
|
'251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'audio_bitrate': 160},
|
||||||
|
|
||||||
# RTMP (unnamed)
|
# RTMP (unnamed)
|
||||||
'_rtmp': {'protocol': 'rtmp'},
|
'_rtmp': {'protocol': 'rtmp'},
|
||||||
@ -1042,39 +1042,32 @@ def extract_watch_info(polymer_json):
|
|||||||
|
|
||||||
|
|
||||||
player_args = default_multi_get(top_level, 'player', 'args', default={})
|
player_args = default_multi_get(top_level, 'player', 'args', default={})
|
||||||
parsed_formats = []
|
player_response = json.loads(player_args['player_response']) if 'player_response' in player_args else {}
|
||||||
|
streaming_data = player_response.get('streamingData', {})
|
||||||
if 'url_encoded_fmt_stream_map' in player_args:
|
yt_formats = streaming_data.get('formats', []) + streaming_data.get('adaptiveFormats', [])
|
||||||
string_formats = player_args['url_encoded_fmt_stream_map'].split(',')
|
|
||||||
parsed_formats += [dict(urllib.parse.parse_qsl(fmt_string)) for fmt_string in string_formats if fmt_string]
|
|
||||||
|
|
||||||
if 'adaptive_fmts' in player_args:
|
|
||||||
string_formats = player_args['adaptive_fmts'].split(',')
|
|
||||||
parsed_formats += [dict(urllib.parse.parse_qsl(fmt_string)) for fmt_string in string_formats if fmt_string]
|
|
||||||
|
|
||||||
info['formats'] = []
|
info['formats'] = []
|
||||||
|
|
||||||
for parsed_fmt in parsed_formats:
|
for yt_fmt in yt_formats:
|
||||||
# start with defaults from the big table at the top
|
fmt = {}
|
||||||
if 'itag' in parsed_fmt:
|
fmt['ext'] = None
|
||||||
fmt = _formats.get(parsed_fmt['itag'], {}).copy()
|
fmt['audio_bitrate'] = None
|
||||||
|
fmt['acodec'] = None
|
||||||
|
fmt['vcodec'] = None
|
||||||
|
fmt['width'] = yt_fmt.get('width')
|
||||||
|
fmt['height'] = yt_fmt.get('height')
|
||||||
|
fmt['file_size'] = yt_fmt.get('contentLength')
|
||||||
|
fmt['audio_sample_rate'] = yt_fmt.get('audioSampleRate')
|
||||||
|
fmt['fps'] = yt_fmt.get('fps')
|
||||||
|
cipher = dict(urllib.parse.parse_qsl(yt_fmt.get('cipher', '')))
|
||||||
|
if cipher:
|
||||||
|
fmt['url'] = cipher.get('url')
|
||||||
else:
|
else:
|
||||||
fmt = {}
|
fmt['url'] = yt_fmt.get('url')
|
||||||
|
fmt['s'] = cipher.get('s')
|
||||||
|
fmt['sp'] = cipher.get('sp')
|
||||||
|
fmt.update(_formats.get(str(yt_fmt.get('itag')), {}))
|
||||||
|
|
||||||
# then override them
|
|
||||||
fmt.update(parsed_fmt)
|
|
||||||
try:
|
|
||||||
fmt['width'], fmt['height'] = map(int, fmt['size'].split('x'))
|
|
||||||
except (KeyError, ValueError, TypeError):
|
|
||||||
pass
|
|
||||||
|
|
||||||
fmt['file_size'] = None
|
|
||||||
if 'clen' in fmt:
|
|
||||||
fmt['file_size'] = int(fmt.get('clen'))
|
|
||||||
else:
|
|
||||||
match = re.search(r'&clen=(\d+)', fmt.get('url'))
|
|
||||||
if match:
|
|
||||||
fmt['file_size'] = int(match.group(1))
|
|
||||||
info['formats'].append(fmt)
|
info['formats'].append(fmt)
|
||||||
|
|
||||||
info['base_js'] = default_multi_get(top_level, 'player', 'assets', 'js')
|
info['base_js'] = default_multi_get(top_level, 'player', 'assets', 'js')
|
||||||
@ -1104,5 +1097,4 @@ def extract_watch_info(polymer_json):
|
|||||||
# other stuff
|
# other stuff
|
||||||
info['author_url'] = 'https://www.youtube.com/channel/' + info['author_id'] if info['author_id'] else None
|
info['author_url'] = 'https://www.youtube.com/channel/' + info['author_id'] if info['author_id'] else None
|
||||||
info['subtitles'] = {} # TODO
|
info['subtitles'] = {} # TODO
|
||||||
|
|
||||||
return info
|
return info
|
||||||
|
Loading…
x
Reference in New Issue
Block a user