Support more audio and video qualities

Adds support for AV1-encoded videos, which includes any videos
above 1080p. These weren't getting included because they did
not have a quality entry in the format table at the top of
watch_extraction.py. So fall back to the quality from the
format's quality labels when the table has no entry for it.

Because YouTube often includes BOTH AV1 and H.264 (AVC) for each
quality, after these are included, there will be way too many
quality options and the code needs to choose which one to use.
The choice is somewhat hard: AV1 is encoded in fewer bytes than
H.264 and is patent-free; however, it has less hardware support,
so it might be more difficult to play. For instance, on my system,
AV1 does not work at 1080p, but H.264 does. Adds a setting for
which codec to prefer, with H.264 as the default.

Also adds support for the lower-quality mp4 audio format, which
now gets used at 144p to save network bandwidth. As with AV1,
this was not getting included because it did not have an
audio_bitrate entry in the table. Prefer the format's bitrate
field instead for the audio quality.

Signed-off-by: Jesús <heckyel@hyperbola.info>
This commit is contained in:
James Taylor 2021-08-31 13:38:28 -07:00 committed by Jesús
parent 30e59081b1
commit 7c79f530a5
No known key found for this signature in database
GPG Key ID: F6EE7BC59A315766
4 changed files with 73 additions and 6 deletions

View File

@ -168,6 +168,17 @@ For security reasons, enabling this is not recommended.''',
'category': 'playback', 'category': 'playback',
}), }),
('preferred_video_codec', {
'type': int,
'default': 0,
'comment': '',
'options': [
(0, 'h.264'),
(1, 'AV1'),
],
'category': 'playback',
}),
('prefer_uni_sources', { ('prefer_uni_sources', {
'label': 'Prefer integrated sources', 'label': 'Prefer integrated sources',
'type': bool, 'type': bool,

View File

@ -56,7 +56,10 @@ def get_video_sources(info, target_resolution):
continue continue
# audio source # audio source
if fmt['acodec'] and not fmt['vcodec'] and fmt['audio_bitrate']: if fmt['acodec'] and not fmt['vcodec'] and (
fmt['audio_bitrate'] or fmt['bitrate']):
if fmt['bitrate']: # prefer this one, more accurate right now
fmt['audio_bitrate'] = int(fmt['bitrate']/1000)
source = { source = {
'type': 'audio/' + fmt['ext'], 'type': 'audio/' + fmt['ext'],
'bitrate': fmt['audio_bitrate'], 'bitrate': fmt['audio_bitrate'],
@ -77,15 +80,51 @@ def get_video_sources(info, target_resolution):
+ source['vcodec'] + '"') + source['vcodec'] + '"')
video_only_sources.append(source) video_only_sources.append(source)
# Remove alternative mp4 codecs from video sources
def codec_name(vcodec):
if vcodec.startswith('avc'):
return 'h.264'
elif vcodec.startswith('av01'):
return 'av1'
else:
return 'unknown'
quality_to_codecs = {}
for src in video_only_sources:
if src['quality'] in quality_to_codecs:
quality_to_codecs[src['quality']].add(codec_name(src['vcodec']))
else:
quality_to_codecs[src['quality']] = {codec_name(src['vcodec'])}
i = 0
while i < len(video_only_sources):
src = video_only_sources[i]
codecs_for_quality = quality_to_codecs[src['quality']]
have_both = ('h.264' in codecs_for_quality
and 'av1' in codecs_for_quality)
have_one = ('h.264' in codecs_for_quality
or 'av1' in codecs_for_quality)
name = codec_name(src['vcodec'])
if name == 'unknown' and have_one:
del video_only_sources[i]
continue
if not have_both:
i += 1
continue
if name == 'av1' and settings.preferred_video_codec == 0:
del video_only_sources[i]
elif name == 'h.264' and settings.preferred_video_codec == 1:
del video_only_sources[i]
else:
i += 1
audio_sources.sort(key=lambda source: source['audio_bitrate']) audio_sources.sort(key=lambda source: source['audio_bitrate'])
video_only_sources.sort(key=lambda src: src['quality']) video_only_sources.sort(key=lambda src: src['quality'])
uni_sources.sort(key=lambda src: src['quality']) uni_sources.sort(key=lambda src: src['quality'])
for source in video_only_sources: for source in video_only_sources:
# choose an audio source to go with it # choose an audio source to go with it
# 0.15 is semiarbitrary empirical constant to spread audio sources # 0.5 is semiarbitrary empirical constant to spread audio sources
# between 144p and 1080p. Use something better eventually. # between 144p and 1080p. Use something better eventually.
target_audio_bitrate = source['quality']*source.get('fps', 30)/30*0.15 target_audio_bitrate = source['quality']*source.get('fps', 30)/30*0.5
compat_audios = [a for a in audio_sources if a['ext'] == source['ext']] compat_audios = [a for a in audio_sources if a['ext'] == source['ext']]
if compat_audios: if compat_audios:
closest_audio_source = compat_audios[0] closest_audio_source = compat_audios[0]
@ -421,7 +460,13 @@ def video_quality_string(format):
def short_video_quality_string(fmt): def short_video_quality_string(fmt):
result = str(fmt['quality'] or '?') + 'p' result = str(fmt['quality'] or '?') + 'p'
if fmt['fps']: if fmt['fps']:
result += ' ' + str(fmt['fps']) + 'fps' result += str(fmt['fps'])
if fmt['vcodec'].startswith('av01'):
result += ' AV1'
elif fmt['vcodec'].startswith('avc'):
result += ' h264'
else:
result += ' ' + fmt['vcodec']
return result return result

View File

@ -166,14 +166,17 @@ def extract_formatted_text(node):
return [{'text': node['simpleText']}] return [{'text': node['simpleText']}]
return [] return []
def extract_int(string, default=None): def extract_int(string, default=None, whole_word=True):
if isinstance(string, int): if isinstance(string, int):
return string return string
if not isinstance(string, str): if not isinstance(string, str):
string = extract_str(string) string = extract_str(string)
if not string: if not string:
return default return default
match = re.search(r'\b(\d+)\b', string.replace(',', '')) if whole_word:
match = re.search(r'\b(\d+)\b', string.replace(',', ''))
else:
match = re.search(r'(\d+)', string.replace(',', ''))
if match is None: if match is None:
return default return default
try: try:

View File

@ -445,6 +445,14 @@ def _extract_formats(info, player_response):
for key, value in hardcoded_itag_info.items(): for key, value in hardcoded_itag_info.items():
conservative_update(fmt, key, value) # prefer info from YouTube conservative_update(fmt, key, value) # prefer info from YouTube
fmt['quality'] = hardcoded_itag_info.get('height') fmt['quality'] = hardcoded_itag_info.get('height')
conservative_update(
fmt, 'quality',
extract_int(yt_fmt.get('quality'), whole_word=False)
)
conservative_update(
fmt, 'quality',
extract_int(yt_fmt.get('qualityLabel'), whole_word=False)
)
info['formats'].append(fmt) info['formats'].append(fmt)