Fix (dis)like, music list extraction due to YouTube changes (again)

YouTube reverted the changes they made that prompted f9f5d5ba.

In case they change their minds again, this adds support for both
formats.

The liberal_update and conservative_update functions needed to be
modified to treat empty values (such as an empty list) the same way
as None, so that a successfully extracted
'music_list': [{'Author':...},...] will not be overwritten by
'music_list': [] in the calls to liberal_dict_update.

Signed-off-by: Jesús <heckyel@hyperbola.info>
This commit is contained in:
James Taylor 2021-08-07 17:18:48 -07:00 committed by Jesús
parent 3dee7ea0d1
commit 2039972ab3
No known key found for this signature in database
GPG Key ID: F6EE7BC59A315766
2 changed files with 56 additions and 9 deletions

View File

@ -1,6 +1,7 @@
import re import re
import urllib.parse import urllib.parse
import collections import collections
import collections.abc
def get(object, key, default=None, types=()): def get(object, key, default=None, types=()):
'''Like dict.get(), but returns default if the result doesn't match one of the types. '''Like dict.get(), but returns default if the result doesn't match one of the types.
@ -62,17 +63,40 @@ def multi_deep_get(object, *key_sequences, default=None, types=()):
continue continue
return default return default
def _is_empty(value):
'''Determines if value is None or an empty iterable, such as '' and []'''
if value is None:
return True
elif isinstance(value, collections.abc.Iterable) and not value:
return True
return False
def liberal_update(obj, key, value): def liberal_update(obj, key, value):
'''Updates obj[key] with value as long as value is not None. '''Updates obj[key] with value as long as value is not None or empty.
Ensures obj[key] will at least get a value of None, however''' Ensures obj[key] will at least get an empty value, however'''
if (value is not None) or (key not in obj): if (not _is_empty(value)) or (key not in obj):
obj[key] = value obj[key] = value
def conservative_update(obj, key, value): def conservative_update(obj, key, value):
'''Only updates obj if it doesn't have key or obj[key] is None''' '''Only updates obj if it doesn't have key or obj[key] is None/empty'''
if obj.get(key) is None: if _is_empty(obj.get(key)):
obj[key] = value obj[key] = value
def liberal_dict_update(dict1, dict2):
    '''Update dict1 with keys from dict2 using liberal_update.

    Every (key, value) pair of dict2 is passed to
    liberal_update(dict1, key, value), so dict1 is modified in place.
    dict2 is only read. Returns None.'''
    for key, value in dict2.items():
        liberal_update(dict1, key, value)
def conservative_dict_update(dict1, dict2):
    '''Update dict1 with keys from dict2 using conservative_update.

    Every (key, value) pair of dict2 is passed to
    conservative_update(dict1, key, value), so dict1 is modified in place.
    dict2 is only read. Returns None.'''
    for key, value in dict2.items():
        conservative_update(dict1, key, value)
def concat_or_none(*strings): def concat_or_none(*strings):
'''Concatenates strings. Returns None if any of the arguments are None''' '''Concatenates strings. Returns None if any of the arguments are None'''
result = '' result = ''

View File

@ -2,7 +2,8 @@ from .common import (get, multi_get, deep_get, multi_deep_get,
liberal_update, conservative_update, remove_redirect, normalize_url, liberal_update, conservative_update, remove_redirect, normalize_url,
extract_str, extract_formatted_text, extract_int, extract_approx_int, extract_str, extract_formatted_text, extract_int, extract_approx_int,
extract_date, check_missing_keys, extract_item_info, extract_items, extract_date, check_missing_keys, extract_item_info, extract_items,
extract_response, concat_or_none) extract_response, concat_or_none, liberal_dict_update,
conservative_dict_update)
import json import json
import urllib.parse import urllib.parse
@ -211,13 +212,33 @@ def _extract_metadata_row_info(renderer_content):
return info return info
def _extract_from_video_metadata(renderer_content):
    '''Build an info dict from a single renderer that bundles several parts.

    Runs the video-information, like/dislike, owner and metadata-row
    extractors on renderer_content, merging each result into one dict with
    liberal_dict_update. Afterwards 'title', 'description' and
    'time_published' are set from renderer_content via liberal_update.

    renderer_content must support .get(); an empty dict is acceptable.
    Returns the merged info dict.'''
    info = _extract_from_video_information_renderer(renderer_content)
    liberal_dict_update(info, _extract_likes_dislikes(renderer_content))
    liberal_dict_update(info, _extract_from_owner_renderer(renderer_content))

    # The metadata rows are nested two levels down; fall back to {} so the
    # row extractor is still called when the container is absent
    metadata_rows = deep_get(
        renderer_content, 'metadataRowContainer',
        'metadataRowContainerRenderer', default={}
    )
    liberal_dict_update(info, _extract_metadata_row_info(metadata_rows))

    # Fields read directly from this renderer
    liberal_update(info, 'title', extract_str(renderer_content.get('title')))
    liberal_update(
        info, 'description',
        extract_str(renderer_content.get('description'), recover_urls=True)
    )
    liberal_update(info, 'time_published',
                   extract_date(renderer_content.get('dateText')))
    return info
visible_extraction_dispatch = { visible_extraction_dispatch = {
# Either these ones spread around in various places
'slimVideoInformationRenderer': _extract_from_video_information_renderer, 'slimVideoInformationRenderer': _extract_from_video_information_renderer,
'slimVideoActionBarRenderer': _extract_likes_dislikes, 'slimVideoActionBarRenderer': _extract_likes_dislikes,
'slimOwnerRenderer': _extract_from_owner_renderer, 'slimOwnerRenderer': _extract_from_owner_renderer,
'videoDescriptionHeaderRenderer': _extract_from_video_header_renderer, 'videoDescriptionHeaderRenderer': _extract_from_video_header_renderer,
'expandableVideoDescriptionRenderer': _extract_from_description_renderer, 'expandableVideoDescriptionRenderer': _extract_from_description_renderer,
'metadataRowContainerRenderer': _extract_metadata_row_info, 'metadataRowContainerRenderer': _extract_metadata_row_info,
# OR just this one, which contains SOME of the above inside it
'slimVideoMetadataRenderer': _extract_from_video_metadata,
} }
def _extract_watch_info_mobile(top_level): def _extract_watch_info_mobile(top_level):
@ -265,12 +286,14 @@ def _extract_watch_info_mobile(top_level):
for renderer in items: for renderer in items:
name, renderer_content = list(renderer.items())[0] name, renderer_content = list(renderer.items())[0]
found.add(name) found.add(name)
info.update(visible_extraction_dispatch[name](renderer_content)) liberal_dict_update(
info,
visible_extraction_dispatch[name](renderer_content)
)
# Call the function on blank dict for any that weren't found # Call the function on blank dict for any that weren't found
# so that the empty keys get added # so that the empty keys get added
for name in visible_extraction_dispatch.keys() - found: for name in visible_extraction_dispatch.keys() - found:
info.update(visible_extraction_dispatch[name]({})) liberal_dict_update(info, visible_extraction_dispatch[name]({}))
# comment section info # comment section info
items, _ = extract_items(response, item_types={ items, _ = extract_items(response, item_types={