Fix related video extraction sometimes failing

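YouTube added some pointless variation in key names: related videos can now arrive as a 'videoWithContextRenderer' item, which stores its title under 'headline' instead of 'title'.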
This commit is contained in:
James Taylor 2020-04-10 13:09:38 -07:00
parent 3e09193eaf
commit 1224dd88a3

View File

@ -223,10 +223,14 @@ def extract_item_info(item, additional_info={}):
info['type'] = 'playlist' info['type'] = 'playlist'
elif primary_type == 'channel': elif primary_type == 'channel':
info['type'] = 'channel' info['type'] = 'channel'
elif type == 'videoWithContextRenderer': # stupid exception
info['type'] = 'video'
primary_type = 'video'
else: else:
info['type'] = 'unsupported' info['type'] = 'unsupported'
info['title'] = extract_str(item.get('title')) # videoWithContextRenderer changes it to 'headline' just to be annoying
info['title'] = extract_str(multi_get(item, 'title', 'headline'))
if primary_type != 'channel': if primary_type != 'channel':
info['author'] = extract_str(multi_get(item, 'longBylineText', 'shortBylineText', 'ownerText')) info['author'] = extract_str(multi_get(item, 'longBylineText', 'shortBylineText', 'ownerText'))
info['author_id'] = extract_str(multi_deep_get(item, info['author_id'] = extract_str(multi_deep_get(item,
@ -256,7 +260,10 @@ def extract_item_info(item, additional_info={}):
info['view_count'] = extract_int(item.get('viewCountText')) info['view_count'] = extract_int(item.get('viewCountText'))
# dig into accessibility data to get view_count for videos marked as recommended, and to get time_published # dig into accessibility data to get view_count for videos marked as recommended, and to get time_published
accessibility_label = deep_get(item, 'title', 'accessibility', 'accessibilityData', 'label', default='') accessibility_label = multi_deep_get(item,
['title', 'accessibility', 'accessibilityData', 'label'],
['headline', 'accessibility', 'accessibilityData', 'label'],
default='')
timestamp = re.search(r'(\d+ \w+ ago)', accessibility_label) timestamp = re.search(r'(\d+ \w+ ago)', accessibility_label)
if timestamp: if timestamp:
conservative_update(info, 'time_published', timestamp.group(1)) conservative_update(info, 'time_published', timestamp.group(1))
@ -333,6 +340,7 @@ _item_types = {
'videoRenderer', 'videoRenderer',
'compactVideoRenderer', 'compactVideoRenderer',
'compactAutoplayRenderer', 'compactAutoplayRenderer',
'videoWithContextRenderer',
'gridVideoRenderer', 'gridVideoRenderer',
'playlistVideoRenderer', 'playlistVideoRenderer',