Fix related video extraction sometimes failing
YouTube added some pointless variation in variable names: the videoWithContextRenderer item type stores its title under 'headline' instead of 'title'
This commit is contained in:
parent
3e09193eaf
commit
1224dd88a3
@@ -223,10 +223,14 @@ def extract_item_info(item, additional_info={}):
|
|||||||
info['type'] = 'playlist'
|
info['type'] = 'playlist'
|
||||||
elif primary_type == 'channel':
|
elif primary_type == 'channel':
|
||||||
info['type'] = 'channel'
|
info['type'] = 'channel'
|
||||||
|
elif type == 'videoWithContextRenderer': # stupid exception
|
||||||
|
info['type'] = 'video'
|
||||||
|
primary_type = 'video'
|
||||||
else:
|
else:
|
||||||
info['type'] = 'unsupported'
|
info['type'] = 'unsupported'
|
||||||
|
|
||||||
info['title'] = extract_str(item.get('title'))
|
# videoWithContextRenderer changes it to 'headline' just to be annoying
|
||||||
|
info['title'] = extract_str(multi_get(item, 'title', 'headline'))
|
||||||
if primary_type != 'channel':
|
if primary_type != 'channel':
|
||||||
info['author'] = extract_str(multi_get(item, 'longBylineText', 'shortBylineText', 'ownerText'))
|
info['author'] = extract_str(multi_get(item, 'longBylineText', 'shortBylineText', 'ownerText'))
|
||||||
info['author_id'] = extract_str(multi_deep_get(item,
|
info['author_id'] = extract_str(multi_deep_get(item,
|
||||||
@@ -256,7 +260,10 @@ def extract_item_info(item, additional_info={}):
|
|||||||
info['view_count'] = extract_int(item.get('viewCountText'))
|
info['view_count'] = extract_int(item.get('viewCountText'))
|
||||||
|
|
||||||
# dig into accessibility data to get view_count for videos marked as recommended, and to get time_published
|
# dig into accessibility data to get view_count for videos marked as recommended, and to get time_published
|
||||||
accessibility_label = deep_get(item, 'title', 'accessibility', 'accessibilityData', 'label', default='')
|
accessibility_label = multi_deep_get(item,
|
||||||
|
['title', 'accessibility', 'accessibilityData', 'label'],
|
||||||
|
['headline', 'accessibility', 'accessibilityData', 'label'],
|
||||||
|
default='')
|
||||||
timestamp = re.search(r'(\d+ \w+ ago)', accessibility_label)
|
timestamp = re.search(r'(\d+ \w+ ago)', accessibility_label)
|
||||||
if timestamp:
|
if timestamp:
|
||||||
conservative_update(info, 'time_published', timestamp.group(1))
|
conservative_update(info, 'time_published', timestamp.group(1))
|
||||||
@@ -333,6 +340,7 @@ _item_types = {
|
|||||||
'videoRenderer',
|
'videoRenderer',
|
||||||
'compactVideoRenderer',
|
'compactVideoRenderer',
|
||||||
'compactAutoplayRenderer',
|
'compactAutoplayRenderer',
|
||||||
|
'videoWithContextRenderer',
|
||||||
'gridVideoRenderer',
|
'gridVideoRenderer',
|
||||||
'playlistVideoRenderer',
|
'playlistVideoRenderer',
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user