extract_items: Handle case where continuation has multiple
[something]Continuation renderers, all of which are junk except one. Check the items in each one until the one that contains the items being sought is found. The usage in extract_comments_info needed to be changed to specify the items being sought. Previously the item types were left unspecified, which is strictly incorrect, since extract_items by default looks for video/playlist/channel thumbnail items. That call was relying on the old special case for continuations, which returned every item regardless of item type. Now that continuations are filtered by item type like everything else, that would no longer work.
This commit is contained in:
parent
81ff5ab99c
commit
fa61874f97
@ -392,6 +392,13 @@ nested_renderer_list_dispatch = {
|
|||||||
'playlistVideoListRenderer': _traverse_standard_list,
|
'playlistVideoListRenderer': _traverse_standard_list,
|
||||||
'singleColumnWatchNextResults': lambda r: (deep_get(r, 'results', 'results', 'contents', default=[]), None),
|
'singleColumnWatchNextResults': lambda r: (deep_get(r, 'results', 'results', 'contents', default=[]), None),
|
||||||
}
|
}
|
||||||
|
def get_nested_renderer_list_function(key):
|
||||||
|
if key in nested_renderer_list_dispatch:
|
||||||
|
return nested_renderer_list_dispatch[key]
|
||||||
|
elif key.endswith('Continuation'):
|
||||||
|
return _traverse_standard_list
|
||||||
|
return None
|
||||||
|
|
||||||
def extract_items_from_renderer(renderer, item_types=_item_types):
|
def extract_items_from_renderer(renderer, item_types=_item_types):
|
||||||
ctoken = None
|
ctoken = None
|
||||||
items = []
|
items = []
|
||||||
@ -423,13 +430,13 @@ def extract_items_from_renderer(renderer, item_types=_item_types):
|
|||||||
items.append(renderer)
|
items.append(renderer)
|
||||||
|
|
||||||
# has a list in it, add it to the iter stack
|
# has a list in it, add it to the iter stack
|
||||||
elif key in nested_renderer_list_dispatch:
|
elif get_nested_renderer_list_function(key):
|
||||||
renderer_list, continuation = nested_renderer_list_dispatch[key](value)
|
renderer_list, cont = get_nested_renderer_list_function(key)(value)
|
||||||
if renderer_list:
|
if renderer_list:
|
||||||
iter_stack.append(current_iter)
|
iter_stack.append(current_iter)
|
||||||
current_iter = iter(renderer_list)
|
current_iter = iter(renderer_list)
|
||||||
if continuation:
|
if cont:
|
||||||
ctoken = continuation
|
ctoken = cont
|
||||||
|
|
||||||
# new renderer nested inside this one
|
# new renderer nested inside this one
|
||||||
elif key in nested_renderer_dispatch:
|
elif key in nested_renderer_dispatch:
|
||||||
@ -441,12 +448,16 @@ def extract_items_from_renderer(renderer, item_types=_item_types):
|
|||||||
def extract_items(response, item_types=_item_types):
|
def extract_items(response, item_types=_item_types):
|
||||||
'''return items, ctoken'''
|
'''return items, ctoken'''
|
||||||
if 'continuationContents' in response:
|
if 'continuationContents' in response:
|
||||||
# always has just the one [something]Continuation key, but do this just in case they add some tracking key or something
|
# sometimes there's another, empty, junk [something]Continuation key
|
||||||
for key, renderer_continuation in get(response, 'continuationContents', {}).items():
|
# find real one
|
||||||
if key.endswith('Continuation'): # e.g. commentSectionContinuation, playlistVideoListContinuation
|
for key, renderer_cont in get(response,
|
||||||
items = multi_get(renderer_continuation, 'contents', 'items', default=[])
|
'continuationContents', {}).items():
|
||||||
ctoken = deep_get(renderer_continuation, 'continuations', 0, 'nextContinuationData', 'continuation')
|
# e.g. commentSectionContinuation, playlistVideoListContinuation
|
||||||
return items, ctoken
|
if key.endswith('Continuation'):
|
||||||
|
items, cont = extract_items_from_renderer({key: renderer_cont},
|
||||||
|
item_types=item_types)
|
||||||
|
if items:
|
||||||
|
return items, cont
|
||||||
return [], None
|
return [], None
|
||||||
elif 'contents' in response:
|
elif 'contents' in response:
|
||||||
renderer = get(response, 'contents', {})
|
renderer = get(response, 'contents', {})
|
||||||
|
@ -227,7 +227,8 @@ def extract_comments_info(polymer_json):
|
|||||||
info['sort'] = metadata.get('sort')
|
info['sort'] = metadata.get('sort')
|
||||||
info['video_title'] = None
|
info['video_title'] = None
|
||||||
|
|
||||||
comments, ctoken = extract_items(response)
|
comments, ctoken = extract_items(response,
|
||||||
|
item_types={'commentThreadRenderer', 'commentRenderer'})
|
||||||
info['comments'] = []
|
info['comments'] = []
|
||||||
info['ctoken'] = ctoken
|
info['ctoken'] = ctoken
|
||||||
for comment in comments:
|
for comment in comments:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user