yt_data_extract: Split up extract_items so renderer extraction works independently

extract_items_from_renderer extracts items when given just a renderer rather than a whole response.
This commit is contained in:
James Taylor 2019-12-26 19:02:13 -08:00
parent b027f66738
commit 8e8a1b70b6

View File

@ -291,7 +291,7 @@ def extract_response(polymer_json):
return response, None
item_types = {
_item_types = {
'movieRenderer',
'didYouMeanRenderer',
'showingResultsForRenderer',
@ -350,26 +350,13 @@ nested_renderer_list_dispatch = {
'playlistVideoListRenderer': _traverse_standard_list,
'singleColumnWatchNextResults': lambda r: (deep_get(r, 'results', 'results', 'contents', default=[]), None),
}
def extract_items(response, item_types=item_types):
'''return items, ctoken'''
if 'continuationContents' in response:
# always has just the one [something]Continuation key, but do this just in case they add some tracking key or something
for key, renderer_continuation in get(response, 'continuationContents', {}).items():
if key.endswith('Continuation'): # e.g. commentSectionContinuation, playlistVideoListContinuation
items = multi_get(renderer_continuation, 'contents', 'items', default=[])
ctoken = deep_get(renderer_continuation, 'continuations', 0, 'nextContinuationData', 'continuation')
return items, ctoken
return [], None
elif 'contents' in response:
def extract_items_from_renderer(renderer, item_types=_item_types):
ctoken = None
items = []
iter_stack = collections.deque()
current_iter = iter(())
renderer = get(response, 'contents', {})
while True:
# mode 1: get a new renderer by iterating.
# goes down the stack for an iterator if one has been exhausted
@ -408,5 +395,19 @@ def extract_items(response, item_types=item_types):
items.append(renderer)
renderer = None
def extract_items(response, item_types=_item_types):
    '''Return (items, ctoken) extracted from a response.

    A response containing 'continuationContents' is read from its first
    entry whose key ends with 'Continuation'. A response containing
    'contents' is handed to extract_items_from_renderer. Anything else
    yields ([], None).
    '''
    if 'continuationContents' in response:
        continuation_contents = get(response, 'continuationContents', {})
        # Only a single [something]Continuation key is expected (e.g.
        # commentSectionContinuation, playlistVideoListContinuation); other
        # keys are skipped in case tracking keys or similar get added.
        for name, continuation in continuation_contents.items():
            if not name.endswith('Continuation'):
                continue
            return (
                multi_get(continuation, 'contents', 'items', default=[]),
                deep_get(continuation, 'continuations', 0, 'nextContinuationData', 'continuation'),
            )
        return [], None

    if 'contents' not in response:
        return [], None

    return extract_items_from_renderer(
        get(response, 'contents', {}),
        item_types=item_types,
    )