yt_data_extract: Split up extract_items so renderer extraction works independently
extract_items_from_renderer extracts items when given just a renderer rather than a full response
This commit is contained in:
parent
b027f66738
commit
8e8a1b70b6
@ -291,7 +291,7 @@ def extract_response(polymer_json):
|
|||||||
return response, None
|
return response, None
|
||||||
|
|
||||||
|
|
||||||
item_types = {
|
_item_types = {
|
||||||
'movieRenderer',
|
'movieRenderer',
|
||||||
'didYouMeanRenderer',
|
'didYouMeanRenderer',
|
||||||
'showingResultsForRenderer',
|
'showingResultsForRenderer',
|
||||||
@ -350,26 +350,13 @@ nested_renderer_list_dispatch = {
|
|||||||
'playlistVideoListRenderer': _traverse_standard_list,
|
'playlistVideoListRenderer': _traverse_standard_list,
|
||||||
'singleColumnWatchNextResults': lambda r: (deep_get(r, 'results', 'results', 'contents', default=[]), None),
|
'singleColumnWatchNextResults': lambda r: (deep_get(r, 'results', 'results', 'contents', default=[]), None),
|
||||||
}
|
}
|
||||||
|
def extract_items_from_renderer(renderer, item_types=_item_types):
|
||||||
def extract_items(response, item_types=item_types):
|
|
||||||
'''return items, ctoken'''
|
|
||||||
if 'continuationContents' in response:
|
|
||||||
# always has just the one [something]Continuation key, but do this just in case they add some tracking key or something
|
|
||||||
for key, renderer_continuation in get(response, 'continuationContents', {}).items():
|
|
||||||
if key.endswith('Continuation'): # e.g. commentSectionContinuation, playlistVideoListContinuation
|
|
||||||
items = multi_get(renderer_continuation, 'contents', 'items', default=[])
|
|
||||||
ctoken = deep_get(renderer_continuation, 'continuations', 0, 'nextContinuationData', 'continuation')
|
|
||||||
return items, ctoken
|
|
||||||
return [], None
|
|
||||||
elif 'contents' in response:
|
|
||||||
ctoken = None
|
ctoken = None
|
||||||
items = []
|
items = []
|
||||||
|
|
||||||
iter_stack = collections.deque()
|
iter_stack = collections.deque()
|
||||||
current_iter = iter(())
|
current_iter = iter(())
|
||||||
|
|
||||||
renderer = get(response, 'contents', {})
|
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
# mode 1: get a new renderer by iterating.
|
# mode 1: get a new renderer by iterating.
|
||||||
# goes down the stack for an iterator if one has been exhausted
|
# goes down the stack for an iterator if one has been exhausted
|
||||||
@ -408,5 +395,19 @@ def extract_items(response, item_types=item_types):
|
|||||||
items.append(renderer)
|
items.append(renderer)
|
||||||
|
|
||||||
renderer = None
|
renderer = None
|
||||||
|
|
||||||
|
def extract_items(response, item_types=_item_types):
|
||||||
|
'''return items, ctoken'''
|
||||||
|
if 'continuationContents' in response:
|
||||||
|
# always has just the one [something]Continuation key, but do this just in case they add some tracking key or something
|
||||||
|
for key, renderer_continuation in get(response, 'continuationContents', {}).items():
|
||||||
|
if key.endswith('Continuation'): # e.g. commentSectionContinuation, playlistVideoListContinuation
|
||||||
|
items = multi_get(renderer_continuation, 'contents', 'items', default=[])
|
||||||
|
ctoken = deep_get(renderer_continuation, 'continuations', 0, 'nextContinuationData', 'continuation')
|
||||||
|
return items, ctoken
|
||||||
|
return [], None
|
||||||
|
elif 'contents' in response:
|
||||||
|
renderer = get(response, 'contents', {})
|
||||||
|
return extract_items_from_renderer(renderer, item_types=item_types)
|
||||||
else:
|
else:
|
||||||
return [], None
|
return [], None
|
||||||
|
Loading…
x
Reference in New Issue
Block a user