yt_data_extract: Split up extract_items so renderer extraction works independently
extract_items_from_renderer will extract given just a renderer rather than a response
This commit is contained in:
parent
b027f66738
commit
8e8a1b70b6
@ -291,7 +291,7 @@ def extract_response(polymer_json):
|
|||||||
return response, None
|
return response, None
|
||||||
|
|
||||||
|
|
||||||
item_types = {
|
_item_types = {
|
||||||
'movieRenderer',
|
'movieRenderer',
|
||||||
'didYouMeanRenderer',
|
'didYouMeanRenderer',
|
||||||
'showingResultsForRenderer',
|
'showingResultsForRenderer',
|
||||||
@ -350,8 +350,53 @@ nested_renderer_list_dispatch = {
|
|||||||
'playlistVideoListRenderer': _traverse_standard_list,
|
'playlistVideoListRenderer': _traverse_standard_list,
|
||||||
'singleColumnWatchNextResults': lambda r: (deep_get(r, 'results', 'results', 'contents', default=[]), None),
|
'singleColumnWatchNextResults': lambda r: (deep_get(r, 'results', 'results', 'contents', default=[]), None),
|
||||||
}
|
}
|
||||||
|
def extract_items_from_renderer(renderer, item_types=_item_types):
|
||||||
|
ctoken = None
|
||||||
|
items = []
|
||||||
|
|
||||||
def extract_items(response, item_types=item_types):
|
iter_stack = collections.deque()
|
||||||
|
current_iter = iter(())
|
||||||
|
|
||||||
|
while True:
|
||||||
|
# mode 1: get a new renderer by iterating.
|
||||||
|
# goes down the stack for an iterator if one has been exhausted
|
||||||
|
if not renderer:
|
||||||
|
try:
|
||||||
|
renderer = current_iter.__next__()
|
||||||
|
except StopIteration:
|
||||||
|
try:
|
||||||
|
current_iter = iter_stack.pop()
|
||||||
|
except IndexError:
|
||||||
|
return items, ctoken
|
||||||
|
# Get new renderer or check that the one we got is good before
|
||||||
|
# proceeding to mode 2
|
||||||
|
continue
|
||||||
|
|
||||||
|
|
||||||
|
# mode 2: dig into the current renderer
|
||||||
|
key, value = list(renderer.items())[0]
|
||||||
|
|
||||||
|
# has a list in it, add it to the iter stack
|
||||||
|
if key in nested_renderer_list_dispatch:
|
||||||
|
renderer_list, continuation = nested_renderer_list_dispatch[key](value)
|
||||||
|
if renderer_list:
|
||||||
|
iter_stack.append(current_iter)
|
||||||
|
current_iter = iter(renderer_list)
|
||||||
|
if continuation:
|
||||||
|
ctoken = continuation
|
||||||
|
|
||||||
|
# new renderer nested inside this one
|
||||||
|
elif key in nested_renderer_dispatch:
|
||||||
|
renderer = nested_renderer_dispatch[key](value)
|
||||||
|
continue # don't reset renderer to None
|
||||||
|
|
||||||
|
# the renderer is an item
|
||||||
|
elif key in item_types:
|
||||||
|
items.append(renderer)
|
||||||
|
|
||||||
|
renderer = None
|
||||||
|
|
||||||
|
def extract_items(response, item_types=_item_types):
|
||||||
'''return items, ctoken'''
|
'''return items, ctoken'''
|
||||||
if 'continuationContents' in response:
|
if 'continuationContents' in response:
|
||||||
# always has just the one [something]Continuation key, but do this just in case they add some tracking key or something
|
# always has just the one [something]Continuation key, but do this just in case they add some tracking key or something
|
||||||
@ -362,51 +407,7 @@ def extract_items(response, item_types=item_types):
|
|||||||
return items, ctoken
|
return items, ctoken
|
||||||
return [], None
|
return [], None
|
||||||
elif 'contents' in response:
|
elif 'contents' in response:
|
||||||
ctoken = None
|
|
||||||
items = []
|
|
||||||
|
|
||||||
iter_stack = collections.deque()
|
|
||||||
current_iter = iter(())
|
|
||||||
|
|
||||||
renderer = get(response, 'contents', {})
|
renderer = get(response, 'contents', {})
|
||||||
|
return extract_items_from_renderer(renderer, item_types=item_types)
|
||||||
while True:
|
|
||||||
# mode 1: get a new renderer by iterating.
|
|
||||||
# goes down the stack for an iterator if one has been exhausted
|
|
||||||
if not renderer:
|
|
||||||
try:
|
|
||||||
renderer = current_iter.__next__()
|
|
||||||
except StopIteration:
|
|
||||||
try:
|
|
||||||
current_iter = iter_stack.pop()
|
|
||||||
except IndexError:
|
|
||||||
return items, ctoken
|
|
||||||
# Get new renderer or check that the one we got is good before
|
|
||||||
# proceeding to mode 2
|
|
||||||
continue
|
|
||||||
|
|
||||||
|
|
||||||
# mode 2: dig into the current renderer
|
|
||||||
key, value = list(renderer.items())[0]
|
|
||||||
|
|
||||||
# has a list in it, add it to the iter stack
|
|
||||||
if key in nested_renderer_list_dispatch:
|
|
||||||
renderer_list, continuation = nested_renderer_list_dispatch[key](value)
|
|
||||||
if renderer_list:
|
|
||||||
iter_stack.append(current_iter)
|
|
||||||
current_iter = iter(renderer_list)
|
|
||||||
if continuation:
|
|
||||||
ctoken = continuation
|
|
||||||
|
|
||||||
# new renderer nested inside this one
|
|
||||||
elif key in nested_renderer_dispatch:
|
|
||||||
renderer = nested_renderer_dispatch[key](value)
|
|
||||||
continue # don't reset renderer to None
|
|
||||||
|
|
||||||
# the renderer is an item
|
|
||||||
elif key in item_types:
|
|
||||||
items.append(renderer)
|
|
||||||
|
|
||||||
renderer = None
|
|
||||||
else:
|
else:
|
||||||
return [], None
|
return [], None
|
||||||
|
Loading…
x
Reference in New Issue
Block a user