yt_data_extract: Split up extract_items so renderer extraction works independently

extract_items_from_renderer extracts items when given just a renderer rather than a whole response
2019-12-26 19:02:13 -08:00
parent b027f66738
commit 8e8a1b70b6
1 changed files with 48 additions and 47 deletions
--- a/youtube/yt_data_extract/common.py
+++ b/youtube/yt_data_extract/common.py
@@ -291,7 +291,7 @@ def extract_response(polymer_json):
        return response, None
-item_types = {
+_item_types = {
    'movieRenderer',
    'didYouMeanRenderer',
    'showingResultsForRenderer',
@@ -350,26 +350,13 @@ nested_renderer_list_dispatch = {
    'playlistVideoListRenderer': _traverse_standard_list,
    'singleColumnWatchNextResults': lambda r: (deep_get(r, 'results', 'results', 'contents', default=[]), None),
 }
-
+def extract_items_from_renderer(renderer, item_types=_item_types):
 def extract_items(response, item_types=item_types):
    '''return items, ctoken'''
    if 'continuationContents' in response:
        # always has just the one [something]Continuation key, but do this just in case they add some tracking key or something
        for key, renderer_continuation in get(response, 'continuationContents', {}).items():
            if key.endswith('Continuation'):    # e.g. commentSectionContinuation, playlistVideoListContinuation
                items = multi_get(renderer_continuation, 'contents', 'items', default=[])
                ctoken = deep_get(renderer_continuation, 'continuations', 0, 'nextContinuationData', 'continuation')
                return items, ctoken
        return [], None
    elif 'contents' in response:
    ctoken = None
    items = []
    iter_stack = collections.deque()
    current_iter = iter(())
        renderer = get(response, 'contents', {})
    while True:
        # mode 1: get a new renderer by iterating.
        # goes down the stack for an iterator if one has been exhausted
@@ -408,5 +395,19 @@ def extract_items(response, item_types=item_types):
            items.append(renderer)
        renderer = None
 def extract_items(response, item_types=_item_types):
    '''Return (items, ctoken) for a response.

    Three cases are handled, checked in this order:

    * The response has a 'continuationContents' key: the first entry whose
      key ends with 'Continuation' supplies the result. Its items are looked
      up with multi_get using the 'contents' and 'items' keys (default []),
      and the ctoken with deep_get along
      continuations -> 0 -> nextContinuationData -> continuation.
      If no entry matches, ([], None) is returned.
    * The response has a 'contents' key: that value is handed to
      extract_items_from_renderer together with item_types.
    * Neither key is present: ([], None) is returned.
    '''
    if 'continuationContents' not in response:
        if 'contents' not in response:
            return [], None
        top_renderer = get(response, 'contents', {})
        return extract_items_from_renderer(top_renderer, item_types=item_types)

    # Normally there is exactly one [something]Continuation key (e.g.
    # commentSectionContinuation, playlistVideoListContinuation); looping and
    # filtering on the suffix guards against extra keys such as tracking data.
    continuation_map = get(response, 'continuationContents', {})
    for name, continuation in continuation_map.items():
        if not name.endswith('Continuation'):
            continue
        found_items = multi_get(continuation, 'contents', 'items', default=[])
        next_token = deep_get(continuation, 'continuations', 0, 'nextContinuationData', 'continuation')
        return found_items, next_token
    return [], None