yt_data_extract: Split up extract_items so renderer extraction works independently

extract_items_from_renderer extracts items given just a renderer rather than a full response
2019-12-26 19:02:13 -08:00
parent b027f66738
commit 8e8a1b70b6
1 changed files with 48 additions and 47 deletions
--- a/youtube/yt_data_extract/common.py
+++ b/youtube/yt_data_extract/common.py
@@ -291,7 +291,7 @@ def extract_response(polymer_json):
        return response, None


-item_types = {
+_item_types = {
    'movieRenderer',
    'didYouMeanRenderer',
    'showingResultsForRenderer',
@@ -350,26 +350,13 @@ nested_renderer_list_dispatch = {
    'playlistVideoListRenderer': _traverse_standard_list,
    'singleColumnWatchNextResults': lambda r: (deep_get(r, 'results', 'results', 'contents', default=[]), None),
 }
-
-def extract_items(response, item_types=item_types):
-    '''return items, ctoken'''
-    if 'continuationContents' in response:
-        # always has just the one [something]Continuation key, but do this just in case they add some tracking key or something
-        for key, renderer_continuation in get(response, 'continuationContents', {}).items():
-            if key.endswith('Continuation'):    # e.g. commentSectionContinuation, playlistVideoListContinuation
-                items = multi_get(renderer_continuation, 'contents', 'items', default=[])
-                ctoken = deep_get(renderer_continuation, 'continuations', 0, 'nextContinuationData', 'continuation')
-                return items, ctoken
-        return [], None
-    elif 'contents' in response:
+def extract_items_from_renderer(renderer, item_types=_item_types):
    ctoken = None
    items = []

    iter_stack = collections.deque()
    current_iter = iter(())

-        renderer = get(response, 'contents', {})
-
    while True:
        # mode 1: get a new renderer by iterating.
        # goes down the stack for an iterator if one has been exhausted
@@ -408,5 +395,19 @@ def extract_items(response, item_types=item_types):
            items.append(renderer)

        renderer = None
+
+def extract_items(response, item_types=_item_types):
+    '''return items, ctoken'''
+    if 'continuationContents' in response:
+        # always has just the one [something]Continuation key, but do this just in case they add some tracking key or something
+        for key, renderer_continuation in get(response, 'continuationContents', {}).items():
+            if key.endswith('Continuation'):    # e.g. commentSectionContinuation, playlistVideoListContinuation
+                items = multi_get(renderer_continuation, 'contents', 'items', default=[])
+                ctoken = deep_get(renderer_continuation, 'continuations', 0, 'nextContinuationData', 'continuation')
+                return items, ctoken
+        return [], None
+    elif 'contents' in response:
+        renderer = get(response, 'contents', {})
+        return extract_items_from_renderer(renderer, item_types=item_types)
    else:
        return [], None