Extraction: Move playlist extraction to yt_data_extract
This commit is contained in:
parent
c362a5e834
commit
89e5761f8d
@ -89,28 +89,20 @@ def get_playlist_page():
|
|||||||
)
|
)
|
||||||
gevent.joinall(tasks)
|
gevent.joinall(tasks)
|
||||||
first_page_json, this_page_json = tasks[0].value, tasks[1].value
|
first_page_json, this_page_json = tasks[0].value, tasks[1].value
|
||||||
|
|
||||||
try: # first page
|
|
||||||
video_list = this_page_json['response']['contents']['singleColumnBrowseResultsRenderer']['tabs'][0]['tabRenderer']['content']['sectionListRenderer']['contents'][0]['itemSectionRenderer']['contents'][0]['playlistVideoListRenderer']['contents']
|
|
||||||
except KeyError: # other pages
|
|
||||||
video_list = this_page_json['response']['continuationContents']['playlistVideoListContinuation']['contents']
|
|
||||||
|
|
||||||
parsed_video_list = [yt_data_extract.parse_info_prepare_for_html(video_json) for video_json in video_list]
|
info = yt_data_extract.extract_playlist_info(this_page_json)
|
||||||
|
if page != '1':
|
||||||
|
info['metadata'] = yt_data_extract.extract_playlist_metadata(first_page_json)
|
||||||
|
|
||||||
|
yt_data_extract.prefix_urls(info['metadata'])
|
||||||
metadata = yt_data_extract.renderer_info(first_page_json['response']['header'])
|
for item in info['items']:
|
||||||
yt_data_extract.prefix_urls(metadata)
|
yt_data_extract.prefix_urls(item)
|
||||||
|
yt_data_extract.add_extra_html_info(item)
|
||||||
if 'description' not in metadata:
|
|
||||||
metadata['description'] = ''
|
|
||||||
|
|
||||||
video_count = int(metadata['size'].replace(',', ''))
|
|
||||||
metadata['size'] += ' videos'
|
|
||||||
|
|
||||||
return flask.render_template('playlist.html',
|
return flask.render_template('playlist.html',
|
||||||
video_list = parsed_video_list,
|
video_list = info['items'],
|
||||||
num_pages = math.ceil(video_count/20),
|
num_pages = math.ceil(info['metadata']['size']/20),
|
||||||
parameters_dictionary = request.args,
|
parameters_dictionary = request.args,
|
||||||
|
|
||||||
**metadata
|
**info['metadata']
|
||||||
).encode('utf-8')
|
).encode('utf-8')
|
||||||
|
@ -55,7 +55,7 @@
|
|||||||
<a class="playlist-author" href="{{ author_url }}">{{ author }}</a>
|
<a class="playlist-author" href="{{ author_url }}">{{ author }}</a>
|
||||||
<div class="playlist-stats">
|
<div class="playlist-stats">
|
||||||
<div>{{ views }}</div>
|
<div>{{ views }}</div>
|
||||||
<div>{{ size }}</div>
|
<div>{{ size }} videos</div>
|
||||||
</div>
|
</div>
|
||||||
<div class="playlist-description">{{ common_elements.text_runs(description) }}</div>
|
<div class="playlist-description">{{ common_elements.text_runs(description) }}</div>
|
||||||
</div>
|
</div>
|
||||||
|
@ -449,3 +449,30 @@ def extract_search_info(polymer_json):
|
|||||||
|
|
||||||
|
|
||||||
return info
|
return info
|
||||||
|
|
||||||
|
def extract_playlist_metadata(polymer_json):
    """Build the playlist metadata dict from a playlist page response.

    The header found at ``polymer_json['response']['header']`` is passed to
    ``renderer_info``; the mapping it returns is then normalised:

    * ``'description'`` is set to ``''`` when the key is absent.
    * ``'size'`` is converted from a string (thousands separators ``,``
      removed) to an ``int``.

    Returns the normalised metadata mapping.

    Raises ``KeyError`` if the header path or the ``'size'`` key is missing.
    """
    # NOTE(review): renderer_info is defined elsewhere in this module; it is
    # presumed to return a plain dict -- confirm.
    header = polymer_json['response']['header']
    metadata = renderer_info(header)

    # Guarantee a description entry without overwriting an existing one.
    metadata.setdefault('description', '')

    # Store the size as an integer rather than a comma-grouped string.
    size_text = metadata['size']
    metadata['size'] = int(size_text.replace(',', ''))

    return metadata
|
||||||
|
|
||||||
|
def extract_playlist_info(polymer_json):
    """Extract the video items (and, on the first page, metadata) of a playlist.

    Two response layouts are handled:

    * first page -- the video renderers sit under
      ``response.contents.singleColumnBrowseResultsRenderer...``;
    * other pages -- they sit under
      ``response.continuationContents.playlistVideoListContinuation``.

    The first layout is tried first; a ``KeyError`` while walking it selects
    the second one.

    Returns a dict with:

    * ``'items'``: ``renderer_info(renderer)`` for every video renderer;
    * ``'metadata'``: the result of ``extract_playlist_metadata`` -- only
      present when the first-page layout was found.
    """
    try:  # first page
        tab = polymer_json['response']['contents']['singleColumnBrowseResultsRenderer']['tabs'][0]
        section = tab['tabRenderer']['content']['sectionListRenderer']['contents'][0]
        renderers = section['itemSectionRenderer']['contents'][0]['playlistVideoListRenderer']['contents']
    except KeyError:  # other pages
        continuation = polymer_json['response']['continuationContents']
        renderers = continuation['playlistVideoListContinuation']['contents']
        is_first_page = False
    else:
        is_first_page = True

    result = {'items': [renderer_info(entry) for entry in renderers]}

    # Only the first-page response carries the header needed for metadata.
    if is_first_page:
        result['metadata'] = extract_playlist_metadata(polymer_json)

    return result
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user