Extraction: Move the HTML post-processing helpers from yt_data_extract to util

This commit is contained in:
James Taylor
2019-12-19 19:48:53 -08:00
parent 76376b29a0
commit d1d908d5b1
9 changed files with 50 additions and 52 deletions

View File

@@ -1,8 +1,7 @@
from .common import (get, multi_get, deep_get, multi_deep_get,
liberal_update, conservative_update, remove_redirect, normalize_url,
extract_str, extract_formatted_text, extract_int, extract_approx_int,
extract_date, extract_item_info, extract_items, extract_response,
prefix_urls, add_extra_html_info, parse_info_prepare_for_html)
extract_date, extract_item_info, extract_items, extract_response)
from .everything_else import (extract_channel_info, extract_search_info,
extract_playlist_metadata, extract_playlist_info, extract_comments_info)

View File

@@ -1,6 +1,3 @@
from youtube import util
import json
import re
import urllib.parse
import collections
@@ -179,35 +176,6 @@ def check_missing_keys(object, *key_sequences):
return None
def prefix_urls(item):
    '''Rewrite the item's 'thumbnail' and 'author_url' entries through
    util.prefix_url so they route through this frontend.

    Keys that are absent from the item are silently left alone (EAFP:
    a missing key just skips that rewrite).
    '''
    for key in ('thumbnail', 'author_url'):
        try:
            item[key] = util.prefix_url(item[key])
        except KeyError:
            pass
def add_extra_html_info(item):
    '''Attach the fields the HTML templates need, based on item['type'].

    Sets item['url'] to a site-relative link (or None when the id is
    missing/falsy) for videos, playlists and channels. For videos it also
    stores item['video_info'], a JSON blob of the basic metadata used by
    the frontend, with missing fields defaulting to the empty string.
    Raises KeyError if the item has no 'type' key (same as the original).
    '''
    kind = item['type']
    item_id = item.get('id')
    if kind == 'video':
        item['url'] = (util.URL_ORIGIN + '/watch?v=' + item_id) if item_id else None
        # Missing metadata fields become '' so the template always has a value.
        video_info = {k: item.get(k, '') for k in ('id', 'title', 'author', 'duration')}
        item['video_info'] = json.dumps(video_info)
    elif kind == 'playlist':
        item['url'] = (util.URL_ORIGIN + '/playlist?list=' + item_id) if item_id else None
    elif kind == 'channel':
        item['url'] = (util.URL_ORIGIN + '/channel/' + item_id) if item_id else None
def extract_item_info(item, additional_info={}):
if not item:
return {'error': 'No item given'}
@@ -307,13 +275,6 @@ def extract_item_info(item, additional_info={}):
)))
return info
def parse_info_prepare_for_html(renderer, additional_info=None):
    '''Extract a normalized item dict from a polymer renderer and prepare
    it for HTML rendering: prefix external URLs through this frontend and
    attach the template-facing fields (url, video_info).

    renderer: the raw polymer renderer dict to extract from.
    additional_info: optional dict of extra keys merged into the result;
        None (the default) is treated as an empty dict. Using a None
        sentinel instead of a `={}` default avoids the shared
        mutable-default-argument pitfall while staying call-compatible.

    Returns the prepared item dict.
    '''
    item = extract_item_info(renderer,
                             {} if additional_info is None else additional_info)
    prefix_urls(item)
    add_extra_html_info(item)
    return item
def extract_response(polymer_json):
'''return response, error'''
response = multi_deep_get(polymer_json, [1, 'response'], ['response'], default=None, types=dict)