Extraction: Move html post processing stuff from yt_data_extract to util
This commit is contained in:
parent 76376b29a0
commit d1d908d5b1
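
In short: the html post-processing helpers (prefix_urls, add_extra_html_info, parse_info_prepare_for_html) move out of yt_data_extract and into util, so every call site swaps module prefixes. The pattern, taken from the hunks below:

    # before
    yt_data_extract.prefix_urls(item)
    yt_data_extract.add_extra_html_info(item)

    # after
    util.prefix_urls(item)
    util.add_extra_html_info(item)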
@@ -142,8 +142,8 @@ def post_process_channel_info(info):
     info['avatar'] = util.prefix_url(info['avatar'])
     info['channel_url'] = util.prefix_url(info['channel_url'])
     for item in info['items']:
-        yt_data_extract.prefix_urls(item)
-        yt_data_extract.add_extra_html_info(item)
+        util.prefix_urls(item)
+        util.add_extra_html_info(item)
@@ -57,7 +57,7 @@ def get_local_playlist_videos(name, offset=0, amount=50):
                info['thumbnail'] = util.get_thumbnail_url(info['id'])
                missing_thumbnails.append(info['id'])
            info['type'] = 'video'
-            yt_data_extract.add_extra_html_info(info)
+            util.add_extra_html_info(info)
            videos.append(info)
        except json.decoder.JSONDecodeError:
            if not video_json.strip() == '':
@@ -97,10 +97,10 @@ def get_playlist_page():
     if page != '1':
         info['metadata'] = yt_data_extract.extract_playlist_metadata(first_page_json)
 
-    yt_data_extract.prefix_urls(info['metadata'])
+    util.prefix_urls(info['metadata'])
     for item in info.get('items', ()):
-        yt_data_extract.prefix_urls(item)
-        yt_data_extract.add_extra_html_info(item)
+        util.prefix_urls(item)
+        util.add_extra_html_info(item)
         if 'id' in item:
             item['thumbnail'] = '/https://i.ytimg.com/vi/' + item['id'] + '/default.jpg'
@@ -80,8 +80,8 @@ def get_search_page():
         return flask.render_template('error.html', error_message = search_info['error'])
 
     for extract_item_info in search_info['items']:
-        yt_data_extract.prefix_urls(extract_item_info)
-        yt_data_extract.add_extra_html_info(extract_item_info)
+        util.prefix_urls(extract_item_info)
+        util.add_extra_html_info(extract_item_info)
 
     corrections = search_info['corrections']
     if corrections['type'] == 'did_you_mean':
@@ -766,7 +766,7 @@ def get_subscriptions_page():
        video['thumbnail'] = util.URL_ORIGIN + '/data/subscription_thumbnails/' + video['id'] + '.jpg'
        video['type'] = 'video'
        video['item_size'] = 'small'
-        yt_data_extract.add_extra_html_info(video)
+        util.add_extra_html_info(video)
 
    tags = _get_all_tags(cursor)
@@ -1,4 +1,5 @@
 import settings
+from youtube import yt_data_extract
 import socks, sockshandler
 import gzip
 import brotli
@@ -6,6 +7,7 @@ import urllib.parse
 import re
 import time
 import os
+import json
 import gevent
 import gevent.queue
 import gevent.lock
@@ -321,3 +323,39 @@ def left_remove(string, substring):
         return string[len(substring):]
     return string
 
+
+def prefix_urls(item):
+    try:
+        item['thumbnail'] = prefix_url(item['thumbnail'])
+    except KeyError:
+        pass
+
+    try:
+        item['author_url'] = prefix_url(item['author_url'])
+    except KeyError:
+        pass
+
+def add_extra_html_info(item):
+    if item['type'] == 'video':
+        item['url'] = (URL_ORIGIN + '/watch?v=' + item['id']) if item.get('id') else None
+
+        video_info = {}
+        for key in ('id', 'title', 'author', 'duration'):
+            try:
+                video_info[key] = item[key]
+            except KeyError:
+                video_info[key] = ''
+
+        item['video_info'] = json.dumps(video_info)
+
+    elif item['type'] == 'playlist':
+        item['url'] = (URL_ORIGIN + '/playlist?list=' + item['id']) if item.get('id') else None
+    elif item['type'] == 'channel':
+        item['url'] = (URL_ORIGIN + "/channel/" + item['id']) if item.get('id') else None
+
+def parse_info_prepare_for_html(renderer, additional_info={}):
+    item = yt_data_extract.extract_item_info(renderer, additional_info)
+    prefix_urls(item)
+    add_extra_html_info(item)
+
+    return item
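
For orientation, a minimal sketch of how the moved helpers compose. The item dict below is hypothetical (not part of the commit); its keys are the ones the functions above read:

    # Hypothetical input item, shaped like what the extractors produce
    item = {
        'type': 'video',
        'id': 'abc123',  # placeholder id
        'title': 'Some video',
        'author': 'Some channel',
        'duration': '1:23',
        'thumbnail': 'https://i.ytimg.com/vi/abc123/default.jpg',
    }
    prefix_urls(item)          # routes item['thumbnail'] through this server via prefix_url
    add_extra_html_info(item)  # sets item['url'] and item['video_info'], a JSON string for the templates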
@@ -370,8 +370,8 @@ def get_watch_page():
     }
 
     for item in info['related_videos']:
-        yt_data_extract.prefix_urls(item)
-        yt_data_extract.add_extra_html_info(item)
+        util.prefix_urls(item)
+        util.add_extra_html_info(item)
 
     if settings.gather_googlevideo_domains:
         with open(os.path.join(settings.data_dir, 'googlevideo-domains.txt'), 'a+', encoding='utf-8') as f:
@ -1,8 +1,7 @@
|
||||
from .common import (get, multi_get, deep_get, multi_deep_get,
|
||||
liberal_update, conservative_update, remove_redirect, normalize_url,
|
||||
extract_str, extract_formatted_text, extract_int, extract_approx_int,
|
||||
extract_date, extract_item_info, extract_items, extract_response,
|
||||
prefix_urls, add_extra_html_info, parse_info_prepare_for_html)
|
||||
extract_date, extract_item_info, extract_items, extract_response)
|
||||
|
||||
from .everything_else import (extract_channel_info, extract_search_info,
|
||||
extract_playlist_metadata, extract_playlist_info, extract_comments_info)
|
||||
|
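
Downstream effect of this export change: code that previously pulled these helpers from the extraction package must now get them from util. A sketch of the migration for calling code:

    # before
    from youtube import yt_data_extract
    item = yt_data_extract.parse_info_prepare_for_html(renderer)

    # after
    from youtube import util
    item = util.parse_info_prepare_for_html(renderer)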
@ -1,6 +1,3 @@
|
||||
from youtube import util
|
||||
|
||||
import json
|
||||
import re
|
||||
import urllib.parse
|
||||
import collections
|
||||
@ -179,35 +176,6 @@ def check_missing_keys(object, *key_sequences):
|
||||
|
||||
return None
|
||||
|
||||
def prefix_urls(item):
|
||||
try:
|
||||
item['thumbnail'] = util.prefix_url(item['thumbnail'])
|
||||
except KeyError:
|
||||
pass
|
||||
|
||||
try:
|
||||
item['author_url'] = util.prefix_url(item['author_url'])
|
||||
except KeyError:
|
||||
pass
|
||||
|
||||
def add_extra_html_info(item):
|
||||
if item['type'] == 'video':
|
||||
item['url'] = (util.URL_ORIGIN + '/watch?v=' + item['id']) if item.get('id') else None
|
||||
|
||||
video_info = {}
|
||||
for key in ('id', 'title', 'author', 'duration'):
|
||||
try:
|
||||
video_info[key] = item[key]
|
||||
except KeyError:
|
||||
video_info[key] = ''
|
||||
|
||||
item['video_info'] = json.dumps(video_info)
|
||||
|
||||
elif item['type'] == 'playlist':
|
||||
item['url'] = (util.URL_ORIGIN + '/playlist?list=' + item['id']) if item.get('id') else None
|
||||
elif item['type'] == 'channel':
|
||||
item['url'] = (util.URL_ORIGIN + "/channel/" + item['id']) if item.get('id') else None
|
||||
|
||||
def extract_item_info(item, additional_info={}):
|
||||
if not item:
|
||||
return {'error': 'No item given'}
|
||||
@@ -307,13 +275,6 @@ def extract_item_info(item, additional_info={}):
     )))
     return info
 
-def parse_info_prepare_for_html(renderer, additional_info={}):
-    item = extract_item_info(renderer, additional_info)
-    prefix_urls(item)
-    add_extra_html_info(item)
-
-    return item
-
 def extract_response(polymer_json):
     '''return response, error'''
     response = multi_deep_get(polymer_json, [1, 'response'], ['response'], default=None, types=dict)