Refactor search page

This commit is contained in:
James Taylor 2019-06-21 21:41:41 -07:00
parent e33bae2e50
commit 1156b09987
7 changed files with 364 additions and 86 deletions

View File

@ -6,7 +6,7 @@ from youtube import yt_app
from youtube import util from youtube import util
# these are just so the files get run - they import yt_app and add routes to it # these are just so the files get run - they import yt_app and add routes to it
from youtube import watch from youtube import watch, search
import settings import settings

View File

@ -1,16 +1,12 @@
from youtube import util, html_common, yt_data_extract, proto from youtube import util, yt_data_extract, proto, local_playlist
from youtube import yt_app
import json import json
import urllib import urllib
import html
from string import Template
import base64 import base64
from math import ceil from math import ceil
from flask import request
import flask
with open("yt_search_results_template.html", "r") as file:
yt_search_results_template = file.read()
# Sort: 1 # Sort: 1
# Upload date: 2 # Upload date: 2
@ -60,39 +56,30 @@ def get_search_json(query, page, autocorrect, sort, filters):
return info return info
showing_results_for = Template(''' @yt_app.route('/search')
<div>Showing results for <a>$corrected_query</a></div> def get_search_page():
<div>Search instead for <a href="$original_query_url">$original_query</a></div> if len(request.args) == 0:
''') return flask.render_template('base.html', title="Search")
did_you_mean = Template('''
<div>Did you mean <a href="$corrected_query_url">$corrected_query</a></div> if 'query' not in request.args:
''') abort(400)
def get_search_page(env, start_response):
start_response('200 OK', [('Content-type','text/html'),]) query = request.args.get("query")
parameters = env['parameters'] page = request.args.get("page", "1")
if len(parameters) == 0: autocorrect = int(request.args.get("autocorrect", "1"))
return html_common.yt_basic_template.substitute( sort = int(request.args.get("sort", "0"))
page_title = "Search",
header = html_common.get_header(),
style = '',
page = '',
).encode('utf-8')
query = parameters["query"][0]
page = parameters.get("page", "1")[0]
autocorrect = int(parameters.get("autocorrect", "1")[0])
sort = int(parameters.get("sort", "0")[0])
filters = {} filters = {}
filters['time'] = int(parameters.get("time", "0")[0]) filters['time'] = int(request.args.get("time", "0"))
filters['type'] = int(parameters.get("type", "0")[0]) filters['type'] = int(request.args.get("type", "0"))
filters['duration'] = int(parameters.get("duration", "0")[0]) filters['duration'] = int(request.args.get("duration", "0"))
info = get_search_json(query, page, autocorrect, sort, filters) info = get_search_json(query, page, autocorrect, sort, filters)
estimated_results = int(info[1]['response']['estimatedResults']) estimated_results = int(info[1]['response']['estimatedResults'])
estimated_pages = ceil(estimated_results/20) estimated_pages = ceil(estimated_results/20)
results = info[1]['response']['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'][0]['itemSectionRenderer']['contents'] results = info[1]['response']['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'][0]['itemSectionRenderer']['contents']
corrections = '' parsed_results = []
result_list_html = "" corrections = {'type': None}
for renderer in results: for renderer in results:
type = list(renderer.keys())[0] type = list(renderer.keys())[0]
if type == 'shelfRenderer': if type == 'shelfRenderer':
@ -102,41 +89,39 @@ def get_search_page(env, start_response):
corrected_query_string = parameters.copy() corrected_query_string = parameters.copy()
corrected_query_string['query'] = [renderer['correctedQueryEndpoint']['searchEndpoint']['query']] corrected_query_string['query'] = [renderer['correctedQueryEndpoint']['searchEndpoint']['query']]
corrected_query_url = util.URL_ORIGIN + '/search?' + urllib.parse.urlencode(corrected_query_string, doseq=True) corrected_query_url = util.URL_ORIGIN + '/search?' + urllib.parse.urlencode(corrected_query_string, doseq=True)
corrections = did_you_mean.substitute(
corrected_query_url = corrected_query_url, corrections = {
corrected_query = yt_data_extract.format_text_runs(renderer['correctedQuery']['runs']), 'type': 'did_you_mean',
) 'corrected_query': yt_data_extract.format_text_runs(renderer['correctedQuery']['runs']),
'corrected_query_url': corrected_query_url,
}
continue continue
if type == 'showingResultsForRenderer': if type == 'showingResultsForRenderer':
renderer = renderer[type] renderer = renderer[type]
no_autocorrect_query_string = parameters.copy() no_autocorrect_query_string = parameters.copy()
no_autocorrect_query_string['autocorrect'] = ['0'] no_autocorrect_query_string['autocorrect'] = ['0']
no_autocorrect_query_url = util.URL_ORIGIN + '/search?' + urllib.parse.urlencode(no_autocorrect_query_string, doseq=True) no_autocorrect_query_url = util.URL_ORIGIN + '/search?' + urllib.parse.urlencode(no_autocorrect_query_string, doseq=True)
corrections = showing_results_for.substitute(
corrected_query = yt_data_extract.format_text_runs(renderer['correctedQuery']['runs']), corrections = {
original_query_url = no_autocorrect_query_url, 'type': 'showing_results_for',
original_query = html.escape(renderer['originalQuery']['simpleText']), 'corrected_query': yt_data_extract.format_text_runs(renderer['correctedQuery']['runs']),
) 'original_query_url': no_autocorrect_query_url,
'original_query': renderer['originalQuery']['simpleText'],
}
continue continue
result_list_html += html_common.renderer_html(renderer, current_query_string=env['QUERY_STRING'])
page = int(page) info = yt_data_extract.parse_info_prepare_for_html(renderer)
if page <= 5: if info['type'] != 'unsupported':
page_start = 1 parsed_results.append(info)
page_end = min(9, estimated_pages)
else: return flask.render_template('search.html',
page_start = page - 4 header_playlist_names = local_playlist.get_playlist_names(),
page_end = min(page + 4, estimated_pages) query = query,
estimated_results = estimated_results,
estimated_pages = estimated_pages,
corrections = corrections,
results = parsed_results,
parameters_dictionary = request.args,
)
result = Template(yt_search_results_template).substitute(
header = html_common.get_header(query),
results = result_list_html,
page_title = query + " - Search",
search_box_value = html.escape(query),
number_of_results = '{:,}'.format(estimated_results),
number_of_pages = '{:,}'.format(estimated_pages),
page_buttons = html_common.page_buttons_html(page, estimated_pages, util.URL_ORIGIN + "/search", env['QUERY_STRING']),
corrections = corrections
)
return result.encode('utf-8')

View File

@ -219,6 +219,12 @@ address{
max-height:2.4em; max-height:2.4em;
overflow:hidden; overflow:hidden;
} }
/* Separate the direct children of a medium item's stats row with " | ":
   every child gets the separator appended via ::after, and the rule for
   the last child resets it to an empty string so no trailing bar appears. */
.medium-item .stats > *::after{
content: " | ";
}
.medium-item .stats > *:last-child::after{
content: "";
}
.medium-item .description{ .medium-item .description{
grid-column: 2 / span 2; grid-column: 2 / span 2;

View File

@ -2,13 +2,14 @@
<html> <html>
<head> <head>
<meta charset="utf-8"> <meta charset="utf-8">
<title>{% block page_title %}{% endblock %}</title> <title>{% block page_title %}{{ title }}{% endblock %}</title>
<link href="/youtube.com/static/shared.css" type="text/css" rel="stylesheet"> <link href="/youtube.com/static/shared.css" type="text/css" rel="stylesheet">
<link href="/youtube.com/static/comments.css" type="text/css" rel="stylesheet"> <link href="/youtube.com/static/comments.css" type="text/css" rel="stylesheet">
<link href="/youtube.com/static/favicon.ico" type="image/x-icon" rel="icon"> <link href="/youtube.com/static/favicon.ico" type="image/x-icon" rel="icon">
<link title="Youtube local" href="/youtube.com/opensearch.xml" rel="search" type="application/opensearchdescription+xml"> <link title="Youtube local" href="/youtube.com/opensearch.xml" rel="search" type="application/opensearchdescription+xml">
<style type="text/css"> <style type="text/css">
{% block style %} {% block style %}
{{ style }}
{% endblock %} {% endblock %}
</style> </style>
</head> </head>
@ -105,6 +106,7 @@
</header> </header>
<main> <main>
{% block main %} {% block main %}
{{ main }}
{% endblock %} {% endblock %}
</main> </main>
</body> </body>

View File

@ -0,0 +1,152 @@
{#
  text_runs(runs): render a piece of text.
  If the first element of `runs` is a mapping, `runs` is treated as a sequence
  of text-run mappings: each run's "text" is emitted, wrapped in <b> when the
  run has a truthy "bold" entry, otherwise in <i> when it has a truthy
  "italics" entry. Otherwise `runs` itself is emitted as-is.
  The {%- ... -%} markers strip surrounding whitespace so the runs are
  concatenated without gaps.
#}
{% macro text_runs(runs) %}
{%- if runs[0] is mapping -%}
{%- for text_run in runs -%}
{%- if text_run.get("bold", false) -%}
<b>{{ text_run["text"] }}</b>
{%- elif text_run.get('italics', false) -%}
<i>{{ text_run["text"] }}</i>
{%- else -%}
{{ text_run["text"] }}
{%- endif -%}
{%- endfor -%}
{%- else -%}
{{ runs }}
{%- endif -%}
{% endmacro %}
{#
  small_item(info): render one compact result box.
  Handles info['type'] == 'video' (thumbnail with duration, title link,
  author, views) and 'playlist' (thumbnail with size, title link, author);
  any other type renders the text "Error: unsupported item type".
  Videos additionally get a checkbox named "video_info_list" whose value is
  info['video_info'] and which is attached to the form with id "playlist-edit".
#}
{% macro small_item(info) %}
<div class="small-item-box">
<div class="small-item">
{% if info['type'] == 'video' %}
<a class="video-thumbnail-box" href="{{ info['url'] }}" title="{{ info['title'] }}">
<img class="video-thumbnail-img" src="{{ info['thumbnail'] }}">
<span class="video-duration">{{ info['duration'] }}</span>
</a>
<a class="title" href="{{ info['url'] }}" title="{{ info['title'] }}">{{ info['title'] }}</a>
<address>{{ info['author'] }}</address>
<span class="views">{{ info['views'] }}</span>
{% elif info['type'] == 'playlist' %}
<a class="playlist-thumbnail-box" href="{{ info['url'] }}" title="{{ info['title'] }}">
<img class="playlist-thumbnail-img" src="{{ info['thumbnail'] }}">
<div class="playlist-thumbnail-info">
<span>{{ info['size'] }}</span>
</div>
</a>
<a class="title" href="{{ info['url'] }}" title="{{ info['title'] }}">{{ info['title'] }}</a>
<address>{{ info['author'] }}</address>
{% else %}
Error: unsupported item type
{% endif %}
</div>
{% if info['type'] == 'video' %}
<input class="item-checkbox" type="checkbox" name="video_info_list" value="{{ info['video_info'] }}" form="playlist-edit">
{% endif %}
</div>
{% endmacro %}
{#
  get_stats(info): render the stats line of an item.
  The author is always emitted: as a "By <link>" when info contains
  'author_url', otherwise as bold text. A views <span> and a published
  <time> are emitted only when the corresponding keys exist in info.
#}
{% macro get_stats(info) %}
{% if 'author_url' is in(info) %}
<address>By <a href="{{ info['author_url'] }}">{{ info['author'] }}</a></address>
{% else %}
<address><b>{{ info['author'] }}</b></address>
{% endif %}
{% if 'views' is in(info) %}
<span class="views">{{ info['views'] }}</span>
{% endif %}
{% if 'published' is in(info) %}
<time>{{ info['published'] }}</time>
{% endif %}
{% endmacro %}
{#
  medium_item(info): render one full-width result box.
  Handles three values of info['type']:
    'video'    - thumbnail with duration, title link, get_stats() line,
                 description rendered through text_runs(), and badges joined
                 with ' | ';
    'playlist' - thumbnail with size, title link, get_stats() line;
    'channel'  - thumbnail, title link, subscriber count, size, description.
  Any other type renders the text "Error: unsupported item type".
  Videos additionally get a checkbox named "video_info_list" whose value is
  info['video_info'] and which is attached to the form with id "playlist-edit".
#}
{% macro medium_item(info) %}
<div class="medium-item-box">
<div class="medium-item">
{% if info['type'] == 'video' %}
<a class="video-thumbnail-box" href="{{ info['url'] }}" title="{{ info['title'] }}">
<img class="video-thumbnail-img" src="{{ info['thumbnail'] }}">
<span class="video-duration">{{ info['duration'] }}</span>
</a>
<a class="title" href="{{ info['url'] }}" title="{{ info['title'] }}">{{ info['title'] }}</a>
<div class="stats">
{{ get_stats(info) }}
</div>
<span class="description">{{ text_runs(info['description']) }}</span>
<span class="badges">{{ info['badges']|join(' | ') }}</span>
{% elif info['type'] == 'playlist' %}
<a class="playlist-thumbnail-box" href="{{ info['url'] }}" title="{{ info['title'] }}">
<img class="playlist-thumbnail-img" src="{{ info['thumbnail'] }}">
<div class="playlist-thumbnail-info">
<span>{{ info['size'] }}</span>
</div>
</a>
<a class="title" href="{{ info['url'] }}" title="{{ info['title'] }}">{{ info['title'] }}</a>
<div class="stats">
{{ get_stats(info) }}
</div>
{% elif info['type'] == 'channel' %}
<a class="video-thumbnail-box" href="{{ info['url'] }}" title="{{ info['title'] }}">
<img class="video-thumbnail-img" src="{{ info['thumbnail'] }}">
</a>
<a class="title" href="{{ info['url'] }}">{{ info['title'] }}</a>
<span>{{ info['subscriber_count'] }}</span>
<span>{{ info['size'] }}</span>
<span class="description">{{ text_runs(info['description']) }}</span>
{% else %}
Error: unsupported item type
{% endif %}
</div>
{% if info['type'] == 'video' %}
<input class="item-checkbox" type="checkbox" name="video_info_list" value="{{ info['video_info'] }}" form="playlist-edit">
{% endif %}
</div>
{% endmacro %}
{#
  item(info): entry point for rendering a result item.
  Dispatches on info['item_size']: 'small' -> small_item(), 'medium' ->
  medium_item(); anything else renders the text "Error: Unknown item size".
#}
{% macro item(info) %}
{% if info['item_size'] == 'small' %}
{{ small_item(info) }}
{% elif info['item_size'] == 'medium' %}
{{ medium_item(info) }}
{% else %}
Error: Unknown item size
{% endif %}
{% endmacro %}
{#
  page_buttons(estimated_pages, url, parameters_dictionary): render the row of
  pagination buttons.
  The current page is read from parameters_dictionary['page'] (default 1).
  The visible window is pages 1..min(9, estimated_pages) while the current
  page is <= 5, otherwise current-4..min(current+4, estimated_pages).
  The current page is rendered as a plain <div>; every other page is a link
  to `url` with the same query parameters except 'page'.
  parameters_dictionary must provide .get() and .to_dict(); presumably it is
  the request's query-argument multidict - confirm against the caller.
#}
{% macro page_buttons(estimated_pages, url, parameters_dictionary) %}
{% set current_page = parameters_dictionary.get('page', 1)|int %}
{% set parameters_dictionary = parameters_dictionary.to_dict() %}
{% if current_page is le(5) %}
{% set page_start = 1 %}
{% set page_end = [9, estimated_pages]|min %}
{% else %}
{% set page_start = current_page - 4 %}
{% set page_end = [current_page + 4, estimated_pages]|min %}
{% endif %}
{% for page in range(page_start, page_end+1) %}
{% if page == current_page %}
<div class="page-button">{{ page }}</div>
{% else %}
{# Jinja's set tag cannot assign to a dictionary key, so __setitem__ is called directly and its result discarded. #}
{# https://stackoverflow.com/questions/36886650/how-to-add-a-new-entry-into-a-dictionary-object-while-using-jinja2 #}
{% set _ = parameters_dictionary.__setitem__('page', page) %}
<a class="page-button" href="{{ url + '?' + parameters_dictionary|urlencode }}">{{ page }}</a>
{% endif %}
{% endfor %}
{% endmacro %}

View File

@ -0,0 +1,54 @@
{#
  Search results page. Extends base.html and fills its page_title, style and
  main blocks.
  Variables read from the render context: query, estimated_results,
  estimated_pages, corrections (a dict whose 'type' is checked against
  'showing_results_for' and 'did_you_mean'), results (iterable of item info
  dicts rendered through common_elements.item) and parameters_dictionary
  (passed to common_elements.page_buttons).
  search_box_value is set to the query before extending; presumably base.html
  uses it to pre-fill the search box - confirm in base.html.
  NOTE(review): corrections['corrected_query'] is emitted with |safe, so it
  must already be HTML-safe when it reaches this template - confirm against
  the code that builds it.
-#}
{% set search_box_value = query %}
{% extends "base.html" %}
{% block page_title %}{{ query + ' - Search' }}{% endblock %}
{% import "common_elements.html" as common_elements %}
{% block style %}
main{
display:grid;
grid-template-columns: minmax(0px, 1fr) 800px minmax(0px,2fr);
max-width:100vw;
}
#number-of-results{
font-weight:bold;
}
#result-info{
grid-row: 1;
grid-column:2;
align-self:center;
}
.page-button-row{
grid-column: 2;
justify-self: center;
}
.item-list{
grid-row: 2;
grid-column: 2;
}
.badge{
background-color:#cccccc;
}
{% endblock style %}
{% block main %}
<div id="result-info">
<div id="number-of-results">Approximately {{ '{:,}'.format(estimated_results) }} results ({{ '{:,}'.format(estimated_pages) }} pages)</div>
{% if corrections['type'] == 'showing_results_for' %}
<div>Showing results for <a>{{ corrections['corrected_query']|safe }}</a></div>
<div>Search instead for <a href="{{ corrections['original_query_url'] }}">{{ corrections['original_query'] }}</a></div>
{% elif corrections['type'] == 'did_you_mean' %}
<div>Did you mean <a href="{{ corrections['corrected_query_url'] }}">{{ corrections['corrected_query']|safe }}</a></div>
{% endif %}
</div>
<div class="item-list">
{% for info in results %}
{{ common_elements.item(info) }}
{% endfor %}
</div>
<nav class="page-button-row">
{{ common_elements.page_buttons(estimated_pages, '/https://www.youtube.com/search', parameters_dictionary) }}
</nav>
{% endblock main %}

View File

@ -1,4 +1,7 @@
from youtube import util
import html import html
import json
# videos (all of type str): # videos (all of type str):
@ -138,9 +141,83 @@ dispatch = {
} }
def renderer_info(renderer): def ajax_info(item_json):
try: try:
info = {} info = {}
for key, node in item_json.items():
try:
simple_key, function = dispatch[key]
except KeyError:
continue
info[simple_key] = function(node)
return info
except KeyError:
print(item_json)
raise
def prefix_urls(item):
    """Rewrite the URL fields of *item* in place so they point at this app.

    - ``thumbnail``: any leading slashes are stripped and a single ``/`` is
      put in front.
    - ``author_url``: ``util.URL_ORIGIN`` is prepended.

    A key that is absent from *item* is simply skipped. Returns ``None``.
    """
    if 'thumbnail' in item:
        stripped = item['thumbnail'].lstrip('/')
        item['thumbnail'] = '/' + stripped

    if 'author_url' in item:
        item['author_url'] = util.URL_ORIGIN + item['author_url']
def add_extra_html_info(item):
    """Add the fields the HTML templates need to *item*, in place.

    Depending on ``item['type']``:

    - ``'video'``: sets ``url`` to the watch page for ``item['id']`` and
      ``video_info`` to a JSON string holding the item's ``id``, ``title``,
      ``author`` and ``duration`` (an empty string stands in for any of
      those that are missing).
    - ``'playlist'``: sets ``url`` to the playlist page for ``item['id']``.
    - ``'channel'``: sets ``url`` to the channel page for ``item['id']``.

    Items of any other type are left untouched. Returns ``None``.
    """
    kind = item['type']
    if kind == 'video':
        item['url'] = util.URL_ORIGIN + '/watch?v=' + item['id']
        summary = {
            field: item.get(field, '')
            for field in ('id', 'title', 'author', 'duration')
        }
        item['video_info'] = json.dumps(summary)
    elif kind == 'playlist':
        item['url'] = util.URL_ORIGIN + '/playlist?list=' + item['id']
    elif kind == 'channel':
        item['url'] = util.URL_ORIGIN + "/channel/" + item['id']
def renderer_info(renderer, additional_info={}):
type = list(renderer.keys())[0]
renderer = renderer[type]
info = {}
if type == 'itemSectionRenderer':
return renderer_info(renderer['contents'][0], additional_info)
if type in ('movieRenderer', 'clarificationRenderer'):
info['type'] = 'unsupported'
return info
info.update(additional_info)
if type.startswith('compact'):
info['item_size'] = 'small'
else:
info['item_size'] = 'medium'
if type in ('compactVideoRenderer', 'videoRenderer', 'gridVideoRenderer'):
info['type'] = 'video'
elif type in ('playlistRenderer', 'compactPlaylistRenderer', 'gridPlaylistRenderer',
'radioRenderer', 'compactRadioRenderer', 'gridRadioRenderer',
'showRenderer', 'compactShowRenderer', 'gridShowRenderer'):
info['type'] = 'playlist'
elif type == 'channelRenderer':
info['type'] = 'channel'
else:
info['type'] = 'unsupported'
return info
try:
if 'viewCountText' in renderer: # prefer this one as it contains all the digits if 'viewCountText' in renderer: # prefer this one as it contains all the digits
info['views'] = get_text(renderer['viewCountText']) info['views'] = get_text(renderer['viewCountText'])
elif 'shortViewCountText' in renderer: elif 'shortViewCountText' in renderer:
@ -183,23 +260,25 @@ def renderer_info(renderer):
except KeyError: except KeyError:
continue continue
info[simple_key] = function(node) info[simple_key] = function(node)
if info['type'] == 'video' and 'duration' not in info:
info['duration'] = 'Live'
return info return info
except KeyError: except KeyError:
print(renderer) print(renderer)
raise raise
def ajax_info(item_json):
try:
info = {} #print(renderer)
for key, node in item_json.items(): #raise NotImplementedError('Unknown renderer type: ' + type)
try: return ''
simple_key, function = dispatch[key]
except KeyError: def parse_info_prepare_for_html(renderer):
continue item = renderer_info(renderer)
info[simple_key] = function(node) prefix_urls(item)
return info add_extra_html_info(item)
except KeyError:
print(item_json) return item
raise