Merge branch 'master' into add_sponsorblock

This commit is contained in:
James Taylor 2020-10-21 18:53:12 -07:00 committed by GitHub
commit aa52c7a42e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
20 changed files with 163 additions and 106 deletions

View File

@ -119,7 +119,7 @@ wine_run(['./python/python.exe', '-I', 'get-pip.py'])
## Isolated mode
We want to run in what is called isolated mode, given by the switch -I.
This mode prevents the embedded python distribution from searching in
global directories for imports
For example, if a user has `C:\Python37` and the embedded distribution is

View File

@ -160,6 +160,19 @@ For security reasons, enabling this is not recommended.''',
],
}),
('font', {
'type': int,
'default': 1,
'comment': '',
'options': [
(0, 'Browser default'),
(1, 'Arial'),
(2, 'Liberation Serif'),
(3, 'Verdana'),
(4, 'Tahoma'),
],
}),
('autocheck_subscriptions', {
'type': bool,
'default': 0,
@ -319,12 +332,6 @@ else:
globals().update(current_settings_dict)
if proxy_images:
img_prefix = "/"
else:
img_prefix = ""
if route_tor:
print("Tor routing is ON")
@ -343,6 +350,19 @@ def add_setting_changed_hook(setting, func):
hooks[setting] = [func]
def set_img_prefix(old_value=None, value=None):
    """Refresh the module-global img_prefix.

    Registered as a settings-changed hook, so it receives
    (old_value, new_value); when value is None it falls back to the
    current proxy_images setting. img_prefix becomes '/' when image
    proxying is enabled and '' otherwise.
    """
    global img_prefix
    if value is None:
        value = proxy_images
    img_prefix = '/' if value else ''
set_img_prefix()
add_setting_changed_hook('proxy_images', set_img_prefix)
def settings_page():
if request.method == 'GET':
return flask.render_template('settings.html',

View File

@ -1,5 +1,6 @@
from youtube import util
import flask
from flask import request
import settings
import traceback
import re
@ -59,6 +60,7 @@ def timestamps(text):
@yt_app.errorhandler(500)
def error_page(e):
slim = request.args.get('slim', False) # whether it was an ajax request
if (exc_info()[0] == util.FetchError
and exc_info()[1].code == '429'
and settings.route_tor
@ -68,5 +70,22 @@ def error_page(e):
' using the New Identity button in the Tor Browser.')
if exc_info()[1].ip:
error_message += ' Exit node IP address: ' + exc_info()[1].ip
return flask.render_template('error.html', error_message=error_message), 502
return flask.render_template('error.html', traceback=traceback.format_exc()), 500
return flask.render_template('error.html', error_message=error_message, slim=slim), 502
return flask.render_template('error.html', traceback=traceback.format_exc(), slim=slim), 500
# Maps the integer 'font' setting to the CSS font-family it selects.
font_choices = {
    0: 'initial',
    1: 'arial, "liberation sans", sans-serif',
    2: '"liberation serif", "times new roman", calibri, carlito, serif',
    3: 'verdana, sans-serif',
    4: 'tahoma, sans-serif',
}


@yt_app.route('/shared.css')
def get_css():
    """Serve shared.css rendered with the user's configured font family."""
    rendered = flask.render_template(
        'shared.css',
        font_family=font_choices[settings.font],
    )
    return flask.Response(rendered, mimetype='text/css')

View File

@ -90,7 +90,7 @@ def single_comment_ctoken(video_id, comment_id):
def post_process_comments_info(comments_info):
for comment in comments_info['comments']:
comment['author_url'] = concat_or_none(
util.URL_ORIGIN, comment['author_url'])
'/', comment['author_url'])
comment['author_avatar'] = concat_or_none(
settings.img_prefix, comment['author_avatar'])

View File

@ -155,7 +155,7 @@ def get_delete_comment_page():
def get_post_comment_page():
video_id = request.args['video_id']
parent_id = request.args.get('parent_id', '')
if parent_id: # comment reply
form_action = util.URL_ORIGIN + '/comments?parent_id=' + parent_id + "&video_id=" + video_id
replying = True

View File

@ -5,13 +5,13 @@ import io
def byte(n):
    """Pack the integer n (0-255) into a length-1 bytes object."""
    return bytes([n])
def varint_encode(offset):
'''In this encoding system, for each 8-bit byte, the first bit is 1 if there are more bytes, and 0 is this is the last one.
The next 7 bits are data. These 7-bit sections represent the data in Little endian order. For example, suppose the data is
aaaaaaabbbbbbbccccccc (each of these sections is 7 bits). It will be encoded as:
1ccccccc 1bbbbbbb 0aaaaaaa
This encoding is used in youtube parameters to encode offsets and to encode the length for length-prefixed data.
See https://developers.google.com/protocol-buffers/docs/encoding#varints for more info.'''
needed_bytes = ceil(offset.bit_length()/7) or 1 # (0).bit_length() returns 0, but we need 1 in that case.
@ -20,20 +20,20 @@ def varint_encode(offset):
encoded_bytes[i] = (offset & 127) | 128 # 7 least significant bits
offset = offset >> 7
encoded_bytes[-1] = offset & 127 # leave first bit as zero for last byte
return bytes(encoded_bytes)
def varint_decode(encoded):
    """Decode a varint back into an integer.

    Each byte carries 7 data bits (little-endian groups); the high bit is
    set on every byte except the last. Decoding stops at the first byte
    whose high bit is clear.
    """
    result = 0
    shift = 0
    for b in encoded:
        result |= (b & 0x7f) << shift
        if not b & 0x80:
            break
        shift += 7
    return result
def string(field_number, data):
    """Encode data as a length-delimited (wire type 2) protobuf field."""
    payload = as_bytes(data)
    return _proto_field(2, field_number, varint_encode(len(payload)) + payload)
@ -41,20 +41,20 @@ nested = string
def uint(field_number, value):
    """Encode value as a varint (wire type 0) protobuf field."""
    encoded_value = varint_encode(value)
    return _proto_field(0, field_number, encoded_value)
def _proto_field(wire_type, field_number, data):
    """Prefix data with its protobuf tag varint.

    See https://developers.google.com/protocol-buffers/docs/encoding#structure
    """
    tag = (field_number << 3) | wire_type
    return varint_encode(tag) + data
def percent_b64encode(data):
    """URL-safe base64-encode data, percent-encoding '=' padding as '%3D'."""
    encoded = base64.urlsafe_b64encode(data)
    return encoded.replace(b'=', b'%3D')
def unpadded_b64encode(data):
    """URL-safe base64-encode data with the trailing '=' padding removed."""
    encoded = base64.urlsafe_b64encode(data)
    return encoded.rstrip(b'=')
@ -81,7 +81,7 @@ def read_varint(data):
i += 1
return result
def read_group(data, end_sequence):
start = data.tell()
index = data.original.find(end_sequence, start)
@ -101,7 +101,7 @@ def read_protobuf(data):
break
wire_type = tag & 7
field_number = tag >> 3
if wire_type == 0:
value = read_varint(data)
elif wire_type == 1:

View File

@ -1,4 +1,4 @@
.video-metadata{
.video-metadata{
display: grid;
grid-template-columns: auto 1fr;
grid-template-rows: auto auto 1fr auto;
@ -124,6 +124,23 @@
grid-column-gap: 10px;
}
details.replies > summary{
background-color: var(--interface-color);
border-style: outset;
border-width: 1px;
font-weight: bold;
padding-bottom: 0px;
}
.replies-open-new-tab{
display: inline-block;
margin-top: 5px;
}
details.replies .comment{
width: 600px;
}
.more-comments{
justify-self:center;
margin-top:10px;

View File

@ -41,9 +41,7 @@ function doXhr(url, callback=null) {
var xhr = new XMLHttpRequest();
xhr.open("GET", url);
xhr.onload = (e) => {
let ok = xhr.status >= 200 && xhr.status < 300;
if (ok) callback(e.currentTarget.response);
else alert(`${xhr.responseURL} status code: ${xhr.status}`);
callback(e.currentTarget.response);
}
xhr.send();
return xhr;

View File

@ -4,12 +4,9 @@
<meta charset="utf-8">
<title>{{ page_title }}</title>
<meta http-equiv="Content-Security-Policy" content="default-src 'self' 'unsafe-inline'; media-src 'self' https://*.googlevideo.com;
{% if not settings.proxy_images %}
img-src https://*.googleusercontent.com https://*.ggpht.com https://*.ytimg.com;
{% endif %}">
{{ "img-src 'self' https://*.googleusercontent.com https://*.ggpht.com https://*.ytimg.com;" if not settings.proxy_images else "" }}">
<link href="{{ theme_path }}" type="text/css" rel="stylesheet">
<link href="/youtube.com/static/shared.css" type="text/css" rel="stylesheet">
<link href="/youtube.com/shared.css" type="text/css" rel="stylesheet">
<link href="/youtube.com/static/comments.css" type="text/css" rel="stylesheet">
<link href="/youtube.com/static/favicon.ico" type="image/x-icon" rel="icon">
<link title="Youtube local" href="/youtube.com/opensearch.xml" rel="search" type="application/opensearchdescription+xml">

View File

@ -34,11 +34,11 @@
main .channel-tabs{
grid-row:2;
grid-column: 1 / span 2;
display:grid;
grid-auto-flow: column;
justify-content:start;
background-color: var(--interface-color);
padding: 3px;
padding-left: 6px;
@ -103,7 +103,7 @@
}
{% endblock style %}
{% block main %}
{% block main %}
<img class="avatar" src="{{ avatar }}">
<div class="summary">
<h2 class="title">{{ channel_name }}</h2>

View File

@ -25,6 +25,7 @@
{% if settings.use_comments_js and comment['reply_count'] %}
<details class="replies" src="{{ comment['replies_url'] }}">
<summary>{{ comment['view_replies_text'] }}</summary>
<a href="{{ comment['replies_url'] }}" class="replies-open-new-tab" target="_blank">Open in new tab</a>
<div class="comment_page">loading..</div>
</details>
{% else %}

View File

@ -1,29 +1,8 @@
{% set page_title = 'Error' %}
{% extends "base.html" %}
{% block style %}
h1{
font-size: 2rem;
font-weight: normal;
}
#error-box, #error-message{
background-color: var(--interface-color);
width: 80%;
margin: auto;
margin-top: 20px;
padding: 5px;
}
#error-box > div, #error-box > p, #error-box > h1{
white-space: pre-wrap;
margin-bottom: 10px;
}
.code-box{
padding: 5px;
border-style:solid;
border-width:1px;
border-radius:5px;
}
{% endblock style %}
{% if not slim %}
{% extends "base.html" %}
{% endif %}
{% block main %}
{% if traceback %}

View File

@ -25,7 +25,7 @@
}
{% endblock style %}
{% block main %}
{% block main %}
<div class="playlist-metadata">
<h2 class="playlist-title">{{ playlist_name }}</h2>
<input type="hidden" name="playlist_page" value="{{ playlist_name }}" form="playlist-edit">

View File

@ -6,7 +6,7 @@
width: 800px;
margin:auto;
}
.playlist-metadata{
display:grid;
grid-template-columns: 0fr 1fr;
@ -44,7 +44,7 @@
display: grid;
grid-auto-rows: 0fr;
grid-row-gap: 10px;
}
{% endblock style %}
@ -61,7 +61,7 @@
<div class="playlist-description">{{ common_elements.text_runs(description) }}</div>
</div>
<div id="results">
<div id="results">
{% for info in video_list %}
{{ common_elements.item(info) }}
{% endfor %}

View File

@ -12,17 +12,17 @@ address{
}
html{
font-family: "liberation serif", "times new roman", calibri, carlito, serif;
font-family: {{ font_family }};
}
body{
margin:0;
padding: 0;
color:var(--text-color);
background-color:var(--background-color);
min-height:100vh;
display: flex;
flex-direction: column;
@ -141,7 +141,7 @@ body{
.item-list{
display: grid;
grid-row-gap: 10px;
}
@ -164,7 +164,7 @@ body{
.item-box{
display: inline-flex;
flex-direction: row;
/* prevent overflow due to long titles with no spaces:
/* prevent overflow due to long titles with no spaces:
https://stackoverflow.com/a/43312314 */
min-width: 0;
}
@ -185,7 +185,7 @@ body{
align-content: start;
grid-template-columns: auto 1fr;
grid-template-rows: auto auto auto auto 1fr;
/* prevent overflow due to long titles with no spaces:
/* prevent overflow due to long titles with no spaces:
https://stackoverflow.com/a/43312314 */
min-width: 0;
}
@ -308,7 +308,7 @@ body{
justify-content: center;
display: grid;
grid-auto-columns: 40px;
grid-auto-flow: column;
grid-auto-flow: column;
height: 40px;
}
.next-previous-button-row{
@ -334,3 +334,26 @@ body{
padding: 2px;
justify-self: start;
}
/* error page stuff */
h1{
font-size: 2rem;
font-weight: normal;
}
#error-box, #error-message{
background-color: var(--interface-color);
width: 80%;
margin: auto;
margin-top: 20px;
padding: 5px;
}
#error-box > div, #error-box > p, #error-box > h1{
white-space: pre-wrap;
margin-bottom: 10px;
}
.code-box{
padding: 5px;
border-style:solid;
border-width:1px;
border-radius:5px;
}

View File

@ -14,18 +14,6 @@
text-decoration: underline;
}
details.replies > summary{
background-color: var(--interface-color);
border-style: outset;
border-width: 1px;
font-weight: bold;
padding-bottom: 0px;
}
details.replies .comment{
width: 600px;
}
.playability-error{
height: 360px;
width: 640px;

View File

@ -226,15 +226,19 @@ def extract_info(video_id, use_invidious, playlist_id=None, index=None):
return {'error': 'Failed to parse json response'}
info = yt_data_extract.extract_watch_info(polymer_json)
# age restriction bypass
if info['age_restricted']:
print('Fetching age restriction bypass page')
# request player if it's missing
# see https://github.com/user234683/youtube-local/issues/22#issuecomment-706395160
if info['age_restricted'] or info['player_response_missing']:
if info['age_restricted']:
print('Age restricted video. Fetching get_video_info page')
else:
print('Missing player. Fetching get_video_info page')
data = {
'video_id': video_id,
'eurl': 'https://youtube.googleapis.com/v/' + video_id,
}
url = 'https://www.youtube.com/get_video_info?' + urllib.parse.urlencode(data)
video_info_page = util.fetch_url(url, debug_name='get_video_info', report_text='Fetched age restriction bypass page').decode('utf-8')
video_info_page = util.fetch_url(url, debug_name='get_video_info', report_text='Fetched get_video_info page').decode('utf-8')
yt_data_extract.update_with_age_restricted_info(info, video_info_page)
# signature decryption

View File

@ -90,15 +90,20 @@ def remove_redirect(url):
return urllib.parse.parse_qs(query_string)['q'][0]
return url
# NOTE(review): youtube_url_re appears superseded by norm_url_re below but is
# kept in case code outside this view still references it.
youtube_url_re = re.compile(r'^(?:(?:(?:https?:)?//)?(?:www\.)?youtube\.com)?(/.*)$')
norm_url_re = re.compile(r'^(?:(?:https?:)?//)?((?:[\w-]+\.)+[\w-]+)?(/.*)$')


def normalize_url(url):
    '''Return url as an absolute https URL.

    Inserts the https scheme if missing, resolves scheme-relative and
    site-relative paths (relative paths resolve against www.youtube.com),
    and puts www. in front of a bare youtube.com domain.

    Returns None when url is None. Raises Exception (carrying the
    offending url) when the url cannot be parsed.
    '''
    if url is None:
        return None
    match = norm_url_re.fullmatch(url)
    if match is None:
        raise Exception(url)
    # Relative paths like /watch?v=... have no domain group; default to
    # www.youtube.com, and normalize a bare youtube.com to the www form.
    domain = match.group(1) or 'www.youtube.com'
    if domain == 'youtube.com':
        domain = 'www.youtube.com'
    return 'https://' + domain + match.group(2)
def _recover_urls(runs):
for run in runs:
@ -240,11 +245,11 @@ def extract_item_info(item, additional_info={}):
))
info['author_url'] = ('https://www.youtube.com/channel/' + info['author_id']) if info['author_id'] else None
info['description'] = extract_formatted_text(multi_get(item, 'descriptionSnippet', 'descriptionText'))
info['thumbnail'] = multi_deep_get(item,
info['thumbnail'] = normalize_url(multi_deep_get(item,
['thumbnail', 'thumbnails', 0, 'url'], # videos
['thumbnails', 0, 'thumbnails', 0, 'url'], # playlists
['thumbnailRenderer', 'showCustomThumbnailRenderer', 'thumbnail', 'thumbnails', 0, 'url'], # shows
)
))
info['badges'] = []
for badge_node in multi_get(item, 'badges', 'ownerBadges', default=()):
@ -290,7 +295,7 @@ def extract_item_info(item, additional_info={}):
info['duration'] = extract_str(item.get('lengthText'))
# if it's an item in a playlist, get its index
if 'index' in item: # url has wrong index on playlist page
if 'index' in item: # url has wrong index on playlist page
info['index'] = extract_int(item.get('index'))
elif 'indexText' in item:
# Current item in playlist has ▶ instead of the actual index, must

View File

@ -49,10 +49,10 @@ def extract_channel_info(polymer_json, tab):
if info['short_description'] and len(info['short_description']) > 730:
info['short_description'] = info['short_description'][0:730] + '...'
info['channel_name'] = metadata.get('title')
info['avatar'] = multi_deep_get(metadata,
info['avatar'] = normalize_url(multi_deep_get(metadata,
['avatar', 'thumbnails', 0, 'url'],
['thumbnail', 'thumbnails', 0, 'url'],
)
))
channel_url = multi_get(metadata, 'urlCanonical', 'channelUrl')
if channel_url:
channel_id = get(channel_url.rstrip('/').split('/'), -1)
@ -164,7 +164,7 @@ def extract_playlist_metadata(polymer_json):
metadata['video_count'] = extract_int(header.get('numVideosText'))
metadata['description'] = extract_str(header.get('descriptionText'), default='')
metadata['author'] = extract_str(header.get('ownerText'))
metadata['author_id'] = multi_deep_get(header,
metadata['author_id'] = multi_deep_get(header,
['ownerText', 'runs', 0, 'navigationEndpoint', 'browseEndpoint', 'browseId'],
['ownerEndpoint', 'browseEndpoint', 'browseId'])
if metadata['author_id']:
@ -263,13 +263,13 @@ def extract_comments_info(polymer_json):
# These 3 are sometimes absent, likely because the channel was deleted
comment_info['author'] = extract_str(comment_renderer.get('authorText'))
comment_info['author_url'] = deep_get(comment_renderer,
'authorEndpoint', 'commandMetadata', 'webCommandMetadata', 'url')
comment_info['author_url'] = normalize_url(deep_get(comment_renderer,
'authorEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'))
comment_info['author_id'] = deep_get(comment_renderer,
'authorEndpoint', 'browseEndpoint', 'browseId')
comment_info['author_avatar'] = deep_get(comment_renderer,
'authorThumbnail', 'thumbnails', 0, 'url')
comment_info['author_avatar'] = normalize_url(deep_get(
comment_renderer, 'authorThumbnail', 'thumbnails', 0, 'url'))
comment_info['id'] = comment_renderer.get('commentId')
comment_info['text'] = extract_formatted_text(comment_renderer.get('contentText'))
comment_info['time_published'] = extract_str(comment_renderer.get('publishedTimeText'))

View File

@ -172,7 +172,7 @@ def _extract_watch_info_mobile(top_level):
else:
info['playlist'] = {}
info['playlist']['title'] = playlist.get('title')
info['playlist']['author'] = extract_str(multi_get(playlist,
info['playlist']['author'] = extract_str(multi_get(playlist,
'ownerName', 'longBylineText', 'shortBylineText', 'ownerText'))
author_id = deep_get(playlist, 'longBylineText', 'runs', 0,
'navigationEndpoint', 'browseEndpoint', 'browseId')
@ -447,7 +447,8 @@ def _extract_playability_error(info, player_response, error_prefix=''):
SUBTITLE_FORMATS = ('srv1', 'srv2', 'srv3', 'ttml', 'vtt')
def extract_watch_info(polymer_json):
info = {'playability_error': None, 'error': None}
info = {'playability_error': None, 'error': None,
'player_response_missing': None}
if isinstance(polymer_json, dict):
top_level = polymer_json
@ -477,6 +478,10 @@ def extract_watch_info(polymer_json):
else:
embedded_player_response = {}
# see https://github.com/user234683/youtube-local/issues/22#issuecomment-706395160
info['player_response_missing'] = not (
player_response or embedded_player_response)
# captions
info['automatic_caption_languages'] = []
info['manual_caption_languages'] = []
@ -580,7 +585,8 @@ def get_caption_url(info, language, format, automatic=False, translation_languag
return url
def update_with_age_restricted_info(info, video_info_page):
ERROR_PREFIX = 'Error bypassing age-restriction: '
'''Inserts urls from 'player_response' in get_video_info page'''
ERROR_PREFIX = 'Error getting missing player or bypassing age-restriction: '
video_info = urllib.parse.parse_qs(video_info_page)
player_response = deep_get(video_info, 'player_response', 0)