Merge branch 'master' into add_sponsorblock

commit aa52c7a42e
Author: James Taylor
Date: 2020-10-21 18:53:12 -07:00 (committed by GitHub)
GPG Key ID: 4AEE18F83AFDEB23 (no known key found for this signature in database)
20 changed files with 163 additions and 106 deletions

View File

@@ -119,7 +119,7 @@ wine_run(['./python/python.exe', '-I', 'get-pip.py'])
 ## Isolated mode
 We want to run in what is called isolated mode, given by the switch -I.
 This mode prevents the embedded python distribution from searching in
 global directories for imports
 For example, if a user has `C:\Python37` and the embedded distribution is

View File

@@ -160,6 +160,19 @@ For security reasons, enabling this is not recommended.''',
         ],
     }),
 
+    ('font', {
+        'type': int,
+        'default': 1,
+        'comment': '',
+        'options': [
+            (0, 'Browser default'),
+            (1, 'Arial'),
+            (2, 'Liberation Serif'),
+            (3, 'Verdana'),
+            (4, 'Tahoma'),
+        ],
+    }),
+
     ('autocheck_subscriptions', {
         'type': bool,
         'default': 0,
@@ -319,12 +332,6 @@ else:
     globals().update(current_settings_dict)
 
-if proxy_images:
-    img_prefix = "/"
-else:
-    img_prefix = ""
 
 if route_tor:
     print("Tor routing is ON")
@@ -343,6 +350,19 @@ def add_setting_changed_hook(setting, func):
         hooks[setting] = [func]
 
+def set_img_prefix(old_value=None, value=None):
+    global img_prefix
+    if value is None:
+        value = proxy_images
+    if value:
+        img_prefix = '/'
+    else:
+        img_prefix = ''
+set_img_prefix()
+
+add_setting_changed_hook('proxy_images', set_img_prefix)
+
 def settings_page():
     if request.method == 'GET':
         return flask.render_template('settings.html',
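
Note: the set_img_prefix hook added above follows a simple observer pattern: callbacks registered per setting name run when that setting changes, which keeps img_prefix in sync with proxy_images. A minimal standalone sketch of that pattern (an illustration only, not the project's settings module; change_setting is a hypothetical helper):

    # Hooks keyed by setting name; each maps to a list of callbacks.
    hooks = {}

    def add_setting_changed_hook(setting, func):
        # Register func to run whenever the named setting changes.
        if setting in hooks:
            hooks[setting].append(func)
        else:
            hooks[setting] = [func]

    def change_setting(current_settings, setting, new_value):
        # Hypothetical helper: update a setting and fire its hooks.
        old_value = current_settings[setting]
        current_settings[setting] = new_value
        for func in hooks.get(setting, ()):
            func(old_value, new_value)

    img_prefix = ''

    def set_img_prefix(old_value=None, value=None):
        global img_prefix
        img_prefix = '/' if value else ''

    add_setting_changed_hook('proxy_images', set_img_prefix)
    change_setting({'proxy_images': False}, 'proxy_images', True)
    assert img_prefix == '/'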

View File

@@ -1,5 +1,6 @@
 from youtube import util
 import flask
+from flask import request
 import settings
 import traceback
 import re
@@ -59,6 +60,7 @@ def timestamps(text):
 
 @yt_app.errorhandler(500)
 def error_page(e):
+    slim = request.args.get('slim', False) # whether it was an ajax request
     if (exc_info()[0] == util.FetchError
         and exc_info()[1].code == '429'
         and settings.route_tor
@@ -68,5 +70,22 @@ def error_page(e):
             ' using the New Identity button in the Tor Browser.')
         if exc_info()[1].ip:
             error_message += ' Exit node IP address: ' + exc_info()[1].ip
-        return flask.render_template('error.html', error_message=error_message), 502
-    return flask.render_template('error.html', traceback=traceback.format_exc()), 500
+        return flask.render_template('error.html', error_message=error_message, slim=slim), 502
+    return flask.render_template('error.html', traceback=traceback.format_exc(), slim=slim), 500
+
+font_choices = {
+    0: 'initial',
+    1: 'arial, "liberation sans", sans-serif',
+    2: '"liberation serif", "times new roman", calibri, carlito, serif',
+    3: 'verdana, sans-serif',
+    4: 'tahoma, sans-serif',
+}
+
+@yt_app.route('/shared.css')
+def get_css():
+    return flask.Response(
+        flask.render_template('shared.css',
+            font_family = font_choices[settings.font]
+        ),
+        mimetype='text/css',
+    )
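
Note: the font setting added in settings.py is an int that indexes font_choices above, and the chosen stack is substituted for {{ font_family }} when shared.css is rendered as a template. A small illustration (dict values copied from the hunk; the rendered output line is an assumption based on the shared.css change later in this diff):

    font_choices = {
        0: 'initial',
        1: 'arial, "liberation sans", sans-serif',
        2: '"liberation serif", "times new roman", calibri, carlito, serif',
        3: 'verdana, sans-serif',
        4: 'tahoma, sans-serif',
    }
    # With the default setting of 1, the rendered stylesheet would contain:
    #   html{ font-family: arial, "liberation sans", sans-serif; }
    assert font_choices[1] == 'arial, "liberation sans", sans-serif'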

View File

@@ -90,7 +90,7 @@ def single_comment_ctoken(video_id, comment_id):
 def post_process_comments_info(comments_info):
     for comment in comments_info['comments']:
         comment['author_url'] = concat_or_none(
-            util.URL_ORIGIN, comment['author_url'])
+            '/', comment['author_url'])
         comment['author_avatar'] = concat_or_none(
             settings.img_prefix, comment['author_avatar'])

View File

@@ -155,7 +155,7 @@ def get_delete_comment_page():
 def get_post_comment_page():
     video_id = request.args['video_id']
     parent_id = request.args.get('parent_id', '')
 
     if parent_id: # comment reply
         form_action = util.URL_ORIGIN + '/comments?parent_id=' + parent_id + "&video_id=" + video_id
         replying = True

View File

@@ -5,13 +5,13 @@ import io
 def byte(n):
     return bytes((n,))
 
 def varint_encode(offset):
     '''In this encoding system, for each 8-bit byte, the first bit is 1 if there are more bytes, and 0 is this is the last one.
     The next 7 bits are data. These 7-bit sections represent the data in Little endian order. For example, suppose the data is
     aaaaaaabbbbbbbccccccc (each of these sections is 7 bits). It will be encoded as:
     1ccccccc 1bbbbbbb 0aaaaaaa
 
     This encoding is used in youtube parameters to encode offsets and to encode the length for length-prefixed data.
     See https://developers.google.com/protocol-buffers/docs/encoding#varints for more info.'''
     needed_bytes = ceil(offset.bit_length()/7) or 1 # (0).bit_length() returns 0, but we need 1 in that case.
@@ -20,20 +20,20 @@ def varint_encode(offset):
         encoded_bytes[i] = (offset & 127) | 128 # 7 least significant bits
         offset = offset >> 7
     encoded_bytes[-1] = offset & 127 # leave first bit as zero for last byte
     return bytes(encoded_bytes)
 
 def varint_decode(encoded):
     decoded = 0
     for i, byte in enumerate(encoded):
         decoded |= (byte & 127) << 7*i
         if not (byte & 128):
             break
     return decoded
 
 def string(field_number, data):
     data = as_bytes(data)
     return _proto_field(2, field_number, varint_encode(len(data)) + data)
@@ -41,20 +41,20 @@ nested = string
 def uint(field_number, value):
     return _proto_field(0, field_number, varint_encode(value))
 
 def _proto_field(wire_type, field_number, data):
     ''' See https://developers.google.com/protocol-buffers/docs/encoding#structure '''
     return varint_encode( (field_number << 3) | wire_type) + data
 
 def percent_b64encode(data):
     return base64.urlsafe_b64encode(data).replace(b'=', b'%3D')
 
 def unpadded_b64encode(data):
     return base64.urlsafe_b64encode(data).replace(b'=', b'')
@@ -81,7 +81,7 @@ def read_varint(data):
         i += 1
     return result
 
 def read_group(data, end_sequence):
     start = data.tell()
     index = data.original.find(end_sequence, start)
@@ -101,7 +101,7 @@ def read_protobuf(data):
             break
         wire_type = tag & 7
         field_number = tag >> 3
 
         if wire_type == 0:
             value = read_varint(data)
         elif wire_type == 1:
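
Note: the varint_encode docstring above describes little-endian 7-bit groups with a continuation bit on every byte except the last. A minimal standalone sketch of that scheme (re-implemented here for illustration, not the proto module itself):

    def varint_encode(n):
        # Emit n as little-endian 7-bit groups; set the high bit on every
        # byte except the last to signal "more bytes follow".
        out = bytearray()
        while True:
            n, low7 = n >> 7, n & 127
            if n:
                out.append(low7 | 128)
            else:
                out.append(low7)
                return bytes(out)

    def varint_decode(data):
        result = 0
        for i, byte in enumerate(data):
            result |= (byte & 127) << (7 * i)
            if not byte & 128:
                break
        return result

    assert varint_encode(300) == b'\xac\x02'   # 300 = 0b10_0101100 -> 0xAC 0x02
    assert varint_decode(b'\xac\x02') == 300
    assert varint_encode(0) == b'\x00'         # zero still takes one byte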

View File

@@ -1,4 +1,4 @@
 .video-metadata{
     display: grid;
     grid-template-columns: auto 1fr;
     grid-template-rows: auto auto 1fr auto;
@@ -124,6 +124,23 @@
     grid-column-gap: 10px;
 }
 
+details.replies > summary{
+    background-color: var(--interface-color);
+    border-style: outset;
+    border-width: 1px;
+    font-weight: bold;
+    padding-bottom: 0px;
+}
+
+.replies-open-new-tab{
+    display: inline-block;
+    margin-top: 5px;
+}
+
+details.replies .comment{
+    width: 600px;
+}
+
 .more-comments{
     justify-self:center;
     margin-top:10px;

View File

@@ -41,9 +41,7 @@ function doXhr(url, callback=null) {
     var xhr = new XMLHttpRequest();
     xhr.open("GET", url);
     xhr.onload = (e) => {
-        let ok = xhr.status >= 200 && xhr.status < 300;
-        if (ok) callback(e.currentTarget.response);
-        else alert(`${xhr.responseURL} status code: ${xhr.status}`);
+        callback(e.currentTarget.response);
     }
     xhr.send();
     return xhr;

View File

@@ -4,12 +4,9 @@
     <meta charset="utf-8">
     <title>{{ page_title }}</title>
     <meta http-equiv="Content-Security-Policy" content="default-src 'self' 'unsafe-inline'; media-src 'self' https://*.googlevideo.com;
-    {% if not settings.proxy_images %}
-    img-src https://*.googleusercontent.com https://*.ggpht.com https://*.ytimg.com;
-    {% endif %}">
+    {{ "img-src 'self' https://*.googleusercontent.com https://*.ggpht.com https://*.ytimg.com;" if not settings.proxy_images else "" }}">
     <link href="{{ theme_path }}" type="text/css" rel="stylesheet">
-    <link href="/youtube.com/static/shared.css" type="text/css" rel="stylesheet">
+    <link href="/youtube.com/shared.css" type="text/css" rel="stylesheet">
     <link href="/youtube.com/static/comments.css" type="text/css" rel="stylesheet">
     <link href="/youtube.com/static/favicon.ico" type="image/x-icon" rel="icon">
     <link title="Youtube local" href="/youtube.com/opensearch.xml" rel="search" type="application/opensearchdescription+xml">

View File

@@ -34,11 +34,11 @@
 main .channel-tabs{
     grid-row:2;
     grid-column: 1 / span 2;
 
     display:grid;
     grid-auto-flow: column;
     justify-content:start;
 
     background-color: var(--interface-color);
     padding: 3px;
     padding-left: 6px;
@@ -103,7 +103,7 @@
 }
 {% endblock style %}
 
 {% block main %}
     <img class="avatar" src="{{ avatar }}">
     <div class="summary">
         <h2 class="title">{{ channel_name }}</h2>

View File

@@ -25,6 +25,7 @@
         {% if settings.use_comments_js and comment['reply_count'] %}
             <details class="replies" src="{{ comment['replies_url'] }}">
                 <summary>{{ comment['view_replies_text'] }}</summary>
+                <a href="{{ comment['replies_url'] }}" class="replies-open-new-tab" target="_blank">Open in new tab</a>
                 <div class="comment_page">loading..</div>
             </details>
         {% else %}

View File

@@ -1,29 +1,8 @@
 {% set page_title = 'Error' %}
-{% extends "base.html" %}
-{% block style %}
-h1{
-    font-size: 2rem;
-    font-weight: normal;
-}
-#error-box, #error-message{
-    background-color: var(--interface-color);
-    width: 80%;
-    margin: auto;
-    margin-top: 20px;
-    padding: 5px;
-}
-#error-box > div, #error-box > p, #error-box > h1{
-    white-space: pre-wrap;
-    margin-bottom: 10px;
-}
-.code-box{
-    padding: 5px;
-    border-style:solid;
-    border-width:1px;
-    border-radius:5px;
-}
-{% endblock style %}
+{% if not slim %}
+{% extends "base.html" %}
+{% endif %}
 
 {% block main %}
     {% if traceback %}

View File

@@ -25,7 +25,7 @@
 }
 {% endblock style %}
 
 {% block main %}
     <div class="playlist-metadata">
         <h2 class="playlist-title">{{ playlist_name }}</h2>
         <input type="hidden" name="playlist_page" value="{{ playlist_name }}" form="playlist-edit">

View File

@@ -6,7 +6,7 @@
     width: 800px;
     margin:auto;
 }
 
 .playlist-metadata{
     display:grid;
     grid-template-columns: 0fr 1fr;
@@ -44,7 +44,7 @@
     display: grid;
     grid-auto-rows: 0fr;
     grid-row-gap: 10px;
 }
 {% endblock style %}
@@ -61,7 +61,7 @@
         <div class="playlist-description">{{ common_elements.text_runs(description) }}</div>
     </div>
     <div id="results">
         {% for info in video_list %}
             {{ common_elements.item(info) }}
         {% endfor %}

View File

@@ -12,17 +12,17 @@ address{
 }
 
 html{
-    font-family: "liberation serif", "times new roman", calibri, carlito, serif;
+    font-family: {{ font_family }};
 }
 
 body{
     margin:0;
     padding: 0;
     color:var(--text-color);
     background-color:var(--background-color);
     min-height:100vh;
     display: flex;
     flex-direction: column;
@@ -141,7 +141,7 @@ body{
 .item-list{
     display: grid;
     grid-row-gap: 10px;
 }
@@ -164,7 +164,7 @@ body{
 .item-box{
     display: inline-flex;
     flex-direction: row;
     /* prevent overflow due to long titles with no spaces:
     https://stackoverflow.com/a/43312314 */
     min-width: 0;
 }
@@ -185,7 +185,7 @@ body{
     align-content: start;
     grid-template-columns: auto 1fr;
     grid-template-rows: auto auto auto auto 1fr;
     /* prevent overflow due to long titles with no spaces:
     https://stackoverflow.com/a/43312314 */
     min-width: 0;
 }
@@ -308,7 +308,7 @@ body{
     justify-content: center;
     display: grid;
     grid-auto-columns: 40px;
     grid-auto-flow: column;
     height: 40px;
 }
 .next-previous-button-row{
@@ -334,3 +334,26 @@ body{
     padding: 2px;
     justify-self: start;
 }
+
+/* error page stuff */
+h1{
+    font-size: 2rem;
+    font-weight: normal;
+}
+#error-box, #error-message{
+    background-color: var(--interface-color);
+    width: 80%;
+    margin: auto;
+    margin-top: 20px;
+    padding: 5px;
+}
+#error-box > div, #error-box > p, #error-box > h1{
+    white-space: pre-wrap;
+    margin-bottom: 10px;
+}
+.code-box{
+    padding: 5px;
+    border-style:solid;
+    border-width:1px;
+    border-radius:5px;
+}

View File

@@ -14,18 +14,6 @@
     text-decoration: underline;
 }
 
-details.replies > summary{
-    background-color: var(--interface-color);
-    border-style: outset;
-    border-width: 1px;
-    font-weight: bold;
-    padding-bottom: 0px;
-}
-
-details.replies .comment{
-    width: 600px;
-}
-
 .playability-error{
     height: 360px;
     width: 640px;

View File

@@ -226,15 +226,19 @@ def extract_info(video_id, use_invidious, playlist_id=None, index=None):
         return {'error': 'Failed to parse json response'}
     info = yt_data_extract.extract_watch_info(polymer_json)
 
-    # age restriction bypass
-    if info['age_restricted']:
-        print('Fetching age restriction bypass page')
+    # request player if it's missing
+    # see https://github.com/user234683/youtube-local/issues/22#issuecomment-706395160
+    if info['age_restricted'] or info['player_response_missing']:
+        if info['age_restricted']:
+            print('Age restricted video. Fetching get_video_info page')
+        else:
+            print('Missing player. Fetching get_video_info page')
         data = {
             'video_id': video_id,
             'eurl': 'https://youtube.googleapis.com/v/' + video_id,
         }
         url = 'https://www.youtube.com/get_video_info?' + urllib.parse.urlencode(data)
-        video_info_page = util.fetch_url(url, debug_name='get_video_info', report_text='Fetched age restriction bypass page').decode('utf-8')
+        video_info_page = util.fetch_url(url, debug_name='get_video_info', report_text='Fetched get_video_info page').decode('utf-8')
         yt_data_extract.update_with_age_restricted_info(info, video_info_page)
 
     # signature decryption

View File

@@ -90,15 +90,20 @@ def remove_redirect(url):
         return urllib.parse.parse_qs(query_string)['q'][0]
     return url
 
-youtube_url_re = re.compile(r'^(?:(?:(?:https?:)?//)?(?:www\.)?youtube\.com)?(/.*)$')
+norm_url_re = re.compile(r'^(?:(?:https?:)?//)?((?:[\w-]+\.)+[\w-]+)?(/.*)$')
 def normalize_url(url):
+    '''Insert https, resolve relative paths for youtube.com, and put www. infront of youtube.com'''
     if url is None:
         return None
-    match = youtube_url_re.fullmatch(url)
+    match = norm_url_re.fullmatch(url)
     if match is None:
-        raise Exception()
-    return 'https://www.youtube.com' + match.group(1)
+        raise Exception(url)
+    domain = match.group(1) or 'www.youtube.com'
+    if domain == 'youtube.com':
+        domain = 'www.youtube.com'
+    return 'https://' + domain + match.group(2)
 
 def _recover_urls(runs):
     for run in runs:
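
Note: the rewritten normalize_url above now accepts any host, defaults relative paths to www.youtube.com, and adds the https scheme. A standalone check of that behavior (regex and function body re-stated from the hunk; the example URLs are illustrative):

    import re

    norm_url_re = re.compile(r'^(?:(?:https?:)?//)?((?:[\w-]+\.)+[\w-]+)?(/.*)$')

    def normalize_url(url):
        if url is None:
            return None
        match = norm_url_re.fullmatch(url)
        if match is None:
            raise Exception(url)
        domain = match.group(1) or 'www.youtube.com'
        if domain == 'youtube.com':
            domain = 'www.youtube.com'
        return 'https://' + domain + match.group(2)

    # relative path: assumed to belong to youtube.com
    assert normalize_url('/watch?v=abc') == 'https://www.youtube.com/watch?v=abc'
    # protocol-relative thumbnail host is kept, scheme is added
    assert normalize_url('//yt3.ggpht.com/x/photo.jpg') == 'https://yt3.ggpht.com/x/photo.jpg'
    # bare youtube.com gains www.
    assert normalize_url('http://youtube.com/watch?v=abc') == 'https://www.youtube.com/watch?v=abc'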
@@ -240,11 +245,11 @@ def extract_item_info(item, additional_info={}):
         ))
     info['author_url'] = ('https://www.youtube.com/channel/' + info['author_id']) if info['author_id'] else None
     info['description'] = extract_formatted_text(multi_get(item, 'descriptionSnippet', 'descriptionText'))
-    info['thumbnail'] = multi_deep_get(item,
+    info['thumbnail'] = normalize_url(multi_deep_get(item,
         ['thumbnail', 'thumbnails', 0, 'url'], # videos
         ['thumbnails', 0, 'thumbnails', 0, 'url'], # playlists
         ['thumbnailRenderer', 'showCustomThumbnailRenderer', 'thumbnail', 'thumbnails', 0, 'url'], # shows
-    )
+    ))
 
     info['badges'] = []
     for badge_node in multi_get(item, 'badges', 'ownerBadges', default=()):
@@ -290,7 +295,7 @@ def extract_item_info(item, additional_info={}):
         info['duration'] = extract_str(item.get('lengthText'))
 
     # if it's an item in a playlist, get its index
    if 'index' in item: # url has wrong index on playlist page
         info['index'] = extract_int(item.get('index'))
     elif 'indexText' in item:
         # Current item in playlist has ▶ instead of the actual index, must

View File

@@ -49,10 +49,10 @@ def extract_channel_info(polymer_json, tab):
     if info['short_description'] and len(info['short_description']) > 730:
         info['short_description'] = info['short_description'][0:730] + '...'
     info['channel_name'] = metadata.get('title')
-    info['avatar'] = multi_deep_get(metadata,
+    info['avatar'] = normalize_url(multi_deep_get(metadata,
         ['avatar', 'thumbnails', 0, 'url'],
         ['thumbnail', 'thumbnails', 0, 'url'],
-    )
+    ))
     channel_url = multi_get(metadata, 'urlCanonical', 'channelUrl')
     if channel_url:
         channel_id = get(channel_url.rstrip('/').split('/'), -1)
@@ -164,7 +164,7 @@ def extract_playlist_metadata(polymer_json):
     metadata['video_count'] = extract_int(header.get('numVideosText'))
     metadata['description'] = extract_str(header.get('descriptionText'), default='')
     metadata['author'] = extract_str(header.get('ownerText'))
     metadata['author_id'] = multi_deep_get(header,
         ['ownerText', 'runs', 0, 'navigationEndpoint', 'browseEndpoint', 'browseId'],
         ['ownerEndpoint', 'browseEndpoint', 'browseId'])
     if metadata['author_id']:
@@ -263,13 +263,13 @@ def extract_comments_info(polymer_json):
 
         # These 3 are sometimes absent, likely because the channel was deleted
         comment_info['author'] = extract_str(comment_renderer.get('authorText'))
-        comment_info['author_url'] = deep_get(comment_renderer,
-            'authorEndpoint', 'commandMetadata', 'webCommandMetadata', 'url')
+        comment_info['author_url'] = normalize_url(deep_get(comment_renderer,
+            'authorEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'))
         comment_info['author_id'] = deep_get(comment_renderer,
             'authorEndpoint', 'browseEndpoint', 'browseId')
-        comment_info['author_avatar'] = deep_get(comment_renderer,
-            'authorThumbnail', 'thumbnails', 0, 'url')
+        comment_info['author_avatar'] = normalize_url(deep_get(
+            comment_renderer, 'authorThumbnail', 'thumbnails', 0, 'url'))
         comment_info['id'] = comment_renderer.get('commentId')
         comment_info['text'] = extract_formatted_text(comment_renderer.get('contentText'))
         comment_info['time_published'] = extract_str(comment_renderer.get('publishedTimeText'))

View File

@@ -172,7 +172,7 @@ def _extract_watch_info_mobile(top_level):
     else:
         info['playlist'] = {}
         info['playlist']['title'] = playlist.get('title')
         info['playlist']['author'] = extract_str(multi_get(playlist,
             'ownerName', 'longBylineText', 'shortBylineText', 'ownerText'))
         author_id = deep_get(playlist, 'longBylineText', 'runs', 0,
             'navigationEndpoint', 'browseEndpoint', 'browseId')
@@ -447,7 +447,8 @@ def _extract_playability_error(info, player_response, error_prefix=''):
 SUBTITLE_FORMATS = ('srv1', 'srv2', 'srv3', 'ttml', 'vtt')
 
 def extract_watch_info(polymer_json):
-    info = {'playability_error': None, 'error': None}
+    info = {'playability_error': None, 'error': None,
+            'player_response_missing': None}
 
     if isinstance(polymer_json, dict):
         top_level = polymer_json
@@ -477,6 +478,10 @@ def extract_watch_info(polymer_json):
     else:
         embedded_player_response = {}
 
+    # see https://github.com/user234683/youtube-local/issues/22#issuecomment-706395160
+    info['player_response_missing'] = not (
+        player_response or embedded_player_response)
+
     # captions
     info['automatic_caption_languages'] = []
     info['manual_caption_languages'] = []
@@ -580,7 +585,8 @@ def get_caption_url(info, language, format, automatic=False, translation_languag
     return url
 
 def update_with_age_restricted_info(info, video_info_page):
-    ERROR_PREFIX = 'Error bypassing age-restriction: '
+    '''Inserts urls from 'player_response' in get_video_info page'''
+    ERROR_PREFIX = 'Error getting missing player or bypassing age-restriction: '
 
     video_info = urllib.parse.parse_qs(video_info_page)
     player_response = deep_get(video_info, 'player_response', 0)
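
Note: update_with_age_restricted_info parses the urlencoded get_video_info page, whose player_response field is a JSON blob. A rough sketch of that parsing (the page string below is fabricated for illustration, and plain indexing stands in for the project's deep_get helper):

    import json
    import urllib.parse

    # A trivial get_video_info-style page: query-string format with an
    # urlencoded JSON player_response.
    video_info_page = 'status=ok&player_response=%7B%22streamingData%22%3A%7B%7D%7D'
    video_info = urllib.parse.parse_qs(video_info_page)
    player_response = json.loads(video_info['player_response'][0])
    assert player_response == {'streamingData': {}}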