Fix comment replies

Comment reply protobuf now requires the channel id of the uploader
of the video. Otherwise the endpoint returns 500.

Instead of making the protobuf ourselves and passing this data
around through query parameters, just use the ctoken provided to us
but modify the max_replies field from 10 to 250.

Fixes #53

Signed-off-by: Jesús <heckyel@hyperbola.info>
This commit is contained in:
James Taylor 2021-02-25 15:55:23 -08:00 committed by Jesús
parent f26c9be85e
commit 00ef1c8627
No known key found for this signature in database
GPG Key ID: F6EE7BC59A315766
4 changed files with 129 additions and 51 deletions

View File

@ -33,8 +33,8 @@ def make_comment_ctoken(video_id, sort=0, offset=0, lc='', secret_key=''):
video_id = proto.as_bytes(video_id)
secret_key = proto.as_bytes(secret_key)
page_info = proto.string(4, video_id) + proto.uint(6, sort)
page_info = proto.string(4,video_id) + proto.uint(6, sort)
offset_information = proto.nested(4, page_info) + proto.uint(5, offset)
if secret_key:
offset_information = proto.string(1, secret_key) + offset_information
@ -47,15 +47,6 @@ def make_comment_ctoken(video_id, sort=0, offset=0, lc='', secret_key=''):
return base64.urlsafe_b64encode(result).decode('ascii')
def comment_replies_ctoken(video_id, comment_id, max_results=500):
params = proto.string(2, comment_id) + proto.uint(9, max_results)
params = proto.nested(3, params)
result = proto.nested(2, proto.string(2, video_id)) + proto.uint(3, 6) + proto.nested(6, params)
return base64.urlsafe_b64encode(result).decode('ascii')
mobile_headers = {
'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_1 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.0 Mobile/14E304 Safari/602.1',
'Accept': '*/*',
@ -66,10 +57,11 @@ mobile_headers = {
def request_comments(ctoken, replies=False):
if replies: # let's make it use different urls for no reason despite all the data being encoded
base_url = "https://m.youtube.com/watch_comment?action_get_comment_replies=1&ctoken="
base_url = 'https://m.youtube.com/watch_comment?'
if replies:
base_url += 'action_get_comment_replies=1&ctoken='
else:
base_url = "https://m.youtube.com/watch_comment?action_get_comments=1&ctoken="
base_url += 'action_get_comments=1&ctoken='
url = base_url + ctoken.replace("=", "%3D") + "&pbj=1"
content = util.fetch_url(
@ -99,17 +91,24 @@ def post_process_comments_info(comments_info):
comment['permalink'] = concat_or_none(
util.URL_ORIGIN, '/watch?v=',
comments_info['video_id'], '&lc=', comment['id'])
comments_info['video_id'],
'&lc=', comment['id']
)
reply_count = comment['reply_count']
if reply_count == 0:
comment['replies_url'] = None
else:
comment['replies_url'] = concat_or_none(
util.URL_ORIGIN,
'/comments?parent_id=', comment['id'],
'&video_id=', comments_info['video_id'])
# Default to no replies link; it is only filled in when a usable ctoken
# could be built from the one supplied with the comment.
comment['replies_url'] = None
if comment['reply_ctoken']:
    # change max_replies field to 250 in ctoken
    ctoken, err = proto.set_protobuf_value(
        comment['reply_ctoken'],
        'base64p', 6, 3, 9, value=250)
    if err:
        # set_protobuf_value returns (None, traceback_text) on failure.
        # Leave replies_url as None instead of concatenating None into
        # the URL, which would raise TypeError.
        print('Error setting ctoken value:')
        print(err)
    else:
        comment['replies_url'] = concat_or_none(
            util.URL_ORIGIN,
            '/comments?replies=1&ctoken=' + ctoken)
if reply_count == 0:
comment['view_replies_text'] = 'Reply'
@ -118,6 +117,7 @@ def post_process_comments_info(comments_info):
else:
comment['view_replies_text'] = str(reply_count) + ' replies'
if comment['like_count'] == 1:
comment['likes_text'] = '1 like'
else:
@ -125,10 +125,12 @@ def post_process_comments_info(comments_info):
comments_info['include_avatars'] = settings.enable_comment_avatars
if comments_info['ctoken']:
replies_param = '&replies=1' if comments_info['is_replies'] else ''
comments_info['more_comments_url'] = concat_or_none(
util.URL_ORIGIN,
'/comments?ctoken=',
comments_info['ctoken']
comments_info['ctoken'],
replies_param
)
comments_info['page_number'] = page_number = str(int(comments_info['offset']/20) + 1)
@ -137,14 +139,11 @@ def post_process_comments_info(comments_info):
comments_info['sort_text'] = 'top' if comments_info['sort'] == 0 else 'newest'
comments_info['video_url'] = concat_or_none(
util.URL_ORIGIN,
'/watch?v=',
comments_info['video_id']
)
util.URL_ORIGIN, '/watch?v=', comments_info['video_id'])
comments_info['video_thumbnail'] = concat_or_none(
settings.img_prefix, 'https://i.ytimg.com/vi/',
comments_info['video_id'], '/mqdefault.jpg')
comments_info['video_id'], '/mqdefault.jpg'
)
def video_comments(video_id, sort=0, offset=0, lc='', secret_key=''):
@ -198,17 +197,9 @@ def video_comments(video_id, sort=0, offset=0, lc='', secret_key=''):
@yt_app.route('/comments')
def get_comments_page():
ctoken = request.args.get('ctoken', '')
replies = False
if not ctoken:
video_id = request.args['video_id']
parent_id = request.args['parent_id']
ctoken = comment_replies_ctoken(video_id, parent_id)
replies = True
comments_info = yt_data_extract.extract_comments_info(
request_comments(ctoken, replies))
replies = request.args.get('replies', '0') == '1'
comments_info = yt_data_extract.extract_comments_info(request_comments(ctoken, replies))
post_process_comments_info(comments_info)
if not replies:

View File

@ -1,6 +1,7 @@
from math import ceil
import base64
import io
import traceback
def byte(n):
@ -92,7 +93,6 @@ def read_group(data, end_sequence):
data.seek(index + len(end_sequence))
return data.original[start:index]
def read_protobuf(data):
data_original = data
data = io.BytesIO(data)
@ -122,12 +122,89 @@ def read_protobuf(data):
yield (wire_type, field_number, value)
def parse(data):
return {field_number: value for _, field_number, value in read_protobuf(data)}
def parse(data, include_wire_type=False):
    '''Return a dict mapping each field number in data to its value.

    data is the raw protobuf structure; it must not be b64-encoded.
    When include_wire_type is true, every dict value is a two-item list
    [wire_type, value] instead of the bare value.
    If a field number occurs more than once, the last occurrence wins.'''
    fields = {}
    for wire_type, field_number, value in read_protobuf(data):
        if include_wire_type:
            fields[field_number] = [wire_type, value]
        else:
            fields[field_number] = value
    return fields
# Maps each base64 directive name to the function that produces that
# flavour of encoding. These names are what _make_protobuf and
# _set_protobuf_value look for to decide when to apply base64.
base64_enc_funcs = {
    'base64': base64.urlsafe_b64encode,
    'base64s': unpadded_b64encode,
    'base64p': percent_b64encode,
}
def _make_protobuf(data):
# must be dict mapping field_number to [wire_type, value]
if isinstance(data, dict):
new_data = []
for field_num, (wire_type, value) in sorted(data.items()):
new_data.append((wire_type, field_num, value))
data = new_data
if isinstance(data, str):
return data.encode('utf-8')
elif len(data) == 2 and data[0] in base64_enc_funcs:
return base64_enc_funcs[data[0]](make_proto(data[1]))
elif isinstance(data, list):
result = b''
for field in data:
if field[0] == 0:
result += uint(field[1], field[2])
elif field[0] == 2:
result += string(field[1], _make_protobuf(field[2]))
else:
raise NotImplementedError('Wire type ' + str(field[0])
+ ' not implemented')
return result
return data
def make_protobuf(data):
    '''Serialize data with _make_protobuf and return the result as a str,
    decoding the produced bytes as ASCII.'''
    serialized = _make_protobuf(data)
    return serialized.decode('ascii')
def _set_protobuf_value(data, *path, value):
if not path:
return value
op = path[0]
if op in base64_enc_funcs:
inner_data = b64_to_bytes(data)
return base64_enc_funcs[op](
_set_protobuf_value(inner_data, *path[1:], value=value)
)
pb_dict = parse(data, include_wire_type=True)
pb_dict[op][1] = _set_protobuf_value(
pb_dict[op][1], *path[1:], value=value
)
return _make_protobuf(pb_dict)
def set_protobuf_value(data, *path, value):
    '''Set a field's value in a raw protobuf structure.

    path is a sequence of field numbers and/or base64 encoding directives.
    The directives are:
        base64: normal base64 encoding with equal signs padding
        base64s ("stripped"): no padding
        base64p: %3D instead of = for padding

    Returns (new_protobuf, err). On success new_protobuf is the result
    decoded as ASCII and err is None; on any exception new_protobuf is None
    and err is the formatted traceback.'''
    try:
        encoded = _set_protobuf_value(data, *path, value=value)
        text = encoded.decode('ascii')
    except Exception:
        # Best effort by design: report the failure to the caller as text
        return None, traceback.format_exc()
    return text, None
def b64_to_bytes(data):
    '''Decode URL-safe base64 text into bytes.

    data may be a str or ASCII bytes. Padding may be written as "=", as
    the percent-encoded "%3D", or left out entirely.'''
    if isinstance(data, bytes):
        data = data.decode('ascii')
    data = data.replace("%3D", "=")
    # Append whatever "=" padding is needed to reach a multiple of 4 chars
    return base64.urlsafe_b64decode(data + "=" * (-len(data) % 4))

View File

@ -23,14 +23,18 @@
<span class="comment-likes">{{ comment['likes_text'] if comment['like_count'] else ''}}</span>
<div class="button-row">
{% if settings.use_comments_js and comment['reply_count'] %}
<details class="replies" data-src="{{ comment['replies_url'] }}">
<summary>{{ comment['view_replies_text'] }}</summary>
<a href="{{ comment['replies_url'] }}" class="replies-open-new-tab" target="_blank">Open in new tab</a>
<div class="comment_page">loading..</div>
</details>
{% elif comment['reply_count'] %}
<a href="{{ comment['replies_url'] }}" class="replies">{{ comment['view_replies_text'] }}</a>
{% if comment['reply_count'] %}
{% if settings.use_comments_js and comment['replies_url'] %}
<details class="replies" src="{{ comment['replies_url'] }}">
<summary>{{ comment['view_replies_text'] }}</summary>
<a href="{{ comment['replies_url'] }}" class="replies-open-new-tab" target="_blank">Open in new tab</a>
<div class="comment_page">loading...</div>
</details>
{% elif comment['replies_url'] %}
<a href="{{ comment['replies_url'] }}" class="replies">{{ comment['view_replies_text'] }}</a>
{% else %}
<a class="replies">{{ comment['view_replies_text'] }} (error constructing url)</a>
{% endif %}
{% endif %}
</div>
</div>

View File

@ -251,13 +251,19 @@ def extract_comments_info(polymer_json):
info['video_title'] = extract_str(comment_thread.get('commentTargetTitle'))
if 'replies' not in comment_thread:
comment_info['reply_count'] = 0
comment_info['reply_ctoken'] = None
else:
comment_info['reply_count'] = extract_int(deep_get(comment_thread,
'replies', 'commentRepliesRenderer', 'moreText'
), default=1) # With 1 reply, the text reads "View reply"
comment_info['reply_ctoken'] = deep_get(comment_thread,
'replies', 'commentRepliesRenderer', 'continuations', 0,
'nextContinuationData', 'continuation'
)
comment_renderer = deep_get(comment_thread, 'comment', 'commentRenderer', default={})
elif 'commentRenderer' in comment: # replies
comment_info['reply_count'] = 0 # replyCount, below, not present for replies even if the reply has further replies to it
comment_info['reply_ctoken'] = None
conservative_update(info, 'is_replies', True)
comment_renderer = comment['commentRenderer']
else: