fix comment replies being limited to 10
This commit is contained in:
parent
2ba4fd8994
commit
666456146d
@ -59,6 +59,21 @@ def make_comment_ctoken(video_id, sort=0, offset=0, secret_key=''):
|
|||||||
result = proto.nested(2, proto.string(2, video_id)) + proto.uint(3,6) + proto.nested(6, offset_information)
|
result = proto.nested(2, proto.string(2, video_id)) + proto.uint(3,6) + proto.nested(6, offset_information)
|
||||||
return base64.urlsafe_b64encode(result).decode('ascii')
|
return base64.urlsafe_b64encode(result).decode('ascii')
|
||||||
|
|
||||||
|
def comment_replies_ctoken(video_id, comment_id, max_results=500):
    """Build the continuation token for fetching the replies to one comment.

    The token is a base64url-encoded protobuf message assembled with the
    ``proto`` helpers:

    * field 2 wraps the video id (itself stored in field 2),
    * field 3 is the constant 6,
    * field 6 wraps a field 3 that holds the parent comment id (field 2)
      and ``max_results`` (field 9).

    Returns the token as an ASCII ``str``.
    """
    reply_params = proto.nested(
        3,
        proto.string(2, comment_id) + proto.uint(9, max_results),
    )
    video_part = proto.nested(2, proto.string(2, video_id))
    token_bytes = video_part + proto.uint(3, 6) + proto.nested(6, reply_params)
    return str(base64.urlsafe_b64encode(token_bytes), 'ascii')
|
||||||
|
|
||||||
|
def get_ids(ctoken):
    """Extract ``(comment_id, video_id)`` from a continuation token.

    The token is base64-decoded and parsed as protobuf. The video id is read
    from field 2 of the message in top-level field 2; the comment id is read
    from field 2 of the message found at field 6 -> field 3. Both are
    returned as ASCII-decoded ``str`` values, comment id first.
    """
    outer = proto.parse(proto.b64_to_bytes(ctoken))
    video_id = proto.parse(outer[2])[2]
    reply_params = proto.parse(proto.parse(outer[6])[3])
    comment_id = reply_params[2]
    return comment_id.decode('ascii'), video_id.decode('ascii')
|
||||||
|
|
||||||
mobile_headers = {
|
mobile_headers = {
|
||||||
'Host': 'm.youtube.com',
|
'Host': 'm.youtube.com',
|
||||||
'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_1 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.0 Mobile/14E304 Safari/602.1',
|
'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_1 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.0 Mobile/14E304 Safari/602.1',
|
||||||
@ -83,7 +98,7 @@ def request_comments(ctoken, replies=False):
|
|||||||
print("got <!DOCTYPE>, retrying")
|
print("got <!DOCTYPE>, retrying")
|
||||||
continue
|
continue
|
||||||
break
|
break
|
||||||
'''with open('comments_debug', 'wb') as f:
|
'''with open('debug/comments_debug', 'wb') as f:
|
||||||
f.write(content)'''
|
f.write(content)'''
|
||||||
return content
|
return content
|
||||||
|
|
||||||
@ -100,7 +115,8 @@ def parse_comments(content, replies=False):
|
|||||||
if not replies:
|
if not replies:
|
||||||
if comment_raw['replies'] is not None:
|
if comment_raw['replies'] is not None:
|
||||||
ctoken = comment_raw['replies']['continuations'][0]['continuation']
|
ctoken = comment_raw['replies']['continuations'][0]['continuation']
|
||||||
replies_url = URL_ORIGIN + '/comments?ctoken=' + ctoken + "&replies=1"
|
comment_id, video_id = get_ids(ctoken)
|
||||||
|
replies_url = URL_ORIGIN + '/comments?parent_id=' + comment_id + "&video_id=" + video_id
|
||||||
comment_raw = comment_raw['comment']
|
comment_raw = comment_raw['comment']
|
||||||
comment = {
|
comment = {
|
||||||
'author': comment_raw['author']['runs'][0]['text'],
|
'author': comment_raw['author']['runs'][0]['text'],
|
||||||
@ -148,8 +164,13 @@ more_comments_template = Template('''<a class="page-button more-comments" href="
|
|||||||
|
|
||||||
def get_comments_page(query_string):
|
def get_comments_page(query_string):
|
||||||
parameters = urllib.parse.parse_qs(query_string)
|
parameters = urllib.parse.parse_qs(query_string)
|
||||||
ctoken = parameters['ctoken'][0]
|
ctoken = default_multi_get(parameters, 'ctoken', 0, default='')
|
||||||
replies = default_multi_get(parameters, 'replies', 0, default="0") == "1"
|
if not ctoken:
|
||||||
|
video_id = parameters['video_id'][0]
|
||||||
|
parent_id = parameters['parent_id'][0]
|
||||||
|
|
||||||
|
ctoken = comment_replies_ctoken(video_id, parent_id)
|
||||||
|
replies = True
|
||||||
|
|
||||||
result = parse_comments(request_comments(ctoken, replies), replies)
|
result = parse_comments(request_comments(ctoken, replies), replies)
|
||||||
comments_html, ctoken = get_comments_html(result)
|
comments_html, ctoken = get_comments_html(result)
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
from math import ceil
|
from math import ceil
|
||||||
import base64
|
import base64
|
||||||
|
import io
|
||||||
|
|
||||||
def byte(n):
    """Return a one-byte ``bytes`` object whose single value is ``n``."""
    return bytes([n])
|
||||||
@ -61,5 +62,68 @@ def as_bytes(value):
|
|||||||
if isinstance(value, str):
|
if isinstance(value, str):
|
||||||
return value.encode('ascii')
|
return value.encode('ascii')
|
||||||
return value
|
return value
|
||||||
|
|
||||||
|
|
||||||
|
def read_varint(data):
    """Read one base-128 varint from a binary stream and return it as an int.

    ``data`` is a file-like object supporting ``read`` and ``tell``,
    positioned at the first byte of the varint. Each byte contributes its
    low 7 bits, least-significant group first; a clear high bit (0x80)
    marks the final byte.

    Raises:
        EOFError: the stream was already exhausted before the first byte.
            ``read_protobuf`` uses this to detect the end of a message.
        ValueError: the stream ended part-way through a varint.
    """
    result = 0
    shift = 0
    consumed = 0  # bytes of this varint read so far, used to report its start
    while True:
        chunk = data.read(1)
        if not chunk:
            if consumed == 0:
                raise EOFError()
            raise ValueError('Unterminated varint starting at ' + str(data.tell() - consumed))
        octet = chunk[0]
        consumed += 1
        result |= (octet & 0x7F) << shift
        if not octet & 0x80:
            return result
        shift += 7
|
||||||
|
|
||||||
|
|
||||||
|
def read_group(data, end_sequence):
    """Return the raw bytes of a group and move the stream past its end tag.

    ``data`` is a seekable stream that also exposes the complete underlying
    buffer as ``data.original`` (``read_protobuf`` attaches it). The group is
    taken to end at the first occurrence of ``end_sequence`` at or after the
    current position. This is a plain byte search, so the same byte sequence
    appearing inside the payload would be mistaken for the end tag.

    Raises:
        ValueError: ``end_sequence`` does not occur after the current position.
    """
    start = data.tell()
    buffer = data.original
    end = buffer.find(end_sequence, start)
    if end == -1:
        raise ValueError('Unterminated group')
    # Leave the stream positioned just after the end tag.
    data.seek(end + len(end_sequence))
    return buffer[start:end]
|
||||||
|
|
||||||
|
def read_protobuf(data):
    """Lazily decode the top-level fields of a serialized protobuf message.

    ``data`` is the message as a bytes-like object. Yields one
    ``(wire_type, field_number, value)`` tuple per field, in order:

    * wire type 0: ``value`` is the decoded varint (int)
    * wire type 1: ``value`` is the raw 8 bytes
    * wire type 2: ``value`` is the raw length-delimited payload
    * wire type 3: ``value`` is the raw bytes up to the matching end-group tag
    * wire type 5: ``value`` is the raw 4 bytes

    Nested messages are not decoded recursively; call this again on the
    payload.

    Raises:
        ValueError: a fixed-width or length-delimited field is cut short by
            the end of the input (previously the short data was silently
            returned).
        Exception: an unknown wire type is encountered.
        EOFError: the input ends immediately after a tag or length prefix
            (propagated from ``read_varint``).
    """
    raw = data
    stream = io.BytesIO(raw)
    # read_group() finds a group's end tag by searching the raw buffer, so
    # keep it reachable from the stream object.
    stream.original = raw

    def read_exact(size, field_number):
        # BytesIO.read() returns fewer bytes at end of input; treat that as
        # corruption rather than handing back a truncated value.
        chunk = stream.read(size)
        if len(chunk) != size:
            raise ValueError(
                'Truncated field ' + str(field_number) + ': expected '
                + str(size) + ' bytes, got ' + str(len(chunk)))
        return chunk

    while True:
        try:
            tag = read_varint(stream)
        except EOFError:
            # Clean end of input: no more fields.
            break
        wire_type = tag & 7
        field_number = tag >> 3

        if wire_type == 0:
            value = read_varint(stream)
        elif wire_type == 1:
            value = read_exact(8, field_number)
        elif wire_type == 2:
            value = read_exact(read_varint(stream), field_number)
        elif wire_type == 3:
            # The end-group tag has the same field number with wire type 4.
            # NOTE(review): encode_varint is not defined in the visible part
            # of this file - confirm it exists.
            end_bytes = encode_varint((field_number << 3) | 4)
            value = read_group(stream, end_bytes)
        elif wire_type == 5:
            value = read_exact(4, field_number)
        else:
            # NOTE(review): bytes_to_hex / succinct_encode are not defined in
            # the visible part of this file - confirm they exist.
            raise Exception("Unknown wire type: " + str(wire_type) + ", Tag: " + bytes_to_hex(succinct_encode(tag)) + ", at position " + str(stream.tell()))
        yield (wire_type, field_number, value)
|
||||||
|
|
||||||
|
def parse(data):
    """Decode a serialized protobuf message into ``{field_number: value}``.

    Wire types are discarded. If a field number occurs more than once, the
    last occurrence wins.
    """
    fields = {}
    for _wire_type, field_number, value in read_protobuf(data):
        fields[field_number] = value
    return fields
|
||||||
|
|
||||||
|
def b64_to_bytes(data):
    """Decode URL-safe base64 text (``str`` or ASCII ``bytes``) to ``bytes``.

    The input may have its ``=`` padding percent-encoded (``%3D`` or
    ``%3d``), present, partially present, or missing altogether; padding is
    recomputed from the payload length before decoding.
    """
    # Local import so this function works without touching the file's imports.
    from urllib.parse import unquote

    if isinstance(data, bytes):
        data = data.decode('ascii')
    # Undo percent-encoding in either letter case, then drop whatever padding
    # is present so the correct amount can be re-added.
    data = unquote(data).rstrip('=')
    return base64.urlsafe_b64decode(data + '=' * (-len(data) % 4))
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user