fix comment replies being limited to 10

2018-07-06 20:11:08 -07:00
parent 2ba4fd8994
commit 666456146d
2 changed files with 91 additions and 6 deletions
--- a/youtube/comments.py
+++ b/youtube/comments.py
@@ -59,6 +59,21 @@ def make_comment_ctoken(video_id, sort=0, offset=0, secret_key=''):
    result = proto.nested(2, proto.string(2, video_id)) + proto.uint(3,6) + proto.nested(6, offset_information)
    return base64.urlsafe_b64encode(result).decode('ascii')

+# Build the continuation token ("ctoken") that requests the replies to one comment.
+#   video_id    -- id of the video; written as field 2 inside nested field 2
+#   comment_id  -- id of the parent comment; written as field 2 inside nested fields 6 -> 3
+#   max_results -- written as uint field 9 next to the comment id; presumably the
+#                  number of replies the server is asked to return (TODO confirm)
+# Returns the serialized message as URL-safe base64 text (str).
+def comment_replies_ctoken(video_id, comment_id, max_results=500):  
+
+    # Innermost message: the parent comment id plus the requested result count.
+    params = proto.string(2, comment_id) + proto.uint(9, max_results)
+    params = proto.nested(3, params)
+    
+    # Same outer layout as make_comment_ctoken: video id, uint field 3 set to 6, payload in field 6.
+    result = proto.nested(2, proto.string(2, video_id)) + proto.uint(3,6) + proto.nested(6, params)
+    return base64.urlsafe_b64encode(result).decode('ascii')
+
+# Extract the ids embedded in a comment-replies ctoken by walking the same
+# field layout that comment_replies_ctoken writes.
+# Returns a (comment_id, video_id) tuple of str -- note the comment id comes first.
+# Raises KeyError if one of the expected fields is absent from the token.
+def get_ids(ctoken):
+    params = proto.parse(proto.b64_to_bytes(ctoken))
+    # Field 2 -> field 2 holds the video id (still bytes at this point).
+    video_id = proto.parse(params[2])[2]
+    # Field 6 -> field 3 holds the reply parameters; its field 2 is the comment id.
+    params = proto.parse(params[6])
+    params = proto.parse(params[3])
+    return params[2].decode('ascii'), video_id.decode('ascii')
+
 mobile_headers = {
    'Host': 'm.youtube.com',
    'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_1 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.0 Mobile/14E304 Safari/602.1',
@@ -83,7 +98,7 @@ def request_comments(ctoken, replies=False):
            print("got <!DOCTYPE>, retrying")
            continue
        break
-    '''with open('comments_debug', 'wb') as f:
+    '''with open('debug/comments_debug', 'wb') as f:
        f.write(content)'''
    return content

@@ -100,7 +115,8 @@ def parse_comments(content, replies=False):
            if not replies:
                if comment_raw['replies'] is not None:
                    ctoken = comment_raw['replies']['continuations'][0]['continuation']
-                    replies_url = URL_ORIGIN + '/comments?ctoken=' + ctoken + "&replies=1"
+                    comment_id, video_id = get_ids(ctoken)
+                    replies_url = URL_ORIGIN + '/comments?parent_id=' + comment_id + "&video_id=" + video_id
                comment_raw = comment_raw['comment']
            comment = {
            'author': comment_raw['author']['runs'][0]['text'],
@@ -148,8 +164,13 @@ more_comments_template = Template('''<a class="page-button more-comments" href="

 def get_comments_page(query_string):
    parameters = urllib.parse.parse_qs(query_string)
-    ctoken = parameters['ctoken'][0]
-    replies = default_multi_get(parameters, 'replies', 0, default="0") == "1"
+    ctoken = default_multi_get(parameters, 'ctoken', 0, default='')
+    if not ctoken:
+        video_id = parameters['video_id'][0]
+        parent_id = parameters['parent_id'][0]
+
+        ctoken = comment_replies_ctoken(video_id, parent_id)
+        replies = True
    
    result = parse_comments(request_comments(ctoken, replies), replies)
    comments_html, ctoken = get_comments_html(result)
--- a/youtube/proto.py
+++ b/youtube/proto.py
@@ -1,5 +1,6 @@
 from math import ceil
 import base64
+import io

 def byte(n):
    return bytes((n,))
@@ -61,5 +62,68 @@ def as_bytes(value):
    if isinstance(value, str):
        return value.encode('ascii')
    return value
-    
-    
+
+
+# Read one base-128 varint from `data`, a binary stream offering read() and tell().
+# Returns the decoded integer.
+# Raises EOFError when the stream is already exhausted before the first byte, and a
+# plain Exception when it ends in the middle of a varint.
+def read_varint(data):
+    result = 0
+    i = 0    # number of continuation bytes consumed so far
+    while True:
+        try:
+            # read(1) returns b'' at end of stream, so indexing it raises IndexError.
+            # NOTE: this local name shadows the module-level byte() helper inside this function.
+            byte = data.read(1)[0]
+        except IndexError:
+            if i == 0:
+                raise EOFError()
+            raise Exception('Unterminated varint starting at ' + str(data.tell() - i))
+        # Each byte carries 7 payload bits, least-significant group first.
+        result |= (byte & 127) << 7*i
+        # A clear high bit marks the final byte of the varint.
+        if not byte & 128:
+            break
+
+        i += 1
+    return result
+
+                                
+# Read the body of a group field: everything from the current stream position up
+# to the next occurrence of `end_sequence` (the encoded end-group tag).
+# `data` must carry the full underlying bytes in `data.original` (read_protobuf sets it).
+# Leaves the stream positioned just past the end sequence and returns the body bytes.
+# Raises Exception('Unterminated group') when the end sequence is not found.
+# NOTE(review): this is a raw byte search, so an identical byte pattern occurring
+# inside the group's payload would be taken for the terminator.
+def read_group(data, end_sequence):
+    start = data.tell()
+    index = data.original.find(end_sequence, start)
+    if index == -1:
+        raise Exception('Unterminated group')
+    data.seek(index + len(end_sequence))
+    return data.original[start:index]
+
+# Generator that decodes a serialized protobuf message without a schema.
+# `data` is the raw message bytes. Yields one (wire_type, field_number, value)
+# tuple per field, in the order the fields appear. `value` is an int for wire
+# type 0 and raw bytes for every other supported type (nested messages are not
+# decoded recursively).
+# Raises Exception on an unsupported wire type; errors from read_varint and
+# read_group propagate when a field is truncated.
+def read_protobuf(data):
+    data_original = data
+    data = io.BytesIO(data)
+    # Keep the raw bytes reachable from the stream; read_group searches them directly.
+    data.original = data_original
+    while True:
+        try:
+            tag = read_varint(data)
+        except EOFError:
+            # Clean end of input: no further field tags.
+            break
+        # A tag packs the wire type in its low 3 bits and the field number above them.
+        wire_type = tag & 7
+        field_number = tag >> 3
+        
+        if wire_type == 0:
+            # Varint.
+            value = read_varint(data)
+        elif wire_type == 1:
+            # Fixed 64-bit value, returned as 8 raw bytes.
+            value = data.read(8)
+        elif wire_type == 2:
+            # Length-delimited: a varint length followed by that many bytes.
+            # NOTE(review): read() may return fewer bytes than requested on truncated input.
+            length = read_varint(data)
+            value = data.read(length)
+        elif wire_type == 3:
+            # Start of group: read up to the matching end-group tag (wire type 4, same field number).
+            # encode_varint is not defined in the visible part of this file -- presumably elsewhere in the module.
+            end_bytes = encode_varint((field_number << 3) | 4)
+            value = read_group(data, end_bytes)
+        elif wire_type == 5:
+            # Fixed 32-bit value, returned as 4 raw bytes.
+            value = data.read(4)
+        else:
+            # bytes_to_hex and succinct_encode are not visible here either -- verify they exist in this module.
+            raise Exception("Unknown wire type: " + str(wire_type) + ", Tag: " + bytes_to_hex(succinct_encode(tag)) + ", at position " + str(data.tell()))
+        yield (wire_type, field_number, value)
+
+# Decode serialized protobuf bytes into a dict mapping field number -> value.
+# Wire types are discarded, and when a field number occurs more than once only
+# the last value is kept.
+def parse(data):
+    return {field_number: value for _, field_number, value in read_protobuf(data)}
+
+# Decode URL-safe base64 given as str or ASCII bytes and return the raw bytes.
+# Tolerates percent-encoded padding ("%3D") and missing padding.
+def b64_to_bytes(data):
+    if isinstance(data, bytes):
+        data = data.decode('ascii')
+    # Undo URL-encoding of the '=' padding character.
+    data = data.replace("%3D", "=")
+    # Append '=' until the length is a multiple of 4, as the decoder requires.
+    return base64.urlsafe_b64decode(data + "="*((4 - len(data)%4)%4) )
+