Merge branch 'modular-data-extract'
Commits in this branch are prefixed with "Extraction:".

This branch refactors data extraction. All such functionality has been moved to the yt_data_extract module. Responses from requests are given to the module, which parses them into a consistent, more useful format. The dependency on youtube-dl has also been dropped, and that functionality rebuilt from scratch, for these reasons:

(1) I've noticed youtube-dl breaks more often than invidious (whose watch-page extraction is built from scratch) in response to changes from YouTube, so I'm hoping what I wrote will also be less brittle.

(2) Such breakage is inconvenient because I have to merge the fixes manually, since I had to modify youtube-dl to do things such as extracting related videos.

(3) I have no control over error handling and request pooling with youtube-dl, since it performs all the requests itself; changing that would require intrusive modifications I don't want to maintain.

(4) I can now finally display the number of comments, and whether comments are disabled, without making additional requests.
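For orientation before the diffs: a minimal sketch of the new calling convention, assuming only what the hunks below show. extract_channel_info and the in-band 'error'/'items' keys appear in this commit; the wrapper function itself is illustrative.

import json

import yt_data_extract

def fetch_channel_videos(channel_tab, channel_status_name):
    # The raw response body is handed to yt_data_extract, which parses it
    # into a consistent dict. Failures are reported in-band via 'error'
    # rather than raised from deep inside youtube-dl.
    channel_info = yt_data_extract.extract_channel_info(
        json.loads(channel_tab), 'videos')
    if channel_info['error']:
        print('Error checking channel ' + channel_status_name
              + ': ' + channel_info['error'])
        return None
    return channel_info['items']  # list of per-video dicts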
@@ -172,7 +172,7 @@ def _get_videos(cursor, number_per_page, offset, tag = None):
             'id': db_video[0],
             'title': db_video[1],
             'duration': db_video[2],
-            'published': exact_timestamp(db_video[3]) if db_video[4] else posix_to_dumbed_down(db_video[3]),
+            'time_published': exact_timestamp(db_video[3]) if db_video[4] else posix_to_dumbed_down(db_video[3]),
             'author': db_video[5],
         })
 
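In this first hunk, db_video[3] is a stored POSIX timestamp and db_video[4] is a flag recording whether that time is known exactly; the conditional picks between a precise rendering and a coarse one. The helper names come from the hunk, but the bodies below are assumptions, sketched only to show the two paths:

import time

def exact_timestamp(posix_time):
    # Assumed behavior: a precise date string, used when the publication
    # time is known exactly.
    return time.strftime('%Y-%m-%d %H:%M', time.gmtime(posix_time))

def posix_to_dumbed_down(posix_time):
    # Assumed behavior: a coarse age like '3 weeks ago', matching the
    # approximate timestamps YouTube itself reports.
    age = int(time.time()) - posix_time
    for unit, length in (('year', 31536000), ('month', 2592000),
                         ('week', 604800), ('day', 86400), ('hour', 3600)):
        if age >= length:
            n = age // length
            return '%d %s%s ago' % (n, unit, '' if n == 1 else 's')
    return 'just now'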
@@ -455,10 +455,17 @@ def _get_upstream_videos(channel_id):
         print('Failed to read atoma feed for ' + channel_status_name)
         traceback.print_exc()
 
-    videos = channel.extract_info(json.loads(channel_tab), 'videos')['items']
+    channel_info = yt_data_extract.extract_channel_info(json.loads(channel_tab), 'videos')
+    if channel_info['error']:
+        print('Error checking channel ' + channel_status_name + ': ' + channel_info['error'])
+        return
+
+    videos = channel_info['items']
     for i, video_item in enumerate(videos):
-        if 'description' not in video_item:
+        if not video_item.get('description'):
             video_item['description'] = ''
+        else:
+            video_item['description'] = ''.join(run.get('text', '') for run in video_item['description'])
 
         if video_item['id'] in times_published:
             video_item['time_published'] = times_published[video_item['id']]
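The description branch above exists because YouTube's polymer JSON gives text as a list of "runs" (segments that can carry their own links or formatting) rather than a flat string. A small illustration of the flattening the new code performs, with invented sample values:

# A YouTube-style "runs" list; each segment has a 'text' key, and link
# segments carry extra keys. Sample values are invented.
description_runs = [
    {'text': 'New upload: '},
    {'text': 'https://example.com', 'url': 'https://example.com'},
]

# Same expression as in the hunk above:
description = ''.join(run.get('text', '') for run in description_runs)
print(description)  # New upload: https://example.com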
@@ -466,7 +473,7 @@ def _get_upstream_videos(channel_id):
         else:
             video_item['is_time_published_exact'] = False
             try:
-                video_item['time_published'] = youtube_timestamp_to_posix(video_item['published']) - i # subtract a few seconds off the videos so they will be in the right order
+                video_item['time_published'] = youtube_timestamp_to_posix(video_item['time_published']) - i # subtract a few seconds off the videos so they will be in the right order
             except KeyError:
                 print(video_item)
 
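The "- i" above is the trick the inline comment refers to: YouTube only reports coarse ages like "1 week ago", so consecutive videos can convert to the identical POSIX time, and a plain sort would shuffle them. Subtracting the enumeration index nudges each later (older) feed item a little earlier in time, so a newest-first sort reproduces the feed order. A toy demonstration, with an assumed tied conversion result:

# Pretend three consecutive feed items all converted to the same time.
base = 1600000000  # e.g. what youtube_timestamp_to_posix('1 week ago') returned

feed_order = ['newest', 'middle', 'oldest']
stamped = [(title, base - i) for i, title in enumerate(feed_order)]

# A newest-first sort now preserves the feed order despite the tie:
print(sorted(stamped, key=lambda v: v[1], reverse=True))
# [('newest', 1600000000), ('middle', 1599999999), ('oldest', 1599999998)]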
@@ -759,7 +766,7 @@ def get_subscriptions_page():
         video['thumbnail'] = util.URL_ORIGIN + '/data/subscription_thumbnails/' + video['id'] + '.jpg'
         video['type'] = 'video'
         video['item_size'] = 'small'
-        yt_data_extract.add_extra_html_info(video)
+        util.add_extra_html_info(video)
 
     tags = _get_all_tags(cursor)
 
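The last hunk moves add_extra_html_info out of yt_data_extract and into util, consistent with keeping the extraction module free of presentation concerns. Its real body lives in util.py; purely as a hypothetical sketch of the kind of work such a helper does (every field below other than 'id' and 'type', which appear in the hunks above, is an assumption):

URL_ORIGIN = ''  # in util.py this is the server's base URL (see the diff above)

def add_extra_html_info(video):
    # Hypothetical sketch: derive the fields the HTML templates need from
    # the extracted data, so templates never touch raw extraction output.
    if video.get('type') == 'video':
        video['url'] = URL_ORIGIN + '/watch?v=' + video['id']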