Fix parsing shorts

Add check for extracting duration for shorts
Make short duration extraction stricter
Fix handling shorts with no views
This commit is contained in:
Jesus E 2023-06-17 16:08:52 -04:00
parent f322035d4a
commit e6fd9b40f4
No known key found for this signature in database
GPG Key ID: 159C8F8BC9AED8B6

View File

@@ -323,8 +323,7 @@ def extract_item_info(item, additional_info={}):
# handle case where it is "No views"
if not info['approx_view_count']:
if ('No views' in item.get('shortViewCountText', '')
or 'no views' in accessibility_label.lower()):
if ('No views' in extract_str(item.get('viewCountText', ''))):
info['view_count'] = 0
info['approx_view_count'] = '0'
@@ -365,12 +364,13 @@ def extract_item_info(item, additional_info={}):
['accessibility', 'accessibilityData', 'label'],
default='')
duration = re.search(r'(\d+) (second|seconds|minute) - play video',
duration = re.search(r'(\d+) (second|seconds|minute) - play video$',
accessibility_label)
if duration:
if duration.group(2) == 'minute':
info['duration'] = "1:00"
info['duration'] = '1:00'
else:
info['duration'] = "0:" + duration.group(1).zfill(2)
info['duration'] = '0:' + duration.group(1).zfill(2)
# if it's an item in a playlist, get its index
if 'index' in item: # url has wrong index on playlist page