Fix extractor breakage caused by YouTube site changes

2019-09-11 15:09:20 -07:00
parent 21c617faf7
commit f48de1aad2
2 changed files with 499 additions and 236 deletions
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -28,9 +28,11 @@ from ..compat import (
    compat_str,
 )
 from ..utils import (
+    bool_or_none,
    clean_html,
    dict_get,
    error_to_compat_str,
+    extract_attributes,
    ExtractorError,
    float_or_none,
    get_element_by_attribute,
@@ -40,7 +42,6 @@ from ..utils import (
    orderedSet,
    parse_codecs,
    parse_duration,
-    qualities,
    remove_quotes,
    remove_start,
    smuggle_url,
@@ -118,6 +119,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
                'f.req': json.dumps(f_req),
                'flowName': 'GlifWebSignIn',
                'flowEntry': 'ServiceLogin',
+                # TODO: reverse-engineer the actual botguard identifier generation algorithm
+                'bgRequest': '["identifier",""]',
            })
            return self._download_json(
                url, None, note=note, errnote=errnote,
@@ -323,17 +326,18 @@ class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
        for video_id, video_title in self.extract_videos_from_page(content):
            yield self.url_result(video_id, 'Youtube', video_id, video_title)

-    def extract_videos_from_page(self, page):
-        ids_in_page = []
-        titles_in_page = []
-        for mobj in re.finditer(self._VIDEO_RE, page):
+    def extract_videos_from_page_impl(self, video_re, page, ids_in_page, titles_in_page):
+        for mobj in re.finditer(video_re, page):
            # The link with index 0 is not the first video of the playlist (not sure if still actual)
            if 'index' in mobj.groupdict() and mobj.group('id') == '0':
                continue
            video_id = mobj.group('id')
-            video_title = unescapeHTML(mobj.group('title'))
+            video_title = unescapeHTML(
+                mobj.group('title')) if 'title' in mobj.groupdict() else None
            if video_title:
                video_title = video_title.strip()
+            if video_title == '► Play all':
+                video_title = None
            try:
                idx = ids_in_page.index(video_id)
                if video_title and not titles_in_page[idx]:
@@ -341,6 +345,12 @@ class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
            except ValueError:
                ids_in_page.append(video_id)
                titles_in_page.append(video_title)
+
+    def extract_videos_from_page(self, page):
+        ids_in_page = []
+        titles_in_page = []
+        self.extract_videos_from_page_impl(
+            self._VIDEO_RE, page, ids_in_page, titles_in_page)
        return zip(ids_in_page, titles_in_page)


@@ -370,11 +380,24 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                            (?:www\.)?hooktube\.com/|
                            (?:www\.)?yourepeat\.com/|
                            tube\.majestyc\.net/|
+                            # Invidious instances taken from https://github.com/omarroth/invidious/wiki/Invidious-Instances
                            (?:(?:www|dev)\.)?invidio\.us/|
-                            (?:www\.)?invidiou\.sh/|
-                            (?:www\.)?invidious\.snopyta\.org/|
+                            (?:(?:www|no)\.)?invidiou\.sh/|
+                            (?:(?:www|fi|de)\.)?invidious\.snopyta\.org/|
                            (?:www\.)?invidious\.kabi\.tk/|
+                            (?:www\.)?invidious\.enkirton\.net/|
+                            (?:www\.)?invidious\.13ad\.de/|
+                            (?:www\.)?invidious\.mastodon\.host/|
+                            (?:www\.)?invidious\.nixnet\.xyz/|
+                            (?:www\.)?tube\.poal\.co/|
                            (?:www\.)?vid\.wxzm\.sx/|
+                            (?:www\.)?yt\.elukerio\.org/|
+                            (?:www\.)?kgg2m7yk5aybusll\.onion/|
+                            (?:www\.)?qklhadlycap4cnod\.onion/|
+                            (?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion/|
+                            (?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion/|
+                            (?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion/|
+                            (?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion/|
                            youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains
                         (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
                         (?:                                                  # the various things that can precede the ID:
@@ -1589,17 +1612,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
        video_id = mobj.group(2)
        return video_id

-    def _extract_annotations(self, video_id):
-        return self._download_webpage(
-            'https://www.youtube.com/annotations_invideo', video_id,
-            note='Downloading annotations',
-            errnote='Unable to download video annotations', fatal=False,
-            query={
-                'features': 1,
-                'legacy': 1,
-                'video_id': video_id,
-            })
-
    @staticmethod
    def _extract_chapters(description, duration):
        if not description:
@@ -1696,6 +1708,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
        def extract_token(v_info):
            return dict_get(v_info, ('account_playback_token', 'accountPlaybackToken', 'token'))

+        def extract_player_response(player_response, video_id):
+            pl_response = str_or_none(player_response)
+            if not pl_response:
+                return
+            pl_response = self._parse_json(pl_response, video_id, fatal=False)
+            if isinstance(pl_response, dict):
+                add_dash_mpd_pr(pl_response)
+                return pl_response
+
        player_response = {}


@@ -1780,7 +1801,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                note='Refetching age-gated info webpage',
                errnote='unable to download video info webpage')
            video_info = compat_parse_qs(video_info_webpage)
+            pl_response = video_info.get('player_response', [None])[0]
+            player_response = extract_player_response(pl_response, video_id)
            add_dash_mpd(video_info)
+            view_count = extract_view_count(video_info)
        else:
            age_gate = False
            video_info = None
@@ -1803,11 +1827,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                    is_live = True
                sts = ytplayer_config.get('sts')
                if not player_response:
-                    pl_response = str_or_none(args.get('player_response'))
-                    if pl_response:
-                        pl_response = self._parse_json(pl_response, video_id, fatal=False)
-                        if isinstance(pl_response, dict):
-                            player_response = pl_response
+                    player_response = extract_player_response(args.get('player_response'), video_id)
            if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
                add_dash_mpd_pr(player_response)
                # We also try looking in get_video_info since it may contain different dashmpd
@@ -1839,9 +1859,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                    get_video_info = compat_parse_qs(video_info_webpage)
                    if not player_response:
                        pl_response = get_video_info.get('player_response', [None])[0]
-                        if isinstance(pl_response, dict):
-                            player_response = pl_response
-                            add_dash_mpd_pr(player_response)
+                        player_response = extract_player_response(pl_response, video_id)
                    add_dash_mpd(get_video_info)
                    if view_count is None:
                        view_count = extract_view_count(get_video_info)
@@ -1864,9 +1882,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                        break

        def extract_unavailable_message():
-            return self._html_search_regex(
-                r'(?s)<h1[^>]+id="unavailable-message"[^>]*>(.+?)</h1>',
-                video_webpage, 'unavailable message', default=None)
+            messages = []
+            for tag, kind in (('h1', 'message'), ('div', 'submessage')):
+                msg = self._html_search_regex(
+                    r'(?s)<{tag}[^>]+id=["\']unavailable-{kind}["\'][^>]*>(.+?)</{tag}>'.format(tag=tag, kind=kind),
+                    video_webpage, 'unavailable %s' % kind, default=None)
+                if msg:
+                    messages.append(msg)
+            if messages:
+                return '\n'.join(messages)

        if not video_info:
            unavailable_message = extract_unavailable_message()
@@ -1883,7 +1907,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
            self._downloader.report_warning('Unable to extract video title')
            video_title = '_'

-        # description
        description_original = video_description = get_element_by_id("eow-description", video_webpage)
        if video_description:

@@ -1946,6 +1969,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
        if view_count is None and video_details:
            view_count = int_or_none(video_details.get('viewCount'))

+        if is_live is None:
+            is_live = bool_or_none(video_details.get('isLive'))
+
        # Check for "rental" videos
        if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
            raise ExtractorError('"rental" videos not supported. See https://github.com/ytdl-org/youtube-dl/issues/359 for more information.', expected=True)
@@ -1954,6 +1980,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
            return int_or_none(self._search_regex(
                r'\bclen[=/](\d+)', media_url, 'filesize', default=None))

+        streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list) or []
+        streaming_formats.extend(try_get(player_response, lambda x: x['streamingData']['adaptiveFormats'], list) or [])
+
        if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
            self.report_rtmp_download()
            formats = [{
@@ -1962,10 +1991,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                'url': video_info['conn'][0],
                'player_url': player_url,
            }]
-        elif not is_live and (len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
+        elif not is_live and (streaming_formats or len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
            encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
            if 'rtmpe%3Dyes' in encoded_url_map:
                raise ExtractorError('rtmpe downloads are not supported, see https://github.com/ytdl-org/youtube-dl/issues/343 for more information.', expected=True)
+            formats = []
            formats_spec = {}
            fmt_list = video_info.get('fmt_list', [''])[0]
            if fmt_list:
@@ -1979,91 +2009,104 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                                'width': int_or_none(width_height[0]),
                                'height': int_or_none(width_height[1]),
                            }
-            q = qualities(['small', 'medium', 'hd720'])
-            streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list)
-            if streaming_formats:
-                for fmt in streaming_formats:
-                    itag = str_or_none(fmt.get('itag'))
-                    if not itag:
-                        continue
-                    quality = fmt.get('quality')
-                    quality_label = fmt.get('qualityLabel') or quality
-                    formats_spec[itag] = {
-                        'asr': int_or_none(fmt.get('audioSampleRate')),
-                        'filesize': int_or_none(fmt.get('contentLength')),
-                        'format_note': quality_label,
-                        'fps': int_or_none(fmt.get('fps')),
-                        'height': int_or_none(fmt.get('height')),
-                        'quality': q(quality),
-                        # bitrate for itag 43 is always 2147483647
-                        'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None,
-                        'width': int_or_none(fmt.get('width')),
-                    }
-            formats = []
-            for url_data_str in encoded_url_map.split(','):
-                url_data = compat_parse_qs(url_data_str)
-                if 'itag' not in url_data or 'url' not in url_data or url_data.get('drm_families'):
+            for fmt in streaming_formats:
+                itag = str_or_none(fmt.get('itag'))
+                if not itag:
                    continue
+                quality = fmt.get('quality')
+                quality_label = fmt.get('qualityLabel') or quality
+                formats_spec[itag] = {
+                    'asr': int_or_none(fmt.get('audioSampleRate')),
+                    'filesize': int_or_none(fmt.get('contentLength')),
+                    'format_note': quality_label,
+                    'fps': int_or_none(fmt.get('fps')),
+                    'height': int_or_none(fmt.get('height')),
+                    # bitrate for itag 43 is always 2147483647
+                    'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None,
+                    'width': int_or_none(fmt.get('width')),
+                }
+
+            for fmt in streaming_formats:
+                if fmt.get('drm_families'):
+                    continue
+                url = url_or_none(fmt.get('url'))
+
+                if not url:
+                    cipher = fmt.get('cipher')
+                    if not cipher:
+                        continue
+                    url_data = compat_parse_qs(cipher)
+                    url = url_or_none(try_get(url_data, lambda x: x['url'][0], compat_str))
+                    if not url:
+                        continue
+                else:
+                    cipher = None
+                    url_data = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
+
                stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0]))
                # Unsupported FORMAT_STREAM_TYPE_OTF
                if stream_type == 3:
                    continue
-                format_id = url_data['itag'][0]
-                url = url_data['url'][0]

-                if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
-                    ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
-                    jsplayer_url_json = self._search_regex(
-                        ASSETS_RE,
-                        embed_webpage if age_gate else video_webpage,
-                        'JS player URL (1)', default=None)
-                    if not jsplayer_url_json and not age_gate:
-                        # We need the embed website after all
-                        if embed_webpage is None:
-                            embed_url = proto + '://www.youtube.com/embed/%s' % video_id
-                            embed_webpage = self._download_webpage(
-                                embed_url, video_id, 'Downloading embed webpage')
+                format_id = fmt.get('itag') or url_data['itag'][0]
+                if not format_id:
+                    continue
+                format_id = compat_str(format_id)
+
+                if cipher:
+                    if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
+                        ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
                        jsplayer_url_json = self._search_regex(
-                            ASSETS_RE, embed_webpage, 'JS player URL')
+                            ASSETS_RE,
+                            embed_webpage if age_gate else video_webpage,
+                            'JS player URL (1)', default=None)
+                        if not jsplayer_url_json and not age_gate:
+                            # We need the embed webpage after all
+                            if embed_webpage is None:
+                                embed_url = proto + '://www.youtube.com/embed/%s' % video_id
+                                embed_webpage = self._download_webpage(
+                                    embed_url, video_id, 'Downloading embed webpage')
+                            jsplayer_url_json = self._search_regex(
+                                ASSETS_RE, embed_webpage, 'JS player URL')

-                    player_url = json.loads(jsplayer_url_json)
-                    if player_url is None:
-                        player_url_json = self._search_regex(
-                            r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
-                            video_webpage, 'age gate player URL')
-                        player_url = json.loads(player_url_json)
-
-                if 'sig' in url_data:
-                    url += '&signature=' + url_data['sig'][0]
-                elif 's' in url_data:
-                    encrypted_sig = url_data['s'][0]
-
-                    if self._downloader.params.get('verbose'):
+                        player_url = json.loads(jsplayer_url_json)
                        if player_url is None:
-                            player_version = 'unknown'
-                            player_desc = 'unknown'
-                        else:
-                            if player_url.endswith('swf'):
-                                player_version = self._search_regex(
-                                    r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
-                                    'flash player', fatal=False)
-                                player_desc = 'flash player %s' % player_version
+                            player_url_json = self._search_regex(
+                                r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
+                                video_webpage, 'age gate player URL')
+                            player_url = json.loads(player_url_json)
+
+                    if 'sig' in url_data:
+                        url += '&signature=' + url_data['sig'][0]
+                    elif 's' in url_data:
+                        encrypted_sig = url_data['s'][0]
+
+                        if self._downloader.params.get('verbose'):
+                            if player_url is None:
+                                player_version = 'unknown'
+                                player_desc = 'unknown'
                            else:
-                                player_version = self._search_regex(
-                                    [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js',
-                                     r'(?:www|player(?:_ias)?)-([^/]+)(?:/[a-z]{2,3}_[A-Z]{2})?/base\.js'],
-                                    player_url,
-                                    'html5 player', fatal=False)
-                                player_desc = 'html5 player %s' % player_version
+                                if player_url.endswith('swf'):
+                                    player_version = self._search_regex(
+                                        r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
+                                        'flash player', fatal=False)
+                                    player_desc = 'flash player %s' % player_version
+                                else:
+                                    player_version = self._search_regex(
+                                        [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js',
+                                         r'(?:www|player(?:_ias)?)-([^/]+)(?:/[a-z]{2,3}_[A-Z]{2})?/base\.js'],
+                                        player_url,
+                                        'html5 player', fatal=False)
+                                    player_desc = 'html5 player %s' % player_version

-                        parts_sizes = self._signature_cache_id(encrypted_sig)
-                        self.to_screen('{%s} signature length %s, %s' %
-                                       (format_id, parts_sizes, player_desc))
+                            parts_sizes = self._signature_cache_id(encrypted_sig)
+                            self.to_screen('{%s} signature length %s, %s' %
+                                           (format_id, parts_sizes, player_desc))

-                    signature = self._decrypt_signature(
-                        encrypted_sig, video_id, player_url, age_gate)
-                    sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature'
-                    url += '&%s=%s' % (sp, signature)
+                        signature = self._decrypt_signature(
+                            encrypted_sig, video_id, player_url, age_gate)
+                        sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature'
+                        url += '&%s=%s' % (sp, signature)
                if 'ratebypass' not in url:
                    url += '&ratebypass=yes'

@@ -2083,24 +2126,33 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
                width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)

+                if width is None:
+                    width = int_or_none(fmt.get('width'))
+                if height is None:
+                    height = int_or_none(fmt.get('height'))
+
                filesize = int_or_none(url_data.get(
                    'clen', [None])[0]) or _extract_filesize(url)

-                quality = url_data.get('quality', [None])[0]
+                quality = url_data.get('quality', [None])[0] or fmt.get('quality')
+                quality_label = url_data.get('quality_label', [None])[0] or fmt.get('qualityLabel')
+
+                tbr = (float_or_none(url_data.get('bitrate', [None])[0], 1000)
+                       or float_or_none(fmt.get('bitrate'), 1000)) if format_id != '43' else None
+                fps = int_or_none(url_data.get('fps', [None])[0]) or int_or_none(fmt.get('fps'))

                more_fields = {
                    'filesize': filesize,
-                    'tbr': float_or_none(url_data.get('bitrate', [None])[0], 1000),
+                    'tbr': tbr,
                    'width': width,
                    'height': height,
-                    'fps': int_or_none(url_data.get('fps', [None])[0]),
-                    'format_note': url_data.get('quality_label', [None])[0] or quality,
-                    'quality': q(quality),
+                    'fps': fps,
+                    'format_note': quality_label or quality,
                }
                for key, value in more_fields.items():
                    if value:
                        dct[key] = value
-                type_ = url_data.get('type', [None])[0]
+                type_ = url_data.get('type', [None])[0] or fmt.get('mimeType')
                if type_:
                    type_split = type_.split(';')
                    kind_ext = type_split[0].split('/')
@@ -2148,11 +2200,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                    a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
                    formats.append(a_format)
            else:
-                error_message = clean_html(video_info.get('reason', [None])[0])
+                error_message = extract_unavailable_message()
                alt_error_message = clean_html(video_info.get('reason', [None])[0])
                print(alt_error_message)
                if not error_message:
                    error_message = alt_error_message
+                if not error_message:
+                    error_message = clean_html(
+                        try_get(video_info, lambda x: x['reason'][0], compat_str))
                if error_message:
                    raise YoutubeError(error_message)
                raise ExtractorError('no conn, hlsvp, hlsManifestUrl or url_encoded_fmt_stream_map information found in video info')
@@ -2323,7 +2378,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
        # annotations
        video_annotations = None
        if self._downloader.params.get('writeannotations', False):
-            video_annotations = self._extract_annotations(video_id)
+            xsrf_token = self._search_regex(
+                r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>[A-Za-z0-9+/=]+)\2',
+                video_webpage, 'xsrf token', group='xsrf_token', fatal=False)
+            invideo_url = try_get(
+                player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
+            if xsrf_token and invideo_url:
+                xsrf_field_name = self._search_regex(
+                    r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
+                    video_webpage, 'xsrf field name',
+                    group='xsrf_field_name', default='session_token')
+                video_annotations = self._download_webpage(
+                    self._proto_relative_url(invideo_url),
+                    video_id, note='Downloading annotations',
+                    errnote='Unable to download video annotations', fatal=False,
+                    data=urlencode_postdata({xsrf_field_name: xsrf_token}))

        chapters = self._extract_chapters(description_original, video_duration)

@@ -2484,7 +2553,8 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
                        (%(playlist_id)s)
                     )""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
-    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)(?:[^>]+>(?P<title>[^<]+))?'
+    _VIDEO_RE_TPL = r'href="\s*/watch\?v=%s(?:&amp;(?:[^"]*?index=(?P<index>\d+))?(?:[^>]+>(?P<title>[^<]+))?)?'
+    _VIDEO_RE = _VIDEO_RE_TPL % r'(?P<id>[0-9A-Za-z_-]{11})'
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
@@ -2507,6 +2577,8 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
        'info_dict': {
            'title': '29C3: Not my department',
            'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
+            'uploader': 'Christiaan008',
+            'uploader_id': 'ChRiStIaAn008',
        },
        'playlist_count': 95,
    }, {
@@ -2515,6 +2587,8 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
+            'uploader': 'Wickydoo',
+            'uploader_id': 'Wickydoo',
        },
        'playlist_mincount': 26,
    }, {
@@ -2523,6 +2597,8 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
        'info_dict': {
            'title': 'Uploads from Cauchemar',
            'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
+            'uploader': 'Cauchemar',
+            'uploader_id': 'Cauchemar89',
        },
        'playlist_mincount': 799,
    }, {
@@ -2540,13 +2616,17 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
+            'uploader': 'milan',
+            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 485,
        'info_dict': {
-            'title': '2017 華語最新單曲 (2/24更新)',
+            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
+            'uploader': 'LBK',
+            'uploader_id': 'sdragonfang',
        }
    }, {
        'note': 'Embedded SWF player',
@@ -2555,13 +2635,16 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
        'info_dict': {
            'title': 'JODA7',
            'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
-        }
+        },
+        'skip': 'This playlist does not exist',
    }, {
        'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
        'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
        'info_dict': {
            'title': 'Uploads from Interstellar Movie',
            'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
+            'uploader': 'Interstellar Movie',
+            'uploader_id': 'InterstellarMovie1',
        },
        'playlist_mincount': 21,
    }, {
@@ -2586,6 +2669,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
        'params': {
            'skip_download': True,
        },
+        'skip': 'This video is not available.',
        'add_ie': [YoutubeIE.ie_key()],
    }, {
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
@@ -2597,7 +2681,6 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
-            'license': 'Standard YouTube License',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
@@ -2608,6 +2691,16 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
            'noplaylist': True,
            'skip_download': True,
        },
+    }, {
+        # https://github.com/ytdl-org/youtube-dl/issues/21844
+        'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
+        'info_dict': {
+            'title': 'Data Analysis with Dr Mike Pound',
+            'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
+            'uploader_id': 'Computerphile',
+            'uploader': 'Computerphile',
+        },
+        'playlist_mincount': 11,
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
@@ -2626,6 +2719,34 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
    def _real_initialize(self):
        self._login()

+    def extract_videos_from_page(self, page):
+        ids_in_page = []
+        titles_in_page = []
+
+        for item in re.findall(
+                r'(<[^>]*\bdata-video-id\s*=\s*["\'][0-9A-Za-z_-]{11}[^>]+>)', page):
+            attrs = extract_attributes(item)
+            video_id = attrs['data-video-id']
+            video_title = unescapeHTML(attrs.get('data-title'))
+            if video_title:
+                video_title = video_title.strip()
+            ids_in_page.append(video_id)
+            titles_in_page.append(video_title)
+
+        # Also scan with the old _VIDEO_RE (always runs; adds ids not already collected)
+        self.extract_videos_from_page_impl(
+            self._VIDEO_RE, page, ids_in_page, titles_in_page)
+
+        # Relaxed patterns that capture only the video id (no title group)
+        self.extract_videos_from_page_impl(
+            r'href="\s*/watch\?v\s*=\s*(?P<id>[0-9A-Za-z_-]{11})', page,
+            ids_in_page, titles_in_page)
+        self.extract_videos_from_page_impl(
+            r'data-video-ids\s*=\s*["\'](?P<id>[0-9A-Za-z_-]{11})', page,
+            ids_in_page, titles_in_page)
+
+        return zip(ids_in_page, titles_in_page)
+
    def _extract_mix(self, playlist_id):
        # The mixes are generated from a single video
        # the id of the playlist is just 'RD' + video_id
@@ -2774,6 +2895,8 @@ class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
        'info_dict': {
            'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'Uploads from lex will',
+            'uploader': 'lex will',
+            'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
        }
    }, {
        'note': 'Age restricted channel',
@@ -2783,6 +2906,8 @@ class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
        'info_dict': {
            'id': 'UUs0ifCMCm1icqRbqhUINa0w',
            'title': 'Uploads from Deus Ex',
+            'uploader': 'Deus Ex',
+            'uploader_id': 'DeusExOfficial',
        },
    }, {
        'url': 'https://invidio.us/channel/UC23qupoDRn9YOAVzeoxjOQA',
@@ -2867,6 +2992,8 @@ class YoutubeUserIE(YoutubeChannelIE):
        'info_dict': {
            'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ',
            'title': 'Uploads from The Linux Foundation',
+            'uploader': 'The Linux Foundation',
+            'uploader_id': 'TheLinuxFoundation',
        }
    }, {
        # Only available via https://www.youtube.com/c/12minuteathlete/videos
@@ -2876,6 +3003,8 @@ class YoutubeUserIE(YoutubeChannelIE):
        'info_dict': {
            'id': 'UUVjM-zV6_opMDx7WYxnjZiQ',
            'title': 'Uploads from 12 Minute Athlete',
+            'uploader': '12 Minute Athlete',
+            'uploader_id': 'the12minuteathlete',
        }
    }, {
        'url': 'ytuser:phihag',
@@ -2969,7 +3098,7 @@ class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
        'playlist_mincount': 4,
        'info_dict': {
            'id': 'ThirstForScience',
-            'title': 'Thirst for Science',
+            'title': 'ThirstForScience',
        },
    }, {
        # with "Load more" button
@@ -2986,6 +3115,7 @@ class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
            'id': 'UCiU1dHvZObB2iP6xkJ__Icw',
            'title': 'Chem Player',
        },
+        'skip': 'Blocked',
    }]


--- a/youtube_dl/extractor/youtube_unmodified_reference.py
+++ b/youtube_dl/extractor/youtube_unmodified_reference.py
@@ -27,9 +27,11 @@ from ..compat import (
    compat_str,
 )
 from ..utils import (
+    bool_or_none,
    clean_html,
    dict_get,
    error_to_compat_str,
+    extract_attributes,
    ExtractorError,
    float_or_none,
    get_element_by_attribute,
@@ -39,7 +41,6 @@ from ..utils import (
    orderedSet,
    parse_codecs,
    parse_duration,
-    qualities,
    remove_quotes,
    remove_start,
    smuggle_url,
@@ -116,6 +117,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
                'f.req': json.dumps(f_req),
                'flowName': 'GlifWebSignIn',
                'flowEntry': 'ServiceLogin',
+                # TODO: reverse actual botguard identifier generation algo
+                'bgRequest': '["identifier",""]',
            })
            return self._download_json(
                url, None, note=note, errnote=errnote,
@@ -321,17 +324,18 @@ class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
        for video_id, video_title in self.extract_videos_from_page(content):
            yield self.url_result(video_id, 'Youtube', video_id, video_title)

-    def extract_videos_from_page(self, page):
-        ids_in_page = []
-        titles_in_page = []
-        for mobj in re.finditer(self._VIDEO_RE, page):
+    def extract_videos_from_page_impl(self, video_re, page, ids_in_page, titles_in_page):
+        for mobj in re.finditer(video_re, page):
            # The link with index 0 is not the first video of the playlist (not sure if still actual)
            if 'index' in mobj.groupdict() and mobj.group('id') == '0':
                continue
            video_id = mobj.group('id')
-            video_title = unescapeHTML(mobj.group('title'))
+            video_title = unescapeHTML(
+                mobj.group('title')) if 'title' in mobj.groupdict() else None
            if video_title:
                video_title = video_title.strip()
+            if video_title == '► Play all':
+                video_title = None
            try:
                idx = ids_in_page.index(video_id)
                if video_title and not titles_in_page[idx]:
@@ -339,6 +343,12 @@ class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
            except ValueError:
                ids_in_page.append(video_id)
                titles_in_page.append(video_title)
+
+    def extract_videos_from_page(self, page):
+        ids_in_page = []
+        titles_in_page = []
+        self.extract_videos_from_page_impl(
+            self._VIDEO_RE, page, ids_in_page, titles_in_page)
        return zip(ids_in_page, titles_in_page)


@@ -368,11 +378,24 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                            (?:www\.)?hooktube\.com/|
                            (?:www\.)?yourepeat\.com/|
                            tube\.majestyc\.net/|
+                            # Invidious instances taken from https://github.com/omarroth/invidious/wiki/Invidious-Instances
                            (?:(?:www|dev)\.)?invidio\.us/|
-                            (?:www\.)?invidiou\.sh/|
-                            (?:www\.)?invidious\.snopyta\.org/|
+                            (?:(?:www|no)\.)?invidiou\.sh/|
+                            (?:(?:www|fi|de)\.)?invidious\.snopyta\.org/|
                            (?:www\.)?invidious\.kabi\.tk/|
+                            (?:www\.)?invidious\.enkirton\.net/|
+                            (?:www\.)?invidious\.13ad\.de/|
+                            (?:www\.)?invidious\.mastodon\.host/|
+                            (?:www\.)?invidious\.nixnet\.xyz/|
+                            (?:www\.)?tube\.poal\.co/|
                            (?:www\.)?vid\.wxzm\.sx/|
+                            (?:www\.)?yt\.elukerio\.org/|
+                            (?:www\.)?kgg2m7yk5aybusll\.onion/|
+                            (?:www\.)?qklhadlycap4cnod\.onion/|
+                            (?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion/|
+                            (?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion/|
+                            (?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion/|
+                            (?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion/|
                            youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains
                         (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
                         (?:                                                  # the various things that can precede the ID:
@@ -1587,17 +1610,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
        video_id = mobj.group(2)
        return video_id

-    def _extract_annotations(self, video_id):
-        return self._download_webpage(
-            'https://www.youtube.com/annotations_invideo', video_id,
-            note='Downloading annotations',
-            errnote='Unable to download video annotations', fatal=False,
-            query={
-                'features': 1,
-                'legacy': 1,
-                'video_id': video_id,
-            })
-
    @staticmethod
    def _extract_chapters(description, duration):
        if not description:
@@ -1692,6 +1704,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
        def extract_token(v_info):
            return dict_get(v_info, ('account_playback_token', 'accountPlaybackToken', 'token'))

+        def extract_player_response(player_response, video_id):
+            pl_response = str_or_none(player_response)
+            if not pl_response:
+                return
+            pl_response = self._parse_json(pl_response, video_id, fatal=False)
+            if isinstance(pl_response, dict):
+                add_dash_mpd_pr(pl_response)
+                return pl_response
+
        player_response = {}

        # Get video info
@@ -1714,7 +1735,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                note='Refetching age-gated info webpage',
                errnote='unable to download video info webpage')
            video_info = compat_parse_qs(video_info_webpage)
+            pl_response = video_info.get('player_response', [None])[0]
+            player_response = extract_player_response(pl_response, video_id)
            add_dash_mpd(video_info)
+            view_count = extract_view_count(video_info)
        else:
            age_gate = False
            video_info = None
@@ -1737,11 +1761,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                    is_live = True
                sts = ytplayer_config.get('sts')
                if not player_response:
-                    pl_response = str_or_none(args.get('player_response'))
-                    if pl_response:
-                        pl_response = self._parse_json(pl_response, video_id, fatal=False)
-                        if isinstance(pl_response, dict):
-                            player_response = pl_response
+                    player_response = extract_player_response(args.get('player_response'), video_id)
            if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
                add_dash_mpd_pr(player_response)
                # We also try looking in get_video_info since it may contain different dashmpd
@@ -1773,9 +1793,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                    get_video_info = compat_parse_qs(video_info_webpage)
                    if not player_response:
                        pl_response = get_video_info.get('player_response', [None])[0]
-                        if isinstance(pl_response, dict):
-                            player_response = pl_response
-                            add_dash_mpd_pr(player_response)
+                        player_response = extract_player_response(pl_response, video_id)
                    add_dash_mpd(get_video_info)
                    if view_count is None:
                        view_count = extract_view_count(get_video_info)
@@ -1798,9 +1816,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                        break

        def extract_unavailable_message():
-            return self._html_search_regex(
-                r'(?s)<h1[^>]+id="unavailable-message"[^>]*>(.+?)</h1>',
-                video_webpage, 'unavailable message', default=None)
+            messages = []
+            for tag, kind in (('h1', 'message'), ('div', 'submessage')):
+                msg = self._html_search_regex(
+                    r'(?s)<{tag}[^>]+id=["\']unavailable-{kind}["\'][^>]*>(.+?)</{tag}>'.format(tag=tag, kind=kind),
+                    video_webpage, 'unavailable %s' % kind, default=None)
+                if msg:
+                    messages.append(msg)
+            if messages:
+                return '\n'.join(messages)

        if not video_info:
            unavailable_message = extract_unavailable_message()
@@ -1879,6 +1903,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
        if view_count is None and video_details:
            view_count = int_or_none(video_details.get('viewCount'))

+        if is_live is None:
+            is_live = bool_or_none(video_details.get('isLive'))
+
        # Check for "rental" videos
        if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
            raise ExtractorError('"rental" videos not supported. See https://github.com/ytdl-org/youtube-dl/issues/359 for more information.', expected=True)
@@ -1887,6 +1914,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
            return int_or_none(self._search_regex(
                r'\bclen[=/](\d+)', media_url, 'filesize', default=None))

+        streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list) or []
+        streaming_formats.extend(try_get(player_response, lambda x: x['streamingData']['adaptiveFormats'], list) or [])
+
        if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
            self.report_rtmp_download()
            formats = [{
@@ -1895,10 +1925,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                'url': video_info['conn'][0],
                'player_url': player_url,
            }]
-        elif not is_live and (len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
+        elif not is_live and (streaming_formats or len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
            encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
            if 'rtmpe%3Dyes' in encoded_url_map:
                raise ExtractorError('rtmpe downloads are not supported, see https://github.com/ytdl-org/youtube-dl/issues/343 for more information.', expected=True)
+            formats = []
            formats_spec = {}
            fmt_list = video_info.get('fmt_list', [''])[0]
            if fmt_list:
@@ -1912,91 +1943,104 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                                'width': int_or_none(width_height[0]),
                                'height': int_or_none(width_height[1]),
                            }
-            q = qualities(['small', 'medium', 'hd720'])
-            streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list)
-            if streaming_formats:
-                for fmt in streaming_formats:
-                    itag = str_or_none(fmt.get('itag'))
-                    if not itag:
-                        continue
-                    quality = fmt.get('quality')
-                    quality_label = fmt.get('qualityLabel') or quality
-                    formats_spec[itag] = {
-                        'asr': int_or_none(fmt.get('audioSampleRate')),
-                        'filesize': int_or_none(fmt.get('contentLength')),
-                        'format_note': quality_label,
-                        'fps': int_or_none(fmt.get('fps')),
-                        'height': int_or_none(fmt.get('height')),
-                        'quality': q(quality),
-                        # bitrate for itag 43 is always 2147483647
-                        'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None,
-                        'width': int_or_none(fmt.get('width')),
-                    }
-            formats = []
-            for url_data_str in encoded_url_map.split(','):
-                url_data = compat_parse_qs(url_data_str)
-                if 'itag' not in url_data or 'url' not in url_data or url_data.get('drm_families'):
+            for fmt in streaming_formats:
+                itag = str_or_none(fmt.get('itag'))
+                if not itag:
                    continue
+                quality = fmt.get('quality')
+                quality_label = fmt.get('qualityLabel') or quality
+                formats_spec[itag] = {
+                    'asr': int_or_none(fmt.get('audioSampleRate')),
+                    'filesize': int_or_none(fmt.get('contentLength')),
+                    'format_note': quality_label,
+                    'fps': int_or_none(fmt.get('fps')),
+                    'height': int_or_none(fmt.get('height')),
+                    # bitrate for itag 43 is always 2147483647
+                    'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None,
+                    'width': int_or_none(fmt.get('width')),
+                }
+
+            for fmt in streaming_formats:
+                if fmt.get('drm_families'):
+                    continue
+                url = url_or_none(fmt.get('url'))
+
+                if not url:
+                    cipher = fmt.get('cipher')
+                    if not cipher:
+                        continue
+                    url_data = compat_parse_qs(cipher)
+                    url = url_or_none(try_get(url_data, lambda x: x['url'][0], compat_str))
+                    if not url:
+                        continue
+                else:
+                    cipher = None
+                    url_data = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
+
                stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0]))
                # Unsupported FORMAT_STREAM_TYPE_OTF
                if stream_type == 3:
                    continue
-                format_id = url_data['itag'][0]
-                url = url_data['url'][0]

-                if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
-                    ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
-                    jsplayer_url_json = self._search_regex(
-                        ASSETS_RE,
-                        embed_webpage if age_gate else video_webpage,
-                        'JS player URL (1)', default=None)
-                    if not jsplayer_url_json and not age_gate:
-                        # We need the embed website after all
-                        if embed_webpage is None:
-                            embed_url = proto + '://www.youtube.com/embed/%s' % video_id
-                            embed_webpage = self._download_webpage(
-                                embed_url, video_id, 'Downloading embed webpage')
+                format_id = fmt.get('itag') or url_data['itag'][0]
+                if not format_id:
+                    continue
+                format_id = compat_str(format_id)
+
+                if cipher:
+                    if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
+                        ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
                        jsplayer_url_json = self._search_regex(
-                            ASSETS_RE, embed_webpage, 'JS player URL')
+                            ASSETS_RE,
+                            embed_webpage if age_gate else video_webpage,
+                            'JS player URL (1)', default=None)
+                        if not jsplayer_url_json and not age_gate:
+                            # We need the embed website after all
+                            if embed_webpage is None:
+                                embed_url = proto + '://www.youtube.com/embed/%s' % video_id
+                                embed_webpage = self._download_webpage(
+                                    embed_url, video_id, 'Downloading embed webpage')
+                            jsplayer_url_json = self._search_regex(
+                                ASSETS_RE, embed_webpage, 'JS player URL')

-                    player_url = json.loads(jsplayer_url_json)
-                    if player_url is None:
-                        player_url_json = self._search_regex(
-                            r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
-                            video_webpage, 'age gate player URL')
-                        player_url = json.loads(player_url_json)
-
-                if 'sig' in url_data:
-                    url += '&signature=' + url_data['sig'][0]
-                elif 's' in url_data:
-                    encrypted_sig = url_data['s'][0]
-
-                    if self._downloader.params.get('verbose'):
+                        player_url = json.loads(jsplayer_url_json)
                        if player_url is None:
-                            player_version = 'unknown'
-                            player_desc = 'unknown'
-                        else:
-                            if player_url.endswith('swf'):
-                                player_version = self._search_regex(
-                                    r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
-                                    'flash player', fatal=False)
-                                player_desc = 'flash player %s' % player_version
+                            player_url_json = self._search_regex(
+                                r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
+                                video_webpage, 'age gate player URL')
+                            player_url = json.loads(player_url_json)
+
+                    if 'sig' in url_data:
+                        url += '&signature=' + url_data['sig'][0]
+                    elif 's' in url_data:
+                        encrypted_sig = url_data['s'][0]
+
+                        if self._downloader.params.get('verbose'):
+                            if player_url is None:
+                                player_version = 'unknown'
+                                player_desc = 'unknown'
                            else:
-                                player_version = self._search_regex(
-                                    [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js',
-                                     r'(?:www|player(?:_ias)?)-([^/]+)(?:/[a-z]{2,3}_[A-Z]{2})?/base\.js'],
-                                    player_url,
-                                    'html5 player', fatal=False)
-                                player_desc = 'html5 player %s' % player_version
+                                if player_url.endswith('swf'):
+                                    player_version = self._search_regex(
+                                        r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
+                                        'flash player', fatal=False)
+                                    player_desc = 'flash player %s' % player_version
+                                else:
+                                    player_version = self._search_regex(
+                                        [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js',
+                                         r'(?:www|player(?:_ias)?)-([^/]+)(?:/[a-z]{2,3}_[A-Z]{2})?/base\.js'],
+                                        player_url,
+                                        'html5 player', fatal=False)
+                                    player_desc = 'html5 player %s' % player_version

-                        parts_sizes = self._signature_cache_id(encrypted_sig)
-                        self.to_screen('{%s} signature length %s, %s' %
-                                       (format_id, parts_sizes, player_desc))
+                            parts_sizes = self._signature_cache_id(encrypted_sig)
+                            self.to_screen('{%s} signature length %s, %s' %
+                                           (format_id, parts_sizes, player_desc))

-                    signature = self._decrypt_signature(
-                        encrypted_sig, video_id, player_url, age_gate)
-                    sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature'
-                    url += '&%s=%s' % (sp, signature)
+                        signature = self._decrypt_signature(
+                            encrypted_sig, video_id, player_url, age_gate)
+                        sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature'
+                        url += '&%s=%s' % (sp, signature)
                if 'ratebypass' not in url:
                    url += '&ratebypass=yes'

@@ -2016,24 +2060,33 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
                width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)

+                if width is None:
+                    width = int_or_none(fmt.get('width'))
+                if height is None:
+                    height = int_or_none(fmt.get('height'))
+
                filesize = int_or_none(url_data.get(
                    'clen', [None])[0]) or _extract_filesize(url)

-                quality = url_data.get('quality', [None])[0]
+                quality = url_data.get('quality', [None])[0] or fmt.get('quality')
+                quality_label = url_data.get('quality_label', [None])[0] or fmt.get('qualityLabel')
+
+                tbr = (float_or_none(url_data.get('bitrate', [None])[0], 1000)
+                       or float_or_none(fmt.get('bitrate'), 1000)) if format_id != '43' else None
+                fps = int_or_none(url_data.get('fps', [None])[0]) or int_or_none(fmt.get('fps'))

                more_fields = {
                    'filesize': filesize,
-                    'tbr': float_or_none(url_data.get('bitrate', [None])[0], 1000),
+                    'tbr': tbr,
                    'width': width,
                    'height': height,
-                    'fps': int_or_none(url_data.get('fps', [None])[0]),
-                    'format_note': url_data.get('quality_label', [None])[0] or quality,
-                    'quality': q(quality),
+                    'fps': fps,
+                    'format_note': quality_label or quality,
                }
                for key, value in more_fields.items():
                    if value:
                        dct[key] = value
-                type_ = url_data.get('type', [None])[0]
+                type_ = url_data.get('type', [None])[0] or fmt.get('mimeType')
                if type_:
                    type_split = type_.split(';')
                    kind_ext = type_split[0].split('/')
@@ -2081,9 +2134,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                    a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
                    formats.append(a_format)
            else:
-                error_message = clean_html(video_info.get('reason', [None])[0])
+                error_message = extract_unavailable_message()
                if not error_message:
-                    error_message = extract_unavailable_message()
+                    error_message = clean_html(try_get(
+                        player_response, lambda x: x['playabilityStatus']['reason'],
+                        compat_str))
+                if not error_message:
+                    error_message = clean_html(
+                        try_get(video_info, lambda x: x['reason'][0], compat_str))
                if error_message:
                    raise ExtractorError(error_message, expected=True)
                raise ExtractorError('no conn, hlsvp, hlsManifestUrl or url_encoded_fmt_stream_map information found in video info')
@@ -2254,7 +2312,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
        # annotations
        video_annotations = None
        if self._downloader.params.get('writeannotations', False):
-            video_annotations = self._extract_annotations(video_id)
+            xsrf_token = self._search_regex(
+                r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>[A-Za-z0-9+/=]+)\2',
+                video_webpage, 'xsrf token', group='xsrf_token', fatal=False)
+            invideo_url = try_get(
+                player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
+            if xsrf_token and invideo_url:
+                xsrf_field_name = self._search_regex(
+                    r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
+                    video_webpage, 'xsrf field name',
+                    group='xsrf_field_name', default='session_token')
+                video_annotations = self._download_webpage(
+                    self._proto_relative_url(invideo_url),
+                    video_id, note='Downloading annotations',
+                    errnote='Unable to download video annotations', fatal=False,
+                    data=urlencode_postdata({xsrf_field_name: xsrf_token}))

        chapters = self._extract_chapters(description_original, video_duration)

@@ -2412,7 +2484,8 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
                        (%(playlist_id)s)
                     )""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
-    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)(?:[^>]+>(?P<title>[^<]+))?'
+    _VIDEO_RE_TPL = r'href="\s*/watch\?v=%s(?:&amp;(?:[^"]*?index=(?P<index>\d+))?(?:[^>]+>(?P<title>[^<]+))?)?'
+    _VIDEO_RE = _VIDEO_RE_TPL % r'(?P<id>[0-9A-Za-z_-]{11})'
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
@@ -2435,6 +2508,8 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
        'info_dict': {
            'title': '29C3: Not my department',
            'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
+            'uploader': 'Christiaan008',
+            'uploader_id': 'ChRiStIaAn008',
        },
        'playlist_count': 95,
    }, {
@@ -2443,6 +2518,8 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
+            'uploader': 'Wickydoo',
+            'uploader_id': 'Wickydoo',
        },
        'playlist_mincount': 26,
    }, {
@@ -2451,6 +2528,8 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
        'info_dict': {
            'title': 'Uploads from Cauchemar',
            'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
+            'uploader': 'Cauchemar',
+            'uploader_id': 'Cauchemar89',
        },
        'playlist_mincount': 799,
    }, {
@@ -2468,13 +2547,17 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
+            'uploader': 'milan',
+            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 485,
        'info_dict': {
-            'title': '2017 華語最新單曲 (2/24更新)',
+            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
+            'uploader': 'LBK',
+            'uploader_id': 'sdragonfang',
        }
    }, {
        'note': 'Embedded SWF player',
@@ -2483,13 +2566,16 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
        'info_dict': {
            'title': 'JODA7',
            'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
-        }
+        },
+        'skip': 'This playlist does not exist',
    }, {
        'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
        'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
        'info_dict': {
            'title': 'Uploads from Interstellar Movie',
            'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
+            'uploader': 'Interstellar Movie',
+            'uploader_id': 'InterstellarMovie1',
        },
        'playlist_mincount': 21,
    }, {
@@ -2514,6 +2600,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
        'params': {
            'skip_download': True,
        },
+        'skip': 'This video is not available.',
        'add_ie': [YoutubeIE.ie_key()],
    }, {
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
@@ -2525,7 +2612,6 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
-            'license': 'Standard YouTube License',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
@@ -2536,6 +2622,16 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
            'noplaylist': True,
            'skip_download': True,
        },
+    }, {
+        # https://github.com/ytdl-org/youtube-dl/issues/21844
+        'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
+        'info_dict': {
+            'title': 'Data Analysis with Dr Mike Pound',
+            'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
+            'uploader_id': 'Computerphile',
+            'uploader': 'Computerphile',
+        },
+        'playlist_mincount': 11,
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
@@ -2554,6 +2650,34 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
    def _real_initialize(self):
        """Initialize the extractor by running the login step (``self._login()``)."""
        self._login()

+    def extract_videos_from_page(self, page):
+        """Collect (video_id, video_title) pairs from a playlist page.
+
+        Tags carrying a ``data-video-id`` attribute are scanned first.
+        ``extract_videos_from_page_impl`` is then run with ``_VIDEO_RE``
+        and with two looser patterns, each time receiving the same
+        accumulator lists.
+
+        :param page: HTML of the playlist page, as a string.
+        :return: ``zip`` of the collected ids and titles; a title may be
+            empty/None when the tag has no usable ``data-title``.
+        """
+        ids_in_page = []
+        titles_in_page = []
+
+        for item in re.findall(
+                r'(<[^>]*\bdata-video-id\s*=\s*["\'][0-9A-Za-z_-]{11}[^>]+>)', page):
+            attrs = extract_attributes(item)
+            # The regex can also hit text where "data-video-id=" is only the
+            # tail of another attribute name or part of a quoted value; such
+            # tags have no real data-video-id attribute, so skip them rather
+            # than abort the whole page with a KeyError.
+            video_id = attrs.get('data-video-id')
+            if not video_id:
+                continue
+            video_title = unescapeHTML(attrs.get('data-title'))
+            if video_title:
+                video_title = video_title.strip()
+            ids_in_page.append(video_id)
+            titles_in_page.append(video_title)
+
+        # Fallback with old _VIDEO_RE
+        self.extract_videos_from_page_impl(
+            self._VIDEO_RE, page, ids_in_page, titles_in_page)
+
+        # Relaxed fallbacks
+        self.extract_videos_from_page_impl(
+            r'href="\s*/watch\?v\s*=\s*(?P<id>[0-9A-Za-z_-]{11})', page,
+            ids_in_page, titles_in_page)
+        self.extract_videos_from_page_impl(
+            r'data-video-ids\s*=\s*["\'](?P<id>[0-9A-Za-z_-]{11})', page,
+            ids_in_page, titles_in_page)
+
+        return zip(ids_in_page, titles_in_page)
+
    def _extract_mix(self, playlist_id):
        # The mixes are generated from a single video
        # the id of the playlist is just 'RD' + video_id
@@ -2702,6 +2826,8 @@ class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
        'info_dict': {
            'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'Uploads from lex will',
+            'uploader': 'lex will',
+            'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
        }
    }, {
        'note': 'Age restricted channel',
@@ -2711,6 +2837,8 @@ class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
        'info_dict': {
            'id': 'UUs0ifCMCm1icqRbqhUINa0w',
            'title': 'Uploads from Deus Ex',
+            'uploader': 'Deus Ex',
+            'uploader_id': 'DeusExOfficial',
        },
    }, {
        'url': 'https://invidio.us/channel/UC23qupoDRn9YOAVzeoxjOQA',
@@ -2795,6 +2923,8 @@ class YoutubeUserIE(YoutubeChannelIE):
        'info_dict': {
            'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ',
            'title': 'Uploads from The Linux Foundation',
+            'uploader': 'The Linux Foundation',
+            'uploader_id': 'TheLinuxFoundation',
        }
    }, {
        # Only available via https://www.youtube.com/c/12minuteathlete/videos
@@ -2804,6 +2934,8 @@ class YoutubeUserIE(YoutubeChannelIE):
        'info_dict': {
            'id': 'UUVjM-zV6_opMDx7WYxnjZiQ',
            'title': 'Uploads from 12 Minute Athlete',
+            'uploader': '12 Minute Athlete',
+            'uploader_id': 'the12minuteathlete',
        }
    }, {
        'url': 'ytuser:phihag',
@@ -2897,7 +3029,7 @@ class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
        'playlist_mincount': 4,
        'info_dict': {
            'id': 'ThirstForScience',
-            'title': 'Thirst for Science',
+            'title': 'ThirstForScience',
        },
    }, {
        # with "Load more" button
@@ -2914,6 +3046,7 @@ class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
            'id': 'UCiU1dHvZObB2iP6xkJ__Icw',
            'title': 'Chem Player',
        },
+        'skip': 'Blocked',
    }]