Use BeautifulSoup to extract information from YT searches

Google is blocking the IPs of the invidio.us instances; for this reason, livie will use BeautifulSoup to extract information from YT searches.
2019-11-27 16:23:59 -05:00
parent abbf953e64
commit 8dbd068524
3 changed files with 32 additions and 28 deletions
--- a/README.md
+++ b/README.md
@@ -14,9 +14,10 @@ Livie allows the user to search youtube.com and play the video from `mpv`.
 - `python >= 3.5`
 - `python-requests`
 - `python-beautifulsoup4`
 - `mpv`
-  `sudo pacman -S python mpv python-requests`
+  `sudo pacman -S python mpv python-requests python-beautifulsoup4`
 ## Installation
--- a/livie.el
+++ b/livie.el
@@ -47,8 +47,7 @@
  :group 'livie
  :type 'string)
-(defvar livie-youtube-regexp
+(defvar livie-youtube-regexp "https://www.youtube.com/watch\\?v=[A-Za-z0-9_\\-]\\{11\\}")
  "https://invidio.us/latest_version\\?id=[A-Za-z0-9_\\-]\\{11\\}&itag=\\<\\([0-9]*\\.[0-9]+\\|[0-9]+\\)[df]?\\>&local=true")
 (define-derived-mode livie-mode
  special-mode "livie"
--- a/livie.py
+++ b/livie.py
@@ -1,34 +1,38 @@
 """This module does render video"""
 import sys
 import datetime
 import json
 import requests
 from bs4 import BeautifulSoup
-URL = 'https://invidio.us'
+URL = 'https://www.youtube.com'
 FILTER = '&sp=EgIQAQ%253D%253D'
 INPUT = sys.argv[1]
-SEARCH = '%s/api/v1/search?q=%s' % (URL, INPUT)
+SEARCH = '%s/results?search_query=%s%s' % (URL, INPUT, FILTER)
 REQUEST = requests.get(SEARCH)
-SD = '&itag=18&local=true'
+SOUP = BeautifulSoup(REQUEST.content, 'lxml', from_encoding=REQUEST.encoding)
-HD = '&itag=22&local=true'
+FIRST = True
 FIRST = True  # skip line loop
-VIDEOS = json.loads(REQUEST.content.decode('utf-8'))
+def replace(string):
    """Remove unnecessary characters"""
    string = string.replace(' - Duration: ', '')
    string = string.replace('.', '')
    string = string.replace(' views', '')
    return string
-for video in VIDEOS:
+for vid in SOUP.find_all(class_='yt-lockup-content'):
    try:
-        title = video.get('title', '')
+        link = URL + vid.h3.a['href']
-        videoid = video.get('videoId', '')
+        title = vid.h3.a.text
-        author = video.get('author', '')
+        description = vid.h3.span.text
        author = vid.find(class_='yt-lockup-byline').a.text
        meta = vid.find(class_='yt-lockup-meta').ul.contents
        time_srt = vid.find(class_='yt-lockup-title').span.text
        time = replace(time_srt)
        uploaded = meta[0].text
        views_str = vid.find(class_='yt-lockup-meta').ul.li.find_next()
        views = replace(views_str.text)
        # Make URL
        sd = '%s/latest_version?id=%s%s' % (URL, videoid, SD)
        hd = '%s/latest_version?id=%s%s' % (URL, videoid, HD)
        timer = video.get('lengthSeconds', '')
        time = str(datetime.timedelta(seconds=timer))
        publish = video.get('publishedText', '')
    except TypeError:
        continue
@@ -38,9 +42,9 @@ for video in VIDEOS:
        print()  # print skip line
    # prints
-    print('    title: %s' % (title))
+    print('    title: %s' % title)
-    print('       SD: %s' % (sd))
+    print('      url: %s' % link)
-    print('       HD: %s' % (hd))
+    print('  channel: %s' % author)
-    print('           HD ^ Only some videos available caused by DRM')
+    print(' uploaded: %s' % uploaded)
-    print('  channel: %s' % (author))
+    print('     time: %s' % time)
-    print('     time: %s' % (time))
+    print('    views: %s' % views)