Revert "Use BeautifulSoup to extract information from YT searches"

This reverts commit 8dbd0685240518f255603d3d86ac07e20460b862.

(closes #7)
Jesús 2020-07-17 16:52:56 -05:00
parent cc0df81f19
commit 938265700a
3 changed files with 30 additions and 35 deletions
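
Note: this revert drops the youtube.com scraping path (BeautifulSoup/lxml) and goes back to the invidious JSON search endpoint that the restored code queries. Below is a minimal sketch of that request path, using only the instance URL, endpoint, and field names that appear in the diff; the query string is a placeholder, and the sketch assumes the invidio.us instance is reachable.

```python
# Minimal sketch of the restored search flow; the endpoint and field names are
# taken from the diff below, QUERY is just a placeholder for sys.argv[1].
import json

import requests

QUERY = 'emacs'
SEARCH = 'https://invidio.us/api/v1/search?q=%s' % QUERY
REQUEST = requests.get(SEARCH)

for video in json.loads(REQUEST.content.decode('utf-8')):
    print(video.get('title', ''), video.get('videoId', ''))
```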


@@ -10,12 +10,9 @@ Livie allows the user to search youtube.com and play the video from `mpv`.
 - `python >= 3.5`
 - `python-requests`
-- `python-beautifulsoup4`
-- `python-lxml`
-- `hypervideo`
 - `mpv`
-`sudo pacman -S python mpv python-requests python-beautifulsoup4 python-lxml hypervideo`
+`sudo pacman -S python mpv python-requests`
 ## Installation


@@ -47,7 +47,10 @@
   :group 'livie
   :type 'string)
-(defvar livie-youtube-regexp "https://invidio.us/watch\\?v=[A-Za-z0-9_\\-]\\{11\\}")
+(defvar livie-youtube-regexp
+  "https://\\<\\(invidious.snopyta.org\\|invidio.us\\)[df]?\\>/latest_version\\?id=[A-Za-z0-9_\\-]\\{11\\}&itag=\\<\\([0-9]*\\.[0-9]+\\|[0-9]+\\)[df]?\\>&local=true"
+  )
 (define-derived-mode livie-mode
   special-mode "livie"
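
The restored `livie-youtube-regexp` recognizes the direct-stream URLs the Python script prints (`latest_version?id=<11-char id>&itag=<n>&local=true` on invidious.snopyta.org or invidio.us). For illustration only, here is a rough Python `re` equivalent; the `\<`/`\>` word boundaries and `[df]?` fragments of the Elisp original are approximated, and the sample URL and video id are made up.

```python
# Rough Python approximation of livie-youtube-regexp, for illustration only.
import re

PATTERN = re.compile(
    r'https://(invidious\.snopyta\.org|invidio\.us)'
    r'/latest_version\?id=[A-Za-z0-9_\-]{11}&itag=[0-9]+&local=true'
)

SAMPLE = 'https://invidio.us/latest_version?id=dQw4w9WgXcQ&itag=22&local=true'
print(bool(PATTERN.match(SAMPLE)))  # True
```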


@@ -1,39 +1,34 @@
 """This module does render video"""
 import sys
+import datetime
+import json
 import requests
-from bs4 import BeautifulSoup
-URL = 'https://www.youtube.com'
-FILTER = '&sp=EgIQAQ%253D%253D'
+URL = 'https://invidio.us'
 INPUT = sys.argv[1]
-SEARCH = '%s/results?search_query=%s%s' % (URL, INPUT, FILTER)
+SEARCH = '%s/api/v1/search?q=%s' % (URL, INPUT)
 REQUEST = requests.get(SEARCH)
-SOUP = BeautifulSoup(REQUEST.content, 'lxml', from_encoding=REQUEST.encoding)
-FIRST = True
+SD = '&itag=18&local=true'
+HD = '&itag=22&local=true'
+FIRST = True # skip line loop
-def replace(string):
-    """Remove unnecessary characters"""
-    string = string.replace(' - Duration: ', '')
-    string = string.replace('.', '')
-    string = string.replace(' views', '')
-    return string
+VIDEOS = json.loads(REQUEST.content.decode('utf-8'))
-for vid in SOUP.find_all(class_='yt-lockup-content'):
+for video in VIDEOS:
     try:
-        link = 'https://invidio.us%s' % vid.h3.a['href']
-        title = vid.h3.a.text
-        description = vid.h3.span.text
-        author = vid.find(class_='yt-lockup-byline').a.text
-        meta = vid.find(class_='yt-lockup-meta').ul.contents
-        time_srt = vid.find(class_='yt-lockup-title').span.text
-        time = replace(time_srt)
-        uploaded = meta[0].text
-        views_str = vid.find(class_='yt-lockup-meta').ul.li.find_next()
-        views = replace(views_str.text)
+        title = video.get('title', '')
+        videoid = video.get('videoId', '')
+        author = video.get('author', '')
+        # Make URL
+        sd = '%s/latest_version?id=%s%s' % (URL, videoid, SD)
+        hd = '%s/latest_version?id=%s%s' % (URL, videoid, HD)
+        timer = video.get('lengthSeconds', '')
+        time = str(datetime.timedelta(seconds=timer))
+        publish = video.get('publishedText', '')
     except TypeError:
         continue
@@ -43,9 +38,9 @@ for vid in SOUP.find_all(class_='yt-lockup-content'):
     print() # print skip line
     # prints
-    print(' title: %s' % title)
-    print(' url: %s' % link)
-    print(' channel: %s' % author)
-    print(' uploaded: %s' % uploaded)
-    print(' time: %s' % time)
-    print(' views: %s' % views)
+    print(' title: %s' % (title))
+    print(' SD: %s' % (sd))
+    print(' HD: %s' % (hd))
+    print(' HD ^ Only some videos available caused by DRM')
+    print(' channel: %s' % (author))
+    print(' time: %s' % (time))
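
For reference, a worked example of how the restored loop formats a single result. The record below is hypothetical; the field names, itag values, and output format are the ones used in the diff above.

```python
# Worked example with a hypothetical API record; field names mirror the
# .get() calls in the restored code.
import datetime

video = {'title': 'Example video', 'videoId': 'abcdefghijk',
         'author': 'Example channel', 'lengthSeconds': 212,
         'publishedText': '1 year ago'}

sd = 'https://invidio.us/latest_version?id=%s&itag=18&local=true' % video['videoId']
hd = 'https://invidio.us/latest_version?id=%s&itag=22&local=true' % video['videoId']
time = str(datetime.timedelta(seconds=video['lengthSeconds']))  # '0:03:32'

print(' title: %s' % video['title'])
print(' SD: %s' % sd)
print(' HD: %s' % hd)
print(' channel: %s' % video['author'])
print(' time: %s' % time)
```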