# yt-local/youtube/subscriptions.py

import datetime
import os
import secrets
import sqlite3
import time
import urllib.error
import urllib.request

from youtube import common, settings, channel

# so as to not completely break on people who have updated but don't know of new dependency
try:
    import atoma
except ModuleNotFoundError:
    print('Error: atoma not installed, subscriptions will not work')

# https://stackabuse.com/a-sqlite-tutorial-with-python/

# Path of the SQLite file holding the subscribed_channels and videos tables
# (created on demand by open_database).
database_path = os.path.join(settings.data_dir, "subscriptions.sqlite")

def open_database():
    """Open the subscriptions database, creating it on first use.

    Makes sure ``settings.data_dir`` exists, connects to ``database_path``
    and creates the ``subscribed_channels`` and ``videos`` tables if they
    are missing.

    Returns:
        An open ``sqlite3.Connection``. The caller must close it.

    Raises:
        Any exception raised while setting up the schema is re-raised after
        the connection has been rolled back and closed.
    """
    # exist_ok avoids the race between checking for the directory and making it
    os.makedirs(settings.data_dir, exist_ok=True)
    connection = sqlite3.connect(database_path)

    try:
        # SQLite only honours REFERENCES ... ON DELETE CASCADE when foreign
        # key enforcement is switched on, and it is off by default for every
        # new connection.
        connection.execute('PRAGMA foreign_keys = ON')

        # Create tables if they don't exist
        cursor = connection.cursor()
        cursor.execute('''CREATE TABLE IF NOT EXISTS subscribed_channels (
                              id integer PRIMARY KEY,
                              channel_id text NOT NULL,
                              channel_name text NOT NULL,
                              time_last_checked integer
                          )''')
        # Note: no comma after the last column; SQLite rejects a trailing one.
        cursor.execute('''CREATE TABLE IF NOT EXISTS videos (
                              id integer PRIMARY KEY,
                              uploader_id integer NOT NULL REFERENCES subscribed_channels(id) ON UPDATE CASCADE ON DELETE CASCADE,
                              video_id text NOT NULL,
                              title text NOT NULL,
                              time_published integer NOT NULL,
                              description text
                          )''')
        connection.commit()
    except BaseException:
        # Do not hand a half-initialised connection back to the caller
        connection.rollback()
        connection.close()
        raise

    return connection

def _subscribe(channel_id, channel_name):
    """Add a channel to the subscribed_channels table.

    Args:
        channel_id: value stored in the ``channel_id`` column.
        channel_name: value stored in the ``channel_name`` column.

    Raises:
        Any database error is re-raised after the transaction is rolled back.
        The connection is closed in every case.
    """
    connection = open_database()
    try:
        cursor = connection.cursor()
        # Column names must match the schema created in open_database
        # (channel_id, channel_name); time_last_checked is left NULL.
        cursor.execute(
            "INSERT INTO subscribed_channels (channel_id, channel_name) VALUES (?, ?)",
            (channel_id, channel_name),
        )
        connection.commit()
    except BaseException:
        connection.rollback()
        raise
    finally:
        connection.close()

def _unsubscribe(channel_id):
    """Delete every subscribed_channels row whose channel_id matches.

    The deletion is committed on success and rolled back (then re-raised)
    on any failure; the connection is always closed afterwards.
    """
    db = open_database()
    try:
        try:
            db.execute(
                "DELETE FROM subscribed_channels WHERE channel_id=?",
                (channel_id, ),
            )
            db.commit()
        except BaseException:
            db.rollback()
            raise
    finally:
        db.close()

def _get_videos(number, offset):
    """Return one page of stored videos, newest first.

    Args:
        number: maximum number of rows to return (bound to SQL ``LIMIT``).
        offset: number of rows to skip (bound to SQL ``OFFSET``).

    Returns:
        A list of tuples ``(video_id, title, time_published, description,
        channel_id, channel_name)`` ordered by ``time_published`` descending.
    """
    connection = open_database()
    try:
        cursor = connection.cursor()
        # The bound values go in as ONE sequence; passing them as separate
        # positional arguments makes execute() raise TypeError.
        cursor.execute('''SELECT video_id, title, time_published, description, channel_id, channel_name
                          FROM videos
                          INNER JOIN subscribed_channels on videos.uploader_id = subscribed_channels.id
                          ORDER BY time_published DESC
                          LIMIT ? OFFSET ?''', (number, offset))
        # Fetch before the connection is closed in the finally clause
        return cursor.fetchall()
    finally:
        connection.close()


# Approximate length of each time unit in seconds
units = {
    'year': 31536000,   # 365*24*3600
    'month': 2592000,   # 30*24*3600
    'week': 604800,     # 7*24*3600
    'day':  86400,      # 24*3600
    'hour': 3600,
    'minute': 60,
    'second': 1,
}


def youtube_timestamp_to_posix(dumb_timestamp):
    """Approximate the unix time described by a relative timestamp.

    Args:
        dumb_timestamp: a dumbed down timestamp such as "1 year ago",
            "3 hours ago" or "just now" (case-insensitive).

    Returns:
        Seconds since 1/1/1970 as a float: the current time minus the
        stated number of units.

    Raises:
        ValueError: if the first word is not an integer.
        IndexError: if there is no unit word after the number.
        KeyError: if the unit is not one of the keys of ``units``.
    """
    dumb_timestamp = dumb_timestamp.lower().strip()
    now = time.time()
    if dumb_timestamp == "just now":
        return now
    # split() without an argument tolerates repeated or odd whitespace
    split = dumb_timestamp.split()
    number, unit = int(split[0]), split[1]
    # Strip the plural "s" based on the word itself rather than the count, so
    # that "0 seconds ago" works too. No singular unit name ends in "s".
    if unit.endswith('s'):
        unit = unit[:-1]
    return now - number*units[unit]


weekdays = ('Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun')
months = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec')


def _format_http_date(posix_time):
    """Format a unix time as an HTTP date, e.g. 'Wed, 21 Oct 2015 07:28:00 GMT'."""
    struct_time = time.gmtime(posix_time)
    # Day and month names come from the fixed English tables above rather
    # than %a/%b, whose output follows the process locale.
    weekday = weekdays[struct_time.tm_wday]
    month = months[struct_time.tm_mon - 1]
    return time.strftime(weekday + ', %d ' + month + ' %Y %H:%M:%S GMT', struct_time)


def _get_upstream_videos(channel_id, channel_name, time_last_checked):
    """Fetch a channel's videos from its Atom feed and its channel page.

    The Atom feed is requested with an If-Modified-Since header derived
    from ``time_last_checked``. The channel page is then used to build the
    final list; entries that also appear in the feed get the feed's title,
    author and publication time.

    Args:
        channel_id: channel id used in the feed URL and the channel request.
        channel_name: used as 'author' for videos missing from the feed.
        time_last_checked: unix time of the previous check, or None if the
            channel has never been checked (the database column is nullable).

    Returns:
        A list of video info dicts, or an empty list when the server answers
        304 Not Modified.

    Raises:
        urllib.error.HTTPError: for any HTTP error status other than 304.
    """
    feed_url = "https://www.youtube.com/feeds/videos.xml?channel_id=" + channel_id
    headers = {}

    if time_last_checked is not None:
        # randomly change time_last_checked up to one day earlier to make tracking harder
        # (clamped at 0 so gmtime never sees a negative time)
        time_last_checked = max(0, time_last_checked - secrets.randbelow(24*3600))

        # If-Modified-Since header: https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/If-Modified-Since
        headers['If-Modified-Since'] = _format_http_date(time_last_checked)
        print(headers['If-Modified-Since'])

    headers['User-Agent'] = 'Python-urllib'     # Don't leak python version
    headers['Accept-Encoding'] = 'gzip, br'
    req = urllib.request.Request(feed_url, headers=headers)
    if settings.route_tor:
        # NOTE(review): socks and sockshandler are not imported in the visible
        # part of this file - confirm they are in scope before enabling Tor.
        opener = urllib.request.build_opener(sockshandler.SocksiPyHandler(socks.PROXY_TYPE_SOCKS5, "127.0.0.1", 9150))
    else:
        opener = urllib.request.build_opener()

    try:
        # The with-block closes the response even if reading fails
        with opener.open(req, timeout=15) as response:
            content = response.read()
            content_encoding = response.getheader('Content-Encoding', default='identity')
    except urllib.error.HTTPError as error:
        # urllib reports every non-2xx status, 304 Not Modified included, by
        # raising HTTPError instead of returning the response.
        if error.code == 304:
            print('No new videos for ' + channel_id)
            return []
        raise

    print('Retrieved videos for ' + channel_id)
    content = common.decode_content(content, content_encoding)

    feed = atoma.parse_atom_bytes(content)
    atom_videos = {}
    for entry in feed.entries:
        video_id = entry.id_[9:]     # example of id_: yt:video:q6EoRBvdVPQ

        # standard names used in this program for purposes of html templating
        atom_videos[video_id] = {
            'title': entry.title.value,
            'author': entry.authors[0].name,
            #'description': '',              # Not supported by atoma
            #'duration': '',                 # Youtube's atom feeds don't provide it.. very frustrating
            'published':    entry.published.strftime('%m/%d/%Y'),
            'time_published':   int(entry.published.timestamp()),
        }

    # final list
    videos = []

    # Now check channel page to retrieve missing information for videos
    json_channel_videos = channel.get_grid_items(channel.get_channel_tab(channel_id)[1]['response'])
    for json_video in json_channel_videos:
        # NOTE(review): renderer_info is not defined or imported in the visible
        # part of this file - confirm where it lives (possibly the common module).
        info = renderer_info(json_video)
        if info['id'] in atom_videos:
            info.update(atom_videos[info['id']])
        else:
            info['author'] = channel_name
            # Same key as the feed entries use, so every dict has 'time_published'
            info['time_published'] = youtube_timestamp_to_posix(info['published'])
        videos.append(info)
    return videos