Polishing the webfinger implementation

- Changed quotes in the templates from " to ' - Changed all link generation to use request.urlgen - Moved xrd links data generation from template to view - Added parsing of the account URI using urlparse
2011-12-20 22:06:36 +01:00 · 2011-12-20 22:06:36 +01:00 · c8cb0ee88f
commit c8cb0ee88f
parent 9df07e87a8
4 changed files with 623 additions and 30 deletions
--- a/mediagoblin/templates/mediagoblin/webfinger/host-meta.xml
+++ b/mediagoblin/templates/mediagoblin/webfinger/host-meta.xml
@ -14,14 +14,14 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 -#}
-<?xml version='1.0' encoding='UTF-8'?>
-<XRD xmlns='http://docs.oasis-open.org/ns/xri/xrd-1.0'
-     xmlns:hm='http://host-meta.net/xrd/1.0'>
+<?xml version="1.0" encoding="UTF-8"?>
+<XRD xmlns="http://docs.oasis-open.org/ns/xri/xrd-1.0"
+     xmlns:hm="http://host-meta.net/xrd/1.0">
 
    <hm:Host>{{ request.host }}</hm:Host>
 
-    <Link rel='lrdd'
-          template='{{ lrdd_template }}'>
-        <Title>{{ llrd_title }}</Title>
+    <Link rel="lrdd"
+          template="{{ lrdd_template|replace(placeholder, '{uri}') }}">
+        <Title>{{ lrdd_title }}</Title>
    </Link>
 </XRD>
--- a/mediagoblin/templates/mediagoblin/webfinger/xrd.xml
+++ b/mediagoblin/templates/mediagoblin/webfinger/xrd.xml
@ -14,16 +14,14 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 -#}
-<?xml version='1.0' encoding='UTF-8'?>
-<XRD xmlns='http://docs.oasis-open.org/ns/xri/xrd-1.0'>
+<?xml version="1.0" encoding="UTF-8"?>
+<XRD xmlns="http://docs.oasis-open.org/ns/xri/xrd-1.0">
 
-    <Subject>{{ request.GET.get('uri') }}</Subject>
-    <Alias>http://{{ request.host }}/u/{{ username }}</Alias>
- 
-    <Link rel='http://microformats.org/profile/hcard'
-          href='http://{{ request.host }}/u/{{ username }}' />
-
-    <Link rel="http://schemas.google.com/g/2010#updates-from"
-          type="application/atom+xml"
-          href="http://{{ request.host }}/u/{{ username }}/atom/" />
+    <Subject>{{ subject }}</Subject>
+    <Alias>{{ alias }}</Alias>
+    {% for link in links %}
+    <Link 
+      {%- for attr, value in link.attrs.items() %} {{ attr }}="{{ value}}"
+      {%- endfor %} />
+    {%- endfor %}
 </XRD>
--- a/mediagoblin/tools/feed.py
+++ b/mediagoblin/tools/feed.py
@ -0,0 +1,527 @@
+# GNU MediaGoblin -- federated, autonomous media hosting
+# Copyright (C) 2011 MediaGoblin contributors.  See AUTHORS.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+from lxml import etree
+from lxml.builder import ElementMaker
+from werkzeug.wrappers import BaseResponse
+
+import datetime
+
+"""
+    Feed engine written for GNU MediaGoblin,
+    based on werkzeug atom feeds tool (werkzeug.contrib.atom)
+
+    The feed library contains two types of classes:
+        - Entities that contains the feed data.
+        - Generators that are injected to the above classes and are able to
+            generate feeds in a specific format. An atom feed genearator is
+            provided, but others could be written as well.
+
+    The Werkzeurg library interface have been mimetized, so the replacement can
+    be done with only switching the import call.
+
+    Example::
+
+        def atom_feed(request):
+            feed = AtomFeed("My Blog", feed_url=request.url,
+                            url=request.host_url,
+                            subtitle="My example blog for a feed test.")
+            for post in Post.query.limit(10).all():
+                feed.add(post.title, post.body, content_type='html',
+                         author=post.author, url=post.url, id=post.uid,
+                         updated=post.last_update, published=post.pub_date)
+            return feed.get_response()
+"""
+
+
+##
+# Class FeedGenerator
+#
+class FeedGenerator(object):
+    def __init__(self):
+        pass
+
+    def format_iso8601(self, obj):
+        """Format a datetime object for iso8601"""
+        return obj.strftime('%Y-%m-%dT%H:%M:%SZ')
+
+
+##
+# Class AtomGenerator
+#
+class AtomGenerator(FeedGenerator):
+    """ Generator that generate feeds in Atom format """
+    NAMESPACE = "http://www.w3.org/2005/Atom"
+
+    def __init__(self):
+        pass
+
+    def generate(self, data):
+        """Return an XML tree representation."""
+        if isinstance(data, AtomFeed):
+            return self.generate_feed(data)
+        elif isinstance(data, FeedEntry):
+            return self.generate_feedEntry(data)
+
+    def generate_text_block(self, name, content, content_type=None):
+        """Helper method for the builder that creates an XML text block."""
+        root = etree.Element(name)
+
+        if content_type:
+            root.set('type', content_type)
+
+        if content_type == 'xhtml':
+            div_ele = etree.Element('div')
+            div_ele.set('xmlns', XHTML_NAMESPACE)
+            div_ele.text = content
+            root.append(div_ele)
+        else:
+            root.text = content
+
+        return root
+
+    def generate_feed(self, data):
+        """Return an XML tree representation of the feed."""
+        NSMAP = {None: self.NAMESPACE}
+        root = etree.Element("feed", nsmap=NSMAP)
+
+        E = ElementMaker()
+
+        # atom demands either an author element in every entry or a global one
+        if not data.author:
+            if False in map(lambda e: bool(e.author), data.entries):
+                data.author = ({'name': 'Unknown author'},)
+
+        if not data.updated:
+            dates = sorted([entry.updated for entry in data.entries])
+            data.updated = dates and dates[-1] or datetime.utcnow()
+
+        title_ele = self.generate_text_block(
+            'title',
+            data.title,
+            data.title_type)
+        root.append(title_ele)
+
+        root.append(E.id(data.id))
+        root.append(E.updated(self.format_iso8601(data.updated)))
+
+        if data.url:
+            link_ele = etree.Element("link")
+            link_ele.set("href", data.url)
+            root.append(link_ele)
+
+        if data.feed_url:
+            link_ele = etree.Element("link")
+            link_ele.set("href", data.feed_url)
+            link_ele.set("rel", "self")
+            root.append(link_ele)
+
+        for link in data.links:
+            link_ele = etree.Element("link")
+            for name, value in link.items():
+                link_ele.set(name, value)
+            root.append(link_ele)
+
+        for author in data.author:
+            author_element = etree.Element("author")
+            author_element.append(E.name(author['name']))
+            if 'uri' in author:
+                author_element.append(E.name(author['uri']))
+            if 'email' in author:
+                author_element.append(E.name(author['email']))
+
+            root.append(author_element)
+
+        if data.subtitle:
+            root.append(self.generate_text_block('subtitle', data.subtitle,
+                                                    data.subtitle_type))
+        if data.icon:
+            root.append(E.icon(data.icon))
+
+        if data.logo:
+            root.append(E.logo(data.logo))
+
+        if data.rights:
+            root.append(self.generate_text_block('rights', data.rights,
+                                                    data.rights_type))
+
+        generator_name, generator_url, generator_version = data.generator
+        if generator_name or generator_url or generator_version:
+            generator_ele = etree.Element("generator")
+            if generator_url:
+                generator_ele.set("uri", generator_url, True)
+            if generator_version:
+                generator_ele.set("version", generator_version)
+
+            generator_ele.text = generator_name
+
+            root.append(generator_ele)
+
+        for entry in data.entries:
+            root.append(entry.generate())
+
+        return root
+
+    def generate_feedEntry(self, data):
+        """Return an XML tree representation of the feed entry."""
+        E = ElementMaker()
+        root = etree.Element("entry")
+
+        if data.xml_base:
+            root.base = data.xml_base
+
+        title_ele = self.generate_text_block(
+            'title',
+            data.title,
+            data.title_type)
+        root.append(title_ele)
+
+        root.append(E.id(data.id))
+        root.append(E.updated(self.format_iso8601(data.updated)))
+
+        if data.published:
+            root.append(E.published(self.format_iso8601(data.published)))
+
+        if data.url:
+            link_ele = etree.Element("link")
+            link_ele.set("href", data.url)
+            root.append(link_ele)
+
+        for author in data.author:
+            author_element = etree.Element("author")
+            author_element.append(E.name(author['name']))
+            if 'uri' in author:
+                author_element.append(E.name(author['uri']))
+            if 'email' in author:
+                author_element.append(E.name(author['email']))
+
+            root.append(author_element)
+
+        for link in data.links:
+            link_ele = etree.Element("link")
+            for name, value in link.items():
+                link_ele.set(name, value)
+            root.append(link_ele)
+
+        print data.thumbnail
+
+        if data.thumbnail:
+            namespace = "http://search.yahoo.com/mrss/"
+            nsmap = {"media": namespace}
+            thumbnail_ele = etree.Element(
+                "{http://search.yahoo.com/mrss/}thumbnail", nsmap=nsmap)
+            thumbnail_ele.set("url", data.thumbnail)
+
+            root.append(thumbnail_ele)
+
+        if data.summary:
+            summary_ele = self.generate_text_block('summary', data.summary,
+                data.summary_type)
+            root.append(summary_ele)
+
+        if data.content:
+            content = data.content
+
+            if data.thumbnail:
+                thumbnail_html = etree.Element("img")
+                thumbnail_html.set("src", data.thumbnail)
+                content = etree.tostring(thumbnail_html) + content
+
+            content_ele = self.generate_text_block('content', content,
+                data.content_type)
+            root.append(content_ele)
+
+        for name, value in data.custom.items():
+            element = etree.Element(name)
+            element.text = value
+            root.append(element)
+
+        return root
+
+
+##
+# Class AtomFeed
+#
+class AtomFeed(object):
+    """
+    A helper class that contains feeds. By default, it uses the AtomGenerator
+    but others could be injected. It has the AtomFeed name to keep the name
+    it had on werkzeug library
+
+    Following Werkzeurg implementation, the constructor takes a lot of
+    parameters. As an addition, the class will also store custom parameters for
+    fields not explicitly supported by the library.
+
+    :param feed_generator: The generator that will be used to generate the feed
+                            defaults to AtomGenerator
+    :param title: the title of the feed. Required.
+    :param title_type: the type attribute for the title element.  One of
+                       ``'html'``, ``'text'`` or ``'xhtml'``.
+    :param url: the url for the feed (not the url *of* the feed)
+    :param id: a globally unique id for the feed.  Must be an URI.  If
+               not present the `feed_url` is used, but one of both is
+               required.
+    :param updated: the time the feed was modified the last time.  Must
+                    be a :class:`datetime.datetime` object.  If not
+                    present the latest entry's `updated` is used.
+    :param feed_url: the URL to the feed.  Should be the URL that was
+                     requested.
+    :param author: the author of the feed.  Must be either a string (the
+                   name) or a dict with name (required) and uri or
+                   email (both optional).  Can be a list of (may be
+                   mixed, too) strings and dicts, too, if there are
+                   multiple authors. Required if not every entry has an
+                   author element.
+    :param icon: an icon for the feed.
+    :param logo: a logo for the feed.
+    :param rights: copyright information for the feed.
+    :param rights_type: the type attribute for the rights element.  One of
+                        ``'html'``, ``'text'`` or ``'xhtml'``.  Default is
+                        ``'text'``.
+    :param subtitle: a short description of the feed.
+    :param subtitle_type: the type attribute for the subtitle element.
+                          One of ``'text'``, ``'html'``, ``'text'``
+                          or ``'xhtml'``.  Default is ``'text'``.
+    :param links: additional links.  Must be a list of dictionaries with
+                  href (required) and rel, type, hreflang, title, length
+                  (all optional)
+    :param generator: the software that generated this feed.  This must be
+                      a tuple in the form ``(name, url, version)``.  If
+                      you don't want to specify one of them, set the item
+                      to `None`.
+    :param entries: a list with the entries for the feed. Entries can also
+                    be added later with :meth:`add`.
+
+    For more information on the elements see
+    http://www.atomenabled.org/developers/syndication/
+
+    Everywhere where a list is demanded, any iterable can be used.
+    """
+
+    default_generator = ('GNU Mediagoblin', None, None)
+    default_feed_generator = AtomGenerator()
+
+    def __init__(self, title=None, entries=None, feed_generator=None,
+                 **kwargs):
+        self.feed_generator = feed_generator
+        self.title = title
+        self.title_type = kwargs.get('title_type', 'text')
+        self.url = kwargs.get('url')
+        self.feed_url = kwargs.get('feed_url', self.url)
+        self.id = kwargs.get('id', self.feed_url)
+        self.updated = kwargs.get('updated')
+        self.author = kwargs.get('author', ())
+        self.icon = kwargs.get('icon')
+        self.logo = kwargs.get('logo')
+        self.rights = kwargs.get('rights')
+        self.rights_type = kwargs.get('rights_type')
+        self.subtitle = kwargs.get('subtitle')
+        self.subtitle_type = kwargs.get('subtitle_type', 'text')
+        self.generator = kwargs.get('generator')
+        if self.generator is None:
+            self.generator = self.default_generator
+        self.links = kwargs.get('links', [])
+        self.entries = entries and list(entries) or []
+
+        if not hasattr(self.author, '__iter__') \
+           or isinstance(self.author, (basestring, dict)):
+            self.author = [self.author]
+        for i, author in enumerate(self.author):
+            if not isinstance(author, dict):
+                self.author[i] = {'name': author}
+
+        if not self.feed_generator:
+            self.feed_generator = self.default_feed_generator
+        if not self.title:
+            raise ValueError('title is required')
+        if not self.id:
+            raise ValueError('id is required')
+        for author in self.author:
+            if 'name' not in author:
+                raise TypeError('author must contain at least a name')
+
+        # Look for arguments that we haven't matched with object members.
+        # They will be added to the custom dictionary.
+        # This way we can have custom fields not specified in this class.
+        self.custom = {}
+        properties = dir(self)
+
+        for name, value in kwargs.items():
+            if (properties.count(name) == 0):
+                self.custom[name] = value
+
+    def add(self, *args, **kwargs):
+        """Add a new entry to the feed.  This function can either be called
+        with a :class:`FeedEntry` or some keyword and positional arguments
+        that are forwarded to the :class:`FeedEntry` constructor.
+        """
+        if len(args) == 1 and not kwargs and isinstance(args[0], FeedEntry):
+            args[0].generator = self.generator
+            self.entries.append(args[0])
+        else:
+            kwargs['feed_url'] = self.feed_url
+            self.entries.append(FeedEntry(feed_generator=self.feed_generator, 
+                                            *args, **kwargs))
+
+    def __repr__(self):
+        return '<%s %r (%d entries)>' % (
+            self.__class__.__name__,
+            self.title,
+            len(self.entries)
+        )
+
+    def generate(self):
+        """Return an XML tree representation of the feed."""
+        return self.feed_generator.generate(self)
+
+    def to_string(self):
+        """Convert the feed into a string."""
+        return etree.tostring(self.generate(), encoding='UTF-8')
+
+    def get_response(self):
+        """Return a response object for the feed."""
+        return BaseResponse(self.to_string(), mimetype='application/atom+xml')
+
+    def __call__(self, environ, start_response):
+        """Use the class as WSGI response object."""
+        return self.get_response()(environ, start_response)
+
+    def __unicode__(self):
+        return self.to_string()
+
+    def __str__(self):
+        return self.to_string().encode('utf-8')
+
+
+##
+# Class FeedEntry
+#
+class FeedEntry(object):
+    """Represents a single entry in a feed.
+        
+    Following Werkzeurg implementation, the constructor takes a lot of 
+    parameters. As an addition, the class will also store custom parameters for
+    fields not explicitly supported by the library.
+
+    :param feed_generator: The generator that will be used to generate the feed.
+                            defaults to AtomGenerator
+    :param title: the title of the entry. Required.
+    :param title_type: the type attribute for the title element.  One of
+                       ``'html'``, ``'text'`` or ``'xhtml'``.
+    :param content: the content of the entry.
+    :param content_type: the type attribute for the content element.  One
+                         of ``'html'``, ``'text'`` or ``'xhtml'``.
+    :param summary: a summary of the entry's content.
+    :param summary_type: the type attribute for the summary element.  One
+                         of ``'html'``, ``'text'`` or ``'xhtml'``.
+    :param url: the url for the entry.
+    :param id: a globally unique id for the entry.  Must be an URI.  If
+               not present the URL is used, but one of both is required.
+    :param updated: the time the entry was modified the last time.  Must
+                    be a :class:`datetime.datetime` object. Required.
+    :param author: the author of the feed.  Must be either a string (the
+                   name) or a dict with name (required) and uri or
+                   email (both optional).  Can be a list of (may be
+                   mixed, too) strings and dicts, too, if there are
+                   multiple authors. Required if not every entry has an
+                   author element.
+    :param published: the time the entry was initially published.  Must
+                      be a :class:`datetime.datetime` object.
+    :param rights: copyright information for the entry.
+    :param rights_type: the type attribute for the rights element.  One of
+                        ``'html'``, ``'text'`` or ``'xhtml'``.  Default is
+                        ``'text'``.
+    :param links: additional links.  Must be a list of dictionaries with
+                  href (required) and rel, type, hreflang, title, length
+                  (all optional)
+    :param xml_base: The xml base (url) for this feed item.  If not provided
+                     it will default to the item url.
+
+    For more information on the elements see
+    http://www.atomenabled.org/developers/syndication/
+
+    Everywhere where a list is demanded, any iterable can be used.
+    """
+    
+    default_feed_generator = AtomGenerator()
+
+    def __init__(self, title=None, content=None, feed_url=None, 
+                    feed_generator=None, **kwargs):
+        self.feed_generator = feed_generator
+        self.title = title
+        self.title_type = kwargs.get('title_type', 'text')
+        self.content = content
+        self.content_type = kwargs.get('content_type', 'html')
+        self.url = kwargs.get('url')
+        self.id = kwargs.get('id', self.url)
+        self.updated = kwargs.get('updated')
+        self.summary = kwargs.get('summary')
+        self.summary_type = kwargs.get('summary_type', 'html')
+        self.author = kwargs.get('author')
+        self.published = kwargs.get('published')
+        self.rights = kwargs.get('rights')
+        self.links = kwargs.get('links', [])
+        self.xml_base = kwargs.get('xml_base', feed_url)
+        self.thumbnail = kwargs.get('thumbnail')
+
+
+        if not hasattr(self.author, '__iter__') \
+           or isinstance(self.author, (basestring, dict)):
+            self.author = [self.author]
+        for i, author in enumerate(self.author):
+            if not isinstance(author, dict):
+                self.author[i] = {'name': author}
+
+        if not self.feed_generator:
+            self.feed_generator = self.default_feed_generator
+        if not self.title:
+            raise ValueError('title is required')
+        if not self.id:
+            raise ValueError('id is required')
+        if not self.updated:
+            raise ValueError('updated is required')
+            
+        # Look for arguments that we haven't matched with object members.
+        # They will be added to the custom dictionary.
+        # This way we can have custom fields not specified in this class.
+        self.custom = {}
+        properties = dir(self)
+        
+        for name, value in kwargs.items():
+            if ( properties.count(name) == 0 ):
+                self.custom[name] = value
+        
+
+    def __repr__(self):
+        return '<%s %r>' % (
+            self.__class__.__name__,
+            self.title
+        )
+        
+    def generate(self):
+        """Returns lxml element tree representation of the feed entry"""
+        return self.feed_generator.generate(self)
+
+    def to_string(self):
+        """Convert the feed item into a unicode object."""
+        return etree.tostring(self.generate(), encoding='utf-8')        
+
+    def __unicode__(self):
+        return self.to_string()
+
+    def __str__(self):
+        return self.to_string().encode('utf-8')
+
+
--- a/mediagoblin/webfinger/views.py
+++ b/mediagoblin/webfinger/views.py
@ -15,32 +15,100 @@
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.

 import re
-import mediagoblin.mg_globals as mg_globals

-from mediagoblin.tools.response import render_to_response
+from urlparse import urlparse

-LRDD_TEMPLATE = '{protocol}://{host}/api/webfinger/xrd?uri={{uri}}'
+from mediagoblin.tools.response import render_to_response, render_404

 def host_meta(request):
    '''
    Webfinger host-meta
    '''
+
+    placeholder = 'MG_LRDD_PLACEHOLDER'
+
+    lrdd_title = 'GNU MediaGoblin - User lookup'
+
+    lrdd_template = request.urlgen(
+        'mediagoblin.webfinger.xrd',
+        uri=placeholder,
+        qualified=True)
+
    return render_to_response(
        request,
        'mediagoblin/webfinger/host-meta.xml',
        {'request': request,
-         'lrdd_template': LRDD_TEMPLATE.format(
-                protocol='http',
-                host=request.host)})
+         'lrdd_template': lrdd_template,
+         'lrdd_title': lrdd_title,
+         'placeholder': placeholder})
+
+MATCH_SCHEME_PATTERN = re.compile(r'^acct:')

 def xrd(request):
    ''' 
    Find user data based on a webfinger URI
    '''
-    return render_to_response(
-        request,
-        'mediagoblin/webfinger/xrd.xml',
-        {'request': request,
-         'username': re.search(
-                r'^(acct:)?([^@]*)',
-                request.GET.get('uri')).group(2)})
+    param_uri = request.GET.get('uri')
+
+    if not param_uri:
+        return render_404(request)
+
+    '''
+    :py:module:`urlparse` does not recognize usernames in URIs of the
+    form ``acct:user@example.org`` or ``user@example.org``.
+    '''
+    if not MATCH_SCHEME_PATTERN.search(param_uri):
+        # Assume the URI is in the form ``user@example.org``
+        uri = 'acct://' + param_uri
+    else:
+        # Assumes the URI looks like ``acct:user@example.org
+        uri = MATCH_SCHEME_PATTERN.sub(
+            'acct://', param_uri)
+
+    parsed = urlparse(uri)
+
+    xrd_subject = param_uri
+
+    # TODO: Verify that the user exists
+    # Q: Does webfinger support error handling in this case?
+    #    Returning 404 seems intuitive, need to check.
+    if parsed.username:
+        # The user object
+        # TODO: Fetch from database instead of using the MockUser
+        user = MockUser()
+        user.username = parsed.username
+
+        xrd_links = [
+            {'attrs': {
+                    'rel': 'http://microformats.org/profile/hcard',
+                    'href': request.urlgen(
+                        'mediagoblin.user_pages.user_home',
+                        user=user.username,
+                        qualified=True)}},
+            {'attrs': {
+                    'rel': 'http://schemas.google.com/g/2010#updates-from',
+                    'href': request.urlgen(
+                        'mediagoblin.user_pages.atom_feed',
+                        user=user.username,
+                        qualified=True)}}]
+
+        xrd_alias = request.urlgen(
+            'mediagoblin.user_pages.user_home',
+            user=user.username,
+            qualified=True)
+
+        return render_to_response(
+            request,
+            'mediagoblin/webfinger/xrd.xml',
+            {'request': request,
+             'subject': xrd_subject,
+             'alias': xrd_alias,
+             'links': xrd_links })
+    else:
+        return render_404(request)
+
+class MockUser(object):
+    '''
+    TEMPORARY user object
+    '''
+    username = None