add pdf media type

The new media type supports pdf and a subset of media recognized by libreoffice via unoconv. Every document added goes through: * conversion to pdf with unoconv if not already a pdf * creation of thumbnail and medium sized image, and pdfinfo generates some information (even for unoconv-produced docs - should fix this) Poppler (pdftocairo, pdfinfo) is used. http://poppler.freedesktop.org/ A working but uglified pdf.js integration exists, which is enabled by setting pdf.pdf_js=true in mediagoblin_local.ini (disabled in mediagoblin.ini) Adds one test to the test_submission test suite, and another separate test_pdf suite. The tests are only run if media_types.pdf.processing.check_prerequisites passes, so the test suite will not require any extra package. TODO: make test suite say 'skipped' in that case instead of just 'ok' Signed-off-by: Alon Levy <alon@pobox.com>
2013-03-27 12:21:10 +02:00 · 2013-03-27 12:21:10 +02:00 · a80ebf3b64
commit a80ebf3b64
parent 3cadb4a6cd
17 changed files with 5838 additions and 1 deletions
--- a/.gitmodules
+++ b/.gitmodules
@ -0,0 +1,6 @@
 [submodule "pdf.js"]
 	path = pdf.js
 	url = git://github.com/mozilla/pdf.js.git
 [submodule "extlib/pdf.js"]
 	path = extlib/pdf.js
 	url = git://github.com/mozilla/pdf.js.git
--- a/docs/source/siteadmin/media-types.rst
+++ b/docs/source/siteadmin/media-types.rst
@ -195,3 +195,40 @@ Run
 You should now be able to upload .obj and .stl files and MediaGoblin
 will be able to present them to your wide audience of admirers!
 PDF and Document
 ================
 To enable the "PDF and Document" support plugin, you need pdftocairo, pdfinfo,
 unoconv with headless support.  All executables must be on your execution path.
 To install this on Fedora:
 .. code-block:: bash
    sudo yum install -y poppler-utils unoconv libreoffice-headless
 pdf.js relies on git submodules, so be sure you have fetched them:
 .. code-block:: bash
    git submodule init
    git submodule update
 This feature has been tested on Fedora with:
 poppler-utils-0.20.2-9.fc18.x86_64
 unoconv-0.5-2.fc18.noarch
 libreoffice-headless-3.6.5.2-8.fc18.x86_64
 It may work on some earlier versions, but that is not guaranteed.
 Add ``mediagoblin.media_types.pdf`` to the ``media_types`` list in your
 ``mediagoblin_local.ini`` and restart MediaGoblin. 
 Run
 .. code-block:: bash
    ./bin/gmg dbupdate
--- a/extlib/pdf.js
+++ b/extlib/pdf.js
@ -0,0 +1 @@
 Subproject commit b898935eb04fa86e0911fdfa0d41828cb04802f8
--- a/mediagoblin/config_spec.ini
+++ b/mediagoblin/config_spec.ini
@ -125,6 +125,8 @@ spectrogram_fft_size = integer(default=4096)
 [media_type:mediagoblin.media_types.ascii]
 thumbnail_font = string(default=None)
 [media_type:mediagoblin.media_types.pdf]
 pdf_js = boolean(default=False)
 [celery]
 # default result stuff
--- a/mediagoblin/media_types/pdf/init.py
+++ b/mediagoblin/media_types/pdf/init.py
@ -0,0 +1,29 @@
 # GNU MediaGoblin -- federated, autonomous media hosting
 # Copyright (C) 2011, 2012 MediaGoblin contributors.  See AUTHORS.
 #
 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU Affero General Public License as published by
 # the Free Software Foundation, either version 3 of the License, or
 # (at your option) any later version.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU Affero General Public License for more details.
 #
 # You should have received a copy of the GNU Affero General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 from mediagoblin.media_types.pdf.processing import process_pdf, \
    sniff_handler
 # Registration record for the PDF media type: names the processing and
 # sniffing callables, the display template, the default thumbnail and the
 # accepted file extensions.
 MEDIA_MANAGER = {
    "human_readable": "PDF",
    # The processor is given as a callable; a dotted-path string may be an
    # alternative (open question carried over from the original comment).
    "processor": process_pdf,
    "sniff_handler": sniff_handler,
    "display_template": "mediagoblin/media_displays/pdf.html",
    "default_thumb": "images/media_thumbs/pdf.jpg",
    "accepted_extensions": ["pdf"],
 }
--- a/mediagoblin/media_types/pdf/migrations.py
+++ b/mediagoblin/media_types/pdf/migrations.py
@ -0,0 +1,17 @@
 # GNU MediaGoblin -- federated, autonomous media hosting
 # Copyright (C) 2011, 2012 MediaGoblin contributors.  See AUTHORS.
 #
 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU Affero General Public License as published by
 # the Free Software Foundation, either version 3 of the License, or
 # (at your option) any later version.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU Affero General Public License for more details.
 #
 # You should have received a copy of the GNU Affero General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 # Migration registry for the pdf media type; empty because no migrations
 # are defined here.
 MIGRATIONS = {}
--- a/mediagoblin/media_types/pdf/models.py
+++ b/mediagoblin/media_types/pdf/models.py
@ -0,0 +1,58 @@
 # GNU MediaGoblin -- federated, autonomous media hosting
 # Copyright (C) 2011, 2012 MediaGoblin contributors.  See AUTHORS.
 #
 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU Affero General Public License as published by
 # the Free Software Foundation, either version 3 of the License, or
 # (at your option) any later version.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU Affero General Public License for more details.
 #
 # You should have received a copy of the GNU Affero General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 from mediagoblin.db.base import Base
 from sqlalchemy import (
    Column, Float, Integer, String, DateTime, ForeignKey)
 from sqlalchemy.orm import relationship, backref
 # Name of the attribute added to MediaEntry by the backref declared below.
 BACKREF_NAME = "pdf__media_data"
 class PdfData(Base):
    """Per-entry PDF metadata, stored one row per media entry.

    The pdf_* columns cache values that pdfinfo can report for the document.
    """
    __tablename__ = "pdf__mediadata"
    # The primary key *and* reference to the main media_entry
    media_entry = Column(Integer, ForeignKey('core__media_entries.id'),
        primary_key=True)
    # One-to-one link (uselist=False) back to the MediaEntry; the cascade
    # setting removes this row together with its entry.
    get_media_entry = relationship("MediaEntry",
        backref=backref(BACKREF_NAME, uselist=False,
                        cascade="all, delete-orphan"))
    # NOTE(review): looks redundant with pdf_pages below - the processing
    # code fills 'pdf_pages', not 'pages'; confirm before relying on it.
    pages = Column(Integer)
    # These are taken from what pdfinfo can do, perhaps others make sense too
    pdf_author = Column(String)
    pdf_title = Column(String)
    # note on keywords: this is the pdf parsed string, it should be considered a cached
    # value like the rest of these values, since they can be deduced at query time / client
    # side too.
    pdf_keywords = Column(String)
    pdf_creator = Column(String)
    pdf_producer = Column(String)
    pdf_creation_date = Column(DateTime)
    pdf_modified_date = Column(DateTime)
    # PDF version split into its two integer components
    pdf_version_major = Column(Integer)
    pdf_version_minor = Column(Integer)
    pdf_page_size_width = Column(Float) # unit: pts
    pdf_page_size_height = Column(Float)
    pdf_pages = Column(Integer)
 # Module-level hooks naming the model class(es) defined by this media type.
 DATA_MODEL = PdfData
 MODELS = [PdfData]
--- a/mediagoblin/media_types/pdf/processing.py
+++ b/mediagoblin/media_types/pdf/processing.py
@ -0,0 +1,276 @@
 # GNU MediaGoblin -- federated, autonomous media hosting
 # Copyright (C) 2011, 2012 MediaGoblin contributors.  See AUTHORS.
 #
 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU Affero General Public License as published by
 # the Free Software Foundation, either version 3 of the License, or
 # (at your option) any later version.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU Affero General Public License for more details.
 #
 # You should have received a copy of the GNU Affero General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 import chardet
 import os
 import Image
 import logging
 import dateutil.parser
 from subprocess import STDOUT, check_output, call, CalledProcessError
 from mediagoblin import mg_globals as mgg
 from mediagoblin.processing import (create_pub_filepath,
                                    FilenameBuilder, BadMediaFail)
 from mediagoblin.tools.translate import fake_ugettext_passthrough as _
 _log = logging.getLogger(__name__)
 # TODO - cache (memoize) util
 # This list was created from the output of `unoconv --show`, removing by hand
 # the types that are already supported better via other media types (those
 # are kept below as commented-out entries for reference).
 # NOTE(review): the entries are unoconv format names while sniff_handler
 # compares them with the lower-cased file extension, so names that differ
 # from their extension (e.g. 'doc6', 'docx7', 'sdc3') presumably never match
 # an uploaded file - confirm.
 unoconv_supported = [
  'bib', #      - BibTeX [.bib]
  #bmp      - Windows Bitmap [.bmp]
  'csv', #      - Text CSV [.csv]
  'dbf', #      - dBASE [.dbf]
  'dif', #      - Data Interchange Format [.dif]
  'doc6', #     - Microsoft Word 6.0 [.doc]
  'doc95', #    - Microsoft Word 95 [.doc]
  'docbook', #  - DocBook [.xml]
  'doc', #      - Microsoft Word 97/2000/XP [.doc]
  'docx7', #    - Microsoft Office Open XML [.docx]
  'docx', #     - Microsoft Office Open XML [.docx]
  #emf      - Enhanced Metafile [.emf]
  'eps', #      - Encapsulated PostScript [.eps]
  'fodp', #     - OpenDocument Presentation (Flat XML) [.fodp]
  'fods', #     - OpenDocument Spreadsheet (Flat XML) [.fods]
  'fodt', #     - OpenDocument Text (Flat XML) [.fodt]
  #gif      - Graphics Interchange Format [.gif]
  'html', #     - HTML Document (OpenOffice.org Writer) [.html]
  #jpg      - Joint Photographic Experts Group [.jpg]
  'latex', #    - LaTeX 2e [.ltx]
  'mediawiki', # - MediaWiki [.txt]
  'met', #      - OS/2 Metafile [.met]
  'odd', #      - OpenDocument Drawing [.odd]
  'odg', #      - ODF Drawing (Impress) [.odg]
  'odp', #      - ODF Presentation [.odp]
  'ods', #      - ODF Spreadsheet [.ods]
  'odt', #      - ODF Text Document [.odt]
  'ooxml', #    - Microsoft Office Open XML [.xml]
  'otg', #      - OpenDocument Drawing Template [.otg]
  'otp', #      - ODF Presentation Template [.otp]
  'ots', #      - ODF Spreadsheet Template [.ots]
  'ott', #      - Open Document Text [.ott]
  #pbm      - Portable Bitmap [.pbm]
  #pct      - Mac Pict [.pct]
  'pdb', #      - AportisDoc (Palm) [.pdb]
  #pdf      - Portable Document Format [.pdf]
  #pgm      - Portable Graymap [.pgm]
  #png      - Portable Network Graphic [.png]
  'pot', #      - Microsoft PowerPoint 97/2000/XP Template [.pot]
  'potm', #     - Microsoft PowerPoint 2007/2010 XML Template [.potm]
  #ppm      - Portable Pixelmap [.ppm]
  'pps', #      - Microsoft PowerPoint 97/2000/XP (Autoplay) [.pps]
  'ppt', #      - Microsoft PowerPoint 97/2000/XP [.ppt]
  'pptx', #     - Microsoft PowerPoint 2007/2010 XML [.pptx]
  'psw', #      - Pocket Word [.psw]
  'pwp', #      - PlaceWare [.pwp]
  'pxl', #      - Pocket Excel [.pxl]
  #ras      - Sun Raster Image [.ras]
  'rtf', #      - Rich Text Format [.rtf]
  'sda', #      - StarDraw 5.0 (OpenOffice.org Impress) [.sda]
  'sdc3', #     - StarCalc 3.0 [.sdc]
  'sdc4', #     - StarCalc 4.0 [.sdc]
  'sdc', #      - StarCalc 5.0 [.sdc]
  'sdd3', #     - StarDraw 3.0 (OpenOffice.org Impress) [.sdd]
  'sdd4', #     - StarImpress 4.0 [.sdd]
  'sdd', #      - StarImpress 5.0 [.sdd]
  'sdw3', #     - StarWriter 3.0 [.sdw]
  'sdw4', #     - StarWriter 4.0 [.sdw]
  'sdw', #      - StarWriter 5.0 [.sdw]
  'slk', #      - SYLK [.slk]
  'stc', #      - OpenOffice.org 1.0 Spreadsheet Template [.stc]
  'std', #      - OpenOffice.org 1.0 Drawing Template [.std]
  'sti', #      - OpenOffice.org 1.0 Presentation Template [.sti]
  'stw', #      - Open Office.org 1.0 Text Document Template [.stw]
  #svg      - Scalable Vector Graphics [.svg]
  'svm', #      - StarView Metafile [.svm]
  'swf', #      - Macromedia Flash (SWF) [.swf]
  'sxc', #      - OpenOffice.org 1.0 Spreadsheet [.sxc]
  'sxd3', #     - StarDraw 3.0 [.sxd]
  'sxd5', #     - StarDraw 5.0 [.sxd]
  'sxd', #      - OpenOffice.org 1.0 Drawing (OpenOffice.org Impress) [.sxd]
  'sxi', #      - OpenOffice.org 1.0 Presentation [.sxi]
  'sxw', #      - Open Office.org 1.0 Text Document [.sxw]
  #text     - Text Encoded [.txt]
  #tiff     - Tagged Image File Format [.tiff]
  #txt      - Text [.txt]
  'uop', #      - Unified Office Format presentation [.uop]
  'uos', #      - Unified Office Format spreadsheet [.uos]
  'uot', #      - Unified Office Format text [.uot]
  'vor3', #     - StarDraw 3.0 Template (OpenOffice.org Impress) [.vor]
  'vor4', #     - StarWriter 4.0 Template [.vor]
  'vor5', #     - StarDraw 5.0 Template (OpenOffice.org Impress) [.vor]
  'vor', #      - StarCalc 5.0 Template [.vor]
  #wmf      - Windows Metafile [.wmf]
  'xhtml', #    - XHTML Document [.html]
  'xls5', #     - Microsoft Excel 5.0 [.xls]
  'xls95', #    - Microsoft Excel 95 [.xls]
  'xls', #      - Microsoft Excel 97/2000/XP [.xls]
  'xlt5', #     - Microsoft Excel 5.0 Template [.xlt]
  'xlt95', #    - Microsoft Excel 95 Template [.xlt]
  'xlt', #      - Microsoft Excel 97/2000/XP Template [.xlt]
  #xpm      - X PixMap [.xpm]
 ]
 def is_unoconv_working():
    """Return True if ``unoconv --show`` runs and prints no 'ERROR'.

    Returns False when the unoconv executable cannot be found on PATH, when
    it cannot be launched, when it exits with a non-zero status, or when its
    combined stdout/stderr output contains the string 'ERROR'.
    """
    unoconv = where('unoconv')
    if unoconv is None:
        # Nothing to run; passing None as argv[0] would raise instead of
        # giving a clean "not working" answer.
        return False
    try:
        output = check_output([unoconv, '--show'], stderr=STDOUT)
    except (CalledProcessError, OSError):
        # CalledProcessError: non-zero exit status.
        # OSError: the binary could not be executed at all.
        _log.warn(_('unoconv failing to run, check log file'))
        return False
    return 'ERROR' not in output
 def supported_extensions(cache=[None]):
    """Return the list of file extensions this media type accepts.

    The mutable default argument is deliberate: it memoizes the result so
    the unoconv probe runs at most once per process. ``cache[0] is None``
    marks the list as not yet initialised.

    :param cache: memo list; callers normally leave this at its default.
    :returns: the cache list itself - 'pdf' first, followed by the
        unoconv_supported entries when unoconv is found and working.
    """
    if cache[0] is None:
        cache[0] = 'pdf'
        # TODO: must have libreoffice-headless installed too, need to check for it
        if where('unoconv') and is_unoconv_working():
            cache.extend(unoconv_supported)
    return cache
 def where(name):
    """Locate the executable *name* on the PATH.

    Each PATH directory is searched in order. A candidate only counts if it
    is a regular file that the current process may execute, so directories
    and non-executable files that merely share the name are skipped.

    :param name: bare executable name, e.g. 'pdfinfo'.
    :returns: full path of the first match, or None if there is none
        (including when PATH is not set at all).
    """
    for directory in os.environ.get('PATH', '').split(os.pathsep):
        candidate = os.path.join(directory, name)
        if os.path.isfile(candidate) and os.access(candidate, os.X_OK):
            return candidate
    return None
 def check_prerequisites():
    """Return True if the external tools needed for pdf processing exist.

    Checks, in order, that 'pdfinfo' and 'pdftocairo' can be located with
    where(). The first missing tool is logged as a warning and False is
    returned.
    """
    for tool in ('pdfinfo', 'pdftocairo'):
        if not where(tool):
            # Name the actual tool so the warning can be acted upon.
            _log.warn('missing {0}'.format(tool))
            return False
    return True
 def sniff_handler(media_file, **kw):
    """Decide whether this media type can handle an uploaded file.

    The decision is based only on the file extension of ``kw['media']``;
    *media_file* itself is not inspected.

    :returns: True when the prerequisites are present and the lower-cased
        extension is one of supported_extensions(), False otherwise.
    """
    if not check_prerequisites():
        return False
    media = kw.get('media')
    if media is None:
        return False
    extension = os.path.splitext(media.filename)[1]
    # Drop the leading dot before comparing
    return extension[1:].lower() in supported_extensions()
 def create_pdf_thumb(original, thumb_filename, width, height):
    """Render a single png image of the pdf *original* using pdftocairo.

    :param original: path of the pdf to render.
    :param thumb_filename: output path; its last four characters (expected
        to be '.png') are removed because pdftocairo adds '.png' itself.
    :param width: maximum width; the smaller of width/height is passed as
        the '-scale-to' value.
    :param height: maximum height.
    """
    output_root = thumb_filename[:-4]
    pdftocairo = where('pdftocairo')
    scale = str(min(width, height))
    command = [pdftocairo, '-scale-to', scale,
               '-singlefile', '-png', original, output_root]
    _log.debug('calling {0}'.format(repr(' '.join(command))))
    call(executable=pdftocairo, args=command)
 def pdf_info(original):
    """
    Extract a dictionary of pdf information by running ``pdfinfo``. This
    could use a library instead of a process.

    Note: I'm assuming pdfinfo output is sanitized (integers where integers are
    expected, etc.) - if this is wrong then an exception will be raised and caught
    leading to the dreaded error page. It seems a safe assumption.

    :param original: path of the pdf file to inspect.
    :returns: dict with keys pdf_pages (when reported), pdf_page_size_width,
        pdf_page_size_height, pdf_keywords, pdf_creator, pdf_producer,
        pdf_author, pdf_title, pdf_version_major, pdf_version_minor and,
        when pdfinfo reports them in a parseable form, pdf_creation_date
        and pdf_modified_date.
    :raises BadMediaFail: if pdfinfo exits with a non-zero status.
    """
    ret_dict = {}
    pdfinfo = where('pdfinfo')
    try:
        lines = check_output(executable=pdfinfo,
                                args=[pdfinfo, original]).split(os.linesep)
    except CalledProcessError:
        _log.debug('pdfinfo could not read the pdf file.')
        raise BadMediaFail()
    # Every useful line is "Key:   value"; split on the first colon only so
    # values that contain colons (dates, titles) stay intact.
    info_dict = dict([[part.strip() for part in l.strip().split(':', 1)]
                      for l in lines if ':' in l])
    # Unpack (column name, pdfinfo key) so the lookup uses the pdfinfo key
    # and the result is stored under the matching column name.
    for db_key, date_key in [('pdf_modified_date', 'ModDate'),
                             ('pdf_creation_date', 'CreationDate')]:
        if date_key not in info_dict:
            continue
        try:
            ret_dict[db_key] = dateutil.parser.parse(info_dict[date_key])
        except (ValueError, OverflowError):
            # Dates are optional metadata: skip one that cannot be parsed
            # rather than failing the whole document.
            _log.debug('could not parse pdfinfo date field {0}'.format(
                date_key))
    for db_key, int_key in [('pdf_pages', 'Pages')]:
        if int_key in info_dict:
            ret_dict[db_key] = int(info_dict[int_key])
    # parse 'Page size' field, e.g.: 595 x 842 pts (A4)
    page_size_parts = info_dict['Page size'].split()
    ret_dict['pdf_page_size_width'] = float(page_size_parts[0])
    ret_dict['pdf_page_size_height'] = float(page_size_parts[2])
    for db_key, str_key in [('pdf_keywords', 'Keywords'),
        ('pdf_creator', 'Creator'), ('pdf_producer', 'Producer'),
        ('pdf_author', 'Author'), ('pdf_title', 'Title')]:
        ret_dict[db_key] = info_dict.get(str_key, None)
    # 'PDF version' looks like "1.3": major and minor are stored separately
    ret_dict['pdf_version_major'], ret_dict['pdf_version_minor'] = \
        map(int, info_dict['PDF version'].split('.'))
    return ret_dict
 def process_pdf(proc_state):
    """Code to process a pdf file. Will be run by celery.

    Stores the original upload, converts it to pdf with unoconv when it is
    not already a pdf, extracts metadata with pdf_info() and stores 'thumb'
    and 'medium' png renderings before saving the entry.

    A Workbench() represents a local temporary dir. It is automatically
    cleaned up when this function exits.

    :param proc_state: processing state object; provides the entry, the
        workbench and the queued file.
    :raises BadMediaFail: if unoconv produces no pdf, or (via pdf_info) if
        pdfinfo cannot read the file.
    """
    entry = proc_state.entry
    workbench = proc_state.workbench
    queued_filename = proc_state.get_queued_filename()
    name_builder = FilenameBuilder(queued_filename)
    # NOTE(review): the local is never read below; the call is kept for its
    # side effect of making sure entry has a 'media_files' item.
    media_files_dict = entry.setdefault('media_files', {})
    # Copy our queued local workbench to its final destination
    original_dest = name_builder.fill('{basename}{ext}')
    proc_state.copy_original(original_dest)
    # Create a pdf if this is a different doc, store pdf for viewer
    ext = queued_filename.rsplit('.', 1)[-1].lower()
    if ext == 'pdf':
        pdf_filename = queued_filename
    else:
        # The converted file is expected next to the queued file, with the
        # extension replaced by '.pdf'; its existence is checked below.
        pdf_filename = queued_filename.rsplit('.', 1)[0] + '.pdf'
        unoconv = where('unoconv')
        call(executable=unoconv,
             args=[unoconv, '-v', '-f', 'pdf', queued_filename])
        if not os.path.exists(pdf_filename):
            _log.debug('unoconv failed to convert file to pdf')
            raise BadMediaFail()
        proc_state.store_public(keyname=u'pdf', local_file=pdf_filename)
    pdf_info_dict = pdf_info(pdf_filename)
    # Render one png per configured size (thumbnail and medium view)
    for name, width, height in [
        (u'thumb', mgg.global_config['media:thumb']['max_width'],
                   mgg.global_config['media:thumb']['max_height']),
        (u'medium', mgg.global_config['media:medium']['max_width'],
                   mgg.global_config['media:medium']['max_height']),
        ]:
        filename = name_builder.fill('{basename}.%s.png' % name)
        path = workbench.joinpath(filename)
        create_pdf_thumb(pdf_filename, path, width, height)
        # NOTE(review): this check disappears when Python runs with -O;
        # consider raising an explicit error instead.
        assert(os.path.exists(path))
        proc_state.store_public(keyname=name, local_file=path)
    proc_state.delete_queue_file()
    # Persist the extracted metadata on the entry's media data
    entry.media_data_init(**pdf_info_dict)
    entry.save()
--- a/mediagoblin/static/css/pdf_viewer.css
+++ b/mediagoblin/static/css/pdf_viewer.css
--- a/mediagoblin/static/extlib/pdf.js
+++ b/mediagoblin/static/extlib/pdf.js
@ -0,0 +1 @@
 ../../../extlib/pdf.js
--- a/mediagoblin/static/js/pdf_viewer.js
+++ b/mediagoblin/static/js/pdf_viewer.js
--- a/mediagoblin/templates/mediagoblin/base.html
+++ b/mediagoblin/templates/mediagoblin/base.html
@ -16,7 +16,10 @@
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 -#}
 <!doctype html>
-<html>
+<html
 {% block mediagoblin_html_tag %}
 {% endblock mediagoblin_html_tag %}
 >
  <head>
    <meta charset="utf-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
--- a/mediagoblin/templates/mediagoblin/media_displays/pdf.html
+++ b/mediagoblin/templates/mediagoblin/media_displays/pdf.html
@ -0,0 +1,284 @@
 {#
 # GNU MediaGoblin -- federated, autonomous media hosting
 # Copyright (C) 2011, 2012 MediaGoblin contributors.  See AUTHORS.
 #
 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU Affero General Public License as published by
 # the Free Software Foundation, either version 3 of the License, or
 # (at your option) any later version.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU Affero General Public License for more details.
 #
 # You should have received a copy of the GNU Affero General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 #}
 {% extends 'mediagoblin/user_pages/media.html' %}
 {% set medium_view = request.app.public_store.file_url(
   media.media_files['medium']) %}
 {% if 'pdf' in media.media_files %}
    {% set pdf_view = request.app.public_store.file_url(
        media.media_files['pdf']) %}
 {% else %}
    {% set pdf_view = request.app.public_store.file_url(
        media.media_files['original']) %}
 {% endif %}
 {% set pdf_js = global_config.get('media_type:mediagoblin.media_types.pdf', {}).get('pdf_js', False) %}
 {# The condition is placed inside the block: in a template that extends
    another one a block is always defined, so wrapping the block in an
    if-statement would not disable it. #}
 {% block mediagoblin_html_tag %}
    {% if pdf_js %}
    dir="ltr" mozdisallowselectionprint moznomarginboxes
    {% endif %}
 {% endblock mediagoblin_html_tag %}
 {% block mediagoblin_head -%}
  {{ super() }}
    <meta charset="utf-8">
    <meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=1">
 {% if pdf_js %}
        <link rel="stylesheet" href="{{ request.staticdirect('/css/pdf_viewer.css') }}"/>
        {# <link rel="resource" type="application/l10n" href="locale/locale.properties"/> #}
        <script type="text/javascript">
    var DEFAULT_URL = '{{ pdf_view }}';
        </script>
        {# TODO: include compatibility only if this is not either chrome or firefox #}
        <script type="text/javascript" src="{{ request.staticdirect('/extlib/pdf.js/web/compatibility.js') }}"></script>
        <script type="text/javascript" src="{{ request.staticdirect('/extlib/pdf.js/external/webL10n/l10n.js') }}"></script>
        <script type="text/javascript" src="{{ request.staticdirect('/extlib/pdf.js/src/core.js') }}"></script>
        <script type="text/javascript" src="{{ request.staticdirect('/extlib/pdf.js/src/util.js') }}"></script>
        <script type="text/javascript" src="{{ request.staticdirect('/extlib/pdf.js/src/api.js') }}"></script>
        <script type="text/javascript" src="{{ request.staticdirect('/extlib/pdf.js/src/metadata.js') }}"></script>
        <script type="text/javascript" src="{{ request.staticdirect('/extlib/pdf.js/src/canvas.js') }}"></script>
        <script type="text/javascript" src="{{ request.staticdirect('/extlib/pdf.js/src/obj.js') }}"></script>
        <script type="text/javascript" src="{{ request.staticdirect('/extlib/pdf.js/src/function.js') }}"></script>
        <script type="text/javascript" src="{{ request.staticdirect('/extlib/pdf.js/src/charsets.js') }}"></script>
        <script type="text/javascript" src="{{ request.staticdirect('/extlib/pdf.js/src/cidmaps.js') }}"></script>
        <script type="text/javascript" src="{{ request.staticdirect('/extlib/pdf.js/src/colorspace.js') }}"></script>
        <script type="text/javascript" src="{{ request.staticdirect('/extlib/pdf.js/src/crypto.js') }}"></script>
        <script type="text/javascript" src="{{ request.staticdirect('/extlib/pdf.js/src/evaluator.js') }}"></script>
        <script type="text/javascript" src="{{ request.staticdirect('/extlib/pdf.js/src/fonts.js') }}"></script>
        <script type="text/javascript" src="{{ request.staticdirect('/extlib/pdf.js/src/glyphlist.js') }}"></script>
        <script type="text/javascript" src="{{ request.staticdirect('/extlib/pdf.js/src/image.js') }}"></script>
        <script type="text/javascript" src="{{ request.staticdirect('/extlib/pdf.js/src/metrics.js') }}"></script>
        <script type="text/javascript" src="{{ request.staticdirect('/extlib/pdf.js/src/parser.js') }}"></script>
        <script type="text/javascript" src="{{ request.staticdirect('/extlib/pdf.js/src/pattern.js') }}"></script>
        <script type="text/javascript" src="{{ request.staticdirect('/extlib/pdf.js/src/stream.js') }}"></script>
        <script type="text/javascript" src="{{ request.staticdirect('/extlib/pdf.js/src/worker.js') }}"></script>
        <script type="text/javascript" src="{{ request.staticdirect('/extlib/pdf.js/external/jpgjs/jpg.js') }}"></script>
        <script type="text/javascript" src="{{ request.staticdirect('/extlib/pdf.js/src/jpx.js') }}"></script>
        <script type="text/javascript" src="{{ request.staticdirect('/extlib/pdf.js/src/jbig2.js') }}"></script>
        <script type="text/javascript" src="{{ request.staticdirect('/extlib/pdf.js/src/bidi.js') }}"></script>
        <script type="text/javascript">PDFJS.workerSrc = '{{ request.staticdirect('/extlib/pdf.js/src/worker_loader.js') }}';</script>
        <script type="text/javascript" src="{{ request.staticdirect('/extlib/pdf.js/web/debugger.js') }}"></script>
        <script type="text/javascript" src="{{ request.staticdirect('/js/pdf_viewer.js') }}"></script>
 {% endif %}
 {%- endblock %}
 {% block mediagoblin_media %}
 {% if pdf_js %}
    <div id="outerContainer">
      <div id="sidebarContainer">
        <div id="toolbarSidebar">
          <div class="splitToolbarButton toggled">
            <button id="viewThumbnail" class="toolbarButton group toggled" title="Show Thumbnails" tabindex="2" data-l10n-id="thumbs">
               <span data-l10n-id="thumbs_label">Thumbnails</span>
            </button>
            <button id="viewOutline" class="toolbarButton group" title="Show Document Outline" tabindex="3" data-l10n-id="outline">
               <span data-l10n-id="outline_label">Document Outline</span>
            </button>
          </div>
        </div>
        <div id="sidebarContent">
          <div id="thumbnailView">
          </div>
          <div id="outlineView" class="hidden">
          </div>
        </div>
      </div>  <!-- sidebarContainer -->
      <div id="mainContainer">
        <div class="findbar hidden doorHanger hiddenSmallView" id="findbar">
          <label for="findInput" class="toolbarLabel" data-l10n-id="find_label">Find:</label>
          <input id="findInput" class="toolbarField" tabindex="21">
          <div class="splitToolbarButton">
            <button class="toolbarButton findPrevious" title="" id="findPrevious" tabindex="22" data-l10n-id="find_previous">
              <span data-l10n-id="find_previous_label">Previous</span>
            </button>
            <div class="splitToolbarButtonSeparator"></div>
            <button class="toolbarButton findNext" title="" id="findNext" tabindex="23" data-l10n-id="find_next">
              <span data-l10n-id="find_next_label">Next</span>
            </button>
          </div>
          <input type="checkbox" id="findHighlightAll" class="toolbarField">
          <label for="findHighlightAll" class="toolbarLabel" tabindex="24" data-l10n-id="find_highlight">Highlight all</label>
          <input type="checkbox" id="findMatchCase" class="toolbarField">
          <label for="findMatchCase" class="toolbarLabel" tabindex="25" data-l10n-id="find_match_case_label">Match case</label>
          <span id="findMsg" class="toolbarLabel"></span>
        </div>
        <div class="toolbar">
          <div id="toolbarContainer">
            <div id="toolbarViewer">
              <div id="toolbarViewerLeft">
                <button id="sidebarToggle" class="toolbarButton" title="Toggle Sidebar" tabindex="4" data-l10n-id="toggle_sidebar">
                  <span data-l10n-id="toggle_sidebar_label">Toggle Sidebar</span>
                </button>
                <div class="toolbarButtonSpacer"></div>
                <button id="viewFind" class="toolbarButton group hiddenSmallView" title="Find in Document" tabindex="5" data-l10n-id="findbar">
                   <span data-l10n-id="findbar_label">Find</span>
                </button>
                <div class="splitToolbarButton">
                  <button class="toolbarButton pageUp" title="Previous Page" id="previous" tabindex="6" data-l10n-id="previous">
                    <span data-l10n-id="previous_label">Previous</span>
                  </button>
                  <div class="splitToolbarButtonSeparator"></div>
                  <button class="toolbarButton pageDown" title="Next Page" id="next" tabindex="7" data-l10n-id="next">
                    <span data-l10n-id="next_label">Next</span>
                  </button>
                </div>
                <label id="pageNumberLabel" class="toolbarLabel" for="pageNumber" data-l10n-id="page_label">Page: </label>
                <input type="number" id="pageNumber" class="toolbarField pageNumber" value="1" size="4" min="1" tabindex="8">
                </input>
                <span id="numPages" class="toolbarLabel"></span>
              </div>
              <div id="toolbarViewerRight">
                <input id="fileInput" class="fileInput" type="file" oncontextmenu="return false;" style="visibility: hidden; position: fixed; right: 0; top: 0" />
                <button id="fullscreen" class="toolbarButton fullscreen hiddenSmallView" title="Switch to Presentation Mode" tabindex="12" data-l10n-id="presentation_mode">
                  <span data-l10n-id="presentation_mode_label">Presentation Mode</span>
                </button>
                <button id="openFile" class="toolbarButton openFile hiddenSmallView" title="Open File" tabindex="13" data-l10n-id="open_file">
                   <span data-l10n-id="open_file_label">Open</span>
                </button>
                <button id="print" class="toolbarButton print" title="Print" tabindex="14" data-l10n-id="print">
                  <span data-l10n-id="print_label">Print</span>
                </button>
                <button id="download" class="toolbarButton download" title="Download" tabindex="15" data-l10n-id="download">
                  <span data-l10n-id="download_label">Download</span>
                </button>
                <!-- <div class="toolbarButtonSpacer"></div> -->
                <a href="#" id="viewBookmark" class="toolbarButton bookmark hiddenSmallView" title="Current view (copy or open in new window)" tabindex="16" data-l10n-id="bookmark"><span data-l10n-id="bookmark_label">Current View</span></a>
              </div>
              <div class="outerCenter">
                <div class="innerCenter" id="toolbarViewerMiddle">
                  <div class="splitToolbarButton">
                    <button class="toolbarButton zoomOut" id="zoom_out" title="Zoom Out" tabindex="9" data-l10n-id="zoom_out">
                      <span data-l10n-id="zoom_out_label">Zoom Out</span>
                    </button>
                    <div class="splitToolbarButtonSeparator"></div>
                    <button class="toolbarButton zoomIn" id="zoom_in" title="Zoom In" tabindex="10" data-l10n-id="zoom_in">
                      <span data-l10n-id="zoom_in_label">Zoom In</span>
                     </button>
                  </div>
                  <span id="scaleSelectContainer" class="dropdownToolbarButton">
                     <select id="scaleSelect" title="Zoom" oncontextmenu="return false;" tabindex="11" data-l10n-id="zoom">
                      <option id="pageAutoOption" value="auto" selected="selected" data-l10n-id="page_scale_auto">Automatic Zoom</option>
                      <option id="pageActualOption" value="page-actual" data-l10n-id="page_scale_actual">Actual Size</option>
                      <option id="pageFitOption" value="page-fit" data-l10n-id="page_scale_fit">Fit Page</option>
                      <option id="pageWidthOption" value="page-width" data-l10n-id="page_scale_width">Full Width</option>
                      <option id="customScaleOption" value="custom"></option>
                      <option value="0.5">50%</option>
                      <option value="0.75">75%</option>
                      <option value="1">100%</option>
                      <option value="1.25">125%</option>
                      <option value="1.5">150%</option>
                      <option value="2">200%</option>
                    </select>
                  </span>
                </div>
              </div>
            </div>
          </div>
        </div>
        <menu type="context" id="viewerContextMenu">
          <menuitem label="First Page" id="first_page"
                    data-l10n-id="first_page" ></menuitem>
          <menuitem label="Last Page" id="last_page"
                    data-l10n-id="last_page" ></menuitem>
          <menuitem label="Rotate Counter-Clockwise" id="page_rotate_ccw"
                    data-l10n-id="page_rotate_ccw" ></menuitem>
          <menuitem label="Rotate Clockwise" id="page_rotate_cw"
                    data-l10n-id="page_rotate_cw" ></menuitem>
        </menu>
        <div id="viewerContainer" tabindex="1">
          <div id="viewer" contextmenu="viewerContextMenu"></div>
        </div>
        <div id="loadingBox">
          <div id="loading"></div>
          <div id="loadingBar"><div class="progress"></div></div>
        </div>
        <div id="errorWrapper" hidden='true'>
          <div id="errorMessageLeft">
            <span id="errorMessage"></span>
            <button id="errorShowMore" onclick="" oncontextmenu="return false;" data-l10n-id="error_more_info">
              More Information
            </button>
            <button id="errorShowLess" onclick="" oncontextmenu="return false;" data-l10n-id="error_less_info" hidden='true'>
              Less Information
            </button>
          </div>
          <div id="errorMessageRight">
            <button id="errorClose" oncontextmenu="return false;" data-l10n-id="error_close">
              Close
            </button>
          </div>
          <div class="clearBoth"></div>
          <textarea id="errorMoreInfo" hidden='true' readonly="readonly"></textarea>
        </div>
      </div> <!-- mainContainer -->
    </div> <!-- outerContainer -->
    <div id="printContainer"></div>
 {% else %}
  <a href="{{ pdf_view }}">
  <img id="medium"
  class="media_image"
  src="{{ medium_view }}"
  alt="{% trans media_title=media.title -%} Image for {{ media_title}}{% endtrans %}"/>
  </a>
 {% endif %}
 {% endblock %}
 {% block mediagoblin_sidebar %}
  <h3>{% trans %}Download{% endtrans %}</h3>
  <ul>
    {% if 'original' in media.media_files %}
      <li>
        <a href="{{ request.app.public_store.file_url(
                       media.media_files.original) }}">
          {%- trans %}Original file{% endtrans -%}
        </a>
      </li>
    {% endif %}
    {% if 'pdf' in media.media_files %}
      <li>
        <a href="{{ request.app.public_store.file_url(
                       media.media_files.pdf) }}">
          {%- trans %}PDF file{% endtrans -%}
        </a>
      </li>
    {% endif %}
  </ul>
 {% endblock %}
--- a/mediagoblin/tests/test_mgoblin_app.ini
+++ b/mediagoblin/tests/test_mgoblin_app.ini
@ -16,6 +16,8 @@ allow_attachments = True
 # mediagoblin.init.celery.from_celery
 celery_setup_elsewhere = true
 media_types = mediagoblin.media_types.image, mediagoblin.media_types.pdf
 [storage:publicstore]
 base_dir = %(here)s/test_user_dev/media/public
 base_url = /mgoblin_media/
--- a/mediagoblin/tests/test_pdf.py
+++ b/mediagoblin/tests/test_pdf.py
@ -0,0 +1,45 @@
 # GNU MediaGoblin -- federated, autonomous media hosting
 # Copyright (C) 2013 MediaGoblin contributors.  See AUTHORS.
 #
 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU Affero General Public License as published by
 # the Free Software Foundation, either version 3 of the License, or
 # (at your option) any later version.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU Affero General Public License for more details.
 #
 # You should have received a copy of the GNU Affero General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 import tempfile
 import shutil
 import os
 from mediagoblin.tests.tools import fixture_add_collection, fixture_add_user, \
    get_app
 from mediagoblin.db.models import Collection, User
 from mediagoblin.db.base import Session
 from nose.tools import assert_equal
 from mediagoblin.media_types.pdf.processing import (
    pdf_info, check_prerequisites, create_pdf_thumb)
 GOOD='mediagoblin/tests/test_submission/good.pdf'
 def test_pdf():
    """Check pdf_info() output and thumbnail creation for the GOOD fixture.

    The test returns early, without asserting anything, when
    check_prerequisites() is falsy, so such a run is reported as passed
    rather than skipped.
    """
    if not check_prerequisites():
        return
    # Expected pdf_info() result for the GOOD fixture file.
    good_dict = {'pdf_version_major': 1, 'pdf_title': '',
        'pdf_page_size_width': 612, 'pdf_author': '',
        'pdf_keywords': '', 'pdf_pages': 10,
        'pdf_producer': 'dvips + GNU Ghostscript 7.05',
        'pdf_version_minor': 3,
        'pdf_creator': 'LaTeX with hyperref package',
        'pdf_page_size_height': 792}
    assert pdf_info(GOOD) == good_dict
    temp_dir = tempfile.mkdtemp()
    try:
        create_pdf_thumb(
            GOOD, os.path.join(temp_dir, 'good_256_256.png'), 256, 256)
    finally:
        # mkdtemp() leaves cleanup to the caller; remove the scratch
        # directory even when thumbnail creation raises so that a failing
        # run does not leak temporary directories.
        shutil.rmtree(temp_dir)
--- a/mediagoblin/tests/test_submission.py
+++ b/mediagoblin/tests/test_submission.py
@ -28,6 +28,7 @@ from mediagoblin import mg_globals
 from mediagoblin.db.models import MediaEntry
 from mediagoblin.tools import template
 from mediagoblin.media_types.image import MEDIA_MANAGER as img_MEDIA_MANAGER
 from mediagoblin.media_types.pdf.processing import check_prerequisites as pdf_check_prerequisites
 def resource(filename):
    """Return the path of `filename` under test_submission/.

    The lookup is delegated to resource_filename() against the
    'mediagoblin.tests' package.
    """
    relative_path = 'test_submission/' + filename
    return resource_filename('mediagoblin.tests', relative_path)
@ -39,6 +40,8 @@ EVIL_FILE = resource('evil')
 # Fixture file paths, resolved through resource() into test_submission/.
 EVIL_JPG = resource('evil.jpg')
 EVIL_PNG = resource('evil.png')
 BIG_BLUE = resource('bigblue.png')
 # PDF fixture uploaded by TestSubmission.test_normal_pdf.
 GOOD_PDF = resource('good.pdf')
 from .test_exif import GPS_JPG
 GOOD_TAG_STRING = u'yin,yang'
@ -125,6 +128,16 @@ class TestSubmission:
        self._setup(test_app)
        self.check_normal_upload(u'Normal upload 2', GOOD_PNG)
    def test_normal_pdf(self, test_app):
        """Submit GOOD_PDF and expect to end up on the uploader's user page.

        Nothing is exercised when pdf_check_prerequisites() is falsy; the
        test then simply passes.
        """
        if pdf_check_prerequisites():
            self._setup(test_app)
            form_fields = {'title': u'Normal upload 3 (pdf)'}
            upload_fields = self.upload_data(GOOD_PDF)
            response, context = self.do_post(
                form_fields, do_follow=True, **upload_fields)
            # After following the redirect we should be on the user's page,
            # rendered from the user.html template.
            user_page_url = '/u/{0}/'.format(self.test_user.username)
            self.check_url(response, user_page_url)
            assert 'mediagoblin/user_pages/user.html' in context
    def check_media(self, request, find_data, count=None):
        media = MediaEntry.find(find_data)
        if count is not None:
--- a/mediagoblin/tests/test_submission/good.pdf
+++ b/mediagoblin/tests/test_submission/good.pdf
		`@ -0,0 +1 @@`
							`Subproject commit b898935eb04fa86e0911fdfa0d41828cb04802f8`