add pdf media type

The new media type supports pdf and a subset of media recognized by libreoffice via
unoconv.

Every document added goes through:
* conversion to pdf with unoconv if not already a pdf
* creation of thumbnail and medium sized image, and pdfinfo generates
 some information (even for unoconv-produced docs - should fix this)

Poppler (pdftocairo, pdfinfo) is used.  http://poppler.freedesktop.org/

A working but uglified pdf.js integration exists, which is enabled by
setting pdf.pdf_js=true in mediagoblin_local.ini (disabled in mediagoblin.ini)

Adds one test to the test_submission test suite, and another separate test_pdf suite.
The tests are only run if media_types.pdf.processing.check_prerequisites passes, so
the test suite will not require any extra package.

TODO: make test suite say 'skipped' in that case instead of just 'ok'

Signed-off-by: Alon Levy <alon@pobox.com>
This commit is contained in:
Alon Levy
2013-03-27 12:21:10 +02:00
parent 3cadb4a6cd
commit a80ebf3b64
17 changed files with 5838 additions and 1 deletions

View File

@@ -16,6 +16,8 @@ allow_attachments = True
# mediagoblin.init.celery.from_celery
celery_setup_elsewhere = true
media_types = mediagoblin.media_types.image, mediagoblin.media_types.pdf
[storage:publicstore]
base_dir = %(here)s/test_user_dev/media/public
base_url = /mgoblin_media/

View File

@@ -0,0 +1,45 @@
# GNU MediaGoblin -- federated, autonomous media hosting
# Copyright (C) 2013 MediaGoblin contributors. See AUTHORS.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import tempfile
import shutil
import os
from mediagoblin.tests.tools import fixture_add_collection, fixture_add_user, \
get_app
from mediagoblin.db.models import Collection, User
from mediagoblin.db.base import Session
from nose.tools import assert_equal
from mediagoblin.media_types.pdf.processing import (
pdf_info, check_prerequisites, create_pdf_thumb)
# Sample PDF used by the tests below.
# NOTE(review): the path is relative to the current working directory --
# presumably the tests are run from the repository root; confirm.
GOOD = 'mediagoblin/tests/test_submission/good.pdf'


def test_pdf():
    """Check pdf_info() and create_pdf_thumb() against the GOOD sample PDF.

    Returns without checking anything when check_prerequisites() is falsy.
    """
    if not check_prerequisites():
        # TODO: report this case as skipped instead of passing silently.
        return

    # Values pdf_info() is expected to return for the GOOD sample file.
    good_dict = {'pdf_version_major': 1, 'pdf_title': '',
                 'pdf_page_size_width': 612, 'pdf_author': '',
                 'pdf_keywords': '', 'pdf_pages': 10,
                 'pdf_producer': 'dvips + GNU Ghostscript 7.05',
                 'pdf_version_minor': 3,
                 'pdf_creator': 'LaTeX with hyperref package',
                 'pdf_page_size_height': 792}
    assert pdf_info(GOOD) == good_dict

    temp_dir = tempfile.mkdtemp()
    try:
        create_pdf_thumb(GOOD, os.path.join(temp_dir, 'good_256_256.png'),
                         256, 256)
    finally:
        # Remove the scratch directory even when thumbnail creation raises,
        # so a failing run does not leave temporary directories behind.
        shutil.rmtree(temp_dir)

View File

@@ -28,6 +28,7 @@ from mediagoblin import mg_globals
from mediagoblin.db.models import MediaEntry
from mediagoblin.tools import template
from mediagoblin.media_types.image import MEDIA_MANAGER as img_MEDIA_MANAGER
from mediagoblin.media_types.pdf.processing import check_prerequisites as pdf_check_prerequisites
def resource(filename):
    """Return resource_filename()'s result for a test_submission/ file.

    The name is looked up as 'test_submission/<filename>' inside the
    'mediagoblin.tests' package.
    """
    relative_path = 'test_submission/' + filename
    return resource_filename('mediagoblin.tests', relative_path)
@@ -39,6 +40,8 @@ EVIL_FILE = resource('evil')
EVIL_JPG = resource('evil.jpg')
EVIL_PNG = resource('evil.png')
BIG_BLUE = resource('bigblue.png')
GOOD_PDF = resource('good.pdf')
from .test_exif import GPS_JPG
GOOD_TAG_STRING = u'yin,yang'
@@ -125,6 +128,16 @@ class TestSubmission:
self._setup(test_app)
self.check_normal_upload(u'Normal upload 2', GOOD_PNG)
def test_normal_pdf(self, test_app):
    """Submit GOOD_PDF and check the response ends up on the user's page.

    Does nothing when pdf_check_prerequisites() is falsy.
    """
    if pdf_check_prerequisites():
        self._setup(test_app)

        form_fields = {'title': u'Normal upload 3 (pdf)'}
        upload_kwargs = self.upload_data(GOOD_PDF)
        response, context = self.do_post(
            form_fields, do_follow=True, **upload_kwargs)

        # After following redirects we expect the submitter's page.
        user_page_url = '/u/{0}/'.format(self.test_user.username)
        self.check_url(response, user_page_url)
        assert 'mediagoblin/user_pages/user.html' in context
def check_media(self, request, find_data, count=None):
media = MediaEntry.find(find_data)
if count is not None:

Binary file not shown.