From 3a89c23e7fd330ea662dd57ff74a9d424d476b84 Mon Sep 17 00:00:00 2001 From: Christopher Allan Webber Date: Sat, 11 Jun 2011 11:18:03 -0500 Subject: [PATCH 01/15] Allow storage systems to be local and allow for a get_local_path method if applicable. --- mediagoblin/storage.py | 21 ++++++++++++++++++++- mediagoblin/tests/test_storage.py | 17 +++++++++++++++++ 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/mediagoblin/storage.py b/mediagoblin/storage.py index 5d7e70d6..685039b2 100644 --- a/mediagoblin/storage.py +++ b/mediagoblin/storage.py @@ -60,6 +60,9 @@ class StorageInterface(object): StorageInterface. """ + # Whether this file store is on the local filesystem. + local_storage = False + def __raise_not_implemented(self): """ Raise a warning about some component not implemented by a @@ -127,12 +130,25 @@ class StorageInterface(object): else: return filepath + def get_local_path(self, filepath): + """ + If this is a local_storage implementation, give us a link to + the local filesystem reference to this file. + + >>> storage_handler.get_local_path(['foo', 'bar', 'baz.jpg']) + u'/path/to/mounting/foo/bar/baz.jpg' + """ + # Subclasses should override this method, if applicable. + self.__raise_not_implemented() + class BasicFileStorage(StorageInterface): """ Basic local filesystem implementation of storage API """ + local_storage = True + def __init__(self, base_dir, base_url=None, **kwargs): """ Keyword arguments: @@ -177,6 +193,9 @@ class BasicFileStorage(StorageInterface): self.base_url, '/'.join(clean_listy_filepath(filepath))) + def get_local_path(self, filepath): + return self._resolve_filepath(filepath) + ########### # Utilities @@ -187,7 +206,7 @@ def clean_listy_filepath(listy_filepath): Take a listy filepath (like ['dir1', 'dir2', 'filename.jpg']) and clean out any nastiness from it. - For example: + >>> clean_listy_filepath([u'/dir1/', u'foo/../nasty', u'linooks.jpg']) [u'dir1', u'foo_.._nasty', u'linooks.jpg'] diff --git a/mediagoblin/tests/test_storage.py b/mediagoblin/tests/test_storage.py index 61dd5dca..5efed405 100644 --- a/mediagoblin/tests/test_storage.py +++ b/mediagoblin/tests/test_storage.py @@ -214,3 +214,20 @@ def test_basic_storage_url_for_file(): ['dir1', 'dir2', 'filename.txt']) expected = 'http://media.example.org/ourmedia/dir1/dir2/filename.txt' assert result == expected + + +def test_basic_storage_get_local_path(): + tmpdir, this_storage = get_tmp_filestorage() + + result = this_storage.get_local_path( + ['dir1', 'dir2', 'filename.txt']) + + expected = os.path.join( + tmpdir, 'dir1/dir2/filename.txt') + + assert result == expected + + +def test_basic_storage_is_local(): + tmpdir, this_storage = get_tmp_filestorage() + assert this_storage.local_storage is True From 6a07362dd0145643edf4b61b0ae402e3093793c2 Mon Sep 17 00:00:00 2001 From: Christopher Allan Webber Date: Sat, 11 Jun 2011 12:04:30 -0500 Subject: [PATCH 02/15] Adding a copy_locally() method to the StorageInterface and giving it a test. --- mediagoblin/storage.py | 21 +++++++++++++++++++++ mediagoblin/tests/test_storage.py | 16 ++++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/mediagoblin/storage.py b/mediagoblin/storage.py index 685039b2..ba6ac017 100644 --- a/mediagoblin/storage.py +++ b/mediagoblin/storage.py @@ -16,6 +16,7 @@ import os import re +import shutil import urlparse import uuid @@ -141,6 +142,24 @@ class StorageInterface(object): # Subclasses should override this method, if applicable. self.__raise_not_implemented() + def copy_locally(self, filepath, dest_path): + """ + Copy this file locally. + + A basic working method for this is provided that should + function both for local_storage systems and remote storge + systems, but if more efficient systems for copying locally + apply to your system, override this method with something more + appropriate. + """ + if self.local_storage: + shutil.copy( + self.get_local_path(filepath), dest_path) + else: + with self.get_file(filepath, 'rb') as source_file: + with file(dest_path, 'wb') as dest_file: + dest_file.write(source_file.read()) + class BasicFileStorage(StorageInterface): """ @@ -272,3 +291,5 @@ def storage_system_from_paste_config(paste_config, storage_prefix): storage_class = util.import_component(storage_class) return storage_class(**config_params) + + diff --git a/mediagoblin/tests/test_storage.py b/mediagoblin/tests/test_storage.py index 5efed405..83f893f1 100644 --- a/mediagoblin/tests/test_storage.py +++ b/mediagoblin/tests/test_storage.py @@ -231,3 +231,19 @@ def test_basic_storage_get_local_path(): def test_basic_storage_is_local(): tmpdir, this_storage = get_tmp_filestorage() assert this_storage.local_storage is True + + +def test_basic_storage_copy_locally(): + tmpdir, this_storage = get_tmp_filestorage() + + dest_tmpdir = tempfile.mkdtemp() + + filepath = ['dir1', 'dir2', 'ourfile.txt'] + with this_storage.get_file(filepath, 'w') as our_file: + our_file.write('Testing this file') + + new_file_dest = os.path.join(dest_tmpdir, 'file2.txt') + + this_storage.copy_locally(filepath, new_file_dest) + + assert file(new_file_dest).read() == 'Testing this file' From 68784b9e6ad98cef88d5e9508e9536dc296b09e2 Mon Sep 17 00:00:00 2001 From: Christopher Allan Webber Date: Sat, 11 Jun 2011 16:48:39 -0500 Subject: [PATCH 03/15] Base structure of workbench manager --- mediagoblin/process_media/workbench.py | 73 ++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) create mode 100644 mediagoblin/process_media/workbench.py diff --git a/mediagoblin/process_media/workbench.py b/mediagoblin/process_media/workbench.py new file mode 100644 index 00000000..41ccc081 --- /dev/null +++ b/mediagoblin/process_media/workbench.py @@ -0,0 +1,73 @@ +# GNU MediaGoblin -- federated, autonomous media hosting +# Copyright (C) 2011 Free Software Foundation, Inc +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +import tempfile + + +class WorkbenchOutsideScope(Exception): + """ + Raised when a workbench is outside a WorkbenchManager scope. + """ + pass + + +# TODO: This doesn't seem like it'd work with Windows +DEFAULT_WORKBENCH_DIR = u'/tmp/workbench/' + + +class WorkbenchManager(object): + """ + A system for generating and destroying workbenches. + + Workbenches are actually just subdirectories of a temporary storage space + for during the processing stage. + """ + + def __init__(self, base_workbench_dir): + self.base_workbench_dir = base_workbench_dir + + def create_workbench(self): + """ + Create and return the path to a new workbench (directory). + """ + pass + + def destroy_workbench(self, workbench): + """ + Destroy this workbench! Deletes the directory and all its contents! + + Makes sure the workbench actually belongs to this manager though. + """ + pass + + def possibly_localize_file(self, workbench, storage, filepath): + """ + Possibly localize the file from this storage system (for read-only + purposes, modifications should be written to a new file.). + + If the file is already local, just return the absolute filename of that + local file. Otherwise, copy the file locally to the workbench, and + return the absolute path of the new file. + + Also returns whether or not it copied the file locally. + + Returns: + (localized_filename, copied_locally) + The first of these bieng the absolute filename as described above as a + unicode string, the second being a boolean stating whether or not we + had to copy the file locally. + """ + pass From 678d1a20ea0fa674253b835ca2c07b595fd8728c Mon Sep 17 00:00:00 2001 From: Christopher Allan Webber Date: Sat, 11 Jun 2011 17:28:58 -0500 Subject: [PATCH 04/15] Wrote functions and documentation for all the WorkbenchManager functions but haven't actually tested them yet. :) --- mediagoblin/process_media/workbench.py | 83 +++++++++++++++++++++++--- 1 file changed, 75 insertions(+), 8 deletions(-) diff --git a/mediagoblin/process_media/workbench.py b/mediagoblin/process_media/workbench.py index 41ccc081..32ebe534 100644 --- a/mediagoblin/process_media/workbench.py +++ b/mediagoblin/process_media/workbench.py @@ -14,9 +14,18 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . +import os +import shutil import tempfile +DEFAULT_WORKBENCH_DIR = os.path.join( + tempfile.gettempdir(), u'mgoblin_workbench') + + +# Exception(s) +# ------------ + class WorkbenchOutsideScope(Exception): """ Raised when a workbench is outside a WorkbenchManager scope. @@ -24,9 +33,8 @@ class WorkbenchOutsideScope(Exception): pass -# TODO: This doesn't seem like it'd work with Windows -DEFAULT_WORKBENCH_DIR = u'/tmp/workbench/' - +# Actual workbench stuff +# ---------------------- class WorkbenchManager(object): """ @@ -37,13 +45,15 @@ class WorkbenchManager(object): """ def __init__(self, base_workbench_dir): - self.base_workbench_dir = base_workbench_dir + self.base_workbench_dir = os.path.abspath(base_workbench_dir) + if not os.path.exists(self.base_workbench_dir): + os.makedirs(self.base_workbench_dir) def create_workbench(self): """ Create and return the path to a new workbench (directory). """ - pass + return tempfile.mkdtemp(dir=self.base_workbench_dir) def destroy_workbench(self, workbench): """ @@ -51,9 +61,18 @@ class WorkbenchManager(object): Makes sure the workbench actually belongs to this manager though. """ - pass + # just in case + workbench = os.path.abspath(workbench) - def possibly_localize_file(self, workbench, storage, filepath): + if not workbench.startswith(self.base_workbench_dir): + raise WorkbenchOutsideScope( + "Can't destroy workbench outside the base workbench dir") + + shutil.rmtree(workbench) + + def possibly_localize_file(self, workbench, storage, filepath, + filename_if_copying=None, + keep_extension_if_copying=True): """ Possibly localize the file from this storage system (for read-only purposes, modifications should be written to a new file.). @@ -62,6 +81,14 @@ class WorkbenchManager(object): local file. Otherwise, copy the file locally to the workbench, and return the absolute path of the new file. + If it is copying locally, we might want to require a filename like + "source.jpg" to ensure that we won't conflict with other filenames in + our workbench... if that's the case, make sure filename_if_copying is + set to something like 'source.jpg'. Relatedly, if you set + keep_extension_if_copying, you don't have to set an extension on + filename_if_copying yourself, it'll be set for you (assuming such an + extension can be extacted from the filename in the filepath). + Also returns whether or not it copied the file locally. Returns: @@ -69,5 +96,45 @@ class WorkbenchManager(object): The first of these bieng the absolute filename as described above as a unicode string, the second being a boolean stating whether or not we had to copy the file locally. + + Examples: + >>> wb_manager.possibly_localize_file( + ... '/our/workbench/subdir', local_storage, + ... ['path', 'to', 'foobar.jpg']) + (u'/local/storage/path/to/foobar.jpg', False) + + >>> wb_manager.possibly_localize_file( + ... '/our/workbench/subdir', remote_storage, + ... ['path', 'to', 'foobar.jpg']) + (u'/our/workbench/subdir/foobar.jpg', True) + + >>> wb_manager.possibly_localize_file( + ... '/our/workbench/subdir', remote_storage, + ... ['path', 'to', 'foobar.jpg'], 'source.jpeg', False) + (u'/our/workbench/subdir/foobar.jpeg', True) + + >>> wb_manager.possibly_localize_file( + ... '/our/workbench/subdir', remote_storage, + ... ['path', 'to', 'foobar.jpg'], 'source', True) + (u'/our/workbench/subdir/foobar.jpg', True) """ - pass + if storage.local_storage: + return (storage.get_local_path(filepath), False) + else: + if filename_if_copying is None: + dest_filename = filepath[-1] + else: + orig_filename, orig_ext = os.path.splitext(filepath[-1]) + if keep_extension_if_copying and orig_ext: + dest_filename = filename_if_copying + '.' + orig_ext + else: + dest_filename = filename_if_copying + + full_dest_filename = os.path.join( + workbench, dest_filename) + + # copy it over + storage.copy_locally( + filepath, full_dest_filename) + + return (full_dest_filename, True) From 2616d70903a775ee3790bdd38f6f0ad4003c0170 Mon Sep 17 00:00:00 2001 From: Christopher Allan Webber Date: Sat, 11 Jun 2011 18:49:04 -0500 Subject: [PATCH 05/15] Tests for creating/destroying workbenches --- mediagoblin/tests/test_workbench.py | 44 +++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 mediagoblin/tests/test_workbench.py diff --git a/mediagoblin/tests/test_workbench.py b/mediagoblin/tests/test_workbench.py new file mode 100644 index 00000000..1d006645 --- /dev/null +++ b/mediagoblin/tests/test_workbench.py @@ -0,0 +1,44 @@ +# GNU MediaGoblin -- federated, autonomous media hosting +# Copyright (C) 2011 Free Software Foundation, Inc +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +import os +import tempfile + +from mediagoblin.process_media import workbench + + +class TestWorkbench(object): + def setUp(self): + self.workbench_manager = workbench.WorkbenchManager( + os.path.join(tempfile.gettempdir(), u'mgoblin_workbench_testing')) + + def test_create_workbench(self): + workbench = self.workbench_manager.create_workbench() + assert os.path.isdir(workbench) + assert workbench.startswith(self.workbench_manager.base_workbench_dir) + + def test_destroy_workbench(self): + # kill a workbench + workbench = self.workbench_manager.create_workbench() + tmpfile = file(os.path.join(workbench, 'temp.txt'), 'w') + with tmpfile: + tmpfile.write('lollerskates') + + assert os.path.exists(os.path.join(workbench, 'temp.txt')) + + self.workbench_manager.destroy_workbench(workbench) + assert not os.path.exists(os.path.join(workbench, 'temp.txt')) + assert not os.path.exists(workbench) From 2ecee34f08ad367f54f7a066a2e3ce81aba01f0f Mon Sep 17 00:00:00 2001 From: Christopher Allan Webber Date: Sat, 11 Jun 2011 18:52:48 -0500 Subject: [PATCH 06/15] Make sure workbench won't kill directories out of scope. --- mediagoblin/tests/test_workbench.py | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/mediagoblin/tests/test_workbench.py b/mediagoblin/tests/test_workbench.py index 1d006645..83e90c3d 100644 --- a/mediagoblin/tests/test_workbench.py +++ b/mediagoblin/tests/test_workbench.py @@ -17,6 +17,8 @@ import os import tempfile +from nose.tools import assert_raises + from mediagoblin.process_media import workbench @@ -32,13 +34,21 @@ class TestWorkbench(object): def test_destroy_workbench(self): # kill a workbench - workbench = self.workbench_manager.create_workbench() - tmpfile = file(os.path.join(workbench, 'temp.txt'), 'w') + this_workbench = self.workbench_manager.create_workbench() + tmpfile = file(os.path.join(this_workbench, 'temp.txt'), 'w') with tmpfile: tmpfile.write('lollerskates') - assert os.path.exists(os.path.join(workbench, 'temp.txt')) + assert os.path.exists(os.path.join(this_workbench, 'temp.txt')) - self.workbench_manager.destroy_workbench(workbench) - assert not os.path.exists(os.path.join(workbench, 'temp.txt')) - assert not os.path.exists(workbench) + self.workbench_manager.destroy_workbench(this_workbench) + assert not os.path.exists(os.path.join(this_workbench, 'temp.txt')) + assert not os.path.exists(this_workbench) + + # make sure we can't kill other stuff though + dont_kill_this = tempfile.mkdtemp() + + assert_raises( + workbench.WorkbenchOutsideScope, + self.workbench_manager.destroy_workbench, + dont_kill_this) From d91b5a7c2d8f518a0654d066c41b1359bee1d04e Mon Sep 17 00:00:00 2001 From: Christopher Allan Webber Date: Sat, 11 Jun 2011 19:17:44 -0500 Subject: [PATCH 07/15] Added a FakeRemoteStorage, for testing purposes --- mediagoblin/tests/test_storage.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/mediagoblin/tests/test_storage.py b/mediagoblin/tests/test_storage.py index 83f893f1..55b66e84 100644 --- a/mediagoblin/tests/test_storage.py +++ b/mediagoblin/tests/test_storage.py @@ -52,6 +52,11 @@ class FakeStorageSystem(): self.foobie = foobie self.blech = blech +class FakeRemoteStorage(storage.BasicFileStorage): + # Theoretically despite this, all the methods should work but it + # should force copying to the workbench + local_storage = False + def test_storage_system_from_paste_config(): this_storage = storage.storage_system_from_paste_config( @@ -81,9 +86,12 @@ def test_storage_system_from_paste_config(): # Basic file storage tests ########################## -def get_tmp_filestorage(mount_url=None): +def get_tmp_filestorage(mount_url=None, fake_remote=False): tmpdir = tempfile.mkdtemp() - this_storage = storage.BasicFileStorage(tmpdir, mount_url) + if fake_remote: + this_storage = FakeRemoteStorage(tmpdir, mount_url) + else: + this_storage = storage.BasicFileStorage(tmpdir, mount_url) return tmpdir, this_storage From f2b96ff0a4efab989ff5558de44b33da21b1bf00 Mon Sep 17 00:00:00 2001 From: Christopher Allan Webber Date: Sat, 11 Jun 2011 19:18:27 -0500 Subject: [PATCH 08/15] We don't need this extra '.' in making the filename --- mediagoblin/process_media/workbench.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mediagoblin/process_media/workbench.py b/mediagoblin/process_media/workbench.py index 32ebe534..b1198adf 100644 --- a/mediagoblin/process_media/workbench.py +++ b/mediagoblin/process_media/workbench.py @@ -126,7 +126,7 @@ class WorkbenchManager(object): else: orig_filename, orig_ext = os.path.splitext(filepath[-1]) if keep_extension_if_copying and orig_ext: - dest_filename = filename_if_copying + '.' + orig_ext + dest_filename = filename_if_copying + orig_ext else: dest_filename = filename_if_copying From f43ecb0fc459c0ee64f3a40b98ed6bbf8564c107 Mon Sep 17 00:00:00 2001 From: Christopher Allan Webber Date: Sat, 11 Jun 2011 19:18:51 -0500 Subject: [PATCH 09/15] test WorkbenchManager.possibly_localize_file() --- mediagoblin/tests/test_workbench.py | 43 +++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/mediagoblin/tests/test_workbench.py b/mediagoblin/tests/test_workbench.py index 83e90c3d..fd71a6eb 100644 --- a/mediagoblin/tests/test_workbench.py +++ b/mediagoblin/tests/test_workbench.py @@ -20,6 +20,7 @@ import tempfile from nose.tools import assert_raises from mediagoblin.process_media import workbench +from mediagoblin.tests.test_storage import get_tmp_filestorage class TestWorkbench(object): @@ -52,3 +53,45 @@ class TestWorkbench(object): workbench.WorkbenchOutsideScope, self.workbench_manager.destroy_workbench, dont_kill_this) + + def test_possibly_localize_file(self): + tmpdir, this_storage = get_tmp_filestorage() + this_workbench = self.workbench_manager.create_workbench() + + # Write a brand new file + filepath = ['dir1', 'dir2', 'ourfile.txt'] + + with this_storage.get_file(filepath, 'w') as our_file: + our_file.write('Our file') + + # with a local file storage + filename, copied = self.workbench_manager.possibly_localize_file( + this_workbench, this_storage, filepath) + assert copied is False + assert filename == os.path.join( + tmpdir, 'dir1/dir2/ourfile.txt') + + # with a fake remote file storage + tmpdir, this_storage = get_tmp_filestorage(fake_remote=True) + + # ... write a brand new file, again ;) + with this_storage.get_file(filepath, 'w') as our_file: + our_file.write('Our file') + + filename, copied = self.workbench_manager.possibly_localize_file( + this_workbench, this_storage, filepath) + assert filename == os.path.join( + this_workbench, 'ourfile.txt') + + # fake remote file storage, filename_if_copying set + filename, copied = self.workbench_manager.possibly_localize_file( + this_workbench, this_storage, filepath, 'thisfile') + assert filename == os.path.join( + this_workbench, 'thisfile.txt') + + # fake remote file storage, filename_if_copying set, + # keep_extension_if_copying set to false + filename, copied = self.workbench_manager.possibly_localize_file( + this_workbench, this_storage, filepath, 'thisfile.text', False) + assert filename == os.path.join( + this_workbench, 'thisfile.text') From 7ecc58cc5cf49c87001c38ba5d0607644ca195d4 Mon Sep 17 00:00:00 2001 From: Christopher Allan Webber Date: Sat, 11 Jun 2011 19:47:02 -0500 Subject: [PATCH 10/15] Have the application set up instances of the WorkbenchManager. --- mediagoblin/app.py | 11 ++++++++--- mediagoblin/celery_setup/from_celery.py | 10 ++++++++-- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/mediagoblin/app.py b/mediagoblin/app.py index e5949531..da2b7d35 100644 --- a/mediagoblin/app.py +++ b/mediagoblin/app.py @@ -25,6 +25,8 @@ from mediagoblin import routing, util, storage, staticdirect from mediagoblin.db.open import setup_connection_and_db_from_config from mediagoblin.globals import setup_globals from mediagoblin.celery_setup import setup_celery_from_config +from mediagoblin.process_media.workbench import ( + WorkbenchManager, DEFAULT_WORKBENCH_DIR) class Error(Exception): pass @@ -39,7 +41,8 @@ class MediaGoblinApp(object): public_store, queue_store, staticdirector, email_sender_address, email_debug_mode, - user_template_path=None): + user_template_path=None, + workbench_path=DEFAULT_WORKBENCH_DIR): # Get the template environment self.template_loader = util.get_jinja_loader(user_template_path) @@ -66,7 +69,8 @@ class MediaGoblinApp(object): db_connection=connection, database=self.db, public_store=self.public_store, - queue_store=self.queue_store) + queue_store=self.queue_store, + workbench_manager=WorkbenchManager(workbench_path)) def __call__(self, environ, start_response): request = Request(environ) @@ -154,6 +158,7 @@ def paste_app_factory(global_config, **app_config): email_sender_address=app_config.get( 'email_sender_address', 'notice@mediagoblin.example.org'), email_debug_mode=asbool(app_config.get('email_debug_mode')), - user_template_path=app_config.get('local_templates')) + user_template_path=app_config.get('local_templates'), + workbench_path=app_config.get('workbench_path', DEFAULT_WORKBENCH_DIR)) return mgoblin_app diff --git a/mediagoblin/celery_setup/from_celery.py b/mediagoblin/celery_setup/from_celery.py index 0669e80c..e1216815 100644 --- a/mediagoblin/celery_setup/from_celery.py +++ b/mediagoblin/celery_setup/from_celery.py @@ -23,7 +23,8 @@ from mediagoblin import storage from mediagoblin.db.open import setup_connection_and_db_from_config from mediagoblin.celery_setup import setup_celery_from_config from mediagoblin.globals import setup_globals -from mediagoblin import globals as mgoblin_globals +from mediagoblin.process_media.workbench import ( + WorkbenchManager, DEFAULT_WORKBENCH_DIR) OUR_MODULENAME = 'mediagoblin.celery_setup.from_celery' @@ -76,6 +77,10 @@ def setup_self(setup_globals_func=setup_globals): queue_store = storage.storage_system_from_paste_config( mgoblin_section, 'queuestore') + workbench_manager = WorkbenchManager( + mgoblin_section.get( + 'workbench_path', DEFAULT_WORKBENCH_DIR)) + setup_globals_func( db_connection=connection, database=db, @@ -84,7 +89,8 @@ def setup_self(setup_globals_func=setup_globals): email_sender_address=mgoblin_section.get( 'email_sender_address', 'notice@mediagoblin.example.org'), - queue_store=queue_store) + queue_store=queue_store, + workbench_manager=workbench_manager) if os.environ['CELERY_CONFIG_MODULE'] == OUR_MODULENAME: From 894facc68d5a52dd796451361cd545f0bd3116f5 Mon Sep 17 00:00:00 2001 From: Christopher Allan Webber Date: Sat, 11 Jun 2011 19:48:49 -0500 Subject: [PATCH 11/15] Import mediagoblin.globals as mg_globals so we can be sure things are set up in the right order. --- mediagoblin/process_media/__init__.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/mediagoblin/process_media/__init__.py b/mediagoblin/process_media/__init__.py index 4f06a686..527c198c 100644 --- a/mediagoblin/process_media/__init__.py +++ b/mediagoblin/process_media/__init__.py @@ -18,7 +18,7 @@ import Image from mediagoblin.db.util import ObjectId from celery.task import task -from mediagoblin.globals import database, queue_store, public_store +from mediagoblin import globals as mg_globals THUMB_SIZE = 200, 200 @@ -26,38 +26,39 @@ THUMB_SIZE = 200, 200 @task def process_media_initial(media_id): - entry = database.MediaEntry.one( + entry = mg_globals.database.MediaEntry.one( {'_id': ObjectId(media_id)}) queued_filepath = entry['queued_media_file'] - queued_file = queue_store.get_file(queued_filepath, 'r') + queued_file = mg_globals.queue_store.get_file(queued_filepath, 'r') with queued_file: thumb = Image.open(queued_file) thumb.thumbnail(THUMB_SIZE, Image.ANTIALIAS) - thumb_filepath = public_store.get_unique_filepath( + thumb_filepath = mg_globals.public_store.get_unique_filepath( ['media_entries', unicode(entry['_id']), 'thumbnail.jpg']) - with public_store.get_file(thumb_filepath, 'w') as thumb_file: + thumb_file = mg_globals.public_store.get_file(thumb_filepath, 'w') + with thumb_file: thumb.save(thumb_file, "JPEG") # we have to re-read because unlike PIL, not everything reads # things in string representation :) - queued_file = queue_store.get_file(queued_filepath, 'rb') + queued_file = mg_globals.queue_store.get_file(queued_filepath, 'rb') with queued_file: - main_filepath = public_store.get_unique_filepath( + main_filepath = mg_globals.public_store.get_unique_filepath( ['media_entries', unicode(entry['_id']), queued_filepath[-1]]) - with public_store.get_file(main_filepath, 'wb') as main_file: + with mg_globals.public_store.get_file(main_filepath, 'wb') as main_file: main_file.write(queued_file.read()) - queue_store.delete_file(queued_filepath) + mg_globals.queue_store.delete_file(queued_filepath) media_files_dict = entry.setdefault('media_files', {}) media_files_dict['thumb'] = thumb_filepath media_files_dict['main'] = main_filepath From a32acafa0bfebfc886a05414b4e33943d358efc7 Mon Sep 17 00:00:00 2001 From: Christopher Allan Webber Date: Sat, 11 Jun 2011 20:33:41 -0500 Subject: [PATCH 12/15] Moving workbench out of process_media --- mediagoblin/app.py | 3 +-- mediagoblin/celery_setup/from_celery.py | 3 +-- mediagoblin/tests/test_workbench.py | 2 +- mediagoblin/{process_media => }/workbench.py | 0 4 files changed, 3 insertions(+), 5 deletions(-) rename mediagoblin/{process_media => }/workbench.py (100%) diff --git a/mediagoblin/app.py b/mediagoblin/app.py index da2b7d35..5d594039 100644 --- a/mediagoblin/app.py +++ b/mediagoblin/app.py @@ -25,8 +25,7 @@ from mediagoblin import routing, util, storage, staticdirect from mediagoblin.db.open import setup_connection_and_db_from_config from mediagoblin.globals import setup_globals from mediagoblin.celery_setup import setup_celery_from_config -from mediagoblin.process_media.workbench import ( - WorkbenchManager, DEFAULT_WORKBENCH_DIR) +from mediagoblin.workbench import WorkbenchManager, DEFAULT_WORKBENCH_DIR class Error(Exception): pass diff --git a/mediagoblin/celery_setup/from_celery.py b/mediagoblin/celery_setup/from_celery.py index e1216815..5fa9ba76 100644 --- a/mediagoblin/celery_setup/from_celery.py +++ b/mediagoblin/celery_setup/from_celery.py @@ -23,8 +23,7 @@ from mediagoblin import storage from mediagoblin.db.open import setup_connection_and_db_from_config from mediagoblin.celery_setup import setup_celery_from_config from mediagoblin.globals import setup_globals -from mediagoblin.process_media.workbench import ( - WorkbenchManager, DEFAULT_WORKBENCH_DIR) +from mediagoblin.workbench import WorkbenchManager, DEFAULT_WORKBENCH_DIR OUR_MODULENAME = 'mediagoblin.celery_setup.from_celery' diff --git a/mediagoblin/tests/test_workbench.py b/mediagoblin/tests/test_workbench.py index fd71a6eb..f08a26a0 100644 --- a/mediagoblin/tests/test_workbench.py +++ b/mediagoblin/tests/test_workbench.py @@ -19,7 +19,7 @@ import tempfile from nose.tools import assert_raises -from mediagoblin.process_media import workbench +from mediagoblin import workbench from mediagoblin.tests.test_storage import get_tmp_filestorage diff --git a/mediagoblin/process_media/workbench.py b/mediagoblin/workbench.py similarity index 100% rename from mediagoblin/process_media/workbench.py rename to mediagoblin/workbench.py From fdc5003903584ec8a1e83ccf80ebb6d3be3c671e Mon Sep 17 00:00:00 2001 From: Christopher Allan Webber Date: Sat, 11 Jun 2011 21:20:26 -0500 Subject: [PATCH 13/15] Don't bother returning whether or not we copied it or not, we can figure that out by looking to see whether our storage is local or not. --- mediagoblin/tests/test_workbench.py | 9 ++++----- mediagoblin/workbench.py | 19 +++++++------------ 2 files changed, 11 insertions(+), 17 deletions(-) diff --git a/mediagoblin/tests/test_workbench.py b/mediagoblin/tests/test_workbench.py index f08a26a0..994688c4 100644 --- a/mediagoblin/tests/test_workbench.py +++ b/mediagoblin/tests/test_workbench.py @@ -65,9 +65,8 @@ class TestWorkbench(object): our_file.write('Our file') # with a local file storage - filename, copied = self.workbench_manager.possibly_localize_file( + filename = self.workbench_manager.possibly_localize_file( this_workbench, this_storage, filepath) - assert copied is False assert filename == os.path.join( tmpdir, 'dir1/dir2/ourfile.txt') @@ -78,20 +77,20 @@ class TestWorkbench(object): with this_storage.get_file(filepath, 'w') as our_file: our_file.write('Our file') - filename, copied = self.workbench_manager.possibly_localize_file( + filename = self.workbench_manager.possibly_localize_file( this_workbench, this_storage, filepath) assert filename == os.path.join( this_workbench, 'ourfile.txt') # fake remote file storage, filename_if_copying set - filename, copied = self.workbench_manager.possibly_localize_file( + filename = self.workbench_manager.possibly_localize_file( this_workbench, this_storage, filepath, 'thisfile') assert filename == os.path.join( this_workbench, 'thisfile.txt') # fake remote file storage, filename_if_copying set, # keep_extension_if_copying set to false - filename, copied = self.workbench_manager.possibly_localize_file( + filename = self.workbench_manager.possibly_localize_file( this_workbench, this_storage, filepath, 'thisfile.text', False) assert filename == os.path.join( this_workbench, 'thisfile.text') diff --git a/mediagoblin/workbench.py b/mediagoblin/workbench.py index b1198adf..360e3e19 100644 --- a/mediagoblin/workbench.py +++ b/mediagoblin/workbench.py @@ -89,37 +89,32 @@ class WorkbenchManager(object): filename_if_copying yourself, it'll be set for you (assuming such an extension can be extacted from the filename in the filepath). - Also returns whether or not it copied the file locally. - Returns: - (localized_filename, copied_locally) - The first of these bieng the absolute filename as described above as a - unicode string, the second being a boolean stating whether or not we - had to copy the file locally. + localized_filename Examples: >>> wb_manager.possibly_localize_file( ... '/our/workbench/subdir', local_storage, ... ['path', 'to', 'foobar.jpg']) - (u'/local/storage/path/to/foobar.jpg', False) + u'/local/storage/path/to/foobar.jpg' >>> wb_manager.possibly_localize_file( ... '/our/workbench/subdir', remote_storage, ... ['path', 'to', 'foobar.jpg']) - (u'/our/workbench/subdir/foobar.jpg', True) + '/our/workbench/subdir/foobar.jpg' >>> wb_manager.possibly_localize_file( ... '/our/workbench/subdir', remote_storage, ... ['path', 'to', 'foobar.jpg'], 'source.jpeg', False) - (u'/our/workbench/subdir/foobar.jpeg', True) + '/our/workbench/subdir/foobar.jpeg' >>> wb_manager.possibly_localize_file( ... '/our/workbench/subdir', remote_storage, ... ['path', 'to', 'foobar.jpg'], 'source', True) - (u'/our/workbench/subdir/foobar.jpg', True) + '/our/workbench/subdir/foobar.jpg' """ if storage.local_storage: - return (storage.get_local_path(filepath), False) + return storage.get_local_path(filepath) else: if filename_if_copying is None: dest_filename = filepath[-1] @@ -137,4 +132,4 @@ class WorkbenchManager(object): storage.copy_locally( filepath, full_dest_filename) - return (full_dest_filename, True) + return full_dest_filename From ca030ab6cd5c18b7991f08a1ee103175f313c5a1 Mon Sep 17 00:00:00 2001 From: Christopher Allan Webber Date: Sat, 11 Jun 2011 21:20:39 -0500 Subject: [PATCH 14/15] Switch process_media over to using the workbench. --- mediagoblin/process_media/__init__.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/mediagoblin/process_media/__init__.py b/mediagoblin/process_media/__init__.py index 527c198c..1bb43755 100644 --- a/mediagoblin/process_media/__init__.py +++ b/mediagoblin/process_media/__init__.py @@ -26,11 +26,17 @@ THUMB_SIZE = 200, 200 @task def process_media_initial(media_id): + workbench = mg_globals.workbench_manager.create_workbench() + entry = mg_globals.database.MediaEntry.one( {'_id': ObjectId(media_id)}) queued_filepath = entry['queued_media_file'] - queued_file = mg_globals.queue_store.get_file(queued_filepath, 'r') + queued_filename = mg_globals.workbench_manager.possibly_localize_file( + workbench, mg_globals.queue_store, queued_filepath, + 'source') + + queued_file = file(queued_filename, 'r') with queued_file: thumb = Image.open(queued_file) @@ -47,7 +53,7 @@ def process_media_initial(media_id): # we have to re-read because unlike PIL, not everything reads # things in string representation :) - queued_file = mg_globals.queue_store.get_file(queued_filepath, 'rb') + queued_file = file(queued_filename, 'rb') with queued_file: main_filepath = mg_globals.public_store.get_unique_filepath( @@ -64,3 +70,6 @@ def process_media_initial(media_id): media_files_dict['main'] = main_filepath entry['state'] = u'processed' entry.save() + + # clean up workbench + mg_globals.workbench_manager.destroy_workbench(workbench) From 68ffb13690fa0c364c514ce253364f928e50841c Mon Sep 17 00:00:00 2001 From: Christopher Allan Webber Date: Sat, 11 Jun 2011 21:23:32 -0500 Subject: [PATCH 15/15] possibly_localize_file->localized_file... a bit less terribly long. --- mediagoblin/process_media/__init__.py | 2 +- mediagoblin/tests/test_workbench.py | 10 +++++----- mediagoblin/workbench.py | 14 +++++++------- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/mediagoblin/process_media/__init__.py b/mediagoblin/process_media/__init__.py index 1bb43755..531eb16d 100644 --- a/mediagoblin/process_media/__init__.py +++ b/mediagoblin/process_media/__init__.py @@ -32,7 +32,7 @@ def process_media_initial(media_id): {'_id': ObjectId(media_id)}) queued_filepath = entry['queued_media_file'] - queued_filename = mg_globals.workbench_manager.possibly_localize_file( + queued_filename = mg_globals.workbench_manager.localized_file( workbench, mg_globals.queue_store, queued_filepath, 'source') diff --git a/mediagoblin/tests/test_workbench.py b/mediagoblin/tests/test_workbench.py index 994688c4..89f2ef33 100644 --- a/mediagoblin/tests/test_workbench.py +++ b/mediagoblin/tests/test_workbench.py @@ -54,7 +54,7 @@ class TestWorkbench(object): self.workbench_manager.destroy_workbench, dont_kill_this) - def test_possibly_localize_file(self): + def test_localized_file(self): tmpdir, this_storage = get_tmp_filestorage() this_workbench = self.workbench_manager.create_workbench() @@ -65,7 +65,7 @@ class TestWorkbench(object): our_file.write('Our file') # with a local file storage - filename = self.workbench_manager.possibly_localize_file( + filename = self.workbench_manager.localized_file( this_workbench, this_storage, filepath) assert filename == os.path.join( tmpdir, 'dir1/dir2/ourfile.txt') @@ -77,20 +77,20 @@ class TestWorkbench(object): with this_storage.get_file(filepath, 'w') as our_file: our_file.write('Our file') - filename = self.workbench_manager.possibly_localize_file( + filename = self.workbench_manager.localized_file( this_workbench, this_storage, filepath) assert filename == os.path.join( this_workbench, 'ourfile.txt') # fake remote file storage, filename_if_copying set - filename = self.workbench_manager.possibly_localize_file( + filename = self.workbench_manager.localized_file( this_workbench, this_storage, filepath, 'thisfile') assert filename == os.path.join( this_workbench, 'thisfile.txt') # fake remote file storage, filename_if_copying set, # keep_extension_if_copying set to false - filename = self.workbench_manager.possibly_localize_file( + filename = self.workbench_manager.localized_file( this_workbench, this_storage, filepath, 'thisfile.text', False) assert filename == os.path.join( this_workbench, 'thisfile.text') diff --git a/mediagoblin/workbench.py b/mediagoblin/workbench.py index 360e3e19..d7252623 100644 --- a/mediagoblin/workbench.py +++ b/mediagoblin/workbench.py @@ -70,9 +70,9 @@ class WorkbenchManager(object): shutil.rmtree(workbench) - def possibly_localize_file(self, workbench, storage, filepath, - filename_if_copying=None, - keep_extension_if_copying=True): + def localized_file(self, workbench, storage, filepath, + filename_if_copying=None, + keep_extension_if_copying=True): """ Possibly localize the file from this storage system (for read-only purposes, modifications should be written to a new file.). @@ -93,22 +93,22 @@ class WorkbenchManager(object): localized_filename Examples: - >>> wb_manager.possibly_localize_file( + >>> wb_manager.localized_file( ... '/our/workbench/subdir', local_storage, ... ['path', 'to', 'foobar.jpg']) u'/local/storage/path/to/foobar.jpg' - >>> wb_manager.possibly_localize_file( + >>> wb_manager.localized_file( ... '/our/workbench/subdir', remote_storage, ... ['path', 'to', 'foobar.jpg']) '/our/workbench/subdir/foobar.jpg' - >>> wb_manager.possibly_localize_file( + >>> wb_manager.localized_file( ... '/our/workbench/subdir', remote_storage, ... ['path', 'to', 'foobar.jpg'], 'source.jpeg', False) '/our/workbench/subdir/foobar.jpeg' - >>> wb_manager.possibly_localize_file( + >>> wb_manager.localized_file( ... '/our/workbench/subdir', remote_storage, ... ['path', 'to', 'foobar.jpg'], 'source', True) '/our/workbench/subdir/foobar.jpg'