From 3cd6ea5b1764800f9c711d27de09de7f446f8aec Mon Sep 17 00:00:00 2001 From: Christopher Allan Webber Date: Sun, 26 Jun 2011 21:10:16 -0500 Subject: [PATCH 1/7] A base set of indexes for us to use with our new indexing tool. --- mediagoblin/db/indexes.py | 103 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 103 insertions(+) create mode 100644 mediagoblin/db/indexes.py diff --git a/mediagoblin/db/indexes.py b/mediagoblin/db/indexes.py new file mode 100644 index 00000000..4e99b8c8 --- /dev/null +++ b/mediagoblin/db/indexes.py @@ -0,0 +1,103 @@ +# GNU MediaGoblin -- federated, autonomous media hosting +# Copyright (C) 2011 Free Software Foundation, Inc +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +""" +Indexes for the local database. + +Indexes are recorded in the following format: + +INDEXES = { + 'identifier': { # key identifier used for possibly deprecating later + 'collection': 'thiscollection', + 'index': [index_foo_goes_here]}} + +... and anything else being parameters to the create_index function +(including unique=True, etc) + +Current indexes must be registered in ACTIVE_INDEXES... deprecated +indexes should be marked in DEPRECATED_INDEXES. + +Remember, ordering of compound indexes MATTERS. Read below for more. 
+ +REQUIRED READING: + - http://kylebanker.com/blog/2010/09/21/the-joy-of-mongodb-indexes/ + - http://www.mongodb.org/display/DOCS/Indexes + - http://www.mongodb.org/display/DOCS/Indexing+Advice+and+FAQ +""" + +from pymongo import ASCENDING, DESCENDING + + +################ +# Active indexes +################ +ACTIVE_INDEXES = {} + +# MediaEntry indexes +# ------------------ + +MEDIAENTRY_INDEXES = { + 'mediaentry_uploader_slug_unique': { + # Matching an object to an uploader + slug. + # MediaEntries are unique on these two combined, eg: + # /u/${myuser}/m/${myslugname}/ + 'collection': 'media_entries', + 'index': [('uploader', ASCENDING), + ('slug', ASCENDING)], + 'unique': True}, + + 'mediaentry_created': { + # A global index for all media entries created, in descending + # order. This is used for the site's frontpage. + 'collection': 'media_entries', + 'index': [('created', DESCENDING)]}, + + 'mediaentry_uploader_created': { + # Indexing on uploaders and when media entries are created. + # Used for showing a user gallery, etc. + 'collection': 'media_entries', + 'index': [('uploader', ASCENDING), + ('created', DESCENDING)]}} + + +ACTIVE_INDEXES.update( + [MEDIAENTRY_INDEXES]) + + +# User indexes +# ------------ + +USER_INDEXES = { + 'user_username_unique': { + # Index usernames, and make sure they're unique. + # ... 
I guess we might need to adjust this once we're federated :) + 'collection': 'users', + 'index': 'username'}, + 'user_created': { + # All most recently created users + 'collection': 'users', + 'index': 'created'}} + + +ACTIVE_INDEXES.update( + [USER_INDEXES]) + + +#################### +# Deprecated indexes +#################### + +DEPRECATED_INDEXES = [] From ca5d2c51b68b67232263a08736da3515ce616290 Mon Sep 17 00:00:00 2001 From: Christopher Allan Webber Date: Sun, 26 Jun 2011 21:51:38 -0500 Subject: [PATCH 2/7] Store the collection information in the ACTIVE_INDEXES keys --- mediagoblin/db/indexes.py | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/mediagoblin/db/indexes.py b/mediagoblin/db/indexes.py index 4e99b8c8..85d43471 100644 --- a/mediagoblin/db/indexes.py +++ b/mediagoblin/db/indexes.py @@ -20,9 +20,9 @@ Indexes for the local database. Indexes are recorded in the following format: INDEXES = { - 'identifier': { # key identifier used for possibly deprecating later - 'collection': 'thiscollection', - 'index': [index_foo_goes_here]}} + 'collection_name': { + 'identifier': { # key identifier used for possibly deprecating later + 'index': [index_foo_goes_here]}} ... and anything else being parameters to the create_index function (including unique=True, etc) @@ -34,6 +34,7 @@ Remember, ordering of compound indexes MATTERS. Read below for more. REQUIRED READING: - http://kylebanker.com/blog/2010/09/21/the-joy-of-mongodb-indexes/ + - http://www.mongodb.org/display/DOCS/Indexes - http://www.mongodb.org/display/DOCS/Indexing+Advice+and+FAQ """ @@ -54,7 +55,6 @@ MEDIAENTRY_INDEXES = { # Matching an object to an uploader + slug. 
# MediaEntries are unique on these two combined, eg: # /u/${myuser}/m/${myslugname}/ - 'collection': 'media_entries', 'index': [('uploader', ASCENDING), ('slug', ASCENDING)], 'unique': True}, @@ -62,19 +62,16 @@ MEDIAENTRY_INDEXES = { 'mediaentry_created': { # A global index for all media entries created, in descending # order. This is used for the site's frontpage. - 'collection': 'media_entries', 'index': [('created', DESCENDING)]}, 'mediaentry_uploader_created': { # Indexing on uploaders and when media entries are created. # Used for showing a user gallery, etc. - 'collection': 'media_entries', 'index': [('uploader', ASCENDING), ('created', DESCENDING)]}} -ACTIVE_INDEXES.update( - [MEDIAENTRY_INDEXES]) +ACTIVE_INDEXES['media_entries'] = MEDIAENTRY_INDEXES # User indexes @@ -84,20 +81,18 @@ USER_INDEXES = { 'user_username_unique': { # Index usernames, and make sure they're unique. # ... I guess we might need to adjust this once we're federated :) - 'collection': 'users', - 'index': 'username'}, + 'index': 'username', + 'unique': True}, 'user_created': { # All most recently created users - 'collection': 'users', 'index': 'created'}} -ACTIVE_INDEXES.update( - [USER_INDEXES]) +ACTIVE_INDEXES['users'] = USER_INDEXES #################### # Deprecated indexes #################### -DEPRECATED_INDEXES = [] +DEPRECATED_INDEXES = {} From 0f3167c9f01487bfddb43f1e859209c502a3db75 Mon Sep 17 00:00:00 2001 From: Christopher Allan Webber Date: Mon, 27 Jun 2011 16:56:12 -0500 Subject: [PATCH 3/7] Tools to add / remove indexes from collections --- mediagoblin/db/util.py | 65 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) diff --git a/mediagoblin/db/util.py b/mediagoblin/db/util.py index 470da531..219617ec 100644 --- a/mediagoblin/db/util.py +++ b/mediagoblin/db/util.py @@ -14,8 +14,73 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . +""" +Utilities for database operations. 
+ +Some notes on migration and indexing tools: + +We store information about what the state of the database is in the +'mediagoblin' document of the 'app_metadata' collection. Keys in that +document relevant to here: + + - 'migration_number': The integer representing the current state of + the migrations +""" + +import copy # Imports that other modules might use from pymongo import DESCENDING from pymongo.errors import InvalidId from mongokit import ObjectId + +from mediagoblin.db.indexes import ACTIVE_INDEXES, DEPRECATED_INDEXES + + +def add_new_indexes(database, active_indexes=ACTIVE_INDEXES): + """ + Add any new indexes to the database. + + Returns: + A list of indexes added in form ('collection', 'index_name') + """ + indexes_added = [] + + for collection_name, indexes in active_indexes.iteritems(): + collection = database[collection_name] + collection_indexes = collection.index_information().keys() + + for index_name, index_data in indexes.iteritems(): + if not index_name in collection_indexes: + # Get a copy actually so we don't modify the actual + # structure + index_data = copy.copy(index_data) + index = index_data.pop('index') + collection.create_index( + index, name=index_name, **index_data) + + indexes_added.append((collection_name, index_name)) + + return indexes_added + + +def remove_deprecated_indexes(database, deprecated_indexes=DEPRECATED_INDEXES): + """ + Remove any deprecated indexes from the database.
+ + Returns: + A list of indexes removed in form ('collection', 'index_name') + """ + indexes_removed = [] + + for collection_name, indexes in deprecated_indexes.iteritems(): + collection = database[collection_name] + collection_indexes = collection.index_information().keys() + + for index_name, index_data in indexes.iteritems(): + if index_name in collection_indexes: + collection.drop_index(index_name) + + indexes_removed.append((collection_name, index_name)) + + return indexes_removed From b1db6f20dd3cdef9da9ad27513aeee2738c85262 Mon Sep 17 00:00:00 2001 From: Christopher Allan Webber Date: Mon, 27 Jun 2011 16:56:41 -0500 Subject: [PATCH 4/7] Adding our current indexes and removing the index that was in models.py --- mediagoblin/db/indexes.py | 18 ++++++++++++------ mediagoblin/db/models.py | 5 ----- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/mediagoblin/db/indexes.py b/mediagoblin/db/indexes.py index 85d43471..bd97f9f2 100644 --- a/mediagoblin/db/indexes.py +++ b/mediagoblin/db/indexes.py @@ -37,6 +37,8 @@ REQUIRED READING: - http://www.mongodb.org/display/DOCS/Indexes - http://www.mongodb.org/display/DOCS/Indexing+Advice+and+FAQ + + """ from pymongo import ASCENDING, DESCENDING @@ -51,7 +53,7 @@ ACTIVE_INDEXES = {} # ------------------ MEDIAENTRY_INDEXES = { - 'mediaentry_uploader_slug_unique': { + 'uploader_slug_unique': { # Matching an object to an uploader + slug. # MediaEntries are unique on these two combined, eg: # /u/${myuser}/m/${myslugname}/ @@ -59,12 +61,12 @@ MEDIAENTRY_INDEXES = { ('slug', ASCENDING)], 'unique': True}, - 'mediaentry_created': { + 'created': { # A global index for all media entries created, in descending # order. This is used for the site's frontpage. 'index': [('created', DESCENDING)]}, - 'mediaentry_uploader_created': { + 'uploader_created': { # Indexing on uploaders and when media entries are created. # Used for showing a user gallery, etc. 
'index': [('uploader', ASCENDING), @@ -78,15 +80,15 @@ ACTIVE_INDEXES['media_entries'] = MEDIAENTRY_INDEXES # ------------ USER_INDEXES = { - 'user_username_unique': { + 'username_unique': { # Index usernames, and make sure they're unique. # ... I guess we might need to adjust this once we're federated :) 'index': 'username', 'unique': True}, - 'user_created': { + 'created': { # All most recently created users 'index': 'created'}} - + ACTIVE_INDEXES['users'] = USER_INDEXES @@ -95,4 +97,8 @@ ACTIVE_INDEXES['users'] = USER_INDEXES # Deprecated indexes #################### +# @@: Do we really need to keep the index form if we're removing by +# key name? I guess it's helpful to keep the record... + + DEPRECATED_INDEXES = {} diff --git a/mediagoblin/db/models.py b/mediagoblin/db/models.py index 600b79ff..8d06ae49 100644 --- a/mediagoblin/db/models.py +++ b/mediagoblin/db/models.py @@ -108,11 +108,6 @@ class MediaEntry(Document): migration_handler = migrations.MediaEntryMigration - indexes = [ - # Referene uniqueness of slugs by uploader - {'fields': ['uploader', 'slug'], - 'unique': True}] - def main_mediafile(self): pass From 8db03585a22904404543fe324c2c06b90471aed8 Mon Sep 17 00:00:00 2001 From: Christopher Allan Webber Date: Mon, 27 Jun 2011 20:05:01 -0500 Subject: [PATCH 5/7] Updating migrate.py to actually run the indexing commands --- mediagoblin/gmg_commands/migrate.py | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/mediagoblin/gmg_commands/migrate.py b/mediagoblin/gmg_commands/migrate.py index 9e01d51c..ab1a267b 100644 --- a/mediagoblin/gmg_commands/migrate.py +++ b/mediagoblin/gmg_commands/migrate.py @@ -16,6 +16,7 @@ from mediagoblin.db import migrations +from mediagoblin.db import util as db_util from mediagoblin.gmg_commands import util as commands_util @@ -27,8 +28,17 @@ def migrate_parser_setup(subparser): def migrate(args): mgoblin_app = commands_util.setup_app(args) - print "Applying migrations..." 
+ # Clear old indexes + print "== Clearing old indexes... ==" + removed_indexes = db_util.remove_deprecated_indexes(mgoblin_app.db) + + for collection, index_name in removed_indexes: + print "Removed index '%s' in collection '%s'" % ( + index_name, collection) + + # Migrate + print "== Applying migrations... ==" for model_name in migrations.MIGRATE_CLASSES: model = getattr(mgoblin_app.db, model_name) @@ -38,4 +48,10 @@ def migrate(args): migration = model.migration_handler(model) migration.migrate_all(collection=model.collection) - print "... done." + # Add new indexes + print "== Adding new indexes... ==" + new_indexes = db_util.add_new_indexes(mgoblin_app.db) + + for collection, index_name in new_indexes: + print "Added index '%s' to collection '%s'" % ( + index_name, collection) From 1f7749995dbbc2b6063d93e59948e008e60567eb Mon Sep 17 00:00:00 2001 From: Christopher Allan Webber Date: Mon, 27 Jun 2011 20:40:31 -0500 Subject: [PATCH 6/7] Documentation updates to indexes.py's docstring Includes information on how to deprecate indexes. --- mediagoblin/db/indexes.py | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/mediagoblin/db/indexes.py b/mediagoblin/db/indexes.py index bd97f9f2..bbcceb6d 100644 --- a/mediagoblin/db/indexes.py +++ b/mediagoblin/db/indexes.py @@ -17,9 +17,12 @@ """ Indexes for the local database. +To add new indexes +------------------ + Indexes are recorded in the following format: -INDEXES = { +ACTIVE_INDEXES = { 'collection_name': { 'identifier': { # key identifier used for possibly deprecating later 'index': [index_foo_goes_here]}} @@ -39,6 +42,21 @@ REQUIRED READING: - http://www.mongodb.org/display/DOCS/Indexing+Advice+and+FAQ +To remove deprecated indexes +---------------------------- + +Removing deprecated indexes is easier, just do: + +DEPRECATED_INDEXES = { + 'collection_name': [ + 'deprecated_index_identifier1', 'deprecated_index_identifier2']} + +... etc.
+ +If an index has been deprecated that identifier should NEVER BE USED +AGAIN. Eg, if you previously had 'awesomepants_unique', you shouldn't +use 'awesomepants_unique' again, you should create a totally new name +or at worst use 'awesomepants_unique2'. """ from pymongo import ASCENDING, DESCENDING @@ -97,8 +115,4 @@ ACTIVE_INDEXES['users'] = USER_INDEXES # Deprecated indexes #################### -# @@: Do we really need to keep the index form if we're removing by -# key name? I guess it's helpful to keep the record... - - DEPRECATED_INDEXES = {} From 2527754202ea9530c9046270e63d71581923dc77 Mon Sep 17 00:00:00 2001 From: Christopher Allan Webber Date: Mon, 27 Jun 2011 20:48:35 -0500 Subject: [PATCH 7/7] Documenting index utils and adjusting how remove_deprecated_indexes gets arguments More specifically, we now just take a dictionary of lists, like: {'collection': ['index_identifier1', 'index_identifier2']} Previously we took something with more info like in add_new_indexes, but that extra info isn't really necessary. --- mediagoblin/db/util.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/mediagoblin/db/util.py b/mediagoblin/db/util.py index 219617ec..46f899f7 100644 --- a/mediagoblin/db/util.py +++ b/mediagoblin/db/util.py @@ -41,6 +41,16 @@ def add_new_indexes(database, active_indexes=ACTIVE_INDEXES): """ Add any new indexes to the database. + Args: + - database: pymongo or mongokit database instance. + - active_indexes: indexes to possibly add in the pattern of: + {'collection_name': { + 'identifier': { + 'index': [index_foo_goes_here], + 'unique': True}}} + where 'index' is the index to add and all other options are + arguments for collection.create_index. + Returns: A list of indexes added in form ('collection', 'index_name') """ @@ -68,16 +78,21 @@ def remove_deprecated_indexes(database, deprecated_indexes=DEPRECATED_INDEXES): """ Remove any deprecated indexes from the database.
+ Args: + - database: pymongo or mongokit database instance. + - deprecated_indexes: the indexes to deprecate in the pattern of: + {'collection': ['index_identifier1', 'index_identifier2']} + Returns: A list of indexes removed in form ('collection', 'index_name') """ indexes_removed = [] - for collection_name, indexes in deprecated_indexes.iteritems(): + for collection_name, index_names in deprecated_indexes.iteritems(): collection = database[collection_name] collection_indexes = collection.index_information().keys() - for index_name, index_data in indexes.iteritems(): + for index_name in index_names: if index_name in collection_indexes: collection.drop_index(index_name)