Starting to add metadata tools, as well as mediagoblin's schema
This commit is contained in:
parent
a448628636
commit
e5e2cc2f16
47
mediagoblin/static/metadata/mediagoblin-0.1.dev.jsonld
Normal file
47
mediagoblin/static/metadata/mediagoblin-0.1.dev.jsonld
Normal file
@ -0,0 +1,47 @@
|
|||||||
|
{
|
||||||
|
"@context": {
|
||||||
|
"qb": "http://purl.org/linked-data/cube#",
|
||||||
|
"grddl": "http://www.w3.org/2003/g/data-view#",
|
||||||
|
"ma": "http://www.w3.org/ns/ma-ont#",
|
||||||
|
"owl": "http://www.w3.org/2002/07/owl#",
|
||||||
|
"rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
|
||||||
|
"rdfa": "http://www.w3.org/ns/rdfa#",
|
||||||
|
"rdfs": "http://www.w3.org/2000/01/rdf-schema#",
|
||||||
|
"rif": "http://www.w3.org/2007/rif#",
|
||||||
|
"rr": "http://www.w3.org/ns/r2rml#",
|
||||||
|
"skos": "http://www.w3.org/2004/02/skos/core#",
|
||||||
|
"skosxl": "http://www.w3.org/2008/05/skos-xl#",
|
||||||
|
"wdr": "http://www.w3.org/2007/05/powder#",
|
||||||
|
"void": "http://rdfs.org/ns/void#",
|
||||||
|
"wdrs": "http://www.w3.org/2007/05/powder-s#",
|
||||||
|
"xhv": "http://www.w3.org/1999/xhtml/vocab#",
|
||||||
|
"xml": "http://www.w3.org/XML/1998/namespace",
|
||||||
|
"xsd": "http://www.w3.org/2001/XMLSchema#",
|
||||||
|
"prov": "http://www.w3.org/ns/prov#",
|
||||||
|
"sd": "http://www.w3.org/ns/sparql-service-description#",
|
||||||
|
"org": "http://www.w3.org/ns/org#",
|
||||||
|
"gldp": "http://www.w3.org/ns/people#",
|
||||||
|
"cnt": "http://www.w3.org/2008/content#",
|
||||||
|
"dcat": "http://www.w3.org/ns/dcat#",
|
||||||
|
"earl": "http://www.w3.org/ns/earl#",
|
||||||
|
"ht": "http://www.w3.org/2006/http#",
|
||||||
|
"ptr": "http://www.w3.org/2009/pointers#",
|
||||||
|
"cc": "http://creativecommons.org/ns#",
|
||||||
|
"ctag": "http://commontag.org/ns#",
|
||||||
|
"dc": "http://purl.org/dc/terms/",
|
||||||
|
"dc11": "http://purl.org/dc/elements/1.1/",
|
||||||
|
"dcterms": "http://purl.org/dc/terms/",
|
||||||
|
"foaf": "http://xmlns.com/foaf/0.1/",
|
||||||
|
"gr": "http://purl.org/goodrelations/v1#",
|
||||||
|
"ical": "http://www.w3.org/2002/12/cal/icaltzd#",
|
||||||
|
"og": "http://ogp.me/ns#",
|
||||||
|
"rev": "http://purl.org/stuff/rev#",
|
||||||
|
"sioc": "http://rdfs.org/sioc/ns#",
|
||||||
|
"v": "http://rdf.data-vocabulary.org/#",
|
||||||
|
"vcard": "http://www.w3.org/2006/vcard/ns#",
|
||||||
|
"schema": "http://schema.org/",
|
||||||
|
"describedby": "http://www.w3.org/2007/05/powder-s#describedby",
|
||||||
|
"license": "http://www.w3.org/1999/xhtml/vocab#license",
|
||||||
|
"role": "http://www.w3.org/1999/xhtml/vocab#role"
|
||||||
|
}
|
||||||
|
}
|
106
mediagoblin/tools/metadata.py
Normal file
106
mediagoblin/tools/metadata.py
Normal file
@ -0,0 +1,106 @@
|
|||||||
|
# GNU MediaGoblin -- federated, autonomous media hosting
|
||||||
|
# Copyright (C) 2011, 2012 MediaGoblin contributors. See AUTHORS.
|
||||||
|
#
|
||||||
|
# This program is free software: you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Affero General Public License as published by
|
||||||
|
# the Free Software Foundation, either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU Affero General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU Affero General Public License
|
||||||
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
|
||||||
|
import os
|
||||||
|
import copy
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
from pkg_resources import resource_filename
|
||||||
|
|
||||||
|
import dateutil.parser
|
||||||
|
from pyld import jsonld
|
||||||
|
from jsonschema import validate, FormatChecker, draft4_format_checker
|
||||||
|
from jsonschema.compat import str_types
|
||||||
|
|
||||||
|
|
||||||
|
# Filesystem path of the JSON-LD context file shipped inside the
# "mediagoblin" package.  pkg_resources resource names are always
# "/"-separated regardless of the host OS, so the name is joined with "/"
# rather than os.path.sep.
MEDIAGOBLIN_CONTEXT_PATH = resource_filename(
    "mediagoblin",
    "/".join(["static", "metadata", "mediagoblin-0.1.dev.jsonld"]))

# Parsed content of that file, loaded once at import time.  open() replaces
# the Python-2-only file() builtin, and the with-block guarantees the handle
# is closed as soon as the document has been read.
with open(MEDIAGOBLIN_CONTEXT_PATH) as _context_file:
    MEDIAGOBLIN_CONTEXT = json.load(_context_file)
|
||||||
|
|
||||||
|
|
||||||
|
########################################################
|
||||||
|
## Set up the MediaGoblin format checker for json-schema
|
||||||
|
########################################################
|
||||||
|
|
||||||
|
# Loose, case-insensitive URL matcher:
#   <letters-only scheme>://<host>[:<digits>][/<anything>]
# where <host> is either a run of characters other than "/" and ":", or a
# dotted quad made of 1-3 digit groups.
URL_REGEX = re.compile(
    r'^[a-z]+://([^/:]+|([0-9]{1,3}\.){3}[0-9]{1,3})(:[0-9]+)?(\/.*)?$',
    flags=re.I)
|
||||||
|
|
||||||
|
def is_uri(instance):
    """
    jsonschema format check for "uri" values.

    Only strings are inspected: a string is matched against URL_REGEX and
    the match object (truthy) or None (falsy) is returned.  Any non-string
    instance is accepted by returning True.
    """
    if isinstance(instance, str_types):
        return URL_REGEX.match(instance)

    # Not a string: nothing to check here.
    return True
|
||||||
|
|
||||||
|
def is_datetime(instance):
    """
    jsonschema format check for date / datetime strings.

    Only strings are inspected: a string is handed to
    dateutil.parser.parse and whatever that call returns is passed back.
    Exceptions raised by the parser are not caught here.  Any non-string
    instance is accepted by returning True.
    """
    if isinstance(instance, str_types):
        return dateutil.parser.parse(instance)

    # Not a string: nothing to check here.
    return True
|
||||||
|
|
||||||
|
|
||||||
|
class DefaultChecker(FormatChecker):
    """
    MediaGoblin's json-schema format checker.

    Begins with the same checks as jsonschema's draft-4 format checker and
    is meant to be extended with a few MediaGoblin-specific ones.
    """
    # Deep copy, so that changing entries in this table does not modify the
    # table owned by jsonschema's draft4_format_checker.
    checkers = copy.deepcopy(draft4_format_checker.checkers)
|
||||||
|
|
||||||
|
|
||||||
|
# Register MediaGoblin's own checks in the checker table.  Each entry is a
# pair: (check function, exception types raised by that function which
# should be treated as a format failure).
DefaultChecker.checkers[u"uri"] = (is_uri, ())

# dateutil's parser is documented to raise OverflowError for values that
# exceed the platform's integer range, in addition to ValueError/TypeError
# for unparseable input.  All three are listed so that such a string is
# reported as an invalid "date-time" instead of escaping as a raw exception.
DefaultChecker.checkers[u"date-time"] = (
    is_datetime, (ValueError, TypeError, OverflowError))

# Ready-made checker instance, passed to validate() by this module.
DEFAULT_CHECKER = DefaultChecker()
|
||||||
|
|
||||||
|
# Minimal default schema: checks only the properties we deem important
|
||||||
|
|
||||||
|
DEFAULT_SCHEMA = {
    "$schema": "http://json-schema.org/schema#",

    # The metadata document itself must be a JSON object.
    "type": "object",
    "properties": {
        # Declared as a string carrying the "uri" format.
        "dcterms:rights": {"format": "uri", "type": "string"},
        # Declared as a string carrying the "date-time" format.
        "dcterms:created": {"format": "date-time", "type": "string"},
    },
}
|
||||||
|
|
||||||
|
|
||||||
|
def compact_and_validate(metadata, context=MEDIAGOBLIN_CONTEXT,
                         schema=DEFAULT_SCHEMA):
    """
    Compact a JSON-LD document and run a json-schema check in one call.

    ``metadata`` is compacted against ``context`` with pyld, then checked
    against ``schema`` using DEFAULT_CHECKER for format validation.  The
    compacted document is returned.

    A jsonschema.exceptions.ValidationError is raised when the schema check
    fails.

    This is a convenience helper only; callers are free to perform the
    compaction and validation steps themselves.
    """
    result = jsonld.compact(metadata, context)

    # NOTE(review): the schema check is applied to the original ``metadata``
    # argument, not to the compacted ``result`` -- confirm this is intended.
    validate(metadata, schema, format_checker=DEFAULT_CHECKER)

    return result
|
Loading…
x
Reference in New Issue
Block a user