The URL format checker now works correctly

...though it isn't checking the right thing
2014-05-07 13:36:52 -05:00
parent 9f5d388ec0
commit af3a9107a9
1 changed files with 44 additions and 33 deletions
--- a/mediagoblin/gmg_commands/batchaddmedia.py
+++ b/mediagoblin/gmg_commands/batchaddmedia.py
@@ -15,7 +15,7 @@
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 import os
-import tempfile, tarfile, zipfile, subprocess, requests
+import copy, tempfile, tarfile, zipfile, subprocess, re, requests
 from csv import reader as csv_reader
 from urlparse import urlparse
 from pyld import jsonld
@@ -27,8 +27,10 @@ from mediagoblin.submit.lib import (
 from mediagoblin.tools.translate import lazy_pass_to_ugettext as _
 from mediagoblin import mg_globals
-from jsonschema import validate 
+from jsonschema import validate, FormatChecker, draft4_format_checker
 from jsonschema.exceptions import ValidationError
 from jsonschema.compat import str_types
 def parser_setup(subparser):
    subparser.description = """\
@@ -48,11 +50,6 @@ Core properties (http://dublincore.org/documents/dces/). Both "location.csv" and
 "metadata.csv" must begin with a row demonstrating the order of the columns. We
 have provided an example of these files at <url to be added>
 """))
    subparser.add_argument(
        "-l", "--license",
        help=(
           "License these media entry will be released under, if all the same. "
           "Should be a URL."))
    subparser.add_argument(
        '--celery',
        action='store_true',
@@ -149,6 +146,8 @@ zip files and directories"
        title = sanitized_metadata.get('dcterms:title')
        description = sanitized_metadata.get('dcterms:description')
        # TODO: this isn't the same thing
        license = sanitized_metadata.get('dcterms:rights')
        filename = url.path.split()[-1]
@@ -234,36 +233,48 @@ def build_json_ld_metadata(metadata_dict):
        output_dict[p] = description
    return output_dict
 ## Set up the MediaGoblin checker
 # 
 URL_REGEX = re.compile(
    r'^[a-z]+://([^/:]+|([0-9]{1,3}\.){3}[0-9]{1,3})(:[0-9]+)?(\/.*)?$',
    re.IGNORECASE)
 def is_uri(instance):
    if not isinstance(instance, str_types):
        return True
    return URL_REGEX.match(instance)
 class DefaultChecker(FormatChecker):
    checkers = copy.deepcopy(draft4_format_checker.checkers)
 DefaultChecker.checkers[u"uri"] = (is_uri, ())
 DEFAULT_CHECKER = DefaultChecker()
 def check_metadata_format(metadata_dict):
    schema = {
-    "$schema":"http://json-schema.org/schema#",
+        "$schema": "http://json-schema.org/schema#",
-    "properties":{
+
-        "media:id":{},
+        "type": "object",
-        "dcterms:contributor":{},
+        "properties": {
-        "dcterms:coverage":{},
+            "dcterms:rights": {
-        "dcterms:created":{},
+                "format": "uri",
-        "dcterms:creator":{},
+                "type": "string",
-        "dcterms:date":{},
+            },
-        "dcterms:description":{},
+            "dcterms:created": {
-        "dcterms:format":{},
+                
-        "dcterms:identifier":{},
+            }
        "dcterms:language":{},
        "dcterms:publisher":{},
        "dcterms:relation":{},
        "dcterms:rights" : {
            "format":"uri",
            "type":"string"
        },
-        "dcterms:source":{},
+        # "required": ["dcterms:title", "media:id"],
-        "dcterms:subject":{},
+    }
-        "dcterms:title":{},
+
        "dcterms:type":{}
    },
    "additionalProperties": False,
    "required":["dcterms:title","media:id"]
 }
    try:
-        validate(metadata_dict, schema)
+        validate(metadata_dict, schema,
                 format_checker=DEFAULT_CHECKER)
        output_dict = metadata_dict
        # "media:id" is only for internal use, so we delete it for the output
        del output_dict['media:id']