diff --git a/ckanext/validation/plugin/__init__.py b/ckanext/validation/plugin/__init__.py
index 6c38f723..4c352c0e 100644
--- a/ckanext/validation/plugin/__init__.py
+++ b/ckanext/validation/plugin/__init__.py
@@ -156,7 +156,16 @@ def _process_schema_fields(self, data_dict):
         return data_dict
 
     if ckan_2_10:
-        pass
+        def before_resource_create(self, context, data_dict):
+            """Flag a resource creation and process its schema fields.
+
+            The ``_resource_create_call`` marker stored in ``context`` is
+            consumed by ``after_dataset_update`` so that only the newly
+            created resource is validated.
+            """
+            context["_resource_create_call"] = True
+            return self._process_schema_fields(data_dict)
+
     else:
         def before_create(self, context, data_dict):
 
@@ -167,7 +176,23 @@ def before_create(self, context, data_dict):
 
 
     if ckan_2_10:
-        pass
+        def after_dataset_create(self, context, data_dict):
+            """Hand the resources of a new dataset over for validation.
+
+            Only acts when the create mode is ``async``.
+            """
+            if get_create_mode_from_config() != u'async':
+                return
+
+            if not self._data_dict_is_dataset(data_dict):
+                # This is a resource. Resources don't need to be handled
+                # here as there is always a previous `package_update` call
+                # that will trigger the update hooks
+                return
+
+            for resource in data_dict.get(u'resources') or []:
+                self._handle_validation_for_resource(context, resource)
+
     else:
         def after_create(self, context, data_dict):
 
@@ -216,7 +241,57 @@ def _handle_validation_for_resource(self, context, resource):
             _run_async_validation(resource[u'id'])
 
     if ckan_2_10:
-        pass
+        def before_resource_update(self, context, current_resource,
+                                   updated_resource):
+            """Process schema fields and flag the resource for validation.
+
+            The owning package is recorded in ``self.packages_to_skip`` so
+            that the ``package_update`` caused by a resource API call does
+            not validate the whole dataset.  In ``async`` update mode the
+            resource id is added to ``self.resources_to_validate`` when a
+            file was uploaded or the URL, schema or format changed, and the
+            new format is in ``settings.SUPPORTED_FORMATS``.
+
+            Returns the processed ``updated_resource``.
+            """
+            updated_resource = self._process_schema_fields(updated_resource)
+
+            # The call originates from a resource API, so don't validate the
+            # entire package.
+            package_id = updated_resource.get('package_id')
+            if not package_id:
+                existing_resource = t.get_action('resource_show')(
+                    context={'ignore_auth': True},
+                    data_dict={'id': updated_resource['id']})
+                if existing_resource:
+                    package_id = existing_resource['package_id']
+            if package_id:
+                # Never register a `None` key: it would match any dataset
+                # dict that lacks an `id`.
+                self.packages_to_skip[package_id] = True
+
+            if get_update_mode_from_config() != u'async':
+                return updated_resource
+
+            new_format = (updated_resource.get(u'format') or u'').lower()
+            old_format = (current_resource.get(u'format') or u'').lower()
+            changed = (
+                # New file uploaded
+                updated_resource.get(u'upload')
+                # External URL changed
+                or updated_resource.get(u'url') != current_resource.get(u'url')
+                # Schema changed
+                or (updated_resource.get(u'schema')
+                    != current_resource.get(u'schema'))
+                # Format changed
+                or new_format != old_format
+            )
+            # Make sure format is supported
+            if changed and new_format in settings.SUPPORTED_FORMATS:
+                self.resources_to_validate[updated_resource[u'id']] = True
+
+            return updated_resource
+
     else:
         def before_update(self, context, current_resource, updated_resource):
 
@@ -259,7 +334,83 @@ def before_update(self, context, current_resource, updated_resource):
             return updated_resource
 
     if ckan_2_10:
-        pass
+        def after_dataset_update(self, context, data_dict):
+            """Trigger async validation after a dataset or resource update.
+
+            Does nothing unless the update or create mode is ``async``.
+            For a dataset dict, either only the newly created resource or
+            every resource not pending in ``self.resources_to_validate`` is
+            passed to ``_handle_validation_for_resource``.  For a resource
+            dict, a pending validation is started unless an
+            ``IDataValidation`` plugin vetoes it.
+            """
+            # Need to allow create as well because resource_create calls
+            # package_update
+            if (get_update_mode_from_config() != u'async'
+                    and get_create_mode_from_config() != u'async'):
+                return
+
+            if context.get('_validation_performed'):
+                # Ugly, but needed to avoid circular loops caused by the
+                # validation job calling resource_patch (which calls
+                # package_update)
+                del context['_validation_performed']
+                return
+
+            if self._data_dict_is_dataset(data_dict):
+                package_id = data_dict.get('id')
+                if (self.packages_to_skip.pop(package_id, None)
+                        or context.get('save', False)):
+                    # Either we're updating an individual resource,
+                    # or we're updating the package metadata via the web
+                    # form; in both cases, we don't need to validate every
+                    # resource.
+                    return
+
+                # Tolerate a dataset dict with no (or null) `resources`
+                # instead of raising on `["resources"][-1]`.
+                resources = data_dict.get(u'resources') or []
+
+                if context.pop("_resource_create_call", False) and resources:
+                    new_resource = resources[-1]
+                    if new_resource:
+                        # This is part of a resource_create call, we only
+                        # need to validate the new resource being created
+                        self._handle_validation_for_resource(
+                            context, new_resource)
+                        return
+
+                for resource in resources:
+                    if resource[u'id'] in self.resources_to_validate:
+                        # This is part of a resource_update call, it will
+                        # be handled when the update hook runs for the
+                        # resource itself
+                        continue
+                    # This is an actual package_update call, validate the
+                    # resources if necessary
+                    self._handle_validation_for_resource(context, resource)
+                return
+
+            # This is a resource.
+            # NOTE(review): CKAN 2.10 appears to deliver resource updates to
+            # `after_resource_update` rather than this hook -- confirm that
+            # this branch is still reachable.
+            resource_id = data_dict[u'id']
+            if resource_id not in self.resources_to_validate:
+                return
+
+            # Consume the pending marker before asking the plugins, so a
+            # veto does not leave a stale entry behind.
+            del self.resources_to_validate[resource_id]
+
+            for plugin in p.PluginImplementations(IDataValidation):
+                if not plugin.can_validate(context, data_dict):
+                    log.debug('Skipping validation for resource %s',
+                              resource_id)
+                    return
+
+            _run_async_validation(resource_id)
+
     else:
         def after_update(self, context, data_dict):
 