Skip to content
22 changes: 17 additions & 5 deletions ckanext/spatial/harvesters/csw.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,14 +159,26 @@ def fetch_stage(self,harvest_object):
self._save_object_error('Error contacting the CSW server: %s' % e,
harvest_object)
return False


# load config
self._set_source_config(harvest_object.source.config)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you document the new output_schema option and its default value in here so others are aware of it?

https://github.com/ckan/ckanext-spatial/blob/master/doc/harvesters.rst

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added a fallback to the default output schema in case the server does not support the iso19139 -> 19115 transformation.
The fallback logs a warning and retries the request with the default schema (iso19139 -> iso19139).

# get output_schema from config
output_schema = self.source_config.get('output_schema', self.output_schema())
identifier = harvest_object.guid
try:
record = self.csw.getrecordbyid([identifier], outputschema=self.output_schema())
record = self.csw.getrecordbyid([identifier], outputschema=output_schema)
except Exception as e:
self._save_object_error('Error getting the CSW record with GUID %s' % identifier, harvest_object)
return False

try:
log.warn('Unable to fetch GUID {} with output schema: {}'.format(identifier, output_schema))
if output_schema == self.output_schema():
raise e
log.info('Fetching GUID {} with output schema: {}'.format(identifier, self.output_schema()))
# retry with default output schema
record = self.csw.getrecordbyid([identifier], outputschema=self.output_schema())
except Exception as e:
self._save_object_error('Error getting the CSW record with GUID {}'.format(identifier), harvest_object)
return False

if record is None:
self._save_object_error('Empty record for GUID %s' % identifier,
harvest_object)
Expand Down
63 changes: 51 additions & 12 deletions ckanext/spatial/lib/csw_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,14 +70,40 @@ class CswService(OwsService):
def __init__(self, endpoint=None):
    """Set up the service and cache what the server says it supports.

    Besides the default sort order, this requests the capabilities of
    ``endpoint``, parses the ``'response'`` entry of the result into an
    element tree (``self.capabilities``) and stores, per CSW operation,
    the output schemas returned by ``_get_output_schemas``.

    :param endpoint: passed to the parent constructor and to
        ``getcapabilities``
    """
    super(CswService, self).__init__(endpoint)
    self.sortby = SortBy([SortProperty('dc:identifier')])

    # Parse the capabilities document once, up front.
    raw_capabilities = self.getcapabilities(endpoint)['response']
    capabilities_root = etree.fromstring(raw_capabilities)
    self.capabilities = etree.ElementTree(capabilities_root)

    # Same lookup order as before: GetRecords first, then GetRecordById.
    self.output_schemas = {
        operation: self._get_output_schemas(operation)
        for operation in ('GetRecords', 'GetRecordById')
    }

def _get_output_schemas(self, operation):
    """Return the output schemas the server advertises for ``operation``.

    The ``Operation`` element named ``operation`` is looked up in the parsed
    capabilities document (``self.capabilities``). The ``Value`` entries of
    its ``outputSchema`` parameter are then matched against the namespace
    map of that parameter element.

    :param operation: name of the CSW operation, e.g. ``'GetRecords'``
    :returns: dict mapping each namespace-map key to its namespace URI, for
        every URI listed as an ``outputSchema`` value; an empty dict when
        the operation or its ``outputSchema`` parameter is not present
    :raises CswError: if the capabilities root declares no ``ows`` namespace
    """
    _cap_ns = self.capabilities.getroot().nsmap
    _ows_ns = _cap_ns.get('ows')
    if not _ows_ns:
        raise CswError('Bad getcapabilities response: OWS namespace not found ' + str(_cap_ns))

    # ".//" is the explicit spelling of the descendant search; a leading
    # "//" on ElementTree.find() is rewritten to it with a FutureWarning.
    _op = self.capabilities.find(".//{{{}}}Operation[@name='{}']".format(_ows_ns, operation))
    if _op is None:
        # Operation not advertised: report "no schema supported" rather
        # than failing with AttributeError on None.
        return {}

    _schemas = _op.find("{{{}}}Parameter[@name='outputSchema']".format(_ows_ns))
    if _schemas is None:
        return {}

    # Materialise the values as a set: a bare map() is a one-shot iterator
    # on Python 3, so repeated "in" checks would exhaust it after the first
    # lookup and silently drop supported schemas.
    _values = {v.text for v in _schemas.findall("{{{}}}Value".format(_ows_ns))}

    return {key: value for key, value in _schemas.nsmap.items() if value in _values}

def getrecords(self, qtype=None, keywords=[],
typenames="csw:Record", esn="brief",
skip=0, count=10, outputschema="gmd", **kw):
from owslib.csw import namespaces

constraints = []
csw = self._ows(**kw)

# check target csw server capabilities for requested output schema
output_schemas = self.output_schemas['GetRecords']
if not output_schemas.get(outputschema):
raise CswError('Output schema \'{}\' not supported by target server: '.format(output_schemas))

if qtype is not None:
constraints.append(PropertyIsEqualTo("dc:type", qtype))

Expand All @@ -87,7 +113,7 @@ def getrecords(self, qtype=None, keywords=[],
"esn": esn,
"startposition": skip,
"maxrecords": count,
"outputschema": namespaces[outputschema],
"outputschema": output_schemas[outputschema],
"sortby": self.sortby
}
log.info('Making CSW request: getrecords2 %r', kwa)
Expand All @@ -102,10 +128,15 @@ def getrecords(self, qtype=None, keywords=[],
def getidentifiers(self, qtype=None, typenames="csw:Record", esn="brief",
keywords=[], limit=None, page=10, outputschema="gmd",
startposition=0, cql=None, **kw):
from owslib.csw import namespaces

constraints = []
csw = self._ows(**kw)

# check target csw server capabilities for requested output schema
output_schemas = self.output_schemas['GetRecords']
if not output_schemas.get(outputschema):
raise CswError('Output schema \'{}\' not supported by target server: '.format(output_schemas))

if qtype is not None:
constraints.append(PropertyIsEqualTo("dc:type", qtype))

Expand All @@ -115,7 +146,7 @@ def getidentifiers(self, qtype=None, typenames="csw:Record", esn="brief",
"esn": esn,
"startposition": startposition,
"maxrecords": page,
"outputschema": namespaces[outputschema],
"outputschema": output_schemas[outputschema],
"cql": cql,
"sortby": self.sortby
}
Expand All @@ -129,7 +160,6 @@ def getidentifiers(self, qtype=None, typenames="csw:Record", esn="brief",
err = 'Error getting identifiers: %r' % \
csw.exceptionreport.exceptions
#log.error(err)
raise CswError(err)

if matches == 0:
matches = csw.results['matches']
Expand All @@ -154,11 +184,17 @@ def getidentifiers(self, qtype=None, typenames="csw:Record", esn="brief",
kwa["startposition"] = startposition

def getrecordbyid(self, ids=[], esn="full", outputschema="gmd", **kw):
from owslib.csw import namespaces

csw = self._ows(**kw)

# fetch target csw server capabilities for requested output schema
output_schemas=output_schemas = self.output_schemas['GetRecordById']
if not output_schemas.get(outputschema):
raise CswError('Output schema \'{}\' not supported by target server: '.format(output_schemas))
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I should probably be more tolerant here: log an ERROR and return instead of raising.


kwa = {
"esn": esn,
"outputschema": namespaces[outputschema],
"outputschema": output_schemas[outputschema],
}
# Ordinary Python version's don't support the metadata argument
log.info('Making CSW request: getrecordbyid %r %r', ids, kwa)
Expand All @@ -168,14 +204,17 @@ def getrecordbyid(self, ids=[], esn="full", outputschema="gmd", **kw):
csw.exceptionreport.exceptions
#log.error(err)
raise CswError(err)
if not csw.records:
elif csw.records:
record = self._xmd(list(csw.records.values())[0])
elif csw.response:
record = self._xmd(etree.fromstring(csw.response))
else:
return
record = self._xmd(list(csw.records.values())[0])

## strip off the enclosing results container, we only want the metadata
#md = csw._exml.find("/gmd:MD_Metadata")#, namespaces=namespaces)
# Ordinary Python version's don't support the metadata argument
md = csw._exml.find("/{http://www.isotc211.org/2005/gmd}MD_Metadata")
# '/{schema}*' expression should be safe enough and is able to match the
# desired schema followed by both MD_Metadata or MI_Metadata (iso19115[-2])
md = csw._exml.find("/{{{schema}}}*".format(schema=output_schemas[outputschema]))
mdtree = etree.ElementTree(md)
try:
record["xml"] = etree.tostring(mdtree, pretty_print=True, encoding=str)
Expand Down
2 changes: 2 additions & 0 deletions doc/harvesters.rst
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,9 @@ The currently supported configuration options are:
and spaces replaced with dashes. Setting this option to False gives the same effect as leaving it unset.
* ``validator_profiles``: A list of string that specifies a list of validators that will be applied to the
current harvester, overriding the global ones defined by the 'ckan.spatial.validator.profiles' option.
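* ``output_schema``: the namespace prefix (for example ``gmd``) of the schema to request as outputSchema_ in CSW requests. It must be one of the output schemas advertised by the target server's capabilities. When unset, the harvester's default output schema is used; if a record cannot be fetched with the configured schema, the harvester retries with the default one.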

.. _outputSchema: https://docs.opengeospatial.org/is/12-176r7/12-176r7.html#72

Customizing the harvesters
--------------------------
Expand Down