Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion geospaas/base_viewer/apps.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@


class BaseViewerConfig(AppConfig):
name = 'base_viewer'
name = 'geospaas.base_viewer'
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def crawl_and_ingest(url, **options):
# Create Dataset from OPeNDAP url - this is necessary to get all metadata
gds, cr = NansatDataset.objects.get_or_create(url, uri_service_name=name,
uri_service_type=service)
except (IOError, AttributeError) as e:
except (IOError, AttributeError, ValueError) as e:
# warnings.warn(e.message)
continue
if cr:
Expand Down
40 changes: 30 additions & 10 deletions geospaas/nansat_ingestor/managers.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import json
import logging
import uuid
import warnings

import pythesint as pti
from django.contrib.gis.geos import WKTReader
from django.db import connection
from django.db import models

from geospaas.catalog.managers import FILE_SERVICE_NAME, LOCAL_FILE_SERVICE
Expand Down Expand Up @@ -56,23 +57,37 @@ def get_or_create(self,
uris = DatasetURI.objects.filter(uri=uri, **uri_filter_args)
if len(uris) > 0:
return uris[0].dataset, False
connection.close()

# Open file with Nansat
n = Nansat(nansat_filename(uri), **kwargs)

# get metadata from Nansat and get objects from vocabularies
n_metadata = n.get_metadata()

entry_id = n_metadata.get('entry_id', None)
entry_id = n_metadata.get('entry_id', n_metadata.get('id', None))
# set compulsory metadata (source)
platform, _ = Platform.objects.get_or_create(
json.loads(n_metadata['platform']))
instrument, _ = Instrument.objects.get_or_create(
json.loads(n_metadata['instrument']))
pp = n_metadata['platform']
try:
pp_dict = json.loads(pp)
except json.JSONDecodeError:
pp_entry = [elem.strip() for elem in pp.split('>')]
pp_dict = pti.get_gcmd_platform(pp_entry[-1].split('(')[1][0:-1])
platform, _ = Platform.objects.get_or_create(pp_dict)
connection.close()
ii = n_metadata['instrument']
try:
ii_dict = json.loads(ii)
except json.JSONDecodeError:
ii_entry = [elem.strip() for elem in ii.split('>')]
ii_dict = pti.get_gcmd_instrument(ii_entry[-1].split('(')[1][0:-1])
instrument, _ = Instrument.objects.get_or_create(ii_dict)
connection.close()
specs = n_metadata.get('specs', '')
source, _ = Source.objects.get_or_create(platform=platform,
instrument=instrument,
specs=specs)
connection.close()

default_char_fields = {
# Adding NERSC_ in front of the id violates the string representation of the uuid
Expand All @@ -88,9 +103,10 @@ def get_or_create(self,
existing_ds = Dataset.objects.get(entry_id=entry_id)
except Dataset.DoesNotExist:
existing_ds = None
connection.close()
for name in default_char_fields:
if name not in n_metadata:
warnings.warn('%s is not provided in Nansat metadata!' % name)
logging.debug('%s is not provided in Nansat metadata!' % name)
# prevent overwriting of existing values by defaults
if existing_ds:
options[name] = existing_ds.__getattribute__(name)
Expand All @@ -103,7 +119,7 @@ def get_or_create(self,
'gcmd_location': {'model': Location,
'value': pti.get_gcmd_location('SEA SURFACE')},
'data_center': {'model': DataCenter,
'value': pti.get_gcmd_provider('NERSC')},
'value': pti.get_gcmd_provider('NO/MET')},
'ISO_topic_category': {'model': ISOTopicCategory,
'value': pti.get_iso19115_topic_category('Oceans')},
}
Expand All @@ -113,12 +129,12 @@ def get_or_create(self,
value = default_foreign_keys[name]['value']
model = default_foreign_keys[name]['model']
if name not in n_metadata:
warnings.warn('%s is not provided in Nansat metadata!' % name)
logging.debug('%s is not provided in Nansat metadata!' % name)
else:
try:
value = json.loads(n_metadata[name])
except:
warnings.warn('%s value of %s metadata provided in Nansat is wrong!' %
logging.debug('%s value of %s metadata provided in Nansat is wrong!' %
(n_metadata[name], name))
if existing_ds:
options[name] = existing_ds.__getattribute__(name)
Expand All @@ -130,6 +146,7 @@ def get_or_create(self,
n.reproject_gcps()
geolocation = GeographicLocation.objects.get_or_create(
geometry=WKTReader().read(n.get_border_wkt(nPoints=n_points)))[0]
connection.close()

# create dataset
# - the get_or_create method should use get_or_create here as well,
Expand All @@ -145,6 +162,7 @@ def get_or_create(self,
'entry_title': options["entry_title"],
'summary': options["summary"]}
)
connection.close()

# create parameter
all_band_meta = n.bands()
Expand All @@ -162,12 +180,14 @@ def get_or_create(self,
params = params.filter(units=units)
if params.count() >= 1:
ds.parameters.add(params[0])
connection.close()

# create dataset URI
DatasetURI.objects.get_or_create(
name=uri_service_name,
service=uri_service_type,
uri=uri,
dataset=ds)
connection.close()

return ds, created
24 changes: 19 additions & 5 deletions geospaas/utils/utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
''' Utility functions to perform common operations '''
import os
import logging
from netCDF4 import Dataset

try:
Expand All @@ -16,31 +17,44 @@

from django.conf import settings

def create_folder(folder):
try:
os.mkdir(folder)
except FileExistsError:
logging.debug(f"{folder} already exists.")


def module_path(module, root):
media_path = root
for m in module.split('.'):
media_path = os.path.join(media_path, m)
if not os.path.exists(media_path):
os.mkdir(media_path)
create_folder(media_path)
return media_path


def path(module, filename, root):
def path(module, filename, root, date=None):
mp = module_path(module, root)

if date is not None:
for xx in [date.strftime('%Y'), date.strftime('%m'), date.strftime('%d')]:
mp = os.path.join(mp, xx)
if not os.path.exists(mp):
create_folder(mp)

# Get the path of media files created from <filename>
basename = os.path.split(filename)[-1].split('.')[0]
dataset_path = os.path.join(mp, basename)
if not os.path.exists(dataset_path):
os.mkdir(dataset_path)
create_folder(dataset_path)

return dataset_path

def media_path(module, filename):
return path(module, filename, settings.MEDIA_ROOT)

def product_path(module, filename):
return path(module, filename, settings.PRODUCTS_ROOT)
def product_path(module, filename, date=None):
return path(module, filename, settings.PRODUCTS_ROOT, date=date)

def validate_uri(uri):
""" Validation of URI and its existence
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,15 +35,16 @@ def handle(self, *args, **options):
models = [
Parameter,
DataCenter,
HorizontalDataResolution,
#HorizontalDataResolution,
Instrument,
ISOTopicCategory,
Location,
Platform,
Project,
#Project,
ScienceKeyword,
TemporalDataResolution,
VerticalDataResolution]
#TemporalDataResolution,
#VerticalDataResolution,
]

for model in models:
model.objects.create_from_vocabularies(**options)
28 changes: 22 additions & 6 deletions geospaas/vocabularies/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,14 +52,20 @@ def create_instances(self, pti_list):
"""
num = 0
for entry in pti_list:
pp, created = self.get_or_create(entry)
if bool(entry):
pp, created = self.get_or_create(entry)
else:
created = False
if created: num+=1
print("Successfully added %d new entries" % num)

def get_or_create(self, entry, *args, **kwargs):
""" Get or create database instance from input pythesint entry """

params = {key : entry[self.mapping[key]] for key in self.mapping}
try:
params = {key : entry[self.mapping[key]] for key in self.mapping}
except:
import ipdb
ipdb.set_trace()
return super(VocabularyManager, self).get_or_create(**params)

def create_from_vocabularies(self, force=False, **kwargs):
Expand Down Expand Up @@ -114,11 +120,21 @@ def get_by_natural_key(self, standard_name):
class PlatformManager(VocabularyManager):
get_list = pti.get_gcmd_platform_list
update = pti.update_gcmd_platform
mapping = dict(category='Category',
series_entity='Series_Entity',
# mapping = dict(category='Category',
# series_entity='Series_Entity',
# short_name='Short_Name',
# long_name='Long_Name')
# New mapping to adapt to changes in GCMD
mapping = dict(category='Basis',
series_entity='Category',
short_name='Short_Name',
long_name='Long_Name')

# Correct mapping needs update of tables..:
# mapping = dict(basis='Basis',
# category='Category',
# sub_category='Sub_Category',
# short_name='Short_Name',
# long_name='Long_Name')


class InstrumentManager(VocabularyManager):
Expand Down