Source code for openeo_udf.api.datacube

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""OpenEO Python UDF interface"""

import numpy
import xarray
from typing import Dict, List


# Module metadata: license, authorship and maintainer contact information.
__license__ = "Apache License, Version 2.0"
__author__     = "Soeren Gebbert"
__copyright__  = "Copyright 2018, Soeren Gebbert"
__maintainer__ = "Soeren Gebbert"
__email__      = "soerengebbert@googlemail.com"


class DataCube:
    """Hypercube representation of multi-dimensional data.

    The cube wraps an xarray.DataArray and provides methods to convert it
    into the HyperCube JSON (dictionary) representation and back.

    Named array with coordinates and a description:

    >>> array = xarray.DataArray(numpy.zeros(shape=(2, 3)), coords={'x': [1, 2], 'y': [1, 2, 3]}, dims=('x', 'y'))
    >>> array.attrs["description"] = "This is an xarray with two dimensions"
    >>> array.name = "testdata"
    >>> h = DataCube(array=array)
    >>> d = h.to_dict()
    >>> d["id"]
    'testdata'
    >>> d["data"]
    [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]
    >>> d["dimensions"]
    [{'name': 'x', 'coordinates': [1, 2]}, {'name': 'y', 'coordinates': [1, 2, 3]}]
    >>> d["description"]
    'This is an xarray with two dimensions'

    >>> new_h = DataCube.from_dict(d)
    >>> d = new_h.to_dict()
    >>> d["id"]
    'testdata'
    >>> d["data"]
    [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]
    >>> d["dimensions"]
    [{'name': 'x', 'coordinates': [1, 2]}, {'name': 'y', 'coordinates': [1, 2, 3]}]
    >>> d["description"]
    'This is an xarray with two dimensions'

    Unnamed array with coordinates and without a description:

    >>> array = xarray.DataArray(numpy.zeros(shape=(2, 3)), coords={'x': [1, 2], 'y': [1, 2, 3]}, dims=('x', 'y'))
    >>> h = DataCube(array=array)
    >>> d = h.to_dict()
    >>> d["id"]
    >>> d["data"]
    [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]
    >>> d["dimensions"]
    [{'name': 'x', 'coordinates': [1, 2]}, {'name': 'y', 'coordinates': [1, 2, 3]}]
    >>> "description" not in d
    True

    >>> new_h = DataCube.from_dict(d)
    >>> d = new_h.to_dict()
    >>> d["id"]
    >>> d["data"]
    [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]
    >>> d["dimensions"]
    [{'name': 'x', 'coordinates': [1, 2]}, {'name': 'y', 'coordinates': [1, 2, 3]}]
    >>> "description" not in d
    True

    Unnamed array without coordinates:

    >>> array = xarray.DataArray(numpy.zeros(shape=(2, 3)))
    >>> h = DataCube(array=array)
    >>> d = h.to_dict()
    >>> d["id"]
    >>> d["data"]
    [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]
    >>> d["dimensions"]
    []
    >>> "description" not in d
    True

    >>> new_h = DataCube.from_dict(d)
    >>> d = new_h.to_dict()
    >>> d["id"]
    >>> d["data"]
    [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]
    >>> d["dimensions"]
    []
    >>> "description" not in d
    True

    """

    def __init__(self, array: "xarray.DataArray"):
        """Create a data cube around ``array``.

        Args:
            array: xarray.DataArray that contains the data and dimension definition

        Raises:
            Exception: if ``array`` is not an xarray.DataArray (see set_array)
        """
        self.set_array(array)

    def __str__(self):
        """Return the id and the string form of the wrapped array."""
        return "id: %(id)s\n" \
               "data: %(data)s" % {"id": self.id, "data": self.array}

    def get_array(self) -> "xarray.DataArray":
        """Return the xarray.DataArray that contains the data and dimension definition

        Returns:
            xarray.DataArray: that contains the data and dimension definition

        """
        return self._array

    def set_array(self, array: "xarray.DataArray"):
        """Set the xarray.DataArray that contains the data and dimension definition

        This function checks that the provided data is an xarray.DataArray
        and raises an Exception otherwise.

        Args:
            array: xarray.DataArray that contains the data and dimension definition

        Raises:
            Exception: if ``array`` is not an instance of xarray.DataArray

        """
        if not isinstance(array, xarray.DataArray):
            raise Exception("Argument data must be of type xarray.DataArray")

        self._array = array

    @property
    def id(self):
        """The name of the wrapped xarray.DataArray."""
        return self._array.name

    # Attribute-style access to the wrapped array; assignment goes through
    # set_array and is therefore type checked.
    array = property(fget=get_array, fset=set_array)

    def to_dict(self) -> Dict:
        """Convert this hypercube into a dictionary that can be converted into
        a valid JSON representation

        Only dimensions that have an entry in the coordinates of the array are
        exported; the "description" key is only present if the array carries a
        "description" attribute.

        Returns:
            dict:
            HyperCube as a dictionary

        >>> example = {
        ...     "id": "test_data",
        ...     "data": [
        ...         [
        ...             [0.0, 0.1],
        ...             [0.2, 0.3]
        ...         ],
        ...         [
        ...             [0.0, 0.1],
        ...             [0.2, 0.3]
        ...         ]
        ...     ],
        ...     "dimensions": [{"name": "time", "unit": "ISO:8601", "coordinates":["2001-01-01", "2001-01-02"]},
        ...                    {"name": "X", "unit": "degree", "coordinates":[50.0, 60.0]},
        ...                    {"name": "Y", "unit": "degree"},
        ...                   ]
        ... }

        """
        d = {"id": "", "data": "", "dimensions": []}
        if self._array is None:
            return d

        xd = self._array.to_dict()

        if "name" in xd:
            d["id"] = xd["name"]
        if "data" in xd:
            d["data"] = xd["data"]

        attrs = xd.get("attrs") or {}
        if "description" in attrs:
            d["description"] = attrs["description"]

        if "dims" in xd and "coords" in xd:
            coords = xd["coords"]
            for dim in xd["dims"]:
                if dim not in coords:
                    # Dimensions without a coordinate entry are not exported
                    continue
                if "data" in coords[dim]:
                    d["dimensions"].append({"name": dim, "coordinates": coords[dim]["data"]})
                else:
                    d["dimensions"].append({"name": dim})

        return d

    @staticmethod
    def from_dict(hc_dict: Dict) -> "DataCube":
        """Create a hypercube from a python dictionary that was created from
        the JSON definition of the HyperCube

        Args:
            hc_dict (dict): The dictionary that contains the hypercube definition

        Returns:
            DataCube: the data cube built from the dictionary

        Raises:
            Exception: if the "id" or the "data" key is missing

        """
        if "id" not in hc_dict:
            raise Exception("Missing id in dictionary")

        if "data" not in hc_dict:
            raise Exception("Missing data in dictionary")

        coords = {}
        dims = []

        for dim in hc_dict.get("dimensions", ()):
            dims.append(dim["name"])
            if "coordinates" in dim:
                coords[dim["name"]] = dim["coordinates"]

        values = numpy.asarray(hc_dict["data"])
        if dims and coords:
            data = xarray.DataArray(values, coords=coords, dims=dims)
        elif dims:
            data = xarray.DataArray(values, dims=dims)
        else:
            data = xarray.DataArray(values)

        # The presence of "id" was verified above
        data.name = hc_dict["id"]
        if "description" in hc_dict:
            data.attrs["description"] = hc_dict["description"]

        return DataCube(array=data)

    def to_data_collection(self):
        """Placeholder without implementation; currently returns None."""
        pass

    @staticmethod
    def _cell_centers(extent, number_of_cells):
        """Return the centers of ``number_of_cells`` equally sized cells that
        partition the interval from ``extent[0]`` to ``extent[1]``."""
        stepsize = (extent[1] - extent[0]) / number_of_cells
        centers = []
        lower_bound = extent[0]
        # The lower bound is accumulated step by step so that the computed
        # values are identical to the ones of the previous implementation
        for _ in range(number_of_cells):
            centers.append(lower_bound + stepsize / 2.0)
            lower_bound = lower_bound + stepsize
        return centers

    @staticmethod
    def from_data_collection(data_collection: 'openeo_udf.server.data_model.data_collection_schema.DataCollectionModel') -> List['DataCube']:
        """Create data cubes from a data collection

        For each data cube of the collection one DataCube is created per
        variable of the referenced variable collection. Coordinates of a
        dimension are taken from its values if present, otherwise they are
        computed as cell centers from its extent and number of cells. A
        dimension that provides neither gets no coordinates.

        Args:
            data_collection: The data collection that contains the data cube
                definitions and the variable collections

        Returns:
            A list of data cubes

        """
        dc_list = []

        variables_collections = data_collection.variables_collections

        for cube in data_collection.object_collections.data_cubes:
            variable_collection = variables_collections[cube.variable_collection]

            coords = {}
            for key, dimension in cube.dimensions.items():
                if dimension.values:
                    coords[key] = dimension.values
                elif dimension.number_of_cells and dimension.extent:
                    # A missing (None) or zero number of cells can not be
                    # used to derive coordinates from the extent
                    coords[key] = DataCube._cell_centers(dimension.extent,
                                                         dimension.number_of_cells)

            for variable in variable_collection.variables:
                # Read the one dimensional array and reshape it
                array = numpy.asarray(variable.values)
                array = array.reshape(variable_collection.size)
                data = xarray.DataArray(array, dims=cube.dim, coords=coords)
                data.name = variable.name
                dc_list.append(DataCube(array=data))

        return dc_list
if __name__ == "__main__":
    # Execute the doctests embedded in this module when it is run as a script
    import doctest

    doctest.testmod()