Source code for openeo_udf.api.machine_learn_model

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""OpenEO Python UDF interface"""

import os
from typing import Optional, Dict
from openeo_udf.server.config import UdfConfiguration


__license__ = "Apache License, Version 2.0"
__author__     = "Soeren Gebbert"
__copyright__  = "Copyright 2018, Soeren Gebbert"
__maintainer__ = "Soeren Gebbert"
__email__      = "soerengebbert@googlemail.com"


class MachineLearnModelConfig:
    """This class represents a machine learn model. The model will be loaded
    at construction, based on the machine learn framework.

    The following frameworks are supported (the framework name is compared
    case-insensitively and must match exactly):

        - sklearn models that are created with joblib
          (``sklearn.externals.joblib`` in older scikit-learn releases)
        - pytorch models that are created with torch.save

    For any other framework name the file is only checked for existence and
    the model stays ``None``.

    The examples below were written against an older scikit-learn release;
    the printed estimator representation varies between versions.

    >>> from sklearn.ensemble import RandomForestRegressor
    >>> from sklearn.externals import joblib
    >>> model = RandomForestRegressor(n_estimators=10, max_depth=2, verbose=0)
    >>> path = '/tmp/test.pkl.xz'
    >>> dummy = joblib.dump(value=model, filename=path, compress=("xz", 3))
    >>> m = MachineLearnModelConfig(framework="sklearn", name="test",
    ...                             description="Machine learn model", path=path)
    >>> m.get_model()# doctest: +ELLIPSIS
    ...              # doctest: +NORMALIZE_WHITESPACE
    RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=2,
               max_features='auto', max_leaf_nodes=None,
               min_impurity_decrease=0.0, min_impurity_split=None,
               min_samples_leaf=1, min_samples_split=2,
               min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
               oob_score=False, random_state=None, verbose=0, warm_start=False)
    >>> m.to_dict() # doctest: +ELLIPSIS
    ...             # doctest: +NORMALIZE_WHITESPACE
    {'description': 'Machine learn model', 'name': 'test', 'framework': 'sklearn',
     'path': '/tmp/test.pkl.xz', 'md5_hash': None}
    >>> d = {'description': 'Machine learn model', 'name': 'test', 'framework': 'sklearn',
    ...      'path': '/tmp/test.pkl.xz', "md5_hash": None}
    >>> m = MachineLearnModelConfig.from_dict(d)
    >>> m.get_model() # doctest: +ELLIPSIS
    ...               # doctest: +NORMALIZE_WHITESPACE
    RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=2,
               max_features='auto', max_leaf_nodes=None,
               min_impurity_decrease=0.0, min_impurity_split=None,
               min_samples_leaf=1, min_samples_split=2,
               min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
               oob_score=False, random_state=None, verbose=0, warm_start=False)

    >>> import torch
    >>> import torch.nn as nn
    >>> model = nn.Module
    >>> path = '/tmp/test.pt'
    >>> torch.save(model, path)
    >>> m = MachineLearnModelConfig(framework="pytorch", name="test",
    ...                             description="Machine learn model", path=path)
    >>> m.get_model()# doctest: +ELLIPSIS
    ...              # doctest: +NORMALIZE_WHITESPACE
    <class 'torch.nn.modules.module.Module'>
    >>> m.to_dict() # doctest: +ELLIPSIS
    ...             # doctest: +NORMALIZE_WHITESPACE
    {'description': 'Machine learn model', 'name': 'test', 'framework': 'pytorch',
     'path': '/tmp/test.pt', 'md5_hash': None}
    >>> d = {'description': 'Machine learn model', 'name': 'test', 'framework': 'pytorch',
    ...      'path': '/tmp/test.pt', "md5_hash": None}
    >>> m = MachineLearnModelConfig.from_dict(d)
    >>> m.get_model() # doctest: +ELLIPSIS
    ...               # doctest: +NORMALIZE_WHITESPACE
    <class 'torch.nn.modules.module.Module'>

    """

    def __init__(self, framework: str, name: str, description: str,
                 path: Optional[str] = None, md5_hash: Optional[str] = None):
        """The constructor to create a machine learn model object

        The model file is loaded immediately, so construction fails if the
        file cannot be found.

        Args:
            framework: The name of the framework, pytorch and sklearn are supported
            name: The name of the model
            description: The description of the model
            path: The path to the pre-trained machine learn model that should be applied
            md5_hash: The md5 hash of the machine learn model that is located in the local storage.
                      When given, it takes precedence over ``path``.

        Raises:
            Exception: If no model file exists at the resolved location.
        """
        self.framework = framework
        self.name = name
        self.description = description
        self.path = path
        self.md5_hash = md5_hash
        self.model = None
        self.load_model()

    def load_model(self):
        """Load the machine learn model from the path or md5 hash.

        If ``md5_hash`` is set, the file is looked up under
        ``UdfConfiguration.machine_learn_storage_path``; otherwise ``path``
        is used directly.

        Supported model:

            - sklearn models that are created with joblib
            - pytorch models that are created with torch.save

        Raises:
            Exception: If neither a path nor a hash was provided, or if the
                       resolved location is not an existing file.
        """
        if self.md5_hash is not None:
            # NOTE(review): the hash is joined into the storage path unvalidated;
            # confirm callers only pass real digests (no path separators).
            filepath = os.path.join(UdfConfiguration.machine_learn_storage_path, self.md5_hash)
        else:
            filepath = self.path

        # Guard against a missing path first: os.path functions raise an
        # opaque TypeError when handed None.
        if filepath is None or not os.path.isfile(filepath):
            raise Exception(f"Unable to find the specified machine learn model at path {filepath}")

        # Exact comparison on purpose: a substring test such as
        # `name in "sklearn"` would also accept "" or "learn".
        framework = self.framework.lower()

        # SECURITY: joblib.load and torch.load both unpickle the file, which can
        # execute arbitrary code. Only load model files from trusted sources.
        if framework == "sklearn":
            try:
                from sklearn.externals import joblib
            except ImportError:
                # Newer scikit-learn releases no longer ship the vendored
                # joblib; fall back to the standalone package.
                import joblib
            self.model = joblib.load(filepath)
        elif framework == "pytorch":
            import torch
            self.model = torch.load(filepath)

    def get_model(self):
        """Get the loaded machine learn model.

        This function will return None if the model was not loaded

        :return: the loaded model
        """
        return self.model

    def to_dict(self) -> Dict:
        """Convert the configuration (not the loaded model) into a dictionary

        :return: dict with the keys description, name, framework, path and md5_hash
        """
        return dict(description=self.description, name=self.name, framework=self.framework,
                    path=self.path, md5_hash=self.md5_hash)

    @classmethod
    def from_dict(cls, machine_learn_model: Dict) -> "MachineLearnModelConfig":
        """Create a machine learn model object from a dictionary

        The keys ``description``, ``name`` and ``framework`` are required,
        ``path`` and ``md5_hash`` are optional and default to None.
        The model is loaded as part of the construction.

        Args:
            machine_learn_model: The dictionary that describes the model

        Returns:
            A new instance of the class this method was called on

        Raises:
            KeyError: If a required key is missing.
            Exception: If the model file can not be found.
        """
        return cls(description=machine_learn_model["description"],
                   name=machine_learn_model["name"],
                   framework=machine_learn_model["framework"],
                   path=machine_learn_model.get("path"),
                   md5_hash=machine_learn_model.get("md5_hash"))
if __name__ == "__main__":
    # Executed as a script: run the doctests embedded in this module's docstrings.
    import doctest

    doctest.testmod()