# Source code for openeo.rest.job

import datetime
import json
import logging
import textwrap
import time
import typing
from pathlib import Path
from typing import List, Union, Dict, Optional

import requests

from openeo.api.logs import LogEntry
from openeo.internal.jupyter import render_component, render_error, VisualDict, VisualList
from openeo.internal.warnings import deprecated
from openeo.rest import OpenEoClientException, JobFailedException, OpenEoApiError
from openeo.util import ensure_dir

if typing.TYPE_CHECKING:
    # Imports for type checking only (circular import issue at runtime).
    # `Connection` is only referenced in (string) annotations in this module.
    from openeo.rest.connection import Connection

logger = logging.getLogger(__name__)

class BatchJob:
    """
    Handle for an openEO batch job, allowing it to describe, start, cancel, inspect results, etc.

    .. versionadded:: 0.11.0
        This class originally had the more cryptic name :py:class:`RESTJob`,
        which is still available as legacy alias,
        but :py:class:`BatchJob` is recommended since version 0.11.0.
    """

    def __init__(self, job_id: str, connection: 'Connection'):
        """
        :param job_id: identifier of the batch job
        :param connection: connection object used for all requests about this job
        """
        self.job_id = job_id
        """Unique identifier of the batch job (string)."""

        self.connection = connection

    def __repr__(self):
        return '<{c} job_id={i!r}>'.format(c=self.__class__.__name__, i=self.job_id)

    def _repr_html_(self):
        """Rich (HTML) representation: fetches the job description and the currency from the capabilities."""
        data = self.describe_job()
        currency = self.connection.capabilities().currency()
        return render_component('job', data=data, parameters={'currency': currency})

    def describe_job(self) -> dict:
        """
        Get all job information.

        :return: parsed JSON response of ``GET /jobs/{job_id}``
        """
        # GET /jobs/{job_id}
        # TODO: rename to just `describe`? #280
        return self.connection.get("/jobs/{}".format(self.job_id), expected_status=200).json()

    def status(self) -> str:
        """
        Get the status of the batch job

        :return: batch job status, one of "created", "queued", "running", "canceled", "finished" or "error"
            (or "N/A" when the job description has no "status" field).
        """
        return self.describe_job().get("status", "N/A")

    def update_job(self, process_graph=None, output_format=None,
                   output_parameters=None, title=None, description=None,
                   plan=None, budget=None, additional=None):
        """
        Update a job.

        :raises NotImplementedError: always (not implemented yet)
        """
        # PATCH /jobs/{job_id}
        # TODO: rename to just `update`? #280
        raise NotImplementedError

    def delete_job(self):
        """ Delete a job."""
        # DELETE /jobs/{job_id}
        # TODO: rename to just `delete`? #280
        self.connection.delete("/jobs/{}".format(self.job_id), expected_status=204)

    def estimate_job(self):
        """
        Calculate a time/cost estimate for a job.

        :return: :py:class:`VisualDict` wrapping the estimate returned by the backend
        """
        # GET /jobs/{job_id}/estimate
        data = self.connection.get("/jobs/{}/estimate".format(self.job_id), expected_status=200).json()
        currency = self.connection.capabilities().currency()
        return VisualDict('job-estimate', data=data, parameters={'currency': currency})

    def start_job(self):
        """ Start / queue a job for processing."""
        # POST /jobs/{job_id}/results
        # TODO: rename to just `start`? #280
        # TODO: return self, to allow chained calls
        self.connection.post("/jobs/{}/results".format(self.job_id), expected_status=202)

    def stop_job(self):
        """ Stop / cancel job processing."""
        # DELETE /jobs/{job_id}/results
        # TODO: rename to just `stop`? #280
        self.connection.delete("/jobs/{}/results".format(self.job_id), expected_status=204)

    @deprecated("Use :py:meth:`~BatchJob.get_results` instead.", version="0.4.10")
    def list_results(self) -> dict:
        """Get batch job results metadata."""
        return self.get_results().get_metadata()

    def download_result(self, target: Union[str, Path] = None) -> Path:
        """
        Download single job result to the target file path or into folder (current working dir by default).

        Fails if there are multiple result files.

        :param target: String or path where the file should be downloaded to.
        :return: path of the downloaded file
        """
        return self.get_results().download_file(target=target)

    @deprecated(
        "Instead use :py:meth:`BatchJob.get_results` and the more flexible download functionality of :py:class:`JobResults`",
        version="0.4.10")
    def download_results(self, target: Union[str, Path] = None) -> Dict[Path, dict]:
        """
        Download all job result files into given folder (current working dir by default).

        The names of the files are taken directly from the backend.

        :param target: String/path, folder where to put the result files.
        :return: dict mapping each downloaded file path to the metadata of the corresponding asset
        """
        return self.get_result().download_files(target)

    @deprecated("Use :py:meth:`BatchJob.get_results` instead.", version="0.4.10")
    def get_result(self):
        """Get the legacy result wrapper for this job."""
        return _Result(self)

    def get_results(self) -> "JobResults":
        """
        Get handle to batch job results for result metadata inspection or downloading resulting assets.

        .. versionadded:: 0.4.10
        """
        return JobResults(self)

    def logs(self, offset=None) -> List[LogEntry]:
        """
        Retrieve job logs.

        :param offset: passed as ``offset`` query parameter of the logs request
        :return: log entries (wrapped in a :py:class:`VisualList`)
        """
        # TODO: option to filter on level? Or move filtering functionality to a separate batch job logs class?
        url = "/jobs/{}/logs".format(self.job_id)
        logs = self.connection.get(url, params={'offset': offset}, expected_status=200).json()["logs"]
        entries = [LogEntry(log) for log in logs]
        return VisualList('logs', data=entries)

    def run_synchronous(
            self, outputfile: Union[str, Path, None] = None,
            print=print, max_poll_interval=60, connection_retry_interval=30
    ) -> 'BatchJob':
        """
        Start the job, wait for it to finish and download result

        :param outputfile: where to download the (single) result to; nothing is downloaded when ``None``
        :param print: print/logging function to show progress/status
        :param max_poll_interval: maximum number of seconds to sleep between status polls
        :param connection_retry_interval: how long to wait when status poll failed due to connection issue
        :return: this job handle
        """
        self.start_and_wait(
            print=print, max_poll_interval=max_poll_interval, connection_retry_interval=connection_retry_interval
        )
        # TODO #135 support multi file result sets too?
        if outputfile is not None:
            self.download_result(outputfile)
        return self

    def start_and_wait(
            self, print=print, max_poll_interval: int = 60, connection_retry_interval: int = 30, soft_error_max=10
    ) -> "BatchJob":
        """
        Start the batch job, poll its status and wait till it finishes (or fails)

        :param print: print/logging function to show progress/status
        :param max_poll_interval: maximum number of seconds to sleep between status polls
        :param connection_retry_interval: how long to wait when status poll failed due to connection issue
        :param soft_error_max: maximum number of soft errors (e.g. temporary connection glitches) to allow
        :return: this job handle
        :raises JobFailedException: when polling stops with a status other than "finished"
        :raises OpenEoClientException: when more than `soft_error_max` soft errors occurred
        """
        # TODO rename `connection_retry_interval` to something more generic?
        start_time = time.time()

        def elapsed() -> str:
            # Elapsed wall time as "H:MM:SS" (fractional seconds stripped).
            return str(datetime.timedelta(seconds=time.time() - start_time)).rsplit(".")[0]

        def print_status(msg: str):
            print("{t} Job {i!r}: {m}".format(t=elapsed(), i=self.job_id, m=msg))

        # TODO: make `max_poll_interval`, `connection_retry_interval` class constants or instance properties?
        print_status("send 'start'")
        self.start_job()

        # TODO: also add  `wait` method so you can track a job that already has started explicitly
        #   or just rename this method to `wait` and automatically do start if not started yet?

        # Start with fast polling.
        poll_interval = min(5, max_poll_interval)
        status = None
        _soft_error_count = 0

        def soft_error(message: str):
            """Non breaking error (unless we had too much of them)"""
            nonlocal _soft_error_count
            _soft_error_count += 1
            if _soft_error_count > soft_error_max:
                raise OpenEoClientException("Excessive soft errors")
            print_status(message)
            time.sleep(connection_retry_interval)

        while True:
            # TODO: also allow a hard time limit on this infinite poll loop?
            try:
                job_info = self.describe_job()
            except requests.ConnectionError as e:
                soft_error("Connection error while polling job status: {e}".format(e=e))
                continue
            except OpenEoApiError as e:
                # Only "503 Service Unavailable" is treated as a temporary glitch.
                if e.http_status_code == 503:
                    soft_error("Service availability error while polling job status: {e}".format(e=e))
                    continue
                else:
                    raise

            status = job_info.get("status", "N/A")
            progress = '{p}%'.format(p=job_info["progress"]) if "progress" in job_info else "N/A"
            print_status("{s} (progress {p})".format(s=status, p=progress))
            if status not in ('submitted', 'created', 'queued', 'running'):
                break

            # Sleep for next poll (and adaptively make polling less frequent)
            time.sleep(poll_interval)
            poll_interval = min(1.25 * poll_interval, max_poll_interval)

        if status != "finished":
            print(textwrap.dedent("""
                Your batch job {i!r} failed.
                Logs can be inspected in an openEO (web) editor
                or with `connection.job({i!r}).logs()`.
                """.format(i=self.job_id)))
            # TODO: make it possible to disable printing logs automatically?
            # TODO: render logs jupyter-aware in a notebook context?
            # TODO: only print the error level logs? Or the tail of the logs?
            print("Printing logs:")
            print(self.logs())
            raise JobFailedException("Batch job {i!r} didn't finish successfully. Status: {s} (after {t}).".format(
                i=self.job_id, s=status, t=elapsed()
            ), job=self)

        return self
@deprecated(reason="Use :py:class:`BatchJob` instead", version="0.11.0")
class RESTJob(BatchJob):
    """
    Deprecated legacy name of :py:class:`BatchJob`, without additional functionality.
    """
class ResultAsset:
    """
    Result asset of a batch job (e.g. a GeoTIFF or JSON file)

    .. versionadded:: 0.4.10
    """

    def __init__(self, job: BatchJob, name: str, href: str, metadata: dict):
        """
        :param job: batch job this asset belongs to (its connection is used for downloading)
        :param name: asset name
        :param href: download URL of the asset
        :param metadata: asset metadata dictionary
        """
        self.job = job

        self.name = name
        """Asset name as advertised by the backend."""

        self.href = href
        """Download URL of the asset."""

        self.metadata = metadata
        """Asset metadata provided by the backend, possibly containing keys "type" (for media type), "roles", "title", "description"."""

    def __repr__(self):
        return "<ResultAsset {n!r} (type {t}) at {h!r}>".format(
            n=self.name, t=self.metadata.get("type", "unknown"), h=self.href
        )

    def download(self, target: Optional[Union[Path, str]] = None, chunk_size=None) -> Path:
        """
        Download asset to given location

        :param target: download target path. Can be an existing folder
            (in which case the filename advertised by backend will be used)
            or full file name. By default, the working directory will be used.
        :param chunk_size: chunk size passed to ``iter_content`` of the streamed response
        :return: path of the downloaded file
        """
        target = Path(target or Path.cwd())
        if target.is_dir():
            # Existing folder: use the asset name as file name.
            target = target / self.name
        ensure_dir(target.parent)
        logger.info("Downloading Job result asset {n!r} from {h!s} to {t!s}".format(n=self.name, h=self.href, t=target))
        with target.open("wb") as f:
            response = self._get_response(stream=True)
            for block in response.iter_content(chunk_size=chunk_size):
                f.write(block)
        return target

    def _get_response(self, stream=True) -> requests.Response:
        """Do a GET request on the asset URL through the connection of the job."""
        return self.job.connection.get(self.href, stream=stream)

    def load_json(self) -> dict:
        """Load asset in memory and parse as JSON."""
        # Only warn (no failure) when neither the name nor the media type hints at JSON.
        if not (self.name.lower().endswith(".json") or self.metadata.get("type") == "application/json"):
            logger.warning("Asset might not be JSON")
        return self._get_response().json()

    def load_bytes(self) -> bytes:
        """Load asset in memory as raw bytes."""
        return self._get_response().content
# TODO: more `load` methods for `ResultAsset` e.g.: load GTiff asset directly as numpy array


class MultipleAssetException(OpenEoClientException):
    """Raised when a single asset is requested without name, but the job result contains multiple assets."""
class JobResults:
    """
    Results of a batch job: listing of one or more output files (assets)
    and some metadata.

    .. versionadded:: 0.4.10
    """

    def __init__(self, job: BatchJob):
        """
        :param job: batch job to get the results of
        """
        self._job = job
        self._results_url = "/jobs/{j}/results".format(j=self._job.job_id)
        # Cache of the results metadata (filled by `get_metadata`).
        self._results = None

    def __repr__(self):
        return "<JobResults for job {j!r}>".format(j=self._job.job_id)

    def _repr_html_(self):
        """Rich (HTML) representation: renders the result metadata, or the API error if fetching it failed."""
        try:
            response = self.get_metadata()
            return render_component("batch-job-result", data=response)
        except OpenEoApiError as error:
            return render_error(error)

    def get_metadata(self, force=False) -> dict:
        """
        Get batch job results metadata (parsed JSON)

        :param force: fetch the metadata again, even if it was fetched (and cached) before
        """
        if self._results is None or force:
            self._results = self._job.connection.get(self._results_url, expected_status=200).json()
        return self._results

    # TODO: provide methods for `stac_version`, `id`, `geometry`, `properties`, `links`, ...?

    def get_assets(self) -> List[ResultAsset]:
        """
        Get all assets from the job results.
        """
        # TODO: add arguments to filter on metadata, e.g. to only get assets of type "image/tiff"
        metadata = self.get_metadata()
        if "assets" in metadata:
            # API 1.0 style: dictionary mapping filenames to metadata dict (with at least a "href" field)
            assets = metadata["assets"]
        else:
            # Best effort translation of old style to "assets" style (#134):
            # use the last URL segment of each link as asset name.
            assets = {a["href"].split("/")[-1]: a for a in metadata["links"]}
        return [
            ResultAsset(job=self._job, name=name, href=asset["href"], metadata=asset)
            for name, asset in assets.items()
        ]

    def get_asset(self, name: str = None) -> ResultAsset:
        """
        Get single asset by name or without name if there is only one.

        :param name: name of the asset (optional when there is only one asset)
        :raises MultipleAssetException: when no name is given and there are multiple assets
        :raises OpenEoClientException: when there are no assets or no asset with given name
        """
        # TODO: also support getting a single asset by type or role?
        assets = self.get_assets()
        if len(assets) == 0:
            raise OpenEoClientException("No assets in result.")
        if name is None:
            if len(assets) == 1:
                return assets[0]
            else:
                raise MultipleAssetException("Multiple result assets for job {j}: {a}".format(
                    j=self._job.job_id, a=[a.name for a in assets]
                ))
        else:
            try:
                return next(a for a in assets if a.name == name)
            except StopIteration:
                raise OpenEoClientException(
                    "No asset {n!r} in: {a}".format(n=name, a=[a.name for a in assets])
                )

    def download_file(self, target: Union[Path, str] = None, name: str = None) -> Path:
        """
        Download single asset. Can be used when there is only one asset in the
        :py:class:`JobResults`, or when the desired asset name is given explicitly.

        :param target: path to download to. Can be an existing directory
            (in which case the filename advertised by backend will be used)
            or full file name. By default, the working directory will be used.
        :param name: asset name to download (not required when there is only one asset)
        :return: path of downloaded asset
        :raises OpenEoClientException: when there are multiple assets and no name is given
        """
        try:
            return self.get_asset(name=name).download(target=target)
        except MultipleAssetException:
            raise OpenEoClientException(
                "Can not use `download_file` with multiple assets. Use `download_files` instead.")

    def download_files(self, target: Union[Path, str] = None, include_stac_metadata: bool = True) -> List[Path]:
        """
        Download all assets to given folder.

        :param target: path to folder to download to (must be a folder if it already exists)
        :param include_stac_metadata: whether to download the job result metadata as a STAC (JSON) file.
        :return: list of paths to the downloaded assets.
        :raises OpenEoClientException: when the target exists but is not a folder
        """
        target = Path(target or Path.cwd())
        if target.exists() and not target.is_dir():
            raise OpenEoClientException(f"Target argument {target} exists but isn't a folder.")
        ensure_dir(target)

        downloaded = [a.download(target) for a in self.get_assets()]

        if include_stac_metadata:
            # TODO #184: convention for metadata file name?
            metadata_file = target / "job-results.json"
            # TODO #184: rewrite references to locally downloaded assets?
            metadata_file.write_text(json.dumps(self.get_metadata()))
            downloaded.append(metadata_file)

        return downloaded
@deprecated(reason="Use :py:class:`JobResults` instead", version="0.4.10")
class _Result:
    """
    Wrapper around `JobResults` to adapt old deprecated "Result" API.

    .. deprecated:: 0.4.10
    """

    # TODO: deprecated: remove this

    def __init__(self, job):
        """
        :param job: batch job to get the results of
        """
        self.results = JobResults(job=job)

    def download_file(self, target: Union[str, Path] = None) -> Path:
        """Download the single result asset to `target` and return the path of the downloaded file."""
        return self.results.download_file(target=target)

    def download_files(self, target: Union[str, Path] = None) -> Dict[Path, dict]:
        """
        Download all result assets into given folder (current working dir by default).

        :param target: folder to download to (must be a folder if it already exists)
        :return: dict mapping each downloaded file path to the metadata of the corresponding asset
        :raises OpenEoClientException: when the target exists but is not a folder
        """
        target = Path(target or Path.cwd())
        if target.exists() and not target.is_dir():
            raise OpenEoClientException(f"Target argument {target} exists but isn't a folder.")
        # Make sure the folder exists (like `JobResults.download_files` does):
        # otherwise each asset would be written to the path `target` itself.
        ensure_dir(target)
        return {a.download(target): a.metadata for a in self.results.get_assets()}

    def load_json(self) -> dict:
        """Load the single result asset in memory and parse it as JSON."""
        return self.results.get_asset().load_json()

    def load_bytes(self) -> bytes:
        """Load the single result asset in memory as raw bytes."""
        return self.results.get_asset().load_bytes()