Source code for dkist.dataset.loader

import importlib.resources as importlib_resources
from pathlib import Path
from functools import singledispatch

from parfive import Results

import asdf

try:
    # first try to import from asdf.exceptions for asdf 2.15+
    from asdf.exceptions import ValidationError
except ImportError:
    # fall back to top level asdf for older versions of asdf
    from asdf import ValidationError


@singledispatch
def load_dataset(target):
    """
    Load a DKIST dataset from a variety of inputs.

    This function loads one or more DKIST ASDF files into `dkist.Dataset` or
    `dkist.TiledDataset` classes. It can take a variety of inputs (listed
    below) and will either return a single object or a list of objects if
    multiple datasets are loaded.

    Parameters
    ----------
    target : {types}
        The location of one or more ASDF files.

        {types_list}

    Returns
    -------
    datasets
        An instance of `dkist.Dataset` or `dkist.TiledDataset` or a list thereof.

    Examples
    --------
    >>> dkist.load_dataset("/path/to/VISP_L1_ABCDE.asdf")  # doctest: +SKIP
    >>> dkist.load_dataset("/path/to/ABCDE/")  # doctest: +SKIP
    >>> dkist.load_dataset(Path("/path/to/ABCDE"))  # doctest: +SKIP

    >>> from sunpy.net import Fido, attrs as a
    >>> import dkist.net
    >>> search_results = Fido.search(a.dkist.Dataset("AGLKO"))  # doctest: +REMOTE_DATA
    >>> files = Fido.fetch(search_results)  # doctest: +REMOTE_DATA
    >>> dkist.load_dataset(files)  # doctest: +REMOTE_DATA
    <dkist.dataset.dataset.Dataset object at ...>
    This Dataset has 4 pixel and 5 world dimensions
    <BLANKLINE>
    dask.array<reshape, shape=(4, 1000, 976, 2555), dtype=float64, chunksize=(1, 1, 976, 2555), chunktype=numpy.ndarray>
    <BLANKLINE>
    Pixel Dim  Axis Name                Data size  Bounds
            0  polarization state               4  None
            1  raster scan step number       1000  None
            2  dispersion axis                976  None
            3  spatial along slit            2555  None
    <BLANKLINE>
    World Dim  Axis Name                  Physical Type                   Units
            0  stokes                     phys.polarization.stokes        unknown
            1  time                       time                            s
            2  helioprojective latitude   custom:pos.helioprojective.lat  arcsec
            3  wavelength                 em.wl                           nm
            4  helioprojective longitude  custom:pos.helioprojective.lon  arcsec
    <BLANKLINE>
    Correlation between pixel and world axes:
    <BLANKLINE>
                 Pixel Dim
    World Dim    0    1    2    3
            0  yes   no   no   no
            1   no  yes   no   no
            2   no  yes   no  yes
            3   no   no  yes   no
            4   no  yes   no  yes
    """
    known_types = _known_types_docs().keys()
    raise TypeError(f"Input type {type(target).__name__} not recognised. "
                    f"It must be one of {', '.join(known_types)}.")
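
# Dispatch happens on the runtime type of ``target``: each ``register`` call
# below adds a handler for one input type, and the body above is only the
# fallback for unrecognised types. A minimal standalone sketch of the
# mechanism (``load`` and its handler are hypothetical, for illustration
# only):
#
#     from functools import singledispatch
#
#     @singledispatch
#     def load(target):
#         raise TypeError(f"Unsupported input type {type(target).__name__}")
#
#     @load.register
#     def _(target: str):
#         return f"would load {target!r}"
#
#     load("a.asdf")  # dispatches to the str handler -> "would load 'a.asdf'"
#     load(1)         # no handler registered for int -> fallback raises TypeError
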
@load_dataset.register(Results)
def _load_from_results(results):
    """
    The results from a call to ``Fido.fetch``; all results must be valid DKIST ASDF files.
    """
    return _load_from_iterable(results)


# In Python 3.11 we can use the Union type here
@load_dataset.register(list)
@load_dataset.register(tuple)
def _load_from_iterable(iterable):
    """
    A list or tuple of valid inputs to ``load_dataset``.
    """
    datasets = [load_dataset(item) for item in iterable]
    if len(datasets) == 1:
        return datasets[0]
    return datasets


@load_dataset.register
def _load_from_string(path: str):
    """
    A string representing a directory or an ASDF file.
    """
    # TODO Adjust this to accept URLs as well
    return _load_from_path(Path(path))


@load_dataset.register
def _load_from_path(path: Path):
    """
    A path object representing a directory or an ASDF file.
    """
    path = path.expanduser()
    if not path.is_dir():
        if not path.exists():
            raise ValueError(f"{path} does not exist.")
        return _load_from_asdf(path)
    return _load_from_directory(path)


def _load_from_directory(directory):
    """
    Construct a `~dkist.dataset.Dataset` from a directory containing one
    asdf file and a collection of FITS files.
    """
    base_path = Path(directory).expanduser()
    asdf_files = tuple(base_path.glob("*.asdf"))

    if not asdf_files:
        raise ValueError(f"No asdf file found in directory {base_path}.")

    if len(asdf_files) > 1:
        return _load_from_iterable(asdf_files)

    asdf_file = asdf_files[0]

    return _load_from_asdf(asdf_file)


def _load_from_asdf(filepath):
    """
    Construct a dataset object from a filepath of a suitable asdf file.
    """
    from dkist.dataset import TiledDataset
    filepath = Path(filepath).expanduser()
    base_path = filepath.parent
    try:
        with importlib_resources.as_file(importlib_resources.files("dkist.io") / "level_1_dataset_schema.yaml") as schema_path:
            with asdf.open(filepath, custom_schema=schema_path.as_posix(),
                           lazy_load=False, copy_arrays=True) as ff:
                ds = ff.tree["dataset"]
                if isinstance(ds, TiledDataset):
                    for sub in ds.flat:
                        sub.files.basepath = base_path
                else:
                    ds.files.basepath = base_path
                return ds
    except ValidationError as e:
        err = f"This file is not a valid DKIST Level 1 asdf file, it fails validation with: {e.message}."
        raise TypeError(err) from e


def _known_types_docs():
    known_types = load_dataset.registry.copy()
    known_types.pop(object)
    known_types_docs = {}
    for t, func in known_types.items():
        name = t.__qualname__
        if t.__module__ != "builtins":
            name = f"{t.__module__}.{name}"
        known_types_docs[name] = func.__doc__.strip()
    return known_types_docs


def _formatted_types_docstring(known_types):
    lines = [f"| `{fqn}` - {doc}" for fqn, doc in known_types.items()]
    return "\n        ".join(lines)


load_dataset.__doc__ = load_dataset.__doc__.format(types_list=_formatted_types_docstring(_known_types_docs()),
                                                   types=", ".join([f"`{t}`" for t in _known_types_docs().keys()]))
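
# Usage sketch, restating the docstring examples above (all paths are
# hypothetical):
#
#     import dkist
#     from pathlib import Path
#
#     ds = dkist.load_dataset("/path/to/VISP_L1_ABCDE.asdf")  # single ASDF file
#     ds = dkist.load_dataset(Path("/path/to/ABCDE/"))        # directory with one ASDF file
#     dss = dkist.load_dataset(["/a/1.asdf", "/b/2.asdf"])    # list of inputs -> list of datasets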