Source code for esrf_pathlib._schemas.definitions.esrf_v2

"""
ESRF Data Policy v2

Introduced with BLISS 1.10.1 for beamline data on Dec 16, 2022.

Path hierarchy::

    {data_root}/{proposal}/{beamline}/{session_date}/{data_type}/   [TID]
      ├── {data_type=raw}/                                          [BLISS]
      │     ├── {proposal}_{beamline}.h5
      │     ├── {collection}/
      │     │     ├── {proposal}_{collection}.h5
      │     │     └── {collection}_{dataset}/
      │     │           └── {collection}_{dataset}.h5
      │     └── __icat__/
      │           └── {collection}_{dataset}.xml
      ├── {data_type=processed}/                                    [esrf-pathlib]
      │     ├── {proposal}_{beamline}.h5
      │     └── {collection}/
      │           ├── {proposal}_{collection}.h5
      │           └── {collection}_{dataset}/
      │                 └── {collection}_{dataset}.h5
      └── {data_type=_nobackup}/                                    [esrf-pathlib]
            ├── {proposal}_{beamline}.h5
            └── {collection}/
                  ├── {proposal}_{collection}.h5
                  └── {collection}_{dataset}/
                        └── {collection}_{dataset}.h5
"""

from typing import Union

from .. import constants
from ..schema.path import PathSchema
from . import unknown_v1
from .esrf_utils import BLISS_NAME_PATTERN
from .esrf_utils import SESSION_DATE_PATTERN
from .esrf_utils import derive_normalized_beamline
from .esrf_utils import deserialize_session_date
from .esrf_utils import deserializer_enum_type
from .esrf_utils import named_directory
from .esrf_utils import serialize_beamline
from .esrf_utils import serialize_session_date
from .esrf_utils import serializer_enum_type
from .types import DataType

def _data_type_tree(prefix: str, data_path_name: str) -> dict:
    """Build the template sub-tree that every data type directory shares.

    :param prefix: prefix of the names attached to the proposal, collection
        and dataset files and to the collection, dataset and gallery
        directories (for example ``"raw"`` gives ``"raw_dataset_file"``).
    :param data_path_name: name attached to the data type directory itself.
    :returns: a newly created nested dictionary; nothing is shared between
        two calls.
    """
    # Written as one literal so the keys are created in top-down order.
    return {
        named_directory(data_path_name): None,
        ("{proposal}_{beamline}.h5", f"{prefix}_proposal_file"): None,
        "{collection}": {
            named_directory(f"{prefix}_collection_path"): None,
            ("{proposal}_{collection}.h5", f"{prefix}_collection_file"): None,
            "{collection}_{dataset}": {
                named_directory(f"{prefix}_dataset_path"): None,
                ("{collection}_{dataset}.h5", f"{prefix}_dataset_file"): None,
                "gallery": {
                    named_directory(f"{prefix}_gallery_path"): None,
                },
            },
        },
    }


# Template of everything below a session directory: one sub-tree per data type.
_SESSION_TREE = {
    named_directory("session_path"): None,
    "{data_type=raw}": {
        **_data_type_tree("raw", "raw_data_path"),
        # Only the raw data directory has the extra "__icat__" sub-directory.
        "__icat__": {
            named_directory("raw_metadata_path"): None,
            ("{collection}_{dataset}.xml", "raw_metadata_file"): None,
        },
    },
    "{data_type=processed}": _data_type_tree("processed", "processed_data_path"),
    # The "_nobackup" directory is named "nobackup_path" (no "_data" infix).
    "{data_type=_nobackup}": _data_type_tree("nobackup", "nobackup_path"),
}

# Wrap the session tree in its parent directory levels, innermost level first,
# which gives {data_root}/{proposal}/{beamline}/{session_date}/<session tree>.
_ROOT_TREE = _SESSION_TREE
for _level in ("{session_date}", "{beamline}", "{proposal}", "{data_root}"):
    _ROOT_TREE = {_level: _ROOT_TREE}
del _level  # do not leave the loop variable in the module namespace


# Data type directory name -> DataType member. The key order is significant:
# the "data_type" concept regex is built by joining these keys.
_STRING_TO_DATA_TYPE = dict(
    raw=DataType.RAW,
    processed=DataType.PROCESSED,
    _nobackup=DataType.NOBACKUP,
)


def _deserializer_data_type(data_type: Union[str, DataType]) -> DataType:
    """Deserializer of the ``data_type`` concept.

    Delegates to ``deserializer_enum_type`` with the ``DataType`` enum and
    the ``_STRING_TO_DATA_TYPE`` mapping.

    :param data_type: a string or a ``DataType`` member.
    :returns: the result of ``deserializer_enum_type``.
    """
    deserialized = deserializer_enum_type(
        data_type,
        string_to_enum=_STRING_TO_DATA_TYPE,
        enum_type=DataType,
    )
    return deserialized


def _serializer_data_type(data_type: Union[str, DataType]) -> str:
    """Serializer of the ``data_type`` concept.

    Delegates to ``serializer_enum_type`` with the ``DataType`` enum and the
    ``_STRING_TO_DATA_TYPE`` mapping.

    :param data_type: a string or a ``DataType`` member.
    :returns: the result of ``serializer_enum_type``.
    """
    serialized = serializer_enum_type(
        data_type,
        string_to_enum=_STRING_TO_DATA_TYPE,
        enum_type=DataType,
    )
    return serialized


# Concepts used as placeholders in the template tree, keyed by concept name.
_CONCEPTS = {
    "data_root": {
        "description": "Root directory",
        "regex": r".*",
        "default_value": constants.DEFAULT_DATA_ROOT,
        "deserializer": unknown_v1.as_os_path,
        "serializer": unknown_v1.as_os_path,
    },
    "proposal": {
        "description": "ESRF proposal name",
        "regex": BLISS_NAME_PATTERN,
        "examples": ["ma1234", "blc1234"],
    },
    "beamline": {
        "description": "Beamline name",
        "regex": BLISS_NAME_PATTERN,
        "serializer": serialize_beamline,
        "examples": ["id21", "id23eh1", "id30a1"],
    },
    "session_date": {
        "description": "Start date of the experimental session (YYYYMMDD)",
        "regex": SESSION_DATE_PATTERN,
        "deserializer": deserialize_session_date,
        "serializer": serialize_session_date,
    },
    "data_type": {
        "description": "Data type",
        # Alternation of the known directory names, in mapping order.
        "regex": "|".join(_STRING_TO_DATA_TYPE),
        "deserializer": _deserializer_data_type,
        "serializer": _serializer_data_type,
    },
    "collection": {
        "description": "Collection of datasets",
        "regex": BLISS_NAME_PATTERN,
    },
    "dataset": {
        "description": "Dataset name (smallest entity in the data portal)",
        "regex": BLISS_NAME_PATTERN,
    },
}

# Concepts that are computed from other concepts instead of parsed from a path.
_DERIVED_CONCEPTS = {
    "beamline_normalized": {
        "description": "Normalized beamline name for the Data Portal",
        "examples": ["id21", "id23-1", "id30a-1"],
        "derive_func": derive_normalized_beamline,
        "derived_from": ["beamline"],
    },
}


# Cache for the schema instance; stays None until get_schema() is first called.
_SCHEMA = None



[docs]
def get_schema() -> PathSchema:
    """Return the ESRF v2 path schema, creating it on the first call.

    The instance is stored in the module-level ``_SCHEMA`` variable, so every
    later call returns the same object.
    """
    global _SCHEMA
    if _SCHEMA is not None:
        return _SCHEMA

    schema_options = {
        "name": "esrf",
        "version": 2,
        "description": __doc__,
        "concepts": _CONCEPTS,
        "derived_concepts": _DERIVED_CONCEPTS,
        "template_tree": _ROOT_TREE,
        "extends": [unknown_v1.get_schema()],
        "symbolic_root_segment": "data",
    }
    _SCHEMA = PathSchema(**schema_options)
    return _SCHEMA