# Source code for esrf_pathlib._schemas.definitions.esrf_v1

"""
ESRF Data Policy v1

Introduced with BLISS 1.2.0 for beamline data on Feb 11, 2020.

Path hierarchy::

    {data_root}/{proposal}/{beamline}/               [TID]
      └── {session_date}/                            [BLISS]
            ├── {proposal}_{beamline}.h5
            ├── {collection}/
            |     ├── {proposal}_{collection}.h5
            |     └── {collection}_{dataset}/
            |           └── {collection}_{dataset}.h5
            └── __icat__/
                  └── {collection}_{dataset}.xml
"""

from .. import constants
from ..schema.path import PathSchema
from . import unknown_v1
from .esrf_utils import BLISS_NAME_PATTERN
from .esrf_utils import SESSION_DATE_PATTERN
from .esrf_utils import derive_normalized_beamline
from .esrf_utils import deserialize_session_date
from .esrf_utils import named_directory
from .esrf_utils import serialize_beamline
from .esrf_utils import serialize_session_date
from .types import DataType

# Template subtree found below a session directory (it is attached under
# "{session_date}" in _ROOT_TREE).
#
# Key forms used here:
#   * named_directory("<name>")  - helper from esrf_utils; presumably attaches
#     a name to the directory it appears in (TODO confirm against esrf_utils).
#   * ("<template>", "<name>")   - a file-name template paired with a name;
#     presumably the name under which that path is exposed (TODO confirm).
#   * "<template>"               - a sub-directory template mapped to its own
#     nested subtree.
# A value of None is used for entries that carry no nested subtree.
_SESSION_TREE = {
    named_directory("session_path"): None,
    # Proposal-level master file, directly in the session directory.
    ("{proposal}_{beamline}.h5", "raw_proposal_file"): None,
    "{collection}": {
        named_directory("raw_collection_path"): None,
        # Collection-level master file.
        ("{proposal}_{collection}.h5", "raw_collection_file"): None,
        # Dataset directories are named "<collection>_<dataset>".
        "{collection}_{dataset}": {
            named_directory("raw_dataset_path"): None,
            # Dataset file shares its name with the dataset directory.
            ("{collection}_{dataset}.h5", "raw_dataset_file"): None,
            # Per-dataset "gallery" sub-directory (not shown in the module
            # docstring's hierarchy sketch).
            "gallery": {
                named_directory("raw_gallery_path"): None,
            },
        },
    },
    # Metadata directory, sibling of the collection directories.
    "__icat__": {
        named_directory("raw_metadata_path"): None,
        ("{collection}_{dataset}.xml", "raw_metadata_file"): None,
    },
}

# Complete template tree passed to PathSchema as ``template_tree``:
# {data_root}/{proposal}/{beamline}/{session_date}/ followed by the
# per-session subtree defined in _SESSION_TREE.
_ROOT_TREE = {
    "{data_root}": {
        "{proposal}": {
            "{beamline}": {
                "{session_date}": _SESSION_TREE,
            },
        },
    },
}

# Definitions of the placeholders used in the template tree. Each entry maps
# a placeholder name to the keyword settings handed to PathSchema via
# ``concepts`` (description, matching regex and optional converters/examples).
_CONCEPTS = {
    "data_root": {
        "description": "Root directory",
        "regex": r".*",
        "default_value": constants.DEFAULT_DATA_ROOT,
        # The same helper is used in both directions.
        "deserializer": unknown_v1.as_os_path,
        "serializer": unknown_v1.as_os_path,
    },
    "proposal": {
        "description": "ESRF proposal name",
        "regex": BLISS_NAME_PATTERN,
        "examples": ["ma1234", "blc1234"],
    },
    "beamline": {
        "description": "Beamline name",
        "regex": BLISS_NAME_PATTERN,
        "serializer": serialize_beamline,
        "examples": ["id21", "id23eh1", "id30a1"],
    },
    "session_date": {
        "description": "Start date of the experimental session (YYYYMMDD)",
        "regex": SESSION_DATE_PATTERN,
        "deserializer": deserialize_session_date,
        "serializer": serialize_session_date,
    },
    "collection": {
        "description": "Collection identifier grouping datasets",
        "regex": BLISS_NAME_PATTERN,
    },
    "dataset": {
        "description": "Dataset name (smallest entity in the data portal)",
        "regex": BLISS_NAME_PATTERN,
    },
}


def _derive_data_type(_) -> DataType:
    """Return ``DataType.RAW``; the single positional argument is ignored.

    Used as the ``derive_func`` of the ``data_type`` derived concept, which
    declares no source concepts (``derived_from=[]``).
    """
    return DataType.RAW


# Concepts that are computed rather than parsed from a path segment. Each
# entry names the function that produces the value and the concepts it is
# derived from; the mapping is handed to PathSchema via ``derived_concepts``.
_DERIVED_CONCEPTS = {
    "beamline_normalized": {
        "description": "Normalized beamline name for the Data Portal",
        "examples": ["id21", "id23-1", "id30a-1"],
        "derive_func": derive_normalized_beamline,
        "derived_from": ["beamline"],
    },
    "data_type": {
        "description": "Data type. Fixed to 'raw' for this schema.",
        "derive_func": _derive_data_type,
        # No inputs: the value is constant for this schema.
        "derived_from": [],
    },
}

# Module-level cache for the schema instance; stays None until get_schema()
# builds the PathSchema on its first call.
_SCHEMA = None


def get_schema() -> PathSchema:
    """Return the ESRF data policy v1 path schema, building it on first use.

    The schema is constructed once from the module-level concept and
    template definitions and cached in ``_SCHEMA``; later calls return the
    same ``PathSchema`` instance.

    :returns: the cached ``PathSchema`` named ``"esrf"``, version 1.
    """
    global _SCHEMA
    if _SCHEMA is None:
        _SCHEMA = PathSchema(
            name="esrf",
            version=1,
            # The module docstring doubles as the schema description.
            description=__doc__,
            concepts=_CONCEPTS,
            derived_concepts=_DERIVED_CONCEPTS,
            template_tree=_ROOT_TREE,
            extends=[unknown_v1.get_schema()],
            symbolic_root_segment="data",
        )
    return _SCHEMA