"""
ESRF Data Policy v2
Introduced with BLISS 1.10.1 for beamline data on Dec 16, 2022.
Path hierarchy::
{data_root}/{proposal}/{beamline}/{session_date}/{data_type}/ [TID]
├── {data_type=raw}/ [BLISS]
│ ├── {proposal}_{beamline}.h5
│ ├── {collection}/
│ │ ├── {proposal}_{collection}.h5
│ | └── {dataset}/
│ | └── {collection}_{dataset}.h5
| └── __icat__/
| └── {collection}_{dataset}.xml
├── {data_type=processed}/ [esrf-pathlib]
│ ├── {proposal}_{beamline}.h5
│ └── {collection}/
│ ├── {proposal}_{collection}.h5
│ └── {dataset}/
│ └── {collection}_{dataset}.h5
└── {data_type=_nobackup}/ [esrf-pathlib]
├── {proposal}_{beamline}.h5
└── {collection}/
├── {proposal}_{collection}.h5
└── {dataset}/
└── {collection}_{dataset}.h5
"""
from typing import Union
from .. import constants
from ..schema.path import PathSchema
from . import unknown_v1
from .esrf_utils import BLISS_NAME_PATTERN
from .esrf_utils import SESSION_DATE_PATTERN
from .esrf_utils import derive_normalized_beamline
from .esrf_utils import deserialize_session_date
from .esrf_utils import deserializer_enum_type
from .esrf_utils import named_directory
from .esrf_utils import serialize_beamline
from .esrf_utils import serialize_session_date
from .esrf_utils import serializer_enum_type
from .types import DataType
def _collection_tree(prefix: str) -> dict:
    """Build the ``{collection}`` sub-tree shared by all data types.

    Below the collection directory the raw, processed and no-backup branches
    have the same layout; only the prefix of the node names differs.

    :param prefix: node-name prefix (``"raw"``, ``"processed"`` or
        ``"nobackup"``).
    :returns: a new template dictionary whose nodes are named
        ``{prefix}_collection_path``, ``{prefix}_collection_file``,
        ``{prefix}_dataset_path``, ``{prefix}_dataset_file`` and
        ``{prefix}_gallery_path``.
    """
    # NOTE(review): ``named_directory(...)`` keys presumably name the enclosing
    # directory itself and tuple keys pair a file-name template with its node
    # name -- confirm against ``esrf_utils`` / ``PathSchema``.
    return {
        named_directory(f"{prefix}_collection_path"): None,
        ("{proposal}_{collection}.h5", f"{prefix}_collection_file"): None,
        "{collection}_{dataset}": {
            named_directory(f"{prefix}_dataset_path"): None,
            ("{collection}_{dataset}.h5", f"{prefix}_dataset_file"): None,
            "gallery": {
                named_directory(f"{prefix}_gallery_path"): None,
            },
        },
    }


# Template of everything that lives below one ``{session_date}`` directory:
# one branch per data type, each holding a proposal file and the collections.
_SESSION_TREE = {
    named_directory("session_path"): None,
    "{data_type=raw}": {
        named_directory("raw_data_path"): None,
        ("{proposal}_{beamline}.h5", "raw_proposal_file"): None,
        "{collection}": _collection_tree("raw"),
        # Only the raw branch carries the ``__icat__`` metadata directory.
        "__icat__": {
            named_directory("raw_metadata_path"): None,
            ("{collection}_{dataset}.xml", "raw_metadata_file"): None,
        },
    },
    "{data_type=processed}": {
        named_directory("processed_data_path"): None,
        ("{proposal}_{beamline}.h5", "processed_proposal_file"): None,
        "{collection}": _collection_tree("processed"),
    },
    "{data_type=_nobackup}": {
        # The directory node is "nobackup_path" (no "_data" infix), unlike
        # the raw and processed branches.
        named_directory("nobackup_path"): None,
        ("{proposal}_{beamline}.h5", "nobackup_proposal_file"): None,
        "{collection}": _collection_tree("nobackup"),
    },
}
# Full template tree: the session tree wrapped in its parent directories.
# Built inside-out, so the last segment listed ("{data_root}") ends up as the
# single top-level key:
#   {data_root} / {proposal} / {beamline} / {session_date} / <session tree>
_ROOT_TREE = _SESSION_TREE
for _segment in ("{session_date}", "{beamline}", "{proposal}", "{data_root}"):
    _ROOT_TREE = {_segment: _ROOT_TREE}
del _segment  # do not leave the loop variable in the module namespace
# Maps the string used for each data type (the ``{data_type=...}`` values of
# the template tree) to the corresponding ``DataType`` member. The insertion
# order is significant: the ``data_type`` regex is built by joining these keys.
_STRING_TO_DATA_TYPE = dict(
    raw=DataType.RAW,
    processed=DataType.PROCESSED,
    _nobackup=DataType.NOBACKUP,
)
def _deserializer_data_type(data_type: Union[str, DataType]) -> DataType:
    """Deserialize the ``data_type`` concept.

    Delegates to ``deserializer_enum_type``, bound to the ``DataType`` enum
    and the ``_STRING_TO_DATA_TYPE`` mapping.

    :param data_type: data type as a string or as a ``DataType`` member.
    :returns: whatever ``deserializer_enum_type`` returns (annotated as
        ``DataType``).
    """
    enum_options = {
        "enum_type": DataType,
        "string_to_enum": _STRING_TO_DATA_TYPE,
    }
    return deserializer_enum_type(data_type, **enum_options)
def _serializer_data_type(data_type: Union[str, DataType]) -> str:
    """Serialize the ``data_type`` concept.

    Delegates to ``serializer_enum_type``, bound to the ``DataType`` enum
    and the ``_STRING_TO_DATA_TYPE`` mapping.

    :param data_type: data type as a string or as a ``DataType`` member.
    :returns: whatever ``serializer_enum_type`` returns (annotated as
        ``str``).
    """
    enum_options = {
        "enum_type": DataType,
        "string_to_enum": _STRING_TO_DATA_TYPE,
    }
    return serializer_enum_type(data_type, **enum_options)
# Definition of every placeholder ("concept") used in the template tree.
# Each entry is a set of keyword options handed to ``PathSchema`` through the
# ``concepts`` argument of ``get_schema``.
_CONCEPTS = {
    "data_root": {
        "description": "Root directory",
        "regex": r".*",
        "default_value": constants.DEFAULT_DATA_ROOT,
        "deserializer": unknown_v1.as_os_path,
        "serializer": unknown_v1.as_os_path,
    },
    "proposal": {
        "description": "ESRF proposal name",
        "regex": BLISS_NAME_PATTERN,
        "examples": ["ma1234", "blc1234"],
    },
    "beamline": {
        "description": "Beamline name",
        "regex": BLISS_NAME_PATTERN,
        "serializer": serialize_beamline,
        "examples": ["id21", "id23eh1", "id30a1"],
    },
    "session_date": {
        "description": "Start date of the experimental session (YYYYMMDD)",
        "regex": SESSION_DATE_PATTERN,
        "deserializer": deserialize_session_date,
        "serializer": serialize_session_date,
    },
    "data_type": {
        "description": "Data type",
        # Alternation of the known data-type strings, in mapping order.
        "regex": "|".join(_STRING_TO_DATA_TYPE),
        "deserializer": _deserializer_data_type,
        "serializer": _serializer_data_type,
    },
    "collection": {
        "description": "Collection of datasets",
        "regex": BLISS_NAME_PATTERN,
    },
    "dataset": {
        "description": "Dataset name (smallest entity in the data portal)",
        "regex": BLISS_NAME_PATTERN,
    },
}
# Concepts that do not appear in the path template themselves but are
# computed from other concepts via ``derive_func``.
_DERIVED_CONCEPTS = {
    "beamline_normalized": {
        "description": "Normalized beamline name for the Data Portal",
        "examples": ["id21", "id23-1", "id30a-1"],
        "derive_func": derive_normalized_beamline,
        "derived_from": ["beamline"],
    },
}
# Module-level cache for the schema instance; filled on the first call to
# ``get_schema``.
_SCHEMA = None


def get_schema() -> PathSchema:
    """Return the path schema for the ESRF data policy, version 2.

    The ``PathSchema`` is built on the first call from the module-level
    concept definitions and template tree, extending the ``unknown_v1``
    schema, and is then cached in ``_SCHEMA``. Later calls return that same
    instance.

    :returns: the cached ``PathSchema`` instance.
    """
    global _SCHEMA
    if _SCHEMA is None:
        _SCHEMA = PathSchema(
            name="esrf",
            version=2,
            # The module docstring doubles as the schema description.
            description=__doc__,
            concepts=_CONCEPTS,
            derived_concepts=_DERIVED_CONCEPTS,
            template_tree=_ROOT_TREE,
            extends=[unknown_v1.get_schema()],
            symbolic_root_segment="data",
        )
    return _SCHEMA