"""
ESRF Data Policy v3
Introduced with BLISS 1.11.1 for beamline data on Mar 24, 2023.
Path hierarchy::

    {data_root}/{proposal}/{beamline}/{session_date}/{data_type}/    [TID]
    ├── {data_type=RAW_DATA}/
    │   ├── {proposal}_{beamline}.h5                  [BLISS]
    │   ├── {collection}/
    │   │   ├── {proposal}_{collection}.h5
    │   │   └── {collection}_{dataset}/
    │   │       └── {collection}_{dataset}.h5
    │   └── __icat__/
    │       └── {collection}_{dataset}.xml
    ├── {data_type=PROCESSED_DATA}/
    │   ├── {proposal}_{beamline}.h5                  [esrf-pathlib]
    │   └── {collection}/
    │       ├── {proposal}_{collection}.h5
    │       └── {collection}_{dataset}/
    │           └── {collection}_{dataset}.h5
    ├── {data_type=NOBACKUP}/
    │   ├── {proposal}_{beamline}.h5                  [esrf-pathlib]
    │   └── {collection}/
    │       ├── {proposal}_{collection}.h5
    │       └── {collection}_{dataset}/
    │           └── {collection}_{dataset}.h5
    ├── {data_type=SCRIPTS}/
    ├── {data_type=GALLERY}/
    └── {data_type=NOTES}/
"""
from typing import Union
from .. import constants
from ..schema.path import PathSchema
from . import unknown_v1
from .esrf_utils import BLISS_NAME_PATTERN
from .esrf_utils import SESSION_DATE_PATTERN
from .esrf_utils import derive_normalized_beamline
from .esrf_utils import deserialize_session_date
from .esrf_utils import deserializer_enum_type
from .esrf_utils import named_directory
from .esrf_utils import serialize_beamline
from .esrf_utils import serialize_session_date
from .esrf_utils import serializer_enum_type
from .types import DataType
def _data_type_tree(root_name, prefix):
    """Build the template sub-tree shared by RAW_DATA, PROCESSED_DATA and NOBACKUP.

    :param root_name: name passed to ``named_directory`` for the data type
        directory itself (e.g. ``"raw_data_path"``).
    :param prefix: prefix of the names attached to the nested directories and
        files (e.g. ``"raw"`` gives ``"raw_collection_path"``).
    :returns: a new nested dictionary; every call creates fresh dictionaries.
    """
    # Tuple keys pair a file name template with a name; presumably the name is
    # how the schema exposes that file -- confirm against PathSchema.
    return {
        named_directory(root_name): None,
        ("{proposal}_{beamline}.h5", prefix + "_proposal_file"): None,
        "{collection}": {
            named_directory(prefix + "_collection_path"): None,
            ("{proposal}_{collection}.h5", prefix + "_collection_file"): None,
            "{collection}_{dataset}": {
                named_directory(prefix + "_dataset_path"): None,
                ("{collection}_{dataset}.h5", prefix + "_dataset_file"): None,
                "gallery": {
                    named_directory(prefix + "_gallery_path"): None,
                },
            },
        },
    }


# Template of everything below a session directory, one entry per data type.
_SESSION_TREE = {
    named_directory("session_path"): None,
    "{data_type=RAW_DATA}": {
        **_data_type_tree("raw_data_path", "raw"),
        # Only RAW_DATA carries an __icat__ directory next to the collections.
        "__icat__": {
            named_directory("raw_metadata_path"): None,
            ("{collection}_{dataset}.xml", "raw_metadata_file"): None,
        },
    },
    "{data_type=PROCESSED_DATA}": _data_type_tree("processed_data_path", "processed"),
    "{data_type=NOBACKUP}": _data_type_tree("nobackup_path", "nobackup"),
    "{data_type=SCRIPTS}": {named_directory("scripts_path"): None},
    "{data_type=GALLERY}": {named_directory("gallery_path"): None},
    "{data_type=NOTES}": {named_directory("notes_path"): None},
}
# Full template tree: the session tree wrapped in its parent directory levels.
# The levels are applied innermost first, so the resulting hierarchy reads
# {data_root}/{proposal}/{beamline}/{session_date}/<session tree>.
_ROOT_TREE = _SESSION_TREE
for _parent_segment in ("{session_date}", "{beamline}", "{proposal}", "{data_root}"):
    _ROOT_TREE = {_parent_segment: _ROOT_TREE}
del _parent_segment
# Directory name used on disk for each data type -> DataType member.
# Insertion order matters: it is also the order of the alternatives in the
# "data_type" regular expression built from these keys.
_STRING_TO_DATA_TYPE = dict(
    RAW_DATA=DataType.RAW,
    PROCESSED_DATA=DataType.PROCESSED,
    NOBACKUP=DataType.NOBACKUP,
    SCRIPTS=DataType.SCRIPT,
    GALLERY=DataType.GALLERY,
    NOTES=DataType.NOTE,
)
def _deserializer_data_type(data_type: Union[str, DataType]) -> DataType:
    """Turn a data type given as a string or ``DataType`` into a ``DataType``.

    Delegates to ``deserializer_enum_type`` using the v3 directory names
    defined in ``_STRING_TO_DATA_TYPE``.

    :param data_type: value to convert.
    :returns: whatever ``deserializer_enum_type`` returns for this mapping.
    """
    enum_options = {"enum_type": DataType, "string_to_enum": _STRING_TO_DATA_TYPE}
    return deserializer_enum_type(data_type, **enum_options)
def _serializer_data_type(data_type: Union[str, DataType]) -> str:
    """Turn a data type given as a string or ``DataType`` into its string form.

    Delegates to ``serializer_enum_type`` using the v3 directory names
    defined in ``_STRING_TO_DATA_TYPE``.

    :param data_type: value to convert.
    :returns: whatever ``serializer_enum_type`` returns for this mapping.
    """
    enum_options = {"enum_type": DataType, "string_to_enum": _STRING_TO_DATA_TYPE}
    return serializer_enum_type(data_type, **enum_options)
# Definition of every placeholder used in the template tree: a description,
# the regular expression it must match and, where needed, a default value,
# (de)serializers and examples.
_CONCEPTS = {
    "data_root": {
        "description": "Root directory",
        "regex": r".*",
        "default_value": constants.DEFAULT_DATA_ROOT,
        "deserializer": unknown_v1.as_os_path,
        "serializer": unknown_v1.as_os_path,
    },
    "proposal": {
        "description": "ESRF proposal name",
        "regex": BLISS_NAME_PATTERN,
        "examples": ["ma1234", "blc1234"],
    },
    "beamline": {
        "description": "Beamline name",
        "regex": BLISS_NAME_PATTERN,
        "serializer": serialize_beamline,
        "examples": ["id21", "id23eh1", "id30a1"],
    },
    "session_date": {
        "description": "Start date of the experimental session (YYYYMMDD)",
        "regex": SESSION_DATE_PATTERN,
        "deserializer": deserialize_session_date,
        "serializer": serialize_session_date,
    },
    "data_type": {
        "description": "Data type",
        # Alternation of the known directory names, in mapping order.
        "regex": "|".join(_STRING_TO_DATA_TYPE),
        "deserializer": _deserializer_data_type,
        "serializer": _serializer_data_type,
    },
    "collection": {
        "description": "Collection of datasets",
        "regex": BLISS_NAME_PATTERN,
    },
    "dataset": {
        "description": "Dataset name (smallest entity in the data portal)",
        "regex": BLISS_NAME_PATTERN,
    },
}
# Concepts that do not appear in the template tree themselves but are
# computed from the concepts listed in "derived_from".
_DERIVED_CONCEPTS = {
    "beamline_normalized": {
        "description": "Normalized beamline name for the Data Portal",
        "examples": ["id21", "id23-1", "id30a-1"],
        "derive_func": derive_normalized_beamline,
        "derived_from": ["beamline"],
    },
}
# Module-level cache: the schema is built on the first get_schema() call and
# the same instance is returned afterwards.
_SCHEMA = None


def get_schema() -> PathSchema:
    """Return the ESRF data policy v3 path schema.

    The ``PathSchema`` is created lazily on the first call and cached in the
    module-level ``_SCHEMA`` variable, so every caller receives the same
    instance. The schema uses this module's docstring as its description and
    extends the ``unknown_v1`` schema.

    :returns: the shared ``PathSchema`` instance for ESRF data policy v3.
    """
    global _SCHEMA
    if _SCHEMA is None:
        _SCHEMA = PathSchema(
            name="esrf",
            version=3,
            description=__doc__,
            concepts=_CONCEPTS,
            derived_concepts=_DERIVED_CONCEPTS,
            template_tree=_ROOT_TREE,
            extends=[unknown_v1.get_schema()],
            symbolic_root_segment="data",
        )
    return _SCHEMA