# Source code for esrf_pathlib._schemas.definitions.tomo_v1

"""
Tomography Processed Data Policy v1

Introduced on Feb 23, 2026.

Path hierarchy::

    {data_root}/{proposal}/{beamline}/{session_date}/     [TID]
        └── {data_type=PROCESSED_DATA}/
              ├── {tomo_result_type=SLICES}/
              │     └── {collection}_{dataset}_{tomo_technique}_{tomo_slicing_direction}_{tomo_slice_number}.{tomo_extension}
              ├── {tomo_result_type=VOLUMES}/
              │     └── {collection}_{dataset}_{tomo_technique}_{tomo_dtype}_{tomo_extension}/
              │           └── {collection}_{dataset}_{tomo_slice_number}.{tomo_extension}
              └── {collection}/   [esrf-pathlib]
                    ├── {tomo_result_type=SLICES}/
                    │     └── {collection}_{dataset}_{tomo_technique}_{tomo_slicing_direction}_{tomo_slice_number}.{tomo_extension}
                    ├── {tomo_result_type=VOLUMES}/
                    │     └── {collection}_{dataset}_{tomo_technique}_{tomo_dtype}_{tomo_extension}/
                    │           └── {collection}_{dataset}_{tomo_slice_number}.{tomo_extension}
                    └── {collection}_{dataset}/    [esrf-pathlib]
                          ├── references/
                          │     ├── {collection}_{dataset}_darks.hdf5
                          │     └── {collection}_{dataset}_flats.hdf5
                          ├── projections/
                          │     ├── {collection}_{dataset}.nx
                          │     └── gallery/
                          │           ├── metadata.json
                          │           ├── {collection}_{dataset}_{tomo_angle}_large.{thumbnail_file_type}
                          │           └── {collection}_{dataset}_{tomo_angle}.{thumbnail_file_type}
                          ├── slices/
                          │     ├── {collection}_{dataset}_{tomo_technique}_{tomo_slicing_direction}_{tomo_slice_number}.{tomo_extension}
                          │     └── gallery/
                          │           ├── metadata.json
                          │           ├── {collection}_{dataset}_{tomo_technique}_{tomo_slicing_direction}_{tomo_slice_number}_large.{thumbnail_file_type}
                          │           └── {collection}_{dataset}_{tomo_technique}_{tomo_slicing_direction}_{tomo_slice_number}.{thumbnail_file_type}
                          ├── volumes/
                          │     └── {collection}_{dataset}_{tomo_technique}_{tomo_dtype}_{tomo_extension}/
                          │           ├── {collection}_{dataset}_{tomo_slice_number}.{tomo_extension}
                          │           └── gallery/
                          │                 ├── metadata.json
                          │                 ├── {collection}_{dataset}_{tomo_technique}_{tomo_slicing_direction}_{tomo_slice_number}_large.{thumbnail_file_type}
                          │                 └── {collection}_{dataset}_{tomo_technique}_{tomo_slicing_direction}_{tomo_slice_number}.{thumbnail_file_type}
                          ├── volumes_{tomo_type}/
                          │     └── {collection}_{dataset}_{tomo_technique}_{tomo_type}_{tomo_dtype}_{tomo_extension}/
                          │           ├── {collection}_{dataset}_{tomo_slice_number}.{tomo_extension}
                          │           └── gallery/
                          │                 ├── metadata.json
                          │                 ├── {collection}_{dataset}_{tomo_technique}_{tomo_type}_{tomo_slicing_direction}_{tomo_slice_number}_large.{thumbnail_file_type}
                          │                 └── {collection}_{dataset}_{tomo_technique}_{tomo_type}_{tomo_slicing_direction}_{tomo_slice_number}.{thumbnail_file_type}
                          ├── logs/
                          │     └── {collection}_{dataset}_{tomo_application}.log
                          └── workflows/
                               └── gallery/
                               └── {collection}_{dataset}.json

"""

from typing import Union

from ..schema.path import PathSchema
from . import esrf_v3
from .esrf_utils import deserializer_enum_type
from .esrf_utils import named_directory
from .esrf_utils import serializer_enum_type
from .types import TomoResultType

# Template tree for the contents of a single dataset result directory.
# It is mounted under "{collection}_{dataset}" in _SESSION_TREE.
#
# Conventions as they appear in this literal (interpretation inferred from the
# structure -- confirm against the PathSchema template-tree documentation):
#   * a plain string key is a directory name (or template) whose value is the
#     sub-tree of that directory;
#   * a ``(template, name)`` tuple key pairs a path template with an
#     identifier; a ``None`` value means no children are declared, a dict
#     value means the entry is itself a directory with children;
#   * ``named_directory(name)`` presumably attaches an identifier to the
#     enclosing directory itself -- verify against esrf_utils.named_directory.
_RESULT_TREE = {
    # Dark and flat reference files.
    "references": {
        named_directory("references_path"): None,
        ("{collection}_{dataset}_darks.hdf5", "references_dark"): None,
        ("{collection}_{dataset}_flats.hdf5", "references_flat"): None,
    },
    # Projection file plus a gallery of per-angle images.
    "projections": {
        named_directory("projections_path"): None,
        ("{collection}_{dataset}.nx", "projections_file"): None,
        "gallery": {
            named_directory("projections_gallery_path"): None,
            ("metadata.json", "projections_metadata_file"): None,
            # The "_large" variant is the one named "thumbnail"; the plain
            # variant is named "preview".
            (
                "{collection}_{dataset}_{tomo_angle}_large.{thumbnail_file_type}",
                "projections_thumbnail",
            ): None,
            (
                "{collection}_{dataset}_{tomo_angle}.{thumbnail_file_type}",
                "projections_preview",
            ): None,
        },
    },
    # Individual slice files plus their gallery.
    "slices": {
        named_directory("slices_path"): None,
        (
            "{collection}_{dataset}_{tomo_technique}_{tomo_slicing_direction}_{tomo_slice_number}.{tomo_extension}",
            "slices_file",
        ): None,
        "gallery": {
            named_directory("slices_gallery_path"): None,
            ("metadata.json", "slices_metadata_file"): None,
            (
                "{collection}_{dataset}_{tomo_technique}_{tomo_slicing_direction}_{tomo_slice_number}_large.{thumbnail_file_type}",
                "slices_thumbnail",
            ): None,
            (
                "{collection}_{dataset}_{tomo_technique}_{tomo_slicing_direction}_{tomo_slice_number}.{thumbnail_file_type}",
                "slices_preview",
            ): None,
        },
    },
    # Volumes: each volume is a directory holding one file per slice and a gallery.
    "volumes": {
        named_directory("volumes_path"): None,
        # Despite the "_file" suffix in its name, this entry has children,
        # i.e. it is declared as a directory template.
        (
            "{collection}_{dataset}_{tomo_technique}_{tomo_dtype}_{tomo_extension}",
            "volumes_file",
        ): {
            (
                "{collection}_{dataset}_{tomo_slice_number}.{tomo_extension}",
                "volumes_slice_file",
            ): None,
            "gallery": {
                named_directory("volumes_gallery_path"): None,
                ("metadata.json", "volumes_metadata_file"): None,
                (
                    "{collection}_{dataset}_{tomo_technique}_{tomo_slicing_direction}_{tomo_slice_number}_large.{thumbnail_file_type}",
                    "volumes_thumbnail",
                ): None,
                (
                    "{collection}_{dataset}_{tomo_technique}_{tomo_slicing_direction}_{tomo_slice_number}.{thumbnail_file_type}",
                    "volumes_preview",
                ): None,
            },
        },
    },
    # Same layout as "volumes", with an extra {tomo_type} field in the
    # directory name, the volume directory name and the gallery image names.
    "volumes_{tomo_type}": {
        named_directory("volumes_custom_type_path"): None,
        (
            "{collection}_{dataset}_{tomo_technique}_{tomo_type}_{tomo_dtype}_{tomo_extension}",
            "volumes_custom_type_file",
        ): {
            (
                "{collection}_{dataset}_{tomo_slice_number}.{tomo_extension}",
                "volumes_custom_type_slice_file",
            ): None,
            "gallery": {
                named_directory("volumes_custom_type_gallery_path"): None,
                ("metadata.json", "volumes_custom_type_metadata_file"): None,
                (
                    "{collection}_{dataset}_{tomo_technique}_{tomo_type}_{tomo_slicing_direction}_{tomo_slice_number}_large.{thumbnail_file_type}",
                    "volumes_custom_type_thumbnail",
                ): None,
                (
                    "{collection}_{dataset}_{tomo_technique}_{tomo_type}_{tomo_slicing_direction}_{tomo_slice_number}.{thumbnail_file_type}",
                    "volumes_custom_type_preview",
                ): None,
            },
        },
    },
    # One log file per producing application.
    "logs": {
        named_directory("tomo_log_path"): None,
        ("{collection}_{dataset}_{tomo_application}.log", "tomo_log_file"): None,
    },
    # NOTE(review): the module docstring draws "gallery/" and
    # "{collection}_{dataset}.json" at the same depth under "workflows/",
    # whereas here the JSON file is nested inside "gallery/" -- confirm which
    # layout is intended.
    "workflows": {
        named_directory("workflows_path"): None,
        "gallery": {
            named_directory("workflows_gallery_path"): None,
            ("{collection}_{dataset}.json", "workflows_file"): None,
        },
    },
}


# Template tree below a session directory.
#
# Under "{data_type=PROCESSED_DATA}" it declares:
#   * "{collection}/{collection}_{dataset}/" holding the per-dataset
#     _RESULT_TREE;
#   * SLICES / VOLUMES directories at two levels: inside each collection
#     (identifiers "*_link_collection_*") and directly under PROCESSED_DATA
#     (identifiers "*_link_root_*").
# NOTE(review): the "link" in those identifiers presumably means the entries
# are links to the files in the per-dataset tree -- nothing in this module
# establishes that; confirm with the data policy.
_SESSION_TREE = {
    "{data_type=PROCESSED_DATA}": {
        "{collection}": {
            "{collection}_{dataset}": _RESULT_TREE,
            # Collection-level SLICES directory.
            "{tomo_result_type=SLICES}": {
                named_directory("slices_link_collection_path"): None,
                (
                    "{collection}_{dataset}_{tomo_technique}_{tomo_slicing_direction}_{tomo_slice_number}.{tomo_extension}",
                    "slices_link_collection_file",
                ): None,
            },
            # Collection-level VOLUMES directory; each volume entry is a
            # directory containing per-slice files (no gallery at this level).
            "{tomo_result_type=VOLUMES}": {
                named_directory("volumes_link_collection_path"): None,
                (
                    "{collection}_{dataset}_{tomo_technique}_{tomo_dtype}_{tomo_extension}",
                    "volumes_link_collection_file",
                ): {
                    (
                        "{collection}_{dataset}_{tomo_slice_number}.{tomo_extension}",
                        "volumes_link_collection_slice_file",
                    ): None,
                },
            },
        },
        # Session-level SLICES directory (same file template as above).
        "{tomo_result_type=SLICES}": {
            named_directory("slices_link_root_path"): None,
            (
                "{collection}_{dataset}_{tomo_technique}_{tomo_slicing_direction}_{tomo_slice_number}.{tomo_extension}",
                "slices_link_root_file",
            ): None,
        },
        # Session-level VOLUMES directory (same templates as above).
        "{tomo_result_type=VOLUMES}": {
            named_directory("volumes_link_root_path"): None,
            (
                "{collection}_{dataset}_{tomo_technique}_{tomo_dtype}_{tomo_extension}",
                "volumes_link_root_file",
            ): {
                (
                    "{collection}_{dataset}_{tomo_slice_number}.{tomo_extension}",
                    "volumes_link_root_slice_file",
                ): None,
            },
        },
    },
}


# Full template tree handed to PathSchema: the four leading path segments
# {data_root}/{proposal}/{beamline}/{session_date} followed by _SESSION_TREE.
_ROOT_TREE = {
    "{data_root}": {
        "{proposal}": {"{beamline}": {"{session_date}": _SESSION_TREE}},
    },
}


# Directory names used in paths, mapped to their TomoResultType members.
# The keys also define the regex of the "tomo_result_type" concept.
_STRING_TO_RESULT_TYPE = dict(
    SLICES=TomoResultType.SLICE,
    VOLUMES=TomoResultType.VOLUME,
)


def _deserializer_result_type(data_type: Union[str, TomoResultType]) -> TomoResultType:
    """Turn a path string or enum member into a ``TomoResultType``.

    The conversion itself is delegated to ``deserializer_enum_type`` using the
    module-level ``_STRING_TO_RESULT_TYPE`` table.
    """
    result_type = deserializer_enum_type(
        data_type, enum_type=TomoResultType, string_to_enum=_STRING_TO_RESULT_TYPE
    )
    return result_type


def _serializer_result_type(data_type: Union[str, TomoResultType]) -> str:
    """Turn a ``TomoResultType`` member or string into its path string.

    The conversion itself is delegated to ``serializer_enum_type`` using the
    module-level ``_STRING_TO_RESULT_TYPE`` table.
    """
    path_string = serializer_enum_type(
        data_type, enum_type=TomoResultType, string_to_enum=_STRING_TO_RESULT_TYPE
    )
    return path_string


def _as_int_or_float(string_value: str) -> Union[int, float]:
    string_value = string_value.replace("deg", "")
    if string_value.isdigit():
        return int(string_value)
    return float(string_value)


def _serializer_angle(py_value: Union[str, int, float]) -> str:
    return f"{py_value}deg"


def _deserializer_slicing_direction(string_value: str) -> str:
    value = string_value.lower()
    if value not in {"xy", "yz", "xz"}:
        raise ValueError(f"Invalid slicing direction: {string_value!r}")
    return value


def _serializer_slicing_direction(py_value: str) -> str:
    """Serialize a slicing direction.

    Serialization applies the same normalization and validation as
    ``_deserializer_slicing_direction``, so it simply reuses that function.
    """
    serialized = _deserializer_slicing_direction(py_value)
    return serialized


def _serializer_slice_number(py_value: Union[str, int]) -> str:
    return f"{int(py_value):05d}"


# One or more ASCII letters or digits. It excludes "_" and ".", which the
# path templates use as separators between fields.
_ALPHA_NUMERIC_REGEX = r"[a-zA-Z0-9]+"

# Concept definitions passed to PathSchema: one entry per template field
# introduced by this schema. Every entry has a description and a regex; some
# add a deserializer/serializer pair and/or example values.
_CONCEPTS = {
    "tomo_angle": dict(
        description="Projection angle for raw projections in deg notation (e.g. 180deg, 180.5deg)",
        regex=r"\d+(\.\d+)?deg",
        deserializer=_as_int_or_float,
        serializer=_serializer_angle,
    ),
    "tomo_technique": dict(
        description="Tomography technique (e.g. absorption, phase)",
        regex=_ALPHA_NUMERIC_REGEX,
        examples=["absorption", "phase"],
    ),
    "tomo_type": dict(
        description="Processing type/context (e.g. stitched, denoised, segmented)",
        regex=_ALPHA_NUMERIC_REGEX,
        examples=["stitched", "denoised", "segmented"],
    ),
    "tomo_slicing_direction": dict(
        description="Slicing direction in ESRF coordinates (xy, yz or xz)",
        regex=r"(xy|yz|xz)",
        deserializer=_deserializer_slicing_direction,
        serializer=_serializer_slicing_direction,
    ),
    "tomo_slice_number": dict(
        description="Slice index (0-based) formatted with 5 digits minimum (e.g. 00000, 00001)",
        regex=r"\d{5,}",
        deserializer=int,
        serializer=_serializer_slice_number,
    ),
    "tomo_dtype": dict(
        description="Output numeric data type / bit depth (e.g. 16Bit, 32Bit)",
        regex=_ALPHA_NUMERIC_REGEX,
        examples=["16Bit", "32Bit"],
    ),
    "tomo_result_type": dict(
        description="Tomography result type.",
        # Joining the dict directly iterates its keys ("SLICES|VOLUMES").
        # NOTE(review): unlike "tomo_slicing_direction", this alternation is
        # not wrapped in parentheses -- confirm PathSchema groups each concept
        # regex before embedding it in a larger pattern.
        regex="|".join(_STRING_TO_RESULT_TYPE),
        deserializer=_deserializer_result_type,
        serializer=_serializer_result_type,
    ),
    "tomo_extension": dict(
        description="File extension (e.g. hdf5, tiff, vol)",
        regex=_ALPHA_NUMERIC_REGEX,
    ),
    "tomo_application": dict(
        description="Application/workflow name that produced the result (e.g. nabu)",
        regex=_ALPHA_NUMERIC_REGEX,
    ),
    "thumbnail_file_type": dict(
        description="Thumbnail file type (e.g. png, jpg)",
        regex=_ALPHA_NUMERIC_REGEX,
    ),
}


# Module-level cache for the schema instance; filled on the first
# get_schema() call.
_SCHEMA = None


def get_schema() -> PathSchema:
    """Return the "tomo" version 1 path schema.

    The ``PathSchema`` is built on the first call (extending the ``esrf_v3``
    schema, with this module's docstring as description) and the same
    instance is returned on every later call.
    """
    global _SCHEMA
    if _SCHEMA is not None:
        return _SCHEMA
    _SCHEMA = PathSchema(
        name="tomo",
        version=1,
        description=__doc__,
        concepts=_CONCEPTS,
        template_tree=_ROOT_TREE,
        extends=[esrf_v3.get_schema()],
    )
    return _SCHEMA