Source code for esrf_pathlib.tests.test_esrf

import datetime
import os
import pathlib
import re
from typing import Dict

import pytest

from .._schemas import errors
from .._schemas.constants import DEFAULT_DATA_ROOT
from .._schemas.constants import IS_WINDOWS
from .._schemas.definitions.types import DataType
from .._schemas.registry import ParsedPath
from .._schemas.registry import get_schema_registry
from ..core import ESRFPath
from .utils import make_path
from .utils import make_realpath


class EsrfPath(ESRFPath, esrf=3, fallback_depth=None):
    """``ESRFPath`` subclass declared with ``esrf=3`` and ``fallback_depth=None``."""
# Path that is expected to be parsed with the "unknown" schema (see its
# entry in TEST_PATHS below).
NOT_ESRF_PATH = make_path("some", "other", "path", "that", "does", "not", "match")

# Maps each input path to the description of its expected parse result:
# the schema versions, the template name and the expected field values.
TEST_PATHS: Dict[str, dict] = {
    make_path(
        "visitor",
        "ma6658",
        "id21",
        "20250509",
        "RAW_DATA",
        "foo",
        "foo_bar",
        "foo_bar.h5",
    ): {
        "schemas": {"esrf": 3},
        "template_name": "raw_dataset_file",
        "data_root": make_path("visitor"),
        "proposal": "ma6658",
        "beamline": "id21",
        "session_date": datetime.date(2025, 5, 9),
        "data_type": DataType.RAW,
        "collection": "foo",
        "dataset": "bar",
    },
    make_path(
        "visitor",
        "ma6658",
        "id21",
        "20250509",
        "raw",
        "foo",
        "foo_bar",
        "foo_bar.h5",
    ): {
        "schemas": {"esrf": 2},
        "template_name": "raw_dataset_file",
        "data_root": make_path("visitor"),
        "proposal": "ma6658",
        "beamline": "id21",
        "session_date": datetime.date(2025, 5, 9),
        "data_type": DataType.RAW,
        "collection": "foo",
        "dataset": "bar",
    },
    make_path(
        "visitor", "ma6658", "id21", "20250509", "foo", "foo_bar", "foo_bar.h5"
    ): {
        "schemas": {"esrf": 1},
        "template_name": "raw_dataset_file",
        "data_root": make_path("visitor"),
        "proposal": "ma6658",
        "beamline": "id21",
        "session_date": datetime.date(2025, 5, 9),
        "collection": "foo",
        "dataset": "bar",
    },
    NOT_ESRF_PATH: {
        "schemas": {"unknown": 1},
        "template_name": "unknown_path",
        "unknown_part": NOT_ESRF_PATH,
    },
}


def _extected_parsed(data) -> ParsedPath:
    """Build the ``ParsedPath`` described by one ``TEST_PATHS`` value.

    :param data: mapping with the keys ``schemas`` and ``template_name``;
        all remaining items are passed on as field values.
    :returns: the parse result built from a registry created with
        ``fallback_depth=0``.
    """
    # Work on a copy: the pops below must not alter the shared TEST_PATHS entry.
    field_values = dict(data)
    schema_versions = field_values.pop("schemas")
    name = field_values.pop("template_name")

    registry = get_schema_registry(**schema_versions, fallback_depth=0)
    return ParsedPath(registry, field_values, template=registry.get_template(name))
@pytest.mark.parametrize("input_path", list(TEST_PATHS))
def test_esrf_path_esrf_schema(input_path):
    """The parse result stored on the path equals the one described in ``TEST_PATHS``."""
    esrf_path = EsrfPath(input_path)
    reference = _extected_parsed(TEST_PATHS[input_path])

    assert esrf_path._esrf_parsed == reference
@pytest.mark.parametrize("input_path", list(TEST_PATHS))
def test_esrf_path_attributes(input_path):
    """Fields of the expected parse result are readable as path attributes.

    Non-path fields must compare equal to the expected values, path fields
    must compare equal to a ``pathlib.Path`` of the expected value, and the
    fields listed as raising must raise ``SchemaAttributeError``.
    """
    path = EsrfPath(input_path)
    reference = _extected_parsed(TEST_PATHS[input_path])

    # Path fields whose access must fail for this input.
    raising_fields = (
        ()
        if reference.schema_identifier == "unknown_v1"
        else ("thumbnail_file", "note_file", "unknown_path")
    )

    # Access concept values
    print(path._esrf_parsed.__info__(include_missing=True))
    for field in reference.nonpath_field_names():
        found = getattr(path, field)
        wanted = getattr(reference, field)
        assert found == wanted, field

    # Access paths
    for field in reference.path_field_names():
        if field in raising_fields:
            with pytest.raises(errors.SchemaAttributeError, match=field):
                _ = getattr(path, field)
            continue
        found = getattr(path, field)
        wanted = pathlib.Path(getattr(reference, field))
        assert found == wanted, field

    # Access non-existing attribute
    with pytest.raises(errors.SchemaAttributeError):
        _ = path.not_an_attribute
def test_inherit_from_existing_path():
    """An ``EsrfPath`` built from another ``EsrfPath`` has an equal parse result."""
    session = ("visitor", "ma6658", "id21", "20250509")
    original = EsrfPath(
        make_path(*session, "RAW_DATA", "foo", "foo_bar", "foo_bar.h5")
    )

    clone = EsrfPath(original)

    assert clone._esrf_parsed == original._esrf_parsed
def test_parent():
    """Each ``.parent`` step reports the schema and template of that directory level.

    An ``EsrfPath`` and a plain ``pathlib.Path`` are walked upwards in
    lockstep; the native path provides the string expected inside the repr.
    """
    path_str = make_path(
        "visitor", "ma6658", "id21", "20250509", "raw", "foo", "foo_bar", "foo_bar.h5"
    )
    native = pathlib.Path(path_str)
    node = EsrfPath(path_str)

    # Dataset file
    assert repr(node) == (
        f"<EsrfPath {str(native)!r} schema='esrf_v2' template='raw_dataset_file'>"
    )

    # Dataset directory
    node, native = node.parent, native.parent
    assert repr(node) == (
        f"<EsrfPath {str(native)!r} schema='esrf_v2' template='raw_dataset_path'>"
    )

    # Collection directory
    node, native = node.parent, native.parent
    assert repr(node) == (
        f"<EsrfPath {str(native)!r} schema='esrf_v2' template='raw_collection_path'>"
    )

    # Raw data directory
    node, native = node.parent, native.parent
    assert repr(node) == (
        f"<EsrfPath {str(native)!r} schema='esrf_v2' template='raw_data_path'>"
    )

    # Session directory: reported with the esrf_v3 schema
    node, native = node.parent, native.parent
    assert repr(node) == (
        f"<EsrfPath {str(native)!r} schema='esrf_v3' template='session_path'>"
    )

    # Above the session directory the path is reported as unknown
    node, native = node.parent, native.parent
    assert repr(node) == (
        f"<EsrfPath {str(native)!r} schema='unknown_v1' template='unknown_path'>"
    )

    # Same expectation when the parent is rebuilt from its string form
    node = EsrfPath(str(node.parent))
    native = native.parent
    assert repr(node) == (
        f"<EsrfPath {str(native)!r} schema='unknown_v1' template='unknown_path'>"
    )

    node, native = node.parent, native.parent
    assert repr(node) == (
        f"<EsrfPath {str(native)!r} schema='unknown_v1' template='unknown_path'>"
    )
def test_str():
    """``str()`` of an ``EsrfPath`` and of a ``pathlib.Path`` both return the input string."""
    path_str = make_path(
        "visitor", "ma6658", "id21", "20250509", "raw", "foo", "foo_bar", "foo_bar.h5"
    )
    candidates = (EsrfPath(path_str), pathlib.Path(path_str))

    for candidate in candidates:
        assert str(candidate) == path_str
    for candidate in candidates:
        assert "foo_bar.h5" in str(candidate)
def test_repr():
    """``repr()`` shows the class name, the native path string, the schema and the template."""
    path_str = make_path(
        "visitor", "ma6658", "id21", "20250509", "raw", "foo", "foo_bar", "foo_bar.h5"
    )
    esrf_path = EsrfPath(path_str)
    native = pathlib.Path(path_str)

    assert repr(esrf_path) == (
        f"<EsrfPath {str(native)!r} schema='esrf_v2' template='raw_dataset_file'>"
    )
    for candidate in (esrf_path, native):
        assert "foo_bar.h5" in repr(candidate)
def test_dir():
    """``dir()`` of a parsed path lists every field name of its parse result."""
    esrf_path = EsrfPath(
        make_path(
            "visitor",
            "ma6658",
            "id21",
            "20250509",
            "raw",
            "foo",
            "foo_bar",
            "foo_bar.h5",
        )
    )

    field_names = set(esrf_path._esrf_parsed.field_names())

    assert field_names <= set(dir(esrf_path))
def test_dir_without_esrf_schema():
    """For ``NOT_ESRF_PATH``, ``dir()`` still lists everything a ``pathlib.Path`` lists."""
    esrf_path = EsrfPath(NOT_ESRF_PATH)
    native = pathlib.Path(NOT_ESRF_PATH)

    assert set(dir(native)) <= set(dir(esrf_path))
@pytest.mark.parametrize("input_path", list(TEST_PATHS))
def test_reconstruct_path(input_path):
    """Rendering the parse result gives back the input path string."""
    rendered = EsrfPath(input_path)._esrf_parsed.render()

    assert rendered == input_path
def test_replace_fields():
    """``replace_fields`` returns a path rendered with the replaced field values."""
    session = ("visitor", "ma6658", "id21", "20250509")

    # Replacing the data type swaps the data type directory
    source = EsrfPath(make_path(*session, "RAW_DATA", "foo", "foo_bar", "foo_bar.h5"))
    replaced = source.replace_fields(data_type="processed")
    assert str(replaced) == make_path(
        *session, "PROCESSED_DATA", "foo", "foo_bar", "foo_bar.h5"
    )

    # Replacing the collection of a dataset file
    source = EsrfPath(make_path(*session, "RAW_DATA", "foo", "foo_bar", "foo_bar.h5"))
    replaced = source.replace_fields(collection="baz")
    assert str(replaced) == make_path(
        *session, "RAW_DATA", "baz", "baz_bar", "baz_bar.h5"
    )

    # Replacing the collection of a dataset directory
    source = EsrfPath(make_path(*session, "RAW_DATA", "foo", "foo_bar"))
    replaced = source.replace_fields(collection="baz")
    assert str(replaced) == make_path(*session, "RAW_DATA", "baz", "baz_bar")

    # Replacing the collection of a path with a trailing unknown part
    source = EsrfPath(
        make_path(
            *session, "RAW_DATA", "foo", "foo_bar", "something", "unknown.txt"
        )
    )
    replaced = source.replace_fields(collection="baz")
    assert str(replaced) == make_path(
        *session,
        "RAW_DATA",
        "baz",
        "baz_bar",
        "baz_bar.h5",  # appears because we need to make glob patterns work
    )

    # Passing the original template name keeps the trailing unknown part
    replaced = source.replace_fields(
        collection="baz", template_name=source.template_name
    )
    assert str(replaced) == make_path(
        *session, "RAW_DATA", "baz", "baz_bar", "something", "unknown.txt"
    )
def test_raw_dataset_path():
    """A processed file exposes its raw dataset directory; the collection level does not."""
    session = ("visitor", "ma6658", "id21", "20250509")
    processed_file = EsrfPath(
        make_path(
            *session,
            "PROCESSED_DATA",
            "foo",
            "foo_bar",
            "process1",
            "foo_bar_process1.h5",
        )
    )

    raw_dataset = processed_file.raw_dataset_path
    assert str(raw_dataset) == make_path(*session, "RAW_DATA", "foo", "foo_bar")

    # The parent of the dataset directory must refuse the attribute
    with pytest.raises(errors.SchemaAttributeError, match="raw_dataset_path"):
        _ = raw_dataset.parent.raw_dataset_path
def test_data_type_root_paths():
    """The data type root attributes point at the matching directory of the session."""
    session = ("visitor", "ma6658", "id21", "20250509")
    path = EsrfPath(
        make_path(
            *session,
            "PROCESSED_DATA",
            "foo",
            "foo_bar",
            "process1",
            "foo_bar_process1.h5",
        )
    )

    # Attribute name -> directory name expected directly below the session
    expected_directories = (
        ("raw_data_path", "RAW_DATA"),
        ("processed_data_path", "PROCESSED_DATA"),
        ("scripts_path", "SCRIPTS"),
        ("nobackup_path", "NOBACKUP"),
    )
    for attribute, directory in expected_directories:
        assert str(getattr(path, attribute)) == make_path(*session, directory)

    # A path that stops before the session directory must refuse the attribute
    path = EsrfPath(make_path("visitor", "ma6658", "id21"))
    with pytest.raises(errors.SchemaAttributeError, match="raw_data_path"):
        _ = path.raw_data_path
def test_filenames():
    """A processed file exposes the raw dataset, collection and proposal file paths."""
    session = ("visitor", "ma6658", "id21", "20250509")
    processed_file = EsrfPath(
        make_path(
            *session,
            "PROCESSED_DATA",
            "foo",
            "foo_bar",
            "process1",
            "foo_bar_process1.h5",
        )
    )

    dataset_file = str(processed_file.raw_dataset_file)
    assert dataset_file == make_path(
        *session, "RAW_DATA", "foo", "foo_bar", "foo_bar.h5"
    )

    collection_file = str(processed_file.raw_collection_file)
    assert collection_file == make_path(*session, "RAW_DATA", "foo", "ma6658_foo.h5")

    proposal_file = str(processed_file.raw_proposal_file)
    assert proposal_file == make_path(*session, "RAW_DATA", "ma6658_id21.h5")
def test_from_fields():
    """``from_fields`` selects a template from the given fields or raises when none fits."""
    no_template_message = re.escape(
        "No path template can be found that is fully described by the fields {"
    )

    # Without a session date no template is found
    with pytest.raises(errors.PathSchemaMatchError, match=no_template_message):
        _ = EsrfPath.from_fields(
            data_root=make_path("visitor"),
            proposal="ma6658",
            beamline="id00",
        )

    # With a session date and no data type: the raw proposal file
    raw = EsrfPath.from_fields(
        data_root=make_path("visitor"),
        proposal="ma6658",
        beamline="id00",
        session_date="20250509",
    )
    assert raw.template_name == "raw_proposal_file"
    assert raw.schema_name == "esrf_v3"
    assert str(raw) == make_path(
        "visitor", "ma6658", "id00", "20250509", "RAW_DATA", "ma6658_id00.h5"
    )

    # With data_type="processed": the processed proposal file
    processed = EsrfPath.from_fields(
        data_root=make_path("visitor"),
        proposal="ma6658",
        beamline="id00",
        session_date="20250509",
        data_type="processed",
    )
    assert processed.template_name == "processed_proposal_file"
    assert processed.schema_name == "esrf_v3"
    assert str(processed) == make_path(
        "visitor", "ma6658", "id00", "20250509", "PROCESSED_DATA", "ma6658_id00.h5"
    )
@pytest.mark.parametrize(
    "beamline_raw, beamline_normalized",
    [
        ("id21", "id21"),
        ("id23eh1", "id23-1"),
    ],
)
def test_derived_concepts(beamline_raw, beamline_normalized):
    """``beamline`` and ``beamline_normalized`` agree for raw and normalized input.

    The first half passes the raw beamline name to ``from_fields``, the second
    half passes the normalized one; both expect ``beamline`` to be the raw name.
    """
    # Collection level, built from the raw beamline name
    collection_dir = EsrfPath(
        make_path("visitor", "ma6658", beamline_raw, "20250509", "RAW_DATA", "foo")
    )
    from_raw_name = EsrfPath.from_fields(
        data_root=make_path("visitor"),
        proposal="ma6658",
        beamline=beamline_raw,
        session_date="20250509",
        data_type="raw",
        collection="foo",
    )
    assert collection_dir == from_raw_name.raw_collection_path
    assert from_raw_name.beamline == beamline_raw
    assert from_raw_name.beamline_normalized == beamline_normalized

    # Dataset level, built from the normalized beamline name
    dataset_dir = EsrfPath(
        make_path(
            "visitor", "ma6658", beamline_raw, "20250509", "RAW_DATA", "foo", "foo_bar"
        )
    )
    from_normalized_name = EsrfPath.from_fields(
        data_root=make_path("visitor"),
        proposal="ma6658",
        beamline=beamline_normalized,
        session_date="20250509",
        data_type="raw",
        collection="foo",
        dataset="bar",
    )
    assert dataset_dir == from_normalized_name.raw_dataset_path
    assert from_normalized_name.beamline == beamline_raw
    assert from_normalized_name.beamline_normalized == beamline_normalized
def test_immutable_schema_fields():
    """Assigning to a schema field raises and points the user to ``replace_fields``."""
    collection_dir = EsrfPath(
        make_path("visitor", "ma6658", "id21", "20250509", "RAW_DATA", "foo")
    )
    expected_message = re.escape(
        "Attribute 'proposal' is immutable. Create a new path instance with `EsrfPath.replace_fields(proposal='ma6659')`."
    )

    with pytest.raises(errors.SchemaAttributeError, match=expected_message):
        collection_dir.proposal = "ma6659"
# Absolute data root used by test_data_root_default_value: rooted at the
# C: drive when IS_WINDOWS is set, at the OS path separator otherwise.
custom_root = os.path.join("C:\\" if IS_WINDOWS else os.sep, "custom", "root")
@pytest.mark.parametrize(
    "data_root, expected_data_root",
    [
        (make_path("visitor"), make_path("visitor")),
        (None, DEFAULT_DATA_ROOT),
        (custom_root, custom_root),
    ],
)
def test_data_root_default_value(data_root, expected_data_root):
    """``data_root=None`` falls back to ``DEFAULT_DATA_ROOT``; explicit roots are kept.

    When ``IS_WINDOWS`` is set, ``data_root=None`` is expected to raise
    ``PathSchemaMatchError`` instead.
    """
    fields = {
        "data_root": data_root,
        "proposal": "ma6658",
        "beamline": "id00",
        "session_date": "20250828",
    }

    if not (IS_WINDOWS and data_root is None):
        path = EsrfPath.from_fields(**fields)
        assert path.data_root == expected_data_root
        return

    with pytest.raises(
        errors.PathSchemaMatchError,
        match=re.escape(
            "No path template can be found that is fully described by the fields {"
        ),
    ):
        _ = EsrfPath.from_fields(**fields)
def test_raw_metadata_path():
    """``raw_metadata_path`` is the ``__icat__`` directory below the given path.

    Checked for the schema of the ``EsrfPath`` class and for the ``esrf``
    schema versions 2 and 1 selected through ``from_path``.
    """
    session = ("visitor", "ma6658", "id21", "20250509")

    # Schema taken from the EsrfPath class
    base = make_path(*session, "RAW_DATA")
    path = EsrfPath(base)
    icat_dir = os.path.join(base, "__icat__")
    assert str(path.raw_metadata_path) == icat_dir

    # esrf schema version 2
    base = make_path(*session, "raw")
    path = EsrfPath.from_path(base, schema_name="esrf", schema_version=2)
    icat_dir = os.path.join(base, "__icat__")
    assert str(path.raw_metadata_path) == icat_dir

    # esrf schema version 1
    base = make_path(*session)
    path = EsrfPath.from_path(base, schema_name="esrf", schema_version=1)
    icat_dir = os.path.join(base, "__icat__")
    assert str(path.raw_metadata_path) == icat_dir
def test_raw_metadata_file():
    """``raw_metadata_file`` is ``foo_bar.xml`` inside ``raw_metadata_path`` for a dataset.

    Checked for the schema of the ``EsrfPath`` class and for the ``esrf``
    schema versions 2 and 1; a collection directory must refuse the attribute.
    """
    session = ("visitor", "ma6658", "id21", "20250509")

    # Schema taken from the EsrfPath class
    dataset_dir = make_path(*session, "RAW_DATA", "foo", "foo_bar")
    path = EsrfPath(dataset_dir)
    xml_file = os.path.join(path.raw_metadata_path, "foo_bar.xml")
    assert str(path.raw_metadata_file) == xml_file

    # esrf schema version 2
    dataset_dir = make_path(*session, "raw", "foo", "foo_bar")
    path = EsrfPath.from_path(dataset_dir, schema_name="esrf", schema_version=2)
    xml_file = os.path.join(path.raw_metadata_path, "foo_bar.xml")
    assert str(path.raw_metadata_file) == xml_file

    # esrf schema version 1
    dataset_dir = make_path(*session, "foo", "foo_bar")
    path = EsrfPath.from_path(dataset_dir, schema_name="esrf", schema_version=1)
    xml_file = os.path.join(path.raw_metadata_path, "foo_bar.xml")
    assert str(path.raw_metadata_file) == xml_file

    # Collection directory: the attribute must raise
    collection_dir = make_path(*session, "RAW_DATA", "foo")
    path = EsrfPath(collection_dir)
    with pytest.raises(errors.SchemaAttributeError, match="raw_metadata_file"):
        _ = path.raw_metadata_file
def test_glob_pattern():
    """Replacing a field with ``"*"`` renders a glob pattern for the chosen template."""
    session = ("visitor", "ma6658", "id00", "20250509")

    # Raw collection: dataset wildcard
    collection = EsrfPath.from_fields(
        data_root=make_path("visitor"),
        proposal="ma6658",
        beamline="id00",
        session_date="20250509",
        collection="sample",
    )
    pattern = make_path(*session, "RAW_DATA", "sample", "sample_*", "sample_*.h5")
    assert str(collection.replace_fields(dataset="*")) == pattern

    # Same wildcard restricted to the dataset directory template
    pattern = make_path(*session, "RAW_DATA", "sample", "sample_*")
    globbed = collection.replace_fields(dataset="*", template_name="raw_dataset_path")
    assert str(globbed) == pattern

    # Processed collection: dataset wildcard
    collection = EsrfPath.from_fields(
        data_root=make_path("visitor"),
        proposal="ma6658",
        beamline="id00",
        session_date="20250509",
        collection="sample",
        data_type="processed",
    )
    pattern = make_path(
        *session, "PROCESSED_DATA", "sample", "sample_*", "sample_*.h5"
    )
    assert str(collection.replace_fields(dataset="*")) == pattern

    # Session date wildcard with the session directory template
    proposal = EsrfPath.from_fields(
        data_root=make_path("visitor"),
        proposal="ma6658",
        beamline="id00",
        session_date="20250509",
    )
    pattern = make_path("visitor", "ma6658", "id00", "*")
    globbed = proposal.replace_fields(session_date="*", template_name="session_path")
    assert str(globbed) == pattern
def test_session_date():
    """Only 8-digit segments that are valid dates are taken as the session date.

    Four combinations of a valid / invalid date at the session position and
    at the collection position are parsed with the base ``ESRFPath`` class.
    """
    # Valid date at both positions: the first one is the session date
    source = make_path(
        "data",
        "visitor",
        "me1783",
        "id19",
        "20251114",  # 8 digits and a valid date
        "RAW_DATA",
        "20251115",  # 8 digits and a valid date
        "test_C001H001S0001",
    )
    parsed = ESRFPath(source)
    assert source == str(parsed)
    assert parsed.template_name == "raw_collection_path"
    assert parsed.session_date == datetime.date(2025, 11, 14)
    assert parsed.collection == "20251115"
    assert parsed.unknown_part == "test_C001H001S0001"

    # Valid session date, invalid date at the collection position
    source = make_path(
        "data",
        "visitor",
        "me1783",
        "id19",
        "20251114",  # 8 digits and a valid date
        "RAW_DATA",
        "14112025",  # 8 digits but not a valid date
        "test_C001H001S0001",
    )
    parsed = ESRFPath(source)
    assert source == str(parsed)
    assert parsed.template_name == "raw_collection_path"
    assert parsed.session_date == datetime.date(2025, 11, 14)
    assert parsed.collection == "14112025"
    assert parsed.unknown_part == "test_C001H001S0001"

    # Invalid date at the session position: the later valid date is matched
    # as the session date, shifting proposal and beamline to the right
    source = make_path(
        "data",
        "visitor",
        "me1783",
        "id19",
        "14112025",  # 8 digits but not a valid date
        "RAW_DATA",
        "20251114",  # 8 digits and a valid date
        "test_C001H001S0001",
    )
    parsed = ESRFPath(source)
    assert source == str(parsed)
    assert parsed.template_name == "session_path"
    assert parsed.proposal == "14112025"
    assert parsed.beamline == "RAW_DATA"
    assert parsed.session_date == datetime.date(2025, 11, 14)
    assert parsed.unknown_part == "test_C001H001S0001"

    # No valid date at all: the whole path is unknown
    source = make_path(
        "data",
        "visitor",
        "me1783",
        "id19",
        "14112025",  # 8 digits but not a valid date
        "RAW_DATA",
        "14112025",  # 8 digits but not a valid date
        "test_C001H001S0001",
    )
    parsed = ESRFPath(source)
    assert source == str(parsed)
    assert parsed.template_name == "unknown_path"
    assert parsed.unknown_part == source
def test_strip_mount_point():
    """``strip_mount_point`` turns the ``make_realpath`` based path into the ``make_path`` one."""
    with_mount_point = EsrfPath.from_fields(
        data_root=make_realpath("visitor"),
        proposal="ma6658",
        beamline="id00",
        session_date="20250509",
        collection="sample",
    )
    without_mount_point = EsrfPath.from_fields(
        data_root=make_path("visitor"),
        proposal="ma6658",
        beamline="id00",
        session_date="20250509",
        collection="sample",
    )

    assert without_mount_point == with_mount_point.strip_mount_point()