import datetime
import os
import pathlib
import re
from typing import Dict
import pytest
from .._schemas import errors
from .._schemas.constants import DEFAULT_DATA_ROOT
from .._schemas.constants import IS_WINDOWS
from .._schemas.definitions.types import DataType
from .._schemas.registry import ParsedPath
from .._schemas.registry import get_schema_registry
from ..core import ESRFPath
from .utils import make_path
from .utils import make_realpath
[docs]
class EsrfPath(ESRFPath, esrf=3, fallback_depth=None):
    """``ESRFPath`` subclass used throughout this test module.

    It is declared with the class keywords ``esrf=3`` and
    ``fallback_depth=None`` and adds no behavior of its own.
    NOTE(review): presumably ``esrf=3`` pins the ESRF schema version used
    by default -- confirm against ``ESRFPath``.
    """
# A path that is expected to parse as an "unknown" path (see its TEST_PATHS entry).
NOT_ESRF_PATH = make_path("some", "other", "path", "that", "does", "not", "match")

# Maps an input path to the description of its expected parse result:
# "schemas" and "template_name" select the registry and template, every
# other key is an expected field value (see `_extected_parsed`).
TEST_PATHS: Dict[str, dict] = {
    # Data type directory spelled "RAW_DATA".
    make_path(
        "visitor",
        "ma6658",
        "id21",
        "20250509",
        "RAW_DATA",
        "foo",
        "foo_bar",
        "foo_bar.h5",
    ): {
        "schemas": {"esrf": 3},
        "template_name": "raw_dataset_file",
        "data_root": make_path("visitor"),
        "proposal": "ma6658",
        "beamline": "id21",
        "session_date": datetime.date(2025, 5, 9),
        "data_type": DataType.RAW,
        "collection": "foo",
        "dataset": "bar",
    },
    # Data type directory spelled "raw".
    make_path(
        "visitor",
        "ma6658",
        "id21",
        "20250509",
        "raw",
        "foo",
        "foo_bar",
        "foo_bar.h5",
    ): {
        "schemas": {"esrf": 2},
        "template_name": "raw_dataset_file",
        "data_root": make_path("visitor"),
        "proposal": "ma6658",
        "beamline": "id21",
        "session_date": datetime.date(2025, 5, 9),
        "data_type": DataType.RAW,
        "collection": "foo",
        "dataset": "bar",
    },
    # No data type directory at all.
    make_path(
        "visitor", "ma6658", "id21", "20250509", "foo", "foo_bar", "foo_bar.h5"
    ): {
        "schemas": {"esrf": 1},
        "template_name": "raw_dataset_file",
        "data_root": make_path("visitor"),
        "proposal": "ma6658",
        "beamline": "id21",
        "session_date": datetime.date(2025, 5, 9),
        "collection": "foo",
        "dataset": "bar",
    },
    # A path that matches none of the ESRF layouts.
    NOT_ESRF_PATH: {
        "schemas": {"unknown": 1},
        "template_name": "unknown_path",
        "unknown_part": NOT_ESRF_PATH,
    },
}
def _extected_parsed(data) -> ParsedPath:
    """Build the ``ParsedPath`` described by one ``TEST_PATHS`` entry.

    :param data: mapping holding ``"schemas"`` (keyword arguments for
        ``get_schema_registry``), ``"template_name"`` and the expected field
        values. The mapping itself is not modified; a copy is consumed.
    :returns: ``ParsedPath`` built from the registry, the remaining field
        values and the named template.
    :raises KeyError: when ``"schemas"`` or ``"template_name"`` is missing.
    """
    remaining = dict(data)
    # Pop in this order so only the field values are left in `remaining`.
    schemas, template_name = (
        remaining.pop(key) for key in ("schemas", "template_name")
    )
    registry = get_schema_registry(**schemas, fallback_depth=0)
    return ParsedPath(
        registry, remaining, template=registry.get_template(template_name)
    )
[docs]
@pytest.mark.parametrize("input_path", list(TEST_PATHS))
def test_esrf_path_esrf_schema(input_path):
    """Parsing each ``TEST_PATHS`` key yields the parse result described by its value."""
    assert EsrfPath(input_path)._esrf_parsed == _extected_parsed(
        TEST_PATHS[input_path]
    )
[docs]
@pytest.mark.parametrize("input_path", list(TEST_PATHS))
def test_esrf_path_attributes(input_path):
    """Schema fields are reachable as attributes of the path object.

    Non-path fields must equal the expected parsed values, path fields must
    equal the expected value wrapped in ``pathlib.Path`` (or raise
    ``SchemaAttributeError`` for the fields listed below), and an unknown
    attribute must raise ``SchemaAttributeError``.
    """
    esrf_path = EsrfPath(input_path)
    reference = _extected_parsed(TEST_PATHS[input_path])

    # Path fields whose access is expected to fail; none for the unknown schema.
    raising_fields = (
        ()
        if reference.schema_identifier == "unknown_v1"
        else ("thumbnail_file", "note_file", "unknown_path")
    )

    # Access concept values
    print(esrf_path._esrf_parsed.__info__(include_missing=True))
    for field in reference.nonpath_field_names():
        assert getattr(esrf_path, field) == getattr(reference, field), field

    # Access paths
    for field in reference.path_field_names():
        if field not in raising_fields:
            assert getattr(esrf_path, field) == pathlib.Path(
                getattr(reference, field)
            ), field
            continue
        with pytest.raises(errors.SchemaAttributeError, match=field):
            _ = getattr(esrf_path, field)

    # Access non-existing attribute
    with pytest.raises(errors.SchemaAttributeError):
        _ = getattr(esrf_path, "not_an_attribute")
[docs]
def test_inherit_from_existing_path():
    """Constructing an ``EsrfPath`` from another one keeps the same parse result."""
    segments = (
        "visitor",
        "ma6658",
        "id21",
        "20250509",
        "RAW_DATA",
        "foo",
        "foo_bar",
        "foo_bar.h5",
    )
    original = EsrfPath(make_path(*segments))
    duplicate = EsrfPath(original)
    assert duplicate._esrf_parsed == original._esrf_parsed
[docs]
def test_parent():
    """Walk up the ``.parent`` chain and check the ``repr`` at every level.

    Each level is compared against the ``repr`` built from the matching
    ``pathlib.Path`` ancestor plus the expected schema/template description.
    """
    path_str = make_path(
        "visitor",
        "ma6658",
        "id21",
        "20250509",
        "raw",
        "foo",
        "foo_bar",
        "foo_bar.h5",
    )

    # (expected schema/template description, re-parse from str before checking)
    # Entry 0 is the path itself, each following entry is one `.parent` higher.
    levels = [
        ("schema='esrf_v2' template='raw_dataset_file'", False),
        ("schema='esrf_v2' template='raw_dataset_path'", False),
        ("schema='esrf_v2' template='raw_collection_path'", False),
        ("schema='esrf_v2' template='raw_data_path'", False),
        ("schema='esrf_v3' template='session_path'", False),
        ("schema='unknown_v1' template='unknown_path'", False),
        # This level is rebuilt from its string form before being checked.
        ("schema='unknown_v1' template='unknown_path'", True),
        ("schema='unknown_v1' template='unknown_path'", False),
    ]

    native = pathlib.Path(path_str)
    current = EsrfPath(path_str)
    for depth, (description, reparse) in enumerate(levels):
        if depth > 0:
            current = current.parent
            native = native.parent
        if reparse:
            current = EsrfPath(str(current))
        assert repr(current) == f"<EsrfPath {str(native)!r} {description}>"
[docs]
def test_str():
    """``str()`` of an ``EsrfPath`` matches ``str()`` of the equivalent ``pathlib.Path``."""
    path_str = make_path(
        "visitor", "ma6658", "id21", "20250509", "raw", "foo", "foo_bar", "foo_bar.h5"
    )
    candidates = (EsrfPath(path_str), pathlib.Path(path_str))

    for candidate in candidates:
        assert str(candidate) == path_str
    for candidate in candidates:
        assert "foo_bar.h5" in str(candidate)
[docs]
def test_repr():
    """``repr()`` shows the native path string, the schema and the template."""
    path_str = make_path(
        "visitor", "ma6658", "id21", "20250509", "raw", "foo", "foo_bar", "foo_bar.h5"
    )
    esrf_path = EsrfPath(path_str)
    native = pathlib.Path(path_str)

    expected = (
        f"<EsrfPath {str(native)!r} schema='esrf_v2' template='raw_dataset_file'>"
    )
    assert repr(esrf_path) == expected

    # The file name is visible in both representations.
    for rendered in (repr(esrf_path), repr(native)):
        assert "foo_bar.h5" in rendered
[docs]
def test_dir():
    """Every field name of the parse result is listed by ``dir()`` on the path."""
    esrf_path = EsrfPath(
        make_path(
            "visitor",
            "ma6658",
            "id21",
            "20250509",
            "raw",
            "foo",
            "foo_bar",
            "foo_bar.h5",
        )
    )
    field_names = set(esrf_path._esrf_parsed.field_names())
    assert field_names <= set(dir(esrf_path))
[docs]
def test_dir_without_esrf_schema():
    """``dir()`` on an ``EsrfPath`` includes everything ``dir()`` lists for a ``pathlib.Path``."""
    esrf_names = set(dir(EsrfPath(NOT_ESRF_PATH)))
    native_names = set(dir(pathlib.Path(NOT_ESRF_PATH)))
    assert native_names <= esrf_names
[docs]
@pytest.mark.parametrize("input_path", list(TEST_PATHS))
def test_reconstruct_path(input_path):
    """Rendering the parse result gives back the input path string."""
    rendered = EsrfPath(input_path)._esrf_parsed.render()
    assert rendered == input_path
[docs]
def test_replace_fields():
    """``replace_fields`` returns a path with the given field values swapped in."""

    def in_session(*tail):
        # All paths in this test live under the same session directory.
        return make_path("visitor", "ma6658", "id21", "20250509", *tail)

    # Replace the data type of a dataset file.
    source = EsrfPath(in_session("RAW_DATA", "foo", "foo_bar", "foo_bar.h5"))
    replaced = source.replace_fields(data_type="processed")
    assert str(replaced) == in_session(
        "PROCESSED_DATA", "foo", "foo_bar", "foo_bar.h5"
    )

    # Replace the collection of a dataset file.
    source = EsrfPath(in_session("RAW_DATA", "foo", "foo_bar", "foo_bar.h5"))
    replaced = source.replace_fields(collection="baz")
    assert str(replaced) == in_session("RAW_DATA", "baz", "baz_bar", "baz_bar.h5")

    # Replace the collection of a dataset directory.
    source = EsrfPath(in_session("RAW_DATA", "foo", "foo_bar"))
    replaced = source.replace_fields(collection="baz")
    assert str(replaced) == in_session("RAW_DATA", "baz", "baz_bar")

    # Replace the collection of a path with a trailing part below the dataset.
    source = EsrfPath(
        in_session("RAW_DATA", "foo", "foo_bar", "something", "unknown.txt")
    )
    replaced = source.replace_fields(collection="baz")
    assert str(replaced) == in_session(
        "RAW_DATA",
        "baz",
        "baz_bar",
        "baz_bar.h5",  # appears because we need to make glob patterns work
    )

    # Passing the template name explicitly keeps the trailing part.
    replaced = source.replace_fields(
        collection="baz", template_name=source.template_name
    )
    assert str(replaced) == in_session(
        "RAW_DATA", "baz", "baz_bar", "something", "unknown.txt"
    )
[docs]
def test_raw_dataset_path():
    """``raw_dataset_path`` of a processed file is the raw dataset directory.

    The attribute is not available on the parent of that dataset directory.
    """
    session = ("visitor", "ma6658", "id21", "20250509")
    processed_file = EsrfPath(
        make_path(
            *session, "PROCESSED_DATA", "foo", "foo_bar", "process1", "foo_bar_process1.h5"
        )
    )

    raw_dataset = processed_file.raw_dataset_path
    assert str(raw_dataset) == make_path(*session, "RAW_DATA", "foo", "foo_bar")

    with pytest.raises(errors.SchemaAttributeError, match="raw_dataset_path"):
        _ = raw_dataset.parent.raw_dataset_path
[docs]
def test_data_type_root_paths():
    """The per-data-type root paths are derived from the session directory.

    A path that stops at the beamline level raises ``SchemaAttributeError``
    when ``raw_data_path`` is accessed.
    """
    session = ("visitor", "ma6658", "id21", "20250509")
    processed_file = EsrfPath(
        make_path(
            *session, "PROCESSED_DATA", "foo", "foo_bar", "process1", "foo_bar_process1.h5"
        )
    )

    # attribute name -> directory expected directly under the session directory
    expected_directories = (
        ("raw_data_path", "RAW_DATA"),
        ("processed_data_path", "PROCESSED_DATA"),
        ("scripts_path", "SCRIPTS"),
        ("nobackup_path", "NOBACKUP"),
    )
    for attribute, directory in expected_directories:
        assert str(getattr(processed_file, attribute)) == make_path(
            *session, directory
        )

    beamline_level = EsrfPath(make_path("visitor", "ma6658", "id21"))
    with pytest.raises(errors.SchemaAttributeError, match="raw_data_path"):
        _ = beamline_level.raw_data_path
[docs]
def test_filenames():
    """Raw dataset, collection and proposal file paths are derived from a processed file."""
    session = ("visitor", "ma6658", "id21", "20250509")
    processed_file = EsrfPath(
        make_path(
            *session, "PROCESSED_DATA", "foo", "foo_bar", "process1", "foo_bar_process1.h5"
        )
    )

    # attribute name -> expected location below the RAW_DATA directory
    expected_files = (
        ("raw_dataset_file", ("foo", "foo_bar", "foo_bar.h5")),
        ("raw_collection_file", ("foo", "ma6658_foo.h5")),
        ("raw_proposal_file", ("ma6658_id21.h5",)),
    )
    for attribute, tail in expected_files:
        assert str(getattr(processed_file, attribute)) == make_path(
            *session, "RAW_DATA", *tail
        )
[docs]
def test_from_fields():
    """``from_fields`` picks a template from the given fields, or fails to match.

    Without a session date no template matches; with one, the proposal file
    of the raw (default) or requested data type is produced.
    """

    def build(**extra_fields):
        return EsrfPath.from_fields(
            data_root=make_path("visitor"),
            proposal="ma6658",
            beamline="id00",
            **extra_fields,
        )

    message = re.escape(
        "No path template can be found that is fully described by the fields {"
    )
    with pytest.raises(errors.PathSchemaMatchError, match=message):
        _ = build()

    raw_file = build(session_date="20250509")
    assert raw_file.template_name == "raw_proposal_file"
    assert raw_file.schema_name == "esrf_v3"
    assert str(raw_file) == make_path(
        "visitor", "ma6658", "id00", "20250509", "RAW_DATA", "ma6658_id00.h5"
    )

    processed_file = build(session_date="20250509", data_type="processed")
    assert processed_file.template_name == "processed_proposal_file"
    assert processed_file.schema_name == "esrf_v3"
    assert str(processed_file) == make_path(
        "visitor", "ma6658", "id00", "20250509", "PROCESSED_DATA", "ma6658_id00.h5"
    )
[docs]
@pytest.mark.parametrize(
    "beamline_raw, beamline_normalized",
    [
        ("id21", "id21"),
        ("id23eh1", "id23-1"),
    ],
)
def test_derived_concepts(beamline_raw, beamline_normalized):
    """``beamline`` and ``beamline_normalized`` are both exposed on built paths.

    The first half builds the path from the raw beamline name, the second
    half from the normalized name; both must report the raw name as
    ``beamline`` and the normalized name as ``beamline_normalized``.
    """

    def parse_raw_data(*tail):
        return EsrfPath(
            make_path("visitor", "ma6658", beamline_raw, "20250509", "RAW_DATA", *tail)
        )

    # Collection level, built from the raw beamline name.
    parsed = parse_raw_data("foo")
    built = EsrfPath.from_fields(
        data_root=make_path("visitor"),
        proposal="ma6658",
        beamline=beamline_raw,
        session_date="20250509",
        data_type="raw",
        collection="foo",
    )
    assert parsed == built.raw_collection_path
    assert built.beamline == beamline_raw
    assert built.beamline_normalized == beamline_normalized

    # Dataset level, built from the normalized beamline name.
    parsed = parse_raw_data("foo", "foo_bar")
    built = EsrfPath.from_fields(
        data_root=make_path("visitor"),
        proposal="ma6658",
        beamline=beamline_normalized,
        session_date="20250509",
        data_type="raw",
        collection="foo",
        dataset="bar",
    )
    assert parsed == built.raw_dataset_path
    assert built.beamline == beamline_raw
    assert built.beamline_normalized == beamline_normalized
[docs]
def test_immutable_schema_fields():
    """Assigning to a schema field raises ``SchemaAttributeError`` with a hint."""
    collection_path = EsrfPath(
        make_path("visitor", "ma6658", "id21", "20250509", "RAW_DATA", "foo")
    )
    # The message is matched literally, hence the escaping.
    expected_message = re.escape(
        "Attribute 'proposal' is immutable. Create a new path instance with `EsrfPath.replace_fields(proposal='ma6659')`."
    )
    with pytest.raises(errors.SchemaAttributeError, match=expected_message):
        collection_path.proposal = "ma6659"
# Absolute "custom/root" directory for the current platform: anchored at the
# C: drive when IS_WINDOWS is set, at the filesystem root otherwise.
custom_root = os.path.join("C:\\" if IS_WINDOWS else os.sep, "custom", "root")
[docs]
@pytest.mark.parametrize(
    "data_root, expected_data_root",
    [
        (make_path("visitor"), make_path("visitor")),
        (None, DEFAULT_DATA_ROOT),
        (custom_root, custom_root),
    ],
)
def test_data_root_default_value(data_root, expected_data_root):
    """``from_fields`` reports the expected ``data_root``.

    With ``data_root=None`` the expected value is ``DEFAULT_DATA_ROOT``,
    except when ``IS_WINDOWS`` is set, where no template is expected to match.
    """
    fields = dict(
        data_root=data_root,
        proposal="ma6658",
        beamline="id00",
        session_date="20250828",
    )

    if not (IS_WINDOWS and data_root is None):
        built = EsrfPath.from_fields(**fields)
        assert built.data_root == expected_data_root
        return

    message = re.escape(
        "No path template can be found that is fully described by the fields {"
    )
    with pytest.raises(errors.PathSchemaMatchError, match=message):
        _ = EsrfPath.from_fields(**fields)
[docs]
def test_glob_pattern():
    """Replacing a field with ``"*"`` renders a glob pattern at that position."""

    def build(**extra_fields):
        return EsrfPath.from_fields(
            data_root=make_path("visitor"),
            proposal="ma6658",
            beamline="id00",
            session_date="20250509",
            **extra_fields,
        )

    session = ("visitor", "ma6658", "id00", "20250509")

    # Raw data (no data type given): dataset wildcard down to the file.
    raw_collection = build(collection="sample")
    pattern = make_path(*session, "RAW_DATA", "sample", "sample_*", "sample_*.h5")
    assert str(raw_collection.replace_fields(dataset="*")) == pattern

    # Same wildcard, limited to the dataset directory by naming the template.
    pattern = make_path(*session, "RAW_DATA", "sample", "sample_*")
    rendered = str(
        raw_collection.replace_fields(dataset="*", template_name="raw_dataset_path")
    )
    assert rendered == pattern

    # Processed data: dataset wildcard down to the file.
    processed_collection = build(collection="sample", data_type="processed")
    pattern = make_path(
        *session, "PROCESSED_DATA", "sample", "sample_*", "sample_*.h5"
    )
    assert str(processed_collection.replace_fields(dataset="*")) == pattern

    # Wildcard for the session date at session level.
    proposal_level = build()
    pattern = make_path("visitor", "ma6658", "id00", "*")
    rendered = str(
        proposal_level.replace_fields(session_date="*", template_name="session_path")
    )
    assert rendered == pattern
[docs]
def test_session_date():
    """Check how 8-digit directory names are interpreted while parsing.

    The same layout is parsed four times, varying whether the directory
    before and the directory after ``RAW_DATA`` are valid ``YYYYMMDD`` dates.
    """
    valid_date = "20251114"  # 8 digits and a valid date
    invalid_date = "14112025"  # 8 digits but not a valid date

    def layout(before_raw_data, after_raw_data):
        return make_path(
            "data",
            "visitor",
            "me1783",
            "id19",
            before_raw_data,
            "RAW_DATA",
            after_raw_data,
            "test_C001H001S0001",
        )

    # Valid date before RAW_DATA: the directory after it is the collection,
    # whether or not it is itself a valid date.
    for collection_name in ("20251115", invalid_date):
        original_path = layout(valid_date, collection_name)
        parsed = ESRFPath(original_path)
        assert original_path == str(parsed)
        assert parsed.template_name == "raw_collection_path"
        assert parsed.session_date == datetime.date(2025, 11, 14)
        assert parsed.collection == collection_name
        assert parsed.unknown_part == "test_C001H001S0001"

    # Invalid date before RAW_DATA, valid date after it: the later directory
    # is taken as the session date.
    original_path = layout(invalid_date, valid_date)
    parsed = ESRFPath(original_path)
    assert original_path == str(parsed)
    assert parsed.template_name == "session_path"
    assert parsed.proposal == "14112025"
    assert parsed.beamline == "RAW_DATA"
    assert parsed.session_date == datetime.date(2025, 11, 14)
    assert parsed.unknown_part == "test_C001H001S0001"

    # No valid date anywhere: the whole path is unknown.
    original_path = layout(invalid_date, invalid_date)
    parsed = ESRFPath(original_path)
    assert original_path == str(parsed)
    assert parsed.template_name == "unknown_path"
    assert parsed.unknown_part == original_path
[docs]
def test_strip_mount_point():
    """``strip_mount_point`` turns the ``make_realpath`` variant into the ``make_path`` variant."""

    def build(data_root):
        return EsrfPath.from_fields(
            data_root=data_root,
            proposal="ma6658",
            beamline="id00",
            session_date="20250509",
            collection="sample",
        )

    with_mount_point = build(make_realpath("visitor"))
    without_mount_point = build(make_path("visitor"))
    assert without_mount_point == with_mount_point.strip_mount_point()