import os
import pytest
from ..._schemas import errors
from ..._schemas.fields.concept import PathConcept
from ..._schemas.fields.path import PathTemplate
from ..._schemas.fields.tree import parse_template_tree
from ..._schemas.identifier import SchemaIdentifier
[docs]
@pytest.fixture()
def path_tree():
    """Provide a schema identifier, a nested template tree and its concepts.

    The tree keys are either a plain segment string or a tuple of
    ``(segment, template_name[, flag])``.  Segments flagged with ``False``
    are the ones the tests of this module expect to be rendered inside
    ``[...]`` brackets (presumably "not required" -- confirm against
    ``parse_template_tree``).

    :returns: tuple ``(schema_identifier, template_tree, concepts)``
    """
    schema_identifier = SchemaIdentifier(test=1)

    # Children of the first optional sub-directory.  Key order is kept exactly
    # as in the flat literal, since the parser may depend on insertion order.
    optional1_children = {
        ("fileA.txt", "file_patha"): None,
        ("fileB.txt", "file_pathb"): None,
        ("{optional_subdir2}", None, False): {
            ("fileC.txt", "file_pathc"): None,
            ("fileD.txt", "file_pathd"): None,
        },
    }
    # Children of the directory whose name is fixed by the "data2" literal.
    mandatory2_children = {
        ("{file_name}.txt", "file_pathe"): None,
    }
    experiment_children = {
        ("{unknown_part}", "experiment_path", False): None,
        ("{optional_subdir1}", "optional1_path", False): optional1_children,
        ("{data_id=data2}", "mandatory2_path"): mandatory2_children,
    }
    template_tree = {
        ("{data_root}", "root_path"): {
            "{experiment_id}": experiment_children,
        }
    }

    # The catch-all concept is the only one with its own regex and a score.
    concepts = {
        "unknown_part": PathConcept(
            name="unknown_part",
            description="Unknown path or path segment (placeholder for unmapped directories or files)",
            schema_identifier=schema_identifier,
            regex=r".+",
            score=0,
        ),
    }
    # All remaining concepts share the same word-character regex.
    word_concepts = (
        ("data_root", "Top-level data directory"),
        ("experiment_id", "Experiment or proposal identifier"),
        ("data_id", "Data identifier"),
        ("optional_subdir1", "Optional subdirectory for processed data"),
        ("optional_subdir2", "Optional subdirectory for processed data"),
        ("file_name", "Base name of a generated file"),
    )
    for concept_name, concept_description in word_concepts:
        concepts[concept_name] = PathConcept(
            name=concept_name,
            description=concept_description,
            schema_identifier=schema_identifier,
            regex=r"\w+",
        )

    return schema_identifier, template_tree, concepts
[docs]
def test_init_and_str(path_tree):
    """Check the ``str()`` form of every template parsed from the tree."""
    schema_identifier, template_tree, concepts = path_tree

    reprs = {
        args.name: str(
            PathTemplate(args.name, schema_identifier, args.segments, concepts)
        )
        for args in parse_template_tree(template_tree)
    }

    # Template name -> template string expected inside the representation.
    expected_templates = {
        "root_path": "{data_root}",
        "experiment_path": "{data_root}/{experiment_id}[/{unknown_part}]",
        "mandatory2_path": "{data_root}/{experiment_id}/{data_id=data2}",
        "optional1_path": "{data_root}/{experiment_id}[/{optional_subdir1}]",
        "file_patha": "{data_root}/{experiment_id}[/{optional_subdir1}/fileA.txt]",
        "file_pathb": "{data_root}/{experiment_id}[/{optional_subdir1}/fileB.txt]",
        "file_pathc": "{data_root}/{experiment_id}[/{optional_subdir1}[/{optional_subdir2}/fileC.txt]]",
        "file_pathd": "{data_root}/{experiment_id}[/{optional_subdir1}[/{optional_subdir2}/fileD.txt]]",
        "file_pathe": "{data_root}/{experiment_id}/{data_id=data2}/{file_name}.txt",
    }
    expected = {
        path_name: f"<PathTemplate path='{path_name}' schema='test_v1' template='{template}'>"
        for path_name, template in expected_templates.items()
    }

    assert reprs == expected
[docs]
def test_render(path_tree):
    """Render every template of the tree from one set of concept values.

    No value is given for ``unknown_part``, ``optional_subdir2`` or
    ``data_id``; the expected paths omit the first two and contain the
    ``data2`` literal for the last.
    """
    schema_identifier, template_tree, concepts = path_tree
    concept_values = dict(
        data_root="DATA",
        experiment_id="EXP001",
        optional_subdir1="data1",
        file_name="summary",
    )

    paths = {
        args.name: PathTemplate(
            args.name, schema_identifier, args.segments, concepts
        ).render(concept_values)
        for args in parse_template_tree(template_tree)
    }

    experiment_dir = os.path.join("DATA", "EXP001")
    data1_dir = os.path.join(experiment_dir, "data1")
    data2_dir = os.path.join(experiment_dir, "data2")
    expected = {
        "root_path": "DATA",
        "experiment_path": experiment_dir,
        "mandatory2_path": data2_dir,
        "optional1_path": data1_dir,
        "file_patha": os.path.join(data1_dir, "fileA.txt"),
        "file_pathb": os.path.join(data1_dir, "fileB.txt"),
        "file_pathc": os.path.join(data1_dir, "fileC.txt"),
        "file_pathd": os.path.join(data1_dir, "fileD.txt"),
        "file_pathe": os.path.join(data2_dir, "summary.txt"),
    }
    assert paths == expected
[docs]
def test_render_missing_literal(path_tree):
    """Render without a value for ``data_id``, which the tree pins to ``data2``.

    The rendered paths are expected to contain the ``data2`` literal even
    though ``data_id`` is absent from the concept values.
    """
    schema_identifier, template_tree, concepts = path_tree
    # Deliberately no "data_id" entry.
    concept_values = {
        "data_root": "DATA",
        "experiment_id": "EXP001",
        "optional_subdir1": "data1",
        "file_name": "summary",
    }

    paths = {}
    for template_args in parse_template_tree(template_tree):
        template = PathTemplate(
            template_args.name, schema_identifier, template_args.segments, concepts
        )
        paths[template_args.name] = template.render(concept_values)

    base = os.path.join("DATA", "EXP001")
    in_data1 = os.path.join(base, "data1")
    in_data2 = os.path.join(base, "data2")
    expected = {
        "root_path": "DATA",
        "experiment_path": base,
        "mandatory2_path": in_data2,
        "optional1_path": in_data1,
        "file_pathe": os.path.join(in_data2, "summary.txt"),
    }
    # The four fixed file names all land in the first optional directory.
    for suffix, letter in zip("abcd", "ABCD"):
        expected[f"file_path{suffix}"] = os.path.join(in_data1, f"file{letter}.txt")

    assert paths == expected
[docs]
def test_render_missing(path_tree):
    """Rendering must fail for the one template that needs ``file_name``.

    ``file_name`` is left out of the concept values: ``file_pathe`` is
    expected to raise ``PathConceptWithoutValue`` while every other template
    still renders.
    """
    schema_identifier, template_tree, concepts = path_tree
    concept_values = dict(
        data_root="DATA",
        experiment_id="EXP001",
        optional_subdir1="data1",
    )

    paths = {}
    for args in parse_template_tree(template_tree):
        pt = PathTemplate(args.name, schema_identifier, args.segments, concepts)
        if args.name != "file_pathe":
            paths[args.name] = pt.render(concept_values)
            continue
        # Only this template contains the "{file_name}" segment.
        with pytest.raises(
            errors.PathConceptWithoutValue, match="'file_name' is not defined"
        ):
            pt.render(concept_values)

    experiment_dir = os.path.join("DATA", "EXP001")
    data1_dir = os.path.join(experiment_dir, "data1")
    expected = {
        "root_path": "DATA",
        "experiment_path": experiment_dir,
        "mandatory2_path": os.path.join(experiment_dir, "data2"),
        "optional1_path": data1_dir,
        "file_patha": os.path.join(data1_dir, "fileA.txt"),
        "file_pathb": os.path.join(data1_dir, "fileB.txt"),
        "file_pathc": os.path.join(data1_dir, "fileC.txt"),
        "file_pathd": os.path.join(data1_dir, "fileD.txt"),
    }
    assert paths == expected
[docs]
def test_render_missing_wildcard(path_tree):
    """With ``raise_on_missing=False`` missing concepts are expected as ``*``.

    Neither ``experiment_id`` nor ``file_name`` has a value here; the
    expected paths carry a ``*`` in their place.
    """
    schema_identifier, template_tree, concepts = path_tree
    concept_values = dict(data_root="DATA", optional_subdir1="data1")

    paths = {
        args.name: PathTemplate(
            args.name, schema_identifier, args.segments, concepts
        ).render(concept_values, raise_on_missing=False)
        for args in parse_template_tree(template_tree)
    }

    wildcard_dir = os.path.join("DATA", "*")
    data1_dir = os.path.join(wildcard_dir, "data1")
    data2_dir = os.path.join(wildcard_dir, "data2")
    expected = {
        "root_path": "DATA",
        "experiment_path": wildcard_dir,
        "mandatory2_path": data2_dir,
        "optional1_path": data1_dir,
        "file_patha": os.path.join(data1_dir, "fileA.txt"),
        "file_pathb": os.path.join(data1_dir, "fileB.txt"),
        "file_pathc": os.path.join(data1_dir, "fileC.txt"),
        "file_pathd": os.path.join(data1_dir, "fileD.txt"),
        "file_pathe": os.path.join(data2_dir, "*.txt"),
    }
    assert paths == expected
[docs]
@pytest.fixture()
def simple_path_tree():
    """Provide a small linear template tree used by the parse tests.

    The tree is ``{root}/{experiment_id}/{data_id=data2}/{file_name}.txt``
    with named templates at the root, data and file levels.

    :returns: tuple ``(schema_identifier, template_tree, concepts)``
    """
    schema_identifier = SchemaIdentifier(test=1)

    file_level = {("{file_name}.txt", "file_path"): None}
    data_level = {("{data_id=data2}", "data_path"): file_level}
    template_tree = {
        ("{root}", "root_path"): {
            "{experiment_id}": data_level,
        }
    }

    # Concept name -> description; every concept uses the same regex.
    descriptions = {
        "root": "root folder",
        "experiment_id": "Experiment ID",
        "data_id": "Data ID",
        "file_name": "Filename",
    }
    concepts = {
        concept_name: PathConcept(
            name=concept_name,
            description=text,
            schema_identifier=schema_identifier,
            regex=r"\w+",
        )
        for concept_name, text in descriptions.items()
    }
    return schema_identifier, template_tree, concepts
[docs]
def test_parse_basic_match(simple_path_tree):
    """Parse a path rendered by the same template and compare the concepts."""
    schema_identifier, template_tree, concepts = simple_path_tree
    # The last parsed entry is used as the "file_path" template.
    file_args = parse_template_tree(template_tree)[-1]
    pt = PathTemplate("file_path", schema_identifier, file_args.segments, concepts)

    concept_values = dict(
        root="DATA",
        experiment_id="EXP001",
        data_id="data2",
        file_name="summary",
    )
    rendered = pt.render(concept_values)

    expected = {
        "root": "DATA",
        "experiment_id": "EXP001",
        "data_id": "data2",
        "file_name": "summary",
    }
    assert pt.parse(rendered) == expected
[docs]
def test_parse_fails_on_mismatch(simple_path_tree):
    """Parsing a path that does not fit the template must raise.

    NOTE(review): presumably the mismatch is the ``data3`` segment, where the
    template pins ``data_id`` to ``data2`` -- confirm against ``PathTemplate``.
    """
    schema_identifier, template_tree, concepts = simple_path_tree
    file_args = parse_template_tree(template_tree)[-1]
    pt = PathTemplate("file_path", schema_identifier, file_args.segments, concepts)

    path_segments = ("DATA", "WRONG", "data3", "summary.txt")
    bad_path = os.path.join(*path_segments)

    with pytest.raises(errors.PathTemplateMatchError):
        pt.parse(bad_path)
[docs]
def test_parse_respects_literals(simple_path_tree):
    """Parsing a path ending in ``data2`` must report ``data_id == "data2"``."""
    schema_identifier, template_tree, concepts = simple_path_tree
    # The second-to-last parsed entry is used as the "data_path" template.
    data_args = parse_template_tree(template_tree)[-2]
    pt = PathTemplate("data_path", schema_identifier, data_args.segments, concepts)

    parsed = pt.parse(os.path.join("ROOT", "EXP001", "data2"))

    assert parsed["data_id"] == "data2"
[docs]
def test_render_and_parse_roundtrip(simple_path_tree):
    """``parse(render(values))`` must give back the original values."""
    schema_identifier, template_tree, concepts = simple_path_tree
    file_args = parse_template_tree(template_tree)[-1]
    pt = PathTemplate("file_path", schema_identifier, file_args.segments, concepts)

    values = dict(
        root="ROOT",
        experiment_id="EXP001",
        data_id="data2",
        file_name="summary",
    )

    assert pt.parse(pt.render(values)) == values
[docs]
def test_render_and_parse_with_optional_segments():
    """Round-trip a template with an optional directory, with and without it.

    When the ``optional`` concept has no value, the parsed result is expected
    to report it as ``None``.
    """
    schema_identifier = SchemaIdentifier(test=1)

    file_level = {("{file_name}.txt", "file_path"): None}
    template_tree = {
        ("{root}", "root_path"): {
            ("{optional}", "opt_path", False): file_level,
        }
    }

    # Concept name -> description; every concept uses the same regex.
    descriptions = {
        "root": "Root",
        "optional": "Optional segment",
        "file_name": "File",
    }
    concepts = {
        concept_name: PathConcept(
            name=concept_name,
            description=text,
            schema_identifier=schema_identifier,
            regex=r"\w+",
        )
        for concept_name, text in descriptions.items()
    }

    file_args = parse_template_tree(template_tree)[-1]
    pt = PathTemplate("file_path", schema_identifier, file_args.segments, concepts)

    # Optional segment present: exact round-trip.
    with_optional = {"root": "ROOT", "optional": "SUB", "file_name": "data"}
    rendered = pt.render(with_optional)
    assert pt.parse(rendered) == with_optional

    # Optional segment absent: it comes back as None.
    without_optional = {"root": "ROOT", "file_name": "data"}
    rendered = pt.render(without_optional)
    assert pt.parse(rendered) == {"root": "ROOT", "optional": None, "file_name": "data"}