"""Models for graph construction from semantic concepts"""
import warnings
from typing import Any, Dict, List, Optional, Union
from data2rdf.qudt.utils import _get_qudt_label_and_symbol, _get_query_match
from data2rdf.utils import make_prefix, split_namespace
from data2rdf.warnings import ParserWarning
from data2rdf.models.utils import ( # isort:skip
apply_datatype,
detect_datatype,
is_float,
is_integer,
)
from data2rdf.models.base import ( # isort:skip
BasicGraphModel,
BasicSuffixModel,
RelationType,
BaseConfigModel,
)
from pydantic import ( # isort:skip
AnyUrl,
AliasChoices,
BaseModel,
Field,
ValidationInfo,
field_validator,
model_validator,
)
class ValueRelationMapping(BaseModel):
    """Mapping between an object/data/annotation property and a value
    resolved from a location in the data file.

    Each instance couples one `value` with the `relation` (property) that
    attaches it to a concept, plus an optional explicit XSD `datatype`.
    """

    # The resolved value itself; required.
    value: Union[str, int, float, bool, AnyUrl] = Field(
        default=...,
        description="Value resolved from the data file.",
    )

    # The property under which `value` is attached; required.
    relation: Union[str, AnyUrl] = Field(
        default=...,
        description=(
            "Object/Data/Annotation property for the value\n"
            "resolving from `key` of this model"
        ),
    )

    # Optional explicit datatype; when absent, consumers in this module
    # fall back to detecting the datatype from the value.
    datatype: Optional[str] = Field(
        default=None,
        description="XSD Datatype of the value",
    )
class ClassTypeGraph(BasicGraphModel):
    """Graph of a potential concept or class in the T Box.

    The four ``*_properties`` lists are optional; any of them may be left
    as ``None``, in which case it simply contributes nothing to the
    generated JSON-LD.
    """

    suffix: str = Field(
        ...,
        description=(
            "Value of the suffix of the\n"
            "ontological class to be used"
        ),
    )
    rdfs_type: str = Field(
        "owl:Class", description="rdfs:type for this concept"
    )
    annotation_properties: Optional[List[ValueRelationMapping]] = Field(
        None, description="Mappings for Annotations Properties"
    )
    object_properties: Optional[List[ValueRelationMapping]] = Field(
        None, description="Mappings for Object Properties"
    )
    data_properties: Optional[List[ValueRelationMapping]] = Field(
        None, description="Mappings for Data Properties"
    )
    rdfs_properties: Optional[List[ValueRelationMapping]] = Field(
        None, description="Mappings for rdfs:Properties"
    )

    # OVERRIDE
    @property
    def json_ld(self) -> "Dict[str, Any]":
        """Return dict of json-ld for the class definition.

        Annotation, data and rdfs properties are emitted as literals: with
        the explicit datatype when one is given on the mapping, otherwise
        with a datatype detected from the stringified value. Object
        properties are emitted as ``{"@id": ...}`` references.
        """

        def literal_mappings(
            models: Optional[List[ValueRelationMapping]],
        ) -> Dict[Any, Any]:
            # `models or []`: the property lists default to None, which
            # must not be iterated.
            return {
                model.relation: (
                    # Same two-argument call form as the other call sites
                    # in this module (value first, datatype second).
                    apply_datatype(model.value, model.datatype)
                    if model.datatype
                    else detect_datatype(str(model.value))
                )
                for model in models or []
            }

        annotations = literal_mappings(self.annotation_properties)
        datatypes = literal_mappings(self.data_properties)
        properties = literal_mappings(self.rdfs_properties)
        objects = {
            # str(): the value may be an AnyUrl instance, which is not
            # directly JSON-serializable.
            model.relation: {"@id": str(model.value)}
            for model in self.object_properties or []
        }
        return {
            "@context": {
                "owl": "http://www.w3.org/2002/07/owl#",
                "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
                "dcterms": "http://purl.org/dc/terms/",
                "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
                "xsd": "http://www.w3.org/2001/XMLSchema#",
                f"{self.config.prefix_name}": make_prefix(self.config),
            },
            "@id": f"{self.config.prefix_name}:{self.suffix}",
            "@type": str(self.rdfs_type),
            # Later groups win if the same relation appears in several
            # lists (plain dict-unpacking semantics).
            **annotations,
            **datatypes,
            **objects,
            **properties,
        }
class MeasurementUnit(BaseConfigModel):
    """Description of a measurement unit identified by an IRI.

    Only `iri` is required. `label`, `symbol` and `namespace` are filled
    in after validation when they were not given explicitly.
    """

    iri: Union[str, AnyUrl] = Field(
        ...,
        description="Ontological IRI related to the measurement unit",
    )
    label: Optional[str] = Field(
        None,
        description="Label of the measurement unit",
    )
    symbol: Optional[str] = Field(
        None,
        description="Symbol of the measurement unit",
    )
    namespace: Optional[str] = Field(
        None,
        description="Namespace of the measurement unit",
    )

    # An "after" model validator receives the validated instance, so it is
    # written as a plain instance method rather than a classmethod.
    @model_validator(mode="after")
    def validate_measurement_unit(self) -> "MeasurementUnit":
        """Complete label, symbol and namespace where they are missing.

        Looks the unit up via `_get_qudt_label_and_symbol` using the IRI
        and the `qudt_units` / `language` settings of the config, and
        derives the namespace from the IRI via `split_namespace`.
        Explicitly provided values are never overwritten.
        """
        unit = _get_qudt_label_and_symbol(
            self.iri, self.config.qudt_units, self.config.language
        )
        # The lookup result may lack either key; only fill what is found.
        if not self.label and "label" in unit:
            self.label = unit["label"]
        if not self.symbol and "symbol" in unit:
            self.symbol = unit["symbol"]
        if not self.namespace:
            self.namespace = split_namespace(self.iri)
        return self
class QuantityGraph(BasicGraphModel, BasicSuffixModel):
    """Quantity with or without a discrete value and a unit.

    E.g. a quantity with a single value and unit _or_
    a quantity describing a column of a dataframe or table with a unit.
    """

    unit: Optional[Union[str, AnyUrl]] = Field(
        None, description="QUDT Symbol or any other IRI for the unit mapping"
    )
    value: Optional[Union[int, float, str]] = Field(
        None, description="Value of the quantity"
    )
    unit_relation: Optional[Union[str, AnyUrl]] = Field(
        "qudt:hasUnit",
        description=(
            "Object property for mapping the IRI\n"
            "of the unit to the individual."
        ),
    )
    value_relation: Optional[Union[str, AnyUrl]] = Field(
        "qudt:value",
        description=(
            "Data property\n"
            "for mapping the data value to the individual."
        ),
    )
    measurement_unit: Optional[MeasurementUnit] = Field(
        None,
        description="Detailed QUDT Measurement Unit specification",
        alias=AliasChoices(
            "measurement_unit", "measurementunit", "measurementUnit"
        ),
    )

    @field_validator("value", mode="after")
    @classmethod
    def validate_value(
        cls, value: Union[int, float, str]
    ) -> Union[int, float, str]:
        """Cast a string value to int or float where possible.

        Non-string values are returned unchanged. A string that is neither
        an integer nor a float is returned as-is after emitting a
        `ParserWarning`.
        """
        if isinstance(value, str):
            # Integer check first so that e.g. "3" does not become 3.0.
            if is_integer(value):
                return int(value)
            if is_float(value):
                return float(value)
            warnings.warn(
                f"Cannot type case value from str into float or int: {value}",
                ParserWarning,
            )
        return value

    @field_validator("unit", mode="after")
    @classmethod
    def validate_unit(
        cls, value: Union[str, AnyUrl], info: ValidationInfo
    ) -> Optional[str]:
        """Resolve the unit to an IRI string.

        A string starting with ``http:`` or ``https:`` is kept as it is.
        Any other string is treated as a unit symbol and looked up via
        `_get_query_match` in the `qudt_units` of the already validated
        config: no match yields ``None`` (with a warning), several matches
        yield one of them (with a warning). An `AnyUrl` is converted to
        its string form.
        """
        # Fields validated before `unit` are available through info.data.
        config = info.data.get("config")
        if isinstance(value, str):
            if not value.startswith(("https:", "http:")):
                match = _get_query_match(value, config.qudt_units)
                if len(match) == 0:
                    warnings.warn(
                        f"No QUDT Mapping found for unit with symbol `{value}`."
                    )
                    value = None
                else:
                    if len(match) > 1:
                        # Ambiguous symbol: warn, then still pick one.
                        warnings.warn(
                            f"Multiple QUDT Mappings found for unit with symbol `{value}`."
                        )
                    value = match.pop()
        elif isinstance(value, AnyUrl):
            value = str(value)
        return value

    # An "after" model validator receives the validated instance, so it is
    # written as a plain instance method rather than a classmethod.
    @model_validator(mode="after")
    def validate_quantity_graph(self) -> "QuantityGraph":
        """Keep `unit` and `measurement_unit` in sync.

        If only `unit` is set, a `MeasurementUnit` is built from it; if
        only `measurement_unit` is set, its IRI becomes `unit`.
        """
        if not self.measurement_unit and self.unit:
            self.measurement_unit = MeasurementUnit(iri=self.unit)
        if self.measurement_unit and not self.unit:
            self.unit = self.measurement_unit.iri
        return self

    @property
    def json_ld(self) -> Dict[str, Any]:
        """Return dict of json-ld for graph"""
        return {
            "@context": {
                f"{self.config.prefix_name}": make_prefix(self.config),
                "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
                "xsd": "http://www.w3.org/2001/XMLSchema#",
                "qudt": "http://qudt.org/schema/qudt/",
            },
            "@id": f"{self.config.prefix_name}:{self.suffix}",
            # `iri` may hold a single IRI or a list of IRIs.
            "@type": (
                [str(iri) for iri in self.iri]
                if isinstance(self.iri, list)
                else str(self.iri)
            ),
            **self.unit_json,
            **self.value_json,
        }

    @property
    def unit_json(self) -> "Dict[str, Any]":
        """Return json with unit definition (empty when no unit is set)"""
        if not self.unit:
            return {}
        return {
            self.unit_relation: {
                "@value": self.unit,
                "@type": "xsd:anyURI",
            }
        }

    @property
    def value_json(self) -> "Dict[str, Any]":
        """Return json with value definition (empty when no value is set)"""
        # Explicit None check: a truthiness test would silently drop a
        # legitimate quantity value of 0 or 0.0.
        if self.value is None:
            return {}
        return {self.value_relation: detect_datatype(str(self.value))}
class PropertyGraph(BasicGraphModel, BasicSuffixModel):
    """Mapping for an individual with arbitrary property. E.g. the
    name of a tester or a testing facility. The value does not need to be
    a discrete value but can also be a reference to a column in a table or
    dataframe."""

    value: Optional[
        Union[str, int, float, bool, AnyUrl, "PropertyGraph", "QuantityGraph"]
    ] = Field(None, description="Value of the property")
    annotation: Optional[Union[str, AnyUrl]] = Field(
        None, description="Base IRI with which the value shall be concatenated"
    )
    value_relation: Optional[Union[str, AnyUrl]] = Field(
        "rdfs:label",
        description=(
            "Data or annotation property\n"
            "for mapping the data value to the individual."
        ),
        alias=AliasChoices("relation", "value_relation", "valuerelation"),
    )
    value_relation_type: Optional[RelationType] = Field(
        None,
        description="Type of the semantic relation used in the mappings",
        alias=AliasChoices(
            "value_relation_type",
            "value_relationtype",
            "relation_type",
            "relationtype",
        ),
    )
    value_datatype: Optional[str] = Field(
        None,
        description="In case of an annotation or data property, this field indicates the XSD Datatype of the value",
        alias=AliasChoices(
            "value_datatype", "value_data_type", "datatype", "data_type"
        ),
    )

    @field_validator("annotation")
    @classmethod
    def validate_annotation(
        cls, value: Optional[Union[str, AnyUrl]]
    ) -> Optional[Union[str, AnyUrl]]:
        """Make sure that there are no blank spaces around the IRI"""
        if value:
            value = AnyUrl(str(value).strip())
        return value

    # "After" model validators receive the validated instance, so they are
    # written as plain instance methods rather than classmethods.
    @model_validator(mode="after")
    def validate_value(self) -> "PropertyGraph":
        """
        Validate value of a property graph.
        In case the value is a property graph or a quantity graph, make sure that
        the config is set correctly.
        """
        if isinstance(self.value, (PropertyGraph, QuantityGraph)):
            # The nested graph shares the config of its parent.
            self.value.config = self.config
        return self

    @model_validator(mode="after")
    def validate_property_graph(self) -> "PropertyGraph":
        """Validate property graph in order to generate annotations.

        When an annotation base IRI is given, the value is appended to it,
        inserting the configured separator unless the base already ends
        with it. Without a value the annotation is left untouched.

        Raises:
            TypeError: if an annotation is combined with a nested
                property/quantity graph as value, which cannot be turned
                into an IRI fragment.
        """
        if self.annotation and self.value is not None:
            if isinstance(self.value, (PropertyGraph, QuantityGraph)):
                raise TypeError(
                    "Cannot concatenate the annotation IRI with a nested "
                    f"graph value of type `{type(self.value).__name__}`."
                )
            base = str(self.annotation)
            if not base.endswith(self.config.separator):
                base += self.config.separator
            # str(): the value may be int, float, bool or AnyUrl, none of
            # which can be concatenated to a str directly.
            self.annotation = base + str(self.value)
        return self

    @property
    def json_ld(self) -> Dict[str, Any]:
        """Return dict of json-ld for graph"""
        return {
            "@context": {
                f"{self.config.prefix_name}": make_prefix(self.config),
                "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
                "xsd": "http://www.w3.org/2001/XMLSchema#",
            },
            "@id": f"{self.config.prefix_name}:{self.suffix}",
            **self.value_json,
            **self.types_json,
        }

    @property
    def value_json(self) -> "Dict[str, Any]":
        """Dict of json-ld for the value of the individual.

        Empty when no value is set. For an object property the value is
        emitted either as the nested graph's json-ld or as an ``@id``
        reference; for every other relation type it is emitted as a
        literal with the explicit or a detected datatype.
        """
        if self.value is None:
            return {}
        if self.value_relation_type != RelationType.OBJECT_PROPERTY:
            if self.value_datatype:
                spec = apply_datatype(self.value, self.value_datatype)
            else:
                spec = detect_datatype(str(self.value))
            return {self.value_relation: spec}
        if isinstance(self.value, (PropertyGraph, QuantityGraph)):
            return {self.value_relation: self.value.json_ld}
        return {self.value_relation: {"@id": str(self.value)}}

    @property
    def types_json(self) -> "Dict[str, Any]":
        """Dict of json-ld for class types of the individual"""
        # Always build one flat list of strings: `iri` may be a single IRI
        # or a list of IRIs, and JSON-LD does not allow a nested array as
        # `@type` value.
        if isinstance(self.iri, list):
            types = [str(iri) for iri in self.iri]
        else:
            types = [str(self.iri)]
        if self.annotation:
            types.append(self.annotation)
        return {"@type": types}