Skip to content
554 changes: 10 additions & 544 deletions __init__.py

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion __main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def main():
if not args.json:
print(exc, file=sys.stderr)
else:
json.dump(exc.asdict(), sys.stdout)
json.dump([e.asdict() for e in getattr(exc, "errors", [exc])], sys.stdout, indent=2)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we actually catch both cases separately (CollectedValidationErrors and ValidationError) instead, or refactor ValidationError to always point to a list of errors?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I designed it in a way that ValidationError is never raised directly; the issues are represented in subclasses (e.g. SyntaxError, DuplicateNameError, etc.) and collected into a single CollectedValidationErrors instance, which is then raised. As a result, the getattr was already redundant and we can rely on exc.errors directly:
json.dump([e.asdict() for e in exc.errors], sys.stdout, indent=2)
ValidationError is now only used for testing, as it covers all subclasses with pytest.raises(_ValidationError).

I've also renamed it to _ValidationErrors and added documentation, clarifying that we only use it internally. The parser itself also doesn't import it directly.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sounds great!

exit(1)

if __name__ == '__main__':
Expand Down
29 changes: 29 additions & 0 deletions fixtures/fail_multiple_duplicate_ids.ifc
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
ISO-10303-21;
HEADER;
FILE_DESCRIPTION(('ViewDefinition [CoordinationView]'),'2;1');
FILE_NAME('','2022-05-04T08:08:30',(''),(''),'IfcOpenShell-0.7.0','IfcOpenShell-0.7.0','');
FILE_SCHEMA(('IFC4'));
ENDSEC;
DATA;
#1=IFCPERSON($,$,'',$,$,$,$,$);
#2=IFCORGANIZATION($,'',$,$,$);
#3=IFCPERSONANDORGANIZATION(#1,#2,$);
#4=IFCAPPLICATION(#2,'0.7.0','IfcOpenShell-0.7.0','');
#5=IFCOWNERHISTORY(#3,#4,$,.ADDED.,$,#3,#4,1651651710);
#6=IFCDIRECTION((1.,0.,0.));
#7=IFCDIRECTION((0.,0.,1.));
#8=IFCCARTESIANPOINT((0.,0.,0.));
#9=IFCAXIS2PLACEMENT3D(#8,#7,#6);
#10=IFCDIRECTION((0.,1.,0.));
#11=IFCGEOMETRICREPRESENTATIONCONTEXT($,'Model',3,1.E-05,#9,#10);
#12=IFCDIMENSIONALEXPONENTS(0,0,0,0,0,0,0);
#13=IFCSIUNIT(*,.LENGTHUNIT.,$,.METRE.);
#14=IFCSIUNIT(*,.AREAUNIT.,$,.SQUARE_METRE.);
#15=IFCSIUNIT(*,.VOLUMEUNIT.,$,.CUBIC_METRE.);
#16=IFCSIUNIT(*,.PLANEANGLEUNIT.,$,.RADIAN.);
#18=IFCMEASUREWITHUNIT(IFCPLANEANGLEMEASURE(0.017453292519943295),#16);
#18=IFCCONVERSIONBASEDUNIT(#12,.PLANEANGLEUNIT.,'DEGREE',#17);
#19=IFCUNITASSIGNMENT((#13,#14,#15,#18));
#19=IFCPROJECT('2AyG2X0sb16Bjd4gQc07yZ',#5,'',$,$,$,$,(#11),#19);
ENDSEC;
END-ISO-10303-21;
30 changes: 30 additions & 0 deletions fixtures/fail_multiple_wrong_header_fields.ifc
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
ISO-10303-21;
HEADER;
FILE_DESCRIPTION(('ViewDefinition [ReferenceView_V1.2]', 'ExchangeRequirement [Any]'));
FILE_NAME('Header.ifc','2025-02-13T15:58:45',('tricott'),('Trimble Inc.'),'TrimBimToIFC rel. 4.0.2','Example - Example - 2025.0','IFC4 model', '');
FILE_SCHEMA(('IFC4'));
ENDSEC;
DATA;
#1=IFCPERSON($,$,'',$,$,$,$,$);
#2=IFCORGANIZATION($,'',$,$,$);
#3=IFCPERSONANDORGANIZATION(#1,#2,$);
#4=IFCAPPLICATION(#2,'v0.7.0-6c9e130ca','IfcOpenShell-v0.7.0-6c9e130ca','');
#5=IFCOWNERHISTORY(#3,#4,$,.NOTDEFINED.,$,#3,#4,1700419055);
#6=IFCDIRECTION((1.,0.,0.));
#7=IFCDIRECTION((0.,0.,1.));
#8=IFCCARTESIANPOINT((0.,0.,0.));
#9=IFCAXIS2PLACEMENT3D(#8,#7,#6);
#10=IFCDIRECTION((0.,1.));
#11=IFCGEOMETRICREPRESENTATIONCONTEXT($,'Model',3,1.E-05,#9,#10);
#12=IFCDIMENSIONALEXPONENTS(0,0,0,0,0,0,0);
#13=IFCSIUNIT(*,.LENGTHUNIT.,$,.METRE.);
#14=IFCSIUNIT(*,.AREAUNIT.,$,.SQUARE_METRE.);
#15=IFCSIUNIT(*,.VOLUMEUNIT.,$,.CUBIC_METRE.);
#16=IFCSIUNIT(*,.PLANEANGLEUNIT.,$,.RADIAN.);
#17=IFCMEASUREWITHUNIT(IFCPLANEANGLEMEASURE(0.017453292519943295),#16);
#18=IFCCONVERSIONBASEDUNIT(#12,.PLANEANGLEUNIT.,'DEGREE',#17);
#19=IFCUNITASSIGNMENT((#13,#14,#15,#18));
#20=IFCPROJECT('0iDmeiiLP3AOllitM2Favn',#5,'',$,$,$,$,(#11),#19);
#21=IFCSITE('3rg2jGkIH10RFhrQsGZKRk',#5,$,$,$,$,$,$,$,$,$,$,$,$);
ENDSEC;
END-ISO-10303-21;
30 changes: 30 additions & 0 deletions fixtures/fail_too_many_header_entity_fields.ifc
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
ISO-10303-21;
HEADER;
FILE_DESCRIPTION(('ViewDefinition [ReferenceView_V1.2]', 'ExchangeRequirement [Any]'),'2;1');
FILE_NAME('Header.ifc','2025-02-13T15:58:45',('tricott'),('Trimble Inc.'),'TrimBimToIFC rel. 4.0.2','Example - Example - 2025.0','IFC4 model', '');
FILE_SCHEMA(('IFC4'));
ENDSEC;
DATA;
#1=IFCPERSON($,$,'',$,$,$,$,$);
#2=IFCORGANIZATION($,'',$,$,$);
#3=IFCPERSONANDORGANIZATION(#1,#2,$);
#4=IFCAPPLICATION(#2,'v0.7.0-6c9e130ca','IfcOpenShell-v0.7.0-6c9e130ca','');
#5=IFCOWNERHISTORY(#3,#4,$,.NOTDEFINED.,$,#3,#4,1700419055);
#6=IFCDIRECTION((1.,0.,0.));
#7=IFCDIRECTION((0.,0.,1.));
#8=IFCCARTESIANPOINT((0.,0.,0.));
#9=IFCAXIS2PLACEMENT3D(#8,#7,#6);
#10=IFCDIRECTION((0.,1.));
#11=IFCGEOMETRICREPRESENTATIONCONTEXT($,'Model',3,1.E-05,#9,#10);
#12=IFCDIMENSIONALEXPONENTS(0,0,0,0,0,0,0);
#13=IFCSIUNIT(*,.LENGTHUNIT.,$,.METRE.);
#14=IFCSIUNIT(*,.AREAUNIT.,$,.SQUARE_METRE.);
#15=IFCSIUNIT(*,.VOLUMEUNIT.,$,.CUBIC_METRE.);
#16=IFCSIUNIT(*,.PLANEANGLEUNIT.,$,.RADIAN.);
#17=IFCMEASUREWITHUNIT(IFCPLANEANGLEMEASURE(0.017453292519943295),#16);
#18=IFCCONVERSIONBASEDUNIT(#12,.PLANEANGLEUNIT.,'DEGREE',#17);
#19=IFCUNITASSIGNMENT((#13,#14,#15,#18));
#20=IFCPROJECT('0iDmeiiLP3AOllitM2Favn',#5,'',$,$,$,$,(#11),#19);
#21=IFCSITE('3rg2jGkIH10RFhrQsGZKRk',#5,$,$,$,$,$,$,$,$,$,$,$,$);
ENDSEC;
END-ISO-10303-21;
Empty file added parser/__init__.py
Empty file.
105 changes: 105 additions & 0 deletions parser/errors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
from lark.exceptions import UnexpectedToken

class ValidationError(Exception):
    """Common base class of the validation errors defined in this module."""

class ErrorCollector:
    """Accumulate validation errors so they can be raised together."""

    def __init__(self):
        # Collected errors, in the order they were added.
        self.errors = []

    def add(self, error):
        """Record ``error`` at the end of the collected errors."""
        self.errors.append(error)

    def raise_if_any(self):
        """Raise the collected errors as one exception, if there are any.

        :raises CollectedValidationErrors: when at least one error was added;
            the exception wraps the collector's own ``errors`` list.
        """
        if not self.errors:
            return
        raise CollectedValidationErrors(self.errors)

class CollectedValidationErrors(ValidationError):
    """Validation error that bundles a list of individual errors."""

    def __init__(self, errors):
        # The individual errors; each is expected to provide ``asdict()``.
        self.errors = errors

    def asdict(self, with_message=True):
        """Return the ``asdict()`` result of every bundled error, as a list.

        :param with_message: forwarded unchanged to each error's ``asdict()``
        """
        serialized = []
        for error in self.errors:
            serialized.append(error.asdict(with_message=with_message))
        return serialized

    def __str__(self):
        headline = f"{len(self.errors)} validation error(s) collected:\n"
        # Individual reports are separated by one blank line.
        return headline + "\n\n".join(map(str, self.errors))

class SyntaxError(ValidationError):
    """Syntax problem reported by the parser, with the offending line quoted.

    :param filecontent: the full text that was parsed
    :param exception: the parser exception; it must expose ``line`` and
        ``column``. Token errors additionally expose ``token`` and
        ``accepts``.
    """

    def __init__(self, filecontent, exception):
        self.filecontent = filecontent
        self.exception = exception

    def asdict(self, with_message=True):
        """Describe the error as a plain dictionary.

        :param with_message: when true, add a ``"message"`` entry holding
            ``str(self)``
        """
        exc = self.exception

        # Only token errors carry ``token``/``accepts``. Reading them
        # unconditionally raised AttributeError for every other exception.
        # NOTE(review): the fallbacks assume the other case is lark's
        # UnexpectedCharacters, which exposes ``char`` and ``allowed`` --
        # confirm against the exceptions the parser actually wraps.
        token = getattr(exc, "token", None)
        if token is not None:
            found_type = token.type.lower()
            found_value = token.value
        else:
            found_type = ""
            found_value = getattr(exc, "char", "")
        candidates = (
            getattr(exc, "accepts", None) or getattr(exc, "allowed", None) or ()
        )

        return {
            "type": (
                "unexpected_token"
                if isinstance(exc, UnexpectedToken)
                else "unexpected_character"
            ),
            "lineno": exc.line,
            "column": exc.column,
            "found_type": found_type,
            "found_value": found_value,
            # Anonymous terminals are left out of the report.
            "expected": sorted(x for x in candidates if "__ANON" not in x),
            "line": self.filecontent.split("\n")[exc.line - 1],
            **({"message": str(self)} if with_message else {}),
        }

    def __str__(self):
        d = self.asdict(with_message=False)

        expected = d["expected"]
        if len(expected) == 1:
            exp = expected[0]
        else:
            exp = f"one of {' '.join(expected)}"

        if d["type"] == "unexpected_character":
            # Keep a space between "character" and the type name, if any.
            found = " ".join(p for p in ("character", d["found_type"]) if p)
        else:
            found = d["found_type"]

        # The caret has to be shifted by the width of the line-number gutter
        # so that it ends up below the reported column of the quoted line.
        gutter = f"{d['lineno']:05d} | "
        caret = " " * (len(gutter) + d["column"] - 1) + "^"

        return (
            f"On line {d['lineno']} column {d['column']}:\n"
            f"Unexpected {found} ('{d['found_value']}')\n"
            f"Expecting {exp}\n"
            f"{gutter}{d['line']}\n"
            f"{caret}"
        )


class DuplicateNameError(ValidationError):
    """Error for an instance name that is defined more than once.

    :param filecontent: the full text that was parsed
    :param name: the duplicated instance name
    :param linenumbers: line numbers associated with the name; only the
        first one is reported
    """

    def __init__(self, filecontent, name, linenumbers):
        self.name = name
        self.filecontent = filecontent
        self.linenumbers = linenumbers

    def asdict(self, with_message=True):
        """Describe the error as a plain dictionary.

        :param with_message: when true, add a ``"message"`` entry holding
            ``str(self)``
        """
        info = {
            "type": "duplicate_name",
            "name": self.name,
            "lineno": self.linenumbers[0],
            "line": self.filecontent.split("\n")[self.linenumbers[0] - 1],
        }
        if with_message:
            info["message"] = str(self)
        return info

    def __str__(self):
        d = self.asdict(with_message=False)
        # Underline the quoted line, skipping the 8-character gutter.
        underline = " " * 8 + "^" * len(d["line"].rstrip())
        report = (
            f"On line {d['lineno']}:\nDuplicate instance name #{d['name']}",
            f"{d['lineno']:05d} | {d['line']}",
            underline,
        )
        return "\n".join(report)


class HeaderFieldError(ValidationError):
    """Error for a HEADER field with an unexpected number of parameters.

    :param field: name of the header field
    :param found_len: number of parameters found
    :param expected_len: number of parameters expected
    """

    def __init__(self, field, found_len, expected_len):
        self.field = field
        self.found_len = found_len
        self.expected_len = expected_len

    def asdict(self, with_message=True):
        """Describe the error as a plain dictionary.

        :param with_message: when true, add a ``"message"`` entry holding
            ``str(self)``
        """
        info = {
            "type": "invalid_header_field",
            "field": self.field,
            "expected_field_count": self.expected_len,
            "actual_field_count": self.found_len,
        }
        if with_message:
            info["message"] = str(self)
        return info

    def __str__(self):
        problem = f"Invalid number of parameters for HEADER field '{self.field}'. "
        counts = f"Expected {self.expected_len}, found {self.found_len}."
        return problem + counts
108 changes: 108 additions & 0 deletions parser/file.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
import types
import re
import numbers
import itertools

from .parse import parse, ParseResult
from .grammar import HEADER_FIELDS
from .transformer import entity_instance

try:
from .mvd_info import MvdInfo, LARK_AVAILABLE
except ImportError: # in case of running module locally (e.g. test_parser.py)
from mvd_info import MvdInfo, LARK_AVAILABLE

class file:
    """
    A somewhat compatible interface (but very limited) to ifcopenshell.file
    """

    def __init__(self, result: ParseResult):
        # Parsed header; indexed by upper-case field name (e.g. "FILE_SCHEMA").
        self.header_ = result.header
        # Parsed instances; maps an instance id to the list of its definitions.
        self.data_ = result.entities

    @property
    def schema_identifier(self) -> str:
        """Full schema identifier: first item of the first FILE_SCHEMA parameter."""
        return self.header_["FILE_SCHEMA"][0][0]

    @property
    def schema(self) -> str:
        """General IFC schema version: IFC2X3, IFC4, IFC4X3."""
        # Both groups are optional, so the pattern always matches.
        match = re.match(r"(?:IFC(\d+))?(?:X(\d+))?", self.schema_identifier)
        parts = []
        for prefix, digits in zip(("IFC", "X"), match.groups()):
            number = int(digits) if digits else None
            if number:
                parts.append(f"{prefix}{number}")
        return "".join(parts)

    @property
    def schema_version(self) -> tuple[int, int, int, int]:
        """Numeric representation of the full IFC schema version.

        E.g. IFC4X3_ADD2 is represented as (4, 3, 2, 0).
        """
        # This class has no ``wrapped_data`` attribute; the identifier comes
        # from the parsed header.
        schema = self.schema_identifier
        version = []
        for prefix in ("IFC", "X", "_ADD", "_TC"):
            number = re.search(prefix + r"(\d+)", schema)
            # Components missing from the identifier count as 0.
            version.append(int(number.group(1)) if number else 0)
        return tuple(version)

    @property
    def header(self):
        """Header as a namespace with one lower-case attribute per HEADER_FIELDS entry.

        Each attribute is an instance of the class mapped in ``HEADER_FIELDS``,
        built from the parsed parameters of that field (none if the field is
        absent from the parsed header).
        """
        header = {}
        for field_name, namedtuple_class in HEADER_FIELDS.items():
            field_data = self.header_.get(field_name.upper(), [])
            header[field_name.lower()] = namedtuple_class(*field_data)

        return types.SimpleNamespace(**header)

    @property
    def mvd(self):
        """``MvdInfo`` built from ``self.header``, or ``None`` when unavailable."""
        if not LARK_AVAILABLE or MvdInfo is None:
            return None
        return MvdInfo(self.header)

    def __getitem__(self, key: numbers.Integral) -> entity_instance:
        """Shorthand for :meth:`by_id`."""
        return self.by_id(key)

    def by_id(self, id: int) -> entity_instance:
        """Return an IFC entity instance filtered by IFC ID.

        :param id: STEP numerical identifier
        :type id: int

        :raises RuntimeError: If `id` is not found or multiple definitions exist for `id`.

        :rtype: entity_instance
        """
        ns = self.data_.get(id, [])
        if not ns:
            raise RuntimeError(f"Instance with id {id} not found")
        if len(ns) > 1:
            raise RuntimeError(f"Duplicate definition for id {id}")
        return ns[0]

    def by_type(self, type: str) -> list[entity_instance]:
        """Return IFC objects filtered by IFC Type and wrapped with the entity_instance class.

        The type name is compared case-insensitively.

        :rtype: list[entity_instance]
        """
        type_lc = type.lower()
        return [
            ent
            for ent in itertools.chain.from_iterable(self.data_.values())
            if ent.type.lower() == type_lc
        ]

def open(fn, only_header=False) -> file:
    """Parse the file named ``fn`` and wrap the result in a ``file`` object.

    :param fn: passed to ``parse`` as ``filename``
    :param only_header: passed to ``parse`` unchanged
    """
    parsed = parse(filename=fn, only_header=only_header)
    return file(parsed)
Loading