From b5aee9754a56d447ec629a2405e9ab7e393e4a82 Mon Sep 17 00:00:00 2001
From: Ben Olmstead
Date: Sat, 12 Oct 2024 20:31:10 +0000
Subject: [PATCH 1/6] Convert `Location` to a `namedtuple`, and associated cleanup.

This change makes the `Location` dataclass, which does not change
frequently, into a new `SourceLocation` namedtuple, and changes the
`SourceLocation` serialization. As a result, with this change:

* `embossc` runs about 25% faster on a large (7kLOC) input; `python3
  -OO emboss` runs about 19% faster on the same input.

* Serialized IR is about 45% smaller.

Details:

* Replace the `ir_data.Location` dataclass with a new
  `parser_types.SourceLocation` namedtuple. The rename helps clarify
  the difference between a location within source code
  (`SourceLocation`) and a location within a structure
  (`FieldLocation`).

* Similarly, replace `ir_data.Position` with
  `parser_types.SourcePosition`.

* Update any place that edits a `SourceLocation` with an appropriate
  assignment; e.g., `x.source_location.end = y` becomes
  `x.source_location = x.source_location._replace(end=y)`. In most
  cases, several fields were updated consecutively; those updates have
  been merged.

* Update the JSON serialization to use the compact format.

* Replace `format_location()` and `format_position()` with `__str__()`
  methods on `SourceLocation` and `SourcePosition`, respectively.

* Replace `parse_location()` and `parse_position()` with `from_str()`
  class methods on `SourceLocation` and `SourcePosition`, respectively.

* Move the `make_location()` functionality into
  `SourceLocation.__new__()`.

* Update `_to_dict` and `_from_dict` in `IrDataSerializer` to stringify
  and destringify `SourceLocation`. It is tempting to try to do this
  during the JSON serialization step (with a `default=` parameter to
  `json.dumps` and an `object_hook=` parameter to `json.loads`), but it
  is tricky to get the `object_hook` to know when to convert.
---
 compiler/back_end/cpp/header_generator.py     |  11 +-
 .../back_end/cpp/header_generator_test.py     |  91 +-
 compiler/front_end/glue.py                    |   2 +-
 compiler/front_end/glue_test.py               |  14 +-
 compiler/front_end/lr1.py                     |  25 +-
 compiler/front_end/lr1_test.py                |   4 +-
 compiler/front_end/module_ir.py               | 119 +--
 compiler/front_end/module_ir_test.py          | 131 +--
 compiler/front_end/symbol_resolver.py         |   3 +-
 compiler/front_end/synthetics.py              |   6 +-
 compiler/front_end/tokenizer.py               |  16 +-
 compiler/front_end/tokenizer_test.py          |  21 +-
 compiler/front_end/write_inference.py         |   7 +-
 compiler/util/BUILD                           |  13 +-
 compiler/util/error.py                        |  29 +-
 compiler/util/error_test.py                   | 106 +-
 compiler/util/ir_data.py                      |  85 +-
 compiler/util/ir_data_fields_test.py          |   1 +
 compiler/util/ir_data_utils.py                |  22 +-
 compiler/util/ir_data_utils_test.py           |  45 +-
 compiler/util/ir_util_test.py                 |   6 +-
 compiler/util/parser_types.py                 | 167 ++-
 compiler/util/parser_types_test.py            |  77 +-
 compiler/util/test_util.py                    |   4 +-
 compiler/util/test_util_test.py               |  42 +-
 compiler/util/traverse_ir.py                  |   3 +-
 .../golden/span_se_log_file_status.ir.txt     | 987 ++----------------
 .../span_se_log_file_status.parse_tree.txt    |  30 +-
 .../golden/span_se_log_file_status.tokens.txt |  30 +-
 29 files changed, 619 insertions(+), 1478 deletions(-)

diff --git a/compiler/back_end/cpp/header_generator.py b/compiler/back_end/cpp/header_generator.py
index 0201d12..392d188 100644
--- a/compiler/back_end/cpp/header_generator.py
+++ b/compiler/back_end/cpp/header_generator.py
@@ -1747,9 +1747,14 @@ def _offset_source_location_column(source_location, offset):
       original start column.
""" - new_location = ir_data_utils.copy(source_location) - new_location.start.column = source_location.start.column + offset[0] - new_location.end.column = source_location.start.column + offset[1] + new_location = source_location._replace( + start=source_location.start._replace( + column=source_location.start.column + offset[0] + ), + end=source_location.start._replace( + column=source_location.start.column + offset[1] + ), + ) return new_location diff --git a/compiler/back_end/cpp/header_generator_test.py b/compiler/back_end/cpp/header_generator_test.py index 6d31df8..d125d9e 100644 --- a/compiler/back_end/cpp/header_generator_test.py +++ b/compiler/back_end/cpp/header_generator_test.py @@ -126,12 +126,11 @@ def test_rejects_bad_enum_case_at_start(self): ) attr = ir.module[0].type[0].attribute[0] - bad_case_source_location = ir_data.Location() - bad_case_source_location = ir_data_utils.builder(bad_case_source_location) - bad_case_source_location.CopyFrom(attr.value.source_location) - # Location of SHORTY_CASE in the attribute line. - bad_case_source_location.start.column = 30 - bad_case_source_location.end.column = 41 + # SourceLocation of SHORTY_CASE in the attribute line. + bad_case_source_location = attr.value.source_location._replace( + start=attr.value.source_location.start._replace(column=30), + end=attr.value.source_location.end._replace(column=41), + ) self.assertEqual( [ @@ -156,12 +155,11 @@ def test_rejects_bad_enum_case_in_middle(self): ) attr = ir.module[0].type[0].attribute[0] - bad_case_source_location = ir_data.Location() - bad_case_source_location = ir_data_utils.builder(bad_case_source_location) - bad_case_source_location.CopyFrom(attr.value.source_location) - # Location of bad_CASE in the attribute line. - bad_case_source_location.start.column = 43 - bad_case_source_location.end.column = 51 + # SourceLocation of bad_CASE in the attribute line. + bad_case_source_location = attr.value.source_location._replace( + start=attr.value.source_location.start._replace(column=43), + end=attr.value.source_location.end._replace(column=51), + ) self.assertEqual( [ @@ -186,12 +184,11 @@ def test_rejects_bad_enum_case_at_end(self): ) attr = ir.module[0].type[0].attribute[0] - bad_case_source_location = ir_data.Location() - bad_case_source_location = ir_data_utils.builder(bad_case_source_location) - bad_case_source_location.CopyFrom(attr.value.source_location) - # Location of BAD_case in the attribute line. - bad_case_source_location.start.column = 55 - bad_case_source_location.end.column = 63 + # SourceLocation of BAD_case in the attribute line. + bad_case_source_location = attr.value.source_location._replace( + start=attr.value.source_location.start._replace(column=55), + end=attr.value.source_location.end._replace(column=63), + ) self.assertEqual( [ @@ -216,12 +213,11 @@ def test_rejects_duplicate_enum_case(self): ) attr = ir.module[0].type[0].attribute[0] - bad_case_source_location = ir_data.Location() - bad_case_source_location = ir_data_utils.builder(bad_case_source_location) - bad_case_source_location.CopyFrom(attr.value.source_location) - # Location of the second SHOUTY_CASE in the attribute line. - bad_case_source_location.start.column = 43 - bad_case_source_location.end.column = 54 + # SourceLocation of the second SHOUTY_CASE in the attribute line. 
+ bad_case_source_location = attr.value.source_location._replace( + start=attr.value.source_location.start._replace(column=43), + end=attr.value.source_location.end._replace(column=54), + ) self.assertEqual( [ @@ -246,12 +242,11 @@ def test_rejects_empty_enum_case(self): ) attr = ir.module[0].type[0].attribute[0] - bad_case_source_location = ir_data.Location() - bad_case_source_location = ir_data_utils.builder(bad_case_source_location) - bad_case_source_location.CopyFrom(attr.value.source_location) - # Location of excess comma. - bad_case_source_location.start.column = 42 - bad_case_source_location.end.column = 42 + # SourceLocation of excess comma. + bad_case_source_location = attr.value.source_location._replace( + start=attr.value.source_location.start._replace(column=42), + end=attr.value.source_location.end._replace(column=42), + ) self.assertEqual( [ @@ -274,8 +269,10 @@ def test_rejects_empty_enum_case(self): " BAZ = 2\n" ) - bad_case_source_location.start.column = 30 - bad_case_source_location.end.column = 30 + bad_case_source_location = attr.value.source_location._replace( + start=attr.value.source_location.start._replace(column=30), + end=attr.value.source_location.end._replace(column=30), + ) self.assertEqual( [ @@ -298,8 +295,10 @@ def test_rejects_empty_enum_case(self): " BAZ = 2\n" ) - bad_case_source_location.start.column = 54 - bad_case_source_location.end.column = 54 + bad_case_source_location = attr.value.source_location._replace( + start=attr.value.source_location.start._replace(column=54), + end=attr.value.source_location.end._replace(column=54), + ) self.assertEqual( [ @@ -322,8 +321,10 @@ def test_rejects_empty_enum_case(self): " BAZ = 2\n" ) - bad_case_source_location.start.column = 45 - bad_case_source_location.end.column = 45 + bad_case_source_location = attr.value.source_location._replace( + start=attr.value.source_location.start._replace(column=45), + end=attr.value.source_location.end._replace(column=45), + ) self.assertEqual( [ @@ -346,8 +347,10 @@ def test_rejects_empty_enum_case(self): " BAZ = 2\n" ) - bad_case_source_location.start.column = 30 - bad_case_source_location.end.column = 30 + bad_case_source_location = attr.value.source_location._replace( + start=attr.value.source_location.start._replace(column=30), + end=attr.value.source_location.end._replace(column=30), + ) self.assertEqual( [ @@ -370,8 +373,10 @@ def test_rejects_empty_enum_case(self): " BAZ = 2\n" ) - bad_case_source_location.start.column = 35 - bad_case_source_location.end.column = 35 + bad_case_source_location = attr.value.source_location._replace( + start=attr.value.source_location.start._replace(column=35), + end=attr.value.source_location.end._replace(column=35), + ) self.assertEqual( [ @@ -394,8 +399,10 @@ def test_rejects_empty_enum_case(self): " BAZ = 2\n" ) - bad_case_source_location.start.column = 31 - bad_case_source_location.end.column = 31 + bad_case_source_location = attr.value.source_location._replace( + start=attr.value.source_location.start._replace(column=31), + end=attr.value.source_location.end._replace(column=31), + ) self.assertEqual( [ diff --git a/compiler/front_end/glue.py b/compiler/front_end/glue.py index f12f27e..8b27260 100644 --- a/compiler/front_end/glue.py +++ b/compiler/front_end/glue.py @@ -203,7 +203,7 @@ def parse_module(file_name, file_reader): """ source_code, errors = file_reader(file_name) if errors: - location = parser_types.make_location((1, 1), (1, 1)) + location = parser_types.SourceLocation((1, 1), (1, 1)) return ( None, None, diff --git 
a/compiler/front_end/glue_test.py b/compiler/front_end/glue_test.py index 1decae3..fface8a 100644 --- a/compiler/front_end/glue_test.py +++ b/compiler/front_end/glue_test.py @@ -24,7 +24,7 @@ from compiler.util import parser_types from compiler.util import test_util -_location = parser_types.make_location +_location = parser_types.SourceLocation _ROOT_PACKAGE = "testdata.golden" _GOLDEN_PATH = "" @@ -232,7 +232,9 @@ def test_synthetic_error(self): self.assertFalse(errors) # Artificially mark the first field as is_synthetic. first_field = ir.module[0].type[0].structure.field[0] - first_field.source_location.is_synthetic = True + first_field.source_location = first_field.source_location._replace( + is_synthetic=True + ) ir, errors = glue.process_ir(ir, None) self.assertTrue(errors) self.assertEqual( @@ -259,8 +261,12 @@ def test_suppressed_synthetic_error(self): self.assertFalse(errors) # Artificially mark the name of the second field as is_synthetic. second_field = ir.module[0].type[0].structure.field[1] - second_field.name.source_location.is_synthetic = True - second_field.name.name.source_location.is_synthetic = True + second_field.name.source_location = second_field.name.source_location._replace( + is_synthetic=True + ) + second_field.name.name.source_location = ( + second_field.name.name.source_location._replace(is_synthetic=True) + ) ir, errors = glue.process_ir(ir, None) self.assertEqual(1, len(errors)) self.assertEqual("Duplicate name 'field'", errors[0][0].message) diff --git a/compiler/front_end/lr1.py b/compiler/front_end/lr1.py index 58d5076..42c648b 100644 --- a/compiler/front_end/lr1.py +++ b/compiler/front_end/lr1.py @@ -140,7 +140,9 @@ def __str__(self): # ANY_TOKEN is used by mark_error as a "wildcard" token that should be replaced # by every other token. -ANY_TOKEN = parser_types.Token(object(), "*", parser_types.parse_location("0:0-0:0")) +ANY_TOKEN = parser_types.Token( + object(), "*", parser_types.SourceLocation.from_str("0:0-0:0") +) class Reduction( @@ -690,26 +692,7 @@ def state(): # children, setting the source_location to None in that case. 
start_position = None end_position = None - for child in children: - if ( - hasattr(child, "source_location") - and child.source_location is not None - ): - start_position = child.source_location.start - break - for child in reversed(children): - if ( - hasattr(child, "source_location") - and child.source_location is not None - ): - end_position = child.source_location.end - break - if start_position is None: - source_location = None - else: - source_location = parser_types.make_location( - start_position, end_position - ) + source_location = parser_types.merge_source_locations(*children) reduction = Reduction( next_action.rule.lhs, children, next_action.rule, source_location ) diff --git a/compiler/front_end/lr1_test.py b/compiler/front_end/lr1_test.py index 6ca6e67..c77ebcf 100644 --- a/compiler/front_end/lr1_test.py +++ b/compiler/front_end/lr1_test.py @@ -34,7 +34,7 @@ def _tokenize(text): result = [] for i in range(len(text)): result.append( - Token(text[i], parser_types.make_location((1, i + 1), (1, i + 2))) + Token(text[i], parser_types.SourceLocation((1, i + 1), (1, i + 2))) ) return result @@ -209,7 +209,7 @@ def test_goto_table(self): def test_successful_parse(self): parser = _alsu_grammar.parser() - loc = parser_types.parse_location + loc = parser_types.SourceLocation.from_str s_to_c_c = parser_types.Production.parse("S -> C C") c_to_c_c = parser_types.Production.parse("C -> c C") c_to_d = parser_types.Production.parse("C -> d") diff --git a/compiler/front_end/module_ir.py b/compiler/front_end/module_ir.py index 4a459c2..145bba3 100644 --- a/compiler/front_end/module_ir.py +++ b/compiler/front_end/module_ir.py @@ -40,7 +40,7 @@ class _List(object): def __init__(self, l): assert isinstance(l, list), "_List object must wrap list, not '%r'" % l self.list = l - self.source_location = ir_data.Location() + self.source_location = parser_types.SourceLocation() class _ExpressionTail(object): @@ -65,7 +65,7 @@ class _ExpressionTail(object): def __init__(self, operator, expression): self.operator = operator self.expression = expression - self.source_location = ir_data.Location() + self.source_location = parser_types.SourceLocation() class _FieldWithType(object): @@ -76,7 +76,7 @@ class _FieldWithType(object): def __init__(self, field, subtypes=None): self.field = field self.subtypes = subtypes or [] - self.source_location = ir_data.Location() + self.source_location = parser_types.SourceLocation() def build_ir(parse_tree, used_productions=None): @@ -153,11 +153,11 @@ def _really_build_ir(parse_tree, used_productions): ] used_productions.add(parse_tree.production) result = _handlers[parse_tree.production](*parsed_children) - if parse_tree.source_location is not None: - if result.source_location: - ir_data_utils.update(result.source_location, parse_tree.source_location) + if parse_tree.source_location: + if isinstance(result, tuple): + result = result._replace(source_location=parse_tree.source_location) else: - result.source_location = ir_data_utils.copy(parse_tree.source_location) + result.source_location = parse_tree.source_location return result else: # For leaf nodes, the temporary "IR" is just the token. 
Higher-level rules @@ -191,7 +191,7 @@ def handles(f): def _make_prelude_import(position): """Helper function to construct a synthetic ir_data.Import for the prelude.""" - location = parser_types.make_location(position, position) + location = parser_types.SourceLocation(position, position) return ir_data.Import( file_name=ir_data.String(text="", source_location=location), local_name=ir_data.Word(text="", source_location=location), @@ -279,7 +279,7 @@ def _file(leading_newlines, docs, imports, attributes, type_definitions): and not attributes.list and not type_definitions.list ): - module_source_location = parser_types.make_location((1, 1), (1, 1)) + module_source_location = parser_types.SourceLocation((1, 1), (1, 1)) else: module_source_location = None @@ -462,10 +462,10 @@ def _expression(expression): ) def _choice_expression(condition, question, if_true, colon, if_false): """Constructs an IR node for a choice operator (`?:`) expression.""" - location = parser_types.make_location( + location = parser_types.SourceLocation( condition.source_location.start, if_false.source_location.end ) - operator_location = parser_types.make_location( + operator_location = parser_types.SourceLocation( question.source_location.start, colon.source_location.end ) # The function_name is a bit weird, but should suffice for any error @@ -490,7 +490,7 @@ def _no_op_comparative_expression(expression): " additive-expression inequality-operator additive-expression" ) def _comparative_expression(left, operator, right): - location = parser_types.make_location( + location = parser_types.SourceLocation( left.source_location.start, right.source_location.end ) return ir_data.Expression( @@ -538,7 +538,7 @@ def _binary_operator_expression(expression, expression_right): """ e = expression for right in expression_right.list: - location = parser_types.make_location( + location = parser_types.SourceLocation( e.source_location.start, right.source_location.end ) e = ir_data.Expression( @@ -611,7 +611,7 @@ def _chained_comparison_expression(expression, expression_right): comparisons = [] for i in range(0, len(sequence) - 1, 2): left, operator, right = sequence[i : i + 3] - location = parser_types.make_location( + location = parser_types.SourceLocation( left.source_location.start, right.source_location.end ) comparisons.append( @@ -627,7 +627,7 @@ def _chained_comparison_expression(expression, expression_right): ) e = comparisons[0] for comparison in comparisons[1:]: - location = parser_types.make_location( + location = parser_types.SourceLocation( e.source_location.start, comparison.source_location.end ) e = ir_data.Expression( @@ -717,7 +717,7 @@ def _expression_right_production(operator, expression): # allowed, but "+-5" or "-+-something" are not. 
@_handles("negation-expression -> additive-operator bottom-expression") def _negation_expression_with_operator(operator, expression): - phantom_zero_location = ir_data.Location( + phantom_zero_location = parser_types.SourceLocation( start=operator.source_location.start, end=operator.source_location.start ) return ir_data.Expression( @@ -733,7 +733,7 @@ def _negation_expression_with_operator(operator, expression): expression, ], function_name=operator, - source_location=ir_data.Location( + source_location=parser_types.SourceLocation( start=operator.source_location.start, end=expression.source_location.end ), ) @@ -759,7 +759,7 @@ def _bottom_expression_function(function, open_paren, arguments, close_paren): function=_text_to_function(function.text), args=arguments.list, function_name=function, - source_location=ir_data.Location( + source_location=parser_types.SourceLocation( start=function.source_location.start, end=close_paren.source_location.end, ), @@ -811,15 +811,11 @@ def _bottom_expression_from_reference(reference): @_handles("field-reference -> snake-reference field-reference-tail*") def _indirect_field_reference(field_reference, field_references): - if field_references.source_location.HasField("end"): - end_location = field_references.source_location.end - else: - end_location = field_reference.source_location.end return ir_data.Expression( field_reference=ir_data.FieldReference( path=[field_reference] + field_references.list, - source_location=parser_types.make_location( - field_reference.source_location.start, end_location + source_location=parser_types.merge_source_locations( + field_reference, field_references ), ) ) @@ -869,11 +865,8 @@ def _type_definition(type_definition): def _structure(struct, name, parameters, colon, comment, newline, struct_body): """Composes the top-level IR for an Emboss structure.""" del colon, comment, newline # Unused. 
- ir_data_utils.builder(struct_body.structure).source_location.start.CopyFrom( - struct.source_location.start - ) - ir_data_utils.builder(struct_body.structure).source_location.end.CopyFrom( - struct_body.source_location.end + ir_data_utils.builder(struct_body.structure).source_location = ( + parser_types.merge_source_locations(struct, struct_body) ) if struct_body.name: ir_data_utils.update(struct_body.name, name) @@ -969,14 +962,12 @@ def _conditional_block_plus_field_block(conditional_block, block): ) def _unconditional_block_plus_field_block(field, block): """Prepends an unconditional field to block.""" - ir_data_utils.builder(field.field).existence_condition.source_location.CopyFrom( + ir_data_utils.builder(field.field).existence_condition.source_location = ( field.source_location ) ir_data_utils.builder( field.field - ).existence_condition.boolean_constant.source_location.CopyFrom( - field.source_location - ) + ).existence_condition.boolean_constant.source_location = field.source_location ir_data_utils.builder(field.field).existence_condition.boolean_constant.value = True return _List([field] + block.list) @@ -1032,7 +1023,9 @@ def _conditional_field_block( for field in fields.list: condition = ir_data_utils.builder(field.field).existence_condition condition.CopyFrom(expression) - condition.source_location.is_disjoint_from_parent = True + condition.source_location = condition.source_location._replace( + is_disjoint_from_parent=True + ) return fields @@ -1096,11 +1089,9 @@ def _field( field.documentation.extend(field_body.list[0].documentation) if abbreviation.list: field.abbreviation.CopyFrom(abbreviation.list[0]) - field.source_location.start.CopyFrom(location.source_location.start) - if field_body.source_location.HasField("end"): - field.source_location.end.CopyFrom(field_body.source_location.end) - else: - field.source_location.end.CopyFrom(newline.source_location.end) + field.source_location = parser_types.merge_source_locations( + location, newline, field_body + ) return _FieldWithType(field=field_ir) @@ -1121,11 +1112,9 @@ def _virtual_field(let, name, equals, value, comment, newline, field_body): if field_body.list: field.attribute.extend(field_body.list[0].attribute) field.documentation.extend(field_body.list[0].documentation) - field.source_location.start.CopyFrom(let.source_location.start) - if field_body.source_location.HasField("end"): - field.source_location.end.CopyFrom(field_body.source_location.end) - else: - field.source_location.end.CopyFrom(newline.source_location.end) + field.source_location = parser_types.merge_source_locations( + let, newline, field_body + ) return _FieldWithType(field=field_ir) @@ -1192,27 +1181,20 @@ def _inline_type_field(location, name, abbreviation, body): type_name.name.text ) field.type.atomic_type.reference.source_name.extend([type_name.name]) - field.type.atomic_type.reference.source_location.CopyFrom(type_name.source_location) + field.type.atomic_type.reference.source_location = type_name.source_location field.type.atomic_type.reference.is_local_name = True - field.type.atomic_type.source_location.CopyFrom(type_name.source_location) - field.type.source_location.CopyFrom(type_name.source_location) + field.type.atomic_type.source_location = type_name.source_location + field.type.source_location = type_name.source_location if abbreviation.list: field.abbreviation.CopyFrom(abbreviation.list[0]) - field.source_location.start.CopyFrom(location.source_location.start) - ir_data_utils.builder(body.source_location).start.CopyFrom( - 
location.source_location.start - ) + body.source_location = parser_types.merge_source_locations(location, body) if body.HasField("enumeration"): - ir_data_utils.builder(body.enumeration).source_location.CopyFrom( - body.source_location - ) + ir_data_utils.builder(body.enumeration).source_location = body.source_location else: assert body.HasField("structure") - ir_data_utils.builder(body.structure).source_location.CopyFrom( - body.source_location - ) + ir_data_utils.builder(body.structure).source_location = body.source_location ir_data_utils.builder(body).name.CopyFrom(type_name) - field.source_location.end.CopyFrom(body.source_location.end) + field.source_location = parser_types.merge_source_locations(location, body) subtypes = [body] + list(body.subtype) del body.subtype[:] return _FieldWithType(field=field_ir, subtypes=subtypes) @@ -1254,11 +1236,8 @@ def _abbreviation(open_paren, word, close_paren): @_handles('enum -> "enum" type-name ":" Comment? eol enum-body') def _enum(enum, name, colon, comment, newline, enum_body): del colon, comment, newline # Unused. - ir_data_utils.builder(enum_body.enumeration).source_location.start.CopyFrom( - enum.source_location.start - ) - ir_data_utils.builder(enum_body.enumeration).source_location.end.CopyFrom( - enum_body.source_location.end + ir_data_utils.builder(enum_body.enumeration).source_location = ( + parser_types.merge_source_locations(enum, enum_body) ) ir_data_utils.builder(enum_body).name.CopyFrom(name) return enum_body @@ -1311,8 +1290,8 @@ def _enum_value_body(indent, docs, attributes, dedent): @_handles('external -> "external" type-name ":" Comment? eol external-body') def _external(external, name, colon, comment, newline, external_body): del colon, comment, newline # Unused. - ir_data_utils.builder(external_body.source_location).start.CopyFrom( - external.source_location.start + external_body.source_location = parser_types.merge_source_locations( + external, external_body ) if external_body.name: ir_data_utils.update(external_body.name, name) @@ -1328,7 +1307,7 @@ def _external_body(indent, docs, attributes, dedent): return ir_data.TypeDefinition( external=ir_data.External( # Set source_location here, since it won't be set automatically. - source_location=ir_data.Location( + source_location=parser_types.SourceLocation( start=indent.source_location.start, end=dedent.source_location.end ) ), @@ -1366,10 +1345,10 @@ def _type(reference, parameters, size, array_spec): atomic_type_source_location_end = parameters.source_location.end if size.list: base_type_source_location_end = size.source_location.end - base_type_location = parser_types.make_location( + base_type_location = parser_types.SourceLocation( reference.source_location.start, base_type_source_location_end ) - atomic_type_location = parser_types.make_location( + atomic_type_location = parser_types.SourceLocation( reference.source_location.start, atomic_type_source_location_end ) t = ir_data.Type( @@ -1382,7 +1361,7 @@ def _type(reference, parameters, size, array_spec): source_location=base_type_location, ) for length in array_spec.list: - location = parser_types.make_location( + location = parser_types.SourceLocation( t.source_location.start, length.source_location.end ) if isinstance(length, ir_data.Expression): @@ -1524,7 +1503,7 @@ def _auto_array_length_specifier(open_bracket, close_bracket): # Note that the Void's source_location is the space between the brackets (if # any). 
return ir_data.Empty( - source_location=ir_data.Location( + source_location=parser_types.SourceLocation( start=open_bracket.source_location.end, end=close_bracket.source_location.start, ) diff --git a/compiler/front_end/module_ir_test.py b/compiler/front_end/module_ir_test.py index ad884a8..e83675a 100644 --- a/compiler/front_end/module_ir_test.py +++ b/compiler/front_end/module_ir_test.py @@ -454,56 +454,35 @@ "function": "SUBTRACTION", "function_name": { "text": "-", - "source_location": { - "start": { "line": 8, "column": 3 }, - "end": { "line": 8, "column": 4 } - } + "source_location": "8:3-8:4" }, "args": [ { "constant": { "value": "0", - "source_location": { - "start": { "line": 8, "column": 3 }, - "end": { "line": 8, "column": 3 } - } + "source_location": "8:3-8:3" }, - "source_location": { - "start": { "line": 8, "column": 3 }, - "end": { "line": 8, "column": 3 } - } + "source_location": "8:3-8:3" }, { "function": { "function": "ADDITION", "function_name": { "text": "+", - "source_location": { - "start": { "line": 8, "column": 5 }, - "end": { "line": 8, "column": 6 } - } + "source_location": "8:5-8:6" }, "args": [ { "constant": { "value": "0" }, - "source_location": { - "start": { "line": 8, "column": 5 }, - "end": { "line": 8, "column": 5 } - } + "source_location": "8:5-8:5" }, { "constant": { "value": "1" }, - "source_location": { - "start": { "line": 8, "column": 6 }, - "end": { "line": 8, "column": 7 } - } + "source_location": "8:6-8:7" } ] }, - "source_location": { - "start": { "line": 8, "column": 4 }, - "end": { "line": 8, "column": 8 } - } + "source_location": "8:4-8:8" } ] } @@ -513,56 +492,35 @@ "function": "SUBTRACTION", "function_name": { "text": "-", - "source_location": { - "start": { "line": 8, "column": 12 }, - "end": { "line": 8, "column": 13 } - } + "source_location": "8:12-8:13" }, "args": [ { "constant": { "value": "0", - "source_location": { - "start": { "line": 8, "column": 11 }, - "end": { "line": 8, "column": 12 } - } + "source_location": "8:11-8:12" }, - "source_location": { - "start": { "line": 8, "column": 11 }, - "end": { "line": 8, "column": 12 } - } + "source_location": "8:11-8:12" }, { "function": { "function": "SUBTRACTION", "function_name": { "text": "-", - "source_location": { - "start": { "line": 8, "column": 14 }, - "end": { "line": 8, "column": 15 } - } + "source_location": "8:14-8:15" }, "args": [ { "constant": { "value": "0" }, - "source_location": { - "start": { "line": 8, "column": 14 }, - "end": { "line": 8, "column": 14 } - } + "source_location": "8:14-8:14" }, { "constant": { "value": "10" }, - "source_location": { - "start": { "line": 8, "column": 15 }, - "end": { "line": 8, "column": 17 } - } + "source_location": "8:15-8:17" } ] }, - "source_location": { - "start": { "line": 8, "column": 13 }, - "end": { "line": 8, "column": 18 } - } + "source_location": "8:13-8:18" } ] } @@ -642,10 +600,7 @@ } }, "automatic": { - "source_location": { - "start": { "line": 3, "column": 16 }, - "end": { "line": 3, "column": 18 } - } + "source_location": "3:16-3:18" } } }, @@ -675,17 +630,11 @@ "location": { "start": { "constant": { "value": "0" }, - "source_location": { - "start": { "line": 3, "column": 3 }, - "end": { "line": 3, "column": 4 } - } + "source_location": "3:3-3:4" }, "size": { "constant": { "value": "1" }, - "source_location": { - "start": { "line": 3, "column": 9 }, - "end": { "line": 3, "column": 10 } - } + "source_location": "3:9-3:10" } } }, @@ -1825,28 +1774,16 @@ { "file_name": { "text": "", - "source_location": { - "start": { "line": 
1, "column": 1 }, - "end": { "line": 1, "column": 1 } - } + "source_location": "1:1-1:1" }, "local_name": { "text": "", - "source_location": { - "start": { "line": 1, "column": 1 }, - "end": { "line": 1, "column": 1 } - } + "source_location": "1:1-1:1" }, - "source_location": { - "start": { "line": 1, "column": 1 }, - "end": { "line": 1, "column": 1 } - } + "source_location": "1:1-1:1" } ], - "source_location": { - "start": { "line": 1, "column": 1 }, - "end": { "line": 1, "column": 1 } - } + "source_location": "1:1-1:1" } === @@ -3547,15 +3484,9 @@ "reference": { "source_name": [ { "text": "UInt" } ] } }, "size_in_bits": { - "source_location": { - "start": { "line": 3, "column": 15 }, - "end": { "line": 3, "column": 18 } - } + "source_location": "3:15-3:18" }, - "source_location": { - "start": { "line": 3, "column": 11 }, - "end": { "line": 3, "column": 18 } - } + "source_location": "3:11-3:18" }, "name": { "name": { "text": "field" } } } @@ -4025,17 +3956,17 @@ def _check_source_location(source_location, path, min_start, max_end): result = [] start = None end = None - if not source_location.HasField("start"): + if not source_location.start: result.append("{}.start missing".format(path)) else: start = source_location.start - if not source_location.HasField("end"): + if not source_location.end: result.append("{}.end missing".format(path)) else: end = source_location.end if start and end: - if start.HasField("line") and end.HasField("line"): + if start.line and end.line: if start.line > end.line: result.append( "{}.start.line > {}.end.line ({} vs {})".format( @@ -4043,11 +3974,7 @@ def _check_source_location(source_location, path, min_start, max_end): ) ) elif start.line == end.line: - if ( - start.HasField("column") - and end.HasField("column") - and start.column > end.column - ): + if start.column and end.column and start.column > end.column: result.append( "{}.start.column > {}.end.column ({} vs {})".format( path, path, start.column, end.column @@ -4057,12 +3984,12 @@ def _check_source_location(source_location, path, min_start, max_end): for name, field in (("start", start), ("end", end)): if not field: continue - if field.HasField("line"): + if field.line: if field.line <= 0: result.append("{}.{}.line <= 0 ({})".format(path, name, field.line)) else: result.append("{}.{}.line missing".format(path, name)) - if field.HasField("column"): + if field.column: if field.column <= 0: result.append("{}.{}.column <= 0 ({})".format(path, name, field.column)) else: diff --git a/compiler/front_end/symbol_resolver.py b/compiler/front_end/symbol_resolver.py index 0590b0d..bb3bf5b 100644 --- a/compiler/front_end/symbol_resolver.py +++ b/compiler/front_end/symbol_resolver.py @@ -24,6 +24,7 @@ from compiler.util import ir_data from compiler.util import ir_data_utils from compiler.util import ir_util +from compiler.util import parser_types from compiler.util import traverse_ir # TODO(bolms): Symbol resolution raises an exception at the first error, but @@ -167,7 +168,7 @@ def _add_struct_field_to_scope(field, scope, errors): value_builtin_name = ir_data.Word( text="this", - source_location=ir_data.Location(is_synthetic=True), + source_location=parser_types.SourceLocation(is_synthetic=True), ) # In "inside field" scope, the name `this` maps back to the field itself. # This is important for attributes like `[requires]`. 
diff --git a/compiler/front_end/synthetics.py b/compiler/front_end/synthetics.py index f55e32f..94688d4 100644 --- a/compiler/front_end/synthetics.py +++ b/compiler/front_end/synthetics.py @@ -28,7 +28,9 @@ def _mark_as_synthetic(proto): if not isinstance(proto, ir_data.Message): return if hasattr(proto, "source_location"): - ir_data_utils.builder(proto).source_location.is_synthetic = True + proto.source_location = ir_data_utils.reader(proto).source_location._replace( + is_synthetic=True + ) for spec, value in ir_data_utils.get_set_fields(proto): if spec.name != "source_location" and spec.is_dataclass: if spec.is_sequence: @@ -263,7 +265,7 @@ def _maybe_replace_next_keyword_in_expression( expression.CopyFrom(_NEXT_KEYWORD_REPLACEMENT_EXPRESSION) expression.function.args[0].CopyFrom(last_location.start) expression.function.args[1].CopyFrom(last_location.size) - expression.source_location.CopyFrom(original_location) + expression.source_location = original_location _mark_as_synthetic(expression.function) diff --git a/compiler/front_end/tokenizer.py b/compiler/front_end/tokenizer.py index defee34..014217f 100644 --- a/compiler/front_end/tokenizer.py +++ b/compiler/front_end/tokenizer.py @@ -71,7 +71,7 @@ def tokenize(text, file_name): parser_types.Token( '"\\n"', "\n", - parser_types.make_location( + parser_types.SourceLocation( (line_number, len(line) + 1), (line_number, len(line) + 1) ), ) @@ -92,7 +92,7 @@ def tokenize(text, file_name): parser_types.Token( "Indent", leading_whitespace[len(indent_stack[-1]) :], - parser_types.make_location( + parser_types.SourceLocation( (line_number, len(indent_stack[-1]) + 1), (line_number, len(leading_whitespace) + 1), ), @@ -110,7 +110,7 @@ def tokenize(text, file_name): parser_types.Token( "Dedent", "", - parser_types.make_location( + parser_types.SourceLocation( (line_number, len(leading_whitespace) + 1), (line_number, len(leading_whitespace) + 1), ), @@ -122,7 +122,7 @@ def tokenize(text, file_name): [ error.error( file_name, - parser_types.make_location( + parser_types.SourceLocation( (line_number, 1), (line_number, len(leading_whitespace) + 1), ), @@ -138,7 +138,7 @@ def tokenize(text, file_name): parser_types.Token( '"\\n"', "\n", - parser_types.make_location( + parser_types.SourceLocation( (line_number, len(line) + 1), (line_number, len(line) + 1) ), ) @@ -148,7 +148,7 @@ def tokenize(text, file_name): parser_types.Token( "Dedent", "", - parser_types.make_location((line_number + 1, 1), (line_number + 1, 1)), + parser_types.SourceLocation((line_number + 1, 1), (line_number + 1, 1)), ) ) return tokens, [] @@ -250,7 +250,7 @@ def _tokenize_line(line, line_number, file_name): [ error.error( file_name, - parser_types.make_location( + parser_types.SourceLocation( (line_number, offset + 1), (line_number, offset + 2) ), "Unrecognized token", @@ -262,7 +262,7 @@ def _tokenize_line(line, line_number, file_name): parser_types.Token( best_candidate_symbol, best_candidate, - parser_types.make_location( + parser_types.SourceLocation( (line_number, offset + 1), (line_number, offset + len(best_candidate) + 1), ), diff --git a/compiler/front_end/tokenizer_test.py b/compiler/front_end/tokenizer_test.py index 6a301e6..f858348 100644 --- a/compiler/front_end/tokenizer_test.py +++ b/compiler/front_end/tokenizer_test.py @@ -37,7 +37,7 @@ def test_bad_indent_tab_versus_space(self): [ error.error( "file", - parser_types.make_location((2, 1), (2, 2)), + parser_types.SourceLocation((2, 1), (2, 2)), "Bad indentation", ) ] @@ -53,7 +53,7 @@ def 
test_bad_indent_tab_versus_eight_spaces(self): [ error.error( "file", - parser_types.make_location((2, 1), (2, 2)), + parser_types.SourceLocation((2, 1), (2, 2)), "Bad indentation", ) ] @@ -69,7 +69,7 @@ def test_bad_indent_tab_versus_four_spaces(self): [ error.error( "file", - parser_types.make_location((2, 1), (2, 2)), + parser_types.SourceLocation((2, 1), (2, 2)), "Bad indentation", ) ] @@ -85,7 +85,7 @@ def test_bad_indent_two_spaces_versus_one_space(self): [ error.error( "file", - parser_types.make_location((2, 1), (2, 2)), + parser_types.SourceLocation((2, 1), (2, 2)), "Bad indentation", ) ] @@ -101,7 +101,7 @@ def test_bad_indent_matches_closed_indent(self): [ error.error( "file", - parser_types.make_location((4, 1), (4, 2)), + parser_types.SourceLocation((4, 1), (4, 2)), "Bad indentation", ) ] @@ -117,7 +117,7 @@ def test_bad_string_after_string_with_escaped_backslash_at_end(self): [ error.error( "name", - parser_types.make_location((1, 5), (1, 6)), + parser_types.SourceLocation((1, 5), (1, 6)), "Unrecognized token", ) ] @@ -354,7 +354,7 @@ def test_case(self): [ error.error( "name", - parser_types.make_location((1, 1), (1, 2)), + parser_types.SourceLocation((1, 1), (1, 2)), "Unrecognized token", ) ] @@ -382,7 +382,7 @@ def test_case(self): [ error.error( "name", - parser_types.make_location((1, 1), (1, 2)), + parser_types.SourceLocation((1, 1), (1, 2)), "Unrecognized token", ) ] @@ -482,10 +482,7 @@ def make_test_case(case): def test_case(self): self.assertEqual( - [ - parser_types.format_location(l.source_location) - for l in tokenizer.tokenize(case, "file")[0] - ], + [str(l.source_location) for l in tokenizer.tokenize(case, "file")[0]], cases[case], ) diff --git a/compiler/front_end/write_inference.py b/compiler/front_end/write_inference.py index bd4e2ba..4af5ba1 100644 --- a/compiler/front_end/write_inference.py +++ b/compiler/front_end/write_inference.py @@ -19,6 +19,7 @@ from compiler.util import ir_data from compiler.util import ir_data_utils from compiler.util import ir_util +from compiler.util import parser_types from compiler.util import traverse_ir @@ -118,13 +119,13 @@ def _invert_expression(expression, ir): source_name=[ ir_data.Word( text="$logical_value", - source_location=ir_data.Location(is_synthetic=True), + source_location=parser_types.SourceLocation(is_synthetic=True), ) ], - source_location=ir_data.Location(is_synthetic=True), + source_location=parser_types.SourceLocation(is_synthetic=True), ), type=expression.type, - source_location=ir_data.Location(is_synthetic=True), + source_location=parser_types.SourceLocation(is_synthetic=True), ) # This loop essentially starts with: diff --git a/compiler/util/BUILD b/compiler/util/BUILD index ab53344..b0a29a3 100644 --- a/compiler/util/BUILD +++ b/compiler/util/BUILD @@ -28,13 +28,18 @@ py_library( "ir_data_fields.py", "ir_data_utils.py", ], - deps = [], + deps = [ + ":parser_types", + ], ) py_test( name = "ir_data_fields_test", srcs = ["ir_data_fields_test.py"], - deps = [":ir_data"], + deps = [ + ":ir_data", + ":parser_types", + ], ) py_test( @@ -136,9 +141,7 @@ py_test( py_library( name = "parser_types", srcs = ["parser_types.py"], - deps = [ - ":ir_data", - ], + deps = [], ) py_test( diff --git a/compiler/util/error.py b/compiler/util/error.py index c580520..93d7a11 100644 --- a/compiler/util/error.py +++ b/compiler/util/error.py @@ -36,7 +36,6 @@ ] """ -from compiler.util import ir_data_utils from compiler.util import parser_types # Error levels; represented by the strings that will be included in messages. 
@@ -67,26 +66,25 @@ RESET = "\033[0m" -def _copy(location): - location = ir_data_utils.copy(location) +def location_or_default(location): if not location: - location = parser_types.make_location((0, 0), (0, 0)) + return parser_types.SourceLocation((0, 0), (0, 0)) return location def error(source_file, location, message): """Returns an object representing an error message.""" - return _Message(source_file, _copy(location), ERROR, message) + return _Message(source_file, location_or_default(location), ERROR, message) def warn(source_file, location, message): """Returns an object representing a warning.""" - return _Message(source_file, _copy(location), WARNING, message) + return _Message(source_file, location_or_default(location), WARNING, message) def note(source_file, location, message): """Returns and object representing an informational note.""" - return _Message(source_file, _copy(location), NOTE, message) + return _Message(source_file, location_or_default(location), NOTE, message) class _Message(object): @@ -139,7 +137,7 @@ def format(self, source_code): if self.location.is_synthetic: pos = "[compiler bug]" else: - pos = parser_types.format_position(self.location.start) + pos = str(self.location.start) source_name = self.source_file or "[prelude]" if not self.location.is_synthetic and self.source_file in source_code: source_lines = source_code[self.source_file].splitlines() @@ -174,20 +172,7 @@ def format(self, source_code): return result def __repr__(self): - return ( - "Message({source_file!r}, make_location(({start_line!r}, " - "{start_column!r}), ({end_line!r}, {end_column!r}), " - "{is_synthetic!r}), {severity!r}, {message!r})" - ).format( - source_file=self.source_file, - start_line=self.location.start.line, - start_column=self.location.start.column, - end_line=self.location.end.line, - end_column=self.location.end.column, - is_synthetic=self.location.is_synthetic, - severity=self.severity, - message=self.message, - ) + return f"Message({repr(self.source_file)}, {repr(self.location)}, {repr(self.severity)}, {repr(self.message)})" def __eq__(self, other): return ( diff --git a/compiler/util/error_test.py b/compiler/util/error_test.py index 45f9920..169163d 100644 --- a/compiler/util/error_test.py +++ b/compiler/util/error_test.py @@ -25,12 +25,12 @@ class MessageTest(unittest.TestCase): def test_error(self): error_message = error.error( - "foo.emb", parser_types.make_location((3, 4), (3, 6)), "Bad thing" + "foo.emb", parser_types.SourceLocation((3, 4), (3, 6)), "Bad thing" ) self.assertEqual("foo.emb", error_message.source_file) self.assertEqual(error.ERROR, error_message.severity) self.assertEqual( - parser_types.make_location((3, 4), (3, 6)), error_message.location + parser_types.SourceLocation((3, 4), (3, 6)), error_message.location ) self.assertEqual("Bad thing", error_message.message) sourceless_format = error_message.format({}) @@ -40,7 +40,7 @@ def test_error(self): ) self.assertEqual( [ - (error.BOLD, "foo.emb:3:4: "), # Location + (error.BOLD, "foo.emb:3:4: "), # SourceLocation (error.BRIGHT_RED, "error: "), # Severity (error.BOLD, "Bad thing"), # Message ], @@ -52,7 +52,7 @@ def test_error(self): ) self.assertEqual( [ - (error.BOLD, "foo.emb:3:4: "), # Location + (error.BOLD, "foo.emb:3:4: "), # SourceLocation (error.BRIGHT_RED, "error: "), # Severity (error.BOLD, "Bad thing\n"), # Message (error.WHITE, "abcdefghijklm\n"), # Source snippet @@ -63,7 +63,9 @@ def test_error(self): def test_synthetic_error(self): error_message = error.error( - "foo.emb", 
parser_types.make_location((3, 4), (3, 6), True), "Bad thing" + "foo.emb", + parser_types.SourceLocation((3, 4), (3, 6), is_synthetic=True), + "Bad thing", ) sourceless_format = error_message.format({}) sourced_format = error_message.format({"foo.emb": "\n\nabcdefghijklm"}) @@ -73,7 +75,7 @@ def test_synthetic_error(self): ) self.assertEqual( [ - (error.BOLD, "foo.emb:[compiler bug]: "), # Location + (error.BOLD, "foo.emb:[compiler bug]: "), # SourceLocation (error.BRIGHT_RED, "error: "), # Severity (error.BOLD, "Bad thing"), # Message ], @@ -85,7 +87,7 @@ def test_synthetic_error(self): ) self.assertEqual( [ - (error.BOLD, "foo.emb:[compiler bug]: "), # Location + (error.BOLD, "foo.emb:[compiler bug]: "), # SourceLocation (error.BRIGHT_RED, "error: "), # Severity (error.BOLD, "Bad thing"), # Message ], @@ -94,12 +96,12 @@ def test_synthetic_error(self): def test_prelude_as_file_name(self): error_message = error.error( - "", parser_types.make_location((3, 4), (3, 6)), "Bad thing" + "", parser_types.SourceLocation((3, 4), (3, 6)), "Bad thing" ) self.assertEqual("", error_message.source_file) self.assertEqual(error.ERROR, error_message.severity) self.assertEqual( - parser_types.make_location((3, 4), (3, 6)), error_message.location + parser_types.SourceLocation((3, 4), (3, 6)), error_message.location ) self.assertEqual("Bad thing", error_message.message) sourceless_format = error_message.format({}) @@ -110,7 +112,7 @@ def test_prelude_as_file_name(self): ) self.assertEqual( [ - (error.BOLD, "[prelude]:3:4: "), # Location + (error.BOLD, "[prelude]:3:4: "), # SourceLocation (error.BRIGHT_RED, "error: "), # Severity (error.BOLD, "Bad thing"), # Message ], @@ -122,7 +124,7 @@ def test_prelude_as_file_name(self): ) self.assertEqual( [ - (error.BOLD, "[prelude]:3:4: "), # Location + (error.BOLD, "[prelude]:3:4: "), # SourceLocation (error.BRIGHT_RED, "error: "), # Severity (error.BOLD, "Bad thing\n"), # Message (error.WHITE, "abcdefghijklm\n"), # Source snippet @@ -133,12 +135,12 @@ def test_prelude_as_file_name(self): def test_multiline_error_source(self): error_message = error.error( - "foo.emb", parser_types.make_location((3, 4), (4, 6)), "Bad thing" + "foo.emb", parser_types.SourceLocation((3, 4), (4, 6)), "Bad thing" ) self.assertEqual("foo.emb", error_message.source_file) self.assertEqual(error.ERROR, error_message.severity) self.assertEqual( - parser_types.make_location((3, 4), (4, 6)), error_message.location + parser_types.SourceLocation((3, 4), (4, 6)), error_message.location ) self.assertEqual("Bad thing", error_message.message) sourceless_format = error_message.format({}) @@ -150,7 +152,7 @@ def test_multiline_error_source(self): ) self.assertEqual( [ - (error.BOLD, "foo.emb:3:4: "), # Location + (error.BOLD, "foo.emb:3:4: "), # SourceLocation (error.BRIGHT_RED, "error: "), # Severity (error.BOLD, "Bad thing"), # Message ], @@ -162,7 +164,7 @@ def test_multiline_error_source(self): ) self.assertEqual( [ - (error.BOLD, "foo.emb:3:4: "), # Location + (error.BOLD, "foo.emb:3:4: "), # SourceLocation (error.BRIGHT_RED, "error: "), # Severity (error.BOLD, "Bad thing\n"), # Message (error.WHITE, "abcdefghijklm\n"), # Source snippet @@ -174,13 +176,13 @@ def test_multiline_error_source(self): def test_multiline_error(self): error_message = error.error( "foo.emb", - parser_types.make_location((3, 4), (3, 6)), + parser_types.SourceLocation((3, 4), (3, 6)), "Bad thing\nSome explanation\nMore explanation", ) self.assertEqual("foo.emb", error_message.source_file) self.assertEqual(error.ERROR, 
error_message.severity) self.assertEqual( - parser_types.make_location((3, 4), (3, 6)), error_message.location + parser_types.SourceLocation((3, 4), (3, 6)), error_message.location ) self.assertEqual( "Bad thing\nSome explanation\nMore explanation", error_message.message @@ -197,13 +199,13 @@ def test_multiline_error(self): ) self.assertEqual( [ - (error.BOLD, "foo.emb:3:4: "), # Location + (error.BOLD, "foo.emb:3:4: "), # SourceLocation (error.BRIGHT_RED, "error: "), # Severity (error.BOLD, "Bad thing\n"), # Message - (error.BOLD, "foo.emb:3:4: "), # Location, line 2 + (error.BOLD, "foo.emb:3:4: "), # SourceLocation, line 2 (error.WHITE, "note: "), # "Note" severity, line 2 (error.WHITE, "Some explanation\n"), # Message, line 2 - (error.BOLD, "foo.emb:3:4: "), # Location, line 3 + (error.BOLD, "foo.emb:3:4: "), # SourceLocation, line 3 (error.WHITE, "note: "), # "Note" severity, line 3 (error.WHITE, "More explanation"), # Message, line 3 ], @@ -219,13 +221,13 @@ def test_multiline_error(self): ) self.assertEqual( [ - (error.BOLD, "foo.emb:3:4: "), # Location + (error.BOLD, "foo.emb:3:4: "), # SourceLocation (error.BRIGHT_RED, "error: "), # Severity (error.BOLD, "Bad thing\n"), # Message - (error.BOLD, "foo.emb:3:4: "), # Location, line 2 + (error.BOLD, "foo.emb:3:4: "), # SourceLocation, line 2 (error.WHITE, "note: "), # "Note" severity, line 2 (error.WHITE, "Some explanation\n"), # Message, line 2 - (error.BOLD, "foo.emb:3:4: "), # Location, line 3 + (error.BOLD, "foo.emb:3:4: "), # SourceLocation, line 3 (error.WHITE, "note: "), # "Note" severity, line 3 (error.WHITE, "More explanation\n"), # Message, line 3 (error.WHITE, "abcdefghijklm\n"), # Source snippet @@ -236,12 +238,12 @@ def test_multiline_error(self): def test_warn(self): warning_message = error.warn( - "foo.emb", parser_types.make_location((3, 4), (3, 6)), "Not good thing" + "foo.emb", parser_types.SourceLocation((3, 4), (3, 6)), "Not good thing" ) self.assertEqual("foo.emb", warning_message.source_file) self.assertEqual(error.WARNING, warning_message.severity) self.assertEqual( - parser_types.make_location((3, 4), (3, 6)), warning_message.location + parser_types.SourceLocation((3, 4), (3, 6)), warning_message.location ) self.assertEqual("Not good thing", warning_message.message) sourced_format = warning_message.format({"foo.emb": "\n\nabcdefghijklm"}) @@ -251,7 +253,7 @@ def test_warn(self): ) self.assertEqual( [ - (error.BOLD, "foo.emb:3:4: "), # Location + (error.BOLD, "foo.emb:3:4: "), # SourceLocation (error.BRIGHT_YELLOW, "warning: "), # Severity (error.BOLD, "Not good thing\n"), # Message (error.WHITE, "abcdefghijklm\n"), # Source snippet @@ -262,12 +264,12 @@ def test_warn(self): def test_note(self): note_message = error.note( - "foo.emb", parser_types.make_location((3, 4), (3, 6)), "OK thing" + "foo.emb", parser_types.SourceLocation((3, 4), (3, 6)), "OK thing" ) self.assertEqual("foo.emb", note_message.source_file) self.assertEqual(error.NOTE, note_message.severity) self.assertEqual( - parser_types.make_location((3, 4), (3, 6)), note_message.location + parser_types.SourceLocation((3, 4), (3, 6)), note_message.location ) self.assertEqual("OK thing", note_message.message) sourced_format = note_message.format({"foo.emb": "\n\nabcdefghijklm"}) @@ -277,7 +279,7 @@ def test_note(self): ) self.assertEqual( [ - (error.BOLD, "foo.emb:3:4: "), # Location + (error.BOLD, "foo.emb:3:4: "), # SourceLocation (error.WHITE, "note: "), # Severity (error.WHITE, "OK thing\n"), # Message (error.WHITE, "abcdefghijklm\n"), # Source snippet 
@@ -288,27 +290,31 @@ def test_note(self): def test_equality(self): note_message = error.note( - "foo.emb", parser_types.make_location((3, 4), (3, 6)), "thing" + "foo.emb", parser_types.SourceLocation((3, 4), (3, 6)), "thing" ) self.assertEqual( note_message, - error.note("foo.emb", parser_types.make_location((3, 4), (3, 6)), "thing"), + error.note("foo.emb", parser_types.SourceLocation((3, 4), (3, 6)), "thing"), ) self.assertNotEqual( note_message, - error.warn("foo.emb", parser_types.make_location((3, 4), (3, 6)), "thing"), + error.warn("foo.emb", parser_types.SourceLocation((3, 4), (3, 6)), "thing"), ) self.assertNotEqual( note_message, - error.note("foo2.emb", parser_types.make_location((3, 4), (3, 6)), "thing"), + error.note( + "foo2.emb", parser_types.SourceLocation((3, 4), (3, 6)), "thing" + ), ) self.assertNotEqual( note_message, - error.note("foo.emb", parser_types.make_location((2, 4), (3, 6)), "thing"), + error.note("foo.emb", parser_types.SourceLocation((2, 4), (3, 6)), "thing"), ) self.assertNotEqual( note_message, - error.note("foo.emb", parser_types.make_location((3, 4), (3, 6)), "thing2"), + error.note( + "foo.emb", parser_types.SourceLocation((3, 4), (3, 6)), "thing2" + ), ) @@ -348,43 +354,43 @@ class SplitErrorsTest(unittest.TestCase): def test_split_errors(self): user_error = [ error.error( - "foo.emb", parser_types.make_location((1, 2), (3, 4)), "Bad thing" + "foo.emb", parser_types.SourceLocation((1, 2), (3, 4)), "Bad thing" ), error.note( "foo.emb", - parser_types.make_location((3, 4), (5, 6)), + parser_types.SourceLocation((3, 4), (5, 6)), "Note: bad thing referrent", ), ] user_error_2 = [ error.error( - "foo.emb", parser_types.make_location((8, 9), (10, 11)), "Bad thing" + "foo.emb", parser_types.SourceLocation((8, 9), (10, 11)), "Bad thing" ), error.note( "foo.emb", - parser_types.make_location((10, 11), (12, 13)), + parser_types.SourceLocation((10, 11), (12, 13)), "Note: bad thing referrent", ), ] synthetic_error = [ error.error( - "foo.emb", parser_types.make_location((1, 2), (3, 4)), "Bad thing" + "foo.emb", parser_types.SourceLocation((1, 2), (3, 4)), "Bad thing" ), error.note( "foo.emb", - parser_types.make_location((3, 4), (5, 6), True), + parser_types.SourceLocation((3, 4), (5, 6), is_synthetic=True), "Note: bad thing referrent", ), ] synthetic_error_2 = [ error.error( "foo.emb", - parser_types.make_location((8, 9), (10, 11), True), + parser_types.SourceLocation((8, 9), (10, 11), is_synthetic=True), "Bad thing", ), error.note( "foo.emb", - parser_types.make_location((10, 11), (12, 13)), + parser_types.SourceLocation((10, 11), (12, 13)), "Note: bad thing referrent", ), ] @@ -407,33 +413,33 @@ def test_split_errors(self): def test_filter_errors(self): user_error = [ error.error( - "foo.emb", parser_types.make_location((1, 2), (3, 4)), "Bad thing" + "foo.emb", parser_types.SourceLocation((1, 2), (3, 4)), "Bad thing" ), error.note( "foo.emb", - parser_types.make_location((3, 4), (5, 6)), + parser_types.SourceLocation((3, 4), (5, 6)), "Note: bad thing referrent", ), ] synthetic_error = [ error.error( - "foo.emb", parser_types.make_location((1, 2), (3, 4)), "Bad thing" + "foo.emb", parser_types.SourceLocation((1, 2), (3, 4)), "Bad thing" ), error.note( "foo.emb", - parser_types.make_location((3, 4), (5, 6), True), + parser_types.SourceLocation((3, 4), (5, 6), is_synthetic=True), "Note: bad thing referrent", ), ] synthetic_error_2 = [ error.error( "foo.emb", - parser_types.make_location((8, 9), (10, 11), True), + parser_types.SourceLocation((8, 9), (10, 11), 
is_synthetic=True), "Bad thing", ), error.note( "foo.emb", - parser_types.make_location((10, 11), (12, 13)), + parser_types.SourceLocation((10, 11), (12, 13)), "Note: bad thing referrent", ), ] @@ -447,7 +453,7 @@ class FormatErrorsTest(unittest.TestCase): def test_format_errors(self): errors = [ - [error.note("foo.emb", parser_types.make_location((3, 4), (3, 6)), "note")] + [error.note("foo.emb", parser_types.SourceLocation((3, 4), (3, 6)), "note")] ] sources = {"foo.emb": "x\ny\nz bcd\nq\n"} self.assertEqual( diff --git a/compiler/util/ir_data.py b/compiler/util/ir_data.py index cd12b96..d37c48a 100644 --- a/compiler/util/ir_data.py +++ b/compiler/util/ir_data.py @@ -17,12 +17,14 @@ This is limited to purely data and type annotations. """ +import collections import dataclasses import enum import sys from typing import ClassVar, Optional from compiler.util import ir_data_fields +from compiler.util import parser_types @dataclasses.dataclass @@ -111,37 +113,6 @@ def HasField(self, name): # pylint:disable=invalid-name # From here to the end of the file are actual structure definitions. -@dataclasses.dataclass -class Position(Message): - """A zero-width position within a source file.""" - - line: int = 0 - """Line (starts from 1).""" - column: int = 0 - """Column (starts from 1).""" - - -@dataclasses.dataclass -class Location(Message): - """A half-open start:end range within a source file.""" - - start: Optional[Position] = None - """Beginning of the range""" - end: Optional[Position] = None - """One column past the end of the range.""" - - is_disjoint_from_parent: Optional[bool] = None - """True if this Location is outside of the parent object's Location.""" - - is_synthetic: Optional[bool] = None - """True if this Location's parent was synthesized, and does not directly - appear in the source file. - - The Emboss front end uses this field to cull - irrelevant error messages. - """ - - @dataclasses.dataclass class Word(Message): """IR for a bare word in the source file. @@ -150,7 +121,7 @@ class Word(Message): """ text: Optional[str] = None - source_location: Optional[Location] = None + source_location: Optional[parser_types.SourceLocation] = None @dataclasses.dataclass @@ -158,13 +129,13 @@ class String(Message): """IR for a string in the source file.""" text: Optional[str] = None - source_location: Optional[Location] = None + source_location: Optional[parser_types.SourceLocation] = None @dataclasses.dataclass class Documentation(Message): text: Optional[str] = None - source_location: Optional[Location] = None + source_location: Optional[parser_types.SourceLocation] = None @dataclasses.dataclass @@ -172,14 +143,14 @@ class BooleanConstant(Message): """IR for a boolean constant.""" value: Optional[bool] = None - source_location: Optional[Location] = None + source_location: Optional[parser_types.SourceLocation] = None @dataclasses.dataclass class Empty(Message): """Placeholder message for automatic element counts for arrays.""" - source_location: Optional[Location] = None + source_location: Optional[parser_types.SourceLocation] = None @dataclasses.dataclass @@ -192,7 +163,7 @@ class NumericConstant(Message): # TODO(bolms): switch back to int, and just use strings during # serialization, now that we're free of proto. 
value: Optional[str] = None - source_location: Optional[Location] = None + source_location: Optional[parser_types.SourceLocation] = None class FunctionMapping(int, enum.Enum): @@ -240,7 +211,7 @@ class Function(Message): function: Optional[FunctionMapping] = None args: list["Expression"] = ir_data_fields.list_field(lambda: Expression) function_name: Optional[Word] = None - source_location: Optional[Location] = None + source_location: Optional[parser_types.SourceLocation] = None @dataclasses.dataclass @@ -310,7 +281,7 @@ class NameDefinition(Message): should not be visible outside of its immediate namespace. """ - source_location: Optional[Location] = None + source_location: Optional[parser_types.SourceLocation] = None """The location of this NameDefinition in source code.""" @@ -356,7 +327,7 @@ class Reference(Message): # TODO(bolms): Allow absolute paths starting with ".". - source_location: Optional[Location] = None + source_location: Optional[parser_types.SourceLocation] = None """Note that this is the source_location of the *Reference*, not of the object to which it refers. """ @@ -406,7 +377,7 @@ class FieldReference(Message): # TODO(bolms): Make the above change before declaring the IR to be "stable". path: list[Reference] = ir_data_fields.list_field(Reference) - source_location: Optional[Location] = None + source_location: Optional[parser_types.SourceLocation] = None @dataclasses.dataclass @@ -509,7 +480,7 @@ class Expression(Message): builtin_reference: Optional[Reference] = ir_data_fields.oneof_field("expression") type: Optional[ExpressionType] = None - source_location: Optional[Location] = None + source_location: Optional[parser_types.SourceLocation] = None @dataclasses.dataclass @@ -521,7 +492,7 @@ class ArrayType(Message): element_count: Optional[Expression] = ir_data_fields.oneof_field("size") automatic: Optional[Empty] = ir_data_fields.oneof_field("size") - source_location: Optional[Location] = None + source_location: Optional[parser_types.SourceLocation] = None @dataclasses.dataclass @@ -530,7 +501,7 @@ class AtomicType(Message): reference: Optional[Reference] = None runtime_parameter: list[Expression] = ir_data_fields.list_field(Expression) - source_location: Optional[Location] = None + source_location: Optional[parser_types.SourceLocation] = None @dataclasses.dataclass @@ -541,7 +512,7 @@ class Type(Message): array_type: Optional[ArrayType] = ir_data_fields.oneof_field("type") size_in_bits: Optional[Expression] = None - source_location: Optional[Location] = None + source_location: Optional[parser_types.SourceLocation] = None @dataclasses.dataclass @@ -553,7 +524,7 @@ class AttributeValue(Message): expression: Optional[Expression] = ir_data_fields.oneof_field("value") string_constant: Optional[String] = ir_data_fields.oneof_field("value") - source_location: Optional[Location] = None + source_location: Optional[parser_types.SourceLocation] = None @dataclasses.dataclass @@ -564,7 +535,7 @@ class Attribute(Message): value: Optional[AttributeValue] = None back_end: Optional[Word] = None is_default: Optional[bool] = None - source_location: Optional[Location] = None + source_location: Optional[parser_types.SourceLocation] = None @dataclasses.dataclass @@ -618,7 +589,7 @@ class FieldLocation(Message): start: Optional[Expression] = None size: Optional[Expression] = None - source_location: Optional[Location] = None + source_location: Optional[parser_types.SourceLocation] = None @dataclasses.dataclass @@ -689,7 +660,7 @@ class Field(Message): # 
pylint:disable=too-many-instance-attributes `bar`: those fields only conditionally exist in the structure. """ - source_location: Optional[Location] = None + source_location: Optional[parser_types.SourceLocation] = None @dataclasses.dataclass @@ -733,7 +704,7 @@ class Structure(Message): be `{ 0, 1, 2, 3, ... }`. """ - source_location: Optional[Location] = None + source_location: Optional[parser_types.SourceLocation] = None @dataclasses.dataclass @@ -743,7 +714,7 @@ class External(Message): # Externals have no values other than name and attribute list, which are # common to all type definitions. - source_location: Optional[Location] = None + source_location: Optional[parser_types.SourceLocation] = None @dataclasses.dataclass @@ -759,7 +730,7 @@ class EnumValue(Message): attribute: list[Attribute] = ir_data_fields.list_field(Attribute) """Value-specific attributes.""" - source_location: Optional[Location] = None + source_location: Optional[parser_types.SourceLocation] = None @dataclasses.dataclass @@ -767,7 +738,7 @@ class Enum(Message): """IR for an enumerated type definition.""" value: list[EnumValue] = ir_data_fields.list_field(EnumValue) - source_location: Optional[Location] = None + source_location: Optional[parser_types.SourceLocation] = None @dataclasses.dataclass @@ -778,7 +749,7 @@ class Import(Message): """The file to import.""" local_name: Optional[Word] = None """The name to use within this module.""" - source_location: Optional[Location] = None + source_location: Optional[parser_types.SourceLocation] = None @dataclasses.dataclass @@ -808,7 +779,7 @@ class RuntimeParameter(Message): is filled in after initial parsing is finished. """ - source_location: Optional[Location] = None + source_location: Optional[parser_types.SourceLocation] = None class AddressableUnit(int, enum.Enum): @@ -852,7 +823,7 @@ class TypeDefinition(Message): These are currently only allowed on structures, but in the future they should be allowed on externals. 
""" - source_location: Optional[Location] = None + source_location: Optional[parser_types.SourceLocation] = None @dataclasses.dataclass @@ -869,7 +840,7 @@ class Module(Message): """Other modules imported.""" source_text: Optional[str] = None """The original source code.""" - source_location: Optional[Location] = None + source_location: Optional[parser_types.SourceLocation] = None """Source code covered by this IR.""" source_file_name: Optional[str] = None """Name of the source file.""" diff --git a/compiler/util/ir_data_fields_test.py b/compiler/util/ir_data_fields_test.py index 0376b69..36264b8 100644 --- a/compiler/util/ir_data_fields_test.py +++ b/compiler/util/ir_data_fields_test.py @@ -22,6 +22,7 @@ from compiler.util import ir_data from compiler.util import ir_data_fields +from compiler.util import parser_types class TestEnum(enum.Enum): diff --git a/compiler/util/ir_data_utils.py b/compiler/util/ir_data_utils.py index e7679a6..f4c9575 100644 --- a/compiler/util/ir_data_utils.py +++ b/compiler/util/ir_data_utils.py @@ -29,14 +29,14 @@ def set_function_name_end(function: Function): if not function.function_name: function.function_name = Word() if not function.function_name.source_location: - function.function_name.source_location = Location() - word.source_location.end = Position(line=1,column=2) + function.function_name.source_location = SourceLocation() + word.source_location.end = SourcePosition(line=1,column=2) ``` We can do: ``` def set_function_name_end(function: Function): - builder(function).function_name.source_location.end = Position(line=1, + builder(function).function_name.source_location.end = SourcePosition(line=1, column=2) ``` @@ -79,6 +79,7 @@ def is_leaf_synthetic(data): from compiler.util import ir_data from compiler.util import ir_data_fields +from compiler.util import parser_types MessageT = TypeVar("MessageT", bound=ir_data.Message) @@ -106,11 +107,14 @@ def _to_dict( assert ir is not None values: MutableMapping[str, Any] = {} for spec, value in field_func(ir): - if value is not None and spec.is_dataclass: - if spec.is_sequence: - value = [self._to_dict(v, field_func) for v in value] - else: - value = self._to_dict(value, field_func) + if value is not None: + if spec.is_dataclass: + if spec.is_sequence: + value = [self._to_dict(v, field_func) for v in value] + else: + value = self._to_dict(value, field_func) + elif spec.data_type == parser_types.SourceLocation: + value = str(value) values[spec.name] = value return values @@ -182,6 +186,8 @@ def _from_dict(data_cls: type[MessageT], data): class_fields[name] = IrDataSerializer._enum_type_converter( spec.data_type, value ) + elif spec.data_type == parser_types.SourceLocation: + class_fields[name] = parser_types.SourceLocation.from_str(value) else: if spec.is_sequence: class_fields[name] = value diff --git a/compiler/util/ir_data_utils_test.py b/compiler/util/ir_data_utils_test.py index 19ae579..2cadf06 100644 --- a/compiler/util/ir_data_utils_test.py +++ b/compiler/util/ir_data_utils_test.py @@ -23,6 +23,7 @@ from compiler.util import ir_data from compiler.util import ir_data_fields from compiler.util import ir_data_utils +from compiler.util import parser_types class TestEnum(enum.Enum): @@ -103,7 +104,7 @@ def test_field_specs(self): # Try a IR data class expected_field = ir_data_fields.make_field_spec( "source_location", - ir_data.Location, + parser_types.SourceLocation, ir_data_fields.FieldContainer.OPTIONAL, None, ) @@ -119,11 +120,11 @@ def test_field_specs(self): self.assertEqual(fields["external"], 
expected_field) # Try non-optional scalar - fields = ir_data_utils.field_specs(ir_data.Position) + fields = ir_data_utils.field_specs(ir_data.CanonicalName) expected_field = ir_data_fields.make_field_spec( - "line", int, ir_data_fields.FieldContainer.NONE, None + "module_file", str, ir_data_fields.FieldContainer.NONE, None ) - self.assertEqual(fields["line"], expected_field) + self.assertEqual(fields["module_file"], expected_field) fields = ir_data_utils.field_specs(ir_data.ArrayType) expected_field = ir_data_fields.make_field_spec( @@ -330,9 +331,9 @@ def test_ir_data_builder_sequence(self): def test_copy_from(self) -> None: """Tests that `CopyFrom` works.""" - location = ir_data.Location( - start=ir_data.Position(line=1, column=1), - end=ir_data.Position(line=1, column=2), + location = parser_types.SourceLocation( + start=parser_types.SourcePosition(line=1, column=1), + end=parser_types.SourcePosition(line=1, column=2), ) expression_ir = ir_data.Expression(source_location=location) template: ir_data.Expression = expression_parser.parse("x + y") @@ -355,9 +356,9 @@ def test_copy_from_list(self): self.assertIsInstance(template.function, ir_data.Function) self.assertIsInstance(template.function.args, ir_data_fields.CopyValuesList) - location = ir_data.Location( - start=ir_data.Position(line=1, column=1), - end=ir_data.Position(line=1, column=2), + location = parser_types.SourceLocation( + start=parser_types.SourcePosition(line=1, column=1), + end=parser_types.SourcePosition(line=1, column=2), ) expression_ir = ir_data.Expression(source_location=location) self.assertIsInstance(expression_ir, ir_data.Expression) @@ -515,11 +516,7 @@ def test_from_dict_list(self): { "constant": { "value": "0", - "source_location": { - "start": {"line": 421, "column": 3}, - "end": {"line": 421, "column": 4}, - "is_synthetic": False, - }, + "source_location": "421:3-421:4", }, "type": { "integer": { @@ -529,20 +526,12 @@ def test_from_dict_list(self): "maximum_value": "0", } }, - "source_location": { - "start": {"line": 421, "column": 3}, - "end": {"line": 421, "column": 4}, - "is_synthetic": False, - }, + "source_location": "421:3-421:4", }, { "constant": { "value": "1", - "source_location": { - "start": {"line": 421, "column": 11}, - "end": {"line": 421, "column": 12}, - "is_synthetic": False, - }, + "source_location": "421:11-421:12", }, "type": { "integer": { @@ -552,11 +541,7 @@ def test_from_dict_list(self): "maximum_value": "1", } }, - "source_location": { - "start": {"line": 421, "column": 11}, - "end": {"line": 421, "column": 12}, - "is_synthetic": False, - }, + "source_location": "421:11-421:12", }, ] function_data = {"args": function_args} diff --git a/compiler/util/ir_util_test.py b/compiler/util/ir_util_test.py index 07cf4dc..b3deeb2 100644 --- a/compiler/util/ir_util_test.py +++ b/compiler/util/ir_util_test.py @@ -782,13 +782,13 @@ def test_get_base_type(self): "base_type": { "atomic_type": { "reference": { }, - "source_location": { "start": { "line": 5 } } + "source_location": "5:1-6:1" } }, - "source_location": { "start": { "line": 4 } } + "source_location": "4:1-6:1" } }, - "source_location": { "start": { "line": 3 } } + "source_location": "3:1-6:1" } }""", ) diff --git a/compiler/util/parser_types.py b/compiler/util/parser_types.py index fffe086..bdff0d6 100644 --- a/compiler/util/parser_types.py +++ b/compiler/util/parser_types.py @@ -12,72 +12,139 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-"""Various types shared through multiple passes of parsing. +"""Types related to the LR(1) parser. -This module contains types used as interfaces between parts of the Emboss front -end. These types do not really "belong" to either the producers or consumers, -and in a few cases placing them in one or the other creates unnecessary -dependencies, so they are defined here. +This module contains types used by the LR(1) parser, which are also used in +other parts of the compiler: + + SourcePosition: a position (zero-width) within a source file. + SourceLocation: a span within a source file. + Production: a grammar production. + Token: a token; lr1.Parser operates on lists of tokens. """ import collections -from compiler.util import ir_data - - -def _make_position(line, column): - """Makes an ir_data.Position from line, column ints.""" - if not isinstance(line, int): - raise ValueError("Bad line {!r}".format(line)) - elif not isinstance(column, int): - raise ValueError("Bad column {!r}".format(column)) - return ir_data.Position(line=line, column=column) +PositionTuple = collections.namedtuple( + "PositionTuple", ["line", "column"], defaults=[0, 0] +) -def _parse_position(text): - """Parses an ir_data.Position from "line:column" (e.g., "1:2").""" - line, column = text.split(":") - return _make_position(int(line), int(column)) +class SourcePosition(PositionTuple): + """A zero-width position within a source file.""" -def format_position(position): - """formats an ir_data.Position to "line:column" form.""" - return "{}:{}".format(position.line, position.column) + def __new__(cls, /, line=0, column=0): + assert isinstance(line, int), f"line {line} is not int" + assert isinstance(column, int), f"column {column} is not int" + assert line >= 0, f"line {line} is negative" + assert column >= 0, f"column {column} is negative" + return PositionTuple.__new__(cls, line, column) + def __str__(self): + return f"{self.line}:{self.column}" -def make_location(start, end, is_synthetic=False): - """Makes an ir_data.Location from (line, column) tuples or ir_data.Positions.""" - if isinstance(start, tuple): - start = _make_position(*start) - if isinstance(end, tuple): - end = _make_position(*end) - if not isinstance(start, ir_data.Position): - raise ValueError("Bad start {!r}".format(start)) - elif not isinstance(end, ir_data.Position): - raise ValueError("Bad end {!r}".format(end)) - elif start.line > end.line or ( - start.line == end.line and start.column > end.column + @staticmethod + def from_str(value): + try: + l, c = value.split(":") + return SourcePosition(line=int(l.strip()), column=int(c.strip())) + except Exception as e: + raise ValueError(f"{repr(value)} is not a valid SourcePosition.") + + def __bool__(self): + return bool(self.line or self.column) + + +LocationTuple = collections.namedtuple( + "LocationTuple", + ["start", "end", "is_disjoint_from_parent", "is_synthetic"], + defaults=[SourcePosition(), SourcePosition(), False, False], +) + + +class SourceLocation(LocationTuple): + """A half-open start:end range within a source file.""" + + def __new__( + cls, + /, + start=SourcePosition(), + end=SourcePosition(), + *, + is_disjoint_from_parent=False, + is_synthetic=False, ): - raise ValueError( - "Start {} is after end {}".format( - format_position(start), format_position(end) - ) + if not isinstance(start, SourcePosition): + start = SourcePosition(*start) + if not isinstance(end, SourcePosition): + end = SourcePosition(*end) + assert start <= end, f"start {start} is after end {end}" + assert isinstance( 
+ is_disjoint_from_parent, bool + ), f"is_disjoint_from_parent {is_disjoint_from_parent} is not bool" + assert isinstance( + is_synthetic, bool + ), f"is_synthetic {is_synthetic} is not bool" + return LocationTuple.__new__( + cls, start, end, is_disjoint_from_parent, is_synthetic ) - return ir_data.Location(start=start, end=end, is_synthetic=is_synthetic) + def __str__(self): + suffix = "" + if self.is_disjoint_from_parent: + suffix += "^" + if self.is_synthetic: + suffix += "*" + return ( + f"{self.start or SourcePosition()}-{self.end or SourcePosition()}{suffix}" + ) -def format_location(location): - """Formats an ir_data.Location in format "1:2-3:4" ("start-end").""" - return "{}-{}".format( - format_position(location.start), format_position(location.end) + @staticmethod + def from_str(value): + original_value = value + try: + is_synthetic = False + if value[-1] == "*": + is_synthetic = True + value = value[:-1] + is_disjoint_from_parent = False + if value[-1] == "^": + is_disjoint_from_parent = True + value = value[:-1] + start, end = value.split("-") + return SourceLocation( + start=SourcePosition.from_str(start), + end=SourcePosition.from_str(end), + is_synthetic=is_synthetic, + is_disjoint_from_parent=is_disjoint_from_parent, + ) + except Exception as e: + raise ValueError(f"{repr(original_value)} is not a valid SourceLocation.") + + def __bool__(self): + return bool(self.start or self.end) + + +def merge_source_locations(*nodes): + locations = [ + node.source_location + for node in nodes + if hasattr(node, "source_location") and node.source_location + ] + if not locations: + return None + start = locations[0].start + end = locations[-1].end + is_synthetic = any(l.is_synthetic for l in locations) + is_disjoint_from_parent = any(l.is_disjoint_from_parent for l in locations) + return SourceLocation( + start=start, + end=end, + is_synthetic=is_synthetic, + is_disjoint_from_parent=is_disjoint_from_parent, ) -def parse_location(text): - """Parses an ir_data.Location from format "1:2-3:4" ("start-end").""" - start, end = text.split("-") - return make_location(_parse_position(start), _parse_position(end)) - - class Token(collections.namedtuple("Token", ["symbol", "text", "source_location"])): """A Token is a chunk of text from a source file, and a classification. 
@@ -89,7 +156,7 @@ class Token(collections.namedtuple("Token", ["symbol", "text", "source_location" def __str__(self): return "{} {} {}".format( - self.symbol, repr(self.text), format_location(self.source_location) + self.symbol, repr(self.text), str(self.source_location) ) diff --git a/compiler/util/parser_types_test.py b/compiler/util/parser_types_test.py index 097ece4..325cdef 100644 --- a/compiler/util/parser_types_test.py +++ b/compiler/util/parser_types_test.py @@ -20,82 +20,69 @@ class PositionTest(unittest.TestCase): - """Tests for Position-related functions in parser_types.""" - - def test_format_position(self): - self.assertEqual( - "1:2", parser_types.format_position(ir_data.Position(line=1, column=2)) - ) + """Tests for SourcePosition-related functions in parser_types.""" class LocationTest(unittest.TestCase): - """Tests for Location-related functions in parser_types.""" + """Tests for SourceLocation-related functions in parser_types.""" - def test_make_location(self): + def test_location_new(self): self.assertEqual( - ir_data.Location( - start=ir_data.Position(line=1, column=2), - end=ir_data.Position(line=3, column=4), + parser_types.SourceLocation( + start=parser_types.SourcePosition(line=1, column=2), + end=parser_types.SourcePosition(line=3, column=4), is_synthetic=False, ), - parser_types.make_location((1, 2), (3, 4)), + parser_types.SourceLocation((1, 2), (3, 4)), ) self.assertEqual( - ir_data.Location( - start=ir_data.Position(line=1, column=2), - end=ir_data.Position(line=3, column=4), + parser_types.SourceLocation( + start=parser_types.SourcePosition(line=1, column=2), + end=parser_types.SourcePosition(line=3, column=4), is_synthetic=False, ), - parser_types.make_location( - ir_data.Position(line=1, column=2), ir_data.Position(line=3, column=4) + parser_types.SourceLocation( + parser_types.SourcePosition(line=1, column=2), + parser_types.SourcePosition(line=3, column=4), ), ) def test_make_synthetic_location(self): self.assertEqual( - ir_data.Location( - start=ir_data.Position(line=1, column=2), - end=ir_data.Position(line=3, column=4), + parser_types.SourceLocation( + start=parser_types.SourcePosition(line=1, column=2), + end=parser_types.SourcePosition(line=3, column=4), is_synthetic=True, ), - parser_types.make_location((1, 2), (3, 4), True), + parser_types.SourceLocation((1, 2), (3, 4), is_synthetic=True), ) self.assertEqual( - ir_data.Location( - start=ir_data.Position(line=1, column=2), - end=ir_data.Position(line=3, column=4), + parser_types.SourceLocation( + start=parser_types.SourcePosition(line=1, column=2), + end=parser_types.SourcePosition(line=3, column=4), is_synthetic=True, ), - parser_types.make_location( - ir_data.Position(line=1, column=2), - ir_data.Position(line=3, column=4), - True, + parser_types.SourceLocation( + parser_types.SourcePosition(line=1, column=2), + parser_types.SourcePosition(line=3, column=4), + is_synthetic=True, ), ) - def test_make_location_type_checks(self): - self.assertRaises(ValueError, parser_types.make_location, [1, 2], (1, 2)) - self.assertRaises(ValueError, parser_types.make_location, (1, 2), [1, 2]) - - def test_make_location_logic_checks(self): - self.assertRaises(ValueError, parser_types.make_location, (3, 4), (1, 2)) - self.assertRaises(ValueError, parser_types.make_location, (1, 3), (1, 2)) - self.assertTrue(parser_types.make_location((1, 2), (1, 2))) - - def test_format_location(self): + def test_location_str(self): self.assertEqual( "1:2-3:4", - parser_types.format_location(parser_types.make_location((1, 2), (3, 
4))), + str(parser_types.SourceLocation((1, 2), (3, 4))), ) - def test_parse_location(self): + def test_location_from_str(self): self.assertEqual( - parser_types.make_location((1, 2), (3, 4)), - parser_types.parse_location("1:2-3:4"), + parser_types.SourceLocation((1, 2), (3, 4)), + parser_types.SourceLocation.from_str("1:2-3:4"), ) self.assertEqual( - parser_types.make_location((1, 2), (3, 4)), - parser_types.parse_location(" 1 : 2 - 3 : 4 "), + parser_types.SourceLocation((1, 2), (3, 4)), + parser_types.SourceLocation.from_str(" 1 : 2 - 3 : 4 "), ) @@ -107,7 +94,7 @@ def test_str(self): "FOO 'bar' 1:2-3:4", str( parser_types.Token( - "FOO", "bar", parser_types.make_location((1, 2), (3, 4)) + "FOO", "bar", parser_types.SourceLocation((1, 2), (3, 4)) ) ), ) diff --git a/compiler/util/test_util.py b/compiler/util/test_util.py index d54af37..f0a7a9c 100644 --- a/compiler/util/test_util.py +++ b/compiler/util/test_util.py @@ -51,6 +51,8 @@ def proto_is_superset(proto, expected_values, path=""): name = spec.name field_path = "{}{}".format(path, name) value = getattr(proto, name) + if expected_values.HasField(name) and not proto.HasField(name): + return False, "{} missing".format(field_path) if spec.is_dataclass: if spec.is_sequence: if len(expected_value) > len(value): @@ -64,8 +66,6 @@ def proto_is_superset(proto, expected_values, path=""): if not result[0]: return result else: - if expected_values.HasField(name) and not proto.HasField(name): - return False, "{} missing".format(field_path) result = proto_is_superset(value, expected_value, field_path) if not result[0]: return result diff --git a/compiler/util/test_util_test.py b/compiler/util/test_util_test.py index 58e1ad6..11f4ef9 100644 --- a/compiler/util/test_util_test.py +++ b/compiler/util/test_util_test.py @@ -30,7 +30,7 @@ def test_superset_extra_optional_field(self): test_util.proto_is_superset( ir_data.Structure( field=[ir_data.Field()], - source_location=parser_types.parse_location("1:2-3:4"), + source_location=parser_types.SourceLocation.from_str("1:2-3:4"), ), ir_data.Structure(field=[ir_data.Field()]), ), @@ -42,7 +42,7 @@ def test_superset_extra_repeated_field(self): test_util.proto_is_superset( ir_data.Structure( field=[ir_data.Field(), ir_data.Field()], - source_location=parser_types.parse_location("1:2-3:4"), + source_location=parser_types.SourceLocation.from_str("1:2-3:4"), ), ir_data.Structure(field=[ir_data.Field()]), ), @@ -53,7 +53,8 @@ def test_superset_missing_empty_repeated_field(self): (False, "field[0] missing"), test_util.proto_is_superset( ir_data.Structure( - field=[], source_location=parser_types.parse_location("1:2-3:4") + field=[], + source_location=parser_types.SourceLocation.from_str("1:2-3:4"), ), ir_data.Structure(field=[ir_data.Field(), ir_data.Field()]), ), @@ -64,25 +65,32 @@ def test_superset_missing_empty_optional_field(self): (False, "source_location missing"), test_util.proto_is_superset( ir_data.Structure(field=[]), - ir_data.Structure(source_location=ir_data.Location()), + ir_data.Structure(source_location=parser_types.SourceLocation()), ), ) def test_array_element_differs(self): self.assertEqual( - (False, "field[0].source_location.start.line differs: found 1, expected 2"), + ( + False, + "field[0].source_location differs: found 1:2-3:4, expected 2:2-3:4", + ), test_util.proto_is_superset( ir_data.Structure( field=[ ir_data.Field( - source_location=parser_types.parse_location("1:2-3:4") + source_location=parser_types.SourceLocation.from_str( + "1:2-3:4" + ) ) ] ), ir_data.Structure( field=[ 
ir_data.Field( - source_location=parser_types.parse_location("2:2-3:4") + source_location=parser_types.SourceLocation.from_str( + "2:2-3:4" + ) ) ] ), @@ -93,8 +101,12 @@ def test_equal(self): self.assertEqual( (True, ""), test_util.proto_is_superset( - parser_types.parse_location("1:2-3:4"), - parser_types.parse_location("1:2-3:4"), + ir_data.Field( + source_location=parser_types.SourceLocation.from_str("1:2-3:4") + ), + ir_data.Field( + source_location=parser_types.SourceLocation.from_str("1:2-3:4") + ), ), ) @@ -105,17 +117,21 @@ def test_superset_missing_optional_field(self): ir_data.Structure(field=[ir_data.Field()]), ir_data.Structure( field=[ir_data.Field()], - source_location=parser_types.parse_location("1:2-3:4"), + source_location=parser_types.SourceLocation.from_str("1:2-3:4"), ), ), ) def test_optional_field_differs(self): self.assertEqual( - (False, "end.line differs: found 4, expected 3"), + (False, "source_location differs: found 1:2-4:4, expected 1:2-3:4"), test_util.proto_is_superset( - parser_types.parse_location("1:2-4:4"), - parser_types.parse_location("1:2-3:4"), + ir_data.Field( + source_location=parser_types.SourceLocation.from_str("1:2-4:4") + ), + ir_data.Field( + source_location=parser_types.SourceLocation.from_str("1:2-3:4") + ), ), ) diff --git a/compiler/util/traverse_ir.py b/compiler/util/traverse_ir.py index 5a5ac78..bf83392 100644 --- a/compiler/util/traverse_ir.py +++ b/compiler/util/traverse_ir.py @@ -19,6 +19,7 @@ from compiler.util import ir_data from compiler.util import ir_data_fields from compiler.util import ir_data_utils +from compiler.util import parser_types from compiler.util import simple_memoizer @@ -219,7 +220,7 @@ def _fields_to_scan_by_current_and_target(): # type_to_descendant_types is a map of all types that can be reached from a # particular type. After the setup, type_to_descendant_types[ir_data.EmbossIr] # == set() and type_to_descendant_types[ir_data.Reference] == - # {ir_data.CanonicalName, ir_data.Word, ir_data.Location} and + # {ir_data.CanonicalName, ir_data.Word} and # type_to_descendant_types[ir_data.Word] == set(). 
# # The while loop basically ors in the known descendants of each known diff --git a/testdata/golden/span_se_log_file_status.ir.txt b/testdata/golden/span_se_log_file_status.ir.txt index b1bfef8..b880c1c 100644 --- a/testdata/golden/span_se_log_file_status.ir.txt +++ b/testdata/golden/span_se_log_file_status.ir.txt @@ -3,126 +3,36 @@ { "name": { "text": "byte_order", - "source_location": { - "start": { - "line": 17, - "column": 11 - }, - "end": { - "line": 17, - "column": 21 - }, - "is_synthetic": false - } + "source_location": "17:11-17:21" }, "value": { "string_constant": { "text": "LittleEndian", - "source_location": { - "start": { - "line": 17, - "column": 23 - }, - "end": { - "line": 17, - "column": 37 - }, - "is_synthetic": false - } + "source_location": "17:23-17:37" }, - "source_location": { - "start": { - "line": 17, - "column": 23 - }, - "end": { - "line": 17, - "column": 37 - }, - "is_synthetic": false - } + "source_location": "17:23-17:37" }, "is_default": true, - "source_location": { - "start": { - "line": 17, - "column": 1 - }, - "end": { - "line": 17, - "column": 38 - }, - "is_synthetic": false - } + "source_location": "17:1-17:38" }, { "name": { "text": "namespace", - "source_location": { - "start": { - "line": 18, - "column": 8 - }, - "end": { - "line": 18, - "column": 17 - }, - "is_synthetic": false - } + "source_location": "18:8-18:17" }, "value": { "string_constant": { "text": "emboss::test", - "source_location": { - "start": { - "line": 18, - "column": 19 - }, - "end": { - "line": 18, - "column": 33 - }, - "is_synthetic": false - } + "source_location": "18:19-18:33" }, - "source_location": { - "start": { - "line": 18, - "column": 19 - }, - "end": { - "line": 18, - "column": 33 - }, - "is_synthetic": false - } + "source_location": "18:19-18:33" }, - "is_default": false, "back_end": { "text": "cpp", - "source_location": { - "start": { - "line": 18, - "column": 2 - }, - "end": { - "line": 18, - "column": 7 - }, - "is_synthetic": false - } + "source_location": "18:2-18:7" }, - "source_location": { - "start": { - "line": 18, - "column": 1 - }, - "end": { - "line": 20, - "column": 1 - }, - "is_synthetic": false - } + "is_default": false, + "source_location": "18:1-20:1" } ], "type": [ @@ -134,68 +44,18 @@ "start": { "constant": { "value": "0", - "source_location": { - "start": { - "line": 22, - "column": 3 - }, - "end": { - "line": 22, - "column": 4 - }, - "is_synthetic": false - } + "source_location": "22:3-22:4" }, - "source_location": { - "start": { - "line": 22, - "column": 3 - }, - "end": { - "line": 22, - "column": 4 - }, - "is_synthetic": false - } + "source_location": "22:3-22:4" }, "size": { "constant": { "value": "4", - "source_location": { - "start": { - "line": 22, - "column": 8 - }, - "end": { - "line": 22, - "column": 9 - }, - "is_synthetic": false - } + "source_location": "22:8-22:9" }, - "source_location": { - "start": { - "line": 22, - "column": 8 - }, - "end": { - "line": 22, - "column": 9 - }, - "is_synthetic": false - } + "source_location": "22:8-22:9" }, - "source_location": { - "start": { - "line": 22, - "column": 3 - }, - "end": { - "line": 22, - "column": 10 - }, - "is_synthetic": false - } + "source_location": "22:3-22:10" }, "type": { "atomic_type": { @@ -203,187 +63,48 @@ "source_name": [ { "text": "UInt", - "source_location": { - "start": { - "line": 22, - "column": 13 - }, - "end": { - "line": 22, - "column": 17 - }, - "is_synthetic": false - } + "source_location": "22:13-22:17" } ], - "source_location": { - "start": { - "line": 22, - "column": 
13 - }, - "end": { - "line": 22, - "column": 17 - }, - "is_synthetic": false - } + "source_location": "22:13-22:17" }, - "source_location": { - "start": { - "line": 22, - "column": 13 - }, - "end": { - "line": 22, - "column": 17 - }, - "is_synthetic": false - } + "source_location": "22:13-22:17" }, - "source_location": { - "start": { - "line": 22, - "column": 13 - }, - "end": { - "line": 22, - "column": 17 - }, - "is_synthetic": false - } + "source_location": "22:13-22:17" }, "name": { "name": { "text": "file_state", - "source_location": { - "start": { - "line": 22, - "column": 25 - }, - "end": { - "line": 22, - "column": 35 - }, - "is_synthetic": false - } + "source_location": "22:25-22:35" }, - "source_location": { - "start": { - "line": 22, - "column": 25 - }, - "end": { - "line": 22, - "column": 35 - }, - "is_synthetic": false - } - }, - "source_location": { - "start": { - "line": 22, - "column": 3 - }, - "end": { - "line": 22, - "column": 35 - } + "source_location": "22:25-22:35" }, "existence_condition": { - "source_location": { - "start": { - "line": 22, - "column": 3 - }, - "end": { - "line": 22, - "column": 35 - }, - "is_synthetic": false - }, "boolean_constant": { - "source_location": { - "start": { - "line": 22, - "column": 3 - }, - "end": { - "line": 22, - "column": 35 - }, - "is_synthetic": false - }, - "value": true - } - } + "value": true, + "source_location": "22:3-22:35" + }, + "source_location": "22:3-22:35" + }, + "source_location": "22:3-22:35" }, { "location": { "start": { "constant": { "value": "4", - "source_location": { - "start": { - "line": 23, - "column": 3 - }, - "end": { - "line": 23, - "column": 4 - }, - "is_synthetic": false - } + "source_location": "23:3-23:4" }, - "source_location": { - "start": { - "line": 23, - "column": 3 - }, - "end": { - "line": 23, - "column": 4 - }, - "is_synthetic": false - } + "source_location": "23:3-23:4" }, "size": { "constant": { "value": "12", - "source_location": { - "start": { - "line": 23, - "column": 8 - }, - "end": { - "line": 23, - "column": 10 - }, - "is_synthetic": false - } + "source_location": "23:8-23:10" }, - "source_location": { - "start": { - "line": 23, - "column": 8 - }, - "end": { - "line": 23, - "column": 10 - }, - "is_synthetic": false - } + "source_location": "23:8-23:10" }, - "source_location": { - "start": { - "line": 23, - "column": 3 - }, - "end": { - "line": 23, - "column": 11 - }, - "is_synthetic": false - } + "source_location": "23:3-23:11" }, "type": { "array_type": { @@ -393,265 +114,66 @@ "source_name": [ { "text": "UInt", - "source_location": { - "start": { - "line": 23, - "column": 13 - }, - "end": { - "line": 23, - "column": 17 - }, - "is_synthetic": false - } + "source_location": "23:13-23:17" } ], - "source_location": { - "start": { - "line": 23, - "column": 13 - }, - "end": { - "line": 23, - "column": 17 - }, - "is_synthetic": false - } + "source_location": "23:13-23:17" }, - "source_location": { - "start": { - "line": 23, - "column": 13 - }, - "end": { - "line": 23, - "column": 17 - }, - "is_synthetic": false - } + "source_location": "23:13-23:17" }, "size_in_bits": { "constant": { "value": "8", - "source_location": { - "start": { - "line": 23, - "column": 18 - }, - "end": { - "line": 23, - "column": 19 - }, - "is_synthetic": false - } + "source_location": "23:18-23:19" }, - "source_location": { - "start": { - "line": 23, - "column": 17 - }, - "end": { - "line": 23, - "column": 19 - }, - "is_synthetic": false - } + "source_location": "23:17-23:19" }, - "source_location": { - "start": { - 
"line": 23, - "column": 13 - }, - "end": { - "line": 23, - "column": 19 - }, - "is_synthetic": false - } + "source_location": "23:13-23:19" }, "element_count": { "constant": { "value": "12", - "source_location": { - "start": { - "line": 23, - "column": 20 - }, - "end": { - "line": 23, - "column": 22 - }, - "is_synthetic": false - } + "source_location": "23:20-23:22" }, - "source_location": { - "start": { - "line": 23, - "column": 19 - }, - "end": { - "line": 23, - "column": 23 - }, - "is_synthetic": false - } + "source_location": "23:19-23:23" }, - "source_location": { - "start": { - "line": 23, - "column": 13 - }, - "end": { - "line": 23, - "column": 23 - }, - "is_synthetic": false - } + "source_location": "23:13-23:23" }, - "source_location": { - "start": { - "line": 23, - "column": 13 - }, - "end": { - "line": 23, - "column": 23 - }, - "is_synthetic": false - } + "source_location": "23:13-23:23" }, "name": { "name": { "text": "file_name", - "source_location": { - "start": { - "line": 23, - "column": 25 - }, - "end": { - "line": 23, - "column": 34 - }, - "is_synthetic": false - } + "source_location": "23:25-23:34" }, - "source_location": { - "start": { - "line": 23, - "column": 25 - }, - "end": { - "line": 23, - "column": 34 - }, - "is_synthetic": false - } - }, - "source_location": { - "start": { - "line": 23, - "column": 3 - }, - "end": { - "line": 23, - "column": 34 - } + "source_location": "23:25-23:34" }, "existence_condition": { - "source_location": { - "start": { - "line": 23, - "column": 3 - }, - "end": { - "line": 23, - "column": 34 - }, - "is_synthetic": false - }, "boolean_constant": { - "source_location": { - "start": { - "line": 23, - "column": 3 - }, - "end": { - "line": 23, - "column": 34 - }, - "is_synthetic": false - }, - "value": true - } - } + "value": true, + "source_location": "23:3-23:34" + }, + "source_location": "23:3-23:34" + }, + "source_location": "23:3-23:34" }, { "location": { "start": { "constant": { "value": "16", - "source_location": { - "start": { - "line": 24, - "column": 3 - }, - "end": { - "line": 24, - "column": 5 - }, - "is_synthetic": false - } + "source_location": "24:3-24:5" }, - "source_location": { - "start": { - "line": 24, - "column": 3 - }, - "end": { - "line": 24, - "column": 5 - }, - "is_synthetic": false - } + "source_location": "24:3-24:5" }, "size": { "constant": { "value": "4", - "source_location": { - "start": { - "line": 24, - "column": 8 - }, - "end": { - "line": 24, - "column": 9 - }, - "is_synthetic": false - } + "source_location": "24:8-24:9" }, - "source_location": { - "start": { - "line": 24, - "column": 8 - }, - "end": { - "line": 24, - "column": 9 - }, - "is_synthetic": false - } + "source_location": "24:8-24:9" }, - "source_location": { - "start": { - "line": 24, - "column": 3 - }, - "end": { - "line": 24, - "column": 10 - }, - "is_synthetic": false - } + "source_location": "24:3-24:10" }, "type": { "atomic_type": { @@ -659,187 +181,48 @@ "source_name": [ { "text": "UInt", - "source_location": { - "start": { - "line": 24, - "column": 13 - }, - "end": { - "line": 24, - "column": 17 - }, - "is_synthetic": false - } + "source_location": "24:13-24:17" } ], - "source_location": { - "start": { - "line": 24, - "column": 13 - }, - "end": { - "line": 24, - "column": 17 - }, - "is_synthetic": false - } + "source_location": "24:13-24:17" }, - "source_location": { - "start": { - "line": 24, - "column": 13 - }, - "end": { - "line": 24, - "column": 17 - }, - "is_synthetic": false - } + "source_location": "24:13-24:17" }, - 
"source_location": { - "start": { - "line": 24, - "column": 13 - }, - "end": { - "line": 24, - "column": 17 - }, - "is_synthetic": false - } + "source_location": "24:13-24:17" }, "name": { "name": { "text": "file_size_kb", - "source_location": { - "start": { - "line": 24, - "column": 25 - }, - "end": { - "line": 24, - "column": 37 - }, - "is_synthetic": false - } - }, - "source_location": { - "start": { - "line": 24, - "column": 25 - }, - "end": { - "line": 24, - "column": 37 - }, - "is_synthetic": false - } - }, - "source_location": { - "start": { - "line": 24, - "column": 3 + "source_location": "24:25-24:37" }, - "end": { - "line": 24, - "column": 37 - } + "source_location": "24:25-24:37" }, "existence_condition": { - "source_location": { - "start": { - "line": 24, - "column": 3 - }, - "end": { - "line": 24, - "column": 37 - }, - "is_synthetic": false - }, "boolean_constant": { - "source_location": { - "start": { - "line": 24, - "column": 3 - }, - "end": { - "line": 24, - "column": 37 - }, - "is_synthetic": false - }, - "value": true - } - } + "value": true, + "source_location": "24:3-24:37" + }, + "source_location": "24:3-24:37" + }, + "source_location": "24:3-24:37" }, { "location": { "start": { "constant": { "value": "20", - "source_location": { - "start": { - "line": 25, - "column": 3 - }, - "end": { - "line": 25, - "column": 5 - }, - "is_synthetic": false - } + "source_location": "25:3-25:5" }, - "source_location": { - "start": { - "line": 25, - "column": 3 - }, - "end": { - "line": 25, - "column": 5 - }, - "is_synthetic": false - } + "source_location": "25:3-25:5" }, "size": { "constant": { "value": "4", - "source_location": { - "start": { - "line": 25, - "column": 8 - }, - "end": { - "line": 25, - "column": 9 - }, - "is_synthetic": false - } + "source_location": "25:8-25:9" }, - "source_location": { - "start": { - "line": 25, - "column": 8 - }, - "end": { - "line": 25, - "column": 9 - }, - "is_synthetic": false - } + "source_location": "25:8-25:9" }, - "source_location": { - "start": { - "line": 25, - "column": 3 - }, - "end": { - "line": 25, - "column": 10 - }, - "is_synthetic": false - } + "source_location": "25:3-25:10" }, "type": { "atomic_type": { @@ -847,242 +230,64 @@ "source_name": [ { "text": "UInt", - "source_location": { - "start": { - "line": 25, - "column": 13 - }, - "end": { - "line": 25, - "column": 17 - }, - "is_synthetic": false - } + "source_location": "25:13-25:17" } ], - "source_location": { - "start": { - "line": 25, - "column": 13 - }, - "end": { - "line": 25, - "column": 17 - }, - "is_synthetic": false - } + "source_location": "25:13-25:17" }, - "source_location": { - "start": { - "line": 25, - "column": 13 - }, - "end": { - "line": 25, - "column": 17 - }, - "is_synthetic": false - } + "source_location": "25:13-25:17" }, - "source_location": { - "start": { - "line": 25, - "column": 13 - }, - "end": { - "line": 25, - "column": 17 - }, - "is_synthetic": false - } + "source_location": "25:13-25:17" }, "name": { "name": { "text": "media", - "source_location": { - "start": { - "line": 25, - "column": 25 - }, - "end": { - "line": 25, - "column": 30 - }, - "is_synthetic": false - } - }, - "source_location": { - "start": { - "line": 25, - "column": 25 - }, - "end": { - "line": 25, - "column": 30 - }, - "is_synthetic": false - } - }, - "source_location": { - "start": { - "line": 25, - "column": 3 + "source_location": "25:25-25:30" }, - "end": { - "line": 25, - "column": 30 - } + "source_location": "25:25-25:30" }, "existence_condition": { - "source_location": { - 
"start": { - "line": 25, - "column": 3 - }, - "end": { - "line": 25, - "column": 30 - }, - "is_synthetic": false - }, "boolean_constant": { - "source_location": { - "start": { - "line": 25, - "column": 3 - }, - "end": { - "line": 25, - "column": 30 - }, - "is_synthetic": false - }, - "value": true - } - } + "value": true, + "source_location": "25:3-25:30" + }, + "source_location": "25:3-25:30" + }, + "source_location": "25:3-25:30" } ], - "source_location": { - "start": { - "line": 21, - "column": 1 - }, - "end": { - "line": 26, - "column": 1 - } - } - }, - "addressable_unit": 8, - "source_location": { - "start": { - "line": 21, - "column": 1 - }, - "end": { - "line": 26, - "column": 1 - }, - "is_synthetic": false + "source_location": "21:1-26:1" }, "name": { "name": { "text": "LogFileStatus", - "source_location": { - "start": { - "line": 21, - "column": 8 - }, - "end": { - "line": 21, - "column": 21 - }, - "is_synthetic": false - } + "source_location": "21:8-21:21" }, - "source_location": { - "start": { - "line": 21, - "column": 8 - }, - "end": { - "line": 21, - "column": 21 - }, - "is_synthetic": false - } - } + "source_location": "21:8-21:21" + }, + "addressable_unit": 8, + "source_location": "21:1-26:1" } ], "documentation": [ { "text": "This is a simple, real-world example structure.", - "source_location": { - "start": { - "line": 15, - "column": 1 - }, - "end": { - "line": 16, - "column": 1 - }, - "is_synthetic": false - } + "source_location": "15:1-16:1" } ], "foreign_import": [ { "file_name": { "text": "", - "source_location": { - "start": { - "line": 16, - "column": 1 - }, - "end": { - "line": 16, - "column": 1 - }, - "is_synthetic": false - } + "source_location": "16:1-16:1" }, "local_name": { "text": "", - "source_location": { - "start": { - "line": 16, - "column": 1 - }, - "end": { - "line": 16, - "column": 1 - }, - "is_synthetic": false - } + "source_location": "16:1-16:1" }, - "source_location": { - "start": { - "line": 16, - "column": 1 - }, - "end": { - "line": 16, - "column": 1 - }, - "is_synthetic": false - } + "source_location": "16:1-16:1" } ], - "source_location": { - "start": { - "line": 1, - "column": 1 - }, - "end": { - "line": 26, - "column": 1 - }, - "is_synthetic": false - }, - "source_text": "# Copyright 2019 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n# https://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\n-- This is a simple, real-world example structure.\n\n[$default byte_order: \"LittleEndian\"]\n[(cpp) namespace: \"emboss::test\"]\n\n\nstruct LogFileStatus:\n 0 [+4] UInt file_state\n 4 [+12] UInt:8[12] file_name\n 16 [+4] UInt file_size_kb\n 20 [+4] UInt media\n" + "source_text": "# Copyright 2019 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n# https://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT 
WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n\n-- This is a simple, real-world example structure.\n\n[$default byte_order: \"LittleEndian\"]\n[(cpp) namespace: \"emboss::test\"]\n\n\nstruct LogFileStatus:\n 0 [+4] UInt file_state\n 4 [+12] UInt:8[12] file_name\n 16 [+4] UInt file_size_kb\n 20 [+4] UInt media\n", + "source_location": "1:1-26:1" } diff --git a/testdata/golden/span_se_log_file_status.parse_tree.txt b/testdata/golden/span_se_log_file_status.parse_tree.txt index bd15bdd..e415367 100644 --- a/testdata/golden/span_se_log_file_status.parse_tree.txt +++ b/testdata/golden/span_se_log_file_status.parse_tree.txt @@ -3,67 +3,67 @@ module: comment-line: Comment?: Comment '# Copyright 2019 Google LLC' 1:1-1:28 - "\n" '\n' 1:1-1:28 + "\n" '\n' 1:28-1:28 comment-line*: comment-line: Comment?: Comment '#' 2:1-2:2 - "\n" '\n' 2:1-2:2 + "\n" '\n' 2:2-2:2 comment-line*: comment-line: Comment?: Comment '# Licensed under the Apache License, Version 2.0 (the "License");' 3:1-3:66 - "\n" '\n' 3:1-3:66 + "\n" '\n' 3:66-3:66 comment-line*: comment-line: Comment?: Comment '# you may not use this file except in compliance with the License.' 4:1-4:67 - "\n" '\n' 4:1-4:67 + "\n" '\n' 4:67-4:67 comment-line*: comment-line: Comment?: Comment '# You may obtain a copy of the License at' 5:1-5:42 - "\n" '\n' 5:1-5:42 + "\n" '\n' 5:42-5:42 comment-line*: comment-line: Comment?: Comment '#' 6:1-6:2 - "\n" '\n' 6:1-6:2 + "\n" '\n' 6:2-6:2 comment-line*: comment-line: Comment?: Comment '# https://www.apache.org/licenses/LICENSE-2.0' 7:1-7:50 - "\n" '\n' 7:1-7:50 + "\n" '\n' 7:50-7:50 comment-line*: comment-line: Comment?: Comment '#' 8:1-8:2 - "\n" '\n' 8:1-8:2 + "\n" '\n' 8:2-8:2 comment-line*: comment-line: Comment?: Comment '# Unless required by applicable law or agreed to in writing, software' 9:1-9:70 - "\n" '\n' 9:1-9:70 + "\n" '\n' 9:70-9:70 comment-line*: comment-line: Comment?: Comment '# distributed under the License is distributed on an "AS IS" BASIS,' 10:1-10:68 - "\n" '\n' 10:1-10:68 + "\n" '\n' 10:68-10:68 comment-line*: comment-line: Comment?: Comment '# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.' 11:1-11:75 - "\n" '\n' 11:1-11:75 + "\n" '\n' 11:75-11:75 comment-line*: comment-line: Comment?: Comment '# See the License for the specific language governing permissions and' 12:1-12:70 - "\n" '\n' 12:1-12:70 + "\n" '\n' 12:70-12:70 comment-line*: comment-line: Comment?: Comment '# limitations under the License.' 13:1-13:33 - "\n" '\n' 13:1-13:33 + "\n" '\n' 13:33-13:33 comment-line*: comment-line: Comment? @@ -75,7 +75,7 @@ module: Documentation '-- This is a simple, real-world example structure.' 15:1-15:51 Comment? eol: - "\n" '\n' 15:51-16:1 + "\n" '\n' 15:51-15:51 comment-line*: comment-line: Comment? @@ -121,7 +121,7 @@ module: "]" ']' 18:33-18:34 Comment? eol: - "\n" '\n' 18:34-20:1 + "\n" '\n' 18:34-18:34 comment-line*: comment-line: Comment? 
diff --git a/testdata/golden/span_se_log_file_status.tokens.txt b/testdata/golden/span_se_log_file_status.tokens.txt index 5474c99..91937f0 100644 --- a/testdata/golden/span_se_log_file_status.tokens.txt +++ b/testdata/golden/span_se_log_file_status.tokens.txt @@ -1,32 +1,32 @@ Comment '# Copyright 2019 Google LLC' 1:1-1:28 -"\n" '\n' 1:1-1:28 +"\n" '\n' 1:28-1:28 Comment '#' 2:1-2:2 -"\n" '\n' 2:1-2:2 +"\n" '\n' 2:2-2:2 Comment '# Licensed under the Apache License, Version 2.0 (the "License");' 3:1-3:66 -"\n" '\n' 3:1-3:66 +"\n" '\n' 3:66-3:66 Comment '# you may not use this file except in compliance with the License.' 4:1-4:67 -"\n" '\n' 4:1-4:67 +"\n" '\n' 4:67-4:67 Comment '# You may obtain a copy of the License at' 5:1-5:42 -"\n" '\n' 5:1-5:42 +"\n" '\n' 5:42-5:42 Comment '#' 6:1-6:2 -"\n" '\n' 6:1-6:2 +"\n" '\n' 6:2-6:2 Comment '# https://www.apache.org/licenses/LICENSE-2.0' 7:1-7:50 -"\n" '\n' 7:1-7:50 +"\n" '\n' 7:50-7:50 Comment '#' 8:1-8:2 -"\n" '\n' 8:1-8:2 +"\n" '\n' 8:2-8:2 Comment '# Unless required by applicable law or agreed to in writing, software' 9:1-9:70 -"\n" '\n' 9:1-9:70 +"\n" '\n' 9:70-9:70 Comment '# distributed under the License is distributed on an "AS IS" BASIS,' 10:1-10:68 -"\n" '\n' 10:1-10:68 +"\n" '\n' 10:68-10:68 Comment '# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.' 11:1-11:75 -"\n" '\n' 11:1-11:75 +"\n" '\n' 11:75-11:75 Comment '# See the License for the specific language governing permissions and' 12:1-12:70 -"\n" '\n' 12:1-12:70 +"\n" '\n' 12:70-12:70 Comment '# limitations under the License.' 13:1-13:33 -"\n" '\n' 13:1-13:33 +"\n" '\n' 13:33-13:33 "\n" '\n' 14:1-14:1 Documentation '-- This is a simple, real-world example structure.' 15:1-15:51 -"\n" '\n' 15:51-16:1 +"\n" '\n' 15:51-15:51 "\n" '\n' 16:1-16:1 "[" '[' 17:1-17:2 "$default" '$default' 17:2-17:10 @@ -43,7 +43,7 @@ SnakeWord 'namespace' 18:8-18:17 ":" ':' 18:17-18:18 String '"emboss::test"' 18:19-18:33 "]" ']' 18:33-18:34 -"\n" '\n' 18:34-20:1 +"\n" '\n' 18:34-18:34 "\n" '\n' 19:1-19:1 "\n" '\n' 20:1-20:1 "struct" 'struct' 21:1-21:7 From 2b55d690314c7177df5dd380b11b67267acb465d Mon Sep 17 00:00:00 2001 From: Ben Olmstead Date: Sat, 12 Oct 2024 20:56:03 +0000 Subject: [PATCH 2/6] Restore incorrectly-removed test. --- compiler/util/parser_types_test.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/compiler/util/parser_types_test.py b/compiler/util/parser_types_test.py index 325cdef..50c3af8 100644 --- a/compiler/util/parser_types_test.py +++ b/compiler/util/parser_types_test.py @@ -22,6 +22,9 @@ class PositionTest(unittest.TestCase): """Tests for SourcePosition-related functions in parser_types.""" + def test_format_position(self): + self.assertEqual("1:2", str(parser_types.SourcePosition(line=1, column=2))) + class LocationTest(unittest.TestCase): """Tests for SourceLocation-related functions in parser_types.""" From f65388035c92239acb80a9d2ad199d7b6a584ad8 Mon Sep 17 00:00:00 2001 From: Ben Olmstead Date: Mon, 14 Oct 2024 21:43:59 +0000 Subject: [PATCH 3/6] Revamp `_check_source_location` and add more `parser_types` tests. 
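
`SourcePosition` is a plain `namedtuple`, so positions compare
lexicographically (line first, then column). The revamped
`_check_source_location` leans on that ordering instead of comparing
`line`/`column` by hand; a minimal sketch of the ordering that the new
`test_position_order` cases exercise:

    assert parser_types.SourcePosition(2, 1) < parser_types.SourcePosition(2, 2)
    assert not (parser_types.SourcePosition(2, 2) < parser_types.SourcePosition(2, 1))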
--- compiler/front_end/module_ir_test.py | 50 ++------ compiler/util/parser_types.py | 4 +- compiler/util/parser_types_test.py | 163 ++++++++++++++++++++++----- 3 files changed, 145 insertions(+), 72 deletions(-) diff --git a/compiler/front_end/module_ir_test.py b/compiler/front_end/module_ir_test.py index e83675a..e5559ce 100644 --- a/compiler/front_end/module_ir_test.py +++ b/compiler/front_end/module_ir_test.py @@ -3948,11 +3948,6 @@ def _check_source_location(source_location, path, min_start, max_end): Returns: A list of error messages, or an empty list if no errors. """ - if source_location.is_disjoint_from_parent: - # If source_location.is_disjoint_from_parent, then this source_location is - # allowed to be outside of the parent's source_location. - return [] - result = [] start = None end = None @@ -3965,47 +3960,24 @@ def _check_source_location(source_location, path, min_start, max_end): else: end = source_location.end - if start and end: - if start.line and end.line: - if start.line > end.line: - result.append( - "{}.start.line > {}.end.line ({} vs {})".format( - path, path, start.line, end.line - ) - ) - elif start.line == end.line: - if start.column and end.column and start.column > end.column: - result.append( - "{}.start.column > {}.end.column ({} vs {})".format( - path, path, start.column, end.column - ) - ) - for name, field in (("start", start), ("end", end)): if not field: continue - if field.line: - if field.line <= 0: - result.append("{}.{}.line <= 0 ({})".format(path, name, field.line)) - else: + if not field.line: result.append("{}.{}.line missing".format(path, name)) - if field.column: - if field.column <= 0: - result.append("{}.{}.column <= 0 ({})".format(path, name, field.column)) - else: + if not field.column: result.append("{}.{}.column missing".format(path, name)) - if min_start and start: - if min_start.line > start.line or ( - min_start.line == start.line and min_start.column > start.column - ): - result.append("{}.start before parent start".format(path)) + if not source_location.is_disjoint_from_parent: + # If source_location.is_disjoint_from_parent, then this source_location + # is allowed to be outside of the parent's source_location. 
+ if min_start and start: + if min_start > start: + result.append("{}.start before parent start".format(path)) - if max_end and end: - if max_end.line < end.line or ( - max_end.line == end.line and max_end.column < end.column - ): - result.append("{}.end after parent end".format(path)) + if max_end and end: + if max_end < end: + result.append("{}.end after parent end".format(path)) return result diff --git a/compiler/util/parser_types.py b/compiler/util/parser_types.py index bdff0d6..146693c 100644 --- a/compiler/util/parser_types.py +++ b/compiler/util/parser_types.py @@ -95,9 +95,7 @@ def __str__(self): suffix += "^" if self.is_synthetic: suffix += "*" - return ( - f"{self.start or SourcePosition()}-{self.end or SourcePosition()}{suffix}" - ) + return f"{self.start}-{self.end}{suffix}" @staticmethod def from_str(value): diff --git a/compiler/util/parser_types_test.py b/compiler/util/parser_types_test.py index 50c3af8..6c8ae99 100644 --- a/compiler/util/parser_types_test.py +++ b/compiler/util/parser_types_test.py @@ -22,9 +22,61 @@ class PositionTest(unittest.TestCase): """Tests for SourcePosition-related functions in parser_types.""" - def test_format_position(self): + def test_position_str(self): self.assertEqual("1:2", str(parser_types.SourcePosition(line=1, column=2))) + def test_position_bool(self): + self.assertFalse(parser_types.SourcePosition()) + self.assertFalse(parser_types.SourcePosition(0, 0)) + self.assertTrue(parser_types.SourcePosition(1, 0)) + self.assertTrue(parser_types.SourcePosition(0, 1)) + + def test_position_from_str(self): + self.assertEqual( + parser_types.SourcePosition(1, 2), + parser_types.SourcePosition.from_str("1:2"), + ) + self.assertEqual( + parser_types.SourcePosition(0, 0), + parser_types.SourcePosition.from_str("0:0"), + ) + self.assertRaises(ValueError, parser_types.SourcePosition.from_str, "0xa:9") + self.assertRaises(ValueError, parser_types.SourcePosition.from_str, "9") + if __debug__: + self.assertRaises(ValueError, parser_types.SourcePosition.from_str, "0:-1") + self.assertRaises(ValueError, parser_types.SourcePosition.from_str, "-1:0") + + def test_position_new(self): + self.assertEqual( + parser_types.SourcePosition(1, 2), + parser_types.SourcePosition(line=1, column=2), + ) + if __debug__: + self.assertRaises(AssertionError, parser_types.SourcePosition, -1, 1) + self.assertRaises(AssertionError, parser_types.SourcePosition, 1, -1) + self.assertRaises(AssertionError, parser_types.SourcePosition, None, 1) + self.assertRaises(AssertionError, parser_types.SourcePosition, 1, None) + self.assertRaises(AssertionError, parser_types.SourcePosition, 1.1, 1) + self.assertRaises(AssertionError, parser_types.SourcePosition, 1, 1.1) + + def test_position_attributes(self): + self.assertEqual(1, parser_types.SourcePosition(1, 2).line) + self.assertEqual(2, parser_types.SourcePosition(1, 2).column) + + def test_position_order(self): + self.assertTrue( + parser_types.SourcePosition(1, 2) < parser_types.SourcePosition(2, 2) + ) + self.assertTrue( + parser_types.SourcePosition(2, 1) < parser_types.SourcePosition(2, 2) + ) + self.assertFalse( + parser_types.SourcePosition(2, 1) < parser_types.SourcePosition(2, 1) + ) + self.assertFalse( + parser_types.SourcePosition(2, 2) < parser_types.SourcePosition(2, 1) + ) + class LocationTest(unittest.TestCase): """Tests for SourceLocation-related functions in parser_types.""" @@ -35,47 +87,56 @@ def test_location_new(self): start=parser_types.SourcePosition(line=1, column=2), end=parser_types.SourcePosition(line=3, 
                 is_synthetic=False,
+                is_disjoint_from_parent=False,
             ),
             parser_types.SourceLocation((1, 2), (3, 4)),
         )
-        self.assertEqual(
+        self.assertFalse(parser_types.SourceLocation(is_synthetic=False).is_synthetic)
+        self.assertTrue(parser_types.SourceLocation(is_synthetic=True).is_synthetic)
+        self.assertFalse(
             parser_types.SourceLocation(
-                start=parser_types.SourcePosition(line=1, column=2),
-                end=parser_types.SourcePosition(line=3, column=4),
-                is_synthetic=False,
-            ),
-            parser_types.SourceLocation(
-                parser_types.SourcePosition(line=1, column=2),
-                parser_types.SourcePosition(line=3, column=4),
-            ),
+                is_disjoint_from_parent=False
+            ).is_disjoint_from_parent
         )
+        self.assertTrue(
+            parser_types.SourceLocation(
+                is_disjoint_from_parent=True
+            ).is_disjoint_from_parent
+        )
+        self.assertRaises(TypeError, parser_types.SourceLocation, None, (3, 4))
+        self.assertRaises(TypeError, parser_types.SourceLocation, (1, 2), None)
+        if __debug__:
+            self.assertRaises(
+                AssertionError, parser_types.SourceLocation, (3, 4), (1, 2)
+            )
+            self.assertRaises(
+                AssertionError, parser_types.SourceLocation, (3, 4), (3, 2)
+            )

-    def test_make_synthetic_location(self):
+    def test_location_str(self):
         self.assertEqual(
-            parser_types.SourceLocation(
-                start=parser_types.SourcePosition(line=1, column=2),
-                end=parser_types.SourcePosition(line=3, column=4),
-                is_synthetic=True,
-            ),
-            parser_types.SourceLocation((1, 2), (3, 4), is_synthetic=True),
+            "1:2-3:4",
+            str(parser_types.SourceLocation((1, 2), (3, 4))),
         )
         self.assertEqual(
-            parser_types.SourceLocation(
-                start=parser_types.SourcePosition(line=1, column=2),
-                end=parser_types.SourcePosition(line=3, column=4),
-                is_synthetic=True,
-            ),
-            parser_types.SourceLocation(
-                parser_types.SourcePosition(line=1, column=2),
-                parser_types.SourcePosition(line=3, column=4),
-                is_synthetic=True,
+            "1:2-3:4^",
+            str(
+                parser_types.SourceLocation(
+                    (1, 2), (3, 4), is_disjoint_from_parent=True
+                )
             ),
         )
-
-    def test_location_str(self):
         self.assertEqual(
-            "1:2-3:4",
-            str(parser_types.SourceLocation((1, 2), (3, 4))),
+            "1:2-3:4*",
+            str(parser_types.SourceLocation((1, 2), (3, 4), is_synthetic=True)),
+        )
+        self.assertEqual(
+            "1:2-3:4^*",
+            str(
+                parser_types.SourceLocation(
+                    (1, 2), (3, 4), is_synthetic=True, is_disjoint_from_parent=True
+                )
+            ),
         )

     def test_location_from_str(self):
@@ -87,6 +148,48 @@ def test_location_from_str(self):
             parser_types.SourceLocation((1, 2), (3, 4)),
             parser_types.SourceLocation.from_str(" 1 : 2 - 3 : 4 "),
         )
+        self.assertEqual(
+            parser_types.SourceLocation((1, 2), (3, 4), is_disjoint_from_parent=True),
+            parser_types.SourceLocation.from_str("1:2-3:4^"),
+        )
+        self.assertEqual(
+            parser_types.SourceLocation((1, 2), (3, 4), is_synthetic=True),
+            parser_types.SourceLocation.from_str("1:2-3:4*"),
+        )
+        self.assertEqual(
+            parser_types.SourceLocation(
+                (1, 2), (3, 4), is_disjoint_from_parent=True, is_synthetic=True
+            ),
+            parser_types.SourceLocation.from_str("1:2-3:4^*"),
+        )
+        self.assertRaises(ValueError, parser_types.SourceLocation.from_str, "1:2-3:")
+        if __debug__:
+            self.assertRaises(
+                ValueError, parser_types.SourceLocation.from_str, "1:2-3:-1"
+            )
+        self.assertRaises(ValueError, parser_types.SourceLocation.from_str, "1:2-3:1%")
+
+    def test_location_attributes(self):
+        self.assertEqual(
+            parser_types.SourceLocation((1, 2), (3, 4)).start,
+            parser_types.SourcePosition(1, 2),
+        )
+        self.assertEqual(
+            parser_types.SourceLocation((1, 2), (3, 4)).end,
+            parser_types.SourcePosition(3, 4),
+        )
+        self.assertFalse(parser_types.SourceLocation((1, 2), (3, 4)).is_synthetic)
+        self.assertFalse(
+            parser_types.SourceLocation((1, 2), (3, 4)).is_disjoint_from_parent
+        )
+        self.assertTrue(
+            parser_types.SourceLocation((1, 2), (3, 4), is_synthetic=True).is_synthetic
+        )
+        self.assertTrue(
+            parser_types.SourceLocation(
+                (1, 2), (3, 4), is_disjoint_from_parent=True
+            ).is_disjoint_from_parent
+        )

 class TokenTest(unittest.TestCase):

From e0967393a8428bb22c35917860ad6123be9ba349 Mon Sep 17 00:00:00 2001
From: Ben Olmstead
Date: Wed, 16 Oct 2024 19:44:24 +0000
Subject: [PATCH 4/6] Move more assertions into `SourceLocation` and `SourcePosition`.

---
 compiler/front_end/module_ir_test.py |  8 ------
 compiler/util/parser_types.py        | 39 +++++++++++++++++++++++++---
 compiler/util/parser_types_test.py   | 17 ++++++++++--
 3 files changed, 50 insertions(+), 14 deletions(-)

diff --git a/compiler/front_end/module_ir_test.py b/compiler/front_end/module_ir_test.py
index e5559ce..1931bab 100644
--- a/compiler/front_end/module_ir_test.py
+++ b/compiler/front_end/module_ir_test.py
@@ -3960,14 +3960,6 @@ def _check_source_location(source_location, path, min_start, max_end):
     else:
         end = source_location.end

-    for name, field in (("start", start), ("end", end)):
-        if not field:
-            continue
-        if not field.line:
-            result.append("{}.{}.line missing".format(path, name))
-        if not field.column:
-            result.append("{}.{}.column missing".format(path, name))
-
     if not source_location.is_disjoint_from_parent:
         # If source_location.is_disjoint_from_parent, then this source_location
         # is allowed to be outside of the parent's source_location.
diff --git a/compiler/util/parser_types.py b/compiler/util/parser_types.py
index 146693c..3074869 100644
--- a/compiler/util/parser_types.py
+++ b/compiler/util/parser_types.py
@@ -31,13 +31,26 @@ class SourcePosition(PositionTuple):
-    """A zero-width position within a source file."""
+    """A zero-width position within a source file.
+
+    Positions are 1-based (the first character of a source file is (1, 1)).
+
+    The special value (0, 0) indicates a missing or unknown position, and is
+    considered falsy. All other values of SourcePosition are truthy.
+
+    Attributes:
+      line: the line within the source file; the first line is 1
+      column: the column within the source line; the first character is 1
+    """

     def __new__(cls, /, line=0, column=0):
         assert isinstance(line, int), f"line {line} is not int"
         assert isinstance(column, int), f"column {column} is not int"
         assert line >= 0, f"line {line} is negative"
         assert column >= 0, f"column {column} is negative"
+        assert (line == 0 and column == 0) or (
+            line != 0 and column != 0
+        ), f"Cannot have line {line} with column {column}"
         return PositionTuple.__new__(cls, line, column)

     def __str__(self):
         return f"{self.line}:{self.column}"
@@ -52,7 +65,7 @@ def from_str(value):
         raise ValueError(f"{repr(value)} is not a valid SourcePosition.")

     def __bool__(self):
-        return bool(self.line or self.column)
+        return bool(self.line)

 LocationTuple = collections.namedtuple(
@@ -63,7 +76,21 @@ def __bool__(self):

 class SourceLocation(LocationTuple):
-    """A half-open start:end range within a source file."""
+    """The source location of a node in the IR, as a half-open start:end range.
+
+    Attributes:
+      start: the start of the range
+      end: one character past the end of the range
+      is_disjoint_from_parent: True if this SourceLocation may fall outside
+        of the SourceLocation of the parent node
+      is_synthetic: True if the associated node was generated from
+        user-supplied source code, but is part of a construct that does not
+        directly correspond to something that the user wrote (e.g., a
+        generated virtual field that is assembled from fragments from
+        elsewhere in the IR).
+
+    SourceLocation is falsy if the start and end are falsy.
+    """

     def __new__(
         cls,
@@ -79,6 +106,9 @@ def __new__(
         if not isinstance(end, SourcePosition):
             end = SourcePosition(*end)
         assert start <= end, f"start {start} is after end {end}"
+        assert (not start and not end) or (
+            start and end
+        ), f"Cannot have start {start} with end {end}"
         assert isinstance(
             is_disjoint_from_parent, bool
         ), f"is_disjoint_from_parent {is_disjoint_from_parent} is not bool"
@@ -120,7 +150,8 @@ def from_str(value):
         raise ValueError(f"{repr(original_value)} is not a valid SourceLocation.")

     def __bool__(self):
-        return bool(self.start or self.end)
+        # Should this check is_synthetic and is_disjoint_from_parent as well?
+        return bool(self.start)

 def merge_source_locations(*nodes):
diff --git a/compiler/util/parser_types_test.py b/compiler/util/parser_types_test.py
index 6c8ae99..c66bf2f 100644
--- a/compiler/util/parser_types_test.py
+++ b/compiler/util/parser_types_test.py
@@ -28,8 +28,7 @@ def test_position_str(self):
     def test_position_bool(self):
         self.assertFalse(parser_types.SourcePosition())
         self.assertFalse(parser_types.SourcePosition(0, 0))
-        self.assertTrue(parser_types.SourcePosition(1, 0))
-        self.assertTrue(parser_types.SourcePosition(0, 1))
+        self.assertTrue(parser_types.SourcePosition(1, 1))

     def test_position_from_str(self):
         self.assertEqual(
@@ -58,6 +57,8 @@ def test_position_new(self):
             self.assertRaises(AssertionError, parser_types.SourcePosition, 1, None)
             self.assertRaises(AssertionError, parser_types.SourcePosition, 1.1, 1)
             self.assertRaises(AssertionError, parser_types.SourcePosition, 1, 1.1)
+            self.assertRaises(AssertionError, parser_types.SourcePosition, 0, 1)
+            self.assertRaises(AssertionError, parser_types.SourcePosition, 1, 0)

     def test_position_attributes(self):
         self.assertEqual(1, parser_types.SourcePosition(1, 2).line)
         self.assertEqual(2, parser_types.SourcePosition(1, 2).column)
@@ -112,6 +113,18 @@ def test_location_new(self):
             self.assertRaises(
                 AssertionError, parser_types.SourceLocation, (3, 4), (3, 2)
             )
+            self.assertRaises(
+                AssertionError,
+                parser_types.SourceLocation,
+                parser_types.SourcePosition(),
+                (1, 2),
+            )
+            self.assertRaises(
+                AssertionError,
+                parser_types.SourceLocation,
+                (1, 2),
+                parser_types.SourcePosition(),
+            )

     def test_location_str(self):
         self.assertEqual(

From dcceb48a9e1a58432b3546d236e391e20511449f Mon Sep 17 00:00:00 2001
From: Ben Olmstead
Date: Wed, 16 Oct 2024 20:19:07 +0000
Subject: [PATCH 5/6] Add explanatory comment.

---
 compiler/util/parser_types.py | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/compiler/util/parser_types.py b/compiler/util/parser_types.py
index 3074869..62478be 100644
--- a/compiler/util/parser_types.py
+++ b/compiler/util/parser_types.py
@@ -43,15 +43,18 @@ class SourcePosition(PositionTuple):
       column: the column within the source line; the first character is 1
     """

-    def __new__(cls, /, line=0, column=0):
-        assert isinstance(line, int), f"line {line} is not int"
-        assert isinstance(column, int), f"column {column} is not int"
-        assert line >= 0, f"line {line} is negative"
-        assert column >= 0, f"column {column} is negative"
-        assert (line == 0 and column == 0) or (
-            line != 0 and column != 0
-        ), f"Cannot have line {line} with column {column}"
-        return PositionTuple.__new__(cls, line, column)
+    # This __new__ just adds asserts around PositionTuple.__new__, so it is
+    # unnecessary when running under -O.
+    if __debug__:
+        def __new__(cls, /, line=0, column=0):
+            assert isinstance(line, int), f"line {line} is not int"
+            assert isinstance(column, int), f"column {column} is not int"
+            assert line >= 0, f"line {line} is negative"
+            assert column >= 0, f"column {column} is negative"
+            assert (line == 0 and column == 0) or (
+                line != 0 and column != 0
+            ), f"Cannot have line {line} with column {column}"
+            return PositionTuple.__new__(cls, line, column)

     def __str__(self):
         return f"{self.line}:{self.column}"

From d16c2a92cd2cedf84272d3e8207324e040748a90 Mon Sep 17 00:00:00 2001
From: Ben Olmstead
Date: Wed, 16 Oct 2024 20:21:19 +0000
Subject: [PATCH 6/6] Reformat with Black.
---
 compiler/util/parser_types.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/compiler/util/parser_types.py b/compiler/util/parser_types.py
index 62478be..a3ef34d 100644
--- a/compiler/util/parser_types.py
+++ b/compiler/util/parser_types.py
@@ -46,6 +46,7 @@ class SourcePosition(PositionTuple):
     # This __new__ just adds asserts around PositionTuple.__new__, so it is
    # unnecessary when running under -O.
     if __debug__:
+
         def __new__(cls, /, line=0, column=0):
             assert isinstance(line, int), f"line {line} is not int"
             assert isinstance(column, int), f"column {column} is not int"
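
A minimal usage sketch of the API these patches converge on. This is illustrative only, not part of the series; the import path mirrors the test modules in the hunks above, and the behavior shown (string form, flags, falsy (0, 0) position, _replace-based updates) is taken from the docstrings and tests in the diffs.

    from compiler.util import parser_types

    # SourcePosition and SourceLocation are namedtuples, so updates go
    # through _replace() rather than attribute assignment.
    loc = parser_types.SourceLocation((1, 2), (3, 4))
    assert str(loc) == "1:2-3:4"
    assert parser_types.SourceLocation.from_str("1:2-3:4^*").is_synthetic

    # (0, 0) marks a missing or unknown position and is falsy.
    assert not parser_types.SourcePosition()

    # Widen the end of the range by replacing the end position's column.
    wider = loc._replace(end=loc.end._replace(column=10))
    assert str(wider) == "1:2-3:10"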