Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 11 additions & 2 deletions Doc/reference/grammar.rst
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,17 @@ The notation used here is the same as in the preceding docs,
and is described in the :ref:`notation <notation>` section,
except for an extra complication:

* ``~`` ("cut"): commit to the current alternative and fail the rule
even if this fails to parse
* ``~`` ("cut"): commit to the current alternative; fail the rule
if the alternative fails to parse

Python mainly uses cuts for optimizations or improved error
messages. They often appear to be useless in the listing below.

.. see gh-143054, and CutValidator in the source, if you want to change this:
Cuts currently don't appear inside parentheses, brackets, lookaheads
and similar.
Their behavior in these contexts is deliberately left unspecified.

.. literalinclude:: ../../Grammar/python.gram
:language: peg
18 changes: 17 additions & 1 deletion Lib/test/test_peg_generator/test_grammar_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
test_tools.skip_if_missing("peg_generator")
with test_tools.imports_under_tool("peg_generator"):
from pegen.grammar_parser import GeneratedParser as GrammarParser
from pegen.validator import SubRuleValidator, ValidationError, RaiseRuleValidator
from pegen.validator import SubRuleValidator, ValidationError
from pegen.validator import RaiseRuleValidator, CutValidator
from pegen.testutil import parse_string
from pegen.grammar import Grammar

Expand Down Expand Up @@ -59,3 +60,18 @@ def test_raising_valid_rule(self) -> None:
with self.assertRaises(ValidationError):
for rule_name, rule in grammar.rules.items():
validator.validate_rule(rule_name, rule)

def test_cut_validator(self) -> None:
    """CutValidator rejects every rule whose cut is not at the rule's top level."""
    # Each rule below places the cut inside a nested construct (a group
    # that is repeated, optional or gathered) rather than directly in one
    # of the rule's own alternatives.
    grammar_source = """
star: (OP ~ OP)*
plus: (OP ~ OP)+
bracket: [OP ~ OP]
gather: OP.(OP ~ OP)+
nested: [OP | NAME ~ OP]
"""
    grammar: Grammar = parse_string(grammar_source, GrammarParser)
    cut_validator = CutValidator(grammar)
    for name, parsed_rule in grammar.rules.items():
        # One sub-test per rule so a single accepted rule is reported by name.
        with self.subTest(name), self.assertRaises(ValidationError):
            cut_validator.validate_rule(name, parsed_rule)
24 changes: 24 additions & 0 deletions Lib/test/test_peg_generator/test_pegen.py
Original file line number Diff line number Diff line change
Expand Up @@ -755,6 +755,30 @@ def test_cut(self) -> None:
],
)

def test_cut_is_local_in_rule(self) -> None:
    """A cut only commits within the rule that contains it.

    On input "x", ``inner`` matches 'x', passes the cut and then fails on
    the missing 'y', so ``inner`` fails as a whole.  That failure must not
    prevent ``start`` from trying its own second alternative, which
    produces "ok".
    """
    # Alternatives written on their own lines are indented below the rule
    # header; the grammar parser expects an indented block there.
    grammar = """
start:
    | inner
    | 'x' { "ok" }
inner:
    | 'x' ~ 'y'
    | 'x'
"""
    parser_class = make_parser(grammar)
    node = parse_string("x", parser_class)
    self.assertEqual(node, "ok")

def test_cut_is_local_in_parens(self) -> None:
    """A cut inside a parenthesized group does not commit the enclosing rule.

    On input "x", the group matches 'x', passes the cut and fails on the
    missing 'y'; ``start`` is still expected to fall through to its second
    alternative and produce "ok".
    """
    # we currently don't guarantee this behavior, see gh-143054
    # Alternatives written on their own lines are indented below the rule
    # header; the grammar parser expects an indented block there.
    grammar = """
start:
    | ('x' ~ 'y' | 'x')
    | 'x' { "ok" }
"""
    parser_class = make_parser(grammar)
    node = parse_string("x", parser_class)
    self.assertEqual(node, "ok")

def test_dangling_reference(self) -> None:
grammar = """
start: foo ENDMARKER
Expand Down
33 changes: 33 additions & 0 deletions Tools/peg_generator/pegen/validator.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from typing import Any

from pegen import grammar
from pegen.grammar import Alt, GrammarVisitor, Rhs, Rule

Expand Down Expand Up @@ -44,6 +46,37 @@ def visit_Alt(self, node: Alt) -> None:
)


class CutValidator(GrammarValidator):
    """Fail if a Cut is not directly in a rule.

    For simplicity, we currently document that a Cut affects alternatives
    of the *rule* it is in.
    However, the implementation makes cuts local to the enclosing Rhs
    (e.g. a parenthesized list of choices).
    Additionally, in academic papers about PEG, repeats and optional items
    are "desugared" to choices with an empty alternative, and thus contain
    a Cut's effect.

    Please update the documentation and tests when adding a cut in such a
    nested position, then get rid of this validator.

    See gh-143054.
    """

    def visit(self, node: Any, parents: tuple[Any, ...] = ()) -> None:
        """Visit *node* with itself appended to the chain of *parents*.

        NOTE(review): this relies on the base visitor forwarding the
        ``parents`` keyword to ``visit_*`` methods and to child visits;
        confirm against GrammarVisitor.
        """
        super().visit(node, parents=(*parents, node))

    def visit_Cut(
        self, node: grammar.Cut, parents: tuple[Any, ...] = ()
    ) -> None:
        """Raise ValidationError unless the cut sits directly in a rule alternative.

        *parents* is the chain of nodes leading to the cut, ending with the
        cut itself.  The only accepted chain is
        Rule -> Rhs -> Alt -> NamedItem -> Cut.
        """
        # Compare class names so the whole path is checked in one go; any
        # extra or missing level means the cut is nested in something else.
        parent_types = [type(p).__name__ for p in parents]
        if parent_types != ['Rule', 'Rhs', 'Alt', 'NamedItem', 'Cut']:
            raise ValidationError(
                f"Rule {self.rulename!r} contains cut that's not on the "
                "top level. "
                "The intended semantics of such cases need "
                "to be clarified; see the CutValidator docstring."
                f"\nThe cut is inside: {parent_types}"
            )

def validate_grammar(the_grammar: grammar.Grammar) -> None:
for validator_cls in GrammarValidator.__subclasses__():
validator = validator_cls(the_grammar)
Expand Down
Loading