Skip to content

Commit

Permalink
Merge pull request #3 from sourcery-ai/fix/unicode-escapes
Browse files Browse the repository at this point in the history
fix: support correct parsing of f-strings
  • Loading branch information
bm424 authored May 27, 2022
2 parents b85f9da + 3373f60 commit f743293
Show file tree
Hide file tree
Showing 8 changed files with 43,026 additions and 32,702 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@ build
*.log
/examples/*/
/target/
log.html
24 changes: 24 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,30 @@ Python grammar for [tree-sitter][].

[tree-sitter]: https://github.com/tree-sitter/tree-sitter

### Quickstart

**Install**
```sh
npm install
```

**Build**
```sh
npm run build
```

**Tests**
```sh
npm run test
```

**Explain**
```sh
npm run env -- tree-sitter parse path/to/file.py -D
```

This will produce a file `log.html` which you can open in your browser.

#### References

* [Python 2 Grammar](https://docs.python.org/2/reference/grammar.html)
Expand Down
117 changes: 88 additions & 29 deletions grammar.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
const PREC = {
named_expression: -3,
// This resolves a conflict between the use of ':' in a lambda and its use in
// a typed parameter. Typed parameters are not allowed in a lambda.
lambda: -2,
Expand All @@ -7,19 +8,19 @@ const PREC = {

parenthesized_expression: 1,
parenthesized_list_splat: 1,
or: 10,
and: 11,
not: 12,
compare: 13,
bitwise_or: 14,
or: 10,
and: 11,
not: 12,
compare: 13,
bitwise_or: 14,
bitwise_and: 15,
xor: 16,
shift: 17,
plus: 18,
times: 19,
unary: 20,
power: 21,
call: 22,
xor: 16,
shift: 17,
plus: 18,
times: 19,
unary: 20,
power: 21,
call: 22,
}

const SEMICOLON = ';'
Expand All @@ -38,6 +39,7 @@ module.exports = grammar({
[$.tuple, $.tuple_pattern],
[$.list, $.list_pattern],
[$.with_item, $._collection_elements],
[$._named_expresssion_lhs, $.pattern],
[$.named_expression, $.as_pattern],
[$.match_statement, $.primary_expression],
],
Expand Down Expand Up @@ -189,11 +191,11 @@ module.exports = grammar({
$.yield
),

named_expression: $ => seq(
named_expression: $ => prec(PREC.named_expression, seq(
field('name', $._named_expresssion_lhs),
':=',
field('value', $.expression)
),
)),

_named_expresssion_lhs: $ => choice(
$.identifier,
Expand Down Expand Up @@ -900,21 +902,90 @@ module.exports = grammar({
repeat1($.string)
),

string: $ => seq(
string: $ => prec(40, seq(
alias($._string_start, '"'),
repeat(choice($.interpolation, $._escape_interpolation, $.escape_sequence, $._not_escape_sequence, $._string_content)),
repeat(
choice(
$.interpolation,
$._escape_interpolation,
$.escape_sequence,
$._not_escape_sequence,
$._string_content
)
),
alias($._string_end, '"')
)),

// `not <expr>` where the operand is limited to `_interpolation_expression`.
// Uses the same precedence level as PREC.not and labels the operand with the
// `argument` field. `_interpolation_expression` aliases this rule to
// `not_operator`.
_interpolation_not_operator: $ => prec(PREC.not, seq(
'not',
field('argument', $._interpolation_expression)
)),

// `await <expr>` where the operand is limited to `_interpolation_expression`.
// Bound at PREC.unary; `_interpolation_expression` aliases this rule to `await`.
_interpolation_await: $ => prec(PREC.unary, seq(
'await',
$._interpolation_expression
)),

// Binary `and` / `or` whose operands are both `_interpolation_expression`.
// Each alternative is left-associative and carries `left`, `operator` and
// `right` fields; `and` uses PREC.and and `or` uses PREC.or.
// `_interpolation_expression` aliases this rule to `boolean_operator`.
_interpolation_boolean_operator: $ => choice(
prec.left(PREC.and, seq(
field('left', $._interpolation_expression),
field('operator', 'and'),
field('right', $._interpolation_expression)
)),
prec.left(PREC.or, seq(
field('left', $._interpolation_expression),
field('operator', 'or'),
field('right', $._interpolation_expression)
))
),

// `<expr> if <expr> else <expr>` with all three parts limited to
// `_interpolation_expression`. Right-associative.
// NOTE(review): PREC.conditional is not visible in this excerpt of PREC;
// confirm that it is defined there.
// `_interpolation_expression` aliases this rule to `conditional_expression`.
_interpolation_conditional_expression: $ => prec.right(PREC.conditional, seq(
$._interpolation_expression,
'if',
$._interpolation_expression,
'else',
$._interpolation_expression
)),

// The expression forms accepted between the braces of `interpolation` and
// `format_expression`. The `_interpolation_*` helper rules are aliased to
// `not_operator`, `boolean_operator`, `await` and `conditional_expression`.
// `comparison_operator` and `primary_expression` are referenced directly.
_interpolation_expression: $ => choice(
$.comparison_operator,
alias($._interpolation_not_operator, $.not_operator),
alias($._interpolation_boolean_operator, $.boolean_operator),
alias($._interpolation_await, $.await),
$.primary_expression,
alias($._interpolation_conditional_expression, $.conditional_expression)
),

interpolation: $ => seq(
'{',
$.expression,
$._interpolation_expression,
optional('='),
optional($.type_conversion),
optional($.format_specifier),
'}'
),

// A ':' followed by zero or more pieces, each either a run of literal text
// or a nested `format_expression`. Wrapped in prec(5, ...).
format_specifier: $ => prec(5, seq(
':',
repeat(choice(
// Literal text: one or more characters other than '{', '}' and newline,
// lexed as a single token with lexical precedence 1.
token(prec(1, /[^{}\n]+/)),
$.format_expression,
))
)),

// A braced replacement field nested inside a `format_specifier`:
// '{' expression, then optional '=', optional `type_conversion` and an
// optional format spec, then '}'. The nested spec is matched by the regex
// rule `_literal_format_specifier` and aliased to `format_specifier`.
format_expression: $ => seq(
'{',
$._interpolation_expression,
optional('='),
optional($.type_conversion),
optional(alias($._literal_format_specifier, $.format_specifier)),
'}',
),

_literal_format_specifier: $ => /:(?:[^{}]?[<>=])?[\+\- ]?\d*[_,]?(?:.\d+)?[bcdeEfFgGnosxX%]?/,

// A '!' followed by exactly one lowercase ASCII letter.
type_conversion: $ => /![a-z]/,

// A doubled brace, either '{{' or '}}' (presumably the escape for a literal
// brace inside an f-string; confirm against the scanner).
_escape_interpolation: $ => choice('{{', '}}'),

escape_sequence: $ => token(prec(1, seq(
Expand All @@ -932,18 +1003,6 @@ module.exports = grammar({

// A single backslash character. The name suggests it covers a backslash that
// does not start a recognised `escape_sequence` (TODO confirm).
_not_escape_sequence: $ => '\\',

// A ':' followed by zero or more pieces, each either a run of literal text
// or a nested `format_expression`.
format_specifier: $ => seq(
':',
repeat(choice(
// Literal text: one or more characters other than '{', '}' and newline,
// lexed as a single token with lexical precedence 1.
token(prec(1, /[^{}\n]+/)),
$.format_expression
))
),

// A braced expression nested inside a format specifier: '{' expression '}'.
format_expression: $ => seq('{', $.expression, '}'),

// A '!' followed by exactly one lowercase ASCII letter.
type_conversion: $ => /![a-z]/,

integer: $ => token(choice(
seq(
choice('0x', '0X'),
Expand Down
Loading

0 comments on commit f743293

Please sign in to comment.