529 Commits

Author SHA1 Message Date
Dave Halter
3b263f0a0d Fix a failing test 2020-07-24 01:01:23 +02:00
Dave Halter
f52103f236 Prepare 0.7.1 release 2020-07-24 00:54:07 +02:00
Dave Halter
c53321a440 Comprehensions are not valid as class params, fixes #122 2020-07-24 00:32:24 +02:00
Dave Halter
d8a70abf19 Merge pull request #145 from PeterJCLaw/expose-type-stubs
Let consumers know that we have type annotations
2020-07-21 23:42:01 +02:00
Peter Law
c19d7c4e6d Let consumers know that we have type annotations
As well as the type stubs, this includes both the py.typed flag
file (for tools) and the classifier (for people).
2020-07-21 22:33:39 +01:00
Batuhan Taskaya
d42c0f1b3b Merge pull request #143 from Carreau/parse-alpha
Parse alpha, beta and rc versions strings.
2020-07-01 11:14:40 +03:00
Matthias Bussonnier
40e78ff7e0 Parse alpha, beta and rc versions strings.
fixes #142
2020-06-30 13:28:09 -07:00
Batuhan Taskaya
c88a2675b0 Merge pull request #140 from Kazy/fix-139-async-for-newline
Fix #139: newlines in async for comprehension
2020-06-29 20:01:53 +03:00
Jocelyn Boullier
88874a5a9f Fix #139: newlines in async for comprehension 2020-06-29 18:40:55 +02:00
Dave Halter
1e4076f9d9 Merge pull request #141 from isidentical/f-string-errors
Handle 3.9>= f-string errors
2020-06-29 00:03:57 +02:00
Batuhan Taskaya
73796f309d Just raise the f-string error, pass the other 2020-06-28 19:53:57 +03:00
Batuhan Taskaya
1cacdf366e Raise custom errors after break tokens 2020-06-28 19:48:11 +03:00
Batuhan Taskaya
d352bede13 Cover errors that raised by ErrorFinder 2020-06-28 19:37:22 +03:00
Batuhan Taskaya
572be783f3 Cover invalid syntaxes 2020-06-28 18:41:18 +03:00
Batuhan Taskaya
31171d7ae6 Handle 3.9>= f-string errors 2020-06-28 18:04:42 +03:00
Dave Halter
7e0586b0b9 Add a PyPI downloads badge 2020-06-27 15:18:27 +02:00
Dave Halter
cc347b1d3b Merge pull request #137 from isidentical/cannot-delete-starred
Update starred deletion messages for 3.9+
2020-06-22 00:15:01 +02:00
Batuhan Taskaya
841a5d96b3 Update starred deletion messages for 3.9+ 2020-06-21 19:47:18 +03:00
Dave Halter
d68b4e0cab Use Python 3 in deployment script 2020-06-20 01:21:35 +02:00
Dave Halter
d55b4f08dc Merge pull request #136 from davidhalter/permission_errors
Ignore permission errors when saving to cache
2020-06-19 20:27:59 +02:00
Dave Halter
58790c119e Fix issues of #136 2020-06-19 20:20:00 +02:00
Dave Halter
3923ecf12f Ignore permission errors when saving to cache
This might happen when a user doesn't have full access to his home directory.
Fixes davidhalter/jedi#1615
2020-06-19 12:06:46 +02:00
Dave Halter
bd33e4ef7e Merge pull request #135 from isidentical/starred-expr
Improve handling of starred expression on different contexts
2020-06-05 12:58:14 +02:00
Batuhan Taskaya
891bfdaa04 Test only python3+ 2020-06-04 22:09:04 +03:00
Batuhan Taskaya
5e1828b3f0 Check full error message 2020-06-04 22:02:12 +03:00
Batuhan Taskaya
6daf91880b Add a special case against augassign 2020-06-04 21:47:28 +03:00
Batuhan Taskaya
44cf64a5f7 Improve handling of starred expression on different contexts (load/store) 2020-06-04 21:35:48 +03:00
Batuhan Taskaya
fe24f0dc1b Implement garbage collections for inactive cache files (#121)
Cache files that weren't accessed in the last 30 days will be automatically
garbage collected. This collection happens when the `save_module` is called
via a lock system that would make it happen only one time per day.
2020-06-02 12:36:05 +03:00
Dave Halter
450e9d0a19 Merge pull request #130 from yuan-xy/patch-1
fix dump_nfa
2020-05-30 12:11:08 +02:00
yuan
93b5e6dffc Fix a one-word typo 2020-05-29 10:30:08 +03:00
yuan
4403b5cac5 Update generator.py 2020-05-29 08:56:38 +08:00
Batuhan Taskaya
6f29c551fd Adjust invalid aug assign target for 3.9+ 2020-05-27 00:55:31 +02:00
Dave Halter
d6b1d19d87 Merge pull request #129 from isidentical/extended-rhs-for-annassign
Extend annotated assignment rule's RHS
2020-05-26 00:13:46 +02:00
Batuhan Taskaya
e0dc415bbc Extend annotated assignment rule's RHS 2020-05-26 01:10:04 +03:00
Batuhan Taskaya
4c2c0ad077 Add python3.10 grammar (#125) 2020-05-26 00:58:09 +03:00
Batuhan Taskaya
5daa8b1db6 Merge pull request #124 from isidentical/nightly-builds 2020-05-25 00:18:29 +03:00
Batuhan Taskaya
c05e14c24e Test parso on nightly builds 2020-05-25 00:11:46 +03:00
Dave Halter
846513584e Merge pull request #119 from isidentical/check-all-args
Check all arguments for unparenthesized generator expressions
2020-05-23 23:18:00 +02:00
Batuhan Taskaya
6b0e01c220 Revert trailing comma for 3.6< 2020-05-23 21:17:08 +03:00
Batuhan Taskaya
92396a9a16 allow trailing comma <3.6, test both postive/negative cases 2020-05-23 17:45:20 +03:00
Batuhan Taskaya
fe54800cdd Check all arguments for unparenthesized generator expressions
Previously only the first argument on the argument list checked
against the generator expressions, now all argumnets are controlled.
2020-05-23 16:57:34 +03:00
Dave Halter
6ecd975516 Merge pull request #117 from isidentical/repeated-kwarg-39
Show which keyword argument is repeated on 3.9+
2020-05-23 15:15:14 +02:00
Batuhan Taskaya
27a7c16803 assert full message 2020-05-23 15:51:00 +03:00
Batuhan Taskaya
a06521d912 Don't give syntax errors for parenthesised kwargs <3.8 2020-05-23 14:43:43 +02:00
Batuhan Taskaya
216a77dce5 Show which keyword argument is repeated on 3.9+ 2020-05-23 14:06:24 +03:00
Dave Halter
8bb211fafb Merge pull request #116 from isidentical/forbidden-name
Raise violation on starred expressions where the child is a boolean/none
2020-05-23 11:51:08 +02:00
Batuhan Taskaya
342e308f57 Move checking to the _CheckAssignmentRule 2020-05-23 01:18:23 +03:00
Batuhan Taskaya
8f46481aaf Raise violation on starred expressions where the child is a boolean/none 2020-05-23 01:09:38 +03:00
Dave Halter
00621977b7 Merge pull request #115 from isidentical/finally-in-continue
Support finally in continue on 3.8+
2020-05-22 23:44:26 +02:00
Batuhan Taskaya
077e34be84 Support finally in continue on 3.8+
Thanks to [bpo-32489](https://bugs.python.org/issue32489) and sadly
for rejection of my [PEP 601](https://www.python.org/dev/peps/pep-0601/)
finally in continue is supported in 3.8+. I checked the blame and looks
like there was already a commit for the same subject, but that only
changes the test and not actually changes the checker (dfe7fba08e)
2020-05-22 18:47:46 +03:00
Dave Halter
a3f851d8f6 Merge pull request #114 from isidentical/future-annotations
Add support for 'from __future__ import annotations'
2020-05-22 16:18:53 +02:00
Batuhan Taskaya
261132e74c Add support for 'from __future__ import annotations'
PEP 563 brought a new `__future__` import for post-poning evaluation
of annotations that introduced in 3.7. This patch adds support for
that future feature, and removes 'all_feature_names' from that list
since it is not valid a syntax
(`from __future__ import all_feature_names`). Also it fixes a bug
related usage of `ALLOWED_FUTURES` (global and version independant
flags) instead of `allowed_futures` (extended version of the previ
ous flag that has some version specific flags, probably unnoticed)
2020-05-22 17:14:33 +03:00
Batuhan Taskaya
345374d040 Allow 'any' expression on decorators, PEP 614 2020-05-22 10:17:17 +02:00
Batuhan Taskaya
f8709852e3 Adapt Python3.9 errors on multiple star target
In Python3.9, the message "two starred expression in ..." changed
to "multiple starred expression in ...", with python/cpython#19168
2020-05-21 20:46:41 +02:00
Batuhan Taskaya
2dcc0d3770 Quick fix about invalid version test 2020-05-21 20:45:10 +02:00
Batuhan Taskaya
34b8b7dd79 Correctly parse 2-digit minor versions (py3.10) 2020-05-21 16:21:22 +02:00
WinChua
caadf3bf4c approve hit msg when python version is unsupported
currently, when the python version used is not supported, it will raise "Python version None is currently not supported."
2020-05-17 16:52:40 +02:00
Dave Halter
1b4c75608a Fix a python_bytes_to_unicode issue, fixes #107 2020-05-14 23:34:14 +02:00
Dave Halter
15403fd998 Use a Windows cache folder change from Jedi
See also 1115cbd94dcae6fb7b215c51f0407333c92c956e in Jedi and the PR in davidhalter/jedi#1575
2020-05-10 11:50:00 +02:00
Dave Halter
b9725364ab Add a lot of comment to the diff parser 2020-04-13 11:46:36 +02:00
Dave Halter
66ecc264f9 Write 0.7.0 release notes 2020-04-13 11:15:05 +02:00
Dave Halter
63b73a05e6 Diff parser: Take care of one line function error recovery with decorator 2020-04-13 11:07:37 +02:00
Dave Halter
baec4ac58f Diff parser: Take care of one line function error recovery 2020-04-12 02:47:46 +02:00
Dave Halter
b5f58ac33c Ignore some slow files for the fuzzer 2020-04-12 01:14:24 +02:00
Dave Halter
83cb71f7a1 The fuzzer now tries to reuse previous modfiications as well sometimes 2020-04-11 23:29:00 +02:00
Dave Halter
30a2b2f40d Fix an error case with prefixes 2020-04-11 22:51:17 +02:00
Dave Halter
d81e393c0c Fix indentation issues with backslashes and def error recovery 2020-04-10 21:48:28 +02:00
Dave Halter
7822f8be84 Python 2 compatibility 2020-04-09 22:47:50 +02:00
Dave Halter
93788a3e09 Add a test for the diff parser that xfails 2020-04-09 00:03:39 +02:00
Dave Halter
085f666ca1 Add more tokens that can break parens to tokenizer 2020-04-08 23:24:30 +02:00
Dave Halter
9e546e42de Diff parser: Fix another byte order mark issue 2020-04-07 22:58:47 +02:00
Dave Halter
7b14a86e0a Fix tokenizer error tokens 2020-04-07 09:55:28 +02:00
Dave Halter
f45941226f Diff parser: Fix other BOM issues 2020-04-07 01:06:03 +02:00
Dave Halter
e04552b14a Fix tests for Python 2 2020-04-06 23:52:29 +02:00
Dave Halter
cd9c213a62 Fix fstring issues when error leaves are involved 2020-04-06 23:34:27 +02:00
Dave Halter
561e81df00 Replace non utf8 errors properly in diff fuzzer 2020-04-06 02:04:48 +02:00
Dave Halter
556ce86cde Tokenizer: It should not be possible to break out of backslashes on the next line, even if it was an error 2020-04-06 01:25:06 +02:00
Dave Halter
b12dd498bb Diff parser: Fix BOM with indentation issues 2020-04-05 20:47:49 +02:00
Dave Halter
db10b4fa72 Diff parser: Need to care for eror dedents in some open parentheses/always break contexts 2020-04-05 14:39:56 +02:00
Dave Halter
ed38518052 Diff parser: Make sure that nested suites get properly copied 2020-04-05 02:48:41 +02:00
Dave Halter
ebc69545c7 Fix error recovery for multi line strings at the end of the file 2020-04-05 00:13:55 +02:00
Dave Halter
67ebb6acac async is actually a token that cannot appear in brackets 2020-04-04 23:14:10 +02:00
Dave Halter
bcf76949b6 Diff parser: Remove error statements before caring about nested functions 2020-04-04 22:43:33 +02:00
Dave Halter
6c7b397cc7 Diff parser: Check indentation for copies correctly 2020-04-04 20:36:19 +02:00
Dave Halter
1927ba7254 Start using the parser count/copy count again 2020-04-04 17:49:35 +02:00
Dave Halter
a6c33411d4 Remove all the error dedent/indent additions in the diff parser
The parser should just reparse stuff that is strangely indented
2020-04-04 16:15:17 +02:00
Dave Halter
f8dce76ef7 Make sure to only copy nodes that have the same indentation in diff parser 2020-04-04 16:07:54 +02:00
Dave Halter
3242e36859 Python 2 compatibility 2020-04-04 15:45:03 +02:00
Dave Halter
734a4b0e67 Remove support for specialized treatment of form feeds
This is a very intentional change. Previously form feeds were handled very
poorly and sometimes where not counted as indentation. This obviously makes
sense. But at the same time indentation is very tricky to deal with (both for
editors and parso).

Especially in the diff parser this led to a lot of very weird issues. The
decision probably makes sense since:

1. Almost nobody uses form feeds in the first place.
2. People that use form feeds like Barry Warsaw often put a newline ater them.
   (e.g Python's email.__init__)
3. If you write an editor you want to be able to identify a unicode character
   with a clear line/column. This would not be the case if form feeds were just
   ignored when counting.

Form feeds will still work in Jedi, will not cause parse errors and in general
you should be fine using them. It might just cause Jedi to count them as
indentation **if** you use it like '\f  foo()'. This is however confusing for
most editors anyway. It leads to a weird display e.g. in VIM, even if it's
perfectly valid code in Python.

Since parso is a code analysis parser and not the languages parser I think it's
fine to ignore this edge case.
2020-04-04 15:38:10 +02:00
Dave Halter
1047204654 Small tokenizer refactoring 2020-04-04 13:13:00 +02:00
Dave Halter
ae6af7849e Diff parser: All indent checks should use _get_indent 2020-04-04 13:08:47 +02:00
Dave Halter
e1632cdadc Fix some issues with async funcs 2020-04-04 04:01:15 +02:00
Dave Halter
7f0dd35c37 Remove the piece of shit _get_insertion_node function 2020-04-04 03:51:28 +02:00
Dave Halter
ad88783ac9 Remove get_first_indentation 2020-04-03 16:47:00 +02:00
Dave Halter
8550a52e48 Remove indents from _NodesTreeNode 2020-04-03 16:26:01 +02:00
Dave Halter
c88a736e35 Fix indent issues 2020-04-03 16:24:26 +02:00
Dave Halter
a07146f8a5 Deal with indents in diff parser more explicitly 2020-04-03 12:41:28 +02:00
Dave Halter
0c0aa31a91 Don't use max as a variable 2020-04-03 03:35:21 +02:00
Dave Halter
77327a4cea Make node insertion a bit easier 2020-04-03 03:28:14 +02:00
Dave Halter
8bbd304eb9 Define token types a bit different in diff parser 2020-04-03 01:05:11 +02:00
Dave Halter
62fd03edda Pass tokens in diff tokenizer 2020-04-03 01:01:37 +02:00
Dave Halter
12063d42fc When debugging print 2020-04-03 00:56:59 +02:00
Dave Halter
c86af743df Initialize start pos properly in diff parser 2020-04-03 00:54:13 +02:00
Dave Halter
fb2ea551d5 Move the tokenizer/diff parser closer together 2020-04-03 00:18:35 +02:00
Dave Halter
ce170e8aae WIP: Try to use the tokenizer in a more native way 2020-04-02 02:00:35 +02:00
Dave Halter
d674bc9895 Fix a backslash issue 2020-03-29 23:59:53 +02:00
Dave Halter
0d9886c22a Diff parser: Rewrite tokenizer modifications a bit 2020-03-29 22:41:59 +02:00
Dave Halter
9f8a68677d Tokenizer: It's now clearer when an error dedent appears 2020-03-29 13:50:36 +02:00
Dave Halter
a950b82066 Fix tokenizer for random invalid unicode points 2020-03-28 21:02:04 +01:00
Dave Halter
38b7763e9a Use _assert_nodes_are_equal in the fuzzer 2020-03-28 14:51:27 +01:00
Dave Halter
cf880f43d4 Tokenizer: Add error dedents only if parens are not open 2020-03-28 14:41:10 +01:00
Dave Halter
8e49d8ab5f Fix tokenizer fstring end positions 2020-03-28 11:22:32 +01:00
Dave Halter
77b3ad5843 Small flake8 refactoring 2020-03-28 10:41:00 +01:00
Dave Halter
29e3545241 Fix adding error indents/dedents only at the right places 2020-03-27 17:05:05 +01:00
Dave Halter
3d95b65b21 Fix an issue with unfinished f string literals 2020-03-27 11:17:31 +01:00
Dave Halter
b86ea25435 Add a bit to the CHANGELOG 2020-03-24 22:38:18 +01:00
Dave Halter
4c42a82ebc Allow multiple newlines in a suite, this makes the diff parser easier 2020-03-24 22:35:21 +01:00
Dave Halter
43651ef219 Diff parser: Make sure dedent start pos are matching 2020-03-24 22:27:04 +01:00
Dave Halter
419d9e3174 Diff parser: Fix a few more indentation issues 2020-03-24 22:03:29 +01:00
Dave Halter
2bef3cf6ff Fix an issue where indents where repeated unnessecarily 2020-03-24 00:24:53 +01:00
Dave Halter
8e95820d78 Don't show logs in pytest, because they already appear by default 2020-03-23 23:53:23 +01:00
Dave Halter
c18c89eb6b Diff parser: Correctly add indent issues 2020-03-23 00:16:47 +01:00
Dave Halter
afc556d809 Diff parser: Prepare for indent error leaf insertion 2020-03-22 22:57:58 +01:00
Dave Halter
cdb791fbdb Diff parser: Add error dedents if necessary, see also davidhalter/jedi#1499 2020-03-22 21:37:25 +01:00
Dave Halter
93f1cdebbc Try to make parsed trees more similar for incomplete dedents, see also davidhalter/jedi#1499 2020-03-22 21:15:22 +01:00
Dave Halter
d3ceafee01 Specify in tests how another dedent issue is recovered from 2020-03-22 19:34:12 +01:00
Dave Halter
237dc9e135 Diff parser: Make sure to pop nodes directly after error nodes, see also davidhalter/jedi#1499 2020-03-22 14:49:22 +01:00
Dave Halter
bd37353042 Move a bit of code 2020-03-22 13:46:13 +01:00
Dave Halter
51a044cc70 Fix diff parser: Invalid dedents meant that sometimes the wrong parents were chosen, fixes davidhalter/jedi#1499 2020-03-22 12:41:19 +01:00
Dave Halter
2cd0d6c9fc Fix: Dedent omission was wrong, see davidhalter/jedi#1499 2020-03-22 12:41:19 +01:00
Daniel Hahler
287a86c242 ci: Travis: use Python 3.8.2
Ref: https://github.com/davidhalter/parso/issues/103
2020-02-28 00:51:06 +01:00
Dave Halter
0234a70e95 Python 3.8.2 was released and an error message changed, fixes #103 2020-02-28 00:31:58 +01:00
Dave Halter
7ba49a9695 Prepare the 0.6.2 release 2020-02-27 02:10:06 +01:00
Dave Halter
53da7e8e6b Fix get_next_sibling on module, fixes #102 2020-02-21 18:31:13 +01:00
Dave Halter
6dd29c8efb Fix ExprStmt.get_rhs for annotations 2020-02-21 18:31:13 +01:00
Dave Halter
e4a9cfed86 Give parso refactoring tools 2020-02-21 18:31:13 +01:00
Joe Antonakakis
a7f4499644 Add venv to .gitignore (#101) 2020-02-14 14:28:07 +01:00
Dave Halter
4306e8b34b Change the release date for 0.6.1 2020-02-03 21:46:25 +01:00
Dave Halter
2ce3898690 Prepare the next release 0.6.1 2020-02-03 18:40:05 +01:00
Dave Halter
16f257356e Make end_pos public for syntax issues 2020-02-03 18:36:47 +01:00
Dave Halter
c864ca60d1 Bump version to 0.6.0 2020-01-26 20:01:38 +01:00
Dave Halter
a47b5433d4 Make sure iter_funcdefs includes async functions with decorators, fixes #98 2020-01-26 20:00:56 +01:00
Dave Halter
6982cf8321 Add a bit to the changelog 2020-01-26 19:47:46 +01:00
Dave Halter
844ca3d35a del_stmt is now considered a name definition 2020-01-26 19:42:12 +01:00
Dave Halter
9abe5d1e55 Forgot to increase the pickle version 2020-01-20 01:28:06 +01:00
Jarry Shaw
84874aace3 Revision on fstring issues (#100)
* f-string expression part cannot include a backslash
 * failing example `f"{'\n'}"` for tests
2020-01-09 21:49:34 +01:00
Jarry Shaw
55531ab65b Revision on assignment errors (#97)
* Revision on assignment expression errors

 * added rule for __debug__ (should be a keyword)
 * reviewed error messages
 * added new failing samples

* Adjustment upon Dave's review

 * rewind several changes in assignment errors
 * patched is_definition: command not found for assignment expressions
 * patched Python 2 inconsistent error messages in test_python_errors.py: command not found
2020-01-08 23:07:37 +01:00
Dave Halter
31c059fc30 Add a Changelog note about dropping 2.6/3.3 2020-01-06 00:05:11 +01:00
Dave Halter
cfef1d74e7 Fix a Python 2.7 issue 2020-01-06 00:02:26 +01:00
Dave Halter
9ee7409d8a Get rid of Python 3.3 artifacts 2020-01-05 23:59:38 +01:00
Dave Halter
4090c80401 Remove Python 2.6 grammar 2020-01-05 23:55:03 +01:00
Dave Halter
95f353a15f Merge branch 'rm-2.6' of https://github.com/hugovk/parso 2020-01-05 23:50:20 +01:00
Dave Halter
2b0b093276 Make sure to limit the amount of cached files parso stores, fixes davidhalter/jedi#1340 2020-01-05 23:44:51 +01:00
Tim Gates
29b57d93bd Fix simple typo: utitilies -> utilities
Closes #94
2019-12-17 10:00:28 +01:00
Dave Halter
fb010f2b5d Add a release date to the Changelog 2019-12-15 01:00:38 +01:00
Dave Halter
5e12ea5e04 Prepare the next release v0.5.2 2019-12-15 00:55:19 +01:00
Dave Halter
ceb1ee81fa Merge pull request #93 from yangyangxcf/fstring_tokenize
fixed #86 and #87
2019-12-15 00:47:32 +01:00
Dave Halter
bc94293794 Add information about named expressions (#90) to the Changelog 2019-12-15 00:29:41 +01:00
Dave Halter
1122822b7d Use a lower pytest version so python3.4 is able to pass 2019-12-15 00:13:48 +01:00
Dave Halter
09abe42cce Use Python 3.8 on travis for testing 2019-12-15 00:12:36 +01:00
Dave Halter
38cdcceba5 Whitespace changes 2019-12-15 00:06:37 +01:00
Dave Halter
753e1999fe Fix: Add more cases for named expression errors, see #89, #90 2019-12-15 00:04:38 +01:00
Dave Halter
3c475b1e63 Add Python 3.8 to tested environments for tox 2019-12-14 23:59:16 +01:00
Dave Halter
5f04dad9ab Fix: Catch some additional cases named expr errors, see #89, #90 2019-12-14 23:31:43 +01:00
Dave Halter
dbba1959f7 Make sure that function executions are errors as well, see #90 2019-12-14 23:23:00 +01:00
Dave Halter
5fda85275b Some minor refactorings for #90
- search_ancestor is now used instead of using node = node.parent
- Some lines were too long
2019-12-14 23:12:16 +01:00
Dave Halter
32584ac731 Merge https://github.com/JarryShaw/parso into master 2019-12-14 22:21:22 +01:00
Jarry Shaw
89c4d959e9 * moved all namedexpr_test related rules to _NamedExprRule
* added valid examples
2019-12-14 09:37:16 +01:00
Jarry Shaw
776e151370 Revised implementation
* search ancestors of namedexpr_test directly for comprehensions
 * added test samples for invalid namedexpr_test syntax
2019-12-13 11:55:53 +08:00
yangyang
53a6d0c17a spelling 2019-12-06 15:24:33 +08:00
yangyang
b90e5cd758 fixed #86 and #87 2019-12-05 19:22:58 +08:00
Robin Fourcade
e496b07b63 Fix trailing comma error 2019-12-04 22:59:24 +01:00
Jarry Shaw
76fe4792e7 Deal with nested comprehension
e.g. `[i for i, j in range(5) for k in range (10) if True or (i := 1)]`
2019-12-01 16:23:18 +08:00
Jarry Shaw
8cae7ed526 Fixing davidhalter/parso#89
[all changes are in parso/python/errors.py]

* utility function (`_get_namedexpr`) extracting all assignment expression (`namedexpr_test`) nodes
* add `is_namedexpr` parameter to `_CheckAssignmentRule._check_assignment` and special error message for assignment expression related assignment issues (*cannot use named assignment with xxx*)
* add assignment expression check to `_CompForRule` (*assignment expression cannot be used in a comprehension iterable expression*)
* add `_NamedExprRule` for special assignment expression checks
  - *cannot use named assignment with lambda*
  - *cannot use named assignment with subscript*
  - *cannot use named assignment with attribute*
  - and fallback general checks in `_CheckAssignmentRule._check_assignment`
* add `_ComprehensionRule` for special checks on assignment expression in a comprehension
  - *assignment expression within a comprehension cannot be used in a class body*
  - *assignment expression cannot rebind comprehension iteration variable 'xxx'*
2019-12-01 15:43:17 +08:00
Ian Tabolt
ee2995c110 Remove debug print statement 2019-09-28 11:01:52 +02:00
Naglis
76aaa2ddba Fix typo (#84) 2019-09-15 19:53:30 +02:00
Dave Halter
3ecd4dddb4 Fix is_definition test 2019-09-05 23:28:46 +02:00
Dave Halter
8f83e9b3c5 Add include_setitem for get_defined_names, is_definition and get_definition 2019-09-04 09:52:55 +02:00
Dave Halter
e8653a49ff Make is_definition work on setitem modifications, see #66 2019-09-04 09:52:55 +02:00
Hugo
d3383b6c41 Fix string/tuple concatenation 2019-08-08 16:49:42 +03:00
Hugo
9da4df20d1 Add python_requires to help pip 2019-08-08 14:57:13 +03:00
Hugo
0341f69691 Drop support for EOL Python 3.3 2019-08-08 14:57:13 +03:00
Hugo
f6bdba65c0 Drop support for EOL Python 2.6 2019-08-08 14:56:27 +03:00
Thomas A Caswell
3bb46563d4 ENH: update grammar for py39 (#78)
* ENH: update grammar for py39

Grammar is copied from cpython commit
b4e68960b90627422325fdb75f463df1e4153c6e

There appears to be 3 new tokens in the grammar (ASYNC, AWAIT, and
TYPE_COMMENT)

* MNT: revert back to py38 grammar as py39 grammar pt1: comments

Looks like upstream has added some comments, remove them

* MNT: remove TYPE_COMMENT added upstream

* MNT: add string / fstring related changes from parso 38 grammer

* MNT: remove changes to support upstream grammar file
2019-07-21 23:45:51 +02:00
Dave Halter
e723b3e74b Refactor the ambiguity tests a bit, see #70 2019-07-13 20:15:56 +02:00
Benjamin Woodruff
0032bae041 Make pgen2's grammar ambiguity detection handle more cases
Under the old implementation,

```
outer: A [inner] B C
inner: B C [inner]
```

wouldn't get detected as the ambiguous grammar that it is, whereas

```
outer: A rest
rest: [inner] B C
inner: B C [inner]
```

would.

This would manifest itself as non-determinism in the DFA state
generation. See the discussion #62 on for a full explanation.

This modifies the ambiguity detection to work on a broader class of
issues, so it should now hopefully detect all cases where the given
grammar is ambiguous.

At some point, we could extend this logic to allow developers to
optionally set precedence of grammar productions, which could resolve
ambiguities, but that's not a strict requirement for parsing python.
2019-07-13 20:04:32 +02:00
Dave Halter
c0ace63a69 For Python 2.7 and 3.4 pytest 5 doesn't work anymore 2019-07-13 15:46:58 +02:00
Dave Halter
399e8e5043 Prepare the 0.5.1 release 2019-07-13 15:39:44 +02:00
Dave Halter
0a5b5f3346 Fix name tokenizing for Python 2 2019-07-13 15:34:23 +02:00
Dave Halter
2b8544021f Fix positioning for names that are interleaved with error tokens 2019-07-13 12:34:49 +02:00
Dave Halter
99dd4a84d4 Merge branch 'master' of github.com:davidhalter/parso 2019-07-12 21:35:06 +02:00
Dave Halter
9501b0bde0 Fixed name tokenizing issues for tamil characters, fixes davidhalter/jedi#1368 2019-07-12 21:31:49 +02:00
Benjamin Woodruff
ad57a51800 Fix line continuation characters inside f-strings
Line continuation characters are valid inside of strings, but weren't
handled correctly in certain cases with f-strings, due to some small
tokenizer bugs.

This pull request to address those issues, and adds tests to validate
the new logic.
2019-07-12 21:20:00 +02:00
Dave Halter
19de3eb5ca Document that the cache uses pickle files 2019-07-10 00:17:28 -07:00
Dave Halter
7441e6b1d2 Fix changelog dates, fixes #77 2019-06-28 02:00:35 -07:00
Dave Halter
df3c494e02 Try to use collections.abc.Mapping instead of collections.Mapping
The latter is deprecated and will be removed in Python 3.9, fixes #76
2019-06-21 10:17:18 +02:00
Dave Halter
59df3fab43 Some small changes to the changelog 2019-06-20 21:15:53 +02:00
Dave Halter
803cb5f25f Make parso work at least somewhat with an older Jedi version 2019-06-20 20:33:14 +02:00
Dave Halter
3fa8630ba9 Use an immutable map for used names, so that it can be use for hashing 2019-06-18 09:12:33 +02:00
Dave Halter
1ca5ae4008 Bump the version number to the next release: 0.5.0 2019-06-13 17:26:08 +02:00
Dave Halter
c3c16169b5 Ignore positional only arguments slash when listing params 2019-06-09 22:55:37 +02:00
Dave Halter
ecbe2b9926 Add positional only arguments to grammar 2019-06-09 21:15:03 +02:00
Dave Halter
1929c144dc Increate the _PICKLE_VERSION to avoid issues with the latest breaking change 2019-06-09 18:11:21 +02:00
Dave Halter
b5d50392a4 comp_for is now called sync_comp_for for all Python versions to be compatible with the Python 3.8 Grammar 2019-06-09 18:00:32 +02:00
Dave Halter
a7aa23a7f0 Parse named expressions 2019-06-02 23:34:37 +02:00
Dave Halter
5430415d44 Change a test, because it doesn't really matter
The test had changed behavior for Python 3.8, a syntax error of:

SyntaxError: unexpected EOF while parsing

instead of

SyntaxError: invalid syntax
2019-06-02 22:54:45 +02:00
Dave Halter
6cdd47fe2b f-string syntax in Python 3.8 was enhanced
See e.g. https://twitter.com/raymondh/status/1135253771846471680
2019-06-02 22:48:47 +02:00
Dave Halter
917b4421f3 Fix fstring format spec parsing, fixes #74 2019-06-02 15:18:42 +02:00
Dave Halter
4f5fdd5a70 Add release notes for the next release 0.4.1 2019-06-02 11:28:00 +02:00
prim
93ddf5322a parse long number notation (#72)
* parse long number notation

* parse long number notation
2019-06-02 11:14:15 +02:00
Dave Halter
a9b61149eb Fix get_decorators for async functions 2019-05-27 01:08:42 +02:00
Dave Halter
de416b082e Make it clear that get_last_modified should not raise an exception, but return None, if it cannot look up a file 2019-05-22 00:16:26 +02:00
Carl Meyer
4b440159b1 Fix __init__.pyi re-exports. 2019-05-10 09:12:32 +02:00
Carl Meyer
6f2d2362c9 Add type stubs. 2019-05-10 09:12:32 +02:00
Dave Halter
8a06f0da05 0.4.0 release notes 2019-04-05 18:57:21 +02:00
Dave Halter
bd95989c2e Change the default tox environments to test
These version will be tested before deploying
2019-04-05 18:55:23 +02:00
Miro Hrončok
57e91262cd Add Python 3.8 to tox.ini
Otherwise we get:

    Matching undeclared envs is deprecated.
    Be sure all the envs that Tox should run are declared in the tox config.
2019-04-05 18:43:43 +02:00
Miro Hrončok
476383cca9 Test on Python 3.8 2019-04-05 18:43:43 +02:00
Dave Halter
b2ab64d8f9 Fix Python 3.8 error issues 2019-04-05 18:30:48 +02:00
Dave Halter
18cbeb1a3d Fix an issue, because sync_comp_for exists now 2019-04-05 16:27:17 +02:00
Dave Halter
a5686d6cda PEP 8 2019-04-05 16:25:45 +02:00
Dave Halter
dfe7fba08e continue in finally is no longer an error 2019-04-05 16:17:30 +02:00
Dave Halter
6db7f40942 Python 2 compatibility 2019-04-03 01:24:06 +02:00
Dave Halter
d5eb96309c Increase the pickle version. With all the changes lately, it's better this way 2019-04-03 01:07:25 +02:00
Dave Halter
4c65368056 Some minor changes to file_io 2019-03-27 01:02:27 +01:00
Dave Halter
3e2956264c Add FileIO to make it possible to cache e.g. files from zip files 2019-03-25 00:48:59 +01:00
Dave Halter
e77a67cd36 PEP 8 2019-03-22 20:17:59 +01:00
Daniel Hahler
c4d6de2aab tests: add coverage tox factor, use it on Travis 2019-03-22 11:01:22 +01:00
Daniel Hahler
7770e73609 ci: Travis: use dist=xenial 2019-03-22 11:01:22 +01:00
Dave Halter
acccb4f28d 0.3.4 release 2019-02-13 00:19:07 +01:00
Dave Halter
3f6fc8a5ad Fix an f-string tokenizer issue 2019-02-13 00:17:37 +01:00
Dave Halter
f1ee7614c9 Release of 0.3.3 2019-02-06 09:55:18 +01:00
Dave Halter
58850f8bfa Rename a test 2019-02-06 09:51:46 +01:00
Dave Halter
d38a60278e Remove some unused code 2019-02-06 09:50:27 +01:00
Dave Halter
6c65aea47d Fix working with async functions in the diff parser, fixes #56 2019-02-06 09:31:46 +01:00
Dave Halter
0d37ff865c Fix bytes/fstring mixing when using iter_errors, fixes #57. 2019-02-06 01:28:47 +01:00
Dave Halter
076e296497 Improve a docstring, fixes #55. 2019-01-26 21:34:56 +01:00
Dave Halter
a2b153e3c1 Upgrade the Changelog 2019-01-24 00:42:53 +01:00
Dave Halter
bb2855897b Escape a backslash properly 2019-01-24 00:31:26 +01:00
Dave Halter
9c9e6ffede Bump the parso version to 0.3.2 2019-01-24 00:22:30 +01:00
Dave Halter
b5d8175eaa f-string parts are also PythonLeaf instances 2019-01-24 00:15:53 +01:00
Dave Halter
32a83b932a Fix get_start_pos_of_prefix 2019-01-24 00:00:06 +01:00
Dave Halter
01ae01a382 Remove dead code 2019-01-23 23:28:18 +01:00
Dave Halter
5fbc207892 Refactor f-string support 2019-01-23 10:58:36 +01:00
Dave Halter
60e4591837 Fix: End detection for strings was mostly wrong, fixes #51 2019-01-23 10:13:25 +01:00
Dave Halter
ef56debb78 Fix f-string escapes, fixes #48
The tokenizer was not detecting backslash escapes for f-string endings properly
2019-01-22 22:20:32 +01:00
Dave Halter
dc2582f488 Tokenizer: Simplify end of string regexes 2019-01-22 09:33:57 +01:00
Dave Halter
fe69989fbc Add a comment from the Python3.7 code base 2019-01-21 00:33:02 +01:00
Dave Halter
ce8b531175 Fix diff parser: The previous fix was a bit off 2019-01-20 19:03:45 +01:00
Dave Halter
069c08883a Change fuzzer: Add ways to not always use correct parse input 2019-01-20 18:18:13 +01:00
Dave Halter
0da0a8655a Fix diff parser: issue with opening brackets 2019-01-20 00:41:11 +01:00
Dave Halter
3d890c3a00 Async doesn't work in 3.4 2019-01-19 12:59:35 +01:00
Dave Halter
956ea55048 Skip some tests for Python2.6 and Python3.3 2019-01-19 12:08:39 +01:00
Dave Halter
0bd17bee2c Fix diff parser: DEDENT as error leaves should also be ignored and reparsed 2019-01-18 18:41:08 +01:00
Dave Halter
f3015efb2d Fix diff parser: error dedents in between nodes should be ignored for now when copying 2019-01-18 02:43:12 +01:00
Dave Halter
197391dc53 Fix diff parser: Don't copy error nodes/leaves in the beginning, leads to strange issues in some cases 2019-01-17 23:48:00 +01:00
Dave Halter
32321a74b1 Diff fuzzer: Create a check to see if the errors make sense. 2019-01-17 22:07:59 +01:00
Dave Halter
52d01685ba Fix diff parser: Don't copy DEDENT tokens at the beginning 2019-01-17 21:31:13 +01:00
Dave Halter
e591b929eb Fix diff parser: Skip last leaves for last line offset leaves 2019-01-17 00:15:38 +01:00
Dave Halter
dac4c445a7 Fix indentation error tokens 2019-01-16 23:21:31 +01:00
Dave Halter
20fd32b45d Fix diff parser: Avoid side effects for prefix 2019-01-14 21:37:19 +01:00
Dave Halter
9cc8178998 Fix tokenizer: backslashes sometimes led to newline token generation 2019-01-14 09:59:16 +01:00
Dave Halter
1e25445176 Make lines easier copyable in the fuzzer 2019-01-14 01:50:39 +01:00
Dave Halter
d7171ae927 Fix tokenizer: Carriage returns after backslashes were not properly handled 2019-01-14 01:49:09 +01:00
Dave Halter
d3d28480ed Fix in diff parser: prefix calculation was wrong when copying nodes 2019-01-14 01:00:17 +01:00
Dave Halter
564be7882e Replace --print-diff with --print-code 2019-01-14 00:20:49 +01:00
Dave Halter
76c5754b76 Fix diff parser generation for empty files 2019-01-13 23:38:35 +01:00
Dave Halter
55247a5a2c Docopt should not be needed for tests 2019-01-13 23:24:17 +01:00
Dave Halter
7ae1efe5c7 Fix tokenizer: Form feeds and multiline docstrings didn't work together 2019-01-13 23:16:09 +01:00
Dave Halter
01dba7f8ce Fix diff parser: Need to calculate the prefix for the diff tokenizer better 2019-01-13 22:38:53 +01:00
Dave Halter
ea8a758051 Remove copied nodes stuff, to simplify some things 2019-01-13 19:57:23 +01:00
Dave Halter
a7e24a37e7 Fix newline endings and a few parser/copy counts 2019-01-13 19:55:18 +01:00
Dave Halter
f80d9de7a0 Feature: The diff parser fuzzer is now able to use random Python fragments
This hopefully leads to the fuzzer finding more and faster issues in the diff
parser.
2019-01-13 16:00:36 +01:00
Dave Halter
eaee2b9ca0 Fix: The Python 3.8 grammar did not include f-string support 2019-01-13 15:51:46 +01:00
Dave Halter
dd1761da96 Fix tokenizer: Closing parentheses in the wrong place should not lead to strange behavior 2019-01-13 14:52:33 +01:00
Dave Halter
e10802ab09 Fix end positions with error dedents 2019-01-13 14:14:16 +01:00
Dave Halter
3d402d0a77 Fix diff parser tests for Python 2 2019-01-10 09:26:42 +01:00
Dave Halter
f6a8b997f2 Randomize the fuzzer a bit more with inserting characters 2019-01-10 01:22:24 +01:00
Dave Halter
94c2681c8e Simplify the regexes 2019-01-10 01:21:56 +01:00
Dave Halter
610a820799 Fix a regex clause that was totally wrong 2019-01-10 01:00:08 +01:00
Dave Halter
57320af6eb Fix another tokenizer issue 2019-01-09 00:55:54 +01:00
Dave Halter
574e1c63e8 Apply \r changes in syntax trees 2019-01-09 00:34:19 +01:00
Dave Halter
fbaad7883f Actually make \r usable 2019-01-08 20:03:08 +01:00
Dave Halter
b1f613fe16 Fix split lines for Python code
Some characters like Vertical Tab or File Separator were used as line separators.
This is not legal. Line Separators in Python are only Carriage Return \r and Line Feed \n.
2019-01-08 08:42:30 +01:00
Dave Halter
f4696a6245 Add \r as a valid linebreak for splitlines 2019-01-07 18:46:16 +01:00
Dave Halter
48c1a0e590 Move split_lines tests around 2019-01-07 18:40:34 +01:00
Dave Halter
6f63147f69 Start generating really random strings with the fuzzer 2019-01-06 20:51:49 +01:00
Dave Halter
94bd48bae1 Fix tokenizer: Dedents before error tokens are properly done, now. 2019-01-06 19:26:49 +01:00
Dave Halter
edbceba4f8 Fix diff parser: Also check async with 2019-01-06 16:25:28 +01:00
Dave Halter
b33c2b3ae1 Make the diff parser use a lot of different files by default 2019-01-06 15:43:37 +01:00
Dave Halter
65a0748f4f Fix diff parser: Forgot that with statments are also flows 2019-01-06 15:41:16 +01:00
Dave Halter
c442cf98be Fix valid graph asserting for some dedents that are errors 2019-01-06 12:39:04 +01:00
Dave Halter
65b15b05e3 Fix diff parser: If funcs are not copied, errors shouldn't either 2019-01-06 11:39:51 +01:00
Dave Halter
26aee1c6a9 Better documentation for the fuzz diff parser script 2019-01-06 01:10:15 +01:00
Dave Halter
c88a862bae Rename a test 2019-01-06 01:08:15 +01:00
Dave Halter
d6b0585933 More verbose output for the diff fuzzer 2019-01-06 01:05:07 +01:00
Dave Halter
6eba40b4c5 Fix diff parser: error dedent issues 2019-01-06 01:00:34 +01:00
Dave Halter
428bde0573 Fix diff parser: Avoid indentation issues 2019-01-05 22:40:31 +01:00
Dave Halter
d1d866f6c6 Use the right diff order in debug output 2019-01-05 18:36:48 +01:00
Dave Halter
a8ec75fedd Fix diff parser: The prefix was wrong in some copy cases 2019-01-05 18:33:38 +01:00
Dave Halter
deaf1f310b Make fuzz parser compatible with Python 2 2019-01-05 14:57:58 +01:00
Dave Halter
2a881bf875 Make it possible to print all diffs in fuzzer 2019-01-05 14:50:59 +01:00
Dave Halter
4d713f56e9 Introduce a redo flag 'only_last' to narrow down issues 2019-01-05 14:20:30 +01:00
Dave Halter
d202fdea49 Add docopt to testing dependencies 2019-01-05 14:09:14 +01:00
Dave Halter
5e6d5dec59 Rewrite the fuzz diff parser to cache errors (so we can re-run those) 2019-01-05 14:05:19 +01:00
Dave Halter
c1846dd082 Fix diff parser: Decorators were sometimes parsed without their functions 2019-01-05 09:29:00 +01:00
Dave Halter
5da51720cd Fix tokenizer: Dedents should only happen after newlines 2019-01-03 11:44:17 +01:00
Dave Halter
fde64d0eae Usability for diff parser fuzzing 2019-01-02 17:31:07 +01:00
Dave Halter
430f13af5e Fix for diff parser: Rewrite prefix logic and don't mutate prematurely 2019-01-02 17:28:01 +01:00
Dave Halter
96ae6a078b Fix diff parser: positioning of functions if decorators were removed 2019-01-02 13:16:22 +01:00
Dave Halter
a9f58b7c45 Ignore ERROR_DEDENT in graph validation 2019-01-02 12:15:05 +01:00
Dave Halter
e0d0e57bd0 Add a small diff parser fuzzer
It should help us find the rest of the issues that the diff parser has
2019-01-02 11:26:31 +01:00
Dave Halter
d2542983e9 Fix diff parser: get_last_line was sometimes wrong
Now the calculation is way simpler. Still annoying that it even happened.
2019-01-02 01:39:53 +01:00
Dave Halter
64cf24d9da Fix error reporting order for diff issues 2019-01-02 00:33:43 +01:00
Dave Halter
02f48a68f2 Clean up the test diff parser file 2019-01-01 23:37:31 +01:00
Dave Halter
c7c464e5e9 Avoid nasty side effects in creation of Node
This issue led to bugs in Jedi, because Jedi used the nodes in a wrong way.
2019-01-01 23:35:20 +01:00
Dave Halter
29325d3052 Make parso errors even more informative 2018-12-31 11:47:02 +01:00
Dave Halter
750b8af37b Fix diff parser get_last_line calculation 2018-12-31 01:25:11 +01:00
Dave Halter
0126a38bd1 Fix graph asserting for error indents 2018-12-30 18:20:55 +01:00
Dave Halter
c2985c111e Better checks for checking valid graphs 2018-12-30 16:34:11 +01:00
Dave Halter
45f9d4b204 Create better ways for debugging the diff parser 2018-12-30 16:03:54 +01:00
Dave Halter
f99fe6ad21 Fix diff-parser: Copying parts of if else should not lead to the whole thing being copied 2018-12-30 15:25:17 +01:00
Dave Halter
a64c32bb2a Reenable diff parser parser counting in all tests 2018-12-30 02:46:44 +01:00
Dave Halter
e5fb1927bb Fix: Make the NodesStack to a NodesTree
This fixes an issue with positions that were doubled if the stack was closed too early.
2018-12-30 01:27:37 +01:00
Dave Halter
0ef4809377 Fix for diff parser : Make sure that start_pos are growing always
The problem was that functions/classes were sometimes not well positioned. Now
all diff tests are ensuring that leaves always grow.
2018-12-28 21:49:49 +01:00
Dave Halter
29456a6c0a Add a check to see if leaves have the right start positions 2018-12-28 02:24:22 +01:00
Dave Halter
ada84ed063 Add parso version to an exception 2018-12-27 13:33:10 +01:00
Thomas A Caswell
1c7b078db0 MNT: add grammar for python 3.8
Coppied from cpython commit 1dd035954bb03c41b954ebbd63969b4bcb0e106e
2018-12-22 12:39:08 +01:00
Hugo
930ec08ab0 Use SVG badges
And update some links to HTTPS.
2018-09-28 18:51:36 +02:00
Daniel Hahler
a90622040d tox.ini: simplify deps 2018-09-22 10:02:38 +02:00
Daniel Hahler
98c02f7d79 tox: add pypy to envlist for tox-travis
Fixes deprecation warning
(https://travis-ci.org/davidhalter/parso/jobs/431468986).
2018-09-22 10:02:38 +02:00
Daniel Hahler
d6d6c5038f setup.py: add "testing" extras_require
Ref: https://github.com/davidhalter/parso/issues/15#issuecomment-339964845
2018-09-22 10:02:38 +02:00
Michael Käufl
3be8ac7786 Add Python 3.7 stable to test matrix and update classifiers 2018-09-13 00:28:27 +02:00
Anders Hovmöller
96f1582b6e Update usage.rst 2018-08-02 22:17:14 +02:00
Dave Halter
7064ecf3fb Don't use invalid escape sequences in regex, see https://github.com/davidhalter/jedi-vim/issues/843 2018-07-12 08:53:48 +02:00
Dave Halter
e6bc924fba Use a setuptools that still supports py33 2018-07-09 20:53:06 +02:00
Dave Halter
59605438e9 3.1 release notes 2018-07-09 20:27:10 +02:00
Dave Halter
e7f71a3eba Use one simple functions to check for funcdefs in diff parser 2018-07-08 20:30:31 +02:00
Dave Halter
3f7aad84f9 Make sure to treat async funcdefs the same way as normal funcdefs 2018-07-08 20:18:15 +02:00
Dave Halter
52e3db4834 Fix an issue in the diff parser
Forgot to check for functions/classes that were part of a decorator/async func.

Fixes https://github.com/davidhalter/jedi/issues/1132
2018-07-06 01:25:06 +02:00
Dave Halter
0daf4d9068 Asterisks in function definitions may be at the end of a func without a comma, fixes #44 2018-07-04 09:51:34 +02:00
Dave Halter
29b6232541 Remove some TODOs that were fixed 2018-07-03 19:28:05 +02:00
Dave Halter
e05d7fd59f Error recovery should not match the whole line in case of an invalid token, fixes #40 2018-07-03 01:31:32 +02:00
Daniel Hahler
7f964c26f2 docs: enable searchbox 2018-07-02 00:51:36 +02:00
Dave Halter
ff67de248f Merge branch 'pgen' 2018-06-29 18:14:03 +02:00
Dave Halter
1af5d9d46b Add a changelog for 0.3.0 2018-06-29 18:13:53 +02:00
Dave Halter
fce3ead829 Bump version to 0.3.0 2018-06-29 18:04:55 +02:00
Dave Halter
55d5d39c53 Add a private API for jedi to work with the parser stack 2018-06-29 10:04:54 +02:00
Dave Halter
c8bf23b787 Remove get_tos_nodes and get_tos_first_tokens, because they are not used (not even in Jedi) 2018-06-29 00:00:09 +02:00
Dave Halter
98c9a1ec7f Better documentation for _add_token 2018-06-28 10:11:44 +02:00
Dave Halter
ecdb90d9bc Way better documentation for the DFA generator 2018-06-28 10:08:09 +02:00
Dave Halter
375ebf2181 Better documentation of the parser generator 2018-06-28 09:49:35 +02:00
Dave Halter
badb2fe010 Move transition_to_generator to transitions 2018-06-28 09:42:37 +02:00
Dave Halter
8e118c913c Remove note about print as absolute import. This is probably not going to happen anymore, Python 2 is pretty much end-of-life 2018-06-28 01:01:46 +02:00
Dave Halter
52fc8fc569 Finish the stack in a way we want to. 2018-06-28 00:59:55 +02:00
Dave Halter
97cdb448d4 Pass tokens around and not all the different token values 2018-06-28 00:33:22 +02:00
Dave Halter
603b67ee6d Just always pass token objects to the tokenizer 2018-06-28 00:18:44 +02:00
Dave Halter
7686273287 Use the stack from the parser itself 2018-06-28 00:12:18 +02:00
Dave Halter
692436ba12 Don't use grammar as an argument anymore, because it's already there 2018-06-28 00:01:47 +02:00
Dave Halter
f7d3d4e82f Merge the PgenParser and our own parser 2018-06-27 23:45:04 +02:00
Dave Halter
edce279dee Remove a function that was no longer used 2018-06-27 23:19:57 +02:00
Dave Halter
a9e40eb578 Simplify error recovery for suites 2018-06-27 22:21:17 +02:00
Dave Halter
b14f518306 Rename the last usage of ilabel to transition 2018-06-27 00:18:27 +02:00
Dave Halter
8407894b25 Fix python 2 tests 2018-06-27 00:15:00 +02:00
Dave Halter
e4efebc9f3 s/ilabel/transition/g 2018-06-26 23:05:04 +02:00
Dave Halter
f66e47c540 Check better for more transitions 2018-06-26 22:53:02 +02:00
Dave Halter
706a92ee0d Merge branch 'master' of github.com:davidhalter/parso 2018-06-26 10:23:17 +02:00
Dave Halter
91d864b23d Make it clearer which things are public in pgen 2018-06-26 10:22:38 +02:00
Dave Halter
e20f2069ba Move the grammar to a fitting file. 2018-06-26 10:20:05 +02:00
Dave Halter
4cf198285a Move things out of the grammar class 2018-06-26 10:15:31 +02:00
Dave Halter
30cf491b4f Move the Grammar to the pgen module 2018-06-26 10:08:44 +02:00
Dave Halter
c1675da0cb Make nonterminal_to_dfas public 2018-06-26 09:56:49 +02:00
Dave Halter
7b7b66eb3c Get rid of the first_terminal variable in the grammar generator 2018-06-26 09:48:13 +02:00
Dave Halter
5d46c3e18b Trying to reduce the amount of variables used in first sets 2018-06-26 01:04:22 +02:00
Dave Halter
e9fde82512 Remove the overlapcheck, it's probably not needed anymore 2018-06-26 01:00:06 +02:00
Dave Halter
a46ecbb499 Fix an ambiguity issue
Unfortunately had to refactor most of the transition generation
2018-06-26 00:58:19 +02:00
Dave Halter
da5aa8a2ab Better detection of ambiguities 2018-06-25 01:56:02 +02:00
Dave Halter
43d4a8a834 Don't use a function that doesn't work 2018-06-24 23:45:18 +02:00
Dave Halter
309033ae2d Work with token types whenever possible 2018-06-24 17:59:32 +02:00
Dave Halter
2a9d8632fe Remove label caching 2018-06-24 17:56:13 +02:00
Dave Halter
530a324643 Remove labels 2018-06-24 17:55:22 +02:00
Dave Halter
71003bc20e Just use caching instead of strange transitions 2018-06-24 17:29:03 +02:00
Dave Halter
c5d141bf60 Make some more things faster 2018-06-24 16:43:28 +02:00
Dave Halter
e958b241c7 Use some tokenize names directly 2018-06-24 16:39:48 +02:00
Dave Halter
34ab35558f Remove a lot of the old token code 2018-06-24 16:31:58 +02:00
Dave Halter
03de9cebb8 Introduce TokenTypes 2018-06-24 16:24:09 +02:00
Dave Halter
6098d89150 Add PythonTokens to get rid of a lot of the token module eventually 2018-06-24 13:03:07 +02:00
Dave Halter
ff4358cd97 Merge remote-tracking branch 'origin/master' into pgen 2018-06-24 11:49:34 +02:00
Dave Halter
b5378e4602 Use token.OP and use reserved words
This change breaks the tokenizer backwards compatibility a bit. Details of
operators is now part of the parser and not the tokenizer anymore. The parser
does this anyway, so we don't need the complexity in the tokenizer.
2018-06-24 11:28:23 +02:00
Dave Halter
33e321a539 Don't set the root node before it's not actually defined 2018-06-22 13:04:00 +02:00
Dave Halter
a890ddd6cc Remove make_first 2018-06-22 12:54:27 +02:00
Dave Halter
1362d4f05d Remove more unused grammar stuff 2018-06-22 12:53:38 +02:00
Dave Halter
532aef2342 Remove nonterminal2number and number2nonterminal, they are no longer used 2018-06-22 12:52:44 +02:00
Dave Halter
878b4b2d3b Use nonterminals instead of numbers if possible 2018-06-22 12:47:02 +02:00
Dave Halter
87299335c4 Remove more unused code 2018-06-22 12:36:48 +02:00
Dave Halter
4f0e9c0fd7 Remove old dfas and states from the parser generator 2018-06-22 11:47:18 +02:00
Dave Halter
67ca091631 delete a lot of the old parser code 2018-06-22 11:44:42 +02:00
Dave Halter
4e5ba02dbb Fix the final issues of the new parser 2018-06-22 11:38:34 +02:00
Dave Halter
a85f544901 Fix all tests except diff tests. Mostly error recovery fixes 2018-06-22 11:12:10 +02:00
Dave Halter
9e8066c6fd Fix a lot of the old error recovery 2018-06-22 09:56:58 +02:00
Dave Halter
68eab72229 Some slight changes to error recovery 2018-06-22 01:59:39 +02:00
Dave Halter
d9264609f2 Get quite a bit of the error recovery working 2018-06-22 01:56:29 +02:00
Dave Halter
79c7e0b59d Use the new parser. Error recovery is not yet working 2018-06-22 00:38:18 +02:00
Dave Halter
f03a87b876 Actually parse some first things with the new approach 2018-06-21 23:56:34 +02:00
Dave Halter
2a082d69df Add better reprs 2018-06-21 22:13:27 +02:00
Dave Halter
e6fc739670 Get most things ready for plans 2018-06-21 21:32:05 +02:00
Dave Halter
12e11b3d16 Remove a while loop that is not necessary 2018-06-21 21:14:14 +02:00
Dave Halter
cc8038966b isfinal -> is_final 2018-06-21 21:11:46 +02:00
Dave Halter
31aecf2d35 Calculate the first plans in a very messy way 2018-06-21 21:10:28 +02:00
Dave Halter
d8554d86d1 A lot of new code to hopefully transition to a better parsing mechanism in the future 2018-06-21 18:17:32 +02:00
Dave Halter
d691bf0fd1 Some minor changes 2018-06-20 09:42:21 +02:00
Dave Halter
5712ffb5ca Introduce a label cache that is currently not used 2018-06-18 20:14:29 +02:00
Dave Halter
55d6a69aad Some more renames 2018-06-18 01:14:09 +02:00
Dave Halter
453471eeb6 Move some ParserGenerator stuff into the Grammar class 2018-06-18 00:15:31 +02:00
Dave Halter
a06c3a3129 name -> nonterminal 2018-06-17 23:10:27 +02:00
Dave Halter
73ce57428b Try to completely remove the word symbol and use nonterminal
The ones that we could not remove are in grammar.py, because that's the public documented API.
2018-06-17 18:30:20 +02:00
Dave Halter
640f544af9 One instance of symbol -> terminal 2018-06-17 18:12:13 +02:00
Dave Halter
b6cbf306d7 Use the name first_terminals instead of first 2018-06-17 18:09:12 +02:00
Dave Halter
95e4ecf592 next -> next_ 2018-06-17 17:49:43 +02:00
Dave Halter
fbed1ecfe0 More dict to set 2018-06-17 17:45:40 +02:00
Dave Halter
1f27fa9320 Use more nonterminal/terminal terminology 2018-06-17 17:37:15 +02:00
Dave Halter
23362ec2d3 Start using the term nonterminal 2018-06-17 16:40:11 +02:00
Dave Halter
6b391af071 Use sets instead of dicts if possible 2018-06-17 16:36:27 +02:00
Dave Halter
c43cb21a0e Add docstring 2018-06-15 17:57:59 +02:00
Dave Halter
24346a0d32 Add some comments 2018-06-15 10:21:00 +02:00
Dave Halter
9d452ec66a Use a set instead of dict if it's not necessary 2018-06-15 00:35:39 +02:00
Dave Halter
567e0d7aed Simplify some code 2018-06-15 00:06:56 +02:00
Dave Halter
1f02327cff Some refactorings for simplicity 2018-06-14 18:18:57 +02:00
Dave Halter
8c348aee6f Use NFAArc as a class 2018-06-14 10:29:19 +02:00
Dave Halter
a277ccf288 Some more renames 2018-06-14 01:10:23 +02:00
Dave Halter
a5ce2caab6 Another rename 2018-06-13 23:19:52 +02:00
Dave Halter
da4df9c0f1 Rename 2018-06-13 21:12:48 +02:00
Dave Halter
bd444df417 Move the grammar parsing to a separate module 2018-06-13 21:01:06 +02:00
Dave Halter
275dbca1b9 More renames 2018-06-13 20:54:18 +02:00
Dave Halter
9a0b6f4928 Rename 2018-06-13 20:47:16 +02:00
Dave Halter
fc5560874b Separate generating dfas from parsing 2018-06-13 20:46:36 +02:00
Dave Halter
6e5a520e7b Cleanup some names 2018-06-13 10:18:05 +02:00
Dave Halter
dcabf3d415 Remove some code that is not used anymore 2018-06-13 10:15:21 +02:00
Dave Halter
3bc82d112d Some cleanups and documentation 2018-06-13 10:10:17 +02:00
Dave Halter
ec186a78f8 Move out some more functions out of classes 2018-06-13 01:50:25 +02:00
Dave Halter
3818fb2b22 Some more refactorings for clarification 2018-06-13 01:44:44 +02:00
Dave Halter
95ddeb4012 Factor out start_symbol into a better position 2018-06-13 00:54:18 +02:00
Dave Halter
f638abb08e Refactor out dfas 2018-06-13 00:31:45 +02:00
Dave Halter
f8558df27a Document pgen grammars a bit better 2018-06-13 00:27:51 +02:00
Dave Halter
bae56e72e1 addarc -> add_arc 2018-06-12 22:13:08 +02:00
Dave Halter
41c38311f7 Refactor some things in pgen 2018-06-12 22:08:19 +02:00
Dave Halter
eeb456a6d4 Move some initializations 2018-06-12 22:00:49 +02:00
Dave Halter
1c0956d9e0 Change license again. The year shouldn't matter 2018-06-12 21:58:27 +02:00
Dave Halter
c17156bd36 Make first private 2018-06-12 21:56:26 +02:00
Dave Halter
8865aa452c Change copyright years 2018-06-12 21:55:37 +02:00
Dave Halter
e0c79a9fcc Remove some code from the grammar 2018-06-12 21:49:18 +02:00
Dave Halter
3c08b1b058 Separate the grammar generation from the grammar parsing 2018-06-12 21:30:09 +02:00
Dave Halter
0f32673092 In pgen now everything is named grammar and not c 2018-06-12 18:17:02 +02:00
Dave Halter
1e18163402 Better recovery for online classes and functions 2018-06-12 13:23:49 +02:00
Dave Halter
cef9f1bdbd Fix one-line error recovery for all things that are using a suite
Fixes https://github.com/davidhalter/jedi/issues/1138.
2018-06-12 12:56:27 +02:00
Dave Halter
23db71a5f7 Add a debug function for first tokens 2018-06-12 01:50:37 +02:00
Aaron Meurer
34154d05a0 Don't mutate the standard library token.tok_name dictionary (#42)
* Don't mutate the standard library token.tok_name dictionary

Fixes #41.

* More robust test that tok_name isn't mutated

This test now works in Python 2.7, and actually tests something in Python 3.7,
and it's better anyway because it tests the whole dictionary instead of just
one token.

* Fix test_tok_name_copied in Python 3.7 and PyPy

Apparently Python 3.7 adds N_TOKENS to the tok_name dictionary, and PyPy
doesn't have NT_OFFSET in it.
2018-06-08 18:46:16 +02:00
Dave Halter
6f385bdba1 Not testing Python 3.3 anymore on travis. It seems to be broken 2018-05-21 12:55:49 +02:00
Dave Halter
4fc31c58b3 Add a changelog for 0.2.1 2018-05-21 12:48:31 +02:00
Dave Halter
689decc66c Push the version 2018-05-21 12:44:10 +02:00
Dave Halter
c2eacdb81c The diff parser was slighly off with prefixes, fixes #1121 2018-05-20 19:13:50 +02:00
Dave Halter
ac0bf4fcdd A better repr for the endmarker 2018-05-17 09:56:16 +02:00
Dave Halter
948f9ccecc Merge branch 'master' of github.com:davidhalter/parso 2018-04-23 23:42:11 +02:00
Dave Halter
f20106d88e Fix a prefix issue with error leafs. 2018-04-22 19:28:30 +02:00
Aaron Meurer
f4912f6c17 Use the correct field name in the PythonToken repr 2018-04-19 22:20:23 +02:00
Jonas Tranberg
bf5a4b7c2c Added path param to load_grammar for loading custom grammar files 2018-04-19 10:16:33 +02:00
Dave Halter
579146b501 Don't test python 2.6 in tox by default, because the newer pip versions don't support it anymore 2018-04-15 14:46:35 +02:00
Dave Halter
deb4dbce1c Set a release date 2018-04-15 13:54:10 +02:00
Dave Halter
8eda8decea Fix whitespace issues with prefixes 2018-04-07 15:34:58 +02:00
Dave Halter
f6935935c0 Use proper leafs for fstring start/end 2018-04-07 12:32:33 +02:00
Dave Halter
d3fa7e1cad Fix a Python 2 related issue. 2018-04-07 11:14:41 +02:00
Dave Halter
83d9abd036 Forgot to delete another print. WTF I'm tired 2018-04-07 02:22:45 +02:00
Dave Halter
222e9117b4 Unfortunately forgot to delete a print 2018-04-07 02:21:59 +02:00
Dave Halter
eda2207e6c Start to write a changelog for 0.2.0 2018-04-07 02:20:00 +02:00
Dave Halter
a91e5f2775 Merge branch 'fstrings'
f-strings are now parsed as part of the Python grammar and not in separate
steps.

Note that this is not the way that CPython does it. CPython still uses multiple
parse steps in ast.c.
2018-04-07 02:17:02 +02:00
Dave Halter
cba4f2ccc1 Fix the syntax errors from f-strings 2018-04-07 02:14:35 +02:00
Dave Halter
8f1a436ba1 Remove the old f-string grammar and fix the tests with the new syntax 2018-04-07 02:11:26 +02:00
Dave Halter
9941348ec6 Add python 3.7 to tox 2018-04-06 20:30:07 +02:00
Dave Halter
afb71dc762 Remove the f-string rules and replace them with new ones 2018-04-06 20:29:22 +02:00
Dave Halter
0d96b12566 Fix the fstring syntax if there's a conversion with exclamation mark 2018-04-06 09:59:15 +02:00
Dave Halter
9d2ce4bcd4 Fix a few fstring error gatherings 2018-04-06 09:50:07 +02:00
Dave Halter
a3e280c2b9 Use strings as a non-terminal symbol in all grammars
This makes it easier to write the same logic for all Python versions
2018-04-05 09:55:19 +02:00
Dave Halter
7c7f4f4e54 Fix a test 2018-04-05 00:45:23 +02:00
Dave Halter
56b3e2cdc8 Also use the fstring modfications for the 3.7 grammar 2018-04-05 00:45:03 +02:00
Dave Halter
97f042c6ba Remove clutter from the grammar 2018-03-31 14:26:12 +02:00
Dave Halter
b1aa7c6a79 Cleanup a lot of details in the tokenizer for fstrings 2018-03-31 14:25:29 +02:00
Dave Halter
235fda3fbb Fix a few things so that the tokenizer can at least parse the grammar. 2018-03-30 22:13:18 +02:00
Dave Halter
d8d2e596a5 A first implementation of the fstring tokenizer 2018-03-30 20:50:49 +02:00
Dave Halter
e05ce5ae31 Revert "A small improvement in checks"
The problem with this commit is that it probably makes some checks slower. It's
still slightly more beautiful, but we leave it for now.

This reverts commit 25e4ea9c24.
2018-03-28 09:51:37 +02:00
Dave Halter
25e4ea9c24 A small improvement in checks 2018-03-28 02:16:37 +02:00
Dave Halter
9f88fe16a3 Added the fstring grammar without the tokenization part
This means that fstrings are not yet parsed, because there are no f-string tokens.
2018-03-28 02:03:18 +02:00
Dave Halter
ba0e7a2e9d A comparison was slightly off 2018-03-24 22:40:12 +01:00
Dave Halter
dc80152ff8 Ignore the pytest cache 2018-03-23 20:24:08 +01:00
Dave Halter
9e3154d167 Fix an error message change in Python 3.7 2018-03-23 20:23:15 +01:00
Dave Halter
065da34272 Fix an issue in the diff parser about endmarker newlines
This was discovered in https://github.com/davidhalter/jedi/issues/1000.
2018-03-11 23:41:18 +01:00
Dave Halter
f89809de9a Remove the copyright for good 2018-01-09 23:27:46 +01:00
Chris Lamb
332c57ebcb Remove copyright years from documentation. (Closes: #25) 2018-01-09 23:24:39 +01:00
lcolaholicl
acb173b703 Fix typo: containes→contains 2017-12-31 13:59:41 +01:00
Daniel Hahler
47e78b37fe tox: use older pytest only for py26/py33
Follow-up to https://github.com/davidhalter/parso/commit/73439d5863daea.
2017-12-30 22:11:07 +01:00
Dave Halter
fc44af6165 Merge branch 'master' of github.com:davidhalter/parso 2017-12-30 18:08:17 +01:00
Dave Halter
73439d5863 Don't use a newer pytest version
Otherwise python 3.3 and 2.6 are not working anymore
2017-12-30 18:07:58 +01:00
Dave Halter
085aad3038 The tags should be annotated if possible 2017-12-30 14:05:50 +01:00
lcolaholicl
7db500bfbc Fix another typo.
Delete accidentally repeated `None.`
2017-12-30 13:52:33 +01:00
lcolaholicl
e689f3dce6 Fix typo
keyworda → keyword
2017-12-30 13:52:33 +01:00
Max Nordlund
b076cdc12a Fix typo 2017-12-26 03:09:40 +01:00
Dave Halter
0dea94c801 Bump version for the next release 2017-11-05 15:11:51 +01:00
Dave Halter
6cf487aee2 Use 3.7-dev not 3.7 for travis 2017-11-05 14:39:13 +01:00
Dave Halter
2ca629a2f6 Don't use py37 in tox but on travis 2017-11-05 14:34:09 +01:00
Dave Halter
5c1e953c17 Add the v0.1.1 changes to the changelog 2017-11-05 12:43:55 +01:00
Dave Halter
a5f565ae10 Python 3.7 should be tested as well 2017-11-05 12:18:28 +01:00
Dave Halter
a29ec25598 Use os.path.join instead of a slash
Fixes #20
2017-11-04 14:58:57 +01:00
Mike Morearty
04360cdfe7 Load from cache even when code was given
Change Grammar._parse() to try load_module() even if code was passed
to it.

In many cases, _parse() is passed both the code and the path to the .py
file. E.g. in jedi-vim if you type "import foo.", then every .py file in
directory "foo" will reach Grammar._parse() with both the `code` and the
`path` variables filled in. This change allows the cache to be used in
those cases.
2017-11-04 11:16:28 +01:00
Robin Roth
4824534f8a Support async for statement 2017-11-01 19:48:24 +01:00
Alisdair Robertson
647073b1b9 Iter raise statements in a Function (#16)
* Add Function.iter_raise_stmts method and tests

* Add Alisdair Robertson to AUTHORS.txt

* Cleanup Function.iter_raise_stmts and test

Decided not to try and exclude exceptions that would be caught by a try-catch
2017-10-28 13:35:49 +02:00
Daniel Hahler
50445f424e tox.ini: prefer pytest without a dot 2017-10-28 13:28:19 +02:00
Alisdair Robertson
5b5d855fab Clarify what a prefix is 2017-10-26 15:03:05 +02:00
Alisdair Robertson
04e18ebb01 Docstring for BaseNode.get_leaf_for_position 2017-10-26 15:03:05 +02:00
Alisdair Robertson
d3cfcc24b8 Correct seperator for param anotation docstring 2017-10-26 15:03:05 +02:00
Alisdair Robertson
89646e0970 Fix Name role in PythonMixin.get_name_of_position docstring 2017-10-26 15:03:05 +02:00
Mike Morearty
bc8566e964 Use cPickle instead of pickle
Change Parso to use cPickle instead of pickle when reading/writing the
cache, which speeds up the cache significantly.

In Python 2, cPickle is up to 1000 times faster than pickle. (In Python
3, if you "import pickle", you are actually getting cPickle.)

As is the convention, the code tries to import cPickle, and if that
fails, it falls back to pickle.

This has a big impact for users of jedi-vim, since in many cases Vim
uses Python 2.
2017-10-24 01:10:32 +02:00
Mike Morearty
89932c368d Fix the cache path
Any caller of Parso can specify a cache path, and if none is specified,
parso will fall back to a default.

Parso's code to read a file from the cache handled this correctly; but
its code to write a file to the cache had a bug -- any override of the
default was ignored.

In the case of Jedi, this is a significant problem, since Jedi overrides
the default. The result is that files will be written to the cache, but
will then never be found when reading from the cache.
2017-10-24 01:08:57 +02:00
Dave Halter
dcae8cda92 Merge branch 'master' of github.com:davidhalter/parso 2017-10-12 22:59:24 +02:00
Dave Halter
1f6683b8ac Don't use the root logger
The root logger should not be used in parso. Use custom loggers for each module. Fixes #9.
2017-10-12 22:59:17 +02:00
Håken Lid
0ec02e1d7f Add support for python 3.7 (#8)
grammar37.txt is simply a copy of grammar36.txt, since there's no syntax
changes in 3.7

https://docs.python.org/3.7/whatsnew/3.7.html
2017-10-02 00:48:09 +02:00
Dave Halter
8db1498185 Deployment script forgot to push the tags to github. 2017-09-21 00:06:48 +02:00
Dave Halter
26e882d19c Magic names were not validated correctly
Fixes #6.
2017-09-14 09:04:08 +02:00
Dave Halter
3a506b44ac Remove dead code in get_definition. Fixes #4. 2017-09-05 08:59:16 +02:00
Aaron Meurer
bae36f8ab0 Fix the link url for the "fork me on GitHub" banner in the docs (#3) 2017-09-05 08:56:23 +02:00
Jakub Wilk
94268815e8 Fix typo. (#2) 2017-09-05 00:35:00 +02:00
75 changed files with 6732 additions and 2498 deletions

View File

@@ -1,4 +1,5 @@
[run]
source = parso
[report]
# Regexes for lines to exclude from consideration

3
.gitignore vendored
View File

@@ -9,3 +9,6 @@
/dist/
parso.egg-info/
/.cache/
/.pytest_cache
test/fuzz-redo.pickle
/venv/

View File

@@ -1,25 +1,28 @@
dist: xenial
language: python
sudo: false
python:
- 2.6
- 2.7
- 3.3
- 3.4
- 3.5
- 3.6
- pypy
- 3.7
- 3.8.2
- nightly
- pypy2.7-6.0
- pypy3.5-6.0
matrix:
allow_failures:
- env: TOXENV=cov
- python: nightly
include:
- python: 3.5
env: TOXENV=cov
env: TOXENV=py35-coverage
install:
- pip install --quiet tox-travis
script:
- tox
after_script:
- if [ $TOXENV == "cov" ]; then
pip install --quiet coveralls;
coveralls;
- |
if [ "${TOXENV%-coverage}" == "$TOXENV" ]; then
pip install --quiet coveralls;
coveralls;
fi

View File

@@ -5,6 +5,7 @@ David Halter (@davidhalter) <davidhalter88@gmail.com>
Code Contributors
=================
Alisdair Robertson (@robodair)
Code Contributors (to Jedi and therefore possibly to this library)
@@ -48,6 +49,9 @@ Mathias Rav (@Mortal) <rav@cs.au.dk>
Daniel Fiterman (@dfit99) <fitermandaniel2@gmail.com>
Simon Ruggier (@sruggier)
Élie Gouzien (@ElieGouzien)
Tim Gates (@timgates42) <tim.gates@iress.com>
Batuhan Taskaya (@isidentical) <isidentical@gmail.com>
Jocelyn Boullier (@Kazy) <jocelyn@boullier.bzh>
Note: (@user) means a github user name.

View File

@@ -3,8 +3,119 @@
Changelog
---------
0.7.1 (2020-07-24)
++++++++++++++++++
0.1.0 (2017-05-30)
- Fixed a couple of smaller bugs (mostly syntax error detection in
``Grammar.iter_errors``)
This is going to be the last release that supports Python 2.7, 3.4 and 3.5.
0.7.0 (2020-04-13)
++++++++++++++++++
- Fix a lot of annoying bugs in the diff parser. The fuzzer did not find
issues anymore even after running it for more than 24 hours (500k tests).
- Small grammar change: suites can now contain newlines even after a newline.
This should really not matter if you don't use error recovery. It allows for
nicer error recovery.
0.6.2 (2020-02-27)
++++++++++++++++++
- Bugfixes
- Add Grammar.refactor (might still be subject to change until 0.7.0)
0.6.1 (2020-02-03)
++++++++++++++++++
- Add ``parso.normalizer.Issue.end_pos`` to make it possible to know where an
issue ends
0.6.0 (2020-01-26)
++++++++++++++++++
- Dropped Python 2.6/Python 3.3 support
- del_stmt names are now considered as a definition
(for ``name.is_definition()``)
- Bugfixes
0.5.2 (2019-12-15)
++++++++++++++++++
- Add include_setitem to get_definition/is_definition and get_defined_names (#66)
- Fix named expression error listing (#89, #90)
- Fix some f-string tokenizer issues (#93)
0.5.1 (2019-07-13)
++++++++++++++++++
- Fix: Some unicode identifiers were not correctly tokenized
- Fix: Line continuations in f-strings are now working
0.5.0 (2019-06-20)
++++++++++++++++++
- **Breaking Change** comp_for is now called sync_comp_for for all Python
versions to be compatible with the Python 3.8 Grammar
- Added .pyi stubs for a lot of the parso API
- Small FileIO changes
0.4.0 (2019-04-05)
++++++++++++++++++
- Python 3.8 support
- FileIO support, it's now possible to use abstract file IO, support is alpha
0.3.4 (2019-02-13)
+++++++++++++++++++
- Fix an f-string tokenizer error
0.3.3 (2019-02-06)
+++++++++++++++++++
- Fix async errors in the diff parser
- A fix in iter_errors
- This is a very small bugfix release
0.3.2 (2019-01-24)
+++++++++++++++++++
- 20+ bugfixes in the diff parser and 3 in the tokenizer
- A fuzzer for the diff parser, to give confidence that the diff parser is in a
good shape.
- Some bugfixes for f-string
0.3.1 (2018-07-09)
+++++++++++++++++++
- Bugfixes in the diff parser and keyword-only arguments
0.3.0 (2018-06-30)
+++++++++++++++++++
- Rewrote the pgen2 parser generator.
0.2.1 (2018-05-21)
+++++++++++++++++++
- A bugfix for the diff parser.
- Grammar files can now be loaded from a specific path.
0.2.0 (2018-04-15)
+++++++++++++++++++
- f-strings are now parsed as a part of the normal Python grammar. This makes
it way easier to deal with them.
0.1.1 (2017-11-05)
+++++++++++++++++++
- Fixed a few bugs in the caching layer
- Added support for Python 3.7
0.1.0 (2017-09-04)
+++++++++++++++++++
- Pulling the library out of Jedi. Some APIs will definitely change.

View File

@@ -2,14 +2,19 @@
parso - A Python Parser
###################################################################
.. image:: https://secure.travis-ci.org/davidhalter/parso.png?branch=master
:target: http://travis-ci.org/davidhalter/parso
:alt: Travis-CI build status
.. image:: https://coveralls.io/repos/davidhalter/parso/badge.png?branch=master
:target: https://coveralls.io/r/davidhalter/parso
.. image:: https://travis-ci.org/davidhalter/parso.svg?branch=master
:target: https://travis-ci.org/davidhalter/parso
:alt: Travis CI build status
.. image:: https://coveralls.io/repos/github/davidhalter/parso/badge.svg?branch=master
:target: https://coveralls.io/github/davidhalter/parso?branch=master
:alt: Coverage Status
.. image:: https://pepy.tech/badge/parso
:target: https://pepy.tech/project/parso
:alt: PyPI Downloads
.. image:: https://raw.githubusercontent.com/davidhalter/parso/master/docs/_static/logo_characters.png
Parso is a Python parser that supports error recovery and round-trip parsing
@@ -52,13 +57,13 @@ To list multiple issues:
>>> error2.message
"SyntaxError: 'continue' not properly in loop"
Ressources
==========
Resources
=========
- `Testing <http://parso.readthedocs.io/en/latest/docs/development.html#testing>`_
- `Testing <https://parso.readthedocs.io/en/latest/docs/development.html#testing>`_
- `PyPI <https://pypi.python.org/pypi/parso>`_
- `Docs <https://parso.readthedocs.org/en/latest/>`_
- Uses `semantic versioning <http://semver.org/>`_
- Uses `semantic versioning <https://semver.org/>`_
Installation
============

View File

@@ -11,9 +11,12 @@ import parso
from parso import cache
from parso.utils import parse_version_string
collect_ignore = ["setup.py"]
VERSIONS_2 = '2.7',
VERSIONS_3 = '3.4', '3.5', '3.6', '3.7', '3.8'
@pytest.fixture(scope='session')
def clean_parso_cache():
"""
@@ -49,20 +52,15 @@ def pytest_generate_tests(metafunc):
ids=[c.name for c in cases]
)
elif 'each_version' in metafunc.fixturenames:
metafunc.parametrize(
'each_version',
['2.6', '2.7', '3.3', '3.4', '3.5', '3.6'],
)
metafunc.parametrize('each_version', VERSIONS_2 + VERSIONS_3)
elif 'each_py2_version' in metafunc.fixturenames:
metafunc.parametrize(
'each_py2_version',
['2.6', '2.7'],
)
metafunc.parametrize('each_py2_version', VERSIONS_2)
elif 'each_py3_version' in metafunc.fixturenames:
metafunc.parametrize(
'each_py3_version',
['3.3', '3.4', '3.5', '3.6'],
)
metafunc.parametrize('each_py3_version', VERSIONS_3)
elif 'version_ge_py36' in metafunc.fixturenames:
metafunc.parametrize('version_ge_py36', ['3.6', '3.7', '3.8'])
elif 'version_ge_py38' in metafunc.fixturenames:
metafunc.parametrize('version_ge_py38', ['3.8'])
class NormalizerIssueCase(object):
@@ -89,22 +87,12 @@ def pytest_configure(config):
root = logging.getLogger()
root.setLevel(logging.DEBUG)
ch = logging.StreamHandler(sys.stdout)
ch.setLevel(logging.DEBUG)
#ch = logging.StreamHandler(sys.stdout)
#ch.setLevel(logging.DEBUG)
#formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
#ch.setFormatter(formatter)
root.addHandler(ch)
@pytest.fixture
def each_py3_version():
return '3.3', '3.4', '3.5', '3.6'
@pytest.fixture
def each_py2_version():
return '2.6', '2.7'
#root.addHandler(ch)
class Checker():
@@ -167,8 +155,20 @@ def works_ge_py3(each_version):
@pytest.fixture
def works_ge_py35(each_version):
"""
Works only greater equal Python 3.3.
"""
version_info = parse_version_string(each_version)
return Checker(each_version, version_info >= (3, 5))
@pytest.fixture
def works_ge_py36(each_version):
version_info = parse_version_string(each_version)
return Checker(each_version, version_info >= (3, 6))
@pytest.fixture
def works_ge_py38(each_version):
version_info = parse_version_string(each_version)
return Checker(each_version, version_info >= (3, 8))
@pytest.fixture
def works_ge_py39(each_version):
version_info = parse_version_string(each_version)
return Checker(each_version, version_info >= (3, 9))

View File

@@ -26,7 +26,7 @@ git checkout $BRANCH
tox
# Create tag
tag=v$(python -c "import $PROJECT_NAME; print($PROJECT_NAME.__version__)")
tag=v$(python3 -c "import $PROJECT_NAME; print($PROJECT_NAME.__version__)")
master_ref=$(git show-ref -s heads/$BRANCH)
tag_ref=$(git show-ref -s $tag || true)
@@ -36,17 +36,17 @@ if [[ $tag_ref ]]; then
exit 1
fi
else
git tag $tag
git tag -a $tag
git push --tags
fi
# Package and upload to PyPI
#rm -rf dist/ - Not needed anymore, because the folder is never reused.
echo `pwd`
python setup.py sdist bdist_wheel
python3 setup.py sdist bdist_wheel
# Maybe do a pip install twine before.
twine upload dist/*
cd $BASE_DIR
# Back in the development directory fetch tags.
git fetch --tags
# The tags have been pushed to this repo. Push the tags to github, now.
git push --tags

View File

@@ -6,7 +6,7 @@
{% endif %}
<link media="only screen and (max-device-width: 480px)" href="{{
pathto('_static/small_flask.css', 1) }}" type= "text/css" rel="stylesheet" />
<a href="https://github.com/davidhalter/jedi">
<a href="https://github.com/davidhalter/parso">
<img style="position: absolute; top: 0; right: 0; border: 0;" src="https://s3.amazonaws.com/github/ribbons/forkme_right_red_aa0000.png" alt="Fork me">
</a>
{% endblock %}
@@ -19,7 +19,6 @@
{% endblock %}
{%- block footer %}
<div class="footer">
&copy; Copyright {{ copyright }}.
Created using <a href="http://sphinx.pocoo.org/">Sphinx</a>.
</div>
{% if pagename == 'index' %}

View File

@@ -13,7 +13,6 @@
import sys
import os
import datetime
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
@@ -45,7 +44,7 @@ master_doc = 'index'
# General information about the project.
project = u'parso'
copyright = u'2012 - {today.year}, parso contributors'.format(today=datetime.date.today())
copyright = u'parso contributors'
import parso
from parso.utils import version_info
@@ -145,7 +144,7 @@ html_sidebars = {
#'relations.html',
'ghbuttons.html',
#'sourcelink.html',
#'searchbox.html'
'searchbox.html'
]
}

View File

@@ -61,6 +61,8 @@ Used By
-------
- jedi_ (which is used by IPython and a lot of editor plugins).
- mutmut_ (mutation tester)
.. _jedi: https://github.com/davidhalter/jedi
.. _mutmut: https://github.com/boxed/mutmut

View File

@@ -43,7 +43,7 @@ from parso.grammar import Grammar, load_grammar
from parso.utils import split_lines, python_bytes_to_unicode
__version__ = '0.1.0'
__version__ = '0.7.1'
def parse(code=None, **kwargs):

19
parso/__init__.pyi Normal file
View File

@@ -0,0 +1,19 @@
from typing import Any, Optional, Union
from parso.grammar import Grammar as Grammar, load_grammar as load_grammar
from parso.parser import ParserSyntaxError as ParserSyntaxError
from parso.utils import python_bytes_to_unicode as python_bytes_to_unicode, split_lines as split_lines
__version__: str = ...
def parse(
code: Optional[Union[str, bytes]],
*,
version: Optional[str] = None,
error_recovery: bool = True,
path: Optional[str] = None,
start_symbol: Optional[str] = None,
cache: bool = False,
diff_cache: bool = False,
cache_path: Optional[str] = None,
) -> Any: ...

View File

@@ -1,14 +1,11 @@
"""
To ensure compatibility from Python ``2.6`` - ``3.3``, a module has been
To ensure compatibility from Python ``2.7`` - ``3.3``, a module has been
created. Clearly there is huge need to use conforming syntax.
"""
import os
import sys
import platform
# Cannot use sys.version.major and minor names, because in Python 2.6 it's not
# a namedtuple.
py_version = int(str(sys.version_info[0]) + str(sys.version_info[1]))
# unicode function
try:
unicode = unicode
@@ -36,10 +33,10 @@ except AttributeError:
def u(string):
"""Cast to unicode DAMMIT!
Written because Python2 repr always implicitly casts to a string, so we
have to cast back to a unicode (and we now that we always deal with valid
have to cast back to a unicode (and we know that we always deal with valid
unicode, because we check that in the beginning).
"""
if py_version >= 30:
if sys.version_info.major >= 3:
return str(string)
if not isinstance(string, unicode):
@@ -48,9 +45,17 @@ def u(string):
try:
# Python 3.3+
FileNotFoundError = FileNotFoundError
except NameError:
FileNotFoundError = IOError
# Python 2.7 (both IOError + OSError)
FileNotFoundError = EnvironmentError
try:
# Python 3.3+
PermissionError = PermissionError
except NameError:
# Python 2.7 (both IOError + OSError)
PermissionError = EnvironmentError
def utf8_repr(func):
@@ -65,39 +70,32 @@ def utf8_repr(func):
else:
return result
if py_version >= 30:
if sys.version_info.major >= 3:
return func
else:
return wrapper
try:
from functools import total_ordering
except ImportError:
# Python 2.6
def total_ordering(cls):
"""Class decorator that fills in missing ordering methods"""
convert = {
'__lt__': [('__gt__', lambda self, other: not (self < other or self == other)),
('__le__', lambda self, other: self < other or self == other),
('__ge__', lambda self, other: not self < other)],
'__le__': [('__ge__', lambda self, other: not self <= other or self == other),
('__lt__', lambda self, other: self <= other and not self == other),
('__gt__', lambda self, other: not self <= other)],
'__gt__': [('__lt__', lambda self, other: not (self > other or self == other)),
('__ge__', lambda self, other: self > other or self == other),
('__le__', lambda self, other: not self > other)],
'__ge__': [('__le__', lambda self, other: (not self >= other) or self == other),
('__gt__', lambda self, other: self >= other and not self == other),
('__lt__', lambda self, other: not self >= other)]
}
roots = set(dir(cls)) & set(convert)
if not roots:
raise ValueError('must define at least one ordering operation: < > <= >=')
root = max(roots) # prefer __lt__ to __le__ to __gt__ to __ge__
for opname, opfunc in convert[root]:
if opname not in roots:
opfunc.__name__ = opname
opfunc.__doc__ = getattr(int, opname).__doc__
setattr(cls, opname, opfunc)
return cls
if sys.version_info < (3, 5):
"""
A super-minimal shim around listdir that behave like
scandir for the information we need.
"""
class _DirEntry:
def __init__(self, name, basepath):
self.name = name
self.basepath = basepath
@property
def path(self):
return os.path.join(self.basepath, self.name)
def stat(self):
# won't follow symlinks
return os.lstat(os.path.join(self.basepath, self.name))
def scandir(dir):
return [_DirEntry(name, dir) for name in os.listdir(dir)]
else:
from os import scandir

View File

@@ -4,15 +4,45 @@ import sys
import hashlib
import gc
import shutil
import pickle
import platform
import errno
import logging
import warnings
from parso._compatibility import FileNotFoundError
try:
import cPickle as pickle
except:
import pickle
from parso._compatibility import FileNotFoundError, PermissionError, scandir
from parso.file_io import FileIO
_PICKLE_VERSION = 30
LOG = logging.getLogger(__name__)
_CACHED_FILE_MINIMUM_SURVIVAL = 60 * 10 # 10 minutes
"""
Cached files should survive at least a few minutes.
"""
_CACHED_FILE_MAXIMUM_SURVIVAL = 60 * 60 * 24 * 30
"""
Maximum time for a cached file to survive if it is not
accessed within.
"""
_CACHED_SIZE_TRIGGER = 600
"""
This setting limits the amount of cached files. It's basically a way to start
garbage collection.
The reasoning for this limit being as big as it is, is the following:
Numpy, Pandas, Matplotlib and Tensorflow together use about 500 files. This
makes Jedi use ~500mb of memory. Since we might want a bit more than those few
libraries, we just increase it a bit.
"""
_PICKLE_VERSION = 33
"""
Version number (integer) for file system cache.
@@ -34,20 +64,23 @@ _VERSION_TAG = '%s-%s%s-%s' % (
"""
Short name for distinguish Python implementations and versions.
It's like `sys.implementation.cache_tag` but for Python < 3.3
It's like `sys.implementation.cache_tag` but for Python2
we generate something similar. See:
http://docs.python.org/3/library/sys.html#sys.implementation
"""
def _get_default_cache_path():
if platform.system().lower() == 'windows':
dir_ = os.path.join(os.getenv('LOCALAPPDATA') or '~', 'Parso', 'Parso')
dir_ = os.path.join(os.getenv('LOCALAPPDATA')
or os.path.expanduser('~'), 'Parso', 'Parso')
elif platform.system().lower() == 'darwin':
dir_ = os.path.join('~', 'Library', 'Caches', 'Parso')
else:
dir_ = os.path.join(os.getenv('XDG_CACHE_HOME') or '~/.cache', 'parso')
return os.path.expanduser(dir_)
_default_cache_path = _get_default_cache_path()
"""
The path where the cache is stored.
@@ -58,6 +91,19 @@ On Linux, if environment variable ``$XDG_CACHE_HOME`` is set,
``$XDG_CACHE_HOME/parso`` is used instead of the default one.
"""
_CACHE_CLEAR_THRESHOLD = 60 * 60 * 24
def _get_cache_clear_lock(cache_path = None):
"""
The path where the cache lock is stored.
Cache lock will prevent continous cache clearing and only allow garbage
collection once a day (can be configured in _CACHE_CLEAR_THRESHOLD).
"""
cache_path = cache_path or _get_default_cache_path()
return FileIO(os.path.join(cache_path, "PARSO-CACHE-LOCK"))
parser_cache = {}
@@ -68,23 +114,29 @@ class _NodeCacheItem(object):
if change_time is None:
change_time = time.time()
self.change_time = change_time
self.last_used = change_time
def load_module(hashed_grammar, path, cache_path=None):
def load_module(hashed_grammar, file_io, cache_path=None):
"""
Returns a module or None, if it fails.
"""
try:
p_time = os.path.getmtime(path)
except FileNotFoundError:
p_time = file_io.get_last_modified()
if p_time is None:
return None
try:
module_cache_item = parser_cache[hashed_grammar][path]
module_cache_item = parser_cache[hashed_grammar][file_io.path]
if p_time <= module_cache_item.change_time:
module_cache_item.last_used = time.time()
return module_cache_item.node
except KeyError:
return _load_from_file_system(hashed_grammar, path, p_time, cache_path=cache_path)
return _load_from_file_system(
hashed_grammar,
file_io.path,
p_time,
cache_path=cache_path
)
def _load_from_file_system(hashed_grammar, path, p_time, cache_path=None):
@@ -110,22 +162,50 @@ def _load_from_file_system(hashed_grammar, path, p_time, cache_path=None):
except FileNotFoundError:
return None
else:
parser_cache.setdefault(hashed_grammar, {})[path] = module_cache_item
logging.debug('pickle loaded: %s', path)
_set_cache_item(hashed_grammar, path, module_cache_item)
LOG.debug('pickle loaded: %s', path)
return module_cache_item.node
def save_module(hashed_grammar, path, module, lines, pickling=True, cache_path=None):
def _set_cache_item(hashed_grammar, path, module_cache_item):
if sum(len(v) for v in parser_cache.values()) >= _CACHED_SIZE_TRIGGER:
# Garbage collection of old cache files.
# We are basically throwing everything away that hasn't been accessed
# in 10 minutes.
cutoff_time = time.time() - _CACHED_FILE_MINIMUM_SURVIVAL
for key, path_to_item_map in parser_cache.items():
parser_cache[key] = {
path: node_item
for path, node_item in path_to_item_map.items()
if node_item.last_used > cutoff_time
}
parser_cache.setdefault(hashed_grammar, {})[path] = module_cache_item
def try_to_save_module(hashed_grammar, file_io, module, lines, pickling=True, cache_path=None):
path = file_io.path
try:
p_time = None if path is None else os.path.getmtime(path)
p_time = None if path is None else file_io.get_last_modified()
except OSError:
p_time = None
pickling = False
item = _NodeCacheItem(module, lines, p_time)
parser_cache.setdefault(hashed_grammar, {})[path] = item
_set_cache_item(hashed_grammar, path, item)
if pickling and path is not None:
_save_to_file_system(hashed_grammar, path, item)
try:
_save_to_file_system(hashed_grammar, path, item, cache_path=cache_path)
except PermissionError:
# It's not really a big issue if the cache cannot be saved to the
# file system. It's still in RAM in that case. However we should
# still warn the user that this is happening.
warnings.warn(
'Tried to save a file to %s, but got permission denied.',
Warning
)
else:
_remove_cache_and_update_lock(cache_path=cache_path)
def _save_to_file_system(hashed_grammar, path, item, cache_path=None):
@@ -140,6 +220,46 @@ def clear_cache(cache_path=None):
parser_cache.clear()
def clear_inactive_cache(
cache_path=None,
inactivity_threshold=_CACHED_FILE_MAXIMUM_SURVIVAL,
):
if cache_path is None:
cache_path = _get_default_cache_path()
if not os.path.exists(cache_path):
return False
for version_path in os.listdir(cache_path):
version_path = os.path.join(cache_path, version_path)
if not os.path.isdir(version_path):
continue
for file in scandir(version_path):
if (
file.stat().st_atime + _CACHED_FILE_MAXIMUM_SURVIVAL
<= time.time()
):
try:
os.remove(file.path)
except OSError: # silently ignore all failures
continue
else:
return True
def _remove_cache_and_update_lock(cache_path = None):
lock = _get_cache_clear_lock(cache_path=cache_path)
clear_lock_time = lock.get_last_modified()
if (
clear_lock_time is None # first time
or clear_lock_time + _CACHE_CLEAR_THRESHOLD <= time.time()
):
if not lock._touch():
# First make sure that as few as possible other cleanup jobs also
# get started. There is still a race condition but it's probably
# not a big problem.
return False
clear_inactive_cache(cache_path = cache_path)
def _get_hashed_path(hashed_grammar, path, cache_path=None):
directory = _get_cache_directory_path(cache_path=cache_path)

47
parso/file_io.py Normal file
View File

@@ -0,0 +1,47 @@
import os
from parso._compatibility import FileNotFoundError
class FileIO(object):
def __init__(self, path):
self.path = path
def read(self): # Returns bytes/str
# We would like to read unicode here, but we cannot, because we are not
# sure if it is a valid unicode file. Therefore just read whatever is
# here.
with open(self.path, 'rb') as f:
return f.read()
def get_last_modified(self):
"""
Returns float - timestamp or None, if path doesn't exist.
"""
try:
return os.path.getmtime(self.path)
except OSError:
# Might raise FileNotFoundError, OSError for Python 2
return None
def _touch(self):
try:
os.utime(self.path, None)
except FileNotFoundError:
try:
file = open(self.path, 'a')
file.close()
except (OSError, IOError): # TODO Maybe log this?
return False
return True
def __repr__(self):
return '%s(%s)' % (self.__class__.__name__, self.path)
class KnownContentFileIO(FileIO):
def __init__(self, path, content):
super(KnownContentFileIO, self).__init__(path)
self._content = content
def read(self):
return self._content

View File

@@ -2,17 +2,18 @@ import hashlib
import os
from parso._compatibility import FileNotFoundError, is_pypy
from parso.pgen2.pgen import generate_grammar
from parso.pgen2 import generate_grammar
from parso.utils import split_lines, python_bytes_to_unicode, parse_version_string
from parso.python.diff import DiffParser
from parso.python.tokenize import tokenize_lines, tokenize
from parso.python import token
from parso.cache import parser_cache, load_module, save_module
from parso.python.token import PythonTokenTypes
from parso.cache import parser_cache, load_module, try_to_save_module
from parso.parser import BaseParser
from parso.python.parser import Parser as PythonParser
from parso.python.errors import ErrorFinderConfig
from parso.python import pep8
from parso.python import fstring
from parso.file_io import FileIO, KnownContentFileIO
from parso.normalizer import RefactoringNormalizer
_loaded_grammars = {}
@@ -21,7 +22,7 @@ class Grammar(object):
"""
:py:func:`parso.load_grammar` returns instances of this class.
Creating custom grammars by calling this is not supported, yet.
Creating custom none-python grammars by calling this is not supported, yet.
"""
#:param text: A BNF representation of your grammar.
_error_normalizer_config = None
@@ -52,12 +53,13 @@ class Grammar(object):
it is invalid, it will be returned as an error node. If disabled,
you will get a ParseError when encountering syntax errors in your
code.
:param str start_symbol: The grammar symbol that you want to parse. Only
allowed to be used when error_recovery is False.
:param str start_symbol: The grammar rule (nonterminal) that you want
to parse. Only allowed to be used when error_recovery is False.
:param str path: The path to the file you want to open. Only needed for caching.
:param bool cache: Keeps a copy of the parser tree in RAM and on disk
if a path is given. Returns the cached trees if the corresponding
files on disk have not changed.
files on disk have not changed. Note that this stores pickle files
on your file system (e.g. for Linux in ``~/.cache/parso/``).
:param bool diff_cache: Diffs the cached python module against the new
code and tries to parse only the parts that have changed. Returns
the same (changed) module that is found in cache. Using this option
@@ -73,39 +75,40 @@ class Grammar(object):
:py:class:`parso.python.tree.Module`.
"""
if 'start_pos' in kwargs:
raise TypeError("parse() got an unexpected keyworda argument.")
raise TypeError("parse() got an unexpected keyword argument.")
return self._parse(code=code, **kwargs)
def _parse(self, code=None, error_recovery=True, path=None,
start_symbol=None, cache=False, diff_cache=False,
cache_path=None, start_pos=(1, 0)):
cache_path=None, file_io=None, start_pos=(1, 0)):
"""
Wanted python3.5 * operator and keyword only arguments. Therefore just
wrap it all.
start_pos here is just a parameter internally used. Might be public
sometime in the future.
"""
if code is None and path is None:
if code is None and path is None and file_io is None:
raise TypeError("Please provide either code or a path.")
if start_symbol is None:
start_symbol = self._start_symbol
start_symbol = self._start_nonterminal
if error_recovery and start_symbol != 'file_input':
raise NotImplementedError("This is currently not implemented.")
if cache and code is None and path is not None:
# With the current architecture we cannot load from cache if the
# code is given, because we just load from cache if it's not older than
# the latest change (file last modified).
module_node = load_module(self._hashed, path, cache_path=cache_path)
if file_io is None:
if code is None:
file_io = FileIO(path)
else:
file_io = KnownContentFileIO(path, code)
if cache and file_io.path is not None:
module_node = load_module(self._hashed, file_io, cache_path=cache_path)
if module_node is not None:
return module_node
if code is None:
with open(path, 'rb') as f:
code = f.read()
code = file_io.read()
code = python_bytes_to_unicode(code)
lines = split_lines(code, keepends=True)
@@ -114,7 +117,7 @@ class Grammar(object):
raise TypeError("You have to define a diff parser to be able "
"to use this option.")
try:
module_cache_item = parser_cache[self._hashed][path]
module_cache_item = parser_cache[self._hashed][file_io.path]
except KeyError:
pass
else:
@@ -129,23 +132,23 @@ class Grammar(object):
old_lines=old_lines,
new_lines=lines
)
save_module(self._hashed, path, new_node, lines,
try_to_save_module(self._hashed, file_io, new_node, lines,
# Never pickle in pypy, it's slow as hell.
pickling=cache and not is_pypy,
cache_path=cache_path)
return new_node
tokens = self._tokenizer(lines, start_pos)
tokens = self._tokenizer(lines, start_pos=start_pos)
p = self._parser(
self._pgen_grammar,
error_recovery=error_recovery,
start_symbol=start_symbol
start_nonterminal=start_symbol
)
root_node = p.parse(tokens=tokens)
if cache or diff_cache:
save_module(self._hashed, path, root_node, lines,
try_to_save_module(self._hashed, file_io, root_node, lines,
# Never pickle in pypy, it's slow as hell.
pickling=cache and not is_pypy,
cache_path=cache_path)
@@ -168,6 +171,9 @@ class Grammar(object):
return self._get_normalizer_issues(node, self._error_normalizer_config)
def refactor(self, base_node, node_to_str_map):
return RefactoringNormalizer(node_to_str_map).walk(base_node)
def _get_normalizer(self, normalizer_config):
if normalizer_config is None:
normalizer_config = self._default_normalizer_config
@@ -189,17 +195,16 @@ class Grammar(object):
normalizer.walk(node)
return normalizer.issues
def __repr__(self):
labels = self._pgen_grammar.number2symbol.values()
txt = ' '.join(list(labels)[:3]) + ' ...'
nonterminals = self._pgen_grammar.nonterminal_to_dfas.keys()
txt = ' '.join(list(nonterminals)[:3]) + ' ...'
return '<%s:%s>' % (self.__class__.__name__, txt)
class PythonGrammar(Grammar):
_error_normalizer_config = ErrorFinderConfig()
_token_namespace = token
_start_symbol = 'file_input'
_token_namespace = PythonTokenTypes
_start_nonterminal = 'file_input'
def __init__(self, version_info, bnf_text):
super(PythonGrammar, self).__init__(
@@ -210,54 +215,30 @@ class PythonGrammar(Grammar):
)
self.version_info = version_info
def _tokenize_lines(self, lines, start_pos):
return tokenize_lines(lines, self.version_info, start_pos=start_pos)
def _tokenize_lines(self, lines, **kwargs):
return tokenize_lines(lines, self.version_info, **kwargs)
def _tokenize(self, code):
# Used by Jedi.
return tokenize(code, self.version_info)
class PythonFStringGrammar(Grammar):
_token_namespace = fstring.TokenNamespace
_start_symbol = 'fstring'
def __init__(self):
super(PythonFStringGrammar, self).__init__(
text=fstring.GRAMMAR,
tokenizer=fstring.tokenize,
parser=fstring.Parser
)
def parse(self, code, **kwargs):
return self._parse(code, **kwargs)
def _parse(self, code, error_recovery=True, start_pos=(1, 0)):
tokens = self._tokenizer(code, start_pos=start_pos)
p = self._parser(
self._pgen_grammar,
error_recovery=error_recovery,
start_symbol=self._start_symbol,
)
return p.parse(tokens=tokens)
def parse_leaf(self, leaf, error_recovery=True):
code = leaf._get_payload()
return self.parse(code, error_recovery=True, start_pos=leaf.start_pos)
def load_grammar(**kwargs):
"""
Loads a :py:class:`parso.Grammar`. The default version is the current Python
version.
:param str version: A python version string, e.g. ``version='3.3'``.
:param str version: A python version string, e.g. ``version='3.8'``.
:param str path: A path to a grammar file
"""
def load_grammar(language='python', version=None):
def load_grammar(language='python', version=None, path=None):
if language == 'python':
version_info = parse_version_string(version)
file = 'python/grammar%s%s.txt' % (version_info.major, version_info.minor)
file = path or os.path.join(
'python',
'grammar%s%s.txt' % (version_info.major, version_info.minor)
)
global _loaded_grammars
path = os.path.join(os.path.dirname(__file__), file)
@@ -271,12 +252,8 @@ def load_grammar(**kwargs):
grammar = PythonGrammar(version_info, bnf_text)
return _loaded_grammars.setdefault(path, grammar)
except FileNotFoundError:
message = "Python version %s is currently not supported." % version
message = "Python version %s.%s is currently not supported." % (version_info.major, version_info.minor)
raise NotImplementedError(message)
elif language == 'python-f-string':
if version is not None:
raise NotImplementedError("Currently different versions are not supported.")
return PythonFStringGrammar()
else:
raise NotImplementedError("No support for language %s." % language)

38
parso/grammar.pyi Normal file
View File

@@ -0,0 +1,38 @@
from typing import Any, Callable, Generic, Optional, Sequence, TypeVar, Union
from typing_extensions import Literal
from parso.utils import PythonVersionInfo
_Token = Any
_NodeT = TypeVar("_NodeT")
class Grammar(Generic[_NodeT]):
_default_normalizer_config: Optional[Any] = ...
_error_normalizer_config: Optional[Any] = None
_start_nonterminal: str = ...
_token_namespace: Optional[str] = None
def __init__(
self,
text: str,
tokenizer: Callable[[Sequence[str], int], Sequence[_Token]],
parser: Any = ...,
diff_parser: Any = ...,
) -> None: ...
def parse(
self,
code: Union[str, bytes] = ...,
error_recovery: bool = ...,
path: Optional[str] = ...,
start_symbol: Optional[str] = ...,
cache: bool = ...,
diff_cache: bool = ...,
cache_path: Optional[str] = ...,
) -> _NodeT: ...
class PythonGrammar(Grammar):
version_info: PythonVersionInfo
def __init__(self, version_info: PythonVersionInfo, bnf_text: str) -> None: ...
def load_grammar(
language: Literal["python"] = "python", version: Optional[str] = ..., path: str = ...
) -> Grammar: ...

View File

@@ -12,6 +12,9 @@ class _NormalizerMeta(type):
class Normalizer(use_metaclass(_NormalizerMeta)):
_rule_type_instances = {}
_rule_value_instances = {}
def __init__(self, grammar, config):
self.grammar = grammar
self._config = config
@@ -41,8 +44,8 @@ class Normalizer(use_metaclass(_NormalizerMeta)):
except AttributeError:
return self.visit_leaf(node)
else:
with self.visit_node(node):
return ''.join(self.visit(child) for child in children)
with self.visit_node(node):
return ''.join(self.visit(child) for child in children)
@contextmanager
def visit_node(self, node):
@@ -119,7 +122,6 @@ class NormalizerConfig(object):
class Issue(object):
def __init__(self, node, code, message):
self._node = node
self.code = code
"""
An integer code that stands for the type of error.
@@ -133,6 +135,7 @@ class Issue(object):
The start position position of the error as a tuple (line, column). As
always in |parso| the first line is 1 and the first column 0.
"""
self.end_pos = node.end_pos
def __eq__(self, other):
return self.start_pos == other.start_pos and self.code == other.code
@@ -147,7 +150,6 @@ class Issue(object):
return '<%s: %s>' % (self.__class__.__name__, self.code)
class Rule(object):
code = None
message = None
@@ -161,7 +163,7 @@ class Rule(object):
def get_node(self, node):
return node
def _get_message(self, message):
def _get_message(self, message, node):
if message is None:
message = self.message
if message is None:
@@ -174,7 +176,7 @@ class Rule(object):
if code is None:
raise ValueError("The error code on the class is not set.")
message = self._get_message(message)
message = self._get_message(message, node)
self._normalizer.add_issue(node, code, message)
@@ -182,3 +184,20 @@ class Rule(object):
if self.is_issue(node):
issue_node = self.get_node(node)
self.add_issue(issue_node)
class RefactoringNormalizer(Normalizer):
def __init__(self, node_to_str_map):
self._node_to_str_map = node_to_str_map
def visit(self, node):
try:
return self._node_to_str_map[node]
except KeyError:
return super(RefactoringNormalizer, self).visit(node)
def visit_leaf(self, leaf):
try:
return self._node_to_str_map[leaf]
except KeyError:
return super(RefactoringNormalizer, self).visit_leaf(leaf)

View File

@@ -1,3 +1,11 @@
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.
# Modifications:
# Copyright David Halter and Contributors
# Modifications are dual-licensed: MIT and PSF.
# 99% of the code is different from pgen2, now.
"""
The ``Parser`` tries to convert the available Python code in an easy to read
format, something like an abstract syntax tree. The classes who represent this
@@ -16,7 +24,7 @@ complexity of the ``Parser`` (there's another parser sitting inside
``Statement``, which produces ``Array`` and ``Call``).
"""
from parso import tree
from parso.pgen2.parse import PgenParser
from parso.pgen2.generator import ReservedString
class ParserSyntaxError(Exception):
@@ -30,7 +38,76 @@ class ParserSyntaxError(Exception):
self.error_leaf = error_leaf
class InternalParseError(Exception):
"""
Exception to signal the parser is stuck and error recovery didn't help.
Basically this shouldn't happen. It's a sign that something is really
wrong.
"""
def __init__(self, msg, type_, value, start_pos):
Exception.__init__(self, "%s: type=%r, value=%r, start_pos=%r" %
(msg, type_.name, value, start_pos))
self.msg = msg
self.type = type
self.value = value
self.start_pos = start_pos
class Stack(list):
def _allowed_transition_names_and_token_types(self):
def iterate():
# An API just for Jedi.
for stack_node in reversed(self):
for transition in stack_node.dfa.transitions:
if isinstance(transition, ReservedString):
yield transition.value
else:
yield transition # A token type
if not stack_node.dfa.is_final:
break
return list(iterate())
class StackNode(object):
def __init__(self, dfa):
self.dfa = dfa
self.nodes = []
@property
def nonterminal(self):
return self.dfa.from_rule
def __repr__(self):
return '%s(%s, %s)' % (self.__class__.__name__, self.dfa, self.nodes)
def _token_to_transition(grammar, type_, value):
# Map from token to label
if type_.contains_syntax:
# Check for reserved words (keywords)
try:
return grammar.reserved_syntax_strings[value]
except KeyError:
pass
return type_
class BaseParser(object):
"""Parser engine.
A Parser instance contains state pertaining to the current token
sequence, and should not be used concurrently by different threads
to parse separate token sequences.
See python/tokenize.py for how to get input tokens by a string.
When a syntax error occurs, error_recovery() is called.
"""
node_map = {}
default_node = tree.Node
@@ -38,41 +115,97 @@ class BaseParser(object):
}
default_leaf = tree.Leaf
def __init__(self, pgen_grammar, start_symbol='file_input', error_recovery=False):
def __init__(self, pgen_grammar, start_nonterminal='file_input', error_recovery=False):
self._pgen_grammar = pgen_grammar
self._start_symbol = start_symbol
self._start_nonterminal = start_nonterminal
self._error_recovery = error_recovery
def parse(self, tokens):
start_number = self._pgen_grammar.symbol2number[self._start_symbol]
self.pgen_parser = PgenParser(
self._pgen_grammar, self.convert_node, self.convert_leaf,
self.error_recovery, start_number
)
first_dfa = self._pgen_grammar.nonterminal_to_dfas[self._start_nonterminal][0]
self.stack = Stack([StackNode(first_dfa)])
node = self.pgen_parser.parse(tokens)
# The stack is empty now, we don't need it anymore.
del self.pgen_parser
return node
for token in tokens:
self._add_token(token)
def error_recovery(self, pgen_grammar, stack, arcs, typ, value, start_pos, prefix,
add_token_callback):
while True:
tos = self.stack[-1]
if not tos.dfa.is_final:
# We never broke out -- EOF is too soon -- Unfinished statement.
# However, the error recovery might have added the token again, if
# the stack is empty, we're fine.
raise InternalParseError(
"incomplete input", token.type, token.string, token.start_pos
)
if len(self.stack) > 1:
self._pop()
else:
return self.convert_node(tos.nonterminal, tos.nodes)
def error_recovery(self, token):
if self._error_recovery:
raise NotImplementedError("Error Recovery is not implemented")
else:
error_leaf = tree.ErrorLeaf('TODO %s' % typ, value, start_pos, prefix)
type_, value, start_pos, prefix = token
error_leaf = tree.ErrorLeaf(type_, value, start_pos, prefix)
raise ParserSyntaxError('SyntaxError: invalid syntax', error_leaf)
def convert_node(self, pgen_grammar, type_, children):
# TODO REMOVE symbol, we don't want type here.
symbol = pgen_grammar.number2symbol[type_]
def convert_node(self, nonterminal, children):
try:
return self.node_map[symbol](children)
node = self.node_map[nonterminal](children)
except KeyError:
return self.default_node(symbol, children)
node = self.default_node(nonterminal, children)
for c in children:
c.parent = node
return node
def convert_leaf(self, pgen_grammar, type_, value, prefix, start_pos):
def convert_leaf(self, type_, value, prefix, start_pos):
try:
return self.leaf_map[type_](value, start_pos, prefix)
except KeyError:
return self.default_leaf(value, start_pos, prefix)
def _add_token(self, token):
"""
This is the only core function for parsing. Here happens basically
everything. Everything is well prepared by the parser generator and we
only apply the necessary steps here.
"""
grammar = self._pgen_grammar
stack = self.stack
type_, value, start_pos, prefix = token
transition = _token_to_transition(grammar, type_, value)
while True:
try:
plan = stack[-1].dfa.transitions[transition]
break
except KeyError:
if stack[-1].dfa.is_final:
self._pop()
else:
self.error_recovery(token)
return
except IndexError:
raise InternalParseError("too much input", type_, value, start_pos)
stack[-1].dfa = plan.next_dfa
for push in plan.dfa_pushes:
stack.append(StackNode(push))
leaf = self.convert_leaf(type_, value, prefix, start_pos)
stack[-1].nodes.append(leaf)
def _pop(self):
tos = self.stack.pop()
# If there's exactly one child, return that child instead of
# creating a new node. We still create expr_stmt and
# file_input though, because a lot of Jedi depends on its
# logic.
if len(tos.nodes) == 1:
new_node = tos.nodes[0]
else:
new_node = self.convert_node(tos.dfa.from_rule, tos.nodes)
self.stack[-1].nodes.append(new_node)

View File

@@ -4,5 +4,7 @@
# Modifications:
# Copyright 2006 Google, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.
# Copyright 2014 David Halter. Integration into Jedi.
# Copyright 2014 David Halter and Contributors
# Modifications are dual-licensed: MIT and PSF.
from parso.pgen2.generator import generate_grammar

1
parso/pgen2/__init__.pyi Normal file
View File

@@ -0,0 +1 @@
from parso.pgen2.generator import generate_grammar as generate_grammar

378
parso/pgen2/generator.py Normal file
View File

@@ -0,0 +1,378 @@
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.
# Modifications:
# Copyright David Halter and Contributors
# Modifications are dual-licensed: MIT and PSF.
"""
This module defines the data structures used to represent a grammar.
Specifying grammars in pgen is possible with this grammar::
grammar: (NEWLINE | rule)* ENDMARKER
rule: NAME ':' rhs NEWLINE
rhs: items ('|' items)*
items: item+
item: '[' rhs ']' | atom ['+' | '*']
atom: '(' rhs ')' | NAME | STRING
This grammar is self-referencing.
This parser generator (pgen2) was created by Guido Rossum and used for lib2to3.
Most of the code has been refactored to make it more Pythonic. Since this was a
"copy" of the CPython Parser parser "pgen", there was some work needed to make
it more readable. It should also be slightly faster than the original pgen2,
because we made some optimizations.
"""
from ast import literal_eval
from parso.pgen2.grammar_parser import GrammarParser, NFAState
class Grammar(object):
"""
Once initialized, this class supplies the grammar tables for the
parsing engine implemented by parse.py. The parsing engine
accesses the instance variables directly.
The only important part in this parsers are dfas and transitions between
dfas.
"""
def __init__(self, start_nonterminal, rule_to_dfas, reserved_syntax_strings):
self.nonterminal_to_dfas = rule_to_dfas # Dict[str, List[DFAState]]
self.reserved_syntax_strings = reserved_syntax_strings
self.start_nonterminal = start_nonterminal
class DFAPlan(object):
"""
Plans are used for the parser to create stack nodes and do the proper
DFA state transitions.
"""
def __init__(self, next_dfa, dfa_pushes=[]):
self.next_dfa = next_dfa
self.dfa_pushes = dfa_pushes
def __repr__(self):
return '%s(%s, %s)' % (self.__class__.__name__, self.next_dfa, self.dfa_pushes)
class DFAState(object):
"""
The DFAState object is the core class for pretty much anything. DFAState
are the vertices of an ordered graph while arcs and transitions are the
edges.
Arcs are the initial edges, where most DFAStates are not connected and
transitions are then calculated to connect the DFA state machines that have
different nonterminals.
"""
def __init__(self, from_rule, nfa_set, final):
assert isinstance(nfa_set, set)
assert isinstance(next(iter(nfa_set)), NFAState)
assert isinstance(final, NFAState)
self.from_rule = from_rule
self.nfa_set = nfa_set
self.arcs = {} # map from terminals/nonterminals to DFAState
# In an intermediary step we set these nonterminal arcs (which has the
# same structure as arcs). These don't contain terminals anymore.
self.nonterminal_arcs = {}
# Transitions are basically the only thing that the parser is using
# with is_final. Everyting else is purely here to create a parser.
self.transitions = {} #: Dict[Union[TokenType, ReservedString], DFAPlan]
self.is_final = final in nfa_set
def add_arc(self, next_, label):
assert isinstance(label, str)
assert label not in self.arcs
assert isinstance(next_, DFAState)
self.arcs[label] = next_
def unifystate(self, old, new):
for label, next_ in self.arcs.items():
if next_ is old:
self.arcs[label] = new
def __eq__(self, other):
# Equality test -- ignore the nfa_set instance variable
assert isinstance(other, DFAState)
if self.is_final != other.is_final:
return False
# Can't just return self.arcs == other.arcs, because that
# would invoke this method recursively, with cycles...
if len(self.arcs) != len(other.arcs):
return False
for label, next_ in self.arcs.items():
if next_ is not other.arcs.get(label):
return False
return True
__hash__ = None # For Py3 compatibility.
def __repr__(self):
return '<%s: %s is_final=%s>' % (
self.__class__.__name__, self.from_rule, self.is_final
)
class ReservedString(object):
"""
Most grammars will have certain keywords and operators that are mentioned
in the grammar as strings (e.g. "if") and not token types (e.g. NUMBER).
This class basically is the former.
"""
def __init__(self, value):
self.value = value
def __repr__(self):
return '%s(%s)' % (self.__class__.__name__, self.value)
def _simplify_dfas(dfas):
"""
This is not theoretically optimal, but works well enough.
Algorithm: repeatedly look for two states that have the same
set of arcs (same labels pointing to the same nodes) and
unify them, until things stop changing.
dfas is a list of DFAState instances
"""
changes = True
while changes:
changes = False
for i, state_i in enumerate(dfas):
for j in range(i + 1, len(dfas)):
state_j = dfas[j]
if state_i == state_j:
#print " unify", i, j
del dfas[j]
for state in dfas:
state.unifystate(state_j, state_i)
changes = True
break
def _make_dfas(start, finish):
"""
Uses the powerset construction algorithm to create DFA states from sets of
NFA states.
Also does state reduction if some states are not needed.
"""
# To turn an NFA into a DFA, we define the states of the DFA
# to correspond to *sets* of states of the NFA. Then do some
# state reduction.
assert isinstance(start, NFAState)
assert isinstance(finish, NFAState)
def addclosure(nfa_state, base_nfa_set):
assert isinstance(nfa_state, NFAState)
if nfa_state in base_nfa_set:
return
base_nfa_set.add(nfa_state)
for nfa_arc in nfa_state.arcs:
if nfa_arc.nonterminal_or_string is None:
addclosure(nfa_arc.next, base_nfa_set)
base_nfa_set = set()
addclosure(start, base_nfa_set)
states = [DFAState(start.from_rule, base_nfa_set, finish)]
for state in states: # NB states grows while we're iterating
arcs = {}
# Find state transitions and store them in arcs.
for nfa_state in state.nfa_set:
for nfa_arc in nfa_state.arcs:
if nfa_arc.nonterminal_or_string is not None:
nfa_set = arcs.setdefault(nfa_arc.nonterminal_or_string, set())
addclosure(nfa_arc.next, nfa_set)
# Now create the dfa's with no None's in arcs anymore. All Nones have
# been eliminated and state transitions (arcs) are properly defined, we
# just need to create the dfa's.
for nonterminal_or_string, nfa_set in arcs.items():
for nested_state in states:
if nested_state.nfa_set == nfa_set:
# The DFA state already exists for this rule.
break
else:
nested_state = DFAState(start.from_rule, nfa_set, finish)
states.append(nested_state)
state.add_arc(nested_state, nonterminal_or_string)
return states # List of DFAState instances; first one is start
def _dump_nfa(start, finish):
print("Dump of NFA for", start.from_rule)
todo = [start]
for i, state in enumerate(todo):
print(" State", i, state is finish and "(final)" or "")
for arc in state.arcs:
label, next_ = arc.nonterminal_or_string, arc.next
if next_ in todo:
j = todo.index(next_)
else:
j = len(todo)
todo.append(next_)
if label is None:
print(" -> %d" % j)
else:
print(" %s -> %d" % (label, j))
def _dump_dfas(dfas):
print("Dump of DFA for", dfas[0].from_rule)
for i, state in enumerate(dfas):
print(" State", i, state.is_final and "(final)" or "")
for nonterminal, next_ in state.arcs.items():
print(" %s -> %d" % (nonterminal, dfas.index(next_)))
def generate_grammar(bnf_grammar, token_namespace):
"""
``bnf_text`` is a grammar in extended BNF (using * for repetition, + for
at-least-once repetition, [] for optional parts, | for alternatives and ()
for grouping).
It's not EBNF according to ISO/IEC 14977. It's a dialect Python uses in its
own parser.
"""
rule_to_dfas = {}
start_nonterminal = None
for nfa_a, nfa_z in GrammarParser(bnf_grammar).parse():
#_dump_nfa(nfa_a, nfa_z)
dfas = _make_dfas(nfa_a, nfa_z)
#_dump_dfas(dfas)
# oldlen = len(dfas)
_simplify_dfas(dfas)
# newlen = len(dfas)
rule_to_dfas[nfa_a.from_rule] = dfas
#print(nfa_a.from_rule, oldlen, newlen)
if start_nonterminal is None:
start_nonterminal = nfa_a.from_rule
reserved_strings = {}
for nonterminal, dfas in rule_to_dfas.items():
for dfa_state in dfas:
for terminal_or_nonterminal, next_dfa in dfa_state.arcs.items():
if terminal_or_nonterminal in rule_to_dfas:
dfa_state.nonterminal_arcs[terminal_or_nonterminal] = next_dfa
else:
transition = _make_transition(
token_namespace,
reserved_strings,
terminal_or_nonterminal
)
dfa_state.transitions[transition] = DFAPlan(next_dfa)
_calculate_tree_traversal(rule_to_dfas)
return Grammar(start_nonterminal, rule_to_dfas, reserved_strings)
def _make_transition(token_namespace, reserved_syntax_strings, label):
"""
Creates a reserved string ("if", "for", "*", ...) or returns the token type
(NUMBER, STRING, ...) for a given grammar terminal.
"""
if label[0].isalpha():
# A named token (e.g. NAME, NUMBER, STRING)
return getattr(token_namespace, label)
else:
# Either a keyword or an operator
assert label[0] in ('"', "'"), label
assert not label.startswith('"""') and not label.startswith("'''")
value = literal_eval(label)
try:
return reserved_syntax_strings[value]
except KeyError:
r = reserved_syntax_strings[value] = ReservedString(value)
return r
def _calculate_tree_traversal(nonterminal_to_dfas):
"""
By this point we know how dfas can move around within a stack node, but we
don't know how we can add a new stack node (nonterminal transitions).
"""
# Map from grammar rule (nonterminal) name to a set of tokens.
first_plans = {}
nonterminals = list(nonterminal_to_dfas.keys())
nonterminals.sort()
for nonterminal in nonterminals:
if nonterminal not in first_plans:
_calculate_first_plans(nonterminal_to_dfas, first_plans, nonterminal)
# Now that we have calculated the first terminals, we are sure that
# there is no left recursion.
for dfas in nonterminal_to_dfas.values():
for dfa_state in dfas:
transitions = dfa_state.transitions
for nonterminal, next_dfa in dfa_state.nonterminal_arcs.items():
for transition, pushes in first_plans[nonterminal].items():
if transition in transitions:
prev_plan = transitions[transition]
# Make sure these are sorted so that error messages are
# at least deterministic
choices = sorted([
(
prev_plan.dfa_pushes[0].from_rule
if prev_plan.dfa_pushes
else prev_plan.next_dfa.from_rule
),
(
pushes[0].from_rule
if pushes else next_dfa.from_rule
),
])
raise ValueError(
"Rule %s is ambiguous; given a %s token, we "
"can't determine if we should evaluate %s or %s."
% (
(
dfa_state.from_rule,
transition,
) + tuple(choices)
)
)
transitions[transition] = DFAPlan(next_dfa, pushes)
def _calculate_first_plans(nonterminal_to_dfas, first_plans, nonterminal):
"""
Calculates the first plan in the first_plans dictionary for every given
nonterminal. This is going to be used to know when to create stack nodes.
"""
dfas = nonterminal_to_dfas[nonterminal]
new_first_plans = {}
first_plans[nonterminal] = None # dummy to detect left recursion
# We only need to check the first dfa. All the following ones are not
# interesting to find first terminals.
state = dfas[0]
for transition, next_ in state.transitions.items():
# It's a string. We have finally found a possible first token.
new_first_plans[transition] = [next_.next_dfa]
for nonterminal2, next_ in state.nonterminal_arcs.items():
# It's a nonterminal and we have either a left recursion issue
# in the grammar or we have to recurse.
try:
first_plans2 = first_plans[nonterminal2]
except KeyError:
first_plans2 = _calculate_first_plans(nonterminal_to_dfas, first_plans, nonterminal2)
else:
if first_plans2 is None:
raise ValueError("left recursion for rule %r" % nonterminal)
for t, pushes in first_plans2.items():
new_first_plans[t] = [next_] + pushes
first_plans[nonterminal] = new_first_plans
return new_first_plans

38
parso/pgen2/generator.pyi Normal file
View File

@@ -0,0 +1,38 @@
from typing import Any, Generic, Mapping, Sequence, Set, TypeVar, Union
from parso.pgen2.grammar_parser import NFAState
_TokenTypeT = TypeVar("_TokenTypeT")
class Grammar(Generic[_TokenTypeT]):
nonterminal_to_dfas: Mapping[str, Sequence[DFAState[_TokenTypeT]]]
reserved_syntax_strings: Mapping[str, ReservedString]
start_nonterminal: str
def __init__(
self,
start_nonterminal: str,
rule_to_dfas: Mapping[str, Sequence[DFAState]],
reserved_syntax_strings: Mapping[str, ReservedString],
) -> None: ...
class DFAPlan:
next_dfa: DFAState
dfa_pushes: Sequence[DFAState]
class DFAState(Generic[_TokenTypeT]):
from_rule: str
nfa_set: Set[NFAState]
is_final: bool
arcs: Mapping[str, DFAState] # map from all terminals/nonterminals to DFAState
nonterminal_arcs: Mapping[str, DFAState]
transitions: Mapping[Union[_TokenTypeT, ReservedString], DFAPlan]
def __init__(
self, from_rule: str, nfa_set: Set[NFAState], final: NFAState
) -> None: ...
class ReservedString:
value: str
def __init__(self, value: str) -> None: ...
def __repr__(self) -> str: ...
def generate_grammar(bnf_grammar: str, token_namespace: Any) -> Grammar[Any]: ...

View File

@@ -1,125 +0,0 @@
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.
# Modifications:
# Copyright 2014 David Halter. Integration into Jedi.
# Modifications are dual-licensed: MIT and PSF.
"""This module defines the data structures used to represent a grammar.
These are a bit arcane because they are derived from the data
structures used by Python's 'pgen' parser generator.
There's also a table here mapping operators to their names in the
token module; the Python tokenize module reports all operators as the
fallback token code OP, but the parser needs the actual token code.
"""
import pickle
class Grammar(object):
"""Pgen parsing tables conversion class.
Once initialized, this class supplies the grammar tables for the
parsing engine implemented by parse.py. The parsing engine
accesses the instance variables directly. The class here does not
provide initialization of the tables; several subclasses exist to
do this (see the conv and pgen modules).
The load() method reads the tables from a pickle file, which is
much faster than the other ways offered by subclasses. The pickle
file is written by calling dump() (after loading the grammar
tables using a subclass). The report() method prints a readable
representation of the tables to stdout, for debugging.
The instance variables are as follows:
symbol2number -- a dict mapping symbol names to numbers. Symbol
numbers are always 256 or higher, to distinguish
them from token numbers, which are between 0 and
255 (inclusive).
number2symbol -- a dict mapping numbers to symbol names;
these two are each other's inverse.
states -- a list of DFAs, where each DFA is a list of
states, each state is a list of arcs, and each
arc is a (i, j) pair where i is a label and j is
a state number. The DFA number is the index into
this list. (This name is slightly confusing.)
Final states are represented by a special arc of
the form (0, j) where j is its own state number.
dfas -- a dict mapping symbol numbers to (DFA, first)
pairs, where DFA is an item from the states list
above, and first is a set of tokens that can
begin this grammar rule (represented by a dict
whose values are always 1).
labels -- a list of (x, y) pairs where x is either a token
number or a symbol number, and y is either None
or a string; the strings are keywords. The label
number is the index in this list; label numbers
are used to mark state transitions (arcs) in the
DFAs.
start -- the number of the grammar's start symbol.
keywords -- a dict mapping keyword strings to arc labels.
tokens -- a dict mapping token numbers to arc labels.
"""
def __init__(self, bnf_text):
self.symbol2number = {}
self.number2symbol = {}
self.states = []
self.dfas = {}
self.labels = [(0, "EMPTY")]
self.keywords = {}
self.tokens = {}
self.symbol2label = {}
self.label2symbol = {}
self.start = 256
def dump(self, filename):
"""Dump the grammar tables to a pickle file."""
with open(filename, "wb") as f:
pickle.dump(self.__dict__, f, 2)
def load(self, filename):
"""Load the grammar tables from a pickle file."""
with open(filename, "rb") as f:
d = pickle.load(f)
self.__dict__.update(d)
def copy(self):
"""
Copy the grammar.
"""
new = self.__class__()
for dict_attr in ("symbol2number", "number2symbol", "dfas", "keywords",
"tokens", "symbol2label"):
setattr(new, dict_attr, getattr(self, dict_attr).copy())
new.labels = self.labels[:]
new.states = self.states[:]
new.start = self.start
return new
def report(self):
"""Dump the grammar tables to standard output, for debugging."""
from pprint import pprint
print("s2n")
pprint(self.symbol2number)
print("n2s")
pprint(self.number2symbol)
print("states")
pprint(self.states)
print("dfas")
pprint(self.dfas)
print("labels")
pprint(self.labels)
print("start", self.start)

View File

@@ -0,0 +1,159 @@
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.
# Modifications:
# Copyright David Halter and Contributors
# Modifications are dual-licensed: MIT and PSF.
from parso.python.tokenize import tokenize
from parso.utils import parse_version_string
from parso.python.token import PythonTokenTypes
class GrammarParser():
"""
The parser for Python grammar files.
"""
def __init__(self, bnf_grammar):
self._bnf_grammar = bnf_grammar
self.generator = tokenize(
bnf_grammar,
version_info=parse_version_string('3.6')
)
self._gettoken() # Initialize lookahead
def parse(self):
# grammar: (NEWLINE | rule)* ENDMARKER
while self.type != PythonTokenTypes.ENDMARKER:
while self.type == PythonTokenTypes.NEWLINE:
self._gettoken()
# rule: NAME ':' rhs NEWLINE
self._current_rule_name = self._expect(PythonTokenTypes.NAME)
self._expect(PythonTokenTypes.OP, ':')
a, z = self._parse_rhs()
self._expect(PythonTokenTypes.NEWLINE)
yield a, z
def _parse_rhs(self):
# rhs: items ('|' items)*
a, z = self._parse_items()
if self.value != "|":
return a, z
else:
aa = NFAState(self._current_rule_name)
zz = NFAState(self._current_rule_name)
while True:
# Add the possibility to go into the state of a and come back
# to finish.
aa.add_arc(a)
z.add_arc(zz)
if self.value != "|":
break
self._gettoken()
a, z = self._parse_items()
return aa, zz
def _parse_items(self):
# items: item+
a, b = self._parse_item()
while self.type in (PythonTokenTypes.NAME, PythonTokenTypes.STRING) \
or self.value in ('(', '['):
c, d = self._parse_item()
# Need to end on the next item.
b.add_arc(c)
b = d
return a, b
def _parse_item(self):
# item: '[' rhs ']' | atom ['+' | '*']
if self.value == "[":
self._gettoken()
a, z = self._parse_rhs()
self._expect(PythonTokenTypes.OP, ']')
# Make it also possible that there is no token and change the
# state.
a.add_arc(z)
return a, z
else:
a, z = self._parse_atom()
value = self.value
if value not in ("+", "*"):
return a, z
self._gettoken()
# Make it clear that we can go back to the old state and repeat.
z.add_arc(a)
if value == "+":
return a, z
else:
# The end state is the same as the beginning, nothing must
# change.
return a, a
def _parse_atom(self):
# atom: '(' rhs ')' | NAME | STRING
if self.value == "(":
self._gettoken()
a, z = self._parse_rhs()
self._expect(PythonTokenTypes.OP, ')')
return a, z
elif self.type in (PythonTokenTypes.NAME, PythonTokenTypes.STRING):
a = NFAState(self._current_rule_name)
z = NFAState(self._current_rule_name)
# Make it clear that the state transition requires that value.
a.add_arc(z, self.value)
self._gettoken()
return a, z
else:
self._raise_error("expected (...) or NAME or STRING, got %s/%s",
self.type, self.value)
def _expect(self, type_, value=None):
if self.type != type_:
self._raise_error("expected %s, got %s [%s]",
type_, self.type, self.value)
if value is not None and self.value != value:
self._raise_error("expected %s, got %s", value, self.value)
value = self.value
self._gettoken()
return value
def _gettoken(self):
tup = next(self.generator)
self.type, self.value, self.begin, prefix = tup
def _raise_error(self, msg, *args):
if args:
try:
msg = msg % args
except:
msg = " ".join([msg] + list(map(str, args)))
line = self._bnf_grammar.splitlines()[self.begin[0] - 1]
raise SyntaxError(msg, ('<grammar>', self.begin[0],
self.begin[1], line))
class NFAArc(object):
def __init__(self, next_, nonterminal_or_string):
self.next = next_
self.nonterminal_or_string = nonterminal_or_string
def __repr__(self):
return '<%s: %s>' % (self.__class__.__name__, self.nonterminal_or_string)
class NFAState(object):
def __init__(self, from_rule):
self.from_rule = from_rule
self.arcs = [] # List[nonterminal (str), NFAState]
def add_arc(self, next_, nonterminal_or_string=None):
assert nonterminal_or_string is None or isinstance(nonterminal_or_string, str)
assert isinstance(next_, NFAState)
self.arcs.append(NFAArc(next_, nonterminal_or_string))
def __repr__(self):
return '<%s: from %s>' % (self.__class__.__name__, self.from_rule)

View File

@@ -0,0 +1,20 @@
from typing import Generator, List, Optional, Tuple
from parso.python.token import TokenType
class GrammarParser:
generator: Generator[TokenType, None, None]
def __init__(self, bnf_grammar: str) -> None: ...
def parse(self) -> Generator[Tuple[NFAState, NFAState], None, None]: ...
class NFAArc:
next: NFAState
nonterminal_or_string: Optional[str]
def __init__(
self, next_: NFAState, nonterminal_or_string: Optional[str]
) -> None: ...
class NFAState:
from_rule: str
arcs: List[NFAArc]
def __init__(self, from_rule: str) -> None: ...

View File

@@ -1,223 +0,0 @@
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.
# Modifications:
# Copyright 2014 David Halter. Integration into Jedi.
# Modifications are dual-licensed: MIT and PSF.
"""
Parser engine for the grammar tables generated by pgen.
The grammar table must be loaded first.
See Parser/parser.c in the Python distribution for additional info on
how this parsing engine works.
"""
from parso.python import tokenize
class InternalParseError(Exception):
"""
Exception to signal the parser is stuck and error recovery didn't help.
Basically this shouldn't happen. It's a sign that something is really
wrong.
"""
def __init__(self, msg, type, value, start_pos):
Exception.__init__(self, "%s: type=%r, value=%r, start_pos=%r" %
(msg, tokenize.tok_name[type], value, start_pos))
self.msg = msg
self.type = type
self.value = value
self.start_pos = start_pos
class Stack(list):
def get_tos_nodes(self):
tos = self[-1]
return tos[2][1]
def token_to_ilabel(grammar, type_, value):
# Map from token to label
if type_ == tokenize.NAME:
# Check for reserved words (keywords)
try:
return grammar.keywords[value]
except KeyError:
pass
try:
return grammar.tokens[type_]
except KeyError:
return None
class PgenParser(object):
"""Parser engine.
The proper usage sequence is:
p = Parser(grammar, [converter]) # create instance
p.setup([start]) # prepare for parsing
<for each input token>:
if p.add_token(...): # parse a token
break
root = p.rootnode # root of abstract syntax tree
A Parser instance may be reused by calling setup() repeatedly.
A Parser instance contains state pertaining to the current token
sequence, and should not be used concurrently by different threads
to parse separate token sequences.
See driver.py for how to get input tokens by tokenizing a file or
string.
Parsing is complete when add_token() returns True; the root of the
abstract syntax tree can then be retrieved from the rootnode
instance variable. When a syntax error occurs, error_recovery()
is called. There is no error recovery; the parser cannot be used
after a syntax error was reported (but it can be reinitialized by
calling setup()).
"""
def __init__(self, grammar, convert_node, convert_leaf, error_recovery, start):
"""Constructor.
The grammar argument is a grammar.Grammar instance; see the
grammar module for more information.
The parser is not ready yet for parsing; you must call the
setup() method to get it started.
The optional convert argument is a function mapping concrete
syntax tree nodes to abstract syntax tree nodes. If not
given, no conversion is done and the syntax tree produced is
the concrete syntax tree. If given, it must be a function of
two arguments, the first being the grammar (a grammar.Grammar
instance), and the second being the concrete syntax tree node
to be converted. The syntax tree is converted from the bottom
up.
A concrete syntax tree node is a (type, nodes) tuple, where
type is the node type (a token or symbol number) and nodes
is a list of children for symbols, and None for tokens.
An abstract syntax tree node may be anything; this is entirely
up to the converter function.
"""
self.grammar = grammar
self.convert_node = convert_node
self.convert_leaf = convert_leaf
# Each stack entry is a tuple: (dfa, state, node).
# A node is a tuple: (type, children),
# where children is a list of nodes or None
newnode = (start, [])
stackentry = (self.grammar.dfas[start], 0, newnode)
self.stack = Stack([stackentry])
self.rootnode = None
self.error_recovery = error_recovery
def parse(self, tokens):
for type_, value, start_pos, prefix in tokens:
if self.add_token(type_, value, start_pos, prefix):
break
else:
# We never broke out -- EOF is too soon -- Unfinished statement.
# However, the error recovery might have added the token again, if
# the stack is empty, we're fine.
if self.stack:
raise InternalParseError("incomplete input", type_, value, start_pos)
return self.rootnode
def add_token(self, type_, value, start_pos, prefix):
"""Add a token; return True if this is the end of the program."""
ilabel = token_to_ilabel(self.grammar, type_, value)
# Loop until the token is shifted; may raise exceptions
_gram = self.grammar
_labels = _gram.labels
_push = self._push
_pop = self._pop
_shift = self._shift
while True:
dfa, state, node = self.stack[-1]
states, first = dfa
arcs = states[state]
# Look for a state with this label
for i, newstate in arcs:
t, v = _labels[i]
if ilabel == i:
# Look it up in the list of labels
assert t < 256
# Shift a token; we're done with it
_shift(type_, value, newstate, prefix, start_pos)
# Pop while we are in an accept-only state
state = newstate
while states[state] == [(0, state)]:
_pop()
if not self.stack:
# Done parsing!
return True
dfa, state, node = self.stack[-1]
states, first = dfa
# Done with this token
return False
elif t >= 256:
# See if it's a symbol and if we're in its first set
itsdfa = _gram.dfas[t]
itsstates, itsfirst = itsdfa
if ilabel in itsfirst:
# Push a symbol
_push(t, itsdfa, newstate)
break # To continue the outer while loop
else:
if (0, state) in arcs:
# An accepting state, pop it and try something else
_pop()
if not self.stack:
# Done parsing, but another token is input
raise InternalParseError("too much input", type_, value, start_pos)
else:
self.error_recovery(self.grammar, self.stack, arcs, type_,
value, start_pos, prefix, self.add_token)
break
def _shift(self, type_, value, newstate, prefix, start_pos):
"""Shift a token. (Internal)"""
dfa, state, node = self.stack[-1]
newnode = self.convert_leaf(self.grammar, type_, value, prefix, start_pos)
node[-1].append(newnode)
self.stack[-1] = (dfa, newstate, node)
def _push(self, type_, newdfa, newstate):
"""Push a nonterminal. (Internal)"""
dfa, state, node = self.stack[-1]
newnode = (type_, [])
self.stack[-1] = (dfa, newstate, node)
self.stack.append((newdfa, 0, newnode))
def _pop(self):
"""Pop a nonterminal. (Internal)"""
popdfa, popstate, (type_, children) = self.stack.pop()
# If there's exactly one child, return that child instead of creating a
# new node. We still create expr_stmt and file_input though, because a
# lot of Jedi depends on its logic.
if len(children) == 1:
newnode = children[0]
else:
newnode = self.convert_node(self.grammar, type_, children)
try:
# Equal to:
# dfa, state, node = self.stack[-1]
# symbol, children = node
self.stack[-1][2][1].append(newnode)
except IndexError:
# Stack is empty, set the rootnode.
self.rootnode = newnode

View File

@@ -1,399 +0,0 @@
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.
# Modifications:
# Copyright 2014 David Halter. Integration into Jedi.
# Modifications are dual-licensed: MIT and PSF.
from parso.pgen2 import grammar
from parso.python import token
from parso.python import tokenize
from parso.utils import parse_version_string
class ParserGenerator(object):
def __init__(self, bnf_text, token_namespace):
self._bnf_text = bnf_text
self.generator = tokenize.tokenize(
bnf_text,
version_info=parse_version_string('3.6')
)
self._gettoken() # Initialize lookahead
self.dfas, self.startsymbol = self._parse()
self.first = {} # map from symbol name to set of tokens
self._addfirstsets()
self._token_namespace = token_namespace
def make_grammar(self):
c = grammar.Grammar(self._bnf_text)
names = list(self.dfas.keys())
names.sort()
names.remove(self.startsymbol)
names.insert(0, self.startsymbol)
for name in names:
i = 256 + len(c.symbol2number)
c.symbol2number[name] = i
c.number2symbol[i] = name
for name in names:
dfa = self.dfas[name]
states = []
for state in dfa:
arcs = []
for label, next in state.arcs.items():
arcs.append((self._make_label(c, label), dfa.index(next)))
if state.isfinal:
arcs.append((0, dfa.index(state)))
states.append(arcs)
c.states.append(states)
c.dfas[c.symbol2number[name]] = (states, self._make_first(c, name))
c.start = c.symbol2number[self.startsymbol]
return c
def _make_first(self, c, name):
rawfirst = self.first[name]
first = {}
for label in rawfirst:
ilabel = self._make_label(c, label)
##assert ilabel not in first # XXX failed on <> ... !=
first[ilabel] = 1
return first
def _make_label(self, c, label):
# XXX Maybe this should be a method on a subclass of converter?
ilabel = len(c.labels)
if label[0].isalpha():
# Either a symbol name or a named token
if label in c.symbol2number:
# A symbol name (a non-terminal)
if label in c.symbol2label:
return c.symbol2label[label]
else:
c.labels.append((c.symbol2number[label], None))
c.symbol2label[label] = ilabel
c.label2symbol[ilabel] = label
return ilabel
else:
# A named token (NAME, NUMBER, STRING)
itoken = getattr(self._token_namespace, label, None)
assert isinstance(itoken, int), label
if itoken in c.tokens:
return c.tokens[itoken]
else:
c.labels.append((itoken, None))
c.tokens[itoken] = ilabel
return ilabel
else:
# Either a keyword or an operator
assert label[0] in ('"', "'"), label
value = eval(label)
if value[0].isalpha():
# A keyword
if value in c.keywords:
return c.keywords[value]
else:
# TODO this might be an issue?! Using token.NAME here?
c.labels.append((token.NAME, value))
c.keywords[value] = ilabel
return ilabel
else:
# An operator (any non-numeric token)
itoken = self._token_namespace.generate_token_id(value)
if itoken in c.tokens:
return c.tokens[itoken]
else:
c.labels.append((itoken, None))
c.tokens[itoken] = ilabel
return ilabel
def _addfirstsets(self):
names = list(self.dfas.keys())
names.sort()
for name in names:
if name not in self.first:
self._calcfirst(name)
#print name, self.first[name].keys()
def _calcfirst(self, name):
dfa = self.dfas[name]
self.first[name] = None # dummy to detect left recursion
state = dfa[0]
totalset = {}
overlapcheck = {}
for label, next in state.arcs.items():
if label in self.dfas:
if label in self.first:
fset = self.first[label]
if fset is None:
raise ValueError("recursion for rule %r" % name)
else:
self._calcfirst(label)
fset = self.first[label]
totalset.update(fset)
overlapcheck[label] = fset
else:
totalset[label] = 1
overlapcheck[label] = {label: 1}
inverse = {}
for label, itsfirst in overlapcheck.items():
for symbol in itsfirst:
if symbol in inverse:
raise ValueError("rule %s is ambiguous; %s is in the"
" first sets of %s as well as %s" %
(name, symbol, label, inverse[symbol]))
inverse[symbol] = label
self.first[name] = totalset
def _parse(self):
dfas = {}
startsymbol = None
# MSTART: (NEWLINE | RULE)* ENDMARKER
while self.type != token.ENDMARKER:
while self.type == token.NEWLINE:
self._gettoken()
# RULE: NAME ':' RHS NEWLINE
name = self._expect(token.NAME)
self._expect(token.COLON)
a, z = self._parse_rhs()
self._expect(token.NEWLINE)
#self._dump_nfa(name, a, z)
dfa = self._make_dfa(a, z)
#self._dump_dfa(name, dfa)
# oldlen = len(dfa)
self._simplify_dfa(dfa)
# newlen = len(dfa)
dfas[name] = dfa
#print name, oldlen, newlen
if startsymbol is None:
startsymbol = name
return dfas, startsymbol
def _make_dfa(self, start, finish):
# To turn an NFA into a DFA, we define the states of the DFA
# to correspond to *sets* of states of the NFA. Then do some
# state reduction. Let's represent sets as dicts with 1 for
# values.
assert isinstance(start, NFAState)
assert isinstance(finish, NFAState)
def closure(state):
base = {}
addclosure(state, base)
return base
def addclosure(state, base):
assert isinstance(state, NFAState)
if state in base:
return
base[state] = 1
for label, next in state.arcs:
if label is None:
addclosure(next, base)
states = [DFAState(closure(start), finish)]
for state in states: # NB states grows while we're iterating
arcs = {}
for nfastate in state.nfaset:
for label, next in nfastate.arcs:
if label is not None:
addclosure(next, arcs.setdefault(label, {}))
for label, nfaset in arcs.items():
for st in states:
if st.nfaset == nfaset:
break
else:
st = DFAState(nfaset, finish)
states.append(st)
state.addarc(st, label)
return states # List of DFAState instances; first one is start
def _dump_nfa(self, name, start, finish):
print("Dump of NFA for", name)
todo = [start]
for i, state in enumerate(todo):
print(" State", i, state is finish and "(final)" or "")
for label, next in state.arcs:
if next in todo:
j = todo.index(next)
else:
j = len(todo)
todo.append(next)
if label is None:
print(" -> %d" % j)
else:
print(" %s -> %d" % (label, j))
def _dump_dfa(self, name, dfa):
print("Dump of DFA for", name)
for i, state in enumerate(dfa):
print(" State", i, state.isfinal and "(final)" or "")
for label, next in state.arcs.items():
print(" %s -> %d" % (label, dfa.index(next)))
def _simplify_dfa(self, dfa):
# This is not theoretically optimal, but works well enough.
# Algorithm: repeatedly look for two states that have the same
# set of arcs (same labels pointing to the same nodes) and
# unify them, until things stop changing.
# dfa is a list of DFAState instances
changes = True
while changes:
changes = False
for i, state_i in enumerate(dfa):
for j in range(i + 1, len(dfa)):
state_j = dfa[j]
if state_i == state_j:
#print " unify", i, j
del dfa[j]
for state in dfa:
state.unifystate(state_j, state_i)
changes = True
break
def _parse_rhs(self):
# RHS: ALT ('|' ALT)*
a, z = self._parse_alt()
if self.value != "|":
return a, z
else:
aa = NFAState()
zz = NFAState()
aa.addarc(a)
z.addarc(zz)
while self.value == "|":
self._gettoken()
a, z = self._parse_alt()
aa.addarc(a)
z.addarc(zz)
return aa, zz
def _parse_alt(self):
# ALT: ITEM+
a, b = self._parse_item()
while (self.value in ("(", "[") or
self.type in (token.NAME, token.STRING)):
c, d = self._parse_item()
b.addarc(c)
b = d
return a, b
def _parse_item(self):
# ITEM: '[' RHS ']' | ATOM ['+' | '*']
if self.value == "[":
self._gettoken()
a, z = self._parse_rhs()
self._expect(token.RSQB)
a.addarc(z)
return a, z
else:
a, z = self._parse_atom()
value = self.value
if value not in ("+", "*"):
return a, z
self._gettoken()
z.addarc(a)
if value == "+":
return a, z
else:
return a, a
def _parse_atom(self):
# ATOM: '(' RHS ')' | NAME | STRING
if self.value == "(":
self._gettoken()
a, z = self._parse_rhs()
self._expect(token.RPAR)
return a, z
elif self.type in (token.NAME, token.STRING):
a = NFAState()
z = NFAState()
a.addarc(z, self.value)
self._gettoken()
return a, z
else:
self._raise_error("expected (...) or NAME or STRING, got %s/%s",
self.type, self.value)
def _expect(self, type):
if self.type != type:
self._raise_error("expected %s, got %s(%s)",
type, self.type, self.value)
value = self.value
self._gettoken()
return value
def _gettoken(self):
tup = next(self.generator)
while tup[0] in (token.COMMENT, token.NL):
tup = next(self.generator)
self.type, self.value, self.begin, prefix = tup
def _raise_error(self, msg, *args):
if args:
try:
msg = msg % args
except:
msg = " ".join([msg] + list(map(str, args)))
line = self._bnf_text.splitlines()[self.begin[0] - 1]
raise SyntaxError(msg, ('<grammar>', self.begin[0],
self.begin[1], line))
class NFAState(object):
def __init__(self):
self.arcs = [] # list of (label, NFAState) pairs
def addarc(self, next, label=None):
assert label is None or isinstance(label, str)
assert isinstance(next, NFAState)
self.arcs.append((label, next))
class DFAState(object):
def __init__(self, nfaset, final):
assert isinstance(nfaset, dict)
assert isinstance(next(iter(nfaset)), NFAState)
assert isinstance(final, NFAState)
self.nfaset = nfaset
self.isfinal = final in nfaset
self.arcs = {} # map from label to DFAState
def addarc(self, next, label):
assert isinstance(label, str)
assert label not in self.arcs
assert isinstance(next, DFAState)
self.arcs[label] = next
def unifystate(self, old, new):
for label, next in self.arcs.items():
if next is old:
self.arcs[label] = new
def __eq__(self, other):
# Equality test -- ignore the nfaset instance variable
assert isinstance(other, DFAState)
if self.isfinal != other.isfinal:
return False
# Can't just return self.arcs == other.arcs, because that
# would invoke this method recursively, with cycles...
if len(self.arcs) != len(other.arcs):
return False
for label, next in self.arcs.items():
if next is not other.arcs.get(label):
return False
return True
__hash__ = None # For Py3 compatibility.
def generate_grammar(bnf_text, token_namespace):
"""
``bnf_text`` is a grammar in extended BNF (using * for repetition, + for
at-least-once repetition, [] for optional parts, | for alternatives and ()
for grouping).
It's not EBNF according to ISO/IEC 14977. It's a dialect Python uses in its
own parser.
"""
p = ParserGenerator(bnf_text, token_namespace)
return p.make_grammar()

0
parso/py.typed Normal file
View File

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,211 +0,0 @@
import re
from itertools import count
from parso.utils import PythonVersionInfo
from parso.utils import split_lines
from parso.python.tokenize import Token
from parso import parser
from parso.tree import TypedLeaf, ErrorNode, ErrorLeaf
version36 = PythonVersionInfo(3, 6)
class TokenNamespace:
_c = count()
LBRACE = next(_c)
RBRACE = next(_c)
ENDMARKER = next(_c)
COLON = next(_c)
CONVERSION = next(_c)
PYTHON_EXPR = next(_c)
EXCLAMATION_MARK = next(_c)
UNTERMINATED_STRING = next(_c)
token_map = dict((v, k) for k, v in locals().items() if not k.startswith('_'))
@classmethod
def generate_token_id(cls, string):
if string == '{':
return cls.LBRACE
elif string == '}':
return cls.RBRACE
elif string == '!':
return cls.EXCLAMATION_MARK
elif string == ':':
return cls.COLON
return getattr(cls, string)
GRAMMAR = """
fstring: expression* ENDMARKER
format_spec: ':' expression*
expression: '{' PYTHON_EXPR [ '!' CONVERSION ] [ format_spec ] '}'
"""
_prefix = r'((?:[^{}]+)*)'
_expr = _prefix + r'(\{|\}|$)'
_in_expr = r'([^{}\[\]:"\'!]*)(.?)'
# There's only one conversion character allowed. But the rules have to be
# checked later anyway, so allow more here. This makes error recovery nicer.
_conversion = r'([^={}:]*)(.?)'
_compiled_expr = re.compile(_expr)
_compiled_in_expr = re.compile(_in_expr)
_compiled_conversion = re.compile(_conversion)
def tokenize(code, start_pos=(1, 0)):
def add_to_pos(string):
lines = split_lines(string)
l = len(lines[-1])
if len(lines) > 1:
start_pos[0] += len(lines) - 1
start_pos[1] = l
else:
start_pos[1] += l
def tok(value, type=None, prefix=''):
if type is None:
type = TokenNamespace.generate_token_id(value)
add_to_pos(prefix)
token = Token(type, value, tuple(start_pos), prefix)
add_to_pos(value)
return token
start = 0
recursion_level = 0
added_prefix = ''
start_pos = list(start_pos)
while True:
match = _compiled_expr.match(code, start)
prefix = added_prefix + match.group(1)
found = match.group(2)
start = match.end()
if not found:
# We're at the end.
break
if found == '}':
if recursion_level == 0 and len(code) > start and code[start] == '}':
# This is a }} escape.
added_prefix = prefix + '}}'
start += 1
continue
recursion_level = max(0, recursion_level - 1)
yield tok(found, prefix=prefix)
added_prefix = ''
else:
assert found == '{'
if recursion_level == 0 and len(code) > start and code[start] == '{':
# This is a {{ escape.
added_prefix = prefix + '{{'
start += 1
continue
recursion_level += 1
yield tok(found, prefix=prefix)
added_prefix = ''
expression = ''
squared_count = 0
curly_count = 0
while True:
expr_match = _compiled_in_expr.match(code, start)
expression += expr_match.group(1)
found = expr_match.group(2)
start = expr_match.end()
if found == '{':
curly_count += 1
expression += found
elif found == '}' and curly_count > 0:
curly_count -= 1
expression += found
elif found == '[':
squared_count += 1
expression += found
elif found == ']':
# Use a max function here, because the Python code might
# just have syntax errors.
squared_count = max(0, squared_count - 1)
expression += found
elif found == ':' and (squared_count or curly_count):
expression += found
elif found in ('"', "'"):
search = found
if len(code) > start + 1 and \
code[start] == found == code[start+1]:
search *= 3
start += 2
index = code.find(search, start)
if index == -1:
yield tok(expression, type=TokenNamespace.PYTHON_EXPR)
yield tok(
found + code[start:],
type=TokenNamespace.UNTERMINATED_STRING,
)
start = len(code)
break
expression += found + code[start:index+1]
start = index + 1
elif found == '!' and len(code) > start and code[start] == '=':
# This is a python `!=` and not a conversion.
expression += found
else:
yield tok(expression, type=TokenNamespace.PYTHON_EXPR)
if found:
yield tok(found)
break
if found == '!':
conversion_match = _compiled_conversion.match(code, start)
found = conversion_match.group(2)
start = conversion_match.end()
yield tok(conversion_match.group(1), type=TokenNamespace.CONVERSION)
if found:
yield tok(found)
if found == '}':
recursion_level -= 1
# We don't need to handle everything after ':', because that is
# basically new tokens.
yield tok('', type=TokenNamespace.ENDMARKER, prefix=prefix)
class Parser(parser.BaseParser):
def parse(self, tokens):
node = super(Parser, self).parse(tokens)
if isinstance(node, self.default_leaf): # Is an endmarker.
# If there's no curly braces we get back a non-module. We always
# want an fstring.
node = self.default_node('fstring', [node])
return node
def convert_leaf(self, pgen_grammar, type, value, prefix, start_pos):
# TODO this is so ugly.
leaf_type = TokenNamespace.token_map[type].lower()
return TypedLeaf(leaf_type, value, start_pos, prefix)
def error_recovery(self, pgen_grammar, stack, arcs, typ, value, start_pos, prefix,
add_token_callback):
if not self._error_recovery:
return super(Parser, self).error_recovery(
pgen_grammar, stack, arcs, typ, value, start_pos, prefix,
add_token_callback
)
token_type = TokenNamespace.token_map[typ].lower()
if len(stack) == 1:
error_leaf = ErrorLeaf(token_type, value, start_pos, prefix)
stack[0][2][1].append(error_leaf)
else:
dfa, state, (type_, nodes) = stack[1]
stack[0][2][1].append(ErrorNode(nodes))
stack[1:] = []
add_token_callback(typ, value, start_pos, prefix)

View File

@@ -1,158 +0,0 @@
# Grammar for Python
# Note: Changing the grammar specified in this file will most likely
# require corresponding changes in the parser module
# (../Modules/parsermodule.c). If you can't make the changes to
# that module yourself, please co-ordinate the required changes
# with someone who can; ask around on python-dev for help. Fred
# Drake <fdrake@acm.org> will probably be listening there.
# NOTE WELL: You should also follow all the steps listed in PEP 306,
# "How to Change Python's Grammar"
# Commands for Kees Blom's railroad program
#diagram:token NAME
#diagram:token NUMBER
#diagram:token STRING
#diagram:token NEWLINE
#diagram:token ENDMARKER
#diagram:token INDENT
#diagram:output\input python.bla
#diagram:token DEDENT
#diagram:output\textwidth 20.04cm\oddsidemargin 0.0cm\evensidemargin 0.0cm
#diagram:rules
# Start symbols for the grammar:
# single_input is a single interactive statement;
# file_input is a module or sequence of commands read from an input file;
# eval_input is the input for the eval() and input() functions.
# NB: compound_stmt in single_input is followed by extra NEWLINE!
single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
file_input: (NEWLINE | stmt)* ENDMARKER
eval_input: testlist NEWLINE* ENDMARKER
decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE
decorators: decorator+
decorated: decorators (classdef | funcdef)
funcdef: 'def' NAME parameters ':' suite
parameters: '(' [varargslist] ')'
varargslist: ((fpdef ['=' test] ',')*
('*' NAME [',' '**' NAME] | '**' NAME) |
fpdef ['=' test] (',' fpdef ['=' test])* [','])
fpdef: NAME | '(' fplist ')'
fplist: fpdef (',' fpdef)* [',']
stmt: simple_stmt | compound_stmt
simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
small_stmt: (expr_stmt | print_stmt | del_stmt | pass_stmt | flow_stmt |
import_stmt | global_stmt | exec_stmt | assert_stmt)
expr_stmt: testlist (augassign (yield_expr|testlist) |
('=' (yield_expr|testlist))*)
augassign: ('+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' |
'<<=' | '>>=' | '**=' | '//=')
# For normal assignments, additional restrictions enforced by the interpreter
print_stmt: 'print' ( [ test (',' test)* [','] ] |
'>>' test [ (',' test)+ [','] ] )
del_stmt: 'del' exprlist
pass_stmt: 'pass'
flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt
break_stmt: 'break'
continue_stmt: 'continue'
return_stmt: 'return' [testlist]
yield_stmt: yield_expr
raise_stmt: 'raise' [test [',' test [',' test]]]
import_stmt: import_name | import_from
import_name: 'import' dotted_as_names
import_from: ('from' ('.'* dotted_name | '.'+)
'import' ('*' | '(' import_as_names ')' | import_as_names))
import_as_name: NAME ['as' NAME]
dotted_as_name: dotted_name ['as' NAME]
import_as_names: import_as_name (',' import_as_name)* [',']
dotted_as_names: dotted_as_name (',' dotted_as_name)*
dotted_name: NAME ('.' NAME)*
global_stmt: 'global' NAME (',' NAME)*
exec_stmt: 'exec' expr ['in' test [',' test]]
assert_stmt: 'assert' test [',' test]
compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated
if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite]
while_stmt: 'while' test ':' suite ['else' ':' suite]
for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite]
try_stmt: ('try' ':' suite
((except_clause ':' suite)+
['else' ':' suite]
['finally' ':' suite] |
'finally' ':' suite))
with_stmt: 'with' with_item ':' suite
# Dave: Python2.6 actually defines a little bit of a different label called
# 'with_var'. However in 2.7+ this is the default. Apply it for
# consistency reasons.
with_item: test ['as' expr]
# NB compile.c makes sure that the default except clause is last
except_clause: 'except' [test [('as' | ',') test]]
suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT
# Backward compatibility cruft to support:
# [ x for x in lambda: True, lambda: False if x() ]
# even while also allowing:
# lambda x: 5 if x else 2
# (But not a mix of the two)
testlist_safe: old_test [(',' old_test)+ [',']]
old_test: or_test | old_lambdef
old_lambdef: 'lambda' [varargslist] ':' old_test
test: or_test ['if' or_test 'else' test] | lambdef
or_test: and_test ('or' and_test)*
and_test: not_test ('and' not_test)*
not_test: 'not' not_test | comparison
comparison: expr (comp_op expr)*
comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'
expr: xor_expr ('|' xor_expr)*
xor_expr: and_expr ('^' and_expr)*
and_expr: shift_expr ('&' shift_expr)*
shift_expr: arith_expr (('<<'|'>>') arith_expr)*
arith_expr: term (('+'|'-') term)*
term: factor (('*'|'/'|'%'|'//') factor)*
factor: ('+'|'-'|'~') factor | power
power: atom trailer* ['**' factor]
atom: ('(' [yield_expr|testlist_comp] ')' |
'[' [listmaker] ']' |
'{' [dictorsetmaker] '}' |
'`' testlist1 '`' |
NAME | NUMBER | STRING+)
listmaker: test ( list_for | (',' test)* [','] )
# Dave: Renamed testlist_gexpr to testlist_comp, because in 2.7+ this is the
# default. It's more consistent like this.
testlist_comp: test ( gen_for | (',' test)* [','] )
lambdef: 'lambda' [varargslist] ':' test
trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
subscriptlist: subscript (',' subscript)* [',']
subscript: '.' '.' '.' | test | [test] ':' [test] [sliceop]
sliceop: ':' [test]
exprlist: expr (',' expr)* [',']
testlist: test (',' test)* [',']
# Dave: Rename from dictmaker to dictorsetmaker, because this is more
# consistent with the following grammars.
dictorsetmaker: test ':' test (',' test ':' test)* [',']
classdef: 'class' NAME ['(' [testlist] ')'] ':' suite
arglist: (argument ',')* (argument [',']
|'*' test (',' argument)* [',' '**' test]
|'**' test)
argument: test [gen_for] | test '=' test # Really [keyword '='] test
list_iter: list_for | list_if
list_for: 'for' exprlist 'in' testlist_safe [list_iter]
list_if: 'if' old_test [list_iter]
gen_iter: gen_for | gen_if
gen_for: 'for' exprlist 'in' or_test [gen_iter]
gen_if: 'if' old_test [gen_iter]
testlist1: test (',' test)*
# not used in grammar, but may appear in "node" passed from Parser to Compiler
encoding_decl: NAME
yield_expr: 'yield' [testlist]

View File

@@ -16,7 +16,7 @@
# eval_input is the input for the eval() and input() functions.
# NB: compound_stmt in single_input is followed by extra NEWLINE!
single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
file_input: (NEWLINE | stmt)* ENDMARKER
file_input: stmt* ENDMARKER
eval_input: testlist NEWLINE* ENDMARKER
decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE
@@ -30,7 +30,7 @@ varargslist: ((fpdef ['=' test] ',')*
fpdef: NAME | '(' fplist ')'
fplist: fpdef (',' fpdef)* [',']
stmt: simple_stmt | compound_stmt
stmt: simple_stmt | compound_stmt | NEWLINE
simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
small_stmt: (expr_stmt | print_stmt | del_stmt | pass_stmt | flow_stmt |
import_stmt | global_stmt | exec_stmt | assert_stmt)
@@ -104,9 +104,10 @@ atom: ('(' [yield_expr|testlist_comp] ')' |
'[' [listmaker] ']' |
'{' [dictorsetmaker] '}' |
'`' testlist1 '`' |
NAME | NUMBER | STRING+)
NAME | NUMBER | strings)
strings: STRING+
listmaker: test ( list_for | (',' test)* [','] )
testlist_comp: test ( comp_for | (',' test)* [','] )
testlist_comp: test ( sync_comp_for | (',' test)* [','] )
lambdef: 'lambda' [varargslist] ':' test
trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
subscriptlist: subscript (',' subscript)* [',']
@@ -114,8 +115,8 @@ subscript: '.' '.' '.' | test | [test] ':' [test] [sliceop]
sliceop: ':' [test]
exprlist: expr (',' expr)* [',']
testlist: test (',' test)* [',']
dictorsetmaker: ( (test ':' test (comp_for | (',' test ':' test)* [','])) |
(test (comp_for | (',' test)* [','])) )
dictorsetmaker: ( (test ':' test (sync_comp_for | (',' test ':' test)* [','])) |
(test (sync_comp_for | (',' test)* [','])) )
classdef: 'class' NAME ['(' [testlist] ')'] ':' suite
@@ -124,14 +125,14 @@ arglist: (argument ',')* (argument [',']
|'**' test)
# The reason that keywords are test nodes instead of NAME is that using NAME
# results in an ambiguity. ast.c makes sure it's a NAME.
argument: test [comp_for] | test '=' test
argument: test [sync_comp_for] | test '=' test
list_iter: list_for | list_if
list_for: 'for' exprlist 'in' testlist_safe [list_iter]
list_if: 'if' old_test [list_iter]
comp_iter: comp_for | comp_if
comp_for: 'for' exprlist 'in' or_test [comp_iter]
comp_iter: sync_comp_for | comp_if
sync_comp_for: 'for' exprlist 'in' or_test [comp_iter]
comp_if: 'if' old_test [comp_iter]
testlist1: test (',' test)*

171
parso/python/grammar310.txt Normal file
View File

@@ -0,0 +1,171 @@
# Grammar for Python
# NOTE WELL: You should also follow all the steps listed at
# https://devguide.python.org/grammar/
# Start symbols for the grammar:
# single_input is a single interactive statement;
# file_input is a module or sequence of commands read from an input file;
# eval_input is the input for the eval() functions.
# NB: compound_stmt in single_input is followed by extra NEWLINE!
single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
file_input: stmt* ENDMARKER
eval_input: testlist NEWLINE* ENDMARKER
decorator: '@' namedexpr_test NEWLINE
decorators: decorator+
decorated: decorators (classdef | funcdef | async_funcdef)
async_funcdef: 'async' funcdef
funcdef: 'def' NAME parameters ['->' test] ':' suite
parameters: '(' [typedargslist] ')'
typedargslist: (
(tfpdef ['=' test] (',' tfpdef ['=' test])* ',' '/' [',' [ tfpdef ['=' test] (
',' tfpdef ['=' test])* ([',' [
'*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]]
| '**' tfpdef [',']]])
| '*' [tfpdef] (',' tfpdef ['=' test])* ([',' ['**' tfpdef [',']]])
| '**' tfpdef [',']]] )
| (tfpdef ['=' test] (',' tfpdef ['=' test])* [',' [
'*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]]
| '**' tfpdef [',']]]
| '*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]]
| '**' tfpdef [','])
)
tfpdef: NAME [':' test]
varargslist: vfpdef ['=' test ](',' vfpdef ['=' test])* ',' '/' [',' [ (vfpdef ['=' test] (',' vfpdef ['=' test])* [',' [
'*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]]
| '**' vfpdef [',']]]
| '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]]
| '**' vfpdef [',']) ]] | (vfpdef ['=' test] (',' vfpdef ['=' test])* [',' [
'*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]]
| '**' vfpdef [',']]]
| '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]]
| '**' vfpdef [',']
)
vfpdef: NAME
stmt: simple_stmt | compound_stmt | NEWLINE
simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
small_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt |
import_stmt | global_stmt | nonlocal_stmt | assert_stmt)
expr_stmt: testlist_star_expr (annassign | augassign (yield_expr|testlist) |
('=' (yield_expr|testlist_star_expr))*)
annassign: ':' test ['=' (yield_expr|testlist_star_expr)]
testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [',']
augassign: ('+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^=' |
'<<=' | '>>=' | '**=' | '//=')
# For normal and annotated assignments, additional restrictions enforced by the interpreter
del_stmt: 'del' exprlist
pass_stmt: 'pass'
flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt
break_stmt: 'break'
continue_stmt: 'continue'
return_stmt: 'return' [testlist_star_expr]
yield_stmt: yield_expr
raise_stmt: 'raise' [test ['from' test]]
import_stmt: import_name | import_from
import_name: 'import' dotted_as_names
# note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS
import_from: ('from' (('.' | '...')* dotted_name | ('.' | '...')+)
'import' ('*' | '(' import_as_names ')' | import_as_names))
import_as_name: NAME ['as' NAME]
dotted_as_name: dotted_name ['as' NAME]
import_as_names: import_as_name (',' import_as_name)* [',']
dotted_as_names: dotted_as_name (',' dotted_as_name)*
dotted_name: NAME ('.' NAME)*
global_stmt: 'global' NAME (',' NAME)*
nonlocal_stmt: 'nonlocal' NAME (',' NAME)*
assert_stmt: 'assert' test [',' test]
compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt
async_stmt: 'async' (funcdef | with_stmt | for_stmt)
if_stmt: 'if' namedexpr_test ':' suite ('elif' namedexpr_test ':' suite)* ['else' ':' suite]
while_stmt: 'while' namedexpr_test ':' suite ['else' ':' suite]
for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite]
try_stmt: ('try' ':' suite
((except_clause ':' suite)+
['else' ':' suite]
['finally' ':' suite] |
'finally' ':' suite))
with_stmt: 'with' with_item (',' with_item)* ':' suite
with_item: test ['as' expr]
# NB compile.c makes sure that the default except clause is last
except_clause: 'except' [test ['as' NAME]]
suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT
namedexpr_test: test [':=' test]
test: or_test ['if' or_test 'else' test] | lambdef
test_nocond: or_test | lambdef_nocond
lambdef: 'lambda' [varargslist] ':' test
lambdef_nocond: 'lambda' [varargslist] ':' test_nocond
or_test: and_test ('or' and_test)*
and_test: not_test ('and' not_test)*
not_test: 'not' not_test | comparison
comparison: expr (comp_op expr)*
# <> isn't actually a valid comparison operator in Python. It's here for the
# sake of a __future__ import described in PEP 401 (which really works :-)
comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'
star_expr: '*' expr
expr: xor_expr ('|' xor_expr)*
xor_expr: and_expr ('^' and_expr)*
and_expr: shift_expr ('&' shift_expr)*
shift_expr: arith_expr (('<<'|'>>') arith_expr)*
arith_expr: term (('+'|'-') term)*
term: factor (('*'|'@'|'/'|'%'|'//') factor)*
factor: ('+'|'-'|'~') factor | power
power: atom_expr ['**' factor]
atom_expr: ['await'] atom trailer*
atom: ('(' [yield_expr|testlist_comp] ')' |
'[' [testlist_comp] ']' |
'{' [dictorsetmaker] '}' |
NAME | NUMBER | strings | '...' | 'None' | 'True' | 'False')
testlist_comp: (namedexpr_test|star_expr) ( comp_for | (',' (namedexpr_test|star_expr))* [','] )
trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
subscriptlist: subscript (',' subscript)* [',']
subscript: test | [test] ':' [test] [sliceop]
sliceop: ':' [test]
exprlist: (expr|star_expr) (',' (expr|star_expr))* [',']
testlist: test (',' test)* [',']
dictorsetmaker: ( ((test ':' test | '**' expr)
(comp_for | (',' (test ':' test | '**' expr))* [','])) |
((test | star_expr)
(comp_for | (',' (test | star_expr))* [','])) )
classdef: 'class' NAME ['(' [arglist] ')'] ':' suite
arglist: argument (',' argument)* [',']
# The reason that keywords are test nodes instead of NAME is that using NAME
# results in an ambiguity. ast.c makes sure it's a NAME.
# "test '=' test" is really "keyword '=' test", but we have no such token.
# These need to be in a single rule to avoid grammar that is ambiguous
# to our LL(1) parser. Even though 'test' includes '*expr' in star_expr,
# we explicitly match '*' here, too, to give it proper precedence.
# Illegal combinations and orderings are blocked in ast.c:
# multiple (test comp_for) arguments are blocked; keyword unpackings
# that precede iterable unpackings are blocked; etc.
argument: ( test [comp_for] |
test ':=' test |
test '=' test |
'**' test |
'*' test )
comp_iter: comp_for | comp_if
sync_comp_for: 'for' exprlist 'in' or_test [comp_iter]
comp_for: ['async'] sync_comp_for
comp_if: 'if' test_nocond [comp_iter]
# not used in grammar, but may appear in "node" passed from Parser to Compiler
encoding_decl: NAME
yield_expr: 'yield' [yield_arg]
yield_arg: 'from' test | testlist_star_expr
strings: (STRING | fstring)+
fstring: FSTRING_START fstring_content* FSTRING_END
fstring_content: FSTRING_STRING | fstring_expr
fstring_conversion: '!' NAME
fstring_expr: '{' testlist ['='] [ fstring_conversion ] [ fstring_format_spec ] '}'
fstring_format_spec: ':' fstring_content*

View File

@@ -16,7 +16,7 @@
# eval_input is the input for the eval() functions.
# NB: compound_stmt in single_input is followed by extra NEWLINE!
single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
file_input: (NEWLINE | stmt)* ENDMARKER
file_input: stmt* ENDMARKER
eval_input: testlist NEWLINE* ENDMARKER
decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE
@@ -33,7 +33,7 @@ varargslist: (vfpdef ['=' test] (',' vfpdef ['=' test])* [','
| '*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef)
vfpdef: NAME
stmt: simple_stmt | compound_stmt
stmt: simple_stmt | compound_stmt | NEWLINE
simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
small_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt |
import_stmt | global_stmt | nonlocal_stmt | assert_stmt)
@@ -103,16 +103,17 @@ power: atom trailer* ['**' factor]
atom: ('(' [yield_expr|testlist_comp] ')' |
'[' [testlist_comp] ']' |
'{' [dictorsetmaker] '}' |
NAME | NUMBER | STRING+ | '...' | 'None' | 'True' | 'False')
testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
NAME | NUMBER | strings | '...' | 'None' | 'True' | 'False')
strings: STRING+
testlist_comp: (test|star_expr) ( sync_comp_for | (',' (test|star_expr))* [','] )
trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
subscriptlist: subscript (',' subscript)* [',']
subscript: test | [test] ':' [test] [sliceop]
sliceop: ':' [test]
exprlist: (expr|star_expr) (',' (expr|star_expr))* [',']
testlist: test (',' test)* [',']
dictorsetmaker: ( (test ':' test (comp_for | (',' test ':' test)* [','])) |
(test (comp_for | (',' test)* [','])) )
dictorsetmaker: ( (test ':' test (sync_comp_for | (',' test ':' test)* [','])) |
(test (sync_comp_for | (',' test)* [','])) )
classdef: 'class' NAME ['(' [arglist] ')'] ':' suite
@@ -121,9 +122,9 @@ arglist: (argument ',')* (argument [',']
|'**' test)
# The reason that keywords are test nodes instead of NAME is that using NAME
# results in an ambiguity. ast.c makes sure it's a NAME.
argument: test [comp_for] | test '=' test # Really [keyword '='] test
comp_iter: comp_for | comp_if
comp_for: 'for' exprlist 'in' or_test [comp_iter]
argument: test [sync_comp_for] | test '=' test # Really [keyword '='] test
comp_iter: sync_comp_for | comp_if
sync_comp_for: 'for' exprlist 'in' or_test [comp_iter]
comp_if: 'if' test_nocond [comp_iter]
# not used in grammar, but may appear in "node" passed from Parser to Compiler

View File

@@ -16,7 +16,7 @@
# eval_input is the input for the eval() functions.
# NB: compound_stmt in single_input is followed by extra NEWLINE!
single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
file_input: (NEWLINE | stmt)* ENDMARKER
file_input: stmt* ENDMARKER
eval_input: testlist NEWLINE* ENDMARKER
decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE
@@ -33,7 +33,7 @@ varargslist: (vfpdef ['=' test] (',' vfpdef ['=' test])* [','
| '*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef)
vfpdef: NAME
stmt: simple_stmt | compound_stmt
stmt: simple_stmt | compound_stmt | NEWLINE
simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
small_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt |
import_stmt | global_stmt | nonlocal_stmt | assert_stmt)
@@ -103,16 +103,17 @@ power: atom trailer* ['**' factor]
atom: ('(' [yield_expr|testlist_comp] ')' |
'[' [testlist_comp] ']' |
'{' [dictorsetmaker] '}' |
NAME | NUMBER | STRING+ | '...' | 'None' | 'True' | 'False')
testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
NAME | NUMBER | strings | '...' | 'None' | 'True' | 'False')
strings: STRING+
testlist_comp: (test|star_expr) ( sync_comp_for | (',' (test|star_expr))* [','] )
trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
subscriptlist: subscript (',' subscript)* [',']
subscript: test | [test] ':' [test] [sliceop]
sliceop: ':' [test]
exprlist: (expr|star_expr) (',' (expr|star_expr))* [',']
testlist: test (',' test)* [',']
dictorsetmaker: ( (test ':' test (comp_for | (',' test ':' test)* [','])) |
(test (comp_for | (',' test)* [','])) )
dictorsetmaker: ( (test ':' test (sync_comp_for | (',' test ':' test)* [','])) |
(test (sync_comp_for | (',' test)* [','])) )
classdef: 'class' NAME ['(' [arglist] ')'] ':' suite
@@ -121,9 +122,9 @@ arglist: (argument ',')* (argument [',']
|'**' test)
# The reason that keywords are test nodes instead of NAME is that using NAME
# results in an ambiguity. ast.c makes sure it's a NAME.
argument: test [comp_for] | test '=' test # Really [keyword '='] test
comp_iter: comp_for | comp_if
comp_for: 'for' exprlist 'in' or_test [comp_iter]
argument: test [sync_comp_for] | test '=' test # Really [keyword '='] test
comp_iter: sync_comp_for | comp_if
sync_comp_for: 'for' exprlist 'in' or_test [comp_iter]
comp_if: 'if' test_nocond [comp_iter]
# not used in grammar, but may appear in "node" passed from Parser to Compiler

View File

@@ -16,7 +16,7 @@
# eval_input is the input for the eval() functions.
# NB: compound_stmt in single_input is followed by extra NEWLINE!
single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
file_input: (NEWLINE | stmt)* ENDMARKER
file_input: stmt* ENDMARKER
eval_input: testlist NEWLINE* ENDMARKER
decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE
@@ -38,7 +38,7 @@ varargslist: (vfpdef ['=' test] (',' vfpdef ['=' test])* [','
| '*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef)
vfpdef: NAME
stmt: simple_stmt | compound_stmt
stmt: simple_stmt | compound_stmt | NEWLINE
simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
small_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt |
import_stmt | global_stmt | nonlocal_stmt | assert_stmt)
@@ -110,8 +110,9 @@ atom_expr: ['await'] atom trailer*
atom: ('(' [yield_expr|testlist_comp] ')' |
'[' [testlist_comp] ']' |
'{' [dictorsetmaker] '}' |
NAME | NUMBER | STRING+ | '...' | 'None' | 'True' | 'False')
testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
NAME | NUMBER | strings | '...' | 'None' | 'True' | 'False')
strings: STRING+
testlist_comp: (test|star_expr) ( sync_comp_for | (',' (test|star_expr))* [','] )
trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
subscriptlist: subscript (',' subscript)* [',']
subscript: test | [test] ':' [test] [sliceop]
@@ -119,9 +120,9 @@ sliceop: ':' [test]
exprlist: (expr|star_expr) (',' (expr|star_expr))* [',']
testlist: test (',' test)* [',']
dictorsetmaker: ( ((test ':' test | '**' expr)
(comp_for | (',' (test ':' test | '**' expr))* [','])) |
(sync_comp_for | (',' (test ':' test | '**' expr))* [','])) |
((test | star_expr)
(comp_for | (',' (test | star_expr))* [','])) )
(sync_comp_for | (',' (test | star_expr))* [','])) )
classdef: 'class' NAME ['(' [arglist] ')'] ':' suite
@@ -136,13 +137,13 @@ arglist: argument (',' argument)* [',']
# Illegal combinations and orderings are blocked in ast.c:
# multiple (test comp_for) arguments are blocked; keyword unpackings
# that precede iterable unpackings are blocked; etc.
argument: ( test [comp_for] |
argument: ( test [sync_comp_for] |
test '=' test |
'**' test |
'*' test )
comp_iter: comp_for | comp_if
comp_for: 'for' exprlist 'in' or_test [comp_iter]
comp_iter: sync_comp_for | comp_if
sync_comp_for: 'for' exprlist 'in' or_test [comp_iter]
comp_if: 'if' test_nocond [comp_iter]
# not used in grammar, but may appear in "node" passed from Parser to Compiler

View File

@@ -9,7 +9,7 @@
# eval_input is the input for the eval() functions.
# NB: compound_stmt in single_input is followed by extra NEWLINE!
single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
file_input: (NEWLINE | stmt)* ENDMARKER
file_input: stmt* ENDMARKER
eval_input: testlist NEWLINE* ENDMARKER
decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE
decorators: decorator+
@@ -35,7 +35,7 @@ varargslist: (vfpdef ['=' test] (',' vfpdef ['=' test])* [',' [
)
vfpdef: NAME
stmt: simple_stmt | compound_stmt
stmt: simple_stmt | compound_stmt | NEWLINE
simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
small_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt |
import_stmt | global_stmt | nonlocal_stmt | assert_stmt)
@@ -108,7 +108,7 @@ atom_expr: ['await'] atom trailer*
atom: ('(' [yield_expr|testlist_comp] ')' |
'[' [testlist_comp] ']' |
'{' [dictorsetmaker] '}' |
NAME | NUMBER | STRING+ | '...' | 'None' | 'True' | 'False')
NAME | NUMBER | strings | '...' | 'None' | 'True' | 'False')
testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
subscriptlist: subscript (',' subscript)* [',']
@@ -140,7 +140,8 @@ argument: ( test [comp_for] |
'*' test )
comp_iter: comp_for | comp_if
comp_for: ['async'] 'for' exprlist 'in' or_test [comp_iter]
sync_comp_for: 'for' exprlist 'in' or_test [comp_iter]
comp_for: ['async'] sync_comp_for
comp_if: 'if' test_nocond [comp_iter]
# not used in grammar, but may appear in "node" passed from Parser to Compiler
@@ -148,3 +149,10 @@ encoding_decl: NAME
yield_expr: 'yield' [yield_arg]
yield_arg: 'from' test | testlist
strings: (STRING | fstring)+
fstring: FSTRING_START fstring_content* FSTRING_END
fstring_content: FSTRING_STRING | fstring_expr
fstring_conversion: '!' NAME
fstring_expr: '{' testlist_comp [ fstring_conversion ] [ fstring_format_spec ] '}'
fstring_format_spec: ':' fstring_content*

156
parso/python/grammar37.txt Normal file
View File

@@ -0,0 +1,156 @@
# Grammar for Python
# NOTE WELL: You should also follow all the steps listed at
# https://docs.python.org/devguide/grammar.html
# Start symbols for the grammar:
# single_input is a single interactive statement;
# file_input is a module or sequence of commands read from an input file;
# eval_input is the input for the eval() functions.
# NB: compound_stmt in single_input is followed by extra NEWLINE!
single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
file_input: stmt* ENDMARKER
eval_input: testlist NEWLINE* ENDMARKER
decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE
decorators: decorator+
decorated: decorators (classdef | funcdef | async_funcdef)
async_funcdef: 'async' funcdef
funcdef: 'def' NAME parameters ['->' test] ':' suite
parameters: '(' [typedargslist] ')'
typedargslist: (tfpdef ['=' test] (',' tfpdef ['=' test])* [',' [
'*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]]
| '**' tfpdef [',']]]
| '*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]]
| '**' tfpdef [','])
tfpdef: NAME [':' test]
varargslist: (vfpdef ['=' test] (',' vfpdef ['=' test])* [',' [
'*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]]
| '**' vfpdef [',']]]
| '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]]
| '**' vfpdef [',']
)
vfpdef: NAME
stmt: simple_stmt | compound_stmt | NEWLINE
simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
small_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt |
import_stmt | global_stmt | nonlocal_stmt | assert_stmt)
expr_stmt: testlist_star_expr (annassign | augassign (yield_expr|testlist) |
('=' (yield_expr|testlist_star_expr))*)
annassign: ':' test ['=' test]
testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [',']
augassign: ('+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^=' |
'<<=' | '>>=' | '**=' | '//=')
# For normal and annotated assignments, additional restrictions enforced by the interpreter
del_stmt: 'del' exprlist
pass_stmt: 'pass'
flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt
break_stmt: 'break'
continue_stmt: 'continue'
return_stmt: 'return' [testlist]
yield_stmt: yield_expr
raise_stmt: 'raise' [test ['from' test]]
import_stmt: import_name | import_from
import_name: 'import' dotted_as_names
# note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS
import_from: ('from' (('.' | '...')* dotted_name | ('.' | '...')+)
'import' ('*' | '(' import_as_names ')' | import_as_names))
import_as_name: NAME ['as' NAME]
dotted_as_name: dotted_name ['as' NAME]
import_as_names: import_as_name (',' import_as_name)* [',']
dotted_as_names: dotted_as_name (',' dotted_as_name)*
dotted_name: NAME ('.' NAME)*
global_stmt: 'global' NAME (',' NAME)*
nonlocal_stmt: 'nonlocal' NAME (',' NAME)*
assert_stmt: 'assert' test [',' test]
compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt
async_stmt: 'async' (funcdef | with_stmt | for_stmt)
if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite]
while_stmt: 'while' test ':' suite ['else' ':' suite]
for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite]
try_stmt: ('try' ':' suite
((except_clause ':' suite)+
['else' ':' suite]
['finally' ':' suite] |
'finally' ':' suite))
with_stmt: 'with' with_item (',' with_item)* ':' suite
with_item: test ['as' expr]
# NB compile.c makes sure that the default except clause is last
except_clause: 'except' [test ['as' NAME]]
suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT
test: or_test ['if' or_test 'else' test] | lambdef
test_nocond: or_test | lambdef_nocond
lambdef: 'lambda' [varargslist] ':' test
lambdef_nocond: 'lambda' [varargslist] ':' test_nocond
or_test: and_test ('or' and_test)*
and_test: not_test ('and' not_test)*
not_test: 'not' not_test | comparison
comparison: expr (comp_op expr)*
# <> isn't actually a valid comparison operator in Python. It's here for the
# sake of a __future__ import described in PEP 401 (which really works :-)
comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'
star_expr: '*' expr
expr: xor_expr ('|' xor_expr)*
xor_expr: and_expr ('^' and_expr)*
and_expr: shift_expr ('&' shift_expr)*
shift_expr: arith_expr (('<<'|'>>') arith_expr)*
arith_expr: term (('+'|'-') term)*
term: factor (('*'|'@'|'/'|'%'|'//') factor)*
factor: ('+'|'-'|'~') factor | power
power: atom_expr ['**' factor]
atom_expr: ['await'] atom trailer*
atom: ('(' [yield_expr|testlist_comp] ')' |
'[' [testlist_comp] ']' |
'{' [dictorsetmaker] '}' |
NAME | NUMBER | strings | '...' | 'None' | 'True' | 'False')
testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
subscriptlist: subscript (',' subscript)* [',']
subscript: test | [test] ':' [test] [sliceop]
sliceop: ':' [test]
exprlist: (expr|star_expr) (',' (expr|star_expr))* [',']
testlist: test (',' test)* [',']
dictorsetmaker: ( ((test ':' test | '**' expr)
(comp_for | (',' (test ':' test | '**' expr))* [','])) |
((test | star_expr)
(comp_for | (',' (test | star_expr))* [','])) )
classdef: 'class' NAME ['(' [arglist] ')'] ':' suite
arglist: argument (',' argument)* [',']
# The reason that keywords are test nodes instead of NAME is that using NAME
# results in an ambiguity. ast.c makes sure it's a NAME.
# "test '=' test" is really "keyword '=' test", but we have no such token.
# These need to be in a single rule to avoid grammar that is ambiguous
# to our LL(1) parser. Even though 'test' includes '*expr' in star_expr,
# we explicitly match '*' here, too, to give it proper precedence.
# Illegal combinations and orderings are blocked in ast.c:
# multiple (test comp_for) arguments are blocked; keyword unpackings
# that precede iterable unpackings are blocked; etc.
argument: ( test [comp_for] |
test '=' test |
'**' test |
'*' test )
comp_iter: comp_for | comp_if
sync_comp_for: 'for' exprlist 'in' or_test [comp_iter]
comp_for: ['async'] sync_comp_for
comp_if: 'if' test_nocond [comp_iter]
# not used in grammar, but may appear in "node" passed from Parser to Compiler
encoding_decl: NAME
yield_expr: 'yield' [yield_arg]
yield_arg: 'from' test | testlist
strings: (STRING | fstring)+
fstring: FSTRING_START fstring_content* FSTRING_END
fstring_content: FSTRING_STRING | fstring_expr
fstring_conversion: '!' NAME
fstring_expr: '{' testlist [ fstring_conversion ] [ fstring_format_spec ] '}'
fstring_format_spec: ':' fstring_content*

171
parso/python/grammar38.txt Normal file
View File

@@ -0,0 +1,171 @@
# Grammar for Python
# NOTE WELL: You should also follow all the steps listed at
# https://devguide.python.org/grammar/
# Start symbols for the grammar:
# single_input is a single interactive statement;
# file_input is a module or sequence of commands read from an input file;
# eval_input is the input for the eval() functions.
# NB: compound_stmt in single_input is followed by extra NEWLINE!
single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
file_input: stmt* ENDMARKER
eval_input: testlist NEWLINE* ENDMARKER
decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE
decorators: decorator+
decorated: decorators (classdef | funcdef | async_funcdef)
async_funcdef: 'async' funcdef
funcdef: 'def' NAME parameters ['->' test] ':' suite
parameters: '(' [typedargslist] ')'
typedargslist: (
(tfpdef ['=' test] (',' tfpdef ['=' test])* ',' '/' [',' [ tfpdef ['=' test] (
',' tfpdef ['=' test])* ([',' [
'*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]]
| '**' tfpdef [',']]])
| '*' [tfpdef] (',' tfpdef ['=' test])* ([',' ['**' tfpdef [',']]])
| '**' tfpdef [',']]] )
| (tfpdef ['=' test] (',' tfpdef ['=' test])* [',' [
'*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]]
| '**' tfpdef [',']]]
| '*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]]
| '**' tfpdef [','])
)
tfpdef: NAME [':' test]
varargslist: vfpdef ['=' test ](',' vfpdef ['=' test])* ',' '/' [',' [ (vfpdef ['=' test] (',' vfpdef ['=' test])* [',' [
'*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]]
| '**' vfpdef [',']]]
| '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]]
| '**' vfpdef [',']) ]] | (vfpdef ['=' test] (',' vfpdef ['=' test])* [',' [
'*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]]
| '**' vfpdef [',']]]
| '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]]
| '**' vfpdef [',']
)
vfpdef: NAME
stmt: simple_stmt | compound_stmt | NEWLINE
simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
small_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt |
import_stmt | global_stmt | nonlocal_stmt | assert_stmt)
expr_stmt: testlist_star_expr (annassign | augassign (yield_expr|testlist) |
('=' (yield_expr|testlist_star_expr))*)
annassign: ':' test ['=' (yield_expr|testlist_star_expr)]
testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [',']
augassign: ('+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^=' |
'<<=' | '>>=' | '**=' | '//=')
# For normal and annotated assignments, additional restrictions enforced by the interpreter
del_stmt: 'del' exprlist
pass_stmt: 'pass'
flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt
break_stmt: 'break'
continue_stmt: 'continue'
return_stmt: 'return' [testlist_star_expr]
yield_stmt: yield_expr
raise_stmt: 'raise' [test ['from' test]]
import_stmt: import_name | import_from
import_name: 'import' dotted_as_names
# note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS
import_from: ('from' (('.' | '...')* dotted_name | ('.' | '...')+)
'import' ('*' | '(' import_as_names ')' | import_as_names))
import_as_name: NAME ['as' NAME]
dotted_as_name: dotted_name ['as' NAME]
import_as_names: import_as_name (',' import_as_name)* [',']
dotted_as_names: dotted_as_name (',' dotted_as_name)*
dotted_name: NAME ('.' NAME)*
global_stmt: 'global' NAME (',' NAME)*
nonlocal_stmt: 'nonlocal' NAME (',' NAME)*
assert_stmt: 'assert' test [',' test]
compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt
async_stmt: 'async' (funcdef | with_stmt | for_stmt)
if_stmt: 'if' namedexpr_test ':' suite ('elif' namedexpr_test ':' suite)* ['else' ':' suite]
while_stmt: 'while' namedexpr_test ':' suite ['else' ':' suite]
for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite]
try_stmt: ('try' ':' suite
((except_clause ':' suite)+
['else' ':' suite]
['finally' ':' suite] |
'finally' ':' suite))
with_stmt: 'with' with_item (',' with_item)* ':' suite
with_item: test ['as' expr]
# NB compile.c makes sure that the default except clause is last
except_clause: 'except' [test ['as' NAME]]
suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT
namedexpr_test: test [':=' test]
test: or_test ['if' or_test 'else' test] | lambdef
test_nocond: or_test | lambdef_nocond
lambdef: 'lambda' [varargslist] ':' test
lambdef_nocond: 'lambda' [varargslist] ':' test_nocond
or_test: and_test ('or' and_test)*
and_test: not_test ('and' not_test)*
not_test: 'not' not_test | comparison
comparison: expr (comp_op expr)*
# <> isn't actually a valid comparison operator in Python. It's here for the
# sake of a __future__ import described in PEP 401 (which really works :-)
comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'
star_expr: '*' expr
expr: xor_expr ('|' xor_expr)*
xor_expr: and_expr ('^' and_expr)*
and_expr: shift_expr ('&' shift_expr)*
shift_expr: arith_expr (('<<'|'>>') arith_expr)*
arith_expr: term (('+'|'-') term)*
term: factor (('*'|'@'|'/'|'%'|'//') factor)*
factor: ('+'|'-'|'~') factor | power
power: atom_expr ['**' factor]
atom_expr: ['await'] atom trailer*
atom: ('(' [yield_expr|testlist_comp] ')' |
'[' [testlist_comp] ']' |
'{' [dictorsetmaker] '}' |
NAME | NUMBER | strings | '...' | 'None' | 'True' | 'False')
testlist_comp: (namedexpr_test|star_expr) ( comp_for | (',' (namedexpr_test|star_expr))* [','] )
trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
subscriptlist: subscript (',' subscript)* [',']
subscript: test | [test] ':' [test] [sliceop]
sliceop: ':' [test]
exprlist: (expr|star_expr) (',' (expr|star_expr))* [',']
testlist: test (',' test)* [',']
dictorsetmaker: ( ((test ':' test | '**' expr)
(comp_for | (',' (test ':' test | '**' expr))* [','])) |
((test | star_expr)
(comp_for | (',' (test | star_expr))* [','])) )
classdef: 'class' NAME ['(' [arglist] ')'] ':' suite
arglist: argument (',' argument)* [',']
# The reason that keywords are test nodes instead of NAME is that using NAME
# results in an ambiguity. ast.c makes sure it's a NAME.
# "test '=' test" is really "keyword '=' test", but we have no such token.
# These need to be in a single rule to avoid grammar that is ambiguous
# to our LL(1) parser. Even though 'test' includes '*expr' in star_expr,
# we explicitly match '*' here, too, to give it proper precedence.
# Illegal combinations and orderings are blocked in ast.c:
# multiple (test comp_for) arguments are blocked; keyword unpackings
# that precede iterable unpackings are blocked; etc.
argument: ( test [comp_for] |
test ':=' test |
test '=' test |
'**' test |
'*' test )
comp_iter: comp_for | comp_if
sync_comp_for: 'for' exprlist 'in' or_test [comp_iter]
comp_for: ['async'] sync_comp_for
comp_if: 'if' test_nocond [comp_iter]
# not used in grammar, but may appear in "node" passed from Parser to Compiler
encoding_decl: NAME
yield_expr: 'yield' [yield_arg]
yield_arg: 'from' test | testlist_star_expr
strings: (STRING | fstring)+
fstring: FSTRING_START fstring_content* FSTRING_END
fstring_content: FSTRING_STRING | fstring_expr
fstring_conversion: '!' NAME
fstring_expr: '{' testlist ['='] [ fstring_conversion ] [ fstring_format_spec ] '}'
fstring_format_spec: ':' fstring_content*

171
parso/python/grammar39.txt Normal file
View File

@@ -0,0 +1,171 @@
# Grammar for Python
# NOTE WELL: You should also follow all the steps listed at
# https://devguide.python.org/grammar/
# Start symbols for the grammar:
# single_input is a single interactive statement;
# file_input is a module or sequence of commands read from an input file;
# eval_input is the input for the eval() functions.
# NB: compound_stmt in single_input is followed by extra NEWLINE!
single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
file_input: stmt* ENDMARKER
eval_input: testlist NEWLINE* ENDMARKER
decorator: '@' namedexpr_test NEWLINE
decorators: decorator+
decorated: decorators (classdef | funcdef | async_funcdef)
async_funcdef: 'async' funcdef
funcdef: 'def' NAME parameters ['->' test] ':' suite
parameters: '(' [typedargslist] ')'
typedargslist: (
(tfpdef ['=' test] (',' tfpdef ['=' test])* ',' '/' [',' [ tfpdef ['=' test] (
',' tfpdef ['=' test])* ([',' [
'*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]]
| '**' tfpdef [',']]])
| '*' [tfpdef] (',' tfpdef ['=' test])* ([',' ['**' tfpdef [',']]])
| '**' tfpdef [',']]] )
| (tfpdef ['=' test] (',' tfpdef ['=' test])* [',' [
'*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]]
| '**' tfpdef [',']]]
| '*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]]
| '**' tfpdef [','])
)
tfpdef: NAME [':' test]
varargslist: vfpdef ['=' test ](',' vfpdef ['=' test])* ',' '/' [',' [ (vfpdef ['=' test] (',' vfpdef ['=' test])* [',' [
'*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]]
| '**' vfpdef [',']]]
| '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]]
| '**' vfpdef [',']) ]] | (vfpdef ['=' test] (',' vfpdef ['=' test])* [',' [
'*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]]
| '**' vfpdef [',']]]
| '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]]
| '**' vfpdef [',']
)
vfpdef: NAME
stmt: simple_stmt | compound_stmt | NEWLINE
simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
small_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt |
import_stmt | global_stmt | nonlocal_stmt | assert_stmt)
expr_stmt: testlist_star_expr (annassign | augassign (yield_expr|testlist) |
('=' (yield_expr|testlist_star_expr))*)
annassign: ':' test ['=' (yield_expr|testlist_star_expr)]
testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [',']
augassign: ('+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^=' |
'<<=' | '>>=' | '**=' | '//=')
# For normal and annotated assignments, additional restrictions enforced by the interpreter
del_stmt: 'del' exprlist
pass_stmt: 'pass'
flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt
break_stmt: 'break'
continue_stmt: 'continue'
return_stmt: 'return' [testlist_star_expr]
yield_stmt: yield_expr
raise_stmt: 'raise' [test ['from' test]]
import_stmt: import_name | import_from
import_name: 'import' dotted_as_names
# note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS
import_from: ('from' (('.' | '...')* dotted_name | ('.' | '...')+)
'import' ('*' | '(' import_as_names ')' | import_as_names))
import_as_name: NAME ['as' NAME]
dotted_as_name: dotted_name ['as' NAME]
import_as_names: import_as_name (',' import_as_name)* [',']
dotted_as_names: dotted_as_name (',' dotted_as_name)*
dotted_name: NAME ('.' NAME)*
global_stmt: 'global' NAME (',' NAME)*
nonlocal_stmt: 'nonlocal' NAME (',' NAME)*
assert_stmt: 'assert' test [',' test]
compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt
async_stmt: 'async' (funcdef | with_stmt | for_stmt)
if_stmt: 'if' namedexpr_test ':' suite ('elif' namedexpr_test ':' suite)* ['else' ':' suite]
while_stmt: 'while' namedexpr_test ':' suite ['else' ':' suite]
for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite]
try_stmt: ('try' ':' suite
((except_clause ':' suite)+
['else' ':' suite]
['finally' ':' suite] |
'finally' ':' suite))
with_stmt: 'with' with_item (',' with_item)* ':' suite
with_item: test ['as' expr]
# NB compile.c makes sure that the default except clause is last
except_clause: 'except' [test ['as' NAME]]
suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT
namedexpr_test: test [':=' test]
test: or_test ['if' or_test 'else' test] | lambdef
test_nocond: or_test | lambdef_nocond
lambdef: 'lambda' [varargslist] ':' test
lambdef_nocond: 'lambda' [varargslist] ':' test_nocond
or_test: and_test ('or' and_test)*
and_test: not_test ('and' not_test)*
not_test: 'not' not_test | comparison
comparison: expr (comp_op expr)*
# <> isn't actually a valid comparison operator in Python. It's here for the
# sake of a __future__ import described in PEP 401 (which really works :-)
comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'
star_expr: '*' expr
expr: xor_expr ('|' xor_expr)*
xor_expr: and_expr ('^' and_expr)*
and_expr: shift_expr ('&' shift_expr)*
shift_expr: arith_expr (('<<'|'>>') arith_expr)*
arith_expr: term (('+'|'-') term)*
term: factor (('*'|'@'|'/'|'%'|'//') factor)*
factor: ('+'|'-'|'~') factor | power
power: atom_expr ['**' factor]
atom_expr: ['await'] atom trailer*
atom: ('(' [yield_expr|testlist_comp] ')' |
'[' [testlist_comp] ']' |
'{' [dictorsetmaker] '}' |
NAME | NUMBER | strings | '...' | 'None' | 'True' | 'False')
testlist_comp: (namedexpr_test|star_expr) ( comp_for | (',' (namedexpr_test|star_expr))* [','] )
trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
subscriptlist: subscript (',' subscript)* [',']
subscript: test | [test] ':' [test] [sliceop]
sliceop: ':' [test]
exprlist: (expr|star_expr) (',' (expr|star_expr))* [',']
testlist: test (',' test)* [',']
dictorsetmaker: ( ((test ':' test | '**' expr)
(comp_for | (',' (test ':' test | '**' expr))* [','])) |
((test | star_expr)
(comp_for | (',' (test | star_expr))* [','])) )
classdef: 'class' NAME ['(' [arglist] ')'] ':' suite
arglist: argument (',' argument)* [',']
# The reason that keywords are test nodes instead of NAME is that using NAME
# results in an ambiguity. ast.c makes sure it's a NAME.
# "test '=' test" is really "keyword '=' test", but we have no such token.
# These need to be in a single rule to avoid grammar that is ambiguous
# to our LL(1) parser. Even though 'test' includes '*expr' in star_expr,
# we explicitly match '*' here, too, to give it proper precedence.
# Illegal combinations and orderings are blocked in ast.c:
# multiple (test comp_for) arguments are blocked; keyword unpackings
# that precede iterable unpackings are blocked; etc.
argument: ( test [comp_for] |
test ':=' test |
test '=' test |
'**' test |
'*' test )
comp_iter: comp_for | comp_if
sync_comp_for: 'for' exprlist 'in' or_test [comp_iter]
comp_for: ['async'] sync_comp_for
comp_if: 'if' test_nocond [comp_iter]
# not used in grammar, but may appear in "node" passed from Parser to Compiler
encoding_decl: NAME
yield_expr: 'yield' [yield_arg]
yield_arg: 'from' test | testlist_star_expr
strings: (STRING | fstring)+
fstring: FSTRING_START fstring_content* FSTRING_END
fstring_content: FSTRING_STRING | fstring_expr
fstring_conversion: '!' NAME
fstring_expr: '{' testlist ['='] [ fstring_conversion ] [ fstring_format_spec ] '}'
fstring_format_spec: ':' fstring_content*

View File

@@ -172,5 +172,5 @@ A list of syntax/indentation errors I've encountered in CPython.
Version specific:
Python 3.5:
'yield' inside async function
Python 3.3/3.4:
Python 3.4:
can use starred expression only as assignment target

View File

@@ -1,8 +1,11 @@
from parso.python import tree
from parso.python.token import (DEDENT, INDENT, ENDMARKER, NEWLINE, NUMBER,
STRING, tok_name, NAME)
from parso.python.token import PythonTokenTypes
from parso.parser import BaseParser
from parso.pgen2.parse import token_to_ilabel
NAME = PythonTokenTypes.NAME
INDENT = PythonTokenTypes.INDENT
DEDENT = PythonTokenTypes.DEDENT
class Parser(BaseParser):
@@ -36,13 +39,11 @@ class Parser(BaseParser):
'for_stmt': tree.ForStmt,
'while_stmt': tree.WhileStmt,
'try_stmt': tree.TryStmt,
'comp_for': tree.CompFor,
'sync_comp_for': tree.SyncCompFor,
# Not sure if this is the best idea, but IMO it's the easiest way to
# avoid extreme amounts of work around the subtle difference of 2/3
# grammar in list comoprehensions.
'list_for': tree.CompFor,
# Same here. This just exists in Python 2.6.
'gen_for': tree.CompFor,
'list_for': tree.SyncCompFor,
'decorator': tree.Decorator,
'lambdef': tree.Lambda,
'old_lambdef': tree.Lambda,
@@ -50,44 +51,35 @@ class Parser(BaseParser):
}
default_node = tree.PythonNode
def __init__(self, pgen_grammar, error_recovery=True, start_symbol='file_input'):
super(Parser, self).__init__(pgen_grammar, start_symbol, error_recovery=error_recovery)
# Names/Keywords are handled separately
_leaf_map = {
PythonTokenTypes.STRING: tree.String,
PythonTokenTypes.NUMBER: tree.Number,
PythonTokenTypes.NEWLINE: tree.Newline,
PythonTokenTypes.ENDMARKER: tree.EndMarker,
PythonTokenTypes.FSTRING_STRING: tree.FStringString,
PythonTokenTypes.FSTRING_START: tree.FStringStart,
PythonTokenTypes.FSTRING_END: tree.FStringEnd,
}
def __init__(self, pgen_grammar, error_recovery=True, start_nonterminal='file_input'):
super(Parser, self).__init__(pgen_grammar, start_nonterminal,
error_recovery=error_recovery)
self.syntax_errors = []
self._omit_dedent_list = []
self._indent_counter = 0
# TODO do print absolute import detection here.
# try:
# del python_grammar_no_print_statement.keywords["print"]
# except KeyError:
# pass # Doesn't exist in the Python 3 grammar.
# if self.options["print_function"]:
# python_grammar = pygram.python_grammar_no_print_statement
# else:
def parse(self, tokens):
if self._error_recovery:
if self._start_symbol != 'file_input':
if self._start_nonterminal != 'file_input':
raise NotImplementedError
tokens = self._recovery_tokenize(tokens)
node = super(Parser, self).parse(tokens)
return super(Parser, self).parse(tokens)
if self._start_symbol == 'file_input' != node.type:
# If there's only one statement, we get back a non-module. That's
# not what we want, we want a module, so we add it here:
node = self.convert_node(
self._pgen_grammar,
self._pgen_grammar.symbol2number['file_input'],
[node]
)
return node
def convert_node(self, pgen_grammar, type, children):
def convert_node(self, nonterminal, children):
"""
Convert raw node information to a PythonBaseNode instance.
@@ -95,167 +87,131 @@ class Parser(BaseParser):
grammar rule produces a new complete node, so that the tree is build
strictly bottom-up.
"""
# TODO REMOVE symbol, we don't want type here.
symbol = pgen_grammar.number2symbol[type]
try:
return self.node_map[symbol](children)
node = self.node_map[nonterminal](children)
except KeyError:
if symbol == 'suite':
if nonterminal == 'suite':
# We don't want the INDENT/DEDENT in our parser tree. Those
# leaves are just cancer. They are virtual leaves and not real
# ones and therefore have pseudo start/end positions and no
# prefixes. Just ignore them.
children = [children[0]] + children[2:-1]
elif symbol == 'list_if':
elif nonterminal == 'list_if':
# Make transitioning from 2 to 3 easier.
symbol = 'comp_if'
elif symbol == 'listmaker':
nonterminal = 'comp_if'
elif nonterminal == 'listmaker':
# Same as list_if above.
symbol = 'testlist_comp'
return self.default_node(symbol, children)
nonterminal = 'testlist_comp'
node = self.default_node(nonterminal, children)
for c in children:
c.parent = node
return node
def convert_leaf(self, pgen_grammar, type, value, prefix, start_pos):
def convert_leaf(self, type, value, prefix, start_pos):
# print('leaf', repr(value), token.tok_name[type])
if type == NAME:
if value in pgen_grammar.keywords:
if value in self._pgen_grammar.reserved_syntax_strings:
return tree.Keyword(value, start_pos, prefix)
else:
return tree.Name(value, start_pos, prefix)
elif type == STRING:
return tree.String(value, start_pos, prefix)
elif type == NUMBER:
return tree.Number(value, start_pos, prefix)
elif type == NEWLINE:
return tree.Newline(value, start_pos, prefix)
elif type == ENDMARKER:
return tree.EndMarker(value, start_pos, prefix)
else:
return tree.Operator(value, start_pos, prefix)
def error_recovery(self, pgen_grammar, stack, arcs, typ, value, start_pos, prefix,
add_token_callback):
def get_symbol_and_nodes(stack):
for dfa, state, (type_, nodes) in stack:
symbol = pgen_grammar.number2symbol[type_]
yield symbol, nodes
return self._leaf_map.get(type, tree.Operator)(value, start_pos, prefix)
tos_nodes = stack.get_tos_nodes()
def error_recovery(self, token):
tos_nodes = self.stack[-1].nodes
if tos_nodes:
last_leaf = tos_nodes[-1].get_last_leaf()
else:
last_leaf = None
if self._start_symbol == 'file_input' and \
(typ == ENDMARKER or typ == DEDENT and '\n' not in last_leaf.value):
def reduce_stack(states, newstate):
# reduce
state = newstate
while states[state] == [(0, state)]:
self.pgen_parser._pop()
dfa, state, (type_, nodes) = stack[-1]
states, first = dfa
if self._start_nonterminal == 'file_input' and \
(token.type == PythonTokenTypes.ENDMARKER
or token.type == DEDENT and not last_leaf.value.endswith('\n')
and not last_leaf.value.endswith('\r')):
# In Python statements need to end with a newline. But since it's
# possible (and valid in Python ) that there's no newline at the
# possible (and valid in Python) that there's no newline at the
# end of a file, we have to recover even if the user doesn't want
# error recovery.
#print('x', pprint.pprint(stack))
ilabel = token_to_ilabel(pgen_grammar, NEWLINE, value)
dfa, state, (type_, nodes) = stack[-1]
symbol = pgen_grammar.number2symbol[type_]
states, first = dfa
arcs = states[state]
# Look for a state with this label
for i, newstate in arcs:
if ilabel == i:
if symbol == 'simple_stmt':
# This is basically shifting
stack[-1] = (dfa, newstate, (type_, nodes))
reduce_stack(states, newstate)
add_token_callback(typ, value, start_pos, prefix)
if self.stack[-1].dfa.from_rule == 'simple_stmt':
try:
plan = self.stack[-1].dfa.transitions[PythonTokenTypes.NEWLINE]
except KeyError:
pass
else:
if plan.next_dfa.is_final and not plan.dfa_pushes:
# We are ignoring here that the newline would be
# required for a simple_stmt.
self.stack[-1].dfa = plan.next_dfa
self._add_token(token)
return
# Check if we're at the right point
#for symbol, nodes in get_symbol_and_nodes(stack):
# self.pgen_parser._pop()
#break
break
#symbol = pgen_grammar.number2symbol[type_]
if not self._error_recovery:
return super(Parser, self).error_recovery(
pgen_grammar, stack, arcs, typ, value, start_pos, prefix,
add_token_callback)
return super(Parser, self).error_recovery(token)
def current_suite(stack):
# For now just discard everything that is not a suite or
# file_input, if we detect an error.
for index, (symbol, nodes) in reversed(list(enumerate(get_symbol_and_nodes(stack)))):
for until_index, stack_node in reversed(list(enumerate(stack))):
# `suite` can sometimes be only simple_stmt, not stmt.
if symbol == 'file_input':
if stack_node.nonterminal == 'file_input':
break
elif symbol == 'suite' and len(nodes) > 1:
# suites without an indent in them get discarded.
break
return index, symbol, nodes
elif stack_node.nonterminal == 'suite':
# In the case where we just have a newline we don't want to
# do error recovery here. In all other cases, we want to do
# error recovery.
if len(stack_node.nodes) != 1:
break
return until_index
index, symbol, nodes = current_suite(stack)
until_index = current_suite(self.stack)
# print('err', token.tok_name[typ], repr(value), start_pos, len(stack), index)
if self._stack_removal(pgen_grammar, stack, arcs, index + 1, value, start_pos):
add_token_callback(typ, value, start_pos, prefix)
if self._stack_removal(until_index + 1):
self._add_token(token)
else:
typ, value, start_pos, prefix = token
if typ == INDENT:
# For every deleted INDENT we have to delete a DEDENT as well.
# Otherwise the parser will get into trouble and DEDENT too early.
self._omit_dedent_list.append(self._indent_counter)
error_leaf = tree.PythonErrorLeaf(tok_name[typ].lower(), value, start_pos, prefix)
stack[-1][2][1].append(error_leaf)
error_leaf = tree.PythonErrorLeaf(typ.name, value, start_pos, prefix)
self.stack[-1].nodes.append(error_leaf)
if symbol == 'suite':
dfa, state, node = stack[-1]
states, first = dfa
arcs = states[state]
intended_label = pgen_grammar.symbol2label['stmt']
# Introduce a proper state transition. We're basically allowing
# there to be no valid statements inside a suite.
if [x[0] for x in arcs] == [intended_label]:
new_state = arcs[0][1]
stack[-1] = dfa, new_state, node
tos = self.stack[-1]
if tos.nonterminal == 'suite':
# Need at least one statement in the suite. This happend with the
# error recovery above.
try:
tos.dfa = tos.dfa.arcs['stmt']
except KeyError:
# We're already in a final state.
pass
def _stack_removal(self, pgen_grammar, stack, arcs, start_index, value, start_pos):
failed_stack = False
found = False
all_nodes = []
for dfa, state, (type_, nodes) in stack[start_index:]:
if nodes:
found = True
if found:
failed_stack = True
all_nodes += nodes
if failed_stack:
stack[start_index - 1][2][1].append(tree.PythonErrorNode(all_nodes))
def _stack_removal(self, start_index):
all_nodes = [node for stack_node in self.stack[start_index:] for node in stack_node.nodes]
stack[start_index:] = []
return failed_stack
if all_nodes:
node = tree.PythonErrorNode(all_nodes)
for n in all_nodes:
n.parent = node
self.stack[start_index - 1].nodes.append(node)
self.stack[start_index:] = []
return bool(all_nodes)
def _recovery_tokenize(self, tokens):
for typ, value, start_pos, prefix in tokens:
# print(tok_name[typ], repr(value), start_pos, repr(prefix))
for token in tokens:
typ = token[0]
if typ == DEDENT:
# We need to count indents, because if we just omit any DEDENT,
# we might omit them in the wrong place.
o = self._omit_dedent_list
if o and o[-1] == self._indent_counter:
o.pop()
self._indent_counter -= 1
continue
self._indent_counter -= 1
elif typ == INDENT:
self._indent_counter += 1
yield typ, value, start_pos, prefix
yield token

View File

@@ -145,7 +145,7 @@ class BackslashNode(IndentationNode):
def _is_magic_name(name):
return name.value.startswith('__') and name.value.startswith('__')
return name.value.startswith('__') and name.value.endswith('__')
class PEP8Normalizer(ErrorFinder):
@@ -391,11 +391,11 @@ class PEP8Normalizer(ErrorFinder):
if value.lstrip('#'):
self.add_issue(part, 266, "Too many leading '#' for block comment.")
elif self._on_newline:
if not re.match('#:? ', value) and not value == '#' \
if not re.match(r'#:? ', value) and not value == '#' \
and not (value.startswith('#!') and part.start_pos == (1, 0)):
self.add_issue(part, 265, "Block comment should start with '# '")
else:
if not re.match('#:? [^ ]', value):
if not re.match(r'#:? [^ ]', value):
self.add_issue(part, 262, "Inline comment should start with '# '")
self._reset_newlines(spacing, leaf, is_comment=True)
@@ -677,7 +677,7 @@ class PEP8Normalizer(ErrorFinder):
elif typ == 'string':
# Checking multiline strings
for i, line in enumerate(leaf.value.splitlines()[1:]):
indentation = re.match('[ \t]*', line).group(0)
indentation = re.match(r'[ \t]*', line).group(0)
start_pos = leaf.line + i, len(indentation)
# TODO check multiline indentation.
elif typ == 'endmarker':

View File

@@ -1,104 +1,27 @@
from __future__ import absolute_import
from itertools import count
from token import *
from parso._compatibility import py_version
_counter = count(N_TOKENS)
# Never want to see this thing again.
del N_TOKENS
class TokenType(object):
def __init__(self, name, contains_syntax=False):
self.name = name
self.contains_syntax = contains_syntax
COMMENT = next(_counter)
tok_name[COMMENT] = 'COMMENT'
NL = next(_counter)
tok_name[NL] = 'NL'
# Sets the attributes that don't exist in these tok_name versions.
if py_version >= 30:
BACKQUOTE = next(_counter)
tok_name[BACKQUOTE] = 'BACKQUOTE'
else:
RARROW = next(_counter)
tok_name[RARROW] = 'RARROW'
ELLIPSIS = next(_counter)
tok_name[ELLIPSIS] = 'ELLIPSIS'
if py_version < 35:
ATEQUAL = next(_counter)
tok_name[ATEQUAL] = 'ATEQUAL'
ERROR_DEDENT = next(_counter)
tok_name[ERROR_DEDENT] = 'ERROR_DEDENT'
def __repr__(self):
return '%s(%s)' % (self.__class__.__name__, self.name)
# Map from operator to number (since tokenize doesn't do this)
opmap_raw = """\
( LPAR
) RPAR
[ LSQB
] RSQB
: COLON
, COMMA
; SEMI
+ PLUS
- MINUS
* STAR
/ SLASH
| VBAR
& AMPER
< LESS
> GREATER
= EQUAL
. DOT
% PERCENT
` BACKQUOTE
{ LBRACE
} RBRACE
@ AT
== EQEQUAL
!= NOTEQUAL
<> NOTEQUAL
<= LESSEQUAL
>= GREATEREQUAL
~ TILDE
^ CIRCUMFLEX
<< LEFTSHIFT
>> RIGHTSHIFT
** DOUBLESTAR
+= PLUSEQUAL
-= MINEQUAL
*= STAREQUAL
/= SLASHEQUAL
%= PERCENTEQUAL
&= AMPEREQUAL
|= VBAREQUAL
@= ATEQUAL
^= CIRCUMFLEXEQUAL
<<= LEFTSHIFTEQUAL
>>= RIGHTSHIFTEQUAL
**= DOUBLESTAREQUAL
// DOUBLESLASH
//= DOUBLESLASHEQUAL
-> RARROW
... ELLIPSIS
"""
opmap = {}
for line in opmap_raw.splitlines():
op, name = line.split()
opmap[op] = globals()[name]
def generate_token_id(string):
class TokenTypes(object):
"""
Uses a token in the grammar (e.g. `'+'` or `'and'`returns the corresponding
ID for it. The strings are part of the grammar file.
Basically an enum, but Python 2 doesn't have enums in the standard library.
"""
try:
return opmap[string]
except KeyError:
pass
return globals()[string]
def __init__(self, names, contains_syntax):
for name in names:
setattr(self, name, TokenType(name, contains_syntax=name in contains_syntax))
PythonTokenTypes = TokenTypes((
'STRING', 'NUMBER', 'NAME', 'ERRORTOKEN', 'NEWLINE', 'INDENT', 'DEDENT',
'ERROR_DEDENT', 'FSTRING_STRING', 'FSTRING_START', 'FSTRING_END', 'OP',
'ENDMARKER'),
contains_syntax=('NAME', 'OP'),
)

30
parso/python/token.pyi Normal file
View File

@@ -0,0 +1,30 @@
from typing import Container, Iterable
class TokenType:
name: str
contains_syntax: bool
def __init__(self, name: str, contains_syntax: bool) -> None: ...
class TokenTypes:
def __init__(
self, names: Iterable[str], contains_syntax: Container[str]
) -> None: ...
# not an actual class in the source code, but we need this class to type the fields of
# PythonTokenTypes
class _FakePythonTokenTypesClass(TokenTypes):
STRING: TokenType
NUMBER: TokenType
NAME: TokenType
ERRORTOKEN: TokenType
NEWLINE: TokenType
INDENT: TokenType
DEDENT: TokenType
ERROR_DEDENT: TokenType
FSTRING_STRING: TokenType
FSTRING_START: TokenType
FSTRING_END: TokenType
OP: TokenType
ENDMARKER: TokenType
PythonTokenTypes: _FakePythonTokenTypesClass = ...

View File

@@ -12,34 +12,53 @@ memory optimizations here.
from __future__ import absolute_import
import sys
import string
import re
from collections import namedtuple
import itertools as _itertools
from codecs import BOM_UTF8
from parso.python.token import (tok_name, ENDMARKER, STRING, NUMBER, opmap,
NAME, ERRORTOKEN, NEWLINE, INDENT, DEDENT,
ERROR_DEDENT)
from parso._compatibility import py_version
from parso.python.token import PythonTokenTypes
from parso.utils import split_lines
# Maximum code point of Unicode 6.0: 0x10ffff (1,114,111)
MAX_UNICODE = '\U0010ffff'
STRING = PythonTokenTypes.STRING
NAME = PythonTokenTypes.NAME
NUMBER = PythonTokenTypes.NUMBER
OP = PythonTokenTypes.OP
NEWLINE = PythonTokenTypes.NEWLINE
INDENT = PythonTokenTypes.INDENT
DEDENT = PythonTokenTypes.DEDENT
ENDMARKER = PythonTokenTypes.ENDMARKER
ERRORTOKEN = PythonTokenTypes.ERRORTOKEN
ERROR_DEDENT = PythonTokenTypes.ERROR_DEDENT
FSTRING_START = PythonTokenTypes.FSTRING_START
FSTRING_STRING = PythonTokenTypes.FSTRING_STRING
FSTRING_END = PythonTokenTypes.FSTRING_END
TokenCollection = namedtuple(
'TokenCollection',
'pseudo_token single_quoted triple_quoted endpats always_break_tokens',
'pseudo_token single_quoted triple_quoted endpats whitespace '
'fstring_pattern_map always_break_tokens',
)
BOM_UTF8_STRING = BOM_UTF8.decode('utf-8')
_token_collection_cache = {}
if py_version >= 30:
if sys.version_info.major >= 3:
# Python 3 has str.isidentifier() to check if a char is a valid identifier
is_identifier = str.isidentifier
else:
namechars = string.ascii_letters + '_'
is_identifier = lambda s: s in namechars
# Python 2 doesn't, but it's not that important anymore and if you tokenize
# Python 2 code with this, it's still ok. It's just that parsing Python 3
# code with this function is not 100% correct.
# This just means that Python 2 code matches a few identifiers too much,
# but that doesn't really matter.
def is_identifier(s):
return True
def group(*choices, **kwargs):
@@ -52,37 +71,40 @@ def group(*choices, **kwargs):
return start + '|'.join(choices) + ')'
def any(*choices):
return group(*choices) + '*'
def maybe(*choices):
return group(*choices) + '?'
# Return the empty string, plus all of the valid string prefixes.
def _all_string_prefixes(version_info):
def _all_string_prefixes(version_info, include_fstring=False, only_fstring=False):
def different_case_versions(prefix):
for s in _itertools.product(*[(c, c.upper()) for c in prefix]):
yield ''.join(s)
# The valid string prefixes. Only contain the lower case versions,
# and don't contain any permuations (include 'fr', but not
# 'rf'). The various permutations will be generated.
_valid_string_prefixes = ['b', 'r', 'u']
if version_info >= (3, 0):
_valid_string_prefixes.append('br')
valid_string_prefixes = ['b', 'r', 'u']
if version_info.major >= 3:
valid_string_prefixes.append('br')
if version_info >= (3, 6):
_valid_string_prefixes += ['f', 'fr']
result = set([''])
if version_info >= (3, 6) and include_fstring:
f = ['f', 'fr']
if only_fstring:
valid_string_prefixes = f
result = set()
else:
valid_string_prefixes += f
elif only_fstring:
return set()
# if we add binary f-strings, add: ['fb', 'fbr']
result = set([''])
for prefix in _valid_string_prefixes:
for prefix in valid_string_prefixes:
for t in _itertools.permutations(prefix):
# create a list with upper and lower versions of each
# character
result.update(different_case_versions(t))
if version_info <= (2, 7):
if version_info.major == 2:
# In Python 2 the order cannot just be random.
result.update(different_case_versions('ur'))
result.update(different_case_versions('br'))
@@ -102,12 +124,28 @@ def _get_token_collection(version_info):
return result
fstring_string_single_line = _compile(r'(?:\{\{|\}\}|\\(?:\r\n?|\n)|[^{}\r\n])+')
fstring_string_multi_line = _compile(r'(?:[^{}]+|\{\{|\}\})+')
fstring_format_spec_single_line = _compile(r'(?:\\(?:\r\n?|\n)|[^{}\r\n])+')
fstring_format_spec_multi_line = _compile(r'[^{}]+')
def _create_token_collection(version_info):
# Note: we use unicode matching for names ("\w") but ascii matching for
# number literals.
Whitespace = r'[ \f\t]*'
whitespace = _compile(Whitespace)
Comment = r'#[^\r\n]*'
Name = r'\w+'
# Python 2 is pretty much not working properly anymore, we just ignore
# parsing unicode properly, which is fine, I guess.
if version_info[0] == 2:
Name = r'([A-Za-z_0-9]+)'
elif sys.version_info[0] == 2:
# Unfortunately the regex engine cannot deal with the regex below, so
# just use this one.
Name = r'(\w+)'
else:
Name = u'([A-Za-z_0-9\u0080-' + MAX_UNICODE + ']+)'
if version_info >= (3, 6):
Hexnumber = r'0[xX](?:_?[0-9a-fA-F])+'
@@ -124,12 +162,14 @@ def _create_token_collection(version_info):
else:
Hexnumber = r'0[xX][0-9a-fA-F]+'
Binnumber = r'0[bB][01]+'
if version_info >= (3, 0):
if version_info.major >= 3:
Octnumber = r'0[oO][0-7]+'
else:
Octnumber = '0[oO]?[0-7]+'
Decnumber = r'(?:0+|[1-9][0-9]*)'
Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber)
if version_info[0] < 3:
Intnumber += '[lL]?'
Exponent = r'[eE][-+]?[0-9]+'
Pointfloat = group(r'[0-9]+\.[0-9]*', r'\.[0-9]+') + maybe(Exponent)
Expfloat = r'[0-9]+' + Exponent
@@ -141,40 +181,52 @@ def _create_token_collection(version_info):
# StringPrefix can be the empty string (making it optional).
possible_prefixes = _all_string_prefixes(version_info)
StringPrefix = group(*possible_prefixes)
StringPrefixWithF = group(*_all_string_prefixes(version_info, include_fstring=True))
fstring_prefixes = _all_string_prefixes(version_info, include_fstring=True, only_fstring=True)
FStringStart = group(*fstring_prefixes)
# Tail end of ' string.
Single = r"[^'\\]*(?:\\.[^'\\]*)*'"
Single = r"(?:\\.|[^'\\])*'"
# Tail end of " string.
Double = r'[^"\\]*(?:\\.[^"\\]*)*"'
Double = r'(?:\\.|[^"\\])*"'
# Tail end of ''' string.
Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''"
Single3 = r"(?:\\.|'(?!'')|[^'\\])*'''"
# Tail end of """ string.
Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""'
Triple = group(StringPrefix + "'''", StringPrefix + '"""')
Double3 = r'(?:\\.|"(?!"")|[^"\\])*"""'
Triple = group(StringPrefixWithF + "'''", StringPrefixWithF + '"""')
# Because of leftmost-then-longest match semantics, be sure to put the
# longest operators first (e.g., if = came before ==, == would get
# recognized as two instances of =).
Operator = group(r"\*\*=?", r">>=?", r"<<=?", r"!=",
Operator = group(r"\*\*=?", r">>=?", r"<<=?",
r"//=?", r"->",
r"[+\-*/%&@`|^=<>]=?",
r"[+\-*/%&@`|^!=<>]=?",
r"~")
Bracket = '[][(){}]'
special_args = [r'\r?\n', r'[:;.,@]']
special_args = [r'\r\n?', r'\n', r'[;.,@]']
if version_info >= (3, 0):
special_args.insert(0, r'\.\.\.')
if version_info >= (3, 8):
special_args.insert(0, ":=?")
else:
special_args.insert(0, ":")
Special = group(*special_args)
Funny = group(Operator, Bracket, Special)
# First (or only) line of ' or " string.
ContStr = group(StringPrefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*" +
group("'", r'\\\r?\n'),
StringPrefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*' +
group('"', r'\\\r?\n'))
PseudoExtras = group(r'\\\r?\n|\Z', Comment, Triple)
ContStr = group(StringPrefix + r"'[^\r\n'\\]*(?:\\.[^\r\n'\\]*)*"
+ group("'", r'\\(?:\r\n?|\n)'),
StringPrefix + r'"[^\r\n"\\]*(?:\\.[^\r\n"\\]*)*'
+ group('"', r'\\(?:\r\n?|\n)'))
pseudo_extra_pool = [Comment, Triple]
all_quotes = '"', "'", '"""', "'''"
if fstring_prefixes:
pseudo_extra_pool.append(FStringStart + group(*all_quotes))
PseudoExtras = group(r'\\(?:\r\n?|\n)|\Z', *pseudo_extra_pool)
PseudoToken = group(Whitespace, capture=True) + \
group(PseudoExtras, Number, Funny, ContStr, Name, capture=True)
@@ -192,18 +244,27 @@ def _create_token_collection(version_info):
# including the opening quotes.
single_quoted = set()
triple_quoted = set()
fstring_pattern_map = {}
for t in possible_prefixes:
for p in (t + '"', t + "'"):
single_quoted.add(p)
for p in (t + '"""', t + "'''"):
triple_quoted.add(p)
for quote in '"', "'":
single_quoted.add(t + quote)
for quote in '"""', "'''":
triple_quoted.add(t + quote)
for t in fstring_prefixes:
for quote in all_quotes:
fstring_pattern_map[t + quote] = quote
ALWAYS_BREAK_TOKENS = (';', 'import', 'class', 'def', 'try', 'except',
'finally', 'while', 'with', 'return')
'finally', 'while', 'with', 'return', 'continue',
'break', 'del', 'pass', 'global', 'assert')
if version_info >= (3, 5):
ALWAYS_BREAK_TOKENS += ('nonlocal', )
pseudo_token_compiled = _compile(PseudoToken)
return TokenCollection(
pseudo_token_compiled, single_quoted, triple_quoted, endpats,
ALWAYS_BREAK_TOKENS
whitespace, fstring_pattern_map, set(ALWAYS_BREAK_TOKENS)
)
@@ -218,12 +279,96 @@ class Token(namedtuple('Token', ['type', 'string', 'start_pos', 'prefix'])):
class PythonToken(Token):
def _get_type_name(self, exact=True):
return tok_name[self.type]
def __repr__(self):
return ('TokenInfo(type=%s, string=%r, start=%r, prefix=%r)' %
self._replace(type=self._get_type_name()))
return ('TokenInfo(type=%s, string=%r, start_pos=%r, prefix=%r)' %
self._replace(type=self.type.name))
class FStringNode(object):
def __init__(self, quote):
self.quote = quote
self.parentheses_count = 0
self.previous_lines = ''
self.last_string_start_pos = None
# In the syntax there can be multiple format_spec's nested:
# {x:{y:3}}
self.format_spec_count = 0
def open_parentheses(self, character):
self.parentheses_count += 1
def close_parentheses(self, character):
self.parentheses_count -= 1
if self.parentheses_count == 0:
# No parentheses means that the format spec is also finished.
self.format_spec_count = 0
def allow_multiline(self):
return len(self.quote) == 3
def is_in_expr(self):
return self.parentheses_count > self.format_spec_count
def is_in_format_spec(self):
return not self.is_in_expr() and self.format_spec_count
def _close_fstring_if_necessary(fstring_stack, string, line_nr, column, additional_prefix):
for fstring_stack_index, node in enumerate(fstring_stack):
lstripped_string = string.lstrip()
len_lstrip = len(string) - len(lstripped_string)
if lstripped_string.startswith(node.quote):
token = PythonToken(
FSTRING_END,
node.quote,
(line_nr, column + len_lstrip),
prefix=additional_prefix+string[:len_lstrip],
)
additional_prefix = ''
assert not node.previous_lines
del fstring_stack[fstring_stack_index:]
return token, '', len(node.quote) + len_lstrip
return None, additional_prefix, 0
def _find_fstring_string(endpats, fstring_stack, line, lnum, pos):
tos = fstring_stack[-1]
allow_multiline = tos.allow_multiline()
if tos.is_in_format_spec():
if allow_multiline:
regex = fstring_format_spec_multi_line
else:
regex = fstring_format_spec_single_line
else:
if allow_multiline:
regex = fstring_string_multi_line
else:
regex = fstring_string_single_line
match = regex.match(line, pos)
if match is None:
return tos.previous_lines, pos
if not tos.previous_lines:
tos.last_string_start_pos = (lnum, pos)
string = match.group(0)
for fstring_stack_node in fstring_stack:
end_match = endpats[fstring_stack_node.quote].match(string)
if end_match is not None:
string = end_match.group(0)[:-len(fstring_stack_node.quote)]
new_pos = pos
new_pos += len(string)
# even if allow_multiline is False, we still need to check for trailing
# newlines, because a single-line f-string can contain line continuations
if string.endswith('\n') or string.endswith('\r'):
tos.previous_lines += string
string = ''
else:
string = tos.previous_lines + string
return string, new_pos
def tokenize(code, version_info, start_pos=(1, 0)):
@@ -232,7 +377,20 @@ def tokenize(code, version_info, start_pos=(1, 0)):
return tokenize_lines(lines, version_info, start_pos=start_pos)
def tokenize_lines(lines, version_info, start_pos=(1, 0)):
def _print_tokens(func):
"""
A small helper function to help debug the tokenize_lines function.
"""
def wrapper(*args, **kwargs):
for token in func(*args, **kwargs):
print(token) # This print is intentional for debugging!
yield token
return wrapper
# @_print_tokens
def tokenize_lines(lines, version_info, start_pos=(1, 0), indents=None, is_first_token=True):
"""
A heavily modified Python standard library tokenizer.
@@ -240,11 +398,22 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
token. This idea comes from lib2to3. The prefix contains all information
that is irrelevant for the parser like newlines in parentheses or comments.
"""
pseudo_token, single_quoted, triple_quoted, endpats, always_break_tokens, = \
def dedent_if_necessary(start):
while start < indents[-1]:
if start > indents[-2]:
yield PythonToken(ERROR_DEDENT, '', (lnum, start), '')
indents[-1] = start
break
indents.pop()
yield PythonToken(DEDENT, '', spos, '')
pseudo_token, single_quoted, triple_quoted, endpats, whitespace, \
fstring_pattern_map, always_break_tokens, = \
_get_token_collection(version_info)
paren_level = 0 # count parentheses
indents = [0]
max = 0
if indents is None:
indents = [0]
max_ = 0
numchars = '0123456789'
contstr = ''
contline = None
@@ -255,30 +424,32 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
new_line = True
prefix = '' # Should never be required, but here for safety
additional_prefix = ''
first = True
lnum = start_pos[0] - 1
fstring_stack = []
for line in lines: # loop over lines in stream
lnum += 1
pos = 0
max = len(line)
if first:
max_ = len(line)
if is_first_token:
if line.startswith(BOM_UTF8_STRING):
additional_prefix = BOM_UTF8_STRING
line = line[1:]
max = len(line)
max_ = len(line)
# Fake that the part before was already parsed.
line = '^' * start_pos[1] + line
pos = start_pos[1]
max += start_pos[1]
max_ += start_pos[1]
first = False
is_first_token = False
if contstr: # continued string
endmatch = endprog.match(line)
if endmatch:
pos = endmatch.end(0)
yield PythonToken(STRING, contstr + line[:pos], contstr_start, prefix)
yield PythonToken(
STRING, contstr + line[:pos],
contstr_start, prefix)
contstr = ''
contline = None
else:
@@ -286,58 +457,131 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
contline = contline + line
continue
while pos < max:
pseudomatch = pseudo_token.match(line, pos)
if not pseudomatch: # scan for tokens
txt = line[pos:]
if txt.endswith('\n'):
new_line = True
yield PythonToken(ERRORTOKEN, txt, (lnum, pos), additional_prefix)
while pos < max_:
if fstring_stack:
tos = fstring_stack[-1]
if not tos.is_in_expr():
string, pos = _find_fstring_string(endpats, fstring_stack, line, lnum, pos)
if string:
yield PythonToken(
FSTRING_STRING, string,
tos.last_string_start_pos,
# Never has a prefix because it can start anywhere and
# include whitespace.
prefix=''
)
tos.previous_lines = ''
continue
if pos == max_:
break
rest = line[pos:]
fstring_end_token, additional_prefix, quote_length = _close_fstring_if_necessary(
fstring_stack,
rest,
lnum,
pos,
additional_prefix,
)
pos += quote_length
if fstring_end_token is not None:
yield fstring_end_token
continue
# in an f-string, match until the end of the string
if fstring_stack:
string_line = line
for fstring_stack_node in fstring_stack:
quote = fstring_stack_node.quote
end_match = endpats[quote].match(line, pos)
if end_match is not None:
end_match_string = end_match.group(0)
if len(end_match_string) - len(quote) + pos < len(string_line):
string_line = line[:pos] + end_match_string[:-len(quote)]
pseudomatch = pseudo_token.match(string_line, pos)
else:
pseudomatch = pseudo_token.match(line, pos)
if pseudomatch:
prefix = additional_prefix + pseudomatch.group(1)
additional_prefix = ''
break
start, pos = pseudomatch.span(2)
spos = (lnum, start)
token = pseudomatch.group(2)
if token == '':
assert prefix
additional_prefix = prefix
# This means that we have a line with whitespace/comments at
# the end, which just results in an endmarker.
break
initial = token[0]
else:
match = whitespace.match(line, pos)
initial = line[match.end()]
start = match.end()
spos = (lnum, start)
prefix = additional_prefix + pseudomatch.group(1)
additional_prefix = ''
start, pos = pseudomatch.span(2)
spos = (lnum, start)
token = pseudomatch.group(2)
if token == '':
assert prefix
additional_prefix = prefix
# This means that we have a line with whitespace/comments at
# the end, which just results in an endmarker.
break
initial = token[0]
if new_line and initial not in '\r\n#':
if new_line and initial not in '\r\n#' and (initial != '\\' or pseudomatch is None):
new_line = False
if paren_level == 0:
i = 0
while line[i] == '\f':
i += 1
start -= 1
if start > indents[-1]:
if paren_level == 0 and not fstring_stack:
indent_start = start
if indent_start > indents[-1]:
yield PythonToken(INDENT, '', spos, '')
indents.append(start)
while start < indents[-1]:
if start > indents[-2]:
yield PythonToken(ERROR_DEDENT, '', (lnum, 0), '')
break
yield PythonToken(DEDENT, '', spos, '')
indents.pop()
indents.append(indent_start)
for t in dedent_if_necessary(indent_start):
yield t
if (initial in numchars or # ordinary number
(initial == '.' and token != '.' and token != '...')):
if not pseudomatch: # scan for tokens
match = whitespace.match(line, pos)
if new_line and paren_level == 0 and not fstring_stack:
for t in dedent_if_necessary(match.end()):
yield t
pos = match.end()
new_line = False
yield PythonToken(
ERRORTOKEN, line[pos], (lnum, pos),
additional_prefix + match.group(0)
)
additional_prefix = ''
pos += 1
continue
if (initial in numchars # ordinary number
or (initial == '.' and token != '.' and token != '...')):
yield PythonToken(NUMBER, token, spos, prefix)
elif pseudomatch.group(3) is not None: # ordinary name
if token in always_break_tokens and (fstring_stack or paren_level):
fstring_stack[:] = []
paren_level = 0
# We only want to dedent if the token is on a new line.
m = re.match(r'[ \f\t]*$', line[:start])
if m is not None:
for t in dedent_if_necessary(m.end()):
yield t
if is_identifier(token):
yield PythonToken(NAME, token, spos, prefix)
else:
for t in _split_illegal_unicode_name(token, spos, prefix):
yield t # yield from Python 2
elif initial in '\r\n':
if not new_line and paren_level == 0:
if any(not f.allow_multiline() for f in fstring_stack):
# Would use fstring_stack.clear, but that's not available
# in Python 2.
fstring_stack[:] = []
if not new_line and paren_level == 0 and not fstring_stack:
yield PythonToken(NEWLINE, token, spos, prefix)
else:
additional_prefix = prefix + token
new_line = True
elif initial == '#': # Comments
assert not token.endswith("\n")
additional_prefix = prefix + token
if fstring_stack and fstring_stack[-1].is_in_expr():
# `#` is not allowed in f-string expressions
yield PythonToken(ERRORTOKEN, initial, spos, prefix)
pos = start + 1
else:
additional_prefix = prefix + token
elif token in triple_quoted:
endprog = endpats[token]
endmatch = endprog.match(line, pos)
@@ -346,14 +590,27 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
token = line[start:pos]
yield PythonToken(STRING, token, spos, prefix)
else:
contstr_start = (lnum, start) # multiple lines
contstr_start = spos # multiple lines
contstr = line[start:]
contline = line
break
# Check up to the first 3 chars of the token to see if
# they're in the single_quoted set. If so, they start
# a string.
# We're using the first 3, because we're looking for
# "rb'" (for example) at the start of the token. If
# we switch to longer prefixes, this needs to be
# adjusted.
# Note that initial == token[:1].
# Also note that single quote checking must come after
# triple quote checking (above).
elif initial in single_quoted or \
token[:2] in single_quoted or \
token[:3] in single_quoted:
if token[-1] == '\n': # continued string
if token[-1] in '\r\n': # continued string
# This means that a single quoted string ends with a
# backslash and is continued.
contstr_start = lnum, start
endprog = (endpats.get(initial) or endpats.get(token[1])
or endpats.get(token[2]))
@@ -362,47 +619,92 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
break
else: # ordinary string
yield PythonToken(STRING, token, spos, prefix)
elif is_identifier(initial): # ordinary name
if token in always_break_tokens:
paren_level = 0
while True:
indent = indents.pop()
if indent > start:
yield PythonToken(DEDENT, '', spos, '')
else:
indents.append(indent)
break
yield PythonToken(NAME, token, spos, prefix)
elif initial == '\\' and line[start:] in ('\\\n', '\\\r\n'): # continued stmt
elif token in fstring_pattern_map: # The start of an fstring.
fstring_stack.append(FStringNode(fstring_pattern_map[token]))
yield PythonToken(FSTRING_START, token, spos, prefix)
elif initial == '\\' and line[start:] in ('\\\n', '\\\r\n', '\\\r'): # continued stmt
additional_prefix += prefix + line[start:]
break
else:
if token in '([{':
paren_level += 1
if fstring_stack:
fstring_stack[-1].open_parentheses(token)
else:
paren_level += 1
elif token in ')]}':
paren_level -= 1
if fstring_stack:
fstring_stack[-1].close_parentheses(token)
else:
if paren_level:
paren_level -= 1
elif token.startswith(':') and fstring_stack \
and fstring_stack[-1].parentheses_count \
- fstring_stack[-1].format_spec_count == 1:
# `:` and `:=` both count
fstring_stack[-1].format_spec_count += 1
token = ':'
pos = start + 1
try:
# This check is needed in any case to check if it's a valid
# operator or just some random unicode character.
typ = opmap[token]
except KeyError:
typ = ERRORTOKEN
yield PythonToken(typ, token, spos, prefix)
yield PythonToken(OP, token, spos, prefix)
if contstr:
yield PythonToken(ERRORTOKEN, contstr, contstr_start, prefix)
if contstr.endswith('\n'):
if contstr.endswith('\n') or contstr.endswith('\r'):
new_line = True
end_pos = lnum, max
if fstring_stack:
tos = fstring_stack[-1]
if tos.previous_lines:
yield PythonToken(
FSTRING_STRING, tos.previous_lines,
tos.last_string_start_pos,
# Never has a prefix because it can start anywhere and
# include whitespace.
prefix=''
)
end_pos = lnum, max_
# As the last position we just take the maximally possible position. We
# remove -1 for the last new line.
for indent in indents[1:]:
indents.pop()
yield PythonToken(DEDENT, '', end_pos, '')
yield PythonToken(ENDMARKER, '', end_pos, additional_prefix)
def _split_illegal_unicode_name(token, start_pos, prefix):
def create_token():
return PythonToken(ERRORTOKEN if is_illegal else NAME, found, pos, prefix)
found = ''
is_illegal = False
pos = start_pos
for i, char in enumerate(token):
if is_illegal:
if is_identifier(char):
yield create_token()
found = char
is_illegal = False
prefix = ''
pos = start_pos[0], start_pos[1] + i
else:
found += char
else:
new_found = found + char
if is_identifier(new_found):
found = new_found
else:
if found:
yield create_token()
prefix = ''
pos = start_pos[0], start_pos[1] + i
found = char
is_illegal = True
if found:
yield create_token()
if __name__ == "__main__":
if len(sys.argv) >= 2:
path = sys.argv[1]

24
parso/python/tokenize.pyi Normal file
View File

@@ -0,0 +1,24 @@
from typing import Generator, Iterable, NamedTuple, Tuple
from parso.python.token import TokenType
from parso.utils import PythonVersionInfo
class Token(NamedTuple):
type: TokenType
string: str
start_pos: Tuple[int, int]
prefix: str
@property
def end_pos(self) -> Tuple[int, int]: ...
class PythonToken(Token):
def __repr__(self) -> str: ...
def tokenize(
code: str, version_info: PythonVersionInfo, start_pos: Tuple[int, int] = (1, 0)
) -> Generator[PythonToken, None, None]: ...
def tokenize_lines(
lines: Iterable[str],
version_info: PythonVersionInfo,
start_pos: Tuple[int, int] = (1, 0),
) -> Generator[PythonToken, None, None]: ...

View File

@@ -43,24 +43,32 @@ Parser Tree Classes
"""
import re
try:
from collections.abc import Mapping
except ImportError:
from collections import Mapping
from parso._compatibility import utf8_repr, unicode
from parso.tree import Node, BaseNode, Leaf, ErrorNode, ErrorLeaf, \
search_ancestor
from parso.python.prefix import split_prefix
from parso.utils import split_lines
_FLOW_CONTAINERS = set(['if_stmt', 'while_stmt', 'for_stmt', 'try_stmt',
'with_stmt', 'async_stmt', 'suite'])
_RETURN_STMT_CONTAINERS = set(['suite', 'simple_stmt']) | _FLOW_CONTAINERS
_FUNC_CONTAINERS = set(['suite', 'simple_stmt', 'decorated']) | _FLOW_CONTAINERS
_FUNC_CONTAINERS = set(
['suite', 'simple_stmt', 'decorated', 'async_funcdef']
) | _FLOW_CONTAINERS
_GET_DEFINITION_TYPES = set([
'expr_stmt', 'comp_for', 'with_stmt', 'for_stmt', 'import_name',
'import_from', 'param'
'expr_stmt', 'sync_comp_for', 'with_stmt', 'for_stmt', 'import_name',
'import_from', 'param', 'del_stmt',
])
_IMPORTS = set(['import_name', 'import_from'])
class DocstringMixin(object):
__slots__ = ()
@@ -91,13 +99,13 @@ class DocstringMixin(object):
class PythonMixin(object):
"""
Some Python specific utitilies.
Some Python specific utilities.
"""
__slots__ = ()
def get_name_of_position(self, position):
"""
Given a (line, column) tuple, returns a :class`Name` or ``None`` if
Given a (line, column) tuple, returns a :py:class:`Name` or ``None`` if
there is no name at that position.
"""
for c in self.children:
@@ -125,15 +133,16 @@ class PythonLeaf(PythonMixin, Leaf):
# indent error leafs somehow? No idea how, though.
previous_leaf = self.get_previous_leaf()
if previous_leaf is not None and previous_leaf.type == 'error_leaf' \
and previous_leaf.original_type in ('indent', 'error_dedent'):
and previous_leaf.token_type in ('INDENT', 'DEDENT', 'ERROR_DEDENT'):
previous_leaf = previous_leaf.get_previous_leaf()
if previous_leaf is None:
return self.line - self.prefix.count('\n'), 0 # It's the first leaf.
if previous_leaf is None: # It's the first leaf.
lines = split_lines(self.prefix)
# + 1 is needed because split_lines always returns at least [''].
return self.line - len(lines) + 1, 0 # It's the first leaf.
return previous_leaf.end_pos
class _LeafWithoutNewlines(PythonLeaf):
"""
Simply here to optimize performance.
@@ -166,6 +175,12 @@ class EndMarker(_LeafWithoutNewlines):
__slots__ = ()
type = 'endmarker'
@utf8_repr
def __repr__(self):
return "<%s: prefix=%s end_pos=%s>" % (
type(self).__name__, repr(self.prefix), self.end_pos
)
class Newline(PythonLeaf):
"""Contains NEWLINE and ENDMARKER tokens."""
@@ -189,36 +204,28 @@ class Name(_LeafWithoutNewlines):
return "<%s: %s@%s,%s>" % (type(self).__name__, self.value,
self.line, self.column)
def is_definition(self):
def is_definition(self, include_setitem=False):
"""
Returns True if the name is being defined.
"""
return self.get_definition() is not None
return self.get_definition(include_setitem=include_setitem) is not None
def get_definition(self, import_name_always=False):
def get_definition(self, import_name_always=False, include_setitem=False):
"""
Returns None if there's on definition for a name.
Returns None if there's no definition for a name.
:param import_name_alway: Specifies if an import name is always a
:param import_name_always: Specifies if an import name is always a
definition. Normally foo in `from foo import bar` is not a
definition.
"""
node = self.parent
type_ = node.type
if type_ in ('power', 'atom_expr'):
# In `self.x = 3` self is not a definition, but x is.
return None
if type_ in ('funcdef', 'classdef'):
if self == node.name:
return node
return None
if type_ in ():
if self in node.get_defined_names():
return node
return None
if type_ == 'except_clause':
# TODO in Python 2 this doesn't work correctly. See grammar file.
# I think we'll just let it be. Python 2 will be gone in a few
@@ -230,8 +237,10 @@ class Name(_LeafWithoutNewlines):
while node is not None:
if node.type == 'suite':
return None
if node.type == 'namedexpr_test':
return node.children[0]
if node.type in _GET_DEFINITION_TYPES:
if self in node.get_defined_names():
if self in node.get_defined_names(include_setitem):
return node
if import_name_always and node.type in _IMPORTS:
return node
@@ -240,7 +249,6 @@ class Name(_LeafWithoutNewlines):
return None
class Literal(PythonLeaf):
__slots__ = ()
@@ -256,7 +264,7 @@ class String(Literal):
@property
def string_prefix(self):
return re.match('\w*(?=[\'"])', self.value).group(0)
return re.match(r'\w*(?=[\'"])', self.value).group(0)
def _get_payload(self):
match = re.search(
@@ -267,6 +275,33 @@ class String(Literal):
return match.group(2)[:-len(match.group(1))]
class FStringString(PythonLeaf):
"""
f-strings contain f-string expressions and normal python strings. These are
the string parts of f-strings.
"""
type = 'fstring_string'
__slots__ = ()
class FStringStart(PythonLeaf):
"""
f-strings contain f-string expressions and normal python strings. These are
the string parts of f-strings.
"""
type = 'fstring_start'
__slots__ = ()
class FStringEnd(PythonLeaf):
"""
f-strings contain f-string expressions and normal python strings. These are
the string parts of f-strings.
"""
type = 'fstring_end'
__slots__ = ()
class _StringComparisonMixin(object):
def __eq__(self, other):
"""
@@ -414,7 +449,7 @@ class Module(Scope):
recurse(child)
recurse(self)
self._used_names = dct
self._used_names = UsedNamesMapping(dct)
return self._used_names
@@ -438,6 +473,9 @@ class ClassOrFunc(Scope):
:rtype: list of :class:`Decorator`
"""
decorated = self.parent
if decorated.type == 'async_funcdef':
decorated = decorated.parent
if decorated.type == 'decorated':
if decorated.children[0].type == 'decorators':
return decorated.children[0].children
@@ -514,8 +552,11 @@ def _create_params(parent, argslist_list):
if child is None or child == ',':
param_children = children[start:end]
if param_children: # Could as well be comma and then end.
if param_children[0] == '*' and param_children[1] == ',' \
or check_python2_nested_param(param_children[0]):
if param_children[0] == '*' \
and (len(param_children) == 1
or param_children[1] == ',') \
or check_python2_nested_param(param_children[0]) \
or param_children[0] == '/':
for p in param_children:
p.parent = parent
new_children += param_children
@@ -597,6 +638,21 @@ class Function(ClassOrFunc):
return scan(self.children)
def iter_raise_stmts(self):
"""
Returns a generator of `raise_stmt`. Includes raise statements inside try-except blocks
"""
def scan(children):
for element in children:
if element.type == 'raise_stmt' \
or element.type == 'keyword' and element.value == 'raise':
yield element
if element.type in _RETURN_STMT_CONTAINERS:
for e in scan(element.children):
yield e
return scan(self.children)
def is_generator(self):
"""
:return bool: Checks if a function is a generator or not.
@@ -616,6 +672,7 @@ class Function(ClassOrFunc):
except IndexError:
return None
class Lambda(Function):
"""
Lambdas are basically trimmed functions, so give it the same interface.
@@ -721,8 +778,8 @@ class ForStmt(Flow):
"""
return self.children[3]
def get_defined_names(self):
return _defined_names(self.children[1])
def get_defined_names(self, include_setitem=False):
return _defined_names(self.children[1], include_setitem)
class TryStmt(Flow):
@@ -745,7 +802,7 @@ class WithStmt(Flow):
type = 'with_stmt'
__slots__ = ()
def get_defined_names(self):
def get_defined_names(self, include_setitem=False):
"""
Returns the a list of `Name` that the with statement defines. The
defined names are set after `as`.
@@ -754,7 +811,7 @@ class WithStmt(Flow):
for with_item in self.children[1:-2:2]:
# Check with items for 'as' names.
if with_item.type == 'with_item':
names += _defined_names(with_item.children[2])
names += _defined_names(with_item.children[2], include_setitem)
return names
def get_test_node_from_name(self, name):
@@ -795,7 +852,7 @@ class ImportFrom(Import):
type = 'import_from'
__slots__ = ()
def get_defined_names(self):
def get_defined_names(self, include_setitem=False):
"""
Returns the a list of `Name` that the import defines. The
defined names are set after `import` or in case an alias - `as` - is
@@ -866,7 +923,7 @@ class ImportName(Import):
type = 'import_name'
__slots__ = ()
def get_defined_names(self):
def get_defined_names(self, include_setitem=False):
"""
Returns the a list of `Name` that the import defines. The defined names
is always the first name after `import` or in case an alias - `as` - is
@@ -923,7 +980,7 @@ class ImportName(Import):
class KeywordStatement(PythonBaseNode):
"""
For the following statements: `assert`, `del`, `global`, `nonlocal`,
`raise`, `return`, `yield`, `return`, `yield`.
`raise`, `return`, `yield`.
`pass`, `continue` and `break` are not in there, because they are just
simple keywords and the parser reduces it to a keyword.
@@ -942,6 +999,14 @@ class KeywordStatement(PythonBaseNode):
def keyword(self):
return self.children[0].value
def get_defined_names(self, include_setitem=False):
keyword = self.keyword
if keyword == 'del':
return _defined_names(self.children[1], include_setitem)
if keyword in ('global', 'nonlocal'):
return self.children[1::2]
return []
class AssertStmt(KeywordStatement):
__slots__ = ()
@@ -967,7 +1032,7 @@ class YieldExpr(PythonBaseNode):
__slots__ = ()
def _defined_names(current):
def _defined_names(current, include_setitem):
"""
A helper function to find the defined names in statements, for loops and
list comprehensions.
@@ -975,14 +1040,22 @@ def _defined_names(current):
names = []
if current.type in ('testlist_star_expr', 'testlist_comp', 'exprlist', 'testlist'):
for child in current.children[::2]:
names += _defined_names(child)
names += _defined_names(child, include_setitem)
elif current.type in ('atom', 'star_expr'):
names += _defined_names(current.children[1])
names += _defined_names(current.children[1], include_setitem)
elif current.type in ('power', 'atom_expr'):
if current.children[-2] != '**': # Just if there's no operation
trailer = current.children[-1]
if trailer.children[0] == '.':
names.append(trailer.children[1])
elif trailer.children[0] == '[' and include_setitem:
for node in current.children[-2::-1]:
if node.type == 'trailer':
names.append(node.children[1])
break
if node.type == 'name':
names.append(node)
break
else:
names.append(current)
return names
@@ -992,23 +1065,29 @@ class ExprStmt(PythonBaseNode, DocstringMixin):
type = 'expr_stmt'
__slots__ = ()
def get_defined_names(self):
def get_defined_names(self, include_setitem=False):
"""
Returns a list of `Name` defined before the `=` sign.
"""
names = []
if self.children[1].type == 'annassign':
names = _defined_names(self.children[0])
names = _defined_names(self.children[0], include_setitem)
return [
name
for i in range(0, len(self.children) - 2, 2)
if '=' in self.children[i + 1].value
for name in _defined_names(self.children[i])
for name in _defined_names(self.children[i], include_setitem)
] + names
def get_rhs(self):
"""Returns the right-hand-side of the equals."""
return self.children[-1]
node = self.children[-1]
if node.type == 'annassign':
if len(node.children) == 4:
node = node.children[3]
else:
node = node.children[1]
return node
def yield_operators(self):
"""
@@ -1067,7 +1146,7 @@ class Param(PythonBaseNode):
@property
def annotation(self):
"""
The default is the test node that appears after `->`. Is `None` in case
The default is the test node that appears after `:`. Is `None` in case
no annotation is present.
"""
tfpdef = self._tfpdef()
@@ -1096,7 +1175,7 @@ class Param(PythonBaseNode):
else:
return self._tfpdef()
def get_defined_names(self):
def get_defined_names(self, include_setitem=False):
return [self.name]
@property
@@ -1112,6 +1191,13 @@ class Param(PythonBaseNode):
index -= 2
except ValueError:
pass
try:
keyword_only_index = self.parent.children.index('/')
if index > keyword_only_index:
# Skip the ` /, `
index -= 2
except ValueError:
pass
return index - 1
def get_parent_function(self):
@@ -1143,12 +1229,42 @@ class Param(PythonBaseNode):
return '<%s: %s>' % (type(self).__name__, str(self._tfpdef()) + default)
class CompFor(PythonBaseNode):
type = 'comp_for'
class SyncCompFor(PythonBaseNode):
type = 'sync_comp_for'
__slots__ = ()
def get_defined_names(self):
def get_defined_names(self, include_setitem=False):
"""
Returns the a list of `Name` that the comprehension defines.
"""
return _defined_names(self.children[1])
# allow async for
return _defined_names(self.children[1], include_setitem)
# This is simply here so an older Jedi version can work with this new parso
# version. Can be deleted in the next release.
CompFor = SyncCompFor
class UsedNamesMapping(Mapping):
"""
This class exists for the sole purpose of creating an immutable dict.
"""
def __init__(self, dct):
self._dict = dct
def __getitem__(self, key):
return self._dict[key]
def __len__(self):
return len(self._dict)
def __iter__(self):
return iter(self._dict)
def __hash__(self):
return id(self)
def __eq__(self, other):
# Comparing these dicts does not make sense.
return self is other

View File

@@ -1,5 +1,8 @@
import sys
from abc import abstractmethod, abstractproperty
from parso._compatibility import utf8_repr, encoding, py_version
from parso._compatibility import utf8_repr, encoding
from parso.utils import split_lines
def search_ancestor(node, *node_types):
@@ -42,8 +45,12 @@ class NodeOrLeaf(object):
Returns the node immediately following this node in this parent's
children list. If this node does not have a next sibling, it is None
"""
parent = self.parent
if parent is None:
return None
# Can't use index(); we need to test by identity
for i, child in enumerate(self.parent.children):
for i, child in enumerate(parent.children):
if child is self:
try:
return self.parent.children[i + 1]
@@ -55,10 +62,13 @@ class NodeOrLeaf(object):
Returns the node immediately preceding this node in this parent's
children list. If this node does not have a previous sibling, it is
None.
None.
"""
parent = self.parent
if parent is None:
return None
# Can't use index(); we need to test by identity
for i, child in enumerate(self.parent.children):
for i, child in enumerate(parent.children):
if child is self:
if i == 0:
return None
@@ -69,6 +79,9 @@ class NodeOrLeaf(object):
Returns the previous leaf in the parser tree.
Returns `None` if this is the first element in the parser tree.
"""
if self.parent is None:
return None
node = self
while True:
c = node.parent.children
@@ -92,6 +105,9 @@ class NodeOrLeaf(object):
Returns the next leaf in the parser tree.
Returns None if this is the last element in the parser tree.
"""
if self.parent is None:
return None
node = self
while True:
c = node.parent.children
@@ -152,7 +168,7 @@ class NodeOrLeaf(object):
@abstractmethod
def get_code(self, include_prefix=True):
"""
Returns the code that was input the input for the parser for this node.
Returns the code that was the input for the parser for this node.
:param include_prefix: Removes the prefix (whitespace and comments) of
e.g. a statement.
@@ -194,7 +210,9 @@ class Leaf(NodeOrLeaf):
def get_start_pos_of_prefix(self):
previous_leaf = self.get_previous_leaf()
if previous_leaf is None:
return self.line - self.prefix.count('\n'), 0 # It's the first leaf.
lines = split_lines(self.prefix)
# + 1 is needed because split_lines always returns at least [''].
return self.line - len(lines) + 1, 0 # It's the first leaf.
return previous_leaf.end_pos
def get_first_leaf(self):
@@ -211,7 +229,7 @@ class Leaf(NodeOrLeaf):
@property
def end_pos(self):
lines = self.value.split('\n')
lines = split_lines(self.value)
end_pos_line = self.line + len(lines) - 1
# Check for multiline token
if self.line == end_pos_line:
@@ -230,6 +248,7 @@ class Leaf(NodeOrLeaf):
class TypedLeaf(Leaf):
__slots__ = ('type',)
def __init__(self, type, value, start_pos, prefix=''):
super(TypedLeaf, self).__init__(value, start_pos, prefix)
self.type = type
@@ -244,8 +263,6 @@ class BaseNode(NodeOrLeaf):
type = None
def __init__(self, children):
for c in children:
c.parent = self
self.children = children
"""
A list of :class:`NodeOrLeaf` child nodes.
@@ -278,6 +295,14 @@ class BaseNode(NodeOrLeaf):
return self._get_code_for_children(self.children, include_prefix)
def get_leaf_for_position(self, position, include_prefixes=False):
"""
Get the :py:class:`parso.tree.Leaf` at ``position``
:param tuple position: A position tuple, row, column. Rows start from 1
:param bool include_prefixes: If ``False``, ``None`` will be returned if ``position`` falls
on whitespace or comments before a leaf
:return: :py:class:`parso.tree.Leaf` at ``position``, or ``None``
"""
def binary_search(lower, upper):
if lower == upper:
element = self.children[lower]
@@ -310,8 +335,8 @@ class BaseNode(NodeOrLeaf):
@utf8_repr
def __repr__(self):
code = self.get_code().replace('\n', ' ').strip()
if not py_version >= 30:
code = self.get_code().replace('\n', ' ').replace('\r', ' ').strip()
if not sys.version_info.major >= 3:
code = code.encode(encoding, 'replace')
return "<%s: %s@%s,%s>" % \
(type(self).__name__, code, self.start_pos[0], self.start_pos[1])
@@ -331,7 +356,7 @@ class Node(BaseNode):
class ErrorNode(BaseNode):
"""
A node that containes valid nodes/leaves that we're follow by a token that
A node that contains valid nodes/leaves that we're follow by a token that
was invalid. This basically means that the leaf after this node is where
Python would mark a syntax error.
"""
@@ -344,13 +369,13 @@ class ErrorLeaf(Leaf):
A leaf that is either completely invalid in a language (like `$` in Python)
or is invalid at that position. Like the star in `1 +* 1`.
"""
__slots__ = ('original_type',)
__slots__ = ('token_type',)
type = 'error_leaf'
def __init__(self, original_type, value, start_pos, prefix=''):
def __init__(self, token_type, value, start_pos, prefix=''):
super(ErrorLeaf, self).__init__(value, start_pos, prefix)
self.original_type = original_type
self.token_type = token_type
def __repr__(self):
return "<%s: %s:%s, %s>" % \
(type(self).__name__, self.original_type, repr(self.value), self.start_pos)
(type(self).__name__, self.token_type, repr(self.value), self.start_pos)

View File

@@ -2,9 +2,24 @@ from collections import namedtuple
import re
import sys
from ast import literal_eval
from functools import total_ordering
from parso._compatibility import unicode, total_ordering
from parso._compatibility import unicode
# The following is a list in Python that are line breaks in str.splitlines, but
# not in Python. In Python only \r (Carriage Return, 0xD) and \n (Line Feed,
# 0xA) are allowed to split lines.
_NON_LINE_BREAKS = (
u'\v', # Vertical Tabulation 0xB
u'\f', # Form Feed 0xC
u'\x1C', # File Separator
u'\x1D', # Group Separator
u'\x1E', # Record Separator
u'\x85', # Next Line (NEL - Equivalent to CR+LF.
# Used to mark end-of-line on some IBM mainframes.)
u'\u2028', # Line Separator
u'\u2029', # Paragraph Separator
)
Version = namedtuple('Version', 'major, minor, micro')
@@ -26,8 +41,13 @@ def split_lines(string, keepends=False):
# We have to merge lines that were broken by form feed characters.
merge = []
for i, line in enumerate(lst):
if line.endswith('\f'):
merge.append(i)
try:
last_chr = line[-1]
except IndexError:
pass
else:
if last_chr in _NON_LINE_BREAKS:
merge.append(i)
for index in reversed(merge):
try:
@@ -41,11 +61,11 @@ def split_lines(string, keepends=False):
# The stdlib's implementation of the end is inconsistent when calling
# it with/without keepends. One time there's an empty string in the
# end, one time there's none.
if string.endswith('\n') or string == '':
if string.endswith('\n') or string.endswith('\r') or string == '':
lst.append('')
return lst
else:
return re.split('\n|\r\n', string)
return re.split(r'\n|\r\n|\r', string)
def python_bytes_to_unicode(source, encoding='utf-8', errors='strict'):
@@ -85,8 +105,17 @@ def python_bytes_to_unicode(source, encoding='utf-8', errors='strict'):
if not isinstance(encoding, unicode):
encoding = unicode(encoding, 'utf-8', 'replace')
# Cast to unicode
return unicode(source, encoding, errors)
try:
# Cast to unicode
return unicode(source, encoding, errors)
except LookupError:
if errors == 'replace':
# This is a weird case that can happen if the given encoding is not
# a valid encoding. This usually shouldn't happen with provided
# encodings, but can happen if somebody uses encoding declarations
# like `# coding: foo-8`.
return unicode(source, 'utf-8', errors)
raise
def version_info():
@@ -100,10 +129,10 @@ def version_info():
def _parse_version(version):
match = re.match(r'(\d+)(?:\.(\d)(?:\.\d+)?)?$', version)
match = re.match(r'(\d+)(?:\.(\d{1,2})(?:\.\d+)?)?((a|b|rc)\d)?$', version)
if match is None:
raise ValueError('The given version is not in the right format. '
'Use something like "3.2" or "3".')
'Use something like "3.8" or "3".')
major = int(match.group(1))
minor = match.group(2)
@@ -144,13 +173,13 @@ class PythonVersionInfo(namedtuple('Version', 'major, minor')):
def parse_version_string(version=None):
"""
Checks for a valid version number (e.g. `3.2` or `2.7.1` or `3`) and
Checks for a valid version number (e.g. `3.8` or `2.7.1` or `3`) and
returns a corresponding version info that is always two characters long in
decimal.
"""
if version is None:
version = '%s.%s' % sys.version_info[:2]
if not isinstance(version, (unicode, str)):
raise TypeError("version must be a string like 3.2.")
raise TypeError('version must be a string like "3.8"')
return _parse_version(version)

29
parso/utils.pyi Normal file
View File

@@ -0,0 +1,29 @@
from typing import NamedTuple, Optional, Sequence, Union
class Version(NamedTuple):
major: int
minor: int
micro: int
def split_lines(string: str, keepends: bool = ...) -> Sequence[str]: ...
def python_bytes_to_unicode(
source: Union[str, bytes], encoding: str = ..., errors: str = ...
) -> str: ...
def version_info() -> Version:
"""
Returns a namedtuple of parso's version, similar to Python's
``sys.version_info``.
"""
...
class PythonVersionInfo(NamedTuple):
major: int
minor: int
def parse_version_string(version: Optional[str]) -> PythonVersionInfo:
"""
Checks for a valid version number (e.g. `3.2` or `2.7.1` or `3`) and
returns a corresponding version info that is always two characters long in
decimal.
"""
...

View File

@@ -1,6 +1,8 @@
[pytest]
addopts = --doctest-modules
testpaths = parso test
# Ignore broken files inblackbox test directories
norecursedirs = .* docs scripts normalizer_issue_files build

View File

@@ -1,2 +1,12 @@
[bdist_wheel]
universal=1
[flake8]
max-line-length = 100
ignore =
# do not use bare 'except'
E722,
# don't know why this was ever even an option, 1+1 should be possible.
E226,
# line break before binary operator
W503,

View File

@@ -25,8 +25,9 @@ setup(name='parso',
keywords='python parser parsing',
long_description=readme,
packages=find_packages(exclude=['test']),
package_data={'parso': ['python/grammar*.txt']},
package_data={'parso': ['python/grammar*.txt', 'py.typed', '*.pyi', '**/*.pyi']},
platforms=['any'],
python_requires='>=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*',
classifiers=[
'Development Status :: 4 - Beta',
'Environment :: Plugins',
@@ -34,14 +35,21 @@ setup(name='parso',
'License :: OSI Approved :: MIT License',
'Operating System :: OS Independent',
'Programming Language :: Python :: 2',
'Programming Language :: Python :: 2.6',
'Programming Language :: Python :: 2.7',
'Programming Language :: Python :: 3',
'Programming Language :: Python :: 3.3',
'Programming Language :: Python :: 3.4',
'Programming Language :: Python :: 3.5',
'Programming Language :: Python :: 3.6',
'Programming Language :: Python :: 3.7',
'Topic :: Software Development :: Libraries :: Python Modules',
'Topic :: Text Editors :: Integrated Development Environments (IDE)',
'Topic :: Utilities',
'Typing :: Typed',
],
extras_require={
'testing': [
'pytest>=3.0.7',
'docopt',
],
},
)

View File

@@ -19,14 +19,6 @@ def build_nested(code, depth, base='def f():\n'):
FAILING_EXAMPLES = [
'1 +',
'?',
# Python/compile.c
dedent('''\
for a in [1]:
try:
pass
finally:
continue
'''), # 'continue' not supported inside 'finally' clause"
'continue',
'break',
'return',
@@ -60,9 +52,37 @@ FAILING_EXAMPLES = [
'f(x=2, y)',
'f(**x, *y)',
'f(**x, y=3, z)',
# augassign
'a, b += 3',
'(a, b) += 3',
'[a, b] += 3',
'f() += 1',
'lambda x:None+=1',
'{} += 1',
'{a:b} += 1',
'{1} += 1',
'{*x} += 1',
'(x,) += 1',
'(x, y if a else q) += 1',
'[] += 1',
'[1,2] += 1',
'[] += 1',
'None += 1',
'... += 1',
'a > 1 += 1',
'"test" += 1',
'1 += 1',
'1.0 += 1',
'(yield) += 1',
'(yield from x) += 1',
'(x if x else y) += 1',
'a() += 1',
'a + b += 1',
'+a += 1',
'a and b += 1',
'*a += 1',
'a, b += 1',
'f"xxx" += 1',
# All assignment tests
'lambda a: 1 = 1',
'[x for x in y] = 1',
@@ -141,7 +161,7 @@ FAILING_EXAMPLES = [
# f-strings
'f"{}"',
'f"{\\}"',
r'f"{\}"',
'f"{\'\\\'}"',
'f"{#}"',
"f'{1!b}'",
@@ -154,7 +174,7 @@ FAILING_EXAMPLES = [
# Now nested parsing
"f'{continue}'",
"f'{1;1}'",
"f'{a=3}'",
"f'{a;}'",
"f'{b\"\" \"\"}'",
]
@@ -259,10 +279,6 @@ GLOBAL_NONLOCAL_ERROR = [
if sys.version_info >= (3, 6):
FAILING_EXAMPLES += GLOBAL_NONLOCAL_ERROR
FAILING_EXAMPLES += [
# Raises multiple errors in previous versions.
'async def foo():\n def nofoo():[x async for x in []]',
]
if sys.version_info >= (3, 5):
FAILING_EXAMPLES += [
# Raises different errors so just ignore them for now.
@@ -285,11 +301,19 @@ if sys.version_info >= (3,):
'b"ä"',
# combining strings and unicode is allowed in Python 2.
'"s" b""',
'"s" b"" ""',
'b"" "" b"" ""',
]
if sys.version_info >= (2, 7):
# This is something that raises a different error in 2.6 than in the other
# versions. Just skip it for 2.6.
FAILING_EXAMPLES.append('[a, 1] += 3')
if sys.version_info >= (3, 6):
FAILING_EXAMPLES += [
# Same as above, but for f-strings.
'f"s" b""',
'b"s" f""',
# f-string expression part cannot include a backslash
r'''f"{'\n'}"''',
]
FAILING_EXAMPLES.append('[a, 1] += 3')
if sys.version_info[:2] == (3, 5):
# yields are not allowed in 3.5 async functions. Therefore test them
@@ -311,3 +335,63 @@ if sys.version_info[:2] <= (3, 4):
'a = *[1], 2',
'(*[1], 2)',
]
if sys.version_info[:2] >= (3, 7):
# This is somehow ok in previous versions.
FAILING_EXAMPLES += [
'class X(base for base in bases): pass',
]
if sys.version_info[:2] < (3, 8):
FAILING_EXAMPLES += [
# Python/compile.c
dedent('''\
for a in [1]:
try:
pass
finally:
continue
'''), # 'continue' not supported inside 'finally' clause"
]
if sys.version_info[:2] >= (3, 8):
# assignment expressions from issue#89
FAILING_EXAMPLES += [
# Case 2
'(lambda: x := 1)',
'((lambda: x) := 1)',
# Case 3
'(a[i] := x)',
'((a[i]) := x)',
'(a(i) := x)',
# Case 4
'(a.b := c)',
'[(i.i:= 0) for ((i), j) in range(5)]',
# Case 5
'[i:= 0 for i, j in range(5)]',
'[(i:= 0) for ((i), j) in range(5)]',
'[(i:= 0) for ((i), j), in range(5)]',
'[(i:= 0) for ((i), j.i), in range(5)]',
'[[(i:= i) for j in range(5)] for i in range(5)]',
'[i for i, j in range(5) if True or (i:= 1)]',
'[False and (i:= 0) for i, j in range(5)]',
# Case 6
'[i+1 for i in (i:= range(5))]',
'[i+1 for i in (j:= range(5))]',
'[i+1 for i in (lambda: (j:= range(5)))()]',
# Case 7
'class Example:\n [(j := i) for i in range(5)]',
# Not in that issue
'(await a := x)',
'((await a) := x)',
# new discoveries
'((a, b) := (1, 2))',
'([a, b] := [1, 2])',
'({a, b} := {1, 2})',
'({a: b} := {1: 2})',
'(a + b := 1)',
'(True := 1)',
'(False := 1)',
'(None := 1)',
'(__debug__ := 1)',
]

307
test/fuzz_diff_parser.py Normal file
View File

@@ -0,0 +1,307 @@
"""
A script to find bugs in the diff parser.
This script is extremely useful if changes are made to the diff parser. By
running a few thousand iterations, we can assure that the diff parser is in
good shape.
Usage:
fuzz_diff_parser.py [--pdb|--ipdb] [-l] [-n=<nr>] [-x=<nr>] random [<path>]
fuzz_diff_parser.py [--pdb|--ipdb] [-l] redo [-o=<nr>] [-p]
fuzz_diff_parser.py -h | --help
Options:
-h --help Show this screen
-n, --maxtries=<nr> Maximum of random tries [default: 1000]
-x, --changes=<nr> Amount of changes to be done to a file per try [default: 5]
-l, --logging Prints all the logs
-o, --only-last=<nr> Only runs the last n iterations; Defaults to running all
-p, --print-code Print all test diffs
--pdb Launch pdb when error is raised
--ipdb Launch ipdb when error is raised
"""
from __future__ import print_function
import logging
import sys
import os
import random
import pickle
import parso
from parso.utils import split_lines
from test.test_diff_parser import _check_error_leaves_nodes
_latest_grammar = parso.load_grammar(version='3.8')
_python_reserved_strings = tuple(
# Keywords are ususally only interesting in combination with spaces after
# them. We don't put a space before keywords, to avoid indentation errors.
s + (' ' if s.isalpha() else '')
for s in _latest_grammar._pgen_grammar.reserved_syntax_strings.keys()
)
_random_python_fragments = _python_reserved_strings + (
' ', '\t', '\n', '\r', '\f', 'f"', 'F"""', "fr'", "RF'''", '"', '"""', "'",
"'''", ';', ' some_random_word ', '\\', '#',
)
def find_python_files_in_tree(file_path):
if not os.path.isdir(file_path):
yield file_path
return
for root, dirnames, filenames in os.walk(file_path):
if 'chardet' in root:
# Stuff like chardet/langcyrillicmodel.py is just very slow to
# parse and machine generated, so ignore those.
continue
for name in filenames:
if name.endswith('.py'):
yield os.path.join(root, name)
def _print_copyable_lines(lines):
for line in lines:
line = repr(line)[1:-1]
if line.endswith(r'\n'):
line = line[:-2] + '\n'
print(line, end='')
def _get_first_error_start_pos_or_none(module):
error_leaf = _check_error_leaves_nodes(module)
return None if error_leaf is None else error_leaf.start_pos
class LineReplacement:
def __init__(self, line_nr, new_line):
self._line_nr = line_nr
self._new_line = new_line
def apply(self, code_lines):
# print(repr(self._new_line))
code_lines[self._line_nr] = self._new_line
class LineDeletion:
def __init__(self, line_nr):
self.line_nr = line_nr
def apply(self, code_lines):
del code_lines[self.line_nr]
class LineCopy:
def __init__(self, copy_line, insertion_line):
self._copy_line = copy_line
self._insertion_line = insertion_line
def apply(self, code_lines):
code_lines.insert(
self._insertion_line,
# Use some line from the file. This doesn't feel totally
# random, but for the diff parser it will feel like it.
code_lines[self._copy_line]
)
class FileModification:
@classmethod
def generate(cls, code_lines, change_count, previous_file_modification=None):
if previous_file_modification is not None and random.random() > 0.5:
# We want to keep the previous modifications in some cases to make
# more complex parser issues visible.
code_lines = previous_file_modification.apply(code_lines)
added_modifications = previous_file_modification.modification_list
else:
added_modifications = []
return cls(
added_modifications
+ list(cls._generate_line_modifications(code_lines, change_count)),
# work with changed trees more than with normal ones.
check_original=random.random() > 0.8,
)
@staticmethod
def _generate_line_modifications(lines, change_count):
def random_line(include_end=False):
return random.randint(0, len(lines) - (not include_end))
lines = list(lines)
for _ in range(change_count):
rand = random.randint(1, 4)
if rand == 1:
if len(lines) == 1:
# We cannot delete every line, that doesn't make sense to
# fuzz and it would be annoying to rewrite everything here.
continue
l = LineDeletion(random_line())
elif rand == 2:
# Copy / Insertion
# Make it possible to insert into the first and the last line
l = LineCopy(random_line(), random_line(include_end=True))
elif rand in (3, 4):
# Modify a line in some weird random ways.
line_nr = random_line()
line = lines[line_nr]
column = random.randint(0, len(line))
random_string = ''
for _ in range(random.randint(1, 3)):
if random.random() > 0.8:
# The lower characters cause way more issues.
unicode_range = 0x1f if random.randint(0, 1) else 0x3000
random_string += chr(random.randint(0, unicode_range))
else:
# These insertions let us understand how random
# keyword/operator insertions work. Theoretically this
# could also be done with unicode insertions, but the
# fuzzer is just way more effective here.
random_string += random.choice(_random_python_fragments)
if random.random() > 0.5:
# In this case we insert at a very random place that
# probably breaks syntax.
line = line[:column] + random_string + line[column:]
else:
# Here we have better chances to not break syntax, because
# we really replace the line with something that has
# indentation.
line = ' ' * random.randint(0, 12) + random_string + '\n'
l = LineReplacement(line_nr, line)
l.apply(lines)
yield l
def __init__(self, modification_list, check_original):
self.modification_list = modification_list
self._check_original = check_original
def apply(self, code_lines):
changed_lines = list(code_lines)
for modification in self.modification_list:
modification.apply(changed_lines)
return changed_lines
def run(self, grammar, code_lines, print_code):
code = ''.join(code_lines)
modified_lines = self.apply(code_lines)
modified_code = ''.join(modified_lines)
if print_code:
if self._check_original:
print('Original:')
_print_copyable_lines(code_lines)
print('\nModified:')
_print_copyable_lines(modified_lines)
print()
if self._check_original:
m = grammar.parse(code, diff_cache=True)
start1 = _get_first_error_start_pos_or_none(m)
grammar.parse(modified_code, diff_cache=True)
if self._check_original:
# Also check if it's possible to "revert" the changes.
m = grammar.parse(code, diff_cache=True)
start2 = _get_first_error_start_pos_or_none(m)
assert start1 == start2, (start1, start2)
class FileTests:
def __init__(self, file_path, test_count, change_count):
self._path = file_path
with open(file_path, errors='replace') as f:
code = f.read()
self._code_lines = split_lines(code, keepends=True)
self._test_count = test_count
self._code_lines = self._code_lines
self._change_count = change_count
self._file_modifications = []
def _run(self, grammar, file_modifications, debugger, print_code=False):
try:
for i, fm in enumerate(file_modifications, 1):
fm.run(grammar, self._code_lines, print_code=print_code)
print('.', end='')
sys.stdout.flush()
print()
except Exception:
print("Issue in file: %s" % self._path)
if debugger:
einfo = sys.exc_info()
pdb = __import__(debugger)
pdb.post_mortem(einfo[2])
raise
def redo(self, grammar, debugger, only_last, print_code):
mods = self._file_modifications
if only_last is not None:
mods = mods[-only_last:]
self._run(grammar, mods, debugger, print_code=print_code)
def run(self, grammar, debugger):
def iterate():
fm = None
for _ in range(self._test_count):
fm = FileModification.generate(
self._code_lines, self._change_count,
previous_file_modification=fm
)
self._file_modifications.append(fm)
yield fm
self._run(grammar, iterate(), debugger)
def main(arguments):
debugger = 'pdb' if arguments['--pdb'] else \
'ipdb' if arguments['--ipdb'] else None
redo_file = os.path.join(os.path.dirname(__file__), 'fuzz-redo.pickle')
if arguments['--logging']:
root = logging.getLogger()
root.setLevel(logging.DEBUG)
ch = logging.StreamHandler(sys.stdout)
ch.setLevel(logging.DEBUG)
root.addHandler(ch)
grammar = parso.load_grammar()
parso.python.diff.DEBUG_DIFF_PARSER = True
if arguments['redo']:
with open(redo_file, 'rb') as f:
file_tests_obj = pickle.load(f)
only_last = arguments['--only-last'] and int(arguments['--only-last'])
file_tests_obj.redo(
grammar,
debugger,
only_last=only_last,
print_code=arguments['--print-code']
)
elif arguments['random']:
# A random file is used to do diff parser checks if no file is given.
# This helps us to find errors in a lot of different files.
file_paths = list(find_python_files_in_tree(arguments['<path>'] or '.'))
max_tries = int(arguments['--maxtries'])
tries = 0
try:
while tries < max_tries:
path = random.choice(file_paths)
print("Checking %s: %s tries" % (path, tries))
now_tries = min(1000, max_tries - tries)
file_tests_obj = FileTests(path, now_tries, int(arguments['--changes']))
file_tests_obj.run(grammar, debugger)
tries += now_tries
except Exception:
with open(redo_file, 'wb') as f:
pickle.dump(file_tests_obj, f)
raise
else:
raise NotImplementedError('Command is not implemented')
if __name__ == '__main__':
from docopt import docopt
arguments = docopt(__doc__)
main(arguments)

View File

@@ -12,13 +12,6 @@ from .__future__ import absolute_import
''r''u''
b'' BR''
for x in [1]:
try:
continue # Only the other continue and pass is an error.
finally:
#: E901
continue
for x in [1]:
break

View File

@@ -2,25 +2,38 @@
Test all things related to the ``jedi.cache`` module.
"""
from os import unlink
import os
import os.path
import pytest
import time
from parso.cache import _NodeCacheItem, save_module, load_module, \
_get_hashed_path, parser_cache, _load_from_file_system, _save_to_file_system
from parso.cache import (_CACHED_FILE_MAXIMUM_SURVIVAL, _VERSION_TAG,
_get_cache_clear_lock, _get_hashed_path,
_load_from_file_system, _NodeCacheItem,
_remove_cache_and_update_lock, _save_to_file_system,
load_module, parser_cache, try_to_save_module)
from parso._compatibility import is_pypy, PermissionError
from parso import load_grammar
from parso import cache
from parso import file_io
from parso import parse
skip_pypy = pytest.mark.skipif(
is_pypy,
reason="pickling in pypy is slow, since we don't pickle,"
"we never go into path of auto-collecting garbage"
)
@pytest.fixture()
def isolated_jedi_cache(monkeypatch, tmpdir):
"""
Set `jedi.settings.cache_directory` to a temporary directory during test.
Same as `clean_jedi_cache`, but create the temporary directory for
each test case (scope='function').
"""
monkeypatch.setattr(cache, '_default_cache_path', str(tmpdir))
def isolated_parso_cache(monkeypatch, tmpdir):
"""Set `parso.cache._default_cache_path` to a temporary directory
during the test. """
cache_path = str(os.path.join(str(tmpdir), "__parso_cache"))
monkeypatch.setattr(cache, '_default_cache_path', cache_path)
monkeypatch.setattr(cache, '_get_default_cache_path', lambda *args, **kwargs: cache_path)
return cache_path
def test_modulepickling_change_cache_dir(tmpdir):
@@ -54,7 +67,7 @@ def load_stored_item(hashed_grammar, path, item, cache_path):
return item
@pytest.mark.usefixtures("isolated_jedi_cache")
@pytest.mark.usefixtures("isolated_parso_cache")
def test_modulepickling_simulate_deleted_cache(tmpdir):
"""
Tests loading from a cache file after it is deleted.
@@ -76,12 +89,103 @@ def test_modulepickling_simulate_deleted_cache(tmpdir):
path = tmpdir.dirname + '/some_path'
with open(path, 'w'):
pass
io = file_io.FileIO(path)
save_module(grammar._hashed, path, module, [])
assert load_module(grammar._hashed, path) == module
try_to_save_module(grammar._hashed, io, module, lines=[])
assert load_module(grammar._hashed, io) == module
unlink(_get_hashed_path(grammar._hashed, path))
os.unlink(_get_hashed_path(grammar._hashed, path))
parser_cache.clear()
cached2 = load_module(grammar._hashed, path)
cached2 = load_module(grammar._hashed, io)
assert cached2 is None
def test_cache_limit():
def cache_size():
return sum(len(v) for v in parser_cache.values())
try:
parser_cache.clear()
future_node_cache_item = _NodeCacheItem('bla', [], change_time=time.time() + 10e6)
old_node_cache_item = _NodeCacheItem('bla', [], change_time=time.time() - 10e4)
parser_cache['some_hash_old'] = {
'/path/%s' % i: old_node_cache_item for i in range(300)
}
parser_cache['some_hash_new'] = {
'/path/%s' % i: future_node_cache_item for i in range(300)
}
assert cache_size() == 600
parse('somecode', cache=True, path='/path/somepath')
assert cache_size() == 301
finally:
parser_cache.clear()
class _FixedTimeFileIO(file_io.KnownContentFileIO):
def __init__(self, path, content, last_modified):
super(_FixedTimeFileIO, self).__init__(path, content)
self._last_modified = last_modified
def get_last_modified(self):
return self._last_modified
@pytest.mark.parametrize('diff_cache', [False, True])
@pytest.mark.parametrize('use_file_io', [False, True])
def test_cache_last_used_update(diff_cache, use_file_io):
p = '/path/last-used'
parser_cache.clear() # Clear, because then it's easier to find stuff.
parse('somecode', cache=True, path=p)
node_cache_item = next(iter(parser_cache.values()))[p]
now = time.time()
assert node_cache_item.last_used < now
if use_file_io:
f = _FixedTimeFileIO(p, 'code', node_cache_item.last_used - 10)
parse(file_io=f, cache=True, diff_cache=diff_cache)
else:
parse('somecode2', cache=True, path=p, diff_cache=diff_cache)
node_cache_item = next(iter(parser_cache.values()))[p]
assert now < node_cache_item.last_used < time.time()
@skip_pypy
def test_inactive_cache(tmpdir, isolated_parso_cache):
parser_cache.clear()
test_subjects = "abcdef"
for path in test_subjects:
parse('somecode', cache=True, path=os.path.join(str(tmpdir), path))
raw_cache_path = os.path.join(isolated_parso_cache, _VERSION_TAG)
assert os.path.exists(raw_cache_path)
paths = os.listdir(raw_cache_path)
a_while_ago = time.time() - _CACHED_FILE_MAXIMUM_SURVIVAL
old_paths = set()
for path in paths[:len(test_subjects) // 2]: # make certain number of paths old
os.utime(os.path.join(raw_cache_path, path), (a_while_ago, a_while_ago))
old_paths.add(path)
# nothing should be cleared while the lock is on
assert os.path.exists(_get_cache_clear_lock().path)
_remove_cache_and_update_lock() # it shouldn't clear anything
assert len(os.listdir(raw_cache_path)) == len(test_subjects)
assert old_paths.issubset(os.listdir(raw_cache_path))
os.utime(_get_cache_clear_lock().path, (a_while_ago, a_while_ago))
_remove_cache_and_update_lock()
assert len(os.listdir(raw_cache_path)) == len(test_subjects) // 2
assert not old_paths.intersection(os.listdir(raw_cache_path))
@skip_pypy
def test_permission_error(monkeypatch):
def save(*args, **kwargs):
was_called[0] = True # Python 2... Use nonlocal instead
raise PermissionError
was_called = [False]
monkeypatch.setattr(cache, '_save_to_file_system', save)
with pytest.warns(Warning):
parse(path=__file__, cache=True, diff_cache=True)
assert was_called[0]

File diff suppressed because it is too large Load Diff

149
test/test_error_recovery.py Normal file
View File

@@ -0,0 +1,149 @@
from textwrap import dedent
from parso import parse, load_grammar
def test_with_stmt():
module = parse('with x: f.\na')
assert module.children[0].type == 'with_stmt'
w, with_item, colon, f = module.children[0].children
assert f.type == 'error_node'
assert f.get_code(include_prefix=False) == 'f.'
assert module.children[2].type == 'name'
def test_one_line_function(each_version):
module = parse('def x(): f.', version=each_version)
assert module.children[0].type == 'funcdef'
def_, name, parameters, colon, f = module.children[0].children
assert f.type == 'error_node'
module = parse('def x(a:', version=each_version)
func = module.children[0]
assert func.type == 'error_node'
if each_version.startswith('2'):
assert func.children[-1].value == 'a'
else:
assert func.children[-1] == ':'
def test_if_else():
module = parse('if x:\n f.\nelse:\n g(')
if_stmt = module.children[0]
if_, test, colon, suite1, else_, colon, suite2 = if_stmt.children
f = suite1.children[1]
assert f.type == 'error_node'
assert f.children[0].value == 'f'
assert f.children[1].value == '.'
g = suite2.children[1]
assert g.children[0].value == 'g'
assert g.children[1].value == '('
def test_if_stmt():
module = parse('if x: f.\nelse: g(')
if_stmt = module.children[0]
assert if_stmt.type == 'if_stmt'
if_, test, colon, f = if_stmt.children
assert f.type == 'error_node'
assert f.children[0].value == 'f'
assert f.children[1].value == '.'
assert module.children[1].type == 'newline'
assert module.children[1].value == '\n'
assert module.children[2].type == 'error_leaf'
assert module.children[2].value == 'else'
assert module.children[3].type == 'error_leaf'
assert module.children[3].value == ':'
in_else_stmt = module.children[4]
assert in_else_stmt.type == 'error_node'
assert in_else_stmt.children[0].value == 'g'
assert in_else_stmt.children[1].value == '('
def test_invalid_token():
module = parse('a + ? + b')
error_node, q, plus_b, endmarker = module.children
assert error_node.get_code() == 'a +'
assert q.value == '?'
assert q.type == 'error_leaf'
assert plus_b.type == 'factor'
assert plus_b.get_code() == ' + b'
def test_invalid_token_in_fstr():
module = load_grammar(version='3.6').parse('f"{a + ? + b}"')
error_node, q, plus_b, error1, error2, endmarker = module.children
assert error_node.get_code() == 'f"{a +'
assert q.value == '?'
assert q.type == 'error_leaf'
assert plus_b.type == 'error_node'
assert plus_b.get_code() == ' + b'
assert error1.value == '}'
assert error1.type == 'error_leaf'
assert error2.value == '"'
assert error2.type == 'error_leaf'
def test_dedent_issues1():
code = dedent('''\
class C:
@property
f
g
end
''')
module = load_grammar(version='3.8').parse(code)
klass, endmarker = module.children
suite = klass.children[-1]
assert suite.children[2].type == 'error_leaf'
assert suite.children[3].get_code(include_prefix=False) == 'f\n'
assert suite.children[5].get_code(include_prefix=False) == 'g\n'
assert suite.type == 'suite'
def test_dedent_issues2():
code = dedent('''\
class C:
@property
if 1:
g
else:
h
end
''')
module = load_grammar(version='3.8').parse(code)
klass, endmarker = module.children
suite = klass.children[-1]
assert suite.children[2].type == 'error_leaf'
if_ = suite.children[3]
assert if_.children[0] == 'if'
assert if_.children[3].type == 'suite'
assert if_.children[3].get_code() == '\n g\n'
assert if_.children[4] == 'else'
assert if_.children[6].type == 'suite'
assert if_.children[6].get_code() == '\n h\n'
assert suite.children[4].get_code(include_prefix=False) == 'end\n'
assert suite.type == 'suite'
def test_dedent_issues3():
code = dedent('''\
class C:
f
g
''')
module = load_grammar(version='3.8').parse(code)
klass, endmarker = module.children
suite = klass.children[-1]
assert len(suite.children) == 4
assert suite.children[1].get_code() == ' f\n'
assert suite.children[1].type == 'simple_stmt'
assert suite.children[2].get_code() == ''
assert suite.children[2].type == 'error_leaf'
assert suite.children[2].token_type == 'ERROR_DEDENT'
assert suite.children[3].get_code() == ' g\n'
assert suite.children[3].type == 'simple_stmt'

View File

@@ -1,57 +1,101 @@
import pytest
from textwrap import dedent
from parso import load_grammar, ParserSyntaxError
from parso.python.fstring import tokenize
from parso.python.tokenize import tokenize
@pytest.fixture
def grammar():
return load_grammar(language="python-f-string")
return load_grammar(version='3.8')
@pytest.mark.parametrize(
'code', [
'{1}',
'',
'{1!a}',
'{1!a:1}',
'{1:1}',
'{1:1.{32}}',
'{1::>4}',
'{foo} {bar}',
# simple cases
'f"{1}"',
'f"""{1}"""',
'f"{foo} {bar}"',
# empty string
'f""',
'f""""""',
# empty format specifier is okay
'f"{1:}"',
# use of conversion options
'f"{1!a}"',
'f"{1!a:1}"',
# format specifiers
'f"{1:1}"',
'f"{1:1.{32}}"',
'f"{1::>4}"',
'f"{x:{y}}"',
'f"{x:{y:}}"',
'f"{x:{y:1}}"',
# Escapes
'{{}}',
'{{{1}}}',
'{{{1}',
'1{{2{{3',
'}}',
'{:}}}',
'f"{{}}"',
'f"{{{1}}}"',
'f"{{{1}"',
'f"1{{2{{3"',
'f"}}"',
# Invalid, but will be checked, later.
'{}',
'{1:}',
'{:}',
'{:1}',
'{!:}',
'{!}',
'{!a}',
'{1:{}}',
'{1:{:}}',
# New Python 3.8 syntax f'{a=}'
'f"{a=}"',
'f"{a()=}"',
# multiline f-string
'f"""abc\ndef"""',
'f"""abc{\n123}def"""',
# a line continuation inside of an fstring_string
'f"abc\\\ndef"',
'f"\\\n{123}\\\n"',
# a line continuation inside of an fstring_expr
'f"{\\\n123}"',
# a line continuation inside of an format spec
'f"{123:.2\\\nf}"',
]
)
def test_valid(code, grammar):
fstring = grammar.parse(code, error_recovery=False)
module = grammar.parse(code, error_recovery=False)
fstring = module.children[0]
assert fstring.type == 'fstring'
assert fstring.get_code() == code
@pytest.mark.parametrize(
'code', [
'}',
'{',
'{1!{a}}',
'{!{a}}',
# an f-string can't contain unmatched curly braces
'f"}"',
'f"{"',
'f"""}"""',
'f"""{"""',
# invalid conversion characters
'f"{1!{a}}"',
'f"{!{a}}"',
# The curly braces must contain an expression
'f"{}"',
'f"{:}"',
'f"{:}}}"',
'f"{:1}"',
'f"{!:}"',
'f"{!}"',
'f"{!a}"',
# invalid (empty) format specifiers
'f"{1:{}}"',
'f"{1:{:}}"',
# a newline without a line continuation inside a single-line string
'f"abc\ndef"',
]
)
def test_invalid(code, grammar):
@@ -59,17 +103,36 @@ def test_invalid(code, grammar):
grammar.parse(code, error_recovery=False)
# It should work with error recovery.
#grammar.parse(code, error_recovery=True)
grammar.parse(code, error_recovery=True)
@pytest.mark.parametrize(
('code', 'start_pos', 'positions'), [
('code', 'positions'), [
# 2 times 2, 5 because python expr and endmarker.
('}{', (2, 3), [(2, 3), (2, 4), (2, 5), (2, 5)]),
(' :{ 1 : } ', (1, 0), [(1, 2), (1, 3), (1, 6), (1, 8), (1, 10)]),
('\n{\nfoo\n }', (2, 1), [(3, 0), (3, 1), (5, 1), (5, 2)]),
('f"}{"', [(1, 0), (1, 2), (1, 3), (1, 4), (1, 5)]),
('f" :{ 1 : } "', [(1, 0), (1, 2), (1, 4), (1, 6), (1, 8), (1, 9),
(1, 10), (1, 11), (1, 12), (1, 13)]),
('f"""\n {\nfoo\n }"""', [(1, 0), (1, 4), (2, 1), (3, 0), (4, 1),
(4, 2), (4, 5)]),
]
)
def test_tokenize_start_pos(code, start_pos, positions):
tokens = tokenize(code, start_pos)
def test_tokenize_start_pos(code, positions):
tokens = list(tokenize(code, version_info=(3, 6)))
assert positions == [p.start_pos for p in tokens]
@pytest.mark.parametrize(
'code', [
dedent("""\
f'''s{
str.uppe
'''
"""),
'f"foo',
'f"""foo',
'f"abc\ndef"',
]
)
def test_roundtrip(grammar, code):
tree = grammar.parse(code)
assert tree.get_code() == code

View File

@@ -106,14 +106,28 @@ def test_end_newlines():
@pytest.mark.parametrize(('code', 'types'), [
('\r', ['error_leaf', 'endmarker']),
('\n\r', ['error_leaf', 'endmarker'])
('\r', ['endmarker']),
('\n\r', ['endmarker'])
])
def test_carriage_return_at_end(code, types):
"""
By adding an artificial newline this creates weird side effects for
\r at the end of files that would normally be error leafs.
By adding an artificial newline this created weird side effects for
\r at the end of files.
"""
tree = parse(code)
assert tree.get_code() == code
assert [c.type for c in tree.children] == types
assert tree.end_pos == (len(code) + 1, 0)
@pytest.mark.parametrize('code', [
' ',
' F"""',
' F"""\n',
' F""" \n',
' F""" \n3',
' f"""\n"""',
' f"""\n"""\n',
])
def test_full_code_round_trip(code):
assert parse(code).get_code() == code

View File

@@ -20,7 +20,7 @@ def test_parse_version(string, result):
assert utils._parse_version(string) == result
@pytest.mark.parametrize('string', ['1.', 'a', '#', '1.3.4.5', '1.12'])
@pytest.mark.parametrize('string', ['1.', 'a', '#', '1.3.4.5'])
def test_invalid_grammar_version(string):
with pytest.raises(ValueError):
load_grammar(version=string)
@@ -28,4 +28,4 @@ def test_invalid_grammar_version(string):
def test_grammar_int_version():
with pytest.raises(TypeError):
load_grammar(version=3.2)
load_grammar(version=3.8)

View File

@@ -5,9 +5,9 @@ tests of pydocstyle.
import difflib
import re
from functools import total_ordering
import parso
from parso._compatibility import total_ordering
from parso.utils import python_bytes_to_unicode

View File

@@ -32,3 +32,16 @@ def test_split_params_with_stars():
assert_params(u'x, *args', x=None, args=None)
assert_params(u'**kwargs', kwargs=None)
assert_params(u'*args, **kwargs', args=None, kwargs=None)
def test_kw_only_no_kw(works_ge_py3):
"""
Parsing this should be working. In CPython the parser also parses this and
in a later step the AST complains.
"""
module = works_ge_py3.parse('def test(arg, *):\n pass')
if module is not None:
func = module.children[0]
open_, p1, asterisk, close = func._get_param_nodes()
assert p1.get_code('arg,')
assert asterisk.value == '*'

View File

@@ -189,3 +189,35 @@ def test_no_error_nodes(each_version):
check(child)
check(parse("if foo:\n bar", version=each_version))
def test_named_expression(works_ge_py38):
works_ge_py38.parse("(a := 1, a + 1)")
def test_extended_rhs_annassign(works_ge_py38):
works_ge_py38.parse("x: y = z,")
works_ge_py38.parse("x: Tuple[int, ...] = z, *q, w")
@pytest.mark.parametrize(
'param_code', [
'a=1, /',
'a, /',
'a=1, /, b=3',
'a, /, b',
'a, /, b',
'a, /, *, b',
'a, /, **kwargs',
]
)
def test_positional_only_arguments(works_ge_py38, param_code):
works_ge_py38.parse("def x(%s): pass" % param_code)
@pytest.mark.parametrize(
'expression', [
'a + a',
'lambda x: x',
'a := lambda x: x'
]
)
def test_decorator_expression(works_ge_py39, expression):
works_ge_py39.parse("@%s\ndef x(): pass" % expression)

View File

@@ -125,6 +125,10 @@ def get_return_stmts(code):
return list(parse(code).children[0].iter_return_stmts())
def get_raise_stmts(code, child):
return list(parse(code).children[child].iter_raise_stmts())
def test_yields(each_version):
y, = get_yield_exprs('def x(): yield', each_version)
assert y.value == 'yield'
@@ -138,7 +142,7 @@ def test_yields(each_version):
def test_yield_from():
y, = get_yield_exprs('def x(): (yield from 1)', '3.3')
y, = get_yield_exprs('def x(): (yield from 1)', '3.8')
assert y.type == 'yield_expr'
@@ -149,3 +153,88 @@ def test_returns():
r, = get_return_stmts('def x(): return 1')
assert r.type == 'return_stmt'
def test_raises():
code = """
def single_function():
raise Exception
def top_function():
def inner_function():
raise NotImplementedError()
inner_function()
raise Exception
def top_function_three():
try:
raise NotImplementedError()
except NotImplementedError:
pass
raise Exception
"""
r = get_raise_stmts(code, 0) # Lists in a simple Function
assert len(list(r)) == 1
r = get_raise_stmts(code, 1) # Doesn't Exceptions list in closures
assert len(list(r)) == 1
r = get_raise_stmts(code, 2) # Lists inside try-catch
assert len(list(r)) == 2
@pytest.mark.parametrize(
'code, name_index, is_definition, include_setitem', [
('x = 3', 0, True, False),
('x.y = 3', 0, False, False),
('x.y = 3', 1, True, False),
('x.y = u.v = z', 0, False, False),
('x.y = u.v = z', 1, True, False),
('x.y = u.v = z', 2, False, False),
('x.y = u.v, w = z', 3, True, False),
('x.y = u.v, w = z', 4, True, False),
('x.y = u.v, w = z', 5, False, False),
('x, y = z', 0, True, False),
('x, y = z', 1, True, False),
('x, y = z', 2, False, False),
('x, y = z', 2, False, False),
('x[0], y = z', 2, False, False),
('x[0] = z', 0, False, False),
('x[0], y = z', 0, False, False),
('x[0], y = z', 2, False, True),
('x[0] = z', 0, True, True),
('x[0], y = z', 0, True, True),
('x: int = z', 0, True, False),
('x: int = z', 1, False, False),
('x: int = z', 2, False, False),
('x: int', 0, True, False),
('x: int', 1, False, False),
]
)
def test_is_definition(code, name_index, is_definition, include_setitem):
module = parse(code, version='3.8')
name = module.get_first_leaf()
while True:
if name.type == 'name':
if name_index == 0:
break
name_index -= 1
name = name.get_next_leaf()
assert name.is_definition(include_setitem=include_setitem) == is_definition
def test_iter_funcdefs():
code = dedent('''
def normal(): ...
async def asyn(): ...
@dec
def dec_normal(): ...
@dec1
@dec2
async def dec_async(): ...
def broken
''')
module = parse(code, version='3.8')
func_names = [f.name.value for f in module.iter_funcdefs()]
assert func_names == ['normal', 'asyn', 'dec_normal', 'dec_async']

View File

@@ -12,6 +12,8 @@ import pytest
from parso import load_grammar
from parso import ParserSyntaxError
from parso.pgen2 import generate_grammar
from parso.python import tokenize
def _parse(code, version=None):
@@ -27,13 +29,17 @@ def _invalid_syntax(code, version=None, **kwargs):
print(module.children)
def test_formfeed(each_py2_version):
s = u"""print 1\n\x0Cprint 2\n"""
t = _parse(s, each_py2_version)
assert t.children[0].children[0].type == 'print_stmt'
assert t.children[1].children[0].type == 'print_stmt'
s = u"""1\n\x0C\x0C2\n"""
t = _parse(s, each_py2_version)
def test_formfeed(each_version):
s = u"foo\n\x0c\nfoo\n"
t = _parse(s, each_version)
assert t.children[0].children[0].type == 'name'
assert t.children[1].children[0].type == 'name'
s = u"1\n\x0c\x0c\n2\n"
t = _parse(s, each_version)
with pytest.raises(ParserSyntaxError):
s = u"\n\x0c2\n"
_parse(s, each_version)
def test_matrix_multiplication_operator(works_ge_py35):
@@ -81,6 +87,39 @@ def test_async_for(works_ge_py35):
works_ge_py35.parse("async def foo():\n async for a in b: pass")
@pytest.mark.parametrize("body", [
"""[1 async for a in b
]""",
"""[1 async
for a in b
]""",
"""[
1
async for a in b
]""",
"""[
1
async for a
in b
]""",
"""[
1
async
for
a
in
b
]""",
""" [
1 async for a in b
]""",
])
def test_async_for_comprehension_newline(works_ge_py36, body):
# Issue #139
works_ge_py36.parse("""async def foo():
{}""".format(body))
def test_async_with(works_ge_py35):
works_ge_py35.parse("async def foo():\n async with a: pass")
@@ -188,6 +227,19 @@ def test_old_octal_notation(works_in_py2):
works_in_py2.parse("07")
def test_long_notation(works_in_py2):
works_in_py2.parse("0xFl")
works_in_py2.parse("0xFL")
works_in_py2.parse("0b1l")
works_in_py2.parse("0B1L")
works_in_py2.parse("0o7l")
works_in_py2.parse("0O7L")
works_in_py2.parse("0l")
works_in_py2.parse("0L")
works_in_py2.parse("10l")
works_in_py2.parse("10L")
def test_new_binary_notation(each_version):
_parse("""0b101010""", each_version)
_invalid_syntax("""0b0101021""", each_version)
@@ -270,3 +322,29 @@ def py_br(each_version):
def test_py3_rb(works_ge_py3):
works_ge_py3.parse("rb'1'")
works_ge_py3.parse("RB'1'")
def test_left_recursion():
with pytest.raises(ValueError, match='left recursion'):
generate_grammar('foo: foo NAME\n', tokenize.PythonTokenTypes)
@pytest.mark.parametrize(
'grammar, error_match', [
['foo: bar | baz\nbar: NAME\nbaz: NAME\n',
r"foo is ambiguous.*given a TokenType\(NAME\).*bar or baz"],
['''foo: bar | baz\nbar: 'x'\nbaz: "x"\n''',
r"foo is ambiguous.*given a ReservedString\(x\).*bar or baz"],
['''foo: bar | 'x'\nbar: 'x'\n''',
r"foo is ambiguous.*given a ReservedString\(x\).*bar or foo"],
# An ambiguity with the second (not the first) child of a production
['outer: "a" [inner] "b" "c"\ninner: "b" "c" [inner]\n',
r"outer is ambiguous.*given a ReservedString\(b\).*inner or outer"],
# An ambiguity hidden by a level of indirection (middle)
['outer: "a" [middle] "b" "c"\nmiddle: inner\ninner: "b" "c" [inner]\n',
r"outer is ambiguous.*given a ReservedString\(b\).*middle or outer"],
]
)
def test_ambiguities(grammar, error_match):
with pytest.raises(ValueError, match=error_match):
generate_grammar(grammar, tokenize.PythonTokenTypes)

View File

@@ -7,6 +7,8 @@ import warnings
import pytest
import parso
from textwrap import dedent
from parso._compatibility import is_pypy
from .failing_examples import FAILING_EXAMPLES, indent, build_nested
@@ -37,10 +39,33 @@ def test_python_exception_matches(code):
error, = errors
actual = error.message
assert actual in wanted
# Somehow in Python3.3 the SyntaxError().lineno is sometimes None
# Somehow in Python2.7 the SyntaxError().lineno is sometimes None
assert line_nr is None or line_nr == error.start_pos[0]
def test_non_async_in_async():
"""
This example doesn't work with FAILING_EXAMPLES, because the line numbers
are not always the same / incorrect in Python 3.8.
"""
if sys.version_info[:2] < (3, 5):
pytest.skip()
# Raises multiple errors in previous versions.
code = 'async def foo():\n def nofoo():[x async for x in []]'
wanted, line_nr = _get_actual_exception(code)
errors = _get_error_list(code)
if errors:
error, = errors
actual = error.message
assert actual in wanted
if sys.version_info[:2] < (3, 8):
assert line_nr == error.start_pos[0]
else:
assert line_nr == 0 # For whatever reason this is zero in Python 3.8+
@pytest.mark.parametrize(
('code', 'positions'), [
('1 +', [(1, 3)]),
@@ -95,25 +120,32 @@ def _get_actual_exception(code):
assert False, "The piece of code should raise an exception."
# SyntaxError
# Python 2.6 has a bit different error messages here, so skip it.
if sys.version_info[:2] == (2, 6) and wanted == 'SyntaxError: unexpected EOF while parsing':
wanted = 'SyntaxError: invalid syntax'
if wanted == 'SyntaxError: non-keyword arg after keyword arg':
# The python 3.5+ way, a bit nicer.
wanted = 'SyntaxError: positional argument follows keyword argument'
elif wanted == 'SyntaxError: assignment to keyword':
return [wanted, "SyntaxError: can't assign to keyword"], line_nr
elif wanted == 'SyntaxError: assignment to None':
# Python 2.6 does has a slightly different error.
wanted = 'SyntaxError: cannot assign to None'
elif wanted == 'SyntaxError: can not assign to __debug__':
# Python 2.6 does has a slightly different error.
wanted = 'SyntaxError: cannot assign to __debug__'
return [wanted, "SyntaxError: can't assign to keyword",
'SyntaxError: cannot assign to __debug__'], line_nr
elif wanted == 'SyntaxError: can use starred expression only as assignment target':
# Python 3.4/3.4 have a bit of a different warning than 3.5/3.6 in
# certain places. But in others this error makes sense.
return [wanted, "SyntaxError: can't use starred expression here"], line_nr
elif wanted == 'SyntaxError: f-string: unterminated string':
wanted = 'SyntaxError: EOL while scanning string literal'
elif wanted == 'SyntaxError: f-string expression part cannot include a backslash':
return [
wanted,
"SyntaxError: EOL while scanning string literal",
"SyntaxError: unexpected character after line continuation character",
], line_nr
elif wanted == "SyntaxError: f-string: expecting '}'":
wanted = 'SyntaxError: EOL while scanning string literal'
elif wanted == 'SyntaxError: f-string: empty expression not allowed':
wanted = 'SyntaxError: invalid syntax'
elif wanted == "SyntaxError: f-string expression part cannot include '#'":
wanted = 'SyntaxError: invalid syntax'
elif wanted == "SyntaxError: f-string: single '}' is not allowed":
wanted = 'SyntaxError: invalid syntax'
return [wanted], line_nr
@@ -155,12 +187,13 @@ def test_statically_nested_blocks():
def test_future_import_first():
def is_issue(code, *args):
def is_issue(code, *args, **kwargs):
code = code % args
return bool(_get_error_list(code))
return bool(_get_error_list(code, **kwargs))
i1 = 'from __future__ import division'
i2 = 'from __future__ import absolute_import'
i3 = 'from __future__ import annotations'
assert not is_issue(i1)
assert not is_issue(i1 + ';' + i2)
assert not is_issue(i1 + '\n' + i2)
@@ -171,6 +204,8 @@ def test_future_import_first():
assert not is_issue('""\n%s;%s', i1, i2)
assert not is_issue('"";%s;%s ', i1, i2)
assert not is_issue('"";%s\n%s ', i1, i2)
assert not is_issue(i3, version="3.7")
assert is_issue(i3, version="3.6")
assert is_issue('1;' + i1)
assert is_issue('1\n' + i1)
assert is_issue('"";1\n' + i1)
@@ -238,19 +273,42 @@ def test_too_many_levels_of_indentation():
assert not _get_error_list(build_nested('pass', 49, base=base))
assert _get_error_list(build_nested('pass', 50, base=base))
def test_paren_kwarg():
assert _get_error_list("print((sep)=seperator)", version="3.8")
assert not _get_error_list("print((sep)=seperator)", version="3.7")
@pytest.mark.parametrize(
'code', [
"f'{*args,}'",
r'f"\""',
r'f"\\\""',
r'fr"\""',
r'fr"\\\""',
r"print(f'Some {x:.2f} and some {y}')",
]
)
def test_valid_fstrings(code):
assert not _get_error_list(code, version='3.6')
@pytest.mark.parametrize(
'code', [
'a = (b := 1)',
'[x4 := x ** 5 for x in range(7)]',
'[total := total + v for v in range(10)]',
'while chunk := file.read(2):\n pass',
'numbers = [y := math.factorial(x), y**2, y**3]',
]
)
def test_valid_namedexpr(code):
assert not _get_error_list(code, version='3.8')
@pytest.mark.parametrize(
('code', 'message'), [
("f'{1+}'", ('invalid syntax')),
(r'f"\"', ('invalid syntax')),
(r'fr"\"', ('invalid syntax')),
]
)
def test_invalid_fstrings(code, message):
@@ -260,3 +318,99 @@ def test_invalid_fstrings(code, message):
"""
error, = _get_error_list(code, version='3.6')
assert message in error.message
@pytest.mark.parametrize(
'code', [
"from foo import (\nbar,\n rab,\n)",
"from foo import (bar, rab, )",
]
)
def test_trailing_comma(code):
errors = _get_error_list(code)
assert not errors
def test_continue_in_finally():
code = dedent('''\
for a in [1]:
try:
pass
finally:
continue
''')
assert not _get_error_list(code, version="3.8")
assert _get_error_list(code, version="3.7")
@pytest.mark.parametrize(
'template', [
"a, b, {target}, c = d",
"a, b, *{target}, c = d",
"(a, *{target}), c = d",
"for x, {target} in y: pass",
"for x, q, {target} in y: pass",
"for x, q, *{target} in y: pass",
"for (x, *{target}), q in y: pass",
]
)
@pytest.mark.parametrize(
'target', [
"True",
"False",
"None",
"__debug__"
]
)
def test_forbidden_name(template, target):
assert _get_error_list(template.format(target=target), version="3")
def test_repeated_kwarg():
# python 3.9+ shows which argument is repeated
assert (
_get_error_list("f(q=1, q=2)", version="3.8")[0].message
== "SyntaxError: keyword argument repeated"
)
assert (
_get_error_list("f(q=1, q=2)", version="3.9")[0].message
== "SyntaxError: keyword argument repeated: q"
)
@pytest.mark.parametrize(
('source', 'no_errors'), [
('a(a for a in b,)', False),
('a(a for a in b, a)', False),
('a(a, a for a in b)', False),
('a(a, b, a for a in b, c, d)', False),
('a(a for a in b)', True),
('a((a for a in b), c)', True),
('a(c, (a for a in b))', True),
('a(a, b, (a for a in b), c, d)', True),
]
)
def test_unparenthesized_genexp(source, no_errors):
assert bool(_get_error_list(source)) ^ no_errors
@pytest.mark.parametrize(
('source', 'no_errors'), [
('*x = 2', False),
('(*y) = 1', False),
('((*z)) = 1', False),
('a, *b = 1', True),
('a, *b, c = 1', True),
('a, (*b), c = 1', True),
('a, ((*b)), c = 1', True),
('a, (*b, c), d = 1', True),
('[*(1,2,3)]', True),
('{*(1,2,3)}', True),
('[*(1,2,3),]', True),
('[*(1,2,3), *(4,5,6)]', True),
('[0, *(1,2,3)]', True),
('{*(1,2,3),}', True),
('{*(1,2,3), *(4,5,6)}', True),
('{0, *(4,5,6)}', True)
]
)
def test_starred_expr(source, no_errors):
assert bool(_get_error_list(source, version="3")) ^ no_errors

View File

@@ -1,21 +1,36 @@
# -*- coding: utf-8 # This file contains Unicode characters.
import sys
from textwrap import dedent
import pytest
from parso._compatibility import py_version
from parso.utils import split_lines, parse_version_string
from parso.python.token import (
NAME, NEWLINE, STRING, INDENT, DEDENT, ERRORTOKEN, ENDMARKER, ERROR_DEDENT)
from parso.python.token import PythonTokenTypes
from parso.python import tokenize
from parso import parse
from parso.python.tokenize import PythonToken
def _get_token_list(string):
# To make it easier to access some of the token types, just put them here.
NAME = PythonTokenTypes.NAME
NEWLINE = PythonTokenTypes.NEWLINE
STRING = PythonTokenTypes.STRING
NUMBER = PythonTokenTypes.NUMBER
INDENT = PythonTokenTypes.INDENT
DEDENT = PythonTokenTypes.DEDENT
ERRORTOKEN = PythonTokenTypes.ERRORTOKEN
OP = PythonTokenTypes.OP
ENDMARKER = PythonTokenTypes.ENDMARKER
ERROR_DEDENT = PythonTokenTypes.ERROR_DEDENT
FSTRING_START = PythonTokenTypes.FSTRING_START
FSTRING_STRING = PythonTokenTypes.FSTRING_STRING
FSTRING_END = PythonTokenTypes.FSTRING_END
def _get_token_list(string, version=None):
# Load the current version.
version_info = parse_version_string()
version_info = parse_version_string(version)
return list(tokenize.tokenize(string, version_info))
@@ -121,12 +136,12 @@ def test_identifier_contains_unicode():
''')
token_list = _get_token_list(fundef)
unicode_token = token_list[1]
if py_version >= 30:
if sys.version_info.major >= 3:
assert unicode_token[0] == NAME
else:
# Unicode tokens in Python 2 seem to be identified as operators.
# They will be ignored in the parser, that's ok.
assert unicode_token[0] == tokenize.ERRORTOKEN
assert unicode_token[0] == ERRORTOKEN
def test_quoted_strings():
@@ -162,39 +177,41 @@ def test_ur_literals():
token_list = _get_token_list(literal)
typ, result_literal, _, _ = token_list[0]
if is_literal:
assert typ == STRING
assert result_literal == literal
if typ != FSTRING_START:
assert typ == STRING
assert result_literal == literal
else:
assert typ == NAME
check('u""')
check('ur""', is_literal=not py_version >= 30)
check('Ur""', is_literal=not py_version >= 30)
check('UR""', is_literal=not py_version >= 30)
check('ur""', is_literal=not sys.version_info.major >= 3)
check('Ur""', is_literal=not sys.version_info.major >= 3)
check('UR""', is_literal=not sys.version_info.major >= 3)
check('bR""')
# Starting with Python 3.3 this ordering is also possible.
if py_version >= 33:
if sys.version_info.major >= 3:
check('Rb""')
# Starting with Python 3.6 format strings where introduced.
check('fr""', is_literal=py_version >= 36)
check('rF""', is_literal=py_version >= 36)
check('f""', is_literal=py_version >= 36)
check('F""', is_literal=py_version >= 36)
check('fr""', is_literal=sys.version_info >= (3, 6))
check('rF""', is_literal=sys.version_info >= (3, 6))
check('f""', is_literal=sys.version_info >= (3, 6))
check('F""', is_literal=sys.version_info >= (3, 6))
def test_error_literal():
error_token, endmarker = _get_token_list('"\n')
assert error_token.type == tokenize.ERRORTOKEN
assert endmarker.prefix == ''
assert error_token.string == '"\n'
assert endmarker.type == tokenize.ENDMARKER
error_token, newline, endmarker = _get_token_list('"\n')
assert error_token.type == ERRORTOKEN
assert error_token.string == '"'
assert newline.type == NEWLINE
assert endmarker.type == ENDMARKER
assert endmarker.prefix == ''
bracket, error_token, endmarker = _get_token_list('( """')
assert error_token.type == tokenize.ERRORTOKEN
assert error_token.type == ERRORTOKEN
assert error_token.prefix == ' '
assert error_token.string == '"""'
assert endmarker.type == tokenize.ENDMARKER
assert endmarker.type == ENDMARKER
assert endmarker.prefix == ''
@@ -212,15 +229,215 @@ def test_endmarker_end_pos():
check('a\\')
xfail_py2 = dict(marks=[pytest.mark.xfail(sys.version_info[0] == 2, reason='Python 2')])
@pytest.mark.parametrize(
('code', 'types'), [
# Indentation
(' foo', [INDENT, NAME, DEDENT]),
(' foo\n bar', [INDENT, NAME, NEWLINE, ERROR_DEDENT, NAME, DEDENT]),
(' foo\n bar \n baz', [INDENT, NAME, NEWLINE, ERROR_DEDENT, NAME,
NEWLINE, ERROR_DEDENT, NAME, DEDENT]),
NEWLINE, NAME, DEDENT]),
(' foo\nbar', [INDENT, NAME, NEWLINE, DEDENT, NAME]),
# Name stuff
('1foo1', [NUMBER, NAME]),
pytest.param(
u'மெல்லினம்', [NAME],
**xfail_py2),
pytest.param(u'²', [ERRORTOKEN], **xfail_py2),
pytest.param(u'ä²ö', [NAME, ERRORTOKEN, NAME], **xfail_py2),
pytest.param(u'ää²¹öö', [NAME, ERRORTOKEN, NAME], **xfail_py2),
(' \x00a', [INDENT, ERRORTOKEN, NAME, DEDENT]),
(dedent('''\
class BaseCache:
a
def
b
def
c
'''), [NAME, NAME, OP, NEWLINE, INDENT, NAME, NEWLINE,
ERROR_DEDENT, NAME, NEWLINE, INDENT, NAME, NEWLINE, DEDENT,
NAME, NEWLINE, INDENT, NAME, NEWLINE, DEDENT, DEDENT]),
(' )\n foo', [INDENT, OP, NEWLINE, ERROR_DEDENT, NAME, DEDENT]),
('a\n b\n )\n c', [NAME, NEWLINE, INDENT, NAME, NEWLINE, INDENT, OP,
NEWLINE, DEDENT, NAME, DEDENT]),
(' 1 \\\ndef', [INDENT, NUMBER, NAME, DEDENT]),
]
)
def test_indentation(code, types):
def test_token_types(code, types):
actual_types = [t.type for t in _get_token_list(code)]
assert actual_types == types + [ENDMARKER]
def test_error_string():
indent, t1, newline, token, endmarker = _get_token_list(' "\n')
assert t1.type == ERRORTOKEN
assert t1.prefix == ' '
assert t1.string == '"'
assert newline.type == NEWLINE
assert endmarker.prefix == ''
assert endmarker.string == ''
def test_indent_error_recovery():
code = dedent("""\
str(
from x import a
def
""")
lst = _get_token_list(code)
expected = [
# `str(`
INDENT, NAME, OP,
# `from parso`
NAME, NAME,
# `import a` on same line as the previous from parso
NAME, NAME, NEWLINE,
# Dedent happens, because there's an import now and the import
# statement "breaks" out of the opening paren on the first line.
DEDENT,
# `b`
NAME, NEWLINE, ENDMARKER]
assert [t.type for t in lst] == expected
def test_error_token_after_dedent():
code = dedent("""\
class C:
pass
$foo
""")
lst = _get_token_list(code)
expected = [
NAME, NAME, OP, NEWLINE, INDENT, NAME, NEWLINE, DEDENT,
# $foo\n
ERRORTOKEN, NAME, NEWLINE, ENDMARKER
]
assert [t.type for t in lst] == expected
def test_brackets_no_indentation():
"""
There used to be an issue that the parentheses counting would go below
zero. This should not happen.
"""
code = dedent("""\
}
{
}
""")
lst = _get_token_list(code)
assert [t.type for t in lst] == [OP, NEWLINE, OP, OP, NEWLINE, ENDMARKER]
def test_form_feed():
indent, error_token, dedent_, endmarker = _get_token_list(dedent('''\
\f"""'''))
assert error_token.prefix == '\f'
assert error_token.string == '"""'
assert endmarker.prefix == ''
assert indent.type == INDENT
assert dedent_.type == DEDENT
def test_carriage_return():
lst = _get_token_list(' =\\\rclass')
assert [t.type for t in lst] == [INDENT, OP, NAME, DEDENT, ENDMARKER]
def test_backslash():
code = '\\\n# 1 \n'
endmarker, = _get_token_list(code)
assert endmarker.prefix == code
@pytest.mark.parametrize(
('code', 'types'), [
# f-strings
('f"', [FSTRING_START]),
('f""', [FSTRING_START, FSTRING_END]),
('f" {}"', [FSTRING_START, FSTRING_STRING, OP, OP, FSTRING_END]),
('f" "{}', [FSTRING_START, FSTRING_STRING, FSTRING_END, OP, OP]),
(r'f"\""', [FSTRING_START, FSTRING_STRING, FSTRING_END]),
(r'f"\""', [FSTRING_START, FSTRING_STRING, FSTRING_END]),
# format spec
(r'f"Some {x:.2f}{y}"', [FSTRING_START, FSTRING_STRING, OP, NAME, OP,
FSTRING_STRING, OP, OP, NAME, OP, FSTRING_END]),
# multiline f-string
('f"""abc\ndef"""', [FSTRING_START, FSTRING_STRING, FSTRING_END]),
('f"""abc{\n123}def"""', [
FSTRING_START, FSTRING_STRING, OP, NUMBER, OP, FSTRING_STRING,
FSTRING_END
]),
# a line continuation inside of an fstring_string
('f"abc\\\ndef"', [
FSTRING_START, FSTRING_STRING, FSTRING_END
]),
('f"\\\n{123}\\\n"', [
FSTRING_START, FSTRING_STRING, OP, NUMBER, OP, FSTRING_STRING,
FSTRING_END
]),
# a line continuation inside of an fstring_expr
('f"{\\\n123}"', [FSTRING_START, OP, NUMBER, OP, FSTRING_END]),
# a line continuation inside of an format spec
('f"{123:.2\\\nf}"', [
FSTRING_START, OP, NUMBER, OP, FSTRING_STRING, OP, FSTRING_END
]),
# a newline without a line continuation inside a single-line string is
# wrong, and will generate an ERRORTOKEN
('f"abc\ndef"', [
FSTRING_START, FSTRING_STRING, NEWLINE, NAME, ERRORTOKEN
]),
# a more complex example
(r'print(f"Some {x:.2f}a{y}")', [
NAME, OP, FSTRING_START, FSTRING_STRING, OP, NAME, OP,
FSTRING_STRING, OP, FSTRING_STRING, OP, NAME, OP, FSTRING_END, OP
]),
# issue #86, a string-like in an f-string expression
('f"{ ""}"', [
FSTRING_START, OP, FSTRING_END, STRING
]),
('f"{ f""}"', [
FSTRING_START, OP, NAME, FSTRING_END, STRING
]),
]
)
def test_fstring_token_types(code, types, version_ge_py36):
actual_types = [t.type for t in _get_token_list(code, version_ge_py36)]
assert types + [ENDMARKER] == actual_types
@pytest.mark.parametrize(
('code', 'types'), [
# issue #87, `:=` in the outest paratheses should be tokenized
# as a format spec marker and part of the format
('f"{x:=10}"', [
FSTRING_START, OP, NAME, OP, FSTRING_STRING, OP, FSTRING_END
]),
('f"{(x:=10)}"', [
FSTRING_START, OP, OP, NAME, OP, NUMBER, OP, OP, FSTRING_END
]),
]
)
def test_fstring_assignment_expression(code, types, version_ge_py38):
actual_types = [t.type for t in _get_token_list(code, version_ge_py38)]
assert types + [ENDMARKER] == actual_types
def test_fstring_end_error_pos(version_ge_py38):
f_start, f_string, bracket, f_end, endmarker = \
_get_token_list('f" { "', version_ge_py38)
assert f_start.start_pos == (1, 0)
assert f_string.start_pos == (1, 2)
assert bracket.start_pos == (1, 3)
assert f_end.start_pos == (1, 5)
assert endmarker.start_pos == (1, 6)

View File

@@ -1,23 +1,49 @@
from codecs import BOM_UTF8
from parso.utils import split_lines, python_bytes_to_unicode
from parso.utils import (
split_lines,
parse_version_string,
python_bytes_to_unicode,
)
import parso
def test_split_lines_no_keepends():
assert split_lines('asd\r\n') == ['asd', '']
assert split_lines('asd\r\n\f') == ['asd', '\f']
assert split_lines('\fasd\r\n') == ['\fasd', '']
assert split_lines('') == ['']
assert split_lines('\n') == ['', '']
import pytest
def test_split_lines_keepends():
assert split_lines('asd\r\n', keepends=True) == ['asd\r\n', '']
assert split_lines('asd\r\n\f', keepends=True) == ['asd\r\n', '\f']
assert split_lines('\fasd\r\n', keepends=True) == ['\fasd\r\n', '']
assert split_lines('', keepends=True) == ['']
assert split_lines('\n', keepends=True) == ['\n', '']
@pytest.mark.parametrize(
('string', 'expected_result', 'keepends'), [
('asd\r\n', ['asd', ''], False),
('asd\r\n', ['asd\r\n', ''], True),
('asd\r', ['asd', ''], False),
('asd\r', ['asd\r', ''], True),
('asd\n', ['asd', ''], False),
('asd\n', ['asd\n', ''], True),
('asd\r\n\f', ['asd', '\f'], False),
('asd\r\n\f', ['asd\r\n', '\f'], True),
('\fasd\r\n', ['\fasd', ''], False),
('\fasd\r\n', ['\fasd\r\n', ''], True),
('', [''], False),
('', [''], True),
('\n', ['', ''], False),
('\n', ['\n', ''], True),
('\r', ['', ''], False),
('\r', ['\r', ''], True),
# Invalid line breaks
('a\vb', ['a\vb'], False),
('a\vb', ['a\vb'], True),
('\x1C', ['\x1C'], False),
('\x1C', ['\x1C'], True),
]
)
def test_split_lines(string, expected_result, keepends):
assert split_lines(string, keepends=keepends) == expected_result
def test_python_bytes_to_unicode_unicode_text():
@@ -42,3 +68,35 @@ def test_utf8_bom():
expr_stmt = module.children[0]
assert expr_stmt.type == 'expr_stmt'
assert unicode_bom == expr_stmt.get_first_leaf().prefix
@pytest.mark.parametrize(
('code', 'errors'), [
(b'# coding: wtf-12\nfoo', 'strict'),
(b'# coding: wtf-12\nfoo', 'replace'),
]
)
def test_bytes_to_unicode_failing_encoding(code, errors):
if errors == 'strict':
with pytest.raises(LookupError):
python_bytes_to_unicode(code, errors=errors)
else:
python_bytes_to_unicode(code, errors=errors)
@pytest.mark.parametrize(
('version_str', 'version'), [
('3', (3,)),
('3.6', (3, 6)),
('3.6.10', (3, 6)),
('3.10', (3, 10)),
('3.10a9', (3, 10)),
('3.10b9', (3, 10)),
('3.10rc9', (3, 10)),
]
)
def test_parse_version_string(version_str, version):
parsed_version = parse_version_string(version_str)
if len(version) == 1:
assert parsed_version[0] == version[0]
else:
assert parsed_version == version

19
tox.ini
View File

@@ -1,20 +1,15 @@
[tox]
envlist = py26, py27, py33, py34, py35, py36
envlist = {py27,py34,py35,py36,py37,py38}
[testenv]
extras = testing
deps =
pytest>=3.0.7
# For --lf and --ff.
pytest-cache
py27,py34: pytest<3.3
coverage: coverage
setenv =
# https://github.com/tomchristie/django-rest-framework/issues/1957
# tox corrupts __pycache__, solution from here:
PYTHONDONTWRITEBYTECODE=1
coverage: TOX_TESTENV_COMMAND=coverage run -m pytest
commands =
py.test {posargs:parso test}
[testenv:cov]
deps =
coverage
{[testenv]deps}
commands =
coverage run --source parso -m py.test
coverage report
{env:TOX_TESTENV_COMMAND:pytest} {posargs}
coverage: coverage report