Mirror of https://github.com/davidhalter/parso.git (synced 2025-12-06 21:04:29 +08:00)
Compare commits
739 Commits
[Commit table omitted: the page listed 739 commits (newest 59df3fab43, oldest shown a5b9177d4f), but the author, date, and message columns did not survive extraction.]
.coveragerc

@@ -1,4 +1,5 @@
[run]
source = parso

[report]
# Regexes for lines to exclude from consideration
.gitignore (vendored, 2 changed lines)

@@ -9,3 +9,5 @@
/dist/
parso.egg-info/
/.cache/
/.pytest_cache
test/fuzz-redo.pickle
.travis.yml (21 changed lines)

@@ -1,26 +1,25 @@
dist: xenial
language: python
sudo: false
python:
    - 2.6
    - 2.7
    - 3.3
    - 3.4
    - 3.5
    - 3.6
    - pypy
    - 3.7
    - 3.8-dev
    - pypy2.7-6.0
    - pypy3.5-6.0
matrix:
    allow_failures:
        - python: pypy
        - env: TOXENV=cov
    include:
        - python: 3.5
          env: TOXENV=cov
          env: TOXENV=py35-coverage
install:
    - pip install --quiet tox-travis
script:
    - tox
after_script:
    - if [ $TOXENV == "cov" ]; then
      pip install --quiet coveralls;
      coveralls;
    - |
      if [ "${TOXENV%-coverage}" == "$TOXENV" ]; then
        pip install --quiet coveralls;
        coveralls;
      fi
AUTHORS.txt

@@ -5,6 +5,7 @@ David Halter (@davidhalter) <davidhalter88@gmail.com>

Code Contributors
=================
Alisdair Robertson (@robodair)


Code Contributors (to Jedi and therefore possibly to this library)
CHANGELOG.rst

@@ -3,8 +3,69 @@
Changelog
---------

0.5.0 (2019-06-20)
++++++++++++++++++

0.1.0 (2017-05-30)
- **Breaking Change** comp_for is now called sync_comp_for for all Python
  versions to be compatible with the Python 3.8 Grammar
- Added .pyi stubs for a lot of the parso API
- Small FileIO changes

0.4.0 (2019-04-05)
++++++++++++++++++

- Python 3.8 support
- FileIO support, it's now possible to use abstract file IO, support is alpha

0.3.4 (2019-02-13)
+++++++++++++++++++

- Fix an f-string tokenizer error

0.3.3 (2019-02-06)
+++++++++++++++++++

- Fix async errors in the diff parser
- A fix in iter_errors
- This is a very small bugfix release

0.3.2 (2019-01-24)
+++++++++++++++++++

- 20+ bugfixes in the diff parser and 3 in the tokenizer
- A fuzzer for the diff parser, to give confidence that the diff parser is in a
  good shape.
- Some bugfixes for f-string

0.3.1 (2018-07-09)
+++++++++++++++++++

- Bugfixes in the diff parser and keyword-only arguments

0.3.0 (2018-06-30)
+++++++++++++++++++

- Rewrote the pgen2 parser generator.

0.2.1 (2018-05-21)
+++++++++++++++++++

- A bugfix for the diff parser.
- Grammar files can now be loaded from a specific path.

0.2.0 (2018-04-15)
+++++++++++++++++++

- f-strings are now parsed as a part of the normal Python grammar. This makes
  it way easier to deal with them.

0.1.1 (2017-11-05)
+++++++++++++++++++

- Fixed a few bugs in the caching layer
- Added support for Python 3.7

0.1.0 (2017-09-04)
+++++++++++++++++++

- Pulling the library out of Jedi. Some APIs will definitely change.
CONTRIBUTING.md

@@ -1,5 +1,8 @@
We <3 Pull Requests! Only two things:
We <3 Pull Requests! Three core things:

1. If you are adding functionality or fixing a bug, please add a test!
2. Add your name to AUTHORS.txt
3. Use the PEP8 style guide.

If you want to add methods to the parser tree, we will need to discuss this in
an issue first.
LICENSE.txt

@@ -4,11 +4,14 @@ Some Python files have been taken from the standard library and are therefore
PSF licensed. Modifications on these files are dual licensed (both MIT and
PSF). These files are:

- parso/pgen2
- parso/pgen2/*
- parso/tokenize.py
- parso/token.py
- test/test_pgen2.py

Also some test files under test/normalizer_issue_files have been copied from
https://github.com/PyCQA/pycodestyle (Expat License == MIT License).

-------------------------------------------------------------------------------
The MIT License (MIT)
README.rst (83 changed lines)

@@ -1,62 +1,91 @@
###################################################################
parso - A Python Parser Written in Python
parso - A Python Parser
###################################################################

.. image:: https://secure.travis-ci.org/davidhalter/parso.png?branch=master
    :target: http://travis-ci.org/davidhalter/parso
    :alt: Travis-CI build status

.. image:: https://coveralls.io/repos/davidhalter/parso/badge.png?branch=master
    :target: https://coveralls.io/r/davidhalter/parso
.. image:: https://travis-ci.org/davidhalter/parso.svg?branch=master
    :target: https://travis-ci.org/davidhalter/parso
    :alt: Travis CI build status

.. image:: https://coveralls.io/repos/github/davidhalter/parso/badge.svg?branch=master
    :target: https://coveralls.io/github/davidhalter/parso?branch=master
    :alt: Coverage Status

.. image:: https://raw.githubusercontent.com/davidhalter/parso/master/docs/_static/logo_characters.png

Parso is a Python parser that supports error recovery and round-trip parsing.
Parso is a Python parser that supports error recovery and round-trip parsing
for different Python versions (in multiple Python versions). Parso is also able
to list multiple syntax errors in your python file.

Parso has been battle-tested by jedi_. It was pulled out of jedi to be useful
for other projects as well.

Parso is very simplistic. It consists of a small API to parse Python and
analyse the parsing tree.
Parso consists of a small API to parse Python and analyse the syntax tree.

A simple example:

Ressources
==========
.. code-block:: python

    >>> import parso
    >>> module = parso.parse('hello + 1', version="3.6")
    >>> expr = module.children[0]
    >>> expr
    PythonNode(arith_expr, [<Name: hello@1,0>, <Operator: +>, <Number: 1>])
    >>> print(expr.get_code())
    hello + 1
    >>> name = expr.children[0]
    >>> name
    <Name: hello@1,0>
    >>> name.end_pos
    (1, 5)
    >>> expr.end_pos
    (1, 9)

To list multiple issues:

.. code-block:: python

    >>> grammar = parso.load_grammar()
    >>> module = grammar.parse('foo +\nbar\ncontinue')
    >>> error1, error2 = grammar.iter_errors(module)
    >>> error1.message
    'SyntaxError: invalid syntax'
    >>> error2.message
    "SyntaxError: 'continue' not properly in loop"

Resources
=========

- `Testing <https://parso.readthedocs.io/en/latest/docs/development.html#testing>`_
- `PyPI <https://pypi.python.org/pypi/parso>`_
- `Docs <https://parso.readthedocs.org/en/latest/>`_
- Uses `semantic versioning <http://semver.org/>`_
- Uses `semantic versioning <https://semver.org/>`_

Installation
============

    pip install parso

Future
======

Testing
=======
- There will be better support for refactoring and comments. Stay tuned.
- There's a WIP PEP8 validator. It's however not in a good shape, yet.

The test suite depends on ``tox`` and ``pytest``::
Known Issues
============

    pip install tox pytest
- `async`/`await` are already used as keywords in Python3.6.
- `from __future__ import print_function` is not ignored.

To run the tests for all supported Python versions::

    tox

If you want to test only a specific Python version (e.g. Python 2.7), it's as
easy as ::

    tox -e py27

Tests are also run automatically on `Travis CI
<https://travis-ci.org/davidhalter/parso/>`_.

Acknowledgements
================

- Guido van Rossum (@gvanrossum) for creating the parser generator pgen2
  (originally used in lib2to3).
- `Salome Schneider <https://www.crepes-schnaegg.ch/cr%C3%AApes-schn%C3%A4gg/kunst-f%C3%BCrs-cr%C3%AApes-mobil/>`_
  for the extremely awesome parso logo.


.. _jedi: https://github.com/davidhalter/jedi
conftest.py (138 changed lines)

@@ -1,13 +1,22 @@
import re
import tempfile
import shutil
import logging
import sys
import os

import pytest

import parso
from parso import cache

from parso.utils import parse_version_string

collect_ignore = ["setup.py"]

VERSIONS_2 = '2.6', '2.7'
VERSIONS_3 = '3.3', '3.4', '3.5', '3.6', '3.7', '3.8'


@pytest.fixture(scope='session')
def clean_parso_cache():
    """

@@ -25,3 +34,130 @@ def clean_parso_cache():
    yield
    cache._default_cache_path = old
    shutil.rmtree(tmp)


def pytest_addoption(parser):
    parser.addoption("--logging", "-L", action='store_true',
                     help="Enables the logging output.")


def pytest_generate_tests(metafunc):
    if 'normalizer_issue_case' in metafunc.fixturenames:
        base_dir = os.path.join(os.path.dirname(__file__), 'test', 'normalizer_issue_files')

        cases = list(collect_normalizer_tests(base_dir))
        metafunc.parametrize(
            'normalizer_issue_case',
            cases,
            ids=[c.name for c in cases]
        )
    elif 'each_version' in metafunc.fixturenames:
        metafunc.parametrize('each_version', VERSIONS_2 + VERSIONS_3)
    elif 'each_py2_version' in metafunc.fixturenames:
        metafunc.parametrize('each_py2_version', VERSIONS_2)
    elif 'each_py3_version' in metafunc.fixturenames:
        metafunc.parametrize('each_py3_version', VERSIONS_3)
    elif 'version_ge_py36' in metafunc.fixturenames:
        metafunc.parametrize('version_ge_py36', ['3.6', '3.7'])


class NormalizerIssueCase(object):
    """
    Static Analysis cases lie in the static_analysis folder.
    The tests also start with `#!`, like the goto_definition tests.
    """
    def __init__(self, path):
        self.path = path
        self.name = os.path.basename(path)
        match = re.search(r'python([\d.]+)\.py', self.name)
        self.python_version = match and match.group(1)


def collect_normalizer_tests(base_dir):
    for f_name in os.listdir(base_dir):
        if f_name.endswith(".py"):
            path = os.path.join(base_dir, f_name)
            yield NormalizerIssueCase(path)


def pytest_configure(config):
    if config.option.logging:
        root = logging.getLogger()
        root.setLevel(logging.DEBUG)

        ch = logging.StreamHandler(sys.stdout)
        ch.setLevel(logging.DEBUG)
        #formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        #ch.setFormatter(formatter)

        root.addHandler(ch)


class Checker():
    def __init__(self, version, is_passing):
        self.version = version
        self._is_passing = is_passing
        self.grammar = parso.load_grammar(version=self.version)

    def parse(self, code):
        if self._is_passing:
            return parso.parse(code, version=self.version, error_recovery=False)
        else:
            self._invalid_syntax(code)

    def _invalid_syntax(self, code):
        with pytest.raises(parso.ParserSyntaxError):
            module = parso.parse(code, version=self.version, error_recovery=False)
            # For debugging
            print(module.children)

    def get_error(self, code):
        errors = list(self.grammar.iter_errors(self.grammar.parse(code)))
        assert bool(errors) != self._is_passing
        if errors:
            return errors[0]

    def get_error_message(self, code):
        error = self.get_error(code)
        if error is None:
            return
        return error.message

    def assert_no_error_in_passing(self, code):
        if self._is_passing:
            module = self.grammar.parse(code)
            assert not list(self.grammar.iter_errors(module))


@pytest.fixture
def works_not_in_py(each_version):
    return Checker(each_version, False)


@pytest.fixture
def works_in_py2(each_version):
    return Checker(each_version, each_version.startswith('2'))


@pytest.fixture
def works_ge_py27(each_version):
    version_info = parse_version_string(each_version)
    return Checker(each_version, version_info >= (2, 7))


@pytest.fixture
def works_ge_py3(each_version):
    version_info = parse_version_string(each_version)
    return Checker(each_version, version_info >= (3, 0))


@pytest.fixture
def works_ge_py35(each_version):
    version_info = parse_version_string(each_version)
    return Checker(each_version, version_info >= (3, 5))


@pytest.fixture
def works_ge_py38(each_version):
    version_info = parse_version_string(each_version)
    return Checker(each_version, version_info >= (3, 8))
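These version-gated fixtures are consumed by name: ``pytest_generate_tests`` parametrizes ``each_version`` over ``VERSIONS_2 + VERSIONS_3``, so one test function fans out into one run per grammar version. A minimal sketch of a consuming test (the test module and code snippet are hypothetical, not from the repository):

```python
# test_example.py (hypothetical test module, for illustration only)

def test_keyword_only_args(works_ge_py3):
    # Checker.parse() returns a module on versions >= 3.0 and asserts a
    # parso.ParserSyntaxError on older versions, so this single call
    # exercises both directions of the version gate.
    works_ge_py3.parse('def f(*, a): pass\n')
```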
deploy-master.sh (new executable file, 52 lines)

@@ -0,0 +1,52 @@
#!/usr/bin/env bash
# The script creates a separate folder in build/ and creates tags there, pushes
# them and then uploads the package to PyPI.

set -eu -o pipefail

BASE_DIR=$(dirname $(readlink -f "$0"))
cd $BASE_DIR

git fetch --tags

PROJECT_NAME=parso
BRANCH=master
BUILD_FOLDER=build

[ -d $BUILD_FOLDER ] || mkdir $BUILD_FOLDER
# Remove the previous deployment first.
# Checkout the right branch
cd $BUILD_FOLDER
rm -rf $PROJECT_NAME
git clone .. $PROJECT_NAME
cd $PROJECT_NAME
git checkout $BRANCH

# Test first.
tox

# Create tag
tag=v$(python -c "import $PROJECT_NAME; print($PROJECT_NAME.__version__)")

master_ref=$(git show-ref -s heads/$BRANCH)
tag_ref=$(git show-ref -s $tag || true)
if [[ $tag_ref ]]; then
    if [[ $tag_ref != $master_ref ]]; then
        echo 'Cannot tag something that has already been tagged with another commit.'
        exit 1
    fi
else
    git tag -a $tag
    git push --tags
fi

# Package and upload to PyPI
#rm -rf dist/ - Not needed anymore, because the folder is never reused.
echo `pwd`
python setup.py sdist bdist_wheel
# Maybe do a pip install twine before.
twine upload dist/*

cd $BASE_DIR
# The tags have been pushed to this repo. Push the tags to github, now.
git push --tags
deploy.sh (deleted file, 13 lines)

@@ -1,13 +0,0 @@
#!/usr/bin/env bash

set -eu -o pipefail

# Create tag
git tag $(python -c 'import parso; print(parso.__version__)')
git push --tags

# Package and upload to PyPI
rm -rf dist/
python setup.py sdist bdist_wheel
# Maybe do a pip install twine before.
twine upload dist/*
docs/_static/logo.png (vendored, binary file not shown)
    Before: 28 KiB, After: 200 KiB

docs/_static/logo_characters.png (vendored, new binary file, not shown)
    After: 55 KiB
docs/_themes/flask/layout.html (vendored, 5 changed lines)

@@ -6,8 +6,8 @@
{% endif %}
<link media="only screen and (max-device-width: 480px)" href="{{
  pathto('_static/small_flask.css', 1) }}" type= "text/css" rel="stylesheet" />
<a href="https://github.com/davidhalter/jedi">
  <img style="position: absolute; top: 0; right: 0; border: 0;" src="https://s3.amazonaws.com/github/ribbons/forkme_right_red_aa0000.png" alt="Fork me on GitHub">
<a href="https://github.com/davidhalter/parso">
  <img style="position: absolute; top: 0; right: 0; border: 0;" src="https://s3.amazonaws.com/github/ribbons/forkme_right_red_aa0000.png" alt="Fork me">
</a>
{% endblock %}
{%- block relbar2 %}{% endblock %}

@@ -19,7 +19,6 @@
{% endblock %}
{%- block footer %}
<div class="footer">
  &copy; Copyright {{ copyright }}.
  Created using <a href="http://sphinx.pocoo.org/">Sphinx</a>.
</div>
{% if pagename == 'index' %}
docs/conf.py

@@ -13,7 +13,6 @@

import sys
import os
import datetime

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the

@@ -45,7 +44,7 @@ master_doc = 'index'

# General information about the project.
project = u'parso'
copyright = u'2012 - {today.year}, parso contributors'.format(today=datetime.date.today())
copyright = u'parso contributors'

import parso
from parso.utils import version_info

@@ -145,7 +144,7 @@ html_sidebars = {
    #'relations.html',
    'ghbuttons.html',
    #'sourcelink.html',
    #'searchbox.html'
    'searchbox.html'
]
}

@@ -274,7 +273,7 @@ autodoc_default_flags = []

# -- Options for intersphinx module --------------------------------------------

intersphinx_mapping = {
    'http://docs.python.org/': None,
    'http://docs.python.org/': ('https://docs.python.org/3.6', None),
}
docs/docs/development.rst (new file, 38 lines)

@@ -0,0 +1,38 @@
.. include:: ../global.rst

Development
===========

If you want to contribute anything to |parso|, just open an issue or pull
request to discuss it. We welcome changes! Please check the ``CONTRIBUTING.md``
file in the repository, first.


Deprecations Process
--------------------

The deprecation process is as follows:

1. A deprecation is announced in the next major/minor release.
2. We wait either at least a year & at least two minor releases until we remove
   the deprecated functionality.


Testing
-------

The test suite depends on ``tox`` and ``pytest``::

    pip install tox pytest

To run the tests for all supported Python versions::

    tox

If you want to test only a specific Python version (e.g. Python 2.7), it's as
easy as::

    tox -e py27

Tests are also run automatically on `Travis CI
<https://travis-ci.org/davidhalter/parso/>`_.
docs/docs/installation.rst (new file, 32 lines)

@@ -0,0 +1,32 @@
.. include:: ../global.rst

Installation and Configuration
==============================

The preferred way (pip)
-----------------------

On any system you can install |parso| directly from the Python package index
using pip::

    sudo pip install parso


From git
--------

If you want to install the current development version (master branch)::

    sudo pip install -e git://github.com/davidhalter/parso.git#egg=parso


Manual installation from a downloaded package (not recommended)
---------------------------------------------------------------

If you prefer not to use an automated package installer, you can `download
<https://github.com/davidhalter/parso/archive/master.zip>`__ a current copy of
|parso| and install it manually.

To install it, navigate to the directory containing `setup.py` on your console
and type::

    sudo python setup.py install
docs/docs/parser-tree.rst (new file, 49 lines)

@@ -0,0 +1,49 @@
.. include:: ../global.rst

.. _parser-tree:

Parser Tree
===========

The parser tree is returned by calling :py:meth:`parso.Grammar.parse`.

.. note:: Note that parso positions are always 1 based for lines and zero
   based for columns. This means the first position in a file is (1, 0).

Parser Tree Base Classes
------------------------

Generally there are two types of classes you will deal with:
:py:class:`parso.tree.Leaf` and :py:class:`parso.tree.BaseNode`.

.. autoclass:: parso.tree.BaseNode
    :show-inheritance:
    :members:

.. autoclass:: parso.tree.Leaf
    :show-inheritance:
    :members:

All nodes and leaves have these methods/properties:

.. autoclass:: parso.tree.NodeOrLeaf
    :members:
    :undoc-members:
    :show-inheritance:


Python Parser Tree
------------------

.. currentmodule:: parso.python.tree

.. automodule:: parso.python.tree
    :members:
    :undoc-members:
    :show-inheritance:


Utility
-------

.. autofunction:: parso.tree.search_ancestor
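Since this page is built almost entirely from autodoc directives, a concrete sketch of the two node kinds may help: ``BaseNode`` instances have ``children``, leaves carry the token ``value``, and ``get_code()`` round-trips the source. The dump format below is illustrative, not part of the parso API:

```python
import parso

def dump(node, indent=0):
    # Branch nodes group children; leaves are the actual tokens.
    if hasattr(node, 'children'):
        print(' ' * indent + node.type)
        for child in node.children:
            dump(child, indent + 2)
    else:
        print('%s%s %r %s' % (' ' * indent, node.type, node.value, node.start_pos))

module = parso.parse('hello + 1')
dump(module)
assert module.get_code() == 'hello + 1'  # round-trip parsing
```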
docs/docs/usage.rst (new file, 68 lines)

@@ -0,0 +1,68 @@
.. include:: ../global.rst

Usage
=====

|parso| works around grammars. You can simply create Python grammars by calling
:py:func:`parso.load_grammar`. Grammars (with a custom tokenizer and custom parser trees)
can also be created by directly instantiating :py:func:`parso.Grammar`. More information
about the resulting objects can be found in the :ref:`parser tree documentation
<parser-tree>`.

The simplest way of using parso is without even loading a grammar
(:py:func:`parso.parse`):

.. sourcecode:: python

    >>> import parso
    >>> parso.parse('foo + bar')
    <Module: @1-1>

Loading a Grammar
-----------------

Typically if you want to work with one specific Python version, use:

.. autofunction:: parso.load_grammar

Grammar methods
---------------

You will get back a grammar object that you can use to parse code and find
issues in it:

.. autoclass:: parso.Grammar
    :members:
    :undoc-members:


Error Retrieval
---------------

|parso| is able to find multiple errors in your source code. Iterating through
those errors yields the following instances:

.. autoclass:: parso.normalizer.Issue
    :members:
    :undoc-members:


Utility
-------

|parso| also offers some utility functions that can be really useful:

.. autofunction:: parso.parse
.. autofunction:: parso.split_lines
.. autofunction:: parso.python_bytes_to_unicode


Used By
-------

- jedi_ (which is used by IPython and a lot of editor plugins).
- mutmut_ (mutation tester)


.. _jedi: https://github.com/davidhalter/jedi
.. _mutmut: https://github.com/boxed/mutmut
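As a companion to the ``Issue`` autodoc stub above, a short sketch of iterating errors; the messages follow the README example earlier in this diff, and the attribute names are taken from ``parso.normalizer.Issue``:

```python
import parso

grammar = parso.load_grammar()
module = grammar.parse('foo +\nbar\ncontinue')
for issue in grammar.iter_errors(module):
    # Each Issue knows where it starts and carries a human-readable message.
    print(issue.start_pos, issue.message)
# Expected messages:
#   SyntaxError: invalid syntax
#   SyntaxError: 'continue' not properly in loop
```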
docs/index.rst (new file, 31 lines)

@@ -0,0 +1,31 @@
.. include:: global.rst

parso - A Python Parser
=======================

Release v\ |release|. (:doc:`Installation <docs/installation>`)

.. automodule:: parso

.. _toc:

Docs
----

.. toctree::
   :maxdepth: 2

   docs/installation
   docs/usage
   docs/parser-tree
   docs/development


.. _resources:

Resources
---------

- `Source Code on Github <https://github.com/davidhalter/parso>`_
- `Travis Testing <https://travis-ci.org/davidhalter/parso>`_
- `Python Package Index <http://pypi.python.org/pypi/parso/>`_
parso/__init__.py

@@ -1,11 +1,58 @@
r"""
Parso is a Python parser that supports error recovery and round-trip parsing
for different Python versions (in multiple Python versions). Parso is also able
to list multiple syntax errors in your python file.

Parso has been battle-tested by jedi_. It was pulled out of jedi to be useful
for other projects as well.

Parso consists of a small API to parse Python and analyse the syntax tree.

.. _jedi: https://github.com/davidhalter/jedi

A simple example:

>>> import parso
>>> module = parso.parse('hello + 1', version="3.6")
>>> expr = module.children[0]
>>> expr
PythonNode(arith_expr, [<Name: hello@1,0>, <Operator: +>, <Number: 1>])
>>> print(expr.get_code())
hello + 1
>>> name = expr.children[0]
>>> name
<Name: hello@1,0>
>>> name.end_pos
(1, 5)
>>> expr.end_pos
(1, 9)

To list multiple issues:

>>> grammar = parso.load_grammar()
>>> module = grammar.parse('foo +\nbar\ncontinue')
>>> error1, error2 = grammar.iter_errors(module)
>>> error1.message
'SyntaxError: invalid syntax'
>>> error2.message
"SyntaxError: 'continue' not properly in loop"
"""

from parso.parser import ParserSyntaxError
from parso.pgen2.pgen import generate_grammar
from parso import python
from parso.grammar import Grammar, load_grammar
from parso.utils import split_lines, python_bytes_to_unicode


__version__ = '0.0.1'
__version__ = '0.5.0'


def parse(grammar, code):
    raise NotImplementedError
    Parser(grammar, code)
def parse(code=None, **kwargs):
    """
    A utility function to avoid loading grammars.
    Params are documented in :py:meth:`parso.Grammar.parse`.

    :param str version: The version used by :py:func:`parso.load_grammar`.
    """
    version = kwargs.pop('version', None)
    grammar = load_grammar(version=version)
    return grammar.parse(code, **kwargs)
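The rewritten module-level ``parse`` is a thin convenience wrapper; based on its body above, these two calls are equivalent:

```python
import parso

tree1 = parso.parse('x = 1\n', version='3.6')   # convenience one-liner

grammar = parso.load_grammar(version='3.6')     # what it does internally
tree2 = grammar.parse('x = 1\n')

assert tree1.get_code() == tree2.get_code() == 'x = 1\n'
```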
parso/__init__.pyi (new file, 19 lines)

@@ -0,0 +1,19 @@
from typing import Any, Optional, Union

from parso.grammar import Grammar as Grammar, load_grammar as load_grammar
from parso.parser import ParserSyntaxError as ParserSyntaxError
from parso.utils import python_bytes_to_unicode as python_bytes_to_unicode, split_lines as split_lines

__version__: str = ...

def parse(
    code: Optional[Union[str, bytes]],
    *,
    version: Optional[str] = None,
    error_recovery: bool = True,
    path: Optional[str] = None,
    start_symbol: Optional[str] = None,
    cache: bool = False,
    diff_cache: bool = False,
    cache_path: Optional[str] = None,
) -> Any: ...
parso/_compatibility.py

@@ -3,6 +3,7 @@ To ensure compatibility from Python ``2.6`` - ``3.3``, a module has been
created. Clearly there is huge need to use conforming syntax.
"""
import sys
import platform

# Cannot use sys.version.major and minor names, because in Python 2.6 it's not
# a namedtuple.

@@ -14,6 +15,8 @@ try:
except NameError:
    unicode = str

is_pypy = platform.python_implementation() == 'PyPy'


def use_metaclass(meta, *bases):
    """ Create a class with a metaclass. """

@@ -33,7 +36,7 @@ except AttributeError:
    def u(string):
        """Cast to unicode DAMMIT!
        Written because Python2 repr always implicitly casts to a string, so we
        have to cast back to a unicode (and we now that we always deal with valid
        have to cast back to a unicode (and we know that we always deal with valid
        unicode, because we check that in the beginning).
        """
        if py_version >= 30:

@@ -66,3 +69,35 @@ def utf8_repr(func):
        return func
    else:
        return wrapper


try:
    from functools import total_ordering
except ImportError:
    # Python 2.6
    def total_ordering(cls):
        """Class decorator that fills in missing ordering methods"""
        convert = {
            '__lt__': [('__gt__', lambda self, other: not (self < other or self == other)),
                       ('__le__', lambda self, other: self < other or self == other),
                       ('__ge__', lambda self, other: not self < other)],
            '__le__': [('__ge__', lambda self, other: not self <= other or self == other),
                       ('__lt__', lambda self, other: self <= other and not self == other),
                       ('__gt__', lambda self, other: not self <= other)],
            '__gt__': [('__lt__', lambda self, other: not (self > other or self == other)),
                       ('__ge__', lambda self, other: self > other or self == other),
                       ('__le__', lambda self, other: not self > other)],
            '__ge__': [('__le__', lambda self, other: (not self >= other) or self == other),
                       ('__gt__', lambda self, other: self >= other and not self == other),
                       ('__lt__', lambda self, other: not self >= other)]
        }
        roots = set(dir(cls)) & set(convert)
        if not roots:
            raise ValueError('must define at least one ordering operation: < > <= >=')
        root = max(roots)   # prefer __lt__ to __le__ to __gt__ to __ge__
        for opname, opfunc in convert[root]:
            if opname not in roots:
                opfunc.__name__ = opname
                opfunc.__doc__ = getattr(int, opname).__doc__
                setattr(cls, opname, opfunc)
        return cls
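The ``total_ordering`` backport synthesizes the missing rich-comparison methods from whichever one the class defines (plus ``__eq__``). A small illustration with a hypothetical class:

```python
@total_ordering                      # stdlib on 2.7+, the backport above on 2.6
class Version(object):
    def __init__(self, n):
        self.n = n

    def __eq__(self, other):
        return self.n == other.n

    def __lt__(self, other):
        # __gt__, __le__ and __ge__ are derived from this one.
        return self.n < other.n

assert Version(1) < Version(2)
assert Version(2) >= Version(1)      # generated by the decorator
```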
parso/cache.py

@@ -4,15 +4,21 @@ import sys
import hashlib
import gc
import shutil
import pickle
import platform
import errno
import logging

try:
    import cPickle as pickle
except:
    import pickle

from parso._compatibility import FileNotFoundError

LOG = logging.getLogger(__name__)

_PICKLE_VERSION = 30

_PICKLE_VERSION = 32
"""
Version number (integer) for file system cache.

@@ -39,30 +45,30 @@ we generate something similar. See:
http://docs.python.org/3/library/sys.html#sys.implementation
"""


def _get_default_cache_path():
    if platform.system().lower() == 'windows':
        dir_ = os.path.join(os.getenv('APPDATA') or '~', 'Parso', 'Parso')
        dir_ = os.path.join(os.getenv('LOCALAPPDATA') or '~', 'Parso', 'Parso')
    elif platform.system().lower() == 'darwin':
        dir_ = os.path.join('~', 'Library', 'Caches', 'Parso')
    else:
        dir_ = os.path.join(os.getenv('XDG_CACHE_HOME') or '~/.cache', 'parso')
    return os.path.expanduser(dir_)


_default_cache_path = _get_default_cache_path()
"""
The path where the cache is stored.

On Linux, this defaults to ``~/.cache/parso/``, on OS X to
``~/Library/Caches/Parso/`` and on Windows to ``%APPDATA%\\Parso\\Parso\\``.
``~/Library/Caches/Parso/`` and on Windows to ``%LOCALAPPDATA%\\Parso\\Parso\\``.
On Linux, if environment variable ``$XDG_CACHE_HOME`` is set,
``$XDG_CACHE_HOME/parso`` is used instead of the default one.
"""

# for fast_parser, should not be deleted
parser_cache = {}


class _NodeCacheItem(object):
    def __init__(self, node, lines, change_time=None):
        self.node = node

@@ -72,26 +78,29 @@ class _NodeCacheItem(object):
        self.change_time = change_time


def load_module(grammar, path, cache_path=None):
def load_module(hashed_grammar, file_io, cache_path=None):
    """
    Returns a module or None, if it fails.
    """
    try:
        p_time = os.path.getmtime(path)
    except FileNotFoundError:
    p_time = file_io.get_last_modified()
    if p_time is None:
        return None

    try:
        # TODO Add grammar sha256
        module_cache_item = parser_cache[path]
        module_cache_item = parser_cache[hashed_grammar][file_io.path]
        if p_time <= module_cache_item.change_time:
            return module_cache_item.node
    except KeyError:
        return _load_from_file_system(grammar, path, p_time, cache_path=cache_path)
        return _load_from_file_system(
            hashed_grammar,
            file_io.path,
            p_time,
            cache_path=cache_path
        )


def _load_from_file_system(grammar, path, p_time, cache_path=None):
    cache_path = _get_hashed_path(grammar, path, cache_path=cache_path)
def _load_from_file_system(hashed_grammar, path, p_time, cache_path=None):
    cache_path = _get_hashed_path(hashed_grammar, path, cache_path=cache_path)
    try:
        try:
            if p_time > os.path.getmtime(cache_path):

@@ -113,37 +122,30 @@ def _load_from_file_system(grammar, path, p_time, cache_path=None):
    except FileNotFoundError:
        return None
    else:
        parser_cache[path] = module_cache_item
        logging.debug('pickle loaded: %s', path)
        parser_cache.setdefault(hashed_grammar, {})[path] = module_cache_item
        LOG.debug('pickle loaded: %s', path)
        return module_cache_item.node


def save_module(grammar, path, module, lines, pickling=True, cache_path=None):
def save_module(hashed_grammar, file_io, module, lines, pickling=True, cache_path=None):
    path = file_io.path
    try:
        p_time = None if path is None else os.path.getmtime(path)
        p_time = None if path is None else file_io.get_last_modified()
    except OSError:
        p_time = None
        pickling = False

    item = _NodeCacheItem(module, lines, p_time)
    parser_cache[path] = item
    parser_cache.setdefault(hashed_grammar, {})[path] = item
    if pickling and path is not None:
        _save_to_file_system(grammar, path, item)
        _save_to_file_system(hashed_grammar, path, item, cache_path=cache_path)


def _save_to_file_system(grammar, path, item, cache_path=None):
    with open(_get_hashed_path(grammar, path, cache_path=cache_path), 'wb') as f:
def _save_to_file_system(hashed_grammar, path, item, cache_path=None):
    with open(_get_hashed_path(hashed_grammar, path, cache_path=cache_path), 'wb') as f:
        pickle.dump(item, f, pickle.HIGHEST_PROTOCOL)


def remove_old_modules(self):
    """
    # TODO Might want to use such a function to clean up the cache (if it's
    # too old). We could potentially also scan for old files in the
    # directory and delete those.
    """


def clear_cache(cache_path=None):
    if cache_path is None:
        cache_path = _default_cache_path

@@ -151,11 +153,11 @@ def clear_cache(cache_path=None):
    parser_cache.clear()


def _get_hashed_path(grammar, path, cache_path=None):
def _get_hashed_path(hashed_grammar, path, cache_path=None):
    directory = _get_cache_directory_path(cache_path=cache_path)

    file_hash = hashlib.sha256(path.encode("utf-8")).hexdigest()
    return os.path.join(directory, '%s-%s.pkl' % (grammar.sha256, file_hash))
    return os.path.join(directory, '%s-%s.pkl' % (hashed_grammar, file_hash))


def _get_cache_directory_path(cache_path=None):
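The rework keys the in-memory cache twice, ``parser_cache[hashed_grammar][path]``, and derives the on-disk file name from both hashes, following ``_get_hashed_path`` above. A sketch of the resulting path (the values are illustrative):

```python
import hashlib
import os

hashed_grammar = hashlib.sha256(b'<grammar file text>').hexdigest()  # Grammar._hashed
path = '/home/user/project/mod.py'                                   # parsed source file

file_hash = hashlib.sha256(path.encode('utf-8')).hexdigest()
cache_file = os.path.join(os.path.expanduser('~/.cache/parso'),
                          '%s-%s.pkl' % (hashed_grammar, file_hash))
# One pickle per (grammar, source file) pair, so a cache entry written with
# one grammar version can never be returned for another.
```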
parso/file_io.py (new file, 35 lines)

@@ -0,0 +1,35 @@
import os


class FileIO(object):
    def __init__(self, path):
        self.path = path

    def read(self):  # Returns bytes/str
        # We would like to read unicode here, but we cannot, because we are not
        # sure if it is a valid unicode file. Therefore just read whatever is
        # here.
        with open(self.path, 'rb') as f:
            return f.read()

    def get_last_modified(self):
        """
        Returns float - timestamp or None, if path doesn't exist.
        """
        try:
            return os.path.getmtime(self.path)
        except OSError:
            # Might raise FileNotFoundError, OSError for Python 2
            return None

    def __repr__(self):
        return '%s(%s)' % (self.__class__.__name__, self.path)


class KnownContentFileIO(FileIO):
    def __init__(self, path, content):
        super(KnownContentFileIO, self).__init__(path)
        self._content = content

    def read(self):
        return self._content
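``KnownContentFileIO`` is the variant ``Grammar._parse`` constructs when ``code`` is passed directly (see grammar.py below), so the cache layer can treat in-memory code and on-disk files uniformly. A quick sketch (the file name is illustrative):

```python
from parso.file_io import FileIO, KnownContentFileIO

disk_io = FileIO('example.py')                         # read() opens the file in 'rb' mode
mem_io = KnownContentFileIO('example.py', b'x = 1\n')  # read() returns the given content

assert mem_io.read() == b'x = 1\n'
assert mem_io.path == disk_io.path                     # the path still serves as cache key
```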
parso/grammar.py (new file, 255 lines; the diff is truncated below)

@@ -0,0 +1,255 @@
import hashlib
import os

from parso._compatibility import FileNotFoundError, is_pypy
from parso.pgen2 import generate_grammar
from parso.utils import split_lines, python_bytes_to_unicode, parse_version_string
from parso.python.diff import DiffParser
from parso.python.tokenize import tokenize_lines, tokenize
from parso.python.token import PythonTokenTypes
from parso.cache import parser_cache, load_module, save_module
from parso.parser import BaseParser
from parso.python.parser import Parser as PythonParser
from parso.python.errors import ErrorFinderConfig
from parso.python import pep8
from parso.file_io import FileIO, KnownContentFileIO

_loaded_grammars = {}


class Grammar(object):
    """
    :py:func:`parso.load_grammar` returns instances of this class.

    Creating custom none-python grammars by calling this is not supported, yet.
    """
    #:param text: A BNF representation of your grammar.
    _error_normalizer_config = None
    _token_namespace = None
    _default_normalizer_config = pep8.PEP8NormalizerConfig()

    def __init__(self, text, tokenizer, parser=BaseParser, diff_parser=None):
        self._pgen_grammar = generate_grammar(
            text,
            token_namespace=self._get_token_namespace()
        )
        self._parser = parser
        self._tokenizer = tokenizer
        self._diff_parser = diff_parser
        self._hashed = hashlib.sha256(text.encode("utf-8")).hexdigest()

    def parse(self, code=None, **kwargs):
        """
        If you want to parse a Python file you want to start here, most likely.

        If you need finer grained control over the parsed instance, there will be
        other ways to access it.

        :param str code: A unicode or bytes string. When it's not possible to
            decode bytes to a string, returns a
            :py:class:`UnicodeDecodeError`.
        :param bool error_recovery: If enabled, any code will be returned. If
            it is invalid, it will be returned as an error node. If disabled,
            you will get a ParseError when encountering syntax errors in your
            code.
        :param str start_symbol: The grammar rule (nonterminal) that you want
            to parse. Only allowed to be used when error_recovery is False.
        :param str path: The path to the file you want to open. Only needed for caching.
        :param bool cache: Keeps a copy of the parser tree in RAM and on disk
            if a path is given. Returns the cached trees if the corresponding
            files on disk have not changed.
        :param bool diff_cache: Diffs the cached python module against the new
            code and tries to parse only the parts that have changed. Returns
            the same (changed) module that is found in cache. Using this option
            requires you to not do anything anymore with the cached modules
            under that path, because the contents of it might change. This
            option is still somewhat experimental. If you want stability,
            please don't use it.
        :param bool cache_path: If given saves the parso cache in this
            directory. If not given, defaults to the default cache places on
            each platform.

        :return: A subclass of :py:class:`parso.tree.NodeOrLeaf`. Typically a
            :py:class:`parso.python.tree.Module`.
        """
        if 'start_pos' in kwargs:
            raise TypeError("parse() got an unexpected keyword argument.")
        return self._parse(code=code, **kwargs)

    def _parse(self, code=None, error_recovery=True, path=None,
               start_symbol=None, cache=False, diff_cache=False,
               cache_path=None, file_io=None, start_pos=(1, 0)):
        """
        Wanted python3.5 * operator and keyword only arguments. Therefore just
        wrap it all.
        start_pos here is just a parameter internally used. Might be public
        sometime in the future.
        """
        if code is None and path is None and file_io is None:
            raise TypeError("Please provide either code or a path.")

        if start_symbol is None:
            start_symbol = self._start_nonterminal

        if error_recovery and start_symbol != 'file_input':
            raise NotImplementedError("This is currently not implemented.")

        if file_io is None:
            if code is None:
                file_io = FileIO(path)
            else:
                file_io = KnownContentFileIO(path, code)

        if cache and file_io.path is not None:
            module_node = load_module(self._hashed, file_io, cache_path=cache_path)
            if module_node is not None:
                return module_node

        if code is None:
            code = file_io.read()
        code = python_bytes_to_unicode(code)

        lines = split_lines(code, keepends=True)
        if diff_cache:
            if self._diff_parser is None:
                raise TypeError("You have to define a diff parser to be able "
                                "to use this option.")
            try:
                module_cache_item = parser_cache[self._hashed][file_io.path]
            except KeyError:
                pass
            else:
                module_node = module_cache_item.node
                old_lines = module_cache_item.lines
                if old_lines == lines:
                    return module_node

                new_node = self._diff_parser(
                    self._pgen_grammar, self._tokenizer, module_node
                ).update(
                    old_lines=old_lines,
                    new_lines=lines
                )
                save_module(self._hashed, file_io, new_node, lines,
                            # Never pickle in pypy, it's slow as hell.
                            pickling=cache and not is_pypy,
                            cache_path=cache_path)
                return new_node

        tokens = self._tokenizer(lines, start_pos)

        p = self._parser(
            self._pgen_grammar,
            error_recovery=error_recovery,
            start_nonterminal=start_symbol
        )
        root_node = p.parse(tokens=tokens)

        if cache or diff_cache:
            save_module(self._hashed, file_io, root_node, lines,
                        # Never pickle in pypy, it's slow as hell.
                        pickling=cache and not is_pypy,
                        cache_path=cache_path)
        return root_node

    def _get_token_namespace(self):
        ns = self._token_namespace
        if ns is None:
            raise ValueError("The token namespace should be set.")
        return ns

    def iter_errors(self, node):
        """
        Given a :py:class:`parso.tree.NodeOrLeaf` returns a generator of
        :py:class:`parso.normalizer.Issue` objects. For Python this is
        a list of syntax/indentation errors.
        """
        if self._error_normalizer_config is None:
            raise ValueError("No error normalizer specified for this grammar.")

        return self._get_normalizer_issues(node, self._error_normalizer_config)

    def _get_normalizer(self, normalizer_config):
        if normalizer_config is None:
            normalizer_config = self._default_normalizer_config
            if normalizer_config is None:
                raise ValueError("You need to specify a normalizer, because "
                                 "there's no default normalizer for this tree.")
        return normalizer_config.create_normalizer(self)

    def _normalize(self, node, normalizer_config=None):
        """
        TODO this is not public, yet.
        The returned code will be normalized, e.g. PEP8 for Python.
        """
        normalizer = self._get_normalizer(normalizer_config)
        return normalizer.walk(node)

    def _get_normalizer_issues(self, node, normalizer_config=None):
        normalizer = self._get_normalizer(normalizer_config)
        normalizer.walk(node)
        return normalizer.issues

    def __repr__(self):
        nonterminals = self._pgen_grammar.nonterminal_to_dfas.keys()
|
||||
txt = ' '.join(list(nonterminals)[:3]) + ' ...'
|
||||
return '<%s:%s>' % (self.__class__.__name__, txt)
|
||||
|
||||
|
||||
class PythonGrammar(Grammar):
|
||||
_error_normalizer_config = ErrorFinderConfig()
|
||||
_token_namespace = PythonTokenTypes
|
||||
_start_nonterminal = 'file_input'
|
||||
|
||||
def __init__(self, version_info, bnf_text):
|
||||
super(PythonGrammar, self).__init__(
|
||||
bnf_text,
|
||||
tokenizer=self._tokenize_lines,
|
||||
parser=PythonParser,
|
||||
diff_parser=DiffParser
|
||||
)
|
||||
self.version_info = version_info
|
||||
|
||||
def _tokenize_lines(self, lines, start_pos):
|
||||
return tokenize_lines(lines, self.version_info, start_pos=start_pos)
|
||||
|
||||
def _tokenize(self, code):
|
||||
# Used by Jedi.
|
||||
return tokenize(code, self.version_info)
|
||||
|
||||
|
||||
def load_grammar(**kwargs):
|
||||
"""
|
||||
Loads a :py:class:`parso.Grammar`. The default version is the current Python
|
||||
version.
|
||||
|
||||
:param str version: A python version string, e.g. ``version='3.3'``.
|
||||
:param str path: A path to a grammar file
|
||||
"""
|
||||
def load_grammar(language='python', version=None, path=None):
|
||||
if language == 'python':
|
||||
version_info = parse_version_string(version)
|
||||
|
||||
file = path or os.path.join(
|
||||
'python',
|
||||
'grammar%s%s.txt' % (version_info.major, version_info.minor)
|
||||
)
|
||||
|
||||
global _loaded_grammars
|
||||
path = os.path.join(os.path.dirname(__file__), file)
|
||||
try:
|
||||
return _loaded_grammars[path]
|
||||
except KeyError:
|
||||
try:
|
||||
with open(path) as f:
|
||||
bnf_text = f.read()
|
||||
|
||||
grammar = PythonGrammar(version_info, bnf_text)
|
||||
return _loaded_grammars.setdefault(path, grammar)
|
||||
except FileNotFoundError:
|
||||
message = "Python version %s is currently not supported." % version
|
||||
raise NotImplementedError(message)
|
||||
else:
|
||||
raise NotImplementedError("No support for language %s." % language)
|
||||
|
||||
return load_grammar(**kwargs)
|
||||
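For orientation, a minimal usage sketch of the API above (an editorial illustration, not part of the commit): load_grammar memoizes one PythonGrammar per grammar file, parse returns a module node, and iter_errors runs the error normalizer over it.

import parso

grammar = parso.load_grammar(version='3.6')  # cached PythonGrammar for grammar36.txt
module = grammar.parse('x = 1\nif x:\n    pass\n')
print(module.type)  # 'file_input'

# iter_errors() walks the tree with the ErrorFinder normalizer.
for issue in grammar.iter_errors(grammar.parse('def f(:\n')):
    print(issue.code, issue.start_pos, issue.message)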
parso/grammar.pyi (new file, 38 lines)
@@ -0,0 +1,38 @@
from typing import Any, Callable, Generic, Optional, Sequence, TypeVar, Union
from typing_extensions import Literal

from parso.utils import PythonVersionInfo

_Token = Any
_NodeT = TypeVar("_NodeT")

class Grammar(Generic[_NodeT]):
    _default_normalizer_config: Optional[Any] = ...
    _error_normalizer_config: Optional[Any] = None
    _start_nonterminal: str = ...
    _token_namespace: Optional[str] = None
    def __init__(
        self,
        text: str,
        tokenizer: Callable[[Sequence[str], int], Sequence[_Token]],
        parser: Any = ...,
        diff_parser: Any = ...,
    ) -> None: ...
    def parse(
        self,
        code: Union[str, bytes] = ...,
        error_recovery: bool = ...,
        path: Optional[str] = ...,
        start_symbol: Optional[str] = ...,
        cache: bool = ...,
        diff_cache: bool = ...,
        cache_path: Optional[str] = ...,
    ) -> _NodeT: ...

class PythonGrammar(Grammar):
    version_info: PythonVersionInfo
    def __init__(self, version_info: PythonVersionInfo, bnf_text: str) -> None: ...

def load_grammar(
    language: Literal["python"] = "python", version: Optional[str] = ..., path: str = ...
) -> Grammar: ...
parso/normalizer.py (new file, 183 lines)
@@ -0,0 +1,183 @@
from contextlib import contextmanager

from parso._compatibility import use_metaclass


class _NormalizerMeta(type):
    def __new__(cls, name, bases, dct):
        new_cls = type.__new__(cls, name, bases, dct)
        new_cls.rule_value_classes = {}
        new_cls.rule_type_classes = {}
        return new_cls


class Normalizer(use_metaclass(_NormalizerMeta)):
    def __init__(self, grammar, config):
        self.grammar = grammar
        self._config = config
        self.issues = []

        self._rule_type_instances = self._instantiate_rules('rule_type_classes')
        self._rule_value_instances = self._instantiate_rules('rule_value_classes')

    def _instantiate_rules(self, attr):
        dct = {}
        for base in type(self).mro():
            rules_map = getattr(base, attr, {})
            for type_, rule_classes in rules_map.items():
                new = [rule_cls(self) for rule_cls in rule_classes]
                dct.setdefault(type_, []).extend(new)
        return dct

    def walk(self, node):
        self.initialize(node)
        value = self.visit(node)
        self.finalize()
        return value

    def visit(self, node):
        try:
            children = node.children
        except AttributeError:
            return self.visit_leaf(node)
        else:
            with self.visit_node(node):
                return ''.join(self.visit(child) for child in children)

    @contextmanager
    def visit_node(self, node):
        self._check_type_rules(node)
        yield

    def _check_type_rules(self, node):
        for rule in self._rule_type_instances.get(node.type, []):
            rule.feed_node(node)

    def visit_leaf(self, leaf):
        self._check_type_rules(leaf)

        for rule in self._rule_value_instances.get(leaf.value, []):
            rule.feed_node(leaf)

        return leaf.prefix + leaf.value

    def initialize(self, node):
        pass

    def finalize(self):
        pass

    def add_issue(self, node, code, message):
        issue = Issue(node, code, message)
        if issue not in self.issues:
            self.issues.append(issue)
        return True

    @classmethod
    def register_rule(cls, **kwargs):
        """
        Use it as a class decorator::

            normalizer = Normalizer('grammar', 'config')
            @normalizer.register_rule(value='foo')
            class MyRule(Rule):
                error_code = 42
        """
        return cls._register_rule(**kwargs)

    @classmethod
    def _register_rule(cls, value=None, values=(), type=None, types=()):
        values = list(values)
        types = list(types)
        if value is not None:
            values.append(value)
        if type is not None:
            types.append(type)

        if not values and not types:
            raise ValueError("You must register at least something.")

        def decorator(rule_cls):
            for v in values:
                cls.rule_value_classes.setdefault(v, []).append(rule_cls)
            for t in types:
                cls.rule_type_classes.setdefault(t, []).append(rule_cls)
            return rule_cls

        return decorator


class NormalizerConfig(object):
    normalizer_class = Normalizer

    def create_normalizer(self, grammar):
        if self.normalizer_class is None:
            return None

        return self.normalizer_class(grammar, self)


class Issue(object):
    def __init__(self, node, code, message):
        self._node = node
        self.code = code
        """
        An integer code that stands for the type of error.
        """
        self.message = message
        """
        A message (string) for the issue.
        """
        self.start_pos = node.start_pos
        """
        The start position of the error as a tuple (line, column). As
        always in |parso| the first line is 1 and the first column 0.
        """

    def __eq__(self, other):
        return self.start_pos == other.start_pos and self.code == other.code

    def __ne__(self, other):
        return not self.__eq__(other)

    def __hash__(self):
        return hash((self.code, self.start_pos))

    def __repr__(self):
        return '<%s: %s>' % (self.__class__.__name__, self.code)


class Rule(object):
    code = None
    message = None

    def __init__(self, normalizer):
        self._normalizer = normalizer

    def is_issue(self, node):
        raise NotImplementedError()

    def get_node(self, node):
        return node

    def _get_message(self, message):
        if message is None:
            message = self.message
            if message is None:
                raise ValueError("The message on the class is not set.")
        return message

    def add_issue(self, node, code=None, message=None):
        if code is None:
            code = self.code
            if code is None:
                raise ValueError("The error code on the class is not set.")

        message = self._get_message(message)

        self._normalizer.add_issue(node, code, message)

    def feed_node(self, node):
        if self.is_issue(node):
            issue_node = self.get_node(node)
            self.add_issue(issue_node)
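The register_rule docstring above is terse; this sketch (editorial, with made-up class names and a hypothetical error code) shows how a subclass wires a Rule into the per-class registries created by _NormalizerMeta:

from parso.normalizer import Normalizer, Rule

class MyNormalizer(Normalizer):
    pass

@MyNormalizer.register_rule(value='import')
class NoImportRule(Rule):
    code = 999  # hypothetical error code
    message = 'imports are banned here'

    def is_issue(self, leaf):
        # feed_node() calls this for every leaf whose value is 'import'.
        return True

Because _NormalizerMeta.__new__ gives every subclass fresh rule_value_classes and rule_type_classes dicts, rules registered on MyNormalizer do not leak into other Normalizer subclasses.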
parso/parser.py (182 lines)
@@ -1,3 +1,11 @@
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.

# Modifications:
# Copyright David Halter and Contributors
# Modifications are dual-licensed: MIT and PSF.
# 99% of the code is different from pgen2, now.

"""
The ``Parser`` tries to convert the available Python code in an easy to read
format, something like an abstract syntax tree. The classes that represent this
@@ -16,7 +24,7 @@ complexity of the ``Parser`` (there's another parser sitting inside
``Statement``, which produces ``Array`` and ``Call``).
"""
from parso import tree
from parso.pgen2.parse import PgenParser
from parso.pgen2.generator import ReservedString


class ParserSyntaxError(Exception):
@@ -25,12 +33,81 @@ class ParserSyntaxError(Exception):

    May be raised as an exception.
    """
    def __init__(self, message, position):
    def __init__(self, message, error_leaf):
        self.message = message
        self.position = position
        self.error_leaf = error_leaf


class InternalParseError(Exception):
    """
    Exception to signal the parser is stuck and error recovery didn't help.
    Basically this shouldn't happen. It's a sign that something is really
    wrong.
    """

    def __init__(self, msg, type_, value, start_pos):
        Exception.__init__(self, "%s: type=%r, value=%r, start_pos=%r" %
                           (msg, type_.name, value, start_pos))
        self.msg = msg
        self.type = type_
        self.value = value
        self.start_pos = start_pos


class Stack(list):
    def _allowed_transition_names_and_token_types(self):
        def iterate():
            # An API just for Jedi.
            for stack_node in reversed(self):
                for transition in stack_node.dfa.transitions:
                    if isinstance(transition, ReservedString):
                        yield transition.value
                    else:
                        yield transition  # A token type

                if not stack_node.dfa.is_final:
                    break

        return list(iterate())


class StackNode(object):
    def __init__(self, dfa):
        self.dfa = dfa
        self.nodes = []

    @property
    def nonterminal(self):
        return self.dfa.from_rule

    def __repr__(self):
        return '%s(%s, %s)' % (self.__class__.__name__, self.dfa, self.nodes)


def _token_to_transition(grammar, type_, value):
    # Map from token to label
    if type_.contains_syntax:
        # Check for reserved words (keywords)
        try:
            return grammar.reserved_syntax_strings[value]
        except KeyError:
            pass

    return type_


class BaseParser(object):
    """Parser engine.

    A Parser instance contains state pertaining to the current token
    sequence, and should not be used concurrently by different threads
    to parse separate token sequences.

    See python/tokenize.py for how to get input tokens by a string.

    When a syntax error occurs, error_recovery() is called.
    """

    node_map = {}
    default_node = tree.Node

@@ -38,40 +115,97 @@ class BaseParser(object):
    }
    default_leaf = tree.Leaf

    def __init__(self, grammar, start_symbol='file_input', error_recovery=False):
        self._grammar = grammar
        self._start_symbol = start_symbol
    def __init__(self, pgen_grammar, start_nonterminal='file_input', error_recovery=False):
        self._pgen_grammar = pgen_grammar
        self._start_nonterminal = start_nonterminal
        self._error_recovery = error_recovery

    def parse(self, tokens):
        start_number = self._grammar.symbol2number[self._start_symbol]
        self.pgen_parser = PgenParser(
            self._grammar, self.convert_node, self.convert_leaf,
            self.error_recovery, start_number
        )
        first_dfa = self._pgen_grammar.nonterminal_to_dfas[self._start_nonterminal][0]
        self.stack = Stack([StackNode(first_dfa)])

        node = self.pgen_parser.parse(tokens)
        # The stack is empty now, we don't need it anymore.
        del self.pgen_parser
        return node
        for token in tokens:
            self._add_token(token)

    def error_recovery(self, grammar, stack, arcs, typ, value, start_pos, prefix,
                       add_token_callback):
        while True:
            tos = self.stack[-1]
            if not tos.dfa.is_final:
                # We never broke out -- EOF is too soon -- Unfinished statement.
                # However, the error recovery might have added the token again, if
                # the stack is empty, we're fine.
                raise InternalParseError(
                    "incomplete input", token.type, token.value, token.start_pos
                )

            if len(self.stack) > 1:
                self._pop()
            else:
                return self.convert_node(tos.nonterminal, tos.nodes)

    def error_recovery(self, token):
        if self._error_recovery:
            raise NotImplementedError("Error Recovery is not implemented")
        else:
            raise ParserSyntaxError('SyntaxError: invalid syntax', start_pos)
            type_, value, start_pos, prefix = token
            error_leaf = tree.ErrorLeaf(type_, value, start_pos, prefix)
            raise ParserSyntaxError('SyntaxError: invalid syntax', error_leaf)

    def convert_node(self, grammar, type_, children):
        # TODO REMOVE symbol, we don't want type here.
        symbol = grammar.number2symbol[type_]
    def convert_node(self, nonterminal, children):
        try:
            return self.node_map[symbol](children)
            node = self.node_map[nonterminal](children)
        except KeyError:
            return self.default_node(symbol, children)
            node = self.default_node(nonterminal, children)
        for c in children:
            c.parent = node
        return node

    def convert_leaf(self, grammar, type_, value, prefix, start_pos):
    def convert_leaf(self, type_, value, prefix, start_pos):
        try:
            return self.leaf_map[type_](value, start_pos, prefix)
        except KeyError:
            return self.default_leaf(value, start_pos, prefix)

    def _add_token(self, token):
        """
        This is the only core function for parsing. Here happens basically
        everything. Everything is well prepared by the parser generator and we
        only apply the necessary steps here.
        """
        grammar = self._pgen_grammar
        stack = self.stack
        type_, value, start_pos, prefix = token
        transition = _token_to_transition(grammar, type_, value)

        while True:
            try:
                plan = stack[-1].dfa.transitions[transition]
                break
            except KeyError:
                if stack[-1].dfa.is_final:
                    self._pop()
                else:
                    self.error_recovery(token)
                    return
            except IndexError:
                raise InternalParseError("too much input", type_, value, start_pos)

        stack[-1].dfa = plan.next_dfa

        for push in plan.dfa_pushes:
            stack.append(StackNode(push))

        leaf = self.convert_leaf(type_, value, prefix, start_pos)
        stack[-1].nodes.append(leaf)

    def _pop(self):
        tos = self.stack.pop()
        # If there's exactly one child, return that child instead of
        # creating a new node. We still create expr_stmt and
        # file_input though, because a lot of Jedi depends on its
        # logic.
        if len(tos.nodes) == 1:
            new_node = tos.nodes[0]
        else:
            new_node = self.convert_node(tos.dfa.from_rule, tos.nodes)

        self.stack[-1].nodes.append(new_node)
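For reference, how the new error_recovery() path surfaces to callers (an editorial sketch, not part of the diff): with recovery disabled, the parser now raises ParserSyntaxError carrying an ErrorLeaf instead of a bare position.

import parso
from parso.parser import ParserSyntaxError

grammar = parso.load_grammar()
try:
    grammar.parse('def broken(:\n', error_recovery=False)
except ParserSyntaxError as e:
    # error_leaf replaces the old `position` attribute.
    print(e.message, e.error_leaf.start_pos)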
parso/pgen2/__init__.py
@@ -4,5 +4,7 @@
# Modifications:
# Copyright 2006 Google, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.
# Copyright 2014 David Halter. Integration into Jedi.
# Copyright 2014 David Halter and Contributors
# Modifications are dual-licensed: MIT and PSF.

from parso.pgen2.generator import generate_grammar
parso/pgen2/__init__.pyi (new file, 1 line)
@@ -0,0 +1 @@
from parso.pgen2.generator import generate_grammar as generate_grammar
parso/pgen2/generator.py (new file, 358 lines)
@@ -0,0 +1,358 @@
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.

# Modifications:
# Copyright David Halter and Contributors
# Modifications are dual-licensed: MIT and PSF.

"""
This module defines the data structures used to represent a grammar.

Specifying grammars in pgen is possible with this grammar::

    grammar: (NEWLINE | rule)* ENDMARKER
    rule: NAME ':' rhs NEWLINE
    rhs: items ('|' items)*
    items: item+
    item: '[' rhs ']' | atom ['+' | '*']
    atom: '(' rhs ')' | NAME | STRING

This grammar is self-referencing.

This parser generator (pgen2) was created by Guido van Rossum and used for
lib2to3. Most of the code has been refactored to make it more Pythonic. Since
this was a "copy" of the CPython parser generator "pgen", there was some work
needed to make it more readable. It should also be slightly faster than the
original pgen2, because we made some optimizations.
"""

from ast import literal_eval

from parso.pgen2.grammar_parser import GrammarParser, NFAState


class Grammar(object):
    """
    Once initialized, this class supplies the grammar tables for the
    parsing engine implemented by parse.py. The parsing engine
    accesses the instance variables directly.

    The only important parts in this parser are dfas and the transitions
    between dfas.
    """

    def __init__(self, start_nonterminal, rule_to_dfas, reserved_syntax_strings):
        self.nonterminal_to_dfas = rule_to_dfas  # Dict[str, List[DFAState]]
        self.reserved_syntax_strings = reserved_syntax_strings
        self.start_nonterminal = start_nonterminal


class DFAPlan(object):
    """
    Plans are used for the parser to create stack nodes and do the proper
    DFA state transitions.
    """
    def __init__(self, next_dfa, dfa_pushes=[]):
        self.next_dfa = next_dfa
        self.dfa_pushes = dfa_pushes

    def __repr__(self):
        return '%s(%s, %s)' % (self.__class__.__name__, self.next_dfa, self.dfa_pushes)


class DFAState(object):
    """
    The DFAState object is the core class for pretty much anything. DFAStates
    are the vertices of an ordered graph while arcs and transitions are the
    edges.

    Arcs are the initial edges, where most DFAStates are not connected and
    transitions are then calculated to connect the DFA state machines that have
    different nonterminals.
    """
    def __init__(self, from_rule, nfa_set, final):
        assert isinstance(nfa_set, set)
        assert isinstance(next(iter(nfa_set)), NFAState)
        assert isinstance(final, NFAState)
        self.from_rule = from_rule
        self.nfa_set = nfa_set
        self.arcs = {}  # map from terminals/nonterminals to DFAState
        # In an intermediary step we set these nonterminal arcs (which have the
        # same structure as arcs). These don't contain terminals anymore.
        self.nonterminal_arcs = {}

        # Transitions are basically the only thing that the parser is using
        # with is_final. Everything else is purely here to create a parser.
        self.transitions = {}  #: Dict[Union[TokenType, ReservedString], DFAPlan]
        self.is_final = final in nfa_set

    def add_arc(self, next_, label):
        assert isinstance(label, str)
        assert label not in self.arcs
        assert isinstance(next_, DFAState)
        self.arcs[label] = next_

    def unifystate(self, old, new):
        for label, next_ in self.arcs.items():
            if next_ is old:
                self.arcs[label] = new

    def __eq__(self, other):
        # Equality test -- ignore the nfa_set instance variable
        assert isinstance(other, DFAState)
        if self.is_final != other.is_final:
            return False
        # Can't just return self.arcs == other.arcs, because that
        # would invoke this method recursively, with cycles...
        if len(self.arcs) != len(other.arcs):
            return False
        for label, next_ in self.arcs.items():
            if next_ is not other.arcs.get(label):
                return False
        return True

    __hash__ = None  # For Py3 compatibility.

    def __repr__(self):
        return '<%s: %s is_final=%s>' % (
            self.__class__.__name__, self.from_rule, self.is_final
        )


class ReservedString(object):
    """
    Most grammars will have certain keywords and operators that are mentioned
    in the grammar as strings (e.g. "if") and not token types (e.g. NUMBER).
    This class basically is the former.
    """

    def __init__(self, value):
        self.value = value

    def __repr__(self):
        return '%s(%s)' % (self.__class__.__name__, self.value)


def _simplify_dfas(dfas):
    """
    This is not theoretically optimal, but works well enough.
    Algorithm: repeatedly look for two states that have the same
    set of arcs (same labels pointing to the same nodes) and
    unify them, until things stop changing.

    dfas is a list of DFAState instances
    """
    changes = True
    while changes:
        changes = False
        for i, state_i in enumerate(dfas):
            for j in range(i + 1, len(dfas)):
                state_j = dfas[j]
                if state_i == state_j:
                    #print "  unify", i, j
                    del dfas[j]
                    for state in dfas:
                        state.unifystate(state_j, state_i)
                    changes = True
                    break


def _make_dfas(start, finish):
    """
    Uses the powerset construction algorithm to create DFA states from sets of
    NFA states.

    Also does state reduction if some states are not needed.
    """
    # To turn an NFA into a DFA, we define the states of the DFA
    # to correspond to *sets* of states of the NFA. Then do some
    # state reduction.
    assert isinstance(start, NFAState)
    assert isinstance(finish, NFAState)

    def addclosure(nfa_state, base_nfa_set):
        assert isinstance(nfa_state, NFAState)
        if nfa_state in base_nfa_set:
            return
        base_nfa_set.add(nfa_state)
        for nfa_arc in nfa_state.arcs:
            if nfa_arc.nonterminal_or_string is None:
                addclosure(nfa_arc.next, base_nfa_set)

    base_nfa_set = set()
    addclosure(start, base_nfa_set)
    states = [DFAState(start.from_rule, base_nfa_set, finish)]
    for state in states:  # NB states grows while we're iterating
        arcs = {}
        # Find state transitions and store them in arcs.
        for nfa_state in state.nfa_set:
            for nfa_arc in nfa_state.arcs:
                if nfa_arc.nonterminal_or_string is not None:
                    nfa_set = arcs.setdefault(nfa_arc.nonterminal_or_string, set())
                    addclosure(nfa_arc.next, nfa_set)

        # Now create the dfa's with no None's in arcs anymore. All Nones have
        # been eliminated and state transitions (arcs) are properly defined, we
        # just need to create the dfa's.
        for nonterminal_or_string, nfa_set in arcs.items():
            for nested_state in states:
                if nested_state.nfa_set == nfa_set:
                    # The DFA state already exists for this rule.
                    break
            else:
                nested_state = DFAState(start.from_rule, nfa_set, finish)
                states.append(nested_state)

            state.add_arc(nested_state, nonterminal_or_string)
    return states  # List of DFAState instances; first one is start


def _dump_nfa(start, finish):
    print("Dump of NFA for", start.from_rule)
    todo = [start]
    for i, state in enumerate(todo):
        print("  State", i, state is finish and "(final)" or "")
        for label, next_ in state.arcs:
            if next_ in todo:
                j = todo.index(next_)
            else:
                j = len(todo)
                todo.append(next_)
            if label is None:
                print("    -> %d" % j)
            else:
                print("    %s -> %d" % (label, j))


def _dump_dfas(dfas):
    print("Dump of DFA for", dfas[0].from_rule)
    for i, state in enumerate(dfas):
        print("  State", i, state.is_final and "(final)" or "")
        for nonterminal, next_ in state.arcs.items():
            print("    %s -> %d" % (nonterminal, dfas.index(next_)))


def generate_grammar(bnf_grammar, token_namespace):
    """
    ``bnf_text`` is a grammar in extended BNF (using * for repetition, + for
    at-least-once repetition, [] for optional parts, | for alternatives and ()
    for grouping).

    It's not EBNF according to ISO/IEC 14977. It's a dialect Python uses in its
    own parser.
    """
    rule_to_dfas = {}
    start_nonterminal = None
    for nfa_a, nfa_z in GrammarParser(bnf_grammar).parse():
        #_dump_nfa(nfa_a, nfa_z)
        dfas = _make_dfas(nfa_a, nfa_z)
        #_dump_dfas(dfas)
        # oldlen = len(dfas)
        _simplify_dfas(dfas)
        # newlen = len(dfas)
        rule_to_dfas[nfa_a.from_rule] = dfas
        #print(nfa_a.from_rule, oldlen, newlen)

        if start_nonterminal is None:
            start_nonterminal = nfa_a.from_rule

    reserved_strings = {}
    for nonterminal, dfas in rule_to_dfas.items():
        for dfa_state in dfas:
            for terminal_or_nonterminal, next_dfa in dfa_state.arcs.items():
                if terminal_or_nonterminal in rule_to_dfas:
                    dfa_state.nonterminal_arcs[terminal_or_nonterminal] = next_dfa
                else:
                    transition = _make_transition(
                        token_namespace,
                        reserved_strings,
                        terminal_or_nonterminal
                    )
                    dfa_state.transitions[transition] = DFAPlan(next_dfa)

    _calculate_tree_traversal(rule_to_dfas)
    return Grammar(start_nonterminal, rule_to_dfas, reserved_strings)


def _make_transition(token_namespace, reserved_syntax_strings, label):
    """
    Creates a reserved string ("if", "for", "*", ...) or returns the token type
    (NUMBER, STRING, ...) for a given grammar terminal.
    """
    if label[0].isalpha():
        # A named token (e.g. NAME, NUMBER, STRING)
        return getattr(token_namespace, label)
    else:
        # Either a keyword or an operator
        assert label[0] in ('"', "'"), label
        assert not label.startswith('"""') and not label.startswith("'''")
        value = literal_eval(label)
        try:
            return reserved_syntax_strings[value]
        except KeyError:
            r = reserved_syntax_strings[value] = ReservedString(value)
            return r


def _calculate_tree_traversal(nonterminal_to_dfas):
    """
    By this point we know how dfas can move around within a stack node, but we
    don't know how we can add a new stack node (nonterminal transitions).
    """
    # Map from grammar rule (nonterminal) name to a set of tokens.
    first_plans = {}

    nonterminals = list(nonterminal_to_dfas.keys())
    nonterminals.sort()
    for nonterminal in nonterminals:
        if nonterminal not in first_plans:
            _calculate_first_plans(nonterminal_to_dfas, first_plans, nonterminal)

    # Now that we have calculated the first terminals, we are sure that
    # there is no left recursion or ambiguities.

    for dfas in nonterminal_to_dfas.values():
        for dfa_state in dfas:
            for nonterminal, next_dfa in dfa_state.nonterminal_arcs.items():
                for transition, pushes in first_plans[nonterminal].items():
                    dfa_state.transitions[transition] = DFAPlan(next_dfa, pushes)


def _calculate_first_plans(nonterminal_to_dfas, first_plans, nonterminal):
    """
    Calculates the first plan in the first_plans dictionary for every given
    nonterminal. This is going to be used to know when to create stack nodes.
    """
    dfas = nonterminal_to_dfas[nonterminal]
    new_first_plans = {}
    first_plans[nonterminal] = None  # dummy to detect left recursion
    # We only need to check the first dfa. All the following ones are not
    # interesting to find first terminals.
    state = dfas[0]
    for transition, next_ in state.transitions.items():
        # It's a string. We have finally found a possible first token.
        new_first_plans[transition] = [next_.next_dfa]

    for nonterminal2, next_ in state.nonterminal_arcs.items():
        # It's a nonterminal and we have either a left recursion issue
        # in the grammar or we have to recurse.
        try:
            first_plans2 = first_plans[nonterminal2]
        except KeyError:
            first_plans2 = _calculate_first_plans(nonterminal_to_dfas, first_plans, nonterminal2)
        else:
            if first_plans2 is None:
                raise ValueError("left recursion for rule %r" % nonterminal)

        for t, pushes in first_plans2.items():
            check = new_first_plans.get(t)
            if check is not None:
                raise ValueError(
                    "Rule %s is ambiguous; %s is the"
                    " start of the rule %s as well as %s."
                    % (nonterminal, t, nonterminal2, check[-1].from_rule)
                )
            new_first_plans[t] = [next_] + pushes

    first_plans[nonterminal] = new_first_plans
    return new_first_plans
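As a sanity check of generate_grammar (an editorial sketch; the one-rule grammar is made up, the token names come from PythonTokenTypes):

from parso.pgen2.generator import generate_grammar
from parso.python.token import PythonTokenTypes

# A tiny grammar in the pgen dialect described in the module docstring:
# one rule, a group with '*' repetition, and one reserved string '+'.
bnf_text = "file_input: NAME ('+' NAME)* NEWLINE ENDMARKER\n"

grammar = generate_grammar(bnf_text, token_namespace=PythonTokenTypes)
print(grammar.start_nonterminal)                # 'file_input'
print(sorted(grammar.reserved_syntax_strings))  # ['+']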
parso/pgen2/generator.pyi (new file, 38 lines)
@@ -0,0 +1,38 @@
from typing import Any, Generic, Mapping, Sequence, Set, TypeVar, Union

from parso.pgen2.grammar_parser import NFAState

_TokenTypeT = TypeVar("_TokenTypeT")

class Grammar(Generic[_TokenTypeT]):
    nonterminal_to_dfas: Mapping[str, Sequence[DFAState[_TokenTypeT]]]
    reserved_syntax_strings: Mapping[str, ReservedString]
    start_nonterminal: str
    def __init__(
        self,
        start_nonterminal: str,
        rule_to_dfas: Mapping[str, Sequence[DFAState]],
        reserved_syntax_strings: Mapping[str, ReservedString],
    ) -> None: ...

class DFAPlan:
    next_dfa: DFAState
    dfa_pushes: Sequence[DFAState]

class DFAState(Generic[_TokenTypeT]):
    from_rule: str
    nfa_set: Set[NFAState]
    is_final: bool
    arcs: Mapping[str, DFAState]  # map from all terminals/nonterminals to DFAState
    nonterminal_arcs: Mapping[str, DFAState]
    transitions: Mapping[Union[_TokenTypeT, ReservedString], DFAPlan]
    def __init__(
        self, from_rule: str, nfa_set: Set[NFAState], final: NFAState
    ) -> None: ...

class ReservedString:
    value: str
    def __init__(self, value: str) -> None: ...
    def __repr__(self) -> str: ...

def generate_grammar(bnf_grammar: str, token_namespace: Any) -> Grammar[Any]: ...
parso/pgen2/grammar.py (deleted)
@@ -1,127 +0,0 @@
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.

# Modifications:
# Copyright 2014 David Halter. Integration into Jedi.
# Modifications are dual-licensed: MIT and PSF.

"""This module defines the data structures used to represent a grammar.

These are a bit arcane because they are derived from the data
structures used by Python's 'pgen' parser generator.

There's also a table here mapping operators to their names in the
token module; the Python tokenize module reports all operators as the
fallback token code OP, but the parser needs the actual token code.

"""

import pickle
import hashlib


class Grammar(object):
    """Pgen parsing tables conversion class.

    Once initialized, this class supplies the grammar tables for the
    parsing engine implemented by parse.py. The parsing engine
    accesses the instance variables directly. The class here does not
    provide initialization of the tables; several subclasses exist to
    do this (see the conv and pgen modules).

    The load() method reads the tables from a pickle file, which is
    much faster than the other ways offered by subclasses. The pickle
    file is written by calling dump() (after loading the grammar
    tables using a subclass). The report() method prints a readable
    representation of the tables to stdout, for debugging.

    The instance variables are as follows:

    symbol2number -- a dict mapping symbol names to numbers. Symbol
                     numbers are always 256 or higher, to distinguish
                     them from token numbers, which are between 0 and
                     255 (inclusive).

    number2symbol -- a dict mapping numbers to symbol names;
                     these two are each other's inverse.

    states        -- a list of DFAs, where each DFA is a list of
                     states, each state is a list of arcs, and each
                     arc is a (i, j) pair where i is a label and j is
                     a state number. The DFA number is the index into
                     this list. (This name is slightly confusing.)
                     Final states are represented by a special arc of
                     the form (0, j) where j is its own state number.

    dfas          -- a dict mapping symbol numbers to (DFA, first)
                     pairs, where DFA is an item from the states list
                     above, and first is a set of tokens that can
                     begin this grammar rule (represented by a dict
                     whose values are always 1).

    labels        -- a list of (x, y) pairs where x is either a token
                     number or a symbol number, and y is either None
                     or a string; the strings are keywords. The label
                     number is the index in this list; label numbers
                     are used to mark state transitions (arcs) in the
                     DFAs.

    start         -- the number of the grammar's start symbol.

    keywords      -- a dict mapping keyword strings to arc labels.

    tokens        -- a dict mapping token numbers to arc labels.

    """

    def __init__(self, bnf_text):
        self.symbol2number = {}
        self.number2symbol = {}
        self.states = []
        self.dfas = {}
        self.labels = [(0, "EMPTY")]
        self.keywords = {}
        self.tokens = {}
        self.symbol2label = {}
        self.start = 256
        self.sha256 = hashlib.sha256(bnf_text.encode("utf-8")).hexdigest()

    def dump(self, filename):
        """Dump the grammar tables to a pickle file."""
        with open(filename, "wb") as f:
            pickle.dump(self.__dict__, f, 2)

    def load(self, filename):
        """Load the grammar tables from a pickle file."""
        with open(filename, "rb") as f:
            d = pickle.load(f)
        self.__dict__.update(d)

    def copy(self):
        """
        Copy the grammar.
        """
        new = self.__class__()
        for dict_attr in ("symbol2number", "number2symbol", "dfas", "keywords",
                          "tokens", "symbol2label"):
            setattr(new, dict_attr, getattr(self, dict_attr).copy())
        new.labels = self.labels[:]
        new.states = self.states[:]
        new.start = self.start
        return new

    def report(self):
        """Dump the grammar tables to standard output, for debugging."""
        from pprint import pprint
        print("s2n")
        pprint(self.symbol2number)
        print("n2s")
        pprint(self.number2symbol)
        print("states")
        pprint(self.states)
        print("dfas")
        pprint(self.dfas)
        print("labels")
        pprint(self.labels)
        print("start", self.start)
parso/pgen2/grammar_parser.py (new file, 156 lines)
@@ -0,0 +1,156 @@
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.

# Modifications:
# Copyright David Halter and Contributors
# Modifications are dual-licensed: MIT and PSF.

from parso.python.tokenize import tokenize
from parso.utils import parse_version_string
from parso.python.token import PythonTokenTypes


class GrammarParser():
    """
    The parser for Python grammar files.
    """
    def __init__(self, bnf_grammar):
        self._bnf_grammar = bnf_grammar
        self.generator = tokenize(
            bnf_grammar,
            version_info=parse_version_string('3.6')
        )
        self._gettoken()  # Initialize lookahead

    def parse(self):
        # grammar: (NEWLINE | rule)* ENDMARKER
        while self.type != PythonTokenTypes.ENDMARKER:
            while self.type == PythonTokenTypes.NEWLINE:
                self._gettoken()

            # rule: NAME ':' rhs NEWLINE
            self._current_rule_name = self._expect(PythonTokenTypes.NAME)
            self._expect(PythonTokenTypes.OP, ':')

            a, z = self._parse_rhs()
            self._expect(PythonTokenTypes.NEWLINE)

            yield a, z

    def _parse_rhs(self):
        # rhs: items ('|' items)*
        a, z = self._parse_items()
        if self.value != "|":
            return a, z
        else:
            aa = NFAState(self._current_rule_name)
            zz = NFAState(self._current_rule_name)
            while True:
                # Add the possibility to go into the state of a and come back
                # to finish.
                aa.add_arc(a)
                z.add_arc(zz)
                if self.value != "|":
                    break

                self._gettoken()
                a, z = self._parse_items()
            return aa, zz

    def _parse_items(self):
        # items: item+
        a, b = self._parse_item()
        while self.type in (PythonTokenTypes.NAME, PythonTokenTypes.STRING) \
                or self.value in ('(', '['):
            c, d = self._parse_item()
            # Need to end on the next item.
            b.add_arc(c)
            b = d
        return a, b

    def _parse_item(self):
        # item: '[' rhs ']' | atom ['+' | '*']
        if self.value == "[":
            self._gettoken()
            a, z = self._parse_rhs()
            self._expect(PythonTokenTypes.OP, ']')
            # Make it also possible that there is no token and change the
            # state.
            a.add_arc(z)
            return a, z
        else:
            a, z = self._parse_atom()
            value = self.value
            if value not in ("+", "*"):
                return a, z
            self._gettoken()
            # Make it clear that we can go back to the old state and repeat.
            z.add_arc(a)
            if value == "+":
                return a, z
            else:
                # The end state is the same as the beginning, nothing must
                # change.
                return a, a

    def _parse_atom(self):
        # atom: '(' rhs ')' | NAME | STRING
        if self.value == "(":
            self._gettoken()
            a, z = self._parse_rhs()
            self._expect(PythonTokenTypes.OP, ')')
            return a, z
        elif self.type in (PythonTokenTypes.NAME, PythonTokenTypes.STRING):
            a = NFAState(self._current_rule_name)
            z = NFAState(self._current_rule_name)
            # Make it clear that the state transition requires that value.
            a.add_arc(z, self.value)
            self._gettoken()
            return a, z
        else:
            self._raise_error("expected (...) or NAME or STRING, got %s/%s",
                              self.type, self.value)

    def _expect(self, type_, value=None):
        if self.type != type_:
            self._raise_error("expected %s, got %s [%s]",
                              type_, self.type, self.value)
        if value is not None and self.value != value:
            self._raise_error("expected %s, got %s", value, self.value)
        value = self.value
        self._gettoken()
        return value

    def _gettoken(self):
        tup = next(self.generator)
        self.type, self.value, self.begin, prefix = tup

    def _raise_error(self, msg, *args):
        if args:
            try:
                msg = msg % args
            except:
                msg = " ".join([msg] + list(map(str, args)))
        line = self._bnf_grammar.splitlines()[self.begin[0] - 1]
        raise SyntaxError(msg, ('<grammar>', self.begin[0],
                                self.begin[1], line))


class NFAArc(object):
    def __init__(self, next_, nonterminal_or_string):
        self.next = next_
        self.nonterminal_or_string = nonterminal_or_string


class NFAState(object):
    def __init__(self, from_rule):
        self.from_rule = from_rule
        self.arcs = []  # List[nonterminal (str), NFAState]

    def add_arc(self, next_, nonterminal_or_string=None):
        assert nonterminal_or_string is None or isinstance(nonterminal_or_string, str)
        assert isinstance(next_, NFAState)
        self.arcs.append(NFAArc(next_, nonterminal_or_string))

    def __repr__(self):
        return '<%s: from %s>' % (self.__class__.__name__, self.from_rule)
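The NFA construction can be observed directly (an editorial sketch with a made-up rule; parse() yields one (start, end) NFAState pair per rule):

from parso.pgen2.grammar_parser import GrammarParser

for start, end in GrammarParser("decorated: '@' NAME NEWLINE\n").parse():
    print(start.from_rule)  # 'decorated'
    for arc in start.arcs:
        # nonterminal_or_string is None for epsilon arcs, otherwise the
        # raw grammar label (here the quoted string '@', quotes included).
        print(arc.nonterminal_or_string, arc.next)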
parso/pgen2/grammar_parser.pyi (new file, 20 lines)
@@ -0,0 +1,20 @@
from typing import Generator, List, Optional, Tuple

from parso.python.token import TokenType

class GrammarParser:
    generator: Generator[TokenType, None, None]
    def __init__(self, bnf_grammar: str) -> None: ...
    def parse(self) -> Generator[Tuple[NFAState, NFAState], None, None]: ...

class NFAArc:
    next: NFAState
    nonterminal_or_string: Optional[str]
    def __init__(
        self, next_: NFAState, nonterminal_or_string: Optional[str]
    ) -> None: ...

class NFAState:
    from_rule: str
    arcs: List[NFAArc]
    def __init__(self, from_rule: str) -> None: ...
parso/pgen2/parse.py (deleted)
@@ -1,217 +0,0 @@
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.

# Modifications:
# Copyright 2014 David Halter. Integration into Jedi.
# Modifications are dual-licensed: MIT and PSF.

"""
Parser engine for the grammar tables generated by pgen.

The grammar table must be loaded first.

See Parser/parser.c in the Python distribution for additional info on
how this parsing engine works.
"""

from parso import tokenize


class InternalParseError(Exception):
    """
    Exception to signal the parser is stuck and error recovery didn't help.
    Basically this shouldn't happen. It's a sign that something is really
    wrong.
    """

    def __init__(self, msg, type, value, start_pos):
        Exception.__init__(self, "%s: type=%r, value=%r, start_pos=%r" %
                           (msg, tokenize.tok_name[type], value, start_pos))
        self.msg = msg
        self.type = type
        self.value = value
        self.start_pos = start_pos


def token_to_ilabel(grammar, type_, value):
    # Map from token to label
    if type_ == tokenize.NAME:
        # Check for reserved words (keywords)
        try:
            return grammar.keywords[value]
        except KeyError:
            pass

    try:
        return grammar.tokens[type_]
    except KeyError:
        return None


class PgenParser(object):
    """Parser engine.

    The proper usage sequence is:

    p = Parser(grammar, [converter])  # create instance
    p.setup([start])                  # prepare for parsing
    <for each input token>:
        if p.addtoken(...):           # parse a token
            break
    root = p.rootnode                 # root of abstract syntax tree

    A Parser instance may be reused by calling setup() repeatedly.

    A Parser instance contains state pertaining to the current token
    sequence, and should not be used concurrently by different threads
    to parse separate token sequences.

    See driver.py for how to get input tokens by tokenizing a file or
    string.

    Parsing is complete when addtoken() returns True; the root of the
    abstract syntax tree can then be retrieved from the rootnode
    instance variable. When a syntax error occurs, error_recovery()
    is called. There is no error recovery; the parser cannot be used
    after a syntax error was reported (but it can be reinitialized by
    calling setup()).

    """

    def __init__(self, grammar, convert_node, convert_leaf, error_recovery, start):
        """Constructor.

        The grammar argument is a grammar.Grammar instance; see the
        grammar module for more information.

        The parser is not ready yet for parsing; you must call the
        setup() method to get it started.

        The optional convert argument is a function mapping concrete
        syntax tree nodes to abstract syntax tree nodes. If not
        given, no conversion is done and the syntax tree produced is
        the concrete syntax tree. If given, it must be a function of
        two arguments, the first being the grammar (a grammar.Grammar
        instance), and the second being the concrete syntax tree node
        to be converted. The syntax tree is converted from the bottom
        up.

        A concrete syntax tree node is a (type, nodes) tuple, where
        type is the node type (a token or symbol number) and nodes
        is a list of children for symbols, and None for tokens.

        An abstract syntax tree node may be anything; this is entirely
        up to the converter function.

        """
        self.grammar = grammar
        self.convert_node = convert_node
        self.convert_leaf = convert_leaf

        # Each stack entry is a tuple: (dfa, state, node).
        # A node is a tuple: (type, children),
        # where children is a list of nodes or None
        newnode = (start, [])
        stackentry = (self.grammar.dfas[start], 0, newnode)
        self.stack = [stackentry]
        self.rootnode = None
        self.error_recovery = error_recovery

    def parse(self, tokens):
        for type_, value, start_pos, prefix in tokens:
            if self.addtoken(type_, value, start_pos, prefix):
                break
        else:
            # We never broke out -- EOF is too soon -- Unfinished statement.
            # However, the error recovery might have added the token again, if
            # the stack is empty, we're fine.
            if self.stack:
                raise InternalParseError("incomplete input", type_, value, start_pos)
        return self.rootnode

    def addtoken(self, type_, value, start_pos, prefix):
        """Add a token; return True if this is the end of the program."""
        ilabel = token_to_ilabel(self.grammar, type_, value)

        # Loop until the token is shifted; may raise exceptions
        _gram = self.grammar
        _labels = _gram.labels
        _push = self._push
        _pop = self._pop
        _shift = self._shift
        while True:
            dfa, state, node = self.stack[-1]
            states, first = dfa
            arcs = states[state]
            # Look for a state with this label
            for i, newstate in arcs:
                t, v = _labels[i]
                if ilabel == i:
                    # Look it up in the list of labels
                    assert t < 256
                    # Shift a token; we're done with it
                    _shift(type_, value, newstate, prefix, start_pos)
                    # Pop while we are in an accept-only state
                    state = newstate
                    while states[state] == [(0, state)]:
                        _pop()
                        if not self.stack:
                            # Done parsing!
                            return True
                        dfa, state, node = self.stack[-1]
                        states, first = dfa
                    # Done with this token
                    return False
                elif t >= 256:
                    # See if it's a symbol and if we're in its first set
                    itsdfa = _gram.dfas[t]
                    itsstates, itsfirst = itsdfa
                    if ilabel in itsfirst:
                        # Push a symbol
                        _push(t, itsdfa, newstate)
                        break  # To continue the outer while loop
            else:
                if (0, state) in arcs:
                    # An accepting state, pop it and try something else
                    _pop()
                    if not self.stack:
                        # Done parsing, but another token is input
                        raise InternalParseError("too much input", type_, value, start_pos)
                else:
                    self.error_recovery(self.grammar, self.stack, arcs, type_,
                                        value, start_pos, prefix, self.addtoken)
                    break

    def _shift(self, type_, value, newstate, prefix, start_pos):
        """Shift a token. (Internal)"""
        dfa, state, node = self.stack[-1]
        newnode = self.convert_leaf(self.grammar, type_, value, prefix, start_pos)
        node[-1].append(newnode)
        self.stack[-1] = (dfa, newstate, node)

    def _push(self, type_, newdfa, newstate):
        """Push a nonterminal. (Internal)"""
        dfa, state, node = self.stack[-1]
        newnode = (type_, [])
        self.stack[-1] = (dfa, newstate, node)
        self.stack.append((newdfa, 0, newnode))

    def _pop(self):
        """Pop a nonterminal. (Internal)"""
        popdfa, popstate, (type_, children) = self.stack.pop()
        # If there's exactly one child, return that child instead of creating a
        # new node. We still create expr_stmt and file_input though, because a
        # lot of Jedi depends on its logic.
        if len(children) == 1:
            newnode = children[0]
        else:
            newnode = self.convert_node(self.grammar, type_, children)

        try:
            # Equal to:
            # dfa, state, node = self.stack[-1]
            # symbol, children = node
            self.stack[-1][2][1].append(newnode)
        except IndexError:
            # Stack is empty, set the rootnode.
            self.rootnode = newnode
parso/pgen2/pgen.py (deleted)
@@ -1,394 +0,0 @@
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.

# Modifications:
# Copyright 2014 David Halter. Integration into Jedi.
# Modifications are dual-licensed: MIT and PSF.

from parso.pgen2 import grammar
from parso import token
from parso import tokenize


class ParserGenerator(object):
    def __init__(self, bnf_text):
        self._bnf_text = bnf_text
        self.generator = tokenize.source_tokens(bnf_text)
        self._gettoken()  # Initialize lookahead
        self.dfas, self.startsymbol = self._parse()
        self.first = {}  # map from symbol name to set of tokens
        self._addfirstsets()

    def make_grammar(self):
        c = grammar.Grammar(self._bnf_text)
        names = list(self.dfas.keys())
        names.sort()
        names.remove(self.startsymbol)
        names.insert(0, self.startsymbol)
        for name in names:
            i = 256 + len(c.symbol2number)
            c.symbol2number[name] = i
            c.number2symbol[i] = name
        for name in names:
            dfa = self.dfas[name]
            states = []
            for state in dfa:
                arcs = []
                for label, next in state.arcs.items():
                    arcs.append((self._make_label(c, label), dfa.index(next)))
                if state.isfinal:
                    arcs.append((0, dfa.index(state)))
                states.append(arcs)
            c.states.append(states)
            c.dfas[c.symbol2number[name]] = (states, self._make_first(c, name))
        c.start = c.symbol2number[self.startsymbol]
        return c

    def _make_first(self, c, name):
        rawfirst = self.first[name]
        first = {}
        for label in rawfirst:
            ilabel = self._make_label(c, label)
            ##assert ilabel not in first # XXX failed on <> ... !=
            first[ilabel] = 1
        return first

    def _make_label(self, c, label):
        # XXX Maybe this should be a method on a subclass of converter?
        ilabel = len(c.labels)
        if label[0].isalpha():
            # Either a symbol name or a named token
            if label in c.symbol2number:
                # A symbol name (a non-terminal)
                if label in c.symbol2label:
                    return c.symbol2label[label]
                else:
                    c.labels.append((c.symbol2number[label], None))
                    c.symbol2label[label] = ilabel
                    return ilabel
            else:
                # A named token (NAME, NUMBER, STRING)
                itoken = getattr(token, label, None)
                assert isinstance(itoken, int), label
                assert itoken in token.tok_name, label
                if itoken in c.tokens:
                    return c.tokens[itoken]
                else:
                    c.labels.append((itoken, None))
                    c.tokens[itoken] = ilabel
                    return ilabel
        else:
            # Either a keyword or an operator
            assert label[0] in ('"', "'"), label
            value = eval(label)
            if value[0].isalpha():
                # A keyword
                if value in c.keywords:
                    return c.keywords[value]
                else:
                    c.labels.append((token.NAME, value))
                    c.keywords[value] = ilabel
                    return ilabel
            else:
                # An operator (any non-numeric token)
                itoken = token.opmap[value]  # Fails if unknown token
                if itoken in c.tokens:
                    return c.tokens[itoken]
                else:
                    c.labels.append((itoken, None))
                    c.tokens[itoken] = ilabel
                    return ilabel
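
    # Illustration (added; not in the original source): the label kinds that
    # _make_label distinguishes and the (type, value) pairs it records:
    #
    #     'expr'   nonterminal      -> (c.symbol2number['expr'], None)
    #     'NAME'   named token      -> (token.NAME, None)
    #     "'if'"   quoted keyword   -> (token.NAME, 'if')
    #     "'+'"    quoted operator  -> (token.opmap['+'], None)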

    def _addfirstsets(self):
        names = list(self.dfas.keys())
        names.sort()
        for name in names:
            if name not in self.first:
                self._calcfirst(name)
            #print name, self.first[name].keys()

    def _calcfirst(self, name):
        dfa = self.dfas[name]
        self.first[name] = None  # dummy to detect left recursion
        state = dfa[0]
        totalset = {}
        overlapcheck = {}
        for label, next in state.arcs.items():
            if label in self.dfas:
                if label in self.first:
                    fset = self.first[label]
                    if fset is None:
                        raise ValueError("recursion for rule %r" % name)
                else:
                    self._calcfirst(label)
                    fset = self.first[label]
                totalset.update(fset)
                overlapcheck[label] = fset
            else:
                totalset[label] = 1
                overlapcheck[label] = {label: 1}
        inverse = {}
        for label, itsfirst in overlapcheck.items():
            for symbol in itsfirst:
                if symbol in inverse:
                    raise ValueError("rule %s is ambiguous; %s is in the"
                                     " first sets of %s as well as %s" %
                                     (name, symbol, label, inverse[symbol]))
                inverse[symbol] = label
        self.first[name] = totalset

    def _parse(self):
        dfas = {}
        startsymbol = None
        # MSTART: (NEWLINE | RULE)* ENDMARKER
        while self.type != token.ENDMARKER:
            while self.type == token.NEWLINE:
                self._gettoken()
            # RULE: NAME ':' RHS NEWLINE
            name = self._expect(token.NAME)
            self._expect(token.OP, ":")
            a, z = self._parse_rhs()
            self._expect(token.NEWLINE)
            #self._dump_nfa(name, a, z)
            dfa = self._make_dfa(a, z)
            #self._dump_dfa(name, dfa)
            # oldlen = len(dfa)
            self._simplify_dfa(dfa)
            # newlen = len(dfa)
            dfas[name] = dfa
            #print name, oldlen, newlen
            if startsymbol is None:
                startsymbol = name
        return dfas, startsymbol

    def _make_dfa(self, start, finish):
        # To turn an NFA into a DFA, we define the states of the DFA
        # to correspond to *sets* of states of the NFA. Then do some
        # state reduction. Let's represent sets as dicts with 1 for
        # values.
        assert isinstance(start, NFAState)
        assert isinstance(finish, NFAState)

        def closure(state):
            base = {}
            addclosure(state, base)
            return base

        def addclosure(state, base):
            assert isinstance(state, NFAState)
            if state in base:
                return
            base[state] = 1
            for label, next in state.arcs:
                if label is None:
                    addclosure(next, base)

        states = [DFAState(closure(start), finish)]
        for state in states:  # NB states grows while we're iterating
            arcs = {}
            for nfastate in state.nfaset:
                for label, next in nfastate.arcs:
                    if label is not None:
                        addclosure(next, arcs.setdefault(label, {}))
            for label, nfaset in arcs.items():
                for st in states:
                    if st.nfaset == nfaset:
                        break
                else:
                    st = DFAState(nfaset, finish)
                    states.append(st)
                state.addarc(st, label)
        return states  # List of DFAState instances; first one is start
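
    # Worked example (added for illustration, not in the original source):
    # for a rule  a: 'x' 'y'*  the NFA loops the repetition through an
    # epsilon arc.  The epsilon closure of the start state reads 'x' into a
    # final state that then loops on 'y', so after _make_dfa and
    # _simplify_dfa the rule collapses to:
    #
    #     State 0          --'x'--> State 1 (final)
    #     State 1 (final)  --'y'--> State 1 (final)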

    def _dump_nfa(self, name, start, finish):
        print("Dump of NFA for", name)
        todo = [start]
        for i, state in enumerate(todo):
            print("  State", i, state is finish and "(final)" or "")
            for label, next in state.arcs:
                if next in todo:
                    j = todo.index(next)
                else:
                    j = len(todo)
                    todo.append(next)
                if label is None:
                    print("    -> %d" % j)
                else:
                    print("    %s -> %d" % (label, j))

    def _dump_dfa(self, name, dfa):
        print("Dump of DFA for", name)
        for i, state in enumerate(dfa):
            print("  State", i, state.isfinal and "(final)" or "")
            for label, next in state.arcs.items():
                print("    %s -> %d" % (label, dfa.index(next)))

    def _simplify_dfa(self, dfa):
        # This is not theoretically optimal, but works well enough.
        # Algorithm: repeatedly look for two states that have the same
        # set of arcs (same labels pointing to the same nodes) and
        # unify them, until things stop changing.

        # dfa is a list of DFAState instances
        changes = True
        while changes:
            changes = False
            for i, state_i in enumerate(dfa):
                for j in range(i + 1, len(dfa)):
                    state_j = dfa[j]
                    if state_i == state_j:
                        #print "  unify", i, j
                        del dfa[j]
                        for state in dfa:
                            state.unifystate(state_j, state_i)
                        changes = True
                        break

    def _parse_rhs(self):
        # RHS: ALT ('|' ALT)*
        a, z = self._parse_alt()
        if self.value != "|":
            return a, z
        else:
            aa = NFAState()
            zz = NFAState()
            aa.addarc(a)
            z.addarc(zz)
            while self.value == "|":
                self._gettoken()
                a, z = self._parse_alt()
                aa.addarc(a)
                z.addarc(zz)
            return aa, zz

    def _parse_alt(self):
        # ALT: ITEM+
        a, b = self._parse_item()
        while (self.value in ("(", "[") or
               self.type in (token.NAME, token.STRING)):
            c, d = self._parse_item()
            b.addarc(c)
            b = d
        return a, b

    def _parse_item(self):
        # ITEM: '[' RHS ']' | ATOM ['+' | '*']
        if self.value == "[":
            self._gettoken()
            a, z = self._parse_rhs()
            self._expect(token.OP, "]")
            a.addarc(z)
            return a, z
        else:
            a, z = self._parse_atom()
            value = self.value
            if value not in ("+", "*"):
                return a, z
            self._gettoken()
            z.addarc(a)
            if value == "+":
                return a, z
            else:
                return a, a

    def _parse_atom(self):
        # ATOM: '(' RHS ')' | NAME | STRING
        if self.value == "(":
            self._gettoken()
            a, z = self._parse_rhs()
            self._expect(token.OP, ")")
            return a, z
        elif self.type in (token.NAME, token.STRING):
            a = NFAState()
            z = NFAState()
            a.addarc(z, self.value)
            self._gettoken()
            return a, z
        else:
            self._raise_error("expected (...) or NAME or STRING, got %s/%s",
                              self.type, self.value)

    def _expect(self, type, value=None):
        if self.type != type or (value is not None and self.value != value):
            self._raise_error("expected %s/%s, got %s/%s",
                              type, value, self.type, self.value)
        value = self.value
        self._gettoken()
        return value

    def _gettoken(self):
        tup = next(self.generator)
        while tup[0] in (token.COMMENT, token.NL):
            tup = next(self.generator)
        self.type, self.value, self.begin, prefix = tup
        #print tokenize.tok_name[self.type], repr(self.value)

    def _raise_error(self, msg, *args):
        if args:
            try:
                msg = msg % args
            except:
                msg = " ".join([msg] + list(map(str, args)))
        line = open(self.filename).readlines()[self.begin[0]]
        raise SyntaxError(msg, (self.filename, self.begin[0],
                                self.begin[1], line))


class NFAState(object):
    def __init__(self):
        self.arcs = []  # list of (label, NFAState) pairs

    def addarc(self, next, label=None):
        assert label is None or isinstance(label, str)
        assert isinstance(next, NFAState)
        self.arcs.append((label, next))


class DFAState(object):
    def __init__(self, nfaset, final):
        assert isinstance(nfaset, dict)
        assert isinstance(next(iter(nfaset)), NFAState)
        assert isinstance(final, NFAState)
        self.nfaset = nfaset
        self.isfinal = final in nfaset
        self.arcs = {}  # map from label to DFAState

    def addarc(self, next, label):
        assert isinstance(label, str)
        assert label not in self.arcs
        assert isinstance(next, DFAState)
        self.arcs[label] = next

    def unifystate(self, old, new):
        for label, next in self.arcs.items():
            if next is old:
                self.arcs[label] = new

    def __eq__(self, other):
        # Equality test -- ignore the nfaset instance variable
        assert isinstance(other, DFAState)
        if self.isfinal != other.isfinal:
            return False
        # Can't just return self.arcs == other.arcs, because that
        # would invoke this method recursively, with cycles...
        if len(self.arcs) != len(other.arcs):
            return False
        for label, next in self.arcs.items():
            if next is not other.arcs.get(label):
                return False
        return True

    __hash__ = None  # For Py3 compatibility.


def generate_grammar(bnf_text):
    """
    ``bnf_text`` is a grammar in extended BNF (using * for repetition, + for
    at-least-once repetition, [] for optional parts, | for alternatives and ()
    for grouping).

    It's not EBNF according to ISO/IEC 14977. It's a dialect Python uses in its
    own parser.
    """
    p = ParserGenerator(bnf_text)
    return p.make_grammar()
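
# Usage sketch (added for illustration, not part of the original file):
# building a grammar for a single toy rule and looking it up.
#
#     bnf = "eval_input: NAME NEWLINE ENDMARKER\n"
#     g = generate_grammar(bnf)
#     g.symbol2number['eval_input']    # -> 256, the first nonterminal number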
@@ -1,144 +0,0 @@
"""
|
||||
Parsers for Python
|
||||
"""
|
||||
import os
|
||||
|
||||
from parso.utils import splitlines, source_to_unicode
|
||||
from parso._compatibility import FileNotFoundError
|
||||
from parso.pgen2.pgen import generate_grammar
|
||||
from parso.python.parser import Parser, remove_last_newline
|
||||
from parso.python.diff import DiffParser
|
||||
from parso.tokenize import generate_tokens
|
||||
from parso.cache import parser_cache, load_module, save_module
|
||||
|
||||
|
||||
_loaded_grammars = {}
|
||||
|
||||
|
||||
def load_grammar(version=None):
|
||||
"""
|
||||
Loads a Python grammar. The default version is always the latest.
|
||||
|
||||
If you need support for a specific version, please use e.g.
|
||||
`version='3.3'`.
|
||||
"""
|
||||
if version is None:
|
||||
version = '3.6'
|
||||
|
||||
if version in ('3.2', '3.3'):
|
||||
version = '3.4'
|
||||
elif version == '2.6':
|
||||
version = '2.7'
|
||||
|
||||
file = 'grammar' + version + '.txt'
|
||||
|
||||
global _loaded_grammars
|
||||
path = os.path.join(os.path.dirname(__file__), file)
|
||||
try:
|
||||
return _loaded_grammars[path]
|
||||
except KeyError:
|
||||
try:
|
||||
with open(path) as f:
|
||||
bnf_text = f.read()
|
||||
grammar = generate_grammar(bnf_text)
|
||||
return _loaded_grammars.setdefault(path, grammar)
|
||||
except FileNotFoundError:
|
||||
# Just load the default if the file does not exist.
|
||||
return load_grammar()
|
||||
|
||||
|
||||
def parse(code=None, **kwargs):
|
||||
"""
|
||||
If you want to parse a Python file you want to start here, most likely.
|
||||
|
||||
If you need finer grained control over the parsed instance, there will be
|
||||
other ways to access it.
|
||||
|
||||
:param code: A unicode string that contains Python code.
|
||||
:param path: The path to the file you want to open. Only needed for caching.
|
||||
:param grammar: A Python grammar file, created with load_grammar. You may
|
||||
not specify it. In that case it's the current Python version.
|
||||
:param error_recovery: If enabled, any code will be returned. If it is
|
||||
invalid, it will be returned as an error node. If disabled, you will
|
||||
get a ParseError when encountering syntax errors in your code.
|
||||
:param start_symbol: The grammar symbol that you want to parse. Only
|
||||
allowed to be used when error_recovery is disabled.
|
||||
:param cache_path: If given saves the parso cache in this directory. If not
|
||||
given, defaults to the default cache places on each platform.
|
||||
|
||||
:return: A syntax tree node. Typically the module.
|
||||
"""
|
||||
# Wanted python3.5 * operator and keyword only arguments.
|
||||
path = kwargs.pop('path', None)
|
||||
grammar = kwargs.pop('grammar', None)
|
||||
error_recovery = kwargs.pop('error_recovery', True)
|
||||
start_symbol = kwargs.pop('start_symbol', 'file_input')
|
||||
cache = kwargs.pop('cache', False)
|
||||
diff_cache = kwargs.pop('diff_cache', False)
|
||||
cache_path = kwargs.pop('cache_path', None)
|
||||
|
||||
if kwargs:
|
||||
raise TypeError(
|
||||
"parse() got an unexpected keyword argument '%s'"
|
||||
% next(iter(kwargs)))
|
||||
|
||||
# Start with actual code.
|
||||
if code is None and path is None:
|
||||
raise TypeError("Please provide either code or a path.")
|
||||
|
||||
if grammar is None:
|
||||
grammar = load_grammar()
|
||||
|
||||
if cache and code is None and path is not None:
|
||||
# With the current architecture we cannot load from cache if the
|
||||
# code is given, because we just load from cache if it's not older than
|
||||
# the latest change (file last modified).
|
||||
module_node = load_module(grammar, path, cache_path=cache_path)
|
||||
if module_node is not None:
|
||||
return module_node
|
||||
|
||||
if code is None:
|
||||
with open(path, 'rb') as f:
|
||||
code = source_to_unicode(f.read())
|
||||
|
||||
lines = tokenize_lines = splitlines(code, keepends=True)
|
||||
if diff_cache:
|
||||
try:
|
||||
module_cache_item = parser_cache[path]
|
||||
except KeyError:
|
||||
pass
|
||||
else:
|
||||
module_node = module_cache_item.node
|
||||
old_lines = module_cache_item.lines
|
||||
if old_lines == lines:
|
||||
# TODO remove this line? I think it's not needed. (dave)
|
||||
save_module(grammar, path, module_node, lines, pickling=False,
|
||||
cache_path=cache_path)
|
||||
return module_node
|
||||
|
||||
new_node = DiffParser(grammar, module_node).update(
|
||||
old_lines=old_lines,
|
||||
new_lines=lines
|
||||
)
|
||||
save_module(grammar, path, new_node, lines, pickling=cache,
|
||||
cache_path=cache_path)
|
||||
return new_node
|
||||
|
||||
added_newline = not code.endswith('\n')
|
||||
if added_newline:
|
||||
code += '\n'
|
||||
tokenize_lines = list(tokenize_lines)
|
||||
tokenize_lines[-1] += '\n'
|
||||
tokenize_lines.append('')
|
||||
|
||||
tokens = generate_tokens(tokenize_lines, use_exact_op_types=True)
|
||||
|
||||
p = Parser(grammar, error_recovery=error_recovery, start_symbol=start_symbol)
|
||||
root_node = p.parse(tokens=tokens)
|
||||
if added_newline:
|
||||
remove_last_newline(root_node)
|
||||
|
||||
if cache or diff_cache:
|
||||
save_module(grammar, path, root_node, lines, pickling=cache,
|
||||
cache_path=cache_path)
|
||||
return root_node
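
# Usage sketch (added for illustration, not part of the original file):
#
#     module = parse("def f(x):\n    return x + 1\n")
#     func = module.children[0]
#     assert func.type == 'funcdef'
#
# With error_recovery=False a ParseError is raised for invalid code instead
# of an error node being inserted into the tree.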
@@ -10,11 +10,84 @@ import difflib
from collections import namedtuple
import logging

from parso.utils import splitlines
from parso.python.parser import Parser, remove_last_newline
from parso.utils import split_lines
from parso.python.parser import Parser
from parso.python.tree import EndMarker
from parso.tokenize import (generate_tokens, NEWLINE, TokenInfo,
                            ENDMARKER, INDENT, DEDENT)
from parso.python.tokenize import PythonToken
from parso.python.token import PythonTokenTypes

LOG = logging.getLogger(__name__)
DEBUG_DIFF_PARSER = False

_INDENTATION_TOKENS = 'INDENT', 'ERROR_DEDENT', 'DEDENT'


def _get_previous_leaf_if_indentation(leaf):
    while leaf and leaf.type == 'error_leaf' \
            and leaf.token_type in _INDENTATION_TOKENS:
        leaf = leaf.get_previous_leaf()
    return leaf

def _get_next_leaf_if_indentation(leaf):
    while leaf and leaf.type == 'error_leaf' \
            and leaf.token_type in _INDENTATION_TOKENS:
        leaf = leaf.get_next_leaf()
    return leaf

def _assert_valid_graph(node):
    """
    Checks if the parent/children relationship is correct.

    This is a check that only runs during debugging/testing.
    """
    try:
        children = node.children
    except AttributeError:
        # Ignoring INDENT is necessary, because indent/dedent tokens don't
        # contain value/prefix and are just around, because of the tokenizer.
        if node.type == 'error_leaf' and node.token_type in _INDENTATION_TOKENS:
            assert not node.value
            assert not node.prefix
            return

        # Calculate the content between two start positions.
        previous_leaf = _get_previous_leaf_if_indentation(node.get_previous_leaf())
        if previous_leaf is None:
            content = node.prefix
            previous_start_pos = 1, 0
        else:
            assert previous_leaf.end_pos <= node.start_pos, \
                (previous_leaf, node)

            content = previous_leaf.value + node.prefix
            previous_start_pos = previous_leaf.start_pos

        if '\n' in content or '\r' in content:
            splitted = split_lines(content)
            line = previous_start_pos[0] + len(splitted) - 1
            actual = line, len(splitted[-1])
        else:
            actual = previous_start_pos[0], previous_start_pos[1] + len(content)

        assert node.start_pos == actual, (node.start_pos, actual)
    else:
        for child in children:
            assert child.parent == node, (node, child)
            _assert_valid_graph(child)


def _get_debug_error_message(module, old_lines, new_lines):
    current_lines = split_lines(module.get_code(), keepends=True)
    current_diff = difflib.unified_diff(new_lines, current_lines)
    old_new_diff = difflib.unified_diff(old_lines, new_lines)
    import parso
    return (
        "There's an issue with the diff parser. Please "
        "report (parso v%s) - Old/New:\n%s\nActual Diff (May be empty):\n%s"
        % (parso.__version__, ''.join(old_new_diff), ''.join(current_diff))
    )


def _get_last_line(node_or_leaf):
@@ -25,46 +98,67 @@ def _get_last_line(node_or_leaf):
    return last_leaf.end_pos[0]


def _skip_dedent_error_leaves(leaf):
    while leaf is not None and leaf.type == 'error_leaf' and leaf.token_type == 'DEDENT':
        leaf = leaf.get_previous_leaf()
    return leaf


def _ends_with_newline(leaf, suffix=''):
    leaf = _skip_dedent_error_leaves(leaf)

    if leaf.type == 'error_leaf':
        typ = leaf.original_type
        typ = leaf.token_type.lower()
    else:
        typ = leaf.type

    return typ == 'newline' or suffix.endswith('\n')
    return typ == 'newline' or suffix.endswith('\n') or suffix.endswith('\r')


def _flows_finished(grammar, stack):
def _flows_finished(pgen_grammar, stack):
    """
    if, while, for and try might not be finished, because another part might
    still be parsed.
    """
    for dfa, newstate, (symbol_number, nodes) in stack:
        if grammar.number2symbol[symbol_number] in ('if_stmt', 'while_stmt',
                                                    'for_stmt', 'try_stmt'):
    for stack_node in stack:
        if stack_node.nonterminal in ('if_stmt', 'while_stmt', 'for_stmt', 'try_stmt'):
            return False
    return True


def suite_or_file_input_is_valid(grammar, stack):
    if not _flows_finished(grammar, stack):
def _func_or_class_has_suite(node):
    if node.type == 'decorated':
        node = node.children[-1]
    if node.type in ('async_funcdef', 'async_stmt'):
        node = node.children[-1]
    return node.type in ('classdef', 'funcdef') and node.children[-1].type == 'suite'


def _suite_or_file_input_is_valid(pgen_grammar, stack):
    if not _flows_finished(pgen_grammar, stack):
        return False

    for dfa, newstate, (symbol_number, nodes) in reversed(stack):
        if grammar.number2symbol[symbol_number] == 'suite':
    for stack_node in reversed(stack):
        if stack_node.nonterminal == 'decorator':
            # A decorator is only valid with the upcoming function.
            return False

        if stack_node.nonterminal == 'suite':
            # If only newline is in the suite, the suite is not valid, yet.
            return len(nodes) > 1
            return len(stack_node.nodes) > 1
    # Not reaching a suite means that we're dealing with file_input levels
    # where there's no need for a valid statement in it. It can also be empty.
    return True


def _is_flow_node(node):
    if node.type == 'async_stmt':
        node = node.children[1]
    try:
        value = node.children[0].value
    except AttributeError:
        return False
    return value in ('if', 'for', 'while', 'try')
    return value in ('if', 'for', 'while', 'try', 'with')


class _PositionUpdatingFinished(Exception):
@@ -89,15 +183,16 @@ class DiffParser(object):
    An advanced form of parsing a file faster. Unfortunately comes with huge
    side effects. It changes the given module.
    """
    def __init__(self, grammar, module):
        self._grammar = grammar
    def __init__(self, pgen_grammar, tokenizer, module):
        self._pgen_grammar = pgen_grammar
        self._tokenizer = tokenizer
        self._module = module

    def _reset(self):
        self._copy_count = 0
        self._parser_count = 0

        self._nodes_stack = _NodesStack(self._module)
        self._nodes_tree = _NodesTree(self._module)

    def update(self, old_lines, new_lines):
        '''
@@ -115,33 +210,24 @@ class DiffParser(object):

        Returns the new module node.
        '''
        logging.debug('diff parser start')
        LOG.debug('diff parser start')
        # Reset the used names cache so they get regenerated.
        self._module._used_names = None

        self._parser_lines_new = new_lines
        self._added_newline = False
        if new_lines[-1] != '':
            # The Python grammar needs a newline at the end of a file, but for
            # everything else we keep working with new_lines here.
            self._parser_lines_new = list(new_lines)
            self._parser_lines_new[-1] += '\n'
            self._parser_lines_new.append('')
            self._added_newline = True

        self._reset()

        line_length = len(new_lines)
        sm = difflib.SequenceMatcher(None, old_lines, self._parser_lines_new)
        opcodes = sm.get_opcodes()
        logging.debug('diff parser calculated')
        logging.debug('diff: line_lengths old: %s, new: %s' % (len(old_lines), line_length))
        LOG.debug('line_lengths old: %s; new: %s' % (len(old_lines), line_length))

        for operation, i1, i2, j1, j2 in opcodes:
            logging.debug('diff %s old[%s:%s] new[%s:%s]',
            LOG.debug('-> code[%s] old[%s:%s] new[%s:%s]',
                      operation, i1 + 1, i2, j1 + 1, j2)

            if j2 == line_length + int(self._added_newline):
            if j2 == line_length and new_lines[-1] == '':
                # The empty part after the last newline is not relevant.
                j2 -= 1

@@ -157,51 +243,47 @@ class DiffParser(object):

        # With this action all change will finally be applied and we have a
        # changed module.
        self._nodes_stack.close()
        self._nodes_tree.close()

        if self._added_newline:
            remove_last_newline(self._module)
        if DEBUG_DIFF_PARSER:
            # If there is reasonable suspicion that the diff parser is not
            # behaving well, this should be enabled.
            try:
                assert self._module.get_code() == ''.join(new_lines)
                _assert_valid_graph(self._module)
            except AssertionError:
                print(_get_debug_error_message(self._module, old_lines, new_lines))
                raise

        last_pos = self._module.end_pos[0]
        if last_pos != line_length:
            current_lines = splitlines(self._module.get_code(), keepends=True)
            diff = difflib.unified_diff(current_lines, new_lines)
            raise Exception(
                "There's an issue (%s != %s) with the diff parser. Please report:\n%s"
                % (last_pos, line_length, ''.join(diff))
                ('(%s != %s) ' % (last_pos, line_length))
                + _get_debug_error_message(self._module, old_lines, new_lines)
            )

        logging.debug('diff parser end')
        LOG.debug('diff parser end')
        return self._module

    def _enabled_debugging(self, old_lines, lines_new):
        if self._module.get_code() != ''.join(lines_new):
            logging.warning('parser issue:\n%s\n%s', ''.join(old_lines),
                            ''.join(lines_new))
            LOG.warning('parser issue:\n%s\n%s', ''.join(old_lines), ''.join(lines_new))

    def _copy_from_old_parser(self, line_offset, until_line_old, until_line_new):
        copied_nodes = [None]

        last_until_line = -1
        while until_line_new > self._nodes_stack.parsed_until_line:
            parsed_until_line_old = self._nodes_stack.parsed_until_line - line_offset
        while until_line_new > self._nodes_tree.parsed_until_line:
            parsed_until_line_old = self._nodes_tree.parsed_until_line - line_offset
            line_stmt = self._get_old_line_stmt(parsed_until_line_old + 1)
            if line_stmt is None:
                # Parse 1 line at least. We don't need more, because we just
                # want to get into a state where the old parser has statements
                # again that can be copied (e.g. not lines within parentheses).
                self._parse(self._nodes_stack.parsed_until_line + 1)
            elif not copied_nodes:
                # We have copied as much as possible (but definitely not too
                # much). Therefore we just parse the rest.
                # We might not reach the end, because there's a statement
                # that is not finished.
                self._parse(until_line_new)
                self._parse(self._nodes_tree.parsed_until_line + 1)
            else:
                p_children = line_stmt.parent.children
                index = p_children.index(line_stmt)

                copied_nodes = self._nodes_stack.copy_nodes(
                from_ = self._nodes_tree.parsed_until_line + 1
                copied_nodes = self._nodes_tree.copy_nodes(
                    p_children[index:],
                    until_line_old,
                    line_offset
@@ -210,15 +292,19 @@ class DiffParser(object):
                if copied_nodes:
                    self._copy_count += 1

                    from_ = copied_nodes[0].get_start_pos_of_prefix()[0] + line_offset
                    to = self._nodes_stack.parsed_until_line
                    to = self._nodes_tree.parsed_until_line

                    logging.debug('diff actually copy %s to %s', from_, to)
                    LOG.debug('copy old[%s:%s] new[%s:%s]',
                              copied_nodes[0].start_pos[0],
                              copied_nodes[-1].end_pos[0] - 1, from_, to)
                else:
                    # We have copied as much as possible (but definitely not too
                    # much). Therefore we just parse a bit more.
                    self._parse(self._nodes_tree.parsed_until_line + 1)
            # Since there are potential bugs that might loop here endlessly, we
            # just stop here.
            assert last_until_line != self._nodes_stack.parsed_until_line \
                or not copied_nodes, last_until_line
            last_until_line = self._nodes_stack.parsed_until_line
            assert last_until_line != self._nodes_tree.parsed_until_line, last_until_line
            last_until_line = self._nodes_tree.parsed_until_line

    def _get_old_line_stmt(self, old_line):
        leaf = self._module.get_leaf_for_position((old_line, 0), include_prefixes=True)
@@ -229,57 +315,36 @@ class DiffParser(object):
            node = leaf
            while node.parent.type not in ('file_input', 'suite'):
                node = node.parent
            return node

            # Make sure that if only the `else:` line of an if statement is
            # copied that not the whole thing is going to be copied.
            if node.start_pos[0] >= old_line:
                return node
        # Must be on the same line. Otherwise we need to parse that bit.
        return None

    def _get_before_insertion_node(self):
        if self._nodes_stack.is_empty():
            return None

        line = self._nodes_stack.parsed_until_line + 1
        node = self._new_module.get_last_leaf()
        while True:
            parent = node.parent
            if parent.type in ('suite', 'file_input'):
                assert node.end_pos[0] <= line
                assert node.end_pos[1] == 0 or '\n' in self._prefix
                return node
            node = parent

    def _parse(self, until_line):
        """
        Parses at least until the given line, but might just parse more until a
        valid state is reached.
        """
        last_until_line = 0
        while until_line > self._nodes_stack.parsed_until_line:
        while until_line > self._nodes_tree.parsed_until_line:
            node = self._try_parse_part(until_line)
            nodes = self._get_children_nodes(node)
            #self._insert_nodes(nodes)
            nodes = node.children

            self._nodes_stack.add_parsed_nodes(nodes)
            logging.debug(
                'parse part %s to %s (to %s in parser)',
            self._nodes_tree.add_parsed_nodes(nodes)
            LOG.debug(
                'parse_part from %s to %s (to %s in part parser)',
                nodes[0].get_start_pos_of_prefix()[0],
                self._nodes_stack.parsed_until_line,
                self._nodes_tree.parsed_until_line,
                node.end_pos[0] - 1
            )
            # Since the tokenizer sometimes has bugs, we cannot be sure that
            # this loop terminates. Therefore assert that there's always a
            # change.
            assert last_until_line != self._nodes_stack.parsed_until_line, last_until_line
            last_until_line = self._nodes_stack.parsed_until_line

    def _get_children_nodes(self, node):
        nodes = node.children
        first_element = nodes[0]
        # TODO this looks very strange...
        if first_element.type == 'error_leaf' and \
                first_element.original_type == 'indent':
            assert False, str(nodes)

        return nodes
            assert last_until_line != self._nodes_tree.parsed_until_line, last_until_line
            last_until_line = self._nodes_tree.parsed_until_line

    def _try_parse_part(self, until_line):
        """
@@ -290,16 +355,15 @@ class DiffParser(object):
        self._parser_count += 1
        # TODO speed up, shouldn't copy the whole list all the time.
        # memoryview?
        parsed_until_line = self._nodes_stack.parsed_until_line
        parsed_until_line = self._nodes_tree.parsed_until_line
        lines_after = self._parser_lines_new[parsed_until_line:]
        #print('parse_content', parsed_until_line, lines_after, until_line)
        tokens = self._diff_tokenize(
            lines_after,
            until_line,
            line_offset=parsed_until_line
        )
        self._active_parser = Parser(
            self._grammar,
            self._pgen_grammar,
            error_recovery=True
        )
        return self._active_parser.parse(tokens=tokens)
@@ -308,11 +372,11 @@ class DiffParser(object):
        is_first_token = True
        omitted_first_indent = False
        indents = []
        tokens = generate_tokens(lines, use_exact_op_types=True)
        stack = self._active_parser.pgen_parser.stack
        tokens = self._tokenizer(lines, (1, 0))
        stack = self._active_parser.stack
        for typ, string, start_pos, prefix in tokens:
            start_pos = start_pos[0] + line_offset, start_pos[1]
            if typ == INDENT:
            if typ == PythonTokenTypes.INDENT:
                indents.append(start_pos[1])
                if is_first_token:
                    omitted_first_indent = True
@@ -323,46 +387,62 @@ class DiffParser(object):
                continue
            is_first_token = False

            if typ == DEDENT:
                # In case of omitted_first_indent, it might not be dedented fully.
                # However this is a sign for us that a dedent happened.
            if typ == PythonTokenTypes.DEDENT \
                    or typ == PythonTokenTypes.ERROR_DEDENT \
                    and omitted_first_indent and len(indents) == 1:
                indents.pop()
                if omitted_first_indent and not indents:
                    # We are done here, only thing that can come now is an
                    # endmarker or another dedented code block.
                    typ, string, start_pos, prefix = next(tokens)
                    if '\n' in prefix:
                        prefix = re.sub(r'(<=\n)[^\n]+$', '', prefix)
                    if '\n' in prefix or '\r' in prefix:
                        prefix = re.sub(r'[^\n\r]+\Z', '', prefix)
                    else:
                        prefix = ''
                    yield TokenInfo(ENDMARKER, '', (start_pos[0] + line_offset, 0), prefix)
                        assert start_pos[1] >= len(prefix), repr(prefix)
                        if start_pos[1] - len(prefix) == 0:
                            prefix = ''
                    yield PythonToken(
                        PythonTokenTypes.ENDMARKER, '',
                        (start_pos[0] + line_offset, 0),
                        prefix
                    )
                    break
            elif typ == NEWLINE and start_pos[0] >= until_line:
                yield TokenInfo(typ, string, start_pos, prefix)
            elif typ == PythonTokenTypes.NEWLINE and start_pos[0] >= until_line:
                yield PythonToken(typ, string, start_pos, prefix)
                # Check if the parser is actually in a valid suite state.
                if suite_or_file_input_is_valid(self._grammar, stack):
                if _suite_or_file_input_is_valid(self._pgen_grammar, stack):
                    start_pos = start_pos[0] + 1, 0
                    while len(indents) > int(omitted_first_indent):
                        indents.pop()
                        yield TokenInfo(DEDENT, '', start_pos, '')
                        yield PythonToken(PythonTokenTypes.DEDENT, '', start_pos, '')

                    yield TokenInfo(ENDMARKER, '', start_pos, '')
                    yield PythonToken(PythonTokenTypes.ENDMARKER, '', start_pos, '')
                    break
                else:
                    continue

            yield TokenInfo(typ, string, start_pos, prefix)
            yield PythonToken(typ, string, start_pos, prefix)


class _NodesStackNode(object):
    ChildrenGroup = namedtuple('ChildrenGroup', 'children line_offset last_line_offset_leaf')
class _NodesTreeNode(object):
    _ChildrenGroup = namedtuple('_ChildrenGroup', 'prefix children line_offset last_line_offset_leaf')

    def __init__(self, tree_node, parent=None):
        self.tree_node = tree_node
        self.children_groups = []
        self._children_groups = []
        self.parent = parent
        self._node_children = []

    def close(self):
    def finish(self):
        children = []
        for children_part, line_offset, last_line_offset_leaf in self.children_groups:
        for prefix, children_part, line_offset, last_line_offset_leaf in self._children_groups:
            first_leaf = _get_next_leaf_if_indentation(
                children_part[0].get_first_leaf()
            )

            first_leaf.prefix = prefix + first_leaf.prefix
            if line_offset != 0:
                try:
                    _update_positions(
@@ -375,56 +455,61 @@ class _NodesStackNode(object):
        for node in children:
            node.parent = self.tree_node

    def add(self, children, line_offset=0, last_line_offset_leaf=None):
        group = self.ChildrenGroup(children, line_offset, last_line_offset_leaf)
        self.children_groups.append(group)
        for node_child in self._node_children:
            node_child.finish()

    def add_child_node(self, child_node):
        self._node_children.append(child_node)

    def add_tree_nodes(self, prefix, children, line_offset=0, last_line_offset_leaf=None):
        if last_line_offset_leaf is None:
            last_line_offset_leaf = children[-1].get_last_leaf()
        group = self._ChildrenGroup(prefix, children, line_offset, last_line_offset_leaf)
        self._children_groups.append(group)

    def get_last_line(self, suffix):
        line = 0
        if self.children_groups:
            children_group = self.children_groups[-1]
            last_leaf = children_group.children[-1].get_last_leaf()
            line = last_leaf.end_pos[0]
        if self._children_groups:
            children_group = self._children_groups[-1]
            last_leaf = _get_previous_leaf_if_indentation(
                children_group.last_line_offset_leaf
            )

            # Calculate the line offsets
            offset = children_group.line_offset
            if offset:
                # In case the line_offset is not applied to this specific leaf,
                # just ignore it.
                if last_leaf.line <= children_group.last_line_offset_leaf.line:
                    line += children_group.line_offset
            line = last_leaf.end_pos[0] + children_group.line_offset

            # Newlines end on the next line, which means that they would cover
            # the next line. That line is not fully parsed at this point.
            if _ends_with_newline(last_leaf, suffix):
                line -= 1
        line += suffix.count('\n')
        line += len(split_lines(suffix)) - 1

        if suffix and not suffix.endswith('\n') and not suffix.endswith('\r'):
            # This is the end of a file (that doesn't end with a newline).
            line += 1

        if self._node_children:
            return max(line, self._node_children[-1].get_last_line(suffix))
        return line


class _NodesStack(object):
    endmarker_type = 'endmarker'

class _NodesTree(object):
    def __init__(self, module):
        # Top of stack
        self._tos = self._base_node = _NodesStackNode(module)
        self._base_node = _NodesTreeNode(module)
        self._working_stack = [self._base_node]
        self._module = module
        self._last_prefix = ''
        self._prefix_remainder = ''
        self.prefix = ''

    def is_empty(self):
        return not self._base_node.children

    @property
    def parsed_until_line(self):
        return self._tos.get_last_line(self.prefix)
        return self._working_stack[-1].get_last_line(self.prefix)

    def _get_insertion_node(self, indentation_node):
        indentation = indentation_node.start_pos[1]

        # find insertion node
        node = self._tos
        while True:
            node = self._working_stack[-1]
            tree_node = node.tree_node
            if tree_node.type == 'suite':
                # A suite starts with NEWLINE, ...
@@ -439,53 +524,57 @@ class _NodesStack(object):
            elif tree_node.type == 'file_input':
                return node

            node = self._close_tos()

    def _close_tos(self):
        self._tos.close()
        self._tos = self._tos.parent
        return self._tos
            self._working_stack.pop()

    def add_parsed_nodes(self, tree_nodes):
        old_prefix = self.prefix
        tree_nodes = self._remove_endmarker(tree_nodes)
        if not tree_nodes:
            self.prefix = old_prefix + self.prefix
            return

        assert tree_nodes[0].type != 'newline'

        node = self._get_insertion_node(tree_nodes[0])
        assert node.tree_node.type in ('suite', 'file_input')
        node.add(tree_nodes)
        node.add_tree_nodes(old_prefix, tree_nodes)
        # tos = Top of stack
        self._update_tos(tree_nodes[-1])

    def _update_tos(self, tree_node):
        if tree_node.type in ('suite', 'file_input'):
            new_tos = _NodesTreeNode(tree_node)
            new_tos.add_tree_nodes('', list(tree_node.children))

            self._working_stack[-1].add_child_node(new_tos)
            self._working_stack.append(new_tos)

            self._update_tos(tree_node.children[-1])
        elif _func_or_class_has_suite(tree_node):
            self._update_tos(tree_node.children[-1])

    def _remove_endmarker(self, tree_nodes):
        """
        Helps cleaning up the tree nodes that get inserted.
        """
        last_leaf = tree_nodes[-1].get_last_leaf()
        is_endmarker = last_leaf.type == self.endmarker_type
        self._last_prefix = ''
        is_endmarker = last_leaf.type == 'endmarker'
        self._prefix_remainder = ''
        if is_endmarker:
            try:
                separation = last_leaf.prefix.rindex('\n')
            except ValueError:
                pass
            else:
            separation = max(last_leaf.prefix.rfind('\n'), last_leaf.prefix.rfind('\r'))
            if separation > -1:
                # Remove the whitespace part of the prefix after a newline.
                # That is not relevant if parentheses were opened. Always parse
                # until the end of a line.
                last_leaf.prefix, self._last_prefix = \
                last_leaf.prefix, self._prefix_remainder = \
                    last_leaf.prefix[:separation + 1], last_leaf.prefix[separation + 1:]

        first_leaf = tree_nodes[0].get_first_leaf()
        first_leaf.prefix = self.prefix + first_leaf.prefix
        self.prefix = ''

        if is_endmarker:
            self.prefix = last_leaf.prefix

            tree_nodes = tree_nodes[:-1]

        return tree_nodes

    def copy_nodes(self, tree_nodes, until_line, line_offset):
@@ -494,100 +583,129 @@ class _NodesStack(object):

        Returns the tree nodes that were copied.
        """
        tos = self._get_insertion_node(tree_nodes[0])
        if tree_nodes[0].type in ('error_leaf', 'error_node'):
            # Avoid copying errors in the beginning. Can lead to a lot of
            # issues.
            return []

        new_nodes, self._tos = self._copy_nodes(tos, tree_nodes, until_line, line_offset)
        self._get_insertion_node(tree_nodes[0])

        new_nodes, self._working_stack, self.prefix = self._copy_nodes(
            list(self._working_stack),
            tree_nodes,
            until_line,
            line_offset,
            self.prefix,
        )
        return new_nodes

    def _copy_nodes(self, tos, nodes, until_line, line_offset):
    def _copy_nodes(self, working_stack, nodes, until_line, line_offset, prefix=''):
        new_nodes = []

        new_tos = tos
        new_prefix = ''
        for node in nodes:
            if node.type == 'endmarker':
                # Endmarkers just distort all the checks below. Remove them.
                if node.start_pos[0] > until_line:
                    break

            if node.start_pos[0] > until_line:
                if node.type == 'endmarker':
                    break

                if node.type == 'error_leaf' and node.token_type in ('DEDENT', 'ERROR_DEDENT'):
                    break
            # TODO this check might take a bit of time for large files. We
            # might want to change this to do more intelligent guessing or
            # binary search.
            if _get_last_line(node) > until_line:
                # We can split up functions and classes later.
                if node.type in ('classdef', 'funcdef') and node.children[-1].type == 'suite':
                if _func_or_class_has_suite(node):
                    new_nodes.append(node)
                break

            new_nodes.append(node)

        if not new_nodes:
            return [], tos
            return [], working_stack, prefix

        tos = working_stack[-1]
        last_node = new_nodes[-1]
        line_offset_index = -1
        if last_node.type in ('classdef', 'funcdef'):
            suite = last_node.children[-1]
            if suite.type == 'suite':
                suite_tos = _NodesStackNode(suite)
                # Don't need to pass line_offset here, it's already done by the
                # parent.
                suite_nodes, recursive_tos = self._copy_nodes(
                    suite_tos, suite.children, until_line, line_offset)
                if len(suite_nodes) < 2:
                    # A suite only with newline is not valid.
                    new_nodes.pop()
                else:
                    suite_tos.parent = tos
                    new_tos = recursive_tos
                    line_offset_index = -2
        had_valid_suite_last = False
        if _func_or_class_has_suite(last_node):
            suite = last_node
            while suite.type != 'suite':
                suite = suite.children[-1]

        elif (new_nodes[-1].type in ('error_leaf', 'error_node') or
              _is_flow_node(new_nodes[-1])):
            # Error leafs/nodes don't have a defined start/end. Error
            # nodes might not end with a newline (e.g. if there's an
            # open `(`). Therefore ignore all of them unless they are
            # succeeded with valid parser state.
            # If we copy flows at the end, they might be continued
            # after the copy limit (in the new parser).
            # In this while loop we try to remove until we find a newline.
            new_nodes.pop()
            while new_nodes:
                last_node = new_nodes[-1]
                if last_node.get_last_leaf().type == 'newline':
                    break
            suite_tos = _NodesTreeNode(suite)
            # Don't need to pass line_offset here, it's already done by the
            # parent.
            suite_nodes, new_working_stack, new_prefix = self._copy_nodes(
                working_stack + [suite_tos], suite.children, until_line, line_offset
            )
            if len(suite_nodes) < 2:
                # A suite only with newline is not valid.
                new_nodes.pop()
                new_prefix = ''
            else:
                assert new_nodes
                tos.add_child_node(suite_tos)
                working_stack = new_working_stack
                had_valid_suite_last = True

        if new_nodes:
            try:
                last_line_offset_leaf = new_nodes[line_offset_index].get_last_leaf()
            except IndexError:
                line_offset = 0
                # In this case we don't have to calculate an offset, because
                # there's no children to be managed.
                last_line_offset_leaf = None
            tos.add(new_nodes, line_offset, last_line_offset_leaf)
        return new_nodes, new_tos
            last_node = new_nodes[-1]
            if (last_node.type in ('error_leaf', 'error_node') or
                    _is_flow_node(new_nodes[-1])):
                # Error leafs/nodes don't have a defined start/end. Error
                # nodes might not end with a newline (e.g. if there's an
                # open `(`). Therefore ignore all of them unless they are
                # succeeded with valid parser state.
                # If we copy flows at the end, they might be continued
                # after the copy limit (in the new parser).
                # In this while loop we try to remove until we find a newline.
                new_prefix = ''
                new_nodes.pop()
                while new_nodes:
                    last_node = new_nodes[-1]
                    if last_node.get_last_leaf().type == 'newline':
                        break
                    new_nodes.pop()

    def _update_tos(self, tree_node):
        if tree_node.type in ('suite', 'file_input'):
            self._tos = _NodesStackNode(tree_node, self._tos)
            self._tos.add(list(tree_node.children))
            self._update_tos(tree_node.children[-1])
        elif tree_node.type in ('classdef', 'funcdef'):
            self._update_tos(tree_node.children[-1])
        if new_nodes:
            if not _ends_with_newline(new_nodes[-1].get_last_leaf()) and not had_valid_suite_last:
                p = new_nodes[-1].get_next_leaf().prefix
                # We are not allowed to remove the newline at the end of the
                # line, otherwise it's going to be missing. This happens e.g.
                # if a bracket is around before that moves newlines to
                # prefixes.
                new_prefix = split_lines(p, keepends=True)[0]

            if had_valid_suite_last:
                last = new_nodes[-1]
                if last.type == 'decorated':
                    last = last.children[-1]
                if last.type in ('async_funcdef', 'async_stmt'):
                    last = last.children[-1]
                last_line_offset_leaf = last.children[-2].get_last_leaf()
                assert last_line_offset_leaf == ':'
            else:
                last_line_offset_leaf = new_nodes[-1].get_last_leaf()
            tos.add_tree_nodes(prefix, new_nodes, line_offset, last_line_offset_leaf)
            prefix = new_prefix
            self._prefix_remainder = ''

        return new_nodes, working_stack, prefix

    def close(self):
        while self._tos is not None:
            self._close_tos()
        self._base_node.finish()

        # Add an endmarker.
        try:
            last_leaf = self._module.get_last_leaf()
            end_pos = list(last_leaf.end_pos)
        except IndexError:
            end_pos = [1, 0]
            lines = splitlines(self.prefix)
        else:
            last_leaf = _skip_dedent_error_leaves(last_leaf)
            end_pos = list(last_leaf.end_pos)
        lines = split_lines(self.prefix)
        assert len(lines) > 0
        if len(lines) == 1:
            end_pos[1] += len(lines[0])
@@ -595,6 +713,6 @@ class _NodesStack(object):
            end_pos[0] += len(lines) - 1
            end_pos[1] = len(lines[-1])

        endmarker = EndMarker('', tuple(end_pos), self.prefix + self._last_prefix)
        endmarker = EndMarker('', tuple(end_pos), self.prefix + self._prefix_remainder)
        endmarker.parent = self._module
        self._module.children.append(endmarker)
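
# Usage sketch (added for illustration; in practice DiffParser is driven by
# parse(..., diff_cache=True) rather than being called directly):
#
#     old_lines = split_lines(old_code, keepends=True)
#     new_lines = split_lines(new_code, keepends=True)
#     diff_parser = DiffParser(pgen_grammar, tokenizer, old_module)
#     new_module = diff_parser.update(old_lines=old_lines, new_lines=new_lines)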
parso/python/errors.py (new file, 1011 lines; diff suppressed because it is too large)
parso/python/grammar26.txt (new file, 159 lines)
@@ -0,0 +1,159 @@
# Grammar for Python

# Note: Changing the grammar specified in this file will most likely
#       require corresponding changes in the parser module
#       (../Modules/parsermodule.c). If you can't make the changes to
#       that module yourself, please co-ordinate the required changes
#       with someone who can; ask around on python-dev for help. Fred
#       Drake <fdrake@acm.org> will probably be listening there.

# NOTE WELL: You should also follow all the steps listed in PEP 306,
# "How to Change Python's Grammar"

# Commands for Kees Blom's railroad program
#diagram:token NAME
#diagram:token NUMBER
#diagram:token STRING
#diagram:token NEWLINE
#diagram:token ENDMARKER
#diagram:token INDENT
#diagram:output\input python.bla
#diagram:token DEDENT
#diagram:output\textwidth 20.04cm\oddsidemargin 0.0cm\evensidemargin 0.0cm
#diagram:rules

# Start symbols for the grammar:
#       single_input is a single interactive statement;
#       file_input is a module or sequence of commands read from an input file;
#       eval_input is the input for the eval() and input() functions.
# NB: compound_stmt in single_input is followed by extra NEWLINE!
single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
file_input: (NEWLINE | stmt)* ENDMARKER
eval_input: testlist NEWLINE* ENDMARKER

decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE
decorators: decorator+
decorated: decorators (classdef | funcdef)
funcdef: 'def' NAME parameters ':' suite
parameters: '(' [varargslist] ')'
varargslist: ((fpdef ['=' test] ',')*
              ('*' NAME [',' '**' NAME] | '**' NAME) |
              fpdef ['=' test] (',' fpdef ['=' test])* [','])
fpdef: NAME | '(' fplist ')'
fplist: fpdef (',' fpdef)* [',']

stmt: simple_stmt | compound_stmt
simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
small_stmt: (expr_stmt | print_stmt | del_stmt | pass_stmt | flow_stmt |
             import_stmt | global_stmt | exec_stmt | assert_stmt)
expr_stmt: testlist (augassign (yield_expr|testlist) |
                     ('=' (yield_expr|testlist))*)
augassign: ('+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' |
            '<<=' | '>>=' | '**=' | '//=')
# For normal assignments, additional restrictions enforced by the interpreter
print_stmt: 'print' ( [ test (',' test)* [','] ] |
                      '>>' test [ (',' test)+ [','] ] )
del_stmt: 'del' exprlist
pass_stmt: 'pass'
flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt
break_stmt: 'break'
continue_stmt: 'continue'
return_stmt: 'return' [testlist]
yield_stmt: yield_expr
raise_stmt: 'raise' [test [',' test [',' test]]]
import_stmt: import_name | import_from
import_name: 'import' dotted_as_names
import_from: ('from' ('.'* dotted_name | '.'+)
              'import' ('*' | '(' import_as_names ')' | import_as_names))
import_as_name: NAME ['as' NAME]
dotted_as_name: dotted_name ['as' NAME]
import_as_names: import_as_name (',' import_as_name)* [',']
dotted_as_names: dotted_as_name (',' dotted_as_name)*
dotted_name: NAME ('.' NAME)*
global_stmt: 'global' NAME (',' NAME)*
exec_stmt: 'exec' expr ['in' test [',' test]]
assert_stmt: 'assert' test [',' test]

compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated
if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite]
while_stmt: 'while' test ':' suite ['else' ':' suite]
for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite]
try_stmt: ('try' ':' suite
           ((except_clause ':' suite)+
            ['else' ':' suite]
            ['finally' ':' suite] |
            'finally' ':' suite))
with_stmt: 'with' with_item ':' suite
# Dave: Python2.6 actually defines a little bit of a different label called
#       'with_var'. However in 2.7+ this is the default. Apply it for
#       consistency reasons.
with_item: test ['as' expr]
# NB compile.c makes sure that the default except clause is last
except_clause: 'except' [test [('as' | ',') test]]
suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT

# Backward compatibility cruft to support:
# [ x for x in lambda: True, lambda: False if x() ]
# even while also allowing:
# lambda x: 5 if x else 2
# (But not a mix of the two)
testlist_safe: old_test [(',' old_test)+ [',']]
old_test: or_test | old_lambdef
old_lambdef: 'lambda' [varargslist] ':' old_test

test: or_test ['if' or_test 'else' test] | lambdef
or_test: and_test ('or' and_test)*
and_test: not_test ('and' not_test)*
not_test: 'not' not_test | comparison
comparison: expr (comp_op expr)*
comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'
expr: xor_expr ('|' xor_expr)*
xor_expr: and_expr ('^' and_expr)*
and_expr: shift_expr ('&' shift_expr)*
shift_expr: arith_expr (('<<'|'>>') arith_expr)*
arith_expr: term (('+'|'-') term)*
term: factor (('*'|'/'|'%'|'//') factor)*
factor: ('+'|'-'|'~') factor | power
power: atom trailer* ['**' factor]
atom: ('(' [yield_expr|testlist_comp] ')' |
       '[' [listmaker] ']' |
       '{' [dictorsetmaker] '}' |
       '`' testlist1 '`' |
       NAME | NUMBER | strings)
strings: STRING+
listmaker: test ( list_for | (',' test)* [','] )
# Dave: Renamed testlist_gexpr to testlist_comp, because in 2.7+ this is the
#       default. It's more consistent like this.
testlist_comp: test ( gen_for | (',' test)* [','] )
lambdef: 'lambda' [varargslist] ':' test
trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
subscriptlist: subscript (',' subscript)* [',']
subscript: '.' '.' '.' | test | [test] ':' [test] [sliceop]
sliceop: ':' [test]
exprlist: expr (',' expr)* [',']
testlist: test (',' test)* [',']
# Dave: Rename from dictmaker to dictorsetmaker, because this is more
#       consistent with the following grammars.
dictorsetmaker: test ':' test (',' test ':' test)* [',']

classdef: 'class' NAME ['(' [testlist] ')'] ':' suite

arglist: (argument ',')* (argument [',']
                          |'*' test (',' argument)* [',' '**' test]
                          |'**' test)
argument: test [gen_for] | test '=' test  # Really [keyword '='] test

list_iter: list_for | list_if
list_for: 'for' exprlist 'in' testlist_safe [list_iter]
list_if: 'if' old_test [list_iter]

gen_iter: gen_for | gen_if
gen_for: 'for' exprlist 'in' or_test [gen_iter]
gen_if: 'if' old_test [gen_iter]

testlist1: test (',' test)*

# not used in grammar, but may appear in "node" passed from Parser to Compiler
encoding_decl: NAME

yield_expr: 'yield' [testlist]
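
# Examples of 2.6-only constructs covered by the rules above (added for
# illustration, not part of the grammar file):
#     print >> sys.stderr, "message"        # print_stmt with '>>' redirection
#     exec "x = 1" in globals(), locals()   # exec_stmt
#     x = `obj`                             # atom: backtick repr syntax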
@@ -1,4 +1,4 @@
# Grammar for 2to3. This grammar supports Python 2.x and 3.x.
# Grammar for Python

# Note: Changing the grammar specified in this file will most likely
# require corresponding changes in the parser module
@@ -10,41 +10,32 @@
# NOTE WELL: You should also follow all the steps listed in PEP 306,
# "How to Change Python's Grammar"


# Start symbols for the grammar:
# file_input is a module or sequence of commands read from an input file;
# single_input is a single interactive statement;
# eval_input is the input for the eval() and input() functions.
# single_input is a single interactive statement;
# file_input is a module or sequence of commands read from an input file;
# eval_input is the input for the eval() and input() functions.
# NB: compound_stmt in single_input is followed by extra NEWLINE!
file_input: (NEWLINE | stmt)* ENDMARKER
single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
file_input: (NEWLINE | stmt)* ENDMARKER
eval_input: testlist NEWLINE* ENDMARKER

decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE
decorators: decorator+
decorated: decorators (classdef | funcdef)
funcdef: 'def' NAME parameters ['->' test] ':' suite
parameters: '(' [typedargslist] ')'
typedargslist: ((tfpdef ['=' test] ',')*
('*' [tname] (',' tname ['=' test])* [',' '**' tname] | '**' tname)
| tfpdef ['=' test] (',' tfpdef ['=' test])* [','])
tname: NAME [':' test]
tfpdef: tname | '(' tfplist ')'
tfplist: tfpdef (',' tfpdef)* [',']
varargslist: ((vfpdef ['=' test] ',')*
('*' [vname] (',' vname ['=' test])* [',' '**' vname] | '**' vname)
| vfpdef ['=' test] (',' vfpdef ['=' test])* [','])
vname: NAME
vfpdef: vname | '(' vfplist ')'
vfplist: vfpdef (',' vfpdef)* [',']
funcdef: 'def' NAME parameters ':' suite
parameters: '(' [varargslist] ')'
varargslist: ((fpdef ['=' test] ',')*
('*' NAME [',' '**' NAME] | '**' NAME) |
fpdef ['=' test] (',' fpdef ['=' test])* [','])
fpdef: NAME | '(' fplist ')'
fplist: fpdef (',' fpdef)* [',']

stmt: simple_stmt | compound_stmt
simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
small_stmt: (expr_stmt | print_stmt | del_stmt | pass_stmt | flow_stmt |
import_stmt | global_stmt | exec_stmt | assert_stmt)
expr_stmt: testlist_star_expr (augassign (yield_expr|testlist) |
('=' (yield_expr|testlist_star_expr))*)
testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [',']
expr_stmt: testlist (augassign (yield_expr|testlist) |
('=' (yield_expr|testlist))*)
augassign: ('+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' |
'<<=' | '>>=' | '**=' | '//=')
# For normal assignments, additional restrictions enforced by the interpreter
@@ -60,8 +51,7 @@ yield_stmt: yield_expr
raise_stmt: 'raise' [test [',' test [',' test]]]
import_stmt: import_name | import_from
import_name: 'import' dotted_as_names
# note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS
import_from: ('from' (('.' | '...')* dotted_name | ('.' | '...')+)
import_from: ('from' ('.'* dotted_name | '.'+)
'import' ('*' | '(' import_as_names ')' | import_as_names))
import_as_name: NAME ['as' NAME]
dotted_as_name: dotted_name ['as' NAME]
@@ -78,17 +68,14 @@ while_stmt: 'while' test ':' suite ['else' ':' suite]
for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite]
try_stmt: ('try' ':' suite
((except_clause ':' suite)+
['else' ':' suite]
['finally' ':' suite] |
'finally' ':' suite))
['else' ':' suite]
['finally' ':' suite] |
'finally' ':' suite))
with_stmt: 'with' with_item (',' with_item)* ':' suite
with_item: test ['as' expr]
with_var: 'as' expr
# NB compile.c makes sure that the default except clause is last
except_clause: 'except' [test [(',' | 'as') test]]
# Edit by David Halter: The stmt is now optional. This reflects how Jedi allows
# classes and functions to be empty, which is beneficial for autocompletion.
suite: simple_stmt | NEWLINE INDENT stmt* DEDENT
except_clause: 'except' [test [('as' | ',') test]]
suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT

# Backward compatibility cruft to support:
# [ x for x in lambda: True, lambda: False if x() ]
@@ -105,7 +92,6 @@ and_test: not_test ('and' not_test)*
not_test: 'not' not_test | comparison
comparison: expr (comp_op expr)*
comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'
star_expr: '*' expr
expr: xor_expr ('|' xor_expr)*
xor_expr: and_expr ('^' and_expr)*
and_expr: shift_expr ('&' shift_expr)*
@@ -115,33 +101,38 @@ term: factor (('*'|'/'|'%'|'//') factor)*
factor: ('+'|'-'|'~') factor | power
power: atom trailer* ['**' factor]
atom: ('(' [yield_expr|testlist_comp] ')' |
'[' [testlist_comp] ']' |
'[' [listmaker] ']' |
'{' [dictorsetmaker] '}' |
'`' testlist1 '`' |
NAME | NUMBER | STRING+ | '.' '.' '.')
# Modification by David Halter, remove `testlist_gexp` and `listmaker`
testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
NAME | NUMBER | strings)
strings: STRING+
listmaker: test ( list_for | (',' test)* [','] )
testlist_comp: test ( sync_comp_for | (',' test)* [','] )
lambdef: 'lambda' [varargslist] ':' test
trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
subscriptlist: subscript (',' subscript)* [',']
subscript: test | [test] ':' [test] [sliceop]
subscript: '.' '.' '.' | test | [test] ':' [test] [sliceop]
sliceop: ':' [test]
exprlist: (expr|star_expr) (',' (expr|star_expr))* [',']
exprlist: expr (',' expr)* [',']
testlist: test (',' test)* [',']
# Modification by David Halter, dictsetmaker -> dictorsetmaker (so that it's
# the same as in the 3.4 grammar).
dictorsetmaker: ( (test ':' test (comp_for | (',' test ':' test)* [','])) |
(test (comp_for | (',' test)* [','])) )
dictorsetmaker: ( (test ':' test (sync_comp_for | (',' test ':' test)* [','])) |
(test (sync_comp_for | (',' test)* [','])) )

classdef: 'class' NAME ['(' [arglist] ')'] ':' suite
classdef: 'class' NAME ['(' [testlist] ')'] ':' suite

arglist: (argument ',')* (argument [',']
|'*' test (',' argument)* [',' '**' test]
|'**' test)
argument: test [comp_for] | test '=' test  # Really [keyword '='] test
# The reason that keywords are test nodes instead of NAME is that using NAME
# results in an ambiguity. ast.c makes sure it's a NAME.
argument: test [sync_comp_for] | test '=' test

comp_iter: comp_for | comp_if
comp_for: 'for' exprlist 'in' testlist_safe [comp_iter]
list_iter: list_for | list_if
list_for: 'for' exprlist 'in' testlist_safe [list_iter]
list_if: 'if' old_test [list_iter]

comp_iter: sync_comp_for | comp_if
sync_comp_for: 'for' exprlist 'in' or_test [comp_iter]
comp_if: 'if' old_test [comp_iter]

testlist1: test (',' test)*
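
The hunks above keep the legacy list_for/gen_for rules while the unified rule is renamed to sync_comp_for. A small helper makes it easy to check which nonterminal a comprehension lands on; this is a sketch, and the type name depends on the parso version (older releases used comp_for for the same node):

import parso

def find_first(node, type_):
    # Depth-first search for the first node of the given type (sketch).
    if node.type == type_:
        return node
    for child in getattr(node, 'children', []):
        found = find_first(child, type_)
        if found is not None:
            return found
    return None

module = parso.parse('[x for x in range(3)]\n')
print(find_first(module, 'sync_comp_for'))
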
@@ -15,8 +15,8 @@
# file_input is a module or sequence of commands read from an input file;
# eval_input is the input for the eval() functions.
# NB: compound_stmt in single_input is followed by extra NEWLINE!
file_input: (NEWLINE | stmt)* ENDMARKER
single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
file_input: (NEWLINE | stmt)* ENDMARKER
eval_input: testlist NEWLINE* ENDMARKER

decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE
@@ -78,9 +78,7 @@ with_stmt: 'with' with_item (',' with_item)* ':' suite
with_item: test ['as' expr]
# NB compile.c makes sure that the default except clause is last
except_clause: 'except' [test ['as' NAME]]
# Edit by David Halter: The stmt is now optional. This reflects how Jedi allows
# classes and functions to be empty, which is beneficial for autocompletion.
suite: simple_stmt | NEWLINE INDENT stmt* DEDENT
suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT

test: or_test ['if' or_test 'else' test] | lambdef
test_nocond: or_test | lambdef_nocond
@@ -105,16 +103,17 @@ power: atom trailer* ['**' factor]
atom: ('(' [yield_expr|testlist_comp] ')' |
'[' [testlist_comp] ']' |
'{' [dictorsetmaker] '}' |
NAME | NUMBER | STRING+ | '...' | 'None' | 'True' | 'False')
testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
NAME | NUMBER | strings | '...' | 'None' | 'True' | 'False')
strings: STRING+
testlist_comp: (test|star_expr) ( sync_comp_for | (',' (test|star_expr))* [','] )
trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
subscriptlist: subscript (',' subscript)* [',']
subscript: test | [test] ':' [test] [sliceop]
sliceop: ':' [test]
exprlist: (expr|star_expr) (',' (expr|star_expr))* [',']
testlist: test (',' test)* [',']
dictorsetmaker: ( (test ':' test (comp_for | (',' test ':' test)* [','])) |
(test (comp_for | (',' test)* [','])) )
dictorsetmaker: ( (test ':' test (sync_comp_for | (',' test ':' test)* [','])) |
(test (sync_comp_for | (',' test)* [','])) )

classdef: 'class' NAME ['(' [arglist] ')'] ':' suite

@@ -123,9 +122,9 @@ arglist: (argument ',')* (argument [','
|'**' test)
# The reason that keywords are test nodes instead of NAME is that using NAME
# results in an ambiguity. ast.c makes sure it's a NAME.
argument: test [comp_for] | test '=' test  # Really [keyword '='] test
comp_iter: comp_for | comp_if
comp_for: 'for' exprlist 'in' or_test [comp_iter]
argument: test [sync_comp_for] | test '=' test  # Really [keyword '='] test
comp_iter: sync_comp_for | comp_if
sync_comp_for: 'for' exprlist 'in' or_test [comp_iter]
comp_if: 'if' test_nocond [comp_iter]

# not used in grammar, but may appear in "node" passed from Parser to Compiler
parso/python/grammar34.txt (new file, 134 lines)
@@ -0,0 +1,134 @@
# Grammar for Python

# Note: Changing the grammar specified in this file will most likely
# require corresponding changes in the parser module
# (../Modules/parsermodule.c).  If you can't make the changes to
# that module yourself, please co-ordinate the required changes
# with someone who can; ask around on python-dev for help.  Fred
# Drake <fdrake@acm.org> will probably be listening there.

# NOTE WELL: You should also follow all the steps listed at
# https://docs.python.org/devguide/grammar.html

# Start symbols for the grammar:
# single_input is a single interactive statement;
# file_input is a module or sequence of commands read from an input file;
# eval_input is the input for the eval() functions.
# NB: compound_stmt in single_input is followed by extra NEWLINE!
single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
file_input: (NEWLINE | stmt)* ENDMARKER
eval_input: testlist NEWLINE* ENDMARKER

decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE
decorators: decorator+
decorated: decorators (classdef | funcdef)
funcdef: 'def' NAME parameters ['->' test] ':' suite
parameters: '(' [typedargslist] ')'
typedargslist: (tfpdef ['=' test] (',' tfpdef ['=' test])* [','
['*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef]]
| '*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef)
tfpdef: NAME [':' test]
varargslist: (vfpdef ['=' test] (',' vfpdef ['=' test])* [','
['*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef]]
| '*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef)
vfpdef: NAME

stmt: simple_stmt | compound_stmt
simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
small_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt |
import_stmt | global_stmt | nonlocal_stmt | assert_stmt)
expr_stmt: testlist_star_expr (augassign (yield_expr|testlist) |
('=' (yield_expr|testlist_star_expr))*)
testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [',']
augassign: ('+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' |
'<<=' | '>>=' | '**=' | '//=')
# For normal assignments, additional restrictions enforced by the interpreter
del_stmt: 'del' exprlist
pass_stmt: 'pass'
flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt
break_stmt: 'break'
continue_stmt: 'continue'
return_stmt: 'return' [testlist]
yield_stmt: yield_expr
raise_stmt: 'raise' [test ['from' test]]
import_stmt: import_name | import_from
import_name: 'import' dotted_as_names
# note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS
import_from: ('from' (('.' | '...')* dotted_name | ('.' | '...')+)
'import' ('*' | '(' import_as_names ')' | import_as_names))
import_as_name: NAME ['as' NAME]
dotted_as_name: dotted_name ['as' NAME]
import_as_names: import_as_name (',' import_as_name)* [',']
dotted_as_names: dotted_as_name (',' dotted_as_name)*
dotted_name: NAME ('.' NAME)*
global_stmt: 'global' NAME (',' NAME)*
nonlocal_stmt: 'nonlocal' NAME (',' NAME)*
assert_stmt: 'assert' test [',' test]

compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated
if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite]
while_stmt: 'while' test ':' suite ['else' ':' suite]
for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite]
try_stmt: ('try' ':' suite
((except_clause ':' suite)+
['else' ':' suite]
['finally' ':' suite] |
'finally' ':' suite))
with_stmt: 'with' with_item (',' with_item)* ':' suite
with_item: test ['as' expr]
# NB compile.c makes sure that the default except clause is last
except_clause: 'except' [test ['as' NAME]]
suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT

test: or_test ['if' or_test 'else' test] | lambdef
test_nocond: or_test | lambdef_nocond
lambdef: 'lambda' [varargslist] ':' test
lambdef_nocond: 'lambda' [varargslist] ':' test_nocond
or_test: and_test ('or' and_test)*
and_test: not_test ('and' not_test)*
not_test: 'not' not_test | comparison
comparison: expr (comp_op expr)*
# <> isn't actually a valid comparison operator in Python. It's here for the
# sake of a __future__ import described in PEP 401
comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'
star_expr: '*' expr
expr: xor_expr ('|' xor_expr)*
xor_expr: and_expr ('^' and_expr)*
and_expr: shift_expr ('&' shift_expr)*
shift_expr: arith_expr (('<<'|'>>') arith_expr)*
arith_expr: term (('+'|'-') term)*
term: factor (('*'|'/'|'%'|'//') factor)*
factor: ('+'|'-'|'~') factor | power
power: atom trailer* ['**' factor]
atom: ('(' [yield_expr|testlist_comp] ')' |
'[' [testlist_comp] ']' |
'{' [dictorsetmaker] '}' |
NAME | NUMBER | strings | '...' | 'None' | 'True' | 'False')
strings: STRING+
testlist_comp: (test|star_expr) ( sync_comp_for | (',' (test|star_expr))* [','] )
trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
subscriptlist: subscript (',' subscript)* [',']
subscript: test | [test] ':' [test] [sliceop]
sliceop: ':' [test]
exprlist: (expr|star_expr) (',' (expr|star_expr))* [',']
testlist: test (',' test)* [',']
dictorsetmaker: ( (test ':' test (sync_comp_for | (',' test ':' test)* [','])) |
(test (sync_comp_for | (',' test)* [','])) )

classdef: 'class' NAME ['(' [arglist] ')'] ':' suite

arglist: (argument ',')* (argument [','
|'*' test (',' argument)* [',' '**' test]
|'**' test)
# The reason that keywords are test nodes instead of NAME is that using NAME
# results in an ambiguity. ast.c makes sure it's a NAME.
argument: test [sync_comp_for] | test '=' test  # Really [keyword '='] test
comp_iter: sync_comp_for | comp_if
sync_comp_for: 'for' exprlist 'in' or_test [comp_iter]
comp_if: 'if' test_nocond [comp_iter]

# not used in grammar, but may appear in "node" passed from Parser to Compiler
encoding_decl: NAME

yield_expr: 'yield' [yield_arg]
yield_arg: 'from' test | testlist
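
grammar34.txt above is the file consulted when a 3.4 grammar is requested. A minimal sketch, assuming a parso release that still ships this version:

import parso

grammar = parso.load_grammar(version='3.4')
module = grammar.parse('def gen():\n    yield from range(3)\n')
funcdef = module.children[0]
print(funcdef.type, funcdef.name.value)  # funcdef gen
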
@@ -15,8 +15,8 @@
# file_input is a module or sequence of commands read from an input file;
# eval_input is the input for the eval() functions.
# NB: compound_stmt in single_input is followed by extra NEWLINE!
file_input: (NEWLINE | stmt)* ENDMARKER
single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
file_input: (NEWLINE | stmt)* ENDMARKER
eval_input: testlist NEWLINE* ENDMARKER

decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE
@@ -84,9 +84,7 @@ with_stmt: 'with' with_item (',' with_item)* ':' suite
with_item: test ['as' expr]
# NB compile.c makes sure that the default except clause is last
except_clause: 'except' [test ['as' NAME]]
# Edit by David Halter: The stmt is now optional. This reflects how Jedi allows
# classes and functions to be empty, which is beneficial for autocompletion.
suite: simple_stmt | NEWLINE INDENT stmt* DEDENT
suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT

test: or_test ['if' or_test 'else' test] | lambdef
test_nocond: or_test | lambdef_nocond
@@ -112,8 +110,9 @@ atom_expr: ['await'] atom trailer*
atom: ('(' [yield_expr|testlist_comp] ')' |
'[' [testlist_comp] ']' |
'{' [dictorsetmaker] '}' |
NAME | NUMBER | STRING+ | '...' | 'None' | 'True' | 'False')
testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
NAME | NUMBER | strings | '...' | 'None' | 'True' | 'False')
strings: STRING+
testlist_comp: (test|star_expr) ( sync_comp_for | (',' (test|star_expr))* [','] )
trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
subscriptlist: subscript (',' subscript)* [',']
subscript: test | [test] ':' [test] [sliceop]
@@ -121,9 +120,9 @@ sliceop: ':' [test]
exprlist: (expr|star_expr) (',' (expr|star_expr))* [',']
testlist: test (',' test)* [',']
dictorsetmaker: ( ((test ':' test | '**' expr)
(comp_for | (',' (test ':' test | '**' expr))* [','])) |
(sync_comp_for | (',' (test ':' test | '**' expr))* [','])) |
((test | star_expr)
(comp_for | (',' (test | star_expr))* [','])) )
(sync_comp_for | (',' (test | star_expr))* [','])) )

classdef: 'class' NAME ['(' [arglist] ')'] ':' suite

@@ -136,15 +135,15 @@ arglist: argument (',' argument)* [',']
# to our LL(1) parser. Even though 'test' includes '*expr' in star_expr,
# we explicitly match '*' here, too, to give it proper precedence.
# Illegal combinations and orderings are blocked in ast.c:
# multiple (test comp_for) arguements are blocked; keyword unpackings
# multiple (test comp_for) arguments are blocked; keyword unpackings
# that precede iterable unpackings are blocked; etc.
argument: ( test [comp_for] |
argument: ( test [sync_comp_for] |
test '=' test |
'**' test |
'*' test )

comp_iter: comp_for | comp_if
comp_for: 'for' exprlist 'in' or_test [comp_iter]
comp_iter: sync_comp_for | comp_if
sync_comp_for: 'for' exprlist 'in' or_test [comp_iter]
comp_if: 'if' test_nocond [comp_iter]

# not used in grammar, but may appear in "node" passed from Parser to Compiler
@@ -1,24 +1,16 @@
# Grammar for Python

# Note: Changing the grammar specified in this file will most likely
# require corresponding changes in the parser module
# (../Modules/parsermodule.c).  If you can't make the changes to
# that module yourself, please co-ordinate the required changes
# with someone who can; ask around on python-dev for help.  Fred
# Drake <fdrake@acm.org> will probably be listening there.

# NOTE WELL: You should also follow all the steps listed at
# https://docs.python.org/devguide/grammar.html

# Start symbols for the grammar:
# file_input is a module or sequence of commands read from an input file;
# single_input is a single interactive statement;
# file_input is a module or sequence of commands read from an input file;
# eval_input is the input for the eval() functions.
# NB: compound_stmt in single_input is followed by extra NEWLINE!
file_input: (NEWLINE | stmt)* ENDMARKER
single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
file_input: (NEWLINE | stmt)* ENDMARKER
eval_input: testlist NEWLINE* ENDMARKER

decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE
decorators: decorator+
decorated: decorators (classdef | funcdef | async_funcdef)
@@ -90,10 +82,7 @@ with_stmt: 'with' with_item (',' with_item)* ':' suite
with_item: test ['as' expr]
# NB compile.c makes sure that the default except clause is last
except_clause: 'except' [test ['as' NAME]]
# Edit by Francisco Souza/David Halter: The stmt is now optional. This reflects
# how Jedi allows classes and functions to be empty, which is beneficial for
# autocompletion.
suite: simple_stmt | NEWLINE INDENT stmt* DEDENT
suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT

test: or_test ['if' or_test 'else' test] | lambdef
test_nocond: or_test | lambdef_nocond
@@ -119,7 +108,7 @@ atom_expr: ['await'] atom trailer*
atom: ('(' [yield_expr|testlist_comp] ')' |
'[' [testlist_comp] ']' |
'{' [dictorsetmaker] '}' |
NAME | NUMBER | STRING+ | '...' | 'None' | 'True' | 'False')
NAME | NUMBER | strings | '...' | 'None' | 'True' | 'False')
testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
subscriptlist: subscript (',' subscript)* [',']
@@ -151,7 +140,8 @@ argument: ( test [comp_for] |
'*' test )

comp_iter: comp_for | comp_if
comp_for: ['async'] 'for' exprlist 'in' or_test [comp_iter]
sync_comp_for: 'for' exprlist 'in' or_test [comp_iter]
comp_for: ['async'] sync_comp_for
comp_if: 'if' test_nocond [comp_iter]

# not used in grammar, but may appear in "node" passed from Parser to Compiler
@@ -159,3 +149,10 @@ encoding_decl: NAME

yield_expr: 'yield' [yield_arg]
yield_arg: 'from' test | testlist

strings: (STRING | fstring)+
fstring: FSTRING_START fstring_content* FSTRING_END
fstring_content: FSTRING_STRING | fstring_expr
fstring_conversion: '!' NAME
fstring_expr: '{' testlist_comp [ fstring_conversion ] [ fstring_format_spec ] '}'
fstring_format_spec: ':' fstring_content*
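
With the rules just added, an f-string is no longer a single STRING token: it is tokenized into FSTRING_START, FSTRING_STRING and FSTRING_END pieces and parsed into an ordinary fstring node. Roughly, as a sketch (the leaf type name is an assumption based on the _leaf_map in parser.py further down):

import parso

module = parso.parse('f"value: {x!r}"\n', version='3.6')
leaf = module.get_first_leaf()
print(leaf.type)  # expected: 'fstring_start'
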
parso/python/grammar37.txt (new file, 156 lines)
@@ -0,0 +1,156 @@
# Grammar for Python

# NOTE WELL: You should also follow all the steps listed at
# https://docs.python.org/devguide/grammar.html

# Start symbols for the grammar:
# single_input is a single interactive statement;
# file_input is a module or sequence of commands read from an input file;
# eval_input is the input for the eval() functions.
# NB: compound_stmt in single_input is followed by extra NEWLINE!
single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
file_input: (NEWLINE | stmt)* ENDMARKER
eval_input: testlist NEWLINE* ENDMARKER
decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE
decorators: decorator+
decorated: decorators (classdef | funcdef | async_funcdef)

async_funcdef: 'async' funcdef
funcdef: 'def' NAME parameters ['->' test] ':' suite

parameters: '(' [typedargslist] ')'
typedargslist: (tfpdef ['=' test] (',' tfpdef ['=' test])* [',' [
'*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]]
| '**' tfpdef [',']]]
| '*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]]
| '**' tfpdef [','])
tfpdef: NAME [':' test]
varargslist: (vfpdef ['=' test] (',' vfpdef ['=' test])* [',' [
'*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]]
| '**' vfpdef [',']]]
| '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]]
| '**' vfpdef [',']
)
vfpdef: NAME

stmt: simple_stmt | compound_stmt
simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
small_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt |
import_stmt | global_stmt | nonlocal_stmt | assert_stmt)
expr_stmt: testlist_star_expr (annassign | augassign (yield_expr|testlist) |
('=' (yield_expr|testlist_star_expr))*)
annassign: ':' test ['=' test]
testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [',']
augassign: ('+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^=' |
'<<=' | '>>=' | '**=' | '//=')
# For normal and annotated assignments, additional restrictions enforced by the interpreter
del_stmt: 'del' exprlist
pass_stmt: 'pass'
flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt
break_stmt: 'break'
continue_stmt: 'continue'
return_stmt: 'return' [testlist]
yield_stmt: yield_expr
raise_stmt: 'raise' [test ['from' test]]
import_stmt: import_name | import_from
import_name: 'import' dotted_as_names
# note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS
import_from: ('from' (('.' | '...')* dotted_name | ('.' | '...')+)
'import' ('*' | '(' import_as_names ')' | import_as_names))
import_as_name: NAME ['as' NAME]
dotted_as_name: dotted_name ['as' NAME]
import_as_names: import_as_name (',' import_as_name)* [',']
dotted_as_names: dotted_as_name (',' dotted_as_name)*
dotted_name: NAME ('.' NAME)*
global_stmt: 'global' NAME (',' NAME)*
nonlocal_stmt: 'nonlocal' NAME (',' NAME)*
assert_stmt: 'assert' test [',' test]

compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt
async_stmt: 'async' (funcdef | with_stmt | for_stmt)
if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite]
while_stmt: 'while' test ':' suite ['else' ':' suite]
for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite]
try_stmt: ('try' ':' suite
((except_clause ':' suite)+
['else' ':' suite]
['finally' ':' suite] |
'finally' ':' suite))
with_stmt: 'with' with_item (',' with_item)* ':' suite
with_item: test ['as' expr]
# NB compile.c makes sure that the default except clause is last
except_clause: 'except' [test ['as' NAME]]
suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT

test: or_test ['if' or_test 'else' test] | lambdef
test_nocond: or_test | lambdef_nocond
lambdef: 'lambda' [varargslist] ':' test
lambdef_nocond: 'lambda' [varargslist] ':' test_nocond
or_test: and_test ('or' and_test)*
and_test: not_test ('and' not_test)*
not_test: 'not' not_test | comparison
comparison: expr (comp_op expr)*
# <> isn't actually a valid comparison operator in Python. It's here for the
# sake of a __future__ import described in PEP 401 (which really works :-)
comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'
star_expr: '*' expr
expr: xor_expr ('|' xor_expr)*
xor_expr: and_expr ('^' and_expr)*
and_expr: shift_expr ('&' shift_expr)*
shift_expr: arith_expr (('<<'|'>>') arith_expr)*
arith_expr: term (('+'|'-') term)*
term: factor (('*'|'@'|'/'|'%'|'//') factor)*
factor: ('+'|'-'|'~') factor | power
power: atom_expr ['**' factor]
atom_expr: ['await'] atom trailer*
atom: ('(' [yield_expr|testlist_comp] ')' |
'[' [testlist_comp] ']' |
'{' [dictorsetmaker] '}' |
NAME | NUMBER | strings | '...' | 'None' | 'True' | 'False')
testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
subscriptlist: subscript (',' subscript)* [',']
subscript: test | [test] ':' [test] [sliceop]
sliceop: ':' [test]
exprlist: (expr|star_expr) (',' (expr|star_expr))* [',']
testlist: test (',' test)* [',']
dictorsetmaker: ( ((test ':' test | '**' expr)
(comp_for | (',' (test ':' test | '**' expr))* [','])) |
((test | star_expr)
(comp_for | (',' (test | star_expr))* [','])) )

classdef: 'class' NAME ['(' [arglist] ')'] ':' suite

arglist: argument (',' argument)* [',']

# The reason that keywords are test nodes instead of NAME is that using NAME
# results in an ambiguity. ast.c makes sure it's a NAME.
# "test '=' test" is really "keyword '=' test", but we have no such token.
# These need to be in a single rule to avoid grammar that is ambiguous
# to our LL(1) parser. Even though 'test' includes '*expr' in star_expr,
# we explicitly match '*' here, too, to give it proper precedence.
# Illegal combinations and orderings are blocked in ast.c:
# multiple (test comp_for) arguments are blocked; keyword unpackings
# that precede iterable unpackings are blocked; etc.
argument: ( test [comp_for] |
test '=' test |
'**' test |
'*' test )

comp_iter: comp_for | comp_if
sync_comp_for: 'for' exprlist 'in' or_test [comp_iter]
comp_for: ['async'] sync_comp_for
comp_if: 'if' test_nocond [comp_iter]

# not used in grammar, but may appear in "node" passed from Parser to Compiler
encoding_decl: NAME

yield_expr: 'yield' [yield_arg]
yield_arg: 'from' test | testlist

strings: (STRING | fstring)+
fstring: FSTRING_START fstring_content* FSTRING_END
fstring_content: FSTRING_STRING | fstring_expr
fstring_conversion: '!' NAME
fstring_expr: '{' testlist [ fstring_conversion ] [ fstring_format_spec ] '}'
fstring_format_spec: ':' fstring_content*
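
Note how comp_for in the 3.7 file is only a thin ['async'] wrapper around sync_comp_for. A sketch of the visible difference (the exact node layout is an assumption):

import parso

grammar = parso.load_grammar(version='3.7')
plain = grammar.parse('[x for x in y]\n')
awaited = grammar.parse('async def f():\n    return [x async for x in y]\n')
# Both trees contain a 'sync_comp_for' node; only the async version wraps
# it in an extra 'comp_for' node that carries the 'async' keyword.
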
parso/python/grammar38.txt (new file, 171 lines)
@@ -0,0 +1,171 @@
# Grammar for Python

# NOTE WELL: You should also follow all the steps listed at
# https://devguide.python.org/grammar/

# Start symbols for the grammar:
# single_input is a single interactive statement;
# file_input is a module or sequence of commands read from an input file;
# eval_input is the input for the eval() functions.
# NB: compound_stmt in single_input is followed by extra NEWLINE!
single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
file_input: (NEWLINE | stmt)* ENDMARKER
eval_input: testlist NEWLINE* ENDMARKER

decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE
decorators: decorator+
decorated: decorators (classdef | funcdef | async_funcdef)

async_funcdef: 'async' funcdef
funcdef: 'def' NAME parameters ['->' test] ':' suite

parameters: '(' [typedargslist] ')'
typedargslist: (
(tfpdef ['=' test] (',' tfpdef ['=' test])* ',' '/' [',' [ tfpdef ['=' test] (
',' tfpdef ['=' test])* ([',' [
'*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]]
| '**' tfpdef [',']]])
| '*' [tfpdef] (',' tfpdef ['=' test])* ([',' ['**' tfpdef [',']]])
| '**' tfpdef [',']]] )
| (tfpdef ['=' test] (',' tfpdef ['=' test])* [',' [
'*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]]
| '**' tfpdef [',']]]
| '*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]]
| '**' tfpdef [','])
)
tfpdef: NAME [':' test]
varargslist: vfpdef ['=' test ](',' vfpdef ['=' test])* ',' '/' [',' [ (vfpdef ['=' test] (',' vfpdef ['=' test])* [',' [
'*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]]
| '**' vfpdef [',']]]
| '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]]
| '**' vfpdef [',']) ]] | (vfpdef ['=' test] (',' vfpdef ['=' test])* [',' [
'*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]]
| '**' vfpdef [',']]]
| '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]]
| '**' vfpdef [',']
)
vfpdef: NAME

stmt: simple_stmt | compound_stmt
simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
small_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt |
import_stmt | global_stmt | nonlocal_stmt | assert_stmt)
expr_stmt: testlist_star_expr (annassign | augassign (yield_expr|testlist) |
('=' (yield_expr|testlist_star_expr))*)
annassign: ':' test ['=' test]
testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [',']
augassign: ('+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^=' |
'<<=' | '>>=' | '**=' | '//=')
# For normal and annotated assignments, additional restrictions enforced by the interpreter
del_stmt: 'del' exprlist
pass_stmt: 'pass'
flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt
break_stmt: 'break'
continue_stmt: 'continue'
return_stmt: 'return' [testlist_star_expr]
yield_stmt: yield_expr
raise_stmt: 'raise' [test ['from' test]]
import_stmt: import_name | import_from
import_name: 'import' dotted_as_names
# note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS
import_from: ('from' (('.' | '...')* dotted_name | ('.' | '...')+)
'import' ('*' | '(' import_as_names ')' | import_as_names))
import_as_name: NAME ['as' NAME]
dotted_as_name: dotted_name ['as' NAME]
import_as_names: import_as_name (',' import_as_name)* [',']
dotted_as_names: dotted_as_name (',' dotted_as_name)*
dotted_name: NAME ('.' NAME)*
global_stmt: 'global' NAME (',' NAME)*
nonlocal_stmt: 'nonlocal' NAME (',' NAME)*
assert_stmt: 'assert' test [',' test]

compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt
async_stmt: 'async' (funcdef | with_stmt | for_stmt)
if_stmt: 'if' namedexpr_test ':' suite ('elif' namedexpr_test ':' suite)* ['else' ':' suite]
while_stmt: 'while' namedexpr_test ':' suite ['else' ':' suite]
for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite]
try_stmt: ('try' ':' suite
((except_clause ':' suite)+
['else' ':' suite]
['finally' ':' suite] |
'finally' ':' suite))
with_stmt: 'with' with_item (',' with_item)* ':' suite
with_item: test ['as' expr]
# NB compile.c makes sure that the default except clause is last
except_clause: 'except' [test ['as' NAME]]
suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT

namedexpr_test: test [':=' test]
test: or_test ['if' or_test 'else' test] | lambdef
test_nocond: or_test | lambdef_nocond
lambdef: 'lambda' [varargslist] ':' test
lambdef_nocond: 'lambda' [varargslist] ':' test_nocond
or_test: and_test ('or' and_test)*
and_test: not_test ('and' not_test)*
not_test: 'not' not_test | comparison
comparison: expr (comp_op expr)*
# <> isn't actually a valid comparison operator in Python. It's here for the
# sake of a __future__ import described in PEP 401 (which really works :-)
comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'
star_expr: '*' expr
expr: xor_expr ('|' xor_expr)*
xor_expr: and_expr ('^' and_expr)*
and_expr: shift_expr ('&' shift_expr)*
shift_expr: arith_expr (('<<'|'>>') arith_expr)*
arith_expr: term (('+'|'-') term)*
term: factor (('*'|'@'|'/'|'%'|'//') factor)*
factor: ('+'|'-'|'~') factor | power
power: atom_expr ['**' factor]
atom_expr: ['await'] atom trailer*
atom: ('(' [yield_expr|testlist_comp] ')' |
'[' [testlist_comp] ']' |
'{' [dictorsetmaker] '}' |
NAME | NUMBER | strings | '...' | 'None' | 'True' | 'False')
testlist_comp: (namedexpr_test|star_expr) ( comp_for | (',' (namedexpr_test|star_expr))* [','] )
trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
subscriptlist: subscript (',' subscript)* [',']
subscript: test | [test] ':' [test] [sliceop]
sliceop: ':' [test]
exprlist: (expr|star_expr) (',' (expr|star_expr))* [',']
testlist: test (',' test)* [',']
dictorsetmaker: ( ((test ':' test | '**' expr)
(comp_for | (',' (test ':' test | '**' expr))* [','])) |
((test | star_expr)
(comp_for | (',' (test | star_expr))* [','])) )

classdef: 'class' NAME ['(' [arglist] ')'] ':' suite

arglist: argument (',' argument)* [',']

# The reason that keywords are test nodes instead of NAME is that using NAME
# results in an ambiguity. ast.c makes sure it's a NAME.
# "test '=' test" is really "keyword '=' test", but we have no such token.
# These need to be in a single rule to avoid grammar that is ambiguous
# to our LL(1) parser. Even though 'test' includes '*expr' in star_expr,
# we explicitly match '*' here, too, to give it proper precedence.
# Illegal combinations and orderings are blocked in ast.c:
# multiple (test comp_for) arguments are blocked; keyword unpackings
# that precede iterable unpackings are blocked; etc.
argument: ( test [comp_for] |
test ':=' test |
test '=' test |
'**' test |
'*' test )

comp_iter: comp_for | comp_if
sync_comp_for: 'for' exprlist 'in' or_test [comp_iter]
comp_for: ['async'] sync_comp_for
comp_if: 'if' test_nocond [comp_iter]

# not used in grammar, but may appear in "node" passed from Parser to Compiler
encoding_decl: NAME

yield_expr: 'yield' [yield_arg]
yield_arg: 'from' test | testlist_star_expr

strings: (STRING | fstring)+
fstring: FSTRING_START fstring_content* FSTRING_END
fstring_content: FSTRING_STRING | fstring_expr
fstring_conversion: '!' NAME
fstring_expr: '{' testlist ['='] [ fstring_conversion ] [ fstring_format_spec ] '}'
fstring_format_spec: ':' fstring_content*
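
The 3.8 file adds namedexpr_test for assignment expressions. A minimal sketch, assuming a parso build with 3.8 support:

import parso

grammar = parso.load_grammar(version='3.8')
module = grammar.parse('while (chunk := read()):\n    pass\n')
# Under the 3.8 rules the walrus parses cleanly; with version='3.7' the
# same ':=' would instead show up via grammar.iter_errors(module).
print(list(grammar.iter_errors(module)))  # expected: []
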
parso/python/issue_list.txt (new file, 176 lines)
@@ -0,0 +1,176 @@
A list of syntax/indentation errors I've encountered in CPython.

# Python/compile.c
"'continue' not properly in loop"
"'continue' not supported inside 'finally' clause" # Until loop
"default 'except:' must be last"
"from __future__ imports must occur at the beginning of the file"
"'return' outside function"
"'return' with value in async generator"
"'break' outside loop"
"two starred expressions in assignment"
"asynchronous comprehension outside of an asynchronous function"
"'yield' outside function" # For both yield and yield from
"'yield from' inside async function"
"'await' outside function"
"'await' outside async function"
"starred assignment target must be in a list or tuple"
"can't use starred expression here"
"too many statically nested blocks" # Max. 20
# This is one of the few places in the cpython code base that I really
# don't understand. It feels a bit hacky if you look at the implementation
# of UNPACK_EX.
"too many expressions in star-unpacking assignment"

# Just ignore this one, newer versions will not be affected anymore and
# it's a limit of 2^16 - 1.
"too many annotations" # Only python 3.0 - 3.5, 3.6 is not affected.

# Python/ast.c
# used with_item exprlist expr_stmt
"can't %s %s" % ("assign to" or "delete",
"lambda"
"function call" # foo()
"generator expression"
"list comprehension"
"set comprehension"
"dict comprehension"
"keyword"
"Ellipsis"
"comparison"
Dict: Set: Num: Str: Bytes: JoinedStr: FormattedValue:
"literal"
BoolOp: BinOp: UnaryOp:
"operator"
Yield: YieldFrom:
"yield expression"
Await:
"await expression"
IfExp:
"conditional expression"
"assignment to keyword" # (keywords + __debug__) # None = 2
"named arguments must follow bare *" # def foo(*): pass
"non-default argument follows default argument" # def f(x=3, y): pass
"iterable unpacking cannot be used in comprehension" # [*[] for a in [1]]
"dict unpacking cannot be used in dict comprehension" # {**{} for a in [1]}
"Generator expression must be parenthesized if not sole argument" # foo(x for x in [], b)
"positional argument follows keyword argument unpacking" # f(**x, y) >= 3.5
"positional argument follows keyword argument" # f(x=2, y) >= 3.5
"iterable argument unpacking follows keyword argument unpacking" # foo(**kwargs, *args)
"lambda cannot contain assignment" # f(lambda: 1=1)
"keyword can't be an expression" # f(+x=1)
"keyword argument repeated" # f(x=1, x=2)
"illegal expression for augmented assignment" # x, y += 1
"only single target (not list) can be annotated" # [x, y]: int
"only single target (not tuple) can be annotated" # x, y: str
"illegal target for annotation" # True: 1`
"trailing comma not allowed without surrounding parentheses" # from foo import a,
"bytes can only contain ASCII literal characters." # b'ä' # prob. only python 3
"cannot mix bytes and nonbytes literals" # 's' b''
"assignment to yield expression not possible" # x = yield 1 = 3

"f-string: empty expression not allowed" # f'{}'
"f-string: single '}' is not allowed" # f'}'
"f-string: expressions nested too deeply" # f'{1:{5:{3}}}'
"f-string expression part cannot include a backslash" # f'{"\"}' or f'{"\\"}'
"f-string expression part cannot include '#'" # f'{#}'
"f-string: unterminated string" # f'{"}'
"f-string: mismatched '(', '{', or '['"
"f-string: invalid conversion character: expected 's', 'r', or 'a'" # f'{1!b}'
"f-string: unexpected end of string" # Doesn't really happen?!
"f-string: expecting '}'" # f'{'
"(unicode error) unknown error
"(value error) unknown error
"(unicode error) MESSAGE
MESSAGES = {
"\\ at end of string"
"truncated \\xXX escape"
"truncated \\uXXXX escape"
"truncated \\UXXXXXXXX escape"
"illegal Unicode character" # '\Uffffffff'
"malformed \\N character escape" # '\N{}'
"unknown Unicode character name" # '\N{foo}'
}
"(value error) MESSAGE # bytes
MESSAGES = {
"Trailing \\ in string"
"invalid \\x escape at position %d"
}

"invalid escape sequence \\%c" # Only happens when used in `python -W error`
"unexpected node" # Probably irrelevant
"Unexpected node-type in from-import" # Irrelevant, doesn't happen.
"malformed 'try' statement" # Irrelevant, doesn't happen.

# Python/symtable.c
"duplicate argument '%U' in function definition"
"name '%U' is assigned to before global declaration"
"name '%U' is assigned to before nonlocal declaration"
"name '%U' is used prior to global declaration"
"name '%U' is used prior to nonlocal declaration"
"annotated name '%U' can't be global"
"annotated name '%U' can't be nonlocal"
"import * only allowed at module level"

"name '%U' is parameter and global",
"name '%U' is nonlocal and global",
"name '%U' is parameter and nonlocal",

"nonlocal declaration not allowed at module level");
"no binding for nonlocal '%U' found",
# RecursionError. Not handled. For all human written code, this is probably
# not an issue. eval("()"*x) with x>=2998 for example fails, but that's
# more than 2000 executions on one line.
"maximum recursion depth exceeded during compilation");

# Python/future.c
"not a chance"
"future feature %.100s is not defined"
"from __future__ imports must occur at the beginning of the file" # Also in compile.c

# Parser/tokenizer.c
# All the following issues seem to be irrelevant for parso, because the
# encoding stuff is done before it reaches the tokenizer. It's already
# unicode at that point.
"encoding problem: %s"
"encoding problem: %s with BOM"
"Non-UTF-8 code starting with '\\x%.2x' in file %U on line %i, but no encoding declared; see http://python.org/dev/peps/pep-0263/ for details"

# Parser/pythonrun.c
E_SYNTAX: "invalid syntax"
E_LINECONT: "unexpected character after line continuation character"
E_IDENTIFIER: "invalid character in identifier"
# Also just use 'invalid syntax'. Happens mostly with stuff like `(`. This
# message doesn't really help the user, because it only appears very
# randomly, e.g. `(or` wouldn't yield this error.
E_EOF: "unexpected EOF while parsing"
# Even in 3.6 this is implemented kind of shaky. Not implemented, I think
# cPython needs to fix this one first.
# e.g. `ast.parse('def x():\n\t if 1:\n \t \tpass')` works :/
E_TABSPACE: "inconsistent use of tabs and spaces in indentation"
# Ignored, just shown as "invalid syntax". The error has mostly to do with
# numbers like 0b2 everywhere or 1.6_ in Python3.6.
E_TOKEN: "invalid token"
E_EOFS: "EOF while scanning triple-quoted string literal"
E_EOLS: "EOL while scanning string literal"

# IndentationError
E_DEDENT: "unindent does not match any outer indentation level"
E_TOODEEP: "too many levels of indentation" # 100 levels
E_SYNTAX: "expected an indented block"
"unexpected indent"
# I don't think this actually ever happens.
"unexpected unindent"


# Irrelevant for parso for now.
E_OVERFLOW: "expression too long"
E_DECODE: "unknown decode error"
E_BADSINGLE: "multiple statements found while compiling a single statement"


Version specific:
Python 3.5:
    'yield' inside async function
Python 3.3/3.4:
    can use starred expression only as assignment target
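
This list is essentially the to-do sheet for parso's error normalizer: each message should eventually be reproducible through iter_errors(). A sketch of how such diagnostics surface from the public API (exact messages vary by parso version):

import parso

grammar = parso.load_grammar()
module = grammar.parse('continue\n')
for issue in grammar.iter_errors(module):
    # Each issue carries a position and a CPython-style message such as
    # "SyntaxError: 'continue' not properly in loop".
    print(issue.start_pos, issue.message)
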
@@ -1,9 +1,11 @@
from parso.python import tree
from parso import tokenize
from parso.token import (DEDENT, INDENT, ENDMARKER, NEWLINE, NUMBER,
STRING, tok_name)
from parso.python.token import PythonTokenTypes
from parso.parser import BaseParser
from parso.utils import splitlines


NAME = PythonTokenTypes.NAME
INDENT = PythonTokenTypes.INDENT
DEDENT = PythonTokenTypes.DEDENT


class Parser(BaseParser):
@@ -11,7 +13,7 @@ class Parser(BaseParser):
This class is used to parse a Python file, it then divides them into a
class structure of different scopes.

:param grammar: The grammar object of pgen2. Loaded by load_grammar.
:param pgen_grammar: The grammar object of pgen2. Loaded by load_grammar.
"""

node_map = {
@@ -37,7 +39,13 @@ class Parser(BaseParser):
'for_stmt': tree.ForStmt,
'while_stmt': tree.WhileStmt,
'try_stmt': tree.TryStmt,
'comp_for': tree.CompFor,
'sync_comp_for': tree.SyncCompFor,
# Not sure if this is the best idea, but IMO it's the easiest way to
# avoid extreme amounts of work around the subtle difference of 2/3
# grammar in list comprehensions.
'list_for': tree.SyncCompFor,
# Same here. This just exists in Python 2.6.
'gen_for': tree.SyncCompFor,
'decorator': tree.Decorator,
'lambdef': tree.Lambda,
'old_lambdef': tree.Lambda,
@@ -45,44 +53,35 @@ class Parser(BaseParser):
}
default_node = tree.PythonNode

def __init__(self, grammar, error_recovery=True, start_symbol='file_input'):
super(Parser, self).__init__(grammar, start_symbol, error_recovery=error_recovery)
# Names/Keywords are handled separately
_leaf_map = {
PythonTokenTypes.STRING: tree.String,
PythonTokenTypes.NUMBER: tree.Number,
PythonTokenTypes.NEWLINE: tree.Newline,
PythonTokenTypes.ENDMARKER: tree.EndMarker,
PythonTokenTypes.FSTRING_STRING: tree.FStringString,
PythonTokenTypes.FSTRING_START: tree.FStringStart,
PythonTokenTypes.FSTRING_END: tree.FStringEnd,
}

def __init__(self, pgen_grammar, error_recovery=True, start_nonterminal='file_input'):
super(Parser, self).__init__(pgen_grammar, start_nonterminal,
error_recovery=error_recovery)

self.syntax_errors = []
self._omit_dedent_list = []
self._indent_counter = 0

# TODO do print absolute import detection here.
# try:
# del python_grammar_no_print_statement.keywords["print"]
# except KeyError:
# pass  # Doesn't exist in the Python 3 grammar.

# if self.options["print_function"]:
# python_grammar = pygram.python_grammar_no_print_statement
# else:

def parse(self, tokens):
if self._error_recovery:
if self._start_symbol != 'file_input':
if self._start_nonterminal != 'file_input':
raise NotImplementedError

tokens = self._recovery_tokenize(tokens)

node = super(Parser, self).parse(tokens)
return super(Parser, self).parse(tokens)

if self._start_symbol == 'file_input' != node.type:
# If there's only one statement, we get back a non-module. That's
# not what we want, we want a module, so we add it here:
node = self.convert_node(
self._grammar,
self._grammar.symbol2number['file_input'],
[node]
)

return node

def convert_node(self, grammar, type, children):
def convert_node(self, nonterminal, children):
"""
Convert raw node information to a PythonBaseNode instance.

@@ -90,96 +89,121 @@ class Parser(BaseParser):
grammar rule produces a new complete node, so that the tree is built
strictly bottom-up.
"""
|
||||
# TODO REMOVE symbol, we don't want type here.
|
||||
symbol = grammar.number2symbol[type]
|
||||
try:
|
||||
return self.node_map[symbol](children)
|
||||
node = self.node_map[nonterminal](children)
|
||||
except KeyError:
|
||||
if symbol == 'suite':
|
||||
if nonterminal == 'suite':
|
||||
# We don't want the INDENT/DEDENT in our parser tree. Those
|
||||
# leaves are just cancer. They are virtual leaves and not real
|
||||
# ones and therefore have pseudo start/end positions and no
|
||||
# prefixes. Just ignore them.
|
||||
children = [children[0]] + children[2:-1]
|
||||
return self.default_node(symbol, children)
|
||||
elif nonterminal == 'list_if':
|
||||
# Make transitioning from 2 to 3 easier.
|
||||
nonterminal = 'comp_if'
|
||||
elif nonterminal == 'listmaker':
|
||||
# Same as list_if above.
|
||||
nonterminal = 'testlist_comp'
|
||||
node = self.default_node(nonterminal, children)
|
||||
for c in children:
|
||||
c.parent = node
|
||||
return node
|
||||
|
||||
def convert_leaf(self, grammar, type, value, prefix, start_pos):
|
||||
def convert_leaf(self, type, value, prefix, start_pos):
|
||||
# print('leaf', repr(value), token.tok_name[type])
|
||||
if type == tokenize.NAME:
|
||||
if value in grammar.keywords:
|
||||
if type == NAME:
|
||||
if value in self._pgen_grammar.reserved_syntax_strings:
|
||||
return tree.Keyword(value, start_pos, prefix)
|
||||
else:
|
||||
return tree.Name(value, start_pos, prefix)
|
||||
elif type == STRING:
|
||||
return tree.String(value, start_pos, prefix)
|
||||
elif type == NUMBER:
|
||||
return tree.Number(value, start_pos, prefix)
|
||||
elif type == NEWLINE:
|
||||
return tree.Newline(value, start_pos, prefix)
|
||||
elif type == ENDMARKER:
|
||||
return tree.EndMarker(value, start_pos, prefix)
|
||||
else:
|
||||
return tree.Operator(value, start_pos, prefix)
|
||||
|
||||
def error_recovery(self, grammar, stack, arcs, typ, value, start_pos, prefix,
|
||||
add_token_callback):
|
||||
"""
|
||||
This parser is written in a dynamic way, meaning that this parser
|
||||
allows using different grammars (even non-Python). However, error
|
||||
recovery is purely written for Python.
|
||||
"""
|
||||
return self._leaf_map.get(type, tree.Operator)(value, start_pos, prefix)
|
||||
|
||||
def error_recovery(self, token):
|
||||
tos_nodes = self.stack[-1].nodes
|
||||
if tos_nodes:
|
||||
last_leaf = tos_nodes[-1].get_last_leaf()
|
||||
else:
|
||||
last_leaf = None
|
||||
|
||||
if self._start_nonterminal == 'file_input' and \
|
||||
(token.type == PythonTokenTypes.ENDMARKER
|
||||
or token.type == DEDENT and '\n' not in last_leaf.value
|
||||
and '\r' not in last_leaf.value):
|
||||
# In Python statements need to end with a newline. But since it's
|
||||
# possible (and valid in Python) that there's no newline at the
# end of a file, we have to recover even if the user doesn't want
# error recovery.
if self.stack[-1].dfa.from_rule == 'simple_stmt':
try:
plan = self.stack[-1].dfa.transitions[PythonTokenTypes.NEWLINE]
except KeyError:
pass
else:
if plan.next_dfa.is_final and not plan.dfa_pushes:
# We are ignoring here that the newline would be
# required for a simple_stmt.
self.stack[-1].dfa = plan.next_dfa
self._add_token(token)
return

if not self._error_recovery:
return super(Parser, self).error_recovery(
grammar, stack, arcs, typ, value, start_pos, prefix,
add_token_callback)
return super(Parser, self).error_recovery(token)

def current_suite(stack):
# For now just discard everything that is not a suite or
# file_input, if we detect an error.
for index, (dfa, state, (type_, nodes)) in reversed(list(enumerate(stack))):
for until_index, stack_node in reversed(list(enumerate(stack))):
# `suite` can sometimes be only simple_stmt, not stmt.
symbol = grammar.number2symbol[type_]
if symbol == 'file_input':
if stack_node.nonterminal == 'file_input':
break
elif symbol == 'suite' and len(nodes) > 1:
# suites without an indent in them get discarded.
break
return index, symbol, nodes
elif stack_node.nonterminal == 'suite':
# In the case where we just have a newline we don't want to
# do error recovery here. In all other cases, we want to do
# error recovery.
if len(stack_node.nodes) != 1:
break
return until_index

index, symbol, nodes = current_suite(stack)
until_index = current_suite(self.stack)

# print('err', token.tok_name[typ], repr(value), start_pos, len(stack), index)
if self._stack_removal(grammar, stack, arcs, index + 1, value, start_pos):
add_token_callback(typ, value, start_pos, prefix)
if self._stack_removal(until_index + 1):
self._add_token(token)
else:
typ, value, start_pos, prefix = token
if typ == INDENT:
# For every deleted INDENT we have to delete a DEDENT as well.
# Otherwise the parser will get into trouble and DEDENT too early.
self._omit_dedent_list.append(self._indent_counter)
else:
error_leaf = tree.PythonErrorLeaf(tok_name[typ].lower(), value, start_pos, prefix)
stack[-1][2][1].append(error_leaf)

def _stack_removal(self, grammar, stack, arcs, start_index, value, start_pos):
failed_stack = []
found = False
all_nodes = []
for dfa, state, (typ, nodes) in stack[start_index:]:
if nodes:
found = True
if found:
symbol = grammar.number2symbol[typ]
failed_stack.append((symbol, nodes))
all_nodes += nodes
if failed_stack:
stack[start_index - 1][2][1].append(tree.PythonErrorNode(all_nodes))
error_leaf = tree.PythonErrorLeaf(typ.name, value, start_pos, prefix)
self.stack[-1].nodes.append(error_leaf)

stack[start_index:] = []
return failed_stack
tos = self.stack[-1]
if tos.nonterminal == 'suite':
# Need at least one statement in the suite. This happend with the
|
||||
# error recovery above.
|
||||
try:
|
||||
tos.dfa = tos.dfa.arcs['stmt']
|
||||
except KeyError:
|
||||
# We're already in a final state.
|
||||
pass
|
||||
|
||||
def _stack_removal(self, start_index):
|
||||
all_nodes = [node for stack_node in self.stack[start_index:] for node in stack_node.nodes]
|
||||
|
||||
if all_nodes:
|
||||
node = tree.PythonErrorNode(all_nodes)
|
||||
for n in all_nodes:
|
||||
n.parent = node
|
||||
self.stack[start_index - 1].nodes.append(node)
|
||||
|
||||
self.stack[start_index:] = []
|
||||
return bool(all_nodes)
|
||||
|
||||
def _recovery_tokenize(self, tokens):
|
||||
for typ, value, start_pos, prefix in tokens:
|
||||
# print(tokenize.tok_name[typ], repr(value), start_pos, repr(prefix))
|
||||
for token in tokens:
|
||||
typ = token[0]
|
||||
if typ == DEDENT:
|
||||
# We need to count indents, because if we just omit any DEDENT,
|
||||
# we might omit them in the wrong place.
|
||||
@@ -191,42 +215,4 @@ class Parser(BaseParser):
|
||||
self._indent_counter -= 1
|
||||
elif typ == INDENT:
|
||||
self._indent_counter += 1
|
||||
|
||||
yield typ, value, start_pos, prefix
|
||||
|
||||
|
||||
def remove_last_newline(node):
|
||||
endmarker = node.children[-1]
|
||||
# The newline is either in the endmarker as a prefix or the previous
|
||||
# leaf as a newline token.
|
||||
prefix = endmarker.prefix
|
||||
leaf = endmarker.get_previous_leaf()
|
||||
if prefix:
|
||||
text = prefix
|
||||
else:
|
||||
if leaf is None:
|
||||
raise ValueError("You're trying to remove a newline from an empty module.")
|
||||
|
||||
text = leaf.value
|
||||
|
||||
if not text.endswith('\n'):
|
||||
raise ValueError("There's no newline at the end, cannot remove it.")
|
||||
|
||||
text = text[:-1]
|
||||
if prefix:
|
||||
endmarker.prefix = text
|
||||
|
||||
if leaf is None:
|
||||
end_pos = (1, 0)
|
||||
else:
|
||||
end_pos = leaf.end_pos
|
||||
|
||||
lines = splitlines(text, keepends=True)
|
||||
if len(lines) == 1:
|
||||
end_pos = end_pos[0], end_pos[1] + len(lines[0])
|
||||
else:
|
||||
end_pos = end_pos[0] + len(lines) - 1, len(lines[-1])
|
||||
endmarker.start_pos = end_pos
|
||||
else:
|
||||
leaf.value = text
|
||||
endmarker.start_pos = leaf.end_pos
|
||||
yield token
|
||||
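
The error recovery path above is what lets parso return a tree even for broken input: unparsable pieces end up wrapped in PythonErrorNode/PythonErrorLeaf instead of aborting the parse. A minimal sketch of the resulting behavior (not part of the diff), assuming the package's public parso.parse() entry point:

import parso

# 'def f(:' cannot be parsed, but error recovery keeps going and the
# following statement is still parsed normally.
module = parso.parse('def f(:\n    pass\nx = 1\n', error_recovery=True)
print([c.type for c in module.children])
# The unparsable part shows up as an 'error_node'/'error_leaf' child.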
727  parso/python/pep8.py  (new file)
@@ -0,0 +1,727 @@
import re
from contextlib import contextmanager

from parso.python.errors import ErrorFinder, ErrorFinderConfig
from parso.normalizer import Rule
from parso.python.tree import search_ancestor, Flow, Scope


_IMPORT_TYPES = ('import_name', 'import_from')
_SUITE_INTRODUCERS = ('classdef', 'funcdef', 'if_stmt', 'while_stmt',
                      'for_stmt', 'try_stmt', 'with_stmt')
_NON_STAR_TYPES = ('term', 'import_from', 'power')
_OPENING_BRACKETS = '(', '[', '{'
_CLOSING_BRACKETS = ')', ']', '}'
_FACTOR = '+', '-', '~'
_ALLOW_SPACE = '*', '+', '-', '**', '/', '//', '@'
_BITWISE_OPERATOR = '<<', '>>', '|', '&', '^'
_NEEDS_SPACE = ('=', '%', '->',
                '<', '>', '==', '>=', '<=', '<>', '!=',
                '+=', '-=', '*=', '@=', '/=', '%=', '&=', '|=', '^=', '<<=',
                '>>=', '**=', '//=')
_NEEDS_SPACE += _BITWISE_OPERATOR
_IMPLICIT_INDENTATION_TYPES = ('dictorsetmaker', 'argument')
_POSSIBLE_SLICE_PARENTS = ('subscript', 'subscriptlist', 'sliceop')


class IndentationTypes(object):
    VERTICAL_BRACKET = object()
    HANGING_BRACKET = object()
    BACKSLASH = object()
    SUITE = object()
    IMPLICIT = object()


class IndentationNode(object):
    type = IndentationTypes.SUITE

    def __init__(self, config, indentation, parent=None):
        self.bracket_indentation = self.indentation = indentation
        self.parent = parent

    def __repr__(self):
        return '<%s>' % self.__class__.__name__

    def get_latest_suite_node(self):
        n = self
        while n is not None:
            if n.type == IndentationTypes.SUITE:
                return n

            n = n.parent


class BracketNode(IndentationNode):
    def __init__(self, config, leaf, parent, in_suite_introducer=False):
        self.leaf = leaf

        # Figure out here what the indentation is. For chained brackets
        # we can basically use the previous indentation.
        previous_leaf = leaf
        n = parent
        if n.type == IndentationTypes.IMPLICIT:
            n = n.parent
        while True:
            if hasattr(n, 'leaf') and previous_leaf.line != n.leaf.line:
                break

            previous_leaf = previous_leaf.get_previous_leaf()
            if not isinstance(n, BracketNode) or previous_leaf != n.leaf:
                break
            n = n.parent
        parent_indentation = n.indentation

        next_leaf = leaf.get_next_leaf()
        if '\n' in next_leaf.prefix:
            # This implies code like:
            # foobarbaz(
            #     a,
            #     b,
            # )
            self.bracket_indentation = parent_indentation \
                + config.closing_bracket_hanging_indentation
            self.indentation = parent_indentation + config.indentation
            self.type = IndentationTypes.HANGING_BRACKET
        else:
            # Implies code like:
            # foobarbaz(
            #           a,
            #           b,
            #           )
            expected_end_indent = leaf.end_pos[1]
            if '\t' in config.indentation:
                self.indentation = None
            else:
                self.indentation = ' ' * expected_end_indent
            self.bracket_indentation = self.indentation
            self.type = IndentationTypes.VERTICAL_BRACKET

        if in_suite_introducer and parent.type == IndentationTypes.SUITE \
                and self.indentation == parent_indentation + config.indentation:
            self.indentation += config.indentation
            # The closing bracket should have the same indentation.
            self.bracket_indentation = self.indentation
        self.parent = parent


class ImplicitNode(BracketNode):
    """
    Implicit indentation after keyword arguments, default arguments,
    annotations and dict values.
    """
    def __init__(self, config, leaf, parent):
        super(ImplicitNode, self).__init__(config, leaf, parent)
        self.type = IndentationTypes.IMPLICIT

        next_leaf = leaf.get_next_leaf()
        if leaf == ':' and '\n' not in next_leaf.prefix:
            self.indentation += ' '


class BackslashNode(IndentationNode):
    type = IndentationTypes.BACKSLASH

    def __init__(self, config, parent_indentation, containing_leaf, spacing, parent=None):
        expr_stmt = search_ancestor(containing_leaf, 'expr_stmt')
        if expr_stmt is not None:
            equals = expr_stmt.children[-2]

            if '\t' in config.indentation:
                # TODO unite with the code of BracketNode
                self.indentation = None
            else:
                # If the backslash follows the equals, use normal indentation
                # otherwise it should align with the equals.
                if equals.end_pos == spacing.start_pos:
                    self.indentation = parent_indentation + config.indentation
                else:
                    # +1 because there is a space.
                    self.indentation = ' ' * (equals.end_pos[1] + 1)
        else:
            self.indentation = parent_indentation + config.indentation
        self.bracket_indentation = self.indentation
        self.parent = parent


def _is_magic_name(name):
    return name.value.startswith('__') and name.value.endswith('__')


class PEP8Normalizer(ErrorFinder):
    def __init__(self, *args, **kwargs):
        super(PEP8Normalizer, self).__init__(*args, **kwargs)
        self._previous_part = None
        self._previous_leaf = None
        self._on_newline = True
        self._newline_count = 0
        self._wanted_newline_count = None
        self._max_new_lines_in_prefix = 0
        self._new_statement = True
        self._implicit_indentation_possible = False
        # The top of stack of the indentation nodes.
        self._indentation_tos = self._last_indentation_tos = \
            IndentationNode(self._config, indentation='')
        self._in_suite_introducer = False

        if ' ' in self._config.indentation:
            self._indentation_type = 'spaces'
            self._wrong_indentation_char = '\t'
        else:
            self._indentation_type = 'tabs'
            self._wrong_indentation_char = ' '

    @contextmanager
    def visit_node(self, node):
        with super(PEP8Normalizer, self).visit_node(node):
            with self._visit_node(node):
                yield

    @contextmanager
    def _visit_node(self, node):
        typ = node.type

        if typ == 'import_name':
            names = node.get_defined_names()
            if len(names) > 1:
                for name in names[:1]:
                    self.add_issue(name, 401, 'Multiple imports on one line')
        elif typ == 'lambdef':
            expr_stmt = node.parent
            # Check if it's simply defining a single name, not something like
            # foo.bar or x[1], where using a lambda could make more sense.
            if expr_stmt.type == 'expr_stmt' and any(n.type == 'name' for n in expr_stmt.children[:-2:2]):
                self.add_issue(node, 731, 'Do not assign a lambda expression, use a def')
        elif typ == 'try_stmt':
            for child in node.children:
                # Here we can simply check if it's an except, because otherwise
                # it would be an except_clause.
                if child.type == 'keyword' and child.value == 'except':
                    self.add_issue(child, 722, 'Do not use bare except, specify exception instead')
        elif typ == 'comparison':
            for child in node.children:
                if child.type not in ('atom_expr', 'power'):
                    continue
                if len(child.children) > 2:
                    continue
                trailer = child.children[1]
                atom = child.children[0]
                if trailer.type == 'trailer' and atom.type == 'name' \
                        and atom.value == 'type':
                    self.add_issue(node, 721, "Do not compare types, use 'isinstance()'")
                    break
        elif typ == 'file_input':
            endmarker = node.children[-1]
            prev = endmarker.get_previous_leaf()
            prefix = endmarker.prefix
            if (not prefix.endswith('\n') and (
                    prefix or prev is None or prev.value != '\n')):
                self.add_issue(endmarker, 292, "No newline at end of file")

        if typ in _IMPORT_TYPES:
            simple_stmt = node.parent
            module = simple_stmt.parent
            #if module.type == 'simple_stmt':
            if module.type == 'file_input':
                index = module.children.index(simple_stmt)
                for child in module.children[:index]:
                    children = [child]
                    if child.type == 'simple_stmt':
                        # Remove the newline.
                        children = child.children[:-1]

                    found_docstring = False
                    for c in children:
                        if c.type == 'string' and not found_docstring:
                            continue
                        found_docstring = True

                        if c.type == 'expr_stmt' and \
                                all(_is_magic_name(n) for n in c.get_defined_names()):
                            continue

                        if c.type in _IMPORT_TYPES or isinstance(c, Flow):
                            continue

                        self.add_issue(node, 402, 'Module level import not at top of file')
                        break
                    else:
                        continue
                    break

        implicit_indentation_possible = typ in _IMPLICIT_INDENTATION_TYPES
        in_introducer = typ in _SUITE_INTRODUCERS
        if in_introducer:
            self._in_suite_introducer = True
        elif typ == 'suite':
            if self._indentation_tos.type == IndentationTypes.BACKSLASH:
                self._indentation_tos = self._indentation_tos.parent

            self._indentation_tos = IndentationNode(
                self._config,
                self._indentation_tos.indentation + self._config.indentation,
                parent=self._indentation_tos
            )
        elif implicit_indentation_possible:
            self._implicit_indentation_possible = True
        yield
        if typ == 'suite':
            assert self._indentation_tos.type == IndentationTypes.SUITE
            self._indentation_tos = self._indentation_tos.parent
            # If we dedent, no lines are needed anymore.
            self._wanted_newline_count = None
        elif implicit_indentation_possible:
            self._implicit_indentation_possible = False
            if self._indentation_tos.type == IndentationTypes.IMPLICIT:
                self._indentation_tos = self._indentation_tos.parent
        elif in_introducer:
            self._in_suite_introducer = False
            if typ in ('classdef', 'funcdef'):
                self._wanted_newline_count = self._get_wanted_blank_lines_count()

    def _check_tabs_spaces(self, spacing):
        if self._wrong_indentation_char in spacing.value:
            self.add_issue(spacing, 101, 'Indentation contains ' + self._indentation_type)
            return True
        return False

    def _get_wanted_blank_lines_count(self):
        suite_node = self._indentation_tos.get_latest_suite_node()
        return int(suite_node.parent is None) + 1

    def _reset_newlines(self, spacing, leaf, is_comment=False):
        self._max_new_lines_in_prefix = \
            max(self._max_new_lines_in_prefix, self._newline_count)

        wanted = self._wanted_newline_count
        if wanted is not None:
            # Need to subtract one
            blank_lines = self._newline_count - 1
            if wanted > blank_lines and leaf.type != 'endmarker':
                # In case of a comment we don't need to add the issue, yet.
                if not is_comment:
                    # TODO end_pos wrong.
                    code = 302 if wanted == 2 else 301
                    message = "expected %s blank line, found %s" \
                        % (wanted, blank_lines)
                    self.add_issue(spacing, code, message)
                    self._wanted_newline_count = None
            else:
                self._wanted_newline_count = None

        if not is_comment:
            wanted = self._get_wanted_blank_lines_count()
            actual = self._max_new_lines_in_prefix - 1

            val = leaf.value
            needs_lines = (
                val == '@' and leaf.parent.type == 'decorator'
                or (
                    val == 'class'
                    or val == 'async' and leaf.get_next_leaf() == 'def'
                    or val == 'def' and self._previous_leaf != 'async'
                ) and leaf.parent.parent.type != 'decorated'
            )
            if needs_lines and actual < wanted:
                func_or_cls = leaf.parent
                suite = func_or_cls.parent
                if suite.type == 'decorated':
                    suite = suite.parent

                # The first leaf of a file or a suite should not need blank
                # lines.
                if suite.children[int(suite.type == 'suite')] != func_or_cls:
                    code = 302 if wanted == 2 else 301
                    message = "expected %s blank line, found %s" \
                        % (wanted, actual)
                    self.add_issue(spacing, code, message)

            self._max_new_lines_in_prefix = 0

        self._newline_count = 0

    def visit_leaf(self, leaf):
        super(PEP8Normalizer, self).visit_leaf(leaf)
        for part in leaf._split_prefix():
            if part.type == 'spacing':
                # This part is used for the part call after for.
                break
            self._visit_part(part, part.create_spacing_part(), leaf)

        self._analyse_non_prefix(leaf)
        self._visit_part(leaf, part, leaf)

        # Cleanup
        self._last_indentation_tos = self._indentation_tos

        self._new_statement = leaf.type == 'newline'

        # TODO does this work? with brackets and stuff?
        if leaf.type == 'newline' and \
                self._indentation_tos.type == IndentationTypes.BACKSLASH:
            self._indentation_tos = self._indentation_tos.parent

        if leaf.value == ':' and leaf.parent.type in _SUITE_INTRODUCERS:
            self._in_suite_introducer = False
        elif leaf.value == 'elif':
            self._in_suite_introducer = True

        if not self._new_statement:
            self._reset_newlines(part, leaf)
            self._max_blank_lines = 0

        self._previous_leaf = leaf

        return leaf.value

    def _visit_part(self, part, spacing, leaf):
        value = part.value
        type_ = part.type
        if type_ == 'error_leaf':
            return

        if value == ',' and part.parent.type == 'dictorsetmaker':
            self._indentation_tos = self._indentation_tos.parent

        node = self._indentation_tos

        if type_ == 'comment':
            if value.startswith('##'):
                # Whole blocks of # should not raise an error.
                if value.lstrip('#'):
                    self.add_issue(part, 266, "Too many leading '#' for block comment.")
            elif self._on_newline:
                if not re.match(r'#:? ', value) and not value == '#' \
                        and not (value.startswith('#!') and part.start_pos == (1, 0)):
                    self.add_issue(part, 265, "Block comment should start with '# '")
            else:
                if not re.match(r'#:? [^ ]', value):
                    self.add_issue(part, 262, "Inline comment should start with '# '")

            self._reset_newlines(spacing, leaf, is_comment=True)
        elif type_ == 'newline':
            if self._newline_count > self._get_wanted_blank_lines_count():
                self.add_issue(part, 303, "Too many blank lines (%s)" % self._newline_count)
            elif leaf in ('def', 'class') \
                    and leaf.parent.parent.type == 'decorated':
                self.add_issue(part, 304, "Blank lines found after function decorator")

            self._newline_count += 1

        if type_ == 'backslash':
            # TODO is this enough checking? What about ==?
            if node.type != IndentationTypes.BACKSLASH:
                if node.type != IndentationTypes.SUITE:
                    self.add_issue(part, 502, 'The backslash is redundant between brackets')
                else:
                    indentation = node.indentation
                    if self._in_suite_introducer and node.type == IndentationTypes.SUITE:
                        indentation += self._config.indentation

                    self._indentation_tos = BackslashNode(
                        self._config,
                        indentation,
                        part,
                        spacing,
                        parent=self._indentation_tos
                    )
        elif self._on_newline:
            indentation = spacing.value
            if node.type == IndentationTypes.BACKSLASH \
                    and self._previous_part.type == 'newline':
                self._indentation_tos = self._indentation_tos.parent

            if not self._check_tabs_spaces(spacing):
                should_be_indentation = node.indentation
                if type_ == 'comment':
                    # Comments can be dedented. So we have to care for that.
                    n = self._last_indentation_tos
                    while True:
                        if len(indentation) > len(n.indentation):
                            break

                        should_be_indentation = n.indentation

                        self._last_indentation_tos = n
                        if n == node:
                            break
                        n = n.parent

                if self._new_statement:
                    if type_ == 'newline':
                        if indentation:
                            self.add_issue(spacing, 291, 'Trailing whitespace')
                    elif indentation != should_be_indentation:
                        s = '%s %s' % (len(self._config.indentation), self._indentation_type)
                        self.add_issue(part, 111, 'Indentation is not a multiple of ' + s)
                else:
                    if value in '])}':
                        should_be_indentation = node.bracket_indentation
                    else:
                        should_be_indentation = node.indentation
                    if self._in_suite_introducer and indentation == \
                            node.get_latest_suite_node().indentation \
                            + self._config.indentation:
                        self.add_issue(part, 129, "Line with same indent as next logical block")
                    elif indentation != should_be_indentation:
                        if not self._check_tabs_spaces(spacing) and part.value != '\n':
                            if value in '])}':
                                if node.type == IndentationTypes.VERTICAL_BRACKET:
                                    self.add_issue(part, 124, "Closing bracket does not match visual indentation")
                                else:
                                    self.add_issue(part, 123, "Closing bracket does not match indentation of opening bracket's line")
                            else:
                                if len(indentation) < len(should_be_indentation):
                                    if node.type == IndentationTypes.VERTICAL_BRACKET:
                                        self.add_issue(part, 128, 'Continuation line under-indented for visual indent')
                                    elif node.type == IndentationTypes.BACKSLASH:
                                        self.add_issue(part, 122, 'Continuation line missing indentation or outdented')
                                    elif node.type == IndentationTypes.IMPLICIT:
                                        self.add_issue(part, 135, 'xxx')
                                    else:
                                        self.add_issue(part, 121, 'Continuation line under-indented for hanging indent')
                                else:
                                    if node.type == IndentationTypes.VERTICAL_BRACKET:
                                        self.add_issue(part, 127, 'Continuation line over-indented for visual indent')
                                    elif node.type == IndentationTypes.IMPLICIT:
                                        self.add_issue(part, 136, 'xxx')
                                    else:
                                        self.add_issue(part, 126, 'Continuation line over-indented for hanging indent')
        else:
            self._check_spacing(part, spacing)

        self._check_line_length(part, spacing)
        # -------------------------------
        # Finalizing. Updating the state.
        # -------------------------------
        if value and value in '()[]{}' and type_ != 'error_leaf' \
                and part.parent.type != 'error_node':
            if value in _OPENING_BRACKETS:
                self._indentation_tos = BracketNode(
                    self._config, part,
                    parent=self._indentation_tos,
                    in_suite_introducer=self._in_suite_introducer
                )
            else:
                assert node.type != IndentationTypes.IMPLICIT
                self._indentation_tos = self._indentation_tos.parent
        elif value in ('=', ':') and self._implicit_indentation_possible \
                and part.parent.type in _IMPLICIT_INDENTATION_TYPES:
            indentation = node.indentation
            self._indentation_tos = ImplicitNode(
                self._config, part, parent=self._indentation_tos
            )

        self._on_newline = type_ in ('newline', 'backslash', 'bom')

        self._previous_part = part
        self._previous_spacing = spacing

    def _check_line_length(self, part, spacing):
        if part.type == 'backslash':
            last_column = part.start_pos[1] + 1
        else:
            last_column = part.end_pos[1]
        if last_column > self._config.max_characters \
                and spacing.start_pos[1] <= self._config.max_characters:
            # Special case for long URLs in multi-line docstrings or comments,
            # but still report the error when the first 72 chars are whitespaces.
            report = True
            if part.type == 'comment':
                splitted = part.value[1:].split()
                if len(splitted) == 1 \
                        and (part.end_pos[1] - len(splitted[0])) < 72:
                    report = False
            if report:
                self.add_issue(
                    part,
                    501,
                    'Line too long (%s > %s characters)' %
                    (last_column, self._config.max_characters),
                )

    def _check_spacing(self, part, spacing):
        def add_if_spaces(*args):
            if spaces:
                return self.add_issue(*args)

        def add_not_spaces(*args):
            if not spaces:
                return self.add_issue(*args)

        spaces = spacing.value
        prev = self._previous_part
        if prev is not None and prev.type == 'error_leaf' or part.type == 'error_leaf':
            return

        type_ = part.type
        if '\t' in spaces:
            self.add_issue(spacing, 223, 'Used tab to separate tokens')
        elif type_ == 'comment':
            if len(spaces) < self._config.spaces_before_comment:
                self.add_issue(spacing, 261, 'At least two spaces before inline comment')
        elif type_ == 'newline':
            add_if_spaces(spacing, 291, 'Trailing whitespace')
        elif len(spaces) > 1:
            self.add_issue(spacing, 221, 'Multiple spaces used')
        else:
            if prev in _OPENING_BRACKETS:
                message = "Whitespace after '%s'" % part.value
                add_if_spaces(spacing, 201, message)
            elif part in _CLOSING_BRACKETS:
                message = "Whitespace before '%s'" % part.value
                add_if_spaces(spacing, 202, message)
            elif part in (',', ';') or part == ':' \
                    and part.parent.type not in _POSSIBLE_SLICE_PARENTS:
                message = "Whitespace before '%s'" % part.value
                add_if_spaces(spacing, 203, message)
            elif prev == ':' and prev.parent.type in _POSSIBLE_SLICE_PARENTS:
                pass  # TODO
            elif prev in (',', ';', ':'):
                add_not_spaces(spacing, 231, "missing whitespace after '%s'" % prev.value)
            elif part == ':':  # Is a subscript
                # TODO
                pass
            elif part in ('*', '**') and part.parent.type not in _NON_STAR_TYPES \
                    or prev in ('*', '**') \
                    and prev.parent.type not in _NON_STAR_TYPES:
                # TODO
                pass
            elif prev in _FACTOR and prev.parent.type == 'factor':
                pass
            elif prev == '@' and prev.parent.type == 'decorator':
                pass  # TODO should probably raise an error if there's a space here
            elif part in _NEEDS_SPACE or prev in _NEEDS_SPACE:
                if part == '=' and part.parent.type in ('argument', 'param') \
                        or prev == '=' and prev.parent.type in ('argument', 'param'):
                    if part == '=':
                        param = part.parent
                    else:
                        param = prev.parent
                    if param.type == 'param' and param.annotation:
                        add_not_spaces(spacing, 252, 'Expected spaces around annotation equals')
                    else:
                        add_if_spaces(spacing, 251, 'Unexpected spaces around keyword / parameter equals')
                elif part in _BITWISE_OPERATOR or prev in _BITWISE_OPERATOR:
                    add_not_spaces(spacing, 227, 'Missing whitespace around bitwise or shift operator')
                elif part == '%' or prev == '%':
                    add_not_spaces(spacing, 228, 'Missing whitespace around modulo operator')
                else:
                    message_225 = 'Missing whitespace between tokens'
                    add_not_spaces(spacing, 225, message_225)
            elif type_ == 'keyword' or prev.type == 'keyword':
                add_not_spaces(spacing, 275, 'Missing whitespace around keyword')
            else:
                prev_spacing = self._previous_spacing
                if prev in _ALLOW_SPACE and spaces != prev_spacing.value \
                        and '\n' not in self._previous_leaf.prefix:
                    message = "Whitespace before operator doesn't match with whitespace after"
                    self.add_issue(spacing, 229, message)

                if spaces and part not in _ALLOW_SPACE and prev not in _ALLOW_SPACE:
                    message_225 = 'Missing whitespace between tokens'
                    #print('xy', spacing)
                    #self.add_issue(spacing, 225, message_225)
                    # TODO why only brackets?
                    if part in _OPENING_BRACKETS:
                        message = "Whitespace before '%s'" % part.value
                        add_if_spaces(spacing, 211, message)

    def _analyse_non_prefix(self, leaf):
        typ = leaf.type
        if typ == 'name' and leaf.value in ('l', 'O', 'I'):
            if leaf.is_definition():
                message = "Do not define %s named 'l', 'O', or 'I'"
                if leaf.parent.type == 'class' and leaf.parent.name == leaf:
                    self.add_issue(leaf, 742, message % 'classes')
                elif leaf.parent.type == 'function' and leaf.parent.name == leaf:
                    self.add_issue(leaf, 743, message % 'function')
                else:
                    self.add_issue(leaf, 741, message % 'variables')
        elif leaf.value == ':':
            if isinstance(leaf.parent, (Flow, Scope)) and leaf.parent.type != 'lambdef':
                next_leaf = leaf.get_next_leaf()
                if next_leaf.type != 'newline':
                    if leaf.parent.type == 'funcdef':
                        self.add_issue(next_leaf, 704, 'Multiple statements on one line (def)')
                    else:
                        self.add_issue(next_leaf, 701, 'Multiple statements on one line (colon)')
        elif leaf.value == ';':
            if leaf.get_next_leaf().type in ('newline', 'endmarker'):
                self.add_issue(leaf, 703, 'Statement ends with a semicolon')
            else:
                self.add_issue(leaf, 702, 'Multiple statements on one line (semicolon)')
        elif leaf.value in ('==', '!='):
            comparison = leaf.parent
            index = comparison.children.index(leaf)
            left = comparison.children[index - 1]
            right = comparison.children[index + 1]
            for node in left, right:
                if node.type == 'keyword' or node.type == 'name':
                    if node.value == 'None':
                        message = "comparison to None should be 'if cond is None:'"
                        self.add_issue(leaf, 711, message)
                        break
                    elif node.value in ('True', 'False'):
                        message = "comparison to False/True should be 'if cond is True:' or 'if cond:'"
                        self.add_issue(leaf, 712, message)
                        break
        elif leaf.value in ('in', 'is'):
            comparison = leaf.parent
            if comparison.type == 'comparison' and comparison.parent.type == 'not_test':
                if leaf.value == 'in':
                    self.add_issue(leaf, 713, "test for membership should be 'not in'")
                else:
                    self.add_issue(leaf, 714, "test for object identity should be 'is not'")
        elif typ == 'string':
            # Checking multiline strings
            for i, line in enumerate(leaf.value.splitlines()[1:]):
                indentation = re.match(r'[ \t]*', line).group(0)
                start_pos = leaf.line + i, len(indentation)
                # TODO check multiline indentation.
        elif typ == 'endmarker':
            if self._newline_count >= 2:
                self.add_issue(leaf, 391, 'Blank line at end of file')

    def add_issue(self, node, code, message):
        if self._previous_leaf is not None:
            if search_ancestor(self._previous_leaf, 'error_node') is not None:
                return
            if self._previous_leaf.type == 'error_leaf':
                return
        if search_ancestor(node, 'error_node') is not None:
            return
        if code in (901, 903):
            # 901 and 903 are raised by the ErrorFinder.
            super(PEP8Normalizer, self).add_issue(node, code, message)
        else:
            # Skip ErrorFinder here, because it has custom behavior.
            super(ErrorFinder, self).add_issue(node, code, message)


class PEP8NormalizerConfig(ErrorFinderConfig):
    normalizer_class = PEP8Normalizer
    """
    Normalizing to PEP8. Not really implemented, yet.
    """
    def __init__(self, indentation=' ' * 4, hanging_indentation=None,
                 max_characters=79, spaces_before_comment=2):
        self.indentation = indentation
        if hanging_indentation is None:
            hanging_indentation = indentation
        self.hanging_indentation = hanging_indentation
        self.closing_bracket_hanging_indentation = ''
        self.break_after_binary = False
        self.max_characters = max_characters
        self.spaces_before_comment = spaces_before_comment


# TODO this is not yet ready.
#@PEP8Normalizer.register_rule(type='endmarker')
class BlankLineAtEnd(Rule):
    code = 392
    message = 'Blank line at end of file'

    def is_issue(self, leaf):
        return self._newline_count >= 2
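
A rough usage sketch for the normalizer above, not part of the diff. There is no stable public API for style issues at this point; _get_normalizer_issues() is an internal Grammar helper (PythonGrammar uses PEP8NormalizerConfig as its default normalizer config), so treat the call as an assumption:

import parso

grammar = parso.load_grammar()
module = grammar.parse('x=1\n')
# _get_normalizer_issues is internal; its name and behavior may change.
for issue in grammar._get_normalizer_issues(module):
    # e.g. 225 ('Missing whitespace between tokens') at (1, 1)
    print(issue.code, issue.start_pos, issue.message)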
97  parso/python/prefix.py  (new file)
@@ -0,0 +1,97 @@
import re
from codecs import BOM_UTF8

from parso.python.tokenize import group

unicode_bom = BOM_UTF8.decode('utf-8')


class PrefixPart(object):
    def __init__(self, leaf, typ, value, spacing='', start_pos=None):
        assert start_pos is not None
        self.parent = leaf
        self.type = typ
        self.value = value
        self.spacing = spacing
        self.start_pos = start_pos

    @property
    def end_pos(self):
        if self.value.endswith('\n'):
            return self.start_pos[0] + 1, 0
        if self.value == unicode_bom:
            # The bom doesn't have a length at the start of a Python file.
            return self.start_pos
        return self.start_pos[0], self.start_pos[1] + len(self.value)

    def create_spacing_part(self):
        column = self.start_pos[1] - len(self.spacing)
        return PrefixPart(
            self.parent, 'spacing', self.spacing,
            start_pos=(self.start_pos[0], column)
        )

    def __repr__(self):
        return '%s(%s, %s, %s)' % (
            self.__class__.__name__,
            self.type,
            repr(self.value),
            self.start_pos
        )


_comment = r'#[^\n\r\f]*'
_backslash = r'\\\r?\n'
_newline = r'\r?\n'
_form_feed = r'\f'
_only_spacing = '$'
_spacing = r'[ \t]*'
_bom = unicode_bom

_regex = group(
    _comment, _backslash, _newline, _form_feed, _only_spacing, _bom,
    capture=True
)
_regex = re.compile(group(_spacing, capture=True) + _regex)


_types = {
    '#': 'comment',
    '\\': 'backslash',
    '\f': 'formfeed',
    '\n': 'newline',
    '\r': 'newline',
    unicode_bom: 'bom'
}


def split_prefix(leaf, start_pos):
    line, column = start_pos
    start = 0
    value = spacing = ''
    bom = False
    while start != len(leaf.prefix):
        match = _regex.match(leaf.prefix, start)
        spacing = match.group(1)
        value = match.group(2)
        if not value:
            break
        type_ = _types[value[0]]
        yield PrefixPart(
            leaf, type_, value, spacing,
            start_pos=(line, column + start - int(bom) + len(spacing))
        )
        if type_ == 'bom':
            bom = True

        start = match.end(0)
        if value.endswith('\n'):
            line += 1
            column = -start

    if value:
        spacing = ''
    yield PrefixPart(
        leaf, 'spacing', spacing,
        start_pos=(line, column + start)
    )
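
To see how a prefix decomposes, parse a snippet whose first leaf carries a comment and a blank line in its prefix. This sketch (not part of the diff) uses the leaf-level _split_prefix() wrapper, the same helper the PEP8 normalizer above relies on; it delegates to split_prefix():

import parso

module = parso.parse('# a comment\n\nx = 1\n')
leaf = module.get_first_leaf()  # the 'x' name; its prefix holds the comment
for part in leaf._split_prefix():
    print(part.type, repr(part.value), part.start_pos)
# Yields a 'comment' part, two 'newline' parts, and a trailing
# (possibly empty) 'spacing' part.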
27  parso/python/token.py  (new file)
@@ -0,0 +1,27 @@
from __future__ import absolute_import


class TokenType(object):
    def __init__(self, name, contains_syntax=False):
        self.name = name
        self.contains_syntax = contains_syntax

    def __repr__(self):
        return '%s(%s)' % (self.__class__.__name__, self.name)


class TokenTypes(object):
    """
    Basically an enum, but Python 2 doesn't have enums in the standard library.
    """
    def __init__(self, names, contains_syntax):
        for name in names:
            setattr(self, name, TokenType(name, contains_syntax=name in contains_syntax))


PythonTokenTypes = TokenTypes((
    'STRING', 'NUMBER', 'NAME', 'ERRORTOKEN', 'NEWLINE', 'INDENT', 'DEDENT',
    'ERROR_DEDENT', 'FSTRING_STRING', 'FSTRING_START', 'FSTRING_END', 'OP',
    'ENDMARKER'),
    contains_syntax=('NAME', 'OP'),
)
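
The container above behaves like a tiny enum: each attribute is a TokenType whose contains_syntax flag tells the parser whether the token's value can itself carry syntax (names may be keywords, operators are reserved strings). A quick illustration, not part of the diff:

from parso.python.token import PythonTokenTypes

print(PythonTokenTypes.NAME)                    # TokenType(NAME)
print(PythonTokenTypes.NAME.contains_syntax)    # True
print(PythonTokenTypes.NUMBER.contains_syntax)  # False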
30  parso/python/token.pyi  (new file)
@@ -0,0 +1,30 @@
from typing import Container, Iterable

class TokenType:
    name: str
    contains_syntax: bool
    def __init__(self, name: str, contains_syntax: bool) -> None: ...

class TokenTypes:
    def __init__(
        self, names: Iterable[str], contains_syntax: Container[str]
    ) -> None: ...

# not an actual class in the source code, but we need this class to type the fields of
# PythonTokenTypes
class _FakePythonTokenTypesClass(TokenTypes):
    STRING: TokenType
    NUMBER: TokenType
    NAME: TokenType
    ERRORTOKEN: TokenType
    NEWLINE: TokenType
    INDENT: TokenType
    DEDENT: TokenType
    ERROR_DEDENT: TokenType
    FSTRING_STRING: TokenType
    FSTRING_START: TokenType
    FSTRING_END: TokenType
    OP: TokenType
    ENDMARKER: TokenType

PythonTokenTypes: _FakePythonTokenTypesClass = ...
630  parso/python/tokenize.py  (new file)
@@ -0,0 +1,630 @@
# -*- coding: utf-8 -*-
"""
This tokenizer has been copied from the ``tokenize.py`` standard library
tokenizer. The reason was simple: The standard library tokenizer fails
if the indentation is not right. To make it possible to do error recovery the
tokenizer needed to be rewritten.

Basically this is a stripped down version of the standard library module, so
you can read the documentation there. Additionally we included some speed and
memory optimizations here.
"""
from __future__ import absolute_import

import sys
import string
import re
from collections import namedtuple
import itertools as _itertools
from codecs import BOM_UTF8

from parso.python.token import PythonTokenTypes
from parso._compatibility import py_version
from parso.utils import split_lines


STRING = PythonTokenTypes.STRING
NAME = PythonTokenTypes.NAME
NUMBER = PythonTokenTypes.NUMBER
OP = PythonTokenTypes.OP
NEWLINE = PythonTokenTypes.NEWLINE
INDENT = PythonTokenTypes.INDENT
DEDENT = PythonTokenTypes.DEDENT
ENDMARKER = PythonTokenTypes.ENDMARKER
ERRORTOKEN = PythonTokenTypes.ERRORTOKEN
ERROR_DEDENT = PythonTokenTypes.ERROR_DEDENT
FSTRING_START = PythonTokenTypes.FSTRING_START
FSTRING_STRING = PythonTokenTypes.FSTRING_STRING
FSTRING_END = PythonTokenTypes.FSTRING_END

TokenCollection = namedtuple(
    'TokenCollection',
    'pseudo_token single_quoted triple_quoted endpats whitespace '
    'fstring_pattern_map always_break_tokens',
)

BOM_UTF8_STRING = BOM_UTF8.decode('utf-8')

_token_collection_cache = {}

if py_version >= 30:
    # Python 3 has str.isidentifier() to check if a char is a valid identifier
    is_identifier = str.isidentifier
else:
    namechars = string.ascii_letters + '_'
    is_identifier = lambda s: s in namechars


def group(*choices, **kwargs):
    capture = kwargs.pop('capture', False)  # Python 2, arrghhhhh :(
    assert not kwargs

    start = '('
    if not capture:
        start += '?:'
    return start + '|'.join(choices) + ')'


def maybe(*choices):
    return group(*choices) + '?'


# Return the empty string, plus all of the valid string prefixes.
def _all_string_prefixes(version_info, include_fstring=False, only_fstring=False):
    def different_case_versions(prefix):
        for s in _itertools.product(*[(c, c.upper()) for c in prefix]):
            yield ''.join(s)
    # The valid string prefixes. Only contain the lower case versions,
    # and don't contain any permutations (include 'fr', but not
    # 'rf'). The various permutations will be generated.
    valid_string_prefixes = ['b', 'r', 'u']
    if version_info >= (3, 0):
        valid_string_prefixes.append('br')

    result = set([''])
    if version_info >= (3, 6) and include_fstring:
        f = ['f', 'fr']
        if only_fstring:
            valid_string_prefixes = f
            result = set()
        else:
            valid_string_prefixes += f
    elif only_fstring:
        return set()

    # if we add binary f-strings, add: ['fb', 'fbr']
    for prefix in valid_string_prefixes:
        for t in _itertools.permutations(prefix):
            # create a list with upper and lower versions of each
            # character
            result.update(different_case_versions(t))
    if version_info <= (2, 7):
        # In Python 2 the order cannot just be random.
        result.update(different_case_versions('ur'))
        result.update(different_case_versions('br'))
    return result


def _compile(expr):
    return re.compile(expr, re.UNICODE)


def _get_token_collection(version_info):
    try:
        return _token_collection_cache[tuple(version_info)]
    except KeyError:
        _token_collection_cache[tuple(version_info)] = result = \
            _create_token_collection(version_info)
        return result


fstring_string_single_line = _compile(r'(?:[^{}\r\n]+|\{\{|\}\})+')
fstring_string_multi_line = _compile(r'(?:[^{}]+|\{\{|\}\})+')
fstring_format_spec_single_line = _compile(r'[^{}\r\n]+')
fstring_format_spec_multi_line = _compile(r'[^{}]+')


def _create_token_collection(version_info):
    # Note: we use unicode matching for names ("\w") but ascii matching for
    # number literals.
    Whitespace = r'[ \f\t]*'
    whitespace = _compile(Whitespace)
    Comment = r'#[^\r\n]*'
    Name = r'\w+'

    if version_info >= (3, 6):
        Hexnumber = r'0[xX](?:_?[0-9a-fA-F])+'
        Binnumber = r'0[bB](?:_?[01])+'
        Octnumber = r'0[oO](?:_?[0-7])+'
        Decnumber = r'(?:0(?:_?0)*|[1-9](?:_?[0-9])*)'
        Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber)
        Exponent = r'[eE][-+]?[0-9](?:_?[0-9])*'
        Pointfloat = group(r'[0-9](?:_?[0-9])*\.(?:[0-9](?:_?[0-9])*)?',
                           r'\.[0-9](?:_?[0-9])*') + maybe(Exponent)
        Expfloat = r'[0-9](?:_?[0-9])*' + Exponent
        Floatnumber = group(Pointfloat, Expfloat)
        Imagnumber = group(r'[0-9](?:_?[0-9])*[jJ]', Floatnumber + r'[jJ]')
    else:
        Hexnumber = r'0[xX][0-9a-fA-F]+'
        Binnumber = r'0[bB][01]+'
        if version_info >= (3, 0):
            Octnumber = r'0[oO][0-7]+'
        else:
            Octnumber = '0[oO]?[0-7]+'
        Decnumber = r'(?:0+|[1-9][0-9]*)'
        Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber)
        if version_info[0] < 3:
            Intnumber += '[lL]?'
        Exponent = r'[eE][-+]?[0-9]+'
        Pointfloat = group(r'[0-9]+\.[0-9]*', r'\.[0-9]+') + maybe(Exponent)
        Expfloat = r'[0-9]+' + Exponent
        Floatnumber = group(Pointfloat, Expfloat)
        Imagnumber = group(r'[0-9]+[jJ]', Floatnumber + r'[jJ]')
    Number = group(Imagnumber, Floatnumber, Intnumber)

    # Note that since _all_string_prefixes includes the empty string,
    # StringPrefix can be the empty string (making it optional).
    possible_prefixes = _all_string_prefixes(version_info)
    StringPrefix = group(*possible_prefixes)
    StringPrefixWithF = group(*_all_string_prefixes(version_info, include_fstring=True))
    fstring_prefixes = _all_string_prefixes(version_info, include_fstring=True, only_fstring=True)
    FStringStart = group(*fstring_prefixes)

    # Tail end of ' string.
    Single = r"(?:\\.|[^'\\])*'"
    # Tail end of " string.
    Double = r'(?:\\.|[^"\\])*"'
    # Tail end of ''' string.
    Single3 = r"(?:\\.|'(?!'')|[^'\\])*'''"
    # Tail end of """ string.
    Double3 = r'(?:\\.|"(?!"")|[^"\\])*"""'
    Triple = group(StringPrefixWithF + "'''", StringPrefixWithF + '"""')

    # Because of leftmost-then-longest match semantics, be sure to put the
    # longest operators first (e.g., if = came before ==, == would get
    # recognized as two instances of =).
    Operator = group(r"\*\*=?", r">>=?", r"<<=?",
                     r"//=?", r"->",
                     r"[+\-*/%&@`|^!=<>]=?",
                     r"~")

    Bracket = '[][(){}]'

    special_args = [r'\r\n?', r'\n', r'[;.,@]']
    if version_info >= (3, 0):
        special_args.insert(0, r'\.\.\.')
    if version_info >= (3, 8):
        special_args.insert(0, ":=?")
    else:
        special_args.insert(0, ":")
    Special = group(*special_args)

    Funny = group(Operator, Bracket, Special)

    # First (or only) line of ' or " string.
    ContStr = group(StringPrefix + r"'[^\r\n'\\]*(?:\\.[^\r\n'\\]*)*" +
                    group("'", r'\\(?:\r\n?|\n)'),
                    StringPrefix + r'"[^\r\n"\\]*(?:\\.[^\r\n"\\]*)*' +
                    group('"', r'\\(?:\r\n?|\n)'))
    pseudo_extra_pool = [Comment, Triple]
    all_quotes = '"', "'", '"""', "'''"
    if fstring_prefixes:
        pseudo_extra_pool.append(FStringStart + group(*all_quotes))

    PseudoExtras = group(r'\\(?:\r\n?|\n)|\Z', *pseudo_extra_pool)
    PseudoToken = group(Whitespace, capture=True) + \
        group(PseudoExtras, Number, Funny, ContStr, Name, capture=True)

    # For a given string prefix plus quotes, endpats maps it to a regex
    # to match the remainder of that string. _prefix can be empty, for
    # a normal single or triple quoted string (with no prefix).
    endpats = {}
    for _prefix in possible_prefixes:
        endpats[_prefix + "'"] = _compile(Single)
        endpats[_prefix + '"'] = _compile(Double)
        endpats[_prefix + "'''"] = _compile(Single3)
        endpats[_prefix + '"""'] = _compile(Double3)

    # A set of all of the single and triple quoted string prefixes,
    # including the opening quotes.
    single_quoted = set()
    triple_quoted = set()
    fstring_pattern_map = {}
    for t in possible_prefixes:
        for quote in '"', "'":
            single_quoted.add(t + quote)

        for quote in '"""', "'''":
            triple_quoted.add(t + quote)

    for t in fstring_prefixes:
        for quote in all_quotes:
            fstring_pattern_map[t + quote] = quote

    ALWAYS_BREAK_TOKENS = (';', 'import', 'class', 'def', 'try', 'except',
                           'finally', 'while', 'with', 'return')
    pseudo_token_compiled = _compile(PseudoToken)
    return TokenCollection(
        pseudo_token_compiled, single_quoted, triple_quoted, endpats,
        whitespace, fstring_pattern_map, ALWAYS_BREAK_TOKENS
    )


class Token(namedtuple('Token', ['type', 'string', 'start_pos', 'prefix'])):
    @property
    def end_pos(self):
        lines = split_lines(self.string)
        if len(lines) > 1:
            return self.start_pos[0] + len(lines) - 1, 0
        else:
            return self.start_pos[0], self.start_pos[1] + len(self.string)


class PythonToken(Token):
    def __repr__(self):
        return ('TokenInfo(type=%s, string=%r, start_pos=%r, prefix=%r)' %
                self._replace(type=self.type.name))


class FStringNode(object):
    def __init__(self, quote):
        self.quote = quote
        self.parentheses_count = 0
        self.previous_lines = ''
        self.last_string_start_pos = None
        # In the syntax there can be multiple format_spec's nested:
        # {x:{y:3}}
        self.format_spec_count = 0

    def open_parentheses(self, character):
        self.parentheses_count += 1

    def close_parentheses(self, character):
        self.parentheses_count -= 1
        if self.parentheses_count == 0:
            # No parentheses means that the format spec is also finished.
            self.format_spec_count = 0

    def allow_multiline(self):
        return len(self.quote) == 3

    def is_in_expr(self):
        return self.parentheses_count > self.format_spec_count

    def is_in_format_spec(self):
        return not self.is_in_expr() and self.format_spec_count


def _close_fstring_if_necessary(fstring_stack, string, start_pos, additional_prefix):
    for fstring_stack_index, node in enumerate(fstring_stack):
        if string.startswith(node.quote):
            token = PythonToken(
                FSTRING_END,
                node.quote,
                start_pos,
                prefix=additional_prefix,
            )
            additional_prefix = ''
            assert not node.previous_lines
            del fstring_stack[fstring_stack_index:]
            return token, '', len(node.quote)
    return None, additional_prefix, 0


def _find_fstring_string(endpats, fstring_stack, line, lnum, pos):
    tos = fstring_stack[-1]
    allow_multiline = tos.allow_multiline()
    if tos.is_in_format_spec():
        if allow_multiline:
            regex = fstring_format_spec_multi_line
        else:
            regex = fstring_format_spec_single_line
    else:
        if allow_multiline:
            regex = fstring_string_multi_line
        else:
            regex = fstring_string_single_line

    match = regex.match(line, pos)
    if match is None:
        return tos.previous_lines, pos

    if not tos.previous_lines:
        tos.last_string_start_pos = (lnum, pos)

    string = match.group(0)
    for fstring_stack_node in fstring_stack:
        end_match = endpats[fstring_stack_node.quote].match(string)
        if end_match is not None:
            string = end_match.group(0)[:-len(fstring_stack_node.quote)]

    new_pos = pos
    new_pos += len(string)
    if allow_multiline and (string.endswith('\n') or string.endswith('\r')):
        tos.previous_lines += string
        string = ''
    else:
        string = tos.previous_lines + string

    return string, new_pos


def tokenize(code, version_info, start_pos=(1, 0)):
    """Generate tokens from the source code (string)."""
    lines = split_lines(code, keepends=True)
    return tokenize_lines(lines, version_info, start_pos=start_pos)


def _print_tokens(func):
    """
    A small helper function to help debug the tokenize_lines function.
    """
    def wrapper(*args, **kwargs):
        for token in func(*args, **kwargs):
            yield token

    return wrapper


# @_print_tokens
def tokenize_lines(lines, version_info, start_pos=(1, 0)):
    """
    A heavily modified Python standard library tokenizer.

    Additionally to the default information, yields also the prefix of each
    token. This idea comes from lib2to3. The prefix contains all information
    that is irrelevant for the parser like newlines in parentheses or comments.
    """
    def dedent_if_necessary(start):
        while start < indents[-1]:
            if start > indents[-2]:
                yield PythonToken(ERROR_DEDENT, '', (lnum, 0), '')
                break
            yield PythonToken(DEDENT, '', spos, '')
            indents.pop()

    pseudo_token, single_quoted, triple_quoted, endpats, whitespace, \
        fstring_pattern_map, always_break_tokens, = \
        _get_token_collection(version_info)
    paren_level = 0  # count parentheses
    indents = [0]
    max = 0
    numchars = '0123456789'
    contstr = ''
    contline = None
    # We start with a newline. This makes indent at the first position
    # possible. It's not valid Python, but still better than an INDENT in the
    # second line (and not in the first). This makes quite a few things in
    # Jedi's fast parser possible.
    new_line = True
    prefix = ''  # Should never be required, but here for safety
    additional_prefix = ''
    first = True
    lnum = start_pos[0] - 1
    fstring_stack = []
    for line in lines:  # loop over lines in stream
        lnum += 1
        pos = 0
        max = len(line)
        if first:
            if line.startswith(BOM_UTF8_STRING):
                additional_prefix = BOM_UTF8_STRING
                line = line[1:]
                max = len(line)

            # Fake that the part before was already parsed.
            line = '^' * start_pos[1] + line
            pos = start_pos[1]
            max += start_pos[1]

            first = False

        if contstr:  # continued string
            endmatch = endprog.match(line)
            if endmatch:
                pos = endmatch.end(0)
                yield PythonToken(
                    STRING, contstr + line[:pos],
                    contstr_start, prefix)
                contstr = ''
                contline = None
            else:
                contstr = contstr + line
                contline = contline + line
                continue

        while pos < max:
            if fstring_stack:
                tos = fstring_stack[-1]
                if not tos.is_in_expr():
                    string, pos = _find_fstring_string(endpats, fstring_stack, line, lnum, pos)
                    if string:
                        yield PythonToken(
                            FSTRING_STRING, string,
                            tos.last_string_start_pos,
                            # Never has a prefix because it can start anywhere and
                            # include whitespace.
                            prefix=''
                        )
                        tos.previous_lines = ''
                        continue
                    if pos == max:
                        break

                rest = line[pos:]
                fstring_end_token, additional_prefix, quote_length = _close_fstring_if_necessary(
                    fstring_stack,
                    rest,
                    (lnum, pos),
                    additional_prefix,
                )
                pos += quote_length
                if fstring_end_token is not None:
                    yield fstring_end_token
                    continue

            pseudomatch = pseudo_token.match(line, pos)
            if not pseudomatch:  # scan for tokens
                match = whitespace.match(line, pos)
                if pos == 0:
                    for t in dedent_if_necessary(match.end()):
                        yield t
                pos = match.end()
                new_line = False
                yield PythonToken(
                    ERRORTOKEN, line[pos], (lnum, pos),
                    additional_prefix + match.group(0)
                )
                additional_prefix = ''
                pos += 1
                continue

            prefix = additional_prefix + pseudomatch.group(1)
            additional_prefix = ''
            start, pos = pseudomatch.span(2)
            spos = (lnum, start)
            token = pseudomatch.group(2)
            if token == '':
                assert prefix
                additional_prefix = prefix
                # This means that we have a line with whitespace/comments at
                # the end, which just results in an endmarker.
                break
            initial = token[0]

            if new_line and initial not in '\r\n\\#':
                new_line = False
                if paren_level == 0 and not fstring_stack:
                    i = 0
                    indent_start = start
                    while line[i] == '\f':
                        i += 1
                        # TODO don't we need to change spos as well?
                        indent_start -= 1
                    if indent_start > indents[-1]:
                        yield PythonToken(INDENT, '', spos, '')
                        indents.append(indent_start)
                    for t in dedent_if_necessary(indent_start):
                        yield t

            if (initial in numchars or  # ordinary number
                    (initial == '.' and token != '.' and token != '...')):
                yield PythonToken(NUMBER, token, spos, prefix)
            elif initial in '\r\n':
                if any(not f.allow_multiline() for f in fstring_stack):
                    # Would use fstring_stack.clear, but that's not available
                    # in Python 2.
                    fstring_stack[:] = []

                if not new_line and paren_level == 0 and not fstring_stack:
                    yield PythonToken(NEWLINE, token, spos, prefix)
                else:
                    additional_prefix = prefix + token
                new_line = True
            elif initial == '#':  # Comments
                assert not token.endswith("\n")
                additional_prefix = prefix + token
            elif token in triple_quoted:
                endprog = endpats[token]
                endmatch = endprog.match(line, pos)
                if endmatch:  # all on one line
                    pos = endmatch.end(0)
                    token = line[start:pos]
                    yield PythonToken(STRING, token, spos, prefix)
                else:
                    contstr_start = (lnum, start)  # multiple lines
                    contstr = line[start:]
                    contline = line
                    break

            # Check up to the first 3 chars of the token to see if
            # they're in the single_quoted set. If so, they start
            # a string.
            # We're using the first 3, because we're looking for
            # "rb'" (for example) at the start of the token. If
            # we switch to longer prefixes, this needs to be
            # adjusted.
            # Note that initial == token[:1].
            # Also note that single quote checking must come after
            # triple quote checking (above).
            elif initial in single_quoted or \
                    token[:2] in single_quoted or \
                    token[:3] in single_quoted:
                if token[-1] in '\r\n':  # continued string
                    # This means that a single quoted string ends with a
                    # backslash and is continued.
                    contstr_start = lnum, start
                    endprog = (endpats.get(initial) or endpats.get(token[1])
                               or endpats.get(token[2]))
                    contstr = line[start:]
                    contline = line
                    break
                else:  # ordinary string
                    yield PythonToken(STRING, token, spos, prefix)
            elif token in fstring_pattern_map:  # The start of an fstring.
                fstring_stack.append(FStringNode(fstring_pattern_map[token]))
                yield PythonToken(FSTRING_START, token, spos, prefix)
            elif is_identifier(initial):  # ordinary name
                if token in always_break_tokens:
                    fstring_stack[:] = []
                    paren_level = 0
                    # We only want to dedent if the token is on a new line.
                    if re.match(r'[ \f\t]*$', line[:start]):
                        while True:
                            indent = indents.pop()
                            if indent > start:
                                yield PythonToken(DEDENT, '', spos, '')
                            else:
                                indents.append(indent)
                                break
                yield PythonToken(NAME, token, spos, prefix)
            elif initial == '\\' and line[start:] in ('\\\n', '\\\r\n', '\\\r'):  # continued stmt
                additional_prefix += prefix + line[start:]
                break
            else:
                if token in '([{':
                    if fstring_stack:
                        fstring_stack[-1].open_parentheses(token)
                    else:
                        paren_level += 1
                elif token in ')]}':
                    if fstring_stack:
                        fstring_stack[-1].close_parentheses(token)
                    else:
                        if paren_level:
                            paren_level -= 1
                elif token == ':' and fstring_stack \
                        and fstring_stack[-1].parentheses_count \
                        - fstring_stack[-1].format_spec_count == 1:
                    fstring_stack[-1].format_spec_count += 1

                yield PythonToken(OP, token, spos, prefix)

    if contstr:
        yield PythonToken(ERRORTOKEN, contstr, contstr_start, prefix)
        if contstr.endswith('\n') or contstr.endswith('\r'):
            new_line = True

    end_pos = lnum, max
    # As the last position we just take the maximally possible position. We
    # remove -1 for the last new line.
    for indent in indents[1:]:
        yield PythonToken(DEDENT, '', end_pos, '')
    yield PythonToken(ENDMARKER, '', end_pos, additional_prefix)


if __name__ == "__main__":
    if len(sys.argv) >= 2:
        path = sys.argv[1]
        with open(path) as f:
            code = f.read()
    else:
        code = sys.stdin.read()

    from parso.utils import python_bytes_to_unicode, parse_version_string

    if isinstance(code, bytes):
        code = python_bytes_to_unicode(code)

    for token in tokenize(code, parse_version_string()):
        print(token)
||||
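For orientation, a minimal sketch of driving this tokenizer from Python, mirroring the `__main__` block above (assuming parso is importable; the exact token repr varies between versions):

    from parso.python.tokenize import tokenize
    from parso.utils import parse_version_string

    for token in tokenize('x = 1\n', parse_version_string('3.6')):
        print(token)
    # Yields NAME, OP, NUMBER, NEWLINE and ENDMARKER tokens, each carrying the
    # whitespace before it in `prefix` and its (line, column) in `start_pos`.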
24
parso/python/tokenize.pyi
Normal file
@@ -0,0 +1,24 @@
from typing import Generator, Iterable, NamedTuple, Tuple

from parso.python.token import TokenType
from parso.utils import PythonVersionInfo

class Token(NamedTuple):
    type: TokenType
    string: str
    start_pos: Tuple[int, int]
    prefix: str
    @property
    def end_pos(self) -> Tuple[int, int]: ...

class PythonToken(Token):
    def __repr__(self) -> str: ...

def tokenize(
    code: str, version_info: PythonVersionInfo, start_pos: Tuple[int, int] = (1, 0)
) -> Generator[PythonToken, None, None]: ...
def tokenize_lines(
    lines: Iterable[str],
    version_info: PythonVersionInfo,
    start_pos: Tuple[int, int] = (1, 0),
) -> Generator[PythonToken, None, None]: ...
@@ -1,18 +1,20 @@
"""
If you know what an syntax tree is, you'll see that this module is pretty much
that. The classes represent syntax elements like functions and imports.
This is the syntax tree for Python syntaxes (2 & 3). The classes represent
syntax elements like functions and imports.

This is the "business logic" part of the parser. There's a lot of logic here
that makes it easier for Jedi (and other libraries) to deal with a Python syntax
tree.
All of the nodes can be traced back to the `Python grammar file
<https://docs.python.org/3/reference/grammar.html>`_. If you want to know how
a tree is structured, just analyse that file (for each Python version it's a
bit different).

By using `get_code` on a module, you can get back the 1-to-1 representation of
the input given to the parser. This is important if you are using refactoring.
There's a lot of logic here that makes it easier for Jedi (and other libraries)
to deal with a Python syntax tree.

The easiest way to play with this module is to use :class:`parsing.Parser`.
:attr:`parsing.Parser.module` holds an instance of :class:`Module`:
By using :py:meth:`parso.tree.NodeOrLeaf.get_code` on a module, you can get
back the 1-to-1 representation of the input given to the parser. This is
important if you want to refactor a parser tree.

>>> from parso.python import parse
>>> from parso import parse
>>> parser = parse('import os')
>>> module = parser.get_root_node()
>>> module
@@ -23,11 +25,41 @@ Any subclasses of :class:`Scope`, including :class:`Module` has an attribute

>>> list(module.iter_imports())
[<ImportName: import os@1,0>]

Changes to the Python Grammar
-----------------------------

A few things have changed when looking at Python grammar files:

- :class:`Param` does not exist in Python grammar files. It is essentially a
  part of a ``parameters`` node. |parso| splits it up to make it easier to
  analyse parameters. However this just makes it easier to deal with the syntax
  tree, it doesn't actually change the valid syntax.
- A few nodes like `lambdef` and `lambdef_nocond` have been merged in the
  syntax tree to make it easier to do deal with them.

Parser Tree Classes
-------------------
"""

import re
from collections import Mapping

from parso._compatibility import utf8_repr, unicode
from parso.tree import Node, BaseNode, Leaf, ErrorNode, ErrorLeaf, \
    search_ancestor
from parso.python.prefix import split_prefix
from parso.utils import split_lines

_FLOW_CONTAINERS = set(['if_stmt', 'while_stmt', 'for_stmt', 'try_stmt',
                        'with_stmt', 'async_stmt', 'suite'])
_RETURN_STMT_CONTAINERS = set(['suite', 'simple_stmt']) | _FLOW_CONTAINERS
_FUNC_CONTAINERS = set(['suite', 'simple_stmt', 'decorated']) | _FLOW_CONTAINERS
_GET_DEFINITION_TYPES = set([
    'expr_stmt', 'sync_comp_for', 'with_stmt', 'for_stmt', 'import_name',
    'import_from', 'param'
])
_IMPORTS = set(['import_name', 'import_from'])


class DocstringMixin(object):
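The round-trip guarantee described in the docstring above can be checked directly; a small sketch (assuming a recent parso install):

    from parso import parse

    source = 'import os  # used below\nprint(os.sep)\n'
    module = parse(source)
    # Prefixes (whitespace and comments) are attached to the following leaf,
    # so reassembling the tree reproduces the input exactly.
    assert module.get_code() == source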
@@ -39,7 +71,7 @@ class DocstringMixin(object):
        """
        if self.type == 'file_input':
            node = self.children[0]
        elif isinstance(self, ClassOrFunc):
        elif self.type in ('funcdef', 'classdef'):
            node = self.children[self.children.index(':') + 1]
            if node.type == 'suite':  # Normally a suite
                node = node.children[1]  # -> NEWLINE stmt
@@ -64,25 +96,11 @@ class PythonMixin(object):
    """
    __slots__ = ()

    def get_definition(self):
        if self.type in ('newline', 'endmarker'):
            raise ValueError('Cannot get the indentation of whitespace or indentation.')
        scope = self
        while scope.parent is not None:
            parent = scope.parent
            if isinstance(scope, (PythonNode, PythonLeaf)) and parent.type != 'simple_stmt':
                if scope.type == 'testlist_comp':
                    try:
                        if scope.children[1].type == 'comp_for':
                            return scope.children[1]
                    except IndexError:
                        pass
                scope = parent
            else:
                break
        return scope

    def get_name_of_position(self, position):
        """
        Given a (line, column) tuple, returns a :py:class:`Name` or ``None`` if
        there is no name at that position.
        """
        for c in self.children:
            if isinstance(c, Leaf):
                if c.type == 'name' and c.start_pos <= position <= c.end_pos:
@@ -94,9 +112,29 @@ class PythonMixin(object):
        return None


class PythonLeaf(Leaf, PythonMixin):
class PythonLeaf(PythonMixin, Leaf):
    __slots__ = ()

    def _split_prefix(self):
        return split_prefix(self, self.get_start_pos_of_prefix())

    def get_start_pos_of_prefix(self):
        """
        Basically calls :py:meth:`parso.tree.NodeOrLeaf.get_start_pos_of_prefix`.
        """
        # TODO it is really ugly that we have to override it. Maybe change
        # indent error leafs somehow? No idea how, though.
        previous_leaf = self.get_previous_leaf()
        if previous_leaf is not None and previous_leaf.type == 'error_leaf' \
                and previous_leaf.token_type in ('INDENT', 'DEDENT', 'ERROR_DEDENT'):
            previous_leaf = previous_leaf.get_previous_leaf()

        if previous_leaf is None:  # It's the first leaf.
            lines = split_lines(self.prefix)
            # + 1 is needed because split_lines always returns at least [''].
            return self.line - len(lines) + 1, 0  # It's the first leaf.
        return previous_leaf.end_pos
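A short sketch of what `get_start_pos_of_prefix` computes for the first leaf (assuming a recent parso):

    from parso import parse

    module = parse('# header comment\nx = 1\n')
    leaf = module.get_first_leaf()            # the Name 'x'
    print(leaf.prefix)                        # '# header comment\n'
    print(leaf.start_pos)                     # (2, 0)
    print(leaf.get_start_pos_of_prefix())     # (1, 0): where the comment begins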
class _LeafWithoutNewlines(PythonLeaf):
    """
@@ -106,23 +144,23 @@ class _LeafWithoutNewlines(PythonLeaf):

    @property
    def end_pos(self):
        return self.line, self.indent + len(self.value)
        return self.line, self.column + len(self.value)


# Python base classes
class PythonBaseNode(BaseNode, PythonMixin):
class PythonBaseNode(PythonMixin, BaseNode):
    __slots__ = ()


class PythonNode(Node, PythonMixin):
class PythonNode(PythonMixin, Node):
    __slots__ = ()


class PythonErrorNode(ErrorNode, PythonMixin):
class PythonErrorNode(PythonMixin, ErrorNode):
    __slots__ = ()


class PythonErrorLeaf(ErrorLeaf, PythonMixin):
class PythonErrorLeaf(ErrorLeaf, PythonLeaf):
    __slots__ = ()


@@ -130,6 +168,12 @@ class EndMarker(_LeafWithoutNewlines):
    __slots__ = ()
    type = 'endmarker'

    @utf8_repr
    def __repr__(self):
        return "<%s: prefix=%s end_pos=%s>" % (
            type(self).__name__, repr(self.prefix), self.end_pos
        )


class Newline(PythonLeaf):
    """Contains NEWLINE and ENDMARKER tokens."""
@@ -151,24 +195,52 @@ class Name(_LeafWithoutNewlines):

    def __repr__(self):
        return "<%s: %s@%s,%s>" % (type(self).__name__, self.value,
                                   self.line, self.indent)
                                   self.line, self.column)

    def is_definition(self):
        if self.parent.type in ('power', 'atom_expr'):
            # In `self.x = 3` self is not a definition, but x is.
            return False
        """
        Returns True if the name is being defined.
        """
        return self.get_definition() is not None

        stmt = self.get_definition()
        if stmt.type in ('funcdef', 'classdef', 'param'):
            return self == stmt.name
        elif stmt.type == 'for_stmt':
            return self.start_pos < stmt.children[2].start_pos
        elif stmt.type == 'try_stmt':
            return self.get_previous_sibling() == 'as'
        else:
            return stmt.type in ('expr_stmt', 'import_name', 'import_from',
                                 'comp_for', 'with_stmt') \
                and self in stmt.get_defined_names()
    def get_definition(self, import_name_always=False):
        """
        Returns None if there's on definition for a name.

        :param import_name_alway: Specifies if an import name is always a
            definition. Normally foo in `from foo import bar` is not a
            definition.
        """
        node = self.parent
        type_ = node.type
        if type_ in ('power', 'atom_expr'):
            # In `self.x = 3` self is not a definition, but x is.
            return None

        if type_ in ('funcdef', 'classdef'):
            if self == node.name:
                return node
            return None

        if type_ == 'except_clause':
            # TODO in Python 2 this doesn't work correctly. See grammar file.
            # I think we'll just let it be. Python 2 will be gone in a few
            # years.
            if self.get_previous_sibling() == 'as':
                return node.parent  # The try_stmt.
            return None

        while node is not None:
            if node.type == 'suite':
                return None
            if node.type in _GET_DEFINITION_TYPES:
                if self in node.get_defined_names():
                    return node
                if import_name_always and node.type in _IMPORTS:
                    return node
                return None
            node = node.parent
        return None
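To make the old/new `is_definition`/`get_definition` pair above concrete, a usage sketch against the new API (assuming a recent parso; `get_used_names` returns a read-only mapping from name strings to `Name` leaves):

    from parso import parse

    module = parse('foo = 3\nbar = foo\n')
    first, second = module.get_used_names()['foo']
    assert first.is_definition()           # `foo = 3` defines it
    assert not second.is_definition()      # `bar = foo` only references it
    assert first.get_definition().type == 'expr_stmt'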
class Literal(PythonLeaf):
@@ -184,6 +256,45 @@ class String(Literal):
    type = 'string'
    __slots__ = ()

    @property
    def string_prefix(self):
        return re.match(r'\w*(?=[\'"])', self.value).group(0)

    def _get_payload(self):
        match = re.search(
            r'''('{3}|"{3}|'|")(.*)$''',
            self.value,
            flags=re.DOTALL
        )
        return match.group(2)[:-len(match.group(1))]


class FStringString(PythonLeaf):
    """
    f-strings contain f-string expressions and normal python strings. These are
    the string parts of f-strings.
    """
    type = 'fstring_string'
    __slots__ = ()


class FStringStart(PythonLeaf):
    """
    f-strings contain f-string expressions and normal python strings. These are
    the string parts of f-strings.
    """
    type = 'fstring_start'
    __slots__ = ()


class FStringEnd(PythonLeaf):
    """
    f-strings contain f-string expressions and normal python strings. These are
    the string parts of f-strings.
    """
    type = 'fstring_end'
    __slots__ = ()


class _StringComparisonMixin(object):
    def __eq__(self, other):
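A quick sketch of the string helpers above; note that `_get_payload` is private and shown here only to illustrate the regex (assuming a recent parso):

    from parso import parse

    leaf = parse("rb'abc'\n").children[0].children[0]
    assert leaf.type == 'string'
    assert leaf.string_prefix == 'rb'
    assert leaf._get_payload() == 'abc'    # prefix and quotes stripped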
@@ -248,8 +359,7 @@ class Scope(PythonBaseNode, DocstringMixin):
            for element in children:
                if element.type in names:
                    yield element
                if element.type in ('suite', 'simple_stmt', 'decorated') \
                        or isinstance(element, Flow):
                if element.type in _FUNC_CONTAINERS:
                    for e in scan(element.children):
                        yield e

@@ -284,14 +394,15 @@ class Module(Scope):
        super(Module, self).__init__(children)
        self._used_names = None

    def iter_future_import_names(self):
    def _iter_future_import_names(self):
        """
        :return list of str: A list of future import names.
        :return: A list of future import names.
        :rtype: list of str
        """
        # TODO this is a strange scan and not fully correct. I think Python's
        # parser does it in a different way and scans for the first
        # statement/import with a tokenizer (to check for syntax changes like
        # the future print statement).
        # In Python it's not allowed to use future imports after the first
        # actual (non-future) statement. However this is not a linter here,
        # just return all future imports. If people want to scan for issues
        # they should use the API.
        for imp in self.iter_imports():
            if imp.type == 'import_from' and imp.level == 0:
                for path in imp.get_paths():
@@ -299,21 +410,22 @@ class Module(Scope):
                    if len(names) == 2 and names[0] == '__future__':
                        yield names[1]

    def has_explicit_absolute_import(self):
    def _has_explicit_absolute_import(self):
        """
        Checks if imports in this module are explicitly absolute, i.e. there
        is a ``__future__`` import.
        Currently not public, might be in the future.
        :return bool:
        """
        for name in self.iter_future_import_names():
        for name in self._iter_future_import_names():
            if name == 'absolute_import':
                return True
        return False

    def get_used_names(self):
        """
        Returns all the `Name` leafs that exist in this module. Tihs includes
        both definitions and references of names.
        Returns all the :class:`Name` leafs that exist in this module. This
        includes both definitions and references of names.
        """
        if self._used_names is None:
            # Don't directly use self._used_names to eliminate a lookup.
@@ -331,7 +443,7 @@ class Module(Scope):
                        recurse(child)

            recurse(self)
            self._used_names = dct
            self._used_names = UsedNamesMapping(dct)
        return self._used_names


@@ -352,9 +464,12 @@ class ClassOrFunc(Scope):

    def get_decorators(self):
        """
        :return list of Decorator:
        :rtype: list of :class:`Decorator`
        """
        decorated = self.parent
        if decorated.type == 'async_funcdef':
            decorated = decorated.parent

        if decorated.type == 'decorated':
            if decorated.children[0].type == 'decorators':
                return decorated.children[0].children
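A sketch of reading decorators through this API (assuming a recent parso; `deco` is a made-up decorator name):

    from parso import parse

    module = parse('@deco\ndef f():\n    pass\n')
    funcdef = module.children[0].children[1]     # decorated -> funcdef
    print([d.get_code() for d in funcdef.get_decorators()])   # ['@deco\n']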
@@ -367,13 +482,6 @@
class Class(ClassOrFunc):
    """
    Used to store the parsed contents of a python class.

    :param name: The Class name.
    :type name: str
    :param supers: The super classes of a Class.
    :type supers: list
    :param start_pos: The start position (line, column) of the class.
    :type start_pos: tuple(int, int)
    """
    type = 'classdef'
    __slots__ = ()
@@ -412,14 +520,14 @@ def _create_params(parent, argslist_list):
        basically a way of unpacking tuples in params. Python 3 has ditched
        this behavior. Jedi currently just ignores those constructs.
        """
        return node.type == 'tfpdef' and node.children[0] == '('
        return node.type == 'fpdef' and node.children[0] == '('

    try:
        first = argslist_list[0]
    except IndexError:
        return []

    if first.type in ('name', 'tfpdef'):
    if first.type in ('name', 'fpdef'):
        if check_python2_nested_param(first):
            return [first]
        else:
@@ -427,7 +535,10 @@ def _create_params(parent, argslist_list):
    elif first == '*':
        return [first]
    else:  # argslist is a `typedargslist` or a `varargslist`.
        children = first.children
        if first.type == 'tfpdef':
            children = [first]
        else:
            children = first.children
        new_children = []
        start = 0
        # Start with offset 1, because the end is higher.
@@ -435,9 +546,13 @@ def _create_params(parent, argslist_list):
            if child is None or child == ',':
                param_children = children[start:end]
                if param_children:  # Could as well be comma and then end.
                    if check_python2_nested_param(param_children[0]):
                        new_children += param_children
                    elif param_children[0] == '*' and param_children[1] == ',':
                    if param_children[0] == '*' \
                            and (len(param_children) == 1
                                 or param_children[1] == ',') \
                            or check_python2_nested_param(param_children[0]) \
                            or param_children[0] == '/':
                        for p in param_children:
                            p.parent = parent
                        new_children += param_children
                    else:
                        new_children.append(Param(param_children, parent))
@@ -469,8 +584,7 @@ class Function(ClassOrFunc):
    def _get_param_nodes(self):
        return self.children[2].children

    @property
    def params(self):
    def get_params(self):
        """
        Returns a list of `Param()`.
        """
@@ -484,14 +598,54 @@ class Function(ClassOrFunc):
        """
        Returns a generator of `yield_expr`.
        """
        # TODO This is incorrect, yields are also possible in a statement.
        return self._search_in_scope('yield_expr')
        def scan(children):
            for element in children:
                if element.type in ('classdef', 'funcdef', 'lambdef'):
                    continue

                try:
                    nested_children = element.children
                except AttributeError:
                    if element.value == 'yield':
                        if element.parent.type == 'yield_expr':
                            yield element.parent
                        else:
                            yield element
                else:
                    for result in scan(nested_children):
                        yield result

        return scan(self.children)

    def iter_return_stmts(self):
        """
        Returns a generator of `return_stmt`.
        """
        return self._search_in_scope('return_stmt')
        def scan(children):
            for element in children:
                if element.type == 'return_stmt' \
                        or element.type == 'keyword' and element.value == 'return':
                    yield element
                if element.type in _RETURN_STMT_CONTAINERS:
                    for e in scan(element.children):
                        yield e

        return scan(self.children)

    def iter_raise_stmts(self):
        """
        Returns a generator of `raise_stmt`. Includes raise statements inside try-except blocks
        """
        def scan(children):
            for element in children:
                if element.type == 'raise_stmt' \
                        or element.type == 'keyword' and element.value == 'raise':
                    yield element
                if element.type in _RETURN_STMT_CONTAINERS:
                    for e in scan(element.children):
                        yield e

        return scan(self.children)

    def is_generator(self):
        """
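Usage sketch for the rewritten scanners above (assuming a recent parso):

    from parso import parse

    funcdef = parse('def f(a, b=3):\n    return a\n').children[0]
    print([p.name.value for p in funcdef.get_params()])   # ['a', 'b']
    print(len(list(funcdef.iter_return_stmts())))         # 1
    print(list(funcdef.iter_yield_exprs()))               # []: not a generator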
@@ -512,6 +666,7 @@ class Function(ClassOrFunc):
        except IndexError:
            return None


class Lambda(Function):
    """
    Lambdas are basically trimmed functions, so give it the same interface.
@@ -617,6 +772,9 @@ class ForStmt(Flow):
        """
        return self.children[3]

    def get_defined_names(self):
        return _defined_names(self.children[1])


class TryStmt(Flow):
    type = 'try_stmt'
@@ -628,7 +786,6 @@ class TryStmt(Flow):
        Returns ``[None]`` for except clauses without an exception given.
        """
        for node in self.children:
            # TODO this is not correct. We're not returning an except clause.
            if node.type == 'except_clause':
                yield node.children[1]
            elif node == 'except':
@@ -651,8 +808,7 @@ class WithStmt(Flow):
                names += _defined_names(with_item.children[2])
        return names

    def get_context_manager_from_name(self, name):
        # TODO Replace context_manager with test?
    def get_test_node_from_name(self, name):
        node = name.parent
        if node.type != 'with_item':
            raise ValueError('The name is not actually part of a with statement.')
@@ -818,7 +974,7 @@ class ImportName(Import):
class KeywordStatement(PythonBaseNode):
    """
    For the following statements: `assert`, `del`, `global`, `nonlocal`,
    `raise`, `return`, `yield`, `return`, `yield`.
    `raise`, `return`, `yield`.

    `pass`, `continue` and `break` are not in there, because they are just
    simple keywords and the parser reduces it to a keyword.
@@ -868,7 +1024,7 @@ def _defined_names(current):
    list comprehensions.
    """
    names = []
    if current.type in ('testlist_star_expr', 'testlist_comp', 'exprlist'):
    if current.type in ('testlist_star_expr', 'testlist_comp', 'exprlist', 'testlist'):
        for child in current.children[::2]:
            names += _defined_names(child)
    elif current.type in ('atom', 'star_expr'):
@@ -952,15 +1108,17 @@ class Param(PythonBaseNode):
        The default is the test node that appears after the `=`. Is `None` in
        case no default is present.
        """
        has_comma = self.children[-1] == ','
        try:
            return self.children[int(self.children[0] in ('*', '**')) + 2]
            if self.children[-2 - int(has_comma)] == '=':
                return self.children[-1 - int(has_comma)]
        except IndexError:
            return None

    @property
    def annotation(self):
        """
        The default is the test node that appears after `->`. Is `None` in case
        The default is the test node that appears after `:`. Is `None` in case
        no annotation is present.
        """
        tfpdef = self._tfpdef()
@@ -974,7 +1132,7 @@ class Param(PythonBaseNode):

    def _tfpdef(self):
        """
        tfpdef: see grammar.txt.
        tfpdef: see e.g. grammar36.txt.
        """
        offset = int(self.children[0] in ('*', '**'))
        return self.children[offset]
@@ -989,6 +1147,9 @@ class Param(PythonBaseNode):
        else:
            return self._tfpdef()

    def get_defined_names(self):
        return [self.name]

    @property
    def position_index(self):
        """
@@ -1002,6 +1163,13 @@ class Param(PythonBaseNode):
                index -= 2
        except ValueError:
            pass
        try:
            keyword_only_index = self.parent.children.index('/')
            if index > keyword_only_index:
                # Skip the ` /, `
                index -= 2
        except ValueError:
            pass
        return index - 1

    def get_parent_function(self):
@@ -1010,7 +1178,7 @@ class Param(PythonBaseNode):
        """
        return search_ancestor(self, 'funcdef', 'lambdef')

    def get_code(self, normalized=False, include_prefix=True, include_comma=True):
    def get_code(self, include_prefix=True, include_comma=True):
        """
        Like all the other get_code functions, but includes the param
        `include_comma`.
@@ -1018,14 +1186,13 @@ class Param(PythonBaseNode):
        :param include_comma bool: If enabled includes the comma in the string output.
        """
        if include_comma:
            return super(Param, self).get_code(normalized, include_prefix)
            return super(Param, self).get_code(include_prefix)

        children = self.children
        if children[-1] == ',':
            children = children[:-1]
        return self._get_code_for_children(
            children,
            normalized=False,
            include_prefix=include_prefix
        )

@@ -1034,12 +1201,42 @@
        return '<%s: %s>' % (type(self).__name__, str(self._tfpdef()) + default)


class CompFor(PythonBaseNode):
    type = 'comp_for'
class SyncCompFor(PythonBaseNode):
    type = 'sync_comp_for'
    __slots__ = ()

    def get_defined_names(self):
        """
        Returns the a list of `Name` that the comprehension defines.
        """
        # allow async for
        return _defined_names(self.children[1])


# This is simply here so an older Jedi version can work with this new parso
# version. Can be deleted in the next release.
CompFor = SyncCompFor


class UsedNamesMapping(Mapping):
    """
    This class exists for the sole purpose of creating an immutable dict.
    """
    def __init__(self, dct):
        self._dict = dct

    def __getitem__(self, key):
        return self._dict[key]

    def __len__(self):
        return len(self._dict)

    def __iter__(self):
        return iter(self._dict)

    def __hash__(self):
        return id(self)

    def __eq__(self, other):
        # Comparing these dicts does not make sense.
        return self is other
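Putting the Param accessors above together, a small sketch (assuming a recent parso):

    from parso import parse

    funcdef = parse('def f(x: int = 3): pass\n').children[0]
    param, = funcdef.get_params()
    assert param.name.value == 'x'
    assert param.annotation.value == 'int'
    assert param.default.value == '3'
    assert param.position_index == 0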
@@ -1,90 +0,0 @@
from __future__ import absolute_import

from parso._compatibility import py_version
from token import *


COMMENT = N_TOKENS
tok_name[COMMENT] = 'COMMENT'
N_TOKENS += 1

NL = N_TOKENS
tok_name[NL] = 'NL'
N_TOKENS += 1

if py_version >= 30:
    BACKQUOTE = N_TOKENS
    tok_name[BACKQUOTE] = 'BACKQUOTE'
    N_TOKENS += 1
else:
    RARROW = N_TOKENS
    tok_name[RARROW] = 'RARROW'
    N_TOKENS += 1
    ELLIPSIS = N_TOKENS
    tok_name[ELLIPSIS] = 'ELLIPSIS'
    N_TOKENS += 1

if not py_version >= 35:
    ATEQUAL = N_TOKENS
    tok_name[ATEQUAL] = 'ATEQUAL'
    N_TOKENS += 1


# Map from operator to number (since tokenize doesn't do this)

opmap_raw = """\
( LPAR
) RPAR
[ LSQB
] RSQB
: COLON
, COMMA
; SEMI
+ PLUS
- MINUS
* STAR
/ SLASH
| VBAR
& AMPER
< LESS
> GREATER
= EQUAL
. DOT
% PERCENT
` BACKQUOTE
{ LBRACE
} RBRACE
@ AT
== EQEQUAL
!= NOTEQUAL
<> NOTEQUAL
<= LESSEQUAL
>= GREATEREQUAL
~ TILDE
^ CIRCUMFLEX
<< LEFTSHIFT
>> RIGHTSHIFT
** DOUBLESTAR
+= PLUSEQUAL
-= MINEQUAL
*= STAREQUAL
/= SLASHEQUAL
%= PERCENTEQUAL
&= AMPEREQUAL
|= VBAREQUAL
@= ATEQUAL
^= CIRCUMFLEXEQUAL
<<= LEFTSHIFTEQUAL
>>= RIGHTSHIFTEQUAL
**= DOUBLESTAREQUAL
// DOUBLESLASH
//= DOUBLESLASHEQUAL
-> RARROW
... ELLIPSIS
"""

opmap = {}
for line in opmap_raw.splitlines():
    op, name = line.split()
    opmap[op] = globals()[name]
@@ -1,369 +0,0 @@
# -*- coding: utf-8 -*-
"""
This tokenizer has been copied from the ``tokenize.py`` standard library
tokenizer. The reason was simple: The standard library tokenizer fails
if the indentation is not right. To make it possible to do error recovery the
tokenizer needed to be rewritten.

Basically this is a stripped down version of the standard library module, so
you can read the documentation there. Additionally we included some speed and
memory optimizations here.
"""
from __future__ import absolute_import

import string
import re
from collections import namedtuple
import itertools as _itertools

from parso.token import (tok_name, N_TOKENS, ENDMARKER, STRING, NUMBER, opmap,
                         NAME, OP, ERRORTOKEN, NEWLINE, INDENT, DEDENT)
from parso._compatibility import py_version, u
from parso.utils import splitlines


cookie_re = re.compile("coding[:=]\s*([-\w.]+)")


if py_version >= 30:
    # Python 3 has str.isidentifier() to check if a char is a valid identifier
    is_identifier = str.isidentifier
else:
    namechars = string.ascii_letters + '_'
    is_identifier = lambda s: s in namechars


COMMENT = N_TOKENS
tok_name[COMMENT] = 'COMMENT'


def group(*choices, **kwargs):
    capture = kwargs.pop('capture', False)  # Python 2, arrghhhhh :(
    assert not kwargs

    start = '('
    if not capture:
        start += '?:'
    return start + '|'.join(choices) + ')'

def any(*choices):
    return group(*choices) + '*'

def maybe(*choices):
    return group(*choices) + '?'

# Note: we use unicode matching for names ("\w") but ascii matching for
# number literals.
Whitespace = r'[ \f\t]*'
Comment = r'#[^\r\n]*'
Name = r'\w+'

if py_version >= 36:
    Hexnumber = r'0[xX](?:_?[0-9a-fA-F])+'
    Binnumber = r'0[bB](?:_?[01])+'
    Octnumber = r'0[oO](?:_?[0-7])+'
    Decnumber = r'(?:0(?:_?0)*|[1-9](?:_?[0-9])*)'
    Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber)
    Exponent = r'[eE][-+]?[0-9](?:_?[0-9])*'
    Pointfloat = group(r'[0-9](?:_?[0-9])*\.(?:[0-9](?:_?[0-9])*)?',
                       r'\.[0-9](?:_?[0-9])*') + maybe(Exponent)
    Expfloat = r'[0-9](?:_?[0-9])*' + Exponent
    Floatnumber = group(Pointfloat, Expfloat)
    Imagnumber = group(r'[0-9](?:_?[0-9])*[jJ]', Floatnumber + r'[jJ]')
else:
    Hexnumber = r'0[xX][0-9a-fA-F]+'
    Binnumber = r'0[bB][01]+'
    if py_version >= 30:
        Octnumber = r'0[oO][0-7]+'
    else:
        Octnumber = '0[0-7]+'
    Decnumber = r'(?:0+|[1-9][0-9]*)'
    Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber)
    Exponent = r'[eE][-+]?[0-9]+'
    Pointfloat = group(r'[0-9]+\.[0-9]*', r'\.[0-9]+') + maybe(Exponent)
    Expfloat = r'[0-9]+' + Exponent
    Floatnumber = group(Pointfloat, Expfloat)
    Imagnumber = group(r'[0-9]+[jJ]', Floatnumber + r'[jJ]')
Number = group(Imagnumber, Floatnumber, Intnumber)

# Return the empty string, plus all of the valid string prefixes.
def _all_string_prefixes():
    # The valid string prefixes. Only contain the lower case versions,
    # and don't contain any permuations (include 'fr', but not
    # 'rf'). The various permutations will be generated.
    _valid_string_prefixes = ['b', 'r', 'u', 'br']
    if py_version >= 36:
        _valid_string_prefixes += ['f', 'fr']
    if py_version <= 27:
        # TODO this is actually not 100% valid. ur is valid in Python 2.7,
        # while ru is not.
        _valid_string_prefixes.append('ur')

    # if we add binary f-strings, add: ['fb', 'fbr']
    result = set([''])
    for prefix in _valid_string_prefixes:
        for t in _itertools.permutations(prefix):
            # create a list with upper and lower versions of each
            # character
            for u in _itertools.product(*[(c, c.upper()) for c in t]):
                result.add(''.join(u))
    return result

def _compile(expr):
    return re.compile(expr, re.UNICODE)

# Note that since _all_string_prefixes includes the empty string,
# StringPrefix can be the empty string (making it optional).
StringPrefix = group(*_all_string_prefixes())

# Tail end of ' string.
Single = r"[^'\\]*(?:\\.[^'\\]*)*'"
# Tail end of " string.
Double = r'[^"\\]*(?:\\.[^"\\]*)*"'
# Tail end of ''' string.
Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''"
# Tail end of """ string.
Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""'
Triple = group(StringPrefix + "'''", StringPrefix + '"""')

# Because of leftmost-then-longest match semantics, be sure to put the
# longest operators first (e.g., if = came before ==, == would get
# recognized as two instances of =).
Operator = group(r"\*\*=?", r">>=?", r"<<=?", r"!=",
                 r"//=?", r"->",
                 r"[+\-*/%&@|^=<>]=?",
                 r"~")

Bracket = '[][(){}]'
Special = group(r'\r?\n', r'\.\.\.', r'[:;.,@]')
Funny = group(Operator, Bracket, Special)

PlainToken = group(Number, Funny, Name, capture=True)

# First (or only) line of ' or " string.
ContStr = group(StringPrefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*" +
                group("'", r'\\\r?\n'),
                StringPrefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*' +
                group('"', r'\\\r?\n'))
PseudoExtras = group(r'\\\r?\n|\Z', Comment, Triple)
PseudoToken = group(Whitespace, capture=True) + \
    group(PseudoExtras, Number, Funny, ContStr, Name, capture=True)

# For a given string prefix plus quotes, endpats maps it to a regex
# to match the remainder of that string. _prefix can be empty, for
# a normal single or triple quoted string (with no prefix).
endpats = {}
for _prefix in _all_string_prefixes():
    endpats[_prefix + "'"] = _compile(Single)
    endpats[_prefix + '"'] = _compile(Double)
    endpats[_prefix + "'''"] = _compile(Single3)
    endpats[_prefix + '"""'] = _compile(Double3)

# A set of all of the single and triple quoted string prefixes,
# including the opening quotes.
single_quoted = set()
triple_quoted = set()
for t in _all_string_prefixes():
    for p in (t + '"', t + "'"):
        single_quoted.add(p)
    for p in (t + '"""', t + "'''"):
        triple_quoted.add(p)


# TODO add with?
ALWAYS_BREAK_TOKENS = (';', 'import', 'class', 'def', 'try', 'except',
                       'finally', 'while', 'return')
pseudo_token_compiled = _compile(PseudoToken)


class TokenInfo(namedtuple('Token', ['type', 'string', 'start_pos', 'prefix'])):
    def __repr__(self):
        return ('TokenInfo(type=%s, string=%r, start=%r, prefix=%r)' %
                self._replace(type=self.get_type_name()))

    def get_type_name(self, exact=True):
        if exact:
            typ = self.exact_type
        else:
            typ = self.type
        return tok_name[typ]

    @property
    def exact_type(self):
        if self.type == OP and self.string in opmap:
            return opmap[self.string]
        else:
            return self.type

    @property
    def end_pos(self):
        lines = splitlines(self.string)
        if len(lines) > 1:
            return self.start_pos[0] + len(lines) - 1, 0
        else:
            return self.start_pos[0], self.start_pos[1] + len(self.string)


def source_tokens(source, use_exact_op_types=False):
    """Generate tokens from a the source code (string)."""
    lines = splitlines(source, keepends=True)
    return generate_tokens(lines, use_exact_op_types)


def generate_tokens(lines, use_exact_op_types=False):
    """
    A heavily modified Python standard library tokenizer.

    Additionally to the default information, yields also the prefix of each
    token. This idea comes from lib2to3. The prefix contains all information
    that is irrelevant for the parser like newlines in parentheses or comments.
    """
    paren_level = 0  # count parentheses
    indents = [0]
    max = 0
    numchars = '0123456789'
    contstr = ''
    contline = None
    # We start with a newline. This makes indent at the first position
    # possible. It's not valid Python, but still better than an INDENT in the
    # second line (and not in the first). This makes quite a few things in
    # Jedi's fast parser possible.
    new_line = True
    prefix = ''  # Should never be required, but here for safety
    additional_prefix = ''
    for lnum, line in enumerate(lines, 1):  # loop over lines in stream
        pos, max = 0, len(line)

        if contstr:  # continued string
            endmatch = endprog.match(line)
            if endmatch:
                pos = endmatch.end(0)
                yield TokenInfo(STRING, contstr + line[:pos], contstr_start, prefix)
                contstr = ''
                contline = None
            else:
                contstr = contstr + line
                contline = contline + line
                continue

        while pos < max:
            pseudomatch = pseudo_token_compiled.match(line, pos)
            if not pseudomatch:  # scan for tokens
                txt = line[pos:]
                if txt.endswith('\n'):
                    new_line = True
                yield TokenInfo(ERRORTOKEN, txt, (lnum, pos), prefix)
                break

            prefix = additional_prefix + pseudomatch.group(1)
            additional_prefix = ''
            start, pos = pseudomatch.span(2)
            spos = (lnum, start)
            token = pseudomatch.group(2)
            initial = token[0]

            if new_line and initial not in '\r\n#':
                new_line = False
                if paren_level == 0:
                    i = 0
                    while line[i] == '\f':
                        i += 1
                        start -= 1
                    if start > indents[-1]:
                        yield TokenInfo(INDENT, '', spos, '')
                        indents.append(start)
                    while start < indents[-1]:
                        yield TokenInfo(DEDENT, '', spos, '')
                        indents.pop()

            if (initial in numchars or  # ordinary number
                    (initial == '.' and token != '.' and token != '...')):
                yield TokenInfo(NUMBER, token, spos, prefix)
            elif initial in '\r\n':
                if not new_line and paren_level == 0:
                    yield TokenInfo(NEWLINE, token, spos, prefix)
                else:
                    additional_prefix = prefix + token
                new_line = True
            elif initial == '#':  # Comments
                assert not token.endswith("\n")
                additional_prefix = prefix + token
            elif token in triple_quoted:
                endprog = endpats[token]
                endmatch = endprog.match(line, pos)
                if endmatch:  # all on one line
                    pos = endmatch.end(0)
                    token = line[start:pos]
                    yield TokenInfo(STRING, token, spos, prefix)
                else:
                    contstr_start = (lnum, start)  # multiple lines
                    contstr = line[start:]
                    contline = line
                    break
            elif initial in single_quoted or \
                    token[:2] in single_quoted or \
                    token[:3] in single_quoted:
                if token[-1] == '\n':  # continued string
                    contstr_start = lnum, start
                    endprog = (endpats.get(initial) or endpats.get(token[1])
                               or endpats.get(token[2]))
                    contstr = line[start:]
                    contline = line
                    break
                else:  # ordinary string
                    yield TokenInfo(STRING, token, spos, prefix)
            elif is_identifier(initial):  # ordinary name
                if token in ALWAYS_BREAK_TOKENS:
                    paren_level = 0
                    while True:
                        indent = indents.pop()
                        if indent > start:
                            yield TokenInfo(DEDENT, '', spos, '')
                        else:
                            indents.append(indent)
                            break
                yield TokenInfo(NAME, token, spos, prefix)
            elif initial == '\\' and line[start:] in ('\\\n', '\\\r\n'):  # continued stmt
                additional_prefix += prefix + line[start:]
                break
            else:
                if token in '([{':
                    paren_level += 1
                elif token in ')]}':
                    paren_level -= 1

                try:
                    # This check is needed in any case to check if it's a valid
                    # operator or just some random unicode character.
                    exact_type = opmap[token]
                except KeyError:
                    exact_type = typ = ERRORTOKEN
                if use_exact_op_types:
                    typ = exact_type
                else:
                    typ = OP
                yield TokenInfo(typ, token, spos, prefix)

    if contstr:
        yield TokenInfo(ERRORTOKEN, contstr, contstr_start, prefix)
        if contstr.endswith('\n'):
            new_line = True

    end_pos = lnum, max
    # As the last position we just take the maximally possible position. We
    # remove -1 for the last new line.
    for indent in indents[1:]:
        yield TokenInfo(DEDENT, '', end_pos, '')
    yield TokenInfo(ENDMARKER, '', end_pos, additional_prefix)


if __name__ == "__main__":
    import sys
    if len(sys.argv) >= 2:
        path = sys.argv[1]
        with open(path) as f:
            code = u(f.read())
    else:
        code = u(sys.stdin.read())
    for token in source_tokens(code, use_exact_op_types=True):
        print(token)
138
parso/tree.py
@@ -1,15 +1,17 @@
from abc import abstractmethod, abstractproperty

from parso._compatibility import utf8_repr, encoding, py_version
from parso.utils import split_lines


def search_ancestor(node, *node_types):
    """
    Recursively looks at the parents of a node and checks if the type names
    match.
    Recursively looks at the parents of a node and returns the first found node
    that matches node_types. Returns ``None`` if no matching node is found.

    :param node: The node that is looked at.
    :param node_types: A tuple or a string of type names that are
        searched for.
    :param node: The ancestors of this node will be checked.
    :param node_types: type names that are searched for.
    :type node_types: tuple of str
    """
    while True:
        node = node.parent
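A sketch of `search_ancestor` in action (assuming a recent parso):

    from parso import parse
    from parso.tree import search_ancestor

    leaf = parse('def f(): pass\n').get_leaf_for_position((1, 4))   # the Name 'f'
    print(search_ancestor(leaf, 'funcdef').type)   # 'funcdef'
    print(search_ancestor(leaf, 'classdef'))       # None: no such ancestor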
@@ -22,6 +24,10 @@ class NodeOrLeaf(object):
    The base class for nodes and leaves.
    """
    __slots__ = ()
    type = None
    '''
    The type is a string that typically matches the types of the grammar file.
    '''

    def get_root_node(self):
        """
@@ -35,8 +41,8 @@ class NodeOrLeaf(object):

    def get_next_sibling(self):
        """
        The node immediately following the invocant in their parent's children
        list. If the invocant does not have a next sibling, it is None
        Returns the node immediately following this node in this parent's
        children list. If this node does not have a next sibling, it is None
        """
        # Can't use index(); we need to test by identity
        for i, child in enumerate(self.parent.children):
@@ -48,8 +54,8 @@ class NodeOrLeaf(object):

    def get_previous_sibling(self):
        """
        The node/leaf immediately preceding the invocant in their parent's
        children list. If the invocant does not have a previous sibling, it is
        Returns the node immediately preceding this node in this parent's
        children list. If this node does not have a previous sibling, it is
        None.
        """
        # Can't use index(); we need to test by identity
@@ -62,7 +68,7 @@ class NodeOrLeaf(object):
    def get_previous_leaf(self):
        """
        Returns the previous leaf in the parser tree.
        Raises an IndexError if it's the first element in the parser tree.
        Returns `None` if this is the first element in the parser tree.
        """
        node = self
        while True:
@@ -85,7 +91,7 @@ class NodeOrLeaf(object):
    def get_next_leaf(self):
        """
        Returns the next leaf in the parser tree.
        Returns `None` if it's the last element in the parser tree.
        Returns None if this is the last element in the parser tree.
        """
        node = self
        while True:
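The sibling and leaf navigation described above, as a short sketch (assuming a recent parso):

    from parso import parse

    module = parse('x = 1\ny = 2\n')
    stmt = module.children[0]                    # simple_stmt for 'x = 1'
    print(stmt.get_next_sibling().get_code())    # 'y = 2\n'
    leaf = module.get_first_leaf()               # the Name 'x'
    print(leaf.get_next_leaf().value)            # '='
    print(leaf.get_previous_leaf())              # None: already the first leaf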
@@ -135,50 +141,63 @@ class NodeOrLeaf(object):
    @abstractmethod
    def get_first_leaf(self):
        """
        Returns the first leaf of a node or itself it's a leaf.
        Returns the first leaf of a node or itself if this is a leaf.
        """

    @abstractmethod
    def get_last_leaf(self):
        """
        Returns the last leaf of a node or itself it's a leaf.
        Returns the last leaf of a node or itself if this is a leaf.
        """

    @abstractmethod
    def get_code(self, normalized=False, include_prefix=True):
    def get_code(self, include_prefix=True):
        """
        Returns the code that was the input of the parser.
        Returns the code that was input the input for the parser for this node.

        If a normalizer is given, the returned code will be normalized and will
        not be equal to the input.

        :param include_prefix: Removes the prefix (whitespace and comments) of e.g. a statement.
        :param normalized: Deprecated. Please don't use. Will be replaced with something more powerful.
        :param include_prefix: Removes the prefix (whitespace and comments) of
            e.g. a statement.
        """


class Leaf(NodeOrLeaf):
    __slots__ = ('value', 'parent', 'line', 'indent', 'prefix')
    '''
    Leafs are basically tokens with a better API. Leafs exactly know where they
    were defined and what text preceeds them.
    '''
    __slots__ = ('value', 'parent', 'line', 'column', 'prefix')

    def __init__(self, value, start_pos, prefix=''):
        self.value = value
        '''
        :py:func:`str` The value of the current token.
        '''
        self.start_pos = start_pos
        self.prefix = prefix
        '''
        :py:func:`str` Typically a mixture of whitespace and comments. Stuff
        that is syntactically irrelevant for the syntax tree.
        '''
        self.parent = None
        '''
        The parent :class:`BaseNode` of this leaf.
        '''

    @property
    def start_pos(self):
        return self.line, self.indent
        return self.line, self.column

    @start_pos.setter
    def start_pos(self, value):
        self.line = value[0]
        self.indent = value[1]
        self.column = value[1]

    def get_start_pos_of_prefix(self):
        previous_leaf = self.get_previous_leaf()
        if previous_leaf is None:
            return self.line - self.prefix.count('\n'), 0  # It's the first leaf.
        lines = split_lines(self.prefix)
        # + 1 is needed because split_lines always returns at least [''].
        return self.line - len(lines) + 1, 0  # It's the first leaf.
        return previous_leaf.end_pos

    def get_first_leaf(self):
@@ -187,9 +206,7 @@ class Leaf(NodeOrLeaf):
    def get_last_leaf(self):
        return self

    def get_code(self, normalized=False, include_prefix=True):
        if normalized:
            return self.value
    def get_code(self, include_prefix=True):
        if include_prefix:
            return self.prefix + self.value
        else:
@@ -197,35 +214,49 @@ class Leaf(NodeOrLeaf):

    @property
    def end_pos(self):
        lines = self.value.split('\n')
        lines = split_lines(self.value)
        end_pos_line = self.line + len(lines) - 1
        # Check for multiline token
        if self.line == end_pos_line:
            end_pos_indent = self.indent + len(lines[-1])
            end_pos_column = self.column + len(lines[-1])
        else:
            end_pos_indent = len(lines[-1])
        return end_pos_line, end_pos_indent
            end_pos_column = len(lines[-1])
        return end_pos_line, end_pos_column

    @utf8_repr
    def __repr__(self):
        return "<%s: %s start=%s>" % (type(self).__name__, self.value, self.start_pos)
        value = self.value
        if not value:
            value = self.type
        return "<%s: %s>" % (type(self).__name__, value)


class TypedLeaf(Leaf):
    __slots__ = ('type',)

    def __init__(self, type, value, start_pos, prefix=''):
        super(TypedLeaf, self).__init__(value, start_pos, prefix)
        self.type = type


class BaseNode(NodeOrLeaf):
    """
    The super class for all nodes.

    If you create custom nodes, you will probably want to inherit from this
    ``BaseNode``.
    A node has children, a type and possibly a parent node.
    """
    __slots__ = ('children', 'parent')
    type = None

    def __init__(self, children):
        for c in children:
            c.parent = self
        self.children = children
        """
        A list of :class:`NodeOrLeaf` child nodes.
        """
        self.parent = None
        '''
        The parent :class:`BaseNode` of this leaf.
        None if this is the root node.
        '''

    @property
    def start_pos(self):
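The multiline `end_pos` arithmetic above in practice (a sketch, assuming a recent parso):

    from parso import parse

    leaf = parse("'''one\ntwo'''\n").children[0].children[0]   # the string leaf
    print(leaf.start_pos)   # (1, 0)
    print(leaf.end_pos)     # (2, 6): the token's last line has 6 characters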
@@ -238,18 +269,25 @@ class BaseNode(NodeOrLeaf):
    def end_pos(self):
        return self.children[-1].end_pos

    def _get_code_for_children(self, children, normalized, include_prefix):
        # TODO implement normalized (depending on context).
    def _get_code_for_children(self, children, include_prefix):
        if include_prefix:
            return "".join(c.get_code(normalized) for c in children)
            return "".join(c.get_code() for c in children)
        else:
            first = children[0].get_code(include_prefix=False)
            return first + "".join(c.get_code(normalized) for c in children[1:])
            return first + "".join(c.get_code() for c in children[1:])

    def get_code(self, normalized=False, include_prefix=True):
        return self._get_code_for_children(self.children, normalized, include_prefix)
    def get_code(self, include_prefix=True):
        return self._get_code_for_children(self.children, include_prefix)

    def get_leaf_for_position(self, position, include_prefixes=False):
        """
        Get the :py:class:`parso.tree.Leaf` at ``position``

        :param tuple position: A position tuple, row, column. Rows start from 1
        :param bool include_prefixes: If ``False``, ``None`` will be returned if ``position`` falls
            on whitespace or comments before a leaf
        :return: :py:class:`parso.tree.Leaf` at ``position``, or ``None``
        """
        def binary_search(lower, upper):
            if lower == upper:
                element = self.children[lower]
@@ -282,7 +320,7 @@ class BaseNode(NodeOrLeaf):

    @utf8_repr
    def __repr__(self):
        code = self.get_code().replace('\n', ' ').strip()
        code = self.get_code().replace('\n', ' ').replace('\r', ' ').strip()
        if not py_version >= 30:
            code = code.encode(encoding, 'replace')
        return "<%s: %s@%s,%s>" % \
@@ -303,7 +341,7 @@ class Node(BaseNode):

class ErrorNode(BaseNode):
    """
    A node that containes valid nodes/leaves that we're follow by a token that
    A node that contains valid nodes/leaves that we're follow by a token that
    was invalid. This basically means that the leaf after this node is where
    Python would mark a syntax error.
    """
@@ -316,13 +354,13 @@ class ErrorLeaf(Leaf):
    A leaf that is either completely invalid in a language (like `$` in Python)
    or is invalid at that position. Like the star in `1 +* 1`.
    """
    __slots__ = ('original_type')
    __slots__ = ('token_type',)
    type = 'error_leaf'

    def __init__(self, original_type, value, start_pos, prefix=''):
    def __init__(self, token_type, value, start_pos, prefix=''):
        super(ErrorLeaf, self).__init__(value, start_pos, prefix)
        self.original_type = original_type
        self.token_type = token_type

    def __repr__(self):
        return "<%s: %s:%s, %s)>" % \
            (type(self).__name__, self.original_type, repr(self.value), self.start_pos)
        return "<%s: %s:%s, %s>" % \
            (type(self).__name__, self.token_type, repr(self.value), self.start_pos)
126
parso/utils.py
@@ -1,15 +1,34 @@
from collections import namedtuple
import re
import sys
from ast import literal_eval

from parso._compatibility import unicode
from parso._compatibility import unicode, total_ordering

# The following is a list in Python that are line breaks in str.splitlines, but
# not in Python. In Python only \r (Carriage Return, 0xD) and \n (Line Feed,
# 0xA) are allowed to split lines.
_NON_LINE_BREAKS = (
    u'\v',      # Vertical Tabulation 0xB
    u'\f',      # Form Feed 0xC
    u'\x1C',    # File Separator
    u'\x1D',    # Group Separator
    u'\x1E',    # Record Separator
    u'\x85',    # Next Line (NEL - Equivalent to CR+LF.
                # Used to mark end-of-line on some IBM mainframes.)
    u'\u2028',  # Line Separator
    u'\u2029',  # Paragraph Separator
)

Version = namedtuple('Version', 'major, minor, micro')


def splitlines(string, keepends=False):
    """
    A splitlines for Python code. In contrast to Python's ``str.splitlines``,
def split_lines(string, keepends=False):
    r"""
    Intended for Python code. In contrast to Python's :py:meth:`str.splitlines`,
    looks at form feeds and other special characters as normal text. Just
    splits ``\n`` and ``\r\n``.
    Also different: Returns ``['']`` for an empty string input.
    Also different: Returns ``[""]`` for an empty string input.

    In Python 2.7 form feeds are used as normal characters when using
    str.splitlines. However in Python 3 somewhere there was a decision to split
@@ -21,8 +40,13 @@ def splitlines(string, keepends=False):
        # We have to merge lines that were broken by form feed characters.
        merge = []
        for i, line in enumerate(lst):
            if line.endswith('\f'):
                merge.append(i)
            try:
                last_chr = line[-1]
            except IndexError:
                pass
            else:
                if last_chr in _NON_LINE_BREAKS:
                    merge.append(i)

        for index in reversed(merge):
            try:
@@ -36,14 +60,22 @@ def splitlines(string, keepends=False):
        # The stdlib's implementation of the end is inconsistent when calling
        # it with/without keepends. One time there's an empty string in the
        # end, one time there's none.
        if string.endswith('\n') or string == '':
        if string.endswith('\n') or string.endswith('\r') or string == '':
            lst.append('')
        return lst
    else:
        return re.split('\n|\r\n', string)
        return re.split(r'\n|\r\n|\r', string)
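A sketch of the behavior that distinguishes `split_lines` from `str.splitlines`:

    from parso.utils import split_lines

    print(split_lines('a\nb\fc\rd'))   # ['a', 'b\x0cc', 'd']: \f is kept, \r splits
    print(split_lines(''))             # ['']
    print(split_lines('x\n', keepends=True))   # ['x\n', '']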
def source_to_unicode(source, encoding=None):
def python_bytes_to_unicode(source, encoding='utf-8', errors='strict'):
    """
    Checks for unicode BOMs and PEP 263 encoding declarations. Then returns a
    unicode object like in :py:meth:`bytes.decode`.

    :param encoding: See :py:meth:`bytes.decode` documentation.
    :param errors: See :py:meth:`bytes.decode` documentation. ``errors`` can be
        ``'strict'``, ``'replace'`` or ``'ignore'``.
    """
    def detect_encoding():
        """
        For the implementation of encoding definitions in Python, look at:
@@ -62,7 +94,7 @@ def source_to_unicode(source, encoding=None):
            return possible_encoding.group(1)
        else:
            # the default if nothing else has been set -> PEP 263
            return encoding if encoding is not None else 'utf-8'
            return encoding

    if isinstance(source, unicode):
        # only cast str/bytes
@@ -71,5 +103,73 @@ def source_to_unicode(source, encoding=None):
    encoding = detect_encoding()
    if not isinstance(encoding, unicode):
        encoding = unicode(encoding, 'utf-8', 'replace')
    # cast to unicode by default
    return unicode(source, encoding, 'replace')

    # Cast to unicode
    return unicode(source, encoding, errors)

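A short sketch of the new decoding behavior (my illustration, assuming the signature above; not part of the diff):

    from parso.utils import python_bytes_to_unicode

    # A PEP 263 coding declaration in the source overrides the utf-8 default.
    source = b'# -*- coding: latin-1 -*-\nname = "caf\xe9"\n'
    assert python_bytes_to_unicode(source) == '# -*- coding: latin-1 -*-\nname = "caf\xe9"\n'
    # With the default errors='strict' undecodable bytes raise;
    # errors='replace' substitutes U+FFFD instead.
    python_bytes_to_unicode(b'x = "\xff"\n', errors='replace')
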
def version_info():
    """
    Returns a namedtuple of parso's version, similar to Python's
    ``sys.version_info``.
    """
    from parso import __version__
    tupl = re.findall(r'[a-z]+|\d+', __version__)
    return Version(*[x if i == 3 else int(x) for i, x in enumerate(tupl)])


def _parse_version(version):
    match = re.match(r'(\d+)(?:\.(\d)(?:\.\d+)?)?$', version)
    if match is None:
        raise ValueError('The given version is not in the right format. '
                         'Use something like "3.2" or "3".')

    major = int(match.group(1))
    minor = match.group(2)
    if minor is None:
        # Use the latest Python in case it's not exactly defined, because the
        # grammars are typically backwards compatible?
        if major == 2:
            minor = "7"
        elif major == 3:
            minor = "6"
        else:
            raise NotImplementedError("Sorry, no support yet for those fancy new/old versions.")
    minor = int(minor)
    return PythonVersionInfo(major, minor)


@total_ordering
class PythonVersionInfo(namedtuple('Version', 'major, minor')):
    def __gt__(self, other):
        if isinstance(other, tuple):
            if len(other) != 2:
                raise ValueError("Can only compare to tuples of length 2.")
            return (self.major, self.minor) > other
        return super(PythonVersionInfo, self).__gt__(other)

    def __eq__(self, other):
        if isinstance(other, tuple):
            if len(other) != 2:
                raise ValueError("Can only compare to tuples of length 2.")
            return (self.major, self.minor) == other
        return super(PythonVersionInfo, self).__eq__(other)

    def __ne__(self, other):
        return not self.__eq__(other)


def parse_version_string(version=None):
    """
    Checks for a valid version number (e.g. `3.2` or `2.7.1` or `3`) and
    returns a corresponding version info that is always two characters long in
    decimal.
    """
    if version is None:
        version = '%s.%s' % sys.version_info[:2]
    if not isinstance(version, (unicode, str)):
        raise TypeError("version must be a string like 3.2.")

    return _parse_version(version)

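And a sketch of the version helpers in action (illustrative, assuming the code above):

    from parso.utils import parse_version_string

    v = parse_version_string('3')          # no minor given: falls back to 3.6
    assert (v.major, v.minor) == (3, 6)
    assert parse_version_string('2.7.1') == (2, 7)  # micro is accepted and dropped
    assert parse_version_string('3.4') > (3, 2)     # tuple comparison via __gt__/__eq__
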
29 parso/utils.pyi (new file)
@@ -0,0 +1,29 @@
from typing import NamedTuple, Optional, Sequence, Union


class Version(NamedTuple):
    major: int
    minor: int
    micro: int


def split_lines(string: str, keepends: bool = ...) -> Sequence[str]: ...
def python_bytes_to_unicode(
    source: Union[str, bytes], encoding: str = ..., errors: str = ...
) -> str: ...
def version_info() -> Version:
    """
    Returns a namedtuple of parso's version, similar to Python's
    ``sys.version_info``.
    """
    ...


class PythonVersionInfo(NamedTuple):
    major: int
    minor: int


def parse_version_string(version: Optional[str]) -> PythonVersionInfo:
    """
    Checks for a valid version number (e.g. `3.2` or `2.7.1` or `3`) and
    returns a corresponding version info that is always two characters long in
    decimal.
    """
    ...

@@ -1,8 +1,10 @@
[pytest]
addopts = --doctest-modules

testpaths = parso test

# Ignore broken files in blackbox test directories
norecursedirs = .* docs scripts old*
norecursedirs = .* docs scripts normalizer_issue_files build

# Activate `clean_jedi_cache` fixture for all tests. This should be
# fine as long as we are using `clean_jedi_cache` as a session scoped

10 setup.cfg
@@ -1,2 +1,12 @@
[bdist_wheel]
universal=1

[flake8]
max-line-length = 100
ignore =
    # do not use bare 'except'
    E722,
    # don't know why this was ever even an option, 1+1 should be possible.
    E226,
    # line break before binary operator
    W503,
20 setup.py
@@ -1,20 +1,20 @@
#!/usr/bin/env python

from __future__ import with_statement
from setuptools import setup

from setuptools import setup, find_packages

import parso


__AUTHOR__ = 'David Halter'
__AUTHOR_EMAIL__ = 'davidhalter88@gmail.com'

readme = open('README.rst').read() + '\n\n' + open('CHANGELOG.rst').read()
packages = ['parso', 'parso.pgen2', 'parso.python']

import parso

setup(name='parso',
      version=parso.__version__,
      description='A Python parser written in Python.',
      description='A Python Parser',
      author=__AUTHOR__,
      author_email=__AUTHOR_EMAIL__,
      include_package_data=True,
@@ -24,7 +24,7 @@ setup(name='parso',
      license='MIT',
      keywords='python parser parsing',
      long_description=readme,
      packages=packages,
      packages=find_packages(exclude=['test']),
      package_data={'parso': ['python/grammar*.txt']},
      platforms=['any'],
      classifiers=[
@@ -40,8 +40,16 @@ setup(name='parso',
          'Programming Language :: Python :: 3.3',
          'Programming Language :: Python :: 3.4',
          'Programming Language :: Python :: 3.5',
          'Programming Language :: Python :: 3.6',
          'Programming Language :: Python :: 3.7',
          'Topic :: Software Development :: Libraries :: Python Modules',
          'Topic :: Text Editors :: Integrated Development Environments (IDE)',
          'Topic :: Utilities',
      ],
      extras_require={
          'testing': [
              'pytest>=3.0.7',
              'docopt',
          ],
      },
      )

321 test/failing_examples.py (new file)
@@ -0,0 +1,321 @@
# -*- coding: utf-8 -*-
import sys
from textwrap import dedent


def indent(code):
    lines = code.splitlines(True)
    return ''.join([' ' * 2 + line for line in lines])


def build_nested(code, depth, base='def f():\n'):
    if depth == 0:
        return code

    new_code = base + indent(code)
    return build_nested(new_code, depth - 1, base=base)


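A tiny illustration of what the helper builds (my example, not part of the test data itself):

    # Each level of depth wraps the code in one more `def f():` body:
    assert build_nested('pass', 1) == 'def f():\n  pass'
    assert build_nested('pass', 2) == 'def f():\n  def f():\n    pass'
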
FAILING_EXAMPLES = [
    '1 +',
    '?',
    'continue',
    'break',
    'return',
    'yield',

    # SyntaxError from Python/ast.c
    'f(x for x in bar, 1)',
    'from foo import a,',
    'from __future__ import whatever',
    'from __future__ import braces',
    'from .__future__ import whatever',
    'def f(x=3, y): pass',
    'lambda x=3, y: x',
    '__debug__ = 1',
    'with x() as __debug__: pass',
    # Mostly 3.6 relevant
    '[]: int',
    '[a, b]: int',
    '(): int',
    '(()): int',
    '((())): int',
    '{}: int',
    'True: int',
    '(a, b): int',
    '*star,: int',
    'a, b: int = 3',
    'foo(+a=3)',
    'f(lambda: 1=1)',
    'f(x=1, x=2)',
    'f(**x, y)',
    'f(x=2, y)',
    'f(**x, *y)',
    'f(**x, y=3, z)',
    'a, b += 3',
    '(a, b) += 3',
    '[a, b] += 3',
    # All assignment tests
    'lambda a: 1 = 1',
    '[x for x in y] = 1',
    '{x for x in y} = 1',
    '{x:x for x in y} = 1',
    '(x for x in y) = 1',
    'None = 1',
    '... = 1',
    'a == b = 1',
    '{a, b} = 1',
    '{a: b} = 1',
    '1 = 1',
    '"" = 1',
    'b"" = 1',
    'b"" = 1',
    '"" "" = 1',
    '1 | 1 = 3',
    '1**1 = 3',
    '~ 1 = 3',
    'not 1 = 3',
    '1 and 1 = 3',
    'def foo(): (yield 1) = 3',
    'def foo(): x = yield 1 = 3',
    'async def foo(): await x = 3',
    '(a if a else a) = a',
    'a, 1 = x',
    'foo() = 1',
    # Cases without the equals but other assignments.
    'with x as foo(): pass',
    'del bar, 1',
    'for x, 1 in []: pass',
    'for (not 1) in []: pass',
    '[x for 1 in y]',
    '[x for a, 3 in y]',
    '(x for 1 in y)',
    '{x for 1 in y}',
    '{x:x for 1 in y}',
    # Unicode/Bytes issues.
    r'u"\x"',
    r'u"\"',
    r'u"\u"',
    r'u"""\U"""',
    r'u"\Uffffffff"',
    r"u'''\N{}'''",
    r"u'\N{foo}'",
    r'b"\x"',
    r'b"\"',
    '*a, *b = 3, 3',
    'async def foo(): yield from []',
    'yield from []',
    '*a = 3',
    'del *a, b',
    'def x(*): pass',
    '(%s *d) = x' % ('a,' * 256),
    '{**{} for a in [1]}',

    # Parser/tokenize.c
    r'"""',
    r'"',
    r"'''",
    r"'",
    r"\blub",
    # IndentationError: too many levels of indentation
    build_nested('pass', 100),

    # SyntaxErrors from Python/symtable.c
    'def f(x, x): pass',
    'nonlocal a',

    # IndentationError
    ' foo',
    'def x():\n    1\n 2',
    'def x():\n 1\n  2',
    'if 1:\nfoo',
    'if 1: blubb\nif 1:\npass\nTrue and False',

    # f-strings
    'f"{}"',
    r'f"{\}"',
    'f"{\'\\\'}"',
    'f"{#}"',
    "f'{1!b}'",
    "f'{1:{5:{3}}}'",
    "f'{'",
    "f'{'",
    "f'}'",
    "f'{\"}'",
    "f'{\"}'",
    # Now nested parsing
    "f'{continue}'",
    "f'{1;1}'",
    "f'{a;}'",
    "f'{b\"\" \"\"}'",
]

GLOBAL_NONLOCAL_ERROR = [
    dedent('''
        def glob():
            x = 3
            x.z
            global x'''),
    dedent('''
        def glob():
            x = 3
            global x'''),
    dedent('''
        def glob():
            x
            global x'''),
    dedent('''
        def glob():
            x = 3
            x.z
            nonlocal x'''),
    dedent('''
        def glob():
            x = 3
            nonlocal x'''),
    dedent('''
        def glob():
            x
            nonlocal x'''),
    # Annotation issues
    dedent('''
        def glob():
            x[0]: foo
            global x'''),
    dedent('''
        def glob():
            x.a: foo
            global x'''),
    dedent('''
        def glob():
            x: foo
            global x'''),
    dedent('''
        def glob():
            x: foo = 5
            global x'''),
    dedent('''
        def glob():
            x: foo = 5
            x
            global x'''),
    dedent('''
        def glob():
            global x
            x: foo = 3
        '''),
    # global/nonlocal + param
    dedent('''
        def glob(x):
            global x
        '''),
    dedent('''
        def glob(x):
            nonlocal x
        '''),
    dedent('''
        def x():
            a =3
            def z():
                nonlocal a
            a = 3
            nonlocal a
        '''),
    dedent('''
        def x():
            a = 4
            def y():
                global a
                nonlocal a
        '''),
    # Missing binding of nonlocal
    dedent('''
        def x():
            nonlocal a
        '''),
    dedent('''
        def x():
            def y():
                nonlocal a
        '''),
    dedent('''
        def x():
            a = 4
            def y():
                global a
                print(a)
                def z():
                    nonlocal a
        '''),
]

if sys.version_info >= (3, 6):
    FAILING_EXAMPLES += GLOBAL_NONLOCAL_ERROR
if sys.version_info >= (3, 5):
    FAILING_EXAMPLES += [
        # Raises different errors so just ignore them for now.
        '[*[] for a in [1]]',
        # Raises multiple errors in previous versions.
        'async def bla():\n def x():  await bla()',
    ]
if sys.version_info >= (3, 4):
    # Before that del None works like del list, it gives a NameError.
    FAILING_EXAMPLES.append('del None')
if sys.version_info >= (3,):
    FAILING_EXAMPLES += [
        # Unfortunately assigning to False and True do not raise an error in
        # 2.x.
        '(True,) = x',
        '([False], a) = x',
        # A symtable error that raises only a SyntaxWarning in Python 2.
        'def x(): from math import *',
        # unicode chars in bytes are allowed in python 2
        'b"ä"',
        # combining strings and unicode is allowed in Python 2.
        '"s" b""',
        '"s" b"" ""',
        'b"" "" b"" ""',
    ]
if sys.version_info >= (3, 6):
    FAILING_EXAMPLES += [
        # Same as above, but for f-strings.
        'f"s" b""',
        'b"s" f""',
    ]
if sys.version_info >= (2, 7):
    # This is something that raises a different error in 2.6 than in the other
    # versions. Just skip it for 2.6.
    FAILING_EXAMPLES.append('[a, 1] += 3')

if sys.version_info[:2] == (3, 5):
    # yields are not allowed in 3.5 async functions. Therefore test them
    # separately, here.
    FAILING_EXAMPLES += [
        'async def foo():\n yield x',
        'async def foo():\n yield x',
    ]
else:
    FAILING_EXAMPLES += [
        'async def foo():\n yield x\n return 1',
        'async def foo():\n yield x\n return 1',
    ]


if sys.version_info[:2] <= (3, 4):
    # In Python > 3.4 this is valid code.
    FAILING_EXAMPLES += [
        'a = *[1], 2',
        '(*[1], 2)',
    ]

if sys.version_info[:2] < (3, 8):
    FAILING_EXAMPLES += [
        # Python/compile.c
        dedent('''\
            for a in [1]:
                try:
                    pass
                finally:
                    continue
            '''),  # 'continue' not supported inside 'finally' clause"
    ]
290 test/fuzz_diff_parser.py (new file)
@@ -0,0 +1,290 @@
"""
A script to find bugs in the diff parser.

This script is extremely useful if changes are made to the diff parser. By
running a few thousand iterations, we can assure that the diff parser is in
good shape.

Usage:
  fuzz_diff_parser.py [--pdb|--ipdb] [-l] [-n=<nr>] [-x=<nr>] random [<path>]
  fuzz_diff_parser.py [--pdb|--ipdb] [-l] redo [-o=<nr>] [-p]
  fuzz_diff_parser.py -h | --help

Options:
  -h --help              Show this screen
  -n, --maxtries=<nr>    Maximum of random tries [default: 1000]
  -x, --changes=<nr>     Amount of changes to be done to a file per try [default: 5]
  -l, --logging          Prints all the logs
  -o, --only-last=<nr>   Only runs the last n iterations; Defaults to running all
  -p, --print-code       Print all test diffs
  --pdb                  Launch pdb when error is raised
  --ipdb                 Launch ipdb when error is raised
"""

from __future__ import print_function
import logging
import sys
import os
import random
import pickle

import parso
from parso.utils import split_lines
from test.test_diff_parser import _check_error_leaves_nodes

_latest_grammar = parso.load_grammar(version='3.8')
_python_reserved_strings = tuple(
    # Keywords are usually only interesting in combination with spaces after
    # them. We don't put a space before keywords, to avoid indentation errors.
    s + (' ' if s.isalpha() else '')
    for s in _latest_grammar._pgen_grammar.reserved_syntax_strings.keys()
)
_random_python_fragments = _python_reserved_strings + (
    ' ', '\t', '\n', '\r', '\f', 'f"', 'F"""', "fr'", "RF'''", '"', '"""', "'",
    "'''", ';', ' some_random_word ', '\\', '#',
)


def find_python_files_in_tree(file_path):
    if not os.path.isdir(file_path):
        yield file_path
        return
    for root, dirnames, filenames in os.walk(file_path):
        for name in filenames:
            if name.endswith('.py'):
                yield os.path.join(root, name)


def _print_copyable_lines(lines):
    for line in lines:
        line = repr(line)[1:-1]
        if line.endswith(r'\n'):
            line = line[:-2] + '\n'
        print(line, end='')


def _get_first_error_start_pos_or_none(module):
    error_leaf = _check_error_leaves_nodes(module)
    return None if error_leaf is None else error_leaf.start_pos


class LineReplacement:
    def __init__(self, line_nr, new_line):
        self._line_nr = line_nr
        self._new_line = new_line

    def apply(self, code_lines):
        # print(repr(self._new_line))
        code_lines[self._line_nr] = self._new_line


class LineDeletion:
    def __init__(self, line_nr):
        self.line_nr = line_nr

    def apply(self, code_lines):
        del code_lines[self.line_nr]


class LineCopy:
    def __init__(self, copy_line, insertion_line):
        self._copy_line = copy_line
        self._insertion_line = insertion_line

    def apply(self, code_lines):
        code_lines.insert(
            self._insertion_line,
            # Use some line from the file. This doesn't feel totally
            # random, but for the diff parser it will feel like it.
            code_lines[self._copy_line]
        )


class FileModification:
    @classmethod
    def generate(cls, code_lines, change_count):
        return cls(
            list(cls._generate_line_modifications(code_lines, change_count)),
            # work with changed trees more than with normal ones.
            check_original=random.random() > 0.8,
        )

    @staticmethod
    def _generate_line_modifications(lines, change_count):
        def random_line(include_end=False):
            return random.randint(0, len(lines) - (not include_end))

        lines = list(lines)
        for _ in range(change_count):
            rand = random.randint(1, 4)
            if rand == 1:
                if len(lines) == 1:
                    # We cannot delete every line, that doesn't make sense to
                    # fuzz and it would be annoying to rewrite everything here.
                    continue
                l = LineDeletion(random_line())
            elif rand == 2:
                # Copy / Insertion
                # Make it possible to insert into the first and the last line
                l = LineCopy(random_line(), random_line(include_end=True))
            elif rand in (3, 4):
                # Modify a line in some weird random ways.
                line_nr = random_line()
                line = lines[line_nr]
                column = random.randint(0, len(line))
                random_string = ''
                for _ in range(random.randint(1, 3)):
                    if random.random() > 0.8:
                        # The lower characters cause way more issues.
                        unicode_range = 0x1f if random.randint(0, 1) else 0x3000
                        random_string += chr(random.randint(0, unicode_range))
                    else:
                        # These insertions let us understand how random
                        # keyword/operator insertions work. Theoretically this
                        # could also be done with unicode insertions, but the
                        # fuzzer is just way more effective here.
                        random_string += random.choice(_random_python_fragments)
                if random.random() > 0.5:
                    # In this case we insert at a very random place that
                    # probably breaks syntax.
                    line = line[:column] + random_string + line[column:]
                else:
                    # Here we have better chances to not break syntax, because
                    # we really replace the line with something that has
                    # indentation.
                    line = ' ' * random.randint(0, 12) + random_string + '\n'
                l = LineReplacement(line_nr, line)
            l.apply(lines)
            yield l

    def __init__(self, modification_list, check_original):
        self._modification_list = modification_list
        self._check_original = check_original

    def _apply(self, code_lines):
        changed_lines = list(code_lines)
        for modification in self._modification_list:
            modification.apply(changed_lines)
        return changed_lines

    def run(self, grammar, code_lines, print_code):
        code = ''.join(code_lines)
        modified_lines = self._apply(code_lines)
        modified_code = ''.join(modified_lines)

        if print_code:
            if self._check_original:
                print('Original:')
                _print_copyable_lines(code_lines)

            print('\nModified:')
            _print_copyable_lines(modified_lines)
            print()

        if self._check_original:
            m = grammar.parse(code, diff_cache=True)
            start1 = _get_first_error_start_pos_or_none(m)

        grammar.parse(modified_code, diff_cache=True)

        if self._check_original:
            # Also check if it's possible to "revert" the changes.
            m = grammar.parse(code, diff_cache=True)
            start2 = _get_first_error_start_pos_or_none(m)
            assert start1 == start2, (start1, start2)


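As a quick orientation, a minimal usage sketch of the pieces above (my illustration, not part of the diff; it assumes this file's classes are importable and that the test package is on the path):

    import random
    import parso
    from parso.utils import split_lines

    random.seed(0)  # make the illustration deterministic
    grammar = parso.load_grammar()
    code_lines = split_lines("def f():\n    return 1\n", keepends=True)
    fm = FileModification.generate(code_lines, change_count=2)
    # Parses the original and the mutated code through the diff parser.
    fm.run(grammar, code_lines, print_code=True)
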
class FileTests:
    def __init__(self, file_path, test_count, change_count):
        self._path = file_path
        with open(file_path) as f:
            code = f.read()
        self._code_lines = split_lines(code, keepends=True)
        self._test_count = test_count
        self._code_lines = self._code_lines
        self._change_count = change_count
        self._file_modifications = []

    def _run(self, grammar, file_modifications, debugger, print_code=False):
        try:
            for i, fm in enumerate(file_modifications, 1):
                fm.run(grammar, self._code_lines, print_code=print_code)
                print('.', end='')
                sys.stdout.flush()
            print()
        except Exception:
            print("Issue in file: %s" % self._path)
            if debugger:
                einfo = sys.exc_info()
                pdb = __import__(debugger)
                pdb.post_mortem(einfo[2])
            raise

    def redo(self, grammar, debugger, only_last, print_code):
        mods = self._file_modifications
        if only_last is not None:
            mods = mods[-only_last:]
        self._run(grammar, mods, debugger, print_code=print_code)

    def run(self, grammar, debugger):
        def iterate():
            for _ in range(self._test_count):
                fm = FileModification.generate(self._code_lines, self._change_count)
                self._file_modifications.append(fm)
                yield fm

        self._run(grammar, iterate(), debugger)


def main(arguments):
    debugger = 'pdb' if arguments['--pdb'] else \
        'ipdb' if arguments['--ipdb'] else None
    redo_file = os.path.join(os.path.dirname(__file__), 'fuzz-redo.pickle')

    if arguments['--logging']:
        root = logging.getLogger()
        root.setLevel(logging.DEBUG)

        ch = logging.StreamHandler(sys.stdout)
        ch.setLevel(logging.DEBUG)
        root.addHandler(ch)

    grammar = parso.load_grammar()
    parso.python.diff.DEBUG_DIFF_PARSER = True
    if arguments['redo']:
        with open(redo_file, 'rb') as f:
            file_tests_obj = pickle.load(f)
        only_last = arguments['--only-last'] and int(arguments['--only-last'])
        file_tests_obj.redo(
            grammar,
            debugger,
            only_last=only_last,
            print_code=arguments['--print-code']
        )
    elif arguments['random']:
        # A random file is used to do diff parser checks if no file is given.
        # This helps us to find errors in a lot of different files.
        file_paths = list(find_python_files_in_tree(arguments['<path>'] or '.'))
        max_tries = int(arguments['--maxtries'])
        tries = 0
        try:
            while tries < max_tries:
                path = random.choice(file_paths)
                print("Checking %s: %s tries" % (path, tries))
                now_tries = min(1000, max_tries - tries)
                file_tests_obj = FileTests(path, now_tries, int(arguments['--changes']))
                file_tests_obj.run(grammar, debugger)
                tries += now_tries
        except Exception:
            with open(redo_file, 'wb') as f:
                pickle.dump(file_tests_obj, f)
            raise
    else:
        raise NotImplementedError('Command is not implemented')


if __name__ == '__main__':
    from docopt import docopt

    arguments = docopt(__doc__)
    main(arguments)
51 test/normalizer_issue_files/E10.py (new file)
@@ -0,0 +1,51 @@
for a in 'abc':
    for b in 'xyz':
        hello(a)  # indented with 8 spaces
        #: E903:0
	hello(b)  # indented with 1 tab
if True:
    #: E101:0
	pass

#: E122+1
change_2_log = \
"""Change 2 by slamb@testclient on 2006/04/13 21:46:23

	creation
"""

p4change = {
    2: change_2_log,
}


class TestP4Poller(unittest.TestCase):
    def setUp(self):
        self.setUpGetProcessOutput()
        return self.setUpChangeSource()

    def tearDown(self):
        pass


#
if True:
    #: E101:0 E101+1:0
	foo(1,
	    2)


def test_keys(self):
    """areas.json - All regions are accounted for."""
    expected = set([
        #: E101:0
	u'Norrbotten',
        #: E101:0
	u'V\xe4sterbotten',
    ])


if True:
    hello("""
	tab at start of this line
""")
137 test/normalizer_issue_files/E101.py (new file)
@@ -0,0 +1,137 @@
# Used to be the file for W191

#: E101+1
if False:
	print  # indented with 1 tab

#: E101+1
y = x == 2 \
	or x == 3
#: E101+5
if (
        x == (
            3
        ) or
        y == 4):
	pass
#: E101+3
if x == 2 \
        or y > 1 \
        or x == 3:
	pass
#: E101+3
if x == 2 \
        or y > 1 \
        or x == 3:
	pass

#: E101+1
if (foo == bar and baz == frop):
	pass
#: E101+1
if (foo == bar and baz == frop):
	pass

#: E101+2 E101+3
if start[1] > end_col and not (
        over_indent == 4 and indent_next):
	assert (0, "E121 continuation line over-"
	           "indented for visual indent")


#: E101+3
def long_function_name(
        var_one, var_two, var_three,
        var_four):
	hello(var_one)


#: E101+2
if ((row < 0 or self.moduleCount <= row or
     col < 0 or self.moduleCount <= col)):
	raise Exception("%s,%s - %s" % (row, col, self.moduleCount))
#: E101+1 E101+2 E101+3 E101+4 E101+5 E101+6
if bar:
	assert (
	    start, 'E121 lines starting with a '
	    'closing bracket should be indented '
	    "to match that of the opening "
	    "bracket's line"
	)

# you want vertical alignment, so use a parens
#: E101+3
if ((foo.bar("baz") and
     foo.bar("frop")
     )):
	hello("yes")
#: E101+3
# also ok, but starting to look like LISP
if ((foo.bar("baz") and
     foo.bar("frop"))):
	hello("yes")
#: E101+1
if (a == 2 or b == "abc def ghi" "jkl mno"):
	assert True
#: E101+2
if (a == 2 or b == """abc def ghi
jkl mno"""):
	assert True
#: E101+1 E101+2
if length > options.max_line_length:
	assert options.max_line_length, \
	    "E501 line too long (%d characters)" % length


#: E101+1 E101+2
if os.path.exists(os.path.join(path, PEP8_BIN)):
	cmd = ([os.path.join(path, PEP8_BIN)] +
	       self._pep8_options(targetfile))
# TODO Tabs in docstrings shouldn't be there, use \t.
'''
	multiline string with tab in it'''
# Same here.
'''multiline string
with tabs
   and spaces
'''
# Okay
'''sometimes, you just need to go nuts in a multiline string
	and allow all sorts of crap
  like mixed tabs and spaces

or trailing whitespace
or long long long long long long long long long long long long long long long long long lines
'''  # noqa
# Okay
'''this one
	will get no warning
even though the noqa comment is not immediately after the string
''' + foo  # noqa

#: E101+2
if foo is None and bar is "frop" and \
        blah == 'yeah':
	blah = 'yeahnah'


#: E101+1 E101+2 E101+3
if True:
	foo(
		1,
		2)


#: E101+1 E101+2 E101+3 E101+4 E101+5
def test_keys(self):
	"""areas.json - All regions are accounted for."""
	expected = set([
		u'Norrbotten',
		u'V\xe4sterbotten',
	])


#: E101+1
x = [
	'abc'
]
60 test/normalizer_issue_files/E11.py (new file)
@@ -0,0 +1,60 @@
if x > 2:
    #: E111:2
  hello(x)
if True:
    #: E111:5
     print
    #: E111:6
      #
    #: E111:2
  # what
    # Comment is fine
# Comment is also fine

if False:
    pass
print
print
#: E903:0
	print
mimetype = 'application/x-directory'
#: E111:5
     # 'httpd/unix-directory'
create_date = False


def start(self):
    # foo
    #: E111:8
        # bar
    if True:  # Hello
        self.master.start()  # Comment
        # try:
        #: E111:12
            # self.master.start()
        # except MasterExit:
        #: E111:12
            # self.shutdown()
        # finally:
        #: E111:12
            # sys.exit()
    # Dedent to the first level
    #: E111:6
      # error
# Dedent to the base level
#: E111:2
  # Also wrongly indented.
# Indent is correct.


def start(self):  # Correct comment
    if True:
        #: E111:0
# try:
        #: E111:0
# self.master.start()
        #: E111:0
# except MasterExit:
        #: E111:0
# self.shutdown()
        self.master.start()  # comment
78 test/normalizer_issue_files/E12_first.py (new file)
@@ -0,0 +1,78 @@
abc = "E121", (
    #: E121:2
  "dent")
abc = "E122", (
    #: E121:0
"dent")
my_list = [
    1, 2, 3,
    4, 5, 6,
    #: E123
    ]
abc = "E124", ("visual",
               "indent_two"
               #: E124:14
              )
abc = "E124", ("visual",
               "indent_five"
               #: E124:0
)
a = (123,
     #: E124:0
)
#: E129+1:4
if (row < 0 or self.moduleCount <= row or
    col < 0 or self.moduleCount <= col):
    raise Exception("%s,%s - %s" % (row, col, self.moduleCount))

abc = "E126", (
    #: E126:12
            "dent")
abc = "E126", (
    #: E126:8
        "dent")
abc = "E127", ("over-",
               #: E127:18
                  "over-indent")
abc = "E128", ("visual",
               #: E128:4
    "hanging")
abc = "E128", ("under-",
               #: E128:14
              "under-indent")


my_list = [
    1, 2, 3,
    4, 5, 6,
    #: E123:5
     ]
result = {
    #: E121:3
   'key1': 'value',
    #: E121:3
   'key2': 'value',
}
rv.update(dict.fromkeys((
    'qualif_nr', 'reasonComment_en', 'reasonComment_fr',
    'reasonComment_de', 'reasonComment_it'),
    #: E128:10
          '?'),
          "foo")

abricot = 3 + \
          4 + \
          5 + 6
abc = "hello", (

    "there",
    #: E126:5
     # "john",
    "dude")
part = set_mimetype((
    a.get('mime_type', 'text')),
    'default')
part = set_mimetype((
    a.get('mime_type', 'text')),
    #: E127:21
                     'default')
356 test/normalizer_issue_files/E12_not_first.py (new file)
@@ -0,0 +1,356 @@
# The issue numbers described in this file are part of the pycodestyle tracker
# and not of parso.
# Originally there were no issues in here, I (dave) added the ones that were
# necessary and IMO useful.
if (
        x == (
            3
        ) or
        y == 4):
    pass

y = x == 2 \
    or x == 3

#: E129+1:4
if x == 2 \
    or y > 1 \
        or x == 3:
    pass

if x == 2 \
        or y > 1 \
        or x == 3:
    pass


if (foo == bar and
        baz == frop):
    pass

#: E129+1:4 E129+2:4 E123+3
if (
    foo == bar and
    baz == frop
):
    pass

if (
        foo == bar and
        baz == frop
        #: E129:4
    ):
    pass

a = (
)

a = (123,
     )


if start[1] > end_col and not (
        over_indent == 4 and indent_next):
    assert (0, "E121 continuation line over-"
               "indented for visual indent")


abc = "OK", ("visual",
             "indent")

abc = "Okay", ("visual",
               "indent_three"
               )

abc = "a-ok", (
    "there",
    "dude",
)

abc = "hello", (
    "there",
    "dude")

abc = "hello", (

    "there",
    # "john",
    "dude")

abc = "hello", (
    "there", "dude")

abc = "hello", (
    "there", "dude",
)

# Aligned with opening delimiter
foo = long_function_name(var_one, var_two,
                         var_three, var_four)

# Extra indentation is not necessary.
foo = long_function_name(
    var_one, var_two,
    var_three, var_four)


arm = 'AAA' \
      'BBB' \
      'CCC'

bbb = 'AAA' \
      'BBB' \
      'CCC'

cc = ('AAA'
      'BBB'
      'CCC')

cc = {'text': 'AAA'
              'BBB'
              'CCC'}

cc = dict(text='AAA'
               'BBB')

sat = 'AAA' \
      'BBB' \
      'iii' \
      'CCC'

abricot = (3 +
           4 +
           5 + 6)

#: E122+1:4
abricot = 3 + \
    4 + \
    5 + 6

part = [-1, 2, 3,
        4, 5, 6]

#: E128+1:8
part = [-1, (2, 3,
        4, 5, 6), 7,
        8, 9, 0]

fnct(1, 2, 3,
     4, 5, 6)

fnct(1, 2, 3,
     4, 5, 6,
     7, 8, 9,
     10, 11)


def long_function_name(
        var_one, var_two, var_three,
        var_four):
    hello(var_one)


if ((row < 0 or self.moduleCount <= row or
     col < 0 or self.moduleCount <= col)):
    raise Exception("%s,%s - %s" % (row, col, self.moduleCount))


result = {
    'foo': [
        'bar', {
            'baz': 'frop',
        }
    ]
}


foo = my.func({
    "foo": "bar",
}, "baz")


fooff(aaaa,
      cca(
          vvv,
          dadd
      ), fff,
      ggg)

fooff(aaaa,
      abbb,
      cca(
          vvv,
          aaa,
          dadd),
      "visual indentation is not a multiple of four",)

if bar:
    assert (
        start, 'E121 lines starting with a '
        'closing bracket should be indented '
        "to match that of the opening "
        "bracket's line"
    )

# you want vertical alignment, so use a parens
if ((foo.bar("baz") and
     foo.bar("frop")
     )):
    hello("yes")

# also ok, but starting to look like LISP
if ((foo.bar("baz") and
     foo.bar("frop"))):
    hello("yes")

#: E129+1:4 E127+2:9
if (a == 2 or
    b == "abc def ghi"
         "jkl mno"):
    assert True

#: E129+1:4
if (a == 2 or
    b == """abc def ghi
jkl mno"""):
    assert True

if length > options.max_line_length:
    assert options.max_line_length, \
        "E501 line too long (%d characters)" % length


# blub


asd = 'l.{line}\t{pos}\t{name}\t{text}'.format(
    line=token[2][0],
    pos=pos,
    name=tokenize.tok_name[token[0]],
    text=repr(token[1]),
)

#: E121+1:6 E121+2:6
hello('%-7d %s per second (%d total)' % (
      options.counters[key] / elapsed, key,
      options.counters[key]))


if os.path.exists(os.path.join(path, PEP8_BIN)):
    cmd = ([os.path.join(path, PEP8_BIN)] +
           self._pep8_options(targetfile))


fixed = (re.sub(r'\t+', ' ', target[c::-1], 1)[::-1] +
         target[c + 1:])

fixed = (
    re.sub(r'\t+', ' ', target[c::-1], 1)[::-1] +
    target[c + 1:]
)


if foo is None and bar is "frop" and \
        blah == 'yeah':
    blah = 'yeahnah'


"""This is a multi-line
   docstring."""


if blah:
    # is this actually readable? :)
    multiline_literal = """
while True:
    if True:
        1
""".lstrip()
    multiline_literal = (
        """
while True:
    if True:
        1
""".lstrip()
    )
    multiline_literal = (
        """
while True:
    if True:
        1
"""
        .lstrip()
    )


if blah:
    multiline_visual = ("""
while True:
    if True:
        1
"""
                        .lstrip())


rv = {'aaa': 42}
rv.update(dict.fromkeys((
    #: E121:4 E121+1:4
    'qualif_nr', 'reasonComment_en', 'reasonComment_fr',
    'reasonComment_de', 'reasonComment_it'), '?'))

rv.update(dict.fromkeys(('qualif_nr', 'reasonComment_en',
                         'reasonComment_fr', 'reasonComment_de',
                         'reasonComment_it'), '?'))

#: E128+1:10
rv.update(dict.fromkeys(('qualif_nr', 'reasonComment_en', 'reasonComment_fr',
          'reasonComment_de', 'reasonComment_it'), '?'))


rv.update(dict.fromkeys(
    ('qualif_nr', 'reasonComment_en', 'reasonComment_fr',
     'reasonComment_de', 'reasonComment_it'), '?'
), "foo", context={
    'alpha': 4, 'beta': 53242234, 'gamma': 17,
})


rv.update(
    dict.fromkeys((
        'qualif_nr', 'reasonComment_en', 'reasonComment_fr',
        'reasonComment_de', 'reasonComment_it'), '?'),
    "foo",
    context={
        'alpha': 4, 'beta': 53242234, 'gamma': 17,
    },
)


event_obj.write(cursor, user_id, {
    'user': user,
    'summary': text,
    'data': data,
})

event_obj.write(cursor, user_id, {
    'user': user,
    'summary': text,
    'data': {'aaa': 1, 'bbb': 2},
})

event_obj.write(cursor, user_id, {
    'user': user,
    'summary': text,
    'data': {
        'aaa': 1,
        'bbb': 2},
})

event_obj.write(cursor, user_id, {
    'user': user,
    'summary': text,
    'data': {'timestamp': now, 'content': {
        'aaa': 1,
        'bbb': 2
    }},
})
294 test/normalizer_issue_files/E12_not_second.py (new file)
@@ -0,0 +1,294 @@

def qualify_by_address(
        self, cr, uid, ids, context=None,
        params_to_check=frozenset(QUALIF_BY_ADDRESS_PARAM)):
    """ This gets called by the web server """


def qualify_by_address(self, cr, uid, ids, context=None,
                       params_to_check=frozenset(QUALIF_BY_ADDRESS_PARAM)):
    """ This gets called by the web server """


_ipv4_re = re.compile('^(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.'
                      '(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.'
                      '(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.'
                      '(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$')


fct("""
    AAA """ + status_2_string)


if context:
    msg = """\
action: GET-CONFIG
payload:
    ip_address: "%(ip)s"
    username: "%(username)s"
""" % context


if context:
    msg = """\
action: \
GET-CONFIG
""" % context


if context:
    #: E122+2:0
    msg = """\
action: """\
"""GET-CONFIG
""" % context


def unicode2html(s):
    """Convert the characters &<>'" in string s to HTML-safe sequences.
    Convert newline to <br> too."""
    #: E127+1:28
    return unicode((s or '').replace('&', '&amp;')
                            .replace('\n', '<br>\n'))


parser.add_option('--count', action='store_true',
                  help="print total number of errors and warnings "
                       "to standard error and set exit code to 1 if "
                       "total is not null")

parser.add_option('--exclude', metavar='patterns', default=DEFAULT_EXCLUDE,
                  help="exclude files or directories which match these "
                       "comma separated patterns (default: %s)" %
                       DEFAULT_EXCLUDE)

add_option('--count',
           #: E135+1
           help="print total number of errors "
           "to standard error total is not null")

add_option('--count',
           #: E135+2:11
           help="print total number of errors "
                "to standard error "
           "total is not null")


help = ("print total number of errors " +
        "to standard error")

help = "print total number of errors " \
       "to standard error"

help = u"print total number of errors " \
       u"to standard error"

help = b"print total number of errors " \
       b"to standard error"

#: E122+1:5
help = br"print total number of errors " \
     br"to standard error"

d = dict('foo', help="exclude files or directories which match these "
         #: E135:9
         "comma separated patterns (default: %s)" % DEFAULT_EXCLUDE)

d = dict('foo', help=u"exclude files or directories which match these "
                     u"comma separated patterns (default: %s)"
                     % DEFAULT_EXCLUDE)

#: E135+1:9 E135+2:9
d = dict('foo', help=b"exclude files or directories which match these "
         b"comma separated patterns (default: %s)"
         % DEFAULT_EXCLUDE)

d = dict('foo', help=br"exclude files or directories which match these "
                     br"comma separated patterns (default: %s)" %
                     DEFAULT_EXCLUDE)

d = dict('foo',
         help="exclude files or directories which match these "
              "comma separated patterns (default: %s)" %
              DEFAULT_EXCLUDE)

d = dict('foo',
         help="exclude files or directories which match these "
              "comma separated patterns (default: %s, %s)" %
              (DEFAULT_EXCLUDE, DEFAULT_IGNORE)
         )

d = dict('foo',
         help="exclude files or directories which match these "
              "comma separated patterns (default: %s, %s)" %
              # who knows what might happen here?
              (DEFAULT_EXCLUDE, DEFAULT_IGNORE)
         )

# parens used to allow the indenting.
troublefree_hash = {
    "hash": "value",
    "long": ("the quick brown fox jumps over the lazy dog before doing a "
             "somersault"),
    "long key that tends to happen more when you're indented": (
        "stringwithalongtoken you don't want to break"
    ),
}

# another accepted form
troublefree_hash = {
    "hash": "value",
    "long": "the quick brown fox jumps over the lazy dog before doing "
            "a somersault",
    ("long key that tends to happen more "
     "when you're indented"): "stringwithalongtoken you don't want to break",
}
# confusing but accepted... don't do that
troublesome_hash = {
    "hash": "value",
    "long": "the quick brown fox jumps over the lazy dog before doing a "
    #: E135:4
    "somersault",
    "longer":
        "the quick brown fox jumps over the lazy dog before doing a "
        "somersaulty",
    "long key that tends to happen more "
    "when you're indented": "stringwithalongtoken you don't want to break",
}

d = dict('foo',
         help="exclude files or directories which match these "
              "comma separated patterns (default: %s)" %
         DEFAULT_EXCLUDE
         )
d = dict('foo',
         help="exclude files or directories which match these "
              "comma separated patterns (default: %s)" % DEFAULT_EXCLUDE,
         foobar="this clearly should work, because it is at "
                "the right indent level",
         )

rv.update(dict.fromkeys(
    ('qualif_nr', 'reasonComment_en', 'reasonComment_fr',
     'reasonComment_de', 'reasonComment_it'),
    '?'), "foo",
    context={'alpha': 4, 'beta': 53242234, 'gamma': 17})


def f():
    try:
        if not Debug:
            hello('''
If you would like to see debugging output,
try: %s -d5
''' % sys.argv[0])


# The try statement above was not finished.
#: E901
d = {  # comment
    1: 2
}

# issue 138 (we won't allow this in parso)
#: E126+2:9
[
    12,  # this is a multi-line inline
         # comment
]
# issue 151
#: E122+1:3
if a > b and \
   c > d:
    moo_like_a_cow()

my_list = [
    1, 2, 3,
    4, 5, 6,
]

my_list = [1, 2, 3,
           4, 5, 6,
           ]

result = some_function_that_takes_arguments(
    'a', 'b', 'c',
    'd', 'e', 'f',
)

result = some_function_that_takes_arguments('a', 'b', 'c',
                                            'd', 'e', 'f',
                                            )

# issue 203
dica = {
    ('abc'
     'def'): (
        'abc'),
}

(abcdef[0]
 [1]) = (
    'abc')

('abc'
 'def') == (
    'abc')

# issue 214
bar(
    1).zap(
    2)

bar(
    1).zap(
        2)

if True:

    def example_issue254():
        return [node.copy(
            (
                replacement
                # First, look at all the node's current children.
                for child in node.children
                # Replace them.
                for replacement in replace(child)
            ),
            dict(name=token.undefined)
        )]


def valid_example():
    return [node.copy(properties=dict(
        (key, val if val is not None else token.undefined)
        for key, val in node.items()
    ))]


foo([
    'bug'
])

# issue 144, finally!
some_hash = {
    "long key that tends to happen more when you're indented":
        "stringwithalongtoken you don't want to break",
}

{
    1:
        999999 if True
        else 0,
}


abc = dedent(
    '''
        mkdir -p ./{build}/
        mv ./build/ ./{build}/%(revision)s/
    '''.format(
        build='build',
        # more stuff
    )
)
195 test/normalizer_issue_files/E12_second.py (new file)
@@ -0,0 +1,195 @@
if True:
    result = some_function_that_takes_arguments(
        'a', 'b', 'c',
        'd', 'e', 'f',
        #: E123:0
)
#: E122+1
if some_very_very_very_long_variable_name or var \
or another_very_long_variable_name:
    raise Exception()
#: E122+1
if some_very_very_very_long_variable_name or var[0] \
or another_very_long_variable_name:
    raise Exception()
if True:
    #: E122+1
    if some_very_very_very_long_variable_name or var \
    or another_very_long_variable_name:
        raise Exception()
if True:
    #: E122+1
    if some_very_very_very_long_variable_name or var[0] \
    or another_very_long_variable_name:
        raise Exception()

#: E901+1:8
dictionary = [
    "is": {
        # Might be a E122:4, but is not because the code is invalid Python.
        "nested": yes(),
    },
]
setup('',
      scripts=[''],
      classifiers=[
          #: E121:6
      'Development Status :: 4 - Beta',
          'Environment :: Console',
          'Intended Audience :: Developers',
      ])


#: E123+2:4 E291:15
abc = "E123", (
    "bad", "hanging", "close"
    )

result = {
    'foo': [
        'bar', {
            'baz': 'frop',
            #: E123
        }
        #: E123
    ]
    #: E123
}
result = some_function_that_takes_arguments(
    'a', 'b', 'c',
    'd', 'e', 'f',
    #: E123
)
my_list = [1, 2, 3,
           4, 5, 6,
           #: E124:0
]
my_list = [1, 2, 3,
           4, 5, 6,
           #: E124:19
                   ]
#: E124+2
result = some_function_that_takes_arguments('a', 'b', 'c',
                                            'd', 'e', 'f',
)
fooff(aaaa,
      cca(
          vvv,
          dadd
      ), fff,
      #: E124:0
)
fooff(aaaa,
      ccaaa(
          vvv,
          dadd
      ),
      fff,
      #: E124:0
)
d = dict('foo',
         help="exclude files or directories which match these "
              "comma separated patterns (default: %s)" % DEFAULT_EXCLUDE
         #: E124:14
              )

if line_removed:
    self.event(cr, uid,
        #: E128:8
        name="Removing the option for contract",
        #: E128:8
        description="contract line has been removed",
        #: E124:8
        )

#: E129+1:4
if foo is None and bar is "frop" and \
    blah == 'yeah':
    blah = 'yeahnah'


#: E129+1:4 E129+2:4
def long_function_name(
    var_one, var_two, var_three,
    var_four):
    hello(var_one)


def qualify_by_address(
    #: E129:4 E129+1:4
    self, cr, uid, ids, context=None,
    params_to_check=frozenset(QUALIF_BY_ADDRESS_PARAM)):
    """ This gets called by the web server """


#: E129+1:4 E129+2:4
if (a == 2 or
    b == "abc def ghi"
    "jkl mno"):
    True

my_list = [
    1, 2, 3,
    4, 5, 6,
    #: E123:8
        ]

abris = 3 + \
    4 + \
    5 + 6

fixed = re.sub(r'\t+', ' ', target[c::-1], 1)[::-1] + \
    target[c + 1:]

rv.update(dict.fromkeys((
    'qualif_nr', 'reasonComment_en', 'reasonComment_fr',
    #: E121:12
            'reasonComment_de', 'reasonComment_it'),
          '?'),
    #: E128:4
    "foo")
#: E126+1:8
eat_a_dict_a_day({
        "foo": "bar",
})
#: E129+1:4
if (
    x == (
        3
    #: E129:4
    ) or
        y == 4):
    pass
#: E129+1:4 E121+2:8 E129+3:4
if (
    x == (
        3
    ) or
    x == (
        # This one has correct indentation.
        3
    #: E129:4
    ) or
        y == 4):
    pass
troublesome_hash = {
    "hash": "value",
    #: E135+1:8
    "long": "the quick brown fox jumps over the lazy dog before doing a "
        "somersault",
}

# Arguments on first line forbidden when not using vertical alignment
#: E128+1:4
foo = long_function_name(var_one, var_two,
    var_three, var_four)

#: E128+1:4
hello('l.%s\t%s\t%s\t%r' %
    (token[2][0], pos, tokenize.tok_name[token[0]], token[1]))


def qualify_by_address(self, cr, uid, ids, context=None,
                       #: E128:8
        params_to_check=frozenset(QUALIF_BY_ADDRESS_PARAM)):
    """ This gets called by the web server """
116 test/normalizer_issue_files/E12_third.py (new file)
@@ -0,0 +1,116 @@
#: E128+1
foo(1, 2, 3,
4, 5, 6)
#: E128+1:1
foo(1, 2, 3,
 4, 5, 6)
#: E128+1:2
foo(1, 2, 3,
  4, 5, 6)
#: E128+1:3
foo(1, 2, 3,
   4, 5, 6)
foo(1, 2, 3,
    4, 5, 6)
#: E127+1:5
foo(1, 2, 3,
     4, 5, 6)
#: E127+1:6
foo(1, 2, 3,
      4, 5, 6)
#: E127+1:7
foo(1, 2, 3,
       4, 5, 6)
#: E127+1:8
foo(1, 2, 3,
        4, 5, 6)
#: E127+1:9
foo(1, 2, 3,
         4, 5, 6)
#: E127+1:10
foo(1, 2, 3,
          4, 5, 6)
#: E127+1:11
foo(1, 2, 3,
           4, 5, 6)
#: E127+1:12
foo(1, 2, 3,
            4, 5, 6)
#: E127+1:13
foo(1, 2, 3,
             4, 5, 6)
if line_removed:
    #: E128+1:14 E128+2:14
    self.event(cr, uid,
              name="Removing the option for contract",
              description="contract line has been removed",
              )

if line_removed:
    self.event(cr, uid,
               #: E127:16
                name="Removing the option for contract",
               #: E127:16
                description="contract line has been removed",
               #: E124:16
                )
rv.update(d=('a', 'b', 'c'),
          #: E127:13
             e=42)

#: E135+2:17
rv.update(d=('a' + 'b', 'c'),
          e=42, f=42
                 + 42)
rv.update(d=('a' + 'b', 'c'),
          e=42, f=42
          + 42)
#: E127+1:26
input1 = {'a': {'calc': 1 + 2}, 'b': 1
                          + 42}
#: E128+2:17
rv.update(d=('a' + 'b', 'c'),
          e=42, f=(42
                 + 42))

if True:
    def example_issue254():
        #:
        return [node.copy(
            (
                #: E121:16 E121+3:20
                replacement
                # First, look at all the node's current children.
                for child in node.children
                    for replacement in replace(child)
            ),
            dict(name=token.undefined)
        )]
# TODO multiline docstring are currently not handled. E125+1:4?
if ("""
    """):
    pass

# TODO same
for foo in """
    abc
    123
    """.strip().split():
    hello(foo)
abc = dedent(
    '''
        mkdir -p ./{build}/
        mv ./build/ ./{build}/%(revision)s/
    '''.format(
    #: E121:4 E121+1:4 E123+2:0
    build='build',
    # more stuff
)
)
#: E701+1: E122+1
if True:\
hello(True)

#: E128+1
foobar(a
, end=' ')
52 test/normalizer_issue_files/E20.py (new file)
@@ -0,0 +1,52 @@
#: E201:5
spam( ham[1], {eggs: 2})
#: E201:9
spam(ham[ 1], {eggs: 2})
#: E201:14
spam(ham[1], { eggs: 2})

# Okay
spam(ham[1], {eggs: 2})


#: E202:22
spam(ham[1], {eggs: 2} )
#: E202:21
spam(ham[1], {eggs: 2 })
#: E202:10
spam(ham[1 ], {eggs: 2})
# Okay
spam(ham[1], {eggs: 2})

result = func(
    arg1='some value',
    arg2='another value',
)

result = func(
    arg1='some value',
    arg2='another value'
)

result = [
    item for item in items
    if item > 5
]

#: E203:9
if x == 4 :
    foo(x, y)
    x, y = y, x
if x == 4:
    #: E203:12 E702:13
    a = x, y ; x, y = y, x
if x == 4:
    foo(x, y)
    #: E203:12
    x, y = y , x
# Okay
if x == 4:
    foo(x, y)
    x, y = y, x
a[b1, :1] == 3
b = a[:, b1]
16 test/normalizer_issue_files/E21.py (new file)
@@ -0,0 +1,16 @@
#: E211:4
spam (1)
#: E211:4 E211:19
dict ['key'] = list [index]
#: E211:11
dict['key'] ['subkey'] = list[index]
# Okay
spam(1)
dict['key'] = list[index]


# This is not prohibited by PEP8, but avoid it.
# Dave: I think this is extremely stupid. Use the same convention everywhere.
#: E211:9
class Foo (Bar, Baz):
    pass
156  test/normalizer_issue_files/E22.py  Normal file
@@ -0,0 +1,156 @@
a = 12 + 3
#: E221:5 E229:8
b = 4  +  5
#: E221:1
x             = 1
#: E221:1
y             = 2
long_variable = 3
#: E221:4
x[0]          = 1
#: E221:4
x[1]          = 2
long_variable = 3
#: E221:8 E229:19
x = f(x)  + 1
y = long_variable + 2
#: E221:8 E229:19
z = x[0]  + 3
#: E221+2:13
text = """
    bar
    foo %s"""  % rofl
# Okay
x = 1
y = 2
long_variable = 3


#: E221:7
a = a +  1
b = b + 10
#: E221:3
x =  -1
#: E221:3
y =  -2
long_variable = 3
#: E221:6
x[0] =  1
#: E221:6
x[1] =  2
long_variable = 3


#: E223+1:1
foobart = 4
a	= 3  # aligned with tab


#: E223:4
a +=	1
b +=	1000


#: E225:12
submitted +=1
#: E225:9
submitted+= 1
#: E225:3
c =-1
#: E229:7
x = x /2 - 1
#: E229:11
c = alpha -4
#: E229:10
c = alpha- 4
#: E229:8
z = x **y
#: E229:14
z = (x + 1) **y
#: E229:13
z = (x + 1)** y
#: E227:14
_1kB = _1MB >>10
#: E227:11
_1kB = _1MB>> 10
#: E225:1 E225:2 E229:4
i=i+ 1
#: E225:1 E225:2 E229:5
i=i +1
#: E225:1 E225:2
i=i+1
#: E225:3
i =i+1
#: E225:1
i= i+1
#: E229:8
c = (a +b)*(a - b)
#: E229:7
c = (a+ b)*(a - b)

z = 2//30
c = (a+b) * (a-b)
x = x*2 - 1
x = x/2 - 1
# TODO whitespace should be the other way around according to pep8.
x = x / 2-1

hypot2 = x*x + y*y
c = (a + b)*(a - b)


def halves(n):
    return (i//2 for i in range(n))


#: E227:11 E227:13
_1kB = _1MB>>10
#: E227:11 E227:13
_1MB = _1kB<<10
#: E227:5 E227:6
a = b|c
#: E227:5 E227:6
b = c&a
#: E227:5 E227:6
c = b^a
#: E228:5 E228:6
a = b%c
#: E228:9 E228:10
msg = fmt%(errno, errmsg)
#: E228:25 E228:26
msg = "Error %d occurred"%errno

#: E228:7
a = b %c
a = b % c

# Okay
i = i + 1
submitted += 1
x = x * 2 - 1
hypot2 = x * x + y * y
c = (a + b) * (a - b)
_1MiB = 2 ** 20
_1TiB = 2**30
foo(bar, key='word', *args, **kwargs)
baz(**kwargs)
negative = -1
spam(-1)
-negative
func1(lambda *args, **kw: (args, kw))
func2(lambda a, b=h[:], c=0: (a, b, c))
if not -5 < x < +5:
    #: E227:12
    print >>sys.stderr, "x is out of range."
print >> sys.stdout, "x is an integer."
x = x / 2 - 1


def squares(n):
    return (i**2 for i in range(n))


ENG_PREFIXES = {
    -6: "\u03bc",  # Greek letter mu
    -3: "m",
}
16  test/normalizer_issue_files/E23.py  Normal file
@@ -0,0 +1,16 @@
#: E231:7
a = (1,2)
#: E231:5
a[b1,:]
#: E231:10
a = [{'a':''}]
# Okay
a = (4,)
#: E202:7
b = (5, )
c = {'text': text[5:]}

result = {
    'key1': 'value',
    'key2': 'value',
}
36  test/normalizer_issue_files/E25.py  Normal file
@@ -0,0 +1,36 @@
#: E251:11 E251:13
def foo(bar = False):
    '''Test function with an error in declaration'''
    pass


#: E251:8
foo(bar= True)
#: E251:7
foo(bar =True)
#: E251:7 E251:9
foo(bar = True)
#: E251:13
y = bar(root= "sdasd")
parser.add_argument('--long-option',
                    #: E135+1:20
                    default=
                    "/rather/long/filesystem/path/here/blah/blah/blah")
parser.add_argument('--long-option',
                    default=
                    "/rather/long/filesystem")
# TODO this looks so stupid.
parser.add_argument('--long-option', default
                    ="/rather/long/filesystem/path/here/blah/blah/blah")
#: E251+2:7 E251+2:9
foo(True,
    baz=(1, 2),
    biz = 'foo'
    )
# Okay
foo(bar=(1 == 1))
foo(bar=(1 != 1))
foo(bar=(1 >= 1))
foo(bar=(1 <= 1))
(options, args) = parser.parse_args()
d[type(None)] = _deepcopy_atomic
78  test/normalizer_issue_files/E26.py  Normal file
@@ -0,0 +1,78 @@
#: E261:4
pass # an inline comment
#: E261:4
pass# an inline comment

# Okay
pass  # an inline comment
pass  # an inline comment
#: E262:11
x = x + 1  #Increment x
#: E262:11
x = x + 1  #  Increment x
#: E262:11
x = y + 1  #: Increment x
#: E265
#Block comment
a = 1
#: E265+1
m = 42
#! This is important
mx = 42 - 42

# Comment without anything is not an issue.
#
# However if there are comments at the end without anything it obviously
# doesn't make too much sense.
#: E262:9
foo = 1  #


#: E266+2:4 E266+5:4
def how_it_feel(r):

    ### This is a variable ###
    a = 42

    ### Of course it is unused
    return


#: E266 E266+1
##if DEBUG:
## logging.error()
#: E266
#########################################

# Not at the beginning of a file
#: E265
#!/usr/bin/env python

# Okay

pass  # an inline comment
x = x + 1  # Increment x
y = y + 1  #: Increment x

# Block comment
a = 1

# Block comment1

# Block comment2
aaa = 1


# example of docstring (not parsed)
def oof():
    """
    #foo not parsed
    """

###########################################################################
#                               A SEPARATOR                               #
###########################################################################

# ####################################################################### #
# ########################## another separator ########################## #
# ####################################################################### #
49  test/normalizer_issue_files/E27.py  Normal file
@@ -0,0 +1,49 @@
# Okay
from u import (a, b)
from v import c, d
#: E221:13
from w import  (e, f)
#: E275:13
from w import(e, f)
#: E275:29
from importable.module import(e, f)
try:
    #: E275:33
    from importable.module import(e, f)
except ImportError:
    pass
# Okay
True and False
#: E221:8
True and  False
#: E221:4
True  and False
#: E221:2
if  1:
    pass
# Syntax Error, no indentation
#: E903+1
if 1:
pass
#: E223:8
True and	False
#: E223:4 E223:9
True	and 	False
#: E221:5
a and  b
#: E221:5
1 and  b
#: E221:5
a and  2
#: E221:1 E221:6
1  and  b
#: E221:1 E221:6
a  and  2
#: E221:4
this  and False
#: E223:5
a and	b
#: E223:1
a	and b
#: E223:4 E223:9
this	and 	False
15  test/normalizer_issue_files/E29.py  Normal file
@@ -0,0 +1,15 @@
# Okay
# 情
#: W291:5
print 


#: W291+1
class Foo(object):
    
    bang = 12


#: W291+1:34
'''multiline
string with trailing whitespace'''    
177  test/normalizer_issue_files/E30.py  Normal file
@@ -0,0 +1,177 @@
#: E301+4
class X:

    def a():
        pass
    def b():
        pass


#: E301+5
class X:

    def a():
        pass
    # comment
    def b():
        pass


# -*- coding: utf-8 -*-
def a():
    pass


#: E302+1:0
"""Main module."""
def _main():
    pass


#: E302+1:0
foo = 1
def get_sys_path():
    return sys.path


#: E302+3:0
def a():
    pass

def b():
    pass


#: E302+5:0
def a():
    pass

# comment

def b():
    pass


#: E303+3:0
print



#: E303+3:0 E303+4:0
print




print
#: E303+3:0
print



# comment

print


#: E303+3 E303+6
def a():
    print


    # comment


    # another comment

    print


#: E302+2
a = 3
#: E304+1
@decorator

def function():
    pass


#: E303+3
# something



"""This class docstring comes on line 5.
It gives error E303: too many blank lines (3)
"""


#: E302+6
def a():
    print

    # comment

    # another comment
a()


#: E302+7
def a():
    print

    # comment

    # another comment

try:
    a()
except Exception:
    pass


#: E302+4
def a():
    print

# Two spaces before comments, too.
if a():
    a()


#: E301+2
def a():
    x = 1
    def b():
        pass


#: E301+2 E301+4
def a():
    x = 2
    def b():
        x = 1
        def c():
            pass


#: E301+2 E301+4 E301+5
def a():
    x = 1
    class C:
        pass
    x = 2
    def b():
        pass


#: E302+7
# Example from https://github.com/PyCQA/pycodestyle/issues/400
foo = 2


def main():
    blah, blah

if __name__ == '__main__':
    main()
175  test/normalizer_issue_files/E30not.py  Normal file
@@ -0,0 +1,175 @@
# Okay
class X:
    pass
# Okay


def foo():
    pass


# Okay
# -*- coding: utf-8 -*-
class X:
    pass


# Okay
# -*- coding: utf-8 -*-
def foo():
    pass


# Okay
class X:

    def a():
        pass

    # comment
    def b():
        pass

    # This is a
    # ... multi-line comment

    def c():
        pass


# This is a
# ... multi-line comment

@some_decorator
class Y:

    def a():
        pass

    # comment

    def b():
        pass

    @property
    def c():
        pass


try:
    from nonexistent import Bar
except ImportError:
    class Bar(object):
        """This is a Bar replacement"""


def with_feature(f):
    """Some decorator"""
    wrapper = f
    if has_this_feature(f):
        def wrapper(*args):
            call_feature(args[0])
            return f(*args)
    return wrapper


try:
    next
except NameError:
    def next(iterator, default):
        for item in iterator:
            return item
        return default


def a():
    pass


class Foo():
    """Class Foo"""

    def b():

        pass


# comment
def c():
    pass


# comment


def d():
    pass

# This is a
# ... multi-line comment

# And this one is
# ... a second paragraph
# ... which spans on 3 lines


# Function `e` is below
# NOTE: Hey this is a testcase

def e():
    pass


def a():
    print

    # comment

    print

    print

# Comment 1

# Comment 2


# Comment 3

def b():

    pass


# Okay
def foo():
    pass


def bar():
    pass


class Foo(object):
    pass


class Bar(object):
    pass


if __name__ == '__main__':
    foo()
# Okay
classification_errors = None
# Okay
defined_properly = True
# Okay
defaults = {}
defaults.update({})


# Okay
def foo(x):
    classification = x
    definitely = not classification
39  test/normalizer_issue_files/E40.py  Normal file
@@ -0,0 +1,39 @@
#: E401:7
import os, sys
# Okay
import os
import sys

from subprocess import Popen, PIPE

from myclass import MyClass
from foo.bar.yourclass import YourClass

import myclass
import foo.bar.yourclass
# All Okay from here until the definition of VERSION
__all__ = ['abc']

import foo
__version__ = "42"

import foo
__author__ = "Simon Gomizelj"

import foo
try:
    import foo
except ImportError:
    pass
else:
    hello('imported foo')
finally:
    hello('made attempt to import foo')

import bar
VERSION = '1.2.3'

#: E402
import foo
#: E402
import foo
126  test/normalizer_issue_files/E50.py  Normal file
@@ -0,0 +1,126 @@
#: E501:4
a = '12345678901234567890123456789012345678901234567890123456789012345678901234567890'
#: E501:80
a = '1234567890123456789012345678901234567890123456789012345678901234567890' or \
    6
#: E501+1:80
a = 7 or \
    '1234567890123456789012345678901234567890123456789012345678901234567890' or \
    6
#: E501+1:80 E501+2:80
a = 7 or \
    '1234567890123456789012345678901234567890123456789012345678901234567890' or \
    '1234567890123456789012345678901234567890123456789012345678901234567890' or \
    6
#: E501:78
a = '1234567890123456789012345678901234567890123456789012345678901234567890'  # \
#: E502:78
a = ('123456789012345678901234567890123456789012345678901234567890123456789' \
     '01234567890')
#: E502+1:11
a = ('AAA \
      BBB' \
     'CCC')
#: E502:38
if (foo is None and bar is "e000" and \
        blah == 'yeah'):
    blah = 'yeahnah'
#
# Okay
a = ('AAA'
     'BBB')

a = ('AAA \
      BBB'
     'CCC')

a = 'AAA' \
    'BBB' \
    'CCC'

a = ('AAA\
BBBBBBBBB\
CCCCCCCCC\
DDDDDDDDD')
#
# Okay
if aaa:
    pass
elif bbb or \
        ccc:
    pass

ddd = \
    ccc

('\
' + ' \
')
('''
''' + ' \
')
#: E501:67 E225:21 E225:22
very_long_identifiers=and_terrible_whitespace_habits(are_no_excuse+for_long_lines)
#
# TODO Long multiline strings are not handled. E501?
'''multiline string
with a long long long long long long long long long long long long long long long long line
'''
#: E501
'''same thing, but this time without a terminal newline in the string
long long long long long long long long long long long long long long long long line'''
#
# issue 224 (unavoidable long lines in docstrings)
# Okay
"""
I'm some great documentation. Because I'm some great documentation, I'm
going to give you a reference to some valuable information about some API
that I'm calling:

    http://msdn.microsoft.com/en-us/library/windows/desktop/aa363858(v=vs.85).aspx
"""
#: E501
"""
longnospaceslongnospaceslongnospaceslongnospaceslongnospaceslongnospaceslongnospaceslongnospaces"""


# Regression test for #622
def foo():
    """Lorem ipsum dolor sit amet, consectetur adipiscing elit. Duis pulvinar vitae
    """


# Okay
"""
This
                                                              almost_empty_line
"""

"""
This
                                                              almost_empty_line
"""
# A basic comment
#: E501
# with a long long long long long long long long long long long long long long long long line

#
# Okay
# I'm some great comment. Because I'm so great, I'm going to give you a
# reference to some valuable information about some API that I'm calling:
#
#     http://msdn.microsoft.com/en-us/library/windows/desktop/aa363858(v=vs.85).aspx

x = 3

# longnospaceslongnospaceslongnospaceslongnospaceslongnospaceslongnospaceslongnospaceslongnospaces

#
# Okay
# This
#                                                             almost_empty_line

#
#: E501+1
# This
#                                                              almost_empty_line
25  test/normalizer_issue_files/E70.py  Normal file
@@ -0,0 +1,25 @@
#: E701:6
if a: a = False
#: E701:41
if not header or header[:6] != 'bytes=': pass
#: E702:9
a = False; b = True
#: E702:16 E402
import bdist_egg; bdist_egg.write_safety_flag(cmd.egg_info, safe)
#: E703:12 E402
import shlex;
#: E702:8 E703:22
del a[:]; a.append(42);


#: E704:10
def f(x): return 2


#: E704:10
def f(x): return 2 * x


while all is round:
    #: E704:14
    def f(x): return 2 * x
93  test/normalizer_issue_files/E71.py  Normal file
@@ -0,0 +1,93 @@
#: E711:7
if res == None:
    pass
#: E711:7
if res != None:
    pass
#: E711:8
if None == res:
    pass
#: E711:8
if None != res:
    pass
#: E711:10
if res[1] == None:
    pass
#: E711:10
if res[1] != None:
    pass
#: E711:8
if None != res[1]:
    pass
#: E711:8
if None == res[1]:
    pass

#
#: E712:7
if res == True:
    pass
#: E712:7
if res != False:
    pass
#: E712:8
if True != res:
    pass
#: E712:9
if False == res:
    pass
#: E712:10
if res[1] == True:
    pass
#: E712:10
if res[1] != False:
    pass

if x is False:
    pass

#
#: E713:9
if not X in Y:
    pass
#: E713:11
if not X.B in Y:
    pass
#: E713:9
if not X in Y and Z == "zero":
    pass
#: E713:24
if X == "zero" or not Y in Z:
    pass

#
#: E714:9
if not X is Y:
    pass
#: E714:11
if not X.B is Y:
    pass

#
# Okay
if x not in y:
    pass

if not (X in Y or X is Z):
    pass

if not (X in Y):
    pass

if x is not y:
    pass

if TrueElement.get_element(True) == TrueElement.get_element(False):
    pass

if (True) == TrueElement or x == TrueElement:
    pass

assert (not foo) in bar
assert {'x': not foo} in bar
assert [42, not foo] in bar
79  test/normalizer_issue_files/E72.py  Normal file
@@ -0,0 +1,79 @@
#: E721:3
if type(res) == type(42):
    pass
#: E721:3
if type(res) != type(""):
    pass

import types

if res == types.IntType:
    pass

import types

#: E721:3
if type(res) is not types.ListType:
    pass
#: E721:7 E721:35
assert type(res) == type(False) or type(res) == type(None)
#: E721:7
assert type(res) == type([])
#: E721:7
assert type(res) == type(())
#: E721:7
assert type(res) == type((0,))
#: E721:7
assert type(res) == type((0))
#: E721:7
assert type(res) != type((1,))
#: E721:7
assert type(res) is type((1,))
#: E721:7
assert type(res) is not type((1,))

# Okay
#: E402
import types

if isinstance(res, int):
    pass
if isinstance(res, str):
    pass
if isinstance(res, types.MethodType):
    pass

#: E721:3 E721:25
if type(a) != type(b) or type(a) == type(ccc):
    pass
#: E721
type(a) != type(b)
#: E721
1 != type(b)
#: E721
type(b) != 1
1 != 1

try:
    pass
#: E722
except:
    pass
try:
    pass
except Exception:
    pass
#: E722
except:
    pass
# Okay
fake_code = """"
try:
    do_something()
except:
    pass
"""
try:
    pass
except Exception:
    pass
16  test/normalizer_issue_files/E73.py  Normal file
@@ -0,0 +1,16 @@
#: E731:4
f = lambda x: 2 * x
while False:
    #: E731:10
    foo = lambda y, z: 2 * x
# Okay
f = object()
f.method = lambda: 'Method'

f = {}
f['a'] = lambda x: x ** 2

f = []
f.append(lambda x: x ** 2)

lambda: 'no-op'
29  test/normalizer_issue_files/LICENSE  Normal file
@@ -0,0 +1,29 @@
Copyright © 2006-2009 Johann C. Rocholl <johann@rocholl.net>
Copyright © 2009-2014 Florent Xicluna <florent.xicluna@gmail.com>
Copyright © 2014-2016 Ian Lee <IanLee1521@gmail.com>
Copyright © 2017-???? Dave Halter <davidhalter88@gmail.com>

Dave: The files in this folder were ported from pycodestyle and some
modifications were made.

Licensed under the terms of the Expat License

Permission is hereby granted, free of charge, to any person
obtaining a copy of this software and associated documentation files
(the "Software"), to deal in the Software without restriction,
including without limitation the rights to use, copy, modify, merge,
publish, distribute, sublicense, and/or sell copies of the Software,
and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:

The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
53  test/normalizer_issue_files/allowed_syntax.py  Normal file
@@ -0,0 +1,53 @@
"""
Some syntax errors are a bit complicated and need exact checking. Here we
gather some of the potentially dangerous ones.
"""

from __future__ import division

# With a dot it's not a future import anymore.
from .__future__ import absolute_import

'' ''
''r''u''
b'' BR''

for x in [1]:
    try:
        continue  # Only the other continue and pass is an error.
    finally:
        #: E901
        continue


for x in [1]:
    break
    continue

try:
    pass
except ZeroDivisionError:
    pass
#: E722:0
except:
    pass

try:
    pass
#: E722:0 E901:0
except:
    pass
except ZeroDivisionError:
    pass


r'\n'
r'\x'
b'\n'


a = 3


def x(b=a):
    global a
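The docstring above is the point of this fixture: these constructs parse, but parso flags the dangerous ones (annotated `E9xx`) as issues rather than refusing to build a tree. Roughly how such a file can be checked, assuming parso's public `load_grammar`/`iter_errors` API:

import parso

code = '''\
try:
    pass
except:
    pass
except ZeroDivisionError:
    pass
'''

# parso still produces a tree for problematic input and reports the
# problems as issue objects, which is what lets these fixtures annotate
# expected E9xx codes instead of expecting exceptions.
grammar = parso.load_grammar()
module = grammar.parse(code)
for issue in grammar.iter_errors(module):
    print(issue.start_pos, issue.code, issue.message)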
2  test/normalizer_issue_files/allowed_syntax_python2.py  Normal file
@@ -0,0 +1,2 @@
's' b''
u's' b'ä'
3  test/normalizer_issue_files/allowed_syntax_python3.4.py  Normal file
@@ -0,0 +1,3 @@
*foo, a = (1,)
*foo[0], a = (1,)
*[], a = (1,)
23  test/normalizer_issue_files/allowed_syntax_python3.5.py  Normal file
@@ -0,0 +1,23 @@
"""
Mostly allowed syntax in Python 3.5.
"""


async def foo():
    await bar()
    #: E901
    yield from []
    return
    #: E901
    return ''


# With decorator it's a different statement.
@bla
async def foo():
    await bar()
    #: E901
    yield from []
    return
    #: E901
    return ''
45  test/normalizer_issue_files/allowed_syntax_python3.6.py  Normal file
@@ -0,0 +1,45 @@
foo: int = 4
(foo): int = 3
((foo)): int = 3
foo.bar: int
foo[3]: int


def glob():
    global x
    y: foo = x


def c():
    a = 3

    def d():
        class X():
            nonlocal a


def x():
    a = 3

    def y():
        nonlocal a


def x():
    def y():
        nonlocal a

    a = 3


def x():
    a = 3

    def y():
        class z():
            nonlocal a


a = *args, *args
error[(*args, *args)] = 3
*args, *args
6  test/normalizer_issue_files/latin-1.py  Normal file
@@ -0,0 +1,6 @@
# -*- coding: latin-1 -*-
# Test non-UTF8 encoding
latin1 = ('��������������������������������'
          '������������������������������')

c = ("w�")
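This fixture exists because the tokenizer has to honor the PEP 263 coding declaration before decoding; the replacement characters above stand in for latin-1 text that did not survive this page's UTF-8 rendering. A small sketch using `parso.utils.python_bytes_to_unicode` (the byte string below is an illustrative stand-in, not the fixture's original text):

from parso.utils import python_bytes_to_unicode

# Raw source bytes with a latin-1 coding declaration; 0xE4 is 'ä' in
# latin-1 but is not a valid UTF-8 sequence on its own.
source = b"# -*- coding: latin-1 -*-\nc = ('w\xe4')\n"

# The helper reads the PEP 263 header and decodes with latin-1 instead of
# assuming UTF-8, which would fail on the 0xE4 byte.
print(python_bytes_to_unicode(source))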
14  test/normalizer_issue_files/python2.7.py  Normal file
@@ -0,0 +1,14 @@
import sys

print 1, 2 >> sys.stdout


foo = ur'This is not possible in Python 3.'

# This is actually printing a tuple.
#: E275:5
print(1, 2)

# True and False are not keywords in Python 2 and therefore there's no need for
# a space.
norman = True+False
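As the comments above note, lines like `print 1, 2 >> sys.stdout` only parse under the Python 2 grammar, so version selection matters when feeding these fixtures to the parser. A minimal sketch, assuming parso's version pinning via the `version` argument:

import parso

py2_code = 'print 1, 2 >> sys.stdout\n'

# Pin the grammar to 2.7 so the print statement parses as a real statement.
tree = parso.parse(py2_code, version='2.7')
print(tree.children[0].type)

# Under a Python 3 grammar the same source still yields a tree thanks to
# error recovery, but it contains error nodes instead of a print statement.
tree3 = parso.parse(py2_code, version='3.6')
print([child.type for child in tree3.children])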
Some files were not shown because too many files have changed in this diff.