mirror of
https://github.com/github/codeql.git
synced 2026-04-27 17:55:19 +02:00
Python: Copy Python extractor to codeql repo
This commit is contained in:
134
python/extractor/tests/tokenizer/basic.py
Normal file
134
python/extractor/tests/tokenizer/basic.py
Normal file
@@ -0,0 +1,134 @@
|
||||
|
||||
#AST nodes: Classes, Functions, Modules, expr, stmts
|
||||
|
||||
class C:
|
||||
|
||||
def stmts(p0, p1):
|
||||
global x
|
||||
assert x == 2
|
||||
y = 3
|
||||
y += 4
|
||||
while True:
|
||||
break
|
||||
while x > 0:
|
||||
x -= 1
|
||||
continue
|
||||
|
||||
f()
|
||||
for x in y:
|
||||
pass
|
||||
if x:
|
||||
print(y)
|
||||
import a
|
||||
import a.b as c
|
||||
import a as b
|
||||
from a.b import c
|
||||
|
||||
|
||||
with open("file") as f:
|
||||
pass
|
||||
try:
|
||||
1/0
|
||||
except Exception as ex:
|
||||
del y
|
||||
finally:
|
||||
del x
|
||||
if x:
|
||||
raise Exception()
|
||||
else:
|
||||
return
|
||||
|
||||
def exprs(p2, p3):
|
||||
p2.x = 2
|
||||
a = p3.y
|
||||
x = 1 + 2
|
||||
y = b'h4tpvhsa'
|
||||
call(arg0, arg1, name0="Hi", name1=y, *(), **{})
|
||||
x < y
|
||||
{1:1, 2: 2}
|
||||
|
||||
x[a, 7]
|
||||
(x for x in y)
|
||||
17 if x < y else 16
|
||||
lambda x : x * y
|
||||
[ 1, 2, a, x.b, p1.c ]
|
||||
[ a + "Hi" for a in str(y) ]
|
||||
|
||||
|
||||
|
||||
#a, *b = y
|
||||
u"Hi"
|
||||
x[0]
|
||||
x[y[0]]
|
||||
(p2, p3, 7)
|
||||
|
||||
#Some multiline strings
|
||||
'''
|
||||
Single quotes string'''
|
||||
|
||||
"""
|
||||
Double-quotes
|
||||
string"""
|
||||
|
||||
r'''
|
||||
Bytes
|
||||
'''
|
||||
|
||||
U"""
|
||||
Raw
|
||||
Unicode
|
||||
"""
|
||||
|
||||
#Decorated function
|
||||
@deco
|
||||
def f():
|
||||
pass
|
||||
|
||||
#Inner function (see ODASA-1774)
|
||||
def outer():
|
||||
def inner():
|
||||
pass
|
||||
|
||||
#Oddly laid out comprehension
|
||||
[[
|
||||
x for x in y
|
||||
]
|
||||
|
||||
for a in b
|
||||
]
|
||||
|
||||
#Nested binary operations
|
||||
"Hello" + " " + "world"
|
||||
1+2+f()
|
||||
1+(2+3)
|
||||
|
||||
# operations
|
||||
a|b&c+d-e
|
||||
x*f%g^h@j**k
|
||||
|
||||
#Augmented assigns
|
||||
a @= b
|
||||
a |= b
|
||||
a *= b
|
||||
|
||||
~a
|
||||
|
||||
#Comparisons
|
||||
<
|
||||
>
|
||||
<=
|
||||
>=
|
||||
!=
|
||||
==
|
||||
is
|
||||
is not
|
||||
|
||||
("""
|
||||
""")
|
||||
del x
|
||||
|
||||
`backticks`
|
||||
|
||||
x := y
|
||||
|
||||
1 <> 2
|
||||
472
python/extractor/tests/tokenizer/basic.tokens
Normal file
472
python/extractor/tests/tokenizer/basic.tokens
Normal file
@@ -0,0 +1,472 @@
|
||||
2,0-2,52: COMMENT '#AST nodes: Classes, Functions, Modules, expr, stmts'
|
||||
4,0-4,5: NAME 'class'
|
||||
4,6-4,7: NAME 'C'
|
||||
4,7-4,8: COLON ':'
|
||||
4,8-4,9: NEWLINE '\n'
|
||||
6,0-6,4: INDENT ' '
|
||||
6,4-6,7: NAME 'def'
|
||||
6,8-6,13: NAME 'stmts'
|
||||
6,13-6,14: LPAR '('
|
||||
6,14-6,16: NAME 'p0'
|
||||
6,16-6,17: COMMA ','
|
||||
6,18-6,20: NAME 'p1'
|
||||
6,20-6,21: RPAR ')'
|
||||
6,21-6,22: COLON ':'
|
||||
6,22-6,23: NEWLINE '\n'
|
||||
7,0-7,8: INDENT ' '
|
||||
7,8-7,14: NAME 'global'
|
||||
7,15-7,16: NAME 'x'
|
||||
7,16-7,17: NEWLINE '\n'
|
||||
8,8-8,14: NAME 'assert'
|
||||
8,15-8,16: NAME 'x'
|
||||
8,17-8,19: OP '=='
|
||||
8,20-8,21: NUMBER '2'
|
||||
8,21-8,22: NEWLINE '\n'
|
||||
9,8-9,9: NAME 'y'
|
||||
9,10-9,11: OP '='
|
||||
9,12-9,13: NUMBER '3'
|
||||
9,13-9,14: NEWLINE '\n'
|
||||
10,8-10,9: NAME 'y'
|
||||
10,10-10,12: OP '+='
|
||||
10,13-10,14: NUMBER '4'
|
||||
10,14-10,15: NEWLINE '\n'
|
||||
11,8-11,13: NAME 'while'
|
||||
11,14-11,18: NAME 'True'
|
||||
11,18-11,19: COLON ':'
|
||||
11,19-11,20: NEWLINE '\n'
|
||||
12,0-12,12: INDENT ' '
|
||||
12,12-12,17: NAME 'break'
|
||||
12,17-12,18: NEWLINE '\n'
|
||||
13,8-13,8: DEDENT ''
|
||||
13,8-13,13: NAME 'while'
|
||||
13,14-13,15: NAME 'x'
|
||||
13,16-13,17: OP '>'
|
||||
13,18-13,19: NUMBER '0'
|
||||
13,19-13,20: COLON ':'
|
||||
13,20-13,21: NEWLINE '\n'
|
||||
14,0-14,12: INDENT ' '
|
||||
14,12-14,13: NAME 'x'
|
||||
14,14-14,16: OP '-='
|
||||
14,17-14,18: NUMBER '1'
|
||||
14,18-14,19: NEWLINE '\n'
|
||||
15,12-15,20: NAME 'continue'
|
||||
15,20-15,21: NEWLINE '\n'
|
||||
17,8-17,8: DEDENT ''
|
||||
17,8-17,9: NAME 'f'
|
||||
17,9-17,10: LPAR '('
|
||||
17,10-17,11: RPAR ')'
|
||||
17,11-17,12: NEWLINE '\n'
|
||||
18,8-18,11: NAME 'for'
|
||||
18,12-18,13: NAME 'x'
|
||||
18,14-18,16: NAME 'in'
|
||||
18,17-18,18: NAME 'y'
|
||||
18,18-18,19: COLON ':'
|
||||
18,19-18,20: NEWLINE '\n'
|
||||
19,0-19,12: INDENT ' '
|
||||
19,12-19,16: NAME 'pass'
|
||||
19,16-19,17: NEWLINE '\n'
|
||||
20,8-20,8: DEDENT ''
|
||||
20,8-20,10: NAME 'if'
|
||||
20,11-20,12: NAME 'x'
|
||||
20,12-20,13: COLON ':'
|
||||
20,13-20,14: NEWLINE '\n'
|
||||
21,0-21,12: INDENT ' '
|
||||
21,12-21,17: NAME 'print'
|
||||
21,17-21,18: LPAR '('
|
||||
21,18-21,19: NAME 'y'
|
||||
21,19-21,20: RPAR ')'
|
||||
21,20-21,21: NEWLINE '\n'
|
||||
22,8-22,8: DEDENT ''
|
||||
22,8-22,14: NAME 'import'
|
||||
22,15-22,16: NAME 'a'
|
||||
22,16-22,17: NEWLINE '\n'
|
||||
23,8-23,14: NAME 'import'
|
||||
23,15-23,16: NAME 'a'
|
||||
23,16-23,17: DOT '.'
|
||||
23,17-23,18: NAME 'b'
|
||||
23,19-23,21: NAME 'as'
|
||||
23,22-23,23: NAME 'c'
|
||||
23,23-23,24: NEWLINE '\n'
|
||||
24,8-24,14: NAME 'import'
|
||||
24,15-24,16: NAME 'a'
|
||||
24,17-24,19: NAME 'as'
|
||||
24,20-24,21: NAME 'b'
|
||||
24,21-24,22: NEWLINE '\n'
|
||||
25,8-25,12: NAME 'from'
|
||||
25,13-25,14: NAME 'a'
|
||||
25,14-25,15: DOT '.'
|
||||
25,15-25,16: NAME 'b'
|
||||
25,17-25,23: NAME 'import'
|
||||
25,24-25,25: NAME 'c'
|
||||
25,25-25,26: NEWLINE '\n'
|
||||
28,8-28,12: NAME 'with'
|
||||
28,13-28,17: NAME 'open'
|
||||
28,17-28,18: LPAR '('
|
||||
28,18-28,24: STRING '"file"'
|
||||
28,24-28,25: RPAR ')'
|
||||
28,26-28,28: NAME 'as'
|
||||
28,29-28,30: NAME 'f'
|
||||
28,30-28,31: COLON ':'
|
||||
28,31-28,32: NEWLINE '\n'
|
||||
29,0-29,12: INDENT ' '
|
||||
29,12-29,16: NAME 'pass'
|
||||
29,16-29,17: NEWLINE '\n'
|
||||
30,8-30,8: DEDENT ''
|
||||
30,8-30,11: NAME 'try'
|
||||
30,11-30,12: COLON ':'
|
||||
30,12-30,13: NEWLINE '\n'
|
||||
31,0-31,12: INDENT ' '
|
||||
31,12-31,13: NUMBER '1'
|
||||
31,13-31,14: OP '/'
|
||||
31,14-31,15: NUMBER '0'
|
||||
31,15-31,16: NEWLINE '\n'
|
||||
32,8-32,8: DEDENT ''
|
||||
32,8-32,14: NAME 'except'
|
||||
32,15-32,24: NAME 'Exception'
|
||||
32,25-32,27: NAME 'as'
|
||||
32,28-32,30: NAME 'ex'
|
||||
32,30-32,31: COLON ':'
|
||||
32,31-32,32: NEWLINE '\n'
|
||||
33,0-33,12: INDENT ' '
|
||||
33,12-33,15: NAME 'del'
|
||||
33,16-33,17: NAME 'y'
|
||||
33,17-33,18: NEWLINE '\n'
|
||||
34,8-34,8: DEDENT ''
|
||||
34,8-34,15: NAME 'finally'
|
||||
34,15-34,16: COLON ':'
|
||||
34,16-34,17: NEWLINE '\n'
|
||||
35,0-35,12: INDENT ' '
|
||||
35,12-35,15: NAME 'del'
|
||||
35,16-35,17: NAME 'x'
|
||||
35,17-35,18: NEWLINE '\n'
|
||||
36,8-36,8: DEDENT ''
|
||||
36,8-36,10: NAME 'if'
|
||||
36,11-36,12: NAME 'x'
|
||||
36,12-36,13: COLON ':'
|
||||
36,13-36,14: NEWLINE '\n'
|
||||
37,0-37,12: INDENT ' '
|
||||
37,12-37,17: NAME 'raise'
|
||||
37,18-37,27: NAME 'Exception'
|
||||
37,27-37,28: LPAR '('
|
||||
37,28-37,29: RPAR ')'
|
||||
37,29-37,30: NEWLINE '\n'
|
||||
38,8-38,8: DEDENT ''
|
||||
38,8-38,12: NAME 'else'
|
||||
38,12-38,13: COLON ':'
|
||||
38,13-38,14: NEWLINE '\n'
|
||||
39,0-39,12: INDENT ' '
|
||||
39,12-39,18: NAME 'return'
|
||||
39,18-39,19: NEWLINE '\n'
|
||||
41,4-41,4: DEDENT ''
|
||||
41,4-41,4: DEDENT ''
|
||||
41,4-41,7: NAME 'def'
|
||||
41,8-41,13: NAME 'exprs'
|
||||
41,13-41,14: LPAR '('
|
||||
41,14-41,16: NAME 'p2'
|
||||
41,16-41,17: COMMA ','
|
||||
41,18-41,20: NAME 'p3'
|
||||
41,20-41,21: RPAR ')'
|
||||
41,21-41,22: COLON ':'
|
||||
41,22-41,23: NEWLINE '\n'
|
||||
42,0-42,8: INDENT ' '
|
||||
42,8-42,10: NAME 'p2'
|
||||
42,10-42,11: DOT '.'
|
||||
42,11-42,12: NAME 'x'
|
||||
42,13-42,14: OP '='
|
||||
42,15-42,16: NUMBER '2'
|
||||
42,16-42,17: NEWLINE '\n'
|
||||
43,8-43,9: NAME 'a'
|
||||
43,10-43,11: OP '='
|
||||
43,12-43,14: NAME 'p3'
|
||||
43,14-43,15: DOT '.'
|
||||
43,15-43,16: NAME 'y'
|
||||
43,16-43,17: NEWLINE '\n'
|
||||
44,8-44,9: NAME 'x'
|
||||
44,10-44,11: OP '='
|
||||
44,12-44,13: NUMBER '1'
|
||||
44,14-44,15: OP '+'
|
||||
44,16-44,17: NUMBER '2'
|
||||
44,17-44,18: NEWLINE '\n'
|
||||
45,8-45,9: NAME 'y'
|
||||
45,10-45,11: OP '='
|
||||
45,12-45,23: STRING 'b\'h4tpvhsa\''
|
||||
45,23-45,24: NEWLINE '\n'
|
||||
46,8-46,12: NAME 'call'
|
||||
46,12-46,13: LPAR '('
|
||||
46,13-46,17: NAME 'arg0'
|
||||
46,17-46,18: COMMA ','
|
||||
46,19-46,23: NAME 'arg1'
|
||||
46,23-46,24: COMMA ','
|
||||
46,25-46,30: NAME 'name0'
|
||||
46,30-46,31: OP '='
|
||||
46,31-46,35: STRING '"Hi"'
|
||||
46,35-46,36: COMMA ','
|
||||
46,37-46,42: NAME 'name1'
|
||||
46,42-46,43: OP '='
|
||||
46,43-46,44: NAME 'y'
|
||||
46,44-46,45: COMMA ','
|
||||
46,46-46,47: OP '*'
|
||||
46,47-46,48: LPAR '('
|
||||
46,48-46,49: RPAR ')'
|
||||
46,49-46,50: COMMA ','
|
||||
46,51-46,53: OP '**'
|
||||
46,53-46,54: LBRACE '{'
|
||||
46,54-46,55: RBRACE '}'
|
||||
46,55-46,56: RPAR ')'
|
||||
46,56-46,57: NEWLINE '\n'
|
||||
47,8-47,9: NAME 'x'
|
||||
47,10-47,11: OP '<'
|
||||
47,12-47,13: NAME 'y'
|
||||
47,13-47,14: NEWLINE '\n'
|
||||
48,8-48,9: LBRACE '{'
|
||||
48,9-48,10: NUMBER '1'
|
||||
48,10-48,11: COLON ':'
|
||||
48,11-48,12: NUMBER '1'
|
||||
48,12-48,13: COMMA ','
|
||||
48,14-48,15: NUMBER '2'
|
||||
48,15-48,16: COLON ':'
|
||||
48,17-48,18: NUMBER '2'
|
||||
48,18-48,19: RBRACE '}'
|
||||
48,19-48,20: NEWLINE '\n'
|
||||
50,8-50,9: NAME 'x'
|
||||
50,9-50,10: LSQB '['
|
||||
50,10-50,11: NAME 'a'
|
||||
50,11-50,12: COMMA ','
|
||||
50,13-50,14: NUMBER '7'
|
||||
50,14-50,15: RSQB ']'
|
||||
50,15-50,16: NEWLINE '\n'
|
||||
51,8-51,9: LPAR '('
|
||||
51,9-51,10: NAME 'x'
|
||||
51,11-51,14: NAME 'for'
|
||||
51,15-51,16: NAME 'x'
|
||||
51,17-51,19: NAME 'in'
|
||||
51,20-51,21: NAME 'y'
|
||||
51,21-51,22: RPAR ')'
|
||||
51,22-51,23: NEWLINE '\n'
|
||||
52,8-52,10: NUMBER '17'
|
||||
52,11-52,13: NAME 'if'
|
||||
52,14-52,15: NAME 'x'
|
||||
52,16-52,17: OP '<'
|
||||
52,18-52,19: NAME 'y'
|
||||
52,20-52,24: NAME 'else'
|
||||
52,25-52,27: NUMBER '16'
|
||||
52,27-52,28: NEWLINE '\n'
|
||||
53,8-53,14: NAME 'lambda'
|
||||
53,15-53,16: NAME 'x'
|
||||
53,17-53,18: COLON ':'
|
||||
53,19-53,20: NAME 'x'
|
||||
53,21-53,22: OP '*'
|
||||
53,23-53,24: NAME 'y'
|
||||
53,24-53,25: NEWLINE '\n'
|
||||
54,8-54,9: LSQB '['
|
||||
54,10-54,11: NUMBER '1'
|
||||
54,11-54,12: COMMA ','
|
||||
54,13-54,14: NUMBER '2'
|
||||
54,14-54,15: COMMA ','
|
||||
54,16-54,17: NAME 'a'
|
||||
54,17-54,18: COMMA ','
|
||||
54,19-54,20: NAME 'x'
|
||||
54,20-54,21: DOT '.'
|
||||
54,21-54,22: NAME 'b'
|
||||
54,22-54,23: COMMA ','
|
||||
54,24-54,26: NAME 'p1'
|
||||
54,26-54,27: DOT '.'
|
||||
54,27-54,28: NAME 'c'
|
||||
54,29-54,30: RSQB ']'
|
||||
54,30-54,31: NEWLINE '\n'
|
||||
55,8-55,9: LSQB '['
|
||||
55,10-55,11: NAME 'a'
|
||||
55,12-55,13: OP '+'
|
||||
55,14-55,18: STRING '"Hi"'
|
||||
55,19-55,22: NAME 'for'
|
||||
55,23-55,24: NAME 'a'
|
||||
55,25-55,27: NAME 'in'
|
||||
55,28-55,31: NAME 'str'
|
||||
55,31-55,32: LPAR '('
|
||||
55,32-55,33: NAME 'y'
|
||||
55,33-55,34: RPAR ')'
|
||||
55,35-55,36: RSQB ']'
|
||||
55,36-55,37: NEWLINE '\n'
|
||||
59,8-59,18: COMMENT '#a, *b = y'
|
||||
60,8-60,13: STRING 'u"Hi"'
|
||||
60,13-60,14: NEWLINE '\n'
|
||||
61,8-61,9: NAME 'x'
|
||||
61,9-61,10: LSQB '['
|
||||
61,10-61,11: NUMBER '0'
|
||||
61,11-61,12: RSQB ']'
|
||||
61,12-61,13: NEWLINE '\n'
|
||||
62,8-62,9: NAME 'x'
|
||||
62,9-62,10: LSQB '['
|
||||
62,10-62,11: NAME 'y'
|
||||
62,11-62,12: LSQB '['
|
||||
62,12-62,13: NUMBER '0'
|
||||
62,13-62,14: RSQB ']'
|
||||
62,14-62,15: RSQB ']'
|
||||
62,15-62,16: NEWLINE '\n'
|
||||
63,8-63,9: LPAR '('
|
||||
63,9-63,11: NAME 'p2'
|
||||
63,11-63,12: COMMA ','
|
||||
63,13-63,15: NAME 'p3'
|
||||
63,15-63,16: COMMA ','
|
||||
63,17-63,18: NUMBER '7'
|
||||
63,18-63,19: RPAR ')'
|
||||
63,19-63,20: NEWLINE '\n'
|
||||
65,0-65,23: COMMENT '#Some multiline strings'
|
||||
66,0-66,0: DEDENT ''
|
||||
66,0-66,0: DEDENT ''
|
||||
66,0-67,23: STRING '\'\'\'\nSingle quotes string\'\'\''
|
||||
67,23-67,24: NEWLINE '\n'
|
||||
69,0-71,9: STRING '"""\nDouble-quotes\nstring"""'
|
||||
71,9-71,10: NEWLINE '\n'
|
||||
73,0-75,3: STRING 'r\'\'\'\nBytes\n\'\'\''
|
||||
75,3-75,4: NEWLINE '\n'
|
||||
77,0-80,3: STRING 'U"""\nRaw\nUnicode\n"""'
|
||||
80,3-80,4: NEWLINE '\n'
|
||||
82,0-82,19: COMMENT '#Decorated function'
|
||||
83,0-83,1: AT '@'
|
||||
83,1-83,5: NAME 'deco'
|
||||
83,5-83,6: NEWLINE '\n'
|
||||
84,0-84,3: NAME 'def'
|
||||
84,4-84,5: NAME 'f'
|
||||
84,5-84,6: LPAR '('
|
||||
84,6-84,7: RPAR ')'
|
||||
84,7-84,8: COLON ':'
|
||||
84,8-84,9: NEWLINE '\n'
|
||||
85,0-85,4: INDENT ' '
|
||||
85,4-85,8: NAME 'pass'
|
||||
85,8-85,9: NEWLINE '\n'
|
||||
87,0-87,32: COMMENT '#Inner function (see ODASA-1774)'
|
||||
88,0-88,0: DEDENT ''
|
||||
88,0-88,3: NAME 'def'
|
||||
88,4-88,9: NAME 'outer'
|
||||
88,9-88,10: LPAR '('
|
||||
88,10-88,11: RPAR ')'
|
||||
88,11-88,12: COLON ':'
|
||||
88,12-88,13: NEWLINE '\n'
|
||||
89,0-89,4: INDENT ' '
|
||||
89,4-89,7: NAME 'def'
|
||||
89,8-89,13: NAME 'inner'
|
||||
89,13-89,14: LPAR '('
|
||||
89,14-89,15: RPAR ')'
|
||||
89,15-89,16: COLON ':'
|
||||
89,16-89,17: NEWLINE '\n'
|
||||
90,0-90,8: INDENT ' '
|
||||
90,8-90,12: NAME 'pass'
|
||||
90,12-90,13: NEWLINE '\n'
|
||||
92,0-92,29: COMMENT '#Oddly laid out comprehension'
|
||||
93,0-93,0: DEDENT ''
|
||||
93,0-93,0: DEDENT ''
|
||||
93,0-93,1: LSQB '['
|
||||
93,1-93,2: LSQB '['
|
||||
94,2-94,3: NAME 'x'
|
||||
94,4-94,7: NAME 'for'
|
||||
94,8-94,9: NAME 'x'
|
||||
94,10-94,12: NAME 'in'
|
||||
94,13-94,14: NAME 'y'
|
||||
95,2-95,3: RSQB ']'
|
||||
97,2-97,5: NAME 'for'
|
||||
97,6-97,7: NAME 'a'
|
||||
97,8-97,10: NAME 'in'
|
||||
97,11-97,12: NAME 'b'
|
||||
98,0-98,1: RSQB ']'
|
||||
98,1-98,2: NEWLINE '\n'
|
||||
100,0-100,25: COMMENT '#Nested binary operations'
|
||||
101,0-101,7: STRING '"Hello"'
|
||||
101,8-101,9: OP '+'
|
||||
101,10-101,13: STRING '" "'
|
||||
101,14-101,15: OP '+'
|
||||
101,16-101,23: STRING '"world"'
|
||||
101,23-101,24: NEWLINE '\n'
|
||||
102,0-102,1: NUMBER '1'
|
||||
102,1-102,2: OP '+'
|
||||
102,2-102,3: NUMBER '2'
|
||||
102,3-102,4: OP '+'
|
||||
102,4-102,5: NAME 'f'
|
||||
102,5-102,6: LPAR '('
|
||||
102,6-102,7: RPAR ')'
|
||||
102,7-102,8: NEWLINE '\n'
|
||||
103,0-103,1: NUMBER '1'
|
||||
103,1-103,2: OP '+'
|
||||
103,2-103,3: LPAR '('
|
||||
103,3-103,4: NUMBER '2'
|
||||
103,4-103,5: OP '+'
|
||||
103,5-103,6: NUMBER '3'
|
||||
103,6-103,7: RPAR ')'
|
||||
103,7-103,8: NEWLINE '\n'
|
||||
105,0-105,12: COMMENT '# operations'
|
||||
106,0-106,1: NAME 'a'
|
||||
106,1-106,2: OP '|'
|
||||
106,2-106,3: NAME 'b'
|
||||
106,3-106,4: OP '&'
|
||||
106,4-106,5: NAME 'c'
|
||||
106,5-106,6: OP '+'
|
||||
106,6-106,7: NAME 'd'
|
||||
106,7-106,8: OP '-'
|
||||
106,8-106,9: NAME 'e'
|
||||
106,9-106,10: NEWLINE '\n'
|
||||
107,0-107,1: NAME 'x'
|
||||
107,1-107,2: OP '*'
|
||||
107,2-107,3: NAME 'f'
|
||||
107,3-107,4: OP '%'
|
||||
107,4-107,5: NAME 'g'
|
||||
107,5-107,6: OP '^'
|
||||
107,6-107,7: NAME 'h'
|
||||
107,7-107,8: AT '@'
|
||||
107,8-107,9: NAME 'j'
|
||||
107,9-107,11: OP '**'
|
||||
107,11-107,12: NAME 'k'
|
||||
107,12-107,13: NEWLINE '\n'
|
||||
109,0-109,18: COMMENT '#Augmented assigns'
|
||||
110,0-110,1: NAME 'a'
|
||||
110,2-110,4: OP '@='
|
||||
110,5-110,6: NAME 'b'
|
||||
110,6-110,7: NEWLINE '\n'
|
||||
111,0-111,1: NAME 'a'
|
||||
111,2-111,4: OP '|='
|
||||
111,5-111,6: NAME 'b'
|
||||
111,6-111,7: NEWLINE '\n'
|
||||
112,0-112,1: NAME 'a'
|
||||
112,2-112,4: OP '*='
|
||||
112,5-112,6: NAME 'b'
|
||||
112,6-112,7: NEWLINE '\n'
|
||||
114,0-114,1: OP '~'
|
||||
114,1-114,2: NAME 'a'
|
||||
114,2-114,3: NEWLINE '\n'
|
||||
116,0-116,12: COMMENT '#Comparisons'
|
||||
117,0-117,1: OP '<'
|
||||
117,1-117,2: NEWLINE '\n'
|
||||
118,0-118,1: OP '>'
|
||||
118,1-118,2: NEWLINE '\n'
|
||||
119,0-119,2: OP '<='
|
||||
119,2-119,3: NEWLINE '\n'
|
||||
120,0-120,2: OP '>='
|
||||
120,2-120,3: NEWLINE '\n'
|
||||
121,0-121,2: OP '!='
|
||||
121,2-121,3: NEWLINE '\n'
|
||||
122,0-122,2: OP '=='
|
||||
122,2-122,3: NEWLINE '\n'
|
||||
123,0-123,2: NAME 'is'
|
||||
123,2-123,3: NEWLINE '\n'
|
||||
124,0-124,2: NAME 'is'
|
||||
124,3-124,6: NAME 'not'
|
||||
124,6-124,7: NEWLINE '\n'
|
||||
126,0-126,1: LPAR '('
|
||||
126,1-127,3: STRING '"""\n"""'
|
||||
127,3-127,4: RPAR ')'
|
||||
127,4-127,5: NEWLINE '\n'
|
||||
128,0-128,3: NAME 'del'
|
||||
128,4-128,5: NAME 'x'
|
||||
128,5-128,6: NEWLINE '\n'
|
||||
130,0-130,1: BACKQUOTE '`'
|
||||
130,1-130,10: NAME 'backticks'
|
||||
130,10-130,11: BACKQUOTE '`'
|
||||
130,11-130,12: NEWLINE '\n'
|
||||
132,0-132,1: NAME 'x'
|
||||
132,3-132,4: COLONEQUAL ':='
|
||||
132,5-132,6: NAME 'y'
|
||||
132,6-132,7: NEWLINE '\n'
|
||||
134,0-134,1: NUMBER '1'
|
||||
134,2-134,4: OP '<>'
|
||||
134,5-134,6: NUMBER '2'
|
||||
134,6-134,7: NEWLINE '\n'
|
||||
135,0-135,0: ENDMARKER ''
|
||||
3
python/extractor/tests/tokenizer/close_brace.py
Normal file
3
python/extractor/tests/tokenizer/close_brace.py
Normal file
@@ -0,0 +1,3 @@
|
||||
}
|
||||
)
|
||||
]
|
||||
7
python/extractor/tests/tokenizer/close_brace.tokens
Normal file
7
python/extractor/tests/tokenizer/close_brace.tokens
Normal file
@@ -0,0 +1,7 @@
|
||||
1,0-1,1: RBRACE '}'
|
||||
1,1-1,2: NEWLINE '\n'
|
||||
2,0-2,1: RPAR ')'
|
||||
2,1-2,2: NEWLINE '\n'
|
||||
3,0-3,1: RSQB ']'
|
||||
3,1-3,2: NEWLINE '\n'
|
||||
4,0-4,0: ENDMARKER ''
|
||||
13
python/extractor/tests/tokenizer/comments.py
Normal file
13
python/extractor/tests/tokenizer/comments.py
Normal file
@@ -0,0 +1,13 @@
|
||||
|
||||
import sys
|
||||
|
||||
def f():
|
||||
code-here # Line end comment
|
||||
#Indented comment
|
||||
#Unindented comment
|
||||
return 1
|
||||
|
||||
def g(arg):
|
||||
return arg
|
||||
|
||||
x = g(f())
|
||||
43
python/extractor/tests/tokenizer/comments.tokens
Normal file
43
python/extractor/tests/tokenizer/comments.tokens
Normal file
@@ -0,0 +1,43 @@
|
||||
2,0-2,6: NAME 'import'
|
||||
2,7-2,10: NAME 'sys'
|
||||
2,10-2,11: NEWLINE '\n'
|
||||
4,0-4,3: NAME 'def'
|
||||
4,4-4,5: NAME 'f'
|
||||
4,5-4,6: LPAR '('
|
||||
4,6-4,7: RPAR ')'
|
||||
4,7-4,8: COLON ':'
|
||||
4,8-4,9: NEWLINE '\n'
|
||||
5,0-5,4: INDENT ' '
|
||||
5,4-5,8: NAME 'code'
|
||||
5,8-5,9: OP '-'
|
||||
5,9-5,13: NAME 'here'
|
||||
5,14-5,32: COMMENT '# Line end comment'
|
||||
5,32-5,33: NEWLINE '\n'
|
||||
6,4-6,21: COMMENT '#Indented comment'
|
||||
7,0-7,19: COMMENT '#Unindented comment'
|
||||
8,4-8,10: NAME 'return'
|
||||
8,11-8,12: NUMBER '1'
|
||||
8,12-8,13: NEWLINE '\n'
|
||||
10,0-10,0: DEDENT ''
|
||||
10,0-10,3: NAME 'def'
|
||||
10,4-10,5: NAME 'g'
|
||||
10,5-10,6: LPAR '('
|
||||
10,6-10,9: NAME 'arg'
|
||||
10,9-10,10: RPAR ')'
|
||||
10,10-10,11: COLON ':'
|
||||
10,11-10,12: NEWLINE '\n'
|
||||
11,0-11,4: INDENT ' '
|
||||
11,4-11,10: NAME 'return'
|
||||
11,11-11,14: NAME 'arg'
|
||||
11,14-11,15: NEWLINE '\n'
|
||||
13,0-13,0: DEDENT ''
|
||||
13,0-13,1: NAME 'x'
|
||||
13,2-13,3: OP '='
|
||||
13,4-13,5: NAME 'g'
|
||||
13,5-13,6: LPAR '('
|
||||
13,6-13,7: NAME 'f'
|
||||
13,7-13,8: LPAR '('
|
||||
13,8-13,9: RPAR ')'
|
||||
13,9-13,10: RPAR ')'
|
||||
13,10-13,11: NEWLINE '\n'
|
||||
14,0-14,0: ENDMARKER ''
|
||||
5
python/extractor/tests/tokenizer/continuation.py
Normal file
5
python/extractor/tests/tokenizer/continuation.py
Normal file
@@ -0,0 +1,5 @@
|
||||
def foo():
|
||||
pass \
|
||||
\
|
||||
\
|
||||
|
||||
11
python/extractor/tests/tokenizer/continuation.tokens
Normal file
11
python/extractor/tests/tokenizer/continuation.tokens
Normal file
@@ -0,0 +1,11 @@
|
||||
1,0-1,3: NAME 'def'
|
||||
1,4-1,7: NAME 'foo'
|
||||
1,7-1,8: LPAR '('
|
||||
1,8-1,9: RPAR ')'
|
||||
1,9-1,10: COLON ':'
|
||||
1,10-1,11: NEWLINE '\n'
|
||||
2,0-2,4: INDENT ' '
|
||||
2,4-2,8: NAME 'pass'
|
||||
5,0-5,1: NEWLINE '\n'
|
||||
6,0-6,0: DEDENT ''
|
||||
6,0-6,0: ENDMARKER ''
|
||||
2
python/extractor/tests/tokenizer/dollar.py
Normal file
2
python/extractor/tests/tokenizer/dollar.py
Normal file
@@ -0,0 +1,2 @@
|
||||
$name
|
||||
$ßðđ0
|
||||
5
python/extractor/tests/tokenizer/dollar.tokens
Normal file
5
python/extractor/tests/tokenizer/dollar.tokens
Normal file
@@ -0,0 +1,5 @@
|
||||
1,0-1,5: DOLLARNAME '$name'
|
||||
1,5-1,6: NEWLINE '\n'
|
||||
2,0-2,5: DOLLARNAME '$ßðđ0'
|
||||
2,5-2,6: NEWLINE '\n'
|
||||
3,0-3,0: ENDMARKER ''
|
||||
4
python/extractor/tests/tokenizer/dots.py
Normal file
4
python/extractor/tests/tokenizer/dots.py
Normal file
@@ -0,0 +1,4 @@
|
||||
.
|
||||
..
|
||||
...
|
||||
....
|
||||
15
python/extractor/tests/tokenizer/dots.tokens
Normal file
15
python/extractor/tests/tokenizer/dots.tokens
Normal file
@@ -0,0 +1,15 @@
|
||||
1,0-1,1: DOT '.'
|
||||
1,1-1,2: NEWLINE '\n'
|
||||
2,0-2,1: DOT '.'
|
||||
2,1-2,2: DOT '.'
|
||||
2,2-2,3: NEWLINE '\n'
|
||||
3,0-3,1: DOT '.'
|
||||
3,1-3,2: DOT '.'
|
||||
3,2-3,3: DOT '.'
|
||||
3,3-3,4: NEWLINE '\n'
|
||||
4,0-4,1: DOT '.'
|
||||
4,1-4,2: DOT '.'
|
||||
4,2-4,3: DOT '.'
|
||||
4,3-4,4: DOT '.'
|
||||
4,4-4,5: NEWLINE '\n'
|
||||
5,0-5,0: ENDMARKER ''
|
||||
2
python/extractor/tests/tokenizer/emoji.py
Normal file
2
python/extractor/tests/tokenizer/emoji.py
Normal file
@@ -0,0 +1,2 @@
|
||||
"👦👦🏻👦🏼👦🏽👦🏾👦🏿👧👧🏻👧🏼👧🏽👧🏾👧🏿"
|
||||
"😀😁😂😃😄😅😆😇😈😉😊😋😌😍😎😏"
|
||||
5
python/extractor/tests/tokenizer/emoji.tokens
Normal file
5
python/extractor/tests/tokenizer/emoji.tokens
Normal file
@@ -0,0 +1,5 @@
|
||||
1,0-1,24: STRING '"👦👦🏻👦🏼👦🏽👦🏾👦🏿👧👧🏻👧🏼👧🏽👧🏾👧🏿"'
|
||||
1,24-1,25: NEWLINE '\n'
|
||||
2,0-2,18: STRING '"😀😁😂😃😄😅😆😇😈😉😊😋😌😍😎😏"'
|
||||
2,18-2,19: NEWLINE '\n'
|
||||
3,0-3,0: ENDMARKER ''
|
||||
4
python/extractor/tests/tokenizer/feeds.py
Normal file
4
python/extractor/tests/tokenizer/feeds.py
Normal file
@@ -0,0 +1,4 @@
|
||||
|
||||
|
||||
|
||||
name
|
||||
3
python/extractor/tests/tokenizer/feeds.tokens
Normal file
3
python/extractor/tests/tokenizer/feeds.tokens
Normal file
@@ -0,0 +1,3 @@
|
||||
4,0-4,4: NAME 'name'
|
||||
4,4-4,5: NEWLINE '\n'
|
||||
5,0-5,0: ENDMARKER ''
|
||||
38
python/extractor/tests/tokenizer/gen_tokens.py
Normal file
38
python/extractor/tests/tokenizer/gen_tokens.py
Normal file
@@ -0,0 +1,38 @@
|
||||
import sys
|
||||
import tokenize
|
||||
import token
|
||||
|
||||
def printtoken(type, token, start, end, _):
    """Print a single token as ``row,col-row,col: KIND 'text'``.

    The layout follows Python 3's tokenize-style output regardless of the
    interpreter version running this script.

    ``type`` is the numeric token type (an index into ``tokenize.tok_name``),
    ``token`` is the token text, and ``start``/``end`` are the two position
    pairs that are concatenated to fill the four ``%d`` fields. The final
    argument is accepted to match the tokenizer callback shape and is ignored.
    ENCODING and NL tokens are skipped.
    """
    kind = tokenize.tok_name[type]
    if kind in ("ENCODING", "NL"):
        return
    span = "%d,%d-%d,%d:" % (start + end)
    print("%-20s%-15s%r" % (span, kind, token))
|
||||
|
||||
# Punctuation that should be reported under its specific token type.
# main() consults this table for tokens whose tokenize name is "OP" and
# swaps in the matching constant from the ``token`` module.
OP_TYPES = {
    "(": token.LPAR,
    ")": token.RPAR,
    "[": token.LSQB,
    "]": token.RSQB,
    "{": token.LBRACE,
    "}": token.RBRACE,
    ":": token.COLON,
    ",": token.COMMA,
    ".": token.DOT,
    "@": token.AT,
}
|
||||
|
||||
def main():
    """Tokenize the file named by ``sys.argv[1]`` and print its tokens.

    On Python 2, ``tokenize.tokenize`` is handed ``printtoken`` as its
    callback. On Python 3 the token stream is iterated here: tokens named
    "OP" are first remapped through ``OP_TYPES`` so that brackets, colon,
    comma, dot and ``@`` are reported under their specific type, then each
    token that is not ENCODING or NL is passed to ``printtoken``.

    Raises IndexError if no file argument was given, and whatever ``open``
    raises if the file cannot be read.
    """
    # Keep the file open for the whole tokenization (the Python 3 loop reads
    # it lazily via readline) and close it deterministically afterwards.
    with open(sys.argv[1], "rb") as source:
        readline = source.readline
        # Compare the numeric major version; ordering the sys.version
        # *string* against "3" is a lexical comparison and is fragile.
        if sys.version_info[0] < 3:
            tokenize.tokenize(readline, printtoken)
        else:
            # Local names avoid shadowing the builtin ``type`` and the
            # imported ``token`` module.
            for tok_type, tok_text, start, end, line in tokenize.tokenize(readline):
                if tokenize.tok_name[tok_type] == "OP":
                    tok_type = OP_TYPES.get(tok_text, tok_type)
                if tokenize.tok_name[tok_type] not in ("ENCODING", "NL"):
                    printtoken(tok_type, tok_text, start, end, line)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
275
python/extractor/tests/tokenizer/gen_tokens.tokens
Normal file
275
python/extractor/tests/tokenizer/gen_tokens.tokens
Normal file
@@ -0,0 +1,275 @@
|
||||
1,0-1,6: NAME 'import'
|
||||
1,7-1,10: NAME 'sys'
|
||||
1,10-1,11: NEWLINE '\n'
|
||||
2,0-2,6: NAME 'import'
|
||||
2,7-2,15: NAME 'tokenize'
|
||||
2,15-2,16: NEWLINE '\n'
|
||||
3,0-3,6: NAME 'import'
|
||||
3,7-3,12: NAME 'token'
|
||||
3,12-3,13: NEWLINE '\n'
|
||||
5,0-5,3: NAME 'def'
|
||||
5,4-5,14: NAME 'printtoken'
|
||||
5,14-5,15: LPAR '('
|
||||
5,15-5,19: NAME 'type'
|
||||
5,19-5,20: COMMA ','
|
||||
5,21-5,26: NAME 'token'
|
||||
5,26-5,27: COMMA ','
|
||||
5,28-5,33: NAME 'start'
|
||||
5,33-5,34: COMMA ','
|
||||
5,35-5,38: NAME 'end'
|
||||
5,38-5,39: COMMA ','
|
||||
5,40-5,41: NAME '_'
|
||||
5,41-5,42: RPAR ')'
|
||||
5,42-5,43: COLON ':'
|
||||
5,44-5,45: NEWLINE '\n'
|
||||
6,4-6,63: COMMENT '# Use Python 3 tokenize style output, regardless of version'
|
||||
7,0-7,4: INDENT ' '
|
||||
7,4-7,6: NAME 'if'
|
||||
7,7-7,15: NAME 'tokenize'
|
||||
7,15-7,16: DOT '.'
|
||||
7,16-7,24: NAME 'tok_name'
|
||||
7,24-7,25: LSQB '['
|
||||
7,25-7,29: NAME 'type'
|
||||
7,29-7,30: RSQB ']'
|
||||
7,31-7,34: NAME 'not'
|
||||
7,35-7,37: NAME 'in'
|
||||
7,38-7,39: LPAR '('
|
||||
7,39-7,49: STRING '"ENCODING"'
|
||||
7,49-7,50: COMMA ','
|
||||
7,51-7,55: STRING '"NL"'
|
||||
7,55-7,56: RPAR ')'
|
||||
7,56-7,57: COLON ':'
|
||||
7,57-7,58: NEWLINE '\n'
|
||||
8,0-8,8: INDENT ' '
|
||||
8,8-8,19: NAME 'token_range'
|
||||
8,20-8,21: OP '='
|
||||
8,22-8,36: STRING '"%d,%d-%d,%d:"'
|
||||
8,37-8,38: OP '%'
|
||||
8,39-8,40: LPAR '('
|
||||
8,40-8,45: NAME 'start'
|
||||
8,46-8,47: OP '+'
|
||||
8,48-8,51: NAME 'end'
|
||||
8,51-8,52: RPAR ')'
|
||||
8,52-8,53: NEWLINE '\n'
|
||||
9,8-9,13: NAME 'print'
|
||||
9,13-9,14: LPAR '('
|
||||
9,14-9,28: STRING '"%-20s%-15s%r"'
|
||||
9,29-9,30: OP '%'
|
||||
10,12-10,13: LPAR '('
|
||||
10,13-10,24: NAME 'token_range'
|
||||
10,24-10,25: COMMA ','
|
||||
10,26-10,34: NAME 'tokenize'
|
||||
10,34-10,35: DOT '.'
|
||||
10,35-10,43: NAME 'tok_name'
|
||||
10,43-10,44: LSQB '['
|
||||
10,44-10,48: NAME 'type'
|
||||
10,48-10,49: RSQB ']'
|
||||
10,49-10,50: COMMA ','
|
||||
10,51-10,56: NAME 'token'
|
||||
10,56-10,57: RPAR ')'
|
||||
11,8-11,9: RPAR ')'
|
||||
11,9-11,10: NEWLINE '\n'
|
||||
13,0-13,0: DEDENT ''
|
||||
13,0-13,0: DEDENT ''
|
||||
13,0-13,8: NAME 'OP_TYPES'
|
||||
13,9-13,10: OP '='
|
||||
13,11-13,12: LBRACE '{'
|
||||
14,4-14,7: STRING '"("'
|
||||
14,8-14,9: COLON ':'
|
||||
14,10-14,15: NAME 'token'
|
||||
14,15-14,16: DOT '.'
|
||||
14,16-14,20: NAME 'LPAR'
|
||||
14,20-14,21: COMMA ','
|
||||
15,4-15,7: STRING '")"'
|
||||
15,8-15,9: COLON ':'
|
||||
15,10-15,15: NAME 'token'
|
||||
15,15-15,16: DOT '.'
|
||||
15,16-15,20: NAME 'RPAR'
|
||||
15,20-15,21: COMMA ','
|
||||
16,4-16,7: STRING '"["'
|
||||
16,8-16,9: COLON ':'
|
||||
16,10-16,15: NAME 'token'
|
||||
16,15-16,16: DOT '.'
|
||||
16,16-16,20: NAME 'LSQB'
|
||||
16,20-16,21: COMMA ','
|
||||
17,4-17,7: STRING '"]"'
|
||||
17,8-17,9: COLON ':'
|
||||
17,10-17,15: NAME 'token'
|
||||
17,15-17,16: DOT '.'
|
||||
17,16-17,20: NAME 'RSQB'
|
||||
17,20-17,21: COMMA ','
|
||||
18,4-18,7: STRING '"{"'
|
||||
18,8-18,9: COLON ':'
|
||||
18,10-18,15: NAME 'token'
|
||||
18,15-18,16: DOT '.'
|
||||
18,16-18,22: NAME 'LBRACE'
|
||||
18,22-18,23: COMMA ','
|
||||
19,4-19,7: STRING '"}"'
|
||||
19,8-19,9: COLON ':'
|
||||
19,10-19,15: NAME 'token'
|
||||
19,15-19,16: DOT '.'
|
||||
19,16-19,22: NAME 'RBRACE'
|
||||
19,22-19,23: COMMA ','
|
||||
20,4-20,7: STRING '":"'
|
||||
20,8-20,9: COLON ':'
|
||||
20,10-20,15: NAME 'token'
|
||||
20,15-20,16: DOT '.'
|
||||
20,16-20,21: NAME 'COLON'
|
||||
20,21-20,22: COMMA ','
|
||||
21,4-21,7: STRING '","'
|
||||
21,8-21,9: COLON ':'
|
||||
21,10-21,15: NAME 'token'
|
||||
21,15-21,16: DOT '.'
|
||||
21,16-21,21: NAME 'COMMA'
|
||||
21,21-21,22: COMMA ','
|
||||
22,4-22,7: STRING '"."'
|
||||
22,8-22,9: COLON ':'
|
||||
22,10-22,15: NAME 'token'
|
||||
22,15-22,16: DOT '.'
|
||||
22,16-22,19: NAME 'DOT'
|
||||
22,19-22,20: COMMA ','
|
||||
23,4-23,7: STRING '"@"'
|
||||
23,8-23,9: COLON ':'
|
||||
23,10-23,15: NAME 'token'
|
||||
23,15-23,16: DOT '.'
|
||||
23,16-23,18: NAME 'AT'
|
||||
23,18-23,19: COMMA ','
|
||||
24,4-24,5: RBRACE '}'
|
||||
24,5-24,6: NEWLINE '\n'
|
||||
26,0-26,3: NAME 'def'
|
||||
26,4-26,8: NAME 'main'
|
||||
26,8-26,9: LPAR '('
|
||||
26,9-26,10: RPAR ')'
|
||||
26,10-26,11: COLON ':'
|
||||
26,11-26,12: NEWLINE '\n'
|
||||
27,0-27,4: INDENT ' '
|
||||
27,4-27,12: NAME 'readline'
|
||||
27,13-27,14: OP '='
|
||||
27,15-27,19: NAME 'open'
|
||||
27,19-27,20: LPAR '('
|
||||
27,20-27,23: NAME 'sys'
|
||||
27,23-27,24: DOT '.'
|
||||
27,24-27,28: NAME 'argv'
|
||||
27,28-27,29: LSQB '['
|
||||
27,29-27,30: NUMBER '1'
|
||||
27,30-27,31: RSQB ']'
|
||||
27,31-27,32: COMMA ','
|
||||
27,33-27,37: STRING '"rb"'
|
||||
27,37-27,38: RPAR ')'
|
||||
27,38-27,39: DOT '.'
|
||||
27,39-27,47: NAME 'readline'
|
||||
27,47-27,48: NEWLINE '\n'
|
||||
28,4-28,6: NAME 'if'
|
||||
28,7-28,10: NAME 'sys'
|
||||
28,10-28,11: DOT '.'
|
||||
28,11-28,18: NAME 'version'
|
||||
28,19-28,20: OP '<'
|
||||
28,21-28,24: STRING '"3"'
|
||||
28,24-28,25: COLON ':'
|
||||
28,25-28,26: NEWLINE '\n'
|
||||
29,0-29,8: INDENT ' '
|
||||
29,8-29,16: NAME 'tokenize'
|
||||
29,16-29,17: DOT '.'
|
||||
29,17-29,25: NAME 'tokenize'
|
||||
29,25-29,26: LPAR '('
|
||||
29,26-29,34: NAME 'readline'
|
||||
29,34-29,35: COMMA ','
|
||||
29,36-29,46: NAME 'printtoken'
|
||||
29,46-29,47: RPAR ')'
|
||||
29,47-29,48: NEWLINE '\n'
|
||||
30,4-30,4: DEDENT ''
|
||||
30,4-30,8: NAME 'else'
|
||||
30,8-30,9: COLON ':'
|
||||
30,9-30,10: NEWLINE '\n'
|
||||
31,0-31,8: INDENT ' '
|
||||
31,8-31,11: NAME 'for'
|
||||
31,12-31,16: NAME 'type'
|
||||
31,16-31,17: COMMA ','
|
||||
31,18-31,23: NAME 'token'
|
||||
31,23-31,24: COMMA ','
|
||||
31,25-31,30: NAME 'start'
|
||||
31,30-31,31: COMMA ','
|
||||
31,32-31,35: NAME 'end'
|
||||
31,35-31,36: COMMA ','
|
||||
31,37-31,38: NAME '_'
|
||||
31,39-31,41: NAME 'in'
|
||||
31,42-31,50: NAME 'tokenize'
|
||||
31,50-31,51: DOT '.'
|
||||
31,51-31,59: NAME 'tokenize'
|
||||
31,59-31,60: LPAR '('
|
||||
31,60-31,68: NAME 'readline'
|
||||
31,68-31,69: RPAR ')'
|
||||
31,69-31,70: COLON ':'
|
||||
31,70-31,71: NEWLINE '\n'
|
||||
32,0-32,12: INDENT ' '
|
||||
32,12-32,14: NAME 'if'
|
||||
32,15-32,23: NAME 'tokenize'
|
||||
32,23-32,24: DOT '.'
|
||||
32,24-32,32: NAME 'tok_name'
|
||||
32,32-32,33: LSQB '['
|
||||
32,33-32,37: NAME 'type'
|
||||
32,37-32,38: RSQB ']'
|
||||
32,39-32,41: OP '=='
|
||||
32,42-32,46: STRING '"OP"'
|
||||
32,46-32,47: COLON ':'
|
||||
32,47-32,48: NEWLINE '\n'
|
||||
33,0-33,16: INDENT ' '
|
||||
33,16-33,20: NAME 'type'
|
||||
33,21-33,22: OP '='
|
||||
33,23-33,31: NAME 'OP_TYPES'
|
||||
33,31-33,32: DOT '.'
|
||||
33,32-33,35: NAME 'get'
|
||||
33,35-33,36: LPAR '('
|
||||
33,36-33,41: NAME 'token'
|
||||
33,41-33,42: COMMA ','
|
||||
33,43-33,47: NAME 'type'
|
||||
33,47-33,48: RPAR ')'
|
||||
33,48-33,49: NEWLINE '\n'
|
||||
34,12-34,12: DEDENT ''
|
||||
34,12-34,14: NAME 'if'
|
||||
34,15-34,23: NAME 'tokenize'
|
||||
34,23-34,24: DOT '.'
|
||||
34,24-34,32: NAME 'tok_name'
|
||||
34,32-34,33: LSQB '['
|
||||
34,33-34,37: NAME 'type'
|
||||
34,37-34,38: RSQB ']'
|
||||
34,39-34,42: NAME 'not'
|
||||
34,43-34,45: NAME 'in'
|
||||
34,46-34,47: LPAR '('
|
||||
34,47-34,57: STRING '"ENCODING"'
|
||||
34,57-34,58: COMMA ','
|
||||
34,59-34,63: STRING '"NL"'
|
||||
34,63-34,64: RPAR ')'
|
||||
34,64-34,65: COLON ':'
|
||||
34,65-34,66: NEWLINE '\n'
|
||||
35,0-35,16: INDENT ' '
|
||||
35,16-35,26: NAME 'printtoken'
|
||||
35,26-35,27: LPAR '('
|
||||
35,27-35,31: NAME 'type'
|
||||
35,31-35,32: COMMA ','
|
||||
35,33-35,38: NAME 'token'
|
||||
35,38-35,39: COMMA ','
|
||||
35,40-35,45: NAME 'start'
|
||||
35,45-35,46: COMMA ','
|
||||
35,47-35,50: NAME 'end'
|
||||
35,50-35,51: COMMA ','
|
||||
35,52-35,53: NAME '_'
|
||||
35,53-35,54: RPAR ')'
|
||||
35,54-35,55: NEWLINE '\n'
|
||||
37,0-37,0: DEDENT ''
|
||||
37,0-37,0: DEDENT ''
|
||||
37,0-37,0: DEDENT ''
|
||||
37,0-37,0: DEDENT ''
|
||||
37,0-37,2: NAME 'if'
|
||||
37,3-37,11: NAME '__name__'
|
||||
37,12-37,14: OP '=='
|
||||
37,15-37,25: STRING '"__main__"'
|
||||
37,25-37,26: COLON ':'
|
||||
37,26-37,27: NEWLINE '\n'
|
||||
38,0-38,4: INDENT ' '
|
||||
38,4-38,8: NAME 'main'
|
||||
38,8-38,9: LPAR '('
|
||||
38,9-38,10: RPAR ')'
|
||||
38,10-38,11: NEWLINE '\n'
|
||||
39,0-39,0: DEDENT ''
|
||||
39,0-39,0: ENDMARKER ''
|
||||
4
python/extractor/tests/tokenizer/illegal_indentation.py
Normal file
4
python/extractor/tests/tokenizer/illegal_indentation.py
Normal file
@@ -0,0 +1,4 @@
|
||||
def foo(seq):
|
||||
for var in seq:
|
||||
body
|
||||
illegal-dedent
|
||||
24
python/extractor/tests/tokenizer/illegal_indentation.tokens
Normal file
24
python/extractor/tests/tokenizer/illegal_indentation.tokens
Normal file
@@ -0,0 +1,24 @@
|
||||
1,0-1,3: NAME 'def'
|
||||
1,4-1,7: NAME 'foo'
|
||||
1,7-1,8: LPAR '('
|
||||
1,8-1,11: NAME 'seq'
|
||||
1,11-1,12: RPAR ')'
|
||||
1,12-1,13: COLON ':'
|
||||
1,13-1,14: NEWLINE '\n'
|
||||
2,0-2,4: INDENT ' '
|
||||
2,4-2,7: NAME 'for'
|
||||
2,8-2,11: NAME 'var'
|
||||
2,12-2,14: NAME 'in'
|
||||
2,15-2,18: NAME 'seq'
|
||||
2,18-2,19: COLON ':'
|
||||
2,19-2,20: NEWLINE '\n'
|
||||
3,0-3,8: INDENT ' '
|
||||
3,8-3,12: NAME 'body'
|
||||
3,12-3,13: NEWLINE '\n'
|
||||
4,6-4,6: ILLEGALINDENT ''
|
||||
4,6-4,13: NAME 'illegal'
|
||||
4,13-4,14: OP '-'
|
||||
4,14-4,20: NAME 'dedent'
|
||||
4,20-4,21: NEWLINE '\n'
|
||||
5,0-5,0: DEDENT ''
|
||||
5,0-5,0: ENDMARKER ''
|
||||
7
python/extractor/tests/tokenizer/illegal_indentation2.py
Normal file
7
python/extractor/tests/tokenizer/illegal_indentation2.py
Normal file
@@ -0,0 +1,7 @@
|
||||
class C:
|
||||
def foo(seq):
|
||||
for var in seq:
|
||||
body
|
||||
illegal
|
||||
dedent
|
||||
sequence
|
||||
34
python/extractor/tests/tokenizer/illegal_indentation2.tokens
Normal file
34
python/extractor/tests/tokenizer/illegal_indentation2.tokens
Normal file
@@ -0,0 +1,34 @@
|
||||
1,0-1,5: NAME 'class'
|
||||
1,6-1,7: NAME 'C'
|
||||
1,7-1,8: COLON ':'
|
||||
1,8-1,9: NEWLINE '\n'
|
||||
2,0-2,4: INDENT ' '
|
||||
2,4-2,7: NAME 'def'
|
||||
2,8-2,11: NAME 'foo'
|
||||
2,11-2,12: LPAR '('
|
||||
2,12-2,15: NAME 'seq'
|
||||
2,15-2,16: RPAR ')'
|
||||
2,16-2,17: COLON ':'
|
||||
2,17-2,18: NEWLINE '\n'
|
||||
3,0-3,8: INDENT ' '
|
||||
3,8-3,11: NAME 'for'
|
||||
3,12-3,15: NAME 'var'
|
||||
3,16-3,18: NAME 'in'
|
||||
3,19-3,22: NAME 'seq'
|
||||
3,22-3,23: COLON ':'
|
||||
3,23-3,24: NEWLINE '\n'
|
||||
4,0-4,12: INDENT ' '
|
||||
4,12-4,16: NAME 'body'
|
||||
4,16-4,17: NEWLINE '\n'
|
||||
5,6-5,6: DEDENT ''
|
||||
5,6-5,6: ILLEGALINDENT ''
|
||||
5,6-5,13: NAME 'illegal'
|
||||
5,13-5,14: NEWLINE '\n'
|
||||
6,0-6,5: INDENT ' '
|
||||
6,5-6,11: NAME 'dedent'
|
||||
6,11-6,12: NEWLINE '\n'
|
||||
7,4-7,4: DEDENT ''
|
||||
7,4-7,12: NAME 'sequence'
|
||||
7,12-7,13: NEWLINE '\n'
|
||||
8,0-8,0: DEDENT ''
|
||||
8,0-8,0: ENDMARKER ''
|
||||
2
python/extractor/tests/tokenizer/import.py
Normal file
2
python/extractor/tests/tokenizer/import.py
Normal file
@@ -0,0 +1,2 @@
|
||||
import a
|
||||
import why
|
||||
7
python/extractor/tests/tokenizer/import.tokens
Normal file
7
python/extractor/tests/tokenizer/import.tokens
Normal file
@@ -0,0 +1,7 @@
|
||||
1,0-1,6: NAME 'import'
|
||||
1,7-1,8: NAME 'a'
|
||||
1,8-1,9: NEWLINE '\n'
|
||||
2,0-2,6: NAME 'import'
|
||||
2,7-2,10: NAME 'why'
|
||||
2,10-2,11: NEWLINE '\n'
|
||||
3,0-3,0: ENDMARKER ''
|
||||
7
python/extractor/tests/tokenizer/kannada.py
Normal file
7
python/extractor/tests/tokenizer/kannada.py
Normal file
@@ -0,0 +1,7 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
def ಏನಾದರೂ_ಮಾಡು():
|
||||
print('ಏನೋ ಮಾಡಿದೆ')
|
||||
|
||||
|
||||
ಏನಾದರೂ_ಮಾಡು()
|
||||
19
python/extractor/tests/tokenizer/kannada.tokens
Normal file
19
python/extractor/tests/tokenizer/kannada.tokens
Normal file
@@ -0,0 +1,19 @@
|
||||
1,0-1,23: COMMENT '# -*- coding: utf-8 -*-'
|
||||
3,0-3,3: NAME 'def'
|
||||
3,4-3,15: NAME 'ಏನಾದರೂ_ಮಾಡು'
|
||||
3,15-3,16: LPAR '('
|
||||
3,16-3,17: RPAR ')'
|
||||
3,17-3,18: COLON ':'
|
||||
3,18-3,19: NEWLINE '\n'
|
||||
4,0-4,4: INDENT ' '
|
||||
4,4-4,9: NAME 'print'
|
||||
4,9-4,10: LPAR '('
|
||||
4,10-4,22: STRING '\'ಏನೋ ಮಾಡಿದೆ\''
|
||||
4,22-4,23: RPAR ')'
|
||||
4,23-4,24: NEWLINE '\n'
|
||||
7,0-7,0: DEDENT ''
|
||||
7,0-7,11: NAME 'ಏನಾದರೂ_ಮಾಡು'
|
||||
7,11-7,12: LPAR '('
|
||||
7,12-7,13: RPAR ')'
|
||||
7,13-7,14: NEWLINE '\n'
|
||||
8,0-8,0: ENDMARKER ''
|
||||
4
python/extractor/tests/tokenizer/latin.py
Normal file
4
python/extractor/tests/tokenizer/latin.py
Normal file
@@ -0,0 +1,4 @@
|
||||
"Any old stuff can go here"
|
||||
# -*- coding: latin1 -*-
|
||||
# G<>nter
|
||||
|
||||
5
python/extractor/tests/tokenizer/latin.tokens
Normal file
5
python/extractor/tests/tokenizer/latin.tokens
Normal file
@@ -0,0 +1,5 @@
|
||||
1,0-1,27: STRING '"Any old stuff can go here"'
|
||||
1,27-1,28: NEWLINE '\n'
|
||||
2,0-2,24: COMMENT '# -*- coding: latin1 -*-'
|
||||
3,0-3,8: COMMENT '# Günter'
|
||||
5,0-5,0: ENDMARKER ''
|
||||
83
python/extractor/tests/tokenizer/numbers.py
Normal file
83
python/extractor/tests/tokenizer/numbers.py
Normal file
@@ -0,0 +1,83 @@
|
||||
|
||||
#Some negative numbers
|
||||
|
||||
-1
|
||||
-10000000000000000
|
||||
-1.0
|
||||
-3.0e17
|
||||
|
||||
-(1)
|
||||
-(10000000000000000)
|
||||
-(1.0)
|
||||
-(3.0e17)
|
||||
|
||||
(-1)
|
||||
(-10000000000000000)
|
||||
(-1.0)
|
||||
(-3.0e17)
|
||||
|
||||
-1j
|
||||
|
||||
-3.7e12j
|
||||
|
||||
#Some other numbers
|
||||
0.058823529630899429
|
||||
|
||||
1e-06
|
||||
.9999999
|
||||
0xffffff
|
||||
1e10
|
||||
1.
|
||||
2.79252680
|
||||
0x0001000
|
||||
4987312561856745907287624786230562734672583763984576267
|
||||
|
||||
#Octal both styles
|
||||
0777
|
||||
0o777
|
||||
|
||||
#Python2 longs
|
||||
0
|
||||
0L
|
||||
5L
|
||||
-2L
|
||||
498731256185674590728762478623056L
|
||||
|
||||
0xfffffffL
|
||||
0xeeeeeeeeeeeeeeeeL
|
||||
|
||||
0b00010101011111111111L
|
||||
0o77777777777L
|
||||
0777777777777L
|
||||
0j
|
||||
0_0234j
|
||||
|
||||
0e0
|
||||
|
||||
#Valid uses of underscore:
|
||||
|
||||
1_1
|
||||
1_2_3.4_5_6e7_8_9
|
||||
0b1_1
|
||||
0o1_1
|
||||
0x1_1
|
||||
|
||||
0b_010
|
||||
0o_010
|
||||
0x_010
|
||||
|
||||
#Invalid uses of underscore:
|
||||
|
||||
1__3
|
||||
2e_5
|
||||
2e+_5
|
||||
123_
|
||||
|
||||
#Valid prefixed zero:
|
||||
|
||||
0_0
|
||||
009.
|
||||
009e005
|
||||
00123
|
||||
|
||||
1 if 1else 0
|
||||
156
python/extractor/tests/tokenizer/numbers.tokens
Normal file
156
python/extractor/tests/tokenizer/numbers.tokens
Normal file
@@ -0,0 +1,156 @@
|
||||
2,0-2,22: COMMENT '#Some negative numbers'
|
||||
4,0-4,1: OP '-'
|
||||
4,1-4,2: NUMBER '1'
|
||||
4,2-4,3: NEWLINE '\n'
|
||||
5,0-5,1: OP '-'
|
||||
5,1-5,18: NUMBER '10000000000000000'
|
||||
5,18-5,19: NEWLINE '\n'
|
||||
6,0-6,1: OP '-'
|
||||
6,1-6,4: NUMBER '1.0'
|
||||
6,4-6,5: NEWLINE '\n'
|
||||
7,0-7,1: OP '-'
|
||||
7,1-7,7: NUMBER '3.0e17'
|
||||
7,7-7,8: NEWLINE '\n'
|
||||
9,0-9,1: OP '-'
|
||||
9,1-9,2: LPAR '('
|
||||
9,2-9,3: NUMBER '1'
|
||||
9,3-9,4: RPAR ')'
|
||||
9,4-9,5: NEWLINE '\n'
|
||||
10,0-10,1: OP '-'
|
||||
10,1-10,2: LPAR '('
|
||||
10,2-10,19: NUMBER '10000000000000000'
|
||||
10,19-10,20: RPAR ')'
|
||||
10,20-10,21: NEWLINE '\n'
|
||||
11,0-11,1: OP '-'
|
||||
11,1-11,2: LPAR '('
|
||||
11,2-11,5: NUMBER '1.0'
|
||||
11,5-11,6: RPAR ')'
|
||||
11,6-11,7: NEWLINE '\n'
|
||||
12,0-12,1: OP '-'
|
||||
12,1-12,2: LPAR '('
|
||||
12,2-12,8: NUMBER '3.0e17'
|
||||
12,8-12,9: RPAR ')'
|
||||
12,9-12,10: NEWLINE '\n'
|
||||
14,0-14,1: LPAR '('
|
||||
14,1-14,2: OP '-'
|
||||
14,2-14,3: NUMBER '1'
|
||||
14,3-14,4: RPAR ')'
|
||||
14,4-14,5: NEWLINE '\n'
|
||||
15,0-15,1: LPAR '('
|
||||
15,1-15,2: OP '-'
|
||||
15,2-15,19: NUMBER '10000000000000000'
|
||||
15,19-15,20: RPAR ')'
|
||||
15,20-15,21: NEWLINE '\n'
|
||||
16,0-16,1: LPAR '('
|
||||
16,1-16,2: OP '-'
|
||||
16,2-16,5: NUMBER '1.0'
|
||||
16,5-16,6: RPAR ')'
|
||||
16,6-16,7: NEWLINE '\n'
|
||||
17,0-17,1: LPAR '('
|
||||
17,1-17,2: OP '-'
|
||||
17,2-17,8: NUMBER '3.0e17'
|
||||
17,8-17,9: RPAR ')'
|
||||
17,9-17,10: NEWLINE '\n'
|
||||
19,0-19,1: OP '-'
|
||||
19,1-19,3: NUMBER '1j'
|
||||
19,3-19,4: NEWLINE '\n'
|
||||
21,0-21,1: OP '-'
|
||||
21,1-21,8: NUMBER '3.7e12j'
|
||||
21,8-21,9: NEWLINE '\n'
|
||||
23,0-23,19: COMMENT '#Some other numbers'
|
||||
24,0-24,20: NUMBER '0.058823529630899429'
|
||||
24,20-24,21: NEWLINE '\n'
|
||||
26,0-26,5: NUMBER '1e-06'
|
||||
26,5-26,6: NEWLINE '\n'
|
||||
27,0-27,8: NUMBER '.9999999'
|
||||
27,8-27,9: NEWLINE '\n'
|
||||
28,0-28,8: NUMBER '0xffffff'
|
||||
28,8-28,9: NEWLINE '\n'
|
||||
29,0-29,4: NUMBER '1e10'
|
||||
29,4-29,5: NEWLINE '\n'
|
||||
30,0-30,2: NUMBER '1.'
|
||||
30,2-30,3: NEWLINE '\n'
|
||||
31,0-31,10: NUMBER '2.79252680'
|
||||
31,10-31,11: NEWLINE '\n'
|
||||
32,0-32,9: NUMBER '0x0001000'
|
||||
32,9-32,10: NEWLINE '\n'
|
||||
33,0-33,55: NUMBER '4987312561856745907287624786230562734672583763984576267'
|
||||
33,55-33,56: NEWLINE '\n'
|
||||
35,0-35,18: COMMENT '#Octal both styles'
|
||||
36,0-36,4: NUMBER '0777'
|
||||
36,4-36,5: NEWLINE '\n'
|
||||
37,0-37,5: NUMBER '0o777'
|
||||
37,5-37,6: NEWLINE '\n'
|
||||
39,0-39,14: COMMENT '#Python2 longs'
|
||||
40,0-40,1: NUMBER '0'
|
||||
40,1-40,2: NEWLINE '\n'
|
||||
41,0-41,2: NUMBER '0L'
|
||||
41,2-41,3: NEWLINE '\n'
|
||||
42,0-42,2: NUMBER '5L'
|
||||
42,2-42,3: NEWLINE '\n'
|
||||
43,0-43,1: OP '-'
|
||||
43,1-43,3: NUMBER '2L'
|
||||
43,3-43,4: NEWLINE '\n'
|
||||
44,0-44,34: NUMBER '498731256185674590728762478623056L'
|
||||
44,34-44,35: NEWLINE '\n'
|
||||
46,0-46,10: NUMBER '0xfffffffL'
|
||||
46,10-46,11: NEWLINE '\n'
|
||||
47,0-47,19: NUMBER '0xeeeeeeeeeeeeeeeeL'
|
||||
47,19-47,20: NEWLINE '\n'
|
||||
49,0-49,23: NUMBER '0b00010101011111111111L'
|
||||
49,23-49,24: NEWLINE '\n'
|
||||
50,0-50,14: NUMBER '0o77777777777L'
|
||||
50,14-50,15: NEWLINE '\n'
|
||||
51,0-51,14: NUMBER '0777777777777L'
|
||||
51,14-51,15: NEWLINE '\n'
|
||||
52,0-52,2: NUMBER '0j'
|
||||
52,2-52,3: NEWLINE '\n'
|
||||
53,0-53,7: NUMBER '0_0234j'
|
||||
53,7-53,8: NEWLINE '\n'
|
||||
55,0-55,3: NUMBER '0e0'
|
||||
55,3-55,4: NEWLINE '\n'
|
||||
57,0-57,26: COMMENT '#Valid uses of underscore:'
|
||||
59,0-59,3: NUMBER '1_1'
|
||||
59,3-59,4: NEWLINE '\n'
|
||||
60,0-60,17: NUMBER '1_2_3.4_5_6e7_8_9'
|
||||
60,17-60,18: NEWLINE '\n'
|
||||
61,0-61,5: NUMBER '0b1_1'
|
||||
61,5-61,6: NEWLINE '\n'
|
||||
62,0-62,5: NUMBER '0o1_1'
|
||||
62,5-62,6: NEWLINE '\n'
|
||||
63,0-63,5: NUMBER '0x1_1'
|
||||
63,5-63,6: NEWLINE '\n'
|
||||
65,0-65,6: NUMBER '0b_010'
|
||||
65,6-65,7: NEWLINE '\n'
|
||||
66,0-66,6: NUMBER '0o_010'
|
||||
66,6-66,7: NEWLINE '\n'
|
||||
67,0-67,6: NUMBER '0x_010'
|
||||
67,6-67,7: NEWLINE '\n'
|
||||
69,0-69,28: COMMENT '#Invalid uses of underscore:'
|
||||
71,0-71,3: ERRORTOKEN '1__'
|
||||
71,3-71,4: NUMBER '3'
|
||||
71,4-71,5: NEWLINE '\n'
|
||||
72,0-72,3: ERRORTOKEN '2e_'
|
||||
72,3-72,4: NUMBER '5'
|
||||
72,4-72,5: NEWLINE '\n'
|
||||
73,0-73,4: ERRORTOKEN '2e+_'
|
||||
73,4-73,5: NUMBER '5'
|
||||
73,5-73,6: NEWLINE '\n'
|
||||
74,0-74,5: ERRORTOKEN '123_\n'
|
||||
74,5-74,6: NEWLINE '\n'
|
||||
75,0-75,21: COMMENT '#Valid prefixed zero:'
|
||||
77,0-77,3: NUMBER '0_0'
|
||||
77,3-77,4: NEWLINE '\n'
|
||||
78,0-78,4: NUMBER '009.'
|
||||
78,4-78,5: NEWLINE '\n'
|
||||
79,0-79,7: NUMBER '009e005'
|
||||
79,7-79,8: NEWLINE '\n'
|
||||
80,0-80,5: NUMBER '00123'
|
||||
80,5-80,6: NEWLINE '\n'
|
||||
82,0-82,1: NUMBER '1'
|
||||
82,2-82,4: NAME 'if'
|
||||
82,5-82,6: NUMBER '1'
|
||||
82,6-82,10: NAME 'else'
|
||||
82,11-82,12: NUMBER '0'
|
||||
82,12-82,13: NEWLINE '\n'
|
||||
83,0-83,0: ENDMARKER ''
|
||||
19
python/extractor/tests/tokenizer/pep484.py
Normal file
19
python/extractor/tests/tokenizer/pep484.py
Normal file
@@ -0,0 +1,19 @@
|
||||
#PEP 484 style annotations.
|
||||
|
||||
def func(callee_type: CallableType,
|
||||
formal_to_actual: List[List[int]],
|
||||
strict: bool = True) -> List[Type]:
|
||||
pass
|
||||
|
||||
|
||||
def func(self,
|
||||
name: str,
|
||||
args: List[str],
|
||||
*,
|
||||
cwd: str = None,
|
||||
env: Dict[str, str] = None) -> None:
|
||||
pass
|
||||
|
||||
def specials(self, *varargs: vanno, **kwargs: kwanno):
|
||||
pass
|
||||
|
||||
100
python/extractor/tests/tokenizer/pep484.tokens
Normal file
100
python/extractor/tests/tokenizer/pep484.tokens
Normal file
@@ -0,0 +1,100 @@
|
||||
1,0-1,27: COMMENT '#PEP 484 style annotations.'
|
||||
3,0-3,3: NAME 'def'
|
||||
3,4-3,8: NAME 'func'
|
||||
3,8-3,9: LPAR '('
|
||||
3,9-3,20: NAME 'callee_type'
|
||||
3,20-3,21: COLON ':'
|
||||
3,22-3,34: NAME 'CallableType'
|
||||
3,34-3,35: COMMA ','
|
||||
4,9-4,25: NAME 'formal_to_actual'
|
||||
4,25-4,26: COLON ':'
|
||||
4,27-4,31: NAME 'List'
|
||||
4,31-4,32: LSQB '['
|
||||
4,32-4,36: NAME 'List'
|
||||
4,36-4,37: LSQB '['
|
||||
4,37-4,40: NAME 'int'
|
||||
4,40-4,41: RSQB ']'
|
||||
4,41-4,42: RSQB ']'
|
||||
4,42-4,43: COMMA ','
|
||||
5,9-5,15: NAME 'strict'
|
||||
5,15-5,16: COLON ':'
|
||||
5,17-5,21: NAME 'bool'
|
||||
5,22-5,23: OP '='
|
||||
5,24-5,28: NAME 'True'
|
||||
5,28-5,29: RPAR ')'
|
||||
5,30-5,32: RARROW '->'
|
||||
5,33-5,37: NAME 'List'
|
||||
5,37-5,38: LSQB '['
|
||||
5,38-5,42: NAME 'Type'
|
||||
5,42-5,43: RSQB ']'
|
||||
5,43-5,44: COLON ':'
|
||||
5,44-5,45: NEWLINE '\n'
|
||||
6,0-6,4: INDENT ' '
|
||||
6,4-6,8: NAME 'pass'
|
||||
6,8-6,9: NEWLINE '\n'
|
||||
9,0-9,0: DEDENT ''
|
||||
9,0-9,3: NAME 'def'
|
||||
9,4-9,8: NAME 'func'
|
||||
9,8-9,9: LPAR '('
|
||||
9,9-9,13: NAME 'self'
|
||||
9,13-9,14: COMMA ','
|
||||
10,9-10,13: NAME 'name'
|
||||
10,13-10,14: COLON ':'
|
||||
10,15-10,18: NAME 'str'
|
||||
10,18-10,19: COMMA ','
|
||||
11,9-11,13: NAME 'args'
|
||||
11,13-11,14: COLON ':'
|
||||
11,15-11,19: NAME 'List'
|
||||
11,19-11,20: LSQB '['
|
||||
11,20-11,23: NAME 'str'
|
||||
11,23-11,24: RSQB ']'
|
||||
11,24-11,25: COMMA ','
|
||||
12,9-12,10: OP '*'
|
||||
12,10-12,11: COMMA ','
|
||||
13,9-13,12: NAME 'cwd'
|
||||
13,12-13,13: COLON ':'
|
||||
13,14-13,17: NAME 'str'
|
||||
13,18-13,19: OP '='
|
||||
13,20-13,24: NAME 'None'
|
||||
13,24-13,25: COMMA ','
|
||||
14,9-14,12: NAME 'env'
|
||||
14,12-14,13: COLON ':'
|
||||
14,14-14,18: NAME 'Dict'
|
||||
14,18-14,19: LSQB '['
|
||||
14,19-14,22: NAME 'str'
|
||||
14,22-14,23: COMMA ','
|
||||
14,24-14,27: NAME 'str'
|
||||
14,27-14,28: RSQB ']'
|
||||
14,29-14,30: OP '='
|
||||
14,31-14,35: NAME 'None'
|
||||
14,35-14,36: RPAR ')'
|
||||
14,37-14,39: RARROW '->'
|
||||
14,40-14,44: NAME 'None'
|
||||
14,44-14,45: COLON ':'
|
||||
14,45-14,46: NEWLINE '\n'
|
||||
15,0-15,4: INDENT ' '
|
||||
15,4-15,8: NAME 'pass'
|
||||
15,8-15,9: NEWLINE '\n'
|
||||
17,0-17,0: DEDENT ''
|
||||
17,0-17,3: NAME 'def'
|
||||
17,4-17,12: NAME 'specials'
|
||||
17,12-17,13: LPAR '('
|
||||
17,13-17,17: NAME 'self'
|
||||
17,17-17,18: COMMA ','
|
||||
17,19-17,20: OP '*'
|
||||
17,20-17,27: NAME 'varargs'
|
||||
17,27-17,28: COLON ':'
|
||||
17,29-17,34: NAME 'vanno'
|
||||
17,34-17,35: COMMA ','
|
||||
17,36-17,38: OP '**'
|
||||
17,38-17,44: NAME 'kwargs'
|
||||
17,44-17,45: COLON ':'
|
||||
17,46-17,52: NAME 'kwanno'
|
||||
17,52-17,53: RPAR ')'
|
||||
17,53-17,54: COLON ':'
|
||||
17,54-17,55: NEWLINE '\n'
|
||||
18,0-18,4: INDENT ' '
|
||||
18,4-18,8: NAME 'pass'
|
||||
18,8-18,9: NEWLINE '\n'
|
||||
20,0-20,0: DEDENT ''
|
||||
20,0-20,0: ENDMARKER ''
|
||||
11
python/extractor/tests/tokenizer/shift_jis.py
Normal file
11
python/extractor/tests/tokenizer/shift_jis.py
Normal file
@@ -0,0 +1,11 @@
|
||||
# encoding:shift-jis
|
||||
|
||||
#This is copied from the Python test library copyright PSF.
|
||||
|
||||
"""
|
||||
Python <20>̊J<CC8A><4A><EFBFBD>́A1990 <20>N<EFBFBD><4E><EFBFBD>납<EFBFBD><EB82A9><EFBFBD>J<EFBFBD>n<EFBFBD><6E><EFBFBD><EFBFBD><EFBFBD>Ă<EFBFBD><C482>܂<EFBFBD><DC82>B
|
||||
<EFBFBD>J<EFBFBD><EFBFBD><EFBFBD>҂<EFBFBD> Guido van Rossum <20>͋<EFBFBD><CD8B><EFBFBD><EFBFBD>p<EFBFBD>̃v<CC83><76><EFBFBD>O<EFBFBD><4F><EFBFBD>~<7E><><EFBFBD>O<EFBFBD><4F><EFBFBD><EFBFBD><EFBFBD>uABC<42>v<EFBFBD>̊J<CC8A><4A><EFBFBD>ɎQ<C98E><51><EFBFBD><EFBFBD><EFBFBD>Ă<EFBFBD><C482>܂<EFBFBD><DC82><EFBFBD><EFBFBD><EFBFBD><EFBFBD>AABC <20>͎<EFBFBD><CD8E>p<EFBFBD><70><EFBFBD>̖ړI<DA93>ɂ͂<C982><CD82>܂<EFBFBD><DC82>K<EFBFBD><4B><EFBFBD>Ă<EFBFBD><C482>܂<EFBFBD><DC82><EFBFBD><EFBFBD>ł<EFBFBD><C582><EFBFBD><EFBFBD>B
|
||||
<EFBFBD><EFBFBD><EFBFBD>̂悤<EFBFBD>Ȕw<EFBFBD>i<EFBFBD><EFBFBD><EFBFBD>琶<EFBFBD>܂ꂽ Python <20>̌<EFBFBD><CC8C><EFBFBD><EFBFBD>v<DD8C>́A<CD81>u<EFBFBD>V<EFBFBD><56><EFBFBD>v<EFBFBD><76><EFBFBD>v<EFBFBD>Łu<C581>K<EFBFBD><4B><EFBFBD><EFBFBD><EFBFBD>e<EFBFBD>Ձv<D581>Ƃ<EFBFBD><C682><EFBFBD><EFBFBD>ڕW<DA95>ɏd<C98F>_<EFBFBD><5F><EFBFBD>u<EFBFBD><75><EFBFBD><EFBFBD><EFBFBD>Ă<EFBFBD><C482>܂<EFBFBD><DC82>B
|
||||
<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>̃X<EFBFBD>N<EFBFBD><EFBFBD><EFBFBD>v<EFBFBD>g<EFBFBD>n<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ł̓<EFBFBD><EFBFBD>[<5B>U<EFBFBD>̖ڐ<CC96><DA90>̗<EFBFBD><CC97><EFBFBD><D690><EFBFBD><EFBFBD>D<EFBFBD>悵<EFBFBD>ĐF<C490>X<EFBFBD>ȋ@<40>\<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>v<EFBFBD>f<EFBFBD>Ƃ<EFBFBD><EFBFBD>Ď<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ꍇ<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>̂ł<EFBFBD><EFBFBD><EFBFBD><EFBFBD>APython <20>ł͂<C582><CD82><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>H<D78D><48><EFBFBD>lj<EFBFBD><C789><EFBFBD><EFBFBD><EFBFBD><EFBFBD>邱<EFBFBD>Ƃ͂<C682><CD82>܂肠<DC82><E882A0><EFBFBD>܂<EFBFBD><DC82><EFBFBD><EFBFBD>B
|
||||
<EFBFBD><EFBFBD><EFBFBD>ꎩ<EFBFBD>̂̋@<40>\<EFBFBD>͍ŏ<EFBFBD><EFBFBD><EFBFBD><EFBFBD>ɉ<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>A<EFBFBD>K<EFBFBD>v<EFBFBD>ȋ@<40>\<EFBFBD>͊g<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>W<EFBFBD><EFBFBD><EFBFBD>[<5B><><EFBFBD>Ƃ<EFBFBD><C682>Ēlj<C492><C789><EFBFBD><EFBFBD><EFBFBD><EFBFBD>A<EFBFBD>Ƃ<EFBFBD><C682><EFBFBD><EFBFBD>̂<EFBFBD> Python <20>̃|<7C><><EFBFBD>V<EFBFBD>[<5B>ł<EFBFBD><C582>B
|
||||
"""
|
||||
5
python/extractor/tests/tokenizer/shift_jis.tokens
Normal file
5
python/extractor/tests/tokenizer/shift_jis.tokens
Normal file
@@ -0,0 +1,5 @@
|
||||
1,0-1,20: COMMENT '# encoding:shift-jis'
|
||||
3,0-3,59: COMMENT '#This is copied from the Python test library copyright PSF.'
|
||||
5,0-11,3: STRING '"""\nPython の開発は、1990 年ごろから開始されています。\n開発者の Guido van Rossum は教育用のプログラミング言語「ABC」の開発に参加していましたが、ABC は実用上の目的にはあまり適していませんでした。\nこのような背景から生まれた Python の言語設計は、「シンプル」で「習得が容易」という目標に重点が置かれています。\n多くのスクリプト系言語ではユーザの目先の利便性を優先して色々な機能を言語要素として取り入れる場合が多いのですが、Python ではそういった小細工が追加されることはあまりありません。\n言語自体の機能は最小限に押さえ、必要な機能は拡張モジュールとして追加する、というのが Python のポリシーです。\n"""'
|
||||
11,3-11,4: NEWLINE '\n'
|
||||
12,0-12,0: ENDMARKER ''
|
||||
112
python/extractor/tests/tokenizer/strings.py
Normal file
112
python/extractor/tests/tokenizer/strings.py
Normal file
@@ -0,0 +1,112 @@
|
||||
|
||||
|
||||
#Raw
|
||||
|
||||
r'012345678'
|
||||
r'(\033|~{)'
|
||||
r'\A[+-]?\d+'
|
||||
r'(?P<name>[\w]+)|'
|
||||
r'\|\[\][123]|\{\}'
|
||||
r'^.$'
|
||||
r'[^A-Z]'
|
||||
|
||||
# With escapes
|
||||
|
||||
'\n'
|
||||
"\'"
|
||||
'\''
|
||||
"\""
|
||||
"\t\l\b"
|
||||
|
||||
|
||||
#F-strings
|
||||
|
||||
f''
|
||||
rf'hello'
|
||||
fr'hello'
|
||||
f'a{1+1}b'
|
||||
f'{x}{y}a{z}'
|
||||
#This is not legal in CPython, but we tokenize it anyway.
|
||||
f'a{'x'+"y"}b'
|
||||
|
||||
#Multiline f-string
|
||||
f'''
|
||||
In f-string expressions act as if parenthesised
|
||||
{
|
||||
x +
|
||||
y &
|
||||
z
|
||||
}
|
||||
end
|
||||
'''
|
||||
|
||||
#Multi-line
|
||||
|
||||
|
||||
r""" Single quotation character with multi-line
|
||||
|
||||
"a", 'b', "", ''
|
||||
....
|
||||
"""
|
||||
|
||||
r''' Single quotation character with multi-line
|
||||
|
||||
"a", 'b', "", ''
|
||||
....
|
||||
'''
|
||||
|
||||
#f-string conversions
|
||||
!a
|
||||
!s
|
||||
!r
|
||||
|
||||
f"{k}={v!r}"
|
||||
|
||||
#Implicit concatenation
|
||||
(f'{expr} text '
|
||||
'continuation'
|
||||
f'and{v}'
|
||||
)
|
||||
|
||||
#prefixes
|
||||
|
||||
u'{}\r{}{:<{width}}'
|
||||
u'{}\r{}{:<{}}'
|
||||
|
||||
#f-strings with format specifier
|
||||
f'result: {value:0.2f}'
|
||||
f'result: {value:{width}.{precision}}'
|
||||
|
||||
|
||||
f"Too {'many' if alen > elen else 'few'} parameters for {cls};"
|
||||
|
||||
# f-strings have special escaping rules for curly-brackets
|
||||
f'This should work \{foo}'
|
||||
rf'This should work \{foo}'
|
||||
|
||||
f'\}' # syntax error (we currently don't report this)
|
||||
f'\}}' # ok
|
||||
|
||||
|
||||
# f-strings with unicode literals of the form `\N{...}`
|
||||
f'{degrees:0.0f}\N{DEGREE SIGN}'
|
||||
f"{degrees:0.0f}\N{DEGREE SIGN}"
|
||||
f'''{degrees:0.0f}\N{DEGREE SIGN}'''
|
||||
f"""{degrees:0.0f}\N{DEGREE SIGN}"""
|
||||
|
||||
# double curlies in f-strings with various kinds of quoting
|
||||
f'{{ {foo} }}'
|
||||
f"{{ {foo} }}"
|
||||
f'''{{ {foo} }}'''
|
||||
f"""{{ {foo} }}"""
|
||||
|
||||
# Empty f-strings
|
||||
f''
|
||||
f""
|
||||
f''''''
|
||||
f""""""
|
||||
|
||||
|
||||
r'\NUL' # _Not_ a named unicode escape (`\N{...}`)
|
||||
|
||||
f'res: {val:{width:0}.{prec:1}}'
|
||||
211
python/extractor/tests/tokenizer/strings.tokens
Normal file
211
python/extractor/tests/tokenizer/strings.tokens
Normal file
@@ -0,0 +1,211 @@
|
||||
3,0-3,4: COMMENT '#Raw'
|
||||
5,0-5,12: STRING 'r\'012345678\''
|
||||
5,12-5,13: NEWLINE '\n'
|
||||
6,0-6,12: STRING 'r\'(\\033|~{)\''
|
||||
6,12-6,13: NEWLINE '\n'
|
||||
7,0-7,13: STRING 'r\'\\A[+-]?\\d+\''
|
||||
7,13-7,14: NEWLINE '\n'
|
||||
8,0-8,19: STRING 'r\'(?P<name>[\\w]+)|\''
|
||||
8,19-8,20: NEWLINE '\n'
|
||||
9,0-9,19: STRING 'r\'\\|\\[\\][123]|\\{\\}\''
|
||||
9,19-9,20: NEWLINE '\n'
|
||||
10,0-10,6: STRING 'r\'^.$\''
|
||||
10,6-10,7: NEWLINE '\n'
|
||||
11,0-11,9: STRING 'r\'[^A-Z]\''
|
||||
11,9-11,10: NEWLINE '\n'
|
||||
13,0-13,14: COMMENT '# With escapes'
|
||||
15,0-15,4: STRING '\'\\n\''
|
||||
15,4-15,5: NEWLINE '\n'
|
||||
16,0-16,4: STRING '"\\\'"'
|
||||
16,4-16,5: NEWLINE '\n'
|
||||
17,0-17,4: STRING '\'\\\'\''
|
||||
17,4-17,5: NEWLINE '\n'
|
||||
18,0-18,4: STRING '"\\""'
|
||||
18,4-18,5: NEWLINE '\n'
|
||||
19,0-19,8: STRING '"\\t\\l\\b"'
|
||||
19,8-19,9: NEWLINE '\n'
|
||||
22,0-22,10: COMMENT '#F-strings'
|
||||
24,0-24,3: STRING 'f\'\''
|
||||
24,3-24,4: NEWLINE '\n'
|
||||
25,0-25,9: STRING 'rf\'hello\''
|
||||
25,9-25,10: NEWLINE '\n'
|
||||
26,0-26,9: STRING 'fr\'hello\''
|
||||
26,9-26,10: NEWLINE '\n'
|
||||
27,0-27,4: FSTRING_START 'f\'a{'
|
||||
27,4-27,5: NUMBER '1'
|
||||
27,5-27,6: OP '+'
|
||||
27,6-27,7: NUMBER '1'
|
||||
27,7-27,10: FSTRING_END '}b\''
|
||||
27,10-27,11: NEWLINE '\n'
|
||||
28,0-28,3: FSTRING_START 'f\'{'
|
||||
28,3-28,4: NAME 'x'
|
||||
28,4-28,6: FSTRING_MID '}{'
|
||||
28,6-28,7: NAME 'y'
|
||||
28,7-28,10: FSTRING_MID '}a{'
|
||||
28,10-28,11: NAME 'z'
|
||||
28,11-28,13: FSTRING_END '}\''
|
||||
28,13-28,14: NEWLINE '\n'
|
||||
29,0-29,57: COMMENT '#This is not legal in CPython, but we tokenize it anyway.'
|
||||
30,0-30,4: FSTRING_START 'f\'a{'
|
||||
30,4-30,7: STRING '\'x\''
|
||||
30,7-30,8: OP '+'
|
||||
30,8-30,11: STRING '"y"'
|
||||
30,11-30,14: FSTRING_END '}b\''
|
||||
30,14-30,15: NEWLINE '\n'
|
||||
32,0-32,19: COMMENT '#Multiline f-string'
|
||||
33,0-35,1: FSTRING_START 'f\'\'\'\n In f-string expressions act as if parenthesised\n{'
|
||||
36,4-36,5: NAME 'x'
|
||||
36,6-36,7: OP '+'
|
||||
37,4-37,5: NAME 'y'
|
||||
37,6-37,7: OP '&'
|
||||
38,6-38,7: NAME 'z'
|
||||
39,0-41,3: FSTRING_END '}\nend\n\'\'\''
|
||||
41,3-41,4: NEWLINE '\n'
|
||||
43,0-43,11: COMMENT '#Multi-line'
|
||||
46,0-50,3: STRING 'r""" Single quotation character with multi-line\n\n"a", \'b\', "", \'\'\n....\n"""'
|
||||
50,3-50,4: NEWLINE '\n'
|
||||
52,0-56,3: STRING 'r\'\'\' Single quotation character with multi-line\n\n"a", \'b\', "", \'\'\n....\n\'\'\''
|
||||
56,3-56,4: NEWLINE '\n'
|
||||
58,0-58,21: COMMENT '#f-string conversions'
|
||||
59,0-59,2: CONVERSION '!a'
|
||||
59,2-59,3: NEWLINE '\n'
|
||||
60,0-60,2: CONVERSION '!s'
|
||||
60,2-60,3: NEWLINE '\n'
|
||||
61,0-61,2: CONVERSION '!r'
|
||||
61,2-61,3: NEWLINE '\n'
|
||||
63,0-63,3: FSTRING_START 'f"{'
|
||||
63,3-63,4: NAME 'k'
|
||||
63,4-63,7: FSTRING_MID '}={'
|
||||
63,7-63,8: NAME 'v'
|
||||
63,8-63,10: CONVERSION '!r'
|
||||
63,10-63,12: FSTRING_END '}"'
|
||||
63,12-63,13: NEWLINE '\n'
|
||||
65,0-65,23: COMMENT '#Implicit concatenation'
|
||||
66,0-66,1: LPAR '('
|
||||
66,1-66,4: FSTRING_START 'f\'{'
|
||||
66,4-66,8: NAME 'expr'
|
||||
66,8-66,16: FSTRING_END '} text \''
|
||||
67,4-67,18: STRING '\'continuation\''
|
||||
68,4-68,10: FSTRING_START 'f\'and{'
|
||||
68,10-68,11: NAME 'v'
|
||||
68,11-68,13: FSTRING_END '}\''
|
||||
69,0-69,1: RPAR ')'
|
||||
69,1-69,2: NEWLINE '\n'
|
||||
71,0-71,9: COMMENT '#prefixes'
|
||||
73,0-73,20: STRING 'u\'{}\\r{}{:<{width}}\''
|
||||
73,20-73,21: NEWLINE '\n'
|
||||
74,0-74,15: STRING 'u\'{}\\r{}{:<{}}\''
|
||||
74,15-74,16: NEWLINE '\n'
|
||||
76,0-76,32: COMMENT '#f-strings with format specifier'
|
||||
77,0-77,11: FSTRING_START 'f\'result: {'
|
||||
77,11-77,16: NAME 'value'
|
||||
77,16-77,17: COLON ':'
|
||||
77,17-77,21: FSTRING_SPEC '0.2f'
|
||||
77,21-77,23: FSTRING_END '}\''
|
||||
77,23-77,24: NEWLINE '\n'
|
||||
78,0-78,11: FSTRING_START 'f\'result: {'
|
||||
78,11-78,16: NAME 'value'
|
||||
78,16-78,17: COLON ':'
|
||||
78,17-78,18: FSTRING_SPEC '{'
|
||||
78,18-78,23: NAME 'width'
|
||||
78,23-78,26: FSTRING_SPEC '}.{'
|
||||
78,26-78,35: NAME 'precision'
|
||||
78,35-78,36: FSTRING_SPEC '}'
|
||||
78,36-78,38: FSTRING_END '}\''
|
||||
78,38-78,39: NEWLINE '\n'
|
||||
81,0-81,7: FSTRING_START 'f"Too {'
|
||||
81,7-81,13: STRING '\'many\''
|
||||
81,14-81,16: NAME 'if'
|
||||
81,17-81,21: NAME 'alen'
|
||||
81,22-81,23: OP '>'
|
||||
81,24-81,28: NAME 'elen'
|
||||
81,29-81,33: NAME 'else'
|
||||
81,34-81,39: STRING '\'few\''
|
||||
81,39-81,57: FSTRING_MID '} parameters for {'
|
||||
81,57-81,60: NAME 'cls'
|
||||
81,60-81,63: FSTRING_END '};"'
|
||||
81,63-81,64: NEWLINE '\n'
|
||||
83,0-83,58: COMMENT '# f-strings have special escaping rules for curly-brackets'
|
||||
84,0-84,21: FSTRING_START 'f\'This should work \\{'
|
||||
84,21-84,24: NAME 'foo'
|
||||
84,24-84,26: FSTRING_END '}\''
|
||||
84,26-84,27: NEWLINE '\n'
|
||||
85,0-85,22: FSTRING_START 'rf\'This should work \\{'
|
||||
85,22-85,25: NAME 'foo'
|
||||
85,25-85,27: FSTRING_END '}\''
|
||||
85,27-85,28: NEWLINE '\n'
|
||||
87,0-87,5: STRING 'f\'\\}\''
|
||||
87,6-87,53: COMMENT '# syntax error (we currently don\'t report this)'
|
||||
87,53-87,54: NEWLINE '\n'
|
||||
88,0-88,6: STRING 'f\'\\}}\''
|
||||
88,7-88,11: COMMENT '# ok'
|
||||
88,11-88,12: NEWLINE '\n'
|
||||
91,0-91,55: COMMENT '# f-strings with unicode literals of the form `\\N{...}`'
|
||||
92,0-92,3: FSTRING_START 'f\'{'
|
||||
92,3-92,10: NAME 'degrees'
|
||||
92,10-92,11: COLON ':'
|
||||
92,11-92,15: FSTRING_SPEC '0.0f'
|
||||
92,15-92,32: FSTRING_END '}\\N{DEGREE SIGN}\''
|
||||
92,32-92,33: NEWLINE '\n'
|
||||
93,0-93,3: FSTRING_START 'f"{'
|
||||
93,3-93,10: NAME 'degrees'
|
||||
93,10-93,11: COLON ':'
|
||||
93,11-93,15: FSTRING_SPEC '0.0f'
|
||||
93,15-93,32: FSTRING_END '}\\N{DEGREE SIGN}"'
|
||||
93,32-93,33: NEWLINE '\n'
|
||||
94,0-94,5: FSTRING_START 'f\'\'\'{'
|
||||
94,5-94,12: NAME 'degrees'
|
||||
94,12-94,13: COLON ':'
|
||||
94,13-94,17: FSTRING_SPEC '0.0f'
|
||||
94,17-94,36: FSTRING_END '}\\N{DEGREE SIGN}\'\'\''
|
||||
94,36-94,37: NEWLINE '\n'
|
||||
95,0-95,5: FSTRING_START 'f"""{'
|
||||
95,5-95,12: NAME 'degrees'
|
||||
95,12-95,13: COLON ':'
|
||||
95,13-95,17: FSTRING_SPEC '0.0f'
|
||||
95,17-95,36: FSTRING_END '}\\N{DEGREE SIGN}"""'
|
||||
95,36-95,37: NEWLINE '\n'
|
||||
97,0-97,59: COMMENT '# double curlies in f-strings with various kinds of quoting'
|
||||
98,0-98,6: FSTRING_START 'f\'{{ {'
|
||||
98,6-98,9: NAME 'foo'
|
||||
98,9-98,14: FSTRING_END '} }}\''
|
||||
98,14-98,15: NEWLINE '\n'
|
||||
99,0-99,6: FSTRING_START 'f"{{ {'
|
||||
99,6-99,9: NAME 'foo'
|
||||
99,9-99,14: FSTRING_END '} }}"'
|
||||
99,14-99,15: NEWLINE '\n'
|
||||
100,0-100,8: FSTRING_START 'f\'\'\'{{ {'
|
||||
100,8-100,11: NAME 'foo'
|
||||
100,11-100,18: FSTRING_END '} }}\'\'\''
|
||||
100,18-100,19: NEWLINE '\n'
|
||||
101,0-101,8: FSTRING_START 'f"""{{ {'
|
||||
101,8-101,11: NAME 'foo'
|
||||
101,11-101,18: FSTRING_END '} }}"""'
|
||||
101,18-101,19: NEWLINE '\n'
|
||||
103,0-103,17: COMMENT '# Empty f-strings'
|
||||
104,0-104,3: STRING 'f\'\''
|
||||
104,3-104,4: NEWLINE '\n'
|
||||
105,0-105,3: STRING 'f""'
|
||||
105,3-105,4: NEWLINE '\n'
|
||||
106,0-106,7: STRING 'f\'\'\'\'\'\''
|
||||
106,7-106,8: NEWLINE '\n'
|
||||
107,0-107,7: STRING 'f""""""'
|
||||
107,7-107,8: NEWLINE '\n'
|
||||
110,0-110,7: STRING 'r\'\\NUL\''
|
||||
110,8-110,50: COMMENT '# _Not_ a named unicode escape (`\\N{...}`)'
|
||||
110,50-110,51: NEWLINE '\n'
|
||||
112,0-112,8: FSTRING_START 'f\'res: {'
|
||||
112,8-112,11: NAME 'val'
|
||||
112,11-112,12: COLON ':'
|
||||
112,12-112,13: FSTRING_SPEC '{'
|
||||
112,13-112,18: NAME 'width'
|
||||
112,18-112,19: COLON ':'
|
||||
112,19-112,20: NUMBER '0'
|
||||
112,20-112,23: FSTRING_SPEC '}.{'
|
||||
112,23-112,27: NAME 'prec'
|
||||
112,27-112,28: COLON ':'
|
||||
112,28-112,29: NUMBER '1'
|
||||
112,29-112,30: FSTRING_SPEC '}'
|
||||
112,30-112,32: FSTRING_END '}\''
|
||||
112,32-112,33: NEWLINE '\n'
|
||||
113,0-113,0: ENDMARKER ''
|
||||
3
python/extractor/tests/tokenizer/tab.py
Normal file
3
python/extractor/tests/tokenizer/tab.py
Normal file
@@ -0,0 +1,3 @@
|
||||
|
||||
class C(object):
|
||||
pass
|
||||
12
python/extractor/tests/tokenizer/tab.tokens
Normal file
12
python/extractor/tests/tokenizer/tab.tokens
Normal file
@@ -0,0 +1,12 @@
|
||||
2,0-2,5: NAME 'class'
|
||||
2,6-2,7: NAME 'C'
|
||||
2,7-2,8: LPAR '('
|
||||
2,8-2,14: NAME 'object'
|
||||
2,14-2,15: RPAR ')'
|
||||
2,15-2,16: COLON ':'
|
||||
2,16-2,17: NEWLINE '\n'
|
||||
3,0-3,1: INDENT '\t'
|
||||
3,1-3,5: NAME 'pass'
|
||||
3,5-3,6: NEWLINE '\n'
|
||||
4,0-4,0: DEDENT ''
|
||||
4,0-4,0: ENDMARKER ''
|
||||
84
python/extractor/tests/tokenizer/temp.tokens
Normal file
84
python/extractor/tests/tokenizer/temp.tokens
Normal file
@@ -0,0 +1,84 @@
|
||||
2,0-2,22: COMMENT '#Some negative numbers'
|
||||
4,0-4,1: OP '-'
|
||||
4,1-4,2: NUMBER '1'
|
||||
4,2-4,3: NEWLINE '\n'
|
||||
5,0-5,1: OP '-'
|
||||
5,1-5,18: NUMBER '10000000000000000'
|
||||
5,18-5,19: NEWLINE '\n'
|
||||
6,0-6,1: OP '-'
|
||||
6,1-6,4: NUMBER '1.0'
|
||||
6,4-6,5: NEWLINE '\n'
|
||||
7,0-7,1: OP '-'
|
||||
7,1-7,7: NUMBER '3.0e17'
|
||||
7,7-7,8: NEWLINE '\n'
|
||||
9,0-9,1: OP '-'
|
||||
9,1-9,2: LPAR '('
|
||||
9,2-9,3: NUMBER '1'
|
||||
9,3-9,4: RPAR ')'
|
||||
9,4-9,5: NEWLINE '\n'
|
||||
10,0-10,1: OP '-'
|
||||
10,1-10,2: LPAR '('
|
||||
10,2-10,19: NUMBER '10000000000000000'
|
||||
10,19-10,20: RPAR ')'
|
||||
10,20-10,21: NEWLINE '\n'
|
||||
11,0-11,1: OP '-'
|
||||
11,1-11,2: LPAR '('
|
||||
11,2-11,5: NUMBER '1.0'
|
||||
11,5-11,6: RPAR ')'
|
||||
11,6-11,7: NEWLINE '\n'
|
||||
12,0-12,1: OP '-'
|
||||
12,1-12,2: LPAR '('
|
||||
12,2-12,8: NUMBER '3.0e17'
|
||||
12,8-12,9: RPAR ')'
|
||||
12,9-12,10: NEWLINE '\n'
|
||||
14,0-14,1: LPAR '('
|
||||
14,1-14,2: OP '-'
|
||||
14,2-14,3: NUMBER '1'
|
||||
14,3-14,4: RPAR ')'
|
||||
14,4-14,5: NEWLINE '\n'
|
||||
15,0-15,1: LPAR '('
|
||||
15,1-15,2: OP '-'
|
||||
15,2-15,19: NUMBER '10000000000000000'
|
||||
15,19-15,20: RPAR ')'
|
||||
15,20-15,21: NEWLINE '\n'
|
||||
16,0-16,1: LPAR '('
|
||||
16,1-16,2: OP '-'
|
||||
16,2-16,5: NUMBER '1.0'
|
||||
16,5-16,6: RPAR ')'
|
||||
16,6-16,7: NEWLINE '\n'
|
||||
17,0-17,1: LPAR '('
|
||||
17,1-17,2: OP '-'
|
||||
17,2-17,8: NUMBER '3.0e17'
|
||||
17,8-17,9: RPAR ')'
|
||||
17,9-17,10: NEWLINE '\n'
|
||||
19,0-19,1: OP '-'
|
||||
19,1-19,3: NUMBER '1j'
|
||||
19,3-19,4: NEWLINE '\n'
|
||||
21,0-21,1: OP '-'
|
||||
21,1-21,8: NUMBER '3.7e12j'
|
||||
21,8-21,9: NEWLINE '\n'
|
||||
23,0-23,19: COMMENT '#Some other numbers'
|
||||
24,0-24,20: NUMBER '0.058823529630899429'
|
||||
24,20-24,21: NEWLINE '\n'
|
||||
26,0-26,5: NUMBER '1e-06'
|
||||
26,5-26,6: NEWLINE '\n'
|
||||
27,0-27,8: NUMBER '.9999999'
|
||||
27,8-27,9: NEWLINE '\n'
|
||||
28,0-28,8: NUMBER '0xffffff'
|
||||
28,8-28,9: NEWLINE '\n'
|
||||
29,0-29,4: NUMBER '1e10'
|
||||
29,4-29,5: NEWLINE '\n'
|
||||
30,0-30,2: NUMBER '1.'
|
||||
30,2-30,3: NEWLINE '\n'
|
||||
31,0-31,10: NUMBER '2.79252680'
|
||||
31,10-31,11: NEWLINE '\n'
|
||||
32,0-32,9: NUMBER '0x0001000'
|
||||
32,9-32,10: NEWLINE '\n'
|
||||
33,0-33,55: NUMBER '4987312561856745907287624786230562734672583763984576267'
|
||||
33,55-33,56: NEWLINE '\n'
|
||||
35,0-35,18: COMMENT '#Octal both styles'
|
||||
36,0-36,4: NUMBER '0777'
|
||||
36,4-36,5: NEWLINE '\n'
|
||||
37,0-37,5: NUMBER '0o777'
|
||||
37,5-37,6: NEWLINE '\n'
|
||||
39,0-39,0: ENDMARKER ''
|
||||
2
python/extractor/tests/tokenizer/utf8.py
Normal file
2
python/extractor/tests/tokenizer/utf8.py
Normal file
@@ -0,0 +1,2 @@
|
||||
# Some abitrary prefix with no space beforecoding: utf-8 -*-
|
||||
# €€€€
|
||||
3
python/extractor/tests/tokenizer/utf8.tokens
Normal file
3
python/extractor/tests/tokenizer/utf8.tokens
Normal file
@@ -0,0 +1,3 @@
|
||||
1,0-1,60: COMMENT '# Some abitrary prefix with no space beforecoding: utf-8 -*-'
|
||||
2,0-2,6: COMMENT '# €€€€'
|
||||
3,0-3,0: ENDMARKER ''
|
||||
1
python/extractor/tests/tokenizer/utf8_bom.py
Normal file
1
python/extractor/tests/tokenizer/utf8_bom.py
Normal file
@@ -0,0 +1 @@
|
||||
#Starts with a BOM
|
||||
2
python/extractor/tests/tokenizer/utf8_bom.tokens
Normal file
2
python/extractor/tests/tokenizer/utf8_bom.tokens
Normal file
@@ -0,0 +1,2 @@
|
||||
1,0-1,18: COMMENT '#Starts with a BOM'
|
||||
2,0-2,0: ENDMARKER ''
|
||||
Reference in New Issue
Block a user