@@ -1,6 +1,6 @@
 from test import support
 from test.support import os_helper
-from tokenize import (tokenize, _tokenize, untokenize, NUMBER, NAME, OP,
+from tokenize import (tokenize, untokenize, NUMBER, NAME, OP,
                      STRING, ENDMARKER, ENCODING, tok_name, detect_encoding,
                      open as tokenize_open, Untokenizer, generate_tokens,
                      NEWLINE, _generate_tokens_from_c_tokenizer, DEDENT, TokenInfo)
@@ -50,6 +50,13 @@ def check_tokenize(self, s, expected):
         self.assertEqual(result,
                          ["    ENCODING   'utf-8'       (0, 0) (0, 0)"] +
                          expected.rstrip().splitlines())
+
+    def test_invalid_readline(self):
+        def gen():
+            yield "sdfosdg"
+            yield "sdfosdg"
+        with self.assertRaises(TypeError):
+            list(tokenize(gen().__next__))
 
     def test_implicit_newline(self):
         # Make sure that the tokenizer puts in an implicit NEWLINE
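
A note on the new test_invalid_readline case: tokenize() expects its readline callable to return bytes, and feeding it str lines should fail up front with TypeError rather than misbehave later in decoding. A minimal sketch of the contract using only the public API (illustrative; not part of the diff):

    from io import BytesIO
    from tokenize import tokenize

    # readline must return bytes; BytesIO.readline satisfies the contract
    for tok in tokenize(BytesIO(b"1 + 1\n").readline):
        print(tok)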
@@ -1154,7 +1161,8 @@ class TestTokenizerAdheresToPep0263(TestCase):
 
     def _testFile(self, filename):
         path = os.path.join(os.path.dirname(__file__), filename)
-        TestRoundtrip.check_roundtrip(self, open(path, 'rb'))
+        with open(path, 'rb') as f:
+            TestRoundtrip.check_roundtrip(self, f)
 
     def test_utf8_coding_cookie_and_no_utf8_bom(self):
         f = 'tokenize_tests-utf8-coding-cookie-and-no-utf8-bom-sig.txt'
@@ -1199,7 +1207,8 @@ def readline():
             yield b''
 
         # skip the initial encoding token and the end tokens
-        tokens = list(_tokenize(readline(), encoding='utf-8'))[:-2]
+        tokens = list(_generate_tokens_from_c_tokenizer(readline().__next__, encoding='utf-8',
+                                                        extra_tokens=True))[:-2]
         expected_tokens = [TokenInfo(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"\n')]
         self.assertEqual(tokens, expected_tokens,
                          "bytes not decoded with encoding")
@@ -1468,13 +1477,13 @@ def test_tokenize(self):
         def mock_detect_encoding(readline):
             return encoding, [b'first', b'second']
 
-        def mock__tokenize(readline, encoding):
+        def mock__tokenize(readline, encoding, **kwargs):
             nonlocal encoding_used
             encoding_used = encoding
             out = []
             while True:
                 try:
-                    next_line = next(readline)
+                    next_line = readline()
                 except StopIteration:
                     return out
                 if next_line:
@@ -1491,16 +1500,16 @@ def mock_readline():
             return str(counter).encode()
 
         orig_detect_encoding = tokenize_module.detect_encoding
-        orig__tokenize = tokenize_module._tokenize
+        orig_c_token = tokenize_module._generate_tokens_from_c_tokenizer
         tokenize_module.detect_encoding = mock_detect_encoding
-        tokenize_module._tokenize = mock__tokenize
+        tokenize_module._generate_tokens_from_c_tokenizer = mock__tokenize
         try:
             results = tokenize(mock_readline)
             self.assertEqual(list(results)[1:],
                              [b'first', b'second', b'1', b'2', b'3', b'4'])
         finally:
             tokenize_module.detect_encoding = orig_detect_encoding
-            tokenize_module._tokenize = orig__tokenize
+            tokenize_module._generate_tokens_from_c_tokenizer = orig_c_token
 
         self.assertEqual(encoding_used, encoding)
 
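The mock changes above track an internal refactor: tokenize() now reaches _generate_tokens_from_c_tokenizer() rather than the removed _tokenize(), so the test patches that module attribute, and the fake accepts **kwargs to tolerate whatever extra keyword arguments the wrapper passes. A sketch of the same swap-and-restore pattern (fake_c_tokenizer is hypothetical; 'utf-8' is what detect_encoding reports for plain source):

    import io
    import tokenize as tokenize_module

    calls = []

    def fake_c_tokenizer(readline, encoding, **kwargs):
        calls.append(encoding)   # record what the wrapper passed down
        return iter(())          # behave like an empty token stream

    orig = tokenize_module._generate_tokens_from_c_tokenizer
    tokenize_module._generate_tokens_from_c_tokenizer = fake_c_tokenizer
    try:
        list(tokenize_module.tokenize(io.BytesIO(b"pass\n").readline))
    finally:
        tokenize_module._generate_tokens_from_c_tokenizer = orig

    print(calls)  # expected: ['utf-8']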
@@ -1827,9 +1836,10 @@ class CTokenizeTest(TestCase):
     def check_tokenize(self, s, expected):
         # Format the tokens in s in a table format.
         # The ENDMARKER and final NEWLINE are omitted.
+        f = StringIO(s)
         with self.subTest(source=s):
             result = stringify_tokens_from_source(
-                _generate_tokens_from_c_tokenizer(s), s
+                _generate_tokens_from_c_tokenizer(f.readline), s
             )
             self.assertEqual(result, expected.rstrip().splitlines())
 
@@ -2668,43 +2678,44 @@ def test_unicode(self):
 
     def test_invalid_syntax(self):
         def get_tokens(string):
-            return list(_generate_tokens_from_c_tokenizer(string))
-
-        self.assertRaises(SyntaxError, get_tokens, "(1+2]")
-        self.assertRaises(SyntaxError, get_tokens, "(1+2}")
-        self.assertRaises(SyntaxError, get_tokens, "{1+2]")
-
-        self.assertRaises(SyntaxError, get_tokens, "1_")
-        self.assertRaises(SyntaxError, get_tokens, "1.2_")
-        self.assertRaises(SyntaxError, get_tokens, "1e2_")
-        self.assertRaises(SyntaxError, get_tokens, "1e+")
-
-        self.assertRaises(SyntaxError, get_tokens, "\xa0")
-        self.assertRaises(SyntaxError, get_tokens, "€")
-
-        self.assertRaises(SyntaxError, get_tokens, "0b12")
-        self.assertRaises(SyntaxError, get_tokens, "0b1_2")
-        self.assertRaises(SyntaxError, get_tokens, "0b2")
-        self.assertRaises(SyntaxError, get_tokens, "0b1_")
-        self.assertRaises(SyntaxError, get_tokens, "0b")
-        self.assertRaises(SyntaxError, get_tokens, "0o18")
-        self.assertRaises(SyntaxError, get_tokens, "0o1_8")
-        self.assertRaises(SyntaxError, get_tokens, "0o8")
-        self.assertRaises(SyntaxError, get_tokens, "0o1_")
-        self.assertRaises(SyntaxError, get_tokens, "0o")
-        self.assertRaises(SyntaxError, get_tokens, "0x1_")
-        self.assertRaises(SyntaxError, get_tokens, "0x")
-        self.assertRaises(SyntaxError, get_tokens, "1_")
-        self.assertRaises(SyntaxError, get_tokens, "012")
-        self.assertRaises(SyntaxError, get_tokens, "1.2_")
-        self.assertRaises(SyntaxError, get_tokens, "1e2_")
-        self.assertRaises(SyntaxError, get_tokens, "1e+")
-
-        self.assertRaises(SyntaxError, get_tokens, "'sdfsdf")
-        self.assertRaises(SyntaxError, get_tokens, "'''sdfsdf''")
-
-        self.assertRaises(SyntaxError, get_tokens, "(" * 1000 + "a" + ")" * 1000)
-        self.assertRaises(SyntaxError, get_tokens, "]")
+            the_string = StringIO(string)
+            return list(_generate_tokens_from_c_tokenizer(the_string.readline))
+
+        for case in [
+            "(1+2]",
+            "(1+2}",
+            "{1+2]",
+            "1_",
+            "1.2_",
+            "1e2_",
+            "1e+",
+
+            "\xa0",
+            "€",
+            "0b12",
+            "0b1_2",
+            "0b2",
+            "0b1_",
+            "0b",
+            "0o18",
+            "0o1_8",
+            "0o8",
+            "0o1_",
+            "0o",
+            "0x1_",
+            "0x",
+            "1_",
+            "012",
+            "1.2_",
+            "1e2_",
+            "1e+",
+            "'sdfsdf",
+            "'''sdfsdf''",
+            "(" * 1000 + "a" + ")" * 1000,
+            "]",
+        ]:
+            with self.subTest(case=case):
+                self.assertRaises(SyntaxError, get_tokens, case)
 
     def test_max_indent(self):
         MAXINDENT = 100
@@ -2715,20 +2726,24 @@ def generate_source(indents):
             return source
 
         valid = generate_source(MAXINDENT - 1)
-        tokens = list(_generate_tokens_from_c_tokenizer(valid))
+        the_input = StringIO(valid)
+        tokens = list(_generate_tokens_from_c_tokenizer(the_input.readline))
         self.assertEqual(tokens[-2].type, DEDENT)
         self.assertEqual(tokens[-1].type, ENDMARKER)
         compile(valid, "<string>", "exec")
 
         invalid = generate_source(MAXINDENT)
-        self.assertRaises(SyntaxError, lambda: list(_generate_tokens_from_c_tokenizer(invalid)))
+        the_input = StringIO(invalid)
+        self.assertRaises(SyntaxError, lambda: list(_generate_tokens_from_c_tokenizer(the_input.readline)))
         self.assertRaises(
             IndentationError, compile, invalid, "<string>", "exec"
         )
 
     def test_continuation_lines_indentation(self):
         def get_tokens(string):
-            return [(kind, string) for (kind, string, *_) in _generate_tokens_from_c_tokenizer(string)]
+            the_string = StringIO(string)
+            return [(kind, string) for (kind, string, *_)
+                    in _generate_tokens_from_c_tokenizer(the_string.readline)]
 
         code = dedent("""
             def fib(n):
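
The hunks above all apply the same recipe: _generate_tokens_from_c_tokenizer no longer takes a source string, so each call site wraps the string in StringIO and passes its bound readline method. A standalone sketch of the new calling convention (this is a private CPython helper; the signature shown matches this commit and may change):

    from io import StringIO
    from tokenize import _generate_tokens_from_c_tokenizer

    source = "a = 1 + 2\n"
    # the C tokenizer now pulls input one line at a time via readline
    for tok in _generate_tokens_from_c_tokenizer(StringIO(source).readline):
        print(tok)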