Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1#@+leo-ver=5-thin 

2#@+node:ekr.20211209153303.1: * @file ../plugins/importers/python.py 

3"""The new, tokenize based, @auto importer for Python.""" 

4import sys 

5import tokenize 

6import token 

7from collections import defaultdict 

8import leo.core.leoGlobals as g 

9#@+others 

10#@+node:ekr.20211209052710.1: ** do_import 

def do_import(c, s, parent):
    """
    Import the python source text `s` into the outline below `parent`.

    The text is split into nodes by split_root, then the language and
    tab-width directives are prepended to parent's body.  When the
    'put-class-in-imported-headlines' setting is true, every node in
    parent's subtree whose body has a line starting with 'class ' gets
    'class ' prepended to its headline.

    Returns False (after printing a message) when running on a python
    older than 3.7, True otherwise.
    """
    running_old_python = sys.version_info < (3, 7, 0)
    if running_old_python:
        g.es_print('The python importer requires python 3.7 or above')
        return False

    split_root(parent, s.splitlines(True))
    parent.b = f'@language python\n@tabwidth -4\n{parent.b}'

    if not c.config.getBool('put-class-in-imported-headlines'):
        return True

    # Only descendants are renamed: parent.h itself is never changed.
    for node in parent.subtree():
        text = node.b
        if text.startswith('class ') or '\nclass ' in text:
            node.h = f'class {node.h}'
    return True

23#@+node:vitalije.20211201230203.1: ** split_root 

# Nodes spanning fewer lines than this are never split into child nodes.
SPLIT_THRESHOLD = 10
def split_root(root, lines):
    """
    Parse the given lines and separate all top level function
    definitions and class definitions into separate nodes which
    are all direct children of the root. All longer class
    nodes are further divided, each method in a separate node.

    This function puts comments and decorators in the same node
    above the definition.

    Parameters:
        root  - the node that receives the outline.  All its children are
                deleted first.  It must provide `b`, `deleteAllChildren()`
                and `insertAsLastChild()`; created children get `h` and `b`.
        lines - the source text as a list of lines, line ends included.

    Returns the list of definition tuples that were found (see the
    description of the tuple layout near the end of this function).
    """
    #@+others
    #@+node:vitalije.20211208183603.1: *3* is_intro_line
    def is_intro_line(n, col):
        """
        Return True if line n may belong to the `intro` of a definition
        that starts at column col.

        An intro line is either a comment line that starts at the same
        column as the def/class line, a decorator line at that column, or
        a line on which no significant token starts (a blank line).
        """
        # We don't want white space tokens: we are interested only in
        # the tokens containing some text.
        xs = [x for x in lntokens[n] if x[0] not in (token.DEDENT, token.INDENT, token.NL)]
        if not xs:
            # All tokens in this line are white space. We want to allow
            # a blank line in the block of comments.
            return True

        t = xs[0]  # the first non blank token in the line n
        if t[2][1] != col:
            # Not at the same column as the definition.
            return False
        is_decorator = t[0] == token.OP and t[1] == '@'
        return is_decorator or t[0] == token.COMMENT
    #@+node:vitalije.20211208084231.1: *3* get_intro
    def get_intro(row, col):
        """
        Return the number of preceding lines that can be considered as an
        `intro` to the function/class/method definition at line row.
        """
        last = row
        for i in range(row - 1, 0, -1):
            if is_intro_line(i, col):
                last = i
            else:
                break
        # We don't want the `intro` to *start* with a bunch of blank lines:
        # they are better added to the end of the preceding node.
        # Stop at the first non-blank line, so that blank lines *inside*
        # the comment block do not throw away the comments above them.
        for i in range(last, row):
            if lines[i - 1].isspace():
                last = i + 1
            else:
                break
        return row - last
    #@+node:vitalije.20211206182505.1: *3* mkreadline
    def mkreadline(lines):
        """Return a readline-like callable for tokenize: '' signals the end."""
        itlines = iter(lines)
        return lambda: next(itlines, '')
    #@+node:vitalije.20211208092828.1: *3* itoks
    def itoks(i):
        """Yield (index, token) pairs for all raw tokens from index i on."""
        yield from enumerate(rawtokens[i:], start=i)
    #@+node:vitalije.20211208092833.1: *3* search
    def search(i, k):
        """Yield (index, token) pairs for tokens of type k from index i on."""
        for j, t in itoks(i):
            if t[0] == k:
                yield j, t
    #@+node:vitalije.20211208092910.1: *3* getdefn
    def getdefn(start):
        """
        Return the definition tuple for the definition starting at token
        index start, or None if no definition starts there.

        The tuple is (col, h1, h2, start_b, kind, name, c_ind, end_b).
        """
        tok = rawtokens[start]
        if tok[0] != token.NAME or tok[1] not in ('async', 'def', 'class'):
            return None

        if tok[1] == 'async':
            kind = rawtokens[start + 1][1]
            if kind != 'def':
                # `async for` and `async with` are statements, not definitions.
                return None
            name = rawtokens[start + 2][1]
        else:
            kind = tok[1]
            name = rawtokens[start + 1][1]
            if kind == 'def' and start > 0 and rawtokens[start - 1][1] == 'async':
                # This `def` was already handled together with its `async`.
                return None
        a, col = tok[2]

        # Now we are searching for the end of the definition line.
        # This one logical line may be divided in several physical
        # lines. At the end of this logical line, there will be a
        # NEWLINE token.
        header_end = next(search(start + 1, token.NEWLINE), None)
        if header_end is None:
            # It isn't possible to calculate the remaining values.
            return None
        i, t = header_end
        end_h = t[2][0]  # the last of the `header lines`
        end_b = end_h  # in case we have a oneliner
        start_b = end_h + 1  # the indented body starts on the next line

        # To check if we have a oneline definition or not we look forward
        # to see which token comes first: INDENT or NEWLINE.
        first_indent = next(search(i + 1, token.INDENT), None)
        next_newline = next(search(i + 1, token.NEWLINE), None)
        if first_indent is None or next_newline is None:
            oneliner = True
        else:
            # INDENT after the NEWLINE means the definition is a single line.
            oneliner = first_indent[0] > next_newline[0]

        if oneliner:
            # The following lines will not be indented,
            # because the definition was in the same line.
            c_ind = col
            # The end of the body is the same as the start of the body.
            end_b = start_b
        else:
            # The indentation of the first function/method/class body line.
            # NOTE(review): tokenize's INDENT string appears to hold the whole
            # leading white space of the line, so adding `col` may overshoot
            # for nested definitions -- confirm before changing: it affects
            # how deeply mknode splits.
            c_ind = len(first_indent[1][1]) + col

            # Search for the end of this function/method/class body: the
            # first DEDENT or COMMENT token that is not to the right of col.
            # The token right after the header's NEWLINE is skipped
            # (presumably it is the INDENT).
            for _idx, t in itoks(i + 2):
                if t[2][1] > col:
                    continue
                if t[0] in (token.DEDENT, token.COMMENT):
                    end_b = t[2][0]
                    break

        # Now let's increase end_b to include all following blank lines.
        for j in range(end_b, len(lines) + 1):
            if lines[j - 1].isspace():
                end_b = j + 1
            else:
                break

        # The number of `intro` lines.
        intro = get_intro(a, col)

        return col, a - intro, end_h, start_b, kind, name, c_ind, end_b
    #@+node:vitalije.20211208101750.1: *3* body
    def bodyLine(x, ind):
        """
        Return line x with ind leading characters removed.

        A line with fewer than ind leading white space characters is
        returned with an escape prefix recording the missing amount.
        """
        if ind == 0 or x[:ind].isspace():
            return x[ind:] or '\n'
        n = len(x) - len(x.lstrip())
        return f'\\\\-{ind-n}.{x[n:]}'

    def body(a, b, ind):
        """Return the text of lines a (inclusive) to b (exclusive), unindented by ind."""
        xlines = (bodyLine(x, ind) for x in lines[a - 1 : b and (b - 1)])
        return ''.join(xlines)
    #@+node:vitalije.20211208110301.1: *3* indent
    def indent(x, n):
        """Return x with n spaces added on the left."""
        return x.rjust(len(x) + n)
    #@+node:vitalije.20211208104408.1: *3* mknode
    def mknode(p, start, start_b, end, l_ind, col, xdefs):
        """
        Set the body of p and add its children, recursively.

        p       - the node to fill
        start   - first line of this node
        start_b - first line of this node's function/class body (unused here)
        end     - first line after this node
        l_ind   - amount of white space to strip from left
        col     - column start of child nodes
        xdefs   - all definitions inside this node
        """
        # First let's find all defs that start at the same column
        # as our indented function/method/class body.
        tdefs = [x for x in xdefs if x[0] == col]

        if not tdefs or end - start < SPLIT_THRESHOLD:
            # If there are no inner definitions or the total number of
            # lines is less than the threshold, all lines are added
            # to this node and no further splitting is necessary.
            p.b = body(start, end, l_ind)
            return

        # The part before at-others: everything above the first inner definition.
        first_h1 = tdefs[0][1]
        b1 = body(start, first_h1, l_ind) if first_h1 > start else ''
        o = indent('@others\n', col - l_ind)

        # The part after at-others: everything below the last inner definition.
        last_end_b = tdefs[-1][-1]
        b2 = body(last_end_b, end, l_ind) if last_end_b < end else ''

        p.b = f'{b1}{o}{b2}'

        # Now add a child for each of the inner definitions.
        # `last` keeps track of the first line not yet used.
        last = first_h1
        for _col, h1, _h2, child_start_b, _kind, name, c_ind, end_b in tdefs:
            if h1 > last:
                # There are some declaration lines in between two inner definitions.
                p1 = p.insertAsLastChild()
                p1.h = '...some declarations'
                p1.b = body(last, h1, col)
                last = h1
            p1 = p.insertAsLastChild()
            p1.h = name

            # The next level inner definitions are the definitions whose
            # first and end lines are between the start and the end of
            # this definition.
            subdefs = [x for x in xdefs if x[1] > h1 and x[-1] <= end_b]
            if subdefs:
                # So let's (possibly) split this node.
                mknode(
                    p=p1,
                    start=h1,
                    start_b=child_start_b,
                    end=end_b,
                    l_ind=l_ind + col,  # increase indentation for at-others
                    col=c_ind,
                    xdefs=subdefs,
                )
            else:
                # No next level inner definitions: just set the body.
                p1.b = body(h1, end_b, col)

            last = end_b
    #@-others
    # rawtokens is a list of all tokens found in the input lines.
    rawtokens = list(tokenize.generate_tokens(mkreadline(lines)))

    # lntokens - line tokens are tokens grouped by the line number
    #            on which they start.
    lntokens = defaultdict(list)
    for t in rawtokens:
        lntokens[t[2][0]].append(t)

    # We create a list of all definitions in the token list, both `def`
    # and `class` definitions. Each definition is a tuple with the
    # following values:
    #
    # 0: col     - column where the definition starts
    # 1: h1      - line number of the first line of this node
    #              this line may be above the starting line
    #              (comment lines and decorators are in these lines)
    # 2: h2      - line number of the last line of the declaration
    #              (the line of the NEWLINE token ending the header)
    # 3: start_b - line number of the first indented line of the
    #              function/class body.
    # 4: kind    - can be 'def' or 'class'
    # 5: name    - name of the function, class or method
    # 6: c_ind   - column of the indented body
    # 7: end_b   - line number of the first line after the definition
    #
    # getdefn returns None if the token at the index isn't the start of a
    # definition, or if it isn't possible to calculate all the values
    # mentioned above. Therefore, we filter the list.
    definitions = [d for d in map(getdefn, range(len(rawtokens) - 1)) if d]

    # A preparation step.
    root.deleteAllChildren()

    # mknode sets the body and adds children recursively using the
    # precalculated definitions list.
    mknode(
        p=root,
        start=1,
        start_b=1,
        end=len(lines) + 1,
        l_ind=0,
        col=0,
        xdefs=definitions,
    )
    return definitions

340#@-others 

# Module-level record pairing the import function with the file extensions
# it handles.
# NOTE(review): presumably read by Leo's importer loading code -- confirm
# against the caller; nothing in this file reads it.
importer_dict = {
    'func': do_import,
    'extensions': ['.py', '.pyw', '.pyi'],  # mypy uses .pyi extension.
}

345#@@language python 

346#@@tabwidth -4 

347#@-leo