Coverage for C:\leo.repo\leo-editor\leo\plugins\importers\python.py : 99%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1#@+leo-ver=5-thin
2#@+node:ekr.20211209153303.1: * @file ../plugins/importers/python.py
3"""The new, tokenize based, @auto importer for Python."""
4import sys
5import tokenize
6import token
7from collections import defaultdict
8import leo.core.leoGlobals as g
9#@+others
10#@+node:ekr.20211209052710.1: ** do_import
def do_import(c, s, parent):
    """
    Import the Python source text `s` into the outline rooted at `parent`.

    c:      the commander; only `c.config.getBool` is used here.
    s:      the complete source text to import.
    parent: the node that receives the import.  `split_root` replaces its
            children and body; this function then prepends the @language
            and @tabwidth directives to the body.

    Return False (after printing a message) when running on a Python
    older than 3.7, True otherwise.
    """
    if sys.version_info < (3, 7, 0):
        g.es_print('The python importer requires python 3.7 or above')
        return False
    split_root(parent, s.splitlines(True))
    parent.b = f'@language python\n@tabwidth -4\n{parent.b}'
    if c.config.getBool('put-class-in-imported-headlines'):
        for p in parent.subtree():  # Don't change parent.h.
            # A body counts as a class node when any of its lines starts
            # with 'class ' at column 0.
            if p.b.startswith('class ') or '\nclass ' in p.b:
                p.h = f'class {p.h}'
    return True
23#@+node:vitalije.20211201230203.1: ** split_root
# A node spanning fewer lines than this is never split into child nodes.
SPLIT_THRESHOLD = 10
def split_root(root, lines):
    """
    Parse the given lines and put all top level function definitions
    and class definitions in separate nodes which are all direct
    children of the root. All longer class nodes are further divided,
    each method in a separate node.

    This function puts comments and decorators in the same node
    above the definition.

    root:  the node to fill.  Its existing children are deleted, its
           body is set, and children are added with insertAsLastChild.
    lines: the source lines, each keeping its line ending.

    Return the list of definition tuples found in the source:
    (col, h1, h2, start_b, kind, name, c_ind, end_b).
    """
    #@+others
    #@+node:vitalije.20211208183603.1: *3* is_intro_line
    def is_intro_line(n, col):
        """
        Return True if line n may belong to the `intro` of a definition
        that starts at column col.

        An intro line is either a blank line, a comment line that starts
        at the same column as the def/class line, or a decorator line.
        """
        # Ignore the white space tokens of line n; we are interested
        # only in the tokens containing some text.
        xs = [
            x for x in lntokens[n]
            if x[0] not in (token.DEDENT, token.INDENT, token.NL)
        ]
        if not xs:
            # All tokens in this line are white space: a blank line.
            # Blank lines are allowed inside the block of comments.
            return True
        t = xs[0]  # The first non blank token in line n.
        if t[2][1] != col:
            # Not at the same column as the definition.
            return False
        if t[0] == token.OP and t[1] == '@':
            # The line starts with `@`: a decorator.
            return True
        # Otherwise only a comment at the definition's column qualifies.
        return t[0] == token.COMMENT
    #@+node:vitalije.20211208084231.1: *3* get_intro
    def get_intro(row, col):
        """
        Return the number of preceding lines that can be considered as an
        `intro` to the function/class/method definition at (row, col).
        """
        last = row
        for i in range(row - 1, 0, -1):
            if is_intro_line(i, col):
                last = i
            else:
                break
        # We don't want the intro to *start* with a bunch of blank lines;
        # they are better left at the end of the preceding node.  Stop at
        # the first non-blank line so that a blank line inside the intro
        # does not throw away the comments and decorators above it.
        for i in range(last, row):
            if lines[i - 1].isspace():
                last = i + 1
            else:
                break
        return row - last
    #@+node:vitalije.20211206182505.1: *3* mkreadline
    def mkreadline(lines):
        """
        Return a readline-style callable for tokenize: each call returns
        the next line, then '' forever once the lines are exhausted.
        """
        itlines = iter(lines)

        def nextline():
            return next(itlines, '')

        return nextline
    #@+node:vitalije.20211208092828.1: *3* itoks
    def itoks(i):
        """Yield (index, token) for every raw token from index i onward."""
        yield from enumerate(rawtokens[i:], start=i)
    #@+node:vitalije.20211208092833.1: *3* search
    def search(i, k):
        """Yield (index, token) for every token of type k from index i onward."""
        for j, t in itoks(i):
            if t[0] == k:
                yield j, t
    #@+node:vitalije.20211208092910.1: *3* getdefn
    def getdefn(start):
        """
        Return the definition tuple for the definition starting at token
        index `start`:

            (col, h1, h2, start_b, kind, name, c_ind, end_b)

        Return None if the token is not the start of a `def`, `class` or
        `async def` definition, or if the end of its header line can't
        be found.
        """
        tok = rawtokens[start]
        if tok[0] != token.NAME or tok[1] not in ('async', 'def', 'class'):
            return None

        # For `async def` the keyword and the name are one token further on.
        first = start + 1 if tok[1] == 'async' else start
        kind = rawtokens[first][1]
        if kind not in ('def', 'class'):
            # `async with` and `async for` are statements, not definitions.
            return None
        name = rawtokens[first + 1][1]
        if kind == 'def' and rawtokens[start - 1][1] == 'async':
            # This `def` was already handled when its `async` token was visited.
            return None
        a, col = tok[2]

        # Search for the end of the definition line.  This one logical
        # line may be divided in several physical lines.  At the end of
        # the logical line there is a NEWLINE token.
        found = next(search(start + 1, token.NEWLINE), None)
        if found is None:
            return None
        i, t = found
        # The last of the header lines.  Header lines are not indented in
        # the node body, as opposed to the function/method/class body.
        end_h = t[2][0]
        # In case we have a oneliner, define end_b here.
        end_b = end_h
        # The indented body starts on the next line.
        start_b = end_h + 1

        # To check whether this is a oneline definition, look forward to
        # see which token comes first: INDENT or NEWLINE.  If either is
        # missing the definition is treated as a oneliner.
        first_indent = next(search(i + 1, token.INDENT), None)
        next_newline = next(search(i + 1, token.NEWLINE), None)
        oneliner = True
        if first_indent is not None and next_newline is not None:
            # INDENT after the NEWLINE means the definition is a single line.
            oneliner = first_indent[0] > next_newline[0]

        if oneliner:
            # The following lines are not indented, because the whole
            # definition was on the header line.
            c_ind = col
            # The end of the body is the same as the start of the body.
            end_b = start_b
        else:
            # NOTE(review): the INDENT token string appears to be the
            # line's whole leading white space, so for a definition that
            # is itself indented this sum exceeds the real body column
            # -- confirm this is intended before changing it.
            c_ind = len(first_indent[1][1]) + col

            # Search for the end of this function/method/class, starting
            # two tokens after the header's NEWLINE (presumably just past
            # the INDENT).  The first DEDENT or COMMENT at or left of the
            # definition's column ends it.
            for _, tk in itoks(i + 2):
                if tk[2][1] > col:
                    continue
                if tk[0] in (token.DEDENT, token.COMMENT):
                    end_b = tk[2][0]
                    break

        # Increase end_b to include all following blank lines.
        for j in range(end_b, len(lines) + 1):
            if lines[j - 1].isspace():
                end_b = j + 1
            else:
                break

        # Number of `intro` lines.
        intro = get_intro(a, col)
        return col, a - intro, end_h, start_b, kind, name, c_ind, end_b
    #@+node:vitalije.20211208101750.1: *3* body
    def bodyLine(x, ind):
        """
        Return line x with `ind` leading white space columns removed.

        A line that becomes empty is returned as a newline.  A line with
        fewer than `ind` leading white space characters is returned with
        all its leading white space removed and a prefix recording how
        many columns were missing (presumably Leo's under-indent escape
        -- verify against Leo's documentation).
        """
        if ind == 0 or x[:ind].isspace():
            return x[ind:] or '\n'
        n = len(x) - len(x.lstrip())
        return f'\\\\-{ind-n}.{x[n:]}'

    def body(a, b, ind):
        """
        Return lines a..b-1 (1-based) joined, each stripped of `ind`
        leading columns.  A falsy b means "up to the last line".
        """
        xlines = (bodyLine(x, ind) for x in lines[a - 1: b and (b - 1)])
        return ''.join(xlines)
    #@+node:vitalije.20211208110301.1: *3* indent
    def indent(x, n):
        """Return x preceded by n spaces (x unchanged when n <= 0)."""
        return x.rjust(len(x) + n)
    #@+node:vitalije.20211208104408.1: *3* mknode
    def mknode(p, start, start_b, end, l_ind, col, xdefs):
        """
        Set the body of node p and recursively add its children.

        p       - the node to fill
        start   - first line of this node
        start_b - first line of this node's function/class body
                  (accepted for symmetry; not read here)
        end     - first line after this node
        l_ind   - amount of white space to strip from the left
        col     - column at which the child definitions start
        xdefs   - all definitions inside this node
        """
        # First find all defs that start at the same column as our
        # indented function/method/class body.
        tdefs = [x for x in xdefs if x[0] == col]

        if not tdefs or end - start < SPLIT_THRESHOLD:
            # There are no inner definitions, or the total number of lines
            # is below the threshold: all lines go into this node and no
            # further splitting is necessary.
            p.b = body(start, end, l_ind)
            return

        # Lines before the first inner definition come before at-others.
        first_h1 = tdefs[0][1]
        b1 = body(start, first_h1, l_ind) if first_h1 > start else ''
        o = indent('@others\n', col - l_ind)

        # Lines after the last inner definition come after at-others.
        last_end = tdefs[-1][-1]
        b2 = body(last_end, end, l_ind) if last_end < end else ''
        p.b = f'{b1}{o}{b2}'

        # Now add a child for each of the inner definitions.
        last = first_h1  # Keeps track of the last used line.
        for _col, h1, _h2, child_start_b, _kind, name, c_ind, end_b in tdefs:
            if h1 > last:
                # There are some declaration lines between two inner
                # definitions.
                decl = p.insertAsLastChild()
                decl.h = '...some declarations'
                decl.b = body(last, h1, col)
            child = p.insertAsLastChild()
            child.h = name

            # The next level inner definitions are those whose starting
            # and end lines are between the start and end of this child.
            subdefs = [x for x in xdefs if x[1] > h1 and x[-1] <= end_b]
            if subdefs:
                # Split this child too.
                mknode(
                    p=child,
                    start=h1,
                    start_b=child_start_b,
                    end=end_b,
                    l_ind=l_ind + col,  # increase indentation for at-others
                    col=c_ind,
                    xdefs=subdefs,
                )
            else:
                # No next level inner definitions: just set the body.
                child.b = body(h1, end_b, col)
            last = end_b
    #@-others
    # rawtokens is a list of all tokens found in the input lines.
    rawtokens = list(tokenize.generate_tokens(mkreadline(lines)))

    # lntokens - line tokens are the tokens grouped by the line number
    #            on which they start.
    lntokens = defaultdict(list)
    for t in rawtokens:
        lntokens[t[2][0]].append(t)

    # Create the list of all definitions in the token list, both `def`
    # and `class` definitions.  Each definition is a tuple with the
    # following values:
    #
    # 0: col     - column where the definition starts
    # 1: h1      - line number of the first line of this node;
    #              this line may be above the starting line
    #              (comment lines and decorators are in these lines)
    # 2: h2      - line number of the last line of the declaration;
    #              it is the line number where the `:` (colon) is.
    # 3: start_b - line number of the first indented line of the
    #              function/class body.
    # 4: kind    - can be 'def' or 'class'
    # 5: name    - name of the function, class or method
    # 6: c_ind   - column of the indented body, as computed by getdefn
    # 7: end_b   - line number of the first line after the definition
    #
    # getdefn returns None if the token at the given index isn't the
    # start of a definition, or if it isn't possible to calculate all the
    # values mentioned above.  Therefore, we filter the list.
    definitions = list(filter(None, map(getdefn, range(len(rawtokens) - 1))))

    # A preparation step.
    root.deleteAllChildren()

    # mknode sets the body and adds children recursively, using the
    # precalculated definitions list.  Its parameters are:
    #   p       - current node
    #   start   - line number of the first line of this node
    #   start_b - line number of the first line of this node's body
    #   end     - line number of the first line after this node
    #   l_ind   - the accumulated indentation through at-others: the
    #             number of spaces that should be stripped from the
    #             beginning of each line in this node
    #   col     - the column at which all of the inner definitions
    #             (methods, inner functions and classes) start
    #   xdefs   - list of the definitions covering this node
    mknode(
        p=root,
        start=1,
        start_b=1,
        end=len(lines) + 1,
        l_ind=0,
        col=0,
        xdefs=definitions,
    )
    return definitions
340#@-others
# Registration record for this importer: the entry point and the file
# extensions it handles.
importer_dict = {
    'func': do_import,
    'extensions': ['.py', '.pyw', '.pyi'],  # mypy uses .pyi extension.
}
345#@@language python
346#@@tabwidth -4
347#@-leo