Coverage for C:\leo.repo\leo-editor\leo\plugins\importers\linescanner.py : 65%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1#@+leo-ver=5-thin
2#@+node:ekr.20161108125620.1: * @file ../plugins/importers/linescanner.py
3#@+<< linescanner docstring >>
4#@+node:ekr.20161108125805.1: ** << linescanner docstring >>
5"""
6#@@language rest
7#@@wrap
9**Overview**
11Leo's import infrastructure in `leoImport.py` instantiates the
12Importer instance and calls `i.run`, which calls `i.scan_lines`.
14New importers copy entire lines from the input file to Leo nodes. This
15makes the new importers much less error prone than the legacy
16(character-by-character) importers.
18New importers know *nothing* about parsing. They know only about how to
19scan tokens *accurately*.
21**Writing a new importer**
23Just run the importer;; abbreviation!
25To make the importer importer;; functional you must:
271. Copy it from leoSettings (@settings-->Abbreviations-->@outline-data tree-abbreviations)
28 to the corresponding location in myLeoSettings.leo.
302. Make sure @bool scripting-abbreviations is True in myLeoSettings.leo.
32**Using the abbreviation**
341. Just type importer;; in the body pane of an empty node.
36A dialog will prompt you for the name of the language. Suppose you type x.
382. Now you will be prompted to fill in the first field::
40 'extensions': [comma-separated lists of extensions, with leading periods],
42The italicized field will be highlighted. Type '.x' (including quotes) followed by two commas.
443. You will then be prompted to fill in the second field::
46 strict = True leading whitespace is significant. Otherwise False,
48Again, the italicized field will be highlighted.
50Type False, followed by two commas.
524. You will then be prompted for the last field::
54 return level
55 ### Examples:
56 # self.indent # for python, coffeescript.
57 # self.curlies
58 # (self.curlies, self.parens)
60Only "level" is highlighted. The comments provide some hints about what to type.
62Let's type "self.curlies" followed by two commas.
645. Nothing more is highlighted, so that's it! No more substitutions remain.
65 The outline is ready to use!
67Take a look at the result. The new tree is an almost complete @@file node
68for the importer. Subtrees contain an X_Importer class and an X_ScanState
69class. Docstrings, ctors and __repr__ methods are complete.
71Note: The generated tree contains ### comments showing where more work may
72be needed. I might remove the need for some of them, but there is no great
73need to do so.
75"""
76#@-<< linescanner docstring >>
77#@+<< linescanner imports >>
78#@+node:ekr.20161108130715.1: ** << linescanner imports >>
79import io
80import re
81from typing import Any, Dict, List
82from leo.core import leoGlobals as g
83StringIO = io.StringIO
84#@-<< linescanner imports >>
85#@+others
86#@+node:ekr.20161108155730.1: ** class Importer
87class Importer:
88 """
89 The new, unified, simplified, interface to Leo's importer code.
91 Eventually, all importers will use this class.
92 """
94 #@+others
95 #@+node:ekr.20161108155925.1: *3* i.__init__ & reloadSettings
def __init__(self,
    importCommands,
    gen_refs=False,  # True: generate section references,
    language=None,  # For @language directive.
    name=None,  # The kind of importer, usually the same as language
    state_class=None,  # For i.scan_line
    strict=False,
    **kwargs
):
    """
    Importer.__init__: create all ivars of the importer.

    New in Leo 6.1.1: importCommands (and so self.c) may be None for unit tests.
    """
    ic = importCommands
    c = ic and ic.c
    #
    # Copies of args...
    self.importCommands = ic
    self.c = c
    self.encoding = ic and ic.encoding or 'utf-8'
    self.gen_refs = gen_refs
    # language and name default to each other: at least one must be given.
    self.language = language or name  # For the @language directive.
    self.name = name or language
    assert self.language and self.name
    name = self.name
    self.state_class = state_class
    self.strict = strict  # True: leading whitespace is significant.
    #
    # Set from ivars...
    self.has_decls = name not in ('xml', 'org-mode', 'vimoutliner')
    self.is_rst = name in ('rst',)
    self.tree_type = ic.treeType if c else None  # '@root', '@file', etc.
    #
    # Constants...
    if ic:
        delims = g.set_delims_from_language(self.name)
        self.single_comment, self.block1, self.block2 = delims
        self.escape = c.atFileCommands.underindentEscapeString
        # m.group(1) is the unindent value.
        self.escape_string = r'%s([0-9]+)\.' % re.escape(self.escape)
        self.escape_pattern = re.compile(self.escape_string)
    else:
        # Javascript delims. The escape ivars are *not* set in this case.
        self.single_comment, self.block1, self.block2 = '//', '/*', '*/'
    # Must be set by subclasses that use general_scan_line.
    self.ScanState = ScanState
    self.tab_width = 0  # Must be set in run, using self.root.
    self.ws_pattern = re.compile(r'^\s*$|^\s*%s' % (self.single_comment or ''))
    #
    # Settings...
    self.reloadSettings()
    #
    # State vars.
    self.errors = 0
    if ic:
        ic.errors = 0  # Required.
    self.parse_body = False
    # Keys are headlines. Values are disambiguating number.
    self.refs_dict: Dict[str, int] = {}
    self.root = None
    self.skip = 0  # A skip count for x.gen_lines & its helpers.
    self.vnode_info: Dict[str, Any] = {}
    self.ws_error = False
159 def reloadSettings(self):
160 c = self.c
161 if not c:
162 return
163 getBool = c.config.getBool
164 c.registerReloadSettings(self)
165 # self.at_auto_separate_non_def_nodes = False
166 self.add_context = getBool("add-context-to-headlines")
167 self.add_file_context = getBool("add-file-context-to-headlines")
168 self.at_auto_warns_about_leading_whitespace = getBool('at_auto_warns_about_leading_whitespace')
169 self.warn_about_underindented_lines = True
171 #@+node:ekr.20161110042512.1: *3* i.Convenience methods for vnode_info dict
172 def add_line(self, p, s, tag=None):
173 """Append the line s to p.v._import_lines."""
174 assert s and isinstance(s, str), (repr(s), g.callers())
175 self.vnode_info [p.v] ['lines'].append(s)
177 def extend_lines(self, p, lines):
178 self.vnode_info [p.v] ['lines'].extend(list(lines))
180 def get_lines(self, p):
181 return self.vnode_info [p.v] ['lines']
183 def has_lines(self, p):
184 d = self.vnode_info.get(p.v)
185 return d is not None and d.get('lines') is not None
187 def prepend_lines(self, p, lines):
188 self.vnode_info [p.v] ['lines'] = list(lines) + self.vnode_info [p.v] ['lines']
190 def set_lines(self, p, lines):
191 self.vnode_info [p.v] ['lines'] = list(lines)
192 #@+node:ekr.20161108131153.7: *3* i.Overrides
193 # These can be overridden in subclasses.
194 #@+node:ekr.20161108131153.8: *4* i.adjust_parent
def adjust_parent(self, parent, headline):
    """
    Return the effective parent. This base version returns parent unchanged.

    The RstScanner class overrides this method.
    """
    return parent
200 #@+node:ekr.20161108131153.9: *4* i.clean_headline
def clean_headline(self, s, p=None):
    """
    Return headline s with leading and trailing whitespace removed.

    Subclasses will typically override this method. p is unused here.
    """
    cleaned = s.strip()
    return cleaned
207 #@+node:ekr.20161110173058.1: *4* i.clean_nodes
def clean_nodes(self, parent):
    """
    Clean all nodes in parent's tree. This base version does nothing.

    Subclasses override this as desired.
    See perl_i.clean_nodes for an example.
    """
    return None
215 #@+node:ekr.20161120022121.1: *3* i.Scanning & scan tables
216 #@+node:ekr.20161128025508.1: *4* i.get_new_dict
217 #@@nobeautify
219 def get_new_dict(self, context):
220 """
221 Return a *general* state dictionary for the given context.
222 Subclasses may override...
223 """
224 comment, block1, block2 = self.single_comment, self.block1, self.block2
226 def add_key(d, pattern, data):
227 key = pattern[0]
228 aList = d.get(key,[])
229 aList.append(data)
230 d[key] = aList
232 d: Dict[str, List[Any]]
234 if context:
235 d = {
236 # key kind pattern ends?
237 '\\': [('len+1', '\\', None),],
238 '"': [('len', '"', context == '"'),],
239 "'": [('len', "'", context == "'"),],
240 }
241 if block1 and block2:
242 add_key(d, block2, ('len', block2, True))
243 else:
244 # Not in any context.
245 d = {
246 # key kind pattern new-ctx deltas
247 '\\':[('len+1', '\\', context, None),],
248 '"': [('len', '"', '"', None),],
249 "'": [('len', "'", "'", None),],
250 '{': [('len', '{', context, (1,0,0)),],
251 '}': [('len', '}', context, (-1,0,0)),],
252 '(': [('len', '(', context, (0,1,0)),],
253 ')': [('len', ')', context, (0,-1,0)),],
254 '[': [('len', '[', context, (0,0,1)),],
255 ']': [('len', ']', context, (0,0,-1)),],
256 }
257 if comment:
258 add_key(d, comment, ('all', comment, '', None))
259 if block1 and block2:
260 add_key(d, block1, ('len', block1, block1, None))
261 return d
262 #@+node:ekr.20161113135037.1: *4* i.get_table
263 #@@nobeautify
264 cached_scan_tables: Dict[str, Any] = {}
266 def get_table(self, context):
267 """
268 Return the state table for the given context.
270 This method handles caching. x.get_new_table returns the actual table.
271 """
272 key = '%s.%s' % (self.name, context)
273 # Bug fix: must keep tables separate.
274 table = self.cached_scan_tables.get(key)
275 if table:
276 return table
277 table = self.get_new_dict(context)
278 self.cached_scan_tables[key] = table
279 return table
280 #@+node:ekr.20161128025444.1: *4* i.scan_dict
281 def scan_dict(self, context, i, s, d):
282 """
283 i.scan_dict: Scan at position i of s with the give context and dict.
284 Return the 6-tuple: (new_context, i, delta_c, delta_p, delta_s, bs_nl)
285 """
286 found = False
287 delta_c = delta_p = delta_s = 0
288 ch = s[i]
289 aList = d.get(ch)
290 if aList and context:
291 # In context.
292 for data in aList:
293 kind, pattern, ends = data
294 if self.match(s, i, pattern):
295 if ends is None:
296 found = True
297 new_context = context
298 break
299 elif ends:
300 found = True
301 new_context = ''
302 break
303 else:
304 pass # Ignore this match.
305 elif aList:
306 # Not in context.
307 for data in aList:
308 kind, pattern, new_context, deltas = data
309 if self.match(s, i, pattern):
310 found = True
311 if deltas:
312 delta_c, delta_p, delta_s = deltas
313 break
314 if found:
315 if kind == 'all':
316 i = len(s)
317 elif kind == 'len+1':
318 i += (len(pattern) + 1)
319 else:
320 assert kind == 'len', (kind, self.name)
321 i += len(pattern)
322 bs_nl = pattern == '\\\n'
323 return new_context, i, delta_c, delta_p, delta_s, bs_nl
324 #
325 # No match: stay in present state. All deltas are zero.
326 new_context = context
327 return new_context, i + 1, 0, 0, 0, False
328 #@+node:ekr.20161108170435.1: *4* i.scan_line
329 def scan_line(self, s, prev_state):
330 """
331 A generalized scan-line method.
333 SCAN STATE PROTOCOL:
335 The Importer class should have a state_class ivar that references a
336 **state class**. This class probably should *not* be subclass of the
337 ScanState class, but it should observe the following protocol:
339 1. The state class's ctor must have the following signature:
341 def __init__(self, d)
343 2. The state class must have an update method.
344 """
345 # This dict allows new data to be added without changing ScanState signatures.
346 d = {
347 'indent': self.get_int_lws(s),
348 'is_ws_line': self.is_ws_line(s),
349 'prev': prev_state,
350 's': s,
351 }
352 new_state = self.state_class(d)
353 i = 0
354 while i < len(s):
355 progress = i
356 context = new_state.context
357 table = self.get_table(context)
358 data = self.scan_dict(context, i, s, table)
359 i = new_state.update(data)
360 assert progress < i
361 return new_state
362 #@+node:ekr.20161114024119.1: *4* i.test_scan_state
def test_scan_state(self, tests, State):
    """
    Test x.scan_line or i.scan_line.

    `tests` is a list of g.Bunches with 'line' and 'ctx' fields.
    If a bunch has a ctx field (a pair: ctx_in, ctx_out), test just that
    transition. Otherwise, check that the line leaves every context unchanged.

    A typical @command test:

        if c.isChanged(): c.save()
        < < imp.reload importers.linescanner and importers.python > >
        importer = py.Py_Importer(c.importCommands)
        importer.test_scan_state(tests, Python_ScanState)
    """
    assert self.single_comment == '#', self.single_comment
    contexts = self.all_contexts(self.get_table(context=''))

    def scan(line, context):
        # Scan line starting in the given context. Return (prev_state, new_state).
        prev_state = State()
        prev_state.context = context
        return prev_state, self.scan_line(line, prev_state)

    for bunch in tests:
        assert bunch.line is not None
        line = bunch.line
        ctx = getattr(bunch, 'ctx', None)
        if not ctx:
            # Test all transitions.
            for context in contexts:
                prev_state, new_state = scan(line, context)
                assert new_state.context == context, (
                    'FAIL2:\nline: %r\ncontext: %r new_context: %r\n%s\n%s' % (
                        line, context, new_state.context, prev_state, new_state))
            continue
        # Test one transition.
        ctx_in, ctx_out = ctx
        prev_state, new_state = scan(line, ctx_in)
        new_context = new_state.context
        assert new_context == ctx_out, (
            'FAIL1:\nline: %r\ncontext: %r new_context: %r ctx_out: %r\n%s\n%s' % (
                line, ctx_in, new_context, ctx_out, prev_state, new_state))
400 #@+node:ekr.20161108165530.1: *3* i.The pipeline
401 #@+node:ekr.20161108131153.10: *4* i.run (driver) & helers
def run(self, s, parent, parse_body=False):
    """
    The common top-level code for all scanners.

    Import the string s into parent's tree. Return None (doing nothing) for
    cloned nodes that already have children. Otherwise return a truthy value
    if there were no errors and (unless parse_body is True) i.check passes.
    """
    c = self.c
    # Fix #449: Cloned @auto nodes duplicates section references.
    if parent.isCloned() and parent.hasChildren():
        return None
    root = parent.copy()
    self.root = root
    self.file_s = s
    # Init the error/status info.
    self.errors = 0
    self.parse_body = parse_body
    # Check for intermixed blanks and tabs. The check only issues warnings.
    self.tab_width = c.getTabWidth(p=root)
    lines = g.splitLines(s)
    if not self.check_blanks_and_tabs(lines):
        # Regularize leading whitespace.
        lines = self.regularize_whitespace(lines)
    # Completely generate all nodes, including directives and section references.
    self.generate_nodes(lines, parent)
    # Check the generated nodes, except when parsing a body:
    # that works around problems with directives.
    ok = self.errors == 0
    if ok and not parse_body:
        ok = self.check(s, parent)
    # Insert an @ignore directive if there were any serious problems.
    if not ok:
        self.insert_ignore_directive(parent)
    # Importers should never dirty the outline.
    for p in root.self_and_subtree():
        p.clearDirty()
    # #1451: Do not change the outline's change status.
    return ok  # For unit tests.
437 #@+node:ekr.20161108131153.14: *5* i.regularize_whitespace
def regularize_whitespace(self, lines):
    """
    Regularize the leading whitespace of all lines, depending on the
    @tabwidth in effect (self.tab_width):

    - tab_width < 0: convert leading tabs to blanks.
    - tab_width > 0: convert leading blanks to tabs.
    - tab_width == 0: no conversion is possible. Return the lines unchanged.
      (Previously this case returned an empty list, losing all lines.)

    Return the list of resulting lines. Set self.ws_error to True if any
    line changed and report the number of changed lines.
    """
    tab_width = self.tab_width
    fn = g.shortFileName(self.root.h)
    self.ws_error = False  # 2016/11/23
    if tab_width == 0:
        return list(lines)
    if tab_width < 0:
        # Convert tabs to blanks.
        kind, kind2 = 'blanks', 'tabs'

        def convert(line):
            i, w = g.skip_leading_ws_with_indent(line, 0, tab_width)
            # Use negative width.
            return g.computeLeadingWhitespace(w, -abs(tab_width)) + line[i:]
    else:
        # Convert blanks to tabs.
        kind, kind2 = 'tabs', 'blanks'

        def convert(line):
            # Use positive width.
            return g.optimizeLeadingWhitespace(line, abs(tab_width))

    count, result = 0, []
    for line in lines:
        s = convert(line)
        if s != line:
            count += 1
        result.append(s)
    if count:
        self.ws_error = True  # A flag to check.
        if not g.unitTesting:
            g.es('changed leading %s to %s in %s line%s in %s' % (
                kind2, kind, count, g.plural(count), fn))
        if g.unitTesting:  # Sets flag for unit tests.
            self.report('changed %s lines' % count)
    return result
474 #@+node:ekr.20161111024447.1: *5* i.generate_nodes
475 def generate_nodes(self, lines, parent):
476 """
477 A three-stage pipeline to generate all imported nodes.
478 """
479 # Stage 1: generate nodes.
480 # After this stage, the p.v._import_lines list contains p's future body text.
481 if isinstance(lines, str):
482 raise ValueError
483 self.gen_lines(lines, parent)
484 #
485 # Optional Stage 2, consisting of zero or more sub-stages.
486 # Subclasses may freely override this method, **provided**
487 # that all substages use the API for setting body text.
488 # Changing p.b directly will cause asserts to fail in i.finish().
489 self.post_pass(parent)
490 #
491 # Stage 3: Put directives in the root node and set p.b for all nodes.
492 #
493 # Subclasses should never need to override this stage.
494 self.finish(parent)
495 #@+node:ekr.20161108131153.11: *4* State 0: i.check_blanks_and_tabs
def check_blanks_and_tabs(self, lines):
    """
    Check the leading whitespace of all lines for intermixed blanks & tabs,
    and for whitespace that conflicts with self.tab_width.

    Return True if the whitespace is ok. Otherwise issue a message and
    return False. This method never changes the lines.
    """
    # Do a quick check for mixed leading tabs/blanks.
    fn = g.shortFileName(self.root.h)
    w = self.tab_width
    blanks = tabs = 0
    for s in lines:
        lws = self.get_str_lws(s)
        blanks += lws.count(' ')
        tabs += lws.count('\t')
    # Make sure whitespace matches @tabwidth directive.
    # When w == 0 neither test applies: previously ok and message were
    # unbound in that case, raising UnboundLocalError below.
    ok, message = True, ''
    if w < 0:
        ok = tabs == 0
        message = 'tabs found with @tabwidth %s in %s' % (w, fn)
    elif w > 0:
        ok = blanks == 0
        message = 'blanks found with @tabwidth %s in %s' % (w, fn)
    if ok:
        ok = (blanks == 0 or tabs == 0)
        message = 'intermixed blanks and tabs in: %s' % (fn)
    if not ok:
        if g.unitTesting:
            self.report(message)
        else:
            g.es(message)
    return ok
522 #@+node:ekr.20161108160409.1: *4* Stage 1: i.gen_lines & helpers
def gen_lines(self, lines, parent):
    """
    Non-recursively parse all lines into parent, creating descendant
    nodes as needed.

    Each line is scanned with i.scan_line. Depending on the resulting state
    the line starts a new block, ends the present block, or is added to
    the present node.
    """
    trace = 'importers' in g.app.debug
    tail_p = None
    prev_state = self.state_class()
    target = Target(parent, prev_state)
    stack = [target, target]  # The first entry is a guard.
    # Keys are vnodes, values are inner dicts.
    self.vnode_info = {parent.v: {'lines': []}}
    if g.unitTesting:
        g.vnode_info = self.vnode_info  # A hack.
    self.skip = 0
    for i, line in enumerate(lines):
        new_state = self.scan_line(line, prev_state)
        top = stack[-1]
        if trace:
            g.trace('%d %d %s' % (
                self.starts_block(i, lines, new_state, prev_state),
                self.ends_block(line, new_state, prev_state, stack),
                line.rstrip()))
        if self.skip > 0:
            # A helper has already handled this line.
            self.skip -= 1
        elif self.is_ws_line(line):
            self.add_line(tail_p or top.p, line)
        elif self.starts_block(i, lines, new_state, prev_state):
            tail_p = None
            self.start_new_block(i, lines, new_state, prev_state, stack)
        elif self.ends_block(line, new_state, prev_state, stack):
            tail_p = self.end_block(line, new_state, stack)
        else:
            self.add_line(tail_p or top.p, line)
        prev_state = new_state
566 #@+node:ekr.20161108160409.7: *5* i.create_child_node
567 def create_child_node(self, parent, line, headline):
568 """Create a child node of parent."""
569 child = parent.insertAsLastChild()
570 self.vnode_info [child.v] = {
571 'lines': [],
572 }
573 if line:
574 self.add_line(child, line)
575 assert isinstance(headline, str), repr(headline)
576 child.h = headline.strip()
577 return child
578 #@+node:ekr.20161119130337.1: *5* i.cut_stack
579 def cut_stack(self, new_state, stack):
580 """Cut back the stack until stack[-1] matches new_state."""
582 def underflow(n):
583 g.trace(n)
584 g.trace(new_state)
585 g.printList(stack)
587 # assert len(stack) > 1 # Fail on entry.
588 if len(stack) <= 1:
589 return underflow(0)
590 while stack:
591 top_state = stack[-1].state
592 if new_state.level() < top_state.level():
593 if len(stack) > 1:
594 stack.pop()
595 else:
596 return underflow(1)
597 elif top_state.level() == new_state.level():
598 # assert len(stack) > 1, stack # ==
599 # This is the only difference between i.cut_stack and python/cs.cut_stack
600 if len(stack) <= 1:
601 return underflow(2)
602 break
603 else:
604 # This happens often in valid Python programs.
605 break
606 # Restore the guard entry if necessary.
607 if len(stack) == 1:
608 stack.append(stack[-1])
609 elif len(stack) <= 1:
610 return underflow(3)
611 return None
612 #@+node:ekr.20161108160409.3: *5* i.end_block
613 def end_block(self, line, new_state, stack):
614 # The block is ending. Add tail lines until the start of the next block.
615 p = stack[-1].p
616 self.add_line(p, line)
617 self.cut_stack(new_state, stack)
618 tail_p = None if self.gen_refs else p
619 return tail_p
620 #@+node:ekr.20161127102339.1: *5* i.ends_block
def ends_block(self, line, new_state, prev_state, stack):
    """True if line ends the block: the new level is less than the level at the top of the stack."""
    # Comparing new_state against prev_state does not work for python.
    top_level = stack[-1].state.level
    return new_state.level() < top_level()
626 #@+node:ekr.20161108160409.8: *5* i.gen_ref
627 def gen_ref(self, line, parent, target):
628 """
629 Generate the ref line. Return the headline.
630 """
631 indent_ws = self.get_str_lws(line)
632 h = self.clean_headline(line, p=None)
633 if self.gen_refs:
634 # Fix #441: Make sure all section refs are unique.
635 d = self.refs_dict
636 n = d.get(h, 0)
637 d[h] = n + 1
638 if n > 0:
639 h = '%s: %s' % (n, h)
640 headline = g.angleBrackets(' %s ' % h)
641 ref = '%s%s\n' % (
642 indent_ws,
643 g.angleBrackets(' %s ' % h))
644 else:
645 if target.ref_flag:
646 ref = None
647 else:
648 ref = '%s@others\n' % indent_ws
649 target.at_others_flag = True
650 target.ref_flag = True
651 # Don't generate another @others in this target.
652 headline = h
653 if ref:
654 self.add_line(parent, ref)
655 return headline
656 #@+node:ekr.20161108160409.6: *5* i.start_new_block
def start_new_block(self, i, lines, new_state, prev_state, stack):
    """
    Create a child node for the block starting at lines[i] and push a new
    target for it on the stack.
    """
    if hasattr(new_state, 'in_context'):
        assert not new_state.in_context(), ('start_new_block', new_state)
    line, top = lines[i], stack[-1]
    # Insert the reference in *this* node.
    headline = self.gen_ref(line, top.p, top)
    # Create a new child and associated target.
    child = self.create_child_node(top.p, line, headline)
    stack.append(Target(child, new_state))
668 #@+node:ekr.20161119124217.1: *5* i.starts_block
def starts_block(self, i, lines, new_state, prev_state):
    """True if the new state starts a block: its level is greater than the previous level."""
    new_level, prev_level = new_state.level(), prev_state.level()
    return new_level > prev_level
672 #@+node:ekr.20161119162451.1: *5* i.trace_status
def trace_status(self, line, new_state, prev_state, stack, top):
    """Print everything important in the i.gen_lines loop: the line, both states and the stack."""
    print('')
    try:
        g.trace(repr(line))
    except Exception:
        # Fall back to tracing the headline of the top node.
        g.trace(f" top.p: {g.toUnicode(top.p.h)}")
    for template, state in (
        (' new_state: %s', new_state),
        ('prev_state: %s', prev_state),
    ):
        print(template % state)
    g.printList(stack)
685 #@+node:ekr.20161108131153.13: *4* Stage 2: i.post_pass & helpers
686 def post_pass(self, parent):
687 """
688 Optional Stage 2 of the importer pipeline, consisting of zero or more
689 substages. Each substage alters nodes in various ways.
691 Subclasses may freely override this method, **provided** that all
692 substages use the API for setting body text. Changing p.b directly will
693 cause asserts to fail later in i.finish().
694 """
695 self.clean_all_headlines(parent)
696 if self.add_context:
697 self.add_class_names(parent)
698 self.clean_all_nodes(parent)
699 self.unindent_all_nodes(parent)
700 #
701 # This sub-pass must follow unindent_all_nodes.
702 self.promote_trailing_underindented_lines(parent)
703 self.promote_last_lines(parent)
704 #
705 # This probably should be the last sub-pass.
706 self.delete_all_empty_nodes(parent)
707 #@+node:ekr.20180524130023.1: *5* i.add_class_names
708 # Note: this method is never called for @clean trees.
# Matches the leading @<file> directive of a headline.
file_pattern = re.compile(r'^(([@])+(auto|clean|edit|file|nosent))')

def add_class_names(self, p):
    """
    Add class names to headlines for all descendant nodes.

    Within the tree of a node whose headline starts with 'class ', prefix
    the class name to headlines. Otherwise, if self.add_file_context is
    True, append the short file name of the enclosing @<file> node.

    Called only when @bool add-context-to-headlines is True.
    Does nothing when unit testing, so as not to change expected headlines.
    """
    if g.unitTesting:
        return
    after = fn = class_name = None
    for p in p.self_and_subtree():
        h = p.h
        # Part 1: update the status.
        m = self.file_pattern.match(h)
        if m:
            # An @<file> node: remember the file name and reset the class.
            prefix = m.group(1)
            fn = g.shortFileName(h[len(prefix) :].strip())
            after = class_name = None
            continue
        if h.startswith('@path '):
            after = fn = class_name = None
        elif h.startswith('class '):
            class_name = h[5:].strip()
            if class_name:
                after = p.nodeAfterTree()
                continue
        elif p == after:
            # We have left the class's tree.
            after = class_name = None
        # Part 2: update the headline.
        if class_name:
            if not h.startswith(class_name):
                p.h = '%s.%s' % (class_name, h)
        elif fn and self.add_file_context:
            tag = ' (%s)' % fn
            if not h.endswith(tag):
                p.h = h + tag
745 #@+node:ekr.20161110125940.1: *5* i.clean_all_headlines
746 def clean_all_headlines(self, parent):
747 """
748 Clean all headlines in parent's tree by calling the language-specific
749 clean_headline method.
750 """
751 for p in parent.subtree():
752 # Note: i.gen_ref calls clean_headline without knowing p.
753 # As a result, the first argument is required.
754 h = self.clean_headline(p.h, p=p)
755 if h and h != p.h:
756 p.h = h
758 #@+node:ekr.20161110130157.1: *5* i.clean_all_nodes
759 def clean_all_nodes(self, parent):
760 """Clean the nodes in parent's tree, in a language-dependent way."""
761 # i.clean_nodes does nothing.
762 # Subclasses may override as desired.
763 # See perl_i.clean_nodes for an example.
764 self.clean_nodes(parent)
765 #@+node:ekr.20161110130709.1: *5* i.delete_all_empty_nodes
766 def delete_all_empty_nodes(self, parent):
767 """
768 Delete nodes consisting of nothing but whitespace.
769 Move the whitespace to the preceding node.
770 """
771 c = self.c
772 aList = []
773 for p in parent.subtree():
774 back = p.threadBack()
775 if back and back.v != parent.v and back.v != self.root.v and not p.isCloned():
776 lines = self.get_lines(p)
777 # Move the whitespace from p to back.
778 if all(z.isspace() for z in lines):
779 self.extend_lines(back, lines)
780 # New in Leo 5.7: empty nodes may have children.
781 if p.hasChildren():
782 # Don't delete p.
783 p.h = 'organizer'
784 self.get_lines(p)
785 else:
786 # Do delete p.
787 aList.append(p.copy())
788 if aList:
789 c.deletePositionsInList(aList) # Don't redraw.
790 #@+node:ekr.20161222122914.1: *5* i.promote_last_lines
def promote_last_lines(self, parent):
    """
    A placeholder for rust_i.promote_last_lines.

    This base version does nothing.
    """
    return None
793 #@+node:ekr.20161110131509.1: *5* i.promote_trailing_underindented_lines
794 def promote_trailing_underindented_lines(self, parent):
795 """
796 Promote all trailing underindent lines to the node's parent node,
797 deleting one tab's worth of indentation. Typically, this will remove
798 the underindent escape.
799 """
800 pattern = self.escape_pattern # A compiled regex pattern
801 for p in parent.subtree():
802 lines = self.get_lines(p)
803 tail = []
804 while lines:
805 line = lines[-1]
806 m = pattern.match(line)
807 if m:
808 lines.pop()
809 n_str = m.group(1)
810 try:
811 n = int(n_str)
812 except ValueError:
813 break
814 if n == abs(self.tab_width):
815 new_line = line[len(m.group(0)) :]
816 tail.append(new_line)
817 else:
818 g.trace('unexpected unindent value', n)
819 g.trace(line)
820 # Fix #652 by restoring the line.
821 new_line = line[len(m.group(0)) :].lstrip()
822 lines.append(new_line)
823 break
824 else:
825 break
826 if tail:
827 parent = p.parent()
828 if parent.parent() == self.root:
829 parent = parent.parent()
830 self.set_lines(p, lines)
831 self.extend_lines(parent, reversed(tail))
832 #@+node:ekr.20161110130337.1: *5* i.unindent_all_nodes
833 def unindent_all_nodes(self, parent):
834 """Unindent all nodes in parent's tree."""
835 for p in parent.subtree():
836 lines = self.get_lines(p)
837 if all(z.isspace() for z in lines):
838 # Somewhat dubious, but i.check covers for us.
839 self.set_lines(p, [])
840 else:
841 self.set_lines(p, self.undent(p))
842 #@+node:ekr.20161111023249.1: *4* Stage 3: i.finish & helpers
843 def finish(self, parent):
844 """
845 Stage 3 (the last) stage of the importer pipeline.
847 Subclasses should never need to override this method.
848 """
849 # Put directives at the end, so as not to interfere with shebang lines, etc.
850 self.add_root_directives(parent)
851 #
852 # Finally, remove all v._import_list temporaries.
853 self.finalize_ivars(parent)
854 #@+node:ekr.20161108160409.5: *5* i.add_root_directives
855 def add_root_directives(self, parent):
856 """Return the proper directives for the root node p."""
857 table = [
858 '@language %s\n' % self.language,
859 '@tabwidth %d\n' % self.tab_width,
860 ]
861 if self.parse_body:
862 pass
863 elif self.has_lines(parent):
864 # Make sure the last line ends with a newline.
865 lines = self.get_lines(parent)
866 if lines:
867 last_line = lines.pop()
868 last_line = last_line.rstrip() + '\n'
869 self.add_line(parent, last_line)
870 self.extend_lines(parent, table)
871 else:
872 self.set_lines(parent, table)
873 #@+node:ekr.20161110042020.1: *5* i.finalize_ivars
def finalize_ivars(self, parent):
    """
    Update the body text of all nodes in parent's tree (including parent)
    using the lines recorded for each node in self.vnode_info.

    The last line of each node is given a trailing newline if necessary.
    """
    for p in parent.self_and_subtree():
        v = p.v
        # Make sure that no code in x.post_pass has mistakenly set p.b.
        assert not v._bodyString, repr(v._bodyString)
        lines = self.get_lines(p)
        if lines:
            last = lines[-1]
            if not last.endswith('\n'):
                lines[-1] = last + '\n'
        body = ''.join(lines)
        v._bodyString = g.toUnicode(body, reportErrors=True)
887 #@+node:ekr.20161108131153.3: *4* Stage 4: i.check & helpers
def check(self, unused_s, parent):
    """
    True if perfect import checks pass: that is, if a trial write of the
    imported tree is equivalent to the original file, self.file_s.

    Blank lines are always ignored. Leading whitespace is significant only
    if self.strict is True.
    """
    if g.app.suppressImportChecks:
        # Suppress the check just once.
        g.app.suppressImportChecks = False
        return True
    c = self.c
    sfn = g.shortFileName(self.root.h)
    lines1 = g.splitLines(g.toUnicode(self.file_s, self.encoding))
    lines2 = g.splitLines(self.trial_write())
    if 0:  # An excellent trace for debugging.
        g.trace(c.shortFileName())
        g.printObj(lines1, tag='lines1')
        g.printObj(lines2, tag='lines2')
    # Strict: ignore blank lines only. Adding nodes may add blank lines.
    # Otherwise: ignore blank lines and leading whitespace.
    # Importing may regularize whitespace, and that's good.
    strip = self.strip_blank_lines if self.strict else self.strip_all
    lines1 = strip(lines1)
    lines2 = strip(lines2)
    # Forgive trailing whitespace problems in the last line.
    # This is not the same as clean_last_lines.
    if lines1 and lines2 and lines1 != lines2:
        lines1[-1] = lines1[-1].rstrip() + '\n'
        lines2[-1] = lines2[-1].rstrip() + '\n'
    ok = lines1 == lines2
    if not ok and not self.strict:
        # Issue an error only if something *other than* lws is amiss.
        lines1, lines2 = self.strip_lws(lines1), self.strip_lws(lines2)
        ok = lines1 == lines2
        if ok and not g.unitTesting:
            print('warning: leading whitespace changed in:', self.root.h)
    if ok:
        return ok
    self.show_failure(lines1, lines2, sfn)
    if g.unitTesting:
        assert False, 'Perfect import failed!'
    return ok
930 #@+node:ekr.20161124030004.1: *5* i.clean_last_lines
def clean_last_lines(self, lines):
    """Remove, in place, all trailing whitespace-only lines from lines. Return lines."""
    while lines:
        if not lines[-1].isspace():
            break
        del lines[-1]
    return lines
936 #@+node:ekr.20170404035138.1: *5* i.context_lines
def context_lines(self, aList, i, n=2):
    """
    Return formatted lines showing aList[i] with up to n lines of context
    on each side.

    Each entry has the form "<marker> <1-based line number> <repr of the
    line, passed through g.truncate(s, 60)>"; the line at index i is
    marked with '*'.
    """
    before = aList[max(0, i - n) : i]
    after = aList[i + 1 : i + n + 1]
    result = []
    # Lines preceding aList[i].
    for number, s in enumerate(before, i + 1 - len(before)):
        result.append(' %4s %r\n' % (number, g.truncate(s, 60)))
    # The line of interest.
    result.append('* %4s %r\n' % (i + 1, g.truncate(aList[i], 60)))
    # Lines following aList[i].
    for number, s in enumerate(after, i + 2):
        result.append(' %4s %r\n' % (number, g.truncate(s, 60)))
    return result
948 #@+node:ekr.20161123210716.1: *5* i.show_failure
def show_failure(self, lines1, lines2, sfn):
    """
    Report a failed perfect-import comparison.

    lines1: the expected lines.
    lines2: the lines actually produced.
    sfn: the short file name used in the messages.

    Prints the first mismatched line of each list with surrounding
    context.  If the common prefix of the two lists is identical, prints
    which list is missing its tail and dumps both lists.
    """
    if not g.unitTesting:
        g.es('@auto failed:', sfn, color='red')
    n1, n2 = len(lines1), len(lines2)
    print('\n===== PERFECT IMPORT FAILED =====', sfn)
    print('len(s1): %s len(s2): %s' % (n1, n2))
    # Index of the first differing pair, or None if the shorter list is a
    # prefix of the longer one.
    first_diff = next(
        (k for k, (a, b) in enumerate(zip(lines1, lines2)) if a != b),
        None,
    )
    if first_diff is None:
        lines_s = 'n2' if n1 > n2 else 'n1'
        print(f"missing tail lines in {lines_s}")
        g.printObj(lines1, tag='lines1')
        g.printObj(lines2, tag='lines2')
        return
    print('first mismatched line: %s' % (first_diff + 1))
    print('Expected...')
    print(''.join(self.context_lines(lines1, first_diff)))
    print('Got...')
    print(''.join(self.context_lines(lines2, first_diff)))
971 #@+node:ekr.20161108131153.5: *5* i.strip_*
def lstrip_line(self, s):
    """
    Return s without its leading whitespace, keeping the trailing newline.

    A whitespace-only line becomes a bare newline rather than ''.
    s must be non-empty.
    """
    # This fixes a major bug in strip_lws.
    assert s, g.callers()
    if s.isspace():
        return '\n'
    return s.lstrip()
def strip_all(self, lines):
    """
    Return a new list: `lines` without blank lines, and with the leading
    whitespace removed from each remaining line.
    """
    non_blank = self.strip_blank_lines(lines)
    return self.strip_lws(non_blank)
def strip_blank_lines(self, lines):
    """Return a new list containing the lines that are not whitespace-only."""
    kept = []
    for line in lines:
        if line.isspace():
            continue
        kept.append(line)
    return kept
def strip_lws(self, lines):
    """
    Return a new list in which every line has been passed through
    self.lstrip_line.
    """
    stripped = []
    for line in lines:
        # Deliberately go through lstrip_line instead of inlining
        # "'\n' if z.isspace() else z.lstrip()".
        stripped.append(self.lstrip_line(line))
    return stripped
993 #@+node:ekr.20161123210335.1: *5* i.trace_lines
def trace_lines(self, lines1, lines2, parent):
    """
    Debugging aid: print both line lists, one numbered repr per line.

    parent.h is used as the title of the first listing.
    """
    print('===== s1: %s' % parent.h)
    for number, s in enumerate(lines1, 1):
        g.pr('%3s %r' % (number, s))
    print('===== s2')
    for number, s in enumerate(lines2, 1):
        g.pr('%3s %r' % (number, s))
1002 #@+node:ekr.20161108131153.6: *5* i.trial_write
def trial_write(self):
    """
    Return the text produced by at.atAutoToString(self.root), converted
    with g.toUnicode using self.encoding.

    The 'allow_undefined_refs' attribute is set on c.atFileCommands for
    the duration of the write and removed afterwards, even on error.
    """
    at = self.c.atFileCommands
    # Leo 5.6: Allow apparent section refs for *all* languages.
    try:
        at.allow_undefined_refs = True
        written = at.atAutoToString(self.root)
    finally:
        if hasattr(at, 'allow_undefined_refs'):
            del at.allow_undefined_refs
    return g.toUnicode(written, self.encoding)
1015 #@+node:ekr.20161108131153.15: *3* i.Utils
1016 #@+node:ekr.20211118082436.1: *4* i.dump_tree
def dump_tree(self, root, tag=None):
    """
    Print every node of root's tree: its level, headline and lines.

    Like LeoUnitTest.dump_tree.  The lines come from
    self.vnode_info[p.v]['lines'] when that entry exists, otherwise from
    splitting p.v.b.
    """
    info = getattr(self, 'vnode_info', {})
    if tag:
        print(tag)
    for p in root.self_and_subtree():
        print('')
        print('level:', p.level(), p.h)
        if p.v in info:
            lines = info[p.v]['lines']
        else:
            lines = g.splitLines(p.v.b)
        g.printObj(lines)
1029 #@+node:ekr.20161114012522.1: *4* i.all_contexts
def all_contexts(self, table):
    """
    Return the sorted list of distinct contexts found in `table`.

    `table` maps keys to lists of tuples.  Only 4-element tuples (the
    out-of-context entries) contribute, and the context is their third
    element.

    This is a support method for unit tests.
    """
    # Order must not matter, so collecting into a set and sorting is ok.
    found = {
        entry[2]
        for key in table
        for entry in table.get(key)
        if len(entry) == 4
    }
    return sorted(found)
1046 #@+node:ekr.20161108131153.12: *4* i.insert_ignore_directive
def insert_ignore_directive(self, parent):
    """
    Append an @ignore directive to parent's body text.

    Outside unit tests, if parent is an @<file> node other than an @auto
    node, also issue a warning and record parent.h in
    c.import_error_nodes.
    """
    c = self.c
    # Do *not* update the screen by setting p.b.
    body = parent.v.b.rstrip()
    parent.v.b = body + '\n@ignore\n'
    if g.unitTesting:
        return
    if parent.isAnyAtFileNode() and not parent.isAtAutoNode():
        g.warning('inserting @ignore')
        c.import_error_nodes.append(parent.h)
1056 #@+node:ekr.20161108155143.4: *4* i.match
def match(self, s, i, pattern):
    """Return True if s contains `pattern` starting exactly at index i."""
    end = i + len(pattern)
    candidate = s[i:end]
    return candidate == pattern
1060 #@+node:ekr.20161108131153.18: *4* i.Messages
def error(self, s):
    """
    Record an import error by incrementing self.errors and
    self.importCommands.errors.

    The message `s` is not used by this method.
    """
    self.errors = self.errors + 1
    commands = self.importCommands
    commands.errors = commands.errors + 1
def report(self, message):
    """
    Report `message`: via self.error when self.strict is true, otherwise
    via self.warning.
    """
    handler = self.error if self.strict else self.warning
    handler(message)
def warning(self, s):
    """Issue the warning `s` via g.warning, except while unit testing."""
    if g.unitTesting:
        return
    g.warning('Warning:', s)
1075 #@+node:ekr.20161109045619.1: *4* i.print_lines
def print_lines(self, lines):
    """
    Debugging aid: print the repr of each line, one per output line,
    enclosed between '[' and ']' lines.
    """
    print('[')
    for item in lines:
        print(f"{item!r}")
    print(']')

# Alias: the same method is also available under the name print_list.
print_list = print_lines
1084 #@+node:ekr.20161125174423.1: *4* i.print_stack
def print_stack(self, stack):
    """Debugging aid: print the headline (p.h) of every position in stack."""
    headlines = [p.h for p in stack]
    g.printList(headlines)
1088 #@+node:ekr.20161108131153.21: *4* i.underindented_comment/line
def underindented_comment(self, line):
    """
    Warn about an underindented comment `line`, but only when
    self.at_auto_warns_about_leading_whitespace is true.
    """
    if not self.at_auto_warns_about_leading_whitespace:
        return
    message = (
        'underindented python comments.\n' +
        'Extra leading whitespace will be added\n' + line)
    self.warning(message)
def underindented_line(self, line):
    """
    Report an underindented `line` via self.error, but only when
    self.warn_about_underindented_lines is true.
    """
    if not self.warn_about_underindented_lines:
        return
    message = (
        'underindented line.\n'
        'Extra leading whitespace will be added\n' + line)
    self.error(message)
1100 #@+node:ekr.20161109045312.1: *3* i.Whitespace
1101 #@+node:ekr.20161108155143.3: *4* i.get_int_lws
def get_int_lws(self, s):
    """
    Return the width of the leading whitespace of line s, as computed by
    g.computeLeadingWhitespaceWidth with self.tab_width.
    """
    # Important: use self.tab_width, *not* c.tab_width.
    width = g.computeLeadingWhitespaceWidth(s, self.tab_width)
    return width
1106 #@+node:ekr.20161109053143.1: *4* i.get_leading_indent
def get_leading_indent(self, lines, i, ignoreComments=True):
    """
    Return the leading whitespace width (an int) of the first significant
    line at or after lines[i].

    When ignoreComments is true, lines for which self.is_ws_line is true
    are skipped first.  Return 0 if no line remains.
    """
    count = len(lines)
    if ignoreComments:
        while i < count and self.is_ws_line(lines[i]):
            i += 1
    if i < count:
        return self.get_int_lws(lines[i])
    return 0
1119 #@+node:ekr.20161108131153.17: *4* i.get_str_lws
def get_str_lws(self, s):
    """Return the leading run of spaces and tabs of s (possibly '')."""
    found = re.match(r'([ \t]*)', s)
    if not found:
        return ''
    return found.group(0)
1124 #@+node:ekr.20161109052011.1: *4* i.is_ws_line
def is_ws_line(self, s):
    """
    Return True if self.ws_pattern matches at the start of s, i.e. s is
    nothing but whitespace and single-line comments.
    """
    return True if self.ws_pattern.match(s) else False
1128 #@+node:ekr.20161108131153.19: *4* i.undent & helper
def undent(self, p):
    """
    Return the lines of p with their common leading whitespace removed.

    The common prefix is self.common_lws(lines).  Whitespace-only lines
    that lack the prefix are kept unchanged.  Any other line lacking the
    prefix is underindented: it is emitted as the underindent escape
    string, the missing width, a period, and the left-stripped line.

    This is *not* the same as textwrap.dedent!
    """
    # Called from i.post_pass, i.unindent_all_nodes.
    c = self.c
    if self.is_rst:
        # Never unindent rst code.
        # NOTE(review): this path returns p.b, whereas the normal path
        # returns a list of lines -- confirm callers handle both.
        return p.b
    escape = c.atFileCommands.underindentEscapeString
    lines = self.get_lines(p)
    ws = self.common_lws(lines)
    result = []
    for s in lines:
        if s.startswith(ws):
            result.append(s[len(ws) :])
            continue
        if s.isspace():
            # Never change blank lines.
            result.append(s)
            continue
        # Indicate that the line is underindented.
        lws = g.get_leading_ws(s)
        # Bug fix 2021/11/15: Use n1 - n2, not n1!
        n1 = g.computeWidth(ws, self.tab_width)
        n2 = g.computeWidth(lws, self.tab_width)
        assert n1 > n2, (n1, n2)
        missing = n1 - n2
        result.append(f"{escape}{missing}.{s.lstrip()}")
    return result
1160 #@+node:ekr.20161108131153.20: *5* i.common_lws
def common_lws(self, lines):
    """
    Return the leading whitespace (a string) shared by all lines.

    The candidate starts as the lws of lines[0] and is shortened whenever
    a later line's lws is a prefix of it.  Lines for which
    self.is_ws_line is true are skipped in the loop.  Return '' for an
    empty list, or when two lines' leading whitespace share no
    prefix relationship.
    """
    if not lines:
        return ''
    lws = self.get_str_lws(lines[0])
    for s in lines:
        if self.is_ws_line(s):
            continue
        lws2 = self.get_str_lws(s)
        if lws2.startswith(lws):
            continue
        if lws.startswith(lws2):
            # This line is indented less: shrink the candidate.
            lws = lws2
            continue
        # Nothing in common.
        return ''
    return lws
1177 #@+node:ekr.20161109072221.1: *4* i.undent_body_lines & helper
def undent_body_lines(self, lines, ignoreComments=True):
    """
    Join `lines` and remove the first significant line's indentation from
    every line.  Return the resulting string.

    The amount removed is self.get_leading_indent(lines, 0, ...); the
    removal itself is done by self.undent_by.  The joined text is
    returned unchanged for rst code or when that amount is 0.
    """
    s = ''.join(lines)
    if self.is_rst:
        return s  # Never unindent rst code.
    # Calculate the amount to be removed from each line.
    amount = self.get_leading_indent(lines, 0, ignoreComments=ignoreComments)
    return s if amount == 0 else self.undent_by(s, amount)
1192 #@+node:ekr.20161108180655.2: *5* i.undent_by
def undent_by(self, s, undent_val):
    """
    Return s with leading whitespace equivalent to undent_val removed
    from each line.

    Strict languages only: a non-blank line whose leading whitespace is
    narrower than undent_val is emitted as self.escape, the missing
    width, a period, and the left-stripped line.  rst text is returned
    unchanged.
    """
    if self.is_rst:
        return s  # Never unindent rst code.
    pieces = []
    for line in g.splitlines(s):
        lws_s = self.get_str_lws(line)
        width = g.computeWidth(lws_s, self.tab_width)
        # Add underindentEscapeString only for strict languages.
        underindented = self.strict and not line.isspace() and width < undent_val
        if underindented:
            # End the underindent count with a period to
            # protect against lines that start with a digit!
            piece = "%s%s.%s" % (self.escape, undent_val - width, line.lstrip())
        else:
            piece = g.removeLeadingWhitespace(line, undent_val, self.tab_width)
        pieces.append(piece)
    return ''.join(pieces)
1215 #@-others
@classmethod
def do_import(cls):
    """
    Return a function f(c, s, parent) that instantiates this class with
    c.importCommands and returns the result of its run(s, parent).
    """
    def f(c, s, parent):
        importer = cls(c.importCommands)
        return importer.run(s, parent)
    return f
1222#@+node:ekr.20161108171914.1: ** class ScanState
class ScanState:
    """
    The base class for classes representing the state of the
    line-oriented scan.

    Attributes: bs_nl, context, curlies, indent, parens, squares.
    """

    def __init__(self, d=None):
        """
        ScanState ctor.

        With a non-empty dict d, take `indent` from d['indent'] and copy
        every other field from the state d['prev'].  Otherwise create
        the initial state.
        """
        if not d:
            self.bs_nl = False
            self.context = ''
            self.curlies = self.indent = self.parens = self.squares = 0
            return
        indent, prev = d.get('indent'), d.get('prev')
        self.indent = indent  # NOT prev.indent
        self.bs_nl = prev.bs_nl
        self.context = prev.context
        self.curlies = prev.curlies
        self.parens = prev.parens
        self.squares = prev.squares

    #@+others
    #@+node:ekr.20161118043146.1: *3* ScanState.__repr__
    def __repr__(self):
        """Return a short description: the context and the curlies count."""
        fields = (self.context, self.curlies)
        return 'ScanState context: %r curlies: %s' % fields
    #@+node:ekr.20161119115215.1: *3* ScanState.level
    def level(self):
        """Return the level of this state: its curlies count."""
        return self.curlies
    #@+node:ekr.20161118043530.1: *3* ScanState.update
    def update(self, data):
        """
        Update the state using the 6-tuple returned by i.scan_line:
        (context, i, delta_c, delta_p, delta_s, bs_nl).

        context and bs_nl replace the current values; the three deltas
        are added to curlies, parens and squares.  Return i = data[1].
        """
        new_context, i, d_curlies, d_parens, d_squares, new_bs_nl = data
        self.bs_nl = new_bs_nl
        self.context = new_context
        self.curlies = self.curlies + d_curlies
        self.parens = self.parens + d_parens
        self.squares = self.squares + d_squares
        return i

    #@-others
1270#@+node:ekr.20161108155158.1: ** class Target
class Target:
    """
    A class describing a target node p.
    state is used to cut back the stack.
    """

    def __init__(self, p, state):
        """Target ctor: remember p and state; all flags start out False."""
        self.p = p
        self.state = state
        # True: @others has been generated for this target.
        self.at_others_flag = False
        # Can be forced True.
        self.gen_refs = False
        # True: @others or section reference should be generated.
        # It's always True when gen_refs is True.
        self.ref_flag = False

    def __repr__(self):
        """Return a one-line summary: state, flags (as ints) and p's short file name."""
        fields = (
            self.state,
            int(self.at_others_flag),
            int(self.gen_refs),
            g.shortFileName(self.p.h),
        )
        return 'Target: %s @others: %s refs: %s p: %s' % fields
1296#@-others
1297#@@language python
1298#@@tabwidth -4
1299#@@pagewidth 70
1300#@-leo