Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1#@+leo-ver=5-thin 

2#@+node:ekr.20140723122936.18137: * @file ../plugins/importers/xml.py 

3"""The @auto importer for the xml language.""" 

4import re 

5from leo.core import leoGlobals as g 

6from leo.plugins.importers import linescanner 

7Importer = linescanner.Importer 

8Target = linescanner.Target 

9#@+others 

10#@+node:ekr.20161121204146.3: ** class Xml_Importer 

class Xml_Importer(Importer):
    """
    The importer for the xml language.

    Each line is scanned character by character (see scan_line) to track
    two things: the current "context" (inside a double-quoted string or a
    comment) and the nesting level of the tags listed in self.start_tags.
    """

    #@+others
    #@+node:ekr.20161122124109.1: *3* xml_i.__init__
    def __init__(self, importCommands, tags_setting='import_xml_tags', **kwargs):
        """
        Init the importer.

        importCommands: passed unchanged to the base Importer class.
        tags_setting:   name of the setting, read by add_tags via
                        c.config.getData, that lists the block-starting tags.
        kwargs:         accepted for call compatibility; not used here.
        """
        # Init the base class.
        super().__init__(
            importCommands,
            language='xml',
            state_class=Xml_ScanState,
            strict=False,
        )
        self.tags_setting = tags_setting
        self.start_tags = self.add_tags()
        # Stack of (lower-cased) tag names pushed by scan_tag.
        # Entries are removed by end_tag and pop_to_tag.
        self.stack = []
        # Tags that are closed by a plain ">" instead of "/>" or "</tag>".
        # scan_tag skips the leading "<" before matching the tag name, so
        # the xml declaration is seen as '?xml', not '<?xml'.
        self.void_tags = [
            '?xml',
            '<?xml',  # Legacy spelling: never matched, kept for compatibility.
            '!doctype',
        ]
        # Initialized here but not read anywhere in this class.
        self.tag_warning_given = False
    #@+node:ekr.20161121204918.1: *3* xml_i.add_tags
    def add_tags(self):
        """
        Return the list of block-starting tags, lower-cased.

        The tags come from the setting named by self.tags_setting.
        Return an empty list if the setting yields nothing.
        """
        tags = self.c.config.getData(self.tags_setting) or []
        return [z.lower() for z in tags]
    #@+node:ekr.20170416082422.1: *3* xml_i.clean_headline
    def clean_headline(self, s, p=None):
        """
        xml and html: Return a cleaned up headline s.

        Return the first "<...>" element of s if s starts with one
        (ignoring leading whitespace). Otherwise return s, stripped.
        """
        m = re.match(r'\s*(<[^>]+>)', s)
        return m.group(1) if m else s.strip()
    #@+node:ekr.20161123003732.1: *3* xml_i.error
    def error(self, s):
        """Issue an error, but do *not* cause a unit test to fail."""
        g.es_print('\nin %s' % self.root.h)
        g.es_print(s)
        # Tell i.check to strip lws.
        self.ws_error = True
    #@+node:ekr.20161122073505.1: *3* xml_i.scan_line & helpers
    def scan_line(self, s, prev_state):
        """
        Update the xml scan state by scanning line s.

        prev_state supplies the context and tag_level in effect at the
        start of the line. Return a new Xml_ScanState for the end of s.
        """
        context, tag_level = prev_state.context, prev_state.tag_level
        i = 0
        while i < len(s):
            progress = i
            if context:
                context, i = self.scan_in_context(context, i, s)
            else:
                context, i, tag_level = self.scan_out_context(i, s, tag_level)
            # Every helper must consume at least one character.
            assert progress < i, (repr(s[i]), '***', repr(s))
        d = {'context': context, 'tag_level': tag_level}
        return Xml_ScanState(d)
    #@+node:ekr.20161122073937.1: *4* xml_i.scan_in_context
    def scan_in_context(self, context, i, s):
        """
        Scan s from i, within the given context.
        Return (context, i)

        The context becomes '' when its terminator is found at s[i:].
        """
        assert context in ('"', '<!--'), repr(context)
        # Only double-quoted strings are valid strings in xml/html.
        if context == '"' and self.match(s, i, '"'):
            context = ''
            i += 1
        elif context == '<!--' and self.match(s, i, '-->'):
            context = ''
            i += 3
        else:
            i += 1
        return context, i
    #@+node:ekr.20161122073938.1: *4* xml_i.scan_out_context & helpers
    def scan_out_context(self, i, s, tag_level):
        """
        Scan s from i, outside any context.
        Return (context, i, tag_level)
        """
        context = ''
        if self.match(s, i, '"'):
            context = '"'  # Only double-quoted strings are xml/html strings.
            i += 1
        elif self.match(s, i, '<!--'):
            # Must be tested before the plain '<' case below.
            context = '<!--'
            i += 4
        elif self.match(s, i, '<'):
            # xml/html tags do *not* start contexts.
            i, tag_level = self.scan_tag(s, i, tag_level)
        elif self.match(s, i, '/>'):
            i += 2
            tag_level = self.end_tag(s, tag='/>', tag_level=tag_level)
        elif self.match(s, i, '>'):
            tag_level = self.end_tag(s, tag='>', tag_level=tag_level)
            i += 1
        else:
            i += 1
        return context, i, tag_level
    #@+node:ekr.20161122084808.1: *5* xml_i.end_tag
    def end_tag(self, s, tag, tag_level):
        """
        Handle the ">" or "/>" that ends an element.

        "/>" pops the top of the stack, decrementing tag_level if the
        popped tag is in self.start_tags.

        Ignore ">" except for void tags: pop the top of the stack only if
        it is in self.void_tags.

        Return the (possibly updated) tag_level.
        """
        if not self.stack:
            # There is nothing to close. Only "/>" is worth a warning.
            if tag == '/>':
                g.es_print("Warning: ignoring dubious /> in...")
                g.es_print(repr(s))
            return tag_level
        if tag == '/>':
            top = self.stack.pop()
            if top in self.start_tags:
                tag_level -= 1
        elif self.stack[-1] in self.void_tags:
            self.stack.pop()
        return tag_level
    #@+node:ekr.20161122080143.1: *5* xml_i.scan_tag & helper
    # Matches a tag name, optionally preceded by '!' or '?'.
    ch_pattern = re.compile(r'([\!\?]?[\w\_\.\:\-]+)', re.UNICODE)

    def scan_tag(self, s, i, tag_level):
        """
        Scan an xml tag starting with "<" or "</".

        Adjust the stack as appropriate:
        - "<" adds the tag to the stack.
        - "</" removes the top of the stack if it matches.

        tag_level changes only for tags in self.start_tags.
        Return (i, tag_level), with i just past the tag name.
        """
        assert s[i] == '<', repr(s[i])
        end_tag = self.match(s, i, '</')
        # Scan the tag.
        i += (2 if end_tag else 1)
        m = self.ch_pattern.match(s, i)
        if not m:
            # All other '<' characters should have had xml/html escapes applied to them.
            self.error('missing tag in position %s of %r' % (i, s))
            g.es_print(repr(s))
            return i, tag_level
        tag = m.group(0).lower()
        i += len(m.group(0))
        if end_tag:
            self.pop_to_tag(tag, s)
            if tag in self.start_tags:
                tag_level -= 1
        else:
            self.stack.append(tag)
            if tag in self.start_tags:
                tag_level += 1
        return i, tag_level
    #@+node:ekr.20170416043508.1: *6* xml_i.pop_to_tag
    def pop_to_tag(self, tag, s):
        """
        Attempt to pop tag from the top of the stack.

        Report an error if the stack is empty.

        If the top doesn't match, silently attempt to recover: when tag is
        somewhere in the stack, pop entries up to and including the
        nearest matching one; otherwise leave the stack unchanged.
        """
        if not self.stack:
            self.error('Empty tag stack: %s' % tag)
            g.es_print(repr(s))
            return
        if self.stack[-1] == tag:
            self.stack.pop()
            return
        # Mismatched closing tag: attempt a recovery.
        if tag in self.stack:
            while self.stack:
                if self.stack.pop() == tag:
                    return
    #@+node:ekr.20161121210839.1: *3* xml_i.starts_block
    def starts_block(self, i, lines, new_state, prev_state):
        """True if the line raises the tag level, that is, it starts an xml block."""
        return new_state.tag_level > prev_state.tag_level
    #@+node:ekr.20161121212858.1: *3* xml_i.is_ws_line
    # Warning: base Importer class defines ws_pattern.
    xml_ws_pattern = re.compile(r'\s*(<!--([^-]|-[^-])*-->\s*)*$')

    def is_ws_line(self, s):
        """True if s is nothing but whitespace or single-line comments."""
        return bool(self.xml_ws_pattern.match(s))
    #@+node:ekr.20161123005742.1: *3* xml_i.undent
    def undent(self, p):
        """
        Regularize lws before @others, but preserve lws for all other lines.
        This is needed to handle embedded brython code properly.

        Return the list of adjusted lines of p.
        """
        result, w = [], self.tab_width
        # A negative tab width means: indent with abs(w) spaces.
        indent = ' ' * abs(w) if w < 0 else '\t'
        for s in self.get_lines(p):
            if s.isspace():
                # Whitespace-only lines become bare newlines.
                result.append('\n')
                continue
            ls = s.lstrip()
            if not ls.startswith('@others'):
                # Fix #479: Preserve brython indentation when importing .html files.
                result.append(s)
            elif p == self.root:
                result.append(ls)
            else:
                result.append(indent + ls)
        return result
    #@-others

219#@+node:ekr.20161121204146.7: ** class class Xml_ScanState 

class Xml_ScanState:
    """
    A class representing the state of the xml line-oriented scan.

    Attributes:
        context:   '' outside any context, or the string that opened the
                   current context.
        tag_level: the nesting level of block-starting tags.
    """

    def __init__(self, d=None):
        """
        Init the state from d, a dict with optional 'context' and
        'tag_level' keys.

        A missing or empty d, or a missing key, yields the initial values:
        context '' and tag_level 0.
        """
        d = d or {}
        # Defaults keep tag_level comparable even if a caller omits a key.
        self.context = d.get('context', '')
        self.tag_level = d.get('tag_level', 0)

    def __repr__(self):
        """Return a readable description of the state."""
        return "Xml_ScanState context: %r tag_level: %s" % (
            self.context, self.tag_level)

    __str__ = __repr__

    #@+others
    #@+node:ekr.20161121204146.8: *3* xml_state.level
    def level(self):
        """Return the tag level of this state."""
        return self.tag_level
    #@-others

245#@-others 

# Module-level description of this importer: the import function and the
# file extensions it handles.
# NOTE(review): presumably read by Leo's importer-loading code -- confirm.
importer_dict = dict(
    func=Xml_Importer.do_import(),
    extensions=['.xml'],
)

250#@@language python 

251#@@tabwidth -4 

252 

253#@-leo