Coverage for src/midgy/render.py: 94%

238 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2022-10-24 15:45 -0700

1"""render builds the machinery to translate markdown documents to code.""" 

2 

3from dataclasses import dataclass, field 

4from functools import partial 

5from io import StringIO 

6from re import compile 

7from textwrap import dedent 

8 

9__all__ = () 

10 

11DOCTEST_CHAR, CONTINUATION_CHAR, COLON_CHAR, QUOTES_CHARS = 62, 92, 58, {39, 34} 

12DOCTEST_CHARS = DOCTEST_CHAR, DOCTEST_CHAR, DOCTEST_CHAR, 32 

13ESCAPE = {x: "\\" + x for x in "'\""} 

14ESCAPE_PATTERN = compile("[" + "".join(ESCAPE) + "]") 

15ELLIPSIS_CHARS = (ord("."),) * 3 + (32,) 

16escape = partial(ESCAPE_PATTERN.sub, lambda m: ESCAPE.get(m.group(0))) 

17 

18 

19# the Renderer is special markdown renderer designed to produce 

20# line for line transformations of markdown to the converted code. 

21# not all languages require this, but for python it matters. 

@dataclass
class Renderer:
    """the base render system for markdown to code.

    * tokenize & render markdown as code
    * line-for-line rendering
    * use indented code as fiducial markers for translation
    * augment the commonmark spec with shebang, doctest, code, and front_matter tokens
    * a reusable base class that underlies the python translation
    """

    from markdown_it import MarkdownIt

    # the markdown tokenizer; "gfm-like" commonmark with inline definitions
    # enabled and bare (unprefixed) language names on code fences.
    parser: object = field(
        default_factory=partial(
            MarkdownIt, "gfm-like", options_update=dict(inline_definitions=True, langPrefix="")
        )
    )
    # an hr whose markup has more than this many non-space characters marks a
    # cell boundary in walk_cells/parse_cells/render_cells.
    cell_hr_length: int = 9
    # fence info strings whose contents are included as code, not markdown.
    include_code_fences: set = field(default_factory=set)
    # when false, indented code blocks are excluded from the rendered code.
    include_indented_code: bool = True
    # front-matter key whose value holds keyword arguments for this class.
    config_key: str = "py"

    def __post_init__(self):
        from mdit_py_plugins import deflist, footnote

        from .front_matter import _front_matter_lexer, _shebang_lexer

        # our tangling system adds extra conventions to commonmark:
        ## extend indented code to recognize doctest syntax in-line
        ## replace the indented code lexer to recognize doctests and append metadata.
        ## recognize shebang lines at the beginning of a document.
        ## recognize front-matter at the beginning of document of following shebangs
        self.parser.block.ruler.before("code", "doctest", _doctest_lexer)
        self.parser.block.ruler.disable("code")
        self.parser.block.ruler.after("doctest", "code", _code_lexer)
        self.parser.block.ruler.before("table", "shebang", _shebang_lexer)
        self.parser.block.ruler.before("table", "front_matter", _front_matter_lexer)
        self.parser.use(footnote.footnote_plugin).use(deflist.deflist_plugin)
        self.parser.disable("footnote_tail")

    def code_block(self, token, env):
        """yield the verbatim source lines covered by an indented code token."""
        if self.include_indented_code:
            yield from self.get_block(env, token.map[1])

    code_fence_block = code_block

    @classmethod
    def code_from_string(cls, body, **kwargs):
        """render a string"""
        return cls(**kwargs).render(body)

    def fence(self, token, env):
        """render a fence as code when included, else dispatch to `fence_<info>`."""
        if token.info in self.include_code_fences:
            return self.code_fence_block(token, env)
        method = getattr(self, f"fence_{token.info}", None)
        if method:
            return method(token, env)

    def format(self, body):
        """a function that consumers can use to format their code"""
        return body

    def get_block(self, env, stop=None):
        """iterate through the lines in a buffer

        ``stop`` is an exclusive source line number (an int); when None the
        rest of the buffer is drained."""
        if stop is None:
            yield from env["source"]
        else:
            while env["last_line"] < stop:
                yield self.readline(env)

    def non_code(self, env, next=None):
        """yield the buffered lines preceding the next code token (or the rest)."""
        yield from self.get_block(env, next.map[0] if next else None)

    def parse(self, src):
        """tokenize a markdown source string."""
        return self.parser.parse(src)

    def parse_cells(self, body, *, include_cell_hr=True):
        """yield the token blocks for each cell of a document."""
        yield from (
            x[0] for x in self.walk_cells(self.parse(body), include_cell_hr=include_cell_hr)
        )

    def print(self, iter, io):
        """write every piece of an iterable to a stream with no separators."""
        return print(*iter, file=io, sep="", end="")

    def readline(self, env):
        """read one line from the source buffer, advancing the line counter."""
        try:
            return env["source"].readline()
        finally:
            env["last_line"] += 1

    def render(self, src, format=False):
        """translate a markdown string to code, optionally formatting the result."""
        tokens = self.parse(src)
        out = self.render_tokens(tokens, src=src)
        return self.format(out) if format else out

    def render_cells(self, src, *, include_cell_hr=True):
        """render each cell of a document separately over one shared buffer.

        NOTE(review): the shared StringIO is popped from ``prior`` while
        ``walk_cells`` still holds that same dict as its env, and each cell's
        line counter is copied from the previous cell's env — verify the
        ``include_cell_hr=False`` path against real documents.
        """
        tokens = self.parse(src)
        self = self.renderer_from_tokens(tokens)
        prior = self._init_env(src, tokens)
        prior_token = None
        source = prior.pop("source")
        for block, next_token in self.walk_cells(
            tokens, env=prior, include_cell_hr=include_cell_hr
        ):
            env = self._init_env(src, block)
            env["source"], env["last_line"] = source, prior["last_line"]
            prior_token and block.insert(0, prior_token)
            yield self.render_tokens(block, env=env, stop=next_token)
            prior, prior_token = env, next_token

    def render_lines(self, src):
        """render a list of lines and return the dedented result as lines."""
        return dedent(self.render("".join(src))).splitlines(True)

    def renderer_from_tokens(self, tokens):
        """return a renderer reconfigured from the document's front matter, if any."""
        front_matter = self._get_front_matter(tokens)
        if front_matter:
            config = front_matter.get(self.config_key, None)
            if config:
                return type(self)(**config)
        return self

    def render_tokens(self, tokens, env=None, src=None, stop=None):
        """render parsed markdown tokens"""
        target = StringIO()
        self = self.renderer_from_tokens(tokens)
        if env is None:
            env = self._init_env(src, tokens)

        for generic, code in self._walk_code_blocks(tokens):
            # we walk pairs of tokens preceding code and the code token
            # the next code token is needed as a reference for indenting
            # non-code blocks that precede the code.
            env["next_code"] = code
            for token in generic:
                # walk the non-code tokens for any markers the class defines
                # renderers for. the renderer is responsible for taking of the
                # preceding non-code blocks, this feature is needed for any logical
                # rendering conditions.
                f = getattr(self, token.type, None)
                f and self.print(f(token, env) or "", target)
            if code:
                # format and print the preceding non-code block
                self.print(self.non_code(env, code), target)

                # update the rendering environment
                env.update(
                    last_indent=code.meta["last_indent"],
                )

                # format and print
                self.print(self.code_block(code, env), target)

        # handle anything left in the buffer
        self.print(self.non_code(env, stop), target)

        return target.getvalue()  # return the value of the target, a format string.

    def wrap_lines(self, lines, lead="", pre="", trail="", continuation=""):
        """a utility function to manipulate a buffer of content line-by-line.

        ``lead`` is emitted before the first content line, ``trail`` after the
        last, and ``continuation`` is inserted before newlines of leading
        whitespace-only lines. (``pre`` is currently accepted but unused.)
        """
        ws, any, continued = "", False, False
        for line in lines:
            LL = len(line.rstrip())
            if LL:
                # remember whether this line ends with a backslash continuation
                continued = line[LL - 1] == "\\"
                LL -= 1 * continued
                if any:
                    # flush the whitespace accumulated since the prior content line
                    yield ws
                else:
                    # before the first content line, re-emit accumulated blank
                    # lines with the continuation marker before each newline
                    for l in StringIO(ws):
                        yield l[:-1] + continuation + l[-1]
                yield from (lead, line[:LL])
                any, ws = True, line[LL:]
                lead = ""
            else:
                ws += line
        if any:
            yield trail
        if continued:
            # re-emit trailing whitespace, restoring the stripped backslash
            # on every line after the first
            for i, line in enumerate(StringIO(ws)):
                yield from (lead, line[:-1], i and "\\" or "", line[-1])
        else:
            yield ws

    def _init_env(self, src, tokens):
        """build the rendering environment: source buffer, line counter, indents."""
        env = dict(source=StringIO(src), last_line=0, min_indent=None, last_indent=0)
        include_doctest = getattr(self, "include_doctest", False)
        for token in tokens:
            doctest = False
            if token.type == "fence":
                if token.info in self.include_code_fences:
                    # included fences are flush with the margin
                    env["min_indent"] = 0
                    continue
                if include_doctest:
                    doctest = token.info == "pycon"
            if doctest or (token.type == "code_block"):
                # track the smallest indent across all code-bearing tokens
                if env["min_indent"] is None:
                    env["min_indent"] = token.meta["min_indent"]
                else:
                    env["min_indent"] = min(env["min_indent"], token.meta["min_indent"])

        if env["min_indent"] is None:
            env["min_indent"] = 0
        return env

    def _get_front_matter(self, tokens):
        """load the leading front matter, skipping a shebang; None when absent."""
        for token in tokens:
            if token.type == "shebang":
                continue
            if token.type == "front_matter":
                from .front_matter import load

                return load(token.content)
            # front matter can only precede all other tokens
            return

    def walk_cells(self, tokens, *, env=None, include_cell_hr=True):
        """partition tokens into cells at long hr markers, yielding (block, hr)."""
        block = []
        for token in tokens:
            if token.type == "hr":
                if (len(token.markup) - token.markup.count(" ")) > self.cell_hr_length:
                    yield (list(block), token)
                    block.clear()
                    if include_cell_hr:
                        block.append(token)
                    elif env is not None:
                        # skip past the excluded hr's source lines; get_block
                        # compares last_line against an integer stop, so pass
                        # the token's end line, not the token itself.
                        list(self.get_block(env, token.map[1]))
            else:
                block.append(token)
        if block:
            yield block, None

    def _walk_code_blocks(self, tokens):
        """yield (non-code tokens, code token) pairs; the final pair has no code."""
        prior = []
        for token in tokens:
            if token.type == "code_block":
                yield list(prior), token
                prior.clear()
            else:
                prior.append(token)
        yield prior, None

    del MarkdownIt

264 

265 

@dataclass
class DedentCodeBlock(Renderer):
    """a Renderer that dedents indented code blocks to the document's
    minimum code indent."""

    def code_block(self, token, env):
        """yield the token's source lines with the common leading indent removed.

        content lines lose their first ``env["min_indent"]`` columns; blank
        lines are yielded untouched. (the original kept a ``last = right``
        dead store that was never read; it has been removed.)
        """
        ref = env["min_indent"]
        for line in self.get_block(env, token.map[1]):
            if line.lstrip():
                yield line[ref:]
            else:
                yield line

277 

278 

def _code_lexer(state, start, end, silent=False):
    """a code lexer that tracks indents in the token and is aware of doctests

    consumes consecutive indented-code lines, stopping early at a ">>> " line
    so the doctest rule can take over, and records the first, last, and
    minimum indents on the token's meta for the renderers.
    """
    if state.sCount[start] - state.blkIndent >= 4:
        first_indent, last_indent, next, last_line = 0, 0, start, start
        while next < end:
            # blank lines are absorbed into the block without ending it
            if state.isEmpty(next):
                next += 1
                continue
            if state.sCount[next] - state.blkIndent >= 4:
                begin = state.bMarks[next] + state.tShift[next]
                # stop before a ">>> " line; the doctest lexer owns it
                if state.srcCharCode[begin : begin + 4] == DOCTEST_CHARS:
                    break
                if not first_indent:
                    first_indent = state.sCount[next]
                last_indent, last_line = state.sCount[next], next
                next += 1
            else:
                # a dedented line ends the indented code block
                break
        state.line = last_line + 1
        token = state.push("code_block", "code", 0)
        token.content = state.getLines(start, state.line, 4 + state.blkIndent, True)
        token.map = [start, state.line]
        # the smallest indent among the block's non-blank, indented lines
        # (assumes at least one qualifying line; the first line qualifies
        # because the sCount guard above admitted it)
        min_indent = min(
            state.sCount[i]
            for i in range(start, state.line)
            if not state.isEmpty(i) and state.sCount[i]
        )
        meta = dict(
            first_indent=first_indent,
            last_indent=last_indent,
            min_indent=min_indent,
        )
        token.meta.update(meta)
        return True
    return False

314 

315 

def _doctest_lexer(state, startLine, end, silent=False):
    """a markdown-it-py plugin for doctests

    doctest are a literate programming convention in python that we
    include in the pidgy grammar. this avoids a mixing python and doctest
    code together.

    the doctest blocks:
    * extend the indented code blocks
    * do not conflict with blockquotes
    * are implicit code fences with the `pycon` info
    * can be replaced with explicit code blocks.
    """
    start = state.bMarks[startLine] + state.tShift[startLine]

    # doctests only occur inside indented code (4+ columns past blkIndent)
    if (start - state.blkIndent) < 4:
        return False

    if state.srcCharCode[start : start + 4] == DOCTEST_CHARS:
        # lead..extra spans the ">>> " and "... " input lines,
        # extra..output spans the expected-output lines
        lead, extra, output, closed = startLine, startLine + 1, startLine + 1, False
        indent, next = state.sCount[startLine], startLine + 1
        while next < end:
            # a blank line ends the doctest block
            if state.isEmpty(next):
                break
            # a dedent below the ">>> " line ends the block
            if state.sCount[next] < indent:
                break
            begin = state.bMarks[next] + state.tShift[next]
            # a fresh ">>> " line starts the next doctest block
            if state.srcCharCode[begin : begin + 4] == DOCTEST_CHARS:
                break

            next += 1
            if (not closed) and state.srcCharCode[begin : begin + 4] == ELLIPSIS_CHARS:
                # "... " continuation lines extend the input region
                extra = next
            else:
                # the first non-continuation line closes the input region;
                # everything from here on is expected output
                closed = True
                output = next
        state.line = next
        # doctests are emitted as implicit fences with the `pycon` info
        token = state.push("fence", "code", 0)
        token.info = "pycon"
        token.content = state.getLines(startLine, next, 0, True)
        token.map = [startLine, state.line]
        token.meta.update(
            first_indent=indent,
            last_indent=indent,
            min_indent=indent,
        )

        # record the input and output line ranges; output is None when the
        # doctest has no expected-output lines
        token.meta.update(input=[lead, extra])
        token.meta.update(output=[extra, output] if extra < output else None)

        return True
    return False