Coverage for src/midgy/render.py: 94%
238 statements
coverage.py v6.5.0, created at 2022-10-24 15:45 -0700
1"""render builds the machinery to translate markdown documents to code."""
3from dataclasses import dataclass, field
4from functools import partial
5from io import StringIO
6from re import compile
7from textwrap import dedent
9__all__ = ()
11DOCTEST_CHAR, CONTINUATION_CHAR, COLON_CHAR, QUOTES_CHARS = 62, 92, 58, {39, 34}
12DOCTEST_CHARS = DOCTEST_CHAR, DOCTEST_CHAR, DOCTEST_CHAR, 32
13ESCAPE = {x: "\\" + x for x in "'\""}
14ESCAPE_PATTERN = compile("[" + "".join(ESCAPE) + "]")
15ELLIPSIS_CHARS = (ord("."),) * 3 + (32,)
16escape = partial(ESCAPE_PATTERN.sub, lambda m: ESCAPE.get(m.group(0)))

# the Renderer is a special markdown renderer designed to produce
# line-for-line transformations of markdown to the converted code.
# not all languages require this, but for python it matters.
@dataclass
class Renderer:
    """the base render system for markdown to code.

    * tokenize & render markdown as code
    * line-for-line rendering
    * use indented code as fiducial markers for translation
    * augment the commonmark spec with shebang, doctest, code, and front_matter tokens
    * a reusable base class that underlies the python translation
    """

    from markdown_it import MarkdownIt

    parser: object = field(
        default_factory=partial(
            MarkdownIt, "gfm-like", options_update=dict(inline_definitions=True, langPrefix="")
        )
    )
    cell_hr_length: int = 9
    include_code_fences: set = field(default_factory=set)
    include_indented_code: bool = True
    config_key: str = "py"

    def __post_init__(self):
        from mdit_py_plugins import deflist, footnote

        from .front_matter import _front_matter_lexer, _shebang_lexer

        # our tangling system adds extra conventions to commonmark:
        ## extend indented code to recognize doctest syntax in-line
        ## replace the indented code lexer to recognize doctests and append metadata.
        ## recognize shebang lines at the beginning of a document.
        ## recognize front-matter at the beginning of a document or following a shebang.
        self.parser.block.ruler.before("code", "doctest", _doctest_lexer)
        self.parser.block.ruler.disable("code")
        self.parser.block.ruler.after("doctest", "code", _code_lexer)
        self.parser.block.ruler.before("table", "shebang", _shebang_lexer)
        self.parser.block.ruler.before("table", "front_matter", _front_matter_lexer)
        self.parser.use(footnote.footnote_plugin).use(deflist.deflist_plugin)
        self.parser.disable("footnote_tail")

    def code_block(self, token, env):
        if self.include_indented_code:
            yield from self.get_block(env, token.map[1])

    code_fence_block = code_block

    @classmethod
    def code_from_string(cls, body, **kwargs):
        """render a string"""
        return cls(**kwargs).render(body)
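
    # usage sketch (not part of the original source): assuming markdown-it-py
    # and mdit-py-plugins are importable, a one-off translation can skip the
    # explicit constructor, e.g.
    #
    #     Renderer.code_from_string("some prose\n\n    print('hi')\n")
    #
    # the indented block is tangled as code; how the surrounding prose is
    # emitted depends on the subclass's non-code handling.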

    def fence(self, token, env):
        if token.info in self.include_code_fences:
            return self.code_fence_block(token, env)
        method = getattr(self, f"fence_{token.info}", None)
        if method:
            return method(token, env)

    def format(self, body):
        """a function that consumers can use to format their code"""
        return body

    def get_block(self, env, stop=None):
        """iterate through the lines in a buffer"""
        if stop is None:
            yield from env["source"]
        else:
            while env["last_line"] < stop:
                yield self.readline(env)

    def non_code(self, env, next=None):
        yield from self.get_block(env, next.map[0] if next else None)

    def parse(self, src):
        return self.parser.parse(src)

    def parse_cells(self, body, *, include_cell_hr=True):
        yield from (
            x[0] for x in self.walk_cells(self.parse(body), include_cell_hr=include_cell_hr)
        )

    def print(self, iter, io):
        return print(*iter, file=io, sep="", end="")

    def readline(self, env):
        try:
            return env["source"].readline()
        finally:
            env["last_line"] += 1

    def render(self, src, format=False):
        tokens = self.parse(src)
        out = self.render_tokens(tokens, src=src)
        return self.format(out) if format else out

    def render_cells(self, src, *, include_cell_hr=True):
        tokens = self.parse(src)
        self = self.renderer_from_tokens(tokens)
        prior = self._init_env(src, tokens)
        prior_token = None
        source = prior.pop("source")
        for block, next_token in self.walk_cells(
            tokens, env=prior, include_cell_hr=include_cell_hr
        ):
            env = self._init_env(src, block)
            env["source"], env["last_line"] = source, prior["last_line"]
            prior_token and block.insert(0, prior_token)
            yield self.render_tokens(block, env=env, stop=next_token)
            prior, prior_token = env, next_token

    def render_lines(self, src):
        return dedent(self.render("".join(src))).splitlines(True)

    def renderer_from_tokens(self, tokens):
        front_matter = self._get_front_matter(tokens)
        if front_matter:
            config = front_matter.get(self.config_key, None)
            if config:
                return type(self)(**config)
        return self

    def render_tokens(self, tokens, env=None, src=None, stop=None):
        """render parsed markdown tokens"""
        target = StringIO()
        self = self.renderer_from_tokens(tokens)
        if env is None:
            env = self._init_env(src, tokens)

        for generic, code in self._walk_code_blocks(tokens):
            # we walk pairs of the tokens preceding a code block and the code
            # token itself; the next code token is needed as a reference for
            # indenting the non-code blocks that precede it.
            env["next_code"] = code
            for token in generic:
                # walk the non-code tokens for any markers the class defines
                # renderers for. the renderer is responsible for taking care of
                # the preceding non-code blocks; this feature is needed for any
                # logical rendering conditions.
                f = getattr(self, token.type, None)
                f and self.print(f(token, env) or "", target)
            if code:
                # format and print the preceding non-code block
                self.print(self.non_code(env, code), target)

                # update the rendering environment
                env.update(
                    last_indent=code.meta["last_indent"],
                )

                # format and print
                self.print(self.code_block(code, env), target)

        # handle anything left in the buffer
        self.print(self.non_code(env, stop), target)

        return target.getvalue()  # return the value of the target, a format string.

    def wrap_lines(self, lines, lead="", pre="", trail="", continuation=""):
        """a utility function to manipulate a buffer of content line-by-line."""
        ws, any, continued = "", False, False
        for line in lines:
            LL = len(line.rstrip())
            if LL:
                continued = line[LL - 1] == "\\"
                LL -= 1 * continued
                if any:
                    yield ws
                else:
                    for i, l in enumerate(StringIO(ws)):
                        yield l[:-1] + continuation + l[-1]
                yield from (lead, line[:LL])
                any, ws = True, line[LL:]
                lead = ""
            else:
                ws += line
        if any:
            yield trail
        if continued:
            for i, line in enumerate(StringIO(ws)):
                yield from (lead, line[:-1], i and "\\" or "", line[-1])
        else:
            yield ws
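
    # illustrative example (not in the original source): ``wrap_lines`` wraps
    # non-empty content between ``lead`` and ``trail`` while preserving the
    # trailing whitespace, e.g. for an already-constructed ``renderer``
    #
    #     "".join(renderer.wrap_lines(['foo\n'], lead='"""', trail='"""'))
    #
    # yields '"""foo"""\n'.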

    def _init_env(self, src, tokens):
        env = dict(source=StringIO(src), last_line=0, min_indent=None, last_indent=0)
        include_doctest = getattr(self, "include_doctest", False)
        for token in tokens:
            doctest = False
            if token.type == "fence":
                if token.info in self.include_code_fences:
                    env["min_indent"] = 0
                    continue
                if include_doctest:
                    doctest = token.info == "pycon"
            if doctest or (token.type == "code_block"):
                if env["min_indent"] is None:
                    env["min_indent"] = token.meta["min_indent"]
                else:
                    env["min_indent"] = min(env["min_indent"], token.meta["min_indent"])

        if env["min_indent"] is None:
            env["min_indent"] = 0
        return env

    def _get_front_matter(self, tokens):
        for token in tokens:
            if token.type == "shebang":
                continue
            if token.type == "front_matter":
                from .front_matter import load

                return load(token.content)
            return

    def walk_cells(self, tokens, *, env=None, include_cell_hr=True):
        block = []
        for token in tokens:
            if token.type == "hr":
                if (len(token.markup) - token.markup.count(" ")) > self.cell_hr_length:
                    yield (list(block), token)
                    block.clear()
                    if include_cell_hr:
                        block.append(token)
                    elif env is not None:
                        list(self.get_block(env, token))
            else:
                block.append(token)
        if block:
            yield block, None

    def _walk_code_blocks(self, tokens):
        prior = []
        for token in tokens:
            if token.type == "code_block":
                yield list(prior), token
                prior.clear()
            else:
                prior.append(token)
        yield prior, None

    del MarkdownIt
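
# usage sketch (not part of the original source): ``include_code_fences`` routes
# matching fenced blocks through ``code_fence_block`` above, so a renderer that
# also tangles ```python fences could be built as
#
#     renderer = Renderer(include_code_fences={"python"})
#     code = renderer.render(markdown_text)
#
# assuming markdown-it-py and mdit-py-plugins are installed; ``markdown_text``
# is a stand-in name for the document being translated.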

@dataclass
class DedentCodeBlock(Renderer):
    def code_block(self, token, env):
        ref = env["min_indent"]
        for line in self.get_block(env, token.map[1]):
            right = line.lstrip()
            if right:
                yield line[ref:]
                last = right
            else:
                yield line
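
# illustrative behaviour (a sketch, not from the source): with the dedenting
# renderer, a markdown code block indented four spaces, such as
#
#     some prose
#
#         print("hi")
#
# has its lines re-emitted with ``env["min_indent"]`` columns removed, so the
# tangled code starts at column zero while blank lines pass through untouched.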

def _code_lexer(state, start, end, silent=False):
    """a code lexer that tracks indents in the token and is aware of doctests"""
    if state.sCount[start] - state.blkIndent >= 4:
        first_indent, last_indent, next, last_line = 0, 0, start, start
        while next < end:
            if state.isEmpty(next):
                next += 1
                continue
            if state.sCount[next] - state.blkIndent >= 4:
                begin = state.bMarks[next] + state.tShift[next]
                if state.srcCharCode[begin : begin + 4] == DOCTEST_CHARS:
                    break
                if not first_indent:
                    first_indent = state.sCount[next]
                last_indent, last_line = state.sCount[next], next
                next += 1
            else:
                break
        state.line = last_line + 1
        token = state.push("code_block", "code", 0)
        token.content = state.getLines(start, state.line, 4 + state.blkIndent, True)
        token.map = [start, state.line]
        min_indent = min(
            state.sCount[i]
            for i in range(start, state.line)
            if not state.isEmpty(i) and state.sCount[i]
        )
        meta = dict(
            first_indent=first_indent,
            last_indent=last_indent,
            min_indent=min_indent,
        )
        token.meta.update(meta)
        return True
    return False

def _doctest_lexer(state, startLine, end, silent=False):
    """a markdown-it-py plugin for doctests

    doctests are a literate programming convention in python that we
    include in the pidgy grammar. this avoids mixing python and doctest
    code together.

    the doctest blocks:
    * extend the indented code blocks
    * do not conflict with blockquotes
    * are implicit code fences with the `pycon` info
    * can be replaced with explicit code blocks.
    """
    start = state.bMarks[startLine] + state.tShift[startLine]

    if (start - state.blkIndent) < 4:
        return False

    if state.srcCharCode[start : start + 4] == DOCTEST_CHARS:
        lead, extra, output, closed = startLine, startLine + 1, startLine + 1, False
        indent, next = state.sCount[startLine], startLine + 1
        while next < end:
            if state.isEmpty(next):
                break
            if state.sCount[next] < indent:
                break
            begin = state.bMarks[next] + state.tShift[next]
            if state.srcCharCode[begin : begin + 4] == DOCTEST_CHARS:
                break

            next += 1
            if (not closed) and state.srcCharCode[begin : begin + 4] == ELLIPSIS_CHARS:
                extra = next
            else:
                closed = True
                output = next
        state.line = next
        token = state.push("fence", "code", 0)
        token.info = "pycon"
        token.content = state.getLines(startLine, next, 0, True)
        token.map = [startLine, state.line]
        token.meta.update(
            first_indent=indent,
            last_indent=indent,
            min_indent=indent,
        )

        token.meta.update(input=[lead, extra])
        token.meta.update(output=[extra, output] if extra < output else None)

        return True
    return False
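
# illustrative input (a sketch, not part of the original source): in a markdown
# document, an indented doctest block such as
#
#     >>> 1 + 1
#     2
#
# is lexed here into a single ``pycon`` fence token, with the ``input`` and
# ``output`` line ranges recorded in ``token.meta`` for downstream renderers.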