r"""
=====================
Javascript Minifier
=====================

rJSmin is a javascript minifier written in python.

The minifier is based on the semantics of `jsmin.c by Douglas Crockford`_\\.

:Copyright:

 Copyright 2011 - 2014
 Andr\xe9 Malo or his licensors, as applicable

:License:

 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.

The module is a re-implementation aiming for speed, so it can be used at
runtime (rather than during a preprocessing step). Usually it produces the
same results as the original ``jsmin.c``. It differs in the following ways:

- there is no error detection: unterminated string, regex and comment
  literals are treated as regular javascript code and minified as such.
- Control characters inside string and regex literals are left untouched; they
  are not converted to spaces (nor to \\n)
- Newline characters are not allowed inside string and regex literals, except
  for line continuations in string literals (ECMA-5).
- "return /regex/" is recognized correctly.
- "+ +" and "- -" sequences are not collapsed to '++' or '--'
- Newlines before ! operators are removed more sensibly
- Comments starting with an exclamation mark (``!``) can be kept optionally
- rJSmin does not handle streams, but only complete strings. (However, the
  module provides a "streamy" interface).

Since most parts of the logic are handled by the regex engine it's way
faster than the original python port of ``jsmin.c`` by Baruch Even. The speed
factor varies between about 6 and 55 depending on input and python version
(it gets faster the more compressed the input already is). Compared to the
speed-refactored python port by Dave St.Germain the performance gain is less
dramatic but still between 1.2 and 7. See the docs/BENCHMARKS file for
details.

rjsmin.c is a reimplementation of rjsmin.py in C and speeds it up even more.

Both python 2 and python 3 are supported.

.. _jsmin.c by Douglas Crockford:
   http://www.crockford.com/javascript/jsmin.c
"""
# The source file is kept pure ASCII: non-ASCII characters are written as
# escapes (e.g. ``\xe9``) inside raw strings and expanded here at import time.
if __doc__:
    # __doc__ is falsy when there is no module docstring; nothing to expand
    # in that case.
    __doc__ = __doc__.encode('ascii').decode('unicode_escape')
__author__ = r"Andr\xe9 Malo".encode('ascii').decode('unicode_escape')
__docformat__ = "restructuredtext en"
__license__ = "Apache License, Version 2.0"
__version__ = "1.0.9"

# Only the minifier entry point is part of the public API.
__all__ = ["jsmin"]

import re as _re
73
74
def _make_jsmin(python_only=False):
    """
    Generate JS minifier based on `jsmin.c by Douglas Crockford`_

    All regex building blocks are assembled and compiled once, when this
    factory is called. The returned python minifier merely runs one of the
    two precompiled substitutions.

    .. _jsmin.c by Douglas Crockford:
        http://www.crockford.com/javascript/jsmin.c

    :Parameters:
      `python_only` : ``bool``
        Use only the python variant. If true, the c extension is not even
        tried to be loaded.

    :Return: Minifier
    :Rtype: ``callable``
    """
    if not python_only:
        try:
            import _rjsmin
        except ImportError:
            # No C implementation importable: build the python one below.
            pass
        else:
            return _rjsmin.jsmin

    # Control characters and blank, except for \n (\012) and \r (\015),
    # which are handled separately as newlines.
    space_chars = r'[\000-\011\013\014\016-\040]'

    line_comment = r'(?://[^\r\n]*)'
    space_comment = r'(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)'
    space_comment_nobang = r'(?:/\*(?!!)[^*]*\*+(?:[^/*][^*]*\*+)*/)'
    bang_comment = r'(?:/\*![^*]*\*+(?:[^/*][^*]*\*+)*/)'

    # \047 is the apostrophe. A backslash may be followed by any character
    # or by a line break (line continuation).
    string1 = \
        r'(?:\047[^\047\\\r\n]*(?:\\(?:[^\r\n]|\r?\n|\r)[^\047\\\r\n]*)*\047)'
    string2 = r'(?:"[^"\\\r\n]*(?:\\(?:[^\r\n]|\r?\n|\r)[^"\\\r\n]*)*")'
    strings = r'(?:%s|%s)' % (string1, string2)

    charclass = r'(?:\[[^\\\]\r\n]*(?:\\[^\r\n][^\\\]\r\n]*)*\])'
    nospecial = r'[^/\\\[\r\n]'
    regex = r'(?:/(?![\r\n/*])%s*(?:(?:\\[^\r\n]|%s)%s*)*/)' % (
        nospecial, charclass, nospecial
    )
    space = r'(?:%s|%s)' % (space_chars, space_comment)
    space_nobang = r'(?:%s|%s)' % (space_chars, space_comment_nobang)
    newline = r'(?:%s?[\r\n])' % line_comment

    def sequentize(string):
        """
        Notate consecutive characters as sequence

        (1-4 instead of 1234)
        """
        runs, first, last = [], None, None
        for char in map(ord, string):
            if last is None:
                first = last = char
            elif last + 1 == char:
                last = char
            else:
                runs.append((first, last))
                first = last = char
        if last is not None:
            runs.append((first, last))
        # Two adjacent characters are simply written one after the other;
        # the dash notation is only used for runs of three or more.
        return ''.join([
            '%s%s%s' % (
                chr(first),
                '-' if last > first + 1 else '',
                chr(last) if last != first else '',
            )
            for first, last in runs
        ])

    def fix_charclass(result):
        """ Fixup string of chars to fit into a regex char class """
        pos = result.find('-')
        if pos >= 0:
            # Move the dash to the very end, so it cannot form a range.
            result = r'%s%s-' % (result[:pos], result[pos + 1:])

        escaped = (
            sequentize(result)
            .replace('\\', '\\\\')
            .replace('[', '\\[')
            .replace(']', '\\]')
        )
        # Control characters, blank and apostrophe are written as octal
        # escapes.
        return _re.sub(
            r'([\000-\040\047])',
            lambda m: '\\%03o' % ord(m.group(1)),
            escaped
        )

    def unmatched(pattern):
        """ Collect the characters 0..126 which `pattern` does not match """
        match = _re.compile(pattern).match
        # range() is fine on python 2 and 3 alike for 127 items; no xrange
        # lookup needed.
        return ''.join([chr(c) for c in range(127) if not match(chr(c))])

    def id_literal_(what):
        """ Make id_literal like char class """
        # Negated listing: the class matches whatever `what` matches, plus
        # every character from \177 upwards.
        return '[^%s]' % fix_charclass(unmatched(what))

    def not_id_literal_(keep):
        """ Make negated id_literal like char class """
        return r'[%s]' % fix_charclass(unmatched(id_literal_(keep)))

    not_id_literal = not_id_literal_(r'[a-zA-Z0-9_$]')
    preregex1 = r'[(,=:\[!&|?{};\r\n]'
    preregex2 = r'%sreturn' % not_id_literal

    id_literal = id_literal_(r'[a-zA-Z0-9_$]')
    id_literal_open = id_literal_(r'[a-zA-Z0-9_${\[(!+-]')
    id_literal_close = id_literal_(r'[a-zA-Z0-9_$}\])"\047+-]')

    dull = r'[^\047"/\000-\040]'

    # Explicit template values (instead of formatting with locals()).
    # 'space' is supplied per variant by compile_sub().
    parts = dict(
        dull=dull,
        strings=strings,
        bang_comment=bang_comment,
        regex=regex,
        newline=newline,
        preregex1=preregex1,
        preregex2=preregex2,
        id_literal=id_literal,
        id_literal_open=id_literal_open,
        id_literal_close=id_literal_close,
    )

    def compile_sub(space_pattern, keep_bang):
        """
        Compile the substitution regex and return its ``sub`` method

        :Parameters:
          `space_pattern` : ``str``
            Pattern to use for "space" (blank or block comment)

          `keep_bang` : ``bool``
            Add a capturing alternative for ``/*!...*/`` comments?

        :Return: ``sub`` method of the compiled pattern
        :Rtype: ``callable``
        """
        pieces = [
            r'(%(dull)s+)'
            r'|(%(strings)s%(dull)s*)'
        ]
        if keep_bang:
            pieces.append(r'|(%(bang_comment)s%(dull)s*)')
        pieces.append(
            r'|(?<=%(preregex1)s)'
                r'%(space)s*(?:%(newline)s%(space)s*)*'
                r'(%(regex)s%(dull)s*)'
            # Kept exactly as is: unlike the branch above, every newline
            # is followed by exactly one space token here.
            r'|(?<=%(preregex2)s)'
                r'%(space)s*(?:%(newline)s%(space)s)*'
                r'(%(regex)s%(dull)s*)'
            r'|(?<=%(id_literal_close)s)'
                r'%(space)s*(?:(%(newline)s)%(space)s*)+'
                r'(?=%(id_literal_open)s)'
            r'|(?<=%(id_literal)s)(%(space)s)+(?=%(id_literal)s)'
            r'|(?<=\+)(%(space)s)+(?=\+)'
            r'|(?<=-)(%(space)s)+(?=-)'
            r'|%(space)s+'
            r'|(?:%(newline)s%(space)s*)+'
        )
        return _re.compile(
            ''.join(pieces) % dict(parts, space=space_pattern)
        ).sub

    space_sub_simple = compile_sub(space, keep_bang=False)

    def space_subber_simple(match):
        """ Substitution callback """
        groups = match.groups()
        # Groups 0-3: code, strings and regex literals, passed through
        if groups[0]:
            return groups[0]
        if groups[1]:
            return groups[1]
        if groups[2]:
            return groups[2]
        if groups[3]:
            return groups[3]
        # Group 4: a newline that has to stay
        if groups[4]:
            return '\n'
        # Groups 5-7: space between identifiers, "+ +" or "- -"
        if groups[5] or groups[6] or groups[7]:
            return ' '
        return ''

    space_sub_banged = compile_sub(space_nobang, keep_bang=True)

    def space_subber_banged(match):
        """ Substitution callback """
        groups = match.groups()
        # Same as the simple callback, shifted by the extra bang comment
        # group (index 2), which is passed through as well.
        if groups[0]:
            return groups[0]
        if groups[1]:
            return groups[1]
        if groups[2]:
            return groups[2]
        if groups[3]:
            return groups[3]
        if groups[4]:
            return groups[4]
        if groups[5]:
            return '\n'
        if groups[6] or groups[7] or groups[8]:
            return ' '
        return ''

    def jsmin(script, keep_bang_comments=False):
        r"""
        Minify javascript based on `jsmin.c by Douglas Crockford`_\.

        Instead of parsing the stream char by char, it uses a regular
        expression approach which minifies the whole script with one big
        substitution regex.

        .. _jsmin.c by Douglas Crockford:
            http://www.crockford.com/javascript/jsmin.c

        :Parameters:
          `script` : ``str``
            Script to minify

          `keep_bang_comments` : ``bool``
            Keep comments starting with an exclamation mark? (``/*!...*/``)

        :Return: Minified script
        :Rtype: ``str``
        """
        # The script is wrapped in newlines, so the lookbehinds always have
        # a character to look at; leftovers are stripped afterwards.
        if keep_bang_comments:
            return space_sub_banged(
                space_subber_banged, '\n%s\n' % script
            ).strip()
        return space_sub_simple(
            space_subber_simple, '\n%s\n' % script
        ).strip()

    return jsmin

jsmin = _make_jsmin()
286
287
# NOTE(review): the ``def`` line of this function was missing from the
# reviewed text. The signature follows the docstring below; the name is
# restored as ``jsmin_for_posers`` -- confirm against the upstream source.
def jsmin_for_posers(script, keep_bang_comments=False):
    r"""
    Minify javascript based on `jsmin.c by Douglas Crockford`_\.

    Instead of parsing the stream char by char, it uses a regular
    expression approach which minifies the whole script with one big
    substitution regex.

    .. _jsmin.c by Douglas Crockford:
        http://www.crockford.com/javascript/jsmin.c

    :Warning: This function is the digest of a _make_jsmin() call. It just
              utilizes the resulting regexes. It's here for fun and may
              vanish any time. Use the `jsmin` function instead.

    :Parameters:
      `script` : ``str``
        Script to minify

      `keep_bang_comments` : ``bool``
        Keep comments starting with an exclamation mark? (``/*!...*/``)

    :Return: Minified script
    :Rtype: ``str``
    """
    # The regex literals below are frozen text; they must be kept verbatim.
    if not keep_bang_comments:
        rex = (
            r'([^\047"/\000-\040]+)|((?:(?:\047[^\047\\\r\n]*(?:\\(?:[^\r\n]'
            r'|\r?\n|\r)[^\047\\\r\n]*)*\047)|(?:"[^"\\\r\n]*(?:\\(?:[^\r\n]'
            r'|\r?\n|\r)[^"\\\r\n]*)*"))[^\047"/\000-\040]*)|(?<=[(,=:\[!&|?'
            r'{};\r\n])(?:[\000-\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*'
            r'][^*]*\*+)*/))*(?:(?:(?://[^\r\n]*)?[\r\n])(?:[\000-\011\013\0'
            r'14\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*)*((?:/(?![\r'
            r'\n/*])[^/\\\[\r\n]*(?:(?:\\[^\r\n]|(?:\[[^\\\]\r\n]*(?:\\[^\r'
            r'\n][^\\\]\r\n]*)*\]))[^/\\\[\r\n]*)*/)[^\047"/\000-\040]*)|(?<'
            r'=[\000-#%-,./:-@\[-^`{-~-]return)(?:[\000-\011\013\014\016-\04'
            r'0]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*(?:(?:(?://[^\r\n]*)?['
            r'\r\n])(?:[\000-\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^'
            r'*]*\*+)*/)))*((?:/(?![\r\n/*])[^/\\\[\r\n]*(?:(?:\\[^\r\n]|(?:'
            r'\[[^\\\]\r\n]*(?:\\[^\r\n][^\\\]\r\n]*)*\]))[^/\\\[\r\n]*)*/)['
            r'^\047"/\000-\040]*)|(?<=[^\000-!#%&(*,./:-@\[\\^`{|~])(?:[\000'
            r'-\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*(?'
            r':((?:(?://[^\r\n]*)?[\r\n]))(?:[\000-\011\013\014\016-\040]|(?'
            r':/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*)+(?=[^\000-\040"#%-\047)*,.'
            r'/:-@\\-^`|-~])|(?<=[^\000-#%-,./:-@\[-^`{-~-])((?:[\000-\011\0'
            r'13\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)))+(?=[^\00'
            r'0-#%-,./:-@\[-^`{-~-])|(?<=\+)((?:[\000-\011\013\014\016-\040]'
            r'|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)))+(?=\+)|(?<=-)((?:[\000-'
            r'\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)))+(?'
            r'=-)|(?:[\000-\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]'
            r'*\*+)*/))+|(?:(?:(?://[^\r\n]*)?[\r\n])(?:[\000-\011\013\014\0'
            r'16-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*)+'
        )

        def subber(match):
            """ Substitution callback """
            groups = match.groups()
            # Groups 0-3 are passed through, 4 keeps a newline, 5-7 keep
            # a single blank; everything else is dropped.
            return (
                groups[0] or
                groups[1] or
                groups[2] or
                groups[3] or
                (groups[4] and '\n') or
                (groups[5] and ' ') or
                (groups[6] and ' ') or
                (groups[7] and ' ') or
                ''
            )
    else:
        rex = (
            r'([^\047"/\000-\040]+)|((?:(?:\047[^\047\\\r\n]*(?:\\(?:[^\r\n]'
            r'|\r?\n|\r)[^\047\\\r\n]*)*\047)|(?:"[^"\\\r\n]*(?:\\(?:[^\r\n]'
            r'|\r?\n|\r)[^"\\\r\n]*)*"))[^\047"/\000-\040]*)|((?:/\*![^*]*\*'
            r'+(?:[^/*][^*]*\*+)*/)[^\047"/\000-\040]*)|(?<=[(,=:\[!&|?{};\r'
            r'\n])(?:[\000-\011\013\014\016-\040]|(?:/\*(?!!)[^*]*\*+(?:[^/*'
            r'][^*]*\*+)*/))*(?:(?:(?://[^\r\n]*)?[\r\n])(?:[\000-\011\013\0'
            r'14\016-\040]|(?:/\*(?!!)[^*]*\*+(?:[^/*][^*]*\*+)*/))*)*((?:/('
            r'?![\r\n/*])[^/\\\[\r\n]*(?:(?:\\[^\r\n]|(?:\[[^\\\]\r\n]*(?:'
            r'\\[^\r\n][^\\\]\r\n]*)*\]))[^/\\\[\r\n]*)*/)[^\047"/\000-\040]'
            r'*)|(?<=[\000-#%-,./:-@\[-^`{-~-]return)(?:[\000-\011\013\014\0'
            r'16-\040]|(?:/\*(?!!)[^*]*\*+(?:[^/*][^*]*\*+)*/))*(?:(?:(?://['
            r'^\r\n]*)?[\r\n])(?:[\000-\011\013\014\016-\040]|(?:/\*(?!!)[^*'
            r']*\*+(?:[^/*][^*]*\*+)*/)))*((?:/(?![\r\n/*])[^/\\\[\r\n]*(?:('
            r'?:\\[^\r\n]|(?:\[[^\\\]\r\n]*(?:\\[^\r\n][^\\\]\r\n]*)*\]))[^/'
            r'\\\[\r\n]*)*/)[^\047"/\000-\040]*)|(?<=[^\000-!#%&(*,./:-@\[\\'
            r'^`{|~])(?:[\000-\011\013\014\016-\040]|(?:/\*(?!!)[^*]*\*+(?:['
            r'^/*][^*]*\*+)*/))*(?:((?:(?://[^\r\n]*)?[\r\n]))(?:[\000-\011'
            r'\013\014\016-\040]|(?:/\*(?!!)[^*]*\*+(?:[^/*][^*]*\*+)*/))*)+'
            r'(?=[^\000-\040"#%-\047)*,./:-@\\-^`|-~])|(?<=[^\000-#%-,./:-@'
            r'\[-^`{-~-])((?:[\000-\011\013\014\016-\040]|(?:/\*(?!!)[^*]*\*'
            r'+(?:[^/*][^*]*\*+)*/)))+(?=[^\000-#%-,./:-@\[-^`{-~-])|(?<=\+)'
            r'((?:[\000-\011\013\014\016-\040]|(?:/\*(?!!)[^*]*\*+(?:[^/*][^'
            r'*]*\*+)*/)))+(?=\+)|(?<=-)((?:[\000-\011\013\014\016-\040]|(?:'
            r'/\*(?!!)[^*]*\*+(?:[^/*][^*]*\*+)*/)))+(?=-)|(?:[\000-\011\013'
            r'\014\016-\040]|(?:/\*(?!!)[^*]*\*+(?:[^/*][^*]*\*+)*/))+|(?:(?'
            r':(?://[^\r\n]*)?[\r\n])(?:[\000-\011\013\014\016-\040]|(?:/\*('
            r'?!!)[^*]*\*+(?:[^/*][^*]*\*+)*/))*)+'
        )

        def subber(match):
            """ Substitution callback """
            groups = match.groups()
            # One more pass-through group (index 2, the bang comment), so
            # the newline and blank groups are shifted by one.
            return (
                groups[0] or
                groups[1] or
                groups[2] or
                groups[3] or
                groups[4] or
                (groups[5] and '\n') or
                (groups[6] and ' ') or
                (groups[7] and ' ') or
                (groups[8] and ' ') or
                ''
            )

    # Wrapped in newlines so the lookbehinds always have a character to
    # look at; leftovers are stripped afterwards.
    return _re.sub(rex, subber, '\n%s\n' % script).strip()
402
403
if __name__ == '__main__':
    def main():
        """
        Main

        Minify the script read from stdin and write the result to stdout.

        Recognized command line flags:

        ``-b``
          Keep bang comments (``/*!...*/``)

        ``-p``
          Use the python implementation only

        ``-bp`` or ``-pb``
          Both of the above
        """
        import sys as _sys

        # -p replaces the module level minifier, hence the declaration.
        global jsmin

        args = _sys.argv[1:]
        keep_bang_comments = (
            '-b' in args
            or '-bp' in args
            or '-pb' in args
        )
        if '-p' in args or '-bp' in args or '-pb' in args:
            jsmin = _make_jsmin(python_only=True)
        _sys.stdout.write(jsmin(
            _sys.stdin.read(), keep_bang_comments=keep_bang_comments
        ))
    main()
421