Module rjsmin

Source Code for Module rjsmin

  1  #!/usr/bin/env python 
  2  # -*- coding: ascii -*- 
  3  r""" 
  4  ===================== 
  5   Javascript Minifier 
  6  ===================== 
  7   
  8  rJSmin is a javascript minifier written in python. 
  9   
 10  The minifier is based on the semantics of `jsmin.c by Douglas Crockford`_\\. 
 11   
 12  :Copyright: 
 13   
 14   Copyright 2011 - 2014 
 15   Andr\xe9 Malo or his licensors, as applicable 
 16   
 17  :License: 
 18   
 19   Licensed under the Apache License, Version 2.0 (the "License"); 
 20   you may not use this file except in compliance with the License. 
 21   You may obtain a copy of the License at 
 22   
 23       http://www.apache.org/licenses/LICENSE-2.0 
 24   
 25   Unless required by applicable law or agreed to in writing, software 
 26   distributed under the License is distributed on an "AS IS" BASIS, 
 27   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
 28   See the License for the specific language governing permissions and 
 29   limitations under the License. 
 30   
 31  The module is a re-implementation aiming for speed, so it can be used at 
 32  runtime (rather than during a preprocessing step). Usually it produces the 
 33  same results as the original ``jsmin.c``. It differs in the following ways: 
 34   
 35  - there is no error detection: unterminated string, regex and comment 
 36    literals are treated as regular javascript code and minified as such. 
 37  - Control characters inside string and regex literals are left untouched; they 
 38    are not converted to spaces (nor to \\n) 
 39  - Newline characters are not allowed inside string and regex literals, except 
 40    for line continuations in string literals (ECMA-5). 
 41  - "return /regex/" is recognized correctly. 
 42  - "+ +" and "- -" sequences are not collapsed to '++' or '--' 
 43  - Newlines before ! operators are removed more sensibly 
 44  - Comments starting with an exclamation mark (``!``) can be kept optionally 
 45  - rJSmin does not handle streams, but only complete strings. (However, the 
 46    module provides a "streamy" interface). 
 47   
 48  Since most parts of the logic are handled by the regex engine it's way 
 49  faster than the original python port of ``jsmin.c`` by Baruch Even. The speed 
 50  factor varies between about 6 and 55 depending on input and python version 
 51  (it gets faster the more compressed the input already is). Compared to the 
 52  speed-refactored python port by Dave St.Germain the performance gain is less 
 53  dramatic but still between 1.2 and 7. See the docs/BENCHMARKS file for 
 54  details. 
 55   
 56  rjsmin.c is a reimplementation of rjsmin.py in C and speeds it up even more. 
 57   
 58  Both python 2 and python 3 are supported. 
 59   
 60  .. _jsmin.c by Douglas Crockford: 
 61     http://www.crockford.com/javascript/jsmin.c 
 62  """ 
 63  if __doc__: 
 64      # pylint: disable = W0622 
 65      __doc__ = __doc__.encode('ascii').decode('unicode_escape') 
 66  __author__ = r"Andr\xe9 Malo".encode('ascii').decode('unicode_escape') 
 67  __docformat__ = "restructuredtext en" 
 68  __license__ = "Apache License, Version 2.0" 
 69  __version__ = '1.0.9' 
 70  __all__ = ['jsmin'] 
 71   
 72  import re as _re 
 73   
 74   
def _make_jsmin(python_only=False):
    """
    Generate JS minifier based on `jsmin.c by Douglas Crockford`_

    The returned callable is either the ``jsmin`` function of the
    ``_rjsmin`` C extension (if it can be imported and `python_only` is
    false) or a pure python closure, which minifies the script using one big
    substitution regex assembled here.

    .. _jsmin.c by Douglas Crockford:
        http://www.crockford.com/javascript/jsmin.c

    :Parameters:
      `python_only` : ``bool``
        Use only the python variant. If true, no attempt is made to load
        the C extension.

    :Return: Minifier
    :Rtype: ``callable``
    """
    # pylint: disable = R0912, R0914, W0612
    if not python_only:
        try:
            import _rjsmin
        except ImportError:
            pass
        else:
            return _rjsmin.jsmin

    # NOTE: the names of the pattern fragments below are significant. The
    # final regexes are assembled by interpolating ``locals()`` into
    # ``%(name)s`` templates, so renaming a fragment breaks the templates.

    # Whitespace and control characters, except for \n (012) and \r (015)
    space_chars = r'[\000-\011\013\014\016-\040]'

    line_comment = r'(?://[^\r\n]*)'
    space_comment = r'(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)'
    space_comment_nobang = r'(?:/\*(?!!)[^*]*\*+(?:[^/*][^*]*\*+)*/)'
    bang_comment = r'(?:/\*![^*]*\*+(?:[^/*][^*]*\*+)*/)'

    # Single and double quoted string literals (\047 is the single quote).
    # A backslash may escape any character including a line break.
    string1 = \
        r'(?:\047[^\047\\\r\n]*(?:\\(?:[^\r\n]|\r?\n|\r)[^\047\\\r\n]*)*\047)'
    string2 = r'(?:"[^"\\\r\n]*(?:\\(?:[^\r\n]|\r?\n|\r)[^"\\\r\n]*)*")'
    strings = r'(?:%s|%s)' % (string1, string2)

    # Regex literal: may contain escapes and character classes, but no
    # line breaks. It must not start like a comment (// or /*).
    charclass = r'(?:\[[^\\\]\r\n]*(?:\\[^\r\n][^\\\]\r\n]*)*\])'
    nospecial = r'[^/\\\[\r\n]'
    regex = r'(?:/(?![\r\n/*])%s*(?:(?:\\[^\r\n]|%s)%s*)*/)' % (
        nospecial, charclass, nospecial
    )
    space = r'(?:%s|%s)' % (space_chars, space_comment)
    space_nobang = r'(?:%s|%s)' % (space_chars, space_comment_nobang)
    newline = r'(?:%s?[\r\n])' % line_comment

    def fix_charclass(result):
        """ Fixup string of chars to fit into a regex char class """
        pos = result.find('-')
        if pos >= 0:
            # Move the dash to the end, so it is not taken as a range
            result = r'%s%s-' % (result[:pos], result[pos + 1:])

        def sequentize(string):
            """
            Notate consecutive characters as sequence

            (1-4 instead of 1234)
            """
            first, last, ranges = None, None, []
            for char in map(ord, string):
                if last is None:
                    first = last = char
                elif last + 1 == char:
                    last = char
                else:
                    ranges.append((first, last))
                    first = last = char
            if last is not None:
                ranges.append((first, last))
            return ''.join([
                '%s%s%s' % (
                    chr(start),
                    '-' if end > start + 1 else '',
                    chr(end) if end != start else ''
                ) for start, end in ranges
            ])

        escaped = (
            sequentize(result)
            .replace('\\', '\\\\')
            .replace('[', '\\[')
            .replace(']', '\\]')
        )
        # Octal-escape control chars, the space and the single quote
        return _re.sub(
            r'([\000-\040\047])',  # for better portability
            lambda m: '\\%03o' % ord(m.group(1)),
            escaped
        )

    def id_literal_(what):
        """ Make id_literal like char class """
        match = _re.compile(what).match
        result = ''.join([
            chr(c) for c in range(127) if not match(chr(c))
        ])
        return '[^%s]' % fix_charclass(result)

    def not_id_literal_(keep):
        """ Make negated id_literal like char class """
        match = _re.compile(id_literal_(keep)).match
        result = ''.join([
            chr(c) for c in range(127) if not match(chr(c))
        ])
        return r'[%s]' % fix_charclass(result)

    # Contexts after which a slash starts a regex literal (not a division)
    not_id_literal = not_id_literal_(r'[a-zA-Z0-9_$]')
    preregex1 = r'[(,=:\[!&|?{};\r\n]'
    preregex2 = r'%(not_id_literal)sreturn' % locals()

    id_literal = id_literal_(r'[a-zA-Z0-9_$]')
    id_literal_open = id_literal_(r'[a-zA-Z0-9_${\[(!+-]')
    id_literal_close = id_literal_(r'[a-zA-Z0-9_$}\])"\047+-]')

    # Characters which need no special treatment at all
    dull = r'[^\047"/\000-\040]'

    space_sub_simple = _re.compile((
        r'(%(dull)s+)'
        r'|(%(strings)s%(dull)s*)'
        r'|(?<=%(preregex1)s)'
            r'%(space)s*(?:%(newline)s%(space)s*)*'
            r'(%(regex)s%(dull)s*)'
        r'|(?<=%(preregex2)s)'
            # Any amount of space may follow each newline here (same as in
            # the preregex1 alternative). Demanding exactly one space item
            # made "return\n  /re/" miss the regex literal.
            r'%(space)s*(?:%(newline)s%(space)s*)*'
            r'(%(regex)s%(dull)s*)'
        r'|(?<=%(id_literal_close)s)'
            r'%(space)s*(?:(%(newline)s)%(space)s*)+'
            r'(?=%(id_literal_open)s)'
        r'|(?<=%(id_literal)s)(%(space)s)+(?=%(id_literal)s)'
        r'|(?<=\+)(%(space)s)+(?=\+)'
        r'|(?<=-)(%(space)s)+(?=-)'
        r'|%(space)s+'
        r'|(?:%(newline)s%(space)s*)+'
    ) % locals()).sub

    def space_subber_simple(match):
        """ Substitution callback """
        # pylint: disable = R0911
        groups = match.groups()
        # dull chars, strings and regex literals are passed through
        for kept in groups[:4]:
            if kept:
                return kept
        if groups[4]:
            # newline between two tokens, which might need it
            return '\n'
        if groups[5] or groups[6] or groups[7]:
            # space between two identifiers, "+ +" or "- -"
            return ' '
        return ''

    space_sub_banged = _re.compile((
        r'(%(dull)s+)'
        r'|(%(strings)s%(dull)s*)'
        r'|(%(bang_comment)s%(dull)s*)'
        r'|(?<=%(preregex1)s)'
            r'%(space)s*(?:%(newline)s%(space)s*)*'
            r'(%(regex)s%(dull)s*)'
        r'|(?<=%(preregex2)s)'
            # See space_sub_simple: space after a newline is optional and
            # repeatable.
            r'%(space)s*(?:%(newline)s%(space)s*)*'
            r'(%(regex)s%(dull)s*)'
        r'|(?<=%(id_literal_close)s)'
            r'%(space)s*(?:(%(newline)s)%(space)s*)+'
            r'(?=%(id_literal_open)s)'
        r'|(?<=%(id_literal)s)(%(space)s)+(?=%(id_literal)s)'
        r'|(?<=\+)(%(space)s)+(?=\+)'
        r'|(?<=-)(%(space)s)+(?=-)'
        r'|%(space)s+'
        r'|(?:%(newline)s%(space)s*)+'
    ) % dict(locals(), space=space_nobang)).sub

    def space_subber_banged(match):
        """ Substitution callback """
        # pylint: disable = R0911
        groups = match.groups()
        # Same as space_subber_simple, shifted by the bang comment group
        for kept in groups[:5]:
            if kept:
                return kept
        if groups[5]:
            return '\n'
        if groups[6] or groups[7] or groups[8]:
            return ' '
        return ''

    def jsmin(script, keep_bang_comments=False): # pylint: disable = W0621
        r"""
        Minify javascript based on `jsmin.c by Douglas Crockford`_\.

        Instead of parsing the stream char by char, it uses a regular
        expression approach which minifies the whole script with one big
        substitution regex.

        .. _jsmin.c by Douglas Crockford:
            http://www.crockford.com/javascript/jsmin.c

        :Parameters:
          `script` : ``str``
            Script to minify

          `keep_bang_comments` : ``bool``
            Keep comments starting with an exclamation mark? (``/*!...*/``)

        :Return: Minified script
        :Rtype: ``str``
        """
        # The script is wrapped into newlines, so the lookbehind and
        # lookahead assertions have something to look at on both ends.
        if keep_bang_comments:
            return space_sub_banged(
                space_subber_banged, '\n%s\n' % script
            ).strip()
        else:
            return space_sub_simple(
                space_subber_simple, '\n%s\n' % script
            ).strip()

    return jsmin

jsmin = _make_jsmin()
def jsmin_for_posers(script, keep_bang_comments=False):
    r"""
    Minify javascript based on `jsmin.c by Douglas Crockford`_\.

    Instead of parsing the stream char by char, it uses a regular
    expression approach which minifies the whole script with one big
    substitution regex.

    .. _jsmin.c by Douglas Crockford:
        http://www.crockford.com/javascript/jsmin.c

    :Warning: This function is the digest of a _make_jsmin() call. It just
              utilizes the resulting regexes. It's here for fun and may
              vanish any time. Use the `jsmin` function instead.

    :Parameters:
      `script` : ``str``
        Script to minify

      `keep_bang_comments` : ``bool``
        Keep comments starting with an exclamation mark? (``/*!...*/``)

    :Return: Minified script
    :Rtype: ``str``
    """
    # In both digests the alternative handling a regex literal after
    # "return" accepts any amount of space after each newline ("...)*)*"
    # right before the regex group), like the alternative before it does.
    # Demanding exactly one space item there made "return\n  /re/" miss the
    # regex literal, so whitespace inside of it was collapsed.
    if not keep_bang_comments:
        rex = (
            r'([^\047"/\000-\040]+)|((?:(?:\047[^\047\\\r\n]*(?:\\(?:[^\r\n]'
            r'|\r?\n|\r)[^\047\\\r\n]*)*\047)|(?:"[^"\\\r\n]*(?:\\(?:[^\r\n]'
            r'|\r?\n|\r)[^"\\\r\n]*)*"))[^\047"/\000-\040]*)|(?<=[(,=:\[!&|?'
            r'{};\r\n])(?:[\000-\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*'
            r'][^*]*\*+)*/))*(?:(?:(?://[^\r\n]*)?[\r\n])(?:[\000-\011\013\0'
            r'14\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*)*((?:/(?![\r'
            r'\n/*])[^/\\\[\r\n]*(?:(?:\\[^\r\n]|(?:\[[^\\\]\r\n]*(?:\\[^\r'
            r'\n][^\\\]\r\n]*)*\]))[^/\\\[\r\n]*)*/)[^\047"/\000-\040]*)|(?<'
            r'=[\000-#%-,./:-@\[-^`{-~-]return)(?:[\000-\011\013\014\016-\04'
            r'0]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*(?:(?:(?://[^\r\n]*)?['
            r'\r\n])(?:[\000-\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^'
            r'*]*\*+)*/))*)*((?:/(?![\r\n/*])[^/\\\[\r\n]*(?:(?:\\[^\r\n]|(?:'
            r'\[[^\\\]\r\n]*(?:\\[^\r\n][^\\\]\r\n]*)*\]))[^/\\\[\r\n]*)*/)['
            r'^\047"/\000-\040]*)|(?<=[^\000-!#%&(*,./:-@\[\\^`{|~])(?:[\000'
            r'-\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*(?'
            r':((?:(?://[^\r\n]*)?[\r\n]))(?:[\000-\011\013\014\016-\040]|(?'
            r':/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*)+(?=[^\000-\040"#%-\047)*,.'
            r'/:-@\\-^`|-~])|(?<=[^\000-#%-,./:-@\[-^`{-~-])((?:[\000-\011\0'
            r'13\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)))+(?=[^\00'
            r'0-#%-,./:-@\[-^`{-~-])|(?<=\+)((?:[\000-\011\013\014\016-\040]'
            r'|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)))+(?=\+)|(?<=-)((?:[\000-'
            r'\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)))+(?'
            r'=-)|(?:[\000-\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]'
            r'*\*+)*/))+|(?:(?:(?://[^\r\n]*)?[\r\n])(?:[\000-\011\013\014\0'
            r'16-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*)+'
        )

        def subber(match):
            """ Substitution callback """
            groups = match.groups()
            return (
                groups[0] or
                groups[1] or
                groups[2] or
                groups[3] or
                (groups[4] and '\n') or
                (groups[5] and ' ') or
                (groups[6] and ' ') or
                (groups[7] and ' ') or
                ''
            )
    else:
        rex = (
            r'([^\047"/\000-\040]+)|((?:(?:\047[^\047\\\r\n]*(?:\\(?:[^\r\n]'
            r'|\r?\n|\r)[^\047\\\r\n]*)*\047)|(?:"[^"\\\r\n]*(?:\\(?:[^\r\n]'
            r'|\r?\n|\r)[^"\\\r\n]*)*"))[^\047"/\000-\040]*)|((?:/\*![^*]*\*'
            r'+(?:[^/*][^*]*\*+)*/)[^\047"/\000-\040]*)|(?<=[(,=:\[!&|?{};\r'
            r'\n])(?:[\000-\011\013\014\016-\040]|(?:/\*(?!!)[^*]*\*+(?:[^/*'
            r'][^*]*\*+)*/))*(?:(?:(?://[^\r\n]*)?[\r\n])(?:[\000-\011\013\0'
            r'14\016-\040]|(?:/\*(?!!)[^*]*\*+(?:[^/*][^*]*\*+)*/))*)*((?:/('
            r'?![\r\n/*])[^/\\\[\r\n]*(?:(?:\\[^\r\n]|(?:\[[^\\\]\r\n]*(?:'
            r'\\[^\r\n][^\\\]\r\n]*)*\]))[^/\\\[\r\n]*)*/)[^\047"/\000-\040]'
            r'*)|(?<=[\000-#%-,./:-@\[-^`{-~-]return)(?:[\000-\011\013\014\0'
            r'16-\040]|(?:/\*(?!!)[^*]*\*+(?:[^/*][^*]*\*+)*/))*(?:(?:(?://['
            r'^\r\n]*)?[\r\n])(?:[\000-\011\013\014\016-\040]|(?:/\*(?!!)[^*'
            r']*\*+(?:[^/*][^*]*\*+)*/))*)*((?:/(?![\r\n/*])[^/\\\[\r\n]*(?:('
            r'?:\\[^\r\n]|(?:\[[^\\\]\r\n]*(?:\\[^\r\n][^\\\]\r\n]*)*\]))[^/'
            r'\\\[\r\n]*)*/)[^\047"/\000-\040]*)|(?<=[^\000-!#%&(*,./:-@\[\\'
            r'^`{|~])(?:[\000-\011\013\014\016-\040]|(?:/\*(?!!)[^*]*\*+(?:['
            r'^/*][^*]*\*+)*/))*(?:((?:(?://[^\r\n]*)?[\r\n]))(?:[\000-\011'
            r'\013\014\016-\040]|(?:/\*(?!!)[^*]*\*+(?:[^/*][^*]*\*+)*/))*)+'
            r'(?=[^\000-\040"#%-\047)*,./:-@\\-^`|-~])|(?<=[^\000-#%-,./:-@'
            r'\[-^`{-~-])((?:[\000-\011\013\014\016-\040]|(?:/\*(?!!)[^*]*\*'
            r'+(?:[^/*][^*]*\*+)*/)))+(?=[^\000-#%-,./:-@\[-^`{-~-])|(?<=\+)'
            r'((?:[\000-\011\013\014\016-\040]|(?:/\*(?!!)[^*]*\*+(?:[^/*][^'
            r'*]*\*+)*/)))+(?=\+)|(?<=-)((?:[\000-\011\013\014\016-\040]|(?:'
            r'/\*(?!!)[^*]*\*+(?:[^/*][^*]*\*+)*/)))+(?=-)|(?:[\000-\011\013'
            r'\014\016-\040]|(?:/\*(?!!)[^*]*\*+(?:[^/*][^*]*\*+)*/))+|(?:(?'
            r':(?://[^\r\n]*)?[\r\n])(?:[\000-\011\013\014\016-\040]|(?:/\*('
            r'?!!)[^*]*\*+(?:[^/*][^*]*\*+)*/))*)+'
        )

        def subber(match):
            """ Substitution callback """
            groups = match.groups()
            return (
                groups[0] or
                groups[1] or
                groups[2] or
                groups[3] or
                groups[4] or
                (groups[5] and '\n') or
                (groups[6] and ' ') or
                (groups[7] and ' ') or
                (groups[8] and ' ') or
                ''
            )

    # The script is wrapped into newlines, so the lookbehind and lookahead
    # assertions have something to look at on both ends.
    return _re.sub(rex, subber, '\n%s\n' % script).strip()


if __name__ == '__main__':
    def main():
        """
        Minify stdin to stdout

        Recognized command line flags:

        ``-b``
          Keep bang comments
        ``-p``
          Force the python implementation
        ``-bp``, ``-pb``
          Both of the above
        """
        global jsmin # pylint: disable = W0603
        import sys as _sys

        args = _sys.argv[1:]
        keep_bang_comments = (
            '-b' in args
            or '-bp' in args
            or '-pb' in args
        )
        if '-p' in args or '-bp' in args or '-pb' in args:
            jsmin = _make_jsmin(python_only=True)
        _sys.stdout.write(jsmin(
            _sys.stdin.read(), keep_bang_comments=keep_bang_comments
        ))
    main()