1
2
3 r"""
4 =====================
5 Javascript Minifier
6 =====================
7
8 rJSmin is a javascript minifier written in python.
9
10 The minifier is based on the semantics of `jsmin.c by Douglas Crockford`_\\.
11
12 :Copyright:
13
14 Copyright 2011 - 2014
15 Andr\xe9 Malo or his licensors, as applicable
16
17 :License:
18
19 Licensed under the Apache License, Version 2.0 (the "License");
20 you may not use this file except in compliance with the License.
21 You may obtain a copy of the License at
22
23 http://www.apache.org/licenses/LICENSE-2.0
24
25 Unless required by applicable law or agreed to in writing, software
26 distributed under the License is distributed on an "AS IS" BASIS,
27 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
28 See the License for the specific language governing permissions and
29 limitations under the License.
30
31 The module is a re-implementation aiming for speed, so it can be used at
32 runtime (rather than during a preprocessing step). Usually it produces the
33 same results as the original ``jsmin.c``. It differs in the following ways:
34
35 - there is no error detection: unterminated string, regex and comment
36 literals are treated as regular javascript code and minified as such.
37 - Control characters inside string and regex literals are left untouched; they
38 are not converted to spaces (nor to \\n)
39 - Newline characters are not allowed inside string and regex literals, except
40 for line continuations in string literals (ECMA-5).
41 - "return /regex/" is recognized correctly.
42 - "+ +" and "- -" sequences are not collapsed to '++' or '--'
43 - Newlines before ! operators are removed more sensibly
44 - Comments starting with an exclamation mark (``!``) can be kept optionally
45 - rJSmin does not handle streams, but only complete strings. (However, the
46 module provides a "streamy" interface).
47
48 Since most parts of the logic are handled by the regex engine it's way faster
49 than the original python port of ``jsmin.c`` by Baruch Even. The speed factor
50 varies between about 6 and 55 depending on input and python version (it gets
51 faster the more compressed the input already is). Compared to the
52 speed-refactored python port by Dave St.Germain the performance gain is less
53 dramatic but still between 3 and 50 (for huge inputs). See the docs/BENCHMARKS
54 file for details.
55
56 rjsmin.c is a reimplementation of rjsmin.py in C and speeds it up even more.
57
58 Both python 2 and python 3 are supported.
59
60 .. _jsmin.c by Douglas Crockford:
61 http://www.crockford.com/javascript/jsmin.c
62 """
if __doc__:
    # The module docstring is a raw string whose non-ASCII characters and
    # backslashes are written as escapes; decode them once at import time.
    # The guard is needed because ``__doc__`` is ``None`` when docstrings
    # are stripped (``python -OO``).
    __doc__ = __doc__.encode('ascii').decode('unicode_escape')
# Same escape trick as for the docstring, keeps the source file pure ASCII.
__author__ = r"Andr\xe9 Malo".encode('ascii').decode('unicode_escape')
__docformat__ = "restructuredtext en"
__license__ = "Apache License, Version 2.0"
__version__ = '1.0.10'
# Only the ready-made minifier is part of the public API.
__all__ = ['jsmin']
71
72 import re as _re
73
74
def _make_jsmin(python_only=False):
    """
    Generate JS minifier based on `jsmin.c by Douglas Crockford`_

    Unless `python_only` is true, the ``_rjsmin`` C extension is tried
    first and its ``jsmin`` function is returned if the import succeeds.
    Otherwise the minifier is assembled here: all building blocks are
    combined into one big alternation regex per mode (with and without
    "bang" comments) and the whole script is rewritten with a single
    ``sub`` call.

    .. _jsmin.c by Douglas Crockford:
        http://www.crockford.com/javascript/jsmin.c

    :Parameters:
      `python_only` : ``bool``
        Use only the python variant. If true, the c extension is not even
        tried to be loaded.

    :Return: Minifier
    :Rtype: ``callable``
    """
    if not python_only:
        try:
            import _rjsmin
        except ImportError:
            pass
        else:
            return _rjsmin.jsmin

    # Whitespace/control characters except LF (012) and CR (015), which are
    # handled separately as newlines.
    space_chars = r'[\000-\011\013\014\016-\040]'

    line_comment = r'(?://[^\r\n]*)'
    space_comment = r'(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)'
    # Same as space_comment, but refuses comments starting with "/*!"
    space_comment_nobang = r'(?:/\*(?!!)[^*]*\*+(?:[^/*][^*]*\*+)*/)'
    bang_comment = r'(?:/\*![^*]*\*+(?:[^/*][^*]*\*+)*/)'

    # Single and double quoted strings; a backslash may be followed by a
    # line break (line continuation), a bare line break ends the match.
    string1 = \
        r'(?:\047[^\047\\\r\n]*(?:\\(?:[^\r\n]|\r?\n|\r)[^\047\\\r\n]*)*\047)'
    string2 = r'(?:"[^"\\\r\n]*(?:\\(?:[^\r\n]|\r?\n|\r)[^"\\\r\n]*)*")'
    strings = r'(?:%s|%s)' % (string1, string2)

    charclass = r'(?:\[[^\\\]\r\n]*(?:\\[^\r\n][^\\\]\r\n]*)*\])'
    nospecial = r'[^/\\\[\r\n]'
    regex = r'(?:/(?![\r\n/*])%s*(?:(?:\\[^\r\n]|%s)%s*)*/)' % (
        nospecial, charclass, nospecial
    )
    space = r'(?:%s|%s)' % (space_chars, space_comment)
    space_nobang = r'(?:%s|%s)' % (space_chars, space_comment_nobang)
    newline = r'(?:%s?[\r\n])' % line_comment

    def fix_charclass(chars):
        """
        Fixup string of chars to fit into a regex char class

        The first dash is moved to the end (where it is a literal),
        consecutive characters are folded into ranges, backslash and
        square brackets are escaped, and control characters, space and
        the single quote are written as three digit octal escapes.
        """
        pos = chars.find('-')
        if pos >= 0:
            chars = r'%s%s-' % (chars[:pos], chars[pos + 1:])

        def sequentize(string):
            """
            Notate consecutive characters as sequence

            (1-4 instead of 1234)
            """
            runs = []
            start = end = None
            for code in map(ord, string):
                if end is None:
                    start = end = code
                elif end + 1 == code:
                    end = code
                else:
                    runs.append((start, end))
                    start = end = code
            if end is not None:
                runs.append((start, end))
            # Two adjacent characters are written out ("ab"); only runs of
            # three or more get a dash ("a-c").
            return ''.join([
                '%s%s%s' % (
                    chr(low),
                    '-' if high > low + 1 else '',
                    chr(high) if high != low else '',
                ) for low, high in runs
            ])

        return _re.sub(
            r'([\000-\040\047])',
            lambda m: '\\%03o' % ord(m.group(1)), (
                sequentize(chars)
                .replace('\\', '\\\\')
                .replace('[', '\\[')
                .replace(']', '\\]')
            )
        )

    def id_literal_(what):
        """ Make id_literal like char class """
        match = _re.compile(what).match
        # Negated class of every code point below 127 that `what` rejects.
        result = ''.join([
            chr(c) for c in range(127) if not match(chr(c))
        ])
        return '[^%s]' % fix_charclass(result)

    def not_id_literal_(keep):
        """ Make negated id_literal like char class """
        match = _re.compile(id_literal_(keep)).match
        result = ''.join([
            chr(c) for c in range(127) if not match(chr(c))
        ])
        return r'[%s]' % fix_charclass(result)

    not_id_literal = not_id_literal_(r'[a-zA-Z0-9_$]')
    # Characters after which a slash starts a regex literal
    preregex1 = r'[(,=:\[!&|?{};\r\n]'
    # ... and the "return" keyword (not preceded by an identifier char)
    preregex2 = '%sreturn' % not_id_literal

    id_literal = id_literal_(r'[a-zA-Z0-9_$]')
    id_literal_open = id_literal_(r'[a-zA-Z0-9_${\[(!+-]')
    id_literal_close = id_literal_(r'[a-zA-Z0-9_$}\])"\047+-]')

    # Anything that is neither quote, slash nor whitespace/control char
    dull = r'[^\047"/\000-\040]'

    # Explicit substitution table for the templates below.
    parts = dict(
        dull=dull,
        strings=strings,
        bang_comment=bang_comment,
        preregex1=preregex1,
        preregex2=preregex2,
        regex=regex,
        space=space,
        newline=newline,
        id_literal=id_literal,
        id_literal_open=id_literal_open,
        id_literal_close=id_literal_close,
    )

    # The two substitution patterns only differ in the extra bang comment
    # alternative (and in which "space" definition is plugged in), so they
    # are assembled from shared pieces.
    head = (
        r'(%(dull)s+)'
        r'|(%(strings)s%(dull)s*)'
    )
    bang = r'|(%(bang_comment)s%(dull)s*)'
    tail = (
        r'|(?<=%(preregex1)s)'
        r'%(space)s*(?:%(newline)s%(space)s*)*'
        r'(%(regex)s%(dull)s*)'
        r'|(?<=%(preregex2)s)'
        # NOTE(review): unlike the preregex1 alternative there is no "*"
        # after the inner space here. Kept unchanged on purpose; confirm
        # whether the asymmetry is intended.
        r'%(space)s*(?:%(newline)s%(space)s)*'
        r'(%(regex)s%(dull)s*)'
        r'|(?<=%(id_literal_close)s)'
        r'%(space)s*(?:(%(newline)s)%(space)s*)+'
        r'(?=%(id_literal_open)s)'
        r'|(?<=%(id_literal)s)(%(space)s)+(?=%(id_literal)s)'
        r'|(?<=\+)(%(space)s)+(?=\+)'
        r'|(?<=-)(%(space)s)+(?=-)'
        r'|%(space)s+'
        r'|(?:%(newline)s%(space)s*)+'
    )

    space_sub_simple = _re.compile((head + tail) % parts).sub

    def space_subber_simple(match):
        """
        Substitution callback

        Groups 1-4 (dull text, strings, regex literals) are kept as they
        are, group 5 marks a newline that has to stay, groups 6-8 mark
        whitespace that has to stay as one space. Everything else is
        dropped.
        """
        groups = match.groups()
        return (
            groups[0] or
            groups[1] or
            groups[2] or
            groups[3] or
            (groups[4] and '\n') or
            ((groups[5] or groups[6] or groups[7]) and ' ') or
            ''
        )

    space_sub_banged = _re.compile(
        (head + bang + tail) % dict(parts, space=space_nobang)
    ).sub

    def space_subber_banged(match):
        """
        Substitution callback

        Like `space_subber_simple`, but with the bang comment as an
        additional kept group, which shifts the following groups by one.
        """
        groups = match.groups()
        return (
            groups[0] or
            groups[1] or
            groups[2] or
            groups[3] or
            groups[4] or
            (groups[5] and '\n') or
            ((groups[6] or groups[7] or groups[8]) and ' ') or
            ''
        )

    def jsmin(script, keep_bang_comments=False):
        r"""
        Minify javascript based on `jsmin.c by Douglas Crockford`_\.

        Instead of parsing the stream char by char, it uses a regular
        expression approach which minifies the whole script with one big
        substitution regex.

        .. _jsmin.c by Douglas Crockford:
            http://www.crockford.com/javascript/jsmin.c

        :Parameters:
          `script` : ``str``
            Script to minify

          `keep_bang_comments` : ``bool``
            Keep comments starting with an exclamation mark? (``/*!...*/``)

        :Return: Minified script
        :Rtype: ``str``
        """
        # The script is wrapped in newlines so the lookbehind/lookahead
        # based alternatives also work at both ends; the result is
        # stripped afterwards.
        if keep_bang_comments:
            return space_sub_banged(
                space_subber_banged, '\n%s\n' % script
            ).strip()
        else:
            return space_sub_simple(
                space_subber_simple, '\n%s\n' % script
            ).strip()

    return jsmin


jsmin = _make_jsmin()
310
311
# NOTE(review): the ``def`` line was missing from the reviewed text. The
# function name and the ``False`` default mirror upstream rJSmin -- confirm
# before merging.
def jsmin_for_posers(script, keep_bang_comments=False):
    r"""
    Minify javascript based on `jsmin.c by Douglas Crockford`_\.

    Instead of parsing the stream char by char, it uses a regular
    expression approach which minifies the whole script with one big
    substitution regex.

    .. _jsmin.c by Douglas Crockford:
        http://www.crockford.com/javascript/jsmin.c

    :Warning: This function is the digest of a _make_jsmin() call. It just
              utilizes the resulting regexes. It's here for fun and may
              vanish any time. Use the `jsmin` function instead.

    :Parameters:
      `script` : ``str``
        Script to minify

      `keep_bang_comments` : ``bool``
        Keep comments starting with an exclamation mark? (``/*!...*/``)

    :Return: Minified script
    :Rtype: ``str``
    """
    if not keep_bang_comments:
        # Fully expanded pattern; eight capturing groups.
        rex = (
            r'([^\047"/\000-\040]+)|((?:(?:\047[^\047\\\r\n]*(?:\\(?:[^\r\n]'
            r'|\r?\n|\r)[^\047\\\r\n]*)*\047)|(?:"[^"\\\r\n]*(?:\\(?:[^\r\n]'
            r'|\r?\n|\r)[^"\\\r\n]*)*"))[^\047"/\000-\040]*)|(?<=[(,=:\[!&|?'
            r'{};\r\n])(?:[\000-\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*'
            r'][^*]*\*+)*/))*(?:(?:(?://[^\r\n]*)?[\r\n])(?:[\000-\011\013\0'
            r'14\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*)*((?:/(?![\r'
            r'\n/*])[^/\\\[\r\n]*(?:(?:\\[^\r\n]|(?:\[[^\\\]\r\n]*(?:\\[^\r'
            r'\n][^\\\]\r\n]*)*\]))[^/\\\[\r\n]*)*/)[^\047"/\000-\040]*)|(?<'
            r'=[\000-#%-,./:-@\[-^`{-~-]return)(?:[\000-\011\013\014\016-\04'
            r'0]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*(?:(?:(?://[^\r\n]*)?['
            r'\r\n])(?:[\000-\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^'
            r'*]*\*+)*/)))*((?:/(?![\r\n/*])[^/\\\[\r\n]*(?:(?:\\[^\r\n]|(?:'
            r'\[[^\\\]\r\n]*(?:\\[^\r\n][^\\\]\r\n]*)*\]))[^/\\\[\r\n]*)*/)['
            r'^\047"/\000-\040]*)|(?<=[^\000-!#%&(*,./:-@\[\\^`{|~])(?:[\000'
            r'-\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*(?'
            r':((?:(?://[^\r\n]*)?[\r\n]))(?:[\000-\011\013\014\016-\040]|(?'
            r':/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*)+(?=[^\000-\040"#%-\047)*,.'
            r'/:-@\\-^`|-~])|(?<=[^\000-#%-,./:-@\[-^`{-~-])((?:[\000-\011\0'
            r'13\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)))+(?=[^\00'
            r'0-#%-,./:-@\[-^`{-~-])|(?<=\+)((?:[\000-\011\013\014\016-\040]'
            r'|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)))+(?=\+)|(?<=-)((?:[\000-'
            r'\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)))+(?'
            r'=-)|(?:[\000-\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]'
            r'*\*+)*/))+|(?:(?:(?://[^\r\n]*)?[\r\n])(?:[\000-\011\013\014\0'
            r'16-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*)+'
        )

        def subber(match):
            """ Substitution callback """
            groups = match.groups()
            # Groups 1-4: text kept verbatim; 5: newline to keep;
            # 6-8: whitespace collapsed to a single space.
            return (
                groups[0] or
                groups[1] or
                groups[2] or
                groups[3] or
                (groups[4] and '\n') or
                (groups[5] and ' ') or
                (groups[6] and ' ') or
                (groups[7] and ' ') or
                ''
            )
    else:
        # Same pattern with an extra group (3) that keeps ``/*!...*/``
        # comments; ordinary comments must not start with "!" here.
        rex = (
            r'([^\047"/\000-\040]+)|((?:(?:\047[^\047\\\r\n]*(?:\\(?:[^\r\n]'
            r'|\r?\n|\r)[^\047\\\r\n]*)*\047)|(?:"[^"\\\r\n]*(?:\\(?:[^\r\n]'
            r'|\r?\n|\r)[^"\\\r\n]*)*"))[^\047"/\000-\040]*)|((?:/\*![^*]*\*'
            r'+(?:[^/*][^*]*\*+)*/)[^\047"/\000-\040]*)|(?<=[(,=:\[!&|?{};\r'
            r'\n])(?:[\000-\011\013\014\016-\040]|(?:/\*(?!!)[^*]*\*+(?:[^/*'
            r'][^*]*\*+)*/))*(?:(?:(?://[^\r\n]*)?[\r\n])(?:[\000-\011\013\0'
            r'14\016-\040]|(?:/\*(?!!)[^*]*\*+(?:[^/*][^*]*\*+)*/))*)*((?:/('
            r'?![\r\n/*])[^/\\\[\r\n]*(?:(?:\\[^\r\n]|(?:\[[^\\\]\r\n]*(?:'
            r'\\[^\r\n][^\\\]\r\n]*)*\]))[^/\\\[\r\n]*)*/)[^\047"/\000-\040]'
            r'*)|(?<=[\000-#%-,./:-@\[-^`{-~-]return)(?:[\000-\011\013\014\0'
            r'16-\040]|(?:/\*(?!!)[^*]*\*+(?:[^/*][^*]*\*+)*/))*(?:(?:(?://['
            r'^\r\n]*)?[\r\n])(?:[\000-\011\013\014\016-\040]|(?:/\*(?!!)[^*'
            r']*\*+(?:[^/*][^*]*\*+)*/)))*((?:/(?![\r\n/*])[^/\\\[\r\n]*(?:('
            r'?:\\[^\r\n]|(?:\[[^\\\]\r\n]*(?:\\[^\r\n][^\\\]\r\n]*)*\]))[^/'
            r'\\\[\r\n]*)*/)[^\047"/\000-\040]*)|(?<=[^\000-!#%&(*,./:-@\[\\'
            r'^`{|~])(?:[\000-\011\013\014\016-\040]|(?:/\*(?!!)[^*]*\*+(?:['
            r'^/*][^*]*\*+)*/))*(?:((?:(?://[^\r\n]*)?[\r\n]))(?:[\000-\011'
            r'\013\014\016-\040]|(?:/\*(?!!)[^*]*\*+(?:[^/*][^*]*\*+)*/))*)+'
            r'(?=[^\000-\040"#%-\047)*,./:-@\\-^`|-~])|(?<=[^\000-#%-,./:-@'
            r'\[-^`{-~-])((?:[\000-\011\013\014\016-\040]|(?:/\*(?!!)[^*]*\*'
            r'+(?:[^/*][^*]*\*+)*/)))+(?=[^\000-#%-,./:-@\[-^`{-~-])|(?<=\+)'
            r'((?:[\000-\011\013\014\016-\040]|(?:/\*(?!!)[^*]*\*+(?:[^/*][^'
            r'*]*\*+)*/)))+(?=\+)|(?<=-)((?:[\000-\011\013\014\016-\040]|(?:'
            r'/\*(?!!)[^*]*\*+(?:[^/*][^*]*\*+)*/)))+(?=-)|(?:[\000-\011\013'
            r'\014\016-\040]|(?:/\*(?!!)[^*]*\*+(?:[^/*][^*]*\*+)*/))+|(?:(?'
            r':(?://[^\r\n]*)?[\r\n])(?:[\000-\011\013\014\016-\040]|(?:/\*('
            r'?!!)[^*]*\*+(?:[^/*][^*]*\*+)*/))*)+'
        )

        def subber(match):
            """ Substitution callback """
            groups = match.groups()
            # Groups 1-5: text kept verbatim (3 is the bang comment);
            # 6: newline to keep; 7-9: whitespace collapsed to one space.
            return (
                groups[0] or
                groups[1] or
                groups[2] or
                groups[3] or
                groups[4] or
                (groups[5] and '\n') or
                (groups[6] and ' ') or
                (groups[7] and ' ') or
                (groups[8] and ' ') or
                ''
            )

    # Surrounding newlines let the lookaround based alternatives work at
    # both ends of the script; they are stripped again afterwards.
    return _re.sub(rex, subber, '\n%s\n' % script).strip()
428
429
if __name__ == '__main__':
    def main():
        """
        Main

        Minify the script read from stdin and write the result to stdout.
        Recognized command line flags: ``-b`` keeps bang comments, ``-p``
        forces the python implementation; ``-bp`` and ``-pb`` do both.
        """
        import sys as _sys

        args = _sys.argv[1:]
        both = '-bp' in args or '-pb' in args
        keep_bang_comments = '-b' in args or both

        # A local name is used instead of rebinding the module global; the
        # process ends right after the write below anyway.
        minify = jsmin
        if '-p' in args or both:
            minify = _make_jsmin(python_only=True)

        _sys.stdout.write(minify(
            _sys.stdin.read(), keep_bang_comments=keep_bang_comments
        ))

    main()
447