Teknik is a suite of services with attractive and functional interfaces. https://www.teknik.io/
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

Minifier.php 17KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586
  1. <?php
  2. /*
  3. * This file is part of the JShrink package.
  4. *
  5. * (c) Robert Hafner <tedivm@tedivm.com>
  6. *
  7. * For the full copyright and license information, please view the LICENSE
  8. * file that was distributed with this source code.
  9. */
  10. /**
  11. * JShrink
  12. *
  13. *
  14. * @package JShrink
  15. * @author Robert Hafner <tedivm@tedivm.com>
  16. */
  17. namespace JShrink;
  18. /**
  19. * Minifier
  20. *
  21. * Usage - Minifier::minify($js);
  22. * Usage - Minifier::minify($js, $options);
  23. * Usage - Minifier::minify($js, array('flaggedComments' => false));
  24. *
  25. * @package JShrink
  26. * @author Robert Hafner <tedivm@tedivm.com>
  27. * @license http://www.opensource.org/licenses/bsd-license.php BSD License
  28. */
  29. class Minifier
  30. {
  31. /**
  32. * The input javascript to be minified.
  33. *
  34. * @var string
  35. */
  36. protected $input;
  37. /**
  38. * The location of the character (in the input string) that is next to be
  39. * processed.
  40. *
  41. * @var int
  42. */
  43. protected $index = 0;
  44. /**
  45. * The first of the characters currently being looked at.
  46. *
  47. * @var string
  48. */
  49. protected $a = '';
  50. /**
  51. * The next character being looked at (after a);
  52. *
  53. * @var string
  54. */
  55. protected $b = '';
  56. /**
  57. * This character is only active when certain look ahead actions take place.
  58. *
  59. * @var string
  60. */
  61. protected $c;
  62. /**
  63. * Contains the options for the current minification process.
  64. *
  65. * @var array
  66. */
  67. protected $options;
  68. /**
  69. * Contains the default options for minification. This array is merged with
  70. * the one passed in by the user to create the request specific set of
  71. * options (stored in the $options attribute).
  72. *
  73. * @var array
  74. */
  75. protected static $defaultOptions = array('flaggedComments' => true);
  76. /**
  77. * Contains lock ids which are used to replace certain code patterns and
  78. * prevent them from being minified
  79. *
  80. * @var array
  81. */
  82. protected $locks = array();
  83. /**
  84. * Takes a string containing javascript and removes unneeded characters in
  85. * order to shrink the code without altering it's functionality.
  86. *
  87. * @param string $js The raw javascript to be minified
  88. * @param array $options Various runtime options in an associative array
  89. * @throws \Exception
  90. * @return bool|string
  91. */
  92. public static function minify($js, $options = array())
  93. {
  94. try {
  95. ob_start();
  96. $jshrink = new Minifier();
  97. $js = $jshrink->lock($js);
  98. $jshrink->minifyDirectToOutput($js, $options);
  99. // Sometimes there's a leading new line, so we trim that out here.
  100. $js = ltrim(ob_get_clean());
  101. $js = $jshrink->unlock($js);
  102. unset($jshrink);
  103. return $js;
  104. } catch (\Exception $e) {
  105. if (isset($jshrink)) {
  106. // Since the breakdownScript function probably wasn't finished
  107. // we clean it out before discarding it.
  108. $jshrink->clean();
  109. unset($jshrink);
  110. }
  111. // without this call things get weird, with partially outputted js.
  112. ob_end_clean();
  113. throw $e;
  114. }
  115. }
  116. /**
  117. * Processes a javascript string and outputs only the required characters,
  118. * stripping out all unneeded characters.
  119. *
  120. * @param string $js The raw javascript to be minified
  121. * @param array $options Various runtime options in an associative array
  122. */
  123. protected function minifyDirectToOutput($js, $options)
  124. {
  125. $this->initialize($js, $options);
  126. $this->loop();
  127. $this->clean();
  128. }
  129. /**
  130. * Initializes internal variables, normalizes new lines,
  131. *
  132. * @param string $js The raw javascript to be minified
  133. * @param array $options Various runtime options in an associative array
  134. */
  135. protected function initialize($js, $options)
  136. {
  137. $this->options = array_merge(static::$defaultOptions, $options);
  138. $js = str_replace("\r\n", "\n", $js);
  139. $this->input = str_replace("\r", "\n", $js);
  140. // We add a newline to the end of the script to make it easier to deal
  141. // with comments at the bottom of the script- this prevents the unclosed
  142. // comment error that can otherwise occur.
  143. $this->input .= PHP_EOL;
  144. // Populate "a" with a new line, "b" with the first character, before
  145. // entering the loop
  146. $this->a = "\n";
  147. $this->b = $this->getReal();
  148. }
  149. /**
  150. * The primary action occurs here. This function loops through the input string,
  151. * outputting anything that's relevant and discarding anything that is not.
  152. */
  153. protected function loop()
  154. {
  155. while ($this->a !== false && !is_null($this->a) && $this->a !== '') {
  156. switch ($this->a) {
  157. // new lines
  158. case "\n":
  159. // if the next line is something that can't stand alone preserve the newline
  160. if (strpos('(-+{[@', $this->b) !== false) {
  161. echo $this->a;
  162. $this->saveString();
  163. break;
  164. }
  165. // if B is a space we skip the rest of the switch block and go down to the
  166. // string/regex check below, resetting $this->b with getReal
  167. if($this->b === ' ')
  168. break;
  169. // otherwise we treat the newline like a space
  170. case ' ':
  171. if(static::isAlphaNumeric($this->b))
  172. echo $this->a;
  173. $this->saveString();
  174. break;
  175. default:
  176. switch ($this->b) {
  177. case "\n":
  178. if (strpos('}])+-"\'', $this->a) !== false) {
  179. echo $this->a;
  180. $this->saveString();
  181. break;
  182. } else {
  183. if (static::isAlphaNumeric($this->a)) {
  184. echo $this->a;
  185. $this->saveString();
  186. }
  187. }
  188. break;
  189. case ' ':
  190. if(!static::isAlphaNumeric($this->a))
  191. break;
  192. default:
  193. // check for some regex that breaks stuff
  194. if ($this->a == '/' && ($this->b == '\'' || $this->b == '"')) {
  195. $this->saveRegex();
  196. continue;
  197. }
  198. echo $this->a;
  199. $this->saveString();
  200. break;
  201. }
  202. }
  203. // do reg check of doom
  204. $this->b = $this->getReal();
  205. if(($this->b == '/' && strpos('(,=:[!&|?', $this->a) !== false))
  206. $this->saveRegex();
  207. }
  208. }
  209. /**
  210. * Resets attributes that do not need to be stored between requests so that
  211. * the next request is ready to go. Another reason for this is to make sure
  212. * the variables are cleared and are not taking up memory.
  213. */
  214. protected function clean()
  215. {
  216. unset($this->input);
  217. $this->index = 0;
  218. $this->a = $this->b = '';
  219. unset($this->c);
  220. unset($this->options);
  221. }
  222. /**
  223. * Returns the next string for processing based off of the current index.
  224. *
  225. * @return string
  226. */
  227. protected function getChar()
  228. {
  229. // Check to see if we had anything in the look ahead buffer and use that.
  230. if (isset($this->c)) {
  231. $char = $this->c;
  232. unset($this->c);
  233. // Otherwise we start pulling from the input.
  234. } else {
  235. $char = substr($this->input, $this->index, 1);
  236. // If the next character doesn't exist return false.
  237. if (isset($char) && $char === false) {
  238. return false;
  239. }
  240. // Otherwise increment the pointer and use this char.
  241. $this->index++;
  242. }
  243. // Normalize all whitespace except for the newline character into a
  244. // standard space.
  245. if($char !== "\n" && ord($char) < 32)
  246. return ' ';
  247. return $char;
  248. }
  249. /**
  250. * This function gets the next "real" character. It is essentially a wrapper
  251. * around the getChar function that skips comments. This has significant
  252. * performance benefits as the skipping is done using native functions (ie,
  253. * c code) rather than in script php.
  254. *
  255. *
  256. * @return string Next 'real' character to be processed.
  257. * @throws \RuntimeException
  258. */
  259. protected function getReal()
  260. {
  261. $startIndex = $this->index;
  262. $char = $this->getChar();
  263. // Check to see if we're potentially in a comment
  264. if ($char !== '/') {
  265. return $char;
  266. }
  267. $this->c = $this->getChar();
  268. if ($this->c == '/') {
  269. return $this->processOneLineComments($startIndex);
  270. } elseif ($this->c == '*') {
  271. return $this->processMultiLineComments($startIndex);
  272. }
  273. return $char;
  274. }
  275. /**
  276. * Removed one line comments, with the exception of some very specific types of
  277. * conditional comments.
  278. *
  279. * @param int $startIndex The index point where "getReal" function started
  280. * @return string
  281. */
  282. protected function processOneLineComments($startIndex)
  283. {
  284. $thirdCommentString = substr($this->input, $this->index, 1);
  285. // kill rest of line
  286. $this->getNext("\n");
  287. if ($thirdCommentString == '@') {
  288. $endPoint = ($this->index) - $startIndex;
  289. unset($this->c);
  290. $char = "\n" . substr($this->input, $startIndex, $endPoint);
  291. } else {
  292. // first one is contents of $this->c
  293. $this->getChar();
  294. $char = $this->getChar();
  295. }
  296. return $char;
  297. }
  298. /**
  299. * Skips multiline comments where appropriate, and includes them where needed.
  300. * Conditional comments and "license" style blocks are preserved.
  301. *
  302. * @param int $startIndex The index point where "getReal" function started
  303. * @return bool|string False if there's no character
  304. * @throws \RuntimeException Unclosed comments will throw an error
  305. */
  306. protected function processMultiLineComments($startIndex)
  307. {
  308. $this->getChar(); // current C
  309. $thirdCommentString = $this->getChar();
  310. // kill everything up to the next */ if it's there
  311. if ($this->getNext('*/')) {
  312. $this->getChar(); // get *
  313. $this->getChar(); // get /
  314. $char = $this->getChar(); // get next real character
  315. // Now we reinsert conditional comments and YUI-style licensing comments
  316. if (($this->options['flaggedComments'] && $thirdCommentString == '!')
  317. || ($thirdCommentString == '@') ) {
  318. // If conditional comments or flagged comments are not the first thing in the script
  319. // we need to echo a and fill it with a space before moving on.
  320. if ($startIndex > 0) {
  321. echo $this->a;
  322. $this->a = " ";
  323. // If the comment started on a new line we let it stay on the new line
  324. if ($this->input[($startIndex - 1)] == "\n") {
  325. echo "\n";
  326. }
  327. }
  328. $endPoint = ($this->index - 1) - $startIndex;
  329. echo substr($this->input, $startIndex, $endPoint);
  330. return $char;
  331. }
  332. } else {
  333. $char = false;
  334. }
  335. if($char === false)
  336. throw new \RuntimeException('Unclosed multiline comment at position: ' . ($this->index - 2));
  337. // if we're here c is part of the comment and therefore tossed
  338. if(isset($this->c))
  339. unset($this->c);
  340. return $char;
  341. }
  342. /**
  343. * Pushes the index ahead to the next instance of the supplied string. If it
  344. * is found the first character of the string is returned and the index is set
  345. * to it's position.
  346. *
  347. * @param string $string
  348. * @return string|false Returns the first character of the string or false.
  349. */
  350. protected function getNext($string)
  351. {
  352. // Find the next occurrence of "string" after the current position.
  353. $pos = strpos($this->input, $string, $this->index);
  354. // If it's not there return false.
  355. if($pos === false)
  356. return false;
  357. // Adjust position of index to jump ahead to the asked for string
  358. $this->index = $pos;
  359. // Return the first character of that string.
  360. return substr($this->input, $this->index, 1);
  361. }
  362. /**
  363. * When a javascript string is detected this function crawls for the end of
  364. * it and saves the whole string.
  365. *
  366. * @throws \RuntimeException Unclosed strings will throw an error
  367. */
  368. protected function saveString()
  369. {
  370. $startpos = $this->index;
  371. // saveString is always called after a gets cleared, so we push b into
  372. // that spot.
  373. $this->a = $this->b;
  374. // If this isn't a string we don't need to do anything.
  375. if ($this->a != "'" && $this->a != '"') {
  376. return;
  377. }
  378. // String type is the quote used, " or '
  379. $stringType = $this->a;
  380. // Echo out that starting quote
  381. echo $this->a;
  382. // Loop until the string is done
  383. while (1) {
  384. // Grab the very next character and load it into a
  385. $this->a = $this->getChar();
  386. switch ($this->a) {
  387. // If the string opener (single or double quote) is used
  388. // output it and break out of the while loop-
  389. // The string is finished!
  390. case $stringType:
  391. break 2;
  392. // New lines in strings without line delimiters are bad- actual
  393. // new lines will be represented by the string \n and not the actual
  394. // character, so those will be treated just fine using the switch
  395. // block below.
  396. case "\n":
  397. throw new \RuntimeException('Unclosed string at position: ' . $startpos );
  398. break;
  399. // Escaped characters get picked up here. If it's an escaped new line it's not really needed
  400. case '\\':
  401. // a is a slash. We want to keep it, and the next character,
  402. // unless it's a new line. New lines as actual strings will be
  403. // preserved, but escaped new lines should be reduced.
  404. $this->b = $this->getChar();
  405. // If b is a new line we discard a and b and restart the loop.
  406. if ($this->b == "\n") {
  407. break;
  408. }
  409. // echo out the escaped character and restart the loop.
  410. echo $this->a . $this->b;
  411. break;
  412. // Since we're not dealing with any special cases we simply
  413. // output the character and continue our loop.
  414. default:
  415. echo $this->a;
  416. }
  417. }
  418. }
  419. /**
  420. * When a regular expression is detected this function crawls for the end of
  421. * it and saves the whole regex.
  422. *
  423. * @throws \RuntimeException Unclosed regex will throw an error
  424. */
  425. protected function saveRegex()
  426. {
  427. echo $this->a . $this->b;
  428. while (($this->a = $this->getChar()) !== false) {
  429. if($this->a == '/')
  430. break;
  431. if ($this->a == '\\') {
  432. echo $this->a;
  433. $this->a = $this->getChar();
  434. }
  435. if($this->a == "\n")
  436. throw new \RuntimeException('Unclosed regex pattern at position: ' . $this->index);
  437. echo $this->a;
  438. }
  439. $this->b = $this->getReal();
  440. }
  441. /**
  442. * Checks to see if a character is alphanumeric.
  443. *
  444. * @param string $char Just one character
  445. * @return bool
  446. */
  447. protected static function isAlphaNumeric($char)
  448. {
  449. return preg_match('/^[\w\$]$/', $char) === 1 || $char == '/';
  450. }
  451. /**
  452. * Replace patterns in the given string and store the replacement
  453. *
  454. * @param string $js The string to lock
  455. * @return bool
  456. */
  457. protected function lock($js)
  458. {
  459. /* lock things like <code>"asd" + ++x;</code> */
  460. $lock = '"LOCK---' . crc32(time()) . '"';
  461. $matches = array();
  462. preg_match('/([+-])(\s+)([+-])/', $js, $matches);
  463. if (empty($matches)) {
  464. return $js;
  465. }
  466. $this->locks[$lock] = $matches[2];
  467. $js = preg_replace('/([+-])\s+([+-])/', "$1{$lock}$2", $js);
  468. /* -- */
  469. return $js;
  470. }
  471. /**
  472. * Replace "locks" with the original characters
  473. *
  474. * @param string $js The string to unlock
  475. * @return bool
  476. */
  477. protected function unlock($js)
  478. {
  479. if (!count($this->locks)) {
  480. return $js;
  481. }
  482. foreach ($this->locks as $lock => $replacement) {
  483. $js = str_replace($lock, $replacement, $js);
  484. }
  485. return $js;
  486. }
  487. }