match.js 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639
  1. var hasOwnProperty = Object.prototype.hasOwnProperty;
  2. var matchGraph = require('./match-graph');
  3. var MATCH = matchGraph.MATCH;
  4. var MISMATCH = matchGraph.MISMATCH;
  5. var DISALLOW_EMPTY = matchGraph.DISALLOW_EMPTY;
  6. var TYPE = require('../tokenizer/const').TYPE;
  7. var STUB = 0;
  8. var TOKEN = 1;
  9. var OPEN_SYNTAX = 2;
  10. var CLOSE_SYNTAX = 3;
  11. var EXIT_REASON_MATCH = 'Match';
  12. var EXIT_REASON_MISMATCH = 'Mismatch';
  13. var EXIT_REASON_ITERATION_LIMIT = 'Maximum iteration number exceeded (please fill an issue on https://github.com/csstree/csstree/issues)';
  14. var ITERATION_LIMIT = 15000;
  15. var totalIterationCount = 0;
  16. function reverseList(list) {
  17. var prev = null;
  18. var next = null;
  19. var item = list;
  20. while (item !== null) {
  21. next = item.prev;
  22. item.prev = prev;
  23. prev = item;
  24. item = next;
  25. }
  26. return prev;
  27. }
  28. function areStringsEqualCaseInsensitive(testStr, referenceStr) {
  29. if (testStr.length !== referenceStr.length) {
  30. return false;
  31. }
  32. for (var i = 0; i < testStr.length; i++) {
  33. var testCode = testStr.charCodeAt(i);
  34. var referenceCode = referenceStr.charCodeAt(i);
  35. // testCode.toLowerCase() for U+0041 LATIN CAPITAL LETTER A (A) .. U+005A LATIN CAPITAL LETTER Z (Z).
  36. if (testCode >= 0x0041 && testCode <= 0x005A) {
  37. testCode = testCode | 32;
  38. }
  39. if (testCode !== referenceCode) {
  40. return false;
  41. }
  42. }
  43. return true;
  44. }
  45. function isContextEdgeDelim(token) {
  46. if (token.type !== TYPE.Delim) {
  47. return false;
  48. }
  49. // Fix matching for unicode-range: U+30??, U+FF00-FF9F
  50. // Probably we need to check out previous match instead
  51. return token.value !== '?';
  52. }
  53. function isCommaContextStart(token) {
  54. if (token === null) {
  55. return true;
  56. }
  57. return (
  58. token.type === TYPE.Comma ||
  59. token.type === TYPE.Function ||
  60. token.type === TYPE.LeftParenthesis ||
  61. token.type === TYPE.LeftSquareBracket ||
  62. token.type === TYPE.LeftCurlyBracket ||
  63. isContextEdgeDelim(token)
  64. );
  65. }
  66. function isCommaContextEnd(token) {
  67. if (token === null) {
  68. return true;
  69. }
  70. return (
  71. token.type === TYPE.RightParenthesis ||
  72. token.type === TYPE.RightSquareBracket ||
  73. token.type === TYPE.RightCurlyBracket ||
  74. token.type === TYPE.Delim
  75. );
  76. }
  77. function internalMatch(tokens, state, syntaxes) {
  78. function moveToNextToken() {
  79. do {
  80. tokenIndex++;
  81. token = tokenIndex < tokens.length ? tokens[tokenIndex] : null;
  82. } while (token !== null && (token.type === TYPE.WhiteSpace || token.type === TYPE.Comment));
  83. }
  84. function getNextToken(offset) {
  85. var nextIndex = tokenIndex + offset;
  86. return nextIndex < tokens.length ? tokens[nextIndex] : null;
  87. }
  88. function stateSnapshotFromSyntax(nextState, prev) {
  89. return {
  90. nextState: nextState,
  91. matchStack: matchStack,
  92. syntaxStack: syntaxStack,
  93. thenStack: thenStack,
  94. tokenIndex: tokenIndex,
  95. prev: prev
  96. };
  97. }
  98. function pushThenStack(nextState) {
  99. thenStack = {
  100. nextState: nextState,
  101. matchStack: matchStack,
  102. syntaxStack: syntaxStack,
  103. prev: thenStack
  104. };
  105. }
  106. function pushElseStack(nextState) {
  107. elseStack = stateSnapshotFromSyntax(nextState, elseStack);
  108. }
  109. function addTokenToMatch() {
  110. matchStack = {
  111. type: TOKEN,
  112. syntax: state.syntax,
  113. token: token,
  114. prev: matchStack
  115. };
  116. moveToNextToken();
  117. syntaxStash = null;
  118. if (tokenIndex > longestMatch) {
  119. longestMatch = tokenIndex;
  120. }
  121. }
  122. function openSyntax() {
  123. syntaxStack = {
  124. syntax: state.syntax,
  125. opts: state.syntax.opts || (syntaxStack !== null && syntaxStack.opts) || null,
  126. prev: syntaxStack
  127. };
  128. matchStack = {
  129. type: OPEN_SYNTAX,
  130. syntax: state.syntax,
  131. token: matchStack.token,
  132. prev: matchStack
  133. };
  134. }
  135. function closeSyntax() {
  136. if (matchStack.type === OPEN_SYNTAX) {
  137. matchStack = matchStack.prev;
  138. } else {
  139. matchStack = {
  140. type: CLOSE_SYNTAX,
  141. syntax: syntaxStack.syntax,
  142. token: matchStack.token,
  143. prev: matchStack
  144. };
  145. }
  146. syntaxStack = syntaxStack.prev;
  147. }
  148. var syntaxStack = null;
  149. var thenStack = null;
  150. var elseStack = null;
  151. // null – stashing allowed, nothing stashed
  152. // false – stashing disabled, nothing stashed
  153. // anithing else – fail stashable syntaxes, some syntax stashed
  154. var syntaxStash = null;
  155. var iterationCount = 0; // count iterations and prevent infinite loop
  156. var exitReason = null;
  157. var token = null;
  158. var tokenIndex = -1;
  159. var longestMatch = 0;
  160. var matchStack = {
  161. type: STUB,
  162. syntax: null,
  163. token: null,
  164. prev: null
  165. };
  166. moveToNextToken();
  167. while (exitReason === null && ++iterationCount < ITERATION_LIMIT) {
  168. // function mapList(list, fn) {
  169. // var result = [];
  170. // while (list) {
  171. // result.unshift(fn(list));
  172. // list = list.prev;
  173. // }
  174. // return result;
  175. // }
  176. // console.log('--\n',
  177. // '#' + iterationCount,
  178. // require('util').inspect({
  179. // match: mapList(matchStack, x => x.type === TOKEN ? x.token && x.token.value : x.syntax ? ({ [OPEN_SYNTAX]: '<', [CLOSE_SYNTAX]: '</' }[x.type] || x.type) + '!' + x.syntax.name : null),
  180. // token: token && token.value,
  181. // tokenIndex,
  182. // syntax: syntax.type + (syntax.id ? ' #' + syntax.id : '')
  183. // }, { depth: null })
  184. // );
  185. switch (state.type) {
  186. case 'Match':
  187. if (thenStack === null) {
  188. // turn to MISMATCH when some tokens left unmatched
  189. if (token !== null) {
  190. // doesn't mismatch if just one token left and it's an IE hack
  191. if (tokenIndex !== tokens.length - 1 || (token.value !== '\\0' && token.value !== '\\9')) {
  192. state = MISMATCH;
  193. break;
  194. }
  195. }
  196. // break the main loop, return a result - MATCH
  197. exitReason = EXIT_REASON_MATCH;
  198. break;
  199. }
  200. // go to next syntax (`then` branch)
  201. state = thenStack.nextState;
  202. // check match is not empty
  203. if (state === DISALLOW_EMPTY) {
  204. if (thenStack.matchStack === matchStack) {
  205. state = MISMATCH;
  206. break;
  207. } else {
  208. state = MATCH;
  209. }
  210. }
  211. // close syntax if needed
  212. while (thenStack.syntaxStack !== syntaxStack) {
  213. closeSyntax();
  214. }
  215. // pop stack
  216. thenStack = thenStack.prev;
  217. break;
  218. case 'Mismatch':
  219. // when some syntax is stashed
  220. if (syntaxStash !== null && syntaxStash !== false) {
  221. // there is no else branches or a branch reduce match stack
  222. if (elseStack === null || tokenIndex > elseStack.tokenIndex) {
  223. // restore state from the stash
  224. elseStack = syntaxStash;
  225. syntaxStash = false; // disable stashing
  226. }
  227. } else if (elseStack === null) {
  228. // no else branches -> break the main loop
  229. // return a result - MISMATCH
  230. exitReason = EXIT_REASON_MISMATCH;
  231. break;
  232. }
  233. // go to next syntax (`else` branch)
  234. state = elseStack.nextState;
  235. // restore all the rest stack states
  236. thenStack = elseStack.thenStack;
  237. syntaxStack = elseStack.syntaxStack;
  238. matchStack = elseStack.matchStack;
  239. tokenIndex = elseStack.tokenIndex;
  240. token = tokenIndex < tokens.length ? tokens[tokenIndex] : null;
  241. // pop stack
  242. elseStack = elseStack.prev;
  243. break;
  244. case 'MatchGraph':
  245. state = state.match;
  246. break;
  247. case 'If':
  248. // IMPORTANT: else stack push must go first,
  249. // since it stores the state of thenStack before changes
  250. if (state.else !== MISMATCH) {
  251. pushElseStack(state.else);
  252. }
  253. if (state.then !== MATCH) {
  254. pushThenStack(state.then);
  255. }
  256. state = state.match;
  257. break;
  258. case 'MatchOnce':
  259. state = {
  260. type: 'MatchOnceBuffer',
  261. syntax: state,
  262. index: 0,
  263. mask: 0
  264. };
  265. break;
  266. case 'MatchOnceBuffer':
  267. var terms = state.syntax.terms;
  268. if (state.index === terms.length) {
  269. // no matches at all or it's required all terms to be matched
  270. if (state.mask === 0 || state.syntax.all) {
  271. state = MISMATCH;
  272. break;
  273. }
  274. // a partial match is ok
  275. state = MATCH;
  276. break;
  277. }
  278. // all terms are matched
  279. if (state.mask === (1 << terms.length) - 1) {
  280. state = MATCH;
  281. break;
  282. }
  283. for (; state.index < terms.length; state.index++) {
  284. var matchFlag = 1 << state.index;
  285. if ((state.mask & matchFlag) === 0) {
  286. // IMPORTANT: else stack push must go first,
  287. // since it stores the state of thenStack before changes
  288. pushElseStack(state);
  289. pushThenStack({
  290. type: 'AddMatchOnce',
  291. syntax: state.syntax,
  292. mask: state.mask | matchFlag
  293. });
  294. // match
  295. state = terms[state.index++];
  296. break;
  297. }
  298. }
  299. break;
  300. case 'AddMatchOnce':
  301. state = {
  302. type: 'MatchOnceBuffer',
  303. syntax: state.syntax,
  304. index: 0,
  305. mask: state.mask
  306. };
  307. break;
  308. case 'Enum':
  309. if (token !== null) {
  310. var name = token.value.toLowerCase();
  311. // drop \0 and \9 hack from keyword name
  312. if (name.indexOf('\\') !== -1) {
  313. name = name.replace(/\\[09].*$/, '');
  314. }
  315. if (hasOwnProperty.call(state.map, name)) {
  316. state = state.map[name];
  317. break;
  318. }
  319. }
  320. state = MISMATCH;
  321. break;
  322. case 'Generic':
  323. var opts = syntaxStack !== null ? syntaxStack.opts : null;
  324. var lastTokenIndex = tokenIndex + Math.floor(state.fn(token, getNextToken, opts));
  325. if (!isNaN(lastTokenIndex) && lastTokenIndex > tokenIndex) {
  326. while (tokenIndex < lastTokenIndex) {
  327. addTokenToMatch();
  328. }
  329. state = MATCH;
  330. } else {
  331. state = MISMATCH;
  332. }
  333. break;
  334. case 'Type':
  335. case 'Property':
  336. var syntaxDict = state.type === 'Type' ? 'types' : 'properties';
  337. var dictSyntax = hasOwnProperty.call(syntaxes, syntaxDict) ? syntaxes[syntaxDict][state.name] : null;
  338. if (!dictSyntax || !dictSyntax.match) {
  339. throw new Error(
  340. 'Bad syntax reference: ' +
  341. (state.type === 'Type'
  342. ? '<' + state.name + '>'
  343. : '<\'' + state.name + '\'>')
  344. );
  345. }
  346. // stash a syntax for types with low priority
  347. if (syntaxStash !== false && token !== null && state.type === 'Type') {
  348. var lowPriorityMatching =
  349. // https://drafts.csswg.org/css-values-4/#custom-idents
  350. // When parsing positionally-ambiguous keywords in a property value, a <custom-ident> production
  351. // can only claim the keyword if no other unfulfilled production can claim it.
  352. (state.name === 'custom-ident' && token.type === TYPE.Ident) ||
  353. // https://drafts.csswg.org/css-values-4/#lengths
  354. // ... if a `0` could be parsed as either a <number> or a <length> in a property (such as line-height),
  355. // it must parse as a <number>
  356. (state.name === 'length' && token.value === '0');
  357. if (lowPriorityMatching) {
  358. if (syntaxStash === null) {
  359. syntaxStash = stateSnapshotFromSyntax(state, elseStack);
  360. }
  361. state = MISMATCH;
  362. break;
  363. }
  364. }
  365. openSyntax();
  366. state = dictSyntax.match;
  367. break;
  368. case 'Keyword':
  369. var name = state.name;
  370. if (token !== null) {
  371. var keywordName = token.value;
  372. // drop \0 and \9 hack from keyword name
  373. if (keywordName.indexOf('\\') !== -1) {
  374. keywordName = keywordName.replace(/\\[09].*$/, '');
  375. }
  376. if (areStringsEqualCaseInsensitive(keywordName, name)) {
  377. addTokenToMatch();
  378. state = MATCH;
  379. break;
  380. }
  381. }
  382. state = MISMATCH;
  383. break;
  384. case 'AtKeyword':
  385. case 'Function':
  386. if (token !== null && areStringsEqualCaseInsensitive(token.value, state.name)) {
  387. addTokenToMatch();
  388. state = MATCH;
  389. break;
  390. }
  391. state = MISMATCH;
  392. break;
  393. case 'Token':
  394. if (token !== null && token.value === state.value) {
  395. addTokenToMatch();
  396. state = MATCH;
  397. break;
  398. }
  399. state = MISMATCH;
  400. break;
  401. case 'Comma':
  402. if (token !== null && token.type === TYPE.Comma) {
  403. if (isCommaContextStart(matchStack.token)) {
  404. state = MISMATCH;
  405. } else {
  406. addTokenToMatch();
  407. state = isCommaContextEnd(token) ? MISMATCH : MATCH;
  408. }
  409. } else {
  410. state = isCommaContextStart(matchStack.token) || isCommaContextEnd(token) ? MATCH : MISMATCH;
  411. }
  412. break;
  413. case 'String':
  414. var string = '';
  415. for (var lastTokenIndex = tokenIndex; lastTokenIndex < tokens.length && string.length < state.value.length; lastTokenIndex++) {
  416. string += tokens[lastTokenIndex].value;
  417. }
  418. if (areStringsEqualCaseInsensitive(string, state.value)) {
  419. while (tokenIndex < lastTokenIndex) {
  420. addTokenToMatch();
  421. }
  422. state = MATCH;
  423. } else {
  424. state = MISMATCH;
  425. }
  426. break;
  427. default:
  428. throw new Error('Unknown node type: ' + state.type);
  429. }
  430. }
  431. totalIterationCount += iterationCount;
  432. switch (exitReason) {
  433. case null:
  434. console.warn('[csstree-match] BREAK after ' + ITERATION_LIMIT + ' iterations');
  435. exitReason = EXIT_REASON_ITERATION_LIMIT;
  436. matchStack = null;
  437. break;
  438. case EXIT_REASON_MATCH:
  439. while (syntaxStack !== null) {
  440. closeSyntax();
  441. }
  442. break;
  443. default:
  444. matchStack = null;
  445. }
  446. return {
  447. tokens: tokens,
  448. reason: exitReason,
  449. iterations: iterationCount,
  450. match: matchStack,
  451. longestMatch: longestMatch
  452. };
  453. }
  454. function matchAsList(tokens, matchGraph, syntaxes) {
  455. var matchResult = internalMatch(tokens, matchGraph, syntaxes || {});
  456. if (matchResult.match !== null) {
  457. var item = reverseList(matchResult.match).prev;
  458. matchResult.match = [];
  459. while (item !== null) {
  460. switch (item.type) {
  461. case STUB:
  462. break;
  463. case OPEN_SYNTAX:
  464. case CLOSE_SYNTAX:
  465. matchResult.match.push({
  466. type: item.type,
  467. syntax: item.syntax
  468. });
  469. break;
  470. default:
  471. matchResult.match.push({
  472. token: item.token.value,
  473. node: item.token.node
  474. });
  475. break;
  476. }
  477. item = item.prev;
  478. }
  479. }
  480. return matchResult;
  481. }
  482. function matchAsTree(tokens, matchGraph, syntaxes) {
  483. var matchResult = internalMatch(tokens, matchGraph, syntaxes || {});
  484. if (matchResult.match === null) {
  485. return matchResult;
  486. }
  487. var item = matchResult.match;
  488. var host = matchResult.match = {
  489. syntax: matchGraph.syntax || null,
  490. match: []
  491. };
  492. var hostStack = [host];
  493. // revert a list and start with 2nd item since 1st is a stub item
  494. item = reverseList(item).prev;
  495. // build a tree
  496. while (item !== null) {
  497. switch (item.type) {
  498. case OPEN_SYNTAX:
  499. host.match.push(host = {
  500. syntax: item.syntax,
  501. match: []
  502. });
  503. hostStack.push(host);
  504. break;
  505. case CLOSE_SYNTAX:
  506. hostStack.pop();
  507. host = hostStack[hostStack.length - 1];
  508. break;
  509. default:
  510. host.match.push({
  511. syntax: item.syntax || null,
  512. token: item.token.value,
  513. node: item.token.node
  514. });
  515. }
  516. item = item.prev;
  517. }
  518. return matchResult;
  519. }
  520. module.exports = {
  521. matchAsList: matchAsList,
  522. matchAsTree: matchAsTree,
  523. getTotalIterationCount: function() {
  524. return totalIterationCount;
  525. }
  526. };