parse.js 3.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141
  1. 'use strict';
  2. const { Node, Block } = require('./Node');
  3. const languages = require('./languages');
  /**
   * Language-independent patterns used by the parser. Every pattern is anchored
   * with `^` so it only matches at the start of the string it is tested against.
   */
  const constants = {
    // A backslash followed by any single character other than a line terminator.
    ESCAPED_CHAR_REGEX: /^\\./,

    // A string delimited by ', " or `.
    //   group 1: opening quote
    //   group 2: contents (may be empty, may span lines)
    //   group 3: closing quote (same character as group 1)
    // `(?!\1)[\s\S]` means "any character that is not the opening quote". A
    // backreference cannot be written inside a character class: there `\1` is
    // read as the control character U+0001. The `*?` quantifier lets empty
    // strings such as '' match on their own instead of running on to the next
    // quote character further along in the input.
    QUOTED_STRING_REGEX: /^(['"`])((?:\\.|(?!\1)[\s\S])*?)(\1)/,

    // A line feed, optionally preceded by one or more carriage returns.
    NEWLINE_REGEX: /^\r*\n/
  };
  /**
   * Parse `input` into a tree that separates comments from the rest of the text.
   *
   * The returned root `Block` (type "root") receives nodes in source order:
   * "text", "newline" and "line" nodes, plus nested "block" Blocks that hold an
   * "open" node, their inner nodes and a "close" node. Adjacent "text" nodes
   * are merged into one.
   *
   * @param {string} input - Source text to parse.
   * @param {object} [options]
   * @param {string} [options.language="javascript"] - Key into `languages`; lower-cased before lookup.
   * @param {boolean} [options.block] - When truthy, block comments are recognised.
   * @param {boolean} [options.line] - When truthy, line comments are recognised.
   * @returns {Block} The root block.
   * @throws {TypeError} If `input` is not a string.
   * @throws {Error} If the language is not an own key of `languages`.
   */
  const parse = (input, options = {}) => {
    if (typeof input !== 'string') {
      throw new TypeError('Expected input to be a string');
    }

    const cst = new Block({ type: 'root', nodes: [] });
    const stack = [cst];
    const name = (options.language || 'javascript').toLowerCase();

    // Only own keys count: a plain `languages[name]` lookup would also accept
    // inherited names such as "constructor" and then silently parse nothing.
    const isSupported = Object.prototype.hasOwnProperty.call(languages, name);
    const lang = isSupported ? languages[name] : undefined;

    if (typeof lang === 'undefined') {
      throw new Error(`Language "${name}" is not supported by strip-comments`);
    }

    const { LINE_REGEX, BLOCK_OPEN_REGEX, BLOCK_CLOSE_REGEX } = lang;
    let block = cst;
    let remaining = input;
    let token;
    let prev;

    // Triple-quote mode applies only when the language actually defines block
    // delimiters and all of them are `"""`. `every` is true for an empty array,
    // so the length check keeps languages without block comments out of it.
    const delimiters = [BLOCK_OPEN_REGEX, BLOCK_CLOSE_REGEX].filter(Boolean);
    const tripleQuotes = delimiters.length > 0 && delimiters.every(regex => regex.source === '^"""');

    // Plain text (skip "C" since some languages use "C" to start comments).
    // Built once here rather than on every loop iteration.
    const PLAIN_TEXT_REGEX = /^[a-zABD-Z0-9\t ]+/;

    /**
     * Helpers
     */

    // Drop `value` (default: the next character) from the front of `remaining`
    // and return it.
    const consume = (value = remaining[0] || '') => {
      remaining = remaining.slice(value.length);
      return value;
    };

    // Try `regex` against `remaining`. On a match, consume it and return a
    // token; otherwise return undefined.
    const scan = (regex, type = 'text') => {
      const match = regex.exec(remaining);
      if (match) {
        consume(match[0]);
        return { type, value: match[0], match };
      }
    };

    // Append `node` to the current block. Consecutive text nodes are merged,
    // and a node that has `nodes` becomes the new current block.
    const push = node => {
      if (prev && prev.type === 'text' && node.type === 'text') {
        prev.value += node.value;
        return;
      }

      block.push(node);
      if (node.nodes) {
        stack.push(node);
        block = node;
      }
      prev = node;
    };

    // Leave the current block and return to its parent. The only call site
    // below runs while `block.type === 'block'`, so the throw is a guard
    // against popping the root.
    const pop = () => {
      if (block.type === 'root') {
        throw new SyntaxError('Unclosed block comment');
      }
      stack.pop();
      block = stack[stack.length - 1];
    };

    /**
     * Parse input string
     */

    while (remaining !== '') {
      // escaped characters
      if ((token = scan(constants.ESCAPED_CHAR_REGEX, 'text'))) {
        push(new Node(token));
        continue;
      }

      // quoted strings: not inside a block comment, not directly after a word
      // character, and not at a `"""` delimiter in triple-quote languages
      if (block.type !== 'block' && (!prev || !/\w$/.test(prev.value)) && !(tripleQuotes && remaining.startsWith('"""'))) {
        if ((token = scan(constants.QUOTED_STRING_REGEX, 'text'))) {
          push(new Node(token));
          continue;
        }
      }

      // newlines
      if ((token = scan(constants.NEWLINE_REGEX, 'newline'))) {
        push(new Node(token));
        continue;
      }

      // block comment open (in triple-quote languages the same delimiter closes
      // the block, so it must not open a nested one)
      if (BLOCK_OPEN_REGEX && options.block && !(tripleQuotes && block.type === 'block')) {
        if ((token = scan(BLOCK_OPEN_REGEX, 'open'))) {
          push(new Block({ type: 'block' }));
          push(new Node(token));
          continue;
        }
      }

      // block comment close
      if (BLOCK_CLOSE_REGEX && block.type === 'block' && options.block) {
        if ((token = scan(BLOCK_CLOSE_REGEX, 'close'))) {
          // first capture group of the close pattern, or '' when absent
          token.newline = token.match[1] || '';
          push(new Node(token));
          pop();
          continue;
        }
      }

      // line comment
      if (LINE_REGEX && block.type !== 'block' && options.line) {
        if ((token = scan(LINE_REGEX, 'line'))) {
          push(new Node(token));
          continue;
        }
      }

      // plain text run
      if ((token = scan(PLAIN_TEXT_REGEX, 'text'))) {
        push(new Node(token));
        continue;
      }

      // anything else: take a single character as text
      push(new Node({ type: 'text', value: consume(remaining[0]) }));
    }

    return cst;
  };
  117. module.exports = parse;