123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141 |
- 'use strict';
- const { Node, Block } = require('./Node');
- const languages = require('./languages');
/**
 * Anchored patterns used by the tokenizer. Each one is tested against the
 * start of the not-yet-consumed input, hence the leading `^`.
 */
const constants = {
  // A backslash followed by any single character other than a line terminator.
  ESCAPED_CHAR_REGEX: /^\\./,

  // A string delimited by a matching pair of ', " or ` characters.
  //   group 1: opening quote, group 2: contents, group 3: closing quote.
  // NOTE: inside a character class `\1` is NOT a backreference; it is a legacy
  // octal escape for U+0001, so `[^\1]` accepts nearly every character
  // (including newlines). What actually ends the match at the closing quote is
  // the lazy quantifier combined with the trailing `(\1)` backreference.
  // The quantifier is `*?` rather than `+?` so that an empty literal such as
  // `""` matches by itself instead of running on to the next quote character
  // found later in the input (which would hide any comment in between).
  QUOTED_STRING_REGEX: /^(['"`])((?:\\.|[^\1])*?)(\1)/,

  // One line feed, optionally preceded by any number of carriage returns.
  NEWLINE_REGEX: /^\r*\n/
};
/**
 * Tokenize `input` into a tree of text, newline and comment nodes.
 *
 * The input is consumed from the front, one token at a time. Block comments
 * become nested `Block` nodes (type `'block'`) holding an `'open'` node, their
 * contents and a `'close'` node; line comments become `'line'` nodes;
 * everything else is `'text'` or `'newline'`.
 *
 * @param {string} input - Source text to tokenize.
 * @param {object} [options]
 * @param {string} [options.language='javascript'] - Key into the `languages`
 *   table (lower-cased before lookup).
 * @param {boolean} [options.block] - Block comments are only recognized when
 *   this is truthy.
 * @param {boolean} [options.line] - Line comments are only recognized when
 *   this is truthy.
 * @returns {Block} The root node (type `'root'`).
 * @throws {TypeError} If `input` is not a string.
 * @throws {Error} If the requested language has no entry in `languages`.
 */
const parse = (input, options = {}) => {
  if (typeof input !== 'string') {
    throw new TypeError('Expected input to be a string');
  }

  const cst = new Block({ type: 'root', nodes: [] });
  const stack = [cst];
  const name = (options.language || 'javascript').toLowerCase();
  const lang = languages[name];

  if (typeof lang === 'undefined') {
    throw new Error(`Language "${name}" is not supported by strip-comments`);
  }

  const { LINE_REGEX, BLOCK_OPEN_REGEX, BLOCK_CLOSE_REGEX } = lang;

  // A language counts as "triple quoted" only when it actually defines block
  // delimiters and every one of them is `"""`. The explicit length check
  // matters: `[].every(...)` is vacuously true, which would otherwise flag
  // languages that have no block comments at all.
  const delimiters = [BLOCK_OPEN_REGEX, BLOCK_CLOSE_REGEX].filter(Boolean);
  const tripleQuotes = delimiters.length > 0
    && delimiters.every(regex => regex.source === '^"""');

  // Loop-invariant patterns, created once instead of on every iteration.
  const ENDS_WITH_WORD_CHAR_REGEX = /\w$/;
  const PLAIN_TEXT_REGEX = /^[a-zABD-Z0-9\t ]+/;

  let block = cst;       // innermost block currently being filled
  let remaining = input; // not-yet-consumed tail of the input
  let token;
  let prev;              // most recently pushed node

  // Drop `value.length` characters from the front of the input.
  const consume = (value = remaining[0] || '') => {
    remaining = remaining.slice(value.length);
    return value;
  };

  // Try `regex` against the remaining input; on success consume the match and
  // return a token descriptor, otherwise return undefined.
  const scan = (regex, type = 'text') => {
    const match = regex.exec(remaining);
    if (match) {
      consume(match[0]);
      return { type, value: match[0], match };
    }
  };

  // Append `node` to the current block. Adjacent text nodes are merged, and a
  // node that has a `nodes` list becomes the new current block.
  const push = node => {
    if (prev && prev.type === 'text' && node.type === 'text') {
      prev.value += node.value;
      return;
    }
    block.push(node);
    if (node.nodes) {
      stack.push(node);
      block = node;
    }
    prev = node;
  };

  // Leave the current block and return to its parent.
  const pop = () => {
    // Defensive guard: the only call site below runs inside a 'block'.
    if (block.type === 'root') {
      throw new SyntaxError('Unclosed block comment');
    }
    stack.pop();
    block = stack[stack.length - 1];
  };

  // The order of the checks below is significant: earlier rules win.
  while (remaining !== '') {
    // Escaped characters are always plain text.
    if ((token = scan(constants.ESCAPED_CHAR_REGEX, 'text'))) {
      push(new Node(token));
      continue;
    }

    // Quoted strings: only outside block comments, not directly after a word
    // character (e.g. the apostrophe in "don't"), and not where a `"""`
    // delimiter starts in a triple-quote language.
    const afterWordChar = Boolean(prev) && ENDS_WITH_WORD_CHAR_REGEX.test(prev.value);
    const atTripleQuote = tripleQuotes && remaining.startsWith('"""');
    if (block.type !== 'block' && !afterWordChar && !atTripleQuote) {
      if ((token = scan(constants.QUOTED_STRING_REGEX, 'text'))) {
        push(new Node(token));
        continue;
      }
    }

    if ((token = scan(constants.NEWLINE_REGEX, 'newline'))) {
      push(new Node(token));
      continue;
    }

    // Block comment opener. In triple-quote languages open and close look the
    // same, so an opener is never recognized while already inside a block.
    if (BLOCK_OPEN_REGEX && options.block && !(tripleQuotes && block.type === 'block')) {
      if ((token = scan(BLOCK_OPEN_REGEX, 'open'))) {
        push(new Block({ type: 'block' }));
        push(new Node(token));
        continue;
      }
    }

    // Block comment closer, only meaningful inside a block.
    if (BLOCK_CLOSE_REGEX && block.type === 'block' && options.block) {
      if ((token = scan(BLOCK_CLOSE_REGEX, 'close'))) {
        // First capture group of the close pattern, if it has one.
        token.newline = token.match[1] || '';
        push(new Node(token));
        pop();
        continue;
      }
    }

    // Line comment, never inside a block comment.
    if (LINE_REGEX && block.type !== 'block' && options.line) {
      if ((token = scan(LINE_REGEX, 'line'))) {
        push(new Node(token));
        continue;
      }
    }

    // Fast path: a run of plain characters in one token.
    if ((token = scan(PLAIN_TEXT_REGEX, 'text'))) {
      push(new Node(token));
      continue;
    }

    // Fallback: take a single character as text.
    push(new Node({ type: 'text', value: consume(remaining[0]) }));
  }

  return cst;
};
- module.exports = parse;
|