// Copy-pasted from `PhoneNumberMatcher.js`.

import { PLUS_CHARS } from '../constants.js'
import { limit } from './util.js'

import {
	isLatinLetter,
	isInvalidPunctuationSymbol
} from './utf-8.js'

// Bracket characters (ASCII and fullwidth), written so that they can be
// embedded inside a regular expression character class.
const OPENING_PARENS = '(\\[\uFF08\uFF3B'
const CLOSING_PARENS = ')\\]\uFF09\uFF3D'

// Character class matching any single character that is not a bracket.
const NON_PARENS = `[^${OPENING_PARENS}${CLOSING_PARENS}]`

// Character class source matching an opening bracket or a plus sign.
export const LEAD_CLASS = `[${OPENING_PARENS}${PLUS_CHARS}]`

// Punctuation that may be at the start of a phone number - brackets and plus signs.
const LEAD_CLASS_LEADING = new RegExp('^' + LEAD_CLASS)

// Limit on the number of pairs of brackets in a phone number.
// NOTE(review): `limit(0, 3)` presumably produces a `{0,3}` quantifier — confirm in `./util.js`.
const BRACKET_PAIR_LIMIT = limit(0, 3)

/**
 * Pattern to check that brackets match. Opening brackets should be closed within a phone number.
 * This also checks that there is something inside the brackets. Having no brackets at all is also
 * fine.
 *
 * An opening bracket at the beginning may not be closed, but subsequent ones should be. It's
 * also possible that the leading bracket was dropped, so we shouldn't be surprised if we see a
 * closing bracket first. We limit the sets of brackets in a phone number to four.
 */
const MATCHING_BRACKETS_ENTIRE = new RegExp(
	'^'
	// Optional unmatched opening bracket at the very start.
	+ "(?:[" + OPENING_PARENS + "])?"
	// Optional closing bracket for a leading bracket (which may have been dropped).
	+ "(?:" + NON_PARENS + "+" + "[" + CLOSING_PARENS + "])?"
	// At least one non-bracket character.
	+ NON_PARENS + "+"
	// A limited number of well-formed, non-empty bracket pairs.
	+ "(?:[" + OPENING_PARENS + "]" + NON_PARENS + "+[" + CLOSING_PARENS + "])" + BRACKET_PAIR_LIMIT
	// Any trailing non-bracket characters.
	+ NON_PARENS + "*"
	+ '$'
)

/**
 * Matches strings that look like publication pages. Example:
 *
 *   Computing Complete Answers to Queries in the Presence of Limited Access Patterns.
 *   Chen Li. VLDB J. 12(3): 211-227 (2003).
 *
 * The string "211-227 (2003)" is not a telephone number.
 */
const PUB_PAGES = /\d{1,5}-+\d{1,5}\s{0,4}\(\d{1,4}/

/**
 * Decides whether a substring of `text` is acceptable as a phone number candidate,
 * judging only by its formatting and by the characters immediately around it.
 *
 * @param {string} candidate - The candidate substring.
 * @param {number} offset - Index in `text` at which `candidate` starts.
 * @param {string} text - The full text the candidate was taken from.
 * @param {string} leniency - When equal to `'POSSIBLE'`, the surrounding-character
 *   checks are skipped; any other value enables them.
 * @returns {boolean} `true` if the candidate passes all checks, `false` otherwise.
 */
export default function isValidCandidate(candidate, offset, text, leniency) {
	// Check the candidate doesn't contain any formatting
	// which would indicate that it really isn't a phone number.
	if (!MATCHING_BRACKETS_ENTIRE.test(candidate) || PUB_PAGES.test(candidate)) {
		// Return an explicit `false` so every rejection path yields the same boolean.
		return false
	}

	// If leniency is set to VALID or stricter, we also want to skip numbers that are surrounded
	// by Latin alphabetic characters, to skip cases like abc8005001234 or 8005001234def.
	if (leniency !== 'POSSIBLE') {
		// If the candidate is not at the start of the text,
		// and does not start with phone-number punctuation,
		// check the previous character.
		if (offset > 0 && !LEAD_CLASS_LEADING.test(candidate)) {
			const previousChar = text[offset - 1]
			// Reject if it is a latin letter or an invalid punctuation symbol.
			if (isInvalidPunctuationSymbol(previousChar) || isLatinLetter(previousChar)) {
				return false
			}
		}

		// Index of the first character after the candidate.
		const lastCharIndex = offset + candidate.length
		if (lastCharIndex < text.length) {
			const nextChar = text[lastCharIndex]
			if (isInvalidPunctuationSymbol(nextChar) || isLatinLetter(nextChar)) {
				return false
			}
		}
	}

	return true
}