PhoneNumberMatcher.js 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384
  1. "use strict";
  2. function _typeof(obj) { "@babel/helpers - typeof"; return _typeof = "function" == typeof Symbol && "symbol" == typeof Symbol.iterator ? function (obj) { return typeof obj; } : function (obj) { return obj && "function" == typeof Symbol && obj.constructor === Symbol && obj !== Symbol.prototype ? "symbol" : typeof obj; }, _typeof(obj); }
  3. Object.defineProperty(exports, "__esModule", {
  4. value: true
  5. });
  6. exports["default"] = void 0;
  7. var _PhoneNumber = _interopRequireDefault(require("./PhoneNumber.js"));
  8. var _constants = require("./constants.js");
  9. var _createExtensionPattern = _interopRequireDefault(require("./helpers/extension/createExtensionPattern.js"));
  10. var _RegExpCache = _interopRequireDefault(require("./findNumbers/RegExpCache.js"));
  11. var _util = require("./findNumbers/util.js");
  12. var _utf = require("./findNumbers/utf-8.js");
  13. var _Leniency = _interopRequireDefault(require("./findNumbers/Leniency.js"));
  14. var _parsePreCandidate = _interopRequireDefault(require("./findNumbers/parsePreCandidate.js"));
  15. var _isValidPreCandidate = _interopRequireDefault(require("./findNumbers/isValidPreCandidate.js"));
  16. var _isValidCandidate = _interopRequireWildcard(require("./findNumbers/isValidCandidate.js"));
  17. var _metadata = require("./metadata.js");
  18. var _parsePhoneNumber = _interopRequireDefault(require("./parsePhoneNumber.js"));
  19. function _getRequireWildcardCache(nodeInterop) { if (typeof WeakMap !== "function") return null; var cacheBabelInterop = new WeakMap(); var cacheNodeInterop = new WeakMap(); return (_getRequireWildcardCache = function _getRequireWildcardCache(nodeInterop) { return nodeInterop ? cacheNodeInterop : cacheBabelInterop; })(nodeInterop); }
  20. function _interopRequireWildcard(obj, nodeInterop) { if (!nodeInterop && obj && obj.__esModule) { return obj; } if (obj === null || _typeof(obj) !== "object" && typeof obj !== "function") { return { "default": obj }; } var cache = _getRequireWildcardCache(nodeInterop); if (cache && cache.has(obj)) { return cache.get(obj); } var newObj = {}; var hasPropertyDescriptor = Object.defineProperty && Object.getOwnPropertyDescriptor; for (var key in obj) { if (key !== "default" && Object.prototype.hasOwnProperty.call(obj, key)) { var desc = hasPropertyDescriptor ? Object.getOwnPropertyDescriptor(obj, key) : null; if (desc && (desc.get || desc.set)) { Object.defineProperty(newObj, key, desc); } else { newObj[key] = obj[key]; } } } newObj["default"] = obj; if (cache) { cache.set(obj, newObj); } return newObj; }
  21. function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { "default": obj }; }
  22. function _createForOfIteratorHelperLoose(o, allowArrayLike) { var it = typeof Symbol !== "undefined" && o[Symbol.iterator] || o["@@iterator"]; if (it) return (it = it.call(o)).next.bind(it); if (Array.isArray(o) || (it = _unsupportedIterableToArray(o)) || allowArrayLike && o && typeof o.length === "number") { if (it) o = it; var i = 0; return function () { if (i >= o.length) return { done: true }; return { done: false, value: o[i++] }; }; } throw new TypeError("Invalid attempt to iterate non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method."); }
  23. function _unsupportedIterableToArray(o, minLen) { if (!o) return; if (typeof o === "string") return _arrayLikeToArray(o, minLen); var n = Object.prototype.toString.call(o).slice(8, -1); if (n === "Object" && o.constructor) n = o.constructor.name; if (n === "Map" || n === "Set") return Array.from(o); if (n === "Arguments" || /^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(n)) return _arrayLikeToArray(o, minLen); }
  24. function _arrayLikeToArray(arr, len) { if (len == null || len > arr.length) len = arr.length; for (var i = 0, arr2 = new Array(len); i < len; i++) { arr2[i] = arr[i]; } return arr2; }
  25. function _classCallCheck(instance, Constructor) { if (!(instance instanceof Constructor)) { throw new TypeError("Cannot call a class as a function"); } }
  26. function _defineProperties(target, props) { for (var i = 0; i < props.length; i++) { var descriptor = props[i]; descriptor.enumerable = descriptor.enumerable || false; descriptor.configurable = true; if ("value" in descriptor) descriptor.writable = true; Object.defineProperty(target, descriptor.key, descriptor); } }
  27. function _createClass(Constructor, protoProps, staticProps) { if (protoProps) _defineProperties(Constructor.prototype, protoProps); if (staticProps) _defineProperties(Constructor, staticProps); Object.defineProperty(Constructor, "prototype", { writable: false }); return Constructor; }
  28. var USE_NON_GEOGRAPHIC_COUNTRY_CODE = false;
  29. var EXTN_PATTERNS_FOR_MATCHING = (0, _createExtensionPattern["default"])('matching');
  30. /**
  31. * Patterns used to extract phone numbers from a larger phone-number-like pattern. These are
  32. * ordered according to specificity. For example, white-space is last since that is frequently
  33. * used in numbers, not just to separate two numbers. We have separate patterns since we don't
  34. * want to break up the phone-number-like text on more than one different kind of symbol at one
  35. * time, although symbols of the same type (e.g. space) can be safely grouped together.
  36. *
  37. * Note that if there is a match, we will always check any text found up to the first match as
  38. * well.
  39. */
  40. var INNER_MATCHES = [// Breaks on the slash - e.g. "651-234-2345/332-445-1234"
  41. '\\/+(.*)/', // Note that the bracket here is inside the capturing group, since we consider it part of the
  42. // phone number. Will match a pattern like "(650) 223 3345 (754) 223 3321".
  43. '(\\([^(]*)', // Breaks on a hyphen - e.g. "12345 - 332-445-1234 is my number."
  44. // We require a space on either side of the hyphen for it to be considered a separator.
  45. "(?:".concat(_utf.pZ, "-|-").concat(_utf.pZ, ")").concat(_utf.pZ, "*(.+)"), // Various types of wide hyphens. Note we have decided not to enforce a space here, since it's
  46. // possible that it's supposed to be used to break two numbers without spaces, and we haven't
  47. // seen many instances of it used within a number.
  48. "[\u2012-\u2015\uFF0D]".concat(_utf.pZ, "*(.+)"), // Breaks on a full stop - e.g. "12345. 332-445-1234 is my number."
  49. "\\.+".concat(_utf.pZ, "*([^.]+)"), // Breaks on space - e.g. "3324451234 8002341234"
  50. "".concat(_utf.pZ, "+(").concat(_utf.PZ, "+)")]; // Limit on the number of leading (plus) characters.
  51. var leadLimit = (0, _util.limit)(0, 2); // Limit on the number of consecutive punctuation characters.
  52. var punctuationLimit = (0, _util.limit)(0, 4);
  53. /* The maximum number of digits allowed in a digit-separated block. As we allow all digits in a
  54. * single block, set high enough to accommodate the entire national number and the international
  55. * country code. */
  56. var digitBlockLimit = _constants.MAX_LENGTH_FOR_NSN + _constants.MAX_LENGTH_COUNTRY_CODE; // Limit on the number of blocks separated by punctuation.
  57. // Uses digitBlockLimit since some formats use spaces to separate each digit.
  58. var blockLimit = (0, _util.limit)(0, digitBlockLimit);
  59. /* A punctuation sequence allowing white space. */
  60. var punctuation = "[".concat(_constants.VALID_PUNCTUATION, "]") + punctuationLimit; // A digits block without punctuation.
  61. var digitSequence = _utf.pNd + (0, _util.limit)(1, digitBlockLimit);
  62. /**
  63. * Phone number pattern allowing optional punctuation.
  64. * The phone number pattern used by `find()`, similar to
  65. * VALID_PHONE_NUMBER, but with the following differences:
  66. * <ul>
  67. * <li>All captures are limited in order to place an upper bound to the text matched by the
  68. * pattern.
  69. * <ul>
  70. * <li>Leading punctuation / plus signs are limited.
  71. * <li>Consecutive occurrences of punctuation are limited.
  72. * <li>Number of digits is limited.
  73. * </ul>
  74. * <li>No whitespace is allowed at the start or end.
  75. * <li>No alpha digits (vanity numbers such as 1-800-SIX-FLAGS) are currently supported.
  76. * </ul>
  77. */
  78. var PATTERN = '(?:' + _isValidCandidate.LEAD_CLASS + punctuation + ')' + leadLimit + digitSequence + '(?:' + punctuation + digitSequence + ')' + blockLimit + '(?:' + EXTN_PATTERNS_FOR_MATCHING + ')?'; // Regular expression of trailing characters that we want to remove.
  79. // We remove all characters that are not alpha or numerical characters.
  80. // The hash character is retained here, as it may signify
  81. // the previous block was an extension.
  82. //
  83. // // Don't know what does '&&' mean here.
  84. // const UNWANTED_END_CHAR_PATTERN = new RegExp(`[[\\P{N}&&\\P{L}]&&[^#]]+$`)
  85. //
  86. var UNWANTED_END_CHAR_PATTERN = new RegExp("[^".concat(_utf._pN).concat(_utf._pL, "#]+$"));
  87. var NON_DIGITS_PATTERN = /(\D+)/;
  88. var MAX_SAFE_INTEGER = Number.MAX_SAFE_INTEGER || Math.pow(2, 53) - 1;
  89. /**
  90. * A stateful class that finds and extracts telephone numbers from {@linkplain CharSequence text}.
  91. * Instances can be created using the {@linkplain PhoneNumberUtil#findNumbers factory methods} in
  92. * {@link PhoneNumberUtil}.
  93. *
  94. * <p>Vanity numbers (phone numbers using alphabetic digits such as <tt>1-800-SIX-FLAGS</tt> are
  95. * not found.
  96. *
  97. * <p>This class is not thread-safe.
  98. */
  99. var PhoneNumberMatcher = /*#__PURE__*/function () {
  100. /**
  101. * @param {string} text — the character sequence that we will search, null for no text.
  102. * @param {'POSSIBLE'|'VALID'|'STRICT_GROUPING'|'EXACT_GROUPING'} [options.leniency] — The leniency to use when evaluating candidate phone numbers. See `source/findNumbers/Leniency.js` for more details.
  103. * @param {number} [options.maxTries] — The maximum number of invalid numbers to try before giving up on the text. This is to cover degenerate cases where the text has a lot of false positives in it. Must be >= 0.
  104. */
  105. function PhoneNumberMatcher() {
  106. var text = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : '';
  107. var options = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : {};
  108. var metadata = arguments.length > 2 ? arguments[2] : undefined;
  109. _classCallCheck(this, PhoneNumberMatcher);
  110. options = {
  111. v2: options.v2,
  112. defaultCallingCode: options.defaultCallingCode,
  113. defaultCountry: options.defaultCountry && (0, _metadata.isSupportedCountry)(options.defaultCountry, metadata) ? options.defaultCountry : undefined,
  114. leniency: options.leniency || (options.extended ? 'POSSIBLE' : 'VALID'),
  115. maxTries: options.maxTries || MAX_SAFE_INTEGER
  116. }; // Validate `leniency`.
  117. if (!options.leniency) {
  118. throw new TypeError('`leniency` is required');
  119. }
  120. if (options.leniency !== 'POSSIBLE' && options.leniency !== 'VALID') {
  121. throw new TypeError("Invalid `leniency`: \"".concat(options.leniency, "\". Supported values: \"POSSIBLE\", \"VALID\"."));
  122. } // Validate `maxTries`.
  123. if (options.maxTries < 0) {
  124. throw new TypeError('`maxTries` must be `>= 0`');
  125. }
  126. this.text = text;
  127. this.options = options;
  128. this.metadata = metadata; // The degree of phone number validation.
  129. this.leniency = _Leniency["default"][options.leniency];
  130. if (!this.leniency) {
  131. throw new TypeError("Unknown leniency: \"".concat(options.leniency, "\""));
  132. }
  133. /** The maximum number of retries after matching an invalid number. */
  134. this.maxTries = options.maxTries;
  135. this.PATTERN = new RegExp(PATTERN, 'ig');
  136. /** The iteration tristate. */
  137. this.state = 'NOT_READY';
  138. /** The next index to start searching at. Undefined in {@link State#DONE}. */
  139. this.searchIndex = 0; // A cache for frequently used country-specific regular expressions. Set to 32 to cover ~2-3
  140. // countries being used for the same doc with ~10 patterns for each country. Some pages will have
  141. // a lot more countries in use, but typically fewer numbers for each so expanding the cache for
  142. // that use-case won't have a lot of benefit.
  143. this.regExpCache = new _RegExpCache["default"](32);
  144. }
  145. /**
  146. * Attempts to find the next subsequence in the searched sequence on or after {@code searchIndex}
  147. * that represents a phone number. Returns the next match, null if none was found.
  148. *
  149. * @param index the search index to start searching at
  150. * @return the phone number match found, null if none can be found
  151. */
  152. _createClass(PhoneNumberMatcher, [{
  153. key: "find",
  154. value: function find() {
  155. // // Reset the regular expression.
  156. // this.PATTERN.lastIndex = index
  157. var matches;
  158. while (this.maxTries > 0 && (matches = this.PATTERN.exec(this.text)) !== null) {
  159. var candidate = matches[0];
  160. var offset = matches.index;
  161. candidate = (0, _parsePreCandidate["default"])(candidate);
  162. if ((0, _isValidPreCandidate["default"])(candidate, offset, this.text)) {
  163. var match = // Try to come up with a valid match given the entire candidate.
  164. this.parseAndVerify(candidate, offset, this.text) // If that failed, try to find an "inner match" -
  165. // there might be a phone number within this candidate.
  166. || this.extractInnerMatch(candidate, offset, this.text);
  167. if (match) {
  168. if (this.options.v2) {
  169. return {
  170. startsAt: match.startsAt,
  171. endsAt: match.endsAt,
  172. number: match.phoneNumber
  173. };
  174. } else {
  175. var phoneNumber = match.phoneNumber;
  176. var result = {
  177. startsAt: match.startsAt,
  178. endsAt: match.endsAt,
  179. phone: phoneNumber.nationalNumber
  180. };
  181. if (phoneNumber.country) {
  182. /* istanbul ignore if */
  183. if (USE_NON_GEOGRAPHIC_COUNTRY_CODE && country === '001') {
  184. result.countryCallingCode = phoneNumber.countryCallingCode;
  185. } else {
  186. result.country = phoneNumber.country;
  187. }
  188. } else {
  189. result.countryCallingCode = phoneNumber.countryCallingCode;
  190. }
  191. if (phoneNumber.ext) {
  192. result.ext = phoneNumber.ext;
  193. }
  194. return result;
  195. }
  196. }
  197. }
  198. this.maxTries--;
  199. }
  200. }
  201. /**
  202. * Attempts to extract a match from `substring`
  203. * if the substring itself does not qualify as a match.
  204. */
  205. }, {
  206. key: "extractInnerMatch",
  207. value: function extractInnerMatch(substring, offset, text) {
  208. for (var _iterator = _createForOfIteratorHelperLoose(INNER_MATCHES), _step; !(_step = _iterator()).done;) {
  209. var innerMatchPattern = _step.value;
  210. var isFirstMatch = true;
  211. var candidateMatch = void 0;
  212. var innerMatchRegExp = new RegExp(innerMatchPattern, 'g');
  213. while (this.maxTries > 0 && (candidateMatch = innerMatchRegExp.exec(substring)) !== null) {
  214. if (isFirstMatch) {
  215. // We should handle any group before this one too.
  216. var _candidate = (0, _util.trimAfterFirstMatch)(UNWANTED_END_CHAR_PATTERN, substring.slice(0, candidateMatch.index));
  217. var _match = this.parseAndVerify(_candidate, offset, text);
  218. if (_match) {
  219. return _match;
  220. }
  221. this.maxTries--;
  222. isFirstMatch = false;
  223. }
  224. var candidate = (0, _util.trimAfterFirstMatch)(UNWANTED_END_CHAR_PATTERN, candidateMatch[1]); // Java code does `groupMatcher.start(1)` here,
  225. // but there's no way in javascript to get a `candidate` start index,
  226. // therefore resort to using this kind of an approximation.
  227. // (`groupMatcher` is called `candidateInSubstringMatch` in this javascript port)
  228. // https://stackoverflow.com/questions/15934353/get-index-of-each-capture-in-a-javascript-regex
  229. var candidateIndexGuess = substring.indexOf(candidate, candidateMatch.index);
  230. var match = this.parseAndVerify(candidate, offset + candidateIndexGuess, text);
  231. if (match) {
  232. return match;
  233. }
  234. this.maxTries--;
  235. }
  236. }
  237. }
  238. /**
  239. * Parses a phone number from the `candidate` using `parse` and
  240. * verifies it matches the requested `leniency`. If parsing and verification succeed,
  241. * a corresponding `PhoneNumberMatch` is returned, otherwise this method returns `null`.
  242. *
  243. * @param candidate the candidate match
  244. * @param offset the offset of {@code candidate} within {@link #text}
  245. * @return the parsed and validated phone number match, or null
  246. */
  247. }, {
  248. key: "parseAndVerify",
  249. value: function parseAndVerify(candidate, offset, text) {
  250. if (!(0, _isValidCandidate["default"])(candidate, offset, text, this.options.leniency)) {
  251. return;
  252. }
  253. var phoneNumber = (0, _parsePhoneNumber["default"])(candidate, {
  254. extended: true,
  255. defaultCountry: this.options.defaultCountry,
  256. defaultCallingCode: this.options.defaultCallingCode
  257. }, this.metadata);
  258. if (!phoneNumber) {
  259. return;
  260. }
  261. if (!phoneNumber.isPossible()) {
  262. return;
  263. }
  264. if (this.leniency(phoneNumber, {
  265. candidate: candidate,
  266. defaultCountry: this.options.defaultCountry,
  267. metadata: this.metadata,
  268. regExpCache: this.regExpCache
  269. })) {
  270. return {
  271. startsAt: offset,
  272. endsAt: offset + candidate.length,
  273. phoneNumber: phoneNumber
  274. };
  275. }
  276. }
  277. }, {
  278. key: "hasNext",
  279. value: function hasNext() {
  280. if (this.state === 'NOT_READY') {
  281. this.lastMatch = this.find(); // (this.searchIndex)
  282. if (this.lastMatch) {
  283. // this.searchIndex = this.lastMatch.endsAt
  284. this.state = 'READY';
  285. } else {
  286. this.state = 'DONE';
  287. }
  288. }
  289. return this.state === 'READY';
  290. }
  291. }, {
  292. key: "next",
  293. value: function next() {
  294. // Check the state and find the next match as a side-effect if necessary.
  295. if (!this.hasNext()) {
  296. throw new Error('No next element');
  297. } // Don't retain that memory any longer than necessary.
  298. var result = this.lastMatch;
  299. this.lastMatch = null;
  300. this.state = 'NOT_READY';
  301. return result;
  302. }
  303. }]);
  304. return PhoneNumberMatcher;
  305. }();
  306. exports["default"] = PhoneNumberMatcher;
  307. //# sourceMappingURL=PhoneNumberMatcher.js.map