createExtensionPattern.js 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112
  1. import { VALID_DIGITS } from '../../constants.js'
  2. // The RFC 3966 format for extensions.
  3. const RFC3966_EXTN_PREFIX = ';ext='
  4. /**
  5. * Helper method for constructing regular expressions for parsing. Creates
  6. * an expression that captures up to max_length digits.
  7. * @return {string} RegEx pattern to capture extension digits.
  8. */
  9. const getExtensionDigitsPattern = (maxLength) => `([${VALID_DIGITS}]{1,${maxLength}})`
  10. /**
  11. * Helper initialiser method to create the regular-expression pattern to match
  12. * extensions.
  13. * Copy-pasted from Google's `libphonenumber`:
  14. * https://github.com/google/libphonenumber/blob/55b2646ec9393f4d3d6661b9c82ef9e258e8b829/javascript/i18n/phonenumbers/phonenumberutil.js#L759-L766
  15. * @return {string} RegEx pattern to capture extensions.
  16. */
  17. export default function createExtensionPattern(purpose) {
  18. // We cap the maximum length of an extension based on the ambiguity of the way
  19. // the extension is prefixed. As per ITU, the officially allowed length for
  20. // extensions is actually 40, but we don't support this since we haven't seen real
  21. // examples and this introduces many false interpretations as the extension labels
  22. // are not standardized.
  23. /** @type {string} */
  24. var extLimitAfterExplicitLabel = '20';
  25. /** @type {string} */
  26. var extLimitAfterLikelyLabel = '15';
  27. /** @type {string} */
  28. var extLimitAfterAmbiguousChar = '9';
  29. /** @type {string} */
  30. var extLimitWhenNotSure = '6';
  31. /** @type {string} */
  32. var possibleSeparatorsBetweenNumberAndExtLabel = "[ \u00A0\\t,]*";
  33. // Optional full stop (.) or colon, followed by zero or more spaces/tabs/commas.
  34. /** @type {string} */
  35. var possibleCharsAfterExtLabel = "[:\\.\uFF0E]?[ \u00A0\\t,-]*";
  36. /** @type {string} */
  37. var optionalExtnSuffix = "#?";
  38. // Here the extension is called out in more explicit way, i.e mentioning it obvious
  39. // patterns like "ext.".
  40. /** @type {string} */
  41. var explicitExtLabels =
  42. "(?:e?xt(?:ensi(?:o\u0301?|\u00F3))?n?|\uFF45?\uFF58\uFF54\uFF4E?|\u0434\u043E\u0431|anexo)";
  43. // One-character symbols that can be used to indicate an extension, and less
  44. // commonly used or more ambiguous extension labels.
  45. /** @type {string} */
  46. var ambiguousExtLabels = "(?:[x\uFF58#\uFF03~\uFF5E]|int|\uFF49\uFF4E\uFF54)";
  47. // When extension is not separated clearly.
  48. /** @type {string} */
  49. var ambiguousSeparator = "[- ]+";
  50. // This is the same as possibleSeparatorsBetweenNumberAndExtLabel, but not matching
  51. // comma as extension label may have it.
  52. /** @type {string} */
  53. var possibleSeparatorsNumberExtLabelNoComma = "[ \u00A0\\t]*";
  54. // ",," is commonly used for auto dialling the extension when connected. First
  55. // comma is matched through possibleSeparatorsBetweenNumberAndExtLabel, so we do
  56. // not repeat it here. Semi-colon works in Iphone and Android also to pop up a
  57. // button with the extension number following.
  58. /** @type {string} */
  59. var autoDiallingAndExtLabelsFound = "(?:,{2}|;)";
  60. /** @type {string} */
  61. var rfcExtn = RFC3966_EXTN_PREFIX
  62. + getExtensionDigitsPattern(extLimitAfterExplicitLabel);
  63. /** @type {string} */
  64. var explicitExtn = possibleSeparatorsBetweenNumberAndExtLabel + explicitExtLabels
  65. + possibleCharsAfterExtLabel
  66. + getExtensionDigitsPattern(extLimitAfterExplicitLabel)
  67. + optionalExtnSuffix;
  68. /** @type {string} */
  69. var ambiguousExtn = possibleSeparatorsBetweenNumberAndExtLabel + ambiguousExtLabels
  70. + possibleCharsAfterExtLabel
  71. + getExtensionDigitsPattern(extLimitAfterAmbiguousChar)
  72. + optionalExtnSuffix;
  73. /** @type {string} */
  74. var americanStyleExtnWithSuffix = ambiguousSeparator
  75. + getExtensionDigitsPattern(extLimitWhenNotSure) + "#";
  76. /** @type {string} */
  77. var autoDiallingExtn = possibleSeparatorsNumberExtLabelNoComma
  78. + autoDiallingAndExtLabelsFound + possibleCharsAfterExtLabel
  79. + getExtensionDigitsPattern(extLimitAfterLikelyLabel)
  80. + optionalExtnSuffix;
  81. /** @type {string} */
  82. var onlyCommasExtn = possibleSeparatorsNumberExtLabelNoComma
  83. + "(?:,)+" + possibleCharsAfterExtLabel
  84. + getExtensionDigitsPattern(extLimitAfterAmbiguousChar)
  85. + optionalExtnSuffix;
  86. // The first regular expression covers RFC 3966 format, where the extension is added
  87. // using ";ext=". The second more generic where extension is mentioned with explicit
  88. // labels like "ext:". In both the above cases we allow more numbers in extension than
  89. // any other extension labels. The third one captures when single character extension
  90. // labels or less commonly used labels are used. In such cases we capture fewer
  91. // extension digits in order to reduce the chance of falsely interpreting two
  92. // numbers beside each other as a number + extension. The fourth one covers the
  93. // special case of American numbers where the extension is written with a hash
  94. // at the end, such as "- 503#". The fifth one is exclusively for extension
  95. // autodialling formats which are used when dialling and in this case we accept longer
  96. // extensions. The last one is more liberal on the number of commas that acts as
  97. // extension labels, so we have a strict cap on the number of digits in such extensions.
  98. return rfcExtn + "|"
  99. + explicitExtn + "|"
  100. + ambiguousExtn + "|"
  101. + americanStyleExtnWithSuffix + "|"
  102. + autoDiallingExtn + "|"
  103. + onlyCommasExtn;
  104. }