codtracker-js/node_modules/eslint/lib/rules/no-misleading-character-class.js

/**
 * @author Toru Nagashima <https://github.com/mysticatea>
 */
'use strict';

const {
  CALL,
  CONSTRUCT,
  ReferenceTracker,
  getStaticValue,
  getStringIfConstant,
} = require('@eslint-community/eslint-utils');
const { RegExpParser, visitRegExpAST } = require('@eslint-community/regexpp');
const {
  isCombiningCharacter,
  isEmojiModifier,
  isRegionalIndicatorSymbol,
  isSurrogatePair,
} = require('./utils/unicode');
const astUtils = require('./utils/ast-utils.js');
const { isValidWithUnicodeFlag } = require('./utils/regular-expressions');
const {
  parseStringLiteral,
  parseTemplateToken,
} = require('./utils/char-source');

//------------------------------------------------------------------------------
// Helpers
//------------------------------------------------------------------------------

/**
 * @typedef {import('@eslint-community/regexpp').AST.Character} Character
 * @typedef {import('@eslint-community/regexpp').AST.CharacterClassElement} CharacterClassElement
 */

/**
 * Splits the elements of a character class into runs of adjacent characters.
 *
 * A `CharacterClassRange` (`a-b`) swallows the characters on both sides of the
 * hyphen, so its `min` is put back at the end of the current run and its `max`
 * starts the next run. Character sets, nested classes, string disjunctions and
 * class expressions end the current run without contributing any character.
 * @param {CharacterClassElement[]} nodes The elements of a character class.
 * @returns {IterableIterator<Character[]>} The runs of adjacent characters, in source order.
 */
function* iterateCharacterSequence(nodes) {
  /** @type {Character[]} */
  let run = [];

  for (const element of nodes) {
    const { type } = element;

    if (type === 'Character') {
      run.push(element);
    } else if (type === 'CharacterClassRange') {
      // Close the run with the lower bound, then restart from the upper bound.
      run.push(element.min);
      yield run;
      run = [element.max];
    } else if (
      type === 'CharacterSet' ||
      type === 'CharacterClass' || // [[]] nesting character class
      type === 'ClassStringDisjunction' || // \q{...}
      type === 'ExpressionCharacterClass' // [A--B]
    ) {
      // Nothing to emit when two separators follow each other.
      if (run.length !== 0) {
        yield run;
        run = [];
      }
    }

    // Elements of any other type are ignored and do not interrupt the run.
  }

  if (run.length !== 0) {
    yield run;
  }
}

// Matches a whole `\u{...}` escape with one or more hex digits; letter case is ignored.
const UNICODE_CODE_POINT_ESCAPE_PATTERN = /^\\u\{[\da-f]+\}$/iu;

/**
 * Tells whether a character was written in the pattern as a `\u{...}` escape.
 * @param {Character} char The character node whose raw text is inspected.
 * @returns {boolean} `true` if the raw text of the node is a Unicode code point escape.
 */
function isUnicodeCodePointEscape(char) {
  const { raw } = char;

  return UNICODE_CODE_POINT_ESCAPE_PATTERN.test(raw);
}

/**
 * Yields every pair made of a character and the entry located right before it.
 * Positions where either side is missing (the start of the list, or an entry
 * that was replaced with `null`) are skipped.
 * @param {(Character|null)[]} chars The list that provides the second item of each pair.
 * @param {(Character|null)[]} precedingChars The list that provides the first item of each pair.
 * @returns {IterableIterator<Character[]>} The `[previous, current]` pairs, in order.
 */
function* iterateAdjacentPairs(chars, precedingChars) {
  for (let index = 0; index < chars.length; index += 1) {
    const before = precedingChars[index - 1];
    const current = chars[index];

    if (before && current) {
      yield [before, current];
    }
  }
}

/**
 * Detectors keyed by problem kind (the keys double as report message ids).
 * Each detector yields the groups of characters that exhibit its problem.
 * The order of the keys defines the order in which the kinds are checked.
 * @type {Record<string, (chars: Character[]) => IterableIterator<Character[]>>}
 */
const findCharacterSequences = {
  // A surrogate pair where neither half is written as `\u{...}`.
  *surrogatePairWithoutUFlag(chars) {
    for (const pair of iterateAdjacentPairs(chars, chars)) {
      const [lead, trail] = pair;

      if (
        isSurrogatePair(lead.value, trail.value) &&
        !isUnicodeCodePointEscape(lead) &&
        !isUnicodeCodePointEscape(trail)
      ) {
        yield pair;
      }
    }
  },

  // A surrogate pair where at least one half is written as `\u{...}`.
  *surrogatePair(chars) {
    for (const pair of iterateAdjacentPairs(chars, chars)) {
      const [lead, trail] = pair;

      if (
        isSurrogatePair(lead.value, trail.value) &&
        (isUnicodeCodePointEscape(lead) || isUnicodeCodePointEscape(trail))
      ) {
        yield pair;
      }
    }
  },

  /*
   * A combining character right after a non-combining one.
   * The base character is read from `unfilteredChars`: when `allowEscape` is `true`,
   * a combined character should only be allowed if the combining mark itself appears
   * as an escape sequence, so the base must be considered even if it is escaped.
   */
  *combiningClass(chars, unfilteredChars) {
    for (const pair of iterateAdjacentPairs(chars, unfilteredChars)) {
      const [base, mark] = pair;

      if (isCombiningCharacter(mark.value) && !isCombiningCharacter(base.value)) {
        yield pair;
      }
    }
  },

  // An emoji modifier right after something that is not an emoji modifier.
  *emojiModifier(chars) {
    for (const pair of iterateAdjacentPairs(chars, chars)) {
      const [base, modifier] = pair;

      if (isEmojiModifier(modifier.value) && !isEmojiModifier(base.value)) {
        yield pair;
      }
    }
  },

  // Two regional indicator symbols in a row.
  *regionalIndicatorSymbol(chars) {
    for (const pair of iterateAdjacentPairs(chars, chars)) {
      const [first, second] = pair;

      if (
        isRegionalIndicatorSymbol(second.value) &&
        isRegionalIndicatorSymbol(first.value)
      ) {
        yield pair;
      }
    }
  },

  // Characters glued together with U+200D; chained joins form one sequence.
  *zwj(chars) {
    const ZERO_WIDTH_JOINER = 0x200d;
    let joined = null;

    // The first and last entries can never be a joiner with both neighbours.
    for (let index = 1; index < chars.length - 1; index += 1) {
      const before = chars[index - 1];
      const joiner = chars[index];
      const after = chars[index + 1];
      const joinsNeighbours =
        before &&
        joiner &&
        after &&
        joiner.value === ZERO_WIDTH_JOINER &&
        before.value !== ZERO_WIDTH_JOINER &&
        after.value !== ZERO_WIDTH_JOINER;

      if (!joinsNeighbours) {
        continue;
      }

      if (joined && joined.at(-1) === before) {
        joined.push(joiner, after); // extends the sequence in progress
        continue;
      }

      if (joined) {
        yield joined;
      }
      joined = [before, joiner, after];
    }

    if (joined) {
      yield joined;
    }
  },
};

const kinds = Object.keys(findCharacterSequences);

/**
 * Resolves a node either to the `regex` data of a regular expression literal,
 * or to its static value when that value is not a regular expression object.
 *
 * This stands in for `getStaticValue` in environments where certain regular expressions
 * cannot be evaluated. A known example is Node.js 18, which does not support the `v` flag:
 * there, calling `getStaticValue` on a regular expression node with the `v` flag always
 * returns `null`.
 *
 * Limitation: a regular expression is only recognized when the given node is itself
 * a regular expression literal node.
 * @param {ASTNode | undefined} node The node to be inspected.
 * @param {Scope} initialScope Scope to start finding variables. Identifier references found in this scope are resolved.
 * @returns {{ value: any } | { regex: { pattern: string, flags: string } } | null} The static value of the node, or `null`.
 */
function getStaticValueOrRegex(node, initialScope) {
  if (!node) {
    return null;
  }

  const isRegexLiteral = node.type === 'Literal' && node.regex;

  if (isRegexLiteral) {
    // Read pattern and flags straight from the AST; no evaluation needed.
    return { regex: node.regex };
  }

  const evaluated = getStaticValue(node, initialScope);

  // A RegExp reached indirectly (i.e. not as a literal node) is not reported as a value.
  return evaluated?.value instanceof RegExp ? null : evaluated;
}

/**
 * Decides whether a regexpp character, given its source text, counts as an acceptable escape sequence.
 * The source text must start with a backslash, and must not simply be one or more
 * backslashes followed by the very character being escaped.
 * @param {Character} char Character to check.
 * @param {string} charSource Source text of the character to check.
 * @returns {boolean} Whether the specified regexpp character is represented as an acceptable escape sequence.
 */
function checkForAcceptableEscape(char, charSource) {
  const isEscaped = charSource.startsWith('\\');

  if (!isEscaped) {
    return false;
  }

  // The single trailing code point, when everything in front of it is backslashes.
  const escapedCharacter = /(?<=^\\+).$/su.exec(charSource)?.[0];
  const actualCharacter = String.fromCodePoint(char.value);

  return escapedCharacter !== actualCharacter;
}

/**
 * Decides whether a regexpp character produced by a string or template literal
 * is represented as an acceptable escape sequence.
 * The character's source text is cut out of the literal's source with the help of
 * the literal's CodeUnit list, then handed over to `checkForAcceptableEscape`.
 * @param {Character} char Character to check.
 * @param {string} nodeSource Source text of the string or template literal that produces the character.
 * @param {CodeUnit[]} codeUnits List of CodeUnit objects of the literal that produces the character.
 * @returns {boolean} Whether the specified regexpp character is represented as an acceptable escape sequence.
 */
function checkForAcceptableEscapeInString(char, nodeSource, codeUnits) {
  // `char.end` is exclusive, so the last code unit of the character sits at `char.end - 1`.
  const { start: sourceStart } = codeUnits[char.start];
  const { end: sourceEnd } = codeUnits[char.end - 1];

  return checkForAcceptableEscape(
    char,
    nodeSource.slice(sourceStart, sourceEnd)
  );
}

//------------------------------------------------------------------------------
// Rule Definition
//------------------------------------------------------------------------------

/** @type {import('../types').Rule.RuleModule} */
module.exports = {
  meta: {
    type: 'problem',

    docs: {
      description:
        'Disallow characters which are made with multiple code points in character class syntax',
      recommended: true,
      url: 'https://eslint.org/docs/latest/rules/no-misleading-character-class',
    },

    hasSuggestions: true,

    schema: [
      {
        type: 'object',
        properties: {
          allowEscape: {
            type: 'boolean',
            default: false,
          },
        },
        additionalProperties: false,
      },
    ],

    messages: {
      surrogatePairWithoutUFlag:
        "Unexpected surrogate pair in character class. Use 'u' flag.",
      surrogatePair: 'Unexpected surrogate pair in character class.',
      combiningClass: 'Unexpected combined character in character class.',
      emojiModifier: 'Unexpected modified Emoji in character class.',
      regionalIndicatorSymbol: 'Unexpected national flag in character class.',
      zwj: 'Unexpected joined character sequence in character class.',
      suggestUnicodeFlag: "Add unicode 'u' flag to regex.",
    },
  },
  create(context) {
    const allowEscape = context.options[0]?.allowEscape;
    const sourceCode = context.sourceCode;
    const parser = new RegExpParser();
    const checkedPatternNodes = new Set();

    /**
     * Verify a given regular expression.
     * @param {Node} node The node to report.
     * @param {string} pattern The regular expression pattern to verify.
     * @param {string} flags The flags of the regular expression.
     * @param {Function} unicodeFixer Fixer for missing "u" flag.
     * @returns {void}
     */
    function verify(node, pattern, flags, unicodeFixer) {
      let patternNode;

      try {
        patternNode = parser.parsePattern(pattern, 0, pattern.length, {
          unicode: flags.includes('u'),
          unicodeSets: flags.includes('v'),
        });
      } catch {
        // Ignore regular expressions with syntax errors
        return;
      }

      let codeUnits = null;

      /**
       * Checks whether a specified regexpp character is represented as an acceptable escape sequence.
       * For the purposes of this rule, an escape sequence is considered acceptable if it consists of one or more backslashes followed by the character being escaped.
       * @param {Character} char Character to check.
       * @returns {boolean} Whether the specified regexpp character is represented as an acceptable escape sequence.
       */
      function isAcceptableEscapeSequence(char) {
        if (node.type === 'Literal' && node.regex) {
          return checkForAcceptableEscape(char, char.raw);
        }
        if (node.type === 'Literal' && typeof node.value === 'string') {
          const nodeSource = node.raw;

          codeUnits ??= parseStringLiteral(nodeSource);

          return checkForAcceptableEscapeInString(char, nodeSource, codeUnits);
        }
        if (astUtils.isStaticTemplateLiteral(node)) {
          const nodeSource = sourceCode.getText(node);

          codeUnits ??= parseTemplateToken(nodeSource);

          return checkForAcceptableEscapeInString(char, nodeSource, codeUnits);
        }
        return false;
      }

      const foundKindMatches = new Map();

      visitRegExpAST(patternNode, {
        onCharacterClassEnter(ccNode) {
          for (const unfilteredChars of iterateCharacterSequence(
            ccNode.elements
          )) {
            let chars;

            if (allowEscape) {
              // Replace escape sequences with null to avoid having them flagged.
              chars = unfilteredChars.map((char) =>
                isAcceptableEscapeSequence(char) ? null : char
              );
            } else {
              chars = unfilteredChars;
            }
            for (const kind of kinds) {
              const matches = findCharacterSequences[kind](
                chars,
                unfilteredChars
              );

              if (foundKindMatches.has(kind)) {
                foundKindMatches.get(kind).push(...matches);
              } else {
                foundKindMatches.set(kind, [...matches]);
              }
            }
          }
        },
      });

      /**
       * Finds the report loc(s) for a range of matches.
       * Only literals and expression-less templates generate granular errors.
       * @param {Character[][]} matches Lists of individual characters being reported on.
       * @returns {Location[]} locs for context.report.
       * @see https://github.com/eslint/eslint/pull/17515
       */
      function getNodeReportLocations(matches) {
        if (
          !astUtils.isStaticTemplateLiteral(node) &&
          node.type !== 'Literal'
        ) {
          return matches.length ? [node.loc] : [];
        }
        return matches.map((chars) => {
          const firstIndex = chars[0].start;
          const lastIndex = chars.at(-1).end - 1;
          let start;
          let end;

          if (node.type === 'TemplateLiteral') {
            const source = sourceCode.getText(node);
            const offset = node.range[0];

            codeUnits ??= parseTemplateToken(source);
            start = offset + codeUnits[firstIndex].start;
            end = offset + codeUnits[lastIndex].end;
          } else if (typeof node.value === 'string') {
            // String Literal
            const source = node.raw;
            const offset = node.range[0];

            codeUnits ??= parseStringLiteral(source);
            start = offset + codeUnits[firstIndex].start;
            end = offset + codeUnits[lastIndex].end;
          } else {
            // RegExp Literal
            const offset = node.range[0] + 1; // Add 1 to skip the leading slash.

            start = offset + firstIndex;
            end = offset + lastIndex + 1;
          }

          return {
            start: sourceCode.getLocFromIndex(start),
            end: sourceCode.getLocFromIndex(end),
          };
        });
      }

      for (const [kind, matches] of foundKindMatches) {
        let suggest;

        if (kind === 'surrogatePairWithoutUFlag') {
          suggest = [
            {
              messageId: 'suggestUnicodeFlag',
              fix: unicodeFixer,
            },
          ];
        }

        const locs = getNodeReportLocations(matches);

        for (const loc of locs) {
          context.report({
            node,
            loc,
            messageId: kind,
            suggest,
          });
        }
      }
    }

    return {
      'Literal[regex]'(node) {
        if (checkedPatternNodes.has(node)) {
          return;
        }
        verify(node, node.regex.pattern, node.regex.flags, (fixer) => {
          if (
            !isValidWithUnicodeFlag(
              context.languageOptions.ecmaVersion,
              node.regex.pattern
            )
          ) {
            return null;
          }

          return fixer.insertTextAfter(node, 'u');
        });
      },
      Program(node) {
        const scope = sourceCode.getScope(node);
        const tracker = new ReferenceTracker(scope);

        /*
         * Iterate calls of RegExp.
         * E.g., `new RegExp()`, `RegExp()`, `new window.RegExp()`,
         *       `const {RegExp: a} = window; new a()`, etc...
         */
        for (const { node: refNode } of tracker.iterateGlobalReferences({
          RegExp: { [CALL]: true, [CONSTRUCT]: true },
        })) {
          let pattern, flags;
          const [patternNode, flagsNode] = refNode.arguments;
          const evaluatedPattern = getStaticValueOrRegex(patternNode, scope);

          if (!evaluatedPattern) {
            continue;
          }
          if (flagsNode) {
            if (evaluatedPattern.regex) {
              pattern = evaluatedPattern.regex.pattern;
              checkedPatternNodes.add(patternNode);
            } else {
              pattern = String(evaluatedPattern.value);
            }
            flags = getStringIfConstant(flagsNode, scope);
          } else {
            if (evaluatedPattern.regex) {
              continue;
            }
            pattern = String(evaluatedPattern.value);
            flags = '';
          }

          if (typeof flags === 'string') {
            verify(patternNode, pattern, flags, (fixer) => {
              if (
                !isValidWithUnicodeFlag(
                  context.languageOptions.ecmaVersion,
                  pattern
                )
              ) {
                return null;
              }

              if (refNode.arguments.length === 1) {
                const penultimateToken = sourceCode.getLastToken(refNode, {
                  skip: 1,
                }); // skip closing parenthesis

                return fixer.insertTextAfter(
                  penultimateToken,
                  astUtils.isCommaToken(penultimateToken) ? ' "u",' : ', "u"'
                );
              }

              if (
                (flagsNode.type === 'Literal' &&
                  typeof flagsNode.value === 'string') ||
                flagsNode.type === 'TemplateLiteral'
              ) {
                const range = [flagsNode.range[0], flagsNode.range[1] - 1];

                return fixer.insertTextAfterRange(range, 'u');
              }

              return null;
            });
          }
        }
      },
    };
  },
};