/**
 * @author Toru Nagashima <https://github.com/mysticatea>
 */
"use strict";

const {
	CALL,
	CONSTRUCT,
	ReferenceTracker,
	getStaticValue,
	getStringIfConstant,
} = require("@eslint-community/eslint-utils");
const { RegExpParser, visitRegExpAST } = require("@eslint-community/regexpp");
const {
	isCombiningCharacter,
	isEmojiModifier,
	isRegionalIndicatorSymbol,
	isSurrogatePair,
} = require("./utils/unicode");
const astUtils = require("./utils/ast-utils.js");
const { isValidWithUnicodeFlag } = require("./utils/regular-expressions");
const {
	parseStringLiteral,
	parseTemplateToken,
} = require("./utils/char-source");

//------------------------------------------------------------------------------
// Helpers
//------------------------------------------------------------------------------

/**
 * @typedef {import('@eslint-community/regexpp').AST.Character} Character
 * @typedef {import('@eslint-community/regexpp').AST.CharacterClassElement} CharacterClassElement
 */

/**
 * Splits the elements of a character class into runs of adjacent characters.
 *
 * A `CharacterClassRange` such as `a-b` takes the characters on both sides of the hyphen
 * out of their surrounding sequences, so this function undoes that: the range's `min`
 * is appended to the run that precedes it, and its `max` starts the next run.
 * Character sets, nested character classes, `\q{...}` disjunctions and `[A--B]`
 * expressions end the current run. Elements of any other type are ignored.
 * @param {CharacterClassElement[]} nodes The character class elements to split.
 * @returns {IterableIterator<Character[]>} The runs of adjacent characters, in order.
 */
function* iterateCharacterSequence(nodes) {
	/** @type {Character[]} */
	let pending = [];

	for (const element of nodes) {
		const elementType = element.type;

		if (elementType === "Character") {
			pending.push(element);
		} else if (elementType === "CharacterClassRange") {
			// Give the range's lower bound back to the run on its left...
			pending.push(element.min);
			yield pending;

			// ...and let its upper bound open the run on its right.
			pending = [element.max];
		} else if (
			elementType === "CharacterSet" ||
			elementType === "CharacterClass" || // [[]] nesting character class
			elementType === "ClassStringDisjunction" || // \q{...}
			elementType === "ExpressionCharacterClass" // [A--B]
		) {
			// These elements separate runs; only emit a run that has content.
			if (pending.length > 0) {
				yield pending;
				pending = [];
			}
		}
	}

	if (pending.length > 0) {
		yield pending;
	}
}

/**
 * Tells whether a character node is written as a Unicode code point escape (`\u{...}`).
 * @param {Character} char The character node to inspect.
 * @returns {boolean} `true` if the raw text of the node is exactly a `\u{hex digits}` escape.
 */
function isUnicodeCodePointEscape(char) {
	const codePointEscapePattern = /^\\u\{[\da-f]+\}$/iu;

	return codePointEscapePattern.test(char.raw);
}

/**
 * Detectors for each kind of misleading sequence, keyed by the message ID used to report it.
 * Every detector is a generator that receives the characters of one sequence and yields
 * each group of characters that forms a problematic sequence.
 * Entries of `chars` may be `null` for characters the caller has excluded from checking;
 * `combiningClass` additionally receives the unfiltered list so that an excluded
 * character can still act as the base of a combining mark.
 * @type {Record<string, (chars: Array<Character | null>, unfilteredChars: Character[]) => IterableIterator<Character[]>>}
 */
const findCharacterSequences = {
	// A surrogate pair where neither half is written as a `\u{...}` escape.
	*surrogatePairWithoutUFlag(chars) {
		let previous;

		for (const char of chars) {
			if (
				previous &&
				char &&
				isSurrogatePair(previous.value, char.value) &&
				!isUnicodeCodePointEscape(previous) &&
				!isUnicodeCodePointEscape(char)
			) {
				yield [previous, char];
			}
			previous = char;
		}
	},

	// A surrogate pair where at least one half is written as a `\u{...}` escape.
	*surrogatePair(chars) {
		let previous;

		for (const char of chars) {
			if (
				previous &&
				char &&
				isSurrogatePair(previous.value, char.value) &&
				(isUnicodeCodePointEscape(previous) ||
					isUnicodeCodePointEscape(char))
			) {
				yield [previous, char];
			}
			previous = char;
		}
	},

	// A combining character directly after a non-combining one.
	*combiningClass(chars, unfilteredChars) {
		/*
		 * With `allowEscape` enabled, a combined character is only permitted when the
		 * combining mark itself is an escape sequence. The base character is therefore
		 * read from the unfiltered list, so it counts even if it is escaped.
		 */
		for (let index = 0; index < chars.length; index++) {
			const mark = chars[index];
			const base = unfilteredChars[index - 1];

			if (
				base &&
				mark &&
				isCombiningCharacter(mark.value) &&
				!isCombiningCharacter(base.value)
			) {
				yield [base, mark];
			}
		}
	},

	// An emoji modifier directly after a character that is not an emoji modifier.
	*emojiModifier(chars) {
		let previous;

		for (const char of chars) {
			if (
				previous &&
				char &&
				isEmojiModifier(char.value) &&
				!isEmojiModifier(previous.value)
			) {
				yield [previous, char];
			}
			previous = char;
		}
	},

	// Two adjacent regional indicator symbols.
	*regionalIndicatorSymbol(chars) {
		let previous;

		for (const char of chars) {
			if (
				previous &&
				char &&
				isRegionalIndicatorSymbol(char.value) &&
				isRegionalIndicatorSymbol(previous.value)
			) {
				yield [previous, char];
			}
			previous = char;
		}
	},

	// Characters glued together by U+200D (zero width joiner); chained joins are merged.
	*zwj(chars) {
		const ZERO_WIDTH_JOINER = 0x200d;
		let sequence = null;

		// A joiner needs a neighbour on both sides, so the first and last slots cannot qualify.
		for (let index = 1; index + 1 < chars.length; index++) {
			const before = chars[index - 1];
			const joiner = chars[index];
			const after = chars[index + 1];

			if (!before || !joiner || !after) {
				continue;
			}
			if (
				joiner.value !== ZERO_WIDTH_JOINER ||
				before.value === ZERO_WIDTH_JOINER ||
				after.value === ZERO_WIDTH_JOINER
			) {
				continue;
			}

			if (sequence && sequence.at(-1) === before) {
				// The previous join ended on `before`, so this join extends it.
				sequence.push(joiner, after);
				continue;
			}

			if (sequence) {
				yield sequence;
			}
			sequence = [before, joiner, after];
		}

		if (sequence) {
			yield sequence;
		}
	},
};

// Detector names in declaration order.
const kinds = Object.keys(findCharacterSequences);

/**
 * Resolves a node to either its static value or, for a regular expression literal, its `regex` data.
 *
 * This stands in for `getStaticValue` in environments that cannot evaluate certain regular
 * expressions. A known example is Node.js 18, which does not support the `v` flag: there,
 * `getStaticValue` always returns `null` for a regular expression node that uses it.
 * Regular expressions are only recognized when the node itself is a regular expression literal;
 * a static value that turns out to be a `RegExp` object is discarded.
 * @param {ASTNode | undefined} node The node to be inspected.
 * @param {Scope} initialScope Scope to start finding variables. Identifier references in this scope are resolved.
 * @returns {{ value: any } | { regex: { pattern: string, flags: string } } | null} The static value of the node, its `regex` property, or `null`.
 */
function getStaticValueOrRegex(node, initialScope) {
	if (node && node.type === "Literal" && node.regex) {
		return { regex: node.regex };
	}

	// A missing node has nothing to evaluate.
	const evaluated = node ? getStaticValue(node, initialScope) : null;

	// RegExp objects reached indirectly (e.g. through a variable) are not reported as values.
	return evaluated?.value instanceof RegExp ? null : evaluated;
}

/**
 * Decides whether the source text of a regexpp character is an acceptable escape sequence.
 * Source text that does not begin with a backslash is never acceptable. Source text that
 * consists of backslashes followed by the very character being represented is not acceptable
 * either; any other backslash-led source text is.
 * @param {Character} char Character to check.
 * @param {string} charSource Source text of the character to check.
 * @returns {boolean} Whether the specified regexpp character is represented as an acceptable escape sequence.
 */
function checkForAcceptableEscape(char, charSource) {
	if (charSource.startsWith("\\")) {
		// The single code point that follows the leading backslashes, if that is all there is.
		const escapedCharacter = /(?<=^\\+).$/su.exec(charSource)?.[0];
		const representedCharacter = String.fromCodePoint(char.value);

		return escapedCharacter !== representedCharacter;
	}

	return false;
}

/**
 * Decides whether a regexpp character that comes from a string or template literal
 * is represented as an acceptable escape sequence.
 * The character's `start` and `end` are used as indices into `codeUnits` to locate
 * the slice of the literal's source text that produced the character.
 * @param {Character} char Character to check.
 * @param {string} nodeSource Source text of the string or template literal that produces the character.
 * @param {CodeUnit[]} codeUnits List of CodeUnit objects of the literal that produces the character.
 * @returns {boolean} Whether the specified regexpp character is represented as an acceptable escape sequence.
 */
function checkForAcceptableEscapeInString(char, nodeSource, codeUnits) {
	// `char.end` is exclusive, so the last code unit of the character sits at `end - 1`.
	const { start: sourceStart } = codeUnits[char.start];
	const { end: sourceEnd } = codeUnits[char.end - 1];

	return checkForAcceptableEscape(
		char,
		nodeSource.slice(sourceStart, sourceEnd),
	);
}

//------------------------------------------------------------------------------
// Rule Definition
//------------------------------------------------------------------------------

/** @type {import('../types').Rule.RuleModule} */
module.exports = {
	meta: {
		type: "problem",

		docs: {
			description:
				"Disallow characters which are made with multiple code points in character class syntax",
			recommended: true,
			url: "https://eslint.org/docs/latest/rules/no-misleading-character-class",
		},

		hasSuggestions: true,

		schema: [
			{
				type: "object",
				properties: {
					allowEscape: {
						type: "boolean",
						default: false,
					},
				},
				additionalProperties: false,
			},
		],

		messages: {
			surrogatePairWithoutUFlag:
				"Unexpected surrogate pair in character class. Use 'u' flag.",
			surrogatePair: "Unexpected surrogate pair in character class.",
			combiningClass: "Unexpected combined character in character class.",
			emojiModifier: "Unexpected modified Emoji in character class.",
			regionalIndicatorSymbol:
				"Unexpected national flag in character class.",
			zwj: "Unexpected joined character sequence in character class.",
			suggestUnicodeFlag: "Add unicode 'u' flag to regex.",
		},
	},
	create(context) {
		const allowEscape = context.options[0]?.allowEscape;
		const sourceCode = context.sourceCode;
		const parser = new RegExpParser();
		const checkedPatternNodes = new Set();

		/**
		 * Verify a given regular expression.
		 * @param {Node} node The node to report.
		 * @param {string} pattern The regular expression pattern to verify.
		 * @param {string} flags The flags of the regular expression.
		 * @param {Function} unicodeFixer Fixer for missing "u" flag.
		 * @returns {void}
		 */
		function verify(node, pattern, flags, unicodeFixer) {
			let patternNode;

			try {
				patternNode = parser.parsePattern(pattern, 0, pattern.length, {
					unicode: flags.includes("u"),
					unicodeSets: flags.includes("v"),
				});
			} catch {
				// Ignore regular expressions with syntax errors
				return;
			}

			let codeUnits = null;

			/**
			 * Checks whether a specified regexpp character is represented as an acceptable escape sequence.
			 * For the purposes of this rule, an escape sequence is considered acceptable if it consists of one or more backslashes followed by the character being escaped.
			 * @param {Character} char Character to check.
			 * @returns {boolean} Whether the specified regexpp character is represented as an acceptable escape sequence.
			 */
			function isAcceptableEscapeSequence(char) {
				if (node.type === "Literal" && node.regex) {
					return checkForAcceptableEscape(char, char.raw);
				}
				if (node.type === "Literal" && typeof node.value === "string") {
					const nodeSource = node.raw;

					codeUnits ??= parseStringLiteral(nodeSource);

					return checkForAcceptableEscapeInString(
						char,
						nodeSource,
						codeUnits,
					);
				}
				if (astUtils.isStaticTemplateLiteral(node)) {
					const nodeSource = sourceCode.getText(node);

					codeUnits ??= parseTemplateToken(nodeSource);

					return checkForAcceptableEscapeInString(
						char,
						nodeSource,
						codeUnits,
					);
				}
				return false;
			}

			const foundKindMatches = new Map();

			visitRegExpAST(patternNode, {
				onCharacterClassEnter(ccNode) {
					for (const unfilteredChars of iterateCharacterSequence(
						ccNode.elements,
					)) {
						let chars;

						if (allowEscape) {
							// Replace escape sequences with null to avoid having them flagged.
							chars = unfilteredChars.map(char =>
								isAcceptableEscapeSequence(char) ? null : char,
							);
						} else {
							chars = unfilteredChars;
						}
						for (const kind of kinds) {
							const matches = findCharacterSequences[kind](
								chars,
								unfilteredChars,
							);

							if (foundKindMatches.has(kind)) {
								foundKindMatches.get(kind).push(...matches);
							} else {
								foundKindMatches.set(kind, [...matches]);
							}
						}
					}
				},
			});

			/**
			 * Finds the report loc(s) for a range of matches.
			 * Only literals and expression-less templates generate granular errors.
			 * @param {Character[][]} matches Lists of individual characters being reported on.
			 * @returns {Location[]} locs for context.report.
			 * @see https://github.com/eslint/eslint/pull/17515
			 */
			function getNodeReportLocations(matches) {
				if (
					!astUtils.isStaticTemplateLiteral(node) &&
					node.type !== "Literal"
				) {
					return matches.length ? [node.loc] : [];
				}
				return matches.map(chars => {
					const firstIndex = chars[0].start;
					const lastIndex = chars.at(-1).end - 1;
					let start;
					let end;

					if (node.type === "TemplateLiteral") {
						const source = sourceCode.getText(node);
						const offset = node.range[0];

						codeUnits ??= parseTemplateToken(source);
						start = offset + codeUnits[firstIndex].start;
						end = offset + codeUnits[lastIndex].end;
					} else if (typeof node.value === "string") {
						// String Literal
						const source = node.raw;
						const offset = node.range[0];

						codeUnits ??= parseStringLiteral(source);
						start = offset + codeUnits[firstIndex].start;
						end = offset + codeUnits[lastIndex].end;
					} else {
						// RegExp Literal
						const offset = node.range[0] + 1; // Add 1 to skip the leading slash.

						start = offset + firstIndex;
						end = offset + lastIndex + 1;
					}

					return {
						start: sourceCode.getLocFromIndex(start),
						end: sourceCode.getLocFromIndex(end),
					};
				});
			}

			for (const [kind, matches] of foundKindMatches) {
				let suggest;

				if (kind === "surrogatePairWithoutUFlag") {
					suggest = [
						{
							messageId: "suggestUnicodeFlag",
							fix: unicodeFixer,
						},
					];
				}

				const locs = getNodeReportLocations(matches);

				for (const loc of locs) {
					context.report({
						node,
						loc,
						messageId: kind,
						suggest,
					});
				}
			}
		}

		return {
			"Literal[regex]"(node) {
				if (checkedPatternNodes.has(node)) {
					return;
				}
				verify(node, node.regex.pattern, node.regex.flags, fixer => {
					if (
						!isValidWithUnicodeFlag(
							context.languageOptions.ecmaVersion,
							node.regex.pattern,
						)
					) {
						return null;
					}

					return fixer.insertTextAfter(node, "u");
				});
			},
			Program(node) {
				const scope = sourceCode.getScope(node);
				const tracker = new ReferenceTracker(scope);

				/*
				 * Iterate calls of RegExp.
				 * E.g., `new RegExp()`, `RegExp()`, `new window.RegExp()`,
				 *       `const {RegExp: a} = window; new a()`, etc...
				 */
				for (const { node: refNode } of tracker.iterateGlobalReferences(
					{
						RegExp: { [CALL]: true, [CONSTRUCT]: true },
					},
				)) {
					let pattern, flags;
					const [patternNode, flagsNode] = refNode.arguments;
					const evaluatedPattern = getStaticValueOrRegex(
						patternNode,
						scope,
					);

					if (!evaluatedPattern) {
						continue;
					}
					if (flagsNode) {
						if (evaluatedPattern.regex) {
							pattern = evaluatedPattern.regex.pattern;
							checkedPatternNodes.add(patternNode);
						} else {
							pattern = String(evaluatedPattern.value);
						}
						flags = getStringIfConstant(flagsNode, scope);
					} else {
						if (evaluatedPattern.regex) {
							continue;
						}
						pattern = String(evaluatedPattern.value);
						flags = "";
					}

					if (typeof flags === "string") {
						verify(patternNode, pattern, flags, fixer => {
							if (
								!isValidWithUnicodeFlag(
									context.languageOptions.ecmaVersion,
									pattern,
								)
							) {
								return null;
							}

							if (refNode.arguments.length === 1) {
								const penultimateToken =
									sourceCode.getLastToken(refNode, {
										skip: 1,
									}); // skip closing parenthesis

								return fixer.insertTextAfter(
									penultimateToken,
									astUtils.isCommaToken(penultimateToken)
										? ' "u",'
										: ', "u"',
								);
							}

							if (
								(flagsNode.type === "Literal" &&
									typeof flagsNode.value === "string") ||
								flagsNode.type === "TemplateLiteral"
							) {
								const range = [
									flagsNode.range[0],
									flagsNode.range[1] - 1,
								];

								return fixer.insertTextAfterRange(range, "u");
							}

							return null;
						});
					}
				}
			},
		};
	},
};