/**
 * @fileoverview Rule to forbid control characters from regular expressions.
 * @author Nicholas C. Zakas
 */

"use strict";

const RegExpValidator = require("@eslint-community/regexpp").RegExpValidator;
/**
 * Singleton that gathers the control characters (code points 0x00 through 0x1f)
 * found while a `RegExpValidator` walks a pattern. The instance hands itself to
 * the validator's constructor, so its `onPatternEnter` and `onCharacter` methods
 * act as the validator's callbacks.
 */
const collector = new (class {
	constructor() {
		this._source = "";
		this._controlChars = [];
		this._validator = new RegExpValidator(this);
	}

	/**
	 * Discards anything collected so far each time a pattern parse begins.
	 * @returns {void}
	 */
	onPatternEnter() {
		/*
		 * A single `validatePattern` call may run the parse more than once, so
		 * the results must be reset here and not only in `collectControlChars`.
		 *
		 * `/(?<a>\x1f)/` is one such case: per Annex B, when the regex contains
		 * a `GroupName` and the `u` flag is absent, `ParseText` is called twice.
		 * See https://tc39.es/ecma262/2023/multipage/additional-ecmascript-features-for-web-browsers.html#sec-parsepattern-annexb
		 */
		this._controlChars = [];
	}

	/**
	 * Records a character when it is a control character that appears in the
	 * source either as the raw character itself or as a `\x` / `\u` escape.
	 * @param {number} start Start index of the character in the pattern source.
	 * @param {number} end End index of the character in the pattern source.
	 * @param {number} cp Code point of the character.
	 * @returns {void}
	 */
	onCharacter(start, end, cp) {
		const inControlRange = cp >= 0x00 && cp <= 0x1f;

		if (!inControlRange) {
			return;
		}

		const rawText = this._source.slice(start, end);
		const writtenRaw = this._source.codePointAt(start) === cp;

		// Other spellings of the same code point are deliberately not recorded.
		if (
			writtenRaw ||
			rawText.startsWith("\\x") ||
			rawText.startsWith("\\u")
		) {
			// Always reported in two-digit `\xHH` form, whatever the source spelling.
			const hex = cp.toString(16).padStart(2, "0");

			this._controlChars.push(`\\x${hex}`);
		}
	}

	/**
	 * Validates the given pattern and returns the control characters seen.
	 * @param {string} regexpStr Pattern source to scan.
	 * @param {string|null} flags Flags string; any non-string value is treated as "no flags".
	 * @returns {string[]} Control characters found, each formatted as `\xHH`.
	 */
	collectControlChars(regexpStr, flags) {
		const knownFlags = typeof flags === "string" ? flags : "";
		const parseOptions = {
			unicode: knownFlags.includes("u"),
			unicodeSets: knownFlags.includes("v"),
		};

		this._source = regexpStr;
		this._controlChars = [];

		try {
			// Validation invokes the `onCharacter` hook.
			this._validator.validatePattern(
				regexpStr,
				undefined,
				undefined,
				parseOptions,
			);
		} catch {
			// Syntax errors in the RegExp are ignored.
		}

		return this._controlChars;
	}
})();

//------------------------------------------------------------------------------
// Rule Definition
//------------------------------------------------------------------------------

/** @type {import('../types').Rule.RuleModule} */
module.exports = {
	meta: {
		type: "problem",

		docs: {
			description: "Disallow control characters in regular expressions",
			recommended: true,
			url: "https://eslint.org/docs/latest/rules/no-control-regex",
		},

		schema: [],

		messages: {
			unexpected:
				"Unexpected control character(s) in regular expression: {{controlChars}}.",
		},
	},

	create(context) {
		/**
		 * Get the regex expression
		 * @param {ASTNode} node `Literal` node to evaluate
		 * @returns {{ pattern: string, flags: string | null } | null} Regex if found (the given node is either a regex literal
		 * or a string literal that is the pattern argument of a RegExp constructor call). Otherwise `null`. If flags cannot be determined,
		 * the `flags` property will be `null`.
		 * @private
		 */
		function getRegExp(node) {
			if (node.regex) {
				return node.regex;
			}
			if (
				typeof node.value === "string" &&
				(node.parent.type === "NewExpression" ||
					node.parent.type === "CallExpression") &&
				node.parent.callee.type === "Identifier" &&
				node.parent.callee.name === "RegExp" &&
				node.parent.arguments[0] === node
			) {
				const pattern = node.value;
				const flags =
					node.parent.arguments.length > 1 &&
					node.parent.arguments[1].type === "Literal" &&
					typeof node.parent.arguments[1].value === "string"
						? node.parent.arguments[1].value
						: null;

				return { pattern, flags };
			}

			return null;
		}

		return {
			Literal(node) {
				const regExp = getRegExp(node);

				if (regExp) {
					const { pattern, flags } = regExp;
					const controlCharacters = collector.collectControlChars(
						pattern,
						flags,
					);

					if (controlCharacters.length > 0) {
						context.report({
							node,
							messageId: "unexpected",
							data: {
								controlChars: controlCharacters.join(", "),
							},
						});
					}
				}
			},
		};
	},
};