'use strict';

const {
  isUSVString,
  bufferToLowerCasedHeaderName,
} = require('../../core/util');
const { utf8DecodeBytes } = require('./util');
const { HTTP_TOKEN_CODEPOINTS, isomorphicDecode } = require('./data-url');
const { makeEntry } = require('./formdata');
const { webidl } = require('./webidl');
const assert = require('node:assert');
const { File: NodeFile } = require('node:buffer');

const File = globalThis.File ?? NodeFile;

const formDataNameBuffer = Buffer.from('form-data; name="');
const filenameBuffer = Buffer.from('filename');
const dd = Buffer.from('--');
const ddcrlf = Buffer.from('--\r\n');

/**
 * @param {string} chars
 */
function isAsciiString(chars) {
  for (let i = 0; i < chars.length; ++i) {
    if ((chars.charCodeAt(i) & ~0x7f) !== 0) {
      return false;
    }
  }
  return true;
}

/**
 * @see https://andreubotella.github.io/multipart-form-data/#multipart-form-data-boundary
 * @param {string} boundary
 */
function validateBoundary(boundary) {
  const length = boundary.length;

  // - its length is greater or equal to 27 and lesser or equal to 70, and
  if (length < 27 || length > 70) {
    return false;
  }

  // - it is composed by bytes in the ranges 0x30 to 0x39, 0x41 to 0x5A, or
  //   0x61 to 0x7A, inclusive (ASCII alphanumeric), or which are 0x27 ('),
  //   0x2D (-) or 0x5F (_).
  for (let i = 0; i < length; ++i) {
    const cp = boundary.charCodeAt(i);

    if (
      !(
        (cp >= 0x30 && cp <= 0x39) ||
        (cp >= 0x41 && cp <= 0x5a) ||
        (cp >= 0x61 && cp <= 0x7a) ||
        cp === 0x27 ||
        cp === 0x2d ||
        cp === 0x5f
      )
    ) {
      return false;
    }
  }

  return true;
}

/**
 * @see https://andreubotella.github.io/multipart-form-data/#multipart-form-data-parser
 * @param {Buffer} input
 * @param {ReturnType<import('./data-url')['parseMIMEType']>} mimeType
 */
function multipartFormDataParser(input, mimeType) {
  // 1. Assert: mimeType’s essence is "multipart/form-data".
  assert(mimeType !== 'failure' && mimeType.essence === 'multipart/form-data');

  const boundaryString = mimeType.parameters.get('boundary');

  // 2. If mimeType’s parameters["boundary"] does not exist, return failure.
  //    Otherwise, let boundary be the result of UTF-8 decoding mimeType’s
  //    parameters["boundary"].
  if (boundaryString === undefined) {
    throw parsingError('missing boundary in content-type header');
  }

  const boundary = Buffer.from(`--${boundaryString}`, 'utf8');

  // 3. Let entry list be an empty entry list.
  const entryList = [];

  // 4. Let position be a pointer to a byte in input, initially pointing at
  //    the first byte.
  const position = { position: 0 };

  // Note: undici addition, allows leading and trailing CRLFs.
  while (
    input[position.position] === 0x0d &&
    input[position.position + 1] === 0x0a
  ) {
    position.position += 2;
  }

  let trailing = input.length;

  while (input[trailing - 1] === 0x0a && input[trailing - 2] === 0x0d) {
    trailing -= 2;
  }

  if (trailing !== input.length) {
    input = input.subarray(0, trailing);
  }

  // 5. While true:
  while (true) {
    // 5.1. If position points to a sequence of bytes starting with 0x2D 0x2D
    //      (`--`) followed by boundary, advance position by 2 + the length of
    //      boundary. Otherwise, return failure.
    // Note: boundary is padded with 2 dashes already, no need to add 2.
    if (
      input
        .subarray(position.position, position.position + boundary.length)
        .equals(boundary)
    ) {
      position.position += boundary.length;
    } else {
      throw parsingError('expected a value starting with -- and the boundary');
    }

    // 5.2. If position points to the sequence of bytes 0x2D 0x2D 0x0D 0x0A
    //      (`--` followed by CR LF) followed by the end of input, return entry list.
    // Note: a body does NOT need to end with CRLF. It can end with --.
    if (
      (position.position === input.length - 2 &&
        bufferStartsWith(input, dd, position)) ||
      (position.position === input.length - 4 &&
        bufferStartsWith(input, ddcrlf, position))
    ) {
      return entryList;
    }

    // 5.3. If position does not point to a sequence of bytes starting with 0x0D
    //      0x0A (CR LF), return failure.
    if (
      input[position.position] !== 0x0d ||
      input[position.position + 1] !== 0x0a
    ) {
      throw parsingError('expected CRLF');
    }

    // 5.4. Advance position by 2. (This skips past the newline.)
    position.position += 2;

    // 5.5. Let name, filename and contentType be the result of parsing
    //      multipart/form-data headers on input and position, if the result
    //      is not failure. Otherwise, return failure.
    const result = parseMultipartFormDataHeaders(input, position);

    let { name, filename, contentType, encoding } = result;

    // 5.6. Advance position by 2. (This skips past the empty line that marks
    //      the end of the headers.)
    position.position += 2;

    // 5.7. Let body be the empty byte sequence.
    let body;

    // 5.8. Body loop: While position is not past the end of input:
    // TODO: the steps here are completely wrong
    {
      const boundaryIndex = input.indexOf(
        boundary.subarray(2),
        position.position
      );

      if (boundaryIndex === -1) {
        throw parsingError('expected boundary after body');
      }

      body = input.subarray(position.position, boundaryIndex - 4);

      position.position += body.length;

      // Note: position must be advanced by the body's length before being
      // decoded, otherwise the parsing will fail.
      if (encoding === 'base64') {
        body = Buffer.from(body.toString(), 'base64');
      }
    }

    // 5.9. If position does not point to a sequence of bytes starting with
    //      0x0D 0x0A (CR LF), return failure. Otherwise, advance position by 2.
    if (
      input[position.position] !== 0x0d ||
      input[position.position + 1] !== 0x0a
    ) {
      throw parsingError('expected CRLF');
    } else {
      position.position += 2;
    }

    // 5.10. If filename is not null:
    let value;

    if (filename !== null) {
      // 5.10.1. If contentType is null, set contentType to "text/plain".
      contentType ??= 'text/plain';

      // 5.10.2. If contentType is not an ASCII string, set contentType to the empty string.

      // Note: `buffer.isAscii` can be used at zero-cost, but converting a string to a buffer is a high overhead.
      // Content-Type is a relatively small string, so it is faster to use `String#charCodeAt`.
      if (!isAsciiString(contentType)) {
        contentType = '';
      }

      // 5.10.3. Let value be a new File object with name filename, type contentType, and body body.
      value = new File([body], filename, { type: contentType });
    } else {
      // 5.11. Otherwise:

      // 5.11.1. Let value be the UTF-8 decoding without BOM of body.
      value = utf8DecodeBytes(Buffer.from(body));
    }

    // 5.12. Assert: name is a scalar value string and value is either a scalar value string or a File object.
    assert(isUSVString(name));
    assert(
      (typeof value === 'string' && isUSVString(value)) || webidl.is.File(value)
    );

    // 5.13. Create an entry with name and value, and append it to entry list.
    entryList.push(makeEntry(name, value, filename));
  }
}

/**
 * @see https://andreubotella.github.io/multipart-form-data/#parse-multipart-form-data-headers
 * @param {Buffer} input
 * @param {{ position: number }} position
 */
function parseMultipartFormDataHeaders(input, position) {
  // 1. Let name, filename and contentType be null.
  let name = null;
  let filename = null;
  let contentType = null;
  let encoding = null;

  // 2. While true:
  while (true) {
    // 2.1. If position points to a sequence of bytes starting with 0x0D 0x0A (CR LF):
    if (
      input[position.position] === 0x0d &&
      input[position.position + 1] === 0x0a
    ) {
      // 2.1.1. If name is null, return failure.
      if (name === null) {
        throw parsingError('header name is null');
      }

      // 2.1.2. Return name, filename and contentType.
      return { name, filename, contentType, encoding };
    }

    // 2.2. Let header name be the result of collecting a sequence of bytes that are
    //      not 0x0A (LF), 0x0D (CR) or 0x3A (:), given position.
    let headerName = collectASequenceOfBytes(
      (char) => char !== 0x0a && char !== 0x0d && char !== 0x3a,
      input,
      position
    );

    // 2.3. Remove any HTTP tab or space bytes from the start or end of header name.
    headerName = removeChars(
      headerName,
      true,
      true,
      (char) => char === 0x9 || char === 0x20
    );

    // 2.4. If header name does not match the field-name token production, return failure.
    if (!HTTP_TOKEN_CODEPOINTS.test(headerName.toString())) {
      throw parsingError(
        'header name does not match the field-name token production'
      );
    }

    // 2.5. If the byte at position is not 0x3A (:), return failure.
    if (input[position.position] !== 0x3a) {
      throw parsingError('expected :');
    }

    // 2.6. Advance position by 1.
    position.position++;

    // 2.7. Collect a sequence of bytes that are HTTP tab or space bytes given position.
    //      (Do nothing with those bytes.)
    collectASequenceOfBytes(
      (char) => char === 0x20 || char === 0x09,
      input,
      position
    );

    // 2.8. Byte-lowercase header name and switch on the result:
    switch (bufferToLowerCasedHeaderName(headerName)) {
      case 'content-disposition': {
        // 1. Set name and filename to null.
        name = filename = null;

        // 2. If position does not point to a sequence of bytes starting with
        //    `form-data; name="`, return failure.
        if (!bufferStartsWith(input, formDataNameBuffer, position)) {
          throw parsingError(
            'expected form-data; name=" for content-disposition header'
          );
        }

        // 3. Advance position so it points at the byte after the next 0x22 (")
        //    byte (the one in the sequence of bytes matched above).
        position.position += 17;

        // 4. Set name to the result of parsing a multipart/form-data name given
        //    input and position, if the result is not failure. Otherwise, return
        //    failure.
        name = parseMultipartFormDataName(input, position);

        // 5. If position points to a sequence of bytes starting with `; filename="`:
        if (
          input[position.position] === 0x3b /* ; */ &&
          input[position.position + 1] === 0x20 /* ' ' */
        ) {
          const at = { position: position.position + 2 };

          if (bufferStartsWith(input, filenameBuffer, at)) {
            if (input[at.position + 8] === 0x2a /* '*' */) {
              at.position += 10; // skip past filename*=

              // Remove leading http tab and spaces. See RFC for examples.
              // https://datatracker.ietf.org/doc/html/rfc6266#section-5
              collectASequenceOfBytes(
                (char) => char === 0x20 || char === 0x09,
                input,
                at
              );

              const headerValue = collectASequenceOfBytes(
                (char) => char !== 0x20 && char !== 0x0d && char !== 0x0a, // ' ' or CRLF
                input,
                at
              );

              if (
                (headerValue[0] !== 0x75 && headerValue[0] !== 0x55) || // u or U
                (headerValue[1] !== 0x74 && headerValue[1] !== 0x54) || // t or T
                (headerValue[2] !== 0x66 && headerValue[2] !== 0x46) || // f or F
                headerValue[3] !== 0x2d || // -
                headerValue[4] !== 0x38 // 8
              ) {
                throw parsingError("unknown encoding, expected utf-8''");
              }

              // skip utf-8''
              filename = decodeURIComponent(
                new TextDecoder().decode(headerValue.subarray(7))
              );

              position.position = at.position;
            } else {
              // 1. Advance position so it points at the byte after the next 0x22 (") byte
              //    (the one in the sequence of bytes matched above).
              position.position += 11;

              // Remove leading http tab and spaces. See RFC for examples.
              // https://datatracker.ietf.org/doc/html/rfc6266#section-5
              collectASequenceOfBytes(
                (char) => char === 0x20 || char === 0x09,
                input,
                position
              );

              position.position++; // skip past " after removing whitespace

              // 2. Set filename to the result of parsing a multipart/form-data name given
              //    input and position, if the result is not failure. Otherwise, return failure.
              filename = parseMultipartFormDataName(input, position);
            }
          }
        }

        break;
      }
      case 'content-type': {
        // 1. Let header value be the result of collecting a sequence of bytes that are
        //    not 0x0A (LF) or 0x0D (CR), given position.
        let headerValue = collectASequenceOfBytes(
          (char) => char !== 0x0a && char !== 0x0d,
          input,
          position
        );

        // 2. Remove any HTTP tab or space bytes from the end of header value.
        headerValue = removeChars(
          headerValue,
          false,
          true,
          (char) => char === 0x9 || char === 0x20
        );

        // 3. Set contentType to the isomorphic decoding of header value.
        contentType = isomorphicDecode(headerValue);

        break;
      }
      case 'content-transfer-encoding': {
        let headerValue = collectASequenceOfBytes(
          (char) => char !== 0x0a && char !== 0x0d,
          input,
          position
        );

        headerValue = removeChars(
          headerValue,
          false,
          true,
          (char) => char === 0x9 || char === 0x20
        );

        encoding = isomorphicDecode(headerValue);

        break;
      }
      default: {
        // Collect a sequence of bytes that are not 0x0A (LF) or 0x0D (CR), given position.
        // (Do nothing with those bytes.)
        collectASequenceOfBytes(
          (char) => char !== 0x0a && char !== 0x0d,
          input,
          position
        );
      }
    }

    // 2.9. If position does not point to a sequence of bytes starting with 0x0D 0x0A
    //      (CR LF), return failure. Otherwise, advance position by 2 (past the newline).
    if (
      input[position.position] !== 0x0d &&
      input[position.position + 1] !== 0x0a
    ) {
      throw parsingError('expected CRLF');
    } else {
      position.position += 2;
    }
  }
}

/**
 * @see https://andreubotella.github.io/multipart-form-data/#parse-a-multipart-form-data-name
 * @param {Buffer} input
 * @param {{ position: number }} position
 */
function parseMultipartFormDataName(input, position) {
  // 1. Assert: The byte at (position - 1) is 0x22 (").
  assert(input[position.position - 1] === 0x22);

  // 2. Let name be the result of collecting a sequence of bytes that are not 0x0A (LF), 0x0D (CR) or 0x22 ("), given position.
  /** @type {string | Buffer} */
  let name = collectASequenceOfBytes(
    (char) => char !== 0x0a && char !== 0x0d && char !== 0x22,
    input,
    position
  );

  // 3. If the byte at position is not 0x22 ("), return failure. Otherwise, advance position by 1.
  if (input[position.position] !== 0x22) {
    throw parsingError('expected "');
  } else {
    position.position++;
  }

  // 4. Replace any occurrence of the following subsequences in name with the given byte:
  // - `%0A`: 0x0A (LF)
  // - `%0D`: 0x0D (CR)
  // - `%22`: 0x22 (")
  name = new TextDecoder()
    .decode(name)
    .replace(/%0A/gi, '\n')
    .replace(/%0D/gi, '\r')
    .replace(/%22/g, '"');

  // 5. Return the UTF-8 decoding without BOM of name.
  return name;
}

/**
 * @param {(char: number) => boolean} condition
 * @param {Buffer} input
 * @param {{ position: number }} position
 */
function collectASequenceOfBytes(condition, input, position) {
  let start = position.position;

  while (start < input.length && condition(input[start])) {
    ++start;
  }

  return input.subarray(position.position, (position.position = start));
}

/**
 * @param {Buffer} buf
 * @param {boolean} leading
 * @param {boolean} trailing
 * @param {(charCode: number) => boolean} predicate
 * @returns {Buffer}
 */
function removeChars(buf, leading, trailing, predicate) {
  let lead = 0;
  let trail = buf.length - 1;

  if (leading) {
    while (lead < buf.length && predicate(buf[lead])) lead++;
  }

  if (trailing) {
    while (trail > 0 && predicate(buf[trail])) trail--;
  }

  return lead === 0 && trail === buf.length - 1 ?
      buf
    : buf.subarray(lead, trail + 1);
}

/**
 * Checks if {@param buffer} starts with {@param start}
 * @param {Buffer} buffer
 * @param {Buffer} start
 * @param {{ position: number }} position
 */
function bufferStartsWith(buffer, start, position) {
  if (buffer.length < start.length) {
    return false;
  }

  for (let i = 0; i < start.length; i++) {
    if (start[i] !== buffer[position.position + i]) {
      return false;
    }
  }

  return true;
}

function parsingError(cause) {
  return new TypeError('Failed to parse body as FormData.', {
    cause: new TypeError(cause),
  });
}

module.exports = {
  multipartFormDataParser,
  validateBoundary,
};