Adding upstream version 5.28.2+dfsg1+~cs23.11.12.3.upstream/5.28.2+dfsg1+_cs23.11.12.3 upstream

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-21 20:56:19 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-21 20:56:19 +0000
commit: 0b6210cd37b68b94252cb798598b12974a20e1c1 (patch)
tree: e371686554a877842d95aa94f100bee552ff2a8e /lib/fetch/dataURL.js
parent: Initial commit. (diff)
download: node-undici-0b6210cd37b68b94252cb798598b12974a20e1c1.tar.xz
node-undici-0b6210cd37b68b94252cb798598b12974a20e1c1.zip
1 files changed, 627 insertions, 0 deletions
diff --git a/lib/fetch/dataURL.js b/lib/fetch/dataURL.js
new file mode 100644
index 0000000..7b6a606
--- /dev/null
+++ b/lib/fetch/dataURL.js
@@ -0,0 +1,627 @@
+const assert = require('assert')
+const { atob } = require('buffer')
+const { isomorphicDecode } = require('./util')
+
+const encoder = new TextEncoder()
+
+/**
+ * @see https://mimesniff.spec.whatwg.org/#http-token-code-point
+ */
+const HTTP_TOKEN_CODEPOINTS = /^[!#$%&'*+-.^_|~A-Za-z0-9]+$/
+const HTTP_WHITESPACE_REGEX = /(\u000A|\u000D|\u0009|\u0020)/ // eslint-disable-line
+/**
+ * @see https://mimesniff.spec.whatwg.org/#http-quoted-string-token-code-point
+ */
+const HTTP_QUOTED_STRING_TOKENS = /[\u0009|\u0020-\u007E|\u0080-\u00FF]/ // eslint-disable-line
+
+// https://fetch.spec.whatwg.org/#data-url-processor
+/** @param {URL} dataURL */
+function dataURLProcessor (dataURL) {
+  // 1. Assert: dataURL’s scheme is "data".
+  assert(dataURL.protocol === 'data:')
+
+  // 2. Let input be the result of running the URL
+  // serializer on dataURL with exclude fragment
+  // set to true.
+  let input = URLSerializer(dataURL, true)
+
+  // 3. Remove the leading "data:" string from input.
+  input = input.slice(5)
+
+  // 4. Let position point at the start of input.
+  const position = { position: 0 }
+
+  // 5. Let mimeType be the result of collecting a
+  // sequence of code points that are not equal
+  // to U+002C (,), given position.
+  let mimeType = collectASequenceOfCodePointsFast(
+    ',',
+    input,
+    position
+  )
+
+  // 6. Strip leading and trailing ASCII whitespace
+  // from mimeType.
+  // Undici implementation note: we need to store the
+  // length because if the mimetype has spaces removed,
+  // the wrong amount will be sliced from the input in
+  // step #9
+  const mimeTypeLength = mimeType.length
+  mimeType = removeASCIIWhitespace(mimeType, true, true)
+
+  // 7. If position is past the end of input, then
+  // return failure
+  if (position.position >= input.length) {
+    return 'failure'
+  }
+
+  // 8. Advance position by 1.
+  position.position++
+
+  // 9. Let encodedBody be the remainder of input.
+  const encodedBody = input.slice(mimeTypeLength + 1)
+
+  // 10. Let body be the percent-decoding of encodedBody.
+  let body = stringPercentDecode(encodedBody)
+
+  // 11. If mimeType ends with U+003B (;), followed by
+  // zero or more U+0020 SPACE, followed by an ASCII
+  // case-insensitive match for "base64", then:
+  if (/;(\u0020){0,}base64$/i.test(mimeType)) {
+    // 1. Let stringBody be the isomorphic decode of body.
+    const stringBody = isomorphicDecode(body)
+
+    // 2. Set body to the forgiving-base64 decode of
+    // stringBody.
+    body = forgivingBase64(stringBody)
+
+    // 3. If body is failure, then return failure.
+    if (body === 'failure') {
+      return 'failure'
+    }
+
+    // 4. Remove the last 6 code points from mimeType.
+    mimeType = mimeType.slice(0, -6)
+
+    // 5. Remove trailing U+0020 SPACE code points from mimeType,
+    // if any.
+    mimeType = mimeType.replace(/(\u0020)+$/, '')
+
+    // 6. Remove the last U+003B (;) code point from mimeType.
+    mimeType = mimeType.slice(0, -1)
+  }
+
+  // 12. If mimeType starts with U+003B (;), then prepend
+  // "text/plain" to mimeType.
+  if (mimeType.startsWith(';')) {
+    mimeType = 'text/plain' + mimeType
+  }
+
+  // 13. Let mimeTypeRecord be the result of parsing
+  // mimeType.
+  let mimeTypeRecord = parseMIMEType(mimeType)
+
+  // 14. If mimeTypeRecord is failure, then set
+  // mimeTypeRecord to text/plain;charset=US-ASCII.
+  if (mimeTypeRecord === 'failure') {
+    mimeTypeRecord = parseMIMEType('text/plain;charset=US-ASCII')
+  }
+
+  // 15. Return a new data: URL struct whose MIME
+  // type is mimeTypeRecord and body is body.
+  // https://fetch.spec.whatwg.org/#data-url-struct
+  return { mimeType: mimeTypeRecord, body }
+}
+
+// https://url.spec.whatwg.org/#concept-url-serializer
+/**
+ * @param {URL} url
+ * @param {boolean} excludeFragment
+ */
+function URLSerializer (url, excludeFragment = false) {
+  if (!excludeFragment) {
+    return url.href
+  }
+
+  const href = url.href
+  const hashLength = url.hash.length
+
+  return hashLength === 0 ? href : href.substring(0, href.length - hashLength)
+}
+
+// https://infra.spec.whatwg.org/#collect-a-sequence-of-code-points
+/**
+ * @param {(char: string) => boolean} condition
+ * @param {string} input
+ * @param {{ position: number }} position
+ */
+function collectASequenceOfCodePoints (condition, input, position) {
+  // 1. Let result be the empty string.
+  let result = ''
+
+  // 2. While position doesn’t point past the end of input and the
+  // code point at position within input meets the condition condition:
+  while (position.position < input.length && condition(input[position.position])) {
+    // 1. Append that code point to the end of result.
+    result += input[position.position]
+
+    // 2. Advance position by 1.
+    position.position++
+  }
+
+  // 3. Return result.
+  return result
+}
+
+/**
+ * A faster collectASequenceOfCodePoints that only works when comparing a single character.
+ * @param {string} char
+ * @param {string} input
+ * @param {{ position: number }} position
+ */
+function collectASequenceOfCodePointsFast (char, input, position) {
+  const idx = input.indexOf(char, position.position)
+  const start = position.position
+
+  if (idx === -1) {
+    position.position = input.length
+    return input.slice(start)
+  }
+
+  position.position = idx
+  return input.slice(start, position.position)
+}
+
+// https://url.spec.whatwg.org/#string-percent-decode
+/** @param {string} input */
+function stringPercentDecode (input) {
+  // 1. Let bytes be the UTF-8 encoding of input.
+  const bytes = encoder.encode(input)
+
+  // 2. Return the percent-decoding of bytes.
+  return percentDecode(bytes)
+}
+
+// https://url.spec.whatwg.org/#percent-decode
+/** @param {Uint8Array} input */
+function percentDecode (input) {
+  // 1. Let output be an empty byte sequence.
+  /** @type {number[]} */
+  const output = []
+
+  // 2. For each byte byte in input:
+  for (let i = 0; i < input.length; i++) {
+    const byte = input[i]
+
+    // 1. If byte is not 0x25 (%), then append byte to output.
+    if (byte !== 0x25) {
+      output.push(byte)
+
+    // 2. Otherwise, if byte is 0x25 (%) and the next two bytes
+    // after byte in input are not in the ranges
+    // 0x30 (0) to 0x39 (9), 0x41 (A) to 0x46 (F),
+    // and 0x61 (a) to 0x66 (f), all inclusive, append byte
+    // to output.
+    } else if (
+      byte === 0x25 &&
+      !/^[0-9A-Fa-f]{2}$/i.test(String.fromCharCode(input[i + 1], input[i + 2]))
+    ) {
+      output.push(0x25)
+
+    // 3. Otherwise:
+    } else {
+      // 1. Let bytePoint be the two bytes after byte in input,
+      // decoded, and then interpreted as hexadecimal number.
+      const nextTwoBytes = String.fromCharCode(input[i + 1], input[i + 2])
+      const bytePoint = Number.parseInt(nextTwoBytes, 16)
+
+      // 2. Append a byte whose value is bytePoint to output.
+      output.push(bytePoint)
+
+      // 3. Skip the next two bytes in input.
+      i += 2
+    }
+  }
+
+  // 3. Return output.
+  return Uint8Array.from(output)
+}
+
+// https://mimesniff.spec.whatwg.org/#parse-a-mime-type
+/** @param {string} input */
+function parseMIMEType (input) {
+  // 1. Remove any leading and trailing HTTP whitespace
+  // from input.
+  input = removeHTTPWhitespace(input, true, true)
+
+  // 2. Let position be a position variable for input,
+  // initially pointing at the start of input.
+  const position = { position: 0 }
+
+  // 3. Let type be the result of collecting a sequence
+  // of code points that are not U+002F (/) from
+  // input, given position.
+  const type = collectASequenceOfCodePointsFast(
+    '/',
+    input,
+    position
+  )
+
+  // 4. If type is the empty string or does not solely
+  // contain HTTP token code points, then return failure.
+  // https://mimesniff.spec.whatwg.org/#http-token-code-point
+  if (type.length === 0 || !HTTP_TOKEN_CODEPOINTS.test(type)) {
+    return 'failure'
+  }
+
+  // 5. If position is past the end of input, then return
+  // failure
+  if (position.position > input.length) {
+    return 'failure'
+  }
+
+  // 6. Advance position by 1. (This skips past U+002F (/).)
+  position.position++
+
+  // 7. Let subtype be the result of collecting a sequence of
+  // code points that are not U+003B (;) from input, given
+  // position.
+  let subtype = collectASequenceOfCodePointsFast(
+    ';',
+    input,
+    position
+  )
+
+  // 8. Remove any trailing HTTP whitespace from subtype.
+  subtype = removeHTTPWhitespace(subtype, false, true)
+
+  // 9. If subtype is the empty string or does not solely
+  // contain HTTP token code points, then return failure.
+  if (subtype.length === 0 || !HTTP_TOKEN_CODEPOINTS.test(subtype)) {
+    return 'failure'
+  }
+
+  const typeLowercase = type.toLowerCase()
+  const subtypeLowercase = subtype.toLowerCase()
+
+  // 10. Let mimeType be a new MIME type record whose type
+  // is type, in ASCII lowercase, and subtype is subtype,
+  // in ASCII lowercase.
+  // https://mimesniff.spec.whatwg.org/#mime-type
+  const mimeType = {
+    type: typeLowercase,
+    subtype: subtypeLowercase,
+    /** @type {Map<string, string>} */
+    parameters: new Map(),
+    // https://mimesniff.spec.whatwg.org/#mime-type-essence
+    essence: `${typeLowercase}/${subtypeLowercase}`
+  }
+
+  // 11. While position is not past the end of input:
+  while (position.position < input.length) {
+    // 1. Advance position by 1. (This skips past U+003B (;).)
+    position.position++
+
+    // 2. Collect a sequence of code points that are HTTP
+    // whitespace from input given position.
+    collectASequenceOfCodePoints(
+      // https://fetch.spec.whatwg.org/#http-whitespace
+      char => HTTP_WHITESPACE_REGEX.test(char),
+      input,
+      position
+    )
+
+    // 3. Let parameterName be the result of collecting a
+    // sequence of code points that are not U+003B (;)
+    // or U+003D (=) from input, given position.
+    let parameterName = collectASequenceOfCodePoints(
+      (char) => char !== ';' && char !== '=',
+      input,
+      position
+    )
+
+    // 4. Set parameterName to parameterName, in ASCII
+    // lowercase.
+    parameterName = parameterName.toLowerCase()
+
+    // 5. If position is not past the end of input, then:
+    if (position.position < input.length) {
+      // 1. If the code point at position within input is
+      // U+003B (;), then continue.
+      if (input[position.position] === ';') {
+        continue
+      }
+
+      // 2. Advance position by 1. (This skips past U+003D (=).)
+      position.position++
+    }
+
+    // 6. If position is past the end of input, then break.
+    if (position.position > input.length) {
+      break
+    }
+
+    // 7. Let parameterValue be null.
+    let parameterValue = null
+
+    // 8. If the code point at position within input is
+    // U+0022 ("), then:
+    if (input[position.position] === '"') {
+      // 1. Set parameterValue to the result of collecting
+      // an HTTP quoted string from input, given position
+      // and the extract-value flag.
+      parameterValue = collectAnHTTPQuotedString(input, position, true)
+
+      // 2. Collect a sequence of code points that are not
+      // U+003B (;) from input, given position.
+      collectASequenceOfCodePointsFast(
+        ';',
+        input,
+        position
+      )
+
+    // 9. Otherwise:
+    } else {
+      // 1. Set parameterValue to the result of collecting
+      // a sequence of code points that are not U+003B (;)
+      // from input, given position.
+      parameterValue = collectASequenceOfCodePointsFast(
+        ';',
+        input,
+        position
+      )
+
+      // 2. Remove any trailing HTTP whitespace from parameterValue.
+      parameterValue = removeHTTPWhitespace(parameterValue, false, true)
+
+      // 3. If parameterValue is the empty string, then continue.
+      if (parameterValue.length === 0) {
+        continue
+      }
+    }
+
+    // 10. If all of the following are true
+    // - parameterName is not the empty string
+    // - parameterName solely contains HTTP token code points
+    // - parameterValue solely contains HTTP quoted-string token code points
+    // - mimeType’s parameters[parameterName] does not exist
+    // then set mimeType’s parameters[parameterName] to parameterValue.
+    if (
+      parameterName.length !== 0 &&
+      HTTP_TOKEN_CODEPOINTS.test(parameterName) &&
+      (parameterValue.length === 0 || HTTP_QUOTED_STRING_TOKENS.test(parameterValue)) &&
+      !mimeType.parameters.has(parameterName)
+    ) {
+      mimeType.parameters.set(parameterName, parameterValue)
+    }
+  }
+
+  // 12. Return mimeType.
+  return mimeType
+}
+
+// https://infra.spec.whatwg.org/#forgiving-base64-decode
+/** @param {string} data */
+function forgivingBase64 (data) {
+  // 1. Remove all ASCII whitespace from data.
+  data = data.replace(/[\u0009\u000A\u000C\u000D\u0020]/g, '')  // eslint-disable-line
+
+  // 2. If data’s code point length divides by 4 leaving
+  // no remainder, then:
+  if (data.length % 4 === 0) {
+    // 1. If data ends with one or two U+003D (=) code points,
+    // then remove them from data.
+    data = data.replace(/=?=$/, '')
+  }
+
+  // 3. If data’s code point length divides by 4 leaving
+  // a remainder of 1, then return failure.
+  if (data.length % 4 === 1) {
+    return 'failure'
+  }
+
+  // 4. If data contains a code point that is not one of
+  //  U+002B (+)
+  //  U+002F (/)
+  //  ASCII alphanumeric
+  // then return failure.
+  if (/[^+/0-9A-Za-z]/.test(data)) {
+    return 'failure'
+  }
+
+  const binary = atob(data)
+  const bytes = new Uint8Array(binary.length)
+
+  for (let byte = 0; byte < binary.length; byte++) {
+    bytes[byte] = binary.charCodeAt(byte)
+  }
+
+  return bytes
+}
+
+// https://fetch.spec.whatwg.org/#collect-an-http-quoted-string
+// tests: https://fetch.spec.whatwg.org/#example-http-quoted-string
+/**
+ * @param {string} input
+ * @param {{ position: number }} position
+ * @param {boolean?} extractValue
+ */
+function collectAnHTTPQuotedString (input, position, extractValue) {
+  // 1. Let positionStart be position.
+  const positionStart = position.position
+
+  // 2. Let value be the empty string.
+  let value = ''
+
+  // 3. Assert: the code point at position within input
+  // is U+0022 (").
+  assert(input[position.position] === '"')
+
+  // 4. Advance position by 1.
+  position.position++
+
+  // 5. While true:
+  while (true) {
+    // 1. Append the result of collecting a sequence of code points
+    // that are not U+0022 (") or U+005C (\) from input, given
+    // position, to value.
+    value += collectASequenceOfCodePoints(
+      (char) => char !== '"' && char !== '\\',
+      input,
+      position
+    )
+
+    // 2. If position is past the end of input, then break.
+    if (position.position >= input.length) {
+      break
+    }
+
+    // 3. Let quoteOrBackslash be the code point at position within
+    // input.
+    const quoteOrBackslash = input[position.position]
+
+    // 4. Advance position by 1.
+    position.position++
+
+    // 5. If quoteOrBackslash is U+005C (\), then:
+    if (quoteOrBackslash === '\\') {
+      // 1. If position is past the end of input, then append
+      // U+005C (\) to value and break.
+      if (position.position >= input.length) {
+        value += '\\'
+        break
+      }
+
+      // 2. Append the code point at position within input to value.
+      value += input[position.position]
+
+      // 3. Advance position by 1.
+      position.position++
+
+    // 6. Otherwise:
+    } else {
+      // 1. Assert: quoteOrBackslash is U+0022 (").
+      assert(quoteOrBackslash === '"')
+
+      // 2. Break.
+      break
+    }
+  }
+
+  // 6. If the extract-value flag is set, then return value.
+  if (extractValue) {
+    return value
+  }
+
+  // 7. Return the code points from positionStart to position,
+  // inclusive, within input.
+  return input.slice(positionStart, position.position)
+}
+
+/**
+ * @see https://mimesniff.spec.whatwg.org/#serialize-a-mime-type
+ */
+function serializeAMimeType (mimeType) {
+  assert(mimeType !== 'failure')
+  const { parameters, essence } = mimeType
+
+  // 1. Let serialization be the concatenation of mimeType’s
+  //    type, U+002F (/), and mimeType’s subtype.
+  let serialization = essence
+
+  // 2. For each name → value of mimeType’s parameters:
+  for (let [name, value] of parameters.entries()) {
+    // 1. Append U+003B (;) to serialization.
+    serialization += ';'
+
+    // 2. Append name to serialization.
+    serialization += name
+
+    // 3. Append U+003D (=) to serialization.
+    serialization += '='
+
+    // 4. If value does not solely contain HTTP token code
+    //    points or value is the empty string, then:
+    if (!HTTP_TOKEN_CODEPOINTS.test(value)) {
+      // 1. Precede each occurence of U+0022 (") or
+      //    U+005C (\) in value with U+005C (\).
+      value = value.replace(/(\\|")/g, '\\$1')
+
+      // 2. Prepend U+0022 (") to value.
+      value = '"' + value
+
+      // 3. Append U+0022 (") to value.
+      value += '"'
+    }
+
+    // 5. Append value to serialization.
+    serialization += value
+  }
+
+  // 3. Return serialization.
+  return serialization
+}
+
+/**
+ * @see https://fetch.spec.whatwg.org/#http-whitespace
+ * @param {string} char
+ */
+function isHTTPWhiteSpace (char) {
+  return char === '\r' || char === '\n' || char === '\t' || char === ' '
+}
+
+/**
+ * @see https://fetch.spec.whatwg.org/#http-whitespace
+ * @param {string} str
+ */
+function removeHTTPWhitespace (str, leading = true, trailing = true) {
+  let lead = 0
+  let trail = str.length - 1
+
+  if (leading) {
+    for (; lead < str.length && isHTTPWhiteSpace(str[lead]); lead++);
+  }
+
+  if (trailing) {
+    for (; trail > 0 && isHTTPWhiteSpace(str[trail]); trail--);
+  }
+
+  return str.slice(lead, trail + 1)
+}
+
+/**
+ * @see https://infra.spec.whatwg.org/#ascii-whitespace
+ * @param {string} char
+ */
+function isASCIIWhitespace (char) {
+  return char === '\r' || char === '\n' || char === '\t' || char === '\f' || char === ' '
+}
+
+/**
+ * @see https://infra.spec.whatwg.org/#strip-leading-and-trailing-ascii-whitespace
+ */
+function removeASCIIWhitespace (str, leading = true, trailing = true) {
+  let lead = 0
+  let trail = str.length - 1
+
+  if (leading) {
+    for (; lead < str.length && isASCIIWhitespace(str[lead]); lead++);
+  }
+
+  if (trailing) {
+    for (; trail > 0 && isASCIIWhitespace(str[trail]); trail--);
+  }
+
+  return str.slice(lead, trail + 1)
+}
+
+module.exports = {
+  dataURLProcessor,
+  URLSerializer,
+  collectASequenceOfCodePoints,
+  collectASequenceOfCodePointsFast,
+  stringPercentDecode,
+  parseMIMEType,
+  collectAnHTTPQuotedString,
+  serializeAMimeType
+}
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-05-21 20:56:19 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-05-21 20:56:19 +0000
commit	0b6210cd37b68b94252cb798598b12974a20e1c1 (patch)
tree	e371686554a877842d95aa94f100bee552ff2a8e /lib/fetch/dataURL.js
parent	Initial commit. (diff)
download	node-undici-0b6210cd37b68b94252cb798598b12974a20e1c1.tar.xz node-undici-0b6210cd37b68b94252cb798598b12974a20e1c1.zip