'use strict';

const os = require('node:os');
const rawTransferSupported = require('./supported.js');
const {
  parseSyncRaw: parseSyncRawBinding,
  parseAsyncRaw: parseAsyncRawBinding,
  getBufferOffset,
} = require('../bindings.js');
const { BUFFER_SIZE, BUFFER_ALIGN, IS_TS_FLAG_POS } = require('../generated/constants.js');

module.exports = {
  parseSyncRawImpl,
  parseAsyncRawImpl,
  prepareRaw,
  isJsAst,
  returnBufferToCache,
};

// Throw an error if running on a platform that raw transfer doesn't support.
//
// Note: This module is lazy-loaded only when the user calls `parseSync` or `parseAsync` with
// `experimentalRawTransfer` or `experimentalLazy` options, or calls `experimentalGetLazyVisitor`.
if (!rawTransferSupported()) {
  throw new Error(
    '`experimentalRawTransfer` and `experimentalLazy` options are not supported ' +
    'on 32-bit or big-endian systems, versions of NodeJS prior to v22.0.0, ' +
    'versions of Deno prior to v2.0.0, or other runtimes',
  );
}

/**
 * Parse JS/TS source synchronously on the current thread using raw transfer.
 *
 * Convert the buffer returned by Rust to a JS object with the provided `convert` function.
 *
 * This function contains logic shared by both `parseSyncRaw` and `parseSyncLazy`.
 *
 * @param {string} filename - Filename
 * @param {string} sourceText - Source text of file
 * @param {Object|undefined} options - Parsing options
 * @param {function} convert - Function to convert the buffer returned from Rust into a JS object
 * @returns {Object} - The return value of `convert`
 */
function parseSyncRawImpl(filename, sourceText, options, convert) {
  const { buffer, sourceByteLen } = prepareRaw(sourceText);
  parseSyncRawBinding(filename, buffer, sourceByteLen, options);
  return convert(buffer, sourceText, sourceByteLen);
}
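
// For illustration, a caller looks something like this. This is a sketch only;
// `deserialize` stands in for a hypothetical `convert` function, not part of this module:
//
// ```js
// const ast = parseSyncRawImpl('test.js', 'let x = 1;', undefined, deserialize);
// ```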

// User should not schedule more async tasks than there are available CPUs, as it hurts performance,
// but it's a common mistake in async JS code to do exactly that.
//
// That anti-pattern looks like this when applied to Oxc:
//
// ```js
// const asts = await Promise.all(
//   files.map(
//     async (filename) => {
//       const sourceText = await fs.readFile(filename, 'utf8');
//       const ast = await oxc.parseAsync(filename, sourceText);
//       return ast;
//     }
//   )
// );
// ```
//
// In most cases, that'd just result in somewhat degraded performance, and higher memory use
// due to loading sources into memory prematurely.
//
// However, raw transfer uses a 6 GiB buffer for each parsing operation.
// Most of the memory pages in those buffers are never touched, so this does not consume a huge amount
// of physical memory, but it does still consume virtual memory.
//
// If we allowed creating a large number of 6 GiB buffers simultaneously, it would quickly consume
// virtual memory space and risk memory exhaustion. The code above would exhaust the entire bottom
// half (heap) of 48-bit virtual memory space if `files.length >= 21_845`
// (2 ** 47 bytes / 6 GiB per buffer ≈ 21,845 buffers). That is not an unrealistic number
// in real-world code.
//
// To guard against this possibility, we implement a simple queue.
// No more than `os.availableParallelism()` files can be parsed simultaneously; any further calls to
// `parseAsyncRaw` are put in a queue, to execute once other tasks complete.
//
// Fall back to `os.cpus().length` on versions of NodeJS prior to v18.14.0, which do not support
// `os.availableParallelism`.
let availableCores = os.availableParallelism ? os.availableParallelism() : os.cpus().length;
const queue = [];
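// Each entry in `queue` is the `resolve` callback of one call to `parseAsyncRawImpl`
// that is waiting for a CPU core to become free (see below).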

/**
 * Parse JS/TS source asynchronously using raw transfer.
 *
 * Convert the buffer returned by Rust to a JS object with the provided `convert` function.
 *
 * Queues up parsing operations if there are more concurrent calls than CPU cores (see above).
 *
 * This function contains logic shared by both `parseAsyncRaw` and `parseAsyncLazy`.
 *
 * @param {string} filename - Filename
 * @param {string} sourceText - Source text of file
 * @param {Object|undefined} options - Parsing options
 * @param {function} convert - Function to convert the buffer returned from Rust into a JS object
 * @returns {Object} - The return value of `convert`
 */
async function parseAsyncRawImpl(filename, sourceText, options, convert) {
  // Wait for a free CPU core if all CPUs are currently busy.
  //
  // Note: `availableCores` is NOT decremented if the task has to wait in the queue first,
  // and NOT incremented when parsing completes and the next task in the queue is run.
  //
  // This is to avoid a race condition if `parseAsyncRaw` is called during the microtask in between
  // `resolve` being called below, and the promise resolving here. In that case the new task could
  // start running, and then the promise resolves, and the queued task also starts running.
  // We'd then have `availableParallelism() + 1` tasks running simultaneously. Potentially, this could
  // happen repeatedly, with the number of tasks running simultaneously ever-increasing.
  if (availableCores === 0) {
    // All CPU cores are busy. Put this task in queue and wait for capacity to become available.
    await new Promise((resolve) => {
      queue.push(resolve);
    });
  } else {
    // A CPU core is available. Mark core as busy, and run parsing now.
    availableCores--;
  }

  // Parse
  const { buffer, sourceByteLen } = prepareRaw(sourceText);
  await parseAsyncRawBinding(filename, buffer, sourceByteLen, options);
  const data = convert(buffer, sourceText, sourceByteLen);

  // Free the CPU core
  if (queue.length > 0) {
    // Some further tasks waiting in queue. Run the next one.
    // Do not increment `availableCores` (see above).
    const resolve = queue.shift();
    resolve();
  } else {
    // No tasks waiting in queue. This CPU is now free.
    availableCores++;
  }

  return data;
}
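
// Because of the queue, the anti-pattern shown above remains safe with raw transfer.
// Something like the following (a sketch; `sources` and `deserialize` are hypothetical)
// never runs more than `availableCores` parse operations at a time,
// so it only ever holds a handful of 6 GiB buffers simultaneously:
//
// ```js
// const asts = await Promise.all(
//   filenames.map(
//     filename => parseAsyncRawImpl(filename, sources.get(filename), undefined, deserialize)
//   )
// );
// ```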

const ARRAY_BUFFER_SIZE = BUFFER_SIZE + BUFFER_ALIGN;
const ONE_GIB = 1 << 30;
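// The backing `ArrayBuffer` is over-allocated by `BUFFER_ALIGN` bytes, so a view of
// `BUFFER_SIZE` bytes aligned on `BUFFER_ALIGN` can always be carved out of it,
// regardless of the allocation's own alignment (see `createBuffer` below).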

// We keep a cache of buffers for raw transfer, so we can reuse them as much as possible.
//
// When processing multiple files, it's ideal if we can reuse an existing buffer, as it's more likely
// to be warm in CPU cache, it avoids allocations, and it saves work for the garbage collector.
//
// However, we also don't want to keep a load of large buffers around indefinitely, using up memory,
// if they're not going to be used again.
//
// We have no knowledge of what pattern the user may process files in over time (could be lots in
// quick succession, or more occasionally in a long-running process). So we use a flexible caching
// strategy which is adaptable to many usage patterns.
//
// We use a 2-tier cache.
// Tier 1 uses strong references, tier 2 uses weak references.
//
// When parsing is complete and the buffer is no longer in use, push it to `buffers` (tier 1 cache).
// Set a timer to clear the cache when there's been no activity for 10 seconds.
//
// When the timer expires, move all the buffers from tier 1 cache into `oldBuffers` (tier 2).
// They are stored there as `WeakRef`s, so the garbage collector is free to reclaim them.
//
// On the next call to `parseSync` or `parseAsync`, promote any buffers in tier 2 cache which were not
// already garbage collected back into tier 1 cache. This is on the assumption that parsing one file
// indicates parsing as a whole is an ongoing process, and there will likely be further calls to
// `parseSync` / `parseAsync` in future.
//
// Tier 2 of the cache is weak because V8 does not necessarily free memory as soon as it's able to.
// We don't want to block it from freeing memory, but if it hasn't done so yet, there's no point
// creating a new buffer when one already exists.
const CLEAR_BUFFERS_TIMEOUT = 10_000; // 10 seconds
const buffers = []; // tier 1 cache (strong references)
const oldBuffers = []; // tier 2 cache (`WeakRef`s)
let clearBuffersTimeout = null;

const textEncoder = new TextEncoder();

/**
 * Get a buffer (from cache if possible), and copy source text into it.
 *
 * @param {string} sourceText - Source text of file
 * @returns {Object} - Object of form `{ buffer, sourceByteLen }`.
 *   - `buffer`: `Uint8Array` with the source text written at its start.
 *     Rust will write the AST into it in raw form.
 *   - `sourceByteLen`: Length of source text in UTF-8 bytes
 *     (which may not be equal to `sourceText.length` if source contains non-ASCII characters).
 */
function prepareRaw(sourceText) {
  // Cancel timeout for clearing buffers
  if (clearBuffersTimeout !== null) {
    clearTimeout(clearBuffersTimeout);
    clearBuffersTimeout = null;
  }

  // Revive any discarded buffers which have not yet been garbage collected
  if (oldBuffers.length > 0) {
    const revivedBuffers = [];
    for (let oldBuffer of oldBuffers) {
      oldBuffer = oldBuffer.deref();
      if (oldBuffer !== undefined) revivedBuffers.push(oldBuffer);
    }
    oldBuffers.length = 0;
    if (revivedBuffers.length > 0) buffers.unshift(...revivedBuffers);
  }

  // Reuse existing buffer, or create a new one
  const buffer = buffers.length > 0 ? buffers.pop() : createBuffer();

  // Write source into start of buffer.
  // `TextEncoder` cannot write into a `Uint8Array` larger than 1 GiB,
  // so create a view into buffer of this size to write into.
  const sourceBuffer = new Uint8Array(buffer.buffer, buffer.byteOffset, ONE_GIB);
  const { read, written: sourceByteLen } = textEncoder.encodeInto(sourceText, sourceBuffer);
  // `encodeInto` stops writing when the destination view is full, so `read` being less than
  // `sourceText.length` means the source did not fit in the 1 GiB view
  if (read !== sourceText.length) throw new Error('Failed to write source text into buffer');

  return { buffer, sourceByteLen };
}

/**
 * Get whether the AST in buffer is a JS AST or a TS AST.
 * Rust side sets a `bool` at this position in the buffer which is `true` if TS.
 *
 * @param {Uint8Array} buffer - Buffer containing AST in raw form
 * @returns {boolean} - `true` if AST is JS, `false` if TS
 */
function isJsAst(buffer) {
  return buffer[IS_TS_FLAG_POS] === 0;
}
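
// e.g. a `convert` function could use this flag to pick a deserializer
// (a sketch only; `deserializeJs` / `deserializeTs` are hypothetical):
//
// ```js
// const program = isJsAst(buffer) ? deserializeJs(buffer) : deserializeTs(buffer);
// ```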

/**
 * Return buffer to cache, to be reused.
 * Set a timer to clear buffers.
 *
 * @param {Uint8Array} buffer - Buffer
 * @returns {undefined}
 */
function returnBufferToCache(buffer) {
  buffers.push(buffer);

  // Restart the inactivity timer. `unref` the timer so it doesn't prevent the process exiting.
  if (clearBuffersTimeout !== null) clearTimeout(clearBuffersTimeout);
  clearBuffersTimeout = setTimeout(clearBuffersCache, CLEAR_BUFFERS_TIMEOUT);
  clearBuffersTimeout.unref();
}
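
// For illustration, a `convert` function which fully deserializes the AST could recycle
// the buffer as soon as it's done with it (a sketch; `deserialize` is hypothetical):
//
// ```js
// function convert(buffer, sourceText, sourceByteLen) {
//   const ast = deserialize(buffer, sourceText, sourceByteLen);
//   returnBufferToCache(buffer);
//   return ast;
// }
// ```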

/**
 * Downgrade buffers in tier 1 cache (`buffers`) to tier 2 (`oldBuffers`)
 * so they can be garbage collected.
 *
 * @returns {undefined}
 */
function clearBuffersCache() {
  clearBuffersTimeout = null;

  for (const buffer of buffers) {
    oldBuffers.push(new WeakRef(buffer));
  }
  buffers.length = 0;
}

/**
 * Create a `Uint8Array` which is 2 GiB in size, with its start aligned on 4 GiB.
 *
 * Achieve this by creating a 6 GiB `ArrayBuffer`, getting the offset within it that's aligned
 * to 4 GiB, chopping off that number of bytes from the start, and shortening to 2 GiB.
 *
 * It's always possible to obtain a 2 GiB slice aligned on 4 GiB within a 6 GiB buffer,
 * no matter how the 6 GiB buffer is aligned: the distance to the next 4 GiB boundary is
 * less than 4 GiB, so at least 2 GiB of the 6 GiB allocation remains beyond that boundary.
 *
 * Note: On systems with virtual memory, this only consumes 6 GiB of *virtual* memory.
 * It does not consume physical memory until data is actually written to the `Uint8Array`.
 * Physical memory consumed corresponds to the quantity of data actually written.
 *
 * @returns {Uint8Array} - Buffer
 */
function createBuffer() {
  const arrayBuffer = new ArrayBuffer(ARRAY_BUFFER_SIZE);
  const offset = getBufferOffset(new Uint8Array(arrayBuffer));
  const buffer = new Uint8Array(arrayBuffer, offset, BUFFER_SIZE);
  // Attach `Uint32Array` and `Float64Array` views over the same memory, so 4-byte and
  // 8-byte values can be read from the buffer without creating new views each time
  buffer.uint32 = new Uint32Array(arrayBuffer, offset, BUFFER_SIZE / 4);
  buffer.float64 = new Float64Array(arrayBuffer, offset, BUFFER_SIZE / 8);
  return buffer;
}