builder/lib/processors/nonAsciiEscaper.js

const escapeUnicode = require("escape-unicode");

/**
 * Highest character code that still belongs to the ASCII character set.
 * ASCII defines 128 characters whose character codes range from 0 to 127,
 * so every code above this value denotes a non-ASCII character.
 *
 * @see https://en.wikipedia.org/wiki/ASCII
 * @type {number}
 */
const CHAR_CODE_OF_LAST_ASCII_CHARACTER = 0x7F;

// Cache of already computed escape sequences, keyed by the unescaped input.
// A Map is used instead of a plain object so that an input which happens to equal
// a member of Object.prototype (e.g. "constructor" or "__proto__") can never be
// mistaken for a cache hit.
const memoizeEscapeUnicodeMap = new Map();

/**
 * Memoized wrapper around escapeUnicode: the escape sequence for a given input is
 * computed once and served from the cache on every subsequent call.
 *
 * @param {string} sChar input to escape (the caller in this module passes single UTF-16 code units)
 * @returns {string} result of escapeUnicode for the given input
 */
const memoizeEscapeUnicode = function(sChar) {
	let sEscaped = memoizeEscapeUnicodeMap.get(sChar);
	if (sEscaped === undefined) {
		// cache miss: compute the escape sequence and remember it
		sEscaped = escapeUnicode(sChar);
		memoizeEscapeUnicodeMap.set(sChar, sEscaped);
	}
	return sEscaped;
};

/**
 * Replaces every non-ASCII character of the given string with its unicode escape sequence.
 * The string is processed per UTF-16 code unit; each unit whose char code lies above the
 * ASCII range is escaped, all other units are copied unchanged.
 *
 * @see https://en.wikipedia.org/wiki/ASCII
 * @see https://tools.ietf.org/html/rfc5137#section-6.1
 *
 * @param {string} string input string which may contain non ascii characters, e.g. L♥VE
 * @returns {{string: (string), modified: boolean}} <code>string</code> is the input with all
 * non ascii characters escaped by unicode sequences, e.g. L\u2665VE; <code>modified</code>
 * tells whether at least one character had to be escaped
 */
const escapeNonAscii = function(string) {
	let modified = false;
	// split("") yields one entry per UTF-16 code unit, just like index based access
	const pieces = string.split("").map((codeUnit) => {
		if (codeUnit.charCodeAt(0) <= CHAR_CODE_OF_LAST_ASCII_CHARACTER) {
			// plain ascii: keep as is
			return codeUnit;
		}
		modified = true;
		return memoizeEscapeUnicode(codeUnit);
	});
	return {
		modified,
		string: pieces.join("")
	};
};

/**
 * Escapes non ASCII characters with unicode escape sequences.
 *
 * Each resource's buffer is decoded using the given encoding. If the decoded content
 * contains non ASCII characters, the escaped content is written back to the resource;
 * otherwise the resource is left untouched.
 *
 * @example
 * const encoding = nonAsciiEscaper.getEncodingFromAlias("ISO-8859-1");
 * nonAsciiEscaper({resources, options: {encoding}});
 *
 *
 * @public
 * @alias module:@ui5/builder.processors.nonAsciiEscaper
 * @param {object} parameters Parameters
 * @param {module:@ui5/fs.Resource[]} parameters.resources List of resources to be processed
 * @param {object} [parameters.options] Options
 * @param {string} [parameters.options.encoding="utf8"] resource file encoding
 *   ({@link https://nodejs.org/api/buffer.html#buffer_buffers_and_character_encodings Node.js character encodings}).
 *   Use #getEncodingFromAlias to get the encoding string
 * @returns {Promise<module:@ui5/fs.Resource[]>} Promise resolving with the processed resources
 */
async function nonAsciiEscaper({resources, options}) {
	// "options" is documented as optional, so it must not be destructured unconditionally.
	// Any falsy encoding (missing, empty string, null) falls back to utf8.
	const encoding = (options && options.encoding) || "utf8";

	async function processResource(resource) {
		const resourceString = (await resource.getBuffer()).toString(encoding);
		const escaped = escapeNonAscii(resourceString);
		// only modify the resource's string if it was changed
		if (escaped.modified) {
			resource.setString(escaped.string);
		}
		return resource;
	}

	return Promise.all(resources.map(processResource));
}

// Supported encoding labels (aliases) and the Node.js encoding name each one maps to.
// The key order matters: it is the order in which the labels are listed in the
// "not supported" error message of getEncodingFromAlias.
const encodingMap = {"UTF-8": "utf8", "ISO-8859-1": "latin1"};

/**
 * Provides a mapping from user-friendly encoding name (alias) such as "UTF-8" and "ISO-8859-1" to node
 * specific encoding name such as "utf8" or "latin1". Simplifies usage of nonAsciiEscaper encoding option
 * such that it can be used standalone without the respective task (e.g. in Splitter, Bundler and related projects).
 *
 * @public
 * @alias module:@ui5/builder.processors.nonAsciiEscaper․getEncodingFromAlias
 * @param {string} encoding encoding labels: "UTF-8" and "ISO-8859-1"
 * @returns {string} node.js character encoding string, e.g. utf8 and latin1
 * @throws {Error} if the given encoding label is not one of the supported aliases
 */
nonAsciiEscaper.getEncodingFromAlias = function(encoding) {
	// Check for an own property: a plain lookup would also find members inherited from
	// Object.prototype (e.g. "constructor", "toString") and treat them as supported encodings.
	if (!Object.prototype.hasOwnProperty.call(encodingMap, encoding)) {
		throw new Error(
			`Encoding "${encoding}" is not supported. Only ${Object.keys(encodingMap).join(", ")} are allowed values` );
	}
	return encodingMap[encoding];
};

// Export the processor function; getEncodingFromAlias is exposed as a property of it.
module.exports = nonAsciiEscaper;