import { generateTagsRegexp } from './utils-wikitext';
/**
 * Class for replacing parts of a text that shouldn't be modified with placeholders (markers), in
 * order to ignore them when performing certain text replacement operations, and then replacing the
 * placeholders back with the original text.
 *
 * A marker has the form `\x01<index>[_<type>[_<length>]]\x02` (`\x03`…`\x04` for the `'table'`
 * type), where `<index>` is the 1-based position of the hidden text in the array of masked texts.
 *
 * After creating an instance and masking some text using {@link TextMasker#mask} or its derivative
 * methods like {@link TextMasker#maskTemplatesRecursively}, there are two ways to use this class
 * based on your needs:
 * 1. Use {@link TextMasker#withText} to make further transformations to the text, unmask it using
 *    {@link TextMasker#unmask} (as opposed to {@link TextMasker#unmaskText}), and get the result
 *    using {@link TextMasker#getText}.
 * 2. Get the text using {@link TextMasker#getText}, work with it, and in the end, unmask it using
 *    {@link TextMasker#unmaskText}.
 *
 * Note that the methods support chaining, so you can sometimes successfully fit all
 * transformations in one chain.
 */
class TextMasker {
  /**
   * Create a text masker.
   *
   * @param {string} text
   * @param {string[]} [maskedTexts] Array of masked texts to reuse. Use this when you are using the
   *   class with a string that already has masked parts, or you will run into problems (new
   *   markers are numbered after the length of this array).
   */
  constructor(text, maskedTexts) {
    /**
     * The text, parts of which are (or will be) masked.
     *
     * @type {string}
     */
    this.text = text;

    /**
     * Array of masked texts. The marker with index `n` refers to the element `n - 1`.
     *
     * @type {string[]}
     */
    this.maskedTexts = maskedTexts || [];
  }

  /**
   * Replace text matched by a regexp with placeholders.
   *
   * @param {RegExp} regexp
   * @param {string} [type] Should consist only of alphanumeric characters. A warning is logged
   *   otherwise, but the masking is performed anyway. The `'table'` type gets its own pair of
   *   marker delimiters (`\x03` and `\x04`).
   * @param {boolean} [useGroups=false] Use the first two capturing groups in the regexp as the
   *   `preText` and `textToMask` parameters: the first group is kept in the text before the
   *   marker, the second one is what gets hidden. (Used for processing table code.)
   * @returns {TextMasker}
   */
  mask(regexp, type, useGroups = false) {
    if (type && !/^\w+$/.test(type)) {
      console.warn(
        'TextMasker.mask: the `type` argument should match `/^\\w+$/`. Proceeding nevertheless.'
      );
    }

    // None of these depend on the individual match, so compute them once. Tables are handled
    // separately: they get distinct delimiters.
    const isTable = type === 'table';
    const opening = isTable ? '\x03' : '\x01';
    const closing = isTable ? '\x04' : '\x02';
    const typeSuffix = type ? `_${type}` : '';

    this.text = this.text.replace(regexp, (match, group1, group2) => {
      // Without `useGroups`, the extra callback arguments are ignored entirely (they may be
      // capturing groups, or the match offset and the whole string).
      const preText = (useGroups && group1) || '';
      const textToMask = (useGroups && group2) || match;

      // `Array#push` returns the new length, which is exactly the 1-based index of the element.
      const index = this.maskedTexts.push(textToMask);

      return `${preText}${opening}${index}${typeSuffix}${closing}`;
    });

    return this;
  }

  /**
   * In a provided string, replace placeholders added by {@link TextMasker#mask} with their text.
   *
   * Masked texts can themselves contain markers (e.g. nested templates), so the replacement is
   * repeated until no matching markers remain.
   *
   * @param {string} text
   * @param {string} [type] If specified, only markers of this type, as well as markers with no
   *   type at all, are unmasked; markers of other types are left in place. The value is inserted
   *   into a regular expression as is. If omitted, markers of all types are unmasked.
   * @returns {string}
   */
  unmaskText(text, type) {
    const regexp = type ?
      new RegExp(`(?:\\x01|\\x03)(\\d+)(?:_${type}(?:_\\d+)?)?(?:\\x02|\\x04)`, 'g') :
      /(?:\x01|\x03)(\d+)(?:_\w+)?(?:\x02|\x04)/g;

    // Reusing a global regexp with `.test()` is safe here: `String#replace` with a global regexp
    // resets `lastIndex` to 0, so every `.test()` call starts from the beginning of the string.
    let result = text;
    while (regexp.test(result)) {
      result = result.replace(regexp, (marker, num) => this.maskedTexts[num - 1]);
    }

    return result;
  }

  /**
   * Replace placeholders added by {@link TextMasker#mask} with their text.
   *
   * @param {string} [type] See {@link TextMasker#unmaskText}.
   * @returns {TextMasker}
   */
  unmask(type) {
    this.text = this.unmaskText(this.text, type);

    return this;
  }

  /**
   * Mask templates taking into account nested ones. Innermost templates are masked first, so the
   * stored code of an outer template contains the markers of the templates nested in it.
   *
   * Known limitation: on the outermost level, an unopened `}}` or an unclosed `{{` with no `}}`
   * after it stops the scan; anything after that point is left unmasked.
   *
   * Borrowed from
   * https://ru.wikipedia.org/w/index.php?title=MediaWiki:Gadget-wikificator.js&oldid=102530721
   *
   * @param {Function} [handler] Function that processes the template code. It receives the code
   *   of a template (with nested templates already replaced by markers); its return value is what
   *   gets stored as the masked text.
   * @param {boolean} [addLengths=false] Add lengths of the masked templates to markers. Nested
   *   markers that carry a length are counted as that length, not as the length of the marker.
   * @returns {TextMasker}
   * @author Putnik
   * @author Jack who built the house
   */
  maskTemplatesRecursively(handler, addLengths = false) {
    let pos = 0;

    // Start positions of the templates that have been opened but not closed yet.
    const stack = [];

    while (true) {
      const nextOpening = this.text.indexOf('{{', pos);
      const nextClosing = this.text.indexOf('}}', pos);

      if (nextOpening !== -1 && nextOpening < nextClosing) {
        // Memorize the wrapper's start position; will search the wrapped next
        stack.push(nextOpening);
        pos = nextOpening + 2;
        continue;
      }

      // Nothing more found _inside_ the wrapper; time to go up the hierarchy

      // No wrappers left - we're at the outermost level
      if (!stack.length) break;

      // Get back to the wrapper
      const left = stack.pop();

      // An unclosed wrapper is considered to span to the end of the text. `right` points past the
      // closing braces; it may exceed the text length, which `substring()` tolerates.
      const right = (nextClosing === -1 ? this.text.length : nextClosing) + 2;

      // Mask the template
      let template = this.text.substring(left, right);
      if (handler) {
        template = handler(template);
      }

      const lengthOrNot = addLengths ?
        '_' +
        template
          .replace(/\x01\d+_template_(\d+)\x02/g, (m, n) => ' '.repeat(Number(n)))
          .length :
        '';

      this.text = (
        this.text.substring(0, left) +
        '\x01' +
        this.maskedTexts.push(template) +
        '_template' +
        lengthOrNot +
        '\x02' +
        this.text.substring(right)
      );

      // Synchronize the position: resume from the start of the inserted marker. This must not
      // depend on the length of `template`, as the handler is free to change it.
      pos = left;
    }

    return this;
  }

  /**
   * Mask HTML tags in the text.
   *
   * @param {string[]} tags Tag names; passed to `generateTagsRegexp()` to build the regexp that is
   *   then masked.
   * @param {string} type Marker type; see {@link TextMasker#mask}.
   * @returns {TextMasker}
   */
  maskTags(tags, type) {
    return this.mask(generateTagsRegexp(tags), type);
  }

  /**
   * Replace code, that should not be modified when processing it, with placeholders. In order:
   * `<pre>`, `<source>`, `<syntaxhighlight>` (type `block`); `<gallery>`, `<poem>` (type
   * `gallery`); `<nowiki>` (type `inline`); templates; tables.
   *
   * @param {Function} [templateHandler] Passed to {@link TextMasker#maskTemplatesRecursively}.
   * @returns {TextMasker}
   */
  maskSensitiveCode(templateHandler) {
    return this
      .maskTags(['pre', 'source', 'syntaxhighlight'], 'block')
      .maskTags(['gallery', 'poem'], 'gallery')
      .maskTags(['nowiki'], 'inline')
      .maskTemplatesRecursively(templateHandler)
      .mask(/^(:* *)(\{\|[^]*?\n\|\})/gm, 'table', true)

      // Tables with a signature inside that are clipped on comment editing.
      .mask(/^(:* *)(\{\|[^]*\n\|)/gm, 'table', true);
  }

  /**
   * Run a certain function for the text and replace the text with its result.
   *
   * @param {Function} func Function that should accept and return a string. It can also accept the
   *   {@link TextMasker} object as a second parameter.
   * @returns {TextMasker}
   */
  withText(func) {
    this.text = func(this.text, this);

    return this;
  }

  /**
   * Get the text in its current (masked/unmasked) state.
   *
   * @returns {string}
   */
  getText() {
    return this.text;
  }

  /**
   * Get the masked texts. This is the internal array itself, not a copy.
   *
   * @returns {string[]}
   */
  getMaskedTexts() {
    return this.maskedTexts;
  }
}
export default TextMasker;