/**
* Web worker entry point.
*
* Note that currently there may be difficulties in testing the web worker in the "single" mode with
* custom config functions such as {@link module:defaultConfig.rejectNode} due to the (unfortunate)
* use of `eval()` here and the fact that webpack renames some objects in some contexts resulting in
* a lost tie between them.
*
* @module worker
*/
import CdError from './CdError';
import CommentSkeleton from './CommentSkeleton';
import Parser from './Parser';
import SectionSkeleton from './SectionSkeleton';
import cd from './cd';
import debug from './debug';
import { parseDocument, traverseSubtree } from './htmlparser2Extended';
import { isHeadingNode, isMetadataNode } from './utils-general';
// Flipped to `false` once the first message from the window has been handled.
let isFirstRun = true;

// Handle of the pending "wake up" timer, if one is scheduled (see setAlarm()).
let alarmTimeout;

// Root node of the document parsed for the most recent "parse" request.
let rootElement;

// Mark the shared `cd` object as running inside the worker and expose the debug helper on it.
Object.assign(cd, { isWorker: true, debug });
debug.init();
/**
 * Schedule a "wake up" message to be posted to the window once the interval has elapsed. A
 * previously scheduled alarm is cancelled first, so at most one alarm is pending at any time.
 *
 * @param {number} interval Delay before the message is posted, in milliseconds.
 * @private
 */
function setAlarm(interval) {
  const notifyWindow = () => {
    postMessage({ type: 'wakeUp' });
  };

  clearTimeout(alarmTimeout);
  alarmTimeout = setTimeout(notifyWindow, interval);
}
/**
 * Collect every text node found under the root element. As a side effect of the same traversal,
 * HTML comments whose content starts with `__DTREPLYBUTTONS__` are removed from the tree.
 *
 * @returns {external:Node[]}
 * @private
 */
function getAllTextNodes() {
  const textNodes = [];

  traverseSubtree(rootElement, (node) => {
    switch (node.nodeType) {
      case Node.TEXT_NODE:
        textNodes.push(node);
        break;

      case Node.COMMENT_NODE:
        // Drop DT reply button HTML comments in the same pass to save a separate traversal.
        if (node.data.startsWith('__DTREPLYBUTTONS__')) {
          node.remove();
        }
        break;
    }
  });

  return textNodes;
}
/**
 * Remove all HTML comments added by DiscussionTools related to reply buttons.
 *
 * In the worker this is deliberately a no-op: the comments are already removed by
 * getAllTextNodes() while it traverses the tree. The function exists so that it can be handed to
 * the parser as part of its context (see parse()).
 *
 * @private
 */
function removeDtButtonHtmlComments() {
  // Intentionally empty; the removal happens in getAllTextNodes().
}
/**
* DomHandler's node.
*
* @external Node
* @see
* https://github.com/fb55/domhandler/blob/c3232247c2350566cb6a0cba45d5e34177b3b811/src/node.ts#L18
*/
/**
* DomHandler's data node.
*
* @external DataNode
* @see
* https://github.com/fb55/domhandler/blob/c3232247c2350566cb6a0cba45d5e34177b3b811/src/node.ts#L84
*/
/**
* DomHandler's element.
*
* @external Element
* @see
* https://github.com/fb55/domhandler/blob/c3232247c2350566cb6a0cba45d5e34177b3b811/src/node.ts#L200
*/
/**
 * Initialize the parser, strip DiscussionTools markup, then collect the section headings and
 * comment signatures of the page as a single list sorted by the position of their elements.
 *
 * @param {Parser} parser
 * @returns {object[]} Headings and signatures in document order.
 * @private
 */
function findTargets(parser) {
  parser.init();
  parser.processAndRemoveDtMarkup();

  const headings = parser.findHeadings();
  const signatures = parser.findSignatures();
  const targets = headings.concat(signatures);

  // A target whose element follows the other one's element goes after it.
  targets.sort((first, second) => (
    parser.context.follows(first.element, second.element) ? 1 : -1
  ));

  return targets;
}
/**
 * Create a comment for every signature among the targets and append it to `cd.comments`.
 * Creating a comment may modify the related parts of the DOM.
 *
 * @param {Parser} parser
 * @param {object[]} targets Headings and signatures found on the page.
 * @private
 */
function processComments(parser, targets) {
  const signatures = targets.filter(({ type }) => type === 'signature');

  for (const signature of signatures) {
    try {
      cd.comments.push(parser.createComment(signature, targets));
    } catch (error) {
      // A CdError means that this signature is skipped without a report; anything else is
      // unexpected and gets logged.
      if (error instanceof CdError) continue;

      console.error(error);
    }
  }
}
/**
 * Create a section for every heading among the targets and append it to `cd.sections`.
 * Creating a section may modify some parts of it.
 *
 * @param {Parser} parser
 * @param {object[]} targets Headings and signatures found on the page.
 * @private
 */
function processSections(parser, targets) {
  const headings = targets.filter(({ type }) => type === 'heading');

  for (const heading of headings) {
    try {
      cd.sections.push(parser.createSection(heading, targets));
    } catch (error) {
      // A CdError means that this heading is skipped without a report; anything else is
      // unexpected and gets logged.
      if (error instanceof CdError) continue;

      console.error(error);
    }
  }
}
/**
 * Strip volatile attributes from an element: every attribute whose name starts with `data-`, and
 * the `id` attribute when its value looks like a Parsoid-generated ID (`mw` followed by two or
 * three characters).
 *
 * @param {external:Element} element
 * @private
 */
function removeDataAndParsoidAttributes(element) {
  // Object.keys() takes a snapshot of the names, so removing attributes while looping is safe.
  for (const name of Object.keys(element.attribs)) {
    const isDataAttribute = /^data-/.test(name);
    const isParsoidId = name === 'id' && /^mw.{2,3}$/.test(element.attribs[name]);

    if (isDataAttribute || isParsoidId) {
      element.removeAttribute(name);
    }
  }
}
/**
 * Replace an element with a text node marker of the form `\x01<number>_<type>\x02` and store the
 * element's type, tag name and HTML in `comment.hiddenElementsData`. The number is the 1-based
 * position of the stored entry.
 *
 * @param {external:Element} el Element to hide.
 * @param {CommentSkeleton} comment Comment the element belongs to.
 * @returns {?external:DataNode} The marker node if the element was itself one of the comment's
 *   elements (the entry in `comment.elements` is then swapped for the marker), `null` otherwise.
 * @private
 */
function hideElement(el, comment) {
  // The first matching class wins; elements with none of the classes are treated as
  // TemplateStyles.
  const type = (
    ['reference', 'references', 'autonumber'].find((name) => el.classList.contains(name)) ??
    'templateStyles'
  );

  const num = comment.hiddenElementsData.push({
    type,
    tagName: el.tagName,
    html: el.outerHTML,
  });

  const marker = document.createTextNode(`\x01${num}_${type}\x02`);
  el.parentNode.insertBefore(marker, el);
  el.remove();

  const index = comment.elements.indexOf(el);
  if (index === -1) {
    return null;
  }

  comment.elements[index] = marker;
  return marker;
}
/**
 * Clean up the elements of a comment and compute their HTML: headings are reduced to their
 * headline content, volatile attributes, empty anchors and HTML comments are removed, and
 * dynamic content (references, autonumbers, metadata nodes) is replaced with markers.
 *
 * Sets `comment.hiddenElementsData` (data of the hidden elements) and `comment.elementHtmls`
 * (one string per comment element).
 *
 * @param {CommentSkeleton} comment
 * @private
 */
function filterCommentContent(comment) {
  const hideableClassNames = ['autonumber', 'reference', 'references'];

  const detach = (node) => {
    node.remove();
  };

  // Keep only the headline's content in a heading, as other elements contain dynamic
  // identifiers.
  const keepOnlyHeadline = (heading) => {
    const headline = (
      heading.getElementsByClassName('mw-headline', 1)[0] ||
      heading.querySelectorAll('h1, h2, h3, h4, h5, h6')[0]
    );
    if (!headline) return;

    // Was removed in 2021, see T284921. Keep this for some time.
    headline.getElementsByClassName('mw-headline-number', 1)[0]?.remove();

    // Copy childNodes with `[...iterable]` first: it is a live collection, so indexes shift
    // when a node is removed or moved.
    [...heading.childNodes].forEach(detach);
    [...headline.childNodes].forEach(heading.appendChild.bind(heading));
  };

  // Empty comment anchors, in most cases added by the script.
  const isEmptyAnchor = (span) => (
    span.attribs.id && Object.keys(span.attribs).length === 1 && !span.textContent
  );

  const isHtmlComment = (node) => node.nodeType === Node.COMMENT_NODE;

  // Note that filterRecursively's range includes the root element.
  const isHideable = (node) => (
    node.tagName &&
    (
      hideableClassNames.some((name) => node.classList.contains(name)) ||
      isMetadataNode(node)
    )
  );

  comment.hiddenElementsData = [];
  comment.elementHtmls = comment.elements.map((element) => {
    if (isHeadingNode(element)) {
      keepOnlyHeadline(element);
    }

    // Data attributes may include dynamic components, for example
    // https://ru.wikipedia.org/wiki/Проект:Знаете_ли_вы/Подготовка_следующего_выпуска.
    removeDataAndParsoidAttributes(element);
    element.getElementsByAttribute(/^data-|^id$/).forEach(removeDataAndParsoidAttributes);

    element.getElementsByTagName('span').filter(isEmptyAnchor).forEach(detach);
    element.filterRecursively(isHtmlComment).forEach(detach);

    // A references list or a metadata node is hidden as a whole; its marker text stands in for
    // its HTML.
    if (element.classList.contains('references') || isMetadataNode(element)) {
      return hideElement(element, comment).textContent;
    }

    element.filterRecursively(isHideable).forEach((el) => {
      hideElement(el, comment);
    });
    return element.outerHTML;
  });
}
/**
 * Add the properties used to compare the content of a comment with the content of the same
 * comment in another revision: `htmlToCompare`, `textHtmlToCompare`, `headingHtmlToCompare`,
 * `text`, `elementNames` and `elementClassNames`. The signature element is removed from the DOM
 * before `text` is computed.
 *
 * @param {CommentSkeleton} comment
 * @private
 */
function addCompareHelperProperties(comment) {
  /*
    One of the reasons this function exists is that `outerHTML` can't be used as is to compare
    comment revisions: the difference may be only in <div> vs. <dd> (<li>) tags, as in this case.

    This creates a <dd> tag:

      : Comment. [signature]

    This creates a <div> tag for the first comment:

      : Comment. [signature] :: Reply. [signature]

    So the HTML is `<dd><div>...</div><dl>...</dl></dd>`. A newline also appears before `</div>`,
    so the results need to be trimmed.
  */

  // Get the HTML that represents one comment element in comparisons.
  const getHtmlToCompare = (el) => {
    if (el.tagName !== 'DIV' || el.classList.contains('mw-heading')) {
      return el.innerHTML || el.textContent;
    }

    // Work around the bug where the {{smalldiv}} output (or any <div> wrapper around the
    // comment) is treated differently depending on whether there are replies to that comment.
    // When there are none, a <li>/<dd> element containing the <div> wrapper is the only comment
    // part; when there are some, the <div> wrapper is.
    el.classList.remove('cd-comment-part', 'cd-comment-part-first', 'cd-comment-part-last');
    if (!el.getAttribute('class')) {
      el.removeAttribute('class');
    }

    // A wrapper with no attributes left is represented by its content only.
    if (!Object.keys(el.attribs).length) {
      return el.innerHTML;
    }

    // https://ru.wikipedia.org/w/index.php?title=Википедия:Форум/Правила&oldid=125661313#c-Vladimir_Solovjev-20220921144700-D6194c-1cc-20220919200300
    // without children has no trailing newline, while with children it has.
    if (el.lastChild?.data === '\n') {
      el.lastChild.remove();
    }
    return el.outerHTML;
  };

  comment.htmlToCompare = '';
  comment.textHtmlToCompare = '';
  comment.headingHtmlToCompare = '';

  for (const el of comment.elements) {
    // Nodes without getElementsByClassName() (e.g. marker text nodes) are skipped here.
    el.getElementsByClassName?.('ext-discussiontools-init-timestamplink').forEach((link) => {
      // The link may change
      link.removeAttribute('href');
    });

    const html = getHtmlToCompare(el);
    comment.htmlToCompare += `${html}\n`;
    if (isHeadingNode(el)) {
      comment.headingHtmlToCompare += html;
    } else {
      comment.textHtmlToCompare += `${html}\n`;
    }
  }

  comment.htmlToCompare = comment.htmlToCompare.trim();
  comment.textHtmlToCompare = comment.textHtmlToCompare.trim();
  comment.headingHtmlToCompare = comment.headingHtmlToCompare.trim();

  comment.signatureElement.remove();
  comment.text = comment.elements.map((el) => el.textContent).join('\n').trim();

  comment.elementNames = comment.elements.map((el) => el.tagName);
  comment.elementClassNames = comment.elements.map((el) => el.className);
}
/**
 * Delete from an object, in place, every own enumerable property whose name is in the
 * "dangerous" names list, keeping all other values.
 *
 * @param {object} obj Object to clean up; it is modified, not copied.
 * @param {string[]} dangerousKeys Names of the properties to delete.
 * @private
 */
function keepSafeValues(obj, dangerousKeys) {
  // The object is modified in place on purpose: a copy would lose the prototype.
  Object.keys(obj)
    .filter((key) => dangerousKeys.includes(key))
    .forEach((key) => {
      delete obj[key];
    });
}
/**
 * Get the comments and sections ready to be sent to the main process: process outdents, filter
 * the content of comments and add the comparison helpers to them, link comments and sections
 * with their relatives, and delete the properties that must not be transferred.
 *
 * @param {Parser} parser
 * @private
 */
function prepareCommentsAndSections(parser) {
  // Properties deleted from comments and sections respectively before the transfer.
  const unsafeCommentKeys = [
    'authorLink',
    'authorTalkLink',
    'cachedParent',
    'elements',
    'extraSignatures',
    'highlightables',
    'parser',
    'parts',
    'signatureElement',
    'timestampElement',
  ];
  const unsafeSectionKeys = [
    'cachedAncestors',
    'headingElement',
    'hElement',
    'headlineElement',
    'lastElement',
    'lastElementInFirstChunk',
    'parser',
  ];

  CommentSkeleton.processOutdents(parser);

  for (const comment of cd.comments) {
    filterCommentContent(comment);
    addCompareHelperProperties(comment);
  }

  for (const [index, comment] of cd.comments.entries()) {
    comment.children = comment.getChildren();
    for (const reply of comment.children) {
      reply.parent = comment;
      reply.isToMe = comment.isOwn;
    }

    // Up to two comments that directly precede this one, the nearest first.
    comment.previousComments = cd.comments
      .slice(Math.max(0, index - 2), index)
      .reverse();

    keepSafeValues(comment, unsafeCommentKeys);
  }

  for (const section of cd.sections) {
    section.parent = section.getParent();
    section.ancestors = section.getAncestors().map((ancestor) => ancestor.headline);
    section.oldestCommentId = section.oldestComment?.id;

    keepSafeValues(section, unsafeSectionKeys);
  }
}
/**
 * Parse the current document: reset `cd.comments` and `cd.sections`, create a parser with a
 * worker-specific context, find the targets, then process the comments and sections and prepare
 * them for transfer. The three processing stages are timed with the debug timers.
 *
 * @private
 */
function parse() {
  cd.comments = [];
  cd.sections = [];

  Parser.init();

  // Run a stage between the start and the stop of the debug timer with the given label.
  const runTimed = (label, stage) => {
    debug.startTimer(label);
    stage();
    debug.stopTimer(label);
  };

  // Cached answer for areThereOutdents(); stays `undefined` until the first call.
  let hasOutdents;

  const context = {
    CommentClass: CommentSkeleton,
    SectionClass: SectionSkeleton,
    childElementsProp: 'childElements',
    follows: (el1, el2) => el1.follows(el2),
    getAllTextNodes,
    getElementByClassName: (el, className) => (
      el.getElementsByClassName(className, 1)[0] || null
    ),
    rootElement,
    areThereOutdents: () => {
      if (hasOutdents === undefined) {
        hasOutdents = Boolean(
          rootElement.getElementsByClassName(cd.config.outdentClass, 1).length
        );
      }
      return hasOutdents;
    },
    processAndRemoveDtElements: (elements) => {
      elements.forEach((el) => {
        el.remove();
      });
    },
    removeDtButtonHtmlComments,
  };
  const parser = new Parser(context);

  const targets = findTargets(parser);

  runTimed('worker: process comments', () => {
    processComments(parser, targets);
  });
  runTimed('worker: process sections', () => {
    processSections(parser, targets);
  });
  runTimed('worker: prepare comments and sections', () => {
    prepareCommentsAndSections(parser);
  });
}
/**
 * Restore a function from its source code, as produced by stringifying the function on the
 * window side.
 *
 * Supported forms are arrow functions, anonymous and named `function` sources, and method
 * shorthand (`name(args) { ... }`), which is turned into a `function` source first.
 *
 * Known limitation: a method shorthand source whose first line contains `=>` is mistaken for an
 * arrow function and fails to evaluate.
 *
 * @param {string} code Source code of the function. Falsy and non-string values are passed to
 *   `eval()` untouched, which gives `undefined` for an empty string and the value itself
 *   otherwise.
 * @returns {Function}
 * @private
 */
function restoreFunc(code) {
  let source = code;
  if (source) {
    // Method shorthand has no `function` keyword of its own.
    if (!/^ *function\b/.test(source) && !/^.+=>/.test(source)) {
      source = 'function ' + source;
    }

    // Wrap every `function` source in parentheses, named ones included. Without them a named
    // function is evaluated as a declaration, which has no completion value, so eval() would
    // return `undefined` instead of the function. The newline keeps a trailing line comment from
    // swallowing the closing parenthesis.
    if (/^ *function\b/.test(source)) {
      source = '(' + source + '\n)';
    }
  }

  // NOTE(review): this evaluates code received in a message from the window; it must never be
  // fed anything that doesn't come from the script's own configuration.
  // FIXME: Any idea how to avoid using eval() here?
  return eval(source);
}
/**
 * Handle a message from the window. Supported message types:
 * - `setAlarm`: schedule a "wake up" message after `message.interval`;
 * - `removeAlarm`: cancel the pending alarm;
 * - `parse`: parse `message.text` and post the resulting comments and sections back, together
 *   with the revision and resolver IDs of the request.
 *
 * Messages of other types are ignored.
 *
 * @param {Event} e
 * @private
 */
function onMessageFromWindow(e) {
  const { data: message } = e;

  if (isFirstRun) {
    console.debug('Convenient Discussions\' web worker has been successfully loaded. Click the link with the file name and line number to open the source code in your debug tool.');
    isFirstRun = false;
  }

  switch (message.type) {
    case 'setAlarm': {
      setAlarm(message.interval);
      break;
    }

    case 'removeAlarm': {
      clearTimeout(alarmTimeout);
      break;
    }

    case 'parse': {
      const timerLabel = `worker: processing revision ${message.revisionId}`;
      debug.startTimer(timerLabel);

      cd.g = message.g;
      cd.config = message.config;

      // Functions arrive as source code and have to be restored before use.
      cd.config.rejectNode = restoreFunc(cd.config.rejectNode);
      cd.g.isIPv6Address = restoreFunc(cd.g.isIPv6Address);

      self.document = parseDocument(message.text, {
        withStartIndices: true,
        withEndIndices: true,
        decodeEntities: false,
      });
      rootElement = document.childNodes[0];

      parse();

      postMessage({
        type: message.type,
        revisionId: message.revisionId,
        resolverId: message.resolverId,
        comments: cd.comments,
        sections: cd.sections,
      });

      debug.stopTimer(timerLabel);
      debug.logAndResetEverything();
      break;
    }
  }
}
// Route every message posted to the worker to the handler above.
self.onmessage = onMessageFromWindow;