
import cd from './cd';
import sectionRegistry from './sectionRegistry';
import { calculateWordOverlap } from './utils-general';
import { endWithTwoNewlines, extractSignatures, normalizeCode, removeWikiMarkup } from './utils-wikitext';

/**
 * Class that keeps the methods and data related to a section's source code. Also used for section
 * source match candidates before a single match is chosen among them.
 */
class SectionSource {
  /**
   * Create a section's source object.
   *
   * @param {object} options
   * @param {import('./Section').default} options.section
   * @param {RegExpMatchArray} options.sectionHeadingMatch Match of the section heading in the
   *   context code; its `index` and capture groups are read when collecting the match data.
   * @param {string} options.contextCode
   * @param {string} options.adjustedContextCode
   * @param {boolean} options.isInSectionContext
   */
  }) {
    // NOTE(review): the line opening this constructor's signature is not present in this copy of
    // the file; only the tail of the destructured parameter list is visible above.
    this.section = section;

    // Both modifyContext() and calculateMatchScore() branch on this flag.
    this.isInSectionContext = isInSectionContext;

    // Locates the section in the context code and copies the derived data (e.g. `headline`) onto
    // `this`.
    this.collectMatchData(sectionHeadingMatch, contextCode, adjustedContextCode);

    // A falsy `code` or `firstChunkCode` means the section's text could not be extracted from the
    // context code.
    if (!this.code || !this.firstChunkCode) {
      console.warn(`Couldn't read the "${this.headline}" section contents.`);

  /**
   * _For internal use._ Extract the section's last comment's indentation characters if needed or a
   * vote / bulleted reply placeholder.
   *
   * @param {import('./CommentForm').default} commentForm
   * @returns {?string}
   */
  extractLastCommentIndentation(commentForm) {
    // The result is memoized on the instance: `undefined` means "not computed yet", while `null`
    // means "computed, nothing to use".
    if (this.lastCommentIndentation === undefined) {
      // A final line of the first chunk that holds only `#` or `*` (optionally followed by spaces)
      // is treated as a placeholder for a vote / bulleted reply; the character itself is captured.
      const [, replyPlaceholder] = this.firstChunkCode.match(/\n([#*]) *\n+$/) || [];
      if (replyPlaceholder) {
        this.lastCommentIndentation = replyPlaceholder;
      } else {
        // Default that stays in place unless the last comment's indentation is adopted below.
        this.lastCommentIndentation = null;

        // Last comment of the section's first chunk, if there is any.
        const lastComment = this.section.commentsInFirstChunk.slice(-1)[0];

        // The last comment's indentation is only considered when the form sits in a numbered list
        // or the configuration asks to mimic the existing indentation characters.
        if (
          lastComment &&
          (commentForm.getContainerListType() === 'ol' || cd.config.indentationCharMode === 'mimic')
        ) {
          try {

            if (
              !lastComment.source.indentation.startsWith('#') ||

              // For now we use the workaround with commentForm.getContainerListType() to make
              // sure `#` is a part of comments organized in a numbered list, not of a numbered list
              // _in_ the target comment.
              commentForm.getContainerListType() === 'ol'
            ) {
              this.lastCommentIndentation = lastComment.source.indentation;
          } catch {
            // Deliberately ignored: if reading `lastComment.source.indentation` throws,
            // `lastCommentIndentation` keeps the `null` assigned above.
            // NOTE(review): which failures are expected here is not visible in this copy - confirm.

    return this.lastCommentIndentation;

  /**
   * Modify a whole section or page code string related to the section in accordance with an action.
   *
   * @param {object} options
   * @param {'replyInSection'|'addSubsection'} options.action
   * @param {string} options.commentCode Comment code, including trailing newlines and the
   *   signature.
   * @returns {object}
   */
  modifyContext({ action, commentCode }) {
    // When the context is the section itself, the section's presumed code is the text to modify.
    // NOTE(review): the "else" operand of this conditional is not present in this copy of the file.
    const originalContextCode = this.isInSectionContext ?
      this.section.presumedCode :
    let contextCode;
    switch (action) {
      case 'replyInSection': {
        // The comment is inserted at the end of the first chunk's content, i.e. before any
        // subsection heading.
        // NOTE(review): the rest of this expression is missing in this copy.
        contextCode = (
          originalContextCode.slice(0, this.firstChunkContentEndIndex) +
          commentCode +

      case 'addSubsection': {
        // The comment is inserted at the end of the whole section's content. Judging by its name,
        // endWithTwoNewlines() presumably pads the preceding code with a blank line - confirm
        // against the helper.
        // NOTE(review): the rest of this expression is missing in this copy.
        contextCode = (
          endWithTwoNewlines(originalContextCode.slice(0, this.contentEndIndex)) +
          commentCode +

    // `commentCode` is handed back unchanged together with the modified context code.
    return { contextCode, commentCode };

  /**
   * Collect data for the match, including section text, first chunk text, indexes, etc.
   *
   * @param {RegExpMatchArray} sectionHeadingMatch Heading match; its `index` and capture groups
   *   0-3 are read.
   * @param {string} contextCode
   * @param {string} adjustedContextCode
   * @private
   */
  collectMatchData(sectionHeadingMatch, contextCode, adjustedContextCode) {
    // Capture groups of the heading match as used in this method: [0] is the whole match (its
    // length gives the content start), [1] is the heading text used as an anchor in the regexps
    // below, [2] is the run of equal signs, [3] is the raw headline.
    const fullHeadingMatch = sectionHeadingMatch[1];

    // A heading terminates this section if it has at most as many equal signs as this section's
    // own heading.
    const equalSignsPattern = `={1,${sectionHeadingMatch[2].length}}`;

    // Matching is done on the adjusted code, but the resulting offsets are applied to the original
    // code, so the two strings are assumed to be aligned character by character - TODO confirm.
    const codeFromSection = contextCode.slice(sectionHeadingMatch.index);
    const adjustedCodeFromSection = adjustedContextCode.slice(sectionHeadingMatch.index);

    // Group 1 spans from this section's heading up to the line break preceding the next heading of
    // the same or a higher level.
    // NOTE(review): the fallback regexp after `||` is truncated in this copy of the file.
    const sectionMatch = (
      adjustedCodeFromSection.match(new RegExp(
        // Will fail at "===" or the like.
        '(' +
        mw.util.escapeRegExp(fullHeadingMatch) +
        '[^]*?\\n)' +
        equalSignsPattern +
        '[^=].*=+[ \\t\\x01\\x02]*\\n'
      )) ||
      adjustedCodeFromSection.match(new RegExp(
        '(' +
        mw.util.escapeRegExp(fullHeadingMatch) +

    // To simplify the workings of the "replyInSection" mode we don't consider terminating line
    // breaks to be a part of the first chunk of the section (i.e., the section subdivision before
    // the first heading).
    // NOTE(review): the fallback regexp after `||` is truncated in this copy of the file.
    const firstChunkMatch = (
      adjustedCodeFromSection.match(new RegExp(
        // Will fail at "===" or the like.
        '(' +
        mw.util.escapeRegExp(fullHeadingMatch) +
        '[^]*?\\n)\\n*' +

        // Any next heading.
        '={1,6}' +

        '[^=].*=+[ \\t\\x01\\x02]*\\n'
      )) ||
      adjustedCodeFromSection.match(new RegExp(
        '(' +
        mw.util.escapeRegExp(fullHeadingMatch) +

    // Both values are `null` when the corresponding match failed.
    // NOTE(review): `code.length`, `firstChunkCode.length` and the `.match()` calls below are not
    // guarded against `null`, while the constructor tests `!this.code || !this.firstChunkCode`
    // afterwards - confirm whether a failed match can actually reach this point.
    const code = sectionMatch && codeFromSection.substr(sectionMatch.index, sectionMatch[1].length);
    const firstChunkCode = (
      firstChunkMatch &&
      codeFromSection.substr(firstChunkMatch.index, firstChunkMatch[1].length)

    // All indexes below are offsets into the context code.
    const startIndex = sectionHeadingMatch.index;
    const endIndex = startIndex + code.length;
    const contentStartIndex = sectionHeadingMatch.index + sectionHeadingMatch[0].length;
    const firstChunkEndIndex = startIndex + firstChunkCode.length;

    // The "content" end indexes start at the respective end indexes and are moved back past
    // anything matched by the `cd.g.keepInSectionEnding` regexps, so that such material stays
    // after an inserted comment.
    let firstChunkContentEndIndex = firstChunkEndIndex;
    let contentEndIndex = endIndex;
    cd.g.keepInSectionEnding.forEach((regexp) => {
      // Note: this `firstChunkMatch` shadows the outer variable of the same name.
      const firstChunkMatch = firstChunkCode.match(regexp);
      if (firstChunkMatch) {
        // `1` accounts for the first line break.
        firstChunkContentEndIndex -= firstChunkMatch[0].length - 1;

      const match = code.match(regexp);
      if (match) {
        // `1` accounts for the first line break.
        contentEndIndex -= match[0].length - 1;

      // NOTE(review): the paragraph below reads as a block comment whose delimiters are missing in
      // this copy of the file.
      Sections may have `#` or `*` as a placeholder for a vote or bulleted reply. In this case,
      we must use that `#` or `*` in the reply. As for the placeholder, perhaps we should remove
      it, but as for now, we keep it because if:

        * the placeholder character is `*`,
        * `cd.config.indentationCharMode` is `'unify'`,
        * `cd.config.defaultIndentationChar` is `':'`, and
        * there is more than one reply,

      the next reply would go back to `:`, not `*` as should be.
    const placeholderMatch = firstChunkCode.match(/\n([#*] *\n+)$/);
    if (placeholderMatch) {
      // Move the insertion point before the placeholder line (the placeholder character, the
      // spaces after it and the trailing line breaks).
      firstChunkContentEndIndex -= placeholderMatch[1].length;

    // Copy the collected data onto the instance.
    // NOTE(review): part of the property list is missing in this copy of the file.
    Object.assign(this, {
      relativeContentStartIndex: contentStartIndex - startIndex,
      headline: normalizeCode(removeWikiMarkup(sectionHeadingMatch[3])),

  /**
   * Calculate and set a score for the match.
   *
   * @param {number} sectionIndex
   * @param {string} thisHeadline
   * @param {string[]} headlines
   * @private
   */
  calculateMatchScore(sectionIndex, thisHeadline, headlines) {
    // A headline containing `{{` only gets a fixed half score instead of being compared with the
    // headline extracted from the code.
    const doesHeadlineMatch = thisHeadline.includes('{{') ? 0.5 : this.headline === thisHeadline;

    let doesSectionIndexMatch;
    let doPreviousHeadlinesMatch;
    if (this.isInSectionContext) {
      // In the section context these two factors don't contribute to the score.
      doesSectionIndexMatch = 0;
      doPreviousHeadlinesMatch = 0;
    } else {
      // Matching section index is one of the most unreliable ways to tell matching sections as
      // sections may be added and removed from the page, so we don't rely on it very much.
      doesSectionIndexMatch = this.section.index === sectionIndex;

      // Compare the headlines of up to 3 sections preceding this one on the page with the
      // headlines found in the code. `every()` yields `true` when there are no preceding sections.
      // NOTE(review): the expression assigned to `previousHeadlinesInCode` is truncated in this
      // copy of the file.
      const previousHeadlinesToCheckCount = 3;
      const previousHeadlinesInCode = headlines
      doPreviousHeadlinesMatch = sectionRegistry.getAll()
        .slice(Math.max(0, this.section.index - previousHeadlinesToCheckCount), this.section.index)
        .map((section) => section.headline)
        .every((headline, i) => normalizeCode(headline) === previousHeadlinesInCode[i]);


    // Pick the oldest signature among those extracted from the section code.
    // NOTE(review): the comparison in the `if` below is garbled in this copy of the file (its
    // operands are missing).
    let oldestSig;
    extractSignatures(this.code).forEach((sig) => {
      if (!oldestSig || (! && || > {
        oldestSig = sig;
    const sectionOldestComment = this.section.oldestComment;

    // NOTE(review): this conditional is truncated in this copy of the file; the visible part
    // compares the oldest signature's timestamp with that of the section's oldest comment.
    const doesOldestCommentMatch = oldestSig ?
        sectionOldestComment &&
          oldestSig.timestamp === sectionOldestComment.timestamp &&
      ) :

      // There are no comments either in the code or on the page.

    // Multiply by 0.5 to avoid situations like
    // even though they are not CD's fault
    // (With no oldest comment on the page and no signature in the code, this factor is 0.5;
    // otherwise it starts at 0.)
    let oldestCommentWordOverlap = Number(!this.section.oldestComment && !oldestSig) * 0.5;

    if (this.section.oldestComment && oldestSig) {
      // Use the comment text overlap factor due to this error
      // The comment's source code is
      // extracted only superficially, without excluding the headline code and other operations
      // performed in Comment#adjustCommentBeginning.
      // NOTE(review): the remaining arguments of this call are missing in this copy of the file.
      oldestCommentWordOverlap = calculateWordOverlap(
        removeWikiMarkup(this.code.slice(oldestSig.commentStartIndex, oldestSig.startIndex))

    // If changing this, change the maximal possible score in Section#searchInCode
    // Boolean factors are coerced to 0 or 1 by the multiplication.
    this.score = (
      doesOldestCommentMatch * 1 +
      oldestCommentWordOverlap +
      doesHeadlineMatch * 1 +
      doesSectionIndexMatch * 0.5 +

      // Shouldn't give too high a weight to this factor as it is true for every first section.
      doPreviousHeadlinesMatch * 0.25

export default SectionSource;