package com.github.wikibot.main;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.sql.DriverManager;
import java.sql.SQLException;
import java.text.Collator;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

import org.json.JSONObject;
import org.wikiutils.ParseUtils;

import com.github.wikibot.dumps.XMLDumpConfig;
import com.github.wikibot.dumps.XMLDumpTypes;
import com.github.wikibot.dumps.XMLRevision;
import com.github.wikibot.utils.DBUtils;

/**
 * One-off report comparing the {@code www} parameter of selected plwiki infoboxes with the
 * first Wikidata {@value #CLAIM} statement of the linked item.
 *
 * <p>Pipeline, as implemented in {@link #main(String[])}:
 * <ol>
 *   <li>read the list of infoboxes from a wiki page and keep those whose quoted source does not
 *       contain <code>{{#property:p856}}</code>;</li>
 *   <li>query the plwiki database for main-namespace pages transcluding these infoboxes;</li>
 *   <li>query the Wikidata database for items linking to the property page and having a plwiki
 *       sitelink among those pages;</li>
 *   <li>read the matching items from the local Wikidata dump and extract the claim value;</li>
 *   <li>scan the local plwiki dump and write two lists: articles with an empty {@code www}
 *       parameter and articles whose parameter differs from the Wikidata value.</li>
 * </ol>
 */
public final class Test {
    private static final String SQL_PLWIKI_URI = "jdbc:mysql://plwiki.analytics.db.svc.wikimedia.cloud:3306/plwiki_p";
    private static final String SQL_WDWIKI_URI = "jdbc:mysql://wikidatawiki.analytics.db.svc.wikimedia.cloud:3306/wikidatawiki_p";

    private static final String CLAIM = "P856";
    private static final Wikibot wb = Wikibot.newSession("pl.wikipedia.org");

    /** Matches one list entry: a wikilinked infobox title (group 1) followed by its {@code <pre>} body (group 2). */
    private static final Pattern INFOBOX_ENTRY_PATT =
        Pattern.compile("^\\* \\[{2}([^\\]]+?)\\]{2}\n<pre>(.+?)</pre>", Pattern.MULTILINE | Pattern.DOTALL);

    /** Matches page titles made of {@code Q} followed by digits only. */
    private static final Pattern ITEM_TITLE_PATT = Pattern.compile("^Q\\d+$");

    private Test() {
        // static entry point only, not meant to be instantiated
    }

    /**
     * Runs the whole report and writes the results to {@code ./data/plwiki-www-no-value.txt}
     * and {@code ./data/plwiki-www-different-value.txt}.
     *
     * @param args ignored
     * @throws Exception on any wiki, database, dump or file-system failure
     */
    public static void main(String[] args) throws Exception {
        var text = wb.getPageText(List.of("Wikipedysta:Malarz pl/szablony/XYZ")).get(0);

        var targetedInfoboxes = findTargetedInfoboxes(text);
        System.out.println("targeted infoboxes: " + targetedInfoboxes.size());

        var sitelinkToInfobox = getInfoboxTransclusions(targetedInfoboxes);
        System.out.println("transclusions: " + sitelinkToInfobox.size());

        var wdPageidToSitelink = retrievePropertyBacklinks(sitelinkToInfobox.keySet());
        System.out.println("WD backlinks: " + wdPageidToSitelink.size());

        var items = getWikidataItems(wdPageidToSitelink.keySet());
        System.out.println("WD items: " + items.size());

        // No longer needed; dropping the entries early is presumably meant to ease memory
        // pressure before the plwiki dump is processed.
        wdPageidToSitelink.clear();

        // A merge function is required: without it a sitelink appearing in two items would
        // throw IllegalStateException and abort the run. The first item encountered wins.
        var sitelinkToItem = items.stream()
            .collect(Collectors.toMap(Item::article, item -> item, (first, second) -> first));

        var articles = items.stream().map(Item::article).distinct().toList();

        var dump = new XMLDumpConfig("plwiki").type(XMLDumpTypes.PAGES_ARTICLES_MULTISTREAM).local().fetch().get();

        var noValue = new ArrayList<String>(100000);
        var differentValue = new ArrayList<String>(100000);

        try (var stream = dump.filterTitles(articles).stream()) {
            stream
                .filter(XMLRevision::isMainNamespace)
                .filter(XMLRevision::nonRedirect)
                .filter(rev -> sitelinkToInfobox.containsKey(rev.getTitle()))
                .filter(rev -> sitelinkToItem.containsKey(rev.getTitle()))
                .sorted(Comparator.comparing(XMLRevision::getTitle, Collator.getInstance(Locale.forLanguageTag("pl"))))
                // forEachOrdered keeps the sorted order and makes the appends to the
                // (non-thread-safe) lists safe regardless of how the stream is executed
                .forEachOrdered(rev -> {
                    var title = rev.getTitle();
                    var infobox = sitelinkToInfobox.get(title);
                    var item = sitelinkToItem.get(title);
                    var templates = ParseUtils.getTemplatesIgnoreCase(infobox, rev.getText());

                    // only unambiguous cases: exactly one occurrence of the infobox in the page text
                    if (templates.size() != 1) {
                        return;
                    }

                    var params = ParseUtils.getTemplateParametersWithValue(templates.get(0));
                    var value = params.getOrDefault("www", "");

                    if (value.isEmpty()) {
                        noValue.add(String.format("#[[%s]] ([[d:%s]], %s): %s",
                                                  title,
                                                  item.entity(),
                                                  infobox,
                                                  item.value()));
                    } else if (!value.equals(item.value())) {
                        differentValue.add(String.format("#[[%s]] ([[d:%s]], %s): %s vs %s",
                                                         title,
                                                         item.entity(),
                                                         infobox,
                                                         value,
                                                         item.value()));
                    }
                });
        }

        System.out.println("no value: " + noValue.size());
        System.out.println("different value: " + differentValue.size());

        Files.write(Paths.get("./data/plwiki-www-no-value.txt"), noValue);
        Files.write(Paths.get("./data/plwiki-www-different-value.txt"), differentValue);
    }

    /**
     * Extracts the infobox titles listed in the given wikitext, skipping entries whose
     * {@code <pre>} body already contains <code>{{#property:p856}}</code> (case-insensitive).
     *
     * @param text wikitext of the list page
     * @return linked titles of the remaining entries, in order of appearance
     */
    private static List<String> findTargetedInfoboxes(String text) {
        return INFOBOX_ENTRY_PATT.matcher(text).results()
            // Locale.ROOT keeps the case folding independent of the JVM default locale
            .filter(mr -> !mr.group(2).toLowerCase(Locale.ROOT).contains("{{#property:p856}}"))
            .map(mr -> mr.group(1))
            .toList();
    }

    /**
     * Finds main-namespace plwiki pages transcluding any of the given infobox templates.
     *
     * @param infoboxes template titles; the namespace prefix is stripped before querying
     * @return mutable map of page title to template title, both with spaces instead of
     *         underscores; a page returned for several templates keeps the last row read
     * @throws SQLException if the connection or the query fails
     * @throws IOException declared for {@code DBUtils.prepareSQLProperties()}
     */
    private static Map<String, String> getInfoboxTransclusions(List<String> infoboxes) throws SQLException, IOException {
        var out = new HashMap<String, String>(750000);

        var dbTitles = infoboxes.stream()
            .map(wb::removeNamespace)
            .map(title -> title.replace(' ', '_'))
            .toList();

        if (dbTitles.isEmpty()) {
            // "IN ()" is not valid SQL; nothing to look up anyway
            return out;
        }

        // One bound parameter per title: the titles come from an editable wiki page, so they
        // must never be spliced into the SQL text.
        var placeholders = dbTitles.stream().map(title -> "?").collect(Collectors.joining(", "));

        var query = String.format("""
            SELECT
                DISTINCT(page_id),
                page_title,
                lt_title
            FROM page
                INNER JOIN templatelinks ON page_id = tl_from
                INNER JOIN linktarget ON tl_target_id = lt_id
            WHERE
                tl_from_namespace = 0 AND
                lt_namespace = 10 AND
                lt_title IN (%s)
            """, placeholders);

        try (var connection = DriverManager.getConnection(SQL_PLWIKI_URI, DBUtils.prepareSQLProperties());
             var statement = connection.prepareStatement(query)) {
            for (var i = 0; i < dbTitles.size(); i++) {
                statement.setString(i + 1, dbTitles.get(i)); // JDBC parameters are 1-based
            }

            try (var resultSet = statement.executeQuery()) {
                while (resultSet.next()) {
                    var title = resultSet.getString("page_title").replace('_', ' ');
                    var template = resultSet.getString("lt_title").replace('_', ' ');
                    out.put(title, template);
                }
            }
        }

        return out;
    }

    /**
     * Finds Wikidata items that link to the {@value #CLAIM} property page and have a plwiki
     * sitelink contained in {@code sitepages}.
     *
     * @param sitepages plwiki page titles (with spaces) to keep
     * @return mutable map of Wikidata page id to plwiki page title
     * @throws SQLException if the connection or the query fails
     * @throws IOException declared for {@code DBUtils.prepareSQLProperties()}
     */
    private static Map<Long, String> retrievePropertyBacklinks(Set<String> sitepages) throws SQLException, IOException {
        var backlinks = new HashMap<Long, String>(600000);

        // NOTE(review): newer MediaWiki schemas route pagelinks through linktarget
        // (pl_target_id) instead of pl_namespace/pl_title - confirm these columns are still
        // available on the replica before running.
        var query = """
            SELECT
                DISTINCT(page_id),
                ips_site_page
            FROM page
                INNER JOIN pagelinks ON pl_from = page_id
                INNER JOIN wb_items_per_site ON page_namespace = 0 AND
                    CONCAT('Q', ips_item_id) = page_title
            WHERE
                pl_from_namespace = 0 AND
                ips_site_id = 'plwiki' AND
                pl_namespace = 120 AND
                pl_title = ?
            """;

        try (var connection = DriverManager.getConnection(SQL_WDWIKI_URI, DBUtils.prepareSQLProperties());
             var statement = connection.prepareStatement(query)) {
            statement.setString(1, CLAIM);

            try (var rs = statement.executeQuery()) {
                while (rs.next()) {
                    var id = rs.getLong("page_id");
                    var sitepage = rs.getString("ips_site_page").replace('_', ' ');

                    if (sitepages.contains(sitepage)) {
                        backlinks.put(id, sitepage);
                    }
                }
            }
        }

        return backlinks;
    }

    /**
     * Reads the given pages from the local Wikidata dump and converts them to {@link Item}s.
     *
     * <p>Only main-namespace, non-redirect, non-deleted revisions titled {@code Q<digits>} are
     * parsed; see {@link #toItem(JSONObject)} for the conditions on the entity JSON.
     *
     * @param wdPageids Wikidata page ids to read from the dump
     * @return unmodifiable list of extracted items
     */
    private static List<Item> getWikidataItems(Set<Long> wdPageids) {
        var wdDump = new XMLDumpConfig("wikidatawiki").type(XMLDumpTypes.PAGES_ARTICLES_MULTISTREAM).local().fetch().get();

        try (var stream = wdDump.filterIds(wdPageids).stream()) {
            return stream
                .filter(XMLRevision::isMainNamespace)
                .filter(XMLRevision::nonRedirect)
                .filter(rev -> ITEM_TITLE_PATT.matcher(rev.getTitle()).matches())
                .filter(rev -> !rev.isRevisionDeleted())
                .map(rev -> new JSONObject(rev.getText()))
                .map(Test::toItem)
                .flatMap(Optional::stream)
                .toList();
        }
    }

    /**
     * Converts an entity JSON document to an {@link Item}.
     *
     * <p>The result is empty unless the entity is of type {@code item}, has a {@code plwiki}
     * sitelink, and the first {@value #CLAIM} statement has a main snak of snaktype
     * {@code value} holding a non-empty {@code string} data value. Other statements of the
     * same property are not inspected.
     *
     * @param json parsed entity document
     * @return the item, or empty if any of the conditions above is not met
     */
    private static Optional<Item> toItem(JSONObject json) {
        if (!json.optString("type").equals("item")) {
            return Optional.empty();
        }

        // optJSONObject yields null when the key is missing or is not a JSON object
        var sitelinks = json.optJSONObject("sitelinks");
        var claims = json.optJSONObject("claims");

        if (sitelinks == null || !sitelinks.has("plwiki") || claims == null || !claims.has(CLAIM)) {
            return Optional.empty();
        }

        var statements = claims.optJSONArray(CLAIM);

        if (statements == null || statements.isEmpty()) {
            return Optional.empty();
        }

        // optJSONObject(0) instead of get(0) plus cast: an unexpected element type yields an
        // empty result rather than an exception that would abort the whole dump pass
        return Optional.ofNullable(statements.optJSONObject(0))
            .map(statement -> statement.optJSONObject("mainsnak"))
            .filter(mainsnak -> mainsnak.optString("snaktype").equals("value"))
            .map(mainsnak -> mainsnak.optJSONObject("datavalue"))
            .filter(datavalue -> datavalue.optString("type").equals("string"))
            .map(datavalue -> datavalue.optString("value"))
            .filter(value -> !value.isEmpty())
            .map(value -> new Item(sitelinks.getJSONObject("plwiki").getString("title"), json.getString("id"), value));
    }

    /**
     * A Wikidata item reduced to the data needed by the report.
     *
     * @param article title of the plwiki sitelink
     * @param entity entity id taken from the {@code id} field of the entity JSON
     * @param value string value of the first {@value #CLAIM} statement
     */
    private record Item(String article, String entity, String value) {}
}
