package com.github.wikibot.main;

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.sql.DriverManager;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Objects;
import java.util.Properties;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

import org.wikipedia.Wiki;
import org.wikiutils.ParseUtils;

import com.github.wikibot.dumps.XMLDumpReader;

/**
 * Ad-hoc script that lists the value of the {@code miejsce urodzenia} infobox parameter for
 * biography articles which are not yet categorised by place of birth.
 *
 * <p>The set of articles is computed from the category tree ("Biografie" minus
 * "Biografie według miejsca urodzenia") using the {@code plwiki_p} database, the page texts are
 * read from the {@code plwiki} XML dump, and the result is written to {@code ./data/temp.txt}.
 */
public final class Test {
	private static final String SQL_PLWIKI_URI = "jdbc:mysql://localhost:4715/plwiki_p";

	/** Namespace number used by the category-tree query to recognise subcategories. */
	private static final int CATEGORY_NAMESPACE = 14;

	/** Matches {@code key='value'} lines of the credentials file. Must be declared before the static block. */
	private static final Pattern CNF_ENTRY = Pattern.compile("(.+)='(.+)'");

	/** Connection properties: the defaults below overlaid with the entries read from the credentials file. */
	private static final Properties properties;

	static {
		var defaultSQLProperties = new Properties();
		defaultSQLProperties.setProperty("autoReconnect", "true");
		defaultSQLProperties.setProperty("useUnicode", "yes");
		defaultSQLProperties.setProperty("characterEncoding", StandardCharsets.UTF_8.name());
		defaultSQLProperties.setProperty("sslMode", "DISABLED");

		try {
			properties = prepareSQLProperties(defaultSQLProperties);
		} catch (IOException e) {
			throw new java.io.UncheckedIOException("Unable to load SQL connection properties", e);
		}
	}

	/**
	 * Runs the report: resolves the article and template sets, scans the dump and writes one
	 * wikitext list item per matching article, sorted by title.
	 *
	 * @param args ignored
	 * @throws Exception on any database, dump or file-system failure
	 */
	public static void main(String[] args) throws Exception {
		var articles = new HashSet<String>();
		articles.addAll(getRecursiveCategoryMembers("Biografie", Wiki.MAIN_NAMESPACE));
		articles.removeAll(getRecursiveCategoryMembers("Biografie według miejsca urodzenia", Wiki.MAIN_NAMESPACE));

		System.out.println(articles.size());

		var templates = getRecursiveCategoryMembers("Infoboksy – biogramy", Wiki.TEMPLATE_NAMESPACE);
		var reader = new XMLDumpReader("plwiki", true).seekTitles(articles);

		record Item(String title, String parameter) {}

		try (var stream = reader.getStAXReaderStream()) {
			var out = stream.parallel()
				.<Item>mapMulti((rev, consumer) -> {
					// Emit at most one item per revision: the first non-blank value found
					// among all infobox templates present in the text.
					templates.stream()
						.flatMap(template -> ParseUtils.getTemplatesIgnoreCase(template, rev.getText()).stream())
						.map(ParseUtils::getTemplateParametersWithValue)
						.map(params -> params.get("miejsce urodzenia"))
						.filter(Objects::nonNull)
						.filter(value -> !value.isBlank())
						.findFirst()
						.ifPresent(value -> consumer.accept(new Item(rev.getTitle(), value)));
				})
				.sorted((i1, i2) -> i1.title().compareTo(i2.title()))
				.map(item -> String.format("#[[%s]]: %s", item.title(), item.parameter()))
				.collect(Collectors.joining("\n"));

			Files.writeString(Paths.get("./data/temp.txt"), out);
		}
	}

	/**
	 * Builds the JDBC connection properties by overlaying every {@code key='value'} line of
	 * {@code ./data/sessions/replica.my.cnf} on top of the supplied defaults. Lines that do not
	 * match that shape are skipped.
	 *
	 * @param defaults fallback properties consulted when a key is absent from the file
	 * @return a new {@link Properties} instance backed by {@code defaults}
	 * @throws IOException if the credentials file cannot be opened
	 */
	private static Properties prepareSQLProperties(Properties defaults) throws IOException {
		var sqlProperties = new Properties(defaults);
		var cnf = Paths.get("./data/sessions/replica.my.cnf");

		// Files.lines keeps the file open until the stream is closed.
		try (var lines = Files.lines(cnf)) {
			lines.map(CNF_ENTRY::matcher)
				.filter(Matcher::matches)
				.forEach(m -> sqlProperties.setProperty(m.group(1), m.group(2)));
		}

		return sqlProperties;
	}

	/**
	 * Collects the titles of all non-redirect pages in the given namespace that belong to
	 * {@code category} or to any category reachable from it, walking the tree one level per query.
	 *
	 * <p>Pages whose namespace equals {@code namespace} are treated as members; otherwise pages in
	 * namespace {@value #CATEGORY_NAMESPACE} are treated as subcategories to descend into.
	 * Consequently, when {@code namespace} itself is {@value #CATEGORY_NAMESPACE} the walk does not
	 * descend below the first level. Already visited categories are never queried again, so cycles
	 * in the category graph terminate.
	 *
	 * @param category root category title without namespace prefix; spaces or underscores accepted
	 * @param namespace namespace number of the pages to collect
	 * @return member titles with underscores replaced by spaces
	 * @throws SQLException if the connection or any query fails
	 */
	private static Set<String> getRecursiveCategoryMembers(String category, int namespace) throws SQLException {
		var articles = new HashSet<String>(400000);
		var visitedCats = new HashSet<String>(100000);
		var targetCategories = Arrays.asList(category.replace(' ', '_'));
		var depth = 0;

		// %s is replaced with one "?" placeholder per category of the current level.
		final var queryFmt = """
			SELECT DISTINCT page_title AS page_title, page_namespace
			FROM page LEFT JOIN categorylinks ON cl_from = page_id
			WHERE page_is_redirect = 0
			AND cl_to IN (%s);
			""";

		try (var connection = DriverManager.getConnection(SQL_PLWIKI_URI, properties)) {
			while (!targetCategories.isEmpty()) {
				var placeholders = targetCategories.stream()
					.map(cat -> "?")
					.collect(Collectors.joining(","));

				var members = new ArrayList<String>();
				var subcats = new ArrayList<String>();

				// Bound parameters let the driver handle quoting of titles (quotes, backslashes);
				// try-with-resources releases the statement and result set on every iteration.
				try (var statement = connection.prepareStatement(String.format(queryFmt, placeholders))) {
					var index = 1;

					for (var cat : targetCategories) {
						statement.setString(index++, cat);
					}

					try (var rs = statement.executeQuery()) {
						while (rs.next()) {
							var title = rs.getString("page_title");
							var ns = rs.getInt("page_namespace");

							if (ns == namespace) {
								members.add(title.replace('_', ' '));
							} else if (ns == CATEGORY_NAMESPACE) {
								subcats.add(title);
							}
						}
					}
				}

				articles.addAll(members);
				visitedCats.addAll(targetCategories);

				System.out.printf("depth = %d, articles = %d, subcats = %d%n", depth++, members.size(), subcats.size());

				// Next level: only subcategories that have not been expanded yet.
				subcats.removeAll(visitedCats);
				targetCategories = subcats;
			}
		}

		System.out.printf("Got %d category members for category %s.%n", articles.size(), category);
		return articles;
	}
}
