#!/usr/bin/python3
# Author: Michal Kodad

import requests
import sys
import re
import os


def print_help():
    """Print a short usage hint to standard error and exit with status 1."""
    usage = "Call me with `-ini` param"
    print(usage, file=sys.stderr)
    raise SystemExit(1)


# Word index shared by the whole script: lowercase word -> path of a page
# containing it (filled by `process_content` or loaded from the index file).
hash_map = dict()


def process_content(root_url, path, timeout=10):
    """Download one page and record each word of its main content in `hash_map`.

    The page at ``{root_url}/{path}`` is fetched. When the request fails or the
    server does not answer with status 200, the page is reported on standard
    error and skipped. Otherwise every word found in the main content is mapped
    to ``/{path}`` in the module-level `hash_map`; a word seen on several pages
    keeps the page processed last.

    Args:
        root_url: Scheme and host of the site, without a trailing slash.
        path: Page path relative to `root_url`, without a leading slash.
        timeout: Seconds to wait for the server before giving up on the page.
    """
    url = f"{root_url}/{path}"
    try:
        # Without a timeout a single stalled connection blocks the crawl forever.
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as error:
        # One unreachable page must not abort the remaining downloads.
        print("----- Skipped ----- ", url, error, file=sys.stderr)
        return
    if response.status_code != 200:
        print("----- Skipped ----- ", response.url, file=sys.stderr)
        return
    print("Processing page: ", response.url, file=sys.stderr)

    page = f"/{path}"
    for word in _extract_words(response.text):
        hash_map[word] = page


def _extract_words(html):
    """Return the lowercase words found in the main content of an HTML page."""
    # Drop everything except the div holding the main content: cut all text up
    # to the content div and everything from the sidebar wrapper onwards.
    content = re.sub(r"^.*<div id='content'>", '', html, flags=re.DOTALL)
    content = re.sub(r"<div id='sidebar-wrapper'>.*", '', content, flags=re.DOTALL)

    # Replace HTML entities with a space. Only well-formed entities are matched
    # (&name;, &#123;, &#x1F;), so a stray `&` no longer erases all the text up
    # to the next semicolon.
    content = re.sub(r'&(?:#[0-9]+|#[xX][0-9a-fA-F]+|[a-zA-Z][a-zA-Z0-9]*);', ' ', content)

    # Replace tags with a space.
    content = re.sub(r'<.*?>', ' ', content, flags=re.DOTALL)

    return re.findall(r'\w+', content.lower())


def preprocess():
    """Index every candidate page of the site by calling `process_content`.

    Page paths have the form ``{section}/{year}/{kind}{series}.html`` for both
    sections, years 0-32, series 1-5 and the three page kinds. Pages are
    visited section by section, then by year, series and page kind.
    """
    base = "http://ksp.mff.cuni.cz"
    sections = ('h/ulohy', 'z/ulohy')
    kinds = ('zadani', 'reseni', 'komentare')

    targets = [
        f"{section}/{volume}/{kind}{part}.html"
        for section in sections
        for volume in range(33)
        for part in range(1, 6)
        for kind in kinds
    ]
    for target in targets:
        process_content(base, target)


# File holding the word index, one "<word> <page path>" pair per line.
PREPROCESSED_FILE = "preprocessed.txt"

# `-ini` mode: crawl the site, store the index sorted by word, and exit.
if len(sys.argv) >= 2:
    if sys.argv[1] == '-ini':
        preprocess()

        # The encoding is fixed to UTF-8 so that the index does not depend on
        # the platform's locale; indexed words may contain non-ASCII letters.
        with open(PREPROCESSED_FILE, "w", encoding="utf-8") as db:
            db.writelines(f"{word} {hash_map[word]}\n" for word in sorted(hash_map))
        print("Success", file=sys.stderr)
        sys.exit(0)

    # Any other argument is unsupported.
    print_help()

# Query mode: load the index written by a previous `-ini` run. An index file
# written earlier in a different locale encoding must be regenerated.
try:
    with open(PREPROCESSED_FILE, "r", encoding="utf-8") as file:
        for line in file:
            if not line.strip():
                # Tolerate blank lines (e.g. a trailing newline added by hand).
                continue
            key, value = line.split()
            hash_map[key] = value
except FileNotFoundError:
    # No index yet: tell the user to build it first.
    print_help()

# Answer queries from standard input: the first line is the number of queries,
# each following line is one word whose page path is printed.
n = int(input())
for _ in range(n):
    # Index keys are lowercase and free of whitespace, so normalise the query
    # the same way before looking it up.
    word_to_find = input().strip().lower()
    if word_to_find not in hash_map:
        # An explicit check instead of `assert`: assertions are removed under
        # `python -O`. `sys.exit` with a string prints it to standard error
        # and exits with status 1.
        sys.exit(f"Word `{word_to_find}` not found in {PREPROCESSED_FILE}")
    print(hash_map[word_to_find])