#!/usr/bin/env python3
from lxml import etree

from termwikiimporter import read_termwiki


def l_to_expression(lang, lemma_element):
    """Yield one expression dict per comma-separated term of an element.

    Args:
        lang: language code stored under the 'language' key of every
            yielded dict.
        lemma_element: element whose text holds one or more comma-separated
            terms; its 'pos' attribute is copied to the 'pos' key. May be
            None or have no text, in which case nothing is yielded.

    Yields:
        dict with the keys 'expression', 'pos', 'language' and 'sanctioned'
        (the latter always the string 'True').
    """
    # find() returns None for a missing element and .text is None for an
    # empty one; neither carries any term, so there is nothing to yield.
    if lemma_element is None or not lemma_element.text:
        return

    pos = lemma_element.get('pos')
    for exp in lemma_element.text.split(','):
        exp = exp.strip()
        # Skip blanks left behind by stray commas such as "a,,b" or "a,".
        if not exp:
            continue
        yield {
            'expression': exp,
            'pos': pos,
            'language': lang,
            'sanctioned': 'True',
        }


def entry2concept(entry):
    """Build a Concept from one dictionary entry and return it as a string.

    The concept's collection is set to {'JustermTana'}. Expressions are
    taken from the first './/l' element of the entry (language 'nb') and
    from the first './/t' element of every 'tg' element, whose xml:lang
    attribute is translated through LANGS.

    Args:
        entry: the element to convert.

    Returns:
        str(concept) of the populated read_termwiki.Concept.

    Raises:
        KeyError: if a 'tg' element's xml:lang value is not a key of LANGS.
    """
    xml_lang = '{http://www.w3.org/XML/1998/namespace}lang'

    def sources():
        # Lazily pair each language code with the element holding its terms,
        # so lookups happen in the same order as the expressions are added.
        yield 'nb', entry.find('.//l')
        for group in entry.iter('tg'):
            yield LANGS[group.get(xml_lang)], group.find('.//t')

    concept = read_termwiki.Concept()
    concept.data['concept']['collection'] = {'JustermTana'}

    for lang, element in sources():
        for expression in l_to_expression(lang, element):
            concept.related_expressions.append(expression)

    return str(concept)


# Maps the three-letter xml:lang codes read from <tg> elements to the
# two-letter language codes stored in the generated expressions.
LANGS = {
    "fin": "fi",
    "sme": "se",
}
# Convert every <e> element of the source XML into a <page> element holding
# the serialized concept, collected under a single <pages> root.
TREE = etree.parse('gt_rapl-ril.xml')
PAGES = etree.Element('pages')
for index, entry in enumerate(TREE.getroot().iter('e')):
    page = etree.SubElement(PAGES, 'page')
    # The running index gives each page a distinct title.
    page.set('title', f'Juridihkka:JustermTana {index}')
    concept = etree.SubElement(page, 'concept')
    concept.text = entry2concept(entry)

# Write as UTF-8 explicitly: tostring(..., encoding='unicode') returns a str,
# and the platform's default file encoding may be unable to represent
# non-ASCII characters in the terms.
with open('pages.xml', 'w', encoding='utf-8') as to_file:
    to_file.write(etree.tostring(PAGES, pretty_print=True, encoding='unicode'))
