#!/usr/bin/env python3

# -*- coding: utf-8 -*-
"""Convert mekanikk-1999 to TermWiki."""
import os

from lxml import etree

from termwikiimporter import read_termwiki

# Directory holding the mekanikk-1999 source files, located below $GTHOME.
# os.getenv() returns None when GTHOME is unset, in which case os.path.join()
# raises TypeError as soon as this module is loaded.
SRCDIR = os.path.join(os.getenv('GTHOME'), 'words/terms/mekanikk-1999/newsrc')


def read_terms(termfile, title_index, language):
    """Read a terms-xxx.xml file and merge its senses into title_index.

    Every ``sense`` element that carries an ``idref`` attribute is folded
    into the concept stored under that id in ``title_index``; a new
    ``read_termwiki.Concept`` is created the first time an id is seen.
    Senses without ``idref`` are ignored.

    Args:
        termfile: parsed lxml tree (or element) of one terms-xxx.xml file.
        title_index: dict mapping sense ids to concepts. Updated in place.
        language: language code given to the definition and to every
            expression that has no bracketed language marker.
    """
    for sense in termfile.iter('sense'):
        identifier = sense.get('idref')
        if identifier is None:
            continue

        # Only build a new Concept when this id has not been seen before.
        concept = title_index.get(identifier)
        if concept is None:
            concept = read_termwiki.Concept()
        # Register the concept once per sense, not once per expression.
        title_index[identifier] = concept

        concept_data = concept.data['concept']
        category = sense.get('class')
        if category:
            concept_data['category'] = category
        if not concept_data.get('collection'):
            concept_data['collection'] = {'Mekanikk-1999'}

        definition = sense.find('.//def')
        if definition is not None and definition.text is not None:
            concept.data['concept_infos'].append({
                'language': language,
                # Collapse runs of whitespace (including newlines) to one space.
                'definition': ' '.join(definition.text.split())
            })

        # The expressions live in the <head> found below the grandparent of
        # the sense, as a comma separated list.
        head = sense.getparent().getparent().find('.//head')
        if head is None or head.text is None:
            # No expressions to add; the concept keeps whatever else was read.
            continue

        for exp in head.text.split(','):
            exp = exp.strip()
            lang = language
            if '[' in exp:
                # A bracketed marker overrides the file language:
                # "[b]" selects 'nb', any other marker selects 'nn'.
                lang = 'nb' if '[b]' in exp else 'nn'
                # Drop the marker and the whitespace that preceded it, so
                # "word [b]" becomes "word" rather than "word ".
                exp = exp.partition('[')[0].strip()
            if not exp:
                # Empty fragments (e.g. from "a,,b") are not expressions.
                continue
            concept.clean_up_expression({
                'language': lang,
                'expression': exp,
                'sanctioned': 'True'
            })


def read_sdterm():
    """Collect the concepts found in all the SD-terms language files.

    Parses ``terms-<lang>.xml`` in SRCDIR for each source language and lets
    ``read_terms`` merge the content into one shared index.

    Returns:
        dict: the index filled in by ``read_terms``, keyed by sense id.
    """
    # Language codes used in the file names -> codes that wikimedia digests
    wiki_codes = {
        'eng': 'en',
        'fin': 'fi',
        'nor': 'nb',
        'sme': 'se',
        'swe': 'sv',
    }
    concepts = {}

    for lang, wiki_lang in wiki_codes.items():
        tree = etree.parse(f'{SRCDIR}/terms-{lang}.xml')
        read_terms(tree, concepts, wiki_lang)

    return concepts


def write_pages():
    """Write the content of the mekanikk-1999 files to a tw like format.

    Builds a ``pages`` element with one ``page`` child per concept returned
    by ``read_sdterm``. The page's ``title`` attribute is the concept's id
    and its ``concept`` child holds ``str(concept)``. The number of concepts
    is printed, and the document is saved as ``pages.xml`` in SRCDIR.
    """
    pages = etree.Element('pages')
    title_index = read_sdterm()
    print(len(title_index))
    for title, concept in title_index.items():
        page = etree.SubElement(pages, 'page')
        page.set('title', title)
        xml_concept = etree.SubElement(page, 'concept')
        xml_concept.text = str(concept)

    # tostring(..., encoding='unicode') returns a str without an XML
    # declaration, so the file must be UTF-8 to be read back correctly.
    # Pin the encoding instead of relying on the locale default.
    with open(f'{SRCDIR}/pages.xml', 'w', encoding='utf-8') as to_file:
        to_file.write(
            etree.tostring(pages, pretty_print=True, encoding='unicode'))


# Run the conversion as soon as the module is loaded. There is no
# ``__main__`` guard, so importing this file also rewrites pages.xml.
write_pages()
