# -*- coding:utf-8 -*-
import re, os, errno, cgi, json, xml
import sys, codecs, locale, getopt
import xml.etree.ElementTree as ET
from subprocess import Popen, PIPE
from operator import itemgetter
from xml.dom.minidom import parse, parseString
from imp import reload
from collections import defaultdict


def main():
    # to be adjusted as needed
    pos='Prop'
    in_dir = '3_uxml'
    out_dir = '4_axml'
    cwd = os.getcwd()
    out_dir_path = os.path.join(cwd,out_dir)
    if not os.path.exists(out_dir_path):
        os.mkdir(out_dir_path)
        
    debug_fst = False
    
    namespaces = {'xml': 'http://www.w3.org/1999/xml'}
    
    # parameters to be adjusted as needed
    plup = Popen('which lookup', shell=True, stdout=PIPE, stderr=PIPE)
    olup, elup = plup.communicate()
    print("___ lookup is ",olup.decode())
    if not olup.decode():
        print('No lookup found, please install it!')
        sys.exit()
    
    lookup = olup.decode().strip()
    langs_dir = '$GTHOME/langs/'
    xfst_file = '/src/analyser-gt-'
    
    for root, dirs, files in os.walk(in_dir):
    
        for f in files:
            if f.endswith('xml'):
                print('... processing ', str(f))
                tree = ET.parse(os.path.join(in_dir,f))
                f_root = tree.getroot()
                
                lgs = f_root.findall('.//lang')
                for lg in lgs:
                    lang_code=lg.get('code')
                    
                    names = lg.findall('./name')
                    for name in names:
                        # analyser-gt-norm.xfst
                        # analyser-gt-desc.xfst
                        
                        for fst_type in ['norm', 'desc']:
                            c_fst = langs_dir + lang_code + xfst_file + fst_type + '.xfst'
                            name = checkAnalysis(fst_type, c_fst, name, lang_code)
                                
                                
                tree.write(os.path.join(out_dir_path,str(f)),
                            xml_declaration=True,encoding='utf-8',
                            method="xml")
                print('DONE ', f, '\n\n')


def checkAnalysis(fst_type, fst, name, lang_code):
    
    _fst_type = fst_type
    _fst = fst
    _name = name
    _lang_code = lang_code
    
    spelling=_name.find('spelling').text
    
    print('... lemma ', str(spelling))
    
    cmd = " | lookup -q -flags mbTT " + _fst
    
    p = Popen('echo "'+spelling+'"'+cmd, shell=True, stdout=PIPE, stderr=PIPE)
    out, err = p.communicate()
    c_analysis = ''
    filtered_analysis = ''
    print("|", out.decode().split('\n', 1 ),"|")
    current_analysis = filter(None,out.decode().split('\n\n'))
    
    for current_cohort in current_analysis:
        cc_list = current_cohort.split('\n')
        # set default analysis value to 'no'
        _name.find('spelling').set(_fst_type+'_fst', 'no')
        for analysis in cc_list:
            analysis = analysis.partition('\t')[2]
            if '+Prop+' in analysis:
                # due to tags in nob output
                if _lang_code is 'nob':
                    _name.find('spelling').set(_fst_type+'_fst', 'yes')
                    break
                # due to tags in non-nob output
                if analysis.endswith('+Nom') and not _lang_code is 'nob':
                    _name.find('spelling').set(_fst_type+'_fst', 'yes')
                    break
                    
            # refine analysis: check with Nåebrie, Storfjellet and kommunenavn Lierne
            # this is easier done via xsl: both files are xml-files
            
    return _name
        
        
if __name__ == "__main__":
    reload(sys)
    main()