# -*- coding: utf-8 -*-
from settings import *
from drill.models import *
from django.db.models import Q
from xml.dom import minidom as _dom
from django.utils.encoding import force_unicode
import sys
import re
import string
import codecs


# Lexicons: words

class Words:

    def install_lexicon(self,infile,linginfo,delete=None,paradigmfile=None,placenamefile=None):

        xmlfile=file(infile)
        tree = _dom.parse(infile)
        
        lex = tree.getElementsByTagName("lexicon")[0]
        mainlang = lex.getAttribute("xml:lang")
        if not mainlang and not placenamefile:
            print "Attribute mainlang not defined in", infile, "stop."
            sys.exit()

        self.all_wordids=[]

        for e in tree.getElementsByTagName("entry"):
            pos=e.getElementsByTagName("pos")[0].getAttribute("class")
            self.store_word(e,linginfo,mainlang,paradigmfile,placenamefile,delete)

        if delete and pos and not placenamefile:
            allids = Word.objects.filter(Q(pos=pos) & ~Q(semtype__semtype="PLACE-NAME-LEKSA")).values_list('wordid',flat=True)
            for a in allids:
                if force_unicode(a) not in set(self.all_wordids):
                    print "Word id not found from xml. Deleting:", a
                    word = Word.objects.get(pos=pos,wordid=a)
                    word.delete()

        if delete and placenamefile:
            allids = Word.objects.filter(Q(pos="N") & Q(semtype__semtype="PLACE-NAME-LEKSA")).values_list('wordid',flat=True)
            for a in allids:
                if force_unicode(a) not in set(self.all_wordids):
                    print "Word id not found from xml. Deleting:", a
                    word = Word.objects.get(pos=pos,wordid=a)
                    word.delete()

    def add_translation(self,el,w,pos,placenamefile):
        if el.firstChild:
            translation=el.firstChild.data
            lang=el.getAttribute("xml:lang")
            if lang == "sme":
                if Word.objects.filter(wordid=translation,pos=pos).count()>0:
                    transl = Word.objects.filter(wordid=translation,pos=pos)[0]
                else:
                    transl, created = Word.objects.get_or_create(wordid=translation,pos=pos)
                    if created:
                        transl.lemma=translation
                        transl.save()
                        # Add reference to the new word object as translation.
                w.translations.add(transl)
                w.save()                   

            else:
                if lang == "fin":
                    transl, created = Wordfin.objects.get_or_create(wordid=translation)
                    if created:
                        transl.lemma=translation
                        transl.save()
                    w.translations2fin.add(transl)
                    w.save()

                if lang == "swe":
                    transl, created = Wordswe.objects.get_or_create(wordid=translation)
                    if created:
                        transl.lemma=translation
                        transl.save()
                    w.translations2swe.add(transl)
                    w.save()

                if lang == "eng":
                    transl, created = Wordeng.objects.get_or_create(wordid=translation)
                    if created:
                        transl.lemma=translation
                        transl.save()
                    w.translations2eng.add(transl)
                    w.save()

                if lang == "deu":
                    transl, created = Worddeu.objects.get_or_create(wordid=translation)
                    if created:
                        transl.lemma=translation
                        transl.save()
                    w.translations2deu.add(transl)
                    w.save()

                if lang == "nob":
                    transl, created = Wordnob.objects.get_or_create(wordid=translation)
                    if created:
                        transl.lemma=translation
                        transl.save()
                    w.translations2nob.add(transl)
                    w.save()
								
                    # special treatment for å-verbs.
                    if pos=="V":
                        oo = "å".decode('utf8')
                        wordform = translation.lstrip(oo + " ")
                        transl, created = Wordnob.objects.get_or_create(wordid=wordform)
                        if created:
                            transl.lemma=wordform
                            transl.save()
                        # Add reference to the new word object as translation.
                        w.translations2nob.add(transl)
                        w.save()          
         
####### Possible bug here: proper nouns!!!

                    # If placenames
                    # Give norwegian words same semantic classes as sami words.
                    # Temporary solution.
                    if placenamefile:
                        sem_entry, created = Semtype.objects.get_or_create(semtype="PLACE-NAME-LEKSA")
                        if created:
                            print "Created semtype entry with name PLACE-NAME-LEKSA"
                            #transl.semtype.add(sem_entry)
                            #transl.frequency=w.frequency
                            #transl.geography=w.geography
                            #transl.save()

                        transl.semtype.add(sem_entry)
                        transl.frequency=w.frequency
                        transl.geography=w.geography
                        transl.save()


    def add_semantics(self,e,w,placenamefile):
        # Give placenames special semantic tag
        # This is temporary solution.
        if placenamefile:
            sem_entry, created = Semtype.objects.get_or_create(semtype="PLACE-NAME-LEKSA")
            if created:
                print "Created semtype entry with name PLACE-NAME-LEKSA"
            w.semtype.add(sem_entry)
            w.save()

        else:
            semantics = e.getElementsByTagName("semantics")[0]
            elements=semantics.getElementsByTagName("sem")
            
            for el in elements:
                sem=el.getAttribute("class")
                if sem:
                    print sem					
                    # Add semantics entry if not found.
                    # Leave this if DTD is used.
                    sem_entry, created = Semtype.objects.get_or_create(semtype=sem)
                    if created:
                        print "Created semtype entry with name ", sem
                    w.semtype.add(sem_entry)
                    w.save()        

    def add_sources(self,e,w):
        sources = e.getElementsByTagName("sources")[0]
        elements=sources.getElementsByTagName("book")
        for el in elements:
            book=el.getAttribute("name")
            if book:
                # Add book to the database
                # Leave this if DTD is used
                book_entry, created = Source.objects.get_or_create(name=book)
                # Mind the gap/indentation
                if created:
                    print "Created book entry with name ", book
                w.source.add(book_entry)
                w.save()
                print w.lemma, " added to book ", book

    def store_word(self,e,linginfo,mainlang,paradigmfile,placenamefile,delete):
        
        # Store first unique fields
        id=e.getAttribute("id")
        lemma=e.getElementsByTagName("lemma")[0].firstChild.data
        if not id:
            id=lemma
        self.all_wordids.append(id)
        stem=""
        forms=""
        dialects=["GG","KJ"]
        diphthong="no"
        gradation=""
        rime=""
        attrsuffix=""
        soggi=""
        valency=""
        compare=""
        frequency=""
        geography=""
        presentationform = ""
        only_sg = 0
        only_pl = 0
        noleksa = 0
        print lemma
        if e.getElementsByTagName("forms"):
            forms=e.getElementsByTagName("forms")[0]
			
        if e.getElementsByTagName("presentationform"):
            presentationform=e.getElementsByTagName("presentationform")[0].firstChild.data

        if e.getElementsByTagName("stem"):
            stem=e.getElementsByTagName("stem")[0].getAttribute("class")
            diphthong=e.getElementsByTagName("stem")[0].getAttribute("diphthong")
            gradation=e.getElementsByTagName("stem")[0].getAttribute("gradation")
            rime=e.getElementsByTagName("stem")[0].getAttribute("rime")
            if rime=="0": rime="norime"
            soggi=e.getElementsByTagName("stem")[0].getAttribute("soggi")
            compare=e.getElementsByTagName("stem")[0].getAttribute("compare")
            attrsuffix=e.getElementsByTagName("stem")[0].getAttribute("attrsuff")
            if attrsuffix == "0": attrsuffix="noattr"
        
        for d in e.getElementsByTagName("dialect"):
            dialect=d.getAttribute("class")
            if dialect:
                invd=dialect.lstrip("NOT-")
                dialects.remove(invd)
    
        if e.getElementsByTagName("frequency"):
            frequency=e.getElementsByTagName("frequency")[0].getAttribute("class")

        if e.getElementsByTagName("geography"):
            geography=e.getElementsByTagName("geography")[0].getAttribute("class")


        if e.getElementsByTagName("only-sg"):
            only_sg = 1
        if e.getElementsByTagName("only-pl"):
            only_pl = 1
        if e.getElementsByTagName("noleksa"):
            noleksa = 1

        if e.getElementsByTagName("valency"):
            valencies = e.getElementsByTagName("valency")[0]
            for val in valencies.getElementsByTagName("val"):
                valency = val.getAttribute("class")
                if valency: break

        # Part of speech information
        pos=e.getElementsByTagName("pos")[0].getAttribute("class") 
        if not pos:
            print "Part of speech information not found for ", lemma, ". give it command line: --pos=N"
            sys.exit()

        # Search for existing word in the database.
        w=None
        if mainlang == "nob":
            w,created = Wordnob.objects.get_or_create(wordid=id)
        elif mainlang == "fin":
            w,created = Wordfin.objects.get_or_create(wordid=id)
        elif mainlang == "swe":
            w,created = Wordswe.objects.get_or_create(wordid=id)
        elif mainlang == "eng":
            w,created = Wordeng.objects.get_or_create(wordid=id)
        elif mainlang == "deu":
            w,created = Worddeu.objects.get_or_create(wordid=id)
        else:
            w,created = Word.objects.get_or_create(wordid=id,pos=pos)

        w.pos=pos
        w.lemma=lemma
        w.presentationform=presentationform
        print presentationform
        w.stem=stem
        w.rime=rime
        w.compare = compare
        w.attrsuffix = attrsuffix
        w.soggi=soggi
        w.gradation=gradation
        w.diphthong=diphthong

#         why two times the same operation (cf. line 250): not mainlang(nob) <=> mainlang(sme)
#        if not mainlang == "nob":
#            for d in dialects:
#                dia, created = Dialect.objects.get_or_create(dialect=d)
#                w.dialects.add(dia)
#                w.save()

        w.valency = valency
        w.frequency = frequency
        w.geography = geography
        w.save()

#         why two times the same operation (cf. line 238): not mainlang(nob) <=> mainlang(sme)
        if mainlang=="sme":
            for d in dialects:
                dia, created = Dialect.objects.get_or_create(dialect=d)
                w.dialects.add(dia)
                w.save()
            
        # Add forms and tags
        if paradigmfile:
            linginfo.create_paradigm(lemma,pos,forms)
            # Remove old forms.
            forms = Form.objects.filter(word=w)
            for f in forms:
                f.delete()
            for f in linginfo.paradigm:

                g=f.classes
                if w.pos == "A" and w.compare == "no" and \
                       (g.get('Grade')=="Comp" or g.get('Grade')=="Superl"):
                    continue
                    
                t,created=Tag.objects.get_or_create(string=f.tags,pos=g.get('Wordclass', ""),\
                                                    number=g.get('Number',""),case=g.get('Case',""),\
                                                    possessive=g.get('Possessive',""),\
                                                    grade=g.get('Grade',""),\
                                                    infinite=g.get('Infinite',""), \
                                                    personnumber=g.get('Person-Number',""),\
                                                    polarity=g.get('Polarity',""),\
                                                    tense=g.get('Tense',""),mood=g.get('Mood',""), \
                                                    subclass=g.get('Subclass',""),\
                                                    attributive=g.get('Attributive',""))

                t.save()
                
                form = Form(fullform=f.form,tag=t,word=w)				
                print f.form
                form.save()
                if len(f.dialects)==1: dialects2 = f.dialects[:]
                else: dialects2 = dialects[:]
                for d in dialects2:
                    dia, created = Dialect.objects.get_or_create(dialect=d)
                    form.dialects.add(dia)
                    form.save()
                form.save()

        if only_sg:
            print "deleting plural forms for", w.lemma
            Form.objects.filter(Q(word=w.id) & Q(tag__number="Pl")).delete()
        if only_pl:
            print "deleting singular forms for", w.lemma
            Form.objects.filter(Q(word=w.id) & Q(tag__number="Sg")).delete
        if noleksa:
            print "word not in leksa", w.lemma
            w.leksa=0
        else:
            w.leksa=1

        if e.getElementsByTagName("sources"):
            self.add_sources(e,w)
        
        if e.getElementsByTagName("semantics"):
            self.add_semantics(e,w,placenamefile)
        
        # Add translations
        translations = e.getElementsByTagName("translations")[0]
        elements=translations.getElementsByTagName("tr")
        for el in elements:
            self.add_translation(el,w,pos,placenamefile)


    def delete_word(self, wid=None,pos=None):
# no check for pos at the moment because
# there is a mess in the nob entries (some have pos, some not)
# however pos check is a todo issue.

        words = Word.objects.filter(wordid=wid)
        wordnobs = Wordnob.objects.filter(wordid=wid)
        wordfins = Wordfin.objects.filter(wordid=wid)

        for w in words:
            print "Removing", w.wordid
            w.delete()
	if not words:
            print wid, "not found in sma-db ... searching nob-db"
	    for w in wordnobs:
                print "Removing", w.wordid
		w.delete()
            if not wordnobs:
                print wid, "not found in nob-db either ... searching fin-db"
                for w in wordfins:
                    print "Removing", w.wordid
                    w.delete()
                    if not wordfins:
                        print wid, "not found in fin-db either. Beklager!"


