# -*- coding: utf-8 -*-

from settings import *
from drill.models import *
from xml.dom import minidom as _dom
from optparse import OptionParser
from django.db.models import Q
import sys
import re
import string
import codecs


class Questions:

    def read_element(self,qaelement,el,el_id,qtype):

        print
        print "Creating element", el_id

        # Syntactic function of the element
        if self.values.has_key(el_id) and self.values[el_id].has_key('syntax'):
            syntax = self.values[el_id]['syntax']
        else:
            syntax = el_id

        if not el: print syntax, "No element given."

        # Some of the answer elements share content of question elements.
        content_id=""
        if el: content_id = el.getAttribute("content")
        if not content_id: content_id=el_id
            
        # Search for the same element in question side
        # If there is no element given in the answer, the element
        # is a copy of the question.
        question_qelements = None
        
        if (not el or el.getAttribute("content")) and \
            QElement.objects.filter(Q(question__id=qaelement.question_id) & \
                                              Q(identifier=content_id)).count() > 0:
            question_qelements = QElement.objects.filter(Q(question__id=qaelement.question_id) & \
                                                         Q(identifier=content_id))
        else:
            if el and el.getAttribute("content"):
                if QElement.objects.filter(Q(question__id=qaelement.id) & \
                                           Q(identifier=content_id)).count() > 0:
                    question_qelements = QElement.objects.filter(Q(question__id=qaelement.id) & \
                                                                 Q(identifier=content_id))
            
        if not el and question_qelements:
            print "CREATING", syntax
            for q in question_qelements:
                qe = QElement.objects.create(question=qaelement,\
                                             identifier=el_id,\
                                             syntax=q.syntax)
                # mark as a copy
                q.copy_set.add(qe)
                qe.save()
                q.save()
                return
            
        ############### AGREEMENT
        # Search for elementes that agree
        agr_elements=None
        if syntax=="MAINV":
            agr_id="SUBJ"
            print "TRYING verb agreement " + agr_id + " " + qaelement.qatype
            if QElement.objects.filter(Q(question=qaelement) & Q(syntax=agr_id) &\
                                       Q(question__qatype=qaelement.qatype)).count() > 0:
                agr_elements = QElement.objects.filter(Q(question=qaelement) & \
                                                       Q(syntax=agr_id) &\
                                                       Q(question__qatype=qaelement.qatype))
        

        agreement = ""
        if el: agreement = el.getElementsByTagName("agreement")
        if agreement: print "Agreement:", agreement[0].getAttribute("id")
        
        # Agreement from xml-files
        # Try first inside question or answer
        # Then in answer-question level
        if agreement:
            agr_id=agreement[0].getAttribute("id")
            if QElement.objects.filter(Q(question=qaelement) & Q(syntax=agr_id) & \
                                       Q(question__qatype=qaelement.qatype)).count() > 0:
                agr_elements = QElement.objects.filter(Q(question=qaelement) & \
                                                       Q(syntax=agr_id) &\
                                                       Q(question__qatype=qaelement.qatype))
                
            else:
                if Question.objects.filter(id=qaelement.question_id).count() > 0:
                    q=Question.objects.filter(id=qaelement.question_id)[0]
                    if QElement.objects.filter(Q(question__id=qaelement.question_id) & \
                                               Q(syntax=agr_id)).count() > 0:
                        agr_elements = QElement.objects.filter(Q(question__id=qaelement.question_id) & \
                                                               Q(syntax=agr_id))

            if not agr_elements:
                print "ERROR: no agreement elements found"
				
        ############ WORDS
        # Search for existing word in the database.
        ids = []
        if el: ids=el.getElementsByTagName("id")
        words = {}
        word_elements = None
        for i in ids:
            word_id = i.firstChild.data
            if word_id:
                print "found word", word_id
                # Add pos information here!
                word_elements = Word.objects.filter(Q(wordid=word_id))
                if not word_elements:
                    print "Word not found! " + word_id
                    
        # Search for existing semtype
        # Semtype overrides the word id selection
        if not word_elements:
            semclasses= []
            if el: semclasses=el.getElementsByTagName("sem")
            if semclasses:
                semclass=semclasses[0].getAttribute("class")
                word_elements = Word.objects.filter(Q(semtype__semtype=semclass))
            valclasses= []
            if el: valclasses=el.getElementsByTagName("val")
            if valclasses:
                valclass=valclasses[0].getAttribute("class")
                word_elements = Word.objects.filter(Q(valency=valclass))

        # If still no words, get the default words for this element:
        if not word_elements:
            if self.values.has_key(el_id) and self.values[el_id].has_key('words'):
                word_elements = self.values[el_id]['words']

        if word_elements:
            for w in word_elements:
                if not words.has_key(w.pos): words[w.pos] = []
                words[w.pos].append(w)


        ############# GRAMAMR
        tagelements = None
        grammars = []
        if el: grammars = el.getElementsByTagName("grammar")
        if not el or not grammars:
            # If there is no grammatical specification, the element is created solely
            # on the basis of grammar.
            if self.values.has_key(el_id):
                if self.values[el_id].has_key('tags'):
                    tagelements = self.values[el_id]['tags']
        # An element for each different grammatical specification.
        else:
            poses = []
            tags = []
            for gr in grammars:
                tags.append(gr.getAttribute("tag"))
                poses.append(gr.getAttribute("pos"))
            tagstrings = []
            if poses:
                if self.values.has_key(el_id):
                    if self.values[el_id].has_key('tags'):
                        tagelements = self.values[el_id]['tags'].filter(pos__in=poses)

            if tags:
                for tag in tags:
                    tagvalues = []
                    self.get_tagvalues(tag,"",tagvalues)
                    tagstrings.extend(tagvalues)
                if tagelements:
                    tagelements = tagelements | Tag.objects.filter(Q(string__in=tagstrings))
                else:
                    tagelements = Tag.objects.filter(Q(string__in=tagstrings))


            # Extra check for pronouns
            # If pronoun id is given, only the tags related to that pronoun are preserved.
            for t in tagelements:
                if t.pos == 'Pron':
                    if not words.has_key('Pron'): break
                    found = False
                    for w in words['Pron']:
                        if Form.objects.filter(Q(tag=t) & Q(word=w)).count()>0:
                            found = True
                            break
                    if not found:
                        tagelements = tagelements.filter(~Q(id=t.id))

            # Remove those words which do not have any forms with the tags.
            if words.has_key('N'): 
                for w in words['N']:
                    found = False
                    for t in tagelements:
                        if t.pos == 'N':
                            if Form.objects.filter(Q(tag=t) & Q(word=w)).count()>0:
                                found = True
                    if not found:
                        words['N'].remove(w)
            
        # Find different pos-values in tagelements
        posvalues = {}
        # Elements that do not inflection information are not created.
        if not tagelements and not agr_elements:
            print "no inflection for", el_id
            return
        if not tagelements: posvalues[""] = 1
        else:
            for t in tagelements:
                posvalues[t.pos] = 1

        if el:
            task = el.getAttribute("task")
            if task:
                print "setting", el_id, "as task"
                qaelement.task = syntax
                qaelement.save()
        else:
            if el_id == qtype:
                qaelement.task = syntax
                qaelement.save()
                
        ############# CREATE ELEMENTS

        # Add an element for each pos:
        for p in posvalues.keys():
            qe = QElement.objects.create(question=qaelement,\
                                         identifier=el_id,\
                                         syntax=syntax)
            
            # Add links to corresponding question elements.
            if question_qelements:
                for q in question_qelements:
                    q.copy_set.add(qe)
                    qe.save()
                    q.save()

            if tagelements:
                for t in tagelements:
                    if t.pos == p:
                        qe.tags.add(t)

            # Create links to words.
            if not words.has_key(p):
                print "looking for words..", el_id, p
                word_elements = Word.objects.filter(pos=p)
                if word_elements:
                    for w in word_elements:
                        if not words.has_key(p): words[w.pos] = []
                        words[w.pos].append(w)

            for w in words[p]:
                we = WordQElement.objects.create(qelement=qe,\
                                                 word=w)

            # add agreement info.
            if agr_elements:
                for a in agr_elements:
                    a.agreement_set.add(qe)
                a.save()
            qe.save()


    # Read elements attached to particular question or answer.
    def read_elements(self, head, qaelement, qtype):

        els = head.getElementsByTagName("element")
        qastrings =  qaelement.string.split()

        # Read first subject for agreement
        element=None
        if "SUBJ" in set(qastrings):
            for e in els:
                if e.getAttribute("id")=="SUBJ":
                    element = e
                    break

            self.read_element(qaelement, element, "SUBJ", qtype)


        # Process rest of the elements in the string.
        subj=False
        for s in qastrings:
            if s=="SUBJ" and not subj:
                subj=True
                continue

            syntax = s.lstrip("(")
            syntax = syntax.rstrip(")")

            element=None
            found = False
            for e in els:
                el_id = e.getAttribute("id")
                if el_id==s and not s=="SUBJ":
                    self.read_element(qaelement,e,syntax,qtype)
                    found = True
            if not found:
                self.read_element(qaelement,None,syntax,qtype)

    def read_questions(self, infile, grammarfile):

        xmlfile=file(infile)
        tree = _dom.parse(infile)

        self.read_grammar(grammarfile)

        qs = tree.getElementsByTagName("questions")[0]
        gametype = qs.getAttribute("game")
        if not gametype: gametype="morfa"

        print "Created questions:"
        for q in tree.getElementsByTagName("q"):

            qid = q.getAttribute('id')
            if not qid:
                print "ERROR Missing question id, stopping."
                exit()
            print qid.encode('utf-8')
            level = q.getAttribute('level')
            if not level: level="1"

            # Store question
            qtype=""
            qtype_el = q.getElementsByTagName("qtype")
            if qtype_el:
                qtype = q.getElementsByTagName("qtype")[0].firstChild.data
            question=q.getElementsByTagName("question")[0]
            text=question.getElementsByTagName("text")[0].firstChild.data

            #If there exists already a question with that name, delete all the references to it.
            if qid:
                questions = Question.objects.filter(qid=qid)
                if questions:
                    questions[0].delete()

            question_element,created = Question.objects.get_or_create(qid=qid, \
                                                                      level=int(level), \
                                                                      string=text, \
                                                                      qtype=qtype, \
                                                                      gametype=gametype,\
                                                                      qatype="question")
            
            # Add source information if present
            if q.getElementsByTagName("sources"):
                sources = q.getElementsByTagName("sources")[0]
                elements=sources.getElementsByTagName("book")
                for el in elements:
                    book=el.getAttribute("name")
                    if book:
                        # Add book to the database
                        # Leave this if DTD is used
                        book_entry, created = Source.objects.get_or_create(name=book)
                        if created:
                            print "Created book entry with name ", book
                    question_element.source.add(book_entry)
                    question_element.save()                    

            else:
                book = "all"
                # Add book to the database
                book_entry, created = Source.objects.get_or_create(name=book)
                if created:
                    print "Created book entry with name ", book
                question_element.source.add(book_entry)
                question_element.save()

            # Read the elements
            self.read_elements(question, question_element,qtype)    

            # There can be more than one answer for each question,
            # Store them separately.
            answers=q.getElementsByTagName("answer")
            for ans in answers:                
                text=ans.getElementsByTagName("text")[0].firstChild.data
                answer_element = Question.objects.create(string=text,qatype="answer",question=question_element,level=1)

                answer_element.save()                
                self.read_elements(ans, answer_element,qtype)


    def read_grammar(self, infile):
    
        xmlfile=file(infile)
        tree = _dom.parse(infile)

        self.values = {}
        
        tags=tree.getElementsByTagName("tags")[0]
        for el in tags.getElementsByTagName("element"):

            identifier=el.getAttribute("id")
            
            info2 = {}
            
            elements = []
            word_id=""
            word = None
            
            syntax =""
            syntaxes = el.getElementsByTagName("syntax")
            if syntaxes:
                syntax = syntaxes[0].firstChild.data
                info2['syntax'] = syntax
                
            word_ids = el.getElementsByTagName("id")
            if word_ids:
                word_id = word_ids[0].firstChild.data
                if word_id:
                    words = Word.objects.filter(wordid=word_id)
                    info2['words'] = words

            info2['pos'] = []
            tagstrings = []

            grammars = el.getElementsByTagName("grammar")
            for gr in grammars:
                pos=gr.getAttribute("pos")
                if pos:
                    info2['pos'].append(pos)

                tag=gr.getAttribute("tag")
                tagvalues = []
                self.get_tagvalues(tag,"",tagvalues)
                tagstrings.extend(tagvalues)

            if len(tagstrings) > 0:
                tags = Tag.objects.filter(string__in=tagstrings)
                info2['tags'] = tags
                
            self.values[identifier] = info2

    def get_tagvalues(self,rest,tagstring,tagvalues):

        if not rest:
            tagvalues.append(tagstring)
            return
        if rest.count("+") > 0:
            t, rest = rest.split('+',1)
        else:
            t=rest
            rest=""
        if Tagname.objects.filter(tagname=t).count() > 0:
            if tagstring:
                tagstring = tagstring + "+" + t
            else:
                tagstring = t
            self.get_tagvalues(rest,tagstring,tagvalues)
        else:
            if Tagset.objects.filter(Q(tagset=t)).count() > 0:
                tagnames=Tagname.objects.filter(tagset__tagset=t)
                for t in tagnames:
                    if tagstring:
                        tagstring2 = tagstring + "+" + t.tagname
                    else:
                        tagstring2 = t.tagname
                    self.get_tagvalues(rest,tagstring2,tagvalues)
    

    def delete_question(self, qid=None):
        
        if qid:
            questions = Question.objects.filter(qid=qid)
            if questions:
                for q in questions:
                    q.delete()

            questions = Question.objects.filter(string=qid)
            if questions:
                for q in questions:
                    q.delete()