from django.core.management.base import BaseCommand, CommandError
from optparse import OptionParser, make_option

from xml.dom import minidom as _dom

from settings import *
from drill.models import *
from xml.dom import minidom as _dom
from optparse import OptionParser
from django import db
import sys
import re
import string
import codecs

from morfac.questions import GrammarDefaults, QObj, _elements, _attribute

class Questions:

	def read_element(self,qaelement,el,el_id,qtype):
		"""Create the QElement rows for one element of a question or answer.

		One QElement is created per distinct pos value found in the selected
		tags (or a single one with pos "" when only agreement is available).
		Each created QElement is linked to its tags, to candidate words
		(through wordqelement_set), to the question-side elements it copies
		(copy_set) and to the elements it agrees with (agreement_set).

		qaelement -- the Question row (question or answer) the element
		             belongs to; its ``task`` may be set and saved here.
		el        -- the DOM node of the element, or None/falsy when the
		             XML gives no specification for it.
		el_id     -- identifier of the element; also the key into
		             self.values (the grammar defaults).
		qtype     -- question type; when no node is given and el_id equals
		             qtype, the element is marked as the task.

		Returns None. Calls sys.exit(2) when no candidate word can be found
		for a created element. Requires self.values to be populated.
		"""
		
		semclass = False
		
		print
		print "Creating element", el_id

		# Syntactic function of the element: taken from the grammar defaults
		# when present, otherwise the element id itself is used.
		if self.values.has_key(el_id) and self.values[el_id].has_key('syntax'):
			syntax = self.values[el_id]['syntax']
		else:
			syntax = el_id
		
		if not el: print syntax, "No element given."

		# Some of the answer elements share content of question elements.
		content_id=""
		if el:
			content_id = el.getAttribute("content")
		if not content_id: content_id=el_id
		# Search for the same element on the question side.
		# If there is no element given in the answer, the element
		# is a copy of the question.
		question_qelements = None
		
		if (not el or el.getAttribute("content")) and \
			QElement.objects.filter(question__id=qaelement.question_id,
									identifier=content_id).count() > 0:
			question_qelements = QElement.objects.filter(question__id=qaelement.question_id,
														 identifier=content_id)
		else:
			# NOTE(review): this fallback looks among the elements of
			# qaelement itself (question__id=qaelement.id), not of the
			# question it answers -- confirm that this is intended.
			if el and el.getAttribute("content"):
				if QElement.objects.filter(question__id=qaelement.id,
										   identifier=content_id).count() > 0:
					question_qelements = QElement.objects.filter(question__id=qaelement.id,
																 identifier=content_id)
			
		if not el and question_qelements:
			for q in question_qelements:
				qe = QElement.objects.create(question=qaelement,
											 identifier=el_id,
											 syntax=q.syntax)
				# mark as a copy
				q.copy_set.add(qe)
				qe.save()
				q.save()
				# NOTE(review): the return sits inside the loop, so only the
				# first matching question element is ever copied.
				return
			
		############### AGREEMENT
		# Search for elements that agree
		agr_elements=None
		# A main verb is matched against the SUBJ element of the same
		# question/answer by default.
		if syntax=="MAINV":
			agr_id="SUBJ"
			print "TRYING verb agreement " + agr_id + " " + qaelement.qatype
			if QElement.objects.filter(question=qaelement, syntax=agr_id,
									   question__qatype=qaelement.qatype).count() > 0:
				agr_elements = QElement.objects.filter(question=qaelement,
													   syntax=agr_id,
													   question__qatype=qaelement.qatype)
		

		agreement = ""
		if el: agreement = el.getElementsByTagName("agreement")
		if agreement: print "Agreement:", agreement[0].getAttribute("id")
		
		# Agreement from xml-files
		# Try first inside question or answer
		# Then in answer-question level
		# An explicit <agreement> replaces the MAINV default found above
		# whenever a matching element exists.
		if agreement:
			agr_id=agreement[0].getAttribute("id")
			if QElement.objects.filter(question=qaelement, syntax=agr_id,
									   question__qatype=qaelement.qatype).count() > 0:
				agr_elements = QElement.objects.filter(question=qaelement,
													   syntax=agr_id,
													   question__qatype=qaelement.qatype)
				
			else:
				if Question.objects.filter(id=qaelement.question_id).count() > 0:
					# q is assigned but not used below.
					q=Question.objects.filter(id=qaelement.question_id)[0]
					if QElement.objects.filter(question__id=qaelement.question_id,
											   syntax=agr_id).count() > 0:
						agr_elements = QElement.objects.filter(question__id=qaelement.question_id,
															   syntax=agr_id)

			if not agr_elements:
				print "ERROR: no agreement elements found"
				
		############ WORDS
		# Search for existing word in the database.
		ids = []
		if el: ids=el.getElementsByTagName("id")
		# words groups the candidate Word rows by their pos value.
		words = {}
		word_elements = None
		# When several <id> nodes are given, the lookup of the last one wins
		# because word_elements is overwritten on every pass.
		for i in ids:
			word_id = i.firstChild.data
			if word_id:
				print "found word", word_id
				# Add pos information here!
				word_elements = Word.objects.filter(wordid=word_id)
				if not word_elements:
					print "Word not found! " + word_id
					
		# Search for existing semtype.
		# Semantic and valency classes are consulted only when no word was
		# found by id; a <val> class replaces a <sem> class when both exist.
		if not word_elements:
			semclasses= []
			if el: semclasses=el.getElementsByTagName("sem")
			if semclasses:
				semclass=semclasses[0].getAttribute("class")
				word_elements = Word.objects.filter(semtype__semtype=semclass)
			valclasses= []
			if el: valclasses=el.getElementsByTagName("val")
			if valclasses:
				valclass=valclasses[0].getAttribute("class")
				word_elements = Word.objects.filter(valency=valclass)

		# If still no words, get the default words for this element:
		if not word_elements:
			if self.values.has_key(el_id) and self.values[el_id].has_key('words'):
				word_elements = self.values[el_id]['words']

		if word_elements:
			for w in word_elements:
				if not words.has_key(w.pos): words[w.pos] = []
				words[w.pos].append(w)

		############# GRAMMAR
		tagelements = None
		grammars = []
		if el: grammars = el.getElementsByTagName("grammar")
		if not el or not grammars:
			# If there is no grammatical specification, the element is created
			# solely on the basis of the grammar defaults in self.values.
			if self.values.has_key(el_id):
				if self.values[el_id].has_key('tags'):
					tagelements = self.values[el_id]['tags']
		# An element for each different grammatical specification.
		else:
			poses = []
			tags = []
			for gr in grammars:
				tags.append(gr.getAttribute("tag"))
				poses.append(gr.getAttribute("pos"))
			tagstrings = []
			# Default tags restricted to the requested pos values, if any
			# defaults exist for this element.
			if poses:
				if self.values.has_key(el_id):
					if self.values[el_id].has_key('tags'):
						tagelements = self.values[el_id]['tags'].filter(pos__in=poses)
			if tags:
				# Expand every tag attribute into its concrete tag strings.
				for tag in tags:
					tagvalues = []
					self.get_tagvalues(tag,"",tagvalues)
					tagstrings.extend(tagvalues)
				if tagelements:
					# NOTE(review): tagelements is truthy in this branch, so
					# the "or" never reaches the Tag query and the expanded
					# tagstrings are ignored here -- confirm the intent.
					tagelements = tagelements or Tag.objects.filter(string__in=tagstrings)
				else:
					tagelements = Tag.objects.filter(string__in=tagstrings)


			# Extra check for pronouns
			# If pronoun id is given, only the tags related to that pronoun are preserved.
			# The loop runs over the queryset as it was when the loop started;
			# rebinding tagelements inside does not affect the iteration.
			for t in tagelements:
				if t.pos == 'Pron':
					if not words.has_key('Pron'): break
					found = False
					for w in words['Pron']:
						if Form.objects.filter(tag=t,word=w).count()>0:
							found = True
							break
					if not found:
						tagelements = tagelements.exclude(id=t.id)

			# Remove those words which do not have any forms with the tags.
			if words.has_key('N'): 
				# NOTE(review): words['N'] is modified while it is being
				# iterated, so the entry following a removed word is skipped.
				for w in words['N']:
					found = False
					for t in tagelements:
						if t.pos == 'N':
							if Form.objects.filter(tag=t, word=w).count()>0:
								found = True
					if not found:
						words['N'].remove(w)
			
		# Find different pos-values in tagelements
		posvalues = {}
		# Elements that have no inflection information are not created.
		if not tagelements and not agr_elements:
			print "no inflection for", el_id
			return
		# Agreement-only elements get a single entry with an empty pos.
		if not tagelements: posvalues[""] = 1
		else:
			for t in tagelements:
				posvalues[t.pos] = 1
		# attempt is assigned but not used below.
		attempt = False
		# Mark the element as the task: either the XML node carries a "task"
		# attribute, or no node was given and the id equals the question type.
		if el:
			task = el.getAttribute("task")
			if task:
				print "setting", el_id, "as task"
				qaelement.task = syntax
				qaelement.save()
		else:
			if el_id == qtype:
				qaelement.task = syntax
				qaelement.save()

		############# CREATE ELEMENTS
		print 'CREATING ELEMENTS'
		print 'Elements for the following keys...'
		print posvalues.keys()
		# Add an element for each pos:
		for p in posvalues.keys():
			qe = QElement.objects.create(question=qaelement,\
										 identifier=el_id,\
										 syntax=syntax)
			if semclass:
				semty, _ = Semtype.objects.get_or_create(semtype=semclass)
				qe.semtype = semty
				qe.save()
			
			print '\tsemtype: ', semclass
			# Add links to corresponding question elements.
			if question_qelements:
				for q in question_qelements:
					q.copy_set.add(qe)
					qe.save()
					q.save()

			# Attach only the tags whose pos matches this element.
			if tagelements:
				for t in tagelements:
					print '\ttag: ', t.string
					if t.pos == p:
						qe.tags.add(t)

			# Create links to words.
			# No candidate words for this pos yet: collect every word that has
			# a form with one of the element's tags (and the semtype, if set).
			if not words.has_key(p):
				word_pks = None
				print "looking for words..", el_id, p
				
				# Just filtering isn't enough; .filter() doesn't return a list
				# of unique items with this kind of query, hence the set below.
				
				if semclass:
					word_pks = Word.objects.filter(form__tag__in=qe.tags.all()).filter(semtype=qe.semtype).values_list('pk', flat=True)
				else:
					word_pks = Word.objects.filter(form__tag__in=qe.tags.all()).values_list('pk', flat=True)
				word_pks = list(set(word_pks))
				if len(word_pks) == 0:
					print 'Error: Elements with zero possibilities not permitted.'
					print ' > ', qe.question
					print ' > Word tags: %s' % repr(qe.tags.all())
					print ' > semtypes: %s' % repr(qe.semtype)
					sys.exit(2)
				print '%d elements available. ' % len(word_pks)
				
				# Generator faster, but maybe should test in_bulk on vic.
				# (Alternative: Word.objects.in_bulk(word_pks).values())
				word_elements_gen = (Word.objects.get(pk=int(b)) for b in word_pks)
				
				# word_elements is shared across the pos iterations, so words
				# collected for an earlier pos stay in the list.
				# NOTE(review): confirm that this carry-over is intended.
				if not word_elements:
					word_elements = []
				else:
					word_elements = list(word_elements)

				# A generator object is always truthy, so this test always passes.
				if word_elements_gen:
					for w in word_elements_gen:
						# NOTE(review): the key tested is p but the key that
						# is (re)created is w.pos; when they differ the list
						# for w.pos is reset on every pass.
						if not words.has_key(p): words[w.pos] = []
						words[w.pos].append(w)
						word_elements.append(w)
				
			# Every word in word_elements is linked, not only those of pos p.
			for w in word_elements:
				qe.wordqelement_set.create(word=w)

			# add agreement info.
			if agr_elements:
				for a in agr_elements:
					a.agreement_set.add(qe)
				# NOTE(review): a.save() is outside the loop, so only the last
				# agreement element is saved explicitly.
				a.save()
			qe.save()


	# Read elements attached to particular question or answer.
	def read_elements(self, head, qaelement, qtype):

		els = head.getElementsByTagName("element")
		qastrings =  qaelement.string.split()

		# Read first subject for agreement
		element=None
		if "SUBJ" in set(qastrings):
			for e in els:
				if e.getAttribute("id")=="SUBJ":
					element = e
					break

			self.read_element(qaelement, element, "SUBJ", qtype)


		# Process rest of the elements in the string.
		subj=False
		for s in qastrings:
			if s=="SUBJ" and not subj:
				subj=True
				continue

			syntax = s.lstrip("(")
			syntax = syntax.rstrip(")")

			element=None
			found = False
			for e in els:
				el_id = e.getAttribute("id")
				if el_id==s and not s=="SUBJ":
					self.read_element(qaelement,e,syntax,qtype)
					found = True
			if not found:
				self.read_element(qaelement,None,syntax,qtype)

	def read_questions(self, infile, grammarfile):

		xmlfile=file(infile)
		tree = _dom.parse(infile)

		self.read_grammar(grammarfile)

		qs = tree.getElementsByTagName("questions")[0]
		gametype = qs.getAttribute("game")
		if not gametype: gametype="morfa"

		print "Created questions:"
		for q in tree.getElementsByTagName("q"):
			qid = q.getAttribute('id')
			if not qid:
				print "ERROR Missing question id, stopping."
				exit()
			print qid.encode('utf-8')
			level = q.getAttribute('level')
			if not level: level="1"

			# Store question
			qtype=""
			qtype_el = q.getElementsByTagName("qtype")
			if qtype_el:
				qtype = q.getElementsByTagName("qtype")[0].firstChild.data
			question=q.getElementsByTagName("question")[0]
			text=question.getElementsByTagName("text")[0].firstChild.data

			#If there exists already a question with that name, delete all the references to it.
			if qid:
				questions = Question.objects.filter(qid=qid)
				if questions:
					questions[0].delete()

			question_element,created = Question.objects.get_or_create(qid=qid, \
																	  level=int(level), \
																	  string=text, \
																	  qtype=qtype, \
																	  gametype=gametype,\
																	  qatype="question")
			
			# Add source information if present
			if q.getElementsByTagName("sources"):
				sources = q.getElementsByTagName("sources")[0]
				elements=sources.getElementsByTagName("book")
				for el in elements:
					book=el.getAttribute("name")
					if book:
						# Add book to the database
						# Leave this if DTD is used
						book_entry, created = Source.objects.get_or_create(name=book)
						if created:
							print "Created book entry with name ", book
					question_element.source.add(book_entry)
					question_element.save()					

			else:
				book = "all"
				# Add book to the database
				book_entry, created = Source.objects.get_or_create(name=book)
				if created:
					print "Created book entry with name ", book
				question_element.source.add(book_entry)
				question_element.save()

			# Read the elements
			self.read_elements(question, question_element,qtype)	

			# There can be more than one answer for each question,
			# Store them separately.
			answers=q.getElementsByTagName("answer")
			for ans in answers:				
				text=ans.getElementsByTagName("text")[0].firstChild.data
				answer_element = Question.objects.create(string=text,qatype="answer",question=question_element,level=1)

				answer_element.save()
				self.read_elements(ans, answer_element, qtype)
			db.reset_queries() 


	def read_grammar(self, infile):
		"""Load the grammar defaults of *infile* into self.values.

		infile -- path of the grammar defaults XML file. Its first <tags>
		          node is read; each <element> below it yields one entry.

		self.values maps an element id to a dict with these keys:
		  'syntax' -- text of the first <syntax> child (only if present)
		  'words'  -- Word queryset for the first <id> child (only if given)
		  'pos'    -- list of the non-empty pos attributes of <grammar>
		  'tags'   -- Tag queryset for the expanded tag strings of all
		              <grammar> children (only if any string was produced)
		"""
		# minidom opens the path itself; no separate file handle is needed.
		tree = _dom.parse(infile)

		self.values = {}

		tags_node = tree.getElementsByTagName("tags")[0]
		for el in tags_node.getElementsByTagName("element"):
			identifier = el.getAttribute("id")
			info = {}

			syntaxes = el.getElementsByTagName("syntax")
			if syntaxes:
				info['syntax'] = syntaxes[0].firstChild.data

			word_ids = el.getElementsByTagName("id")
			if word_ids:
				word_id = word_ids[0].firstChild.data
				if word_id:
					info['words'] = Word.objects.filter(wordid=word_id)

			info['pos'] = []
			tagstrings = []
			for gr in el.getElementsByTagName("grammar"):
				pos = gr.getAttribute("pos")
				if pos:
					info['pos'].append(pos)

				# Expand the tag attribute into concrete tag strings.
				tagvalues = []
				self.get_tagvalues(gr.getAttribute("tag"), "", tagvalues)
				tagstrings.extend(tagvalues)

			if tagstrings:
				info['tags'] = Tag.objects.filter(string__in=tagstrings)

			self.values[identifier] = info

	def get_tagvalues(self,rest,tagstring,tagvalues):
		"""Expand a "+"-separated tag pattern into concrete tag strings.

		rest      -- the part of the pattern that is still to be processed.
		tagstring -- the "+"-joined tag string built so far ("" at the start).
		tagvalues -- list that receives every completed tag string.

		A piece that is a known Tagname is used as it is. A piece that names
		a Tagset is replaced in turn by each Tagname of that set. A piece
		that is neither ends that branch without adding anything.
		"""
		if not rest:
			tagvalues.append(tagstring)
			return

		# Split off the first piece; remainder is "" when there is no "+".
		piece, _plus, remainder = rest.partition('+')

		def joined(prefix, name):
			# Append name to prefix, with a "+" only when prefix is non-empty.
			if prefix:
				return prefix + "+" + name
			return name

		if Tagname.objects.filter(tagname=piece).count() > 0:
			self.get_tagvalues(remainder, joined(tagstring, piece), tagvalues)
		elif Tagset.objects.filter(tagset=piece).count() > 0:
			for member in Tagname.objects.filter(tagset__tagset=piece):
				self.get_tagvalues(remainder,
								   joined(tagstring, member.tagname),
								   tagvalues)
	

	def delete_question(self, qid=None):
		"""Delete the questions identified by *qid*.

		First every Question whose ``qid`` field equals *qid* is deleted,
		then every Question whose ``string`` field equals *qid*. Nothing
		happens when *qid* is empty or None.
		"""
		if not qid:
			return

		# The second lookup is evaluated only after the first deletions.
		for lookup in ({'qid': qid}, {'string': qid}):
			for match in Question.objects.filter(**lookup):
				match.delete()


# # # 
# 
#  Command class
#
# # #

class FileLog(object):
	"""Line logger that stores messages and echoes them to a stream.

	Messages are written to the file *fname* when a name is given; otherwise
	they are collected in ``loglines``. Every message is also written to a
	stream (sys.stderr unless another one is passed to log()).
	"""

	def __init__(self, fname):
		"""Open *fname* for writing, or log to memory when it is empty."""
		self.loglines = []
		# Always defined, so callers can inspect it without hasattr().
		self.fname = fname

		if fname:
			self.logfile = open(fname, 'w')
		else:
			self.logfile = False

	def log(self, string, pipe=False):
		"""Record *string* as one line and echo it to *pipe*.

		string -- the message; a trailing newline is added when missing.
		pipe   -- writable stream for the echo; sys.stderr when falsy.
		"""
		if not string.endswith('\n'):
			string += '\n'

		# Only text that is not already a native str needs encoding.
		# Calling encode() on a byte string holding non-ASCII data raises
		# UnicodeDecodeError, which the former handler did not catch.
		if not isinstance(string, str):
			string = string.encode('utf-8')

		if self.logfile:
			self.logfile.write(string)
		else:
			self.loglines.append(string)

		if not pipe:
			pipe = sys.stderr
		# Exactly one newline at the end of the echoed message.
		pipe.write(string.rstrip('\n') + '\n')

		return

	def close(self):
		"""Close the log file, if one was opened. Safe to call repeatedly."""
		if self.logfile:
			self.logfile.close()
			self.logfile = False


from testquestions import QObj


class QuestiontoModel(QObj):
	def prepareQElement(self):
		""" Placeholder: QElement is meant to become a list of pickled
			query kwargs instead of references to WordQElements.

			Nothing is implemented yet; the method returns None.
		"""
		return None


class Command(BaseCommand):
	"""Management command that installs questions from an XML file.

	Each <q> node of the question file is turned into a QObj (using the
	grammar defaults file), a Question row is created for it, and one
	qelement row is created per question query element.
	"""
	# Only the options defined in option_list are advertised.
	args = '--grammarfile FILE --questionfile FILE'
	help = """
	Runs through a question XML file and installs the questions and their
	elements in the database.
	Errors are printed to stderr, so that the rest can be filtered out.

	Example command:
		./manage.py installquestions --grammarfile grammar_defaults.xml \\
								  --questionfile noun_questions.xml
	"""
	option_list = BaseCommand.option_list + (
		make_option("-g", "--grammarfile", dest="grammarfile", default=False,
					help="XML-file for grammar defaults for questions"),
		make_option("-q", "--questionfile", dest="questionfile", default=False,
					help="XML-file that contains questions"),
	)

	def _parse_xml(self, path):
		"""Parse the XML file at *path*; raise CommandError if unreadable."""
		try:
			with open(path, 'r') as xml_file:
				return _dom.parse(xml_file)
		except IOError as e:
			raise CommandError("Cannot read %s: %s" % (path, e))

	def handle(self, *args, **options):
		"""Create Question and qelement rows for every <q> node.

		Raises CommandError when an option is missing or a file cannot be
		read. A question whose QObj cannot be built is reported on stderr
		and skipped.
		"""
		import sys
		from morfac.models import Question

		question_path = options.get('questionfile')
		grammar_path = options.get('grammarfile')
		if not question_path or not grammar_path:
			# Both options default to False, which open() cannot handle.
			raise CommandError(
				"Both --grammarfile and --questionfile are required.")

		question_nodes = _elements(self._parse_xml(question_path), 'q')
		defaults = GrammarDefaults(self._parse_xml(grammar_path))

		for q_node in question_nodes:
			try:
				q = QObj(q_node, grammar_defaults=defaults)
			except Exception as e:
				print >> sys.stderr, e
				print >> sys.stderr, _attribute(q_node, 'id')
				# Without this the code below would run with an unbound q,
				# or with the q left over from the previous question.
				continue

			# Create question
			question = Question.objects.create(
							qid=q.qid,
							qtype=q.qtype,)

			question_changed = False

			# Create question elements
			for (_name, queries) in q.question_query_elements:
				qkwargs = queries['qkwargs']
				meta = queries['meta'] # id -> identifier

				qeargs = {
					'identifier': meta.get('id'),
					'game': meta.get('game'),
					'task': meta.get('task'),
					'agreement': meta.get('agreement'),
					'copy': meta.get('copy') or False,
					'qkwargs': qkwargs,}

				qe = question.qelement_set.create(**qeargs)

				# The element flagged as task names the task of the question.
				if qe.task:
					question.task = qe.identifier
					question_changed = True

			# Save once, and only if a task was recorded.
			if question_changed:
				question.save()


###			log.log(' == QUESTION: %s ==' % q.qid, _OUT)
###
###			if len(q.task.keys()) > 0:
###				task = q.task.keys()[0]
###			else:
###				task = False
###
###			questkwargs = {
###				'qid': q.qid,
###				'gametype': 'morfa',
###				'qtype': q.qtype, 
###				'level': '',
###				'task': task,
###				'string': q.text,
###				# 'question': 
###				# 'source': '', 
###			}
###
###			def constructQuery(self):
###				element_to_query = {
###					'tags': 'tag__string',
###					'semtypes': 'word__semtype__semtype',
###					'pos': 'word__pos',
###					'lemma': 'word__lemma',
###					'hid': 'word__hid',
###				}
###				
###				print self.question_elements
###
###
###			print questkwargs
###			print constructQuery(q)
###			raw_input()
###			# questionobj = Question.objects.create(**questkwargs)
###
###			### for iteration in range(iterations):
###			### 	c = iteration + 1
###			### 	log.log(' - %d' % c, _OUT)
###			### 	error = False
###			### 	baseform = False
###
###			### 	try:		
###			### 		qword = q.task.values()[0]['selected'].getBaseform()
###			### 		qword = qword.fullform
###			### 	except Form.DoesNotExist:
###			### 		qword = 'NO FORM'
###			### 		baseform = q.task.values()[0]['selected'].word
###			### 	except:
###			### 		qword = 'TASK'
###			### 	finally:
###			### 		error = True
###
###			### 	log.log('    Q: ' + u'%s (%s)' % (q.question_text, qword), _OUT)
###			### 	
###			### 	log.log('    A: ' + u'%s' % q.answer_text_blank, _OUT)
###			### 	
###			### 	try:		
###			### 		aword = q.task.values()[0]['selected']
###			### 		aword = aword.fullform
###			### 	except:		
###			### 		aword = 'TASK'
###			### 	finally:
###			### 		error = True
###
###			### 	log.log('           - %s' % aword, _OUT)
###			### 	
###			### 	if error:
###			### 		if baseform:
###			### 			log.log('      *** Baseform does not exist for <%s>' % baseform.lemma)
###
###			### 	if len(q.errors.keys()) > 0:
###			### 		for k, v in q.errors.items():
###			### 			log.log('    *** Error in %s' % k, _ERR)
###			### 			indent      = '        '
###			### 			log.log(''.join([indent + a + '\n' for a in v]), _ERR)
###			### 	q = QObj(q_node, grammar_defaults=defaults)