# -*- encoding: utf-8 -*-

from django.core.management.base import BaseCommand, CommandError
from optparse import make_option


# Plain (non-relational) Word fields passed to getUniques() by the command
# below; every combination of their distinct values is sampled.
# Currently disabled: wordid, lemma, presentationform, valency, hid,
# diphthong, gradation, rime, attrsuffix, soggi, tcomm.
# TODO: limit value options for 'soggi' to say, 3, + None
WORD_ATTRS = [
	'language', 'pos', 'stem', 'wordclass',
	'compare', 'frequency', 'geography',
]

# NOTE(review): presumably the many-to-many relations of Word; this list is
# not referenced by the code visible in this file -- confirm before removing.
WORD_MANYTOMANIES = ['semtype', 'source', 'dialects']

# Counterpart of WORD_ATTRS for WordTranslation (only used by a call that is
# commented out in the command below).
# Currently disabled: wordid, lemma, phrase, explanation.
WORD_TRANSLATION_ATTRS = [
	'language', 'pos', 'frequency',
	'geography', 'tcomm', 'tcomm_pref',
]

# NOTE(review): presumably the many-to-many relations of WordTranslation;
# not referenced by the code visible in this file.
WORD_TRANSLATION_MANYTOMANIES = ['semtype', 'source']


def getUniques(model, model_attributes):
	"""Sample primary keys that cover the attribute combinations of a model.

	Steps:
	  1. For every name in ``model_attributes`` collect the distinct truthy
	     values stored in the database, plus ``None``.
	  2. Build the Cartesian product of those value lists; each combination
	     becomes a dict of ``filter()`` keyword arguments (``None`` entries
	     are dropped, i.e. that field is left unconstrained).
	  3. Print the number of combinations and wait for Enter on stdin.
	  4. For each combination fetch up to 3 randomly ordered rows and
	     collect their primary keys.

	Combinations that matched at least one row are pickled to the file
	``pickle_unique_<ModelName>`` in the current working directory. When that
	file can be loaded on a later run, its (shorter) list replaces the full
	product in step 4; the file is rewritten after every run.

	:param model: Django model class; it must have ``lemma`` and ``pos``
		fields because they are printed for every selected row.
	:param model_attributes: list of field names to combine.
	:returns: list of primary keys (may contain duplicates).
	"""
	import pickle
	from itertools import product

	def unique_values(obj, key_list):
		"""Map each field name to its distinct truthy values plus None."""
		print("Getting unique attributes for <%s>..." % obj.__name__)

		attr_values = {}
		# TODO: limit unique values
		# ('lemma', 5) = limits to 5, excluding None
		# ('lemma', ['2syll', '3syll'] = excludes other values not
		# these, then includes None
		for item in key_list:
			distinct = set(obj.objects.all().values_list(item, flat=True))
			# Every falsy value ('', 0, NULL) collapses into the single
			# trailing None, which later means "do not filter on this field".
			vals = [v for v in distinct if v]
			vals.append(None)
			attr_values[item] = vals

			print(' - %s' % item)
			print('   ' + ', '.join([repr(a) for a in vals]))

		return attr_values

	def product_kwargs(values_dict):
		"""Lazily yield one filter-kwargs dict per value combination."""
		keys = list(values_dict.keys())
		for combo in product(*[values_dict[k] for k in keys]):
			yield dict([(k, v) for k, v in zip(keys, combo) if v])

	def fetch_unique_words(model, kwarg_list, limit=3):
		"""Return pks of up to ``limit`` random rows per kwargs combination."""
		cache_path = 'pickle_unique_' + model.__name__

		# Best-effort cache of the combinations that produced rows last time.
		# Any failure (missing file on the first run, unreadable or stale
		# pickle) is reported and the full product is used instead.
		# NOTE(review): pickle.load can execute arbitrary code; only ever
		# load cache files written by this command.
		try:
			with open(cache_path, 'rb') as f:
				kwarg_list = pickle.load(f)
		except Exception as e:
			print(e)
		else:
			print(kwarg_list)

		uniq_words = []
		kwargs_with_things = []
		count = 0

		for item in kwarg_list:
			# order_by('?') gives a new random order on every evaluation, so
			# the query is run exactly once and the same rows are used both
			# for the printed summary and for the collected ids.
			rows = list(
				model.objects.filter(**item).order_by('?')
				.values_list('pk', 'lemma', 'pos')[:limit]
			)

			if rows:
				uniq_words.extend([pk for pk, _lemma, _pos in rows])
				kwargs_with_things.append(item)

				print('Selecting for kwargs: %s' % repr(item))
				# %-formatting instead of '+' so a NULL lemma/pos cannot
				# raise TypeError.
				print('\n -'.join(['%s: %s' % (lemma, pos) for _pk, lemma, pos in rows]))
			else:
				# Index of a combination that matched nothing.
				print(count)

			count += 1

		print('Total fetched:  %d' % len(uniq_words))
		print('Product count:  %d' % count)

		# Binary mode, as the pickle documentation requires.
		with open(cache_path, 'wb') as f:
			pickle.dump(kwargs_with_things, f)

		return uniq_words

	model_vals = unique_values(model, model_attributes)

	# The size of the Cartesian product is the product of the list lengths;
	# there is no need to walk the whole product just to count it.
	iterations = 1
	for vals in model_vals.values():
		iterations *= len(vals)

	print('Iterations: %d' % iterations)
	print('Ok? [Enter]')
	raw_input()

	return fetch_unique_words(model, product_kwargs(model_vals))

class Command(BaseCommand):
	"""Management command that selects a small, varied sample of Word ids.

	NOTE(review): ``args``, ``help`` and ``option_list`` below describe a
	question-testing command (``testquestions``) and look copied from it;
	``handle`` never reads ``options``. They are left untouched so the
	command line interface does not change -- confirm and clean up.
	"""
	args = '--grammarfile FILE --questionfile FILE --qid QID'
	help = """
	Runs through a question XML file and produces test sentences.
	Errors are printed to stderr, so that the rest can be filtered out.

	Example command:
		./manage.py testquestions --grammarfile grammar_defaults.xml \\
								  --questionfile noun_questions.xml \\
								  --logfile accusative_errors.log \\
								  --iterations 3 \\
								  --qid acc#
	"""
	option_list = BaseCommand.option_list + (
		make_option("-g", "--grammarfile", dest="grammarfile", default=False,
						  help="XML-file for grammar defaults for questions"),
		make_option("-q", "--questionfile", dest="questionfile", default=False,
		              help="XML-file that contains questions"),
		make_option("--qid", dest="qid", default=False,
		              help="Specify a list of IDs to test with commas and no spaces, or specify a partial part of an id to filter questions by, e.g. ill1,ill2  OR  ill#; note the wildcard symbol."),
		
		make_option("--iterations", dest="itercount", default=5,
						help="The count of iterations for each question"),
		make_option("--logfile", dest="logfile", default=False,
						help="Store all output to a file in addition to stdout."),
		# TODO: question iterations count
	)

	def handle(self, *args, **options):
		""" Selects a reduced set of Word ids to serve as test data.

		First samples words for every combination of WORD_ATTRS values via
		getUniques(), then adds up to 3 random words per Semtype, preferring
		words that were already selected. Progress and the number of distinct
		ids are printed; in the code visible here nothing is deleted from or
		written to the database.
		"""
		
		from smadrill.models import Semtype, Word, WordTranslation
		
		# Words by unique attributes
		
		word_unique_ids = getUniques(Word, WORD_ATTRS)
		# word_translation_unique_ids = getUniques(WordTranslation, WORD_TRANSLATION_ATTRS)
		existing_objects = Word.objects.filter(id__in=word_unique_ids)
		
		# 3 words from each semtypes

		for semtype in Semtype.objects.all():
			words = existing_objects.filter(semtype=semtype)
			# Fall back to every word of this semtype when none of the
			# already selected words belongs to it.
			if not words.exists():
				words = semtype.word_set.all()
			
			# order_by('?') reshuffles on every evaluation, so run the query
			# once and use the same rows for the ids and the printed summary.
			rows = list(words.order_by('?').values_list('pk', 'lemma', 'pos')[:3])
			word_unique_ids.extend([pk for pk, _lemma, _pos in rows])
			
			print('Selecting for semtype: %s' % semtype)
			# %-formatting so a NULL lemma/pos cannot raise TypeError.
			print('\n -'.join(['%s: %s' % (lemma, pos) for _pk, lemma, pos in rows]))

		print('Total uniques:  %d' % len(set(word_unique_ids)))
		# 5 words with forms in all tags