#!/usr/bin/env python2.7
# -*- coding: utf-8 -*-
"""
USAGE
-----

Specify each file name as a separate argument


	python2.7 check_feedback.py sme_data/meta_data/messages_vasta.xml \
   						   	   sme_data/meta_data/messages_vasta.fin.xml \
   						   	   sme_data/meta_data/messages_vasta.sme.xml \
   						   	   sme_data/meta_data/messages_vasta.eng.xml \
   						   	   sme_data/meta_data/messages_vasta.swe.xml

Script will check all permutations of these files to figure out which msgids
are missing from one or many, missing items are printed to stderr (for
inclusion in error log), and calculation in progress is printed to stdout.

"""

from xml.dom import minidom as _dom
import sys
import re
import string
import codecs
import operator
from itertools import product

from django.utils.encoding import force_unicode

def fix_encoding(s):
	try:
		s = s.decode('utf-8')
	except:
		pass
	
	return force_unicode(s)

try:
	from collections import OrderedDict
except ImportError:
	from conf.ordereddict import OrderedDict

def chunks(l, n):
	""" Yield successive n-sized chunks from l.
	"""
	for i in xrange(0, len(l), n):
		yield l[i:i+n]

def get_attrs(item, attr_names):
	""" For an object, get attributes from a list of attributes.
	"""
	vals = []
	for attr in attr_names:
		val = item.__getattribute__(attr)
		if val:
			vals.append(fix_encoding(val))
		else:
			vals.append('')
	return vals

def render_kwargs(D):
	lines = []
	for k, vs in D.iteritems():
		line = ' %s = %s ' % (k, ', '.join(vs))
		lines.append(line)
	
	return '\n'.join(lines)

def read_messages(infiles):

	file_sets = {}
	for infile in infiles:
		xmlfile = file(infile)
		tree = _dom.parse(infile)
		lex = tree.getElementsByTagName("messages")[0]
		lang = lex.getAttribute("xml:lang")		

		msg_ids = set()
		for el in tree.getElementsByTagName("message"):
			msg_ids.add(el.getAttribute("id"))

		file_sets[infile] = msg_ids
	
	from itertools import permutations
	total_differences = set()
	for set1, set2 in permutations(file_sets.items(), 2):
		diff = set1[1] ^ set2[1]
		print >> sys.stdout, 'Symmetric distance in:'
		print >> sys.stdout, '  %s' % set1[0] 
		print >> sys.stdout, '  %s' % set2[0]
		print >> sys.stdout, ''
		print >> sys.stdout, '  %s' % ', '.join(diff)
		print >> sys.stdout, ''
		print >> sys.stdout, ''
		for a in diff:
			total_differences.add(a)

	if len(list(total_differences)) > 0:
		print >> sys.stderr, ' ! Missing feedback messages in one or many files:'
		for a in list(total_differences):
			print >> sys.stderr, '    ' + a
			missing_files = [f for f, _is in file_sets.items() if a not in _is]
			print >> sys.stderr, '    ' + ', '.join(missing_files)
		print >> sys.stderr, ''
	else:
		print >> sys.stdout, " * No asymmetricalities between feedback files"
	


	
	# print reduce(lambda _s, __s: _s ^ __s, file_sets.values())
	
if __name__ == "__main__":
	filenames = sys.argv[1::]
	read_messages(filenames)

