from morphology import generation_overrides as morphology from lexicon import lexicon_overrides # @lexicon_overrides.postlookup_filters_for_lexicon(('eng', 'crk')) # def sort_by_rank(lex, nodelist, *args, **kwargs): # # _str_norm = 'string(normalize-space(%s))' # # def get_rank(n): # try: # rank = int( n.xpath(_str_norm % './/rank/@rank') ) # except: # rank = False # if rank: # return rank # else: # return n.xpath(_str_norm % './/l/text()') # # return sorted(nodelist, key=get_rank) @morphology.postgeneration_filter_for_iso('crk') def force_hyphen(generated_forms, *input_args, **input_kwargs): """ For any +Cnj forms that are generated, filter out those without ê- """ def matches_hyphen(f): return u'ê-' in f def form_fx((tag, forms)): if forms: forms = list(set(forms)) _hyph = [f for f in forms if '-' in f] if len(_hyph) > 0: unhyphs = [h.replace('-', '') for h in _hyph] # throw out all forms that have a hyphenated equivalent _filt = lambda x: x not in unhyphs and '%' not in x fs = filter(_filt, forms) return (tag, fs) return (tag, forms) return map(form_fx, generated_forms) @morphology.tag_filter_for_iso('crk') def adjust_tags_for_gen(lemma, tags, node=None, **kwargs): """ **tag filter**: Lexicon -> FST changes. Change POS to be compatible with FST for when they are not. """ if 'template_tag' not in kwargs: return lemma, tags, node from flask import current_app, g import re # get tagset for pre-lemma stuff morph = current_app.config.morphologies.get(g._from, False) tagsets = morph.tagsets.sets prelemmas = tagsets.get('prelemma_tags') # TODO: where is the lemma # print g._from # print lemma # print list(prelemmas.members) cleaned_tags = [] for t in tags: # print t cleaned_tag = [] for pl in prelemmas.members: before = [] rest = [] pl = unicode(pl) try: _pl = re.compile(pl) except Exception, e: _pl = False for part in t: if _pl: if _pl.match(part) or pl == part: before.append(part) continue else: if pl == part: before.append(part) continue rest.append(part) cleaned_tag.extend(before) cleaned_tag.append(lemma) cleaned_tag.extend(rest) # print cleaned_tag cleaned_tags.append(cleaned_tag) if len(cleaned_tags) == 0 and len(tags) > 0: tags = cleaned_tags # print cleaned_tags return lemma, cleaned_tags, node