# -*- encoding: utf-8 -*-
"""
sma-specific overrides and pregenerated paradigm selection.

A set of lexicon-related, language-specific rules, provided by
`lexicon.LexiconOverrides`. There is probably a better location
for this documentation, but for now it lives here.

Example source formatting function:

    @lexicon.entry_source_formatter('sme')
    def format_source_sme(ui_lang, entry_node):
        # do some processing on the entry node ...
        if successful:
            return some_formatted_string
        return None

Example target string formatting function:

    @lexicon.entry_target_formatter('sme', 'nob')
    def format_target_sme(ui_lang, entry_node, tg_node):
        # do some processing on the entry and tg node ...
        if successful:
            return some_formatted_string
        return None
"""
# NOTE: if copying this for a new language, remember to make sure that
# it's being imported in __init__.py
from morphology import generation_overrides as morphology
from lexicon import lexicon_overrides as lexicon
from flask import current_app
from morpholex import morpholex_overrides as morpholex
@lexicon.entry_source_formatter('sma')
def format_source_sma(ui_lang, e, target_lang):
    """Format the source side of an sma lexicon entry for display.

    The lemma is followed by a parenthesised, comma-separated list made
    of the filtered part of speech and the `class` attribute of `lg/l`
    (each only when present). When that parenthesised part exists and
    the entry has an `lg/lemma_ref`, a link to the referenced lemma's
    detail page is appended. An entry with neither part of speech nor
    class is rendered as the bare lemma, without the link.

    :param ui_lang: interface language; not used by this formatter.
    :param e: entry node; only its `xpath` method is called.
    :param target_lang: target language code, used in the detail URL.
    :returns: the formatted entry string.
    """
    from morphology.utils import tagfilter_conf

    _str_norm = 'string(normalize-space(%s))'
    _lemma = e.xpath(_str_norm % 'lg/l/text()')
    _class = e.xpath(_str_norm % 'lg/l/@class')
    _pos = e.xpath(_str_norm % 'lg/l/@pos')
    _lemma_ref = e.xpath(_str_norm % 'lg/lemma_ref/text()')

    if _lemma_ref:
        _link_targ = u'/detail/%s/%s/%s.html' % ('sma', target_lang, _lemma_ref)
        # Two values are interpolated, so the template needs two
        # placeholders: the href target and the visible link text.
        # NOTE(review): the exact markup (extra wrappers, CSS classes)
        # expected by the templates should be confirmed.
        _lemma_ref_link = u' → <a href="%s">%s</a>' % (_link_targ, _lemma_ref)
    else:
        _lemma_ref_link = ''

    paren_args = []

    if _pos:
        # NOTE(review): the tag filters are always looked up for the
        # ('sma', 'nob') pair, regardless of target_lang -- confirm
        # that this is intended.
        filters = current_app.config.tag_filters.get(('sma', 'nob'))
        paren_args.append(tagfilter_conf(filters, _pos))

    if _class:
        paren_args.append(_class)

    if paren_args:
        entry_string = '%s (%s)' % (_lemma, ', '.join(paren_args))
        return entry_string + _lemma_ref_link

    return _lemma
# LEX_TO_FST = {
# 'a': 'A',
# 'adv': 'Adv',
# 'n': 'N',
# 'npl': 'N',
# 'num': 'Num',
# 'prop': 'Prop',
# 'v': 'V',
# }
@morphology.pregenerated_form_selector('sma')
def pregenerate_sma(form, tags, node, **kwargs):
    """Select pregenerated forms for an sma entry node.

    Return shapes, in order of precedence:

    * the node has a `lemma_ref` descendant:
      ``(form, [], node, [])``
    * the node has no match for ``.//mini_paradigm[1]``:
      ``(form, tags, node)`` -- note: three elements, not four
    * otherwise: ``(form, tags, node, analyses)`` where `analyses` is
      built from the `analysis` descendants of the first matched
      mini paradigm.

    Each analysis has the shape
    ``("lemma", ["Pron", "Sg", "Tag"], ["wordform", "wordform"])``:
    the incoming `form`, the first `ms` attribute split on ``_`` (or an
    empty list when there is none), and the `wordform` text nodes.

    Extra keyword arguments are accepted and ignored.
    """
    mini_paradigms = node.xpath('.//mini_paradigm[1]')
    lemma_refs = node.xpath('.//lemma_ref')

    # Entries that merely point at another lemma get no tags and no
    # pregenerated analyses.
    if len(lemma_refs) > 0:
        return form, [], node, []

    # Nothing pregenerated for this entry: hand the input back.
    if len(mini_paradigms) == 0:
        return form, tags, node

    def _to_analysis(analysis):
        """ analysis node -> (form, tag parts, wordforms) """
        ms_values = analysis.xpath('.//@ms')
        tag_parts = ms_values[0].split('_') if len(ms_values) > 0 else []
        wordforms = analysis.xpath('.//wordform/text()')
        return (form, tag_parts, wordforms)

    paradigm = mini_paradigms[0]
    analyses = map(_to_analysis, paradigm.xpath('.//analysis'))
    return form, tags, node, analyses
# from common import remove_blank, match_homonymy_entries, external_korp_url
from common import remove_blank, match_homonymy_entries
# Apply the post-generation filter decorator for 'sma' to the shared
# `remove_blank` function imported from `common`.
morphology.postgeneration_filter_for_iso('sma')(remove_blank)

# Apply the post-morpho-lexicon override decorator for 'sma' to the
# shared `match_homonymy_entries` function imported from `common`.
morpholex.post_morpho_lexicon_override('sma')(match_homonymy_entries)
# KORP_SEARCHES = [
# ('korp_wordform', 'sma', 'nob'),
# # ('korp_wordform', 'nob', 'sma'),
# ]
#
# lexicon.external_search(*KORP_SEARCHES)(
# external_korp_url
# )