# -*- encoding: utf-8 -*-
""" Various rules for displaying ``sme`` entries properly, and
connecting FST to Lexicon.
"""
# NOTE: if copying this for a new language, remember to make sure that
# it's being imported in __init__.py
# * paradigm documentation here:
# http://giellatekno.uit.no/doc/dicts/dictionarywork.html
from logging import getLogger
from morphology import generation_overrides as morphology
from lexicon import lexicon_overrides as lexicon
from lexicon import autocomplete_filters as autocomplete_filters
from morpholex import morpholex_overrides as morpholex
morph_log = getLogger('morphology')
# This is called before any lookup is done, regardless of whether it
# came from analysis or not.
@autocomplete_filters.autocomplete_filter_for_lang(('nob', 'sme'))
def remove_orig_entry(entries):
    """ **Autocomplete filter** registered for ``('nob', 'sme')``.

    Drop every entry that carries an ``orig_entry`` attribute and
    return the remaining entries, in their original order, as a new
    list.

    ``entries`` only needs to be iterable, with each item exposing an
    ``attrib`` mapping (presumably lxml elements -- confirm against the
    caller).
    """
    kept = []
    for entry in entries:
        if 'orig_entry' in entry.attrib:
            continue
        kept.append(entry)
    return kept
@morphology.pregenerated_form_selector(*['sme', 'SoMe'])
def pregenerate_sme(form, tags, node, **kwargs):
    """ **pregenerated form selector**: mini_paradigm / lemma_ref

    Decide whether the analyses for ``node`` are read from the entry
    itself instead of from the FST:

    * the entry contains a ``lemma_ref``: return ``(form, [], node, [])``
    * the entry contains no ``mini_paradigm``: return the three items
      ``(form, tags, node)`` untouched
    * otherwise: return ``(form, tags, node, analyses)``, where every
      ``analysis`` element below the first matched ``mini_paradigm``
      has been converted by ``analysis_node``

    Note that the no-``mini_paradigm`` case returns three items while
    the other cases return four.
    """
    mini_paradigms = node.xpath('.//mini_paradigm[1]')
    lemma_refs = node.xpath('.//lemma_ref')

    # A lemma_ref takes priority over any mini_paradigm on the entry.
    if len(lemma_refs) > 0:
        return form, [], node, []

    if len(mini_paradigms) == 0:
        return form, tags, node

    paradigm = mini_paradigms[0]

    def analysis_node(analysis):
        """ Node ->
            ("lemma", ["Pron", "Sg", "Tag"], ["wordform", "wordform"])
        """
        ms_values = analysis.xpath('.//@ms')
        # The first @ms value is an underscore-joined tag string.
        tag_parts = ms_values[0].split('_') if len(ms_values) > 0 else []
        wordforms = analysis.xpath('.//wordform/text()')
        return (form, tag_parts, wordforms)

    analyses = map(analysis_node, paradigm.xpath('.//analysis'))
    return form, tags, node, analyses
# XPath expression template: wraps a path in
# string(normalize-space(...)), so the query yields a single
# whitespace-normalized string.
_str_norm = 'string(normalize-space(%s))'

# Language-code pairs, presumably (source, target) -- confirm against
# whatever consumes these lists.
SME_NOB_DICTS = [('sme', 'nob'), ('SoMe', 'nob')]

NOB_SME = [('nob', 'sme')]
@lexicon.entry_source_formatter(*['sme', 'SoMe'])
def format_source_sme(ui_lang, e, target_lang):
    """ **Entry source formatter**

    Format the source side of an entry as ``lemma (pos, class)``:

    * @pos is included; when a tag filter is configured for
      ``('sme', ui_lang)`` it is passed through ``tagfilter_conf``,
      otherwise it is shown as-is
    * @class is included, lowercased
    * if there is a lemma_ref, a link to that entry's detail page is
      appended (e.g., munnje)

    :param ui_lang: interface language, selects the tag filter.
    :param e: entry node; only its ``xpath()`` method is used.
    :param target_lang: target language, used to build the lemma_ref
        link.
    :returns: the formatted string, or just the lemma when the entry
        has neither @pos nor @class.

    # TODO: new-style templates
    """
    from morphology.utils import tagfilter_conf
    from flask import current_app

    _str_norm = 'string(normalize-space(%s))'

    _lemma = e.xpath(_str_norm % 'lg/l/text()')
    _class = e.xpath(_str_norm % 'lg/l/@class')
    _pos = e.xpath(_str_norm % 'lg/l/@pos')
    _lemma_ref = e.xpath(_str_norm % 'lg/lemma_ref/text()')

    paren_args = []

    if _pos:
        filters = current_app.config.tag_filters.get(('sme', ui_lang))
        if filters:
            paren_args.append(tagfilter_conf(filters, _pos))
        else:
            paren_args.append(_pos)

    if _class:
        paren_args.append(_class.lower())

    if not paren_args:
        # NOTE(review): as before, the lemma_ref link is only shown
        # together with the parenthesised part -- confirm that a bare
        # lemma is really wanted here when a lemma_ref exists.
        return _lemma

    if _lemma_ref:
        _link_targ = u'/detail/%s/%s/%s.html' % ('sme', target_lang, _lemma_ref)
        # The format string needs one placeholder per argument (href
        # and link text); a single '%s' with two arguments raises
        # TypeError.
        _lemma_ref_link = u' → ' + u'<a href="%s">%s</a>' % (_link_targ, _lemma_ref)
    else:
        _lemma_ref_link = ''

    thing = '%s (%s)' % (_lemma, ', '.join(paren_args))
    return thing + _lemma_ref_link
@lexicon.entry_source_formatter('nob')
def format_source_nob(ui_lang, e, target_lang):
    """ **Entry source formatter**

    Format ``nob`` source entries that point at another entry, i.e.
    entries carrying both @til_ref and @orig_entry, as::

        lemma (pos) → <link to /nob/sme/ref/?l_til_ref=ORIG_ENTRY>

    @pos is translated through the ``('sme', 'nob')`` tag filter; when
    that filter is not configured, or has no entry for the pos, the raw
    @pos value is shown instead.

    :param ui_lang: interface language (not used by this formatter).
    :param e: entry node; only its ``xpath()`` method is used.
    :param target_lang: target language (not used by this formatter).
    :returns: the formatted string, or ``None`` when the entry lacks
        @til_ref or @orig_entry.

    # TODO: new-style templates
    """
    from flask import current_app

    _str_norm = 'string(normalize-space(%s))'

    _lemma = e.xpath(_str_norm % 'lg/l/text()')
    _pos = e.xpath(_str_norm % 'lg/l/@pos')
    _til_ref = e.xpath(_str_norm % 'lg/l/@til_ref')
    _orig_entry = e.xpath(_str_norm % 'lg/l/@orig_entry')

    if not (_til_ref and _orig_entry):
        return None

    # A missing filter must not crash the formatter, and an unmapped
    # pos must not render as "(None)": fall back to the raw pos.
    tag_filter = current_app.config.tag_filters.get(('sme', 'nob')) or {}
    _transl_pos = "(%s)" % tag_filter.get(_pos, _pos)

    _link_return = "/nob/sme/ref/?l_til_ref=%s" % _orig_entry
    # One placeholder per argument (href and link text); a single '%s'
    # with two arguments raises TypeError.
    _link = '<a href="%s">%s</a>' % (_link_return, _orig_entry)
    _lemma_ref_link = u' → ' + _link

    return ' '.join([_lemma, _transl_pos, _lemma_ref_link])
@lexicon.entry_target_formatter(('sme', 'nob'), ('SoMe', 'nob'))
def format_target_sme(ui_lang, e, tg):
    """**Entry target translation formatter**

    Show the @reg (region) attribute next to a translation, but only
    when the entry is an ``N Prop`` (``@pos == 'N'`` and
    ``@type == 'Prop'``) and the translation actually has a @reg.

    Returns ``"translation (reg)"`` in that case and ``None`` in every
    other case.

    # TODO: new-style templates
    """
    normalized = 'string(normalize-space(%s))'

    entry_type = e.xpath(normalized % 'lg/l/@type')
    entry_pos = e.xpath(normalized % 'lg/l/@pos')

    if entry_pos != 'N' or entry_type != 'Prop':
        return None

    translation = tg.xpath(normalized % 't/text()')
    region = tg.xpath(normalized % 't/@reg')

    if not region:
        return None

    return "%s (%s)" % (translation, region)
@lexicon.entry_target_formatter(('nob', 'sme'))
def format_fra_ref_links(ui_lang, e, tg):
    """**Entry target translation formatter**

    * If the translation group has a ``re`` element with a @fra_ref
      attribute, render the ``re`` text as a link for that reference.
    * Otherwise, display the @reg (region) attribute in translations,
      but only for ``N Prop``.

    :param ui_lang: interface language (not used by this formatter).
    :param e: entry node; only its ``xpath()`` method is used.
    :param tg: translation group node; only its ``xpath()`` method is
        used.
    :returns: the formatted string, or ``None`` when neither case
        applies.
    """
    _str_norm = 'string(normalize-space(%s))'

    _fra_ref = tg.xpath(_str_norm % 're/@fra_ref')

    if _fra_ref:
        _fra_text = tg.xpath(_str_norm % 're/text()')
        # One placeholder per argument; the former single '%s' with two
        # arguments raised TypeError. Unicode literal because of the
        # arrow, like the other formatters in this module.
        # NOTE(review): the href reuses the /nob/sme/ref/ lookup route
        # used for nob source references -- confirm this is the right
        # target for @fra_ref.
        return u'<a href="/nob/sme/ref/?l_til_ref=%s">%s →</a>' % (_fra_ref, _fra_text)

    _type = e.xpath(_str_norm % 'lg/l/@type')
    _pos = e.xpath(_str_norm % 'lg/l/@pos')

    if _pos == 'N' and _type == 'Prop':
        _t_lemma = tg.xpath(_str_norm % 't/text()')
        _reg = tg.xpath(_str_norm % 't/@reg')
        if _reg:
            return "%s (%s)" % (_t_lemma, _reg)

    return None