! Divvun & Giellatekno - open source grammars for Sámi and other languages
! Copyright © 2000-2010 The University of Tromsø & the Norwegian Sámi Parliament
! http://giellatekno.uit.no & http://divvun.no
!
! This program is free software; you can redistribute and/or modify
! this file under the terms of the GNU General Public License as published by
! the Free Software Foundation, either version 3 of the License, or
! (at your option) any later version. The GNU General Public License
! is found at http://www.gnu.org/licenses/gpl.html. It is
! also available in the file $GTHOME/LICENSE.txt.
!
! Other licensing options are available upon request, please contact
! giellatekno@uit.no or feedback@divvun.no

! ===========================================
!! !!!Continuation lexicons for abbreviations
! ===========================================

!! !!Lexica for adding tags and periods
! ----------------------------------

!! !!The sublexica

!! !Continuation lexicons for abbrs both with and without final period

! Each "-itrab", "-trab", "-trnumab" and "-numnoab" lexicon below only adds
! the +ABBR tag plus one +Gram/... subtype tag, and then continues to the
! shared part-of-speech lexicon (ab-noun, ab-adj, ab-adv, ab-num).
! Each shared lexicon offers two continuations: the variant without a final
! period, and the variant with a final period (the latter tagged +Use/NG).

! Language names: the same entry is analysed both as noun and as adjective.
LEXICON ab-dot-noun-adj-trab !!= * __@CODE@__
 +N+Sem/Lang: ab-dot-noun-trab ;
 +A+Sem/Lang: ab-dot-adj-trab ;

! --- Nouns ---
LEXICON ab-noun-itrab
 +ABBR+Gram/IAbbr: ab-noun ;

LEXICON ab-noun-trab
 +ABBR+Gram/TAbbr: ab-noun ;

LEXICON ab-noun-trnumab
 +ABBR+Gram/TNumAbbr: ab-noun ;

LEXICON ab-noun !!= * __@CODE@__
 ab-nodot-noun ;
 +Use/NG: ab-dot-noun ;

! --- Adjectives ---
LEXICON ab-adj-itrab
 +ABBR+Gram/IAbbr: ab-adj ;

LEXICON ab-adj-trab
 +ABBR+Gram/TAbbr: ab-adj ;

LEXICON ab-adj !!= * __@CODE@__
 ab-nodot-adj ;
 +Use/NG: ab-dot-adj ;

! --- Adverbs ---
LEXICON ab-adv-itrab
 +ABBR+Gram/IAbbr: ab-adv ;

! The "-numnoab" lexicons emit +Gram/NumNoAbbr (cf. ab-dot-adv-numnoab);
! +Gram/NoAbbr belongs to the "-noab" lexicons.
LEXICON ab-adv-numnoab
 +ABBR+Gram/NumNoAbbr: ab-adv ;

LEXICON ab-adv-trab
 +ABBR+Gram/TAbbr: ab-adv ;

LEXICON ab-adv-trnumab
 +ABBR+Gram/TNumAbbr: ab-adv ;

LEXICON ab-adv !!= * __@CODE@__
 ab-nodot-adv ;
 +Use/NG: ab-dot-adv ;

! --- Numerals ---
LEXICON ab-num-itrab
 +ABBR+Gram/IAbbr: ab-num ;

LEXICON ab-num !!= * __@CODE@__
 ab-nodot-num ;
 +Use/NG: ab-dot-num ;

!! 
!Lexicons without final period

! Tag-adding entry points for nouns; all continue to ab-nodot-noun.
LEXICON ab-nodot-noun-itrab
    +ABBR+Gram/IAbbr:       ab-nodot-noun ;

LEXICON ab-nodot-noun-trab
    +ABBR+Gram/TAbbr:       ab-nodot-noun ;

LEXICON ab-nodot-noun-trnumab
    +ABBR+Gram/TNumAbbr:    ab-nodot-noun ;

! Nouns continue both to the attr/nom/acc/gen set and to the oblique cases.
LEXICON ab-nodot-noun !!= * __@CODE@__ The bulk
                            nodot-attrnomaccgen-infl ;
                            nodot-oblique-infl ;

! Adjectives: attr/nom/acc/gen only.
LEXICON ab-nodot-adj-itrab
    +ABBR+Gram/IAbbr:       ab-nodot-adj ;

LEXICON ab-nodot-adj !!= * __@CODE@__
                            nodot-attrnomaccgen-infl ;

! Adverbs: bare form, +Attr form, or continuation to RHyph.
LEXICON ab-nodot-adv-itrab
    +ABBR+Gram/IAbbr:       ab-nodot-adv ;

LEXICON ab-nodot-adv-trnumab
    +ABBR+Gram/TNumAbbr:    ab-nodot-adv ;

LEXICON ab-nodot-adv !!= * __@CODE@__
                            # ;
    +Attr:                  # ;
                            RHyph ;
! Case readings for adverbs are disabled:
!   +Sg+Nom:                # ;
!   +Sg+Acc:                # ;
!   +Sg+Gen:                # ;

! Numerals: nom/acc/gen only.
LEXICON ab-nodot-num !!= * __@CODE@__
                            nodot-nomaccgen-infl ;

!! !Lexicons with final period

! Tag-adding entry points for nouns; all continue to ab-dot-noun.
LEXICON ab-dot-noun-itrab
    +ABBR+Gram/IAbbr:       ab-dot-noun ;

LEXICON ab-dot-noun-noab
    +ABBR+Gram/NoAbbr:      ab-dot-noun ;

LEXICON ab-dot-noun-trab
    +ABBR+Gram/TAbbr:       ab-dot-noun ;

LEXICON ab-dot-noun-trnumab
    +ABBR+Gram/TNumAbbr:    ab-dot-noun ;

LEXICON ab-dot-noun !!= * __@CODE@__ This is the lexicon for abbrs that must have a period.
                            dot-attrnomaccgen-infl ;

! Tag-adding entry points for adjectives; all continue to ab-dot-adj.
LEXICON ab-dot-adj-itrab
    +ABBR+Gram/IAbbr:       ab-dot-adj ;

LEXICON ab-dot-adj-noab
    +ABBR+Gram/NoAbbr:      ab-dot-adj ;

LEXICON ab-dot-adj-trab
    +ABBR+Gram/TAbbr:       ab-dot-adj ;

LEXICON ab-dot-adj-trnumab
    +ABBR+Gram/TNumAbbr:    ab-dot-adj ;

LEXICON ab-dot-adj !!= * __@CODE@__ This is the lexicon for abbrs that must have a period.
                            dot-attrnomaccgen-infl ;

! Tag-adding entry points for adverbs; all continue to ab-dot-adv.
LEXICON ab-dot-adv-itrab
    +ABBR+Gram/IAbbr:       ab-dot-adv ;

LEXICON ab-dot-adv-numnoab
    +ABBR+Gram/NumNoAbbr:   ab-dot-adv ;

LEXICON ab-dot-adv-trab
    +ABBR+Gram/TAbbr:       ab-dot-adv ;

LEXICON ab-dot-adv-trnumab
    +ABBR+Gram/TNumAbbr:    ab-dot-adv ;

LEXICON ab-dot-adv !!= * __@CODE@__ This is the lexicon for abbrs that must have a period.
                            DOT ; ! Adv without case.
! Numerals with final period. The entry points add +Num themselves.
LEXICON ab-dot-num-itrab
    +Num+ABBR+Gram/IAbbr:   ab-dot-num ;

LEXICON ab-dot-num-trab
    +Num+ABBR+Gram/TAbbr:   ab-dot-num ;

LEXICON ab-dot-num !!= * __@CODE@__ This is the lexicon for abbrs that must have a period.
                            dot-nomaccgen-infl ;

! Conjunctions with final period. The entry points add +CC themselves.
LEXICON ab-dot-cc-itrab
    +CC+ABBR+Gram/IAbbr:    ab-dot-cc ;

LEXICON ab-dot-cc-trab
    +CC+ABBR+Gram/TAbbr:    ab-dot-cc ;

LEXICON ab-dot-cc !!= * __@CODE@__
                            DOT ;

! Then, as an afterthought, come our two verbs, gč. and vrd.

! Verbs, with or without period. The entry points add +V themselves.
LEXICON ab-verb-itrab
    +V+ABBR+Gram/IAbbr:     ab-verb ;

LEXICON ab-verb-trab
    +V+ABBR+Gram/TAbbr:     ab-verb ;

LEXICON ab-verb !!= * __@CODE@__ A lexicon for "gč." and perhaps also other abbreviated verbs.
    +Use/NG:                ab-dot-verb ;
                            ab-nodot-verb ;

! Verbs that must have a period.
LEXICON ab-dot-verb-itrab
    +V+ABBR+Gram/IAbbr:     ab-dot-verb ;

LEXICON ab-dot-verb-trab
    +V+ABBR+Gram/TAbbr:     ab-dot-verb ;

LEXICON ab-dot-verb-trnumab
    +V+ABBR+Gram/TNumAbbr:  ab-dot-verb ;

LEXICON ab-dot-verb !!= * __@CODE@__
    +TV+Imprt:              DOT ; ! Period.

LEXICON ab-nodot-verb !!= * __@CODE@__
    +TV+Imprt:              # ; ! No period.

! riegádan:
LEXICON ab-dot-IVprfprc-trab
    +V+ABBR+Gram/TAbbr:     ab-dot-IVprfprc ;

LEXICON ab-dot-IVprfprc-trnumab
    +V+ABBR+Gram/TNumAbbr:  ab-dot-IVprfprc ;

LEXICON ab-dot-IVprfprc !!= * __@CODE@__
    +Sg+IV+PrfPrc:          DOT ; ! Period.
! Inflection lexicons for abbreviations WITHOUT a final period.
! Case suffixes are attached after a morpheme boundary (%>) and a literal
! colon (%:), e.g. km:a.

LEXICON nodot-attrnomaccgen-infl !!= * __@CODE@__
 nodot-attr-infl ;
 nodot-nomaccgen-infl ;

LEXICON nodot-attr-infl !!= * __@CODE@__
 +Attr: # ;

LEXICON nodot-nomaccgen-infl !!= * __@CODE@__
 +Sg+Nom: # ;
 +Sg+Acc: # ;
 +Sg+Acc+Err/Orth:%>%:a # ; !km:a
 +Sg+Gen: # ;
 RHyph ;

LEXICON nodot-oblique-infl
 +Sg+Ill:%>%:i # ;
 +Sg+Loc:%>%:s # ;
 +Sg+Com:%>%:in # ;
 +Sg+Ess:%>%:n # ;
 +Pl+Nom:%>%:t # ;
 +Pl+Gen:%>%:id # ;
 +Pl+Acc:%>%:id # ;
 +Pl+Ill:%>%:ide # ;
 +Pl+Loc:%>%:in # ;
 +Pl+Com:%>%:iguin # ; ! plural comitative suffix is -iguin
! Variants tagged as orthographic errors:
 +Sg+Ill+Err/Orth:%>%:ii # ;
 +Sg+Loc+Err/Orth:%>%:as # ;
 +Sg+Com+Err/Orth:%>%:ain # ;
 +Sg+Com+Err/Orth:%>%:iin # ;
 +Sg+Ess+Err/Orth:%>%:an # ;

! Inflection lexicons for abbreviations WITH a final period.
! All of them end in DOT, which supplies the period itself.

LEXICON dot-attrnomaccgen-infl !!= * __@CODE@__
 dot-nomaccgen-infl ;
 dot-attr ;

LEXICON dot-attr !!= * __@CODE@__
 +Attr: DOT ;

LEXICON dot-nomaccgen-infl !!= * __@CODE@__
 +Sg+Nom: DOT ;
 +Sg+Acc: DOT ;
 +Sg+Gen: DOT ;

LEXICON DOT !!= * __@CODE@__ - Adds the dot to dotted abbreviations.
 +Use/-PMatch:%. # ; ! We need the dot here for regular fsts
! Split the abbr + full stop in two segments, but only when using pmatch:
 < "@P.Pmatch.Loc@" {.} "+CLB":0 "+Use/PMatch":0 > # ;
! Make a regular ABBR analysis AND backtrack to find alternative analyses:
 < "+Use/PMatch":0 "@P.Pmatch.Backtrack@" 0:%. > # ;

! Gives:
!$ echo 'su.' \
!| hfst-tokenise -g tools/tokenisers/tokeniser-gramcheck-gt-desc.pmhfst
!""
!	"." CLB "<.>"
!		"su" Adv ABBR Gram/NumNoAbbr ""
!	"su" Adv ABBR Gram/NumNoAbbr
!	"." CLB "<.>"
!		"son" Pron Pers Sg3 Gen ""
!	"." CLB "<.>"
!		"son" Pron Pers Sg3 Acc ""
!:\n
!
! which is exactly what we want.
! NOTE(review): the empty "" strings in the sample output above presumably
! held wordform markers (such as "<su.>" and "<su>") that were lost when the
! sample was pasted - confirm by re-running the command.