# Downcase the initial letter after the (misnamed) flag @U.Cap.Opt@.
# This gives oslolaš across the board,
# like Pariisi -> pariisilainen.
#
# Overview of this script:
#   1. Four flag-diacritic symbols are stored as named networks (define ...).
#   2. A parallel replace rule is compiled whose contexts use the placeholder
#      symbols DCASEFLAG, HYPHFLAG, LEXROOTFLAG and LEXPROPERFLAG.
#   3. Each placeholder in the compiled network is replaced by the
#      corresponding defined network (substitute defined ...).

# ---- starts here: ------

# Various flag definitions to handle both regular downcasing and downcasing on
# hfst transducers with hyperminimisation. HypMin converts each lexicon name to
# a flag diacritic, and the result is that there can be more symbols before the
# initial letter of a word.
#
# NB! Note that the lexicon name of the proper nouns can be different from what
# is defined here. Change if required!
define DowncaseFlag  "@U.Cap.Opt@" ;
define CmpHyph       "@U.CmpHyph.TRUE@" ;
define LexRootFlag   "@P.LEXNAME.Root@" ;
define LexProperFlag "@P.LEXNAME.ProperNoun@" ;

# The replacement below is obligatory (->), unlike the old version kept for
# reference further down, which used optional replacement ((->)).
#
# It applies in two left contexts:
#   1. Anchored at the string start (.#.): an optional LEXROOTFLAG, then
#      DCASEFLAG, then an optional LEXPROPERFLAG, an optional HYPHFLAG and at
#      most one further arbitrary symbol, immediately before the letter.
#      NOTE(review): the ( ? ) slot presumably absorbs an extra symbol such as
#      one inserted by hyperminimisation - confirm.
#   2. Not anchored at the string start: DCASEFLAG, the same two optional
#      flags, any string, a literal hyphen and at most one further arbitrary
#      symbol, immediately before the letter. This covers the letter that
#      follows a hyphen.
#
# NOTE(review): the upper-case names in the contexts are placeholders that are
# only given their real values by the substitute commands at the end of the
# file. Presumably this keeps each flag diacritic a single symbol in the
# contexts - confirm.
read regex [
 A -> a, B -> b, C -> c, D -> d, E -> e, F -> f, G -> g, H -> h,
 I -> i, J -> j, K -> k, L -> l, M -> m, N -> n, O -> o, P -> p,
 Q -> q, R -> r, S -> s, T -> t, U -> u, V -> v, W -> w, X -> x,
 Y -> y, Z -> z, Å -> å, Ä -> ä, Ö -> ö, Æ -> æ, Ø -> ø, Á -> á,
 Č -> č, Đ -> đ, Ŋ -> ŋ, Š -> š, Ŧ -> ŧ, Ž -> ž
|| .#. ( LEXROOTFLAG ) DCASEFLAG ( LEXPROPERFLAG ) ( HYPHFLAG ) ( ? ) _ ,
       DCASEFLAG ( LEXPROPERFLAG ) ( HYPHFLAG ) ?* %- ( ? ) _
# ---- ends here: ------
# The lower context fixes an issue when downcasing words like Davvi-Suopma
# -> davvi-suopmelaš

# Kept for reference: the old version, i.e.,
# The Book's treatment of paloaltolainen and pariisilainen, p. 368 ff.

# Allow optional initial downcasing after @U.Cap.Opt@

#A (->) a, B (->) b, C (->) c, D (->) d, E (->) e,
#F (->) f, G (->) g, H (->) h, I (->) i, J (->) j,
#K (->) k, L (->) l, M (->) m, N (->) n, O (->) o,
#P (->) p, Q (->) q, R (->) r, S (->) s, T (->) t,
#U (->) u, V (->) v, W (->) w, X (->) x, Y (->) y,
#Z (->) z, Á (->) á,
#Č (->) č, Đ (->) đ, Ŋ (->) ŋ, Š (->) š, Ŧ (->) ŧ, Ž (->) ž
#|| .#. %@U%.Cap%.Opt%@ _
#.o.
# No uppercase in the middle of a downcasable word
#A->a, B->b, C->c, D->d, E->e, F->f, G->g, H->h,
#I->i, J->j, K->k, L->l, M->m, N->n, O->o, P->p,
#Q->q, R->r, S->s, T->t, U->u, V->v, W->w, X->x,
#Y->y, Z->z || %@U%.Cap%.Opt%@ ?+ _
#.o.
# Eliminate internal spaces inside a downcasable word
# Spaces are indicated here with the literal
# underscore character
#%_ -> [] || .#. %@U%.Cap%.Opt%@ ?+ _
] ;

# Replace each placeholder symbol in the network compiled above with the
# network defined at the top of the file.
substitute defined DowncaseFlag  for DCASEFLAG
substitute defined CmpHyph       for HYPHFLAG
substitute defined LexRootFlag   for LEXROOTFLAG
substitute defined LexProperFlag for LEXPROPERFLAG