! Divvun & Giellatekno - open source grammars for Sámi and other languages
! Copyright © 2000-2010 The University of Tromsø & the Norwegian Sámi Parliament
! http://giellatekno.uit.no & http://divvun.no
!
! This program is free software; you can redistribute and/or modify
! this file under the terms of the GNU General Public License as published by
! the Free Software Foundation, either version 3 of the License, or
! (at your option) any later version. The GNU General Public License
! is found at http://www.gnu.org/licenses/gpl.html. It is
! also available in the file $GTHOME/LICENSE.txt.
!
! Other licensing options are available upon request, please contact
! giellatekno@hum.uit.no or feedback@divvun.no

! ========================= !
! Hyphenator for North Sámi !
! ========================= !
!
! Overview
! --------
! The script first defines character classes (vowels, consonants, diphthongs
! and the "vowel minus X" complements used as rule contexts), then a series
! of replace rules that insert the internal boundary symbol ^ at candidate
! hyphenation points, then clean-up rules that delete boundaries that must
! not survive (triphthongs, long vowels, boundaries adjacent to - ), and
! finally composes everything into one transducer and rewrites ^ as - .
!
! The rules are composed in the order listed in the final "read regex", so
! earlier rules feed (and can bleed) later ones. Do not reorder them without
! re-testing.
!
! NOTE: the single-letter classes (U, O, A, E, I, Y, S) are defined in terms
! of symbols of the same name. This relies on the name being undefined at
! that point, so that it is read as a literal symbol. The order of the
! definitions below is therefore significant.

echo <>

! --- Character classes ------------------------------------------------------

! All vowel symbols, lower and upper case, including accented variants.
! (A missing "|" between ý and É previously turned "ý É" into a two-symbol
! concatenation, so neither letter was recognised as a vowel on its own.)
define Vow [ a | á | e | i | o | u | y | æ | ø | å | ä | ö |
             A | Á | E | I | O | U | Y | Æ | Ø | Å | Ä | Ö |
             é | ó | ú | í | à | è | ò | ù | ì | ë | ü | ï | â | ê | ô | û | î | ã | ý |
             É | Ó | Ú | Í | À | È | Ò | Ù | Ì | Ë | Ü | Ï | Â | Ê | Ô | Û | Î | Ã | Ý ] ;

! Vowels that may occur as the second element of a diphthong.
define 2ndVow [ e | a | u | o | i | á | y | ø | E | A | U | O | I | Á | Y | Ø ] ;
!define 3dVow [ u | U ] ;

define VowNot2ndVow [ Vow - 2ndVow ] ;
!define VowNot3dVow [ Vow - 3dVow ] ;

! Case-insensitive single vowels and small vowel groups.
define U      [ u | U ] ;
define O      [ o | O ] ;
define A      [ a | A ] ;
define E      [ e | E ] ;
define I      [ i | I ] ;
define Y      [ y | Y ] ;
define EO     [ e | E | o | O ] ;
define EOA    [ e | E | o | O | a | A ] ;
define IAO    [ i | I | a | A | o | O ] ;
! Vowels that do NOT get a boundary before a following e: the first elements
! of the ie, ae and oe diphthongs, plus e itself (long ee).
define IAOE   [ i | I | a | A | o | O | e | E ] ;
define Oslash [ ø | Ø ] ;            ! Renamed from Ø to avoid non-ASCII chars
define UI     [ u | U | i | I ] ;

! Diphthongs: all-lower, capitalised and all-upper spellings.
define UAAdipht     [ u á | U á | U Á ] ;
define IEdipht      [ i e | I e | I E ] ;
define UOdipht      [ u o | U o | U O ] ;
define EAdipht      [ e a | E a | E A ] ;
define OAdipht      [ o a | O a | O A ] ;
define OUdipht      [ o u | O u | O U ] ; ! jour-na-lis-sta
define AUdipht      [ a u | A u | A U ] ; ! fau-na, Lauv-sjø-vo-la, Kår-ve-jau-re ok.
define EUdipht      [ e u | E u | E U ] ; ! leut-nán-ta
define AEdipht      [ a e | A e | A E ] ; ! lae-sta-di-us,
define OslashYdipht [ ø y | Ø y | Ø Y ] ; ! nøyt-rála ! Renamed from ØYdipht to avoid non-ASCII chars
define OEdipht      [ o e | O e | O E ] ; ! goe-the
define IOslashdipht [ i ø | I ø | I Ø ] ; ! skiøld ! Renamed from IØdipht to avoid non-ASCII chars
define IOdipht      [ i o | I o | I O ] ; ! skiold, nation

!define EAUtripht [ e a u | E a u | E A U ] ; !Bor-deaux, Beau-fort
!define IEUtripht [ i e u | I e u | I E U ] ; !Nieu-siedler

define dipht [ IEdipht | UAAdipht | UOdipht | EAdipht | OAdipht | OUdipht |
               AUdipht | EUdipht | AEdipht | OslashYdipht | OEdipht |
               IOslashdipht | IOdipht ] ;
!define tripht [ EAUtripht | IEUtripht ] ;

! "Any vowel except ..." classes, used as left contexts in the Not*dipht rules.
define VowNotU      [ Vow - U ] ;
define VowNotO      [ Vow - O ] ;
define VowNotA      [ Vow - A ] ;
define VowNotE      [ Vow - E ] ;
define VowNotI      [ Vow - I ] ;
define VowNotEO     [ Vow - EO ] ;
define VowNotEOA    [ Vow - EOA ] ;
define VowNotIAO    [ Vow - IAO ] ;
! Used by NotIEdipht. This name was referenced but never defined, so xfst
! treated it as a literal multichar symbol and the rule could never match.
define VowNotIAOE   [ Vow - IAOE ] ;
define VowNotY      [ Vow - Y ] ;
define VowNotOslash [ Vow - Oslash ] ; ! Renamed from VowNotØ to avoid non-ASCII chars
define VowNotUI     [ Vow - UI ] ;

! Long (doubled) vowels, lower case only.
define LongVow [ a a | e e | i i | o o | u u | á á | ä ä | ö ö ] ;

define Cns [ b | c | č | d | đ | f | g | h | j | k | l | m | n | ŋ |
             B | C | Č | D | Đ | F | G | H | J | K | L | M | N | Ŋ |
             p | q | r | s | š | t | ŧ | v | w | x | z | ž |
             P | Q | R | S | Š | T | Ŧ | V | W | X | Z | Ž ] ;

define S    [ S | s | Š | š ] ;   ! sibilants
define Cnss [ Cns - S ] ;         ! consonants other than sibilants
define Sgm  [ Cns | Vow ] ;       ! any segment
!define Cns [ Cnssymb | Vow i ] ;

echo << Rules>>

! --- Boundary-inserting rules -----------------------------------------------
! Each rule inserts ^ (via [..] -> %^) in the stated context.

define afterlong   [..] -> %^ || LongVow Cns* _ Cns Vow ;

! Any diphthong; bled by the savetripht rules later on.
define beforedipht [..] -> %^ || Vow _ dipht ;                              !a^ie, e^ao, o^uo, u^ea etc

define nodipht     [..] -> %^ || Cns Vow _ VowNot2ndVow (Sgm) ;             !e^å, e^ä etc
!define notripht   [..] -> %^ || Cns Vow VowNot2ndVow _ U (Sgm) ;           ! Not needed?

! Vowel sequences that are NOT one of the diphthongs defined above get a
! boundary between the two vowels.
define NotOslhYdi  [..] -> %^ || [Cns|Vow %-] VowNotOslash _ Y (Sgm) ;      !a^y, e^y etc
define NotOAdipht  [..] -> %^ || [Cns|Vow %-] VowNotEO _ A (Sgm) ;          !i^a, u^a etc
define NotXUdipht  [..] -> %^ || [Cns|Vow %-] VowNotEOA _ U (Sgm) ;         !y^u, i^u etc
define NotUOdipht  [..] -> %^ || [Cns|Vow %-] VowNotUI _ O (Sgm) ;          !a^o, e^o etc (u, i excluded)
define NotIEdipht  [..] -> %^ || [Cns|Vow %-] VowNotIAOE _ E (Sgm) ;        ! u^e etc
define NotIOslhdi  [..] -> %^ || [Cns|Vow %-] VowNotI _ Oslash (Sgm) ;      !a^ø, e^ø etc (i excluded)

define 4cns [..] -> %^ || Vow r _ d n j ,
                          Vow Cns _ s t r Vow ,
                          Vow k s _ p [ l | r | s | n ] Vow ,
                          Vow n [ k | g ] _ s j Vow ;                       !gar^dnjil etc

!define unvoiced [..] -> %^ || Vow (Cns) _ h [ n | m | l | j | r ] Vow ;    !guv^hli

! NOTE, orthography guidelines (best-effort English translation of the
! original North Sámi note, whose text was partly garbled by line wrapping;
! verify against the official guidelines before relying on the wording):
!   The Language Board earlier decided that dj, hj, hl, lj, hm, hn, nj and hr
!   should not be split, because these letters are written together and each
!   stands for a single consonant sound. We should follow that decision so
!   that, wherever possible, they are not split.
!   However, since geminate consonants are normally divided between two
!   syllables, it is not appropriate to divide words like this:
!   da-hje, čái-hni, bea-lje-sku-hran. These words, too, should be divided
!   according to the main rule: dah-je, čáih-ni, beal-je-skuh-ran.

define unvoicedwg [..] -> %^ || Vow (Cns) h [ n | m | l | j | r ] _ [ n | m | l | j | r ] Vow ; !guvhl^li

define dissnas [..] -> %^ || Vow [ đ | i | l | r | v ] _ [ b m | d n (j) | g ŋ ] ;              !ráv^dnji

! Earlier version of the palatal rule, kept for reference:
! define palatal [..] -> %^ || Vow (Cns) Cns _ d j ,
!                              Vow (Cns) Cns _ l j ,
!                              Vow (Cns) Cns _ n j ,
!                              Vow (Cns) Cns _ n j ,
!                              Vow _ n j ;   !ád^djá etc
!                              !, Vow t _ n j ;

! Redefining palatal as follows, look into this one!
! Removed contexts: "Vow _ n j" (o^lju, bo^njan etc), "Vow _ d j" and
! "Vow _ l j" (the latter causes o^ljju). They stay commented out until we
! get a decision from SGL; see the orthography note above unvoicedwg
! (the original comment referred to it as "line 103"). (TO)
define palatal [..] -> %^ || Vow (Cns) Cns _ d j ,
                             Vow (Cns) Cns _ l j ,
                             Vow (Cns) Cns _ n j ;

! Consonant-cluster rules. ( %> ) allows an optional > symbol in the context.
define s2cns   [..] -> %^ || Vow [ Cns | i ] ( %> ) _ [s|š] Cnss Vow ;                  ! mái-stit (if commented out: máis-tit)
define s2cnswg [..] -> %^ || Vow [ Cns | i ] [s|š] Cnss ( %> ) _ Cnss ( %> ) Vow ;      !máist^tán
define 3cns    [..] -> %^ || Vow [ Cns | i ] Cns ( %> ) _ Cns ( %> ) Vow ;              !biil^la
!define foreign [..] -> %^ || Vow ( %> ) _ g r Vow ;                                    !para^gráfa
define xcns    [..] -> %^ || x ( %> ) _ ( %> ) Vow ;                                    !sex^a
define 2cns    [..] -> %^ || Vow [ Cns | i ] ( %> ) _ Cns ( %> ) Vow ;                  !áh^ku
define 1cns    [..] -> %^ || Vow ( %> ) _ Cns ( %> ) Vow ;                              !jo^ga

! --- Boundary-removing rules ------------------------------------------------
! The philosophy here is to remove some boundaries which, when seen together
! with a left context, are parts of triphthongs or long vowels and thus
! should not be divided. We do it by removing the boundary afterwards, in
! order not to mess up the diphthong rules above.

define savetripht  %^ -> 0 || e _ a u ;  ! does work... !Bor-deaux, Beau-fort
define savetripht2 %^ -> 0 || i _ e u ;  ! does work... !Nieu-siedler
define savetripht3 %^ -> 0 || i _ o u ;  ! does work... !Sioux
define saveaa      %^ -> 0 || a _ a ;    ! does work... !alaamasta Finnish
define saveii      %^ -> 0 || i _ i ;    ! does work... !alaamasta Finnish
define saveuu      %^ -> 0 || u _ u ;    ! does work... !alaamasta Finnish
define saveoo      %^ -> 0 || o _ o ;    ! does work... !alaamasta Finnish
define saveöö      %^ -> 0 || ö _ ö ;    ! does work... !alaamasta Finnish
define saveyö      %^ -> 0 || y _ ö ;    ! does work... !työläinen Finnish
define saveyy      %^ -> 0 || y _ y ;    ! does work... !alaamasta Finnish
define saveee      %^ -> 0 || e _ e ;    ! does work... !alaamasta Finnish
define saveää      %^ -> 0 || ä _ ä ;    ! does work... !alaamasta Finnish

! Drop ^ and # when they are directly adjacent to an existing hyphen.
define removedouble  %^ -> 0 || %- _ ;
define removedouble2 %^ -> 0 || _ %- ;
define removedouble3 %# -> 0 || %- _ ;
define removedouble4 %# -> 0 || _ %- ;

! Finally, turn the internal boundary symbol into a visible hyphen.
define readablehyphen %^ -> %- ;

echo <>

! --- Composition --------------------------------------------------------------
! Rules apply top to bottom; lines starting with ! are disabled rules.
read regex [
!notripht
!.o.
afterlong
.o. beforedipht
.o. nodipht
.o. NotOslhYdi
.o. NotOAdipht
.o. NotXUdipht
.o. NotUOdipht
.o. NotIEdipht
.o. NotIOslhdi
.o. 4cns
!.o. unvoiced
.o. unvoicedwg
.o. dissnas
.o. palatal
.o. s2cns
.o. s2cnswg
.o. 3cns
!.o. foreign
.o. xcns
.o. 2cns
.o. 1cns
.o. savetripht
.o. savetripht2
.o. savetripht3
.o. saveaa
.o. saveii
.o. saveuu
.o. saveoo
.o. saveöö
.o. saveyö
.o. saveyy
.o. saveee
.o. saveää
.o. removedouble
.o. removedouble2
.o. removedouble3
.o. removedouble4
.o. readablehyphen
] ;

! For quick compilation, comment the following line in:
!save h.fst ;
! Then, at the command line, give this command:
! xfst -utf8 < src/hyph-sme.txt
! As a result, a temporary h.fst file will be saved in sme/, it can be called
! with e.g. "lookup -flags mbTT -utf8 h.fst", or in a pipe.