! Distributed under the terms of the GNU General Public License version 2
! or any later version.

! ========================= !
! Hyphenator for North Sámi !
! ========================= !

! Overview
! --------
! 1. Symbol classes (vowels, consonants, diphthongs) are defined.
! 2. Each rule inserts the hyphenation mark ^ (written %^) in one context.
! 3. The rules are composed in a fixed order with .o. and the result is
!    inverted, so that the hyphenated form ends up on the upper side.
!
! NOTE(review): once a name such as U, O, A, E, I, Y or S has been defined,
! xfst resolves later bare occurrences of that name to the defined set, not
! to the literal letter. The order of the definitions below is therefore
! significant and must not be changed.

echo <>

! All vowel letters, lower and upper case, including accented loan letters.
! Every alternative must be separated by |. A missing bar between "ý" and
! "É" used to turn them into the two-letter sequence "ýÉ", so that neither
! letter was recognised as a vowel on its own.
define Vow [ a | á | e | i | o | u | y | æ | ø | å | ä | ö |
             A | Á | E | I | O | U | Y | Æ | Ø | Å | Ä | Ö |
             é | ó | ú | í | à | è | ò | ù | ì | ë | ü | ï |
             â | ê | ô | û | î | ã | ý |
             É | Ó | Ú | Í | À | È | Ò | Ù | Ì | Ë | Ü | Ï |
             Â | Ê | Ô | Û | Î | Ã | Ý ] ;

! Vowels that may appear as the second element of a diphthong.
define 2ndVow [ e | a | u | o | i | y | ø |
                E | A | U | O | I | Y | Ø ] ;
!define 3dVow [ u | U ] ;

define VowNot2ndVow [ Vow - 2ndVow ] ;
!define VowNot3dVow [ Vow - 3dVow ] ;

! Case-insensitive single vowels and small vowel groups.
define U      [ u | U ] ;
define O      [ o | O ] ;
define A      [ a | A ] ;
define E      [ e | E ] ;
define I      [ i | I ] ;
define Y      [ y | Y ] ;
define EO     [ e | E | o | O ] ;
define EOA    [ e | E | o | O | a | A ] ;
define IAO    [ i | I | a | A | o | O ] ;
define Oslash [ ø | Ø ] ;             ! Renamed from Ø to avoid non-ASCII chars
define UI     [ u | U | i | I ] ;

! Diphthongs that must not be split.
define IEdipht      [ i e | I e | I E ] ;
define UOdipht      [ u o | U o | U O ] ;
define EAdipht      [ e a | E a | E A ] ;
define OAdipht      [ o a | O a | O A ] ;
define OUdipht      [ o u | O u | O U ] ;   ! jour-na-lis-sta
define AUdipht      [ a u | A u | A U ] ;   ! fau-na, Lauv-sjø-vo-la, Kår-ve-jau-re ok.
define EUdipht      [ e u | E u | E U ] ;   ! leut-nán-ta
define AEdipht      [ a e | A e | A E ] ;   ! lae-sta-di-us,
define OslashYdipht [ ø y | Ø y | Ø Y ] ;   ! nøyt-rála
                                            ! Renamed from ØYdipht to avoid non-ASCII chars
define OEdipht      [ o e | O e | O E ] ;   ! goe-the
define IOslashdipht [ i ø | I ø | I Ø ] ;   ! skiøld
                                            ! Renamed from IØdipht to avoid non-ASCII chars
define IOdipht      [ i o | I o | I O ] ;   ! skiold, nation

!define EAUtripht [ e a u | E a u | E A U ] ; !Bor-deaux, Beau-fort
!define IEUtripht [ i e u | I e u | I E U ] ; !Nieu-siedler

define dipht [ IEdipht | UOdipht | EAdipht | OAdipht | OUdipht | AUdipht |
               EUdipht | AEdipht | OslashYdipht | OEdipht | IOslashdipht |
               IOdipht ] ;
!define tripht [ EAUtripht | IEUtripht ] ;

! Complements: every vowel except the named one(s).
define VowNotU      [ Vow - U ] ;
define VowNotO      [ Vow - O ] ;
define VowNotA      [ Vow - A ] ;
define VowNotE      [ Vow - E ] ;
define VowNotI      [ Vow - I ] ;
define VowNotEO     [ Vow - EO ] ;
define VowNotEOA    [ Vow - EOA ] ;
define VowNotIAO    [ Vow - IAO ] ;
define VowNotY      [ Vow - Y ] ;
define VowNotOslash [ Vow - Oslash ] ;  ! Renamed from VowNotØ to avoid non-ASCII chars
define VowNotUI     [ Vow - UI ] ;

! All consonant letters, lower and upper case.
define Cns [ b | c | č | d | đ | f | g | h | j | k | l | m | n | ŋ |
             B | C | Č | D | Đ | F | G | H | J | K | L | M | N | Ŋ |
             p | q | r | s | š | t | ŧ | v | w | x | z | ž |
             P | Q | R | S | Š | T | Ŧ | V | W | X | Z | Ž ] ;

! Sibilants, and the consonants that are not sibilants.
define S    [ S | s | Š | š ] ;
define Cnss [ Cns - S ] ;

! Any segment: consonant or vowel.
define Sgm  [ Cns | Vow ] ;
!define Cns [ Cnssymb | Vow i ] ;

echo << Rules>>

! --- Vowel sequences ------------------------------------------------------

! Boundary between a vowel and a following diphthong.
define beforedipht [..] -> %^ || Vow _ dipht ;  !a^ie, e^ao, o^uo, u^ea etc!
                                                ! Any diphth, bled by savetripht later on.

! Boundary before a vowel that cannot be the second part of a diphthong.
define nodipht [..] -> %^ || Cns Vow _ VowNot2ndVow (Sgm) ;  !e^å, e^ä etc
!define notripht [..] -> %^ || Cns Vow VowNot2ndVow _ U (Sgm) ; ! Not needed?

! Vowel pairs that are not one of the diphthongs listed above are split.
define NotOslhYdi [..] -> %^ || [Cns|Vow %-] VowNotOslash _ Y (Sgm) ;  !a^y, e^y etc
define NotOAdipht [..] -> %^ || [Cns|Vow %-] VowNotEO _ A (Sgm) ;      !i^a, u^a etc
define NotXUdipht [..] -> %^ || [Cns|Vow %-] VowNotEOA _ U (Sgm) ;     !y^u, i^u etc
define NotUOdipht [..] -> %^ || [Cns|Vow %-] VowNotUI _ O (Sgm) ;      !a^o, e^o etc
define NotIEdipht [..] -> %^ || [Cns|Vow %-] VowNotIAO _ E (Sgm) ;     !e^e, u^e etc
define NotIOslhdi [..] -> %^ || [Cns|Vow %-] VowNotI _ Oslash (Sgm) ;  !a^ø, e^ø etc

! --- Consonant clusters ---------------------------------------------------

define 4cns [..] -> %^ || Vow r _ d n j ,
                          Vow Cns _ s t r Vow ,
                          Vow k s _ p [ l | r | s | n ] Vow ,
                          Vow n [ k | g ] _ s j Vow ;  !gar^dnjil etc

define unvoiced [..] -> %^ || Vow (Cns) _ h [ n | m | l | j | r ] Vow ;  !guv^hli

define unvoicedwg [..] -> %^ || Vow (Cns) h [ n | m | l | j | r ] _
                                [ n | m | l | j | r ] Vow ;  !guvhl^li

define dissnas [..] -> %^ || Vow [ đ | i | l | r | v ] _
                             [ b m | d n (j) | g ŋ ] ;  !ráv^dnji

! Earlier version of the palatal rule, kept for reference:
! define palatal [..] -> %^ || Vow (Cns) Cns _ d j ,
!                              Vow (Cns) Cns _ l j ,
!                              Vow (Cns) Cns _ n j ,
!                              Vow (Cns) Cns _ n j ,
!                              Vow _ n j ; !ád^djá etc
! !, Vow t _ n j ;

! Redefining palatal as follows, look into this one!
define palatal [..] -> %^ || Vow (Cns) Cns _ d j ,
                             Vow (Cns) Cns _ l j ,
                             Vow (Cns) Cns _ n j ,
                             Vow _ n j ;
! o^lju, bo^njan etc, removed: Vow _ d j , Vow _ l j (causes o^ljju),
! until we get decision from SGL

define s2cns [..] -> %^ || Vow [ Cns | i ] _ [s|š] Cnss Vow ;
! mái-stit (if commented out: máis-tit)

define s2cnswg [..] -> %^ || Vow [ Cns | i ] [s|š] Cnss _ Cnss Vow ;  !máist^tán

define 3cns    [..] -> %^ || Vow [ Cns | i ] Cns _ Cns Vow ;  !biil^la
define foreign [..] -> %^ || Vow _ g r Vow ;                  !para^gráfa
define xcns    [..] -> %^ || x _ Vow ;                        !sex^a
define 2cns    [..] -> %^ || Vow [ Cns | i ] _ Cns Vow ;      !áh^ku
define 1cns    [..] -> %^ || Vow _ Cns Vow ;                  !jo^ga

! --- Clean-up -------------------------------------------------------------

! The philosophy here is to remove some boundaries that, when seen also in a
! left context, are parts of triphthongs, and thus should not be divided.
! We do it via restoring, in order not to mess up our diphthong rules.
define savetripht  %^ -> 0 || e _ a u ;  ! does work... !Bor-deaux, Beau-fort
define savetripht2 %^ -> 0 || i _ e u ;  ! does work... !Nieu-siedler
define savetripht3 %^ -> 0 || i _ o u ;  ! does work... !Sioux

! Drop a ^ or # that ended up directly next to a literal hyphen.
define removedouble  %^ -> 0 || %- _ ;
define removedouble2 %^ -> 0 || _ %- ;
define removedouble3 %# -> 0 || %- _ ;
define removedouble4 %# -> 0 || _ %- ;

echo <>

! Compose the rules. Each rule sees the output of the ones before it, so the
! order below is significant.
read regex [
!   notripht      .o.
    beforedipht   .o.
    nodipht       .o.
    NotOslhYdi    .o.
    NotOAdipht    .o.
    NotXUdipht    .o.
    NotUOdipht    .o.
    NotIEdipht    .o.
    NotIOslhdi    .o.
    4cns          .o.
    unvoiced      .o.
    unvoicedwg    .o.
    dissnas       .o.
    palatal       .o.
    s2cns         .o.
    s2cnswg       .o.
    3cns          .o.
    foreign       .o.
    xcns          .o.
    2cns          .o.
    1cns          .o.
    savetripht    .o.
    savetripht2   .o.
    savetripht3   .o.
    removedouble  .o.
    removedouble2 .o.
    removedouble3 .o.
    removedouble4
] ;

! Swap the two sides of the network, so that looking a plain word up
! returns its hyphenated form.
invert net ;

! For quick compilation, comment the following line in:
!save h.fst ;

! Then, at the command line, give this command:
! xfst -utf8 < src/hyph-sme.txt
! As a result, a temporary h.fst file will be saved in sme/, it can be called
! with e.g. "lookup -flags mbTT -utf8 h.fst", or in a pipe.