) TARGET NSgPar (1 ("<(.*)ystä>"r)) (2 ("<(.*)ystä"r) + ("<(.*)ys>"r)) ;
# Plural nominative: map @SUBJ / @OBJ / @PC onto NPlNom readings.
# The two context tests are regex matches ("r" suffix) at relative positions 1 and 2.
MAP (@SUBJ @OBJ @PC) TARGET NPlNom (1 ("<(.*)kset>"r)) (2 ("<(.*)kset"r) + ("<(.*)s>"r)) ;
# GENITIVES
# Singular genitive (NSgGen) mapping rules, written one rule per line.
# Every rule maps the same tag list onto NSgGen; the rules differ only in the
# regexes tested at relative positions 1 and 2. Rule order is unchanged.
# NOTE(review): the bare `@` in the tag list looks like a truncated tag name -
# confirm against the upstream grammar.
MAP (@SUBJ @NEC-S @NF-S @OBJ @PC @ @N>) TARGET NSgGen (1 ("<(.*)ksen>"r)) (2 ("<(.*)ksen"r) + ("<(.*)s>"r)) ;
MAP (@SUBJ @NEC-S @NF-S @OBJ @PC @ @N>) TARGET NSgGen (1 ("<(.*)sen>"r)) (2 ("<(.*)sen"r) + ("<(.*)nen>"r)) ;
MAP (@SUBJ @NEC-S @NF-S @OBJ @PC @ @N>) TARGET NSgGen (1 ("<(.*)sonin>"r)) (2 ("<(.*)sonin"r) + ("<(.*)son>"r)) ;
MAP (@SUBJ @NEC-S @NF-S @OBJ @PC @ @N>) TARGET NSgGen (1 ("<(.*)anin>"r)) (2 ("<(.*)in>"r)) ;
MAP (@SUBJ @NEC-S @NF-S @OBJ @PC @ @N>) TARGET NSgGen (1 ("<(.*)enin>"r)) (2 ("<(.*)in>"r)) ;
MAP (@SUBJ @NEC-S @NF-S @OBJ @PC @ @N>) TARGET NSgGen (1 ("<(.*)inin>"r)) (2 ("<(.*)in>"r)) ;
MAP (@SUBJ @NEC-S @NF-S @OBJ @PC @ @N>) TARGET NSgGen (1 ("<(.*)nkin>"r)) (2 ("<(.*)nkin>"r)) ; # SUDBURYNKIN
MAP (@SUBJ @NEC-S @NF-S @OBJ @PC @ @N>) TARGET NSgGen (1 ("<(.*)onin>"r)) (2 ("<(.*)in>"r)) ;
MAP (@SUBJ @NEC-S @NF-S @OBJ @PC @ @N>) TARGET NSgGen (1 ("<(.*)oinnin>"r)) (2 ("<(.*)oinnin"r) + ("<(.*)ointi>"r)) ;
MAP (@SUBJ @NEC-S @NF-S @OBJ @PC @ @N>) TARGET NSgGen (1 ("<(.*)öinnin>"r)) (2 ("<(.*)öinnin"r) + ("<(.*)öinti>"r)) ;
MAP (@SUBJ @NEC-S @NF-S @OBJ @PC @ @N>) TARGET NSgGen (1 ("<(.*)unin>"r)) (2 ("<(.*)in>"r)) ;
MAP (@SUBJ @NEC-S @NF-S @OBJ @PC @ @N>) TARGET NSgGen (1 ("<(.*)ynin>"r)) (2 ("<(.*)in>"r)) ;
MAP (@SUBJ @NEC-S @NF-S @OBJ @PC @ @N>) TARGET NSgGen (1 ("<(.*)otin>"r)) (2 ("<(.*)in>"r)) ;
MAP (@SUBJ @NEC-S @NF-S @OBJ @PC @ @N>) TARGET NSgGen (1 ("<(.*)elin>"r)) (2 ("<(.*)in>"r)) ;
MAP (@SUBJ @NEC-S @NF-S @OBJ @PC @ @N>) TARGET NSgGen (1 ("<(.*)antin>"r)) (2 ("<(.*)in>"r)) ;
MAP (@SUBJ @NEC-S @NF-S @OBJ @PC @ @N>) TARGET NSgGen (1 ("<(.*)dtin>"r)) (2 ("<(.*)in>"r)) ;
MAP (@SUBJ @NEC-S @NF-S @OBJ @PC @ @N>) TARGET NSgGen (1 ("<(.*)ftin>"r)) (2 ("<(.*)in>"r)) ;
MAP (@SUBJ @NEC-S @NF-S @OBJ @PC @ @N>) TARGET NSgGen (1 ("<(.*)esin>"r)) (2 ("<(.*)in>"r)) ;
MAP (@SUBJ @NEC-S @NF-S @OBJ @PC @ @N>) TARGET NSgGen (1 ("<(.*)asin>"r)) (2 ("<(.*)in>"r)) ;
MAP (@SUBJ @NEC-S @NF-S @OBJ @PC @ @N>) TARGET NSgGen (1 ("<(.*)chtin>"r)) (2 ("<(.*)in>"r)) ;
MAP (@SUBJ @NEC-S @NF-S @OBJ @PC @ @N>) TARGET NSgGen (1 ("<(.*)bin>"r)) (2 ("<(.*)in>"r)) ;
MAP (@SUBJ @NEC-S @NF-S @OBJ @PC @ @N>) TARGET NSgGen (1 ("<(.*)cin>"r)) (2 ("<(.*)in>"r)) ;
MAP (@SUBJ @NEC-S @NF-S @OBJ @PC @ @N>) TARGET NSgGen (1 ("<(.*)din>"r)) (2 ("<(.*)in>"r)) ;
MAP (@SUBJ @NEC-S @NF-S @OBJ @PC @ @N>) TARGET NSgGen (1 ("<(.*)fin>"r)) (2 ("<(.*)in>"r)) ;
MAP (@SUBJ @NEC-S @NF-S @OBJ @PC @ @N>) TARGET NSgGen (1 ("<(.*)gin>"r)) (2 ("<(.*)in>"r)) ;
MAP (@SUBJ @NEC-S @NF-S @OBJ @PC @ @N>) TARGET NSgGen (1 ("<(.*)min>"r)) (2 ("<(.*)in>"r)) ;
MAP (@SUBJ @NEC-S @NF-S @OBJ @PC @ @N>) TARGET NSgGen (1 ("<(.*)manin>"r)) (2 ("<(.*)in>"r)) ;
MAP (@SUBJ @NEC-S @NF-S @OBJ @PC @ @N>) TARGET NSgGen (1 ("<(.*)mannin>"r)) (2 ("<(.*)in>"r)) ;
MAP (@SUBJ @NEC-S @NF-S @OBJ @PC @ @N>) TARGET NSgGen (1 ("<(.*)rin>"r)) (2 ("<(.*)in>"r)) ;
MAP (@SUBJ @NEC-S @NF-S @OBJ @PC @ @N>) TARGET NSgGen (1 ("<(.*)ttin>"r)) (2 ("<(.*)in>"r)) ;
MAP (@SUBJ @NEC-S @NF-S @OBJ @PC @ @N>) TARGET NSgGen (1 ("<(.*)ngin>"r)) (2 ("<(.*)in>"r)) ;
MAP (@SUBJ @NEC-S @NF-S @OBJ @PC @ @N>) TARGET NSgGen (1 ("<(.*)vin>"r)) (2 ("<(.*)in>"r)) ;
MAP (@SUBJ @NEC-S @NF-S @OBJ @PC @ @N>) TARGET NSgGen (1 ("<(.*)win>"r)) (2 ("<(.*)in>"r)) ;
MAP (@SUBJ @NEC-S @NF-S @OBJ @PC @ @N>) TARGET NSgGen (1 ("<(.*)zin>"r)) (2 ("<(.*)in>"r)) ;
# Doubled vowel + "tin" at position 1, paired with a "-tti" form at position 2.
MAP (@SUBJ @NEC-S @NF-S @OBJ @PC @ @N>) TARGET NSgGen (1 ("<(.*)aatin>"r)) (2 ("<(.*)aatin"r) + ("<(.*)aatti>"r)) ;
MAP (@SUBJ @NEC-S @NF-S @OBJ @PC @ @N>) TARGET NSgGen (1 ("<(.*)eetin>"r)) (2 ("<(.*)eetin"r) + ("<(.*)eetti>"r)) ;
MAP (@SUBJ @NEC-S @NF-S @OBJ @PC @ @N>) TARGET NSgGen (1 ("<(.*)iitin>"r)) (2 ("<(.*)iitin"r) + ("<(.*)iitti>"r)) ;
MAP (@SUBJ @NEC-S @NF-S @OBJ @PC @ @N>) TARGET NSgGen (1 ("<(.*)ootin>"r)) (2 ("<(.*)ootin"r) + ("<(.*)ootti>"r)) ;
MAP (@SUBJ @NEC-S @NF-S @OBJ @PC @ @N>) TARGET NSgGen (1 ("<(.*)uutin>"r)) (2 ("<(.*)uutin"r) + ("<(.*)uutti>"r)) ;
MAP (@SUBJ @NEC-S @NF-S @OBJ @PC @ @N>) TARGET NSgGen (1 ("<(.*)yytin>"r)) (2 ("<(.*)yytin"r) + ("<(.*)yytti>"r)) ;
# "-uuden" / "-yyden" at position 1, paired with "-uus" / "-yys" at position 2.
MAP (@SUBJ @NEC-S @NF-S @OBJ @PC @ @N>) TARGET NSgGen (1 ("<(.*)uuden>"r)) (2 ("<(.*)uuden"r) + ("<(.*)uus>"r)) ;
MAP (@SUBJ @NEC-S @NF-S @OBJ @PC @ @N>) TARGET NSgGen (1 ("<(.*)yyden>"r)) (2 ("<(.*)yyden"r) + ("<(.*)yys>"r)) ;
# Isn't this too general?
# The catch-all genitive rules below are disabled. Each rule is kept on a
# single commented line so that no part of a disabled rule remains active.
# MAP (@SUBJ @NEC-S @NF-S @OBJ @PC @ @N>) TARGET NSgGen (1 ("<(.*)in>"r)) (2 ("<(.*)in>"r)) ;
# MAP (@SUBJ @NEC-S @NF-S @OBJ @PC @ @N>) TARGET NSgGen (1 ("<(.*)en>"r)) (2 ("<(.*)n>"r)) ;
# MAP (@SUBJ @NEC-S @NF-S @OBJ @PC @ @N>) TARGET NSgGen (1 ("<(.*)än>"r)) (2 ("<(.*)n>"r)) ;
# MAP (@SUBJ @NEC-S @NF-S @OBJ @PC @ @N>) TARGET NSgGen (1 ("<(.*)yn>"r)) (2 ("<(.*)n>"r)) ;
# MAP (@SUBJ @NEC-S @NF-S @OBJ @PC @ @N>) TARGET NSgGen (1 ("<(.*)ön>"r)) (2 ("<(.*)n>"r)) ;
# MAP (@SUBJ @NEC-S @NF-S @OBJ @PC @ @N>) TARGET NSgGen (1 ("<(.*)un>"r)) (2 ("<(.*)n>"r)) ;
# MAP (@SUBJ @NEC-S @NF-S @OBJ @PC @ @N>) TARGET NSgGen (1 ("<(.*)on>"r)) (2 ("<(.*)n>"r)) ;
# MAP (@SUBJ @NEC-S @NF-S @OBJ @PC @ @N>) TARGET NSgGen (1 ("<(.*)an>"r)) (2 ("<(.*)n>"r)) ;
# Singular partitive (NSgPar) mapping rules, one rule per line.
# Position 1 is tested for a two-vowel ending (-aa -ea -oa -ua -ia, then
# -ää -eä -yä -iä); position 2 is tested for a form ending in the bare -a / -ä.
# NOTE(review): the two bare `@` entries in the tag list look like truncated
# tag names - confirm against the upstream grammar.
MAP (@SUBJ @OBJ @PC @ @) TARGET NSgPar (1 ("<(.*)aa>"r)) (2 ("<(.*)a>"r)) ;
MAP (@SUBJ @OBJ @PC @ @) TARGET NSgPar (1 ("<(.*)ea>"r)) (2 ("<(.*)a>"r)) ;
MAP (@SUBJ @OBJ @PC @ @) TARGET NSgPar (1 ("<(.*)oa>"r)) (2 ("<(.*)a>"r)) ;
MAP (@SUBJ @OBJ @PC @ @) TARGET NSgPar (1 ("<(.*)ua>"r)) (2 ("<(.*)a>"r)) ;
MAP (@SUBJ @OBJ @PC @ @) TARGET NSgPar (1 ("<(.*)ia>"r)) (2 ("<(.*)a>"r)) ;
MAP (@SUBJ @OBJ @PC @ @) TARGET NSgPar (1 ("<(.*)ää>"r)) (2 ("<(.*)ä>"r)) ;
MAP (@SUBJ @OBJ @PC @ @) TARGET NSgPar (1 ("<(.*)eä>"r)) (2 ("<(.*)ä>"r)) ;
MAP (@SUBJ @OBJ @PC @ @) TARGET NSgPar (1 ("<(.*)yä>"r)) (2 ("<(.*)ä>"r)) ;
MAP (@SUBJ @OBJ @PC @ @) TARGET NSgPar (1 ("<(.*)iä>"r)) (2 ("<(.*)ä>"r)) ;
# Ordinal numerals (ORD Num Sg + case): map @ADVL / @N> when the forms at
# positions 1 and 2 both match the same "<stem>:<case ending>" regex.
# Endings spelled with a/e first:
MAP (@ADVL @N>) TARGET (ORD Num Sg Ade) (1 ("<(.*):nnella>"r)) (2 ("<(.*):nnella>"r)) ;
MAP (@ADVL @N>) TARGET (ORD Num Sg Abl) (1 ("<(.*):nnelta>"r)) (2 ("<(.*):nnelta>"r)) ;
MAP (@ADVL @N>) TARGET (ORD Num Sg All) (1 ("<(.*):nnelle>"r)) (2 ("<(.*):nnelle>"r)) ;
MAP (@ADVL @N>) TARGET (ORD Num Sg Ine) (1 ("<(.*):nnessa>"r)) (2 ("<(.*):nnessa>"r)) ;
MAP (@ADVL @N>) TARGET (ORD Num Sg Ela) (1 ("<(.*):nnesta>"r)) (2 ("<(.*):nnesta>"r)) ;
MAP (@ADVL @N>) TARGET (ORD Num Sg Ill) (1 ("<(.*):nteen>"r)) (2 ("<(.*):nteen>"r)) ;
MAP (@ADVL @N>) TARGET (ORD Num Sg Ess) (1 ("<(.*):ntena>"r)) (2 ("<(.*):ntena>"r)) ;
MAP (@ADVL @N>) TARGET (ORD Num Sg Tra) (1 ("<(.*):nneksi>"r)) (2 ("<(.*):nneksi>"r)) ;
# Same translative rule without the colon in the regex; since "(.*)" can absorb
# a trailing colon, this pattern also accepts every form the rule above accepts.
MAP (@ADVL @N>) TARGET (ORD Num Sg Tra) (1 ("<(.*)nneksi>"r)) (2 ("<(.*)nneksi>"r)) ;
# Endings spelled with ä:
MAP (@ADVL @N>) TARGET (ORD Num Sg Ade) (1 ("<(.*):nnellä>"r)) (2 ("<(.*):nnellä>"r)) ;
MAP (@ADVL @N>) TARGET (ORD Num Sg Abl) (1 ("<(.*):nneltä>"r)) (2 ("<(.*):nneltä>"r)) ;
MAP (@ADVL @N>) TARGET (ORD Num Sg Ine) (1 ("<(.*):nnessä>"r)) (2 ("<(.*):nnessä>"r)) ;
MAP (@ADVL @N>) TARGET (ORD Num Sg Ela) (1 ("<(.*):nnestä>"r)) (2 ("<(.*):nnestä>"r)) ;
MAP (@ADVL @N>) TARGET (ORD Num Sg Ess) (1 ("<(.*):ntenä>"r)) (2 ("<(.*):ntenä>"r)) ;
# Post-tagging disambiguation
SECTION
# Commented out, may be useful at a later stage, but at the moment may hide ambiguities - 20.01.2015 -FMT+KM
# NOTE(review): the disabled rule on the next line appears truncated (no closing parenthesis or ";").
#SELECT @)) (@1 NIL) (@2 NI
# some first additions from Story (TP++)
# Drop the A reading when the cohort also has a PrfPrc reading, the next cohort
# is Gen_Par, and an OLLA form is found to the left before any S-BOUNDARY.
REMOVE A (0 PrfPrc)(1 Gen_Par)(*-1 OLLA BARRIER S-BOUNDARY) ;
# Directly after "miksi": drop Ind + Sg3 when the cohort also has an InfA reading
# and no NPSg3Nom is found leftwards from position -2 or rightwards before S-BOUNDARY.
REMOVE Ind + Sg3 (0 InfA) (-1 ("miksi"))(NEGATE *-2 NPSg3Nom)(NEGATE *1 NPSg3Nom BARRIER S-BOUNDARY) ;
# Select the Ela-Advl + Ind reading when an Ela form follows before S-BOUNDARY.
SELECT Ela-Advl + Ind (*1 Ela BARRIER S-BOUNDARY) ; # ("pitää")
# Select the N reading of "sääli" when the previous position is the window-start tag >>>.
SELECT ("sääli") + N (-1 >>>) ;
# NOTE(review): the lemma string in the next rule is empty, which looks like lost
# text - confirm the intended lemma against the upstream grammar.
SELECT ("") + Adv (*-1 V BARRIER S-BOUNDARY) ;
# Select the Adv reading of "i" when a V follows before S-BOUNDARY.
SELECT ("i") + Adv (*1 V BARRIER S-BOUNDARY) ;
# Drop the A + Der_sti reading when the cohort also has an Adv reading.
REMOVE A + Der_sti (0 Adv) ;
# Perfect participle number agreement with a preceding OLLA form.
# Direct case: select the Sg (Pl) participle reading when OLLA + Sg3 (Pl3) is
# found to the left before another PrfPrc or an S-BOUNDARY.
SELECT PrfPrc + Sg (*-1 OLLA + Sg3 BARRIER PrfPrc OR S-BOUNDARY) ;
SELECT PrfPrc + Pl (*-1 OLLA + Pl3 BARRIER PrfPrc OR S-BOUNDARY) ;
# Coordinated case: the participle directly follows CCC, an earlier participle is
# found to the left of CCC, and OLLA is found to the left of that participle.
# All three links agree in number: Sg ... Sg ... Sg3 and Pl ... Pl ... Pl3.
SELECT PrfPrc + Sg (-1 CCC LINK *-1 PrfPrc + Sg BARRIER S-BOUNDARY LINK *-1 OLLA + Sg3 BARRIER S-BOUNDARY) ;
SELECT PrfPrc + Pl (-1 CCC LINK *-1 PrfPrc + Pl BARRIER S-BOUNDARY LINK *-1 OLLA + Pl3 BARRIER S-BOUNDARY) ;
# Finite-verb rules. Most of them require that no other V is found in the
# sentence (NEGATE *1 / *-1 V BARRIER S-BOUNDARY).
# Directly after COLON: drop V + Ind when the cohort also has an Imprt + Sg2
# reading and no V follows before S-BOUNDARY.
REMOVE V + Ind (0 Imprt + Sg2)(-1 COLON)(NEGATE *1 V BARRIER S-BOUNDARY) ;
# Select V + Ind when no V is found on either side and the previous cohort is not OMS.
SELECT V + Ind (NEGATE *1 V BARRIER S-BOUNDARY)(NEGATE *-1 V BARRIER S-BOUNDARY)(NEGATE -1 OMS) ;
# Same, but the third condition excludes a following "kuin" that is itself followed by V.
SELECT V + Ind (NEGATE *1 V BARRIER S-BOUNDARY)(NEGATE *-1 V BARRIER S-BOUNDARY)(NEGATE 1 ("kuin") LINK 1 V) ;
# Drop InfA when the cohort also has an Act + Sg3 reading and no V is found on either side.
REMOVE InfA (0 Act + Sg3) (NEGATE *1 V BARRIER S-BOUNDARY)(NEGATE *-1 V BARRIER S-BOUNDARY) ;
# Select the Pl3 / Sg3 indicative when a *0 scan finds a nominative NP of the
# matching set (NPPl3Nom / NPSg3Nom) before S-BOUNDARY and no V is found on either side.
SELECT Ind + Pl3 (*0 NPPl3Nom BARRIER S-BOUNDARY)(NEGATE *1 V BARRIER S-BOUNDARY)(NEGATE *-1 V BARRIER S-BOUNDARY) ;
SELECT Ind + Sg3 (*0 NPSg3Nom BARRIER S-BOUNDARY)(NEGATE *1 V BARRIER S-BOUNDARY)(NEGATE *-1 V BARRIER S-BOUNDARY) ;
# Case selection. The 0C test requires every reading of the cohort to be in
# the listed set; the *0 scan must then find the named licensing set before
# S-BOUNDARY or CCC.
# Ill vs. Par, licensed by Ill-Advl.
SELECT Ill (0C Ill OR Par)(*0 Ill-Advl BARRIER S-BOUNDARY OR CCC) ;
# Ela vs. Par, licensed by Ela-Advl.
SELECT Ela (0C Ela OR Par)(*0 Ela-Advl BARRIER S-BOUNDARY OR CCC) ;
# LOC-CASE vs. Par, licensed by LOC-V.
SELECT LOC-CASE (0C LOC-CASE OR Par)(*0 LOC-V BARRIER S-BOUNDARY OR CCC) ;
# Par vs. LOC-CASE, licensed by Par-OBJ.
SELECT Par (0C LOC-CASE OR Par)(*0 Par-OBJ BARRIER S-BOUNDARY OR CCC) ;
# Nominative / partitive noun selection.
# Select NNom when the cohort has no V reading, "kuten" is found to the left,
# and no Nom is found to the right (both scans stop at S-BOUNDARY, V, CCC or NNom).
SELECT NNom (NEGATE 0 V)(NEGATE *1 Nom BARRIER S-BOUNDARY OR V OR CCC OR NNom)(*-1 ("kuten") BARRIER S-BOUNDARY OR V OR CCC OR NNom) ;
# NOTE(review): the parentheses in the next rule are unbalanced ("(0 (@>>)" is
# never closed) and the tag looks damaged - restore it from the upstream grammar.
SELECT Gen_Par (0 (@>>)(NEGATE *1 NNom) ;
# Select the @SUBJ-tagged NNom / NPar reading when the cohort has no V reading,
# no @SUBJ is found to the left, and no @SUBJ or NPSg3Nom is found to the right.
SELECT NNom + @SUBJ (NEGATE 0 V)(NEGATE *-1 @SUBJ BARRIER S-BOUNDARY)(NEGATE *1 @SUBJ OR NPSg3Nom BARRIER S-BOUNDARY) ;
SELECT NPar + @SUBJ (NEGATE 0 V)(NEGATE *-1 @SUBJ BARRIER S-BOUNDARY)(NEGATE *1 @SUBJ OR NPSg3Nom BARRIER S-BOUNDARY) ;
# For cohorts whose readings are all N (0C): select NNom / NPar when a *0 scan
# finds VSg3 or VPl3 (stopping at S-BOUNDARY or QUO) and no other NNom or NPar
# is found on either side.
SELECT NNom (0C N)(*0 VSg3 OR VPl3 BARRIER S-BOUNDARY OR QUO)(NEGATE *-1 NNom OR NPar BARRIER S-BOUNDARY)(NEGATE *1 NNom OR NPar BARRIER S-BOUNDARY) ;
SELECT NPar (0C N)(*0 VSg3 OR VPl3 BARRIER S-BOUNDARY OR QUO)(NEGATE *-1 NNom OR NPar BARRIER S-BOUNDARY)(NEGATE *1 NNom OR NPar BARRIER S-BOUNDARY) ;
# Select NSgNom / NPlNom when a *0 scan finds an active indicative verb of the
# matching number (stopping at S-BOUNDARY or Punct) and no competing nominative
# noun is found on either side.
SELECT NSgNom (*0 VSg3 + Act + Ind BARRIER S-BOUNDARY OR Punct)(NEGATE *-1 NSgNom BARRIER S-BOUNDARY)(NEGATE *1 NSgNom BARRIER S-BOUNDARY) ;
SELECT NPlNom (*0 VPl3 + Act + Ind BARRIER S-BOUNDARY OR Punct)(NEGATE *-1 NSgNom OR NPlNom BARRIER S-BOUNDARY)(NEGATE *1 NSgNom OR NPlNom BARRIER S-BOUNDARY) ;
# Drop NPlNom when the cohort also has an NSgNom reading and its neighbours are
# unambiguously Sg + Gen (left) and Sg + Nom (right).
REMOVE NPlNom (0 NSgNom)(-1C Sg + Gen)(1C Sg + Nom) ;
# Drop the relative-pronoun reading unless the previous cohort is COMMA.
REMOVE Pron + Rel (NEGATE -1 COMMA) ;
# Possessive suffixes: select PxSg3 next to HÄN or "jokainen", PxPl3 next to HE
# (immediately right or left).
SELECT PxSg3 (1 HÄN OR ("jokainen")) ;
SELECT PxSg3 (-1 HÄN OR ("jokainen")) ;
SELECT PxPl3 (1 HE) ;
SELECT PxPl3 (-1 HE) ;
# Same idea at a distance: the licensing word may be anywhere up to S-BOUNDARY.
# $$ALLCASES is unified, so the selected reading carries the same case tag that
# all readings of the cohort share (0C).
SELECT PxSg3 + $$ALLCASES (0C $$ALLCASES)(*1 HÄN OR ("jokainen") BARRIER S-BOUNDARY) ;
SELECT PxSg3 + $$ALLCASES (0C $$ALLCASES)(*-1 HÄN OR ("jokainen") BARRIER S-BOUNDARY) ;
SELECT PxPl3 + $$ALLCASES (0C $$ALLCASES)(*1 HE BARRIER S-BOUNDARY) ;
SELECT PxPl3 + $$ALLCASES (0C $$ALLCASES)(*-1 HE BARRIER S-BOUNDARY) ;
#SELECT PxSg3 + $$ALLCASES (0C $$ALLCASES)(*0 N + Sg)(NEGATE *-1 N + Pl OR Num + Pl)(NEGATE *1 N + Pl OR Num + Pl) ;
# Drop the plural possessive reading (3rd, 2nd, 1st person in turn) when the
# cohort also has the singular possessive reading in the same case, a *0 scan
# finds N + Sg, and neither the matching *_PersON_PL set nor Num + Pl is found
# on either side.
REMOVE PxPl3 + $$ALLCASES (0 PxSg3 + $$ALLCASES)(*0 N + Sg)(NEGATE *-1 THIRD_PersON_PL OR Num + Pl)(NEGATE *1 THIRD_PersON_PL OR Num + Pl) ;
REMOVE PxPl2 + $$ALLCASES (0 PxSg2 + $$ALLCASES)(*0 N + Sg)(NEGATE *-1 SECOND_PersON_PL OR Num + Pl)(NEGATE *1 SECOND_PersON_PL OR Num + Pl) ;
REMOVE PxPl1 + $$ALLCASES (0 PxSg1 + $$ALLCASES)(*0 N + Sg)(NEGATE *-1 FIRST_PersON_PL OR Num + Pl)(NEGATE *1 FIRST_PersON_PL OR Num + Pl) ;
# Lexically specific rules.
# "anteeksi" between COMMA or window start (>>>) on the left and COMMA or "!" on the right.
SELECT ("anteeksi") (-1 COMMA OR >>>) (1 COMMA OR ("!")) ;
# Select Prop directly before COLON.
SELECT Prop (1 COLON) ; # + sanoo-like verbs
# "Matti" as a proper noun, licensed by a later "mies", HÄN or HE, or by another
# unambiguous "Matti" + Prop cohort.
# NOTE(review): the ">" / "<" position prefixes presumably let the scan cross the
# window boundary to the right / left - confirm in the CG-3 manual.
SELECT ("Matti") + Prop (*1 ("mies") BARRIER Prop) ;
SELECT ("Matti") + Prop (>*1 HÄN BARRIER Prop) ;
SELECT ("Matti") + Prop (>*1 HE) ;
SELECT ("Matti") + Prop (>*1C ("Matti") + Prop) ; # generalize to any prop!
SELECT ("Matti") + Prop (<*-1C ("Matti") + Prop) ;
# Unconditional: always select the CNOUN reading of "kello".
SELECT ("kello") + CNOUN ;
# Select Num when "kello" occurs anywhere to the left (the scan has no BARRIER).
SELECT Num (*-1 ("kello")) ;
# Unconditional: always select the "sinä" reading.
SELECT ("sinä") ;
# Drop Prop + Sem when the cohort also has a Prop reading without Sem.
REMOVE Prop + Sem (0 Prop - Sem) ;
#REMOVE Prop (0 N - Prop + $$ALLCASES)(0 Prop + $$ALLCASES) ;
# Drop Pl + POSS_SUFF when the cohort also has a Sg reading and a *0 scan finds
# Pron + Pers + SGa before S-BOUNDARY.
REMOVE Pl + POSS_SUFF (0 Sg)(*0 Pron + Pers + SGa BARRIER S-BOUNDARY);
# Compound cleanup. SUB:1 makes the rule target sub-reading 1; the context
# position "0/*" tests sub-readings of the same cohort.
REMOVE SUB:1 Cmp/SgGen (0/* Cmp/SgNom) ; #This for HFST
REMOVE SUB:1 A - Cmp/Attr (0/* Cmp/Attr) ; #This for HFST
REMOVE SUB:1 Prop + Attr (0/* Cmp/Hyph) ; # This for HFST
REMOVE:RemoveAllCmp SUB:1 Cmp (0/* Cmp);
# Named rule X: remove readings tagged @X.
REMOVE:X @X ;
# ================= ============ =============
LIST WORDLEMMA = (".*"r) ; #!! * WORDLEMMA = regex giving the lemma in question
#!! * **Rule: errorth** removes Err/Orth if there is an analysis without Err/Orth with the same lemma
# The $$ prefix unifies WORDLEMMA: the target and the context test must match
# the same tag, which is what ties both readings to one lemma.
REMOVE:errorth $$WORDLEMMA + (Err/Orth) (0 $$WORDLEMMA - (Err/Orth) ) ; #Removes Err/Orth analysis when there is an analysis with same lemma without Err/Orth
# Rules placed under AFTER-SECTIONS; one SUBSTITUTE per part-of-speech tag, all targeting WORD.
# NOTE(review): as written, each replacement list repeats the tag being replaced,
# and the stray space before ")" suggests a second tag was lost from every
# replacement list - confirm against the upstream grammar.
AFTER-SECTIONS #
SUBSTITUTE (A) (A ) WORD ; #RemoveFromApertium
SUBSTITUTE (N) (N ) WORD ; #RemoveFromApertium
SUBSTITUTE (Adv) (Adv ) WORD ; #RemoveFromApertium
SUBSTITUTE (V) (V ) WORD ; #RemoveFromApertium
SUBSTITUTE (Num) (Num ) WORD ; #RemoveFromApertium
SUBSTITUTE (Interj) (Interj ) WORD ; #RemoveFromApertium
SUBSTITUTE (Po) (Po ) WORD ; #RemoveFromApertium
SUBSTITUTE (Pr) (Pr ) WORD ; #RemoveFromApertium
SUBSTITUTE (Pron) (Pron ) WORD ; #RemoveFromApertium
SUBSTITUTE (CC) (CC ) WORD ; #RemoveFromApertium
SUBSTITUTE (CS) (CS ) WORD ; #RemoveFromApertium
SUBSTITUTE (Pcle) (Pcle ) WORD ; #RemoveFromApertium