# ===================== #
# Faroese disambiguator #
# ===================== #

# ========== #
# Delimiters #
# ========== #

# NOTE(review): the second and third entries were empty strings ("").
# They are restored here as "<!>" and "<?>" on the assumption that markup
# stripping removed them -- confirm against the upstream grammar.
DELIMITERS = "<.>" "<!>" "<?>" "<¶>" ;

# ============= #
# Tags and sets #
# ============= #

# ========
SETS
# ========

# Sentence boundary tags; vislcg and CG-2 together.
# NOTE(review): the second tag of each list was an empty "()". Restored as
# (<s>) and (</s>) on the same markup-stripping assumption -- confirm.
LIST BOS = (>>>) (<s>) ;
LIST EOS = (<<<) (</s>) ;

# One single-tag list per morphological tag, so that rules can refer to the
# tag by a set name.
LIST N = N ;
LIST V = V ;
LIST A = A ;
LIST Prop = Prop ;
LIST Adv = Adv ;
LIST Num = Num ;
LIST CC = CC ;
LIST CS = CS ;
LIST Interj = Interj ;
LIST Abbr = Abbr ;
LIST ACR = ACR ;
LIST Pr = Pr ;
LIST Pron = Pron ;
LIST Pers = Pers ;
LIST Dem = Dem ;
LIST Det = Det ;
LIST Refl = Refl ;
LIST Recipr = Recipr ;
LIST Poss = Poss ;
LIST Interr = Interr ;
LIST Nom = Nom ;
LIST Acc = Acc ;
LIST Gen = Gen ;
LIST Dat = Dat ;
LIST Msc = Msc ;
LIST Fem = Fem ;
LIST Neu = Neu ;
LIST Sg = Sg ;
LIST Pl = Pl ;
LIST Def = Def ;
LIST Indef = Indef ;
LIST Cmp = Cmp ;
LIST Superl = Superl ;
LIST Prs = Prs ;
LIST Prt = Prt ;
LIST 1Sg = 1Sg ;
LIST 2Sg = 2Sg ;
LIST 3Sg = 3Sg ;
LIST 1Pl = 1Pl ;
LIST 2Pl = 2Pl ;
LIST 3Pl = 3Pl ;
LIST Inf = Inf ;
LIST PrfPrc = PrfPrc ;
LIST PrsPrc = PrsPrc ;
LIST IM = IM ;
LIST Indic = Indic ;
LIST Imp = Imp ;
LIST Sup = Sup ;
LIST Cmpnd = Cmpnd ;
LIST CLB = CLB ;
LIST PUNCT = PUNCT ;
LIST LEFT = LEFT ;
LIST RIGHT = RIGHT ;

# NOTE(review): "Sup" occurs twice below (once right after Cmp, where
# Superl may have been intended). Left unchanged -- confirm.
LIST TAG = N V A Adv CC CS Interj Pr Pron Pers Det Refl Recipr Poss
           Nom Acc Gen Dat Msc Fem Neu Sg Pl Def Indef Cmp Sup
           Prs Prt 1Sg 2Sg 3Sg Inf PrfPrc PrsPrc Sup Imp
           CLB PUNCT LEFT RIGHT ;

# Lists used with $$-unification in the rules.
LIST NAGD = Nom Acc Gen Dat ;
LIST GENDER = Msc Fem Neu ;
LIST NUMBER = Sg Pl ;

# Sets
# ====

# Noun sets
SET NounMscFem = (N Msc) OR (N Fem) ;
SET NounMscNeu = (N Msc) OR (N Neu) ;
SET NounFemNeu = (N Fem) OR (N Neu) ;

# Nominal sets
LIST NOMINALHEAD = N Num Pers Refl Recipr ;
LIST PRENAGR = Det Dem A ;

# Verb sets
LIST COPULA = "vera" "verða" ;
LIST AUX = "kunna" "skula" "skulu" "þurfa" "vera" "verða" ;
LIST VFIN = Prs Prt Imp ;
LIST Ind = Prs Prt ;
LIST NONTHIRDV = 1Sg 2Sg 1Pl 2Pl ;
# Was "PrfPrs", a tag declared nowhere in this grammar; the declared
# perfect participle tag is PrfPrc.
LIST VINFIN = Inf Sup PrfPrc PrsPrc ;
LIST MODV = "kunna" "láta" "skula" ;
LIST DATV = "smakka" ;

# Noun-Verb sets
LIST THIRD = Sg 3Sg Pl 3Pl ;

# Number sets
SET NUMBERS = Num - ("eitt") - ("1") ;

# Preposition sets
LIST ACCPREP = "aftan" "aftanvert" "ábeint" "áraka" "foruttan" "gjøgnum"
               "hóast" "inntil" "ígjøgnum" "íkring" "kring" "niðan" "oman"
               "síðan" "umframt" "umhvørvis" "umkring"
               "báðumegin" "hasumegin" "hvørgumegin" "høgrumegin"
               "sínumegin" "skeivumegin" "somumegin" "vinstrumegin"
               "øðrumegin"
               "eystan" "norðan" "sunnan" "vestan" ;
LIST DATPREP = "hjá" "með" "á" "aftrat" "afturat" "afturímóti"
               "andstøðis" "andsýnis" "mótvegis" "nær" "sambært"
               "viðvíkjandi" ;
LIST ACCDATPREP = "í" "á" "undir" "yfir" "eftir" "fyri" "við" "frá" ;
LIST ACCGENPREP = "innan" "uttan" "millum" "til" "vegna" ;
LIST ACCDATGENPREP = "at" "av" "hjá" "móti" "um" "undan" "úr" ;

# Unions: every preposition that can govern the given case.
SET SOMEACCPREP = ACCPREP | ACCDATPREP | ACCGENPREP | ACCDATGENPREP ;
SET SOMEDATPREP = DATPREP | ACCDATPREP | ACCDATGENPREP ;
SET SOMEGENPREP = ACCGENPREP | ACCDATGENPREP ;

# Boundary sets
SET S-BOUNDARY = CS | Interr | BOS | EOS ;

# Complementary set

# Case sets
SET NOTDAT = Nom | Gen | Acc ;
SET NOTACC = Nom | Gen | Dat ;
SET NOTACCDAT = Nom | Gen ;
SET OBL = Acc | Dat | Gen ;

#SET NO
SET PRE-N = A | Det | Dem | (N Gen) | Num ; # Det???
# Word-class and punctuation sets used as barriers in the rules below.
SET WORD = N | V | A | Pr | Pron | Adv | CC | CS | Interj | Num | ("\?") ;
SET MARK = ("\,") | ("\:") | ("\;") ;
SET WORDMARK = WORD | MARK ;
# NPNH: any word or mark that is not a prenominal modifier (PRE-N).
SET NPNH = WORDMARK - PRE-N ;
# NPNHA: as NPNH, but adverbs are also let through.
SET NPNHA = WORDMARK - PRE-N - Adv ;
SET NOT-ADV = WORDMARK - Adv ;
SET NOT-A = WORDMARK - A ;

LIST COMMA = "," ;

SECTION

# POS disambiguation
# ==================

# Pron Pers or Dem

# Det
REMOVE (Num) IF (0 ("ein" Det))(*1 N BARRIER NPNHA); ## eina mynd

# Specific lexemes
# ================
SELECT Pr IF (0 ("á"))(1 Dat OR Acc);
SELECT Pr IF (0 ("til"))(*1 (N Gen) BARRIER NPNHA);
SELECT CS IF (0 ("at"))(1C Inf);
REMOVE Dem IF (0 Pers)(NOT 1 NPNHA OR N);

# Adverb
SELECT Adv IF (-1 DATPREP)(*1 Dat BARRIER NPNHA);
SELECT Adv IF (-1 VFIN)(1 Inf);

# NP internal constraints
# =======================

# Determiner disambiguation
# Remove a gender reading from a demonstrative/determiner when the next noun
# with the same case lacks that gender.
REMOVE:NotNeuToMscFemDemNagd Neu IF (0 (Pron Dem) + $$NAGD)(*1 NounMscFem + $$NAGD BARRIER NPNHA LINK NOT 0 Neu);
REMOVE:NotMscToFemNeuDemNagd Msc IF (0 (Pron Dem) + $$NAGD)(*1 NounFemNeu + $$NAGD BARRIER NPNHA LINK NOT 0 Msc);
REMOVE:NotFemToMscNeuDemNagd Fem IF (0 (Pron Dem) + $$NAGD)(*1 NounMscNeu + $$NAGD BARRIER NPNHA LINK NOT 0 Fem);
REMOVE:NotNeuToMscFemDetNagd Neu IF (0 Det + $$NAGD)(*1 NounMscFem + $$NAGD BARRIER NPNHA LINK NOT 0 Neu);
REMOVE:NotMscToFemNeuDetNagd Msc IF (0 Det + $$NAGD)(*1 NounFemNeu + $$NAGD BARRIER NPNHA LINK NOT 0 Msc);
REMOVE:NotFemToMscNeuDetNagd Fem IF (0 Det + $$NAGD)(*1 NounMscNeu + $$NAGD BARRIER NPNHA LINK NOT 0 Fem);

# Case disambiguation
SELECT:NagdDemToN $$NAGD IF (0 (Pron Dem))(*1C $$NAGD BARRIER NPNHA);
REMOVE:NotNagdDemToN $$NAGD IF (0 (Pron Dem))(*1 N BARRIER NPNHA LINK NOT 0 $$NAGD);
SELECT:NagdDetToN $$NAGD IF (0 Det)(*1C $$NAGD BARRIER NPNHA);
REMOVE:NotNagdDetToN $$NAGD IF (0 Det)(*1 N BARRIER NPNHA LINK NOT 0 $$NAGD);

# Adjective disambiguation
# ------------------------
REMOVE:NotFemAdjToMscNeuN Fem IF (0 (A) + $$NAGD)(*1 NounMscNeu + $$NAGD BARRIER NPNHA LINK NOT 0 Fem);
REMOVE:NotMscAdjToFemNeuN Msc IF (0 (A) + $$NAGD)(*1 NounFemNeu + $$NAGD BARRIER NPNHA LINK NOT 0 Msc);
REMOVE:NotNeuAdjToMscFemN Neu IF (0 (A) + $$NAGD)(*1 NounMscFem + $$NAGD BARRIER NPNHA LINK NOT 0 Neu);

# Case disambiguation
SELECT:NagdAdjToNagd $$NAGD IF (0 A)(*1C $$NAGD BARRIER NPNHA)(NOT 0 V);
REMOVE:NotNagdAdjToAdjNotNagd $$NAGD IF (0 A)(*1 N BARRIER NPNHA LINK NOT 0 $$NAGD);

# Numeral disambiguation
#SELECT:NumAdjToN $$NUMBER IF (0 A)(1 N + $$NUMBER);
REMOVE:NumAdjToNpl (A Sg) IF (1C N + Pl);
REMOVE:NumAdjToNsg (A Pl) IF (1C N + Sg); ## í ta døkku moldina.
SELECT:NumDetToN $$NUMBER IF (0 Det)(*1 N + $$NUMBER BARRIER NOT-A);

# Gender disamb of numerals
# The context was written "(N $$GENDER)"; a $$-set inside a tag parenthesis
# is not unified, so it is expressed as "N + $$GENDER" like the other rules.
SELECT:GenderNumToN $$GENDER IF (0 Num)(*1C N + $$GENDER BARRIER A); ## eit sindur

# Case disamb of numerals
# Same rewrite as above: "(N $$NAGD)" -> "N + $$NAGD".
SELECT:GenderNumToA $$NAGD IF (0 Num)(*1C N + $$NAGD BARRIER A);

# Perhaps also rules that remove Neu if Msc or Fem, etc.

# Noun disambiguation
# -------------------
SELECT:GenderNounToAdj $$GENDER IF (-1C (A) + $$GENDER)(0 N);
SELECT:NagdNounToAdj $$NAGD IF (-1C (A) + $$NAGD)(0 N);
SELECT:NumberNounToAdj $$NUMBER IF (-1C (A) + $$NUMBER)(0 N);

# Poss disambiguation
REMOVE:GenderPossToPrecNoun $$GENDER IF (-1 N LINK NOT 0 $$GENDER)(0 Poss);
REMOVE:GenderPossToNoun $$GENDER IF (0 Poss)(*1 N BARRIER NPNHA LINK NOT 0 $$GENDER);

# Number disambiguation
REMOVE Pl IF (*-1 (Num Sg) OR (Dem Sg) BARRIER NOT-A)(0 (N Sg));
# Here, we need vislcg3 and variable notation!

# NP head disambiguation
# ======================

# Inversion
REMOVE:NotInverseNonSubj Acc IF (-2 CS)(-1 (V Pl))(0 (Pl Nom)) ; ## Tað var, sum váru tey sett út úr luftini.

# A or N
REMOVE A IF (*-1 Pr BARRIER NPNH)(0 N)(1 S-BOUNDARY);

# PP disambiguation
# =================

# Preposition or not?
# An ACCDATPREP word followed by an accusative or dative is not an adverb.
REMOVE Adv IF (0 ACCDATPREP)(*1 Acc OR Dat BARRIER NPNH);

# at
REMOVE Pr IF (0 ("at"))(NOT *1 Acc OR Gen OR Dat BARRIER V);
SELECT CS IF (0 ("at"))(*1 Nom BARRIER NPNHA LINK *1 V BARRIER OBL);

# millum
REMOVE Adv IF (0 Dat OR Acc)(*-1 ("millum") BARRIER NPNH);

# til
SELECT Pr IF (0 ("til"))(*1 N OR NPNH OR ("at") BARRIER NPNHA);

# Case within PP phrases
SELECT:PPAcc Acc IF (*-1 ACCPREP BARRIER NOTACC OR V OR Pr);
SELECT:PPDat Dat IF (*-1 DATPREP BARRIER NOTDAT OR V OR Pr);
REMOVE:NotPPNotDat NOTDAT IF (*-1 DATPREP BARRIER NOTDAT OR S-BOUNDARY OR V OR Pr);
#REMOVE Acc IF (*-1 DATPREP BARRIER NOTDAT);
#REMOVE Gen IF (*-1 DATPREP BARRIER NOTDAT);
REMOVE:NotPPGen Gen IF (*-1 ACCDATPREP BARRIER NOTACCDAT OR S-BOUNDARY OR V OR Pr);
REMOVE:NotPPNom Nom IF (*-1 ACCDATPREP BARRIER NOTACCDAT OR S-BOUNDARY OR V OR Pr);

# P chains or not
SELECT Adv IF (0 Pr)(1C Pr); ## Hann fór niðan um Danmark.

# Pronoun disambiguation
REMOVE:NotDemBeforeVerb Dem IF (0 Pers)(1C V); ## Tey vóru so hugnalig.
REMOVE:NotDemBeforeNonDef Dem IF (0 Pers)(NEGATE *1 Def BARRIER NPNH); ##

# VP disambiguation
# =================

# V or A
REMOVE A IF (0 ("vera"))(1 (N Sg)); ## Enskt mál er móðurmál.
SELECT ("vera" 3Sg) IF (-1 ("tað" Pron Pers Sg Nom)); ## Tað var, sum váru tey sett av tilvild av einum fóti út úr luftini.

# Infinitive
SELECT Inf IF (-1 ("at"));
SELECT Inf IF (*-1 MODV BARRIER V);
SELECT:AuxInf Inf IF (-2 Nom)(-1 AUX + VFIN);

# Imperative
#REMOVE Imp IF (-1 Adv OR Nom OR Pr OR CS OR V)(0 Prs OR Prt);
REMOVE Imp IF (NOT *-1 BOS BARRIER WORD); # hmm
# The Imp section must be written when there are genuine
# examples to build upon.
REMOVE Imp IF (0 N)(1 CC)(*2 N BARRIER NPNHA); ## Stýrið og stjórnin hjá...
REMOVE (Imp Pl) IF (0 (N Neu))(1 VFIN); ## Húsið er stórt.
# Supine
# TODO: the original had an unfinished rule here ("SELECT Sup IF" with no
# context and no terminator); it is commented out until it is written.
#SELECT Sup IF
REMOVE Sup IF (0 VFIN)(NOT *-1C VFIN BARRIER S-BOUNDARY)(NOT *1C VFIN BARRIER S-BOUNDARY);

# Present singular
# NOTE(review): in "(V Ind 3Sg)" the item Ind sits inside a tag parenthesis,
# so it is presumably read as a literal tag rather than as the set
# "Ind = Prs Prt" -- confirm that the analyser emits such a tag.
SELECT (V Ind 3Sg) IF (-1 (N Prop Nom) OR (N Sg Nom) OR (Pron 3Sg Nom) LINK NOT *-1 CC BARRIER NPNHA)(1 (Pron Refl Acc)) ;
REMOVE (V Ind 2Sg) IF (*-1 (N Sg Nom) BARRIER NOT-ADV);
SELECT (V Ind 3Sg) IF (-1 (N Prop Nom) OR (N Sg Nom) OR (Pron 3Sg Nom) LINK NOT *-1 CC BARRIER NPNHA OR S-BOUNDARY) ;
# The terminating ";" was missing on the next rule.
SELECT (V 3Sg) IF (NOT -2 V + 1Sg)(-1 CC) ;
SELECT (V 2Sg) IF (*-1 ("tú" Pron Pers Sg Nom) BARRIER NOT-ADV);
REMOVE:3not1 1Sg IF (0 V + 3Sg)(NOT 0* Pron + 1Sg BARRIER Pron + 3Sg) ;

# Present plural
SELECT (V 3Pl) IF (-1 (N Pl Nom));

# The pronoun alternative read "Pron + 3Sg + Pron"; the trailing Pron is
# taken to be a slip for Nom, parallel to "N + Sg + Nom".
SELECT:Matrix 3Sg IF (*-1 S-BOUNDARY OR COMMA BARRIER NOT-ADV)(0 VFIN)(*1 N + Sg + Nom OR Pron + 3Sg + Nom BARRIER NPNH LINK 1 CS);

REMOVE Inf IF (-1C Pron + 3Pl)(0 V + 3Pl);

# V + Refl
# Was "(-1 (Inf OR 3Sg or 3Pl))": set operators are not allowed inside a tag
# parenthesis, and the operator keyword is written OR in this grammar.
SELECT ("seg" Refl) IF (-1 Inf OR 3Sg OR 3Pl) ;

# Nominative
# ==========
SELECT (3Sg Nom) IF (*-1 S-BOUNDARY BARRIER NPNH + OBL)(1 VFIN + 3Sg) ;

# Accusative
# ==========
REMOVE Nom IF (*-1 AUX OR MODV LINK -1 CC OR Nom BARRIER NPNH)(1 Inf);

# Genitive
# ========
# Genitive is marginal in Faroese. The strategy should be:
# 1. Write rules selecting genitive. (such rules are still forthcoming)
SELECT Gen IF (*-1 ACCGENPREP BARRIER NPNHA)(NOT 0 Acc);
# 2. Remove the remaining genitives.
REMOVE Gen ;

# Pronoun disambiguation
# =======================
REMOVE ("hannur") IF (0 ("hann" Nom))(1 (V 3Sg)); ## Han var her.
SELECT Refl IF (0 ("sik" Dat))(-1 VFIN);

# Infinitive mark
# ==============
SELECT IM IF (0 ("at"))(1C Inf);
SELECT Adv IF (0 ("þá"))(1 ("er" CS) OR VFIN);

# Syntactic disambiguation
# ========================

# Syntax tags
LIST @SUBJ = @SUBJ ;
LIST @OBJ = @OBJ ;
LIST @>N = @>N ;
LIST @N< = @N< ;
LIST @SPRED = @SPRED ;
LIST @VAUX = @VAUX ;
LIST @VMAIN = @VMAIN ;
LIST @ADVL = @ADVL ;
LIST @P< = @P< ;
LIST @CC = @CC ;
LIST @CS = @CS ;
# This definition ended in ":" instead of the ";" terminator.
LIST @APP = @APP ;
LIST @X = @X ;

LIST SYNTAG = (@CC) (@CS) (@SUBJ) (@OBJ) (@>N) (@N<) (@SPRED) (@VFIN) (@ADVL) (@P<) (@APP) ;

# Stop copy here
# \\\\\\\\\\\\\\\\

# ======== #
SECTION    #
# ======== #

# Mapping rules: attach syntactic tags to the morphological readings.
ADD (@CC) TARGET CC ;
ADD (@CS) TARGET CS ;

MAP (@VAUX) TARGET AUX + VFIN IF (0* VINFIN BARRIER CS OR CC);
MAP (@VMAIN) TARGET V ;
#MAP (@VMAIN) TARGET VFIN IF (NEGATE 0* VFIN BARRIER CS OR CLB);

ADD (@SPRED) TARGET (N Nom) OR (A Nom) IF (*-1 COPULA BARRIER NPNHA LINK -1 Nom);

#MAP (@SUBJ) TARGET (N Nom) OR (Num Nom) OR (Pron Nom);
MAP:Subj2 (@SUBJ) TARGET ("tú" Pron Pers Sg Nom) IF (*1 (V 2Sg) BARRIER VFIN);
MAP:Subj2 (@SUBJ) TARGET ("tú" Pron Pers Sg Nom) IF (*-1 (V 2Sg) BARRIER VFIN);
MAP:SubjSg (@SUBJ) TARGET NOMINALHEAD + Sg + Nom IF (*0 (V 3Sg) OR (V Sg));
MAP:SubjPl (@SUBJ) TARGET NOMINALHEAD + Pl + Nom IF (*0 (V 3Pl));
MAP:SubjPronSg (@SUBJ) TARGET Pron + 3Sg + Nom IF (*0 (V 3Sg) OR (V Sg));
MAP:SubjPronPl (@SUBJ) TARGET Pron + 3Pl + Nom IF (*0 (V 3Pl));

MAP (@OBJ) TARGET (N Acc) OR (Num Acc) OR (Pron Acc) IF (*-1 V BARRIER Pr);
MAP (@IOBJ) TARGET (N Dat) OR (Num Dat) OR (Pron Dat) IF (*1 (@OBJ) BARRIER NPNHA);
MAP (@OBJ) TARGET Dat IF (*-1 DATV);

MAP:DatPrComplTag (@P<) TARGET NOMINALHEAD + Dat IF (*-1 SOMEDATPREP BARRIER NPNHA);
MAP:AccPrComplTag (@P<) TARGET NOMINALHEAD + Acc IF (*-1 SOMEACCPREP BARRIER NPNHA);
MAP:GenPrComplTag (@P<) TARGET NOMINALHEAD + Gen IF (*-1 SOMEGENPREP BARRIER NPNHA);

# The position was written "+1", which is not a valid position; a test with
# a BARRIER has to scan, so it is "*1" as in the neighbouring rules.
MAP:MapDemToN (@>N) TARGET Dem + $$GENDER + $$NAGD IF (*1 N + $$GENDER + $$NAGD BARRIER NPNHA);

#MAP (@P<) TARGET Acc IF
MAP (@>N) TARGET (N Gen) IF (*1 N BARRIER V OR Adv);
MAP (@>N) TARGET (Det) IF (*1 N BARRIER V OR Adv);
MAP (@>N) TARGET (Dem) IF (*1 N BARRIER V OR Adv);
MAP (@>N) TARGET A + $$NAGD OR Dem + $$NAGD IF (*1 N + $$NAGD OR Num + $$NAGD BARRIER NPNHA);

MAP (@VFIN) TARGET (V Prs) OR (V Prt) OR (V Imp);

MAP (@ADVL) TARGET Pr ;

MAP (@N<) TARGET Poss IF (0 $$NAGD)(-1 N + $$NAGD);

MAP (@ADVL) TARGET Adv ;

# Context was "(-1 (N $$NAGD))"; written as "N + $$NAGD" so that the case is
# unified, matching the Poss rule above.
MAP (@N<) TARGET N IF (0 $$NAGD)(-1 N + $$NAGD);

MAP:SpredIfSpecial (@SPRED) TARGET A IF (-1 ("síggja"))(1 ("út"));
MAP:SpredIfSpecialCC (@SPRED) TARGET A IF (-1 ("síggja"))(1 CC)(*2 A);

# Fallback tag for every remaining word; removed again at the start of the
# next section.
MAP (@X) TARGET WORD ;

# =========== #
SECTION       #
# =========== #

# No-syntax early
REMOVE (@X) ;

REMOVE (@SPRED) IF (NOT *-1 (@SUBJ))(NOT *1 (@SUBJ));

REMOVE (@OBJ) IF (*-1 Pr BARRIER NPNHA);
REMOVE (@IOBJ) IF (*-1 Pr BARRIER NPNHA);
REMOVE (@P<) IF (*-1 BOS OR (N Acc) OR (N Dat) OR (N Nom) OR V OR CLB OR CS BARRIER Pr);
REMOVE (@>N) IF (*-1 Pr BARRIER NPNHA);

SELECT (Sg @SPRED) IF (*-1 COPULA BARRIER NPNHA LINK -1 (Sg Nom));
SELECT (Pl @SPRED) IF (*-1 COPULA BARRIER NPNHA LINK -1 (Pl Nom));

REMOVE (@SUBJ) IF (*-1 VFIN BARRIER S-BOUNDARY LINK -1 (@SUBJ));
SELECT (@SUBJ) IF (-1 CS)(1 VFIN); ## sum hann hevði í lummanum.
# PPs
# Drop the competing syntactic tags from a word that follows a preposition
# and stands directly before an unambiguous @P<.
REMOVE (@SUBJ) IF (*-1C Pr BARRIER NPNH)(1C (@P<));
REMOVE (@OBJ) IF (*-1C Pr BARRIER NPNH)(1C (@P<));
REMOVE (@ADVL) IF (*-1C Pr BARRIER NPNH)(1C (@P<));
REMOVE (@SPRED) IF (*-1C Pr BARRIER NPNH)(1C (@P<));
REMOVE (@OPRED) IF (*-1C Pr BARRIER NPNH)(1C (@P<));

REMOVE:NotVerbIfPrCompl V IF (-1 Pr)(0 (@P<)); ## av tilvild

# Case disambiguation
#SELECT Acc IF (*-1 ACCPREP BARRIER NPNHA)(0 (@P<));
#SELECT Dat IF (*-1 DATPREP BARRIER NPNHA)(0 (@P<));
#REMOVE Nom IF (*-1 ACCGENPREP BARRIER NPNHA)(0 (@P<));
#REMOVE Dat IF (*-1 ACCGENPREP BARRIER NPNHA)(0 (@P<));
#REMOVE Nom IF (*-1 ACCDATPREP BARRIER NPNHA)(0 (@P<));
#REMOVE Gen IF (*-1 ACCDATPREP BARRIER NPNHA)(0 (@P<));

# Number SPRED disambiguation

# VP disambiguation
#REMOVE:NotVIfLeftDet V IF (-1 PRENAGR + $$NAGD)(0 (N $$NAGD));
#REMOVE:NotVIfLeftDet V IF (-1 (Det $$NAGD))(0 (N $$NAGD));
REMOVE:NotVIfLeftDet V IF (-1 (Det Nom))(0 (N Nom));

#END

# Ordinals
REMOVE (A Sg @>N) IF (1 (Num Sg)); ## fyrstu tríggjar mánðirnar.
REMOVE (A @>N) IF (0 V)(NOT 1 N OR A);
# Alternatively: Demand Nom to the left or Num to the right.

# Postsyntactical verbal dis
SELECT:FiniteVerb VFIN IF (NEGATE 0* VFIN);

SELECT:InitialCC CC IF (-1 BOS)(1C (@SUBJ)); ## Men tað var hált.

##
##
## Tað var, sum vóru tey sett.