# Distributed under the terms of the GNU General Public License version 2
# or any later version.

# =================================================================== #
# =================================================================== #
#        S O U T H   S Á M I   D I S A M B I G U A T O R
# =================================================================== #
# =================================================================== #

# =========================================== #
#           Short table of contents           #
# =========================================== #

# Delimiters
# Tags and sets
# Mapping rules
# Disambiguation rules
#   One-cohort disambiguation - cycle 0
#   Local disambiguation - cycles 1 and 2
#   Cycle 2: Other local disambiguation
#   Cycle 3: Global disambiguation
#   Cycle 4: Syntactic disambiguation
#   Cycle 5: Post-syntactic morphological disambiguation

# ========== #
# Delimiters #
# ========== #

# Word forms that end a disambiguation window.
# NOTE(review): the second and third entries were empty strings ("") in the
# damaged source; "<!>" and "<?>" are restored on the assumption that they
# were stripped as markup -- confirm against the upstream file.
DELIMITERS = "<.>" "<!>" "<?>" "<...>" "<¶>" ;

# ============= #
# Tags and sets #
# ============= #

# Beginning-of-sentence / end-of-sentence marks; vislcg and CG-2 together.
# NOTE(review): the second member of each list was an empty "()" in the
# damaged source; (<s>) and (</s>) are restored on the same assumption as
# above -- confirm against the upstream file.
LIST BOS = (>>>) (<s>) ;
LIST EOS = (<<<) (</s>) ;
# Tags declared as single-membered LISTs
# ======================================

# Nouns: part of speech, number, case
LIST N = N ;
LIST Sg = Sg ;     LIST Pl = Pl ;
LIST Nom = Nom ;   LIST Acc = Acc ;   LIST Gen = Gen ;
LIST Ine = Ine ;   LIST Ela = Ela ;   LIST Ill = Ill ;
LIST Com = Com ;   LIST Ess = Ess ;

# Possessive suffixes
LIST PxSg1 = PxSg1 ;   LIST PxSg2 = PxSg2 ;   LIST PxSg3 = PxSg3 ;
LIST PxDu1 = PxDu1 ;   LIST PxDu2 = PxDu2 ;   LIST PxDu3 = PxDu3 ;
LIST PxPl1 = PxPl1 ;   LIST PxPl2 = PxPl2 ;   LIST PxPl3 = PxPl3 ;

# Verbs: tense, person/number, non-finite forms, mood
LIST V = V ;
LIST Prs = Prs ;   LIST Prt = Prt ;
LIST Sg1 = Sg1 ;   LIST Sg2 = Sg2 ;   LIST Sg3 = Sg3 ;
LIST Du1 = Du1 ;   LIST Du2 = Du2 ;   LIST Du3 = Du3 ;
LIST Pl1 = Pl1 ;   LIST Pl2 = Pl2 ;   LIST Pl3 = Pl3 ;
LIST Inf = Inf ;
LIST Neg = Neg ;   LIST ConNeg = ConNeg ;
LIST PrfPrc = PrfPrc ;
LIST VGen = VGen ;
LIST Ger = Ger ;
LIST Ind = Ind ;   LIST Imp = Imp ;   LIST ImpII = ImpII ;
LIST Pot = Pot ;   LIST Cond = Cond ; LIST Opt = Opt ;
LIST Actor = Actor ;
LIST Clt = Clt ;

# Adjectives
LIST A = A ;
LIST Comp = Comp ; LIST Superl = Superl ;
LIST Attr = Attr ;

# Pronouns
LIST Pron = Pron ;
LIST Pers = Pers ; LIST Dem = Dem ;

# Other parts of speech
LIST Adp = Adp ;   LIST Po = Po ;
LIST Adv = Adv ;
LIST Interj = Interj ;
LIST Num = Num ;

# Proper nouns
LIST Prop = Prop ;
LIST Mal = Mal ;   LIST Fem = Fem ;   LIST Sur = Sur ;

# Conjunctions, particles, punctuation
LIST CS = CS ;     LIST CC = CC ;
LIST Pcle = Pcle ;
LIST CLB = CLB ;
LIST PUNCT = PUNCT ;
LIST LEFT = LEFT ; LIST RIGHT = RIGHT ;

# Lemmas treated as titles
LIST TITLE = "prof" "cand.philol" ;

# Every part-of-speech tag that counts as a word
LIST WORD = N A V Adv Pron Interj Adp Num CS CC Pcle ;

# Parts of speech other than Adv, Interj and Pcle
# (a duplicated "A" member was dropped; membership is unchanged)
LIST NOT-ADV-INT-PCLE = N A Num Pron V CC CS Adp ;

SET NOT-ADV = WORD - Adv ;

SET NP-HEAD = Num OR N OR Pron ;

# The strict version of items that can only be premodifiers, not parts of
# the predicate - copied from sme-dis.rle
# (a duplicated "(PrfPrc @>N)" member was dropped; membership is unchanged)
SET PRE-NP-HEAD = (Prop Attr)
               OR (Prop @>N)
               OR (A Attr)
               OR (ABBR Attr)
               OR (Pron Pers Gen)
               OR (N Gen)
               OR Num
               OR (Cmpnd)
               OR CC
               OR (Pron Dem)
               OR (Pron Refl Gen)
               OR (Pron Indef)
               OR (PrfPrc @>N)
               OR (PrsPrc)
               OR (A Ord)
               OR Attr ;

# NPNH = "NOT-PRE-NP-HEAD"
SET NPNH = WORD - PRE-NP-HEAD OR (ABBR) ;

# Verb sets

# Finite-verb moods. LIST members are separated by whitespace; the source
# had "Ind OR Imp", which would make OR a literal tag inside a LIST.
LIST VFIN = Ind Imp ;

# These are often followed by an infinitive.
# To the set belong also some verbs which are not in the lex:
# tjoeveridh tjoeredh
LIST INFV = "aelkedh" "båetedh" "edtjedh" "galkedh" "maehtedh"
            "provhkedh" "sjïdtedh" ;

# ============== #
# Disambiguation #
# ============== #

SECTION

# Cycle 0

# Drop the ("Da" N) reading when the same cohort also has lemma "dïhte".
REMOVE ("Da" N) IF (0 ("dïhte")) ;

# Verbs

# ConNeg is kept only if a Neg is found to the left with no other ConNeg
# in between; otherwise the ConNeg readings are removed (IFF).
IFF:IsItConNeg ConNeg (*-1 Neg BARRIER ConNeg) ;

# Imperative
REMOVE:NotImpIfWord Imp IF (*-1 NOT-ADV-INT-PCLE BARRIER CS OR CC) ;
REMOVE:NotIndIfImpAndExcl Ind IF (NOT *-1 Neg)(0 Imp)(*1 ("excl") BARRIER Ind) ;

## SELECT:Pl3WhenPlSubj Pl3 IF (1 (N Pl Nom))(0 ("lea" V));

# Should be BARRIER V OR CS OR CC
# It doesn´t work...
# Manne båatam sagke buerebe datneste jieledh, Karijuse.
SELECT:InfIfInfv (V Inf) IF (*-1 INFV BARRIER V OR CS OR CC) ;

SELECT:VSg2IfPersSg2 (V Sg2) (*-1 (Pers Sg2) BARRIER V) ;

SELECT:PrfPrcIfLea PrfPrc (*-1 ("lea") BARRIER V) ;

SELECT:PlIfPlSubj Pl3 IF (*-1 (Pl Nom) BARRIER NOT-ADV) ;

# Contract or not
# This rule might be problematic in light of the empirical work.
REMOVE:NotContrIfEven (".*ijidh"r) IF (0 (".*[oe]dh"r)) ;

# Span sentences ""
# NOTE(review): the quoted token in the heading above is empty in the source
# (possibly lost), and it is not certain whether the rule below was live or
# commented out -- confirm against the upstream file. The "<*-1" position is
# kept exactly as found; verify that the compiler in use accepts the span
# marker before the offset.
REMOVE Pl3 IF (<*-1 Sg1 OR Sg2)(NOT *-1 (N Pl Nom)) ;

# Nomen

# Too hard.
SELECT Attr IF (0 Prop)(1 Prop) ;
REMOVE (Prop Attr) IF (NOT 1 Prop) ;

# Before an adposition: choose Gen, discard Nom.
SELECT:GenToAdp Gen IF (1 Adp) ;
REMOVE:NotNomToAdp Nom IF (1 Adp) ;

# Dan lin baenieh njaelmesne.
SELECT:GenIfLeaNom Gen (1 ("lea" V) LINK 0 Pl3 OR Sg3)(*1 (N Nom) BARRIER NPNH) ;

# consider VFIN for INFV
SELECT:AccIfSVOV (Acc Sg) IF (*-1C INFV BARRIER NPNH)(1C V) ;

# Verb or Noun

## jieledh.
REMOVE:NotNomIfInf (N Pl Nom) IF (0 Inf)(*-1 Ind OR Imp LINK *-1 Nom) ;

# Dem

# Klaasen tjirrh olkese goevli doj veelkes baeniej gåajkoe, ...
SELECT:DemGenIfNGen (Dem Pl Gen)(*1 (N Pl Gen) BARRIER NPNH) ;

# CC and CS or Adv

REMOVE:GoNotCs CS (-1 Neg)(0 ("goh")) ;

# Guktie vielle Baktuse, giehtelh maa!
REMOVE:NotNomIfCs (N Nom) (0 CS)(1 (N Nom)) ;

# Jaa, jis damth seekere.....
REMOVE:CsIfNotFuncword Pcle (NEGATE *-1 NOT-ADV-INT-PCLE)(0 CS) ;
# Jih dellie jeehti...
REMOVE:NotCsIfCc CS (-1 CC OR ("mij" Nom))(0 Adv) ;

# Drop CS on a cohort that also has an Adv reading when the preceding
# cohort is not a WORD.
REMOVE:NotCsIfBOS CS (NEGATE -1 WORD)(0 Adv) ;

# Putative contract verb

# Verb person

# Interj

SELECT:InterjIfInterj Interj (0 ("ho"))(1 ("ho")) ;


SECTION

# Syntactic tags
# --------------

LIST @+FAUXV = @+FAUXV ;
LIST @+FMAINV = @+FMAINV ;
LIST @-FAUXV = @-FAUXV ;
LIST @-FMAINV = @-FMAINV ;
LIST @-FSUBJ = @-FSUBJ ;
SET FMAINV = @-FMAINV OR @+FMAINV ;
LIST @ADV-A = @ADV-A ;
LIST @ADV-ADV = @ADV-ADV ;
LIST @ADVL = @ADVL ;
LIST @AN> = @AN> ;
LIST @ActioN> = @ActioN> ;
LIST @CC = @CC ;
LIST @CMPND = @CMPND ;
LIST @CS = @CS ;
LIST @DN> = @DN> ;
LIST @GA> = @GA> ;
LIST @GN> = @GN> ;
LIST @GP< = @GP< ;
LIST @GP> = @GP> ;
LIST @HNOUN = @HNOUN ;
LIST @INTERJ = @INTERJ ;
LIST @MEASURE = @MEASURE ;
LIST @NNum> = @NNum> ;
LIST @NumN< = @NumN< ;
LIST @NPron< = @NPron< ;
LIST @NQ< = @NQ< ;
LIST @NUM-PRON = @NUM-PRON ;
LIST @OBJ = @OBJ ;
LIST @OPRED = @OPRED ;
LIST @PCLE = @PCLE ;
LIST @PCLE-COMPL = @PCLE-COMPL ;
LIST @PROP> = @PROP> ;
LIST @PrcN> = @PrcN> ;
LIST @PronN< = @PronN< ;
LIST @PronN> = @PronN> ;
LIST @QN< = @QN< ;
LIST @QN> = @QN> ;
LIST @SPRED = @SPRED ;
LIST @SUBJ = @SUBJ ;
LIST @SUBJ-QH = @SUBJ-QH ;
LIST @TITLE = @TITLE ;
LIST @VOC = @VOC ;
LIST @X = @X ;

# Mapping rules: attach syntactic tags to readings.

# Tags that follow directly from the part of speech
MAP (@CC) TARGET CC ;
MAP (@CS) TARGET CS ;
MAP (@PROP>) TARGET (Prop Attr) ;
MAP (@INTERJ) TARGET Interj ;
MAP (@PCLE) TARGET Pcle ;

# A title lemma immediately followed by a proper noun tagged Fem, Mal or Sur
ADD (@TITLE) TARGET TITLE IF (1 Prop LINK 0 Fem OR Mal OR Sur) ;

# Premodifiers: a nominative NP head must follow, with no NPNH in between
MAP (@DN>) TARGET (Dem Nom) IF (*1 NP-HEAD + Nom BARRIER NPNH) ;
MAP (@AN>) TARGET (A Attr) IF (*1 NP-HEAD + Nom BARRIER NPNH) ;

# Verbs
MAP (@+FAUXV) TARGET Neg OR ("lea") IF (*1 V BARRIER CLB) ;
# NOTE(review): "*0" is kept exactly as found; confirm the intended scan
# direction against the compiler in use.
MAP (@+FMAINV) TARGET VFIN IF (NOT *0 VFIN BARRIER CLB) ;

# Adverbials: postpositions and the local cases
MAP (@ADVL) TARGET Po ;
MAP (@ADVL) TARGET Ine OR Ela OR Ill ;

# MAP (@P<) TARGET (N Gen) IF (*-1 Pr BARRIER NPNH) ; # No Pr
# NOTE(review): @>P is not among the syntactic tags declared above (which
# include @GP> and @GP<) -- confirm which tag is intended.
MAP (@>P) TARGET (N Gen) IF (1 Po) ;

MAP (@OBJ) TARGET Acc ;

# Singular nominative NP heads: @SPRED when a Sg3 verb preceded by a
# (Nom Sg) reading is found to the left, @SUBJ otherwise.
MAP (@SPRED) TARGET NP-HEAD + Nom + Sg IF (*-1 V + Sg3 LINK -1 (Nom Sg)) ;
MAP (@SUBJ) TARGET NP-HEAD + Nom + Sg IF (NEGATE *-1 V + Sg3 LINK -1 (Nom Sg)) ;

MAP (@ADVL) TARGET Adv ;

# @X is a rule target for all WORD readings
MAP (@X) TARGET WORD ;

# =============== #
# Syntactic rules #
# =============== #

SECTION

REMOVE (@X) ;