# Distributed under the terms of the GNU General Public License version 2
# or any later version.
# =================================================================== #
# =================================================================== #
# S O U T H S Á M I D I S A M B I G U A T O R #
# =================================================================== #
# =================================================================== #
# =========================================== #
# Short table of contents #
# =========================================== #
# Delimiters
# Tags and sets
# Mapping rules
# Disambiguation rules
# One-cohort disambiguation - cycle 0
# Local disambiguation - cycles 1 and 2
# Cycle 2: Other local disambiguation
# Cycle 3: Global disambiguation
# Cycle 4: Syntactic disambiguation
# Cycle 5: Post-syntactic morphological disambiguation
# ========== #
# Delimiters #
# ========== #
# Sentence delimiters: a disambiguation window ends at any of these wordforms.
# NOTE(review): the previous line listed the malformed entries `""` and `">"`,
# which look like "<!>" and "<?>" after angle-bracket stripping. They are
# restored here on that assumption - confirm against the upstream grammar.
DELIMITERS = "<.>" "<!>" "<?>" "<...>" "<¶>" ;
# ============= #
# Tags and sets #
# ============= #
# Sentence-boundary sets: the >>> / <<< boundary tag plus the <s> / </s>
# markup tag, so that both tagging conventions are matched.
# NOTE(review): the previous definitions had an empty `()` where the markup
# tag belongs (most likely lost to angle-bracket stripping); <s> and </s> are
# restored on that assumption - confirm against the upstream grammar.
LIST BOS = (>>>) (<s>) ;
LIST EOS = (<<<) (</s>) ; # vislcg and CG-2 together.
# Tags declared as single-membered LISTs
# ======================================
# Every tag below is wrapped in a one-member LIST of the same name so that
# rules can refer to it as a set. The grouping headers are only a reading aid.

# Nouns: number, case, possessive suffixes (Px*)
LIST N       = N ;
LIST Sg      = Sg ;
LIST Pl      = Pl ;
LIST Nom     = Nom ;
LIST Acc     = Acc ;
LIST Gen     = Gen ;
LIST Ine     = Ine ;
LIST Ela     = Ela ;
LIST Ill     = Ill ;
LIST Com     = Com ;
LIST Ess     = Ess ;
LIST PxSg1   = PxSg1 ;
LIST PxSg2   = PxSg2 ;
LIST PxSg3   = PxSg3 ;
LIST PxDu1   = PxDu1 ;
LIST PxDu2   = PxDu2 ;
LIST PxDu3   = PxDu3 ;
LIST PxPl1   = PxPl1 ;
LIST PxPl2   = PxPl2 ;
LIST PxPl3   = PxPl3 ;

# Verbs: tense, person/number, non-finite and negation forms, mood
LIST V       = V ;
LIST Prs     = Prs ;
LIST Prt     = Prt ;
LIST Sg1     = Sg1 ;
LIST Sg2     = Sg2 ;
LIST Sg3     = Sg3 ;
LIST Du1     = Du1 ;
LIST Du2     = Du2 ;
LIST Du3     = Du3 ;
LIST Pl1     = Pl1 ;
LIST Pl2     = Pl2 ;
LIST Pl3     = Pl3 ;
LIST Inf     = Inf ;
LIST Neg     = Neg ;
LIST ConNeg  = ConNeg ;
LIST PrfPrc  = PrfPrc ;
LIST VGen    = VGen ;
LIST Ger     = Ger ;
LIST Ind     = Ind ;
LIST Imp     = Imp ;
LIST ImpII   = ImpII ;
LIST Pot     = Pot ;
LIST Cond    = Cond ;
LIST Opt     = Opt ;
LIST Actor   = Actor ;
LIST Clt     = Clt ;

# Adjectives
LIST A       = A ;
LIST Comp    = Comp ;
LIST Superl  = Superl ;
LIST Attr    = Attr ;

# Pronouns
LIST Pron    = Pron ;
LIST Pers    = Pers ;
LIST Dem     = Dem ;

# Adpositions, adverbs, interjections, numerals
LIST Adp     = Adp ;
LIST Po      = Po ;
LIST Adv     = Adv ;
LIST Interj  = Interj ;
LIST Num     = Num ;

# Proper nouns and the tags tested together with Prop
LIST Prop    = Prop ;
LIST Mal     = Mal ;
LIST Fem     = Fem ;
LIST Sur     = Sur ;

# Conjunctions and particles
LIST CS      = CS ;
LIST CC      = CC ;
LIST Pcle    = Pcle ;

# Clause boundary and punctuation tags
LIST CLB     = CLB ;
LIST PUNCT   = PUNCT ;
LIST LEFT    = LEFT ;
LIST RIGHT   = RIGHT ;
# Title words, listed by lemma.
LIST TITLE = "prof" "cand.philol" ;

# Word-class inventories used as context conditions and barriers.
LIST WORD = N A V Adv Pron Interj Adp Num CS CC Pcle ;
LIST NOT-ADV-INT-PCLE = N A Num Pron V CC CS Adp ;
SET NOT-ADV = WORD - Adv ;
SET NP-HEAD = Num OR N OR Pron ;

# Readings that may precede a noun-phrase head, one alternative per line.
SET PRE-NP-HEAD =
      (Prop Attr)
   OR (Prop @>N)
   OR (A Attr)
   OR (ABBR Attr)
   OR (Pron Pers Gen)
   OR (N Gen)
   OR Num
   OR (Cmpnd)
   OR CC
   OR (Pron Dem)
   OR (Pron Refl Gen)
   OR (Pron Indef)
   OR (PrfPrc @>N)
   OR (PrsPrc)
   OR (A Ord)
   OR Attr ;
# The strict version of items that can only be premodifiers, not parts of the predicate - copied from sme-dis.rle
SET NPNH = WORD - PRE-NP-HEAD OR (ABBR) ; # NPNH = "NOT-PRE-NP-HEAD"
# Verb sets
# Finite-verb moods. A LIST is a whitespace-separated run of tags, not a set
# expression, so the earlier "Ind OR Imp" added a literal tag named OR to the
# list. The members are now listed plainly.
LIST VFIN = Ind Imp ;
# Verbs, listed by lemma, that the InfIfInfv and AccIfSVOV rules look for to the left.
LIST INFV = "aelkedh" "båetedh" "edtjedh" "galkedh" "maehtedh" "provhkedh" "sjïdtedh" ;
# These are often followed by an infinitive. Some verbs that are not in the lexicon also belong to this set: tjoeveridh, tjoeredh
# ============== #
# Disambiguation #
# ============== #
SECTION

# Cycle 0
# Drop the noun reading with lemma "Da" when the same cohort also has the lemma "dïhte".
REMOVE ("Da" N)
   IF (0 ("dïhte")) ;

# Verbs
# ConNeg is decided by whether a Neg is found to the left before any other ConNeg.
IFF:IsItConNeg ConNeg
   IF (*-1 Neg BARRIER ConNeg) ;

# Imperative
# No imperative when one of the NOT-ADV-INT-PCLE word classes is found to the
# left before a CS or CC.
REMOVE:NotImpIfWord Imp
   IF (*-1 NOT-ADV-INT-PCLE BARRIER CS OR CC) ;
# Drop indicative when the cohort also has Imp, no Neg is found to the left,
# and "excl" is found to the right before any Ind.
REMOVE:NotIndIfImpAndExcl Ind
   IF (NOT *-1 Neg)
      (0 Imp)
      (*1 ("excl") BARRIER Ind) ;

##
# "lea" takes Pl3 when the next word is a plural nominative noun.
SELECT:Pl3WhenPlSubj Pl3
   IF (1 (N Pl Nom))
      (0 ("lea" V)) ;
# Infinitive after one of the INFV verbs, searching left up to V, CS or CC.
SELECT:InfIfInfv (V Inf)
   IF (*-1 INFV BARRIER V OR CS OR CC) ;
# Original author's note: should be BARRIER V OR CS OR CC - reported as not working.
# Manne båatam sagke buerebe datneste jieledh, Karijuse.
# Sg2 verb when a Sg2 personal pronoun is found to the left before any verb.
SELECT:VSg2IfPersSg2 (V Sg2)
   IF (*-1 (Pers Sg2) BARRIER V) ;
# Perfect participle when "lea" is found to the left before any verb.
SELECT:PrfPrcIfLea PrfPrc
   IF (*-1 ("lea") BARRIER V) ;
# Pl3 when a plural nominative is found to the left before any NOT-ADV word.
SELECT:PlIfPlSubj Pl3
   IF (*-1 (Pl Nom) BARRIER NOT-ADV) ;

# Contract or not
# Drop a lemma matching .*ijidh when the cohort also has a lemma matching .*[oe]dh.
REMOVE:NotContrIfEven (".*ijidh"r)
   IF (0 (".*[oe]dh"r)) ;
# This rule might be problematic in light of the empirical work.
# Span sentences
# Removes Pl3 when Sg1 or Sg2 is found to the left and no (N Pl Nom) is found
# to the left.
# NOTE(review): the leading "" is an empty wordform restriction; it may be the
# residue of a "<...>" wordform whose angle-bracket content was lost - confirm
# against the upstream grammar before relying on this rule.
# NOTE(review): the `<` in `<*-1` presumably lets the scan cross the left
# window boundary (cf. the heading above) - confirm against the CG-3 manual.
"" REMOVE Pl3 IF (<*-1 Sg1 OR Sg2)(NOT *-1 (N Pl Nom));
# Nomen
# A proper noun directly followed by another proper noun is attributive.
SELECT Attr
   IF (0 Prop)
      (1 Prop) ; # Too hard.
# ... and is not attributive when the next word is not a proper noun.
REMOVE (Prop Attr)
   IF (NOT 1 Prop) ;

# Directly before an adposition: choose genitive, discard nominative.
SELECT:GenToAdp Gen
   IF (1 Adp) ;
REMOVE:NotNomToAdp Nom
   IF (1 Adp) ;

# Genitive when the next word is "lea" (V) in Pl3 or Sg3 and a nominative noun
# follows further right with no NPNH in between.
SELECT:GenIfLeaNom Gen
   IF (1 ("lea" V) LINK 0 Pl3 OR Sg3)
      (*1 (N Nom) BARRIER NPNH) ;
# Dan lin baenieh njaelmesne.
# Singular accusative between an unambiguous INFV verb to the left (no NPNH in
# between) and an unambiguous verb directly to the right.
SELECT:AccIfSVOV (Acc Sg)
   IF (*-1C INFV BARRIER NPNH)
      (1C V) ; # consider VFIN for INFV
# Verb or Noun
# Drop the plural-nominative noun reading when the cohort also has an Inf
# reading and an Ind or Imp is found to the left with a Nom further left of it.
# The set operator is spelled OR (it was lower-case "or") to match every other
# rule in this file.
REMOVE:NotNomIfInf (N Pl Nom) IF (0 Inf)(*-1 Ind OR Imp LINK *-1 Nom);
## jieledh.
# Dem
# Plural genitive demonstrative when a plural genitive noun follows with no
# NPNH in between.
SELECT:DemGenIfNGen (Dem Pl Gen)
   IF (*1 (N Pl Gen) BARRIER NPNH) ;
# Klaasen tjirrh olkese goevli doj veelkes baeniej gåajkoe, ...

# CC and CS or Adv
# "goh" directly after Neg is not a subjunction.
REMOVE:GoNotCs CS
   IF (-1 Neg)
      (0 ("goh")) ;
# Drop (N Nom) from a cohort that also has CS when a nominative noun follows.
REMOVE:NotNomIfCs (N Nom)
   IF (0 CS)
      (1 (N Nom)) ;
# Guktie vielle Baktuse, giehtelh maa!
# NOTE(review): the rule name mentions Cs but the target is Pcle - confirm intent.
REMOVE:CsIfNotFuncword Pcle
   IF (NEGATE *-1 NOT-ADV-INT-PCLE)
      (0 CS) ;
# Jaa, jis damth seekere.....
# Drop CS from a cohort that also has Adv, directly after CC or ("mij" Nom).
REMOVE:NotCsIfCc CS
   IF (-1 CC OR ("mij" Nom))
      (0 Adv) ;
# Jih dellie jeehti...
# Same, when the preceding position holds no WORD.
REMOVE:NotCsIfBOS CS
   IF (NEGATE -1 WORD)
      (0 Adv) ;

# Putative contract verb
# Verb person
# Interj
# "ho" directly followed by "ho" is an interjection.
SELECT:InterjIfInterj Interj
   IF (0 ("ho"))
      (1 ("ho")) ;
SECTION
# Syntactic tags
# --------------
# Each syntactic label is wrapped in a one-member LIST of the same name.

# Verb labels, plus the set joining both main-verb labels
LIST @+FAUXV     = @+FAUXV ;
LIST @+FMAINV    = @+FMAINV ;
LIST @-FAUXV     = @-FAUXV ;
LIST @-FMAINV    = @-FMAINV ;
LIST @-FSUBJ     = @-FSUBJ ;
SET  FMAINV      = @-FMAINV OR @+FMAINV ;

# Remaining labels, in their original order
LIST @ADV-A      = @ADV-A ;
LIST @ADV-ADV    = @ADV-ADV ;
LIST @ADVL       = @ADVL ;
LIST @AN>        = @AN> ;
LIST @ActioN>    = @ActioN> ;
LIST @CC         = @CC ;
LIST @CMPND      = @CMPND ;
LIST @CS         = @CS ;
LIST @DN>        = @DN> ;
LIST @GA>        = @GA> ;
LIST @GN>        = @GN> ;
LIST @GP<        = @GP< ;
LIST @GP>        = @GP> ;
LIST @HNOUN      = @HNOUN ;
LIST @INTERJ     = @INTERJ ;
LIST @MEASURE    = @MEASURE ;
LIST @NNum>      = @NNum> ;
LIST @NumN<      = @NumN< ;
LIST @NPron<     = @NPron< ;
LIST @NQ<        = @NQ< ;
LIST @NUM-PRON   = @NUM-PRON ;
LIST @OBJ        = @OBJ ;
LIST @OPRED      = @OPRED ;
LIST @PCLE       = @PCLE ;
LIST @PCLE-COMPL = @PCLE-COMPL ;
LIST @PROP>      = @PROP> ;
LIST @PrcN>      = @PrcN> ;
LIST @PronN<     = @PronN< ;
LIST @PronN>     = @PronN> ;
LIST @QN<        = @QN< ;
LIST @QN>        = @QN> ;
LIST @SPRED      = @SPRED ;
LIST @SUBJ       = @SUBJ ;
LIST @SUBJ-QH    = @SUBJ-QH ;
LIST @TITLE      = @TITLE ;
LIST @VOC        = @VOC ;
LIST @X          = @X ;
# Labels assigned from the word class alone.
MAP (@CC)      TARGET CC ;
MAP (@CS)      TARGET CS ;
MAP (@PROP>)   TARGET (Prop Attr) ;
MAP (@INTERJ)  TARGET Interj ;
MAP (@PCLE)    TARGET Pcle ;

# A TITLE lemma directly before a proper noun tagged Fem, Mal or Sur.
ADD (@TITLE)   TARGET TITLE
   IF (1 Prop LINK 0 Fem OR Mal OR Sur) ;

# Nominative demonstratives and attributive adjectives attach to a nominative
# NP head found to the right with no NPNH in between.
MAP (@DN>)     TARGET (Dem Nom)
   IF (*1 NP-HEAD + Nom BARRIER NPNH) ;
MAP (@AN>)     TARGET (A Attr)
   IF (*1 NP-HEAD + Nom BARRIER NPNH) ;

# Neg or "lea" is a finite auxiliary when a verb follows before a clause boundary.
MAP (@+FAUXV)  TARGET Neg OR ("lea")
   IF (*1 V BARRIER CLB) ;
# A VFIN reading is the finite main verb when the *0 scan finds no VFIN before
# a clause boundary.
MAP (@+FMAINV) TARGET VFIN
   IF (NOT *0 VFIN BARRIER CLB) ;

# Postpositions and the Ine / Ela / Ill cases are adverbials.
MAP (@ADVL)    TARGET Po ;
MAP (@ADVL)    TARGET Ine OR Ela OR Ill ;
# MAP (@P<) TARGET (N Gen) IF (*-1 Pr BARRIER NPNH) ; # No Pr
# Genitive noun directly before a postposition.
MAP (@>P)      TARGET (N Gen)
   IF (1 Po) ;

MAP (@OBJ)     TARGET Acc ;

# Singular nominative NP head: @SPRED when a Sg3 verb is found to the left that
# is itself directly preceded by (Nom Sg); @SUBJ when that linked test fails.
MAP (@SPRED)   TARGET NP-HEAD + Nom + Sg
   IF (*-1 V + Sg3 LINK -1 (Nom Sg)) ;
MAP (@SUBJ)    TARGET NP-HEAD + Nom + Sg
   IF (NEGATE *-1 V + Sg3 LINK -1 (Nom Sg)) ;

MAP (@ADVL)    TARGET Adv ;

# Catch-all label for WORD readings. Per CG-3 MAP semantics, readings that were
# already mapped by an earlier MAP rule are not targeted again.
MAP (@X)       TARGET WORD ;
# =============== #
# Syntactic rules #
# =============== #
SECTION

# Discard readings that carry the @X label. In standard CG behaviour the last
# remaining reading of a cohort is never removed.
REMOVE (@X) ;