# Divvun & Giellatekno - open source grammars for Sámi and other languages
# Copyright © 2000-2010 The University of Tromsø & the Norwegian Sámi Parliament
# http://giellatekno.uit.no & http://divvun.no
#
# This program is free software; you can redistribute and/or modify
# this file under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version. The GNU General Public License
# is found at http://www.gnu.org/licenses/gpl.html. It is
# also available in the file $GTHOME/LICENSE.txt.
#
# Other licensing options are available upon request, please contact
# giellatekno@hum.uit.no or divvun@samediggi.no

# ==================================================================== #
# ==================================================================== #
#    N O R T H E R N   S Á M I   T E X T   D I S A M B I G U A T O R
# ==================================================================== #
# ==================================================================== #

# Sámi language technology project 2003-2006, University of Tromsø

# =========================================== #
#          Short table of contents            #
# =========================================== #

# Usage:
# First run through sme-dis.rle, then run through sme-tdis.rle:
#   preprocess --abbr=bin/abbr.txt text |
#   lookup -flags mbTT -utf8 bin/sme.fst | lookup2cg |
#   vislcg --grammar=src/sme-dis.rle |
#   vislcg --grammar=src/sme-tdis.rle --minimal | less

# ========== #
# Delimiters #
# ========== #

DELIMITERS = "<¶>";

# ============= #
# Tags and sets #
# ============= #

SETS

# (Remove all lines that start with SETS if you want to use mdis.)

# End of clause and beginning of clause are defined in a way that lets the
# file be read both by the CG-2 and the vislcg formalisms.
# Variant for vislcg and CG-2 together:
LIST BOS = (>>>) (~~);
LIST EOS = (<<<) (~~);
# Variant for vislcg alone (beginning and end of sentence):
#LIST BOS = (>>>); LIST EOS = (<<<);
# Tags declared as single-membered LISTs
# ======================================
# Every tag below gets a set with the same name as the tag, so that the tag
# can be used by name in SET expressions and rules.

# Parts of speech
# ---------------
LIST N = N ;  LIST A = A ;  LIST Adv = Adv ;  LIST V = V ;  LIST Pron = Pron ;
LIST CS = CS ;  LIST CC = CC ;  LIST Po = Po ;  LIST Pr = Pr ;  LIST Pcle = Pcle ;
LIST Num = Num ;  LIST Prop = Prop ;  LIST LEFT = LEFT ;  LIST RIGHT = RIGHT ;

# POS sub-categories
# ------------------
LIST Pers = Pers ;  LIST Dem = Dem ;  LIST Interr = Interr ;  LIST Indef = Indef ;
LIST Recipr = Recipr ;  LIST Refl = Refl ;  LIST Rel = Rel ;
LIST Interj = Interj ;  LIST Adp = Adp ;

# Morphosyntactic properties
# --------------------------
LIST Nom = Nom ;  LIST Acc = Acc ;  LIST Gen = Gen ;  LIST Ill = Ill ;
LIST Loc = Loc ;  LIST Com = Com ;  LIST Ess = Ess ;
LIST Sg = Sg ;  LIST Pl = Pl ;  LIST Du = Du ;
LIST RCmpnd = RCmpnd ;  LIST SgNomCmp = SgNomCmp ;  LIST SgGenCmp = SgGenCmp ;
LIST PxSg1 = PxSg1 ;  LIST PxSg2 = PxSg2 ;  LIST PxSg3 = PxSg3 ;
LIST PxDu1 = PxDu1 ;  LIST PxDu2 = PxDu2 ;  LIST PxDu3 = PxDu3 ;
LIST PxPl1 = PxPl1 ;  LIST PxPl2 = PxPl2 ;  LIST PxPl3 = PxPl3 ;
LIST Comp = Comp ;  LIST Superl = Superl ;  LIST Attr = Attr ;
LIST Card = Card ;  LIST Ord = Ord ;  LIST Date = Date ;  LIST Range = Range ;
LIST Qst = Qst ;  LIST Foc = Foc ;
LIST IV = IV ;  LIST TV = TV ;  LIST Pass = Pass ;
LIST Prt = Prt;  LIST Prs = Prs ;
LIST Ind = Ind ;  LIST Pot = Pot ;  LIST Cond = Cond ;
LIST Imprt = Imprt ;  LIST ImprtII = ImprtII ;
LIST Sg1 = Sg1 ;  LIST Sg2 = Sg2 ;  LIST Sg3 = Sg3 ;
LIST Du1 = Du1 ;  LIST Du2 = Du2 ;  LIST Du3 = Du3 ;
LIST Pl1 = Pl1 ;  LIST Pl2 = Pl2 ;  LIST Pl3 = Pl3 ;
LIST Inf = Inf ;  LIST ConNeg = ConNeg ;  LIST Neg = Neg ;
LIST PrfPrc = PrfPrc ;  LIST VGen = VGen ;  LIST PrsPrc = PrsPrc ;
LIST Ger = Ger ;  LIST Sup = Sup ;  LIST Actio = Actio ;  LIST Actor = Actor ;
LIST VAbess = VAbess ;

# Derivation
# ----------
LIST Der/adda = Der/adda ;  LIST Der/ahtti = Der/ahtti ;  LIST Der/alla = Der/alla ;
LIST Der/asti = Der/asti ;  LIST Der/easti = Der/easti ;  LIST Der/aš = Der/aš ;
LIST Der/d = Der/d ;  LIST Der/eaddji = Der/eaddji ;  LIST Der/eamoš = Der/eamoš ;
LIST Der/amoš = Der/amoš ;  LIST Der/eapmi = Der/eapmi ;  LIST Der/g = Der/g ;
LIST Der/geahtes = Der/geahtes ;  LIST Der/gielat = Der/gielat ;
# The file used to declare `LIST !better: = !better: ;` at this point.
# No set or rule in this file refers to `!better:`; it looks like residue of
# a note picked up when the tag list was generated (TODO confirm), so the
# declaration is disabled rather than deleted.
#LIST !better: = !better: ;
LIST Der/NuA = Der/NuA ;  LIST Der/h = Der/h ;  LIST Der/heapmi = Der/heapmi ;
LIST Der/hudda = Der/hudda ;  LIST Der/huhtti = Der/huhtti ;  LIST Der/huvva = Der/huvva ;
LIST Der/halla = Der/halla ;  LIST Der/j = Der/j ;  LIST Der/l = Der/l ;
LIST Der/laš = Der/laš ;  LIST Der/las = Der/las ;  LIST Der/hat = Der/hat ;
LIST Der/meahttun = Der/meahttun ;  LIST Der/muš = Der/muš ;  LIST Der/n = Der/n ;
LIST Der/š = Der/š ;  LIST Der/st = Der/st ;  LIST Der/stuvva = Der/stuvva ;
LIST Der/upmi = Der/upmi ;  LIST Der/supmi = Der/supmi ;  LIST Der/us = Der/us ;
LIST Der/vuohta = Der/vuohta ;  LIST Der/goahti = Der/goahti ;
LIST Der/lágan = Der/lágan ;  LIST Der/lágán = Der/lágán ;  LIST Der/lágaš = Der/lágaš ;
LIST Der/jagáš = Der/jagáš ;  LIST Der/Dimin = Der/Dimin ;
LIST Der/viđá = Der/viđá ;  LIST Der/viđi = Der/viđi ;  LIST Der/veara = Der/veara ;
LIST Der/duohke = Der/duohke ;  LIST Der/duohkai = Der/duohkai ;
LIST Der/vuolle = Der/vuolle ;  LIST Der/vuollai = Der/vuollai ;
LIST Der/vuolde = Der/vuolde ;

# Syntactic tags
LIST @+FAUXV = @+FAUXV ;  LIST @+FMAINV = @+FMAINV ;
LIST @-FAUXV = @-FAUXV ;  LIST @-FMAINV = @-FMAINV ;  LIST @-FSUBJ = @-FSUBJ ;
LIST @ADV-A = @ADV-A ;  LIST @ADV-ADV = @ADV-ADV ;  LIST @ADVL = @ADVL ;
LIST @AN> = @AN> ;  LIST @ActioN> = @ActioN> ;
LIST @CC = @CC ;  LIST @CMPND = @CMPND ;  LIST @CS = @CS ;
LIST @DN> = @DN> ;  LIST @GA> = @GA> ;  LIST @GN> = @GN> ;
LIST @GP< = @GP< ;  LIST @GP> = @GP> ;
LIST @HNOUN = @HNOUN ;  LIST @INTERJ = @INTERJ ;
LIST @NNum> = @NNum>;  LIST @NumN< = @NumN<;
LIST @NPron< = @NPron< ;  LIST @NQ< = @NQ< ;  LIST @NUM-PRON = @NUM-PRON ;
#LIST @NUMBER = @NUMBER ;
LIST @OBJ = @OBJ ;  LIST @OPRED = @OPRED ;
LIST @PCLE = @PCLE ;  LIST @PCLE-COMPL = @PCLE-COMPL ;
LIST @PROP> = @PROP> ;  LIST @PrcN> = @PrcN> ;
LIST @PronN< = @PronN< ;  LIST @PronN> = @PronN> ;
LIST @QN< = @QN< ;  LIST @QN> = @QN> ;
LIST @SPRED = @SPRED ;  LIST @SUBJ = @SUBJ ;  LIST @SUBJ-QH = @SUBJ-QH ;
LIST @APP = @APP ;
# LIST @MEASURE = @MEASURE ;   # Not in use anymore. We use @QN<.
LIST @TITLE = @TITLE ;  LIST @VOC = @VOC ;  LIST @X = @X ;

# Semantic tags
# -------------
# (Org was declared with the keyword spelled `List`; normalised to `LIST`
# like every other declaration in the file.)
LIST Ani = Ani ;  LIST Fem = Fem ;  LIST Mal = Mal ;  LIST Obj = Obj ;
LIST Org = Org ;  LIST Plc = Plc ;  LIST Sur = Sur ;  LIST Tit = Tit ;

# Other tags
# ----------
LIST ABBR = ABBR ;  LIST ACR = ACR ;  LIST CLB = CLB ;  LIST PUNCT = PUNCT ;

# Single-word sets
# ----------------
LIST COMMA = "," ;
LIST ¶ = ¶;
LIST OKTA = "okta";
LIST go = "go" ;

# Initials
# --------
LIST INITIAL = "a" "b" "c" "d" "e" "f" "g" "h" "i" "j" "k" "l" "m"
               "n" "o" "p" "q" "r" "s" "t" "u" "v" "w" "x" "y" "z"
               "á" "æ" "ø" "å" "ö" "ä" ;

# Tag sets
# ========

SETS

# WORD: any word.
LIST WORD = N A Adv V Pron CS CC Po Pr Interj Pcle Num ABBR ACR Date Range ;
SET REALWORD = WORD - Num ;

# Derivational affixes
# --------------------
# The commented-out lines show the tag names from before the derivation tags
# got their `Der/` prefix; the active lists use the prefixed names.
#LIST V-DER = adda alla d g h asti halla hudda huhtti huvva j l ahtti st
#             stuvva goahti ;
LIST V-DER = Der/adda Der/alla Der/d Der/g Der/h Der/asti Der/hudda
             Der/huhtti Der/huvva Der/j Der/l Der/ahtti Der/st Der/stuvva
             Der/goahti ;

#LIST N-DER = eaddji eamoš eapmi heapmi š muš upmi us vuohta Actor Actio;
LIST N-DER = Der/eaddji Der/eamoš Der/eapmi Der/heapmi Der/š Der/muš
             Der/upmi Der/us Der/vuohta Actor Actio;

#LIST A-DER = geahtes keahtes heapmi lágan laš meahttun ;
LIST A-DER = Der/geahtes Der/keahtes Der/heapmi Der/lágan Der/laš
             Der/meahttun ;

#SET DER = V-DER | N-DER | A-DER ;

# Case sets
# ---------
LIST ADVLCASE = Ill Loc Com Ess ;
LIST CASE = Nom Acc Gen Ill Loc Com Ess ;

SET NOT-NOM = CASE - Nom ;
SET NOT-GEN = CASE - Gen ;
SET NOT-ACC = CASE - Acc ;

# Verb sets
# ---------

# Finiteness and mood
# - - - - - - - - - -
# Problem: "In boahtán" is an invisible indicative
SET V-IND-FIN = Prs | Prt ;
SET V-MOOD = Ind | Pot | Imprt | ImprtII | Cond ;
SET VFIN = V-MOOD - ConNeg | ("gč");
SET VFIN-POS = V-MOOD - ConNeg - Neg ;
#SET VFIN-NOT-IMP = Ind | Pot | Cond ;
SET VFIN-NOT-IMP = VFIN - Imprt ;

# Person
# - - - -
# One list per person/number combination.
# Note that imperative verbs are not included in these sets!
LIST V-SG1 = (V Ind Prs Sg1) (V Ind Prt Sg1) (V Cond Prs Sg1)
             (V Cond Prt Sg1) (V Pot Prs Sg1) (V Neg Ind Sg1);
LIST V-SG2 = (V Ind Prs Sg2) (V Ind Prt Sg2) (V Cond Prs Sg2)
             (V Cond Prt Sg2) (V Pot Prs Sg2) (V Neg Ind Sg2);
LIST V-SG3 = (V Ind Prs Sg3) (V Ind Prt Sg3) (V Cond Prs Sg3)
             (V Cond Prt Sg3) (V Pot Prs Sg3) (V Neg Ind Sg3);
LIST V-DU1 = (V Ind Prs Du1) (V Ind Prt Du1) (V Cond Prs Du1)
             (V Cond Prt Du1) (V Pot Prs Du1) (V Neg Ind Du1);
LIST V-DU2 = (V Ind Prs Du2) (V Ind Prt Du2) (V Cond Prs Du2)
             (V Cond Prt Du2) (V Pot Prs Du2) (V Neg Ind Du2);
LIST V-DU3 = (V Ind Prs Du3) (V Ind Prt Du3) (V Cond Prs Du3)
             (V Cond Prt Du3) (V Pot Prs Du3) (V Neg Ind Du3);
LIST V-PL1 = (V Ind Prs Pl1) (V Ind Prt Pl1) (V Cond Prs Pl1)
             (V Cond Prt Pl1) (V Pot Prs Pl1) (V Neg Ind Pl1);
LIST V-PL2 = (V Ind Prs Pl2) (V Ind Prt Pl2) (V Cond Prs Pl2)
             (V Cond Prt Pl2) (V Pot Prs Pl2) (V Neg Ind Pl2);
LIST V-PL3 = (V Ind Prs Pl3) (V Ind Prt Pl3) (V Cond Prs Pl3)
             (V Cond Prt Pl3) (V Pot Prs Pl3) (V Neg Ind Pl3);
# Some subsets of the VFIN sets
# - - - - - - - - - - - - - - -
# Unions of the per-person lists, grouped by number and by person.
SET V-SG    = V-SG1 | V-SG2 | V-SG3 ;
SET V-DU    = V-DU1 | V-DU2 | V-DU3 ;
SET V-PL    = V-PL1 | V-PL2 | V-PL3 ;
SET V-DU-PL = V-DU1 | V-DU2 | V-DU3 | V-PL1 | V-PL2 | V-PL3 ;

SET V-NOT-SG1 = VFIN-NOT-IMP - V-SG1 ;
SET V-NOT-SG3 = V-SG1 | V-SG2
              | V-DU1 | V-DU2 | V-DU3
              | V-PL1 | V-PL2 | V-PL3 ;

SET V-1-2 = V-SG1 | V-SG2 | V-DU1 | V-DU2 | V-PL1 | V-PL2 ;
SET V-3   = V-SG3 | V-DU3 | V-PL3 ;

# Sets consisting of forms of "leat" (these ones need to be rewritten)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
SET LEAN = ("leat" V Ind Prs Sg1)|("leat" V PrfPrc);
SET LEAI = ("leat" V Ind Prt Sg3)|("leat" V Imprt Prs Sg3);
SET LEAT = ("leat" V Ind Prs Pl1)|("leat" V Ind Prs Pl3)|
           ("leat" V Ind Prs Sg2)|("leat" V Inf)|("leat" V Ind Prs ConNeg);

SET LEAT-DU-PL = ("leat" V Du1)|("leat" V Du2)|("leat" V Du3)|
                 ("leat" V Pl1)|("leat" V Pl2)|("leat" V Pl3);

# Not referred to by any rule.
SET LEAT-DU3-PL3 = ("leat" V Du3) | ("leat" V Pl3);

LIST LEAT-FIN-NOT-IMP = ("leat" V Ind) ("leat" V Cond) ("leat" V Pot);

# Not referred to by any rule.
LIST LEAN-PRC = ("leat" V PrfPrc);
LIST LEAN-SG1 = ("leat" V Ind Prs Sg1);

# Pronoun sets
# ------------

SETS

# Nominative personal pronouns, one set per person/number.
# SON and SII additionally require the base form "son".
SET MUN      = (Pron Pers Sg1 Nom);
SET DON      = (Pron Pers Sg2 Nom);
SET SON      = ("son" Pron Pers Sg3 Nom);
SET MOAI     = (Pron Pers Du1 Nom);
SET DOAI     = (Pron Pers Du2 Nom);
SET SOAI     = (Pron Pers Du3 Nom);
SET MII-PERS = (Pron Pers Pl1 Nom);
SET DII      = (Pron Pers Pl2 Nom);
SET SII      = ("son" Pron Pers Pl3 Nom);

SET PPRON-DU-PL = MOAI | DOAI | SOAI | MII-PERS | DII | SII ;
SET PPRON-NOT-SII = MUN | DON | SON | MOAI | DOAI | SOAI | MII-PERS | DII ;

LIST PPRON-GEN = (Sg1 Gen) (Sg2 Gen) (Sg3 Gen)
                 (Du1 Gen) (Du2 Gen) (Du3 Gen)
                 (Pl1 Gen) (Pl2 Gen) (Pl3 Gen);
SET PPRON-NOT-GEN = (Pron Pers) - PPRON-GEN ;

SET DEM-SG = (Pron Dem Sg Nom);
SET DEM-PL = (Pron Dem Pl Nom);
SET NOT-DEM = WORD - Dem ;

# Adjectival sets and their complements
# -------------------------------------
# A-ATTR is used in one rule. Not needed?
SET A-ATTR = (A Attr) ;
SET LEX-A = A - A-DER ;
SET A-CASE = A - Attr - Adv ;
LIST A-CC = A CC ;

SET NOT-ADJ = WORD - A ;
SET NOT-ADJ-COMMA = WORD - A - COMMA ;
SET NOT-Attr = WORD - Attr ;
SET NOT-ADJ-PCLE = WORD - A - Pcle ;
SET NOT-ADJ-CC = WORD - A-CC ;
SET NOT-AA = WORD - A - Adv ;

# ... and many others.
# The "suddu" entry used the bare tag `laš`; the derivation tags in this file
# carry a `Der/` prefix (compare A-DER), so it now reads `Der/laš`.
LIST NOMINAL-ADJ = "guoktilaš" "lámis" "oasálaš" ("suddu" Der/laš) "viissis";

# Adverbial sets and their complements
# ------------------------------------
SET LEX-ADV = Adv - (A*) ;
# LIST LEX-ADV = "gusto";

SET ADVERBIAL = Adv | Ill | Loc | Com | Ess ;

SET NOT-ADV = WORD - Adv;
SET NOT-ADV-PCLE = WORD - Adv - Pcle ;
SET NOT-ADV-INDEF = WORD - Adv - Indef ;
SET NOT-ADV-PCLE-ILL = WORD - Adv - Pcle - Ill ;
SET NOT-ADV-PCLE-INDEF = WORD - Adv - Pcle - Indef ;
SET NOT-ADV-PCLE-NEG = WORD - Adv - Pcle - Neg ;
SET NOT-ADVL-PCLE-NEG = WORD - @ADVL - @GP< - Pcle - Neg ;

#Ask Eckhardt.
LIST IKTE = "ikte" "dolin" "dovle" ;

# MO: these introduce finite clauses.
LIST MO = "goas" "gokko" "gos" "gosa" "makkár" "manne" "mo" "mot" "movt"
          "nugo" (V Qst);
# Sets for adverbs that have lookalikes
# - - - - - - - - - - - - - - - - - - -
# Here come some adverbs that have identical twins in other POS.
# If these are found in Adv contexts, we treat them as adverbs.
LIST ADV-NOT-VERB = "dušše" ;
LIST ADV-NOT-NOUN = "easka" "várra" ;
LIST ADV-NOT-NAMES = "Ain" "Anne" "Diego" "Dieppe" "Enge" "Galle" "Haga"
                     "Joba" "Johan" "Liikka" "Mai" "Mannes" "Mo" "Mot"
                     "Naba" "Nan" "Oktan" "Sierra" "Sokka" "Villa" ;
SET ADV-NOT-OTHER-POS = ADV-NOT-VERB | ADV-NOT-NOUN | ADV-NOT-NAMES ;

# Sets of elements with common syntactic behaviour
# ================================================

SETS

# VERBS
# -----

# V is all readings with a V tag in them, VERB should be the ones
# without an N tag following the V.
SET VERB = V - (V N);

# VERB but not V-DER.
SET LEX-V = VERB - V-DER;

# Not in use.
SET V-NOT-CMPND = V - RCmpnd;

# 'Dáidit' can appear without 'leat'.
LIST COPULAS = "dáidit" "bissut" "gártat" "leahkit" "leat" "orrut" "šaddat" ;

SET V-NOT-COP = V - COPULAS ;

# Verbs that never have arguments of their own - so we leave out "áigut",
# for example.
LIST MOD-ASP = "berret" "dáidit" "fertet" "galgat" "gillet" "lávet"
               "sáhttit" "seahtit" "soaitit" "viggat" ;

# These verbs can take arguments, so they do not belong in the AUX group,
# but they are nevertheless mapped to (@+FAUXV).
LIST AUX-OR-MAIN = "áigut" "beassat" "hálidit" "háliidit" "máhttit"
                   "nagodit" "orrut" "sihtat" "veadjit";

SET AUX = COPULAS | Neg | MOD-ASP | AUX-OR-MAIN ;

# The set of verbs that cannot be auxiliaries.
# Does not include AUX-OR-MAIN.
SET V-NOT-AUX = V - AUX ;

SET VFIN-NOT-AUX = VFIN - AUX ;

# The set of verbs that can be main verbs. Includes AUX-OR-MAIN.
SET V-MAIN = V - COPULAS - Neg - MOD-ASP | ("orrut") ;

# + is intersection, this should work.
# The set is used in one rule with no hits.
SET V-DER-PASS = V + V-DER + Pass;

SET V-PASS = (V TV Pass);

# No longer in use.
SET V-NOT-AUX-PASS = V - AUX - V-PASS ;

# No longer in use.
SET V-ACT = V - Pass;
# Appears in one rule.
SET ACT-PRFP = PrfPrc - V-PASS;

# All active verbs with a TV tag, including AUX-OR-MAIN.
SET V-TRANS-ACT = TV - Pass ;

SET V-TRANS-ACT-SG3 = V-SG3 - Pass ;
SET V-TRANS-ACT-NOT-AUX = TV - V-PASS - AUX ;

# Sets for verbs choosing oblique objects or adverbials
# - - - - - - - - - - - - - - - - - - - - - - - - - - -

# Note that "fuolahit" obviously has two homonymous variants:
# fuolahit1+Acc, fuolahit2+Loc
LIST LOC-VERB = "ballat" "beassat" "beroštit" "biehttalit" "bihtit"
                "boahtit" "ceavzit" "čuoččut" "čuovvut" "dolkat"
                "fuolahit" "fuollat" "geargat" "heaitit" "ilbmat"
                "jearrat" "luohpat" "nohkkot" "váruhit" "veallát";

# Note that "leaikkástallat" has two homonymous variants:
# leaikkástallat1 (IV), leaikkástallat2+Com
LIST COM-VERB = "árvalit" "árvvohuššat" "ávkašuvvat" "bargat"
                "bártašuvvat" "háladit" "hilbošit" "humadit"
                "leaikkastallat" "meannudit" "náitalit" "riidalit"
                "ságastaddat" "ságastallat" "šiehtadit" ;

LIST ILL-VERB = "áibbašit" "álgit" "báitit" "beassat" "čohkkedit"
                "čujuhit" "čuohcit" "deaivat" "doaškut" "dorvvastit"
                "došket" "duhtat" "geahččat" "gullat" "guoskat" "gustot"
                "heivet" "irggástallat" "jáhkkit" "liikot" "luohttit"
                "mannat" "ollet" "oskut" "riepmat" "ráhkkanit" "soahpat"
                "suhttat" "vuolgit";

LIST ACC-LOC-VERB = "biehttalit" "earuhit" "gieldit" "jearrat" "viežžat" ;

LIST ACC-ILL-VERB = "addit" "bidjat" "mearkkašit";

# Other verb sets
# - - - - - - - -
LIST INF-VERB = "adnot" "astat" "ádjánit" "áigut" "álgit" "ásahit"
                "bágget" "bávččagit" "beassat" "berret" "bivdit" "bivvat"
                "bistit" "boahtit" "bovdet" "čohkkát" "čohkkedit"
                "čohkkánit" "čuoččahit" "čuoččastit" "čuorvut" "čurggodit"
                ("dadjat" Pass) "dagahit" "dáhttut" "dáidit" "dárbbašit"
                "diktit" "doaivut" "doapmat" "duostat" "fertet" "fuobmát"
                "fuolahit" "galgat" "gáibidit" "gárrut" "gártat"
                "geahččalit" "geatnegahttit" "gierdat" "gillet" "gohččut"
                "háhppehit" "hálidit" "háliidit" "hárjánit" "heivet"
                "lávet" "mannat" "máhttit" "máššat" "movttiidahttit"
                "muitit" "nagodit" "navdit" "oahpahit" "oahppat" "oažžut"
                "ollet" "ribahit" "riepmat" "sáhttit" "seahtit" "sihtat"
                "soaitit" "suovvat" "šaddat" "stađđat" "veadjit" "viggat"
                "viššat" "vuogáiduvvat" "vuolgit" "vuollánit" "vuordit";

# These are verbs taking accusative objects and infinitives;
# a subset of INF-VERB.
## Ii suova nieiddas náitalit.
LIST ACC-INF-VERB = "bivdit" "čuorvut" "dagahit" "diktit" "gohččut"
                    "movttiidahttit" "sihtat" "suovvat" ;

# Verbs that occur with objects that have secondary predicates.
LIST OPRED-VERB = "atnit" "dadjat" "dahkat" "dulkot" "geavahit"
                  "gohčodit" "gohččut" "ráhkadit" "válljet";

# These verbs typically combine with place adverbials in the genitive case
# (i.e. not accusative).
LIST MOVEMENT-VERB = "čuoigat" "mannat" "sihkkelastit" "viehkalit"
                     "viehkat" "vuodjat" "vuodjit" "vuolgit" ;

# Danger: This list could be too long, in certain respects.
# TODO: Think of counter examples, i.e. sentences where we have these verbs,
# but where oktii is not an adverbial.
LIST TOGETHER-VERB = "goallostuvvot" "beaškkehat" "bidjat" "čatnit"
                     "suddat" "heivet" "biddjojuvvot" "časkit" "geassit"
                     "gullat" "rehkenastit" "ordnet" "soahpat" "gávnnadit"
                     "heivehit" "čatnat" "doaibmat" "iskat" "laktit"
                     "seaguhit";
# Noun sets
# ---------

# NP sets defined according to their morphosyntactic features
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
LIST N-SG-NOM = (N Sg Nom);
LIST N-PL-NOM = (N Pl Nom);
LIST N-NOM    = (N Nom) ;
LIST N-GEN    = (N Gen);

SET LEX-N = N - N-DER;

LIST PROP = (N Prop);
SET CNOUN = N - Prop;
SET CNOUN-PL-NOM = (N Pl Nom) - Prop ;

SET HNOUN = N - RCmpnd ;
SET HNOUN-NOM = (N Nom) - RCmpnd ;

SET N-NOT-GEN = N - (N Gen) ;

SET NP-HEAD        = Pron | HNOUN ;
SET NP-HEAD-SG     = (Pron Sg) | (Pron Sg3) | (N Sg) - RCmpnd;
SET NP-HEAD-PL     = (Pron Pl) | (Pron Pl3) | (N Pl) - RCmpnd;
SET NP-HEAD-SG-NOM = (Pron Sg Nom) | (Pron Sg3 Nom) | (N Sg Nom) - RCmpnd;
SET NP-HEAD-PL-NOM = (Pron Pl Nom) | (Pron Pl3 Nom) | (N Pl Nom) - RCmpnd;
SET NP-HEAD-NOM    = NP-HEAD-SG-NOM | NP-HEAD-PL-NOM;
SET NP-HEAD-ACC    = (Pron Acc) | (N Acc) - RCmpnd;
SET NP-HEAD-GEN    = (Pron Gen) | (N Gen) - RCmpnd;

# The PRE-NP-HEAD family of sets
# - - - - - - - - - - - - - - - -
SET PRE-NP-HEAD = (Prop Attr) | (Prop @PROP>) | A | (Pron Pers Gen)
                | (N Gen) | Num | RCmpnd | CC | (Pron Dem)
                | (Pron Refl Gen) | (Pron Indef) | (PrfPrc @AN>) | PrsPrc ;

SET NP-MEMBER = PRE-NP-HEAD | N ;

# There must be a sensible naming convention for these sets...

# NPNH = "NOT-PRE-NP-HEAD".
SET NPNH = WORD - PRE-NP-HEAD | ABBR ;
# NPNHA = "NOT (PRE-NP-HEAD-OR-ADV)"
SET NPNHA = WORD - PRE-NP-HEAD - Adv ;
# NPNHAI = "...-OR-INDEF"
SET NPNHAI = WORD - PRE-NP-HEAD - Adv - Indef ;
# Illative indir.obj.
SET NPNHAII = WORD - PRE-NP-HEAD - Adv - Indef - Ill ;
SET NPNHAIIP = WORD - PRE-NP-HEAD - Adv - Indef - Ill - Pcle ;
# "njaŋggofirpmiiguin bivdin"
SET NPNHC = WORD - PRE-NP-HEAD - Com ;
SET NPNHCCC = WORD - PRE-NP-HEAD - CC - COMMA ;

# Other negatively defined sets
# - - - - - - - - - - - - - - -
SET NOT-N  = WORD - N ;
SET NOT-A  = WORD - A ;
SET NOT-NA = WORD - N - A ;

# The set NP-BORDER is bigger.
SET NOT-NP = VFIN | ConNeg | Inf | Pcle | Interj | CS ;
# Nominal sets defined according to their lexical properties
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
LIST XMEANTA = "dokumeanta"
               "boles#departemeanta" "dutkan#departemeanta"
               "eanandoallo#departemeanta" "energi#departemeanta"
               "finans#departemeanta" "gielda#departemeanta"
               "guolástus#departemeanta" "guovlo#departemeanta"
               "guovlu#departemeanta" "industri#departemeanta"
               "justiisa#departemeanta" "justis#departemeanta"
               "kultur#departemeanta" "politi#departemeanta"
               "sis#departemeanta" ;

LIST XMENTA = "dokumenta"
              "boles#departementa" "dutkan#departementa"
              "eanandoallo#departementa" "energi#departementa"
              "finans#departementa" "gielda#departementa"
              "guolástus#departementa" "guovlo#departementa"
              "guovlu#departementa" "industri#departementa"
              "justiisa#departementa" "justis#departementa"
              "kultur#departementa" "politi#departementa"
              "sis#departementa" ;

LIST MANNU = "ođđa#jage#mánnu" "ođđa#jagi#mánnu" "guovva#mánnu"
             "njukča#mánnu" "njuvčča#mánnu" "márs#mánnu" "cuoŋo#mánnu"
             "cuoŋos#mánnu" "gáranas#mánnu" "miesse#mánnu" "mái#mánnu"
             "geasse#mánnu" "suoidne#mánnu" "borge#mánnu" "šnjilča#mánnu"
             "šnjilža#mánnu" "čakča#mánnu" "golggot#mánnu" "skábma#mánnu"
             "november#mánnu" "ritne#mánnu" "juovla#mánnu" ;

LIST AIGODAT = "áigi" "áigodat" "stuoradigge#áigodat" "sámedigge#áigodat"
               "guođohan#áigodat" "doaibma#áigodat" ;

# The quoteless jahki here is since the preprocessor is not fine-tuned
# yet, it still gives '"1870-" / jahki N Sg Com'.
# Let's see if "dbm" works.
LIST VAHKKU = "beaivi" "jándor" "bodda" "čakča" "čakča#geassi"
              "čuohte#jahki" "dálvi" "diibmu" "dbm" "geardi" "geassi"
              "giđđa" "hávvi" "idja" "jahki" "jahke#čuohti" "mánnu"
              "riegádan#beaivi" "tiibmu" "vahkku" jahki
              "stuorradigge#áigodat" ;

LIST BEAIVI = "mánnodat" "vuoss#árga" "disdat" "maŋŋeb#árga"
              "gaska#vahkku" "duorastat" "bearjadat" "lávvardat"
              "lávvordat" "sotna#beaivi" "duopmo#beaivi" "sábbát";

# Do not add AIGODAT, it has some other characteristics in some cases.
SET TIME = MANNU | VAHKKU | BEAIVI ;
SET NOTIME = N - TIME ;

# What is special here?
LIST GEASSET = "dálvet" "dálvit" "giđđat" "geasset" "geassit" "čakčat" ;

LIST GUOVLU = "ássan#guovlu" "sámi#guovlu" "báiki" "biras" "gávpot"
              "guovlu" "sádji" "sadji" "viessu" ;

# ... and others!
LIST ROUTE = "fávli" "geaidnu" "guorra" "luodda" "miila";

LIST MEASURE = "miila" "kilo#mehtar" "mehtar" "mehter" "centi#mehtar"
               "sente" "cm" "milli#mehtar" "tumá" "állan" "salla"
               "lihtter" "gilo" "čuolda#badji" "fierbme#badji" ;

LIST NUMUNIT = "čuohti" "duhát" "miljon" "millijovdna" "milliovdna"
               "mill" "milj" "miljo" "miljarda" "mrd";

# Nominals that appear with plural numerals.
LIST PL-NOUN = "beassaš" "čalbme#láse" "gálssot" "headja" "sabet"
               "sisttet" "skárri";

# Words that are used in singular unless their plurality is stressed by
# words such as "goappeš", "goappašat", "earálágan", "máŋgalágan" or Num
# or Ord.
LIST SG-WORD = "ipmárdus" "doaivu" "dáro#giella" "sáme#giella" ;

# There is also an idiomatic use of "ovda#mearka" in the constructions
# "ovdamearkka dihte" and "ovdamearka" (Ess).
LIST LOC-NOUN = "ovda#mearka";

# Nouns and adjectives that take a preceding genitive.
LIST PREGEN = ("álgogeahčen")
              ("bealli" N Sg Loc)
              ("gaskavuohta" N Sg Loc)
              ("guoskevaš")
              ("loahpageahčen")
              ("miella" N Sg Loc)
              ("oktavuohta" N Sg Loc);

# Nominals that probably do not have a Px ('1600-logus').
LIST N-NOPX = "lohku";

# Nominals that well may have a Px.
LIST N-PX = "áhčči" "bárdni" "eadni" "nieida" "viellja" "oabbá";

# Those are words that only appear in one form, which Nickel calls A/G.
# They are only in a predicative position.
LIST ALU = ("allu" Gen) ("assu" Gen) ("čiekŋu" Gen) ("gassu" Gen)
           ("govdu" Gen) ("guhkku" Gen) ("lossu" Gen) ("stuorru" Gen)
           ("viidu" Gen);
# Titles and title abbreviations.
# The "gásttašit" entry used the bare tag `eaddji`; the derivation tags in
# this file carry a `Der/` prefix (compare N-DER), so it now reads
# `Der/eaddji`.
LIST TITLE = "ámta#mánni" "direktevra" "eanan#hearrá"
             ("gásttašit" Der/eaddji) "gonagas" "justitiarius"
             "korefereanta" "ossodat#direktevra" "profehta" "professor"
             "prof" "ráđđe#addi" "stipendiáhta" "sundi"
             "universitehtalektor" "vuosttas#konsuleanta"
             "cand.mag" "cand.oecon" "cand.philol" "cand.real"
             "cand.scient" "cand.theol" "cand" "dr.art" "eksp.hoavd"
             "eksp.sj" "fenr" "fig" "fil.tri" "gen.lt" "genr" "genr.lt"
             "h.r.adv" "hr" "kapt" "korp" "lekt" "lic" "ltn" "mag.art"
             "mag" "merc" "odont" "oecon" "ordf" "ossod.dir" "stud.phil."
             "res.kap" "stud.philol" "vit.ass" "dr" "dr.med" "dr.philos"
             "dr.theol" ;

LIST COUNTABLE = "oassi" "kapihttal" "paragráfa" ;

LIST NUMMAR = "nr" "nummar" "nummir" ;

# "paragráfa" added to match the spelling used in COUNTABLE; the earlier
# "pragráfa" entry is kept in case it was intentional (TODO confirm).
LIST KLASS = "s" "siidu" "§" "§§" "pragráfa" "paragráfa" "oassi"
             "kapihttal" "kapihtal";

LIST LAHKA = "almennet#láhka" "almmolašvuođa#láhka" "apotehka#láhka"
             "arkiiva#láhka" "áhpenuohtástan#láhka" "álbmotoadjo#láhka"
             "bargiid#láhka" "bargobiras#láhka" "báikenamma#láhka"
             "bákte#láhka" "bátnedearvvašvuođabálvalus#láhka"
             "bivdo#láhka" "boazodoallo#láhka" "čázádat#láhka"
             "dálkkas#láhka" "dásseárvo#láhka" "deanu#láhka"
             "dearvvašvuođabargiid#láhka" "dearvvašvuođa#láhka"
             "dearvvašvuođaregisttar#láhka" "divššohasvuoigatvuođa#láhka"
             "eana#láhka" "eanasuodjalan#láhka" "eanavuovdin#láhka"
             "elliidsuodjalan#láhka" "energi#láhka" "fápmudusa#láhka"
             "fápmudus#láhka" "Finnmárko#láhka" "friddjasuohkan#láhka"
             "fuođđo#láhka" "gávpot#láhka" "geaidno#láhka"
             "gearggusvuođa#láhka" "gielddadearvvašvuođabálvalus#láhka"
             "gielda#láhka" "gilioppalašeana#láhka" "irko#láhka"
             "golle#láhka" "guladigge#láhka" "guollebiebman#láhka"
             "háhkankonsešuvdna#láhka" "hálddašan#láhka"
             "heaggadieđuid#láhka" "huksen#láhka"
             "industrikonsešuvdna#láhka" "jávrebivdo#láhka"
             "joatkkaoahpahus#láhka" "johtolat#láhka" "konsešuvdna#láhka"
             "kulturmuito#láhka" "lavdnjeloggun#láhka" "láhka"
             "luodda#láhka" "luonddusuodjalus#láhka" "luopmo#láhka"
             "luossa#láhka" "mánáidgárde#láhka"
             "mánáidsuodjalusbálvalus#láhka" "mearraguolástan#láhka"
             "mearraresursa#láhka" "minerálaláhka"
             "mohtorjohtolat#láhka" "mudden#láhka" "namma#láhka"
             "oahpahalli#láhka" "oahpahus#láhka" "oahpo#láhka"
             "olgunastin#láhka" "olmmošvuoigatvuođa#láhka"
             "oktasašsáme#láhka" "pasientavuoigatvuođa#láhka"
             "ráŋggáštan#láhka" "riekteveahkke#láhka" "sáivabivdo#láhka"
             "sáme#láhka" "sámi#láhka" "searvan#láhka" "sierra#láhka"
             "spesialistadearvvašvuođabálvalus#láhka"
             "stáhtaborgárrievtti#láhka" "stáhtafitnodat#láhka"
             "tuollu#láhka" "varasguolle#láhka" "várre#láhka"
             "vearrodahko#láhka" "vearro#láhka" "vuođđo#láhka"
             "vuođđoskuvla#láhka" "vuovdedoallo#láhka"
             "vuovdegáhtten#láhka" "vuovde#láhka"
             "1842-#láhka" "1863-#láhka" "1888-#láhka"
             "1899-bivdo#láhka" "1899-#láhka" "1920-várre#láhka"
             "1933-#láhka" "1965-#láhka" "1975-#láhka"
             "1975-várre#láhka" "1978-#láhka" ;

# This is for lága compounds
LIST LAGA = "lága" "vuođđo#lága";

SET NOT-LAHKA = N - LAHKA ;

LIST FYLKA = "Finnmárku" "Finnmarku" "Romsa" "Nordlánda"
             "Lulli-Trøndelag" "Davvi-Trøndelag" "Hedmárku" "Akershus"
             "Buskerud" "Vestfold" "Østfold" "Aust-Agder" "Vest-Agder"
             "Rogalánda" "Hordalánda" "Opplánda" "Fjordane" "Romsdal"
             "Telemark" ;

LIST XDEPARTEMEANTA = "bearaš#departemeanta" "eanandoallo#departemeanta"
                      "dearvvašvuođa#departemeanta" "kultur#departemeanta"
                      "birasgáhtten#departemeanta" "dutkan#departemeanta";

LIST INSTITUTION = "álbmot#allaskuvla" "alla#skuvla" "ámmát#skuvla"
                   "bargo#kantuvra" "báhpa#skuvla" "buohcce#stohpu"
                   "buohcce#viessu" "eanandoallo#departemeanta"
                   "giella#guovdaš" "giella#ráđđi" "girku"
                   "guovllubuohcce#viessu" "dearvvašvuođa#guovddáš"
                   "joatkka#skuvla" "klinihkka" "mánáid#gárdi"
                   "mánáid#skuvla" "meieriija" "musea" "nuoraid#skuvla"
                   "parlameantta" "psykiatriijabuohcce#viessu" "ráđđi"
                   "riika#searvi" "searvodat" "skuvla" "skuvla#kantuvra"
                   "universiteahtta" "vuođđo#skuvla" ;

LIST ARRANGEMENT = "čakča#márkan" ;

LIST HUMAN-INDIVIDUAL = "áhčči" "áddjá" "áhkku" "bártni" "boaris"
                        "čeahci" "eadni" "eahki" "eanu" "gánda" "goaski"
                        "muoŧŧa" "nieida" "nisson" "oabba" "oambealli"
                        "oarpmealli" "olbmo" "olmmái" "siessá" "váhnen"
                        "viellja" "vilbealli" ;

LIST HUMAN-GROUP = "eami#álbmot" ;

LIST PROFESSION = "logopeda" "suohkanfysio#terapeuta" "fysio#terapeuta"
                  "fysio#terapevta" "oahppah#eaddji" "doavttir"
                  "politihkar" "politihkkár";

# "spiinne#čivga" was unquoted; outside quotes the `#` would start a
# comment and cut the list short, so the entry is now quoted like the rest.
LIST ANIMAL = "áldu" "bálddis" "beana" "boazu" "bohkká" "ciiku" "čivga"
              "čuska" "dámmá" "dorski" "ealit" "gáhttu" "gáica" "gálbi"
              "gihcci" "gilot" "guigu" "gussa" "hávga" "heasta" "juksu"
              "láppis" "loddi" "miessi" "muortu" "njiŋŋálas" "ore"
              "sallit" "sarvvis" "sávza" "spiinne#čivga" "spiidni"
              "stábbasáidi" "stáinnir" "stuorasáidi" "rutnot" "vársi"
              "vielppis" "vierca" "vuoksá" "vuoncá#varis" "vuonccis"
              "vuonjal" ;

SET HUMAN = HUMAN-INDIVIDUAL | HUMAN-GROUP | PROFESSION ;

SET ANIMATE = HUMAN | ANIMAL;

# Might be useful for partitive constructions.
# (A stray comma between the two entries has been removed; LIST members are
# separated by whitespace only.)
LIST OASSI = "oassi" "váldo#oassi";

# Two quoting errors repaired: a doubled opening quote on "gárren#dilli" and
# a missing closing quote on "rohttu#dávda".
LIST ILLNESS = "albmasuoigi" "allergiija" "anemiija" "astan#dávda"
               "árbe#dávda" "báhti" "bátne#várka" "bátne#vearka" "bárku"
               "bárko#dávda" "bárku#dávda" "bearri" "boazo#dávda"
               "boahkko#dávda" "boahkku" "boras#dávda" "buohcu"
               "buohcuvuohta" "buollán" "buozalmas" "čoavje#bávččas"
               "čoavje#bonjahat" "dávda" "dohppehallan#dávda" "eksema"
               "gárren#ávnnas" "gárrenávnnas#váttisvuohta" "gárren#dilli"
               "golgo#dávda" "juhkanvuohta" "logahas#dávda"
               "njoammu#dávda" "nuoskkes#dávda" "oaivvesvuohta" "ráŋka"
               "rohttu#dávda" "ruodnu" "skihpa" "časkásan#váddu"
               "sis#váddu" "soga#dávda" "soga#váddu" "suohtta"
               "varra#sonahat" "váddu" "vátne#dávda" "vihki" "vuolši" ;

# The run "rumaš#golggus" "rumaš#lahttu" "rumaš#njalbi" "sáhppe#suvri" was
# listed twice in a row; the repeated copy has been dropped.
LIST BODY = "ámadadju" "bahta" "bahta#ráigi" "bállo#suotna"
            "beađbe#lađas" "bealgi" "beallji" "bierggas" "bieža#dákti"
            "billu" "buđđá" "čalbme#deahkki" "čalbmi" "čatnan#gođđu"
            "čeabet" "čelččen" "čeŋkkeš" "čibbe#skálžu" "čibbi"
            "čielge#ađa" "čielge#dákti" "čielgi" "čoalli" "čoamohas"
            "čoarbbeal#deahkki" "čoarbbeal#lađas" "čoarb#bealli" "čoavji"
            "čorbma#oassi" "čorbma#váimmus" "čorbma#vuođđu"
            "čuožžamas#deahkki" "čurti" "čuvdi" "dábba" "dáđvi"
            "dákte#gođus" "dákti" "dávdi" "deahkke#gođus" "deahkki"
            "erttet" "erttet#deahkki" "gahpárus" "gahperus" "gardnjil"
            "gaska#suorbma" "gassa#čoalli" "gassa#váhkká" "gáddu" "gáibi"
            "gállu" "gánis" "gátnis" "geađđi" "geahpis" "giegir" "giehta"
            "giehta#deahkki" "giehta#gávva" "giehta#láhpi" "giehta#ramas"
            "giehta#ruohtta" "giehta#sealgi" "giehta#váibmu"
            "giehta#váimmus" "gilga" "goapmir#váimmus" "goaŧŧo#čotta"
            "goaŧŧu" "goike#dákti" "gullu" "guoggomas" "guoggomas#ráksá"
            "gurot#giehta" "hárdu" "heagga#roasmmoheapmi" "jarri"
            "juolge#bealgi" "juolge#čelččen" "juolge#gávva"
            "juolge#lađas" "juolge#láhpi" "juolge#mátta"
            "juolge#suorbma" "juolge#vuođđu" "juolgi" "lađas"
            "láhpe#lađas" "manin" "manne#jođas" "manne#ráksá"
            "maŋŋe#buoidi" "mánágoahte#čotta" "máná#goahti"
            "mielga#njunni" "monimuš" "monin" "monne#jođas"
            "monne#ráksá" "morče#suotna" "morči" "muohtu" "náhpi"
            "niehkki" "nierra" "niske#gohpi" "niski" "njálbmi"
            "njiehcehas" "njunni" "njuovčča" "oaive#assi" "oaive#čohkka"
            "oaivi" "oalge#deahkki" "oalgenoras#deahkki"
            "oalgenoras#váhkká" "oalgi" "oalul" "obba#lággá" "orbbeš"
            "raddi" "rápma" "rumaš" "rumaš#golggus" "rumaš#lahttu"
            "rumaš#njalbi" "sáhppe#suvri" "siidu" "siskkit#beallji"
            "sotken#oalli" "spiral" "suolbmudan#oalli" "suorbma" "šušmi"
            "váhkká#deahkki" "váibmu" "vuohčče#cuozza" "vuoivvas"
            "vuokta" "vuolit" "oassi" "gieđas" "vuorggu";

# Miscellaneous sets
# ------------------
LIST Clt = Qst Foc ;

LIST Px = PxSg1 PxSg2 PxSg3 PxDu1 PxDu2 PxDu3 PxPl1 PxPl2 PxPl3 ;

#SET PPOS = Po | Pr ;

LIST POST-A = "rájes" ;
LIST POST-B = "rádjái" ;

#LIST WACKERNAGEL = "go" "ba" "bat" "be" "bahan" "son" ;

# p-positions that like coordination
LIST GASKAL = "gaskal" "gaskkal" "gaskii" "gaskka" "gaskkas" ;

SET NUMERALS = Num - OKTA ;
SET NOT-NUMERALS = WORD - Num ;

LIST MANGA = "máŋga" "galle" ;
# Not referred to by any rule.
SET CARDINALS = Num - Ord - MANGA ;

SET NOT-CC = WORD - CC ;
SET NOT-PCLE = WORD - Pcle ;

# These combine with "go" Pcle, but COMPAR stands for 'comparison' rather
# than 'comparative'.
LIST COMPAR = "dávjjit" "eanet" "earalágan" "eará" "earret" "seammás"
              "seammalágan" "seammalágán" "seammaláhkai";

# In lean muitalan, muhto dál muitalan.
LIST CONTRA = "muhto" ;

LIST PROSEANTA = "proseanta" "%" ;

SET REALCLB = CLB - COMMA ;
SET NOTCOMMA = WORD - COMMA ;

# Border sets and their complements
# ---------------------------------
SET CP = (Pron Interr) | (Pron Rel) | MO ;

# CLB removed from set in order to allow for NP-internal commas.
# Refl is removed because of "sin iežaset vieruid" etc.
SET NP-BORDER = BOS | EOS | REALCLB | VFIN | Inf | (Actio Ess) | ConNeg
              | VGen | Sup | PPRON-NOT-GEN | Recipr | Po | Pr | Pcle
              | ("jed") | Interj | CS | CP ;

# Syntactic sets
# --------------
# NOTE(review): this list contains (@NumQ<), which has no single-membered
# LIST declaration in this file, and it does not contain the declared tags
# @NNum>, @NumN< and @APP - confirm whether that is intended.
LIST ALLSYNTAG = (@+FAUXV) (@+FMAINV) (@-FAUXV) (@-FMAINV) (@-FSUBJ)
                 (@ADV-A) (@ADV-ADV) (@ADVL) (@AN>) (@ActioN>) (@CC)
                 (@CMPND) (@CS) (@DN>) (@GA>) (@GN>) (@GP<) (@GP>)
                 (@HNOUN) (@INTERJ) (@NPron<) (@NQ<) (@NUM-PRON) (@OBJ)
                 (@OPRED) (@PCLE) (@PCLE-COMPL) (@PROP>) (@PrcN>)
                 (@PronN<) (@PronN>) (@QN<) (@QN>) (@SPRED) (@NumQ<)
                 (@SUBJ) (@SUBJ-QH) (@TITLE) (@VOC) (@X);
# Left out: (@TIME) (@NUMBER) (@MEASURE)

# Does not work:
# SET ASSIGNEDSYN = ALLSYNTAG - @X ;

# ============ #
# Rule section #
# ============ #

CONSTRAINTS

SELECT Pl1 IF (*-1 MII-PERS BARRIER (@SUBJ))(0 ("leat"));
## Amma mii eat leat máksán? De leat máksán.

REMOVE Pl1 IF (*-1 SII BARRIER (@SUBJ))(0 ("leat"));

#REMOVE (@SUBJ) IF (*-1 BOS LINK NOT *1 V BARRIER EOS)(NOT *-1 BOS BARRIER BOS LINK *-1 (Interr Nom) LINK -1 BOS);
#REMOVE (@SPRED) IF (*-1 BOS LINK NOT *1 V BARRIER EOS)(NOT *-1 BOS BARRIER BOS LINK *-1 (Interr Nom) LINK -1 BOS);
## ILO konvenšuvdna (nr 169) eamiálbmogiid ja čearddaid birra iešbirgejeaddji stáhtain (ILO-konvenšuvdna).

REMOVE Inf IF (-1 ("." CLB) LINK *-1 Pl1)(0 Pl1);
## Mii sávvat ollu lihku dutnje riegádanbeivviin. Sávvat maid lihkku váldofágain maid dán jagi leat čađaheamen.

END