# Divvun & Giellatekno - open source grammars for Sámi and other languages
# Copyright © 2000-2010 The University of Tromsø & the Norwegian Sámi Parliament
# http://giellatekno.uit.no & http://divvun.no
#
# This program is free software; you can redistribute and/or modify
# this file under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version. The GNU General Public License
# is found at http://www.gnu.org/licenses/gpl.html. It is
# also available in the file $GTHOME/LICENSE.txt.
#
# Other licensing options are available upon request, please contact
# giellatekno@hum.uit.no or feedback@divvun.no

# ============================================================ #
# ============================================================ #
#    L U L E   S Á M I   TEXT   D I S A M B I G U A T O R      #
# ============================================================ #
# ============================================================ #

# Sámi language technology project 2003-2006, University of Tromsø

# =========================================== #
#      Short table of contents                #
# =========================================== #

# Usage:
# First run through smj-dis.rle, then run through smj-tdis.rle:
# preprocess text | lookup -flags mbTT -utf8 bin/sme.fst | lookup2cg |
# vislcg --grammar=src/smj-dis.rle |
# vislcg --grammar=src/smj-tdis.rle --minimal | less
#
# NOTE(review): the pipeline above names bin/sme.fst, which is presumably a
# leftover from the North Sámi grammar; a Lule Sámi analyser (e.g. bin/smj.fst)
# would be expected here - confirm before copying the command.

# ========== #
# Delimiters #
# ========== #

# The only delimiter is the paragraph-marker cohort "<¶>".
DELIMITERS = "<¶>";

# ============= #
# Tags and sets #
# ============= #

# ========
  SETS
# ========

# Beginning-of-sentence and end-of-sentence sets. Each lists the >>> / <<<
# tag and an empty () alternative.
LIST BOS = (>>>) ();
LIST EOS = (<<<) ();  # vislcg and CG-2 together.
# Tags declared as single-membered LISTs
# ======================================
# Every tag used in the grammar is declared as a LIST whose only member is
# the tag itself, so that it can be referred to as a set name.

# Parts of speech
# ---------------
LIST N = N ;
LIST A = A ;
LIST Adv = Adv ;
LIST V = V ;
LIST Pron = Pron ;
LIST CS = CS ;
LIST CC = CC ;
LIST Po = Po ;
LIST Pr = Pr ;
LIST Pcle = Pcle ;
LIST Num = Num ;
LIST Prop = Prop ;

# POS sub-categories
# ------------------
LIST Pers = Pers ;
LIST Dem = Dem ;
LIST Interr = Interr ;
LIST Indef = Indef ;
LIST Recipr = Recipr ;
LIST Refl = Refl ;
LIST Rel = Rel ;
LIST Interj = Interj ;
LIST Adp = Adp ;

# Morphosyntactic properties
# --------------------------
LIST Sg = Sg ;
LIST Pl = Pl ;
LIST Nom = Nom ;
LIST Gen = Gen ;
LIST Acc = Acc ;
LIST Ill = Ill ;
LIST Ine = Ine ;
LIST Ela = Ela ;
LIST Com = Com ;
LIST Ess = Ess ;
LIST Par = Par ;
LIST Abe = Abe ;
LIST SgCmp = SgCmp ;
LIST SgNomCmp = SgNomCmp ;
LIST SgGenCmp = SgGenCmp ;
LIST PlGenCmp = PlGenCmp ;
LIST RCmpnd = RCmpnd ;
LIST PxSg1 = PxSg1 ;
LIST PxSg2 = PxSg2 ;
LIST PxSg3 = PxSg3 ;
LIST PxDu1 = PxDu1 ;
LIST PxDu2 = PxDu2 ;
LIST PxDu3 = PxDu3 ;
LIST PxPl1 = PxPl1 ;
LIST PxPl2 = PxPl2 ;
LIST PxPl3 = PxPl3 ;
LIST Comp = Comp ;
LIST Superl = Superl ;
LIST Actor = Actor ;
LIST Actio = Actio ;
LIST Clt = Clt ;
LIST Attr = Attr ;
LIST Card = Card ;
LIST Ord = Ord ;
LIST Date = Date ;
LIST Range = Range ;
LIST Dim = Dim ;
LIST Inch = Inch ;
LIST Pass = Pass ;
LIST Prs = Prs ;
LIST Prt = Prt ;
LIST Ind = Ind ;
LIST Pot = Pot ;
LIST Cond = Cond ;
LIST Imprt = Imprt ;
LIST ImprtII = ImprtII ;
LIST Sup = Sup ;
LIST IV = IV ;
LIST TV = TV ;
LIST Sg1 = Sg1 ;
LIST Sg2 = Sg2 ;
LIST Sg3 = Sg3 ;
LIST Du1 = Du1 ;
LIST Du2 = Du2 ;
LIST Du3 = Du3 ;
LIST Pl1 = Pl1 ;
LIST Pl2 = Pl2 ;
LIST Pl3 = Pl3 ;
LIST Inf = Inf ;
LIST Ger = Ger ;
LIST GerII = GerII ;
LIST ConNeg = ConNeg ;
LIST Neg = Neg ;
LIST PrsPrc = PrsPrc ;
LIST PrfPrc = PrfPrc ;
LIST VGen = VGen ;
LIST VAbess = VAbess ;
LIST CLB = CLB ;
LIST PUNCT = PUNCT ;
LIST HYPH = HYPH ;
LIST PAR = PAR ;

# Derivation
# ----------
LIST Der/agák = Der/agák ;
LIST Der/ahkes = Der/ahkes ;
LIST Der/ahtes = Der/ahtes ;
LIST Der/ahtte = Der/ahtte ;
LIST Der/alla = Der/alla ;
LIST Der/asste = Der/asste ;
LIST Der/belak = Der/belak ;
LIST Der/bieljak = Der/bieljak ;
LIST Der/báguk = Der/báguk ;
LIST Der/bájnuk = Der/bájnuk ;
LIST Der/bának = Der/bának ;
LIST Der/d = Der/d ;
LIST Der/dahka = Der/dahka ;
LIST Der/dahtte = Der/dahtte ;
LIST Der/dalla = Der/dalla ;
LIST Der/dasste = Der/dasste ;
LIST Der/dibme = Der/dibme ;
LIST Der/duhtte = Der/duhtte ;
LIST Der/duvva = Der/duvva ;
LIST Der/dábak = Der/dábak ;
LIST Der/dáfuk = Der/dáfuk ;
LIST Der/dávdak = Der/dávdak ;
LIST Der/dávtak = Der/dávtak ;
LIST Der/ferjak = Der/ferjak ;
LIST Der/garák = Der/garák ;
LIST Der/gattsak = Der/gattsak ;
LIST Der/gerdak = Der/gerdak ;
LIST Der/giedak = Der/giedak ;
LIST Der/gielak = Der/gielak ;
LIST Der/giesjkak = Der/giesjkak ;
LIST Der/gietjak = Der/gietjak ;
LIST Der/girjak = Der/girjak ;
LIST Der/guovluk = Der/guovluk ;
LIST Der/h = Der/h ;
LIST Der/habsak = Der/habsak ;
LIST Der/hekkak = Der/hekkak ;
LIST Der/hábjak = Der/hábjak ;
LIST Der/hájak = Der/hájak ;
LIST Der/hámak = Der/hámak ;
LIST Der/hápsak = Der/hápsak ;
LIST Der/ivnak = Der/ivnak ;
LIST Der/jagák = Der/jagák ;
LIST Der/jergak = Der/jergak ;
LIST Der/jienak = Der/jienak ;
LIST Der/jiermak = Der/jiermak ;
LIST Der/juolgak = Der/juolgak ;
LIST Der/l = Der/l ;
LIST Der/ladda = Der/ladda ;
LIST Der/lahkak = Der/lahkak ;
LIST Der/lahtte = Der/lahtte ;
LIST Der/lanják = Der/lanják ;
LIST Der/lasj = Der/lasj ;
LIST Der/lasste = Der/lasste ;
LIST Der/lijkak = Der/lijkak ;
LIST Der/linnjak = Der/linnjak ;
LIST Der/lis = Der/lis ;
LIST Der/luonduk = Der/luonduk ;
LIST Der/luottak = Der/luottak ;
LIST Der/lágán = Der/lágán ;
LIST Der/lágásj = Der/lágásj ;
LIST Der/merkak = Der/merkak ;
LIST Der/mielak = Der/mielak ;
LIST Der/muoduk = Der/muoduk ;
LIST Der/mus = Der/mus ;
LIST Der/n = Der/n ;
LIST Der/namák = Der/namák ;
LIST Der/nierak = Der/nierak ;
LIST Der/njunjak = Der/njunjak ;
LIST Der/njálmak = Der/njálmak ;
LIST Der/nálak = Der/nálak ;
LIST Der/r = Der/r ;
LIST Der/rabdak = Der/rabdak ;
LIST Der/rattak = Der/rattak ;
LIST Der/rávak = Der/rávak ;
LIST Der/segak = Der/segak ;
LIST Der/siejbak = Der/siejbak ;
LIST Der/siejnak = Der/siejnak ;
LIST Der/sijduk = Der/sijduk ;
LIST Der/st = Der/st ;
LIST Der/stahtte = Der/stahtte ;
LIST Der/stalla = Der/stalla ;
LIST Der/stasste = Der/stasste ;
LIST Der/stával = Der/stával ;
LIST Der/suorak = Der/suorak ;
LIST Der/suorgak = Der/suorgak ;
LIST Der/suttak = Der/suttak ;
LIST Der/sáv'vak = Der/sáv'vak ;
LIST Der/tjalmak = Der/tjalmak ;
LIST Der/tjiegak = Der/tjiegak ;
LIST Der/tjuolmak = Der/tjuolmak ;
LIST Der/tjuvdak = Der/tjuvdak ;
LIST Der/tjålak = Der/tjålak ;
LIST Der/tjårvak = Der/tjårvak ;
LIST Der/u/a/åd = Der/u/a/åd ;
LIST Der/us = Der/us ;
LIST Der/uvsak = Der/uvsak ;
LIST Der/uvsuk = Der/uvsuk ;
LIST Der/varák = Der/varák ;
LIST Der/vigák = Der/vigák ;
LIST Der/vuobdak = Der/vuobdak ;
LIST Der/vuoduk = Der/vuoduk ;
LIST Der/vuohta = Der/vuohta ;
LIST Der/vájmuk = Der/vájmuk ;
# Der/ájvuk and Der/såjak are two separate tags; they must not be run
# together into a single tag name.
LIST Der/ájvuk = Der/ájvuk ;
LIST Der/såjak = Der/såjak ;
LIST Der/åjvak = Der/åjvak ;
LIST Der/åsak = Der/åsak ;

# DERIVED is the union of all derivation tags declared above.
# The members are written as plain (unquoted) tags: a quoted string is
# matched against the base form of a reading, not against its tags, so a
# quoted "Der/..." member would never match a derivation tag.
LIST DERIVED = Der/agák Der/ahkes Der/ahtes Der/ahtte Der/alla Der/asste
     Der/belak Der/bieljak Der/báguk Der/bájnuk Der/bának
     Der/d Der/dahka Der/dahtte Der/dalla Der/dasste Der/dibme
     Der/duhtte Der/duvva Der/dábak Der/dáfuk Der/dávdak Der/dávtak
     Der/ferjak
     Der/garák Der/gattsak Der/gerdak Der/giedak Der/gielak Der/giesjkak
     Der/gietjak Der/girjak Der/guovluk
     Der/h Der/habsak Der/hekkak Der/hábjak Der/hájak Der/hámak Der/hápsak
     Der/ivnak
     Der/jagák Der/jergak Der/jienak Der/jiermak Der/juolgak
     Der/l Der/ladda Der/lahkak Der/lahtte Der/lanják Der/lasj Der/lasste
     Der/lijkak Der/linnjak Der/lis Der/luonduk Der/luottak Der/lágán
     Der/lágásj
     Der/merkak Der/mielak Der/muoduk Der/mus
     Der/n Der/namák Der/nierak Der/njunjak Der/njálmak Der/nálak
     Der/r Der/rabdak Der/rattak Der/rávak
     Der/segak Der/siejbak Der/siejnak Der/sijduk Der/st Der/stahtte
     Der/stalla Der/stasste Der/stával Der/suorak Der/suorgak Der/suttak
     Der/sáv'vak
     Der/tjalmak Der/tjiegak Der/tjuolmak Der/tjuvdak Der/tjålak Der/tjårvak
     Der/u/a/åd Der/us Der/uvsak Der/uvsuk
     Der/varák Der/vigák Der/vuobdak Der/vuoduk Der/vuohta Der/vájmuk
     Der/ájvuk Der/såjak Der/åjvak Der/åsak ;

# Syntactic tags
# --------------
LIST @+FAUXV = @+FAUXV ;
LIST @+FMAINV = @+FMAINV ;
LIST @-FAUXV = @-FAUXV ;
LIST @-FMAINV = @-FMAINV ;
LIST @-FSUBJ = @-FSUBJ ;
LIST @ADV-A = @ADV-A ;
LIST @ADV-ADV = @ADV-ADV ;
LIST @ADVL = @ADVL ;
LIST @AN> = @AN> ;
LIST @ActioN> = @ActioN> ;
LIST @CC = @CC ;
LIST @CMPND = @CMPND ;
LIST @CS = @CS ;
LIST @DN> = @DN> ;
LIST @GA> = @GA> ;
LIST @GN> = @GN> ;
LIST @GP< = @GP< ;
LIST @GP> = @GP> ;
LIST @HNOUN = @HNOUN ;
LIST @INTERJ = @INTERJ ;
LIST @MEASURE = @MEASURE ;
LIST @NNum> = @NNum> ;
LIST @NumN< = @NumN< ;
LIST @NPron< = @NPron< ;
LIST @NQ< = @NQ< ;
LIST @NUM-PRON = @NUM-PRON ;
#LIST @NUMBER = @NUMBER ;
LIST @OBJ = @OBJ ;
LIST @OPRED = @OPRED ;
LIST @PCLE = @PCLE ;
LIST @PCLE-COMPL = @PCLE-COMPL ;
LIST @PROP> = @PROP> ;
LIST @PrcN> = @PrcN> ;
LIST @PronN< = @PronN< ;
LIST @PronN> = @PronN> ;
LIST @QN< = @QN< ;
LIST @QN> = @QN> ;
LIST @SPRED = @SPRED ;
LIST @SUBJ = @SUBJ ;
LIST @SUBJ-QH = @SUBJ-QH ;
LIST @TITLE = @TITLE ;
LIST @VOC = @VOC ;
LIST @X = @X ;

# Semantic tags
# -------------
LIST Ani = Ani ;
LIST Fem = Fem ;
LIST Mal = Mal ;
LIST Obj = Obj ;
LIST Org = Org ;
LIST Plc = Plc ;
LIST Sur = Sur ;
LIST Tit = Tit ;

# Other tags
# ----------
LIST ABBR = ABBR ;
LIST ACR = ACR ;

# Single-word sets
# ----------------
LIST COMMA = "," ;
LIST ¶ = ¶;
#LIST OKTA = "okta";
LIST gå = "gå" ;

# Initials
# --------
LIST INITIAL = "a" "b" "c" "d" "e" "f" "g" "h" "i" "j" "k" "l" "m" "n" "o"
     "p" "q" "r" "s" "t" "u" "v" "w" "x" "y" "z" "á" "æ" "ø" "å" "ö" "ä" ;

# ======== #
# Tag sets #
# ======== #
  SETS     #
# ======== #

LIST WORD = N A Adv V Pron CS CC Adp Po Pr Interj Pcle Pers Dem Interr Refl
     Recipr Rel Indef ;  # any word

# Case sets
# ---------
#LIST ADVLCASE = Ill Loc Com Ess ;
#LIST CASE = Nom Acc Gen Ill Loc Com Ess ;
#SET NOT-NOM = CASE - Nom ;
#SET NOT-GEN = CASE - Gen ;
#SET NOT-ACC = CASE - Acc ;

# Noun sets
# ---------
LIST N-SG-NOM = (N Sg Nom);
LIST N-PL-NOM = (N Pl Nom);
LIST N-NOM = (N Nom) ;
LIST N-GEN = (N Gen);
#SET LEX-N = N - N-DER;
LIST PROP = (N Prop);
SET CNOUN = N - Prop;
SET CNOUN-PL-NOM = (N Pl Nom) - Prop ;
SET HNOUN = N - RCmpnd ;
SET HNOUN-NOM = (N Nom) - RCmpnd ;
SET N-NOT-GEN = N - (N Gen) ;

# Verb sets
# ---------

# Finiteness and mood
# - - - - - - - - - -
SET V-IND-FIN = Prs | Prt ;  # Problem: "In boahtán" is an invisible indicative
SET V-MOOD = Prs | Prt | Pot | Imprt | Cond ;
SET VFIN = V-MOOD - ConNeg ;
SET VFIN-POS = V-MOOD - ConNeg - Neg ;
SET VFIN-NOT-IMP = VFIN - Imprt ;

# Person
# - - - -
LIST V-SG1 = (V Ind Prs Sg1) (V Ind Prt Sg1) (V Cond Prs Sg1)
             (V Cond Prt Sg1) (V Pot Prs Sg1) (V Neg Ind Sg1);
LIST V-SG2 = (V Ind Prs Sg2) (V Ind Prt Sg2) (V Cond Prs Sg2)
             (V Cond Prt Sg2) (V Pot Prs Sg2) (V Neg Ind Sg2);
LIST V-SG3 = (V Ind Prs Sg3) (V Ind Prt Sg3) (V Cond Prs Sg3)
             (V Cond Prt Sg3) (V Pot Prs Sg3) (V Neg Ind Sg3);
LIST V-DU1 = (V Ind Prs Du1) (V Ind Prt Du1) (V Cond Prs Du1)
             (V Cond Prt Du1) (V Pot Prs Du1) (V Neg Ind Du1);
LIST V-DU2 = (V Ind Prs Du2) (V Ind Prt Du2) (V Cond Prs Du2)
             (V Cond Prt Du2) (V Pot Prs Du2) (V Neg Ind Du2);
LIST V-DU3 = (V Ind Prs Du3) (V Ind Prt Du3) (V Cond Prs Du3)
             (V Cond Prt Du3) (V Pot Prs Du3) (V Neg Ind Du3);
LIST V-PL1 = (V Ind Prs Pl1) (V Ind Prt Pl1) (V Cond Prs Pl1)
             (V Cond Prt Pl1) (V Pot Prs Pl1) (V Neg Ind Pl1);
LIST V-PL2 = (V Ind Prs Pl2) (V Ind Prt Pl2) (V Cond Prs Pl2)
             (V Cond Prt Pl2) (V Pot Prs Pl2) (V Neg Ind Pl2);
LIST V-PL3 = (V Ind Prs Pl3) (V Ind Prt Pl3) (V Cond Prs Pl3)
             (V Cond Prt Pl3) (V Pot Prs Pl3) (V Neg Ind Pl3);
# Note that imperative verbs are not included in these sets!

# Some subsets of the VFIN sets
# - - - - - - - - - - - - - - -
SET V-DU = V-DU1 | V-DU2 | V-DU3 ;
SET V-PL = V-PL1 | V-PL2 | V-PL3 ;
SET V-DU-PL = V-DU1 | V-DU2 | V-DU3 | V-PL1 | V-PL2 | V-PL3 ;
SET V-NOT-SG1 = VFIN-NOT-IMP - V-SG1 ;
SET V-NOT-SG3 = V-SG1 | V-SG2 | V-DU1 | V-DU2 | V-DU3 | V-PL1 | V-PL2 | V-PL3 ;
SET V-1-2 = V-SG1 | V-SG2 | V-DU1 | V-DU2 | V-PL1 | V-PL2 ;
SET V-3 = V-SG3 | V-DU3 | V-PL3 ;

# Sets consisting of forms of "leat" (these ones need to be rewritten)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

# Pronoun sets
# ------------
SETS

# Nominative personal pronouns, one set per person/number.
# NOTE(review): SON and SII require the base form "son", which looks like the
# North Sámi lemma; the Lule Sámi analyser presumably emits a different base
# form (e.g. "sån") - confirm against the lexicon before relying on these sets.
SET MUN = (Pron Pers Sg1 Nom);
SET DON = (Pron Pers Sg2 Nom);
SET SON = ("son" Pron Pers Sg3 Nom);
SET MOAI = (Pron Pers Du1 Nom);
SET DOAI = (Pron Pers Du2 Nom);
SET SOAI = (Pron Pers Du3 Nom);
SET MII-PERS = (Pron Pers Pl1 Nom);
SET DII = (Pron Pers Pl2 Nom);
SET SII = ("son" Pron Pers Pl3 Nom);

SET PPRON-DU-PL = MOAI | DOAI | SOAI | MII-PERS | DII | SII ;
SET PPRON-NOT-SII = MUN | DON | SON | MOAI | DOAI | SOAI | MII-PERS | DII ;

LIST PPRON-GEN = (Sg1 Gen) (Sg2 Gen) (Sg3 Gen) (Du1 Gen) (Du2 Gen) (Du3 Gen)
                 (Pl1 Gen) (Pl2 Gen) (Pl3 Gen);
SET PPRON-NOT-GEN = (Pron Pers) - PPRON-GEN ;

SET DEM-SG = (Pron Dem Sg Nom);
SET DEM-PL = (Pron Dem Pl Nom);
SET NOT-DEM = WORD - Dem ;

# Adjectival sets and their complements
# -------------------------------------
SET A-ATTR = (A Attr) ;  # Used in one rule. Not needed?
#SET LEX-A = A - A-DER ;
# Adjective readings that are neither attributive nor adverbial.
SET A-CASE = A - Attr - Adv ;
LIST A-CC = A CC ;
# Complement sets: WORD minus the named categories.
SET NOT-ADJ = WORD - A ;
SET NOT-Attr = WORD - Attr ;
SET NOT-ADJ-PCLE = WORD - A - Pcle ;
SET NOT-ADJ-CC = WORD - A-CC ;
SET NOT-AA = WORD - A - Adv ;

#LIST NOMINAL-ADJ = "guoktilaš" "lámis" "oasálaš" ("suddu" laš) "viissis"; # and many others

# Adverbial sets and their complements
# ------------------------------------
# Base forms of question words; MANNE is used in the CP set.
LIST MANNE = "gåsi" "goasse" "goassa" "goassis" "gåkku" "gånne" "gåste"
     "makkir" "makkár" "manne" "manen" "mannas" ;

# Sets for adverbs that have lookalikes
# - - - - - - - - - - - - - - - - - - -
# Here come some adverbs that have identical twins in other POS.
# If these are found in Adv contexts, we treat them as adverbs.
#LIST ADV-NOT-VERB = "dušše" ;
#LIST ADV-NOT-NOUN = "easka" "várra" ;
#LIST ADV-NOT-NAMES = "Ain" "Anne" "Diego" "Dieppe" "Enge" "Galle" "Haga"
#     "Joba" "Johan" "Liikka" "Mai" "Mannes" "Mo" "Mot" "Naba"
#     "Nan" "Oktan" "Sierra" "Sokka" "Villa" ;
#SET ADV-NOT-OTHER-POS = ADV-NOT-VERB | ADV-NOT-NOUN | ADV-NOT-NAMES ;

# Sets of elements with common syntactic behaviour
# ================================================

# VERBS
# -----
SET VERB = V - (V N);  # V is all readings with a V tag in them, VERB should
                       # be the ones without an N tag following the V
#SET LEX-V = VERB - V-DER; # VERB but not V-DER
SET V-NOT-CMPND = V - RCmpnd;  # Not in use.

# NOTE(review): several of these base forms (e.g. "leat", "šaddat") look like
# North Sámi lemmas, while INF-VERB in this file writes the copula as
# "liehket" - confirm which base forms the Lule Sámi analyser actually emits.
LIST COPULAS = "dáidit" "bissut" "gártat" "leahkit" "leat" "orrut" "šaddat" ;
# 'Dáidit' can appear without 'leat'.
SET V-NOT-COP = V - COPULAS ;

#LIST MOD-ASP = "berret" "dáidit" "fertet" "galgat" "gillet" "lávet" "sáhttit"
#     "seahtit" "soaitit" "viggat" ;
# Verbs that never have arguments of their own - so we leave out "áigut", for ex.
#LIST AUX-OR-MAIN = "áigut" "beassat" "hálidit" "háliidit" "máhttit" "nagodit"
#     "orrut" "sihtat" "veadjit";
# These verbs can take arguments, so they do not belong in the AUX group,
# but they are nevertheless mapped to (@+FAUXV).
#SET AUX = COPULAS | Neg | MOD-ASP | AUX-OR-MAIN ;
#SET V-NOT-AUX = V - AUX ;
# The set of verbs that cannot be auxiliaries.
# Does not include AUX-OR-MAIN.
#SET VFIN-NOT-AUX = VFIN - AUX ;
#
#SET V-MAIN = V - COPULAS - Neg - MOD-ASP | ("orrut") ;
# The set of verbs that can be
# main verbs. Includes
# AUX-OR-MAIN.
#
#SET V-DER-PASS = V + V-DER + Pass;
# + is intersection, this should work
# The set is used in one rule with no hits.
#
#SET V-PASS = (V TV Pass);
#
#SET V-NOT-AUX-PASS = V - AUX - V-PASS ;
# No longer in use.
#
#SET V-ACT = V - Pass;
# No longer in use.
#
#SET ACT-PRFP = PrfPrc - V-PASS;
# Appears in one rule.
#
#SET V-TRANS-ACT = TV - Pass ;
# All active verbs with a TV tag, including AUX-OR-MAIN.
#
#SET V-TRANS-ACT-NOT-AUX = TV - V-PASS - AUX ;

# Sets for verbs choosing oblique objects or adverbials
# - - - - - - - - - - - - - - - - - - - - - - - - - - -
#LIST INE-VERB =
#LIST COM-VERB =
# Base forms for the illative-verb set (see the section heading above).
LIST ILL-VERB = "riegádit" ;
#LIST ACC-LOC-VERB
#LIST ACC-ILL-VERB

# Other verb sets
# - - - - - - - -
# Base forms for the infinitive-verb set; presumably verbs that combine with
# an infinitive - confirm, no rule in this file uses the set.
LIST INF-VERB = "liehket" "galggat" "máhttet" "oadtjot" "sihtat" ;

#LIST ACC-INF-VERB =
# These are verbs taking accusative objects and infinitives;
# a subset of INF-VERB.
## Ii suova nieiddas náitalit.

#LIST OPRED-VERB =
# Verbs that occur with objects that have secondary predicates.

#LIST MOVEMENT-VERB = "mannat";
# These verbs typically combine with place adverbials in the genitive case
# (i.e. not accusative).

#LIST TOGETHER-VERB = "goallostuvvot" "beaškkehat" "bidjat" "čatnit" "suddat"
#     "heivet" "biddjojuvvot" "časkit" "geassit" "gullat" "rehkenastit"
#     "ordnet" "soahpat" "gávnnadit" "heivehit" "čatnat" "doaibmat" "iskat"
#     "laktit" "seaguhit";
# Danger: This list could be too long, in certain respects.
# TODO: Think of counter examples, i.e. sentences where we have these verbs, but
# where oktii is not an adverbial.
# NOUNS
# -----

# NP sets defined according to their morphosyntactic features
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# NOTE(review): the NP-HEAD-* definitions below mix "|" and "-" without
# grouping; whether "- RCmpnd" applies to the whole union or only to the last
# operand depends on the compiler's operator handling - confirm.
SET NP-HEAD = Pron | HNOUN ;
SET NP-HEAD-SG = (Pron Sg) | (Pron Sg3) | (N Sg) - RCmpnd;
SET NP-HEAD-PL = (Pron Pl) | (Pron Pl3) | (N Pl) - RCmpnd;
SET NP-HEAD-SG-NOM = (Pron Sg Nom) | (Pron Sg3 Nom) | (N Sg Nom) - RCmpnd;
SET NP-HEAD-PL-NOM = (Pron Pl Nom) | (Pron Pl3 Nom) | (N Pl Nom) - RCmpnd;
SET NP-HEAD-NOM = NP-HEAD-SG-NOM | NP-HEAD-PL-NOM;
SET NP-HEAD-ACC = (Pron Acc) | (N Acc) - RCmpnd;
SET NP-HEAD-GEN = (Pron Gen) | (N Gen) - RCmpnd;

# The PRE-NP-HEAD family of sets
# - - - - - - - - - - - - - - - -
# PRE-NP-HEAD: the readings that can precede a head inside an NP.
SET PRE-NP-HEAD = (Prop Attr) | (Prop @PROP>) | A | (Pron Pers Gen) | (N Gen)
    | Num | RCmpnd | CC | (Pron Dem) | (Pron Refl Gen) | (Pron Indef)
    | (PrfPrc @PrcN>) | (PrsPrc @PrcN>) ;
# NP-MEMBER: any pre-head element or any noun.
SET NP-MEMBER = PRE-NP-HEAD | N ;

# There must be a sensible naming convention for these sets...
SET NPNH = WORD - PRE-NP-HEAD ;#| ABBR ;
# NPNH = "NOT-PRE-NP-HEAD".
SET NPNHA = WORD - PRE-NP-HEAD - Adv ;
# NPNHA = "NOT (PRE-NP-HEAD-OR-ADV)"

# Not at all sure that we need the following four:
SET NPNHAI = WORD - PRE-NP-HEAD - Adv - Indef ;
# NPNHAI " "...-OR-INDEF"
SET NPNHAII = WORD - PRE-NP-HEAD - Adv - Indef - Ill ;
# Illative indir.obj.
SET NPNHAIIP = WORD - PRE-NP-HEAD - Adv - Indef - Ill - Pcle ;
SET NPNHC = WORD - PRE-NP-HEAD - Com ;
# "njaŋggofirpmiiguin bivdin"

# Other negatively defined sets
# - - - - - - - - - - - - - - -
SET NOT-A = WORD - A ;
SET NOT-ADV = WORD - Adv ;
SET NOT-ADV-PCLE = WORD - Adv - Pcle ;
SET NOT-N = WORD - N ;
SET NOT-NA = WORD - N - A ;
SET NOT-NP = VFIN | ConNeg | Inf | Pcle | Interj | CS ;
# The set NP-BORDER is bigger.
# Nominal sets defined according to their lexical properties # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #LIST MANNU = "ođđa#jage#mánnu" "ođđa#jagi#mánnu" # "guovva#mánnu" # "njukča#mánnu" "njuvčča#mánnu" "márs#mánnu" # "cuoŋo#mánnu" "cuoŋos#mánnu" "gáranas#mánnu" # "miesse#mánnu" "mái#mánnu" # "geasse#mánnu" # "suoidne#mánnu" # "borge#mánnu" "šnjilča#mánnu" "šnjilža#mánnu" # "čakča#mánnu" # "golggot#mánnu" # "skábma#mánnu" "november#mánnu" "ritne#mánnu" # "juovla#mánnu" ; #LIST AIGODAT = "áigodat" "stuoradigge#áigodat" "sámedigge#áigodat" # "guođohan#áigodat" "doaibma#áigodat" ; #LIST VAHKKU = "áigi" "beaivi" "jándor" "bodda" "čakča" "čakča#geassi" "čuohte#jahki" # "dálvi" "diibmu" "dbm" "geardi" "geassi" "giđđa" "hávvi" "idja" "jahki" # "jahke#čuohti" "mánnu" "riegádan#beaivi" "tiibmu" "vahkku" jahki # "stuorradigge#áigodat" ; # The quoteless jahki here is since the preprocessor is not fine-tuned # yet, it still gives '"1870-" # jahki N Sg Com'. # lets see if "dbm" works #LIST BEAIVI = "mánnodat" "vuoss#árga" "disdat" "maŋŋeb#árga" "gaska#vahkku" # "duorastat" "bearjadat" "lávvardat" "lávvordat" "sotna#beaivi" # "duopmo#beaivi" "sábbát"; # #SET TIME = MANNU | VAHKKU | BEAIVI ; # Do not add AIGODAT, it has some other characteristics in some cases. #SET NOTIME = N - TIME ; # #LIST GEASSET = "dálvet" "dálvit" "giđđat" "geasset" "geassit" "čakčat" ; # What is special here? # #LIST GUOVLU = "ássan#guovlu" "sámi#guovlu" "báiki" "gávpot" "guovlu" "sádji" # "viessu" ; # #LIST ROUTE = "fávli" "geaidnu" "guorra" "luodda" "miila"; # and others! # #LIST MEASURE = "miila" "kilo#mehtar" "mehtar" "mehter" "centi#mehtar" "sente" # "cm" "milli#mehtar" "tumá" "állan" "salla" "lihtter" "gilo" ; # #LIST PL-NOUN = "beassaš" "čalbme#láse" "gálssot" "headja" "sabet" "sisttet" # "skárri"; # Nominals that appear with plural numerals. 
#LIST PREGEN = ("álgogeahčen")("bealli" N Sg Loc)("gaskavuohta" N Sg Loc)("guoskevaš")
#     ("loahpageahčen")("miella" N Sg Loc)("oktavuohta" N Sg Loc);
# Nouns and adjectives that take a preceding genitive.

#LIST N-NOPX = "lohku";
# Nominals that probably do not have a Px ('1600-logus').

#LIST N-PX = "áhčči" "bárdni" "eadni" "nieida" "viellja" "oabbá";
# Nominals that well may have a Px.

#LIST ALU = ("allu" Gen)("assu" Gen)("čiekŋu" Gen)("gassu" Gen)("govdu" Gen)
#("guhkku" Gen)("lossu" Gen)("stuorru" Gen)("viidu" Gen);

#LIST TITLE = "ámta#mánni" "direktevra" "eanan#hearrá" ("gásttašit" eaddji) "gonagas"
#     "justitiarius" "korefereanta" "ossodat#direktevra"
#     "profehta" "professor" "prof" "ráđđe#addi" "stipendiáhta" "sundi"
#     "universitehtalektor" "vuosttas#konsuleanta" "cand.mag" "cand.oecon"
#     "cand.philol" "cand.real" "cand.scient" "cand.theol" "cand" "dr.art"
#     "eksp.hoavd" "eksp.sj" "fenr" "fig" "fil.tri" "gen.lt" "genr"
#     "genr.lt" "h.r.adv" "hr" "kapt" "korp" "lekt" "lic" "ltn" "mag.art"
#     "mag" "merc" "odont" "oecon" "ordf" "ossod.dir" "stud.phil."
#     "res.kap" "stud.philol" "vit.ass" "dr" "dr.med" "dr.philos" "dr.theol" ;
#
#LIST COUNTABLE = "oassi" "kapihttal" "paragráfa" ;
#LIST NUMMAR
#LIST KLASS
#LIST LAHKA
#SET NOT-LAHKA = N - LAHKA
#LIST FYLKA
#LIST INSTITUTION
#LIST HUMAN

# Miscellaneous sets
# ------------------
#LIST Clt = Qst Foc ;

# Px: any of the nine Px tags (PxSg1 ... PxPl3).
LIST Px = PxSg1 PxSg2 PxSg3 PxDu1 PxDu2 PxDu3 PxPl1 PxPl2 PxPl3 ;

#SET PPOS = Po | Pr ;
#LIST POST-A = "rájes" ;
#LIST POST-B = "rádjái" ;
#LIST WACKERNAGEL = "go" "ba" "bat" "be" "bahan" "son" ;
#LIST GASKAL = "gaskal" "gaskkal" "gaskii" "gaskka" "gaskkas" ;
# p-positions that like coordination

#SET NUMERALS = Num - OKTA ;
#SET NOT-NUMERALS = WORD - Num ;
#LIST MANGA = "máŋga" "galle" ;
# Not referred to by any rule.
#SET CARDINALS = Num - Ord - MANGA ;
#SET NOT-CC = WORD - CC ;
#SET NOT-PCLE = WORD - Pcle ;

#LIST COMPAR = "dávjjit" "eanet" "earalágan" "eará" "earret" "seammás"
#     "seammalágan" "seammalágán" "seammaláhkai";
# These combine with "go" Pcle, but COMPAR stands for 'comparison' rather than
# 'comparative'.

#LIST CONTRA = "muhto" ;
# In lean muitalan, muhto dál muitalan.

#LIST PROSEANTA = "proseanta" "%" ;

# Clause boundaries other than the comma.
SET REALCLB = CLB - COMMA ;

# Border sets and their complements
# ---------------------------------
# CP: interrogative and relative pronouns plus the MANNE question words.
SET CP = (Pron Interr) | (Pron Rel) | MANNE ;

# NP-BORDER: readings that cannot be inside an NP.
SET NP-BORDER = BOS | EOS | REALCLB | VFIN | Inf | (Actio Ess) | ConNeg | VGen
    | Sup | PPRON-NOT-GEN | Recipr | Po | Pr | Pcle | ("jed") | Interj | CS
    | CP ;
# CLB removed from set in order to allow for NP-internal commas.
# Refl is removed because of "sin iežaset vieruid" etc.

# Syntactic sets
# --------------
# ALLSYNTAG lists every syntactic tag that is declared as a live LIST in this
# file, including @NNum> and @NumN<. The tags named in the trailing comment
# are deliberately left out.
LIST ALLSYNTAG = (@+FAUXV) (@+FMAINV) (@-FAUXV) (@-FMAINV) (@-FSUBJ) (@ADV-A)
     (@ADV-ADV) (@ADVL) (@AN>) (@ActioN>) (@CC) (@CMPND) (@CS) (@DN>) (@GA>)
     (@GN>) (@GP<) (@GP>) (@HNOUN) (@INTERJ) (@MEASURE) (@NNum>) (@NumN<)
     (@NPron<) (@NQ<) (@NUM-PRON) (@OBJ) (@OPRED) (@PCLE) (@PCLE-COMPL)
     (@PROP>) (@PrcN>) (@PronN<) (@PronN>) (@QN<) (@QN>) (@SPRED) (@SUBJ)
     (@SUBJ-QH) (@TITLE) (@VOC) (@X);
# (@TIME)(@NUMBER)

# SET ASSIGNEDSYN = ALLSYNTAG - @X ;
# does not work.

# ============ #
# Rule section #
# ============ #
  CONSTRAINTS  #
# ============ #

# This dialogue can be disambiguated as follows:
# In line 3, "le" is 3rd person because "ællo" is the subject.
# In line 4, "le" is 3rd person because "le" in the preceding sentence is
# 3rd person.
## Le gus dån oahpes dán bájkkáj?
## Na lev.
## Le gus ællo gárden?
## Le.
## Njuovadihppe gus?
## Njuovadin.
#SELECT (V Sg3) IF
#SELECT (V Sg2) IF (*-1 (Pron Pers Nom Sg2));

# Select the 3rd person dual verb reading when a dual nominative pronoun
# occurs somewhere to the left. The leftward scan stops at any NP member in
# the nominative.
# The barrier is written as the set expression NP-MEMBER + Nom: inside
# parentheses NP-MEMBER would be read as a literal tag rather than as the
# set of that name, so the barrier would never fire.
SELECT (V Du3) IF (*-1 (Pron Nom Du3) BARRIER NP-MEMBER + Nom);

# Select the 3rd person dual verb reading after two coordinated nominative
# nouns/adjectives (X CC Y) to the left.
# NOTE(review): the BARRIER below follows a non-scanning "LINK 1" position.
# Judging from the explanation after the rule, the intent is "not followed
# by a further CC + nominative noun/adjective"; confirm how the compiler in
# use treats this and rewrite as an explicit negative condition if needed.
SELECT (V Du3) IF (*-3 (N Nom) OR (A Nom) LINK 1 CC LINK 1 (N Nom) OR (A Nom) BARRIER CC LINK 1 (N Nom) OR (A Nom));

# If there is no construction in either the same sentence or a sentence
# before, remove Dual. That construction can either consist of
#   a) the dual pronoun in nominative
#   b) two coordinated nouns that do not have a third CC with a noun or
#      adjective in nominative after
#   c) a guovttin construction (which unfortunately I do not know in
#      Lule Sámi)

# Select the Sg2 present indicative reading of the copula when the same
# reading occurs earlier, to the left.
# NOTE(review): the base form is written "liehkket" here but "liehket" in
# INF-VERB; one of the two spellings presumably never matches - confirm
# against the analyser output.
SELECT ("liehkket" Ind Prs Sg2) IF (*-1 ("liehkket" Ind Prs Sg2));

## Lev gå mus telefåvnnå?
## De le.

END