# Divvun & Giellatekno - open source grammars for Sámi and other languages
# Copyright © 2000-2010 The University of Tromsø & the Norwegian Sámi Parliament
# http://giellatekno.uit.no & http://divvun.no
#
# This program is free software; you can redistribute and/or modify
# this file under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version. The GNU General Public License
# is found at http://www.gnu.org/licenses/gpl.html. It is
# also available in the file $GTHOME/LICENSE.txt.
#
# Other licensing options are available upon request, please contact
# giellatekno@hum.uit.no or divvun@samediggi.no

# ============================================================ #
# ============================================================ #
#  L U L E   S Á M I   T E X T   D I S A M B I G U A T O R     #
# ============================================================ #
# ============================================================ #

#    Sámi language technology project 2003-2006, University of Tromsø  #

            # =========================================== #
            #            Short table of contents          #
            # =========================================== #

# Usage:
# First run through smj-dis.rle, then run through smj-tdis.rle:

# preprocess text | lookup -flags mbTT -utf8 bin/smj.fst | lookup2cg |
# vislcg --grammar=src/smj-dis.rle | 
# vislcg --grammar=src/smj-tdis.rle --minimal | less


# ========== #
# Delimiters #
# ========== #


# The paragraph mark word form "<¶>" is the only window delimiter.
DELIMITERS = "<¶>" ;


# ============= #
# Tags and sets #
# ============= #


# ========
   SETS
# ========

# Beginning-of-sentence and end-of-sentence markers. Two notations are listed
# for each so that the grammar can be used with vislcg and CG-2 together.
LIST BOS = (>>>) (<s>) ;
LIST EOS = (<<<) (</s>) ;

# Tags declared as single-membered LISTs 
# ======================================


# Parts of speech
# ---------------

# One single-member LIST per part-of-speech tag, so that every tag can be
# referred to by name in set expressions and rules.
LIST N    = N ;
LIST A    = A ;
LIST Adv  = Adv ;
LIST V    = V ;
LIST Pron = Pron ;
LIST CS   = CS ;
LIST CC   = CC ;
LIST Po   = Po ;
LIST Pr   = Pr ;
LIST Pcle = Pcle ;
LIST Num  = Num ;
LIST Prop = Prop ;

# POS sub-categories
# ------------------

# One single-member LIST per sub-category tag.
LIST Pers   = Pers ;
LIST Dem    = Dem ;
LIST Interr = Interr ;
LIST Indef  = Indef ;
LIST Recipr = Recipr ;
LIST Refl   = Refl ;
LIST Rel    = Rel ;
LIST Interj = Interj ;
LIST Adp    = Adp ;


# Morphosyntactic properties
# --------------------------

# Every morphosyntactic tag is declared as a single-member LIST of the same
# name. The grouping below follows the original layout of the declarations.

# Number
LIST Sg = Sg ;
LIST Pl = Pl ;

# Case
LIST Nom = Nom ;
LIST Gen = Gen ;
LIST Acc = Acc ;
LIST Ill = Ill ;
LIST Ine = Ine ;
LIST Ela = Ela ;
LIST Com = Com ;
LIST Ess = Ess ;
LIST Par = Par ;
LIST Abe = Abe ;

# Compound-related tags
LIST SgCmp    = SgCmp ;
LIST SgNomCmp = SgNomCmp ;
LIST SgGenCmp = SgGenCmp ;
LIST PlGenCmp = PlGenCmp ;
LIST RCmpnd   = RCmpnd ;

# Px tags (collected in the LIST Px further down)
LIST PxSg1 = PxSg1 ;
LIST PxSg2 = PxSg2 ;
LIST PxSg3 = PxSg3 ;
LIST PxDu1 = PxDu1 ;
LIST PxDu2 = PxDu2 ;
LIST PxDu3 = PxDu3 ;
LIST PxPl1 = PxPl1 ;
LIST PxPl2 = PxPl2 ;
LIST PxPl3 = PxPl3 ;

# Miscellaneous properties
LIST Comp   = Comp ;
LIST Superl = Superl ;
LIST Actor  = Actor ;
LIST Actio  = Actio ;
LIST Clt    = Clt ;
LIST Attr   = Attr ;
LIST Card   = Card ;
LIST Ord    = Ord ;
LIST Date   = Date ;
LIST Range  = Range ;
LIST Dim    = Dim ;
LIST Inch   = Inch ;
LIST Pass   = Pass ;

# Finite verb properties
LIST Prs     = Prs ;
LIST Prt     = Prt ;
LIST Ind     = Ind ;
LIST Pot     = Pot ;
LIST Cond    = Cond ;
LIST Imprt   = Imprt ;
LIST ImprtII = ImprtII ;
LIST Sup     = Sup ;
LIST IV      = IV ;
LIST TV      = TV ;

# Person-number tags
LIST Sg1 = Sg1 ;
LIST Sg2 = Sg2 ;
LIST Sg3 = Sg3 ;
LIST Du1 = Du1 ;
LIST Du2 = Du2 ;
LIST Du3 = Du3 ;
LIST Pl1 = Pl1 ;
LIST Pl2 = Pl2 ;
LIST Pl3 = Pl3 ;

# Remaining verb form tags
LIST Inf    = Inf ;
LIST Ger    = Ger ;
LIST GerII  = GerII ;
LIST ConNeg = ConNeg ;
LIST Neg    = Neg ;
LIST PrsPrc = PrsPrc ;
LIST PrfPrc = PrfPrc ;
LIST VGen   = VGen ;
LIST VAbess = VAbess ;

# Boundary and punctuation tags
LIST CLB   = CLB ;
LIST PUNCT = PUNCT ;
LIST HYPH  = HYPH ;
LIST PAR   = PAR ;


# Derivation
# ----------

# One single-member LIST per derivation tag (Der/...).
# The former declaration "Der/ájvukDer/såjak" was two tags fused into one
# token, which could never match a reading; it is split into Der/ájvuk and
# Der/såjak below.
LIST Der/agák = Der/agák ; LIST Der/ahkes = Der/ahkes ; 
LIST Der/ahtes = Der/ahtes ; LIST Der/ahtte = Der/ahtte ;
LIST Der/alla = Der/alla ; LIST Der/asste = Der/asste ;
LIST Der/belak = Der/belak ; LIST Der/bieljak = Der/bieljak ;
LIST Der/báguk = Der/báguk ; LIST Der/bájnuk = Der/bájnuk ;
LIST Der/bának = Der/bának ; LIST Der/d = Der/d ;
LIST Der/dahka = Der/dahka ; LIST Der/dahtte = Der/dahtte ;
LIST Der/dalla = Der/dalla ; LIST Der/dasste = Der/dasste ;
LIST Der/dibme = Der/dibme ; LIST Der/duhtte = Der/duhtte ;
LIST Der/duvva = Der/duvva ; LIST Der/dábak = Der/dábak ;
LIST Der/dáfuk = Der/dáfuk ; LIST Der/dávdak = Der/dávdak ;
LIST Der/dávtak = Der/dávtak ; LIST Der/ferjak = Der/ferjak ;
LIST Der/garák = Der/garák ; LIST Der/gattsak = Der/gattsak ;
LIST Der/gerdak = Der/gerdak ; LIST Der/giedak = Der/giedak ;
LIST Der/gielak = Der/gielak ; LIST Der/giesjkak = Der/giesjkak ;
LIST Der/gietjak = Der/gietjak ; LIST Der/girjak = Der/girjak ;
LIST Der/guovluk = Der/guovluk ; LIST Der/h = Der/h ;
LIST Der/habsak = Der/habsak ; LIST Der/hekkak = Der/hekkak ;
LIST Der/hábjak = Der/hábjak ; LIST Der/hájak = Der/hájak ; 
LIST Der/hámak = Der/hámak ; LIST Der/hápsak = Der/hápsak ;
LIST Der/ivnak = Der/ivnak ; LIST Der/jagák = Der/jagák ;
LIST Der/jergak = Der/jergak ; LIST Der/jienak = Der/jienak ;
LIST Der/jiermak = Der/jiermak ; LIST Der/juolgak = Der/juolgak ;
LIST Der/l = Der/l ; LIST Der/ladda = Der/ladda ;
LIST Der/lahkak = Der/lahkak ; LIST Der/lahtte = Der/lahtte ;
LIST Der/lanják = Der/lanják ; LIST Der/lasj = Der/lasj ;
LIST Der/lasste = Der/lasste ; LIST Der/lijkak = Der/lijkak ;
LIST Der/linnjak = Der/linnjak ; LIST Der/lis = Der/lis ;
LIST Der/luonduk = Der/luonduk ; LIST Der/luottak = Der/luottak ;
LIST Der/lágán = Der/lágán ; LIST Der/lágásj = Der/lágásj ;
LIST Der/merkak = Der/merkak ; LIST Der/mielak = Der/mielak ;
LIST Der/muoduk = Der/muoduk ; LIST Der/mus = Der/mus ;
LIST Der/n = Der/n ; LIST Der/namák = Der/namák ;
LIST Der/nierak = Der/nierak ; LIST Der/njunjak = Der/njunjak ;
LIST Der/njálmak = Der/njálmak ; LIST Der/nálak = Der/nálak ;
LIST Der/r = Der/r ; LIST Der/rabdak = Der/rabdak ;
LIST Der/rattak = Der/rattak ; LIST Der/rávak = Der/rávak ;
LIST Der/segak = Der/segak ; LIST Der/siejbak = Der/siejbak ;
LIST Der/siejnak = Der/siejnak ; LIST Der/sijduk = Der/sijduk ;
LIST Der/st = Der/st ; LIST Der/stahtte = Der/stahtte ;
LIST Der/stalla = Der/stalla ;
LIST Der/stasste = Der/stasste ; LIST Der/stával = Der/stával ;
LIST Der/suorak = Der/suorak ; LIST Der/suorgak = Der/suorgak ;
LIST Der/suttak = Der/suttak ; LIST Der/sáv'vak = Der/sáv'vak ;
LIST Der/tjalmak = Der/tjalmak ; LIST Der/tjiegak = Der/tjiegak ;
LIST Der/tjuolmak = Der/tjuolmak ; LIST Der/tjuvdak = Der/tjuvdak ;
LIST Der/tjålak = Der/tjålak ; LIST Der/tjårvak = Der/tjårvak ;
LIST Der/u/a/åd = Der/u/a/åd ; LIST Der/us = Der/us ;
LIST Der/uvsak = Der/uvsak ; LIST Der/uvsuk = Der/uvsuk ;
LIST Der/varák = Der/varák ; LIST Der/vigák = Der/vigák ;
LIST Der/vuobdak = Der/vuobdak ; LIST Der/vuoduk = Der/vuoduk ;
LIST Der/vuohta = Der/vuohta ; LIST Der/vájmuk = Der/vájmuk ;
LIST Der/ájvuk = Der/ájvuk ; LIST Der/såjak = Der/såjak ;
LIST Der/åjvak = Der/åjvak ; LIST Der/åsak = Der/åsak ;

# DERIVED: any reading that carries one of the derivation tags.
# The members are written as bare tags. In double quotes they would be
# base forms (lemmas), and no lemma is spelled "Der/...", so the list would
# never match. The fused entry "Der/ájvukDer/såjak" is split into its two
# tags Der/ájvuk and Der/såjak.
LIST DERIVED =
  Der/agák Der/ahkes Der/ahtes Der/ahtte Der/alla Der/asste Der/belak
  Der/bieljak Der/báguk Der/bájnuk Der/bának Der/d Der/dahka Der/dahtte
  Der/dalla Der/dasste Der/dibme Der/duhtte Der/duvva Der/dábak
  Der/dáfuk Der/dávdak Der/dávtak Der/ferjak Der/garák Der/gattsak
  Der/gerdak Der/giedak Der/gielak Der/giesjkak Der/gietjak Der/girjak
  Der/guovluk Der/h Der/habsak Der/hekkak Der/hábjak Der/hájak Der/hámak
  Der/hápsak Der/ivnak Der/jagák Der/jergak Der/jienak Der/jiermak
  Der/juolgak Der/l Der/ladda Der/lahkak Der/lahtte Der/lanják Der/lasj
  Der/lasste Der/lijkak Der/linnjak Der/lis Der/luonduk Der/luottak
  Der/lágán Der/lágásj Der/merkak Der/mielak Der/muoduk Der/mus Der/n
  Der/namák Der/nierak Der/njunjak Der/njálmak Der/nálak Der/r
  Der/rabdak Der/rattak Der/rávak Der/segak Der/siejbak Der/siejnak
  Der/sijduk Der/st Der/stahtte Der/stalla Der/stasste Der/stával
  Der/suorak Der/suorgak Der/suttak Der/sáv'vak Der/tjalmak Der/tjiegak
  Der/tjuolmak Der/tjuvdak Der/tjålak Der/tjårvak Der/u/a/åd Der/us
  Der/uvsak Der/uvsuk Der/varák Der/vigák Der/vuobdak Der/vuoduk
  Der/vuohta Der/vájmuk Der/ájvuk Der/såjak Der/åjvak Der/åsak ;
  

# Syntactic tags
# --------------

# One single-member LIST per syntactic (@-prefixed) tag.

# Verb functions
LIST @+FAUXV  = @+FAUXV ;
LIST @+FMAINV = @+FMAINV ;
LIST @-FAUXV  = @-FAUXV ;
LIST @-FMAINV = @-FMAINV ;
LIST @-FSUBJ  = @-FSUBJ ;

# Adverbial functions
LIST @ADV-A   = @ADV-A ;
LIST @ADV-ADV = @ADV-ADV ;
LIST @ADVL    = @ADVL ;

# Remaining functions, in the original order of declaration
LIST @AN>        = @AN> ;
LIST @ActioN>    = @ActioN> ;
LIST @CC         = @CC ;
LIST @CMPND      = @CMPND ;
LIST @CS         = @CS ;
LIST @DN>        = @DN> ;
LIST @GA>        = @GA> ;
LIST @GN>        = @GN> ;
LIST @GP<        = @GP< ;
LIST @GP>        = @GP> ;
LIST @HNOUN      = @HNOUN ;
LIST @INTERJ     = @INTERJ ;
LIST @MEASURE    = @MEASURE ;
LIST @NNum>      = @NNum> ;
LIST @NumN<      = @NumN< ;
LIST @NPron<     = @NPron< ;
LIST @NQ<        = @NQ< ;
LIST @NUM-PRON   = @NUM-PRON ;
#LIST @NUMBER = @NUMBER ;
LIST @OBJ        = @OBJ ;
LIST @OPRED      = @OPRED ;
LIST @PCLE       = @PCLE ;
LIST @PCLE-COMPL = @PCLE-COMPL ;
LIST @PROP>      = @PROP> ;
LIST @PrcN>      = @PrcN> ;
LIST @PronN<     = @PronN< ;
LIST @PronN>     = @PronN> ;
LIST @QN<        = @QN< ;
LIST @QN>        = @QN> ;
LIST @SPRED      = @SPRED ;
LIST @SUBJ       = @SUBJ ;
LIST @SUBJ-QH    = @SUBJ-QH ;

LIST @TITLE = @TITLE ;
LIST @VOC   = @VOC ;
LIST @X     = @X ;


# Semantic tags
# -------------

# One single-member LIST per semantic tag.
# The keyword for Org is written LIST in upper case like every other
# declaration in this file (it used to be spelled "List").
LIST Ani = Ani ; LIST Fem = Fem ; LIST Mal = Mal ;
LIST Obj = Obj ; LIST Org = Org ; LIST Plc = Plc ;
LIST Sur = Sur ; LIST Tit = Tit ;

# Other tags
# ----------

# Tags for abbreviations and acronyms.
LIST ABBR = ABBR ;
LIST ACR  = ACR ;


# Single-word sets
# ----------------

# COMMA matches the base form ","; ¶ matches the bare tag ¶.
LIST COMMA = "," ;
LIST ¶ = ¶ ;

#LIST OKTA = "okta"; LIST gå = "gå" ; 

# Initials
# --------

# Single-letter base forms (lower case only).
LIST INITIAL =
    "a" "b" "c" "d" "e" "f" "g" "h" "i" "j" "k" "l" "m"
    "n" "o" "p" "q" "r" "s" "t" "u" "v" "w" "x" "y" "z"
    "á" "æ" "ø" "å" "ö" "ä" ;


# ======== #
# Tag sets #
# ======== #
   SETS    #
# ======== #

# WORD: any word, i.e. any reading with one of the tags below. The
# negatively defined sets (NOT-..., NPNH...) are all built as WORD minus
# something.
# NOTE(review): Num is not a member, so readings tagged only Num are outside
# WORD and all of its complements - confirm that this is intended.
LIST WORD = N A Adv V Pron
            CS CC Adp Po Pr Interj Pcle
            Pers Dem Interr Refl Recipr Rel Indef ;

# Case sets
# ---------
#LIST ADVLCASE = Ill Loc Com Ess ;

#LIST CASE = Nom Acc Gen Ill Loc Com Ess ;

#SET NOT-NOM = CASE - Nom ;
#SET NOT-GEN = CASE - Gen ;
#SET NOT-ACC = CASE - Acc ;

# Noun sets
# ---------

# Nouns by number and case.
LIST N-SG-NOM = (N Sg Nom) ;
LIST N-PL-NOM = (N Pl Nom) ;
LIST N-NOM    = (N Nom) ;
LIST N-GEN    = (N Gen) ;

#SET LEX-N = N - N-DER;

# PROP matches readings tagged N and Prop; CNOUN is every N reading without
# Prop. HNOUN leaves out readings tagged RCmpnd.
LIST PROP         = (N Prop) ;
SET  CNOUN        = N - Prop ;
SET  CNOUN-PL-NOM = N-PL-NOM - Prop ;
SET  HNOUN        = N - RCmpnd ;
SET  HNOUN-NOM    = N-NOM - RCmpnd ;
SET  N-NOT-GEN    = N - N-GEN ;

# Verb sets
# ---------
# Finiteness and mood
# - - - - - - - - - -

# Problem: "In boahtán" is an invisible indicative, which V-IND-FIN cannot see.
SET V-IND-FIN    = Prs | Prt ;
SET V-MOOD       = V-IND-FIN | Pot | Imprt | Cond ;
# Finite forms: V-MOOD without ConNeg; VFIN-POS also drops Neg.
SET VFIN         = V-MOOD - ConNeg ;
SET VFIN-POS     = VFIN - Neg ;
SET VFIN-NOT-IMP = VFIN - Imprt ;


# Person
# - - - - 

# One LIST per person-number value. Each list holds the same six tag
# combinations: Ind Prs, Ind Prt, Cond Prs, Cond Prt, Pot Prs and Neg Ind.
LIST V-SG1 = (V Ind Prs Sg1)  (V Ind Prt Sg1)
             (V Cond Prs Sg1) (V Cond Prt Sg1)
             (V Pot Prs Sg1)  (V Neg Ind Sg1) ;
LIST V-SG2 = (V Ind Prs Sg2)  (V Ind Prt Sg2)
             (V Cond Prs Sg2) (V Cond Prt Sg2)
             (V Pot Prs Sg2)  (V Neg Ind Sg2) ;
LIST V-SG3 = (V Ind Prs Sg3)  (V Ind Prt Sg3)
             (V Cond Prs Sg3) (V Cond Prt Sg3)
             (V Pot Prs Sg3)  (V Neg Ind Sg3) ;
LIST V-DU1 = (V Ind Prs Du1)  (V Ind Prt Du1)
             (V Cond Prs Du1) (V Cond Prt Du1)
             (V Pot Prs Du1)  (V Neg Ind Du1) ;
LIST V-DU2 = (V Ind Prs Du2)  (V Ind Prt Du2)
             (V Cond Prs Du2) (V Cond Prt Du2)
             (V Pot Prs Du2)  (V Neg Ind Du2) ;
LIST V-DU3 = (V Ind Prs Du3)  (V Ind Prt Du3)
             (V Cond Prs Du3) (V Cond Prt Du3)
             (V Pot Prs Du3)  (V Neg Ind Du3) ;
LIST V-PL1 = (V Ind Prs Pl1)  (V Ind Prt Pl1)
             (V Cond Prs Pl1) (V Cond Prt Pl1)
             (V Pot Prs Pl1)  (V Neg Ind Pl1) ;
LIST V-PL2 = (V Ind Prs Pl2)  (V Ind Prt Pl2)
             (V Cond Prs Pl2) (V Cond Prt Pl2)
             (V Pot Prs Pl2)  (V Neg Ind Pl2) ;
LIST V-PL3 = (V Ind Prs Pl3)  (V Ind Prt Pl3)
             (V Cond Prs Pl3) (V Cond Prt Pl3)
             (V Pot Prs Pl3)  (V Neg Ind Pl3) ;

 # Note that imperative verbs are not included in these sets!


# Some subsets of the VFIN sets
# - - - - - - - - - - - - - - -

# Unions of the person-number lists.
SET V-DU    = V-DU1 | V-DU2 | V-DU3 ;
SET V-PL    = V-PL1 | V-PL2 | V-PL3 ;
SET V-DU-PL = V-DU | V-PL ;

# V-NOT-SG1 is a difference on VFIN-NOT-IMP, whereas V-NOT-SG3 is a plain
# union of the other eight person-number lists.
SET V-NOT-SG1 = VFIN-NOT-IMP - V-SG1 ;
SET V-NOT-SG3 = V-SG1 | V-SG2 | V-DU | V-PL ;

# First and second person versus third person.
SET V-1-2 = V-SG1 | V-SG2 | V-DU1 | V-DU2 | V-PL1 | V-PL2 ;
SET V-3   = V-SG3 | V-DU3 | V-PL3 ;

# Sets consisting of forms of "leat" (these ones need to be rewritten)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Pronoun sets
# ------------

SETS

# Nominative personal pronouns, one set per person-number value.
# NOTE(review): SON and SII require the base form "son"; the example
# dialogue in the rule section uses Lule Sámi "dån", so the lemma produced
# by the analyser may be spelled differently - confirm.
SET MUN      = (Pron Pers Sg1 Nom) ;
SET DON      = (Pron Pers Sg2 Nom) ;
SET SON      = ("son" Pron Pers Sg3 Nom) ;
SET MOAI     = (Pron Pers Du1 Nom) ;
SET DOAI     = (Pron Pers Du2 Nom) ;
SET SOAI     = (Pron Pers Du3 Nom) ;
SET MII-PERS = (Pron Pers Pl1 Nom) ;
SET DII      = (Pron Pers Pl2 Nom) ;
SET SII      = ("son" Pron Pers Pl3 Nom) ;

# All dual and plural nominative personal pronouns.
SET PPRON-DU-PL = MOAI | DOAI | SOAI | MII-PERS | DII | SII ;

# Every nominative personal pronoun except SII.
SET PPRON-NOT-SII = MUN | DON | SON | MOAI | DOAI | SOAI | MII-PERS | DII ;

# Person-number tag combined with Gen; the members do not require Pron.
LIST PPRON-GEN = (Sg1 Gen) (Sg2 Gen) (Sg3 Gen)
                 (Du1 Gen) (Du2 Gen) (Du3 Gen)
                 (Pl1 Gen) (Pl2 Gen) (Pl3 Gen) ;

SET PPRON-NOT-GEN = (Pron Pers) - PPRON-GEN ;

# Nominative demonstratives, and every WORD reading that is not Dem.
SET DEM-SG  = (Pron Dem Sg Nom) ;
SET DEM-PL  = (Pron Dem Pl Nom) ;
SET NOT-DEM = WORD - Dem ;


# Adjectival sets and their complements
# -------------------------------------

# Originally marked "Used in one rule. Not needed?"; no rule in this file
# refers to A-ATTR.
SET A-ATTR = (A Attr) ;

#SET LEX-A = A - A-DER ;

# A readings that are neither Attr nor Adv.
SET A-CASE = A - Attr - Adv ;

# Either an adjective or a coordinating conjunction.
LIST A-CC = A CC ;

# Complements, all taken relative to WORD.
SET NOT-ADJ      = WORD - A ;
SET NOT-Attr     = WORD - Attr ;
SET NOT-ADJ-PCLE = WORD - A - Pcle ;
SET NOT-ADJ-CC   = WORD - A-CC ;

SET NOT-AA       = WORD - A - Adv ;

#LIST NOMINAL-ADJ = "guoktilaš" "lámis" "oasálaš" ("suddu" laš) "viissis"; 
 # and many others

# Adverbial sets and their complements
# ------------------------------------

# Base forms that make up the MANNE set; it is used in the CP set below.
LIST MANNE = "gåsi" "goasse" "goassa" "goassis"
             "gåkku" "gånne" "gåste"
             "makkir" "makkár"
             "manne" "manen" "mannas" ;

# Sets for adverbs that have lookalikes
# - - - - - - - - - - - - - - - - - - -
 # Here come some adverbs that have identical twins in other POS. 
 # If these are found in Adv contexts, we treat them as adverbs.

#LIST ADV-NOT-VERB = "dušše" ;

#LIST ADV-NOT-NOUN = "easka" "várra" ;

#LIST ADV-NOT-NAMES = "Ain" "Anne" "Diego" "Dieppe" "Enge" "Galle" "Haga"
                     # "Joba" "Johan" "Liikka" "Mai" "Mannes" "Mo" "Mot" "Naba"
                     # "Nan" "Oktan" "Sierra" "Sokka" "Villa" ;

#SET ADV-NOT-OTHER-POS = ADV-NOT-VERB | ADV-NOT-NOUN | ADV-NOT-NAMES ;


# Sets of elements with common syntactic behaviour
# ================================================
# VERBS
# -----
# V is all readings with a V tag in them; VERB should be the ones without an
# N tag following the V.
SET VERB = V - (V N) ;

#SET LEX-V = VERB - V-DER;             # VERB but not V-DER

# Not in use.
SET V-NOT-CMPND = V - RCmpnd ;


# Copula base forms. 'Dáidit' can appear without 'leat'.
# NOTE(review): INF-VERB further down spells the copula "liehket"; confirm
# that the base forms listed here are the ones the analyser outputs.
LIST COPULAS = "dáidit" "bissut" "gártat" "leahkit"
               "leat" "orrut" "šaddat" ;

SET V-NOT-COP = V - COPULAS ;

#LIST MOD-ASP = "berret" "dáidit" "fertet" "galgat" "gillet" "lávet" "sáhttit" 
#		"seahtit" "soaitit" "viggat" ;
 # Verbs that never have arguments of their own - so we leave out "áigut", for ex.

#LIST AUX-OR-MAIN = "áigut" "beassat" "hálidit" "háliidit" "máhttit" "nagodit" 
#			"orrut" "sihtat" "veadjit";
 # These verbs can take arguments, so they do not belong in the AUX group, 
 # but they are nevertheless mapped to (@+FAUXV). 

#SET AUX = COPULAS | Neg | MOD-ASP | AUX-OR-MAIN ; 

#SET V-NOT-AUX = V - AUX ;  # The set of verbs that cannot be auxiliaries. 
			   # Does not include AUX-OR-MAIN.

#SET VFIN-NOT-AUX = VFIN - AUX ;
#
#SET V-MAIN = V - COPULAS - Neg - MOD-ASP | ("orrut") ;  # The set of verbs that can be 
#							# main verbs. Includes 
#							# AUX-OR-MAIN.
#
#SET V-DER-PASS = V + V-DER + Pass;    # + is intersection, this should work
#				      # The set is used in one rule with no hits.
#	
#SET V-PASS = (V TV Pass);
#
#SET V-NOT-AUX-PASS = V - AUX - V-PASS ; # No longer in use.
#
#SET V-ACT = V - Pass; # No longer in use.
#
#SET ACT-PRFP = PrfPrc - V-PASS; # Appears in one rule.
#
#SET V-TRANS-ACT = TV - Pass ; 
# # All active verbs with a TV tag, including AUX-OR-MAIN.
# 
#SET V-TRANS-ACT-NOT-AUX  = TV - V-PASS - AUX ;

# Sets for verbs choosing oblique objects or adverbials
# - - - - - - - - - - - - - - - - - - - - - - - - - - - 

#LIST INE-VERB =
#LIST COM-VERB = 

# Single-member list of verb base forms; by its name and section heading,
# presumably verbs that select an illative (Ill) argument - confirm.
LIST ILL-VERB =
    "riegádit" ;

#LIST ACC-LOC-VERB
#LIST ACC-ILL-VERB


# Other verb sets
# - - - - - - - -

# Verb base forms; by its name, presumably verbs that combine with an
# infinitive - confirm. No rule in this file refers to the set.
LIST INF-VERB =
    "liehket"
    "galggat"
    "máhttet"
    "oadtjot"
    "sihtat" ;
#LIST ACC-INF-VERB = 


 # These are verbs taking accusative objects and infinitives; 
 # a subset of INF-VERB.
	## Ii suova nieiddas náitalit.


#LIST OPRED-VERB = 
 # Verbs that occur with objects that have secondary predicates.

#LIST MOVEMENT-VERB = "mannat";
 # These verbs typically combine with place adverbials in the genitive case 
 # (i.e. not accusative).

#LIST TOGETHER-VERB = "goallostuvvot" "beaškkehat" "bidjat" "čatnit" "suddat"  "heivet" "biddjojuvvot" "časkit" "geassit" "gullat" "rehkenastit" "ordnet" "soahpat" "gávnnadit" "heivehit" "čatnat" "doaibmat" "iskat" "laktit" "seaguhit";
 # Danger: This list could be too long, in certain respects. 
 # TODO: Think of counter examples, i.e. sentences where we have these verbs, but
 # where oktii is not an adverbial.


# NOUNS
# -----
# NP sets defined according to their morphosyntactic features
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 

# Possible NP heads: pronouns, and nouns that are not tagged RCmpnd.
# The operator chains are kept exactly as written; only the layout changed.
SET NP-HEAD        = Pron | HNOUN ;

# By number.
SET NP-HEAD-SG     = (Pron Sg) | (Pron Sg3) | (N Sg) - RCmpnd ;
SET NP-HEAD-PL     = (Pron Pl) | (Pron Pl3) | (N Pl) - RCmpnd ;

# Nominative heads.
SET NP-HEAD-SG-NOM = (Pron Sg Nom) | (Pron Sg3 Nom) | (N Sg Nom) - RCmpnd ;
SET NP-HEAD-PL-NOM = (Pron Pl Nom) | (Pron Pl3 Nom) | (N Pl Nom) - RCmpnd ;
SET NP-HEAD-NOM    = NP-HEAD-SG-NOM | NP-HEAD-PL-NOM ;

# Accusative and genitive heads.
SET NP-HEAD-ACC    = (Pron Acc) | (N Acc) - RCmpnd ;
SET NP-HEAD-GEN    = (Pron Gen) | (N Gen) - RCmpnd ;

# The PRE-NP-HEAD family of sets
# - - - - - - - - - - - - - - - -
# PRE-NP-HEAD: the union of the readings listed below, one alternative per
# line.
SET PRE-NP-HEAD = (Prop Attr)
                | (Prop @PROP>)
                | A
                | (Pron Pers Gen)
                | (N Gen)
                | Num
                | RCmpnd
                | CC
                | (Pron Dem)
                | (Pron Refl Gen)
                | (Pron Indef)
                | (PrfPrc @PrcN>)
                | (PrsPrc @PrcN>) ;

# NP-MEMBER: PRE-NP-HEAD plus any noun.
SET NP-MEMBER = PRE-NP-HEAD | N ;

# There must be a sensible naming convention for these sets...
# NPNH = "NOT-PRE-NP-HEAD".
SET NPNH     = WORD - PRE-NP-HEAD ;   #| ABBR ;
# NPNHA = "NOT (PRE-NP-HEAD-OR-ADV)".
SET NPNHA    = WORD - PRE-NP-HEAD - Adv ;

# Not at all sure that we need the following four:
# NPNHAI: "...-OR-INDEF".
SET NPNHAI   = WORD - PRE-NP-HEAD - Adv - Indef ;
# NPNHAII: also minus Ill (illative indirect object).
SET NPNHAII  = WORD - PRE-NP-HEAD - Adv - Indef - Ill ;
# NPNHAIIP: also minus Pcle.
SET NPNHAIIP = WORD - PRE-NP-HEAD - Adv - Indef - Ill - Pcle ;
# NPNHC: minus Com, cf. "njaŋggofirpmiiguin bivdin".
SET NPNHC    = WORD - PRE-NP-HEAD - Com ;

# Other negatively defined sets
# - - - - - - - - - - - - - - -

# Complements relative to WORD.
SET NOT-A        = WORD - A ;
SET NOT-ADV      = WORD - Adv ;
SET NOT-ADV-PCLE = WORD - Adv - Pcle ;
SET NOT-N        = WORD - N ;
SET NOT-NA       = WORD - N - A ;

# NOT-NP is a positive list, not a complement of WORD.
# The set NP-BORDER is bigger.
SET NOT-NP = VFIN | ConNeg | Inf | Pcle | Interj | CS ;


# Nominal sets defined according to their lexical properties
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

#LIST MANNU = "ođđa#jage#mánnu" "ođđa#jagi#mánnu" 
#             "guovva#mánnu" 
#             "njukča#mánnu" "njuvčča#mánnu" "márs#mánnu" 
#             "cuoŋo#mánnu" "cuoŋos#mánnu" "gáranas#mánnu" 
#             "miesse#mánnu" "mái#mánnu" 
#             "geasse#mánnu" 
#             "suoidne#mánnu" 
#             "borge#mánnu" "šnjilča#mánnu" "šnjilža#mánnu" 
#             "čakča#mánnu" 
#             "golggot#mánnu" 
#             "skábma#mánnu" "november#mánnu" "ritne#mánnu" 
#             "juovla#mánnu" ;
#LIST AIGODAT = "áigodat" "stuoradigge#áigodat" "sámedigge#áigodat" 
#              "guođohan#áigodat" "doaibma#áigodat" ;             

#LIST VAHKKU = "áigi" "beaivi" "jándor" "bodda" "čakča" "čakča#geassi" "čuohte#jahki" 
#		"dálvi"	"diibmu" "dbm" "geardi" "geassi" "giđđa" "hávvi" "idja" "jahki" 
#		"jahke#čuohti" "mánnu" "riegádan#beaivi" "tiibmu" "vahkku" jahki 
#		"stuorradigge#áigodat" ;
 # The quoteless jahki here is since the preprocessor is not fine-tuned
 # yet, it still gives '"1870-" # jahki N Sg Com'.
 # lets see if "dbm" works

 
#LIST BEAIVI =	"mánnodat" "vuoss#árga" "disdat" "maŋŋeb#árga" "gaska#vahkku"
#		"duorastat" "bearjadat" "lávvardat" "lávvordat" "sotna#beaivi" 
#		"duopmo#beaivi" "sábbát";
#
#SET TIME = MANNU | VAHKKU | BEAIVI ;
    # Do not add AIGODAT, it has some other characteristics in some cases.
#SET NOTIME = N - TIME ;
#
#LIST GEASSET = "dálvet" "dálvit" "giđđat" "geasset" "geassit" "čakčat" ; # What is special here?
#
#LIST GUOVLU = "ássan#guovlu" "sámi#guovlu" "báiki" "gávpot" "guovlu" "sádji" 
#	"viessu" ;
#
#LIST ROUTE = "fávli" "geaidnu" "guorra" "luodda" "miila"; # and others!
#
#LIST MEASURE = "miila" "kilo#mehtar" "mehtar" "mehter" "centi#mehtar" "sente"
#	"cm" "milli#mehtar" "tumá" "állan" "salla" "lihtter" "gilo" ;	
#
#LIST PL-NOUN = "beassaš" "čalbme#láse" "gálssot" "headja" "sabet" "sisttet" 
#	"skárri"; 
 # Nominals that appear with plural numerals.

#LIST PREGEN = ("álgogeahčen")("bealli" N Sg Loc)("gaskavuohta" N Sg Loc)("guoskevaš")
#	("loahpageahčen")("miella" N Sg Loc)("oktavuohta" N Sg Loc);
 # Nouns and adjectives that take a preceding genitive.

#LIST N-NOPX = "lohku";
 # Nominals that probably do not have a Px ('1600-logus').

#LIST N-PX = "áhčči" "bárdni" "eadni" "nieida" "viellja" "oabbá";
 # Nominals that well may have a Px.

#LIST ALU = ("allu" Gen)("assu" Gen)("čiekŋu" Gen)("gassu" Gen)("govdu" Gen)
	   #("guhkku" Gen)("lossu" Gen)("stuorru" Gen)("viidu" Gen);

#LIST TITLE = "ámta#mánni" "direktevra" "eanan#hearrá" ("gásttašit" eaddji) "gonagas" 
#	"justitiarius" "korefereanta" "ossodat#direktevra" 
#	"profehta" "professor" "prof" "ráđđe#addi" "stipendiáhta" "sundi" 
#	"universitehtalektor" "vuosttas#konsuleanta" "cand.mag" "cand.oecon"
#    "cand.philol" "cand.real" "cand.scient" "cand.theol" "cand" "dr.art" 
#    "eksp.hoavd" "eksp.sj" "fenr" "fig" "fil.tri" "gen.lt" "genr" 
#    "genr.lt" "h.r.adv" "hr" "kapt" "korp" "lekt" "lic" "ltn" "mag.art"
#    "mag" "merc" "odont" "oecon" "ordf" "ossod.dir" "stud.phil."
#    "res.kap" "stud.philol" "vit.ass" "dr" "dr.med" "dr.philos" "dr.theol" ;
#
#LIST COUNTABLE = "oassi" "kapihttal" "paragráfa" ;

#LIST NUMMAR
#LIST KLASS
#LIST LAHKA
#SET NOT-LAHKA = N - LAHKA 
#LIST FYLKA
#LIST INSTITUTION
#LIST HUMAN


# Miscellaneous sets
# ------------------

#LIST Clt = Qst Foc ; 

# Px: any of the nine Px tags.
LIST Px = PxSg1 PxSg2 PxSg3
          PxDu1 PxDu2 PxDu3
          PxPl1 PxPl2 PxPl3 ;

#SET PPOS = Po | Pr ;

#LIST POST-A = "rájes" ;   
#LIST POST-B = "rádjái" ;

#LIST WACKERNAGEL = "go" "ba" "bat" "be" "bahan" "son" ;

#LIST GASKAL = "gaskal" "gaskkal" "gaskii" "gaskka" "gaskkas" ; 
 # p-positions that like coordination

#SET NUMERALS = Num - OKTA ;

#SET NOT-NUMERALS = WORD - Num ;

#LIST MANGA = "máŋga" "galle" ; # Not referred to by any rule.

#SET CARDINALS = Num - Ord - MANGA ;

#SET NOT-CC = WORD - CC ;

#SET NOT-PCLE = WORD - Pcle ;

#LIST COMPAR = "dávjjit" "eanet" "earalágan" "eará" "earret" "seammás" 
#		"seammalágan" "seammalágán" "seammaláhkai"; 
 # These combine with "go" Pcle, but COMPAR stands for 'comparison' rather than
 # 'comparative'.

#LIST CONTRA = "muhto" ; # In lean muitalan, muhto dál muitalan.

#LIST PROSEANTA = "proseanta" "%" ;

# REALCLB: every CLB reading except the comma.
SET REALCLB =
    CLB - COMMA ;


# Border sets and their complements
# ---------------------------------

# CP: interrogative or relative pronouns, or a member of MANNE.
SET CP = (Pron Interr)
       | (Pron Rel)
       | MANNE ;

# NP-BORDER: readings taken to mark the edge of a noun phrase.
#  - REALCLB is used instead of CLB in order to allow for NP-internal commas.
#  - Refl is left out because of "sin iežaset vieruid" etc.
SET NP-BORDER = BOS | EOS | REALCLB
              | VFIN | Inf | (Actio Ess) | ConNeg | VGen | Sup
              | PPRON-NOT-GEN | Recipr
              | Po | Pr | Pcle | ("jed") | Interj | CS | CP ;


# Syntactic sets
# --------------


# ALLSYNTAG: every declared syntactic tag.
# (@NNum>) and (@NumN<) are added: both are declared among the syntactic
# tags but were missing here. (@TIME) and (@NUMBER) remain excluded, as in
# the original trailing remark.
LIST ALLSYNTAG = (@+FAUXV) (@+FMAINV) (@-FAUXV) (@-FMAINV) (@-FSUBJ)
                 (@ADV-A) (@ADV-ADV) (@ADVL) (@AN>) (@ActioN>) (@CC)
                 (@CMPND) (@CS) (@DN>) (@GA>) (@GN>) (@GP<) (@GP>)
                 (@HNOUN) (@INTERJ) (@MEASURE) (@NNum>) (@NumN<)
                 (@NPron<) (@NQ<) 
                 (@NUM-PRON) (@OBJ) (@OPRED) (@PCLE) 
                 (@PCLE-COMPL) (@PROP>) (@PrcN>) (@PronN<) (@PronN>) 
                 (@QN<) (@QN>) (@SPRED)
                 (@SUBJ) (@SUBJ-QH) (@TITLE) (@VOC) (@X); # (@TIME)(@NUMBER) 

# SET ASSIGNEDSYN = ALLSYNTAG - @X ;
# Does not work.


# ============ #
# Rule section #
# ============ #
   CONSTRAINTS #
# ============ #

# The following dialogue can be disambiguated like this:
# In line 3, "le" is 3rd person because "ællo" is the subject.
# In line 4, "le" is 3rd person because "le" in the preceding sentence is
# 3rd person.

    ## Le gus dån oahpes dán bájkkáj?
    ## Na lev.
    ## Le gus ællo gárden?
    ## Le.
    ## Njuovadihppe gus?
    ## Njuovadin.


#SELECT (V Sg3) IF 
#SELECT (V Sg2) IF (*-1 (Pron Pers Nom Sg2));

# Select the Du3 verb reading when a nominative Du3 pronoun is found anywhere
# to the left in the window, unless a nominative NP member stands in between.
# The barrier is the set expression NP-MEMBER + Nom. The earlier spelling
# (NP-MEMBER Nom) was a tag list looking for a literal tag "NP-MEMBER", so
# the barrier could never fire.
SELECT (V Du3) IF (*-1 (Pron Nom Du3) BARRIER NP-MEMBER + Nom);
# Select the Du3 verb reading when two coordinated nominative nouns or
# adjectives ("X CC Y") are found to the left and the coordination is not
# continued by a further CC right after Y.
# The earlier version attached BARRIER to the non-scanning position 1 and
# chained another LINK after it, which in effect demanded a third nominal
# directly after Y instead of excluding a continued coordination.
# NOTE(review): only the conjunction after Y is checked, not the nominal
# that would follow it; a coordination continued to the left of X is not
# excluded either.
SELECT (V Du3) IF (*-3 (N Nom) OR (A Nom) LINK 1 CC LINK 1 (N Nom) OR (A Nom) LINK NOT 1 CC);

# TODO: If there is no such construction in either the same sentence or a
# preceding sentence, remove Dual. The construction can be
#   a) the dual pronoun in the nominative,
#   b) two coordinated nouns that are not followed by a third CC with a noun
#      or adjective in the nominative, or
#   c) a "guovttin" construction (whose Lule Sámi equivalent is not known to
#      the author of this note).

# Select the Sg2 present indicative reading of the copula when the same
# reading occurs anywhere to the left in the window. The window is a whole
# paragraph (the delimiter is "<¶>"), so earlier sentences are in reach.
# The base form is spelled "liehket", as in the INF-VERB list; the earlier
# spelling "liehkket" did not agree with it.
SELECT ("liehket" Ind Prs Sg2) IF (*-1  ("liehket" Ind Prs Sg2));
    ## Lev gå mus telefåvnnå?
    ## De le.


END