# -*- cg-pre-pipe: "hfst-tokenise -g ../../tools/tokenisers/tokeniser-gramcheck-gt-desc.pmhfst" -*-
# Divvun & Giellatekno - open source grammars for Sámi and other languages
# Copyright © 2000-2017 UiT The arctic University of Norway
# http://giellatekno.uit.no & http://divvun.no
#
# This program is free software; you can redistribute and/or modify
# this file under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version. The GNU General Public License
# is found at http://www.gnu.org/licenses/gpl.html. It is
# also available in the file $GTHOME/LICENSE.txt.
#
# Other licensing options are available upon request, please contact
# giellatekno@hum.uit.no or divvun@hum.uit.no
# ==================================================================== #
#!! !!! S O U T H S A A M I G R A M M A R C H E C K E R
# ==================================================================== #
# ====================================================================
# Sámi language technology 2000-2017, UiT The arctic University of Norway #
# ---------------------- #
# Development setup: #
# ---------------------- #
#
# cd $GTHOME/langs/sma
# ./autogen.sh
# ./configure --enable-apertium --with-hfst --enable-syntax --enable-grammarchecker --enable-tokenisers --enable-alignment --enable-reversed-intersect
# make
# cd tools/grammarcheckers
# make dev
#
# ---------------------- #
# Then edit/test as: #
# ---------------------- #
#
# emacs grammarchecker.cg3 # and C-c C-i / C-c C-c if you have cg-mode installed
# echo "Sun ij puátá." | sh modes/smagram.mode # from the terminal
#
# ---------------------- #
# Other debug-pipes: #
# ---------------------- #
#
# ls modes/
#
# ---------------------- #
# The pipeline (but use modes, above)
# echo "Hei." | hfst-tokenise -g ../tokenisers/tokeniser-gramcheck-gt-desc.pmhfst | vislcg3 -g disambiguator.cg3 -t | vislcg3 -g grammarchecker.cg3 -t
# =========================================== #
# Short table of contents #
# =========================================== #
# Delimiters
# Tags and sets
# Grammar checking rules
# =============== #
#!! !!!DELIMITERS
# =============== #
# A window (sentence) ends at any of these punctuation wordforms, or at a
# cohort carrying the tag "sent".
DELIMITERS = "<.>" "<!>" "<?>" "<...>" "<¶>" sent ;
# ================== #
#!! !!!TAGS AND SETS
# ================== #
SETS
#!! !!Tags
#!! This section lists all the tags inherited from the fst, and used as tags
#!! in the syntactic analysis. The next section, __Sets__, contains sets defined
#!! on the basis of the tags listed here, those set names are not visible in the output.
# Tags declared as single-membered LISTs
# ======================================
#!! !Beginning and end of sentence
# Each list matches either the CG-3 window-edge marker (>>> / <<<) or an
# explicit <s> / </s> tag.
LIST BOS = (>>>) (<s>) ; #!! BOS
LIST EOS = (<<<) (</s>) ; #!! EOS
# We define end of clause and beginning of clause in a way so that the file
# may be read both by the CG-2 and the vislcg formalisms.
# CG3 doesn't work without >>> and <<<!
#!! !Parts of speech tags
# One LIST per part-of-speech tag; CC-CS and PUNCT are sets derived from them.
LIST N      = N ;              #!! N
LIST A      = A ;              #!! A
LIST Adv    = Adv ;            #!! Adv
LIST V      = V ;              #!! V
LIST Pron   = Pron ;           #!! Pron
LIST CS     = CS ;             #!! CS
LIST CC     = CC ;             #!! CC
SET  CC-CS  = CC OR CS ;       #!! CC-CS
LIST Po     = Po ;             #!! Po
LIST Pr     = Pr ;             #!! Pr
LIST Pcle   = Pcle ;           #!! Pcle
LIST Num    = Num ;            #!! Num
LIST Interj = Interj ;         #!! Interj
LIST ABBR   = ABBR ;           #!! ABBR
LIST ACR    = ACR ;            #!! ACR
LIST CLB    = CLB ;            #!! CLB
LIST LEFT   = LEFT ;           #!! LEFT
LIST RIGHT  = RIGHT ;          #!! RIGHT
LIST WEB    = WEB ;            #!! WEB
# The lone quote in the trailing comment re-balances quotes for editors.
LIST QMARK  = """ ; # " #!! QMARK
LIST PPUNCT = PUNCT ;          #!! PPUNCT
# PUNCT is every PUNCT-tagged reading except the quotation mark.
SET  PUNCT  = PPUNCT - QMARK ; #!! PUNCT
LIST COMMA  = "," ;            #!! COMMA
LIST ¶      = ¶ ;              #!! ¶
#!! !Tags for POS sub-categories
# Each list holds exactly the tag it is named after.
LIST Pers    = Pers ;    #!! Pers
LIST Dem     = Dem ;     #!! Dem
LIST Interr  = Interr ;  #!! Interr
LIST Indef   = Indef ;   #!! Indef
LIST Recipr  = Recipr ;  #!! Recipr
LIST Refl    = Refl ;    #!! Refl
LIST Rel     = Rel ;     #!! Rel
LIST Coll    = Coll ;    #!! Coll
LIST NomAg   = NomAg ;   #!! NomAg
LIST Prop    = Prop ;    #!! Prop
LIST Allegro = Allegro ; #!! Allegro
LIST Arab    = Arab ;    #!! Arab
LIST Rom     = Rom ;     #!! Rom (Roman numeral)
#!! !Tags for morphosyntactic properties
# Unless noted otherwise, each list holds exactly the tag it is named after.
LIST Nom        = Nom ;        #!! Nom
LIST Acc        = Acc ;        #!! Acc
LIST Gen        = Gen ;        #!! Gen
LIST Ill        = Ill ;        #!! Ill
LIST Loc        = Loc ;        #!! Loc
LIST Com        = Com ;        #!! Com
LIST Ess        = Ess ;        #!! Ess
LIST Par        = Par ;        #!! Par
LIST Sg         = Sg ;         #!! Sg
LIST Du         = Du ;         #!! Du
LIST Pl         = Pl ;         #!! Pl
LIST Cmp/SplitR = Cmp/SplitR ; #!! Cmp/SplitR
LIST Cmp/SgNom  = Cmp/SgNom ;  #!! Cmp/SgNom
LIST Cmp/SgGen  = Cmp/SgGen ;  #!! Cmp/SgGen
LIST PxSg1      = PxSg1 ;      #!! PxSg1
LIST PxSg2      = PxSg2 ;      #!! PxSg2
LIST PxSg3      = PxSg3 ;      #!! PxSg3
LIST PxDu1      = PxDu1 ;      #!! PxDu1
LIST PxDu2      = PxDu2 ;      #!! PxDu2
LIST PxDu3      = PxDu3 ;      #!! PxDu3
LIST PxPl1      = PxPl1 ;      #!! PxPl1
LIST PxPl2      = PxPl2 ;      #!! PxPl2
LIST PxPl3      = PxPl3 ;      #!! PxPl3
# Px: any one of the nine Px* tags above.
LIST Px         = PxSg1 PxSg2 PxSg3
                  PxDu1 PxDu2 PxDu3
                  PxPl1 PxPl2 PxPl3 ; #!! Px
LIST Comp       = Comp ;       #!! Comp
LIST Superl     = Superl ;     #!! Superl
LIST Attr       = Attr ;       #!! Attr
LIST Ord        = Ord ;        #!! Ord
LIST Qst        = Qst ;        #!! Qst
LIST IV         = IV ;         #!! IV
LIST TV         = TV ;         #!! TV
LIST Prt        = Prt ;        #!! Prt
LIST Prs        = Prs ;        #!! Prs
LIST Ind        = Ind ;        #!! Ind
LIST Pot        = Pot ;        #!! Pot
LIST Cond       = Cond ;       #!! Cond
LIST Imprt      = Imprt ;      #!! Imprt
LIST ImprtII    = ImprtII ;    #!! ImprtII
# Person-number lists match the plain tag or the composite (pN sg/du/pl) form.
LIST Sg1        = Sg1 (p1 sg) ; #!! Sg1
LIST Sg2        = Sg2 (p2 sg) ; #!! Sg2
LIST Sg3        = Sg3 (p3 sg) ; #!! Sg3
LIST Du1        = Du1 (p1 du) ; #!! Du1
LIST Du2        = Du2 (p2 du) ; #!! Du2
LIST Du3        = Du3 (p3 du) ; #!! Du3
LIST Pl1        = Pl1 (p1 pl) ; #!! Pl1
LIST Pl2        = Pl2 (p2 pl) ; #!! Pl2
LIST Pl3        = Pl3 (p3 pl) ; #!! Pl3
LIST Inf        = Inf ;        #!! Inf
LIST ConNeg     = ConNeg ;     #!! ConNeg
LIST Neg        = Neg ;        #!! Neg
LIST PrfPrc     = PrfPrc ;     #!! PrfPrc
LIST VGen       = VGen ;       #!! VGen
LIST PrsPrc     = PrsPrc ;     #!! PrsPrc
LIST Ger        = Ger ;        #!! Ger
LIST Sup        = Sup ;        #!! Sup
LIST Actio      = Actio ;      #!! Actio
LIST VAbess     = VAbess ;     #!! VAbess
# Err/Orth collects every Err/* tag listed here, plus err_orth_mt.
LIST Err/Orth   = Err/Orth Err/Orth-a/á Err/Orth-nom/gen Err/Orth-nom/acc
                  Err/DerSub Err/CmpSub Err/UnspaceCmp Err/HyphSub Err/SpaceCmp
                  Err/Spellrelax err_orth_mt ; #!! Err/Orth
#!! !Semantic tags
# Single-tag lists first, then the sets combining them.
LIST Sem/Act        = Sem/Act ;        #!! Sem/Act
LIST Sem/Ani        = Sem/Ani ;        #!! Sem/Ani
LIST Sem/Atr        = Sem/Atr ;        #!! Sem/Atr
LIST Sem/Body       = Sem/Body ;       #!! Sem/Body
LIST Sem/Clth       = Sem/Clth ;       #!! Sem/Clth
LIST Sem/Domain     = Sem/Domain ;     #!! Sem/Domain
LIST Sem/Feat-phys  = Sem/Feat-phys ;  #!! Sem/Feat-phys
LIST Sem/Fem        = Sem/Fem ;        #!! Sem/Fem
LIST Sem/Group      = Sem/Group ;      #!! Sem/Group
LIST Sem/Lang       = Sem/Lang ;       #!! Sem/Lang
LIST Sem/Mal        = Sem/Mal ;        #!! Sem/Mal
LIST Sem/Measr      = Sem/Measr ;      #!! Sem/Measr
LIST Sem/Money      = Sem/Money ;      #!! Sem/Money
LIST Sem/Obj        = Sem/Obj ;        #!! Sem/Obj
LIST Sem/Obj-el     = Sem/Obj-el ;     #!! Sem/Obj-el
LIST Sem/Org        = Sem/Org ;        #!! Sem/Org
LIST Sem/Perc-emo   = Sem/Perc-emo ;   #!! Sem/Perc-emo
LIST Sem/Plc        = Sem/Plc ;        #!! Sem/Plc
LIST Sem/Sign       = Sem/Sign ;       #!! Sem/Sign
# NOTE(review): list name and member tag differ here (Sem/State-sick vs
# Sem/State-lang-sick) — confirm which tag the analyser actually emits.
LIST Sem/State-sick = Sem/State-lang-sick ; #!! Sem/State-sick
LIST Sem/Sur        = Sem/Sur ;        #!! Sem/Sur
LIST Sem/Time       = Sem/Time ;       #!! Sem/Time
LIST Sem/Txt        = Sem/Txt ;        #!! Sem/Txt
LIST HUMAN          = Sem/Fem Sem/Mal Sem/Sur ; #!! HUMAN
SET  HAB-ACTOR      = HUMAN ; # Goal: make this like the sme one. #!! HAB-ACTOR
SET  HAB-ACTOR-NOT-HUMAN = Sem/Org ; # make this like the sme one #!! HAB-ACTOR-NOT-HUMAN
# PROP-ATTR and PROP-SUR currently hold the same three tags.
LIST PROP-ATTR      = Sem/Mal Sem/Sur Sem/Fem ; #!! PROP-ATTR
LIST PROP-SUR       = Sem/Sur Sem/Mal Sem/Fem ; #!! PROP-SUR
# TIME-N-SET: readings that are both N and Sem/Time.
SET  TIME-N-SET     = N + Sem/Time ;   #!! TIME-N-SET
#!! ! Syntactic tags
# In these tag names, ">" and "<" are part of the tag itself. Each LIST holds
# exactly the tag it is named after unless noted otherwise.
# --- Verbal function tags -------------------------------------------------
LIST @+FAUXV = @+FAUXV ; #!! @+FAUXV
LIST @+FMAINV = @+FMAINV ; #!! @+FMAINV
LIST @-FAUXV = @-FAUXV ; #!! @-FAUXV
LIST @-FMAINV = @-FMAINV ; #!! @-FMAINV
# --- Arguments of non-finite verbs ----------------------------------------
LIST @-FSUBJ> = @-FSUBJ> ; #!! @-FSUBJ>
LIST @-F<OBJ = @-F<OBJ ; #!! @-F<OBJ
LIST @-FOBJ> = @-FOBJ> ; #!! @-FOBJ>
LIST @SPRED<OBJ = @SPRED<OBJ ; #!! @SPRED<OBJ
LIST @-F<ADVL = @-F<ADVL ; #!! @-F<ADVL
LIST @-FADVL> = @-FADVL> ; #!! @-FADVL>
LIST @-F<SPRED = @-F<SPRED ; #!! @-F<SPRED
LIST @-FSPRED> = @-FSPRED> ; #!! @-FSPRED>
LIST @-FOPRED> = @-FOPRED> ; #!! @-FOPRED>
SET FOBJ = @-F<OBJ OR @-FOBJ> ;
SET FMAINV = @-FMAINV OR @+FMAINV ;
SET FAUXV = @-FAUXV OR @+FAUXV ;
# --- Adverbials -----------------------------------------------------------
LIST @>ADVL = @>ADVL ; #!! @>ADVL
LIST @ADVL< = @ADVL< ; #!! @ADVL<
LIST @<ADVL = @<ADVL ; #!! @<ADVL
LIST @ADVL> = @ADVL> ; #!! @ADVL>
# ADVL: any adverbial tag, including those governed by a non-finite verb.
LIST ADVL = @ADVL @ADVL> @ADVL< @<ADVL @>ADVL @-F<ADVL @-FADVL> ; #!! @ADVL
# --- Habitive ---------------------------------------------------------------
LIST @HAB> = @HAB> ; #!! @HAB>
LIST @<HAB = @<HAB ; #!! @<HAB
SET HAB = @<HAB OR @HAB> ;
# --- Modifiers and heads ----------------------------------------------------
LIST @>N = @>N ; #!! @>N
LIST @Interj = @Interj ; #!! @Interj
LIST @N< = @N< ; #!! @N<
LIST @>A = @>A ; #!! @>A
LIST @P< = @P< ; #!! @P<
LIST @>P = @>P ; #!! @>P
LIST @HNOUN = @HNOUN ; #!! @HNOUN
LIST @INTERJ = @INTERJ ; #!! @INTERJ
LIST @>Num = @>Num; #!! @>Num
LIST @Pron< = @Pron< ; #!! @Pron<
LIST @>Pron = @>Pron ; #!! @>Pron
LIST @Num< = @Num< ; #!! @Num<
# --- Objects, predicatives and subjects -------------------------------------
LIST @OBJ = @OBJ ; #!! @OBJ
LIST @<OBJ = @<OBJ ; #!! @<OBJ
LIST @OBJ> = @OBJ> ; #!! @OBJ>
LIST @OPRED = @OPRED ; #!! @OPRED
LIST @<OPRED = @<OPRED ; #!! @<OPRED
LIST @OPRED> = @OPRED> ; #!! @OPRED>
LIST @PCLE = @PCLE ; #!! @PCLE
LIST @COMP-CS< = @COMP-CS< ; #!! @COMP-CS<
LIST @SPRED = @SPRED ; #!! @SPRED
LIST @<SPRED = @<SPRED ; #!! @<SPRED
LIST @SPRED> = @SPRED> ; #!! @SPRED>
LIST @SUBJ = @SUBJ ; #!! @SUBJ
LIST @<SUBJ = @<SUBJ ; #!! @<SUBJ
LIST @SUBJ> = @SUBJ> ; #!! @SUBJ>
# Direction-neutral unions of the three variants of each function.
SET SUBJ = @<SUBJ OR @SUBJ> OR @SUBJ ; #!! SUBJ
SET SPRED = @<SPRED OR @SPRED> OR @SPRED ; #!! SPRED
SET OPRED = @<OPRED OR @OPRED> OR @OPRED ; #!! OPRED
LIST @PPRED = @PPRED ; #!! @PPRED
# --- Appositions, vocative, coordination ------------------------------------
LIST @APP = @APP ; #!! @APP
LIST @APP-N< = @APP-N< ; #!! @APP-N<
LIST @APP-Pron< = @APP-Pron< ; #!! @APP-Pron<
LIST @APP>Pron = @APP>Pron ; #!! @APP>Pron
LIST @APP-Num< = @APP-Num< ; #!! @APP-Num<
LIST @APP-ADVL< = @APP-ADVL< ; #!! @APP-ADVL<
LIST @VOC = @VOC ; #!! @VOC
LIST @CVP = @CVP ; #!! @CVP
LIST @CNP = @CNP ; #!! @CNP
# --- Object sets ------------------------------------------------------------
# OBJ: every object tag, finite or non-finite, in either direction.
SET OBJ = (@<OBJ) OR (@OBJ>) OR (@OBJ) OR (@-F<OBJ) OR (@-FOBJ>) ; #!! OBJ
LIST <OBJ = @-F<OBJ @<OBJ ; #!! <OBJ
LIST OBJ> = @OBJ> @-FOBJ> ; #!! OBJ>
SET <OBJ-OTHERS = <OBJ OR (Gen) OR (Nom) OR (Ess) OR (Loc) OR (Adv) ; #!! <OBJ-OTHERS
SET OBJ>-OTHERS = OBJ> OR (Gen) OR (Nom) OR (Ess) OR (Loc) OR (Adv) ; #!! OBJ>-OTHERS
SET NOT-FAUXV = FMAINV OR OBJ + V OR ADVL + V ;
# Works after the mapping rules for verbs.
SET SYN-V = FMAINV OR FAUXV OR V + SUBJ OR OBJ + V OR ADVL + V OR (V @>N) OR (V @N<) OR (V @A<) ; #!! SYN-V
LIST @X = @X ; #!! @X
# ========
SETS
# ========
#!! !!Sets containing sets of lists and tags
#!! This part of the file lists a large number of sets based partly upon the tags defined above, and
#!! partly upon lexemes drawn from the lexicon.
#!! See the sourcefile itself to inspect the sets, what follows here is an overview of the set types.
#!! !Sets for Single-word sets
# INITIAL: the single-letter lemmas a-z plus á æ ø å ö ä.
LIST INITIAL = "a" "b" "c" "d" "e" "f" "g" "h"
               "i" "j" "k" "l" "m" "n" "o" "p"
               "q" "r" "s" "t" "u" "v" "w" "x"
               "y" "z" "á" "æ" "ø" "å" "ö" "ä" ; #!! INITIAL
#!! !Sets for word or not
# WORD: a reading with any of the listed part-of-speech tags, or the tag "?".
LIST WORD = N A Adv V Pron CS CC Po Pr Interj Pcle Num ABBR ACR \? ; #!! WORD
# any word
# SET REAL-WORD = WORD - Num - Ord ; #!! REAL-WORD
# This is former REALWORD-NOTABBR
SET REAL-WORD-NOT-ABBR = WORD - Num - Ord - (ABBR N) ; #!! REAL-WORD-NOT-ABBR
SET NOT-COMMA          = WORD - COMMA ;                 #!! NOT-COMMA
#!! !Case sets
# ---------
# CASE-AGREEMENT differs from CASE only in requiring Pl together with Ill.
LIST ADVLCASE       = Ill Loc Com Ess ;                  #!! ADVLCASE
LIST CASE-AGREEMENT = Nom Acc Gen (Pl Ill) Loc Com Ess ; #!! CASE-AGREEMENT
LIST CASE           = Nom Acc Gen Ill Loc Com Ess ;      #!! CASE
# Complements: any CASE member except the one named.
SET  NOT-NOM        = CASE - Nom ;                       #!! NOT-NOM
SET  NOT-GEN        = CASE - Gen ;                       #!! NOT-GEN
SET  NOT-ACC        = CASE - Acc ;                       #!! NOT-ACC
#!! !Verb sets
# ---------
# Verbs and their complements
# - - - - - - - - - - - - - -
SET NOT-V      = WORD - V ;      #!! NOT-V
#!! !Sets for finiteness and mood
# - - - - - - - - - -
SET REAL-NEG   = Neg - Sup ;     #!! REAL-NEG
SET MOOD-V     = Ind OR Pot OR Imprt OR ImprtII OR Cond OR (Neg Sup) ; #!! MOOD-V
SET NOT-PRFPRC = WORD - PrfPrc ; #!! NOT-PRFPRC
#!! !Sets for person
# - - - -
# One list per person-number value: a reading tagged V plus that value.
LIST SG1-V = (V Sg1) ; #!! SG1-V
LIST SG2-V = (V Sg2) ; #!! SG2-V
LIST SG3-V = (V Sg3) ; #!! SG3-V
LIST DU1-V = (V Du1) ; #!! DU1-V
LIST DU2-V = (V Du2) ; #!! DU2-V
LIST DU3-V = (V Du3) ; #!! DU3-V
LIST PL1-V = (V Pl1) ; #!! PL1-V
LIST PL2-V = (V Pl2) ; #!! PL2-V
LIST PL3-V = (V Pl3) ; #!! PL3-V
# Note that imperative verbs are not included in these sets!
# Some subsets of the VFIN sets
# - - - - - - - - - - - - - - -
SET SG-V    = SG1-V OR SG2-V OR SG3-V ;
SET DU-V    = DU1-V OR DU2-V OR DU3-V ;
SET PL-V    = PL1-V OR PL2-V OR PL3-V ;
SET DU-PL-V = DU1-V OR DU2-V OR DU3-V
           OR PL1-V OR PL2-V OR PL3-V ;
SET 1-2-V   = SG1-V OR SG2-V
           OR DU1-V OR DU2-V
           OR PL1-V OR PL2-V ;
#!! !Pronoun sets
# ------------
# Nominative personal pronouns, one list per person-number value. Each list
# matches either the (Pron Pers …) tag bundle or the lower-case (pers …) one.
LIST MUN      = (Pron Pers Sg1 Nom) (pers p1 sg nom) ;
LIST DON      = (Pron Pers Sg2 Nom) (pers p2 sg nom) ;
LIST SON      = (Pron Pers Sg3 Nom) (pers p3 sg nom) ;
LIST MOAI     = (Pron Pers Du1 Nom) (pers p1 du nom) ;
LIST DOAI     = (Pron Pers Du2 Nom) (pers p2 du nom) ;
LIST SOAI     = (Pron Pers Du3 Nom) (pers p3 du nom) ;
LIST MII-PERS = (Pron Pers Pl1 Nom) (pers p1 pl nom) ;
LIST DII      = (Pron Pers Pl2 Nom) (pers p2 pl nom) ;
LIST SII      = (Pron Pers Pl3 Nom) (pers p3 pl nom) ;
# Unions of the nominative pronoun lists.
SET PPRON-NOM-NOT-DAT = MUN OR DON OR SON
                     OR MOAI OR DOAI OR SOAI
                     OR MII-PERS OR DII OR SII ;
SET PPRON-DU-PL       = MOAI OR DOAI OR SOAI OR MII-PERS OR DII OR SII ;
SET PPRON-PL          = MII-PERS OR DII OR SII ;
SET PRON-DU           = MOAI OR DOAI OR SOAI ;
SET PPRON-NOT-SII     = MUN OR DON OR SON
                     OR MOAI OR DOAI OR SOAI
                     OR MII-PERS OR DII ;
# Genitive person-number bundles, in both tag spellings.
LIST PPRON-GEN = (Sg1 Gen) (Sg2 Gen) (Sg3 Gen)
                 (Du1 Gen) (Du2 Gen) (Du3 Gen)
                 (Pl1 Gen) (Pl2 Gen) (Pl3 Gen)
                 (p1 sg gen) (p2 sg gen) (p3 sg gen)
                 (p1 du gen) (p2 du gen) (p3 du gen)
                 (p1 pl gen) (p2 pl gen) (p3 pl gen) ;
SET PPRON-NOT-GEN = (Pron Pers) - PPRON-GEN ;
LIST SG-DEM = (Pron Dem Sg Nom) ;
LIST PL-DEM = (Pron Dem Pl Nom) ;
SET NOT-DEM = WORD - Dem ;
# Pronouns grouped by number; SG/DU/PL-PRON also accept possessive-suffix tags.
LIST SG-PRON = (Pron Sg1) (Pron Sg2) (Pron Sg3) (Pron Sg)
               (Pron PxSg1) (Pron PxSg2) (Pron PxSg3)
               (pron p1 sg) (pron p2 sg) (pron p3 sg) ;
LIST DU-PRON = (Pron Du1) (Pron Du2) (Pron Du3)
               (Pron PxDu1) (Pron PxDu2) (Pron PxDu3)
               (pron p1 du) (pron p2 du) (pron p3 du) ;
LIST PL-PRON = (Pron Pl1) (Pron Pl2) (Pron Pl3) (Pron Pl)
               (Pron PxPl1) (Pron PxPl2) (Pron PxPl3)
               (pron p1 pl) (pron p2 pl) (pron p3 pl) ;
LIST DU-PRON-NOTPX = (Pron Du1) (Pron Du2) (Pron Du3)
                     (pron p1 du) (pron p2 du) (pron p3 du) ;
# Possessive-suffix tags grouped by person, then by number.
LIST FIRST-PX  = PxSg1 PxDu1 PxPl1 ;
LIST SECOND-PX = PxSg2 PxDu2 PxPl2 ;
LIST THIRD-PX  = PxSg3 PxDu3 PxPl3 ;
LIST SG-PX     = PxSg1 PxSg2 PxSg3 ;
LIST DU-PX     = PxDu1 PxDu2 PxDu3 ;
LIST PL-PX     = PxPl1 PxPl2 PxPl3 ;
LIST PX        = PxSg1 PxDu1 PxPl1
                 PxSg2 PxDu2 PxPl2
                 PxSg3 PxDu3 PxPl3 ;
LIST DU-NR     = Du1 Du2 Du3 ;
SET NOT-SG-PRON = DU-PRON OR PL-PRON ;
#!! !Adjectival sets and their complements
# -------------------------------------
SET  A-CASE      = A - Attr - Adv ;
LIST A-CC        = A CC ;
SET  NOT-A       = WORD - A ; # This is former NOT-ADJ
SET  NOT-A-COMMA = WORD - A - COMMA ;
SET  NOT-Attr    = WORD - Attr ;
SET  NOT-A-PCLE  = WORD - A - Pcle ;
SET  NOT-A-CC    = WORD - A-CC ;
# and many others
#!! !Adverbial sets and their complements
# ------------------------------------
# NOTE(review): (AA) is used as a literal tag here — confirm it is the intended tag.
SET LEX-ADV        = Adv - (AA) ;
SET NOT-ADV-DE     = WORD - Adv ;
# NOT-ADV also lets clause boundaries (CLB) through.
SET NOT-ADV        = NOT-ADV-DE OR CLB ;
SET NOT-ADV-N      = NOT-ADV - N ;
SET NOT-ADV-PCLE   = NOT-ADV - Pcle ;
SET NOT-ADV-INDEF  = NOT-ADV - Indef ;
SET NOT-ADV-PCLE-ILL-LOC-COM = WORD - Adv - Pcle - Ill - Loc - Com ;
SET NOT-A-ADV-PCLE = WORD - Pcle - A - Adv ; #
#!! !Sets of elements with common syntactic behaviour
# ================================================
SETS
#!! !NP sets defined according to their morphosyntactic features
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
LIST N-SG-NOM   = (N Sg Nom) ;
LIST PROP       = (N Prop) ;
SET  COMMON-N   = N - Prop ;
# Head sets exclude Cmp/SplitR readings.
SET  HEAD-N     = N - Cmp/SplitR ;
SET  HEAD-N-NOM = (N Nom) - Cmp/SplitR ;
# SET SUBJECTHEAD = N OR A OR Pron - Refl ; # These, can be subject heads
# NP: nouns and adjectives; numerals are not included.
SET NP             = N OR A ;
SET NP-HEAD        = Pron OR HEAD-N ;
SET NP-HEAD-SG     = SG-PRON OR (N Sg) OR (A Sg) - Cmp/SplitR ;
SET NP-HEAD-PL     = PL-PRON OR (N Pl) OR (A Pl) - Cmp/SplitR ;
SET NP-HEAD-SG-NOM = SG-PRON + Nom OR (N Sg Nom) OR (A Sg Nom) - Cmp/SplitR ;
SET NP-HEAD-PL-NOM = PL-PRON + Nom OR (N Pl Nom) OR (N Coll Nom) OR (A Pl Nom) - Cmp/SplitR ;
SET NP-HEAD-NOM    = NP-HEAD-SG-NOM OR NP-HEAD-PL-NOM ;
SET NP-HEAD-ACC    = (Pron Acc) OR (N Acc) OR (A Acc) - Cmp/SplitR - (Dem Attr) ;
SET NP-HEAD-GEN    = (Pron Gen) OR (N Gen) OR (A Gen) - Cmp/SplitR - (Dem Attr) ;
#!! !The PRE-NP-HEAD family of sets
#!! These sets model noun phrases (NPs). The idea is to first define whatever can
#!! occur in front of the head of the NP, and thereafter negate that with the
#!! expression __WORD - premodifiers__.
SET PRE-NP-HEAD = (Prop Attr) OR (Prop @>N) OR (A Attr) OR (ABBR Attr)
               OR (Pron Pers Gen) OR (N Gen)
               OR Num OR Cmp/SplitR OR CC
               OR (Pron Dem) OR (Pron Refl Gen) OR (Indef Attr)
               OR (PrfPrc @>N) OR PrsPrc OR (A Ord) ;
# The strict version of items that can only be premodifiers, not parts of the predicate
SET PRE-A-N = (Pron Pers Gen) OR (Pron Pers Acc) OR (Pron Indef)
           OR Num OR (A Ord) OR (Pron Dem)
           OR (Pron Refl Gen) OR (Pron Refl Acc) ;
SET NOT-PRE-A-N = WORD - PRE-A-N ;
LIST PUNCT-LEFT  = (PUNCT LEFT) ;
LIST PUNCT-RIGHT = (PUNCT RIGHT) ;
# This is the previous NPNH (npnh) set.
# NOT-NPMOD = "NOT-PRE-NP-HEAD"
SET NOT-NPMOD          = WORD - PRE-NP-HEAD OR ABBR OR @CVP ;
SET NOT-NPMOD-ACC      = NOT-NPMOD - Acc OR ABBR ;
SET NOT-NPMOD-ACC-ADV  = NOT-NPMOD - Acc - Adv OR ABBR ;
SET NOT-NPMODADV       = WORD - PRE-NP-HEAD - Adv ;
SET NOT-NPMODADV-INDEF = WORD - PRE-NP-HEAD - Adv - Indef ;
# Simple complements of WORD, plus a few aliases.
SET NOT-N    = WORD - N ;
SET NOT-N-A  = WORD - N - A ;
SET NOT-NP   = Inf OR Pcle OR Interj OR CS ;
SET NUM      = Num ;
SET NOT-NUM  = WORD - Num ;
SET NOT-CC   = WORD - CC ;
SET NOT-PCLE = WORD - Pcle ;
# REAL-CLB: clause boundaries other than the comma.
SET REAL-CLB = CLB - COMMA ;
#!! !Border sets and their complements
# ---------------------------------
# CP: interrogative or relative pronouns.
SET  CP              = (Pron Interr) OR (Pron Rel) ;
LIST BOUNDARYSYMBOLS = "\;" ":" "-" "–" ;
SET  S-BOUNDARY      = BOUNDARYSYMBOLS OR (@CVP) ;
# BOC / EOC add the sentence edge to S-BOUNDARY.
SET  BOC             = S-BOUNDARY OR BOS ;
# BOC-PUNCT is BOC without the hyphen and the en dash.
SET  BOC-PUNCT       = BOC - ("-") - ("–") ;
SET  EOC             = S-BOUNDARY OR EOS ;
SET  NP-BOUNDARY     = BOS OR EOS OR REAL-CLB
                    OR Inf OR VGen OR Sup
                    OR PPRON-NOT-GEN OR Recipr
                    OR Po OR Pr OR Pcle OR Interj
                    OR CS OR CP OR @CVP ;
SET  SV-BOUNDARY     = S-BOUNDARY OR Inf OR Sup OR FMAINV ;
SET  CCCOMMA         = CC OR COMMA ;
# remember that those are potential sentence boundaries, too
#!! !Grammarchecker sets
# Error tags, each declared as a list holding the tag of the same name.
LIST &err-agr        = &err-agr ;
LIST &err-gen-po     = &err-gen-po ;
LIST &err-no-conneg  = &err-no-conneg ;
LIST &err-orrood-inf = &err-orrood-inf ;
LIST &err-ext-agr    = &err-ext-agr ;
LIST &err-ext-vagr   = &err-ext-vagr ;
# Use "&" rather than the default "@" as the mapping-tag prefix.
MAPPING-PREFIX = & ;
SECTION
# Speller suggestions rule – add &SUGGESTWF to any spelling suggestion
# that we actually want to suggest to the user.
# Both rules target readings tagged <spelled> (presumably added by the speller
# step earlier in the pipeline — confirm against the pipeline definition).
# The simplest is to just add it to all spelled words:
ADD:spell-it-all (&typo &SUGGESTWF) (<spelled>) ;
# But perhaps you want to only suggest spellings of words that are not inside "quotes":
# NOTE(review): while spell-it-all above is active it already tags every
# <spelled> reading; keep only one of the two rules enabled.
ADD:spell-conservatively (&typo &SUGGESTWF) (<spelled>) IF (NEGATE -1 QMARK LINK 2 QMARK);
# A simple grammar checker rule without suggestions: Ensure preceding nominal agrees with the verb
# Each rule flags a verb whose person-number tag does not match the
# unambiguous nominative word immediately to its left.
ADD:err-agr (&err-agr) TARGET V IF (-1C Sg + Nom) (0 V) (NOT 0 Sg3);
ADD:err-agr (&err-agr) TARGET V IF (-1C Pron + Sg1 + Nom) (0 V) (NOT 0 Sg1);
ADD:err-agr (&err-agr) TARGET V IF (-1C Pron + Sg2 + Nom) (0 V) (NOT 0 Sg2);
ADD:err-agr (&err-agr) TARGET V IF (-1C Pron + Pl1 + Nom) (0 V) (NOT 0 Pl1);
ADD:err-agr (&err-agr) TARGET V IF (-1C Pron + Pl2 + Nom) (0 V) (NOT 0 Pl2);
# And one with a suggestion where we simply change the tag Sg into Attr:
# Step 1 marks the reading; step 2 copies it with Attr in place of Sg and adds &SUGGEST.
ADD:attr-not-pred (&attr-not-pred) TARGET (A Sg) IF (NOT 0 Attr OR Gen) (1 CC LINK *1 COMMON-N) ;
COPY:attr-not-pred (Attr &SUGGEST) EXCEPT (Sg) TARGET (&attr-not-pred) ;
# The method is: Add &SUGGEST to a copied reading to *generate* a
# suggestion form from that reading. The copy should contain the error
# tag too – &-prefixed error tags are ignored when generating, but
# used to create human-readable messages.
# Simple punctuation rules showing how to change the lemma in the suggestions:
ADD:use-guillemets (&guillemets) TARGET (""");
COPY:left-guillemet ("«" &SUGGEST) EXCEPT (""") TARGET (&guillemets) ;
COPY:right-guillemet ("»" &SUGGEST) EXCEPT (""") TARGET (&guillemets) ;
ADD:use-ellipsis (&ellipsis) TARGET ("...");
COPY:use-ellipsis ("…" &SUGGEST) EXCEPT ("...") TARGET (&ellipsis) ;