# Constraint Grammar (VISL CG-3) disambiguation grammar.
# This part of the file declares the sentence delimiters and the tag
# LISTs / SETs that rules can refer to. The rule sections opened at the
# end contain no rules in the part of the file visible here.
#
# Lines starting with "#!!" are documentation comments and are kept verbatim
# apart from typo fixes.
#
# Usage:
#   cat fkvtext_analysed.txt | lookup2cg | vislcg3 -g disambiguation.cg3 -t
#   echo 'sentence' | preprocess | lookup -flags mbTT 1.fst | lookup2cg|vislcg3 -g ../../syntax/1_disambiguation.cg3

# ============================ #
#!! !!!Start making a syntactic disambiguator
# ============================ #

#!! !!Sets

# ========== #
# Delimiters #
# ========== #

# NOTE(review): the third and fourth delimiters were empty strings ("") in the
# damaged source. "<!>" and "<?>" are restored on the assumption that the
# angle-bracket forms were stripped as markup -- confirm against upstream.
DELIMITERS = "<.>" "<...>" "<!>" "<?>" "<¶>" ;
#!! Sentence delimiters are the following: "<.>" "<...>" "<!>" "<?>" "<¶>"

# ============= #
# Tags and sets #
# ============= #

# ========
SETS
# ========

# Beginning / end of sentence.
# NOTE(review): the second member of each list was an empty "()" in the
# damaged source. (<s>) and (</s>) are restored on the assumption that they
# were stripped as markup -- confirm against upstream.
LIST BOS = (>>>) (<s>) (sent) ;
LIST EOS = (<<<) (</s>) ;   # vislcg and CG-2 together.

#!! !Part-of-Speech

LIST N = N ;                #!! * N = noun
LIST A = A ;                #!! * A = adjective
LIST Num = Num ;            #!! * Num = numeral
LIST V = V ;                #!! * V = verb
LIST CC = CC ;              #!! * CC = conjunction
LIST CS = CS ;              #!! * CS = subjunction
LIST Adv = Adv ;            #!! * Adv = adverb
LIST Pr = Pr ;              #!! * Pr = preposition
LIST Po = Po ;              #!! * Po = postposition
LIST Pron = Pron ;          #!! * Pron = pronoun
LIST Interj = Interj ;      #!! * Interj = interjection

LIST CLB = CLB ;
LIST CLBfinal = CLBfinal ;  # because common num
LIST PUNCT = PUNCT ;

LIST Prs = Prs ;
LIST Prt = Prt ;
# Act is defined once, further down together with the other verb-form tags
# (the source defined it twice with identical content).
LIST Pass = Pass ;
LIST Attr = Attr ;          # ?

#!! !Numerus

LIST Sg = Sg ;              #!! * Sg = Singular
LIST Pl = Pl ;              #!! * Pl = Plural
LIST Sg1 = Sg1 ;            #!! * Sg1 = Singular 1.p.
LIST Sg2 = Sg2 ;            #!! * Sg2 = Singular 2.p.
LIST Sg3 = Sg3 ;            #!! * Sg3 = Singular 3.p.
LIST Pl1 = Pl1 ;            #!! * Pl1 = Plural 1.p.
LIST Pl2 = Pl2 ;            #!! * Pl2 = Plural 2.p.
LIST Pl3 = Pl3 ;            #!! * Pl3 = Plural 3.p.

#!! !Cases

LIST Nom = Nom ;            #!! * Nom
LIST Gen = Gen ;            #!! * Gen
LIST Acc = Acc ;            #!! * Acc
LIST Par = Par ;            #!! * Par
LIST Ine = Ine ;            #!! * Ine
LIST Ill = Ill ;            #!! * Ill
LIST Ela = Ela ;            #!! * Ela
LIST Ade = Ade ;            #!! * Ade
LIST Abe = Abe ;            #!! * Abe
LIST All = All ;            #!! * All
LIST Abl = Abl ;            #!! * Abl
LIST Ess = Ess ;            #!! * Ess
LIST Tra = Tra ;            #!! * Tra
LIST Ins = Ins ;            #!! * Ins
LIST Com = Com ;            #!! * Com

LIST SUBJ-CASE = Nom Par ;  #!! * SUBJ-CASE = Nom Par
LIST CASE = Nom Gen Acc Par Ine Ill Ela Ade Abe All Abl Ess Tra Ins Com ;

LIST Pos = Pos ;
LIST Comp = Comp ;
LIST Superl = Superl ;
LIST Ord = Ord ;
LIST Err/Orth = Err/Orth ;

#!! !Types

LIST Prop = Prop ;          #!! * Prop = Proper noun
LIST Interr = Interr ;      #!! * Interr = Interrogative
LIST Dem = Dem ;            #!! * Dem = demonstrative pron
LIST Rel = Rel ;            #!! * Rel = Relative pron
LIST Relpronpl = (Pron Rel Pl) ;        #!! Relpronpl "mikkä" and "jokka"
LIST Relpronsg = (Pron Rel Sg) ;        #!! Relpronsg "mikä" and "joka"
LIST Interrpronpl = (Pron Interr Pl) ;  #!! Interrpronpl "kuka" and "mikä"
LIST Pers = Pers ;          #!! * Pers = Personal pron
LIST Indef = Indef ;        #!! * Indef = Indef pron
LIST Qu = Qu ;

LIST Inf = Inf ;            #!! * Inf = Infinitive
LIST ConNeg = ConNeg ;      #!! * ConNeg = Conjugated as Negative form
LIST PrfPrc = PrfPrc ;      #!! * PrfPrc = Perfect participle
LIST Imprt = Imprt ;        #!! * Imprt = Imperative
LIST Act = Act ;            #!! * Act = Active
LIST Neg = Neg ;            #!! * Neg = Negation verb

# Possessive suffix tags, one list each plus the union PX.
LIST PxSg1 = PxSg1 ;
LIST PxSg2 = PxSg2 ;
LIST PxSg3 = PxSg3 ;
LIST PxPl1 = PxPl1 ;
LIST PxPl2 = PxPl2 ;
LIST PxPl3 = PxPl3 ;
LIST PX = PxSg1 PxSg2 PxSg3 PxPl1 PxPl2 PxPl3 ;

LIST COMMA = "," ;          #!! * COMMA = comma

LIST Foc/han = Foc/han ;
LIST Foc/kaan = Foc/kaan ;  #!! * Foc/kaan = focus clitic -kaan

LIST Sem/Fem = Sem/Fem ;    #!! * Sem/Fem = feminine proper noun

LIST @CVP = @CVP ;          # !! * @CVP = Conjunction or subjunction that conjoins finite verb phrases.
LIST @CNP = @CNP ;          # !! * @CNP = Local conjunction or subjunction.

LIST Sem/ID = Sem/ID ;
LIST Arab = Arab ;

LIST CURRENCY = "dinaari" "dollari" "euro" "kruunu" "kr" "rupla" "rubel" "¢" "€" "$" ;

#!! !!Sets with more members
# ====

LIST WORD = N V A Adv Pr Interj Po Num CC CS Pron ;  #!! * WORD = all PoS

# Sets for barriers

LIST NPMOD = (Pers Gen) (A Gen) (A Ine) (A Ade) (A Nom) (A Par) (A Acc) ;  #!! * NPMOD = these can modify a noun
SET NPMODADV = NPMOD OR Adv ;             #!! * NPMODADV = NPMOD plus adverb
SET NOT-NPMOD = WORD - NPMOD ;            #!! * NOT-NPMOD = these cannot modify a noun
SET NOT-NPMODADV = WORD - NPMODADV ;      #!! * NOT-NPMODADV = these cannot modify a noun, and is not adverb

LIST QVANT-ADV = "paljon" "vähän" "enämen" "vähemän" ;  #!! * QVANT-ADV = e.g. paljon, vähän

LIST KUNKA = "kunka" "missä" ;            #!! * KUNKA = e.g. kunka missä (adverbs that start a sentence)

SET S-BOUNDARY = CS OR ("mutta") OR (":") OR Rel OR @CVP OR KUNKA ;  #!! * S-BOUNDARY = words that start a sentence

LIST VFIN = Ind Cond Pot Imprt ;          #!! * VFIN = finite verb

LIST COPULAS = "olla" ;                   #!! * COPULAS = olla

# TODO: add lemmas
LIST MOD-ASP = "saađa" "häyttyyt" ;       #!! * MOD-ASP = auxiliaries

# TODO: add lemmas
LIST AUX-OR-MAIN = "tulla" "osata" "haluta" ;  #!! * AUX-OR-MAIN = verbs which can be both auxiliary and main verb

SET AUX = COPULAS OR MOD-ASP OR AUX-OR-MAIN ;  #!! * AUX = verbs which can be auxiliary

SET SV-BOUNDARY = S-BOUNDARY OR VFIN ;    #!! * SV-BOUNDARY = words that start a sentence and finite verb

LIST ELA-VERB = "herättäät" "pittäät" ;

# Here come the rules

BEFORE-SECTIONS

SECTION