sinclude $(GTLANG)/analyser.mk
# include phonrules.mk

# CFST = xfst -utf8

# ======================================================= #
#  Building different versions of the basic fst tagger    #
# ======================================================= #

# Target for building a temporary propernoun lexicon
# that combines north sámi lexicon with the lule sámi one.
# The output is the morph file followed by the language's own lexicon;
# for every language but sme, a converted dump of the sme lexicon is appended.
propernoun-$(GTLANG)-lex-tmp.txt: $(GTLANG)/src/propernoun-$(GTLANG)-lex-tmp.txt
$(GTLANG)/src/propernoun-$(GTLANG)-lex-tmp.txt: \
    $(GTLANG)/src/propernoun-$(GTLANG)-lex.txt \
    $(GTLANG)/src/propernoun-$(GTLANG)-morph.txt \
    sme/src/propernoun-sme-lex.txt
	@echo
	@echo "*** Building $@ ***" ;
	@echo
	@cat $(word 2,$^) $< > $@
ifneq ($(GTLANG), sme)
	@echo "! <--- Dump from sme -->" >> $@
	script/smesmjdump.pl sme/src/propernoun-sme-lex.txt >> $@
endif

# The first goal is to build smX.save
# This goal depends on twol-smX-descriptive.bin and a bunch of lexicon files.
# The lexc command script gets a per-target name (tmp/$(@F)-script) so that
# the save, save-norm and save-restr rules cannot overwrite each other's
# script when make runs them in parallel.
save: $(GTLANG)/bin/$(GTLANG).save
$(GTLANG)/bin/$(GTLANG).save: \
    $(GTLANG)/bin/twol-$(GTLANG)-descriptive.bin \
    $(SRCS)
	@echo
	@echo "*** Building $(GTLANG).save ***" ;
	@echo
	printf "compile-source $(SRCS) \n\
	read-rules $< \n\
	compose-result \n\
	save-result $@ \n\
	quit \n" > tmp/$(@F)-script
	$(LEXC) < tmp/$(@F)-script
	rm -f tmp/$(@F)-script

# The second goal is to build a normative smX.save
# (same procedure, using twol-smX.bin and $(NORMFILES)).
save-norm: $(GTLANG)/bin/$(GTLANG)-norm.save
$(GTLANG)/bin/$(GTLANG)-norm.save: \
    $(GTLANG)/bin/twol-$(GTLANG).bin \
    $(NORMFILES)
	@echo
	@echo "*** Building $(GTLANG)-norm.save ***" ;
	@echo
	printf "compile-source $(NORMFILES) \n\
	read-rules $< \n\
	compose-result \n\
	save-result $@ \n\
	quit \n" > tmp/$(@F)-script
	$(LEXC) < tmp/$(@F)-script
	rm -f tmp/$(@F)-script

# This goal is to build a restrictive smX.save
# (same procedure, using twol-smX.bin and $(RESTRFILES)).
save-restr: $(GTLANG)/bin/$(GTLANG)-restr.save
$(GTLANG)/bin/$(GTLANG)-restr.save: \
    $(GTLANG)/bin/twol-$(GTLANG).bin \
    $(RESTRFILES)
	@echo
	@echo "*** Building $(GTLANG)-restr.save ***" ;
	@echo
	printf "compile-source $(RESTRFILES) \n\
	read-rules $< \n\
	compose-result \n\
	save-result $@ \n\
	quit \n" > tmp/$(@F)-script
	$(LEXC) < tmp/$(@F)-script
	rm -f tmp/$(@F)-script

# We need a variant of this third goal, to build a non-recursive sme.save
# This goal depends on twol-XXX.bin and a bunch of lexicon files
nonrec.save: $(GTLANG)/bin/nonrec-$(GTLANG).save
$(GTLANG)/bin/nonrec-$(GTLANG).save: \
    $(GTLANG)/bin/twol-$(GTLANG).bin \
    $(NONRECFILES)
	@echo
	@echo "*** Building nonrec-$(GTLANG).save ***" ;
	@echo
	@printf "compile-source $(NONRECFILES) \n\
	read-rules $< \n\
	compose-result \n\
	save-result $@ \n\
	quit \n" > tmp/nonrec-script
	$(LEXC) < tmp/nonrec-script
	@rm -f tmp/nonrec-script

# The nonrec-$(GTLANG).fst file combines the *.save file with a filter to
# remove unwanted derivational patterns.
# The xfst commands come from $(NONREC), which is not defined in this file;
# this rule only appends the final "save stack" / "quit".
nonrec: $(GTLANG)/bin/nonrec-$(GTLANG).fst
$(GTLANG)/bin/nonrec-$(GTLANG).fst: \
    $(GTLANG)/bin/nonrec-$(GTLANG).save \
    $(GTLANG)/bin/derivation-filter.fst \
    common/bin/downcase.fst
	@echo
	@echo "*** Building nonrec-$(GTLANG).fst ***" ;
	@echo
	@printf "$(NONREC)" > tmp/nonrec-fst-script
	@printf "save stack $@ \n\
	quit \n" >> tmp/nonrec-fst-script
	$(CFST) < tmp/nonrec-fst-script
	@rm -f tmp/nonrec-fst-script

# ======================================================= #
#  Building different versions of the basic fst tagger    #
# ======================================================= #

# We want an analyzer with POS tags only. It takes the linguistic
# fst as input and gives us an alternate pos.fst.
# The result is the composition tag-pos.fst .o. $(GTLANG).fst.
pos.fst: $(GTLANG)/bin/pos-$(GTLANG).fst
$(GTLANG)/bin/pos-$(GTLANG).fst: \
    common/bin/tag-pos.fst \
    $(GTLANG)/bin/$(GTLANG).fst
	@echo
	@echo "*** Building pos-$(GTLANG).fst, $(GTLANG).fst with POS tags ***"
	@echo
	@printf "read regex [[@\"$<\"] .o. \
	[@\"$(GTLANG)/bin/$(GTLANG).fst\"]] ; \n\
	save stack $@ \n\
	quit \n" > tmp/pos-fst-script
	$(CFST) < tmp/pos-fst-script
	@rm -f tmp/pos-fst-script

# In order to make pos.fst we need a binary tag-pos.fst
# This goal depends on tag-pos.regex.
# The way it is done is that all tags except the POS one are deleted.
# Compiles common/src/tag-pos.regex into a binary net with xfst.
tag-pos.fst: common/bin/tag-pos.fst
common/bin/tag-pos.fst: common/src/tag-pos.regex
	@echo
	@echo "*** Building tag-pos.fst ***"
	@echo
	@printf "read regex < $< \nsave stack $@ \nquit \n" > tmp/tag-pos-script
	$(XFST) < tmp/tag-pos-script
	@rm -f tmp/tag-pos-script

# We want to delete the +TV +IV tags for the generator (and other
# tags later on). For that we need our tag-deleter.
tag-not-save.fst: common/bin/tag-not-save.fst
common/bin/tag-not-save.fst: common/src/tag-not-save.regex
	@echo
	@echo "*** Building tag-not-save.fst ***"
	@echo
	@printf "read regex < $< \nsave stack $@ \nquit \n" > tmp/tag-not-save-script
	$(XFST) < tmp/tag-not-save-script
	@rm -f tmp/tag-not-save-script

# Compiles common/src/tag-inclusion-filter.regex into a binary net.
tag-inclusion-filter.fst: common/bin/tag-inclusion-filter.fst
common/bin/tag-inclusion-filter.fst: common/src/tag-inclusion-filter.regex
	@echo
	@echo "*** Building tag-inclusion-filter.fst ***"
	@echo
	@printf "read regex < $< \nsave stack $@ \nquit \n" > tmp/tag-inclusion-filter-script
	$(XFST) < tmp/tag-inclusion-filter-script
	@rm -f tmp/tag-inclusion-filter-script

# We want an analyzer with Norwegian tags. It takes the linguistic
# fst as input and gives us an alternate n-$(GTLANG).fst
# The result is the composition tag-no.fst .o. $(GTLANG).fst.
n-$(GTLANG).fst: $(GTLANG)/bin/n-$(GTLANG).fst
$(GTLANG)/bin/n-$(GTLANG).fst: common/bin/tag-no.fst $(GTLANG)/bin/$(GTLANG).fst
	@echo
	@echo "*** Building n-$(GTLANG).fst, $(GTLANG).fst with Norwegian tags ***"
	@echo
	@printf "read regex [[@\"$<\"] .o. [@\"$(GTLANG)/bin/$(GTLANG).fst\"]] ; \nsave stack $@ \nquit \n" > tmp/n-fst-script
	$(CFST) < tmp/n-fst-script
	@rm -f tmp/n-fst-script

# In order to make n-$(GTLANG).fst we need a binary tag-no.fst
# This goal depends on tag-no.regex
tag-no.fst: common/bin/tag-no.fst
common/bin/tag-no.fst: common/src/tag-no.regex
	@echo
	@echo "*** Building tag-no.fst ***"
	@echo
	@printf "read regex < $< \nsave stack $@ \nquit \n" > tmp/tag-no-script
	$(XFST) < tmp/tag-no-script
	@rm -f tmp/tag-no-script

# We also want an analyzer with Sami tags. It takes the linguistic
# $(GTLANG).fst as input and gives us an alternate s-$(GTLANG).fst
# The result is the composition tag-$(GTLANG).fst .o. $(GTLANG).fst.
s-$(GTLANG).fst: $(GTLANG)/bin/s-$(GTLANG).fst
$(GTLANG)/bin/s-$(GTLANG).fst: common/bin/tag-$(GTLANG).fst $(GTLANG)/bin/$(GTLANG).fst
	@echo
	@echo "*** Building s-$(GTLANG).fst, $(GTLANG).fst with Sami tags ***"
	@echo
	@printf "read regex [[@\"$<\"] .o. [@\"$(GTLANG)/bin/$(GTLANG).fst\"]] ; \nsave stack $@ \nquit \n" > tmp/s-fst-script
	$(CFST) < tmp/s-fst-script
	@rm -f tmp/s-fst-script

# In order to make s-$(GTLANG).fst we need a binary tag-$(GTLANG).fst
# This goal depends on tag-$(GTLANG).regex
tag-$(GTLANG).fst: common/bin/tag-$(GTLANG).fst
common/bin/tag-$(GTLANG).fst: common/src/tag-$(GTLANG).regex
	@echo
	@echo "*** Building tag-$(GTLANG).fst ***"
	@echo
	@printf "read regex < $< \nsave stack $@ \nquit \n" > tmp/tag-script
	$(XFST) < tmp/tag-script
	@rm -f tmp/tag-script

# This goal is to build the final analyser. It depends on all the files.
# Final analyser. The xfst commands come from $(FST), which is not defined
# in this file; this rule only appends the final "save stack" / "quit".
# The script gets a per-target name (tmp/$(@F)-script) so that the fst,
# fst-norm and fst-restr rules cannot mix their scripts when make runs them
# in parallel (the second printf appends to the file).
fst: $(GTLANG)/bin/$(GTLANG).fst
$(GTLANG)/bin/$(GTLANG).fst: \
    common/bin/nohardhyphen.fst \
    common/bin/caseconv.fst \
    common/bin/spellrelax.fst \
    common/bin/downcase.fst \
    common/bin/webadr.fst \
    $(GTLANG)/bin/$(GTLANG).save \
    $(GTLANG)/bin/$(GTLANG)-num.fst
	@echo
	@echo "*** Building $(GTLANG).fst ***" ;
	@echo
	@printf "$(FST)" > tmp/$(@F)-script
	@printf "save stack $@ \n\
	quit \n" >> tmp/$(@F)-script
	$(CFST) < tmp/$(@F)-script
	@rm -f tmp/$(@F)-script

# Normative analyser, scripted by $(FSTNORM).
fst-norm: $(GTLANG)/bin/$(GTLANG)-norm.fst
$(GTLANG)/bin/$(GTLANG)-norm.fst: \
    common/bin/nohardhyphen.fst \
    common/bin/downcase.fst \
    $(GTLANG)/bin/$(GTLANG)-norm.save
	@echo
	@echo "*** Building $(GTLANG)-norm.fst ***" ;
	@echo
	@printf "$(FSTNORM)" > tmp/$(@F)-script
	@printf "save stack $@ \n\
	quit \n" >> tmp/$(@F)-script
	$(CFST) < tmp/$(@F)-script
	@rm -f tmp/$(@F)-script

# Restrictive analyser, scripted by $(FSTRESTR).
fst-restr: $(GTLANG)/bin/$(GTLANG)-restr.fst
$(GTLANG)/bin/$(GTLANG)-restr.fst: \
    common/bin/nohardhyphen.fst \
    common/bin/downcase.fst \
    $(GTLANG)/bin/$(GTLANG)-restr.save
	@echo
	@echo "*** Building $(GTLANG)-restr.fst ***" ;
	@echo
	@printf "$(FSTRESTR)" > tmp/$(@F)-script
	@printf "save stack $@ \n\
	quit \n" >> tmp/$(@F)-script
	$(CFST) < tmp/$(@F)-script
	@rm -f tmp/$(@F)-script

# This goal is to make a regex for filenames, urls and mail addresses
webadr: webadr.fst
webadr.fst: common/bin/webadr.fst
common/bin/webadr.fst: common/src/webadr.txt
	@echo
	@echo "*** Building webadr.fst ***" ;
	@echo
	printf "source $< \n\
	save stack $@ \n\
	quit \n" > tmp/webadr-script
	$(XFST) < tmp/webadr-script
	rm -f tmp/webadr-script

# This goal is to make a regex for dates
# NOTE(review): the rule compiles common/src/num.txt with lexc; the "dates"
# wording is inherited from the original comment - confirm.
num: num.fst
num.fst: common/bin/num.fst
common/bin/num.fst: common/src/num.txt
	@echo
	@echo "*** Building num.fst ***" ;
	@echo
	printf "compile-source $< \n\
	source-to-result \n\
	save-result $@ \n\
	quit \n" > tmp/num-script
	$(LEXC) < tmp/num-script
	rm -f tmp/num-script

# This goal is to allow for Scandinavian ä/æ and ö/ø mix
spellrelax: spellrelax.fst
spellrelax.fst: common/bin/spellrelax.fst
common/bin/spellrelax.fst: common/src/spellrelax.regex
	@echo
	@echo "*** Building spellrelax.fst ***" ;
	@echo
	@printf "read regex < $< \n\
	save stack $@ \n\
	quit \n" > tmp/spellrelax-script
	$(XFST) < tmp/spellrelax-script
	@rm -f tmp/spellrelax-script

# This goal is to build the caseconv.fst file
# This goal depends on case.regex
# The resulting transducer allows for viessu / Viessu, i.e. initial
# casing of all the words in the lexicon
caseconv: caseconv.fst
caseconv.fst: common/bin/caseconv.fst
common/bin/caseconv.fst: common/src/case.regex
	@echo
	@echo "*** Building caseconv.fst ***" ;
	@echo
	@printf "read regex < $< \n\
	save stack $@ \n\
	quit \n" > tmp/caseconv-script
	$(XFST) < tmp/caseconv-script
	@rm -f tmp/caseconv-script

# This goal builds downcase.fst
# The resulting transducer allows for downcasing of derived names,
# such as oslolaš < Oslo
downcase: downcase.fst
downcase.fst: common/bin/downcase.fst
common/bin/downcase.fst: common/src/downcase.regex
	@echo
	@echo "*** Building downcase.fst ***" ;
	@echo
	@printf "source $< \n\
	save stack $@ \n\
	quit \n" > tmp/downcase-script
	$(XFST) < tmp/downcase-script
	@rm -f tmp/downcase-script

# This goal depends on allcaps.regex
# The resulting transducer allows for all-caps words, such as VIESSU, OSLO
# but not for e.g. VieSu, OslO
# It is used in an xfst script (bin/cap-sme), but seldom so, since it is slow.
# allcaps.fst is produced by sourcing common/src/allcaps.regex in xfst.
# NOTE(review): $(GTLANG)/bin/cap-$(GTLANG) is listed as a prerequisite but
# is not read by the recipe - confirm the dependency direction is intended.
allcaps: allcaps.fst
allcaps.fst: common/bin/allcaps.fst
common/bin/allcaps.fst: common/src/allcaps.regex \
    $(GTLANG)/bin/cap-$(GTLANG)
	@echo
	@echo "*** Building allcaps.fst ***" ;
	@echo
	@printf "source $< \n\
	save stack $@ \n\
	quit \n" > tmp/allcaps-script
	$(XFST) < tmp/allcaps-script
	@rm -f tmp/allcaps-script

# digraph-infl.fst is produced by sourcing common/src/digraph-infl.regex.
digraph-infl.fst: common/bin/digraph-infl.fst
common/bin/digraph-infl.fst: common/src/digraph-infl.regex
	@echo
	@echo "*** Building digraph-infl.fst ***" ;
	@echo
	@printf "source $< \n\
	save stack $@ \n\
	quit \n" > tmp/digraph-infl-script
	$(XFST) < tmp/digraph-infl-script
	@rm -f tmp/digraph-infl-script

# Target for clock simulator
# iclock is the inverted clock transducer. The script is created with '>'
# (not '>>') so that a stale tmp/iclock-script left behind by a failed run
# is replaced rather than appended to.
iclock.fst: $(GTLANG)/bin/iclock-$(GTLANG).fst
$(GTLANG)/bin/iclock-$(GTLANG).fst: $(GTLANG)/bin/clock-$(GTLANG).fst
	@echo "*** iclock-$(GTLANG).fst ***"
	@printf "load < $< \n\
	invert net \n\
	save stack $@ \n\
	quit \n" > tmp/iclock-script
	$(XFST) < tmp/iclock-script
	@rm -f tmp/iclock-script

# The clock transducer is compiled from the lexc source with $(LEXC).
clock.fst: $(GTLANG)/bin/clock-$(GTLANG).fst
$(GTLANG)/bin/clock-$(GTLANG).fst: $(GTLANG)/src/clock-$(GTLANG).lexc
	@echo "*** clock-$(GTLANG).fst ***"
	@printf "compile-source $< \n\
	source-to-result \n\
	save-result $@ \n\
	quit \n" > tmp/generate-clock-script
	$(LEXC) < tmp/generate-clock-script
	@rm -f tmp/generate-clock-script

# Language-specific number transducer, compiled from $(GTLANG)-num.txt.
# generated_nums-plx.txt is a prerequisite only; the recipe does not read it.
# The alias num.fst therefore refers to both this file and common/bin/num.fst.
num.fst: $(GTLANG)/bin/$(GTLANG)-num.fst
$(GTLANG)/bin/$(GTLANG)-num.fst: \
    $(GTLANG)/src/$(GTLANG)-num.txt \
    $(GTLANG)/polderland/generated_nums-plx.txt
	@printf "compile-source $< \n\
	source-to-result \n\
	save-result $@ \n\
	quit \n" > tmp/generate-num-script
	$(LEXC) < tmp/generate-num-script
	@rm -f tmp/generate-num-script

# Here we build the final generator, an inverted transducer of the analyzer.
# It is dependent upon sm*.save
# The xfst commands come from $(INVERTNET), which is not defined in this
# file; this rule appends "invert net", "save stack" and "quit".
# The script gets a per-target name (tmp/$(@F)-script) so that the ifst,
# ifst-norm and ifst-restr rules cannot mix their scripts when make runs
# them in parallel (the second printf appends to the file).
ifst: inverse.fst
inverse.fst: $(GTLANG)/bin/i$(GTLANG).fst
$(GTLANG)/bin/i$(GTLANG).fst: \
    common/bin/tag-not-save.fst \
    common/bin/downcase.fst \
    $(GTLANG)/bin/$(GTLANG).save
	@echo
	@echo "*** Building the inverse i$(GTLANG).fst ***"
	@echo
	@printf "$(INVERTNET)" > tmp/$(@F)-script
	@printf "invert net \n\
	save stack $@ \n\
	quit \n" >> tmp/$(@F)-script
	$(XFST) < tmp/$(@F)-script
	@rm -f tmp/$(@F)-script

# Here we build a normative generator, an inverted transducer of the normative
# analyzer. It is dependent upon sm*-norm.save
ifst-norm: inverse-norm.fst
inverse-norm.fst: $(GTLANG)/bin/i$(GTLANG)-norm.fst
$(GTLANG)/bin/i$(GTLANG)-norm.fst: \
    common/bin/tag-not-save.fst \
    common/bin/downcase.fst \
    $(GTLANG)/bin/$(GTLANG)-norm.save \
    $(GTLANG)/bin/derivation-filter.fst
	@echo
	@echo "*** Building the normative, inverse i$(GTLANG)-norm.fst ***"
	@echo
	@printf "$(INVERTNORM)" > tmp/$(@F)-script
	@printf "invert net \n\
	save stack $@ \n\
	quit \n" >> tmp/$(@F)-script
	$(XFST) < tmp/$(@F)-script
	@rm -f tmp/$(@F)-script

# Here we build a restrictive generator, an inverted transducer of the restrictive
# analyzer, which gives only ONE form for each analysis. The philosophy is that this
# shall be used for speech generation. It is dependent upon sm*-restr.save
# This file will be parametrized later on, for $VARIANT.
ifst-restr: inverse-restr.fst
inverse-restr.fst: $(GTLANG)/bin/i$(GTLANG)-restr.fst
$(GTLANG)/bin/i$(GTLANG)-restr.fst: \
    common/bin/tag-not-save.fst \
    common/bin/downcase.fst \
    $(GTLANG)/bin/$(GTLANG)-restr.save \
    $(GTLANG)/bin/derivation-filter.fst
	@echo
	@echo "*** Building the restrictive, inverse i$(GTLANG)-restr.fst ***"
	@echo
	@printf "$(INVERTRESTR)" > tmp/$(@F)-script
	@printf "invert net \n\
	save stack $@ \n\
	quit \n" >> tmp/$(@F)-script
	$(XFST) < tmp/$(@F)-script
	@rm -f tmp/$(@F)-script

# Here we build a normative generator with hyphenation, an inverted transducer
# of the normative analyzer, used for paradigm generation.
# Convenience aliases: every name below resolves to
# $(GTLANG)/bin/hi$(GTLANG)-norm.fst via h-inverse-norm.fst.
hi-norm: hi-norm.fst
hi-norm.fst hifst-norm hi$(GTLANG)-norm hi$(GTLANG)-norm.fst: h-inverse-norm.fst
h-inverse-norm.fst: $(GTLANG)/bin/hi$(GTLANG)-norm.fst

# The xfst script is assembled in three steps:
#   1. compose tag-not-save, derivation-filter, hyph-$(GTLANG).save and downcase;
#   2. switch flag-is-epsilon ON and read the inverted hyphenation rules;
#   3. turn the stack, compose the two nets, invert the result and save it.
$(GTLANG)/bin/hi$(GTLANG)-norm.fst: common/bin/tag-not-save.fst \
    common/bin/downcase.fst $(GTLANG)/bin/hyph-$(GTLANG).save \
    $(GTLANG)/bin/derivation-filter.fst $(GTLANG)/bin/hyphrules-$(GTLANG).fst
	@echo
	@echo "*** Building the normative, inverse, hyphenated ***"
	@echo "*** hi$(GTLANG)-norm.fst ***"
	@echo
	@printf "read regex [ [@\"common/bin/tag-not-save.fst\" ] .o. \n\
	[@\"$(GTLANG)/bin/derivation-filter.fst\"] .o. \n\
	[@\"$(GTLANG)/bin/hyph-$(GTLANG).save\" ] .o. \n\
	[@\"common/bin/downcase.fst\" ] \n\
	] ; \n" >tmp/hi-norm-script
	@printf "set flag-is-epsilon ON \n" >> tmp/hi-norm-script
	@printf "read regex [@\"$(GTLANG)/bin/hyphrules-$(GTLANG).fst\".i ] ; \n" >> tmp/hi-norm-script
	printf "turn stack \n\
	compose net \n\
	invert net \n\
	save stack $@ \n\
	quit \n" >> tmp/hi-norm-script
	$(XFST) < tmp/hi-norm-script
	@rm -f tmp/hi-norm-script

# This goal builds derivation-filter.fst
# The resulting transducer will only allow derivations following
# a certain pattern as described in $(GTLANG)-lex.txt
derivation-filter: derivation-filter.fst
derivation-filter.fst: $(GTLANG)/bin/derivation-filter.fst
$(GTLANG)/bin/derivation-filter.fst: $(GTLANG)/src/derivation-filter.regex
	@echo
	@echo "*** Building derivation-filter.fst ***"
	@echo
	@printf "source $< \nsave stack $@ \nquit \n" > tmp/derivation-filter-script
	$(XFST) < tmp/derivation-filter-script
	@rm -f tmp/derivation-filter-script

# This goal is to remove hyphens to make the spellers work
remove-hyphen.fst: common/bin/remove-hyphen.fst
common/bin/remove-hyphen.fst: common/src/remove-hyphen.regex
	@echo
	@echo "*** Building $@ ***"
	@echo
	@printf "read regex < $< \nsave stack $@ \nquit \n" > tmp/remove-hyphen-script
	$(XFST) < tmp/remove-hyphen-script
	@rm -f tmp/remove-hyphen-script
# =========================== #
# Building preprocessor files #
# =========================== #

# foreign.fst is the union of the old and the new foreign word lists.
foreign.fst: common/bin/foreign.fst
common/bin/foreign.fst: script/old-foreign.txt script/new-foreign.txt
	@echo
	@echo "*** Building a transducer for foreign words ***" ;
	@echo
	@printf "read text script/old-foreign.txt \n\
	read text script/new-foreign.txt \n\
	union net \n\
	save stack $@ \n\
	quit \n" > tmp/foreign-script
	$(XFST) < tmp/foreign-script
	@rm -f tmp/foreign-script

# Transducer built from the new foreign word list only.
newforeign.fst: common/bin/new-foreign.fst
common/bin/new-foreign.fst: script/new-foreign.txt
	@echo
	@echo "*** Our transducer for new foreign words ***" ;
	@echo
	@printf "read text < $< \n\
	save stack $@ \n\
	quit \n" > tmp/new-foreign-script
	$(XFST) < tmp/new-foreign-script
	@rm -f tmp/new-foreign-script

# Transducer built from the old foreign word list only.
oldforeign.fst: common/bin/old-foreign.fst
common/bin/old-foreign.fst: script/old-foreign.txt
	@echo
	@echo "*** Our ready-built transducer for foreign words ***" ;
	@echo
	@printf "read text < $< \n\
	save stack $@ \n\
	quit \n" > tmp/old-foreign-script
	$(XFST) < tmp/old-foreign-script
	@rm -f tmp/old-foreign-script

# Transducer for typographical errors, read from typoslist.txt.
# NOTE(review): the word list is read from common/bin/ rather than from a
# src/ directory like the other sources - confirm that is intended.
typos: typos.fst
typos.fst: common/bin/typos.fst
common/bin/typos.fst: common/bin/typoslist.txt
	@echo
	@echo "*** Our transducer for typographical errors ***" ;
	@echo
	@printf "read text < $< \n\
	save stack $@ \n\
	quit \n" > tmp/typos-script
	$(XFST) < tmp/typos-script
	@rm -f tmp/typos-script

# Here we build a phonetic transducer
# The script is written to tmp/phon-script, so that is the path that has to
# be removed afterwards.
phon.fst: $(GTLANG)/bin/phon-$(GTLANG).fst
$(GTLANG)/bin/phon-$(GTLANG).fst: $(GTLANG)/src/phon-$(GTLANG).xfst
	@echo
	@echo "*** Building phon-$(GTLANG).fst ***" ;
	@echo
	@printf "source $< \n\
	save stack $@ \n\
	quit \n" > tmp/phon-script
	$(XFST) < tmp/phon-script
	@rm -f tmp/phon-script