# Target for making MS Office speller lexicons. It depends on compilers
# from Polderland, only available on the Divvun project computers.
#
# Variables read in this section and expected to be set elsewhere:
# TEST, M4FLAGS, GTLANG, SCRIPTDIR.

# Command-style targets: they never produce a file of their own name.
.PHONY: gen-plx-num numlist-plx

# TEST selects test mode for every value except the literal string "false",
# so an unset TEST also means test mode.
#   SPNUMBERS - count handed to the numeral generator below.
#   FSTPRINT  - xfst commands that dump a net to the current target. It has
#               to stay recursively expanded (=) because it contains $@,
#               which only has a value while a recipe is being expanded.
ifneq "$(TEST)" "false"
SPNUMBERS = 10
FSTPRINT = -e "write spaced-text > $@"
else
SPNUMBERS = 1000000
FSTPRINT = -e "lower-side" -e "print words > $@"
endif

# SETHYPH - hyphen-handling transducer composed in front of the word lists:
# hyphens are removed for hunspell builds and converted otherwise.
ifeq "$(M4FLAGS)" "-DHUNSPELL"
SETHYPH = common/bin/hyphen-remove.fst
else
SETHYPH = common/bin/hyphen-convert.fst
endif

# Generate all numbers from 1 to X as number strings in PLX format:
gen-plx-num: $(GTLANG)/polderland/generated_nums-plx.txt

# NOTE(review): this rule has no prerequisites, so an existing file is never
# regenerated when SPNUMBERS changes (e.g. after switching TEST) - confirm
# that this is intended. The script is presumably what writes $@; its output
# path is not visible from here.
$(GTLANG)/polderland/generated_nums-plx.txt:
	@echo
	@echo "*** Generating numbers in PLX format ***"
	@echo
	$(SCRIPTDIR)/generate-plx-numerals.py $(GTLANG) $(SPNUMBERS)

# Copy the generated numerals to the name the rev-sort step reads.
numlist-plx: gen-plx-num
	@echo
	@echo "*** Creating plx numlist ***"
	@echo
	cp $(GTLANG)/polderland/generated_nums-plx.txt $(GTLANG)/polderland/numlist-$(GTLANG)-plx.txt

# Disabled rule, kept for reference:
# num-lex: $(GTLANG)/src/num-$(GTLANG)-lex.txt
# $(GTLANG)/src/num-$(GTLANG)-lex.txt: \
# $(GTLANG)/polderland/generated_nums-plx.txt
# @printf "LEXICON NUM \n" > $@
# @printf "0\n" >> $@
# @cut -f1 -d"-" $(GTLANG)/polderland/generated_nums-plx.txt \
# | head -n$(SPNUMBERS) >> $@
# @cut -f1 -d"-" $(GTLANG)/polderland/generated_nums-plx.txt \
# | head -n$(SPNUMBERS) | sed -e 's/\(.*\)/0\1/g' >> $@
# @cut -f1 -d"-" $(GTLANG)/polderland/generated_nums-plx.txt \
# | head -n$(SPNUMBERS) | sed -e 's/\(.*\)/00\1/g' >> $@
# @printf "100000\n" >> $@
# @printf "1000000\n" >> $@
# @printf "1000000000\n" >> $@
# @printf "LEXICON NUMERAL \n" >> $@
# @cut -f1 -d"-" $(GTLANG)/polderland/generated_nums-plx.txt \
# | head -n100000 | lookup -flags mbTT -utf8 $< | cut -f2 >> $@
# @printf "10000\n" | lookup -flags mbTT -utf8 $< | cut -f2 >> $@
# @printf "100000\n" | lookup -flags mbTT -utf8 $< | cut -f2 >> $@
# @printf "1000000\n" | lookup -flags mbTT -utf8 $< | cut -f2 >> $@
# @printf "1000000000\n" | lookup -flags mbTT -utf8 $< | cut -f2 >> $@

# Before we compile the plx speller lexicon, we
# collect all pieces and rev-sort them:

# Command-style targets: they never produce a file of their own name.
.PHONY: hunspellspeller ms-win-speller

# Reverse-sorted, de-duplicated union of all PLX pieces.
# In test mode the result is written to testing/actual.plx instead of $@ and
# the numlist file is left out; ms-win-speller reads that file in test mode.
tmp/all-plx-$(GTLANG).revsorted: \
    numlist-plx \
    common/polderland/version.plx \
    ms-speller-version \
    $(GTLANG)/polderland/spellerproper-$(GTLANG)-plx.txt \
    $(GTLANG)/polderland/spellernouns-$(GTLANG)-plx.txt \
    $(GTLANG)/polderland/spelleradjs-$(GTLANG)-plx.txt \
    $(GTLANG)/polderland/spellerabbrs-$(GTLANG)-plx.txt \
    $(GTLANG)/polderland/spellerverbs-$(GTLANG)-plx.txt \
    $(GTLANG)/polderland/mwe-$(GTLANG)-plx.txt
	@echo
	@echo "*** Sorting $(GTLANG) files ***"
	@echo
# Scratch directory for sort; -p also creates tmp/ and tolerates an existing dir.
	@mkdir -p tmp/$(GTLANG)
ifneq "$(TEST)" "false"
	@LC_ALL=C sort -S 6G -T tmp/$(GTLANG) -ur -o \
		$(GTLANG)/polderland/testing/actual.plx \
		tmp/$(GTLANG)-version.plx \
		common/polderland/version.plx \
		$(GTLANG)/polderland/num-$(GTLANG)-plx.txt \
		$(PLXSRCsuffix) $(PLXSRCfst)
else
	@LC_ALL=C sort -S 6G -T tmp/$(GTLANG) -ur -o $@ \
		tmp/$(GTLANG)-version.plx \
		common/polderland/version.plx \
		$(GTLANG)/polderland/num-$(GTLANG)-plx.txt \
		$(GTLANG)/polderland/numlist-$(GTLANG)-plx.txt \
		$(PLXSRCsuffix) $(PLXSRCfst)
endif

# Speller plx files are printed locally now
# plx-transducer: \
# common/bin/hyphen-convert.fst \
# $(GTLANG)/bin/spellerverbs-$(GTLANG)-plx.fst \
# $(GTLANG)/bin/spellernouns-$(GTLANG)-plx.fst \
# $(GTLANG)/bin/spellerabbrs-$(GTLANG)-plx.fst \
# $(GTLANG)/bin/spelleradjs-$(GTLANG)-plx.fst \
# $(GTLANG)/bin/spellerproper-$(GTLANG)-plx.fst

# Before we compile the hunspell lexicons, we collect all pieces and sort them:
tmp/all-hsp-$(GTLANG).sorted: plx-conversion \
    victorio-up \
    $(GTLANG)/polderland/spellerproper-$(GTLANG)-plx.txt \
    $(GTLANG)/polderland/spellernouns-$(GTLANG)-plx.txt \
    $(GTLANG)/polderland/spelleradjs-$(GTLANG)-plx.txt \
    $(GTLANG)/polderland/spellerabbrs-$(GTLANG)-plx.txt \
    $(GTLANG)/polderland/spellerverbs-$(GTLANG)-plx.txt
	@echo
	@echo "*** Sorting $(GTLANG) files ***"
	@echo
	@mkdir -p tmp/$(GTLANG)
	@LC_ALL=C sort -S 6G -T tmp/$(GTLANG) -u -o $@ \
		$(PLXSRCjava) $(PLXSRCprefix) $(PLXSRCfst)

# This target must be built with M4FLAGS=-DHUNSPELL
hunspellspeller: hi-norm tmp/all-hsp-$(GTLANG).sorted
	@echo
	@echo "*** Making $(GTLANG) hunspell dictionary ***"
	filterplxfile $(GTLANG) tmp/all-hsp-$(GTLANG).sorted
	fix_set.py $(GTLANG) $(GTLANG)/hunspell
# Each recipe line runs in its own shell, so the cd has to be chained to the
# zip. The archive is still created in the directory make was started from,
# which is where the scp below looks for it.
	cd $(GTLANG)/hunspell && \
		zip "$(CURDIR)/$(DATE)-$(GTLANG).hunspell.zip" $(GTLANG).aff $(GTLANG).dic
# Upload into staticfiles/, the directory in which the symlink is created.
# NOTE(review): this line used to say static_files/ - confirm no such
# directory is in use on the server.
	@scp $(DATE)-$(GTLANG).hunspell.zip sd@divvun.no:staticfiles/
	@$(SSH) sd@divvun.no "cd staticfiles/ && ln -sf $(DATE)-$(GTLANG).hunspell.zip $(GTLANG).hunspell.zip"

# We compile the final MS Office speller lexicon here:
ms-win-speller: $(GTLANG)/polderland/$(WINSPELL)

$(GTLANG)/polderland/$(WINSPELL): \
    common/polderland/version.phon \
    tmp/all-plx-$(GTLANG).revsorted \
    $(GTPRIV)/polderland/src/phonrules-$(GTLANG).txt \
    ms-speller-version
	@echo
	@echo "*** cat-ing $(GTLANG) phon files ***"
	@echo
	@cat $(GTPRIV)/polderland/src/phonrules-$(GTLANG).txt \
		common/polderland/version.phon \
		tmp/$(GTLANG)-version.phon > tmp/$(GTLANG)-phon.txt
	@echo
	@echo "*** Compiling $(GTLANG) M$$ speller lexicon ***"
	@echo
ifneq "$(TEST)" "false"
# Test mode: keep only the first two tab-separated fields of actual.plx.
	cut -f1,2 < $(GTLANG)/polderland/testing/actual.plx | uniq \
		> $(GTLANG)/polderland/testing/actual.plx.tmp
	$(MKLEX) -M512 -p tmp/$(GTLANG)-phon.txt \
		$(GTLANG)/polderland/testing/actual.plx.tmp $@
else
	$(MKLEX) -M512 -p tmp/$(GTLANG)-phon.txt tmp/all-plx-$(GTLANG).revsorted $@
endif

# Upload the speller lexicon when finished
# `make` uses lazy evaluation, meaning that the REVISION variable won't be set
# until needed, thus reflecting the true revision number fetched from victorio.
# Command-style targets: they never produce a file of their own name.
.PHONY: upload ms-speller ms-speller-version victorio-co victorio-up

# Copy the compiled lexicon to the download site under a dated, revisioned
# name and point the plain $(WINSPELL) symlink at it. Nothing is uploaded in
# test mode (TEST anything but "false").
# REVISION is recursively expanded (=) on purpose: the $(shell ...) is only
# evaluated when the recipe expands $(REVISION), i.e. after revision.txt has
# been written by an earlier step.
upload: REVISION = $(shell cat $(GTLANG)/polderland/revision.txt)
upload:
ifneq "$(TEST)" "false"
	@echo
	@echo "*** We're testing, nothing uploaded ***"
	@echo
else
	@echo
	@echo "*** Uploading $(WINSPELL) to our download site ***"
	@echo
	scp $(GTLANG)/polderland/$(WINSPELL) sd@divvun.no:staticfiles/$(DATE)-$(REVISION)-$(WINSPELL)
	$(SSH) sd@divvun.no "cd staticfiles/ && ln -sf $(DATE)-$(REVISION)-$(WINSPELL) $(WINSPELL)"
endif

# This is the overall MS Office speller target. First build the lexicon,
# then upload:
# NOTE(review): the build-then-upload order relies on the prerequisites being
# processed left to right; under `make -j` that is not guaranteed - confirm
# this target is only run serially.
ms-speller: ms-win-speller upload

# Version easter egg generation:
# Records the local svn revision in revision.txt, then lets
# add-version-info.pl produce the per-language version .plx/.phon files.
ms-speller-version:
	@echo
	@echo "*** Creating $(GTLANG) M$$ speller version info easter egg ***"
	@echo
	svn info | grep Revision | cut -d ' ' -f2 > $(GTLANG)/polderland/revision.txt
	$(SCRIPTDIR)/prooftools/add-version-info.pl \
		--lang=$(GTLANG) \
		--version=$(GTLANG)/polderland/version.txt \
		--date=$(DATE) \
		--revision=$(GTLANG)/polderland/revision.txt \
		--plxfile=tmp/$(GTLANG)-version.plx \
		--phonfile=tmp/$(GTLANG)-version.phon

# Check out gt catalogue in victorio to make sure we have the latest & greatest
# sources
# The remote speller-$(GTLANG) directory is removed and recreated from scratch.
victorio-co:
	@echo
	@echo "*** Checking out gt in victorio ***"
	@echo
	$(SSH) victorio.uit.no "rm -rf speller-$(GTLANG)"
	$(SSH) victorio.uit.no "mkdir speller-$(GTLANG) && cd speller-$(GTLANG) && svn co https://victorio.uit.no/langtech/trunk/gt && chmod 755 gt/script/*"

# Update gt catalogue in victorio to make sure we have the latest & greatest
# sources
# Either way revision.txt ends up holding a revision number: the local one in
# test mode, the remote working copy's one otherwise.
victorio-up:
# If we are testing:
ifneq "$(TEST)" "false"
	@echo
	@echo "*** We're testing locally, no updating needed, just svn rev # ***"
	@echo
	svn info | grep Revision | cut -d ' ' -f2 > $(GTLANG)/polderland/revision.txt
else
	@echo
	@echo "*** Updating gt in victorio ***"
	@echo
	$(SSH) victorio.uit.no "cd speller-$(GTLANG)/gt && svn up && chmod 755 script/* && touch $(GTLANG)/src/$(GTLANG)-lex.txt"
	$(SSH) victorio.uit.no "cd speller-$(GTLANG)/gt && svn info | grep Revision | cut -d ' ' -f2" > $(GTLANG)/polderland/revision.txt
endif

#
# Command-style targets: they never produce a file of their own name.
.PHONY: verblist abbrlist adjlist nounlist properlist plx-conversion lexc2xspell

# These don't work for some reason....
verblist: $(GTLANG)/polderland/spellerverbs-$(GTLANG)-plx.txt
abbrlist: $(GTLANG)/polderland/spellerabbrs-$(GTLANG)-plx.txt
adjlist: $(GTLANG)/polderland/spelleradjs-$(GTLANG)-plx.txt
nounlist: $(GTLANG)/polderland/spellernouns-$(GTLANG)-plx.txt
properlist: $(GTLANG)/polderland/spellerproper-$(GTLANG)-plx.txt

# Print any $(GTLANG)/bin/<name>.fst as $(GTLANG)/polderland/<name>.txt:
# the net is composed with the $(SETHYPH) transducer and dumped with the
# commands in $(FSTPRINT). The rule serves every part of speech, so the
# banner names the file being built instead of claiming "verb list".
$(GTLANG)/polderland/%.txt: $(GTLANG)/bin/%.fst $(SETHYPH)
	@echo
	@echo "*** Building $(GTLANG) full-form list $(@F) in PLX format ***"
	@echo
	$(CFST) -e "load stack < $(SETHYPH)" \
		-e "load stack < $<" \
		-e "compose net" \
		$(FSTPRINT) \
		-stop
# If we are testing:
ifneq "$(TEST)" "false"
# Post-process via a scratch file, then move it over the target.
	reformat-space-text.pl < $@ > $@.test
	mv -f $@.test $@
endif

plx-conversion: lexc2xspell $(PLXSRCjava) # $(PLXSRCprefix)

# Convert each lexc source to a PLX list with the Lexc2xspell Java tool.
# The input name is derived from the target: <lang>/polderland/X-plx.txt is
# produced from <lang>/src/X-lex.txt. In test mode nothing is generated.
$(PLXSRCjava): $(SRCS) lexc2xspell $(GTLANG)/src/num-$(GTLANG)-lex.txt
ifneq "$(TEST)" "false"
	@echo
	@echo "*** We're testing, no numerals created ***"
	@echo
else
	@echo
	@echo "*** Creating $(GTLANG) Numeral PLX files ***"
	@echo
	java -Xmx512m -cp ../tools/lexc2xspell/build Lexc2xspell \
		$(patsubst $(GTLANG)/polderland/%-plx.txt,$(GTLANG)/src/%-lex.txt,$@) \
		`pwd`/$(GTLANG)/bin/hi$(GTLANG)-norm.fst \
		`pwd`/$(GTLANG)/res/paradigm.$(GTLANG).txt \
		`pwd`/$(GTLANG)/res/korpustags.$(GTLANG).txt \
		| $(HYPHSED) | uniq > $@
endif

# Build the Lexc2xspell tool with ant.
lexc2xspell:
	ant -buildfile ../tools/lexc2xspell/build.xml

# Compile a regex source from common/polderland into a saved xfst stack.
common/bin/%.fst: common/polderland/%.regex
	@echo
	@echo "*** Building $(@F) ***"
	@echo
	$(XFST) -e "read regex < $< " \
		-e "save stack $@ " \
		-stop

# ==================================================== #
# Building speller transducers for specific POSes only #
# ==================================================== #

# Multiword printing. First it filters in all entries
# containing space(s), then it separates last part and nonlast parts of these
# entries and replaces spaces with line breaks. Printed word list
# is sorted and plx tags are added to words.
# Per-POS speller transducers and their PLX-tagged variants.
#
# Pattern shared by the *-plx.fst rules below: a set of filter regexes from
# common/polderland is composed with the POS transducer and each result is
# given a name with "define"; the named sets are then concatenated with a
# literal PLX tag string ("read regex <set> [{...}]"), everything on the
# stack is unioned, composed with common/bin/usage-tags-remove.fst and saved
# as the target.
#
# NOTE(review): the blank inside the {...} tag literals and before NI in the
# sed expression below shows up as a single space here. Other recipes in this
# file read PLX data with `cut -f`, so the separator may need to be a TAB -
# confirm against the repository copy.

# Alias targets: they never produce a file of their own name.
.PHONY: spellermwe spellerverbs spellerverbs-plx spellerabbrs spellerabbrs-plx \
    spelleradjs spelleradjs-plx spellernouns spellernouns-plx \
    spellerproper spellerproper-plx

spellermwe: $(GTLANG)/polderland/mwe-$(GTLANG)-plx.txt

$(GTLANG)/polderland/mwe-$(GTLANG)-plx.txt: \
    $(GTLANG)/bin/spellernonrec-$(GTLANG).fst \
    common/bin/mwe-filter.fst
	@echo
	@echo "*** Building multi-word fst ***"
	@echo
	$(CFST) -e "read regex @\"common/bin/mwe-filter.fst\" .o. @\"$<\" ; " \
		-e "define multi" \
		-e "read regex [0 <- ?* || \\\" \"* \" \" _ .#.] \
			.o. multi .o. [?* -> 0 || \\\" \"* \" \" _ .#.] ; " \
		-e "define nonlast" \
		-e "read regex [0 <- ?* || .#. _ \" \" \\\" \"*] \
			.o. multi .o. [?* -> 0 || .#. _ \" \" \\\" \"*] ; " \
		-e "define last" \
		-e "read regex [\"\n\" <- { }] .o. last .o. [{ } -> \"\n\"] ; " \
		-e "push nonlast" \
		-e "union net" \
		-e "load stack < $(SETHYPH)" \
		-e "turn stack" \
		-e "compose net" \
		-e "lower-side" \
		-e "print words > $@.tmp" \
		-stop
# LC_ALL=C has to prefix sort itself: a variable assignment only applies to
# the command it precedes, so putting it in front of a leading cat left sort
# running in the ambient locale.
	LC_ALL=C sort -ru $@.tmp | sed -e 's/\(.*\)/\1 NI/g' > $@

# The spellerverbs.fst target creates a derived speller transducer
# only containing verbs, by utilising a special verb filter.
spellerverbs: $(GTLANG)/bin/spellerverbs-$(GTLANG).fst

$(GTLANG)/bin/spellerverbs-$(GTLANG).fst: \
    $(GTLANG)/bin/spellernonrec-$(GTLANG).fst \
    common/bin/verb-filter.fst
	@echo
	@echo "*** Building spellerverbs-$(GTLANG).fst ***"
	@echo
	$(CFST) -e "read regex @\"common/bin/verb-filter.fst\" \
			.o. @\"$(GTLANG)/bin/spellernonrec-$(GTLANG).fst\" ; " \
		-e "union net" \
		-e "turn stack" \
		-e "compose net" \
		-e "save stack $@" \
		-stop

spellerverbs-plx: $(GTLANG)/bin/spellerverbs-$(GTLANG)-plx.fst

$(GTLANG)/bin/spellerverbs-$(GTLANG)-plx.fst: \
    $(GTLANG)/bin/spellerverbs-$(GTLANG).fst
	@echo
	@echo "*** Building spellerverbs-$(GTLANG)-plx.fst ***"
	@echo
	$(CFST) \
		-e "read regex @re\"common/polderland/V-Actio-filter.regex\" \
			.o. @\"$(GTLANG)/bin/spellerverbs-$(GTLANG).fst\" ; " \
		-e "define actio" \
		-e "read regex @\"$(GTLANG)/bin/spellerverbs-$(GTLANG).fst\" - actio; " \
		-e "define noactio" \
		-e "read regex @re\"common/polderland/V-Inf-filter.regex\" \
			.o. @\"$(GTLANG)/bin/spellerverbs-$(GTLANG).fst\" ; " \
		-e "define infinitive" \
		-e "read regex @re\"common/polderland/V-PrfPrc-filter.regex\" \
			.o. @\"$(GTLANG)/bin/spellerverbs-$(GTLANG).fst\" ; " \
		-e "define prfprc" \
		-e "read regex infinitive [{- NIX,NtPABX} ]; " \
		-e "read regex prfprc [{ NAIBE} ]; " \
		-e "read regex actio [{ VIBOE} ]; " \
		-e "read regex noactio [{ VI} ]; " \
		-e "union net" \
		-e "turn stack" \
		-e "load stack < common/bin/usage-tags-remove.fst" \
		-e "compose net" \
		-e "save stack $@" \
		-stop

# Filters out all uninflectable words + abbreviations, acronyms
# and (underived) pronouns
spellerabbrs: $(GTLANG)/bin/spellerabbrs-$(GTLANG).fst

$(GTLANG)/bin/spellerabbrs-$(GTLANG).fst: \
    $(GTLANG)/bin/spellernonrec-$(GTLANG).fst \
    common/bin/abbr-filter.fst
	@echo
	@echo "*** Building spellerabbrs-$(GTLANG).fst ***"
	@echo
	$(CFST) -e "read regex @\"common/bin/abbr-filter.fst\" \
			.o. @\"$(GTLANG)/bin/spellernonrec-$(GTLANG).fst\" ; " \
		-e "save stack $@" \
		-stop

# Encodes all uninflectable words + abbreviations, acronyms
# and (underived) pronouns
spellerabbrs-plx: $(GTLANG)/bin/spellerabbrs-$(GTLANG)-plx.fst

$(GTLANG)/bin/spellerabbrs-$(GTLANG)-plx.fst: \
    $(GTLANG)/bin/spellerabbrs-$(GTLANG).fst
	@echo
	@echo "*** Building spellerabbrs-$(GTLANG)-plx.fst ***"
	@echo
	$(CFST) -e "read regex @re\"common/polderland/ABBR-Cmp-filter.regex\" \
			.o. @\"$(GTLANG)/bin/spellerabbrs-$(GTLANG).fst\" ; " \
		-e "define abbrcmp" \
		-e "read regex @re\"common/polderland/ABBR-Cmp-nosugg-filter.regex\" \
			.o. @\"$(GTLANG)/bin/spellerabbrs-$(GTLANG).fst\" ; " \
		-e "define abbrcmpnosugg" \
		-e "read regex @re\"common/polderland/ACRO-Nocmp-filter.regex\" \
			.o. @\"$(GTLANG)/bin/spellerabbrs-$(GTLANG).fst\" ; " \
		-e "define acronocmp" \
		-e "read regex @re\"common/polderland/ABBR-Nocmp-filter.regex\" \
			.o. @\"$(GTLANG)/bin/spellerabbrs-$(GTLANG).fst\" ; " \
		-e "define abbrnocmp" \
		-e "read regex @re\"common/polderland/ACRO-Cmp-filter.regex\" \
			.o. @\"$(GTLANG)/bin/spellerabbrs-$(GTLANG).fst\" ; " \
		-e "define acrocmp" \
		-e "read regex @re\"common/polderland/Adv-Cmp-filter.regex\" \
			.o. @\"$(GTLANG)/bin/spellerabbrs-$(GTLANG).fst\" ; " \
		-e "define advcmp" \
		-e "read regex @\"$(GTLANG)/bin/spellerabbrs-$(GTLANG).fst\" \
			- [ abbrcmp \
			| abbrcmpnosugg \
			| abbrnocmp \
			| acrocmp \
			| advcmp \
			| acronocmp]; " \
		-e "define therest" \
		-e "read regex abbrcmp [{- NePABX} ] ; " \
		-e "read regex abbrcmpnosugg [{ NePEX,UI} ] ; " \
		-e "read regex abbrcmpnosugg [{- NIX,NePABX,NePEX} ] ; " \
		-e "read regex abbrnocmp [{ NI,UI} ] ; " \
		-e "read regex acrocmp [{ NePE,UI} ] ; " \
		-e "read regex acrocmp [{- NIX,NePABOX,NePEX} ] ; " \
		-e "read regex acronocmp [{ NI,NePE,UI} ] ; " \
		-e "read regex advcmp [{- NAPBX,NtPAB,NIX} ] ; " \
		-e "read regex therest [{ NI,NePEX,UI} ] ; " \
		-e "union net" \
		-e "turn stack" \
		-e "load stack < common/bin/usage-tags-remove.fst" \
		-e "compose net" \
		-e "save stack $@ " \
		-stop

# Adjectives: the adjective filter applied to both the non-recursive speller
# transducer and the PLX numeral transducer, unioned.
spelleradjs: $(GTLANG)/bin/spelleradjs-$(GTLANG).fst

$(GTLANG)/bin/spelleradjs-$(GTLANG).fst: \
    $(GTLANG)/bin/spellernonrec-$(GTLANG).fst \
    $(GTLANG)/bin/spellernums-$(GTLANG)-plx.fst \
    common/bin/adj-filter.fst
	@echo
	@echo "*** Building spelleradjs-$(GTLANG).fst ***"
	@echo
	$(CFST) -e "read regex @\"common/bin/adj-filter.fst\" \
			.o. @\"$(GTLANG)/bin/spellernonrec-$(GTLANG).fst\" ; " \
		-e "read regex @\"common/bin/adj-filter.fst\" \
			.o. @\"$(GTLANG)/bin/spellernums-$(GTLANG)-plx.fst\" ; " \
		-e "union net" \
		-e "save stack $@" \
		-stop

spelleradjs-plx: $(GTLANG)/bin/spelleradjs-$(GTLANG)-plx.fst

$(GTLANG)/bin/spelleradjs-$(GTLANG)-plx.fst: \
    $(GTLANG)/bin/spelleradjs-$(GTLANG).fst
	@echo
	@echo "*** Building spelleradjs-$(GTLANG)-plx.fst ***"
	@echo
	$(CFST) \
		-e "read regex @re\"common/polderland/N-None-Cmp-filter.regex\" \
			.o. @\"$(GTLANG)/bin/spelleradjs-$(GTLANG).fst\" ; " \
		-e "define none" \
		-e "read regex @re\"common/polderland/N-Last-Cmp-filter.regex\" \
			.o. @\"$(GTLANG)/bin/spelleradjs-$(GTLANG).fst\" ; " \
		-e "define last" \
		-e "read regex @re\"common/polderland/A-Adv-filter.regex\" \
			.o. @\"$(GTLANG)/bin/spelleradjs-$(GTLANG).fst\" ; " \
		-e "define adv" \
		-e "read regex @re\"common/polderland/A-Sg-Nom-filter.regex\" \
			.o. @\"$(GTLANG)/bin/spelleradjs-$(GTLANG).fst\" ; " \
		-e "define sgnom" \
		-e "read regex @re\"common/polderland/A-Sg-Gen-filter.regex\" \
			.o. @\"$(GTLANG)/bin/spelleradjs-$(GTLANG).fst\" ; " \
		-e "define sggen" \
		-e "read regex @re\"common/polderland/A-Pl-Gen-filter.regex\" \
			.o. @\"$(GTLANG)/bin/spelleradjs-$(GTLANG).fst\" ; " \
		-e "define plgen" \
		-e "read regex @re\"common/polderland/A-Sg-Nom-Cmp-filter.regex\" \
			.o. @\"$(GTLANG)/bin/spelleradjs-$(GTLANG).fst\" ; " \
		-e "define sgnomcmp" \
		-e "read regex @re\"common/polderland/A-Sg-Gen-Cmp-filter.regex\" \
			.o. @\"$(GTLANG)/bin/spelleradjs-$(GTLANG).fst\" ; " \
		-e "define sggencmp" \
		-e "read regex @re\"common/polderland/A-Pl-Gen-Cmp-filter.regex\" \
			.o. @\"$(GTLANG)/bin/spelleradjs-$(GTLANG).fst\" ; " \
		-e "define plgencmp" \
		-e "read regex @re\"common/polderland/A-Def-Sg-Gen-Cmp-filter.regex\" \
			.o. @\"$(GTLANG)/bin/spelleradjs-$(GTLANG).fst\" ; " \
		-e "define defsggencmp" \
		-e "read regex @re\"common/polderland/A-Def-Pl-Gen-Cmp-filter.regex\" \
			.o. @\"$(GTLANG)/bin/spelleradjs-$(GTLANG).fst\" ; " \
		-e "define defplgencmp" \
		-e "read regex @re\"common/polderland/A-Sg-Gen-Cmp-X-filter.regex\" \
			.o. @\"$(GTLANG)/bin/spelleradjs-$(GTLANG).fst\" ; " \
		-e "define sggencmpx" \
		-e "read regex @re\"common/polderland/A-Pl-Gen-Cmp-X-filter.regex\" \
			.o. @\"$(GTLANG)/bin/spelleradjs-$(GTLANG).fst\" ; " \
		-e "define plgencmpx" \
		-e "read regex @re\"common/polderland/A-Sg-Nom-Left-filter.regex\" \
			.o. @\"$(GTLANG)/bin/spelleradjs-$(GTLANG).fst\" ; " \
		-e "define sgnomleft" \
		-e "read regex @re\"common/polderland/A-Sg-Gen-Left-filter.regex\" \
			.o. @\"$(GTLANG)/bin/spelleradjs-$(GTLANG).fst\" ; " \
		-e "define sggenleft" \
		-e "read regex @re\"common/polderland/A-Pl-Gen-Left-filter.regex\" \
			.o. @\"$(GTLANG)/bin/spelleradjs-$(GTLANG).fst\" ; " \
		-e "define plgenleft" \
		-e "read regex @re\"common/polderland/A-Attr-Cmp-filter.regex\" \
			.o. @\"$(GTLANG)/bin/spelleradjs-$(GTLANG).fst\" ; " \
		-e "define attrcmp" \
		-e "read regex @re\"common/polderland/A-Attr-filter.regex\" \
			.o. @\"$(GTLANG)/bin/spelleradjs-$(GTLANG).fst\" ; " \
		-e "define allattr" \
		-e "read regex @re\"common/polderland/A-Attr-Sharp-filter.regex\" \
			.o. @\"$(GTLANG)/bin/spelleradjs-$(GTLANG).fst\"; " \
		-e "define attrder" \
		-e "read regex allattr - attrder;" \
		-e "define attr" \
		-e "read regex @\"$(GTLANG)/bin/spelleradjs-$(GTLANG).fst\" \
			- [ none | last | adv \
			| sgnom | sggen | plgen \
			| sgnomcmp | sggencmp | plgencmp \
			| defsggencmp | defplgencmp \
			| sggencmpx | plgencmpx \
			| sgnomleft | sggenleft | plgenleft \
			| attrcmp | attr]; " \
		-e "define otherforms" \
		-e "read regex none [{ NI,UI} ]; " \
		-e "read regex last [{ NAIE,UI} ]; " \
		-e "read regex adv [{ NAI,UI} ]; " \
		-e "read regex [sgnom .o. ~[ a a ?* | e e ?* ]] [{ NAIOE,NePOE,UI} ]; " \
		-e "read regex [sgnom .o. [ a a ?* | e e ?* ]] [{ NePIE,UI} ]; " \
		-e "read regex [sgnom .o. ~[ a a ?* | e e ?* ]] [{- NABOX,NtPABIEX,NePAEBOX} ]; " \
		-e "read regex [sgnom .o. [ a a ?* | e e ?* ]] [{- NAB,NIX,NePOE} ]; " \
		-e "read regex [sggen .o. ~[ a a ?* | e e ?* ]] [{ GaBO,NAIE,UI} ]; " \
		-e "read regex [sggen .o. ~[ a a ?* | e e ?* ]] [{ NAIE,NePAE,UI} ]; " \
		-e "read regex [sggen .o. [ a a ?* | e e ?* ]] [{ NePBO,NAB,NePIE,UI} ]; " \
		-e "read regex [plgen .o. ~[ a a ?* | e e ?* ]] [{ GpBO,NAIE,NePAE,UI} ]; " \
		-e "read regex [plgen .o. [ a a ?* | e e ?* ]] [{ GpBO,NePIE,UI} ]; " \
		-e "read regex [sgnomcmp .o. ~[ a a ?* | e e ?* ]] [{ NABO,UI} ]; " \
		-e "read regex [sgnomcmp .o. [ a a ?* | e e ?* ]] [{ NePOE,NAB,UI} ]; " \
		-e "read regex [sgnomcmp .o. ~[ a a ?* | e e ?* ]] [{- NABOIEX,NtPABIX,NePAEBOX}]; " \
		-e "read regex sggencmp [{ GaBO,UI} ]; " \
		-e "read regex [sggencmp .o. ~[ a a ?* | e e ?* ]] [{- NABOIEX,NtPABIX,NePAEBOX}]; " \
		-e "read regex [sggencmp .o. [ a a ?* | e e ?* ]] [{- NAPIX,NAPB,NePABO} ]; " \
		-e "read regex [sggencmp .o. ~[ a a ?* | e e ?* ]] [{ NABO,UI} ]; " \
		-e "read regex [sggencmp .o. [ a a ?* | e e ?* ]] [{- NABOIEX,NtPABX,NePAEOX} ]; " \
		-e "read regex plgencmp [{ GpBO,UI} ]; " \
		-e "read regex [plgencmp .o. ~[ a a ?* | e e ?* ]] [{- NABOIEX,NtPABIX,NePAEBOX}]; " \
		-e "read regex [plgencmp .o. [ a a ?* | e e ?* ]] [{- NAPIX,NAPB,NePABO} ]; " \
		-e "read regex defsggencmp [{ GaBO,UI} ]; " \
		-e "read regex [defsggencmp .o. ~[ a a ?* | e e ?* ]] [{- NABOIEX,NtPABIX,NePAEBOX}]; " \
		-e "read regex defplgencmp [{ GpBO,UI} ]; " \
		-e "read regex defplgencmp [{- NABOIEX,NtPABIX,NePAEOX} ]; " \
		-e "read regex sggencmpx [{ NePBO,NePIE,NAB,UI} ]; " \
		-e "read regex [sggencmpx .o. ~[ a a ?* | e e ?* ]] [{- NABOIEX,NtPABIX,NePAEBOX}]; " \
		-e "read regex [plgencmpx .o. ~[ a a ?* | e e ?* ]] [{ NABO,UI} ]; " \
		-e "read regex [plgencmpx .o. ~[ a a ?* | e e ?* ]] [{- NABOIEX,NtPABIX,NePAEBOX}]; " \
		-e "read regex [plgencmpx .o. [ a a ?* | e e ?* ]] [{- NePABO} ]; " \
		-e "read regex sgnomleft [{ NAIOE,UI} ]; " \
		-e "read regex sggenleft [{ NaAIOE,UI} ]; " \
		-e "read regex plgenleft [{ NpAIOE,UI} ]; " \
		-e "read regex attrcmp [{ NABO,UI} ]; " \
		-e "read regex attr [{ NAIBOE,UI} ]; " \
		-e "read regex [otherforms .o. ~[ a a ?* | e e ?* ]] [{ NAIE,NePAE,UI} ]; " \
		-e "read regex [otherforms .o. [ a a ?* | e e ?* ]] [{ NtPA+,NePIE,UI} ]; " \
		-e "union net" \
		-e "turn stack" \
		-e "load stack < common/bin/usage-tags-remove.fst" \
		-e "compose net" \
		-e "save stack $@" \
		-stop

# Nouns: the noun filter applied to both the non-recursive speller
# transducer and the PLX numeral transducer, unioned.
spellernouns: $(GTLANG)/bin/spellernouns-$(GTLANG).fst

$(GTLANG)/bin/spellernouns-$(GTLANG).fst: \
    $(GTLANG)/bin/spellernonrec-$(GTLANG).fst \
    $(GTLANG)/bin/spellernums-$(GTLANG)-plx.fst \
    common/bin/noun-filter.fst
	@echo
	@echo "*** Building spellernouns-$(GTLANG).fst ***"
	@echo
	$(CFST) -e "read regex @\"common/bin/noun-filter.fst\" \
			.o. @\"$(GTLANG)/bin/spellernonrec-$(GTLANG).fst\" ; " \
		-e "read regex @\"common/bin/noun-filter.fst\" \
			.o. @\"$(GTLANG)/bin/spellernums-$(GTLANG)-plx.fst\" ; " \
		-e "union net" \
		-e "save stack $@" \
		-stop

spellernouns-plx: $(GTLANG)/bin/spellernouns-$(GTLANG)-plx.fst

$(GTLANG)/bin/spellernouns-$(GTLANG)-plx.fst: \
    $(GTLANG)/bin/spellernouns-$(GTLANG).fst
	@echo
	@echo "*** Building spellernouns-$(GTLANG)-plx.fst ***"
	@echo
## Positional tags:
# +None
# +Last
	$(CFST) \
		-e "read regex @re\"common/polderland/N-None-Cmp-filter.regex\" \
			.o. @\"$(GTLANG)/bin/spellernouns-$(GTLANG).fst\" ; " \
		-e "define none" \
		-e "read regex @re\"common/polderland/N-Last-Cmp-filter.regex\" \
			.o. @\"$(GTLANG)/bin/spellernouns-$(GTLANG).fst\" ; " \
		-e "define last" \
		-e "read regex @re\"common/polderland/N-Pref-Cmp-filter.regex\" \
			.o. @\"$(GTLANG)/bin/spellernouns-$(GTLANG).fst\" ; " \
		-e "define pref" \
		-e "read regex @re\"common/polderland/N-Sg-Nom-filter.regex\" \
			.o. @\"$(GTLANG)/bin/spellernouns-$(GTLANG).fst\" ; " \
		-e "define sgnom" \
		-e "read regex @re\"common/polderland/N-Sg-Gen-filter.regex\" \
			.o. @\"$(GTLANG)/bin/spellernouns-$(GTLANG).fst\" ; " \
		-e "define sggen" \
		-e "read regex @re\"common/polderland/N-Pl-Gen-filter.regex\" \
			.o. @\"$(GTLANG)/bin/spellernouns-$(GTLANG).fst\" ; " \
		-e "define plgen" \
		-e "read regex @re\"common/polderland/N-Sg-Nom-Cmp-filter.regex\" \
			.o. @\"$(GTLANG)/bin/spellernouns-$(GTLANG).fst\" ; " \
		-e "define sgnomcmp" \
		-e "read regex @re\"common/polderland/N-Sg-Gen-Cmp-filter.regex\" \
			.o. @\"$(GTLANG)/bin/spellernouns-$(GTLANG).fst\" ; " \
		-e "define sggencmp" \
		-e "read regex @re\"common/polderland/N-Pl-Gen-Cmp-filter.regex\" \
			.o. @\"$(GTLANG)/bin/spellernouns-$(GTLANG).fst\" ; " \
		-e "define plgencmp" \
		-e "read regex @re\"common/polderland/N-Def-Sg-Gen-Cmp-filter.regex\" \
			.o. @\"$(GTLANG)/bin/spellernouns-$(GTLANG).fst\" ; " \
		-e "define defsggencmp" \
		-e "read regex @re\"common/polderland/N-Def-Pl-Gen-Cmp-filter.regex\" \
			.o. @\"$(GTLANG)/bin/spellernouns-$(GTLANG).fst\" ; " \
		-e "define defplgencmp" \
		-e "read regex @re\"common/polderland/N-Sg-Gen-Cmp-X-filter.regex\" \
			.o. @\"$(GTLANG)/bin/spellernouns-$(GTLANG).fst\" ; " \
		-e "define sggencmpx" \
		-e "read regex @re\"common/polderland/N-Pl-Gen-Cmp-X-filter.regex\" \
			.o. @\"$(GTLANG)/bin/spellernouns-$(GTLANG).fst\" ; " \
		-e "define plgencmpx" \
		-e "read regex @re\"common/polderland/N-Sg-Nom-Left-filter.regex\" \
			.o. @\"$(GTLANG)/bin/spellernouns-$(GTLANG).fst\" ; " \
		-e "define sgnomleft" \
		-e "read regex @re\"common/polderland/N-Sg-Gen-Left-filter.regex\" \
			.o. @\"$(GTLANG)/bin/spellernouns-$(GTLANG).fst\" ; " \
		-e "define sggenleft" \
		-e "read regex @re\"common/polderland/N-Pl-Gen-Left-filter.regex\" \
			.o. @\"$(GTLANG)/bin/spellernouns-$(GTLANG).fst\" ; " \
		-e "define plgenleft" \
		-e "read regex @\"$(GTLANG)/bin/spellernouns-$(GTLANG).fst\" \
			- [ none | last | pref \
			| sgnom | sggen | plgen \
			| sgnomcmp | sggencmp | plgencmp \
			| defsggencmp | defplgencmp \
			| sggencmpx | plgencmpx \
			| sgnomleft | sggenleft | plgenleft ] ; " \
		-e "define otherforms" \
		-e "read regex none [{ NI,UI} ]; " \
		-e "read regex last [{ NIE,UI} ]; " \
		-e "read regex pref [{ NAB} ]; " \
		-e "read regex pref [{- NAIX,NePABX} ]; " \
		-e "read regex [sgnom .o. ~[ a a ?* | e e ?* ]] [{ NAIE,NePOE,UI} ]; " \
		-e "read regex [sgnom .o. [ a a ?* | e e ?* ]] [{ NePIE,UI} ]; " \
		-e "read regex [sgnom .o. ~[ a a ?* | e e ?* ]] [{- NABOX,NtPABIEX,NePAEBOX} ]; " \
		-e "read regex [sgnom .o. [ a a ?* | e e ?* ]] [{- NIX,NePABO,NePEX} ]; " \
		-e "read regex [sggen .o. ~[ a a ?* | e e ?* ]] [{ GaBO,NAIE,UI} ]; " \
		-e "read regex [sggen .o. ~[ a a ?* | e e ?* ]] [{ NAIE,NePAE,UI} ]; " \
		-e "read regex [sggen .o. [ a a ?* | e e ?* ]] [{ NePBO,NAB,NePIE,UI} ]; " \
		-e "read regex [plgen .o. ~[ a a ?* | e e ?* ]] [{ GpBO,NAIE,NePAE,UI} ]; " \
		-e "read regex [plgen .o. [ a a ?* | e e ?* ]] [{ GpBO,NePIE,UI} ]; " \
		-e "read regex [sgnomcmp .o. ~[ a a ?* | e e ?* ]] [{ NABO,UI} ]; " \
		-e "read regex [sgnomcmp .o. [ a a ?* | e e ?* ]] [{ NePOE,NAB,UI} ]; " \
		-e "read regex [sgnomcmp .o. ~[ a a ?* | e e ?* ]] [{- NABOIEX,NtPABIX,NePAEBOX}]; " \
		-e "read regex sggencmp [{ GaBO,UI} ]; " \
		-e "read regex [sggencmp .o. ~[ a a ?* | e e ?* ]] [{- NABOIEX,NtPABIX,NePAEBOX}]; " \
		-e "read regex [sggencmp .o. [ a a ?* | e e ?* ]] [{- NAPIX,NAPB,NePABO} ]; " \
		-e "read regex [sggencmp .o. ~[ a a ?* | e e ?* ]] [{ NABO,UI} ]; " \
		-e "read regex [sggencmp .o. [ a a ?* | e e ?* ]] [{- NABOIEX,NtPABX,NePAEOX} ]; " \
		-e "read regex plgencmp [{ GpBO,UI} ]; " \
		-e "read regex [plgencmp .o. ~[ a a ?* | e e ?* ]] [{- NABOIEX,NtPABIX,NePAEBOX}]; " \
		-e "read regex [plgencmp .o. [ a a ?* | e e ?* ]] [{- NAPIX,NAPB,NePABO} ]; " \
		-e "read regex defsggencmp [{ GaBO,UI} ]; " \
		-e "read regex [defsggencmp .o. ~[ a a ?* | e e ?* ]] [{- NABOIEX,NtPABIX,NePAEBOX}]; " \
		-e "read regex defplgencmp [{ GpBO,UI} ]; " \
		-e "read regex defplgencmp [{- NABOIEX,NtPABIX,NePAEOX} ]; " \
		-e "read regex sggencmpx [{ NePBO,NePIE,NAB,UI} ]; " \
		-e "read regex [sggencmpx .o. ~[ a a ?* | e e ?* ]] [{- NABOIEX,NtPABIX,NePAEBOX}]; " \
		-e "read regex [plgencmpx .o. ~[ a a ?* | e e ?* ]] [{ NABO,UI} ]; " \
		-e "read regex [plgencmpx .o. ~[ a a ?* | e e ?* ]] [{- NABOIEX,NtPABIX,NePAEBOX}]; " \
		-e "read regex [plgencmpx .o. [ a a ?* | e e ?* ]] [{- NePABO} ]; " \
		-e "read regex sgnomleft [{ NAIOE,UI} ]; " \
		-e "read regex sggenleft [{ NaAIOE,UI} ]; " \
		-e "read regex plgenleft [{ NpAIOE,UI} ]; " \
		-e "read regex [otherforms .o. ~[ a a ?* | e e ?* ]] [{ NAIE,NePAE,UI} ]; " \
		-e "read regex [otherforms .o. [ a a ?* | e e ?* ]] [{ NtPA+,NePIE,UI} ]; " \
		-e "union net" \
		-e "turn stack" \
		-e "load stack < common/bin/usage-tags-remove.fst" \
		-e "compose net" \
		-e "save stack $@" \
		-stop

# Proper nouns: the proper-noun filter applied to the non-recursive speller
# transducer.
spellerproper: $(GTLANG)/bin/spellerproper-$(GTLANG).fst

$(GTLANG)/bin/spellerproper-$(GTLANG).fst: \
    $(GTLANG)/bin/spellernonrec-$(GTLANG).fst \
    common/bin/propernoun-filter.fst
	@echo
	@echo "*** Building spellerproper-$(GTLANG).fst ***"
	@echo
	$(CFST) -e "read regex @\"common/bin/propernoun-filter.fst\" \
			.o. @\"$(GTLANG)/bin/spellernonrec-$(GTLANG).fst\" ; " \
		-e "save stack $@" \
		-stop

# Extra xfst commands for South Sami (GTLANG = sma); empty for every other
# language.
# NOTE(review): no visible recipe references $(PROPERSMA) - confirm whether
# it is still used anywhere.
ifeq ($(GTLANG), sma)
PROPERSMA = -e "read regex [ @re\"common/polderland/N-Sg-Gen-filter.regex\" \
    .o. @\"$(GTLANG)/bin/spellerproper-$(GTLANG).fst\" \
    .o. ~[ A a ?* | E e ?* ] ] [{ NAIE}]; " \
    -e "read regex @re\"common/polderland/N-Sg-Gen-filter.regex\" \
    .o. @\"$(GTLANG)/bin/spellerproper-$(GTLANG).fst\" \
    [{- NABOX,NtPAB,NePAEOX}]; " \
    -e "read regex @re\"common/polderland/N-Sg-Gen-Cmp-filter.regex\" \
    .o. @\"$(GTLANG)/bin/spellerproper-$(GTLANG).fst\" \
    [{ NABO}]; " \
    -e "read regex @re\"common/polderland/N-Sg-Gen-Cmp-filter.regex\" \
    .o. @\"$(GTLANG)/bin/spellerproper-$(GTLANG).fst\" \
    [{- NABOIEX,NtPAB,NePAEOX}]; "
else
PROPERSMA =
endif

spellerproper-plx: $(GTLANG)/bin/spellerproper-$(GTLANG)-plx.fst

$(GTLANG)/bin/spellerproper-$(GTLANG)-plx.fst: \
    $(GTLANG)/bin/spellerproper-$(GTLANG).fst
	@echo
	@echo "*** Building spellerproper-$(GTLANG)-plx.fst ***"
	@echo
	$(CFST) \
		-e "read regex @re\"common/polderland/N-None-Cmp-filter.regex\" \
			.o. @\"$(GTLANG)/bin/spellerproper-$(GTLANG).fst\" ; " \
		-e "define none" \
		-e "read regex @re\"common/polderland/N-Last-Cmp-filter.regex\" \
			.o. @\"$(GTLANG)/bin/spellerproper-$(GTLANG).fst\" ; " \
		-e "define last" \
		-e "read regex @re\"common/polderland/Prop-Sg-Nom-filter.regex\" \
			.o. @\"$(GTLANG)/bin/spellerproper-$(GTLANG).fst\" ; " \
		-e "define sgnom" \
		-e "read regex @re\"common/polderland/Prop-Sg-Gen-filter.regex\" \
			.o. @\"$(GTLANG)/bin/spellerproper-$(GTLANG).fst\" ; " \
		-e "define sggen" \
		-e "read regex @re\"common/polderland/Prop-Pl-Gen-filter.regex\" \
			.o. @\"$(GTLANG)/bin/spellerproper-$(GTLANG).fst\" ; " \
		-e "define plgen" \
		-e "read regex @re\"common/polderland/N-Sg-Nom-Cmp-filter.regex\" \
			.o. @\"$(GTLANG)/bin/spellerproper-$(GTLANG).fst\" ; " \
		-e "define sgnomcmp" \
		-e "read regex @re\"common/polderland/N-Sg-Gen-Cmp-filter.regex\" \
			.o. @\"$(GTLANG)/bin/spellerproper-$(GTLANG).fst\" ; " \
		-e "define sggencmp" \
		-e "read regex @re\"common/polderland/N-Pl-Gen-Cmp-filter.regex\" \
			.o. @\"$(GTLANG)/bin/spellerproper-$(GTLANG).fst\" ; " \
		-e "define plgencmp" \
		-e "read regex @re\"common/polderland/N-Sg-Gen-Cmp-X-filter.regex\" \
			.o. @\"$(GTLANG)/bin/spellerproper-$(GTLANG).fst\" ; " \
		-e "define sggencmpx" \
		-e "read regex @re\"common/polderland/N-Pl-Gen-Cmp-X-filter.regex\" \
			.o. @\"$(GTLANG)/bin/spellerproper-$(GTLANG).fst\" ; " \
		-e "define plgencmpx" \
		-e "read regex @\"$(GTLANG)/bin/spellerproper-$(GTLANG).fst\" \
			- [ none | last \
			| sgnom | sggen | plgen \
			| sgnomcmp | sggencmp | plgencmp \
			| sggencmpx | plgencmpx ]; " \
		-e "define otherforms" \
		-e "read regex none [{ NI,UI} ]; " \
		-e "read regex last [{ NePE,UI} ]; " \
		-e "read regex sgnom [{ NePIE,UI} ]; " \
		-e "read regex sggen [{ NePIE,UI} ]; " \
		-e "read regex plgen [{ NePIE,UI} ]; " \
		-e "read regex sgnomcmp [{ NePIE,UI} ]; " \
		-e "read regex sgnomcmp [{- NIX,NePABO,NePEX}]; " \
		-e "read regex sggencmp [{ NePIE,UI} ]; " \
		-e "read regex sggencmp [{- NIX,NePABO,NePEX}]; " \
		-e "read regex plgencmp [{ NePIE,UI} ]; " \
		-e "read regex plgencmp [{- NIX,NePABO,NePEX}]; " \
		-e "read regex sggencmpx [{ NePIE,UI} ]; " \
		-e "read regex sggencmpx [{- NIX,NePABO,NePEX}]; " \
		-e "read regex plgencmpx [{ NePIE,UI} ]; " \
		-e "read regex plgencmpx [{- NIX,NePABO,NePEX}]; " \
		-e "read regex otherforms [{ NePIE,UI} ]; " \
		-e "union net" \
		-e "turn stack" \
		-e "load stack < common/bin/usage-tags-remove.fst" \
		-e "compose net" \
		-e "save stack $@" \
		-stop

# Disabled xfst commands, kept for reference:
# -e "read regex Rcompound [{- NePABOEIX} ]; " \
# -e "read regex @re\"common/polderland/Cmp-filter.regex\" \
# .o. @\"$(GTLANG)/bin/spellerproper-$(GTLANG).fst\" ; " \
# -e "define Rcompound" \
# -e "read regex [ @\"$(GTLANG)/bin/spellerproper-$(GTLANG).fst\" \
# .o. [ A a ?* | E e ?* ] ] [{ NtPAI+}]; " \
# -e "read regex [{-}] sgnom [{ NePAIBX} ]; " \
# -e "read regex [{-}] sggen [{ NePAIBX} ]; " \
# -e "read regex [{-}] plgen [{ NePAIBX} ]; " \
# -e "read regex [{-}] sgnomcmp [{ NePAIBX} ]; " \
# -e "read regex [{-}] sgnomcmp [{- NAPOX,NtPAOX,NtAOX,NAOX,NePABIX}]; " \
# -e "read regex [{-}] [otherforms .o.
# ~[ A a ?* | E e ?* ]] [{ NePAIBX} ]; "
# (The trailing backslash of the disabled line above has been dropped: a
# backslash at the end of a make comment continues the comment onto the next
# line and would swallow whatever follows.)

# Command-style targets: they never produce a file of their own name.
.PHONY: spellernums spellernums-plx plx-conv-test

# Numerals: the numeral filter applied to the non-recursive speller
# transducer.
spellernums: $(GTLANG)/bin/spellernums-$(GTLANG).fst

$(GTLANG)/bin/spellernums-$(GTLANG).fst: \
    $(GTLANG)/bin/spellernonrec-$(GTLANG).fst \
    common/bin/num-filter.fst
	@echo
	@echo "*** Building spellernums-$(GTLANG).fst ***"
	@echo
	$(CFST) -e "read regex @\"common/bin/num-filter.fst\" \
			.o. @\"$(GTLANG)/bin/spellernonrec-$(GTLANG).fst\" ; " \
		-e "save stack $@" \
		-stop

# Numeral transducer composed with the num-to-NA regex.
spellernums-plx: $(GTLANG)/bin/spellernums-$(GTLANG)-plx.fst

$(GTLANG)/bin/spellernums-$(GTLANG)-plx.fst: \
    $(GTLANG)/bin/spellernums-$(GTLANG).fst
	@echo
	@echo "*** Building spellernums-$(GTLANG)-plx.fst ***"
	@echo
	$(CFST) -e "read regex @re\"common/polderland/num-to-NA.regex\" \
			.o. @\"$(GTLANG)/bin/spellernums-$(GTLANG).fst\" ; " \
		-e "save stack $@" \
		-stop

# Run the compiled speller over the accept and reject word lists and store
# its output next to them. An empty userdict file is created for the run and
# removed again afterwards.
# A disabled output filter used to follow the second run:  | grep -v ' '
# It was written as a comment line ending in a backslash directly before
# `rm -f userdict`; since a backslash continues a make comment, that could
# turn the cleanup into part of the comment, so the remark now lives here.
plx-conv-test: $(GTLANG)/polderland/testing/words-accept.txt \
    $(GTLANG)/polderland/testing/words-reject.txt
	touch userdict
	$(SPELL) -u8 -0 -d -m $(GTLANG)/polderland/$(WINSPELL) \
		< $(GTLANG)/polderland/testing/words-accept.txt \
		> $(GTLANG)/polderland/testing/words-accept.actual-out.txt
	$(SPELL) -u8 -0 -d -m $(GTLANG)/polderland/$(WINSPELL) \
		< $(GTLANG)/polderland/testing/words-reject.txt \
		> $(GTLANG)/polderland/testing/words-reject.actual-out.txt
	rm -f userdict