## Process this file with automake to produce Makefile.in

## Copyright (C) 2011 Samediggi

## This program is free software: you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation, either version 3 of the License, or
## (at your option) any later version.

## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.

## You should have received a copy of the GNU General Public License
## along with this program.  If not, see <https://www.gnu.org/licenses/>.

# Source transducer for the speller, and the acceptor built from it:
GT_SPELLER_HFST=../generator-desktopspeller-gt-norm.hfst
GT_SPELLER_ACCEPTOR=acceptor.default.hfst

# Max compression for zipped files:
ZIPFLAGS=-9 $(VERBOSITY)

### SNM 25.1.2015: Commented out xz compression, it isn't supported by libvoikko
## Use xz for zhfst compression if possible, otherwise fall back to zip:
#if CAN_XZ
#ZHFST_COMPRESSION=$(TAR) -c * | $(XZ) > ../../$@
#else
ZHFST_COMPRESSION=$(ZIP) $(ZIPFLAGS) ../../$@ *
#endif # CAN_XZ

###### Conditional string variables ######
#
# Every variable in this section is recursively expanded (`=`) on purpose:
# the values refer to automatic variables ($@, $<, $^, $*) that only exist
# while a rule is being processed. The tests use the `[[ ... ]]` construct,
# so the shell that make runs must understand it (e.g. bash).

#### Initial letters:

# Prerequisite pattern for the initial-letter fst; empty when
# INITIAL_EDITS is 'no':
initial_letter_deps=$(shell \
    if [[ $(INITIAL_EDITS) != 'no' ]] ; then \
        echo "initial_letters.all.%.hfst"; \
    else \
        echo ""; \
    fi)

# Regex fragment that includes the initial-letter fst in the final error
# model; empty when INITIAL_EDITS is 'no':
initial_letter_fst_include=$(shell \
    if [[ $(INITIAL_EDITS) != 'no' ]] ; then \
        echo "( @\\\"initial_letters.all.$*.hfst\\\" )"; \
    else \
        echo ""; \
    fi)

# Extra option handed to the editdist.py script; empty when INITIAL_EDITS
# is 'no':
initial_letter_error_model_option=$(shell \
    if [[ $(INITIAL_EDITS) != 'no' ]] ; then \
        echo "--no-string-initial-correction"; \
    else \
        echo ""; \
    fi)

# Prerequisite pattern(s) for initial_letters.all, selected by the value of
# INITIAL_EDITS ('regex', 'txt' or 'both'; anything else gives no
# prerequisites):
initial_letter_all_deps=$(shell \
    if [[ $(INITIAL_EDITS) == 'regex' ]] ; then \
        echo "initial_letters.regex.%.hfst"; \
    elif [[ $(INITIAL_EDITS) == 'txt' ]] ; then \
        echo "initial_letters.txt.%.hfst"; \
    elif [[ $(INITIAL_EDITS) == 'both' ]] ; then \
        echo "initial_letters.regex.%.hfst initial_letters.txt.%.hfst"; \
    else \
        echo ""; \
    fi)

# Build command for initial_letters.all: a plain copy when there is a single
# source, a disjunction when both sources are used, nothing otherwise:
initial_letter_all_build=$(shell \
    if [[ $(INITIAL_EDITS) == 'regex' ]] ; then \
        echo "$(AM_V_CP)cp -f $< $@"; \
    elif [[ $(INITIAL_EDITS) == 'txt' ]] ; then \
        echo "$(AM_V_CP)cp -f $< $@"; \
    elif [[ $(INITIAL_EDITS) == 'both' ]] ; then \
        echo "$(AM_V_UNION)$(HFST_DISJUNCT) $^ -o $@"; \
    else \
        echo ""; \
    fi)

#### Strings:

# Prerequisite pattern for the string-edit fst; empty when STRING_EDITS is
# 'no':
strings_deps=$(shell \
    if [[ $(STRING_EDITS) != 'no' ]] ; then \
        echo "strings.all.%.hfst"; \
    else \
        echo ""; \
    fi)

# First command of the editdist.all pipeline: disjunct all prerequisites
# when string edits are enabled, otherwise just pass them through with cat:
strings_fst_include=$(shell \
    if [[ $(STRING_EDITS) != 'no' ]] ; then \
        echo "$(AM_V_UNION)$(HFST_DISJUNCT) $^"; \
    else \
        echo "$(AM_V_GEN)cat $^"; \
    fi)

# Prerequisite pattern(s) for strings.all, selected by the value of
# STRING_EDITS ('regex', 'txt' or 'both'):
strings_all_deps=$(shell \
    if [[ $(STRING_EDITS) == 'regex' ]] ; then \
        echo "strings.regex.%.hfst"; \
    elif [[ $(STRING_EDITS) == 'txt' ]] ; then \
        echo "strings.txt.%.hfst"; \
    elif [[ $(STRING_EDITS) == 'both' ]] ; then \
        echo "strings.regex.%.hfst strings.txt.%.hfst"; \
    else \
        echo ""; \
    fi)

# Build command for strings.all: copy for a single source, disjunction for
# both, nothing otherwise:
strings_all_build=$(shell \
    if [[ $(STRING_EDITS) == 'regex' ]] ; then \
        echo "$(AM_V_CP)cp -f $< $@"; \
    elif [[ $(STRING_EDITS) == 'txt' ]] ; then \
        echo "$(AM_V_CP)cp -f $< $@"; \
    elif [[ $(STRING_EDITS) == 'both' ]] ; then \
        echo "$(AM_V_UNION)$(HFST_DISJUNCT) $^ -o $@"; \
    else \
        echo ""; \
    fi)

#### Final strings:

# Prerequisite pattern for the final-string fst; empty when
# FINAL_STRING_EDITS is 'no':
final_strings_deps=$(shell \
    if [[ $(FINAL_STRING_EDITS) != 'no' ]] ; then \
        echo "final_strings.all.%.hfst"; \
    else \
        echo ""; \
    fi)

# Regex fragment that includes the final-string fst in the final error
# model; empty when FINAL_STRING_EDITS is 'no':
final_strings_fst_include=$(shell \
    if [[ $(FINAL_STRING_EDITS) != 'no' ]] ; then \
        echo "( @\\\"final_strings.all.$*.hfst\\\" )"; \
    else \
        echo ""; \
    fi)

# Prerequisite pattern(s) for final_strings.all, selected by the value of
# FINAL_STRING_EDITS ('regex', 'txt' or 'both'):
final_strings_all_deps=$(shell \
    if [[ $(FINAL_STRING_EDITS) == 'regex' ]] ; then \
        echo "final_strings.regex.%.hfst"; \
    elif [[ $(FINAL_STRING_EDITS) == 'txt' ]] ; then \
        echo "final_strings.txt.%.hfst"; \
    elif [[ $(FINAL_STRING_EDITS) == 'both' ]] ; then \
        echo "final_strings.regex.%.hfst final_strings.txt.%.hfst"; \
    else \
        echo ""; \
    fi)

# Build command for final_strings.all: copy for a single source,
# disjunction for both, nothing otherwise:
final_strings_all_build=$(shell \
    if [[ $(FINAL_STRING_EDITS) == 'regex' ]] ; then \
        echo "$(AM_V_CP)cp -f $< $@"; \
    elif [[ $(FINAL_STRING_EDITS) == 'txt' ]] ; then \
        echo "$(AM_V_CP)cp -f $< $@"; \
    elif [[ $(FINAL_STRING_EDITS) == 'both' ]] ; then \
        echo "$(AM_V_UNION)$(HFST_DISJUNCT) $^ -o $@"; \
    else \
        echo ""; \
    fi)

#### Whole words:

# Prerequisite pattern for the whole-word replacement list; empty when
# WORD_REPLACEMENTS is 'no':
words_deps=$(shell \
    if [[ $(WORD_REPLACEMENTS) != 'no' ]] ; then \
        echo "words.%.txt"; \
    else \
        echo ""; \
    fi)

####### Automake targets: ########

GT_ERRMODELS=

if WANT_SPELLERS
if CAN_HFST
#GT_ERRMODELS+=errmodel.edit-distance-1.hfst
GT_ERRMODELS+=errmodel.default.hfst

# libvoikko can't yet handle multiple error models - do NOT include this line:
#                errmodel.ocr.hfst

GT_SPELLING_HFST=$(GTLANG2).zhfst

voikkosharedir=$(datadir)/voikko/3/
#! @param GT_VOIKKO optional, set to spell checker automata names if
#!                  installable
voikkoshare_DATA=$(GT_SPELLING_HFST) $(ALT_WS_ZHFST_FILES) $(ALT_ORTH_ZHFST_FILES)
endif # CAN_HFST
endif # WANT_SPELLERS

noinst_DATA=$(GT_ERRMODELS) \
            $(INST_FILES)

###################################
####### HFST build rules: #########

####### Easter egg version info: #######

# Easter egg content - depends also on the fst, to
# make sure the easter egg is rebuilt every time the fst is rebuilt:
easteregg.%.txt: $(srcdir)/version.txt $(GT_SPELLER_HFST)
	$(AM_V_GEN)$(GTCORE)/scripts/make-hfstspeller-version-easter-egg.sh \
		$(GTLANG2) $(top_srcdir) $< $* > $@

# Easter egg suggestions: every line of the easter egg text is turned into
#     nuvviDspeller:<line><TAB><line number>
# A single perl command is used so that the TAB separator is explicit and
# multi-digit line numbers are kept intact.
easteregg.%.suggtxt: easteregg.%.txt
	$(AM_V_GEN)perl -ne 'chomp; print "nuvviDspeller:$$_\t$$.\n"' \
		< $< \
		> $@

# Easter egg string acceptor:
easteregg.%.hfst: easteregg.%.txt
	$(AM_V_GEN)$(HFST_STRINGS2FST) $(HFST_FLAGS) -j < $< \
		| $(HFST_PROJECT) $(HFST_FLAGS) --project=lower > $@

####### Error model: #######

# Error model building - edit distance based on transducer alphabet:
editdist.%.regex: editdist.%.txt $(initial_letter_deps)
	$(AM_V_GEN)$(GTCORE)/scripts/editdist.py \
		--verbose \
		--swap \
		--epsilon='@0@' \
		--default-weight=$(DEFAULT_WEIGHT) \
		--regex \
		--input=$< \
		--output-file=$@ \
		$(initial_letter_error_model_option)

# Initial letters from a string list; comment lines (#) and empty lines are
# filtered out before compilation:
initial_letters.txt.%.hfst: initial_letters.%.txt
	$(AM_V_STR2FST)grep -v '^#' $< | grep -v '^$$' \
		| $(HFST_STRINGS2FST) $(HFST_FLAGS) -j -p \
		-o $@

# Initial letters from a regex file:
initial_letters.regex.%.hfst: initial_letters.%.regex
	$(AM_V_RGX2FST)$(HFST_REGEXP2FST) -S $(HFST_FLAGS) -i $< \
		-o $@

# Combined initial-letter fst; sources and command depend on INITIAL_EDITS:
initial_letters.all.%.hfst: $(initial_letter_all_deps)
	$(initial_letter_all_build)

# Final strings from a string list; comment lines (#) and empty lines are
# filtered out before compilation:
final_strings.txt.%.hfst: final_strings.%.txt
	$(AM_V_STR2FST)grep -v '^#' $< | grep -v '^$$' \
		| $(HFST_STRINGS2FST) $(HFST_FLAGS) -j \
		-o $@

# Final strings from a regex file:
final_strings.regex.%.hfst: final_strings.%.regex
	$(AM_V_RGX2FST)$(HFST_REGEXP2FST) -S $(HFST_FLAGS) -i $< \
		-o $@

# Combined final-string fst; sources and command depend on
# FINAL_STRING_EDITS:
final_strings.all.%.hfst: $(final_strings_all_deps)
	$(final_strings_all_build)

# Helper fst:
anystar.hfst:
	$(AM_V_RGX2FST)echo "?*" | $(HFST_REGEXP2FST) -o $@

# Error model building - list of strings known to be misspelled. The string
# fst is wrapped in anystar.hfst on both sides:
strings.txt.%.hfst: strings.%.txt anystar.hfst
	$(AM_V_STR2FST)grep -v '^#' $< | grep -v '^$$' \
		| $(HFST_STRINGS2FST) $(HFST_FLAGS) -j \
		| $(HFST_CONCATENATE) anystar.hfst - \
		| $(HFST_CONCATENATE) - anystar.hfst \
		-o $@

# strings regex file:
# Repeat the compiled strings regex up to the specified number of times.
# This makes the total edit distance for the content of the regex file N
# times larger than the edit distance, since the result is repeated again
# when editdist.all.%.hfst is built. The idea is that the regex should
# contain a highly targeted set of frequent spelling errors.
strings.regex.%.hfst: strings.%.regex anystar.hfst
	$(AM_V_RGX2FST)$(HFST_REGEXP2FST) -S $(HFST_FLAGS) -i $< \
		| $(HFST_CONCATENATE) anystar.hfst - \
		| $(HFST_CONCATENATE) - anystar.hfst \
		| $(HFST_REPEAT) -f 1 -t $(STRING_REGEX_EDIT_DISTANCE) \
		-o $@

# Combined string-edit fst; sources and command depend on STRING_EDITS:
strings.all.%.hfst: $(strings_all_deps)
	$(strings_all_build)

# Combine edit distance with string pattern edits, then multiply according to
# the specified editing distance. The strings part is included depending on
# variable setting in Makefile.am.
# The first command comes from $(strings_fst_include): a disjunction of all
# prerequisites when string edits are enabled, a plain `cat` otherwise. Its
# output is then repeated between 1 and $(EDIT_DISTANCE) times.
editdist.all.%.hfst: $(strings_deps) editdist.%.hfst
	$(strings_fst_include) \
		| $(HFST_REPEAT) -f 1 -t $(EDIT_DISTANCE) \
		-o $@

# Error model building - list of words known to be misspelled. Comment
# lines (#) and empty lines are dropped from all input files:
words.%.hfst: $(words_deps) easteregg.%.suggtxt
	$(AM_V_STR2FST)grep -h -v '^#' $^ | grep -v '^$$' \
		| $(HFST_STRINGS2FST) $(HFST_FLAGS) -j -o $@

# The final error model is assembled here: the whole-word fst in
# disjunction with the sequence [initial letters] [edit distance]
# [final strings], where the optional parts expand to nothing when they are
# switched off. The result is converted to the olw format.
errmodel.%.hfst: words.%.hfst \
                 $(initial_letter_deps) \
                 editdist.all.%.hfst \
                 $(final_strings_deps)
	$(AM_V_RGX2FST)printf "\
	[ @\"words.$*.hfst\" \
	| \
	  [ \
	    $(initial_letter_fst_include) \
	    @\"editdist.all.$*.hfst\" \
	    $(final_strings_fst_include) \
	  ] \
	];" \
		| $(HFST_REGEXP2FST) -S -E $(HFST_FLAGS) \
		| $(HFST_FST2FST) $(HFST_FLAGS) -f olw -o $@

####### Alternate error model: #######
# Alternatively, the error model can be constructed as a long list of regular
# expressions, semicolon separated. The flags variable is $(HFST_FLAGS), the
# same one every other HFST invocation in this file uses:
errmodel.%.hfst: errmodel.%.regex easteregg.%.hfst
	$(AM_V_GEN)$(HFST_REGEXP2FST) $(HFST_FLAGS) -S -i $< \
		| $(HFST_DISJUNCT) - easteregg.$*.hfst \
		-o $@

# ...
# or as an xfscript file:
# NOTE(review): the xfscript source is compiled with $(HFST_REGEXP2FST),
# exactly like the regex variant - confirm that this is the intended tool.
# The flags variable is $(HFST_FLAGS), the same one every other HFST
# invocation in this file uses.
errmodel.%.hfst: errmodel.%.xfscript easteregg.%.hfst
	$(AM_V_GEN)$(HFST_REGEXP2FST) $(HFST_FLAGS) -S -i $< \
		| $(HFST_DISJUNCT) - easteregg.$*.hfst \
		-o $@

####### Speller acceptor: #######
# Build the automaton used for the speller: project the source fst to its
# lower side, minimise it, add the easter egg acceptor and convert the
# result to the olw format.
$(GT_SPELLER_ACCEPTOR): \
acceptor.%.hfst: $(GT_SPELLER_HFST) easteregg.%.hfst
	$(AM_V_PROJECT)cat $< \
		| $(HFST_PROJECT) $(HFST_FLAGS) --project=lower \
		| $(HFST_MINIMIZE_SPELLER) $(HFST_FLAGS) \
		| $(HFST_DISJUNCT) - easteregg.$*.hfst \
		| $(HFST_FST2FST) $(HFST_FLAGS) -f olw \
		-o $@

####### *.zhfst file: #######
# Finally build the zhfst file, and make a copy in a subdir named '3', so that
# we can test it without installing it (the '3' dir is a voikko requirement).
# The archive content is staged in build/$@/ and compressed from there.
$(GT_SPELLING_HFST): $(srcdir)/index.xml \
                     $(GT_ERRMODELS) \
                     $(GT_SPELLER_ACCEPTOR)
	$(AM_V_at)rm -f $@
	$(AM_V_at)$(MKDIR_P) build/$@
	$(AM_V_at)rm -f build/$@/*
	$(AM_V_at)cp $(srcdir)/index.xml build/$@/index.xml
	$(AM_V_at)cp $(GT_SPELLER_ACCEPTOR) build/$@/$(GT_SPELLER_ACCEPTOR)
	@# Copy into the directory, so that the command stays valid even if
	@# GT_ERRMODELS should ever list more than one file:
	$(AM_V_at)cp $(GT_ERRMODELS) build/$@/
	$(AM_V_ZIP)cd build/$@/ && $(ZHFST_COMPRESSION)
	$(AM_V_at)$(MKDIR_P) 3
	$(AM_V_at)cp -f $@ 3/

####### Other targets: ###########
# Remove everything built in this directory; files named after
# $(CORPUSNAME) are only removed when that variable is non-empty.
clean-local:
	-rm -f *.hfst *.xfst *.zhfst easteregg.* *.oxt *.xpi *.zip
	-rm -rf 3 *.service build
	-rm -f editdist.*.regex
	if ! [ "x$(CORPUSNAME)" = "x" ] ; then \
		rm -f $(CORPUSNAME).* ; \
	fi

# Keep these intermediate targets when building using --debug. The first
# group is the original list; the second group adds the intermediate names
# that the pattern rules of this file actually produce.
.SECONDARY: editdist.all.default.hfst \
            strings.default.hfst \
            editdist.default.hfst \
            editdist.default.regex \
            words.default.hfst \
            initial_letters.list.default.hfst \
            initial_letters.regex.default.hfst \
            initial_letters.all.default.hfst \
            final_strings.default.hfst \
            initial_letters.txt.default.hfst \
            strings.txt.default.hfst \
            strings.regex.default.hfst \
            strings.all.default.hfst \
            final_strings.txt.default.hfst \
            final_strings.regex.default.hfst \
            final_strings.all.default.hfst

include $(top_srcdir)/am-shared/tools-spellcheckers-fstbased-desktop-hfst_prods_n_upload-dir-include.am
include $(top_srcdir)/am-shared/tools-spellcheckers-fstbased-desktop-hfst_alt_orth-dir-include.am
include $(top_srcdir)/am-shared/tools-spellcheckers-fstbased-desktop-hfst_alt_ws-dir-include.am
include $(top_srcdir)/am-shared/regex-include.am
include $(top_srcdir)/am-shared/silent_build-include.am

# vim: set ft=automake: