## Process this file with automake to produce Makefile.in

## Copyright (C) 2011 Samediggi

## This program is free software: you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation, either version 3 of the License, or
## (at your option) any later version.

## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.

## You should have received a copy of the GNU General Public License
## along with this program.  If not, see <https://www.gnu.org/licenses/>.

####### Compilation variables: #######

# NOTE(review): presumably picked up by the regex build rules that are
# included at the end of this file - confirm against regex-include.am.
HFST_REGEXP2FST_FLAGS = "--xerox-composition=ON"

####### Source file defs: ########

# 1) GTD Core regex files (hand-written, one filter per file):
GIELLA_FILTER_CORE_REGEX_SRCS = \
	convert_to_flags-CmpNP-tags.regex \
	split-CmpNP-tags.regex \
	extract-Sem_Plc-strings.regex \
	make-optional-adv_comp-tags.regex \
	make-optional-homonymy-tags.regex \
	make-optional-hyph-tags.regex \
	make-optional-NGminip-tags.regex \
	make-optional-transitivity-tags.regex \
	make-optional-v1-tags.regex \
	make-optional-variant-tags.regex \
	remove-adv_comp-tags.regex \
	remove-derivation-position-tags.regex \
	remove-homonymy-tags.regex \
	remove-mwe-tags.regex \
	remove-norm-comp-tags.regex \
	remove-number-string-tags.regex \
	remove-orig_lang-tags.regex \
	remove-orthography-tags.regex \
	remove-transitivity-tags.regex \
	remove-usage-tags.regex \
	remove-usage_except_NGminip-tags.regex \
	remove-usage_except_speller-tags.regex \
	remove-variant-tags.regex \
	remove-hyphenation-marks.regex \
	remove-infl_boundary-marks.regex \
	remove-infl_deriv-borders.regex \
	remove-nonfirst_infl_boundary-marks.regex \
	remove-word-boundary.regex \
	remove-ABBR-strings.regex \
	remove-ACR-strings.regex \
	remove-CLB-strings.regex \
	remove-hyphenated-strings.regex \
	remove-MinusSpell-strings.regex \
	remove-MT-strings.regex \
	remove-mwe-strings.regex \
	remove-NA-strings.regex \
	remove-NG-strings.regex \
	remove-NGminip-strings.regex \
	remove-Orth_IPA-strings.regex \
	remove-Orth_Strd-strings.regex \
	remove-Prop-strings.regex \
	remove-PUNCT-strings.regex \
	remove-RCmpnd-strings.regex \
	remove-UseCirc-strings.regex \
	remove-use_marg-strings.regex \
	remove-variant-strings.regex \
	reorder-subpos-tags.regex \
	tag-lowercase_error-strings.regex \
	extract-OLang_ENG-strings.regex \
	extract-OLang_FIN-strings.regex \
	extract-OLang_NNO-strings.regex \
	extract-OLang_NOB-strings.regex \
	extract-OLang_SME-strings.regex \
	extract-OLang_SWE-strings.regex \
	remove-OLang_ENG-strings.regex \
	remove-OLang_FIN-strings.regex \
	remove-OLang_NNO-strings.regex \
	remove-OLang_NOB-strings.regex \
	remove-OLang_SME-strings.regex \
	remove-OLang_SWE-strings.regex \
	remove-OLang_UND-strings.regex

# 2) GTD Core **generated** regex files. These are produced by the pattern
#    rules further down in this file; the dialect filters are appended via
#    DIALECT_REGEXES:
GIELLA_FILTER_CORE_GENERATED_REGEX_SRCS = \
	make-optional-semantic-tags.regex \
	remove-semantic-tags.regex \
	reorder-semantic-tags.regex \
	remove-derivation-strings.regex \
	make-optional-error-tags.regex \
	remove-error-tags.regex \
	remove-error-strings.regex \
	remove-dialect-tags.regex \
	$(DIALECT_REGEXES)

# 3) GTD Core xfscript filter files (currently none):
GIELLA_FILTER_CORE_XFSCRIPT_SRCS =

# 4) GTD Core **generated** xfscript files (currently none):
GIELLA_FILTER_CORE_GENERATED_XFSCRIPT_SRCS =

# 5) GTD Core lexc filter files (currently none):
GIELLA_FILTER_CORE_LEXC_SRCS =

# 6) GTD Core **generated** lexc files (currently none):
GIELLA_FILTER_CORE_GENERATED_LEXC_SRCS =

# Variable for all source files - regex
# (core + local, hand-written + generated):
REGEX_SRCS = \
	$(GIELLA_FILTER_CORE_REGEX_SRCS) \
	$(GIELLA_FILTER_LOCAL_REGEX_SRCS) \
	$(GIELLA_FILTER_CORE_GENERATED_REGEX_SRCS) \
	$(GIELLA_FILTER_LOCAL_GENERATED_REGEX_SRCS)

# Variable for all source files - xfscript
# (core + local, hand-written + generated):
XFSCRIPT_SRCS = \
	$(GIELLA_FILTER_CORE_XFSCRIPT_SRCS) \
	$(GIELLA_FILTER_LOCAL_XFSCRIPT_SRCS) \
	$(GIELLA_FILTER_CORE_GENERATED_XFSCRIPT_SRCS) \
	$(GIELLA_FILTER_LOCAL_GENERATED_XFSCRIPT_SRCS)

# Variable for all source files - lexc:
LEXC_SRCS = \
	$(GIELLA_FILTER_CORE_LEXC_SRCS) \
	$(GIELLA_FILTER_LOCAL_LEXC_SRCS) \
	$(GIELLA_FILTER_CORE_GENERATED_LEXC_SRCS) \
	$(GIELLA_FILTER_LOCAL_GENERATED_LEXC_SRCS)

# Variable for all generated source files (every *_GENERATED_* list, core and
# local, for all three source formats):
GENERATED_FILES = \
	$(GIELLA_FILTER_CORE_GENERATED_REGEX_SRCS) \
	$(GIELLA_FILTER_LOCAL_GENERATED_REGEX_SRCS) \
	$(GIELLA_FILTER_CORE_GENERATED_XFSCRIPT_SRCS) \
	$(GIELLA_FILTER_LOCAL_GENERATED_XFSCRIPT_SRCS) \
	$(GIELLA_FILTER_CORE_GENERATED_LEXC_SRCS) \
	$(GIELLA_FILTER_LOCAL_GENERATED_LEXC_SRCS)

# Define variable before using it, so that the conditional '+=' below always
# has something to append to:
GIELLA_FILTER_TARGETS =

# Generate the filter targets from the source filenames: for each enabled
# fst backend, every source file name is mapped to a target carrying that
# backend's suffix.
if CAN_HFST
GIELLA_FILTER_TARGETS += \
	$(patsubst %.regex,%.hfst,$(REGEX_SRCS)) \
	$(patsubst %.xfscript,%.hfst,$(XFSCRIPT_SRCS)) \
	$(patsubst %.lexc,%.hfst,$(LEXC_SRCS))
endif # CAN_HFST

if CAN_XFST
GIELLA_FILTER_TARGETS += \
	$(patsubst %.regex,%.xfst,$(REGEX_SRCS)) \
	$(patsubst %.xfscript,%.xfst,$(XFSCRIPT_SRCS)) \
	$(patsubst %.lexc,%.xfst,$(LEXC_SRCS))
endif # CAN_XFST

if CAN_FOMA
GIELLA_FILTER_TARGETS += \
	$(patsubst %.regex,%.foma,$(REGEX_SRCS)) \
	$(patsubst %.xfscript,%.foma,$(XFSCRIPT_SRCS)) \
	$(patsubst %.lexc,%.foma,$(LEXC_SRCS))
endif # CAN_FOMA

# need to check way to list build targets like automake

# Only the hand-written local sources (plus EXTRA_SRCS) are listed for
# distribution here; core and generated files are not.
#! @param GIELLA_FILTER_LOCAL_SRCS required, contains all local additions
EXTRA_DIST = \
	$(GIELLA_FILTER_LOCAL_REGEX_SRCS) \
	$(GIELLA_FILTER_LOCAL_XFSCRIPT_SRCS) \
	$(GIELLA_FILTER_LOCAL_LEXC_SRCS) \
	$(EXTRA_SRCS)

####### Automake targets: ########

# The compiled filters are built but not installed.
# @param GIELLA_FILTER_TARGETS required
noinst_DATA = $(GIELLA_FILTER_TARGETS)

# Generated source files are listed both as built sources and as files to
# delete on 'make clean':
BUILT_SOURCES = $(GENERATED_FILES)
CLEANFILES = $(GENERATED_FILES)

# The or construction needs to be put into a variable for the parentheses to
# survive 'make' and be visible to the shell - this at the same time silences
# grep.
# Keeps only the lines that start or end with '+'. When grep matches nothing
# (non-zero exit), the placeholder '+NoTagsFound' is printed instead, so the
# command as a whole still succeeds and the output is never empty.
GREPTAGSCOMMAND=( grep -E '(^\+.|.\+$$)' || echo '+NoTagsFound' )

# Variable to hold filename for file containing all tags used
# (all symbols starting or ending with '+')
GT_TAGS=gttags.txt

# Extract the sigma set using either HFST or Xfst depending on the configuration
# The sigma extraction is tuned to the peculiarities of both, and does produce
# the exact same output. The sigma set can be used to extract further symbol
# sets. The file sigma.txt contains one symbol per line, and the whole line
# is the symbol (minus final newline).
if CAN_HFST
# Hfst: break on comma + space in three steps, to preserve space as a symbol.
# The ', ' separator is first replaced by three backslashes (sed), which are
# then turned into newlines (perl); empty lines and the HFST-internal
# epsilon/identity/unknown symbols are dropped at the end.
sigma.txt: $(top_builddir)/src/morphology/lexicon.hfst
	$(AM_V_GEN)$(HFST_SUMMARIZE) -v $^ \
		| grep -A1 '^sigma set' \
		| grep -v '^sigma set' \
		| sed 's/, /\\\\\\/g' \
		| perl -pe 's/\\\\\\/\n/g' \
		| grep -v '^$$' \
		| grep -E -v '(@_EPSILON_SYMBOL_@|@_IDENTITY_SYMBOL_@|@_UNKNOWN_SYMBOL_@)' \
		> $@
else
if CAN_XFST
# Special treatment of space (" "), break on space, special treatment of
# reserved symbols ("X") and double quote. The Xfst file needs to be sorted.
# The sort is forced into the C locale (LC_ALL=C) so that the resulting order
# does not depend on the locale of the user running the build.
sigma.txt: $(top_builddir)/src/morphology/lexicon.xfst
	$(AM_V_GEN)$(XFST) -q \
			-e "load stack $^" \
			-e "print sigma" \
			-stop \
		| grep -v '^Size' \
		| sed 's/^Sigma: //' \
		| sed 's/ //' \
		| sed 's/" "/XXXXX/g' \
		| tr ' ' '\n' \
		| sed 's/XXXXX/ /g' \
		| grep -v '^$$' \
		| sed 's/^"\(.\)"$$/\1/' \
		| sed 's/%\"/"/' \
		| LC_ALL=C sort \
		> $@
else
# If neither Hfst nor Xfst is enabled, create an empty file:
sigma.txt:
	$(AM_V_GEN)touch $@
endif # CAN_XFST
endif # CAN_HFST

# The tag list is whatever GREPTAGSCOMMAND keeps from the sigma set. The
# target is spelled via GT_TAGS so that it always matches the prerequisite
# name used by the tag extraction rules.
$(GT_TAGS): sigma.txt
	$(AM_V_GEN)$(GREPTAGSCOMMAND) < $< > $@

####### Build rules for building dialect filters: #######

# DIALECT_TARGETS_?FST is constructed dynamically based on the dialects
# specified in configure.ac.
if HAVE_DIALECTS
# Each name is built step by step from the entries in DIALECTS:
#   <d>  ->  remove-all_dialects_but_<d>  ->  ...-strings  ->  ...-strings.<ext>
DIALECT_PREFIXES     = $(addprefix remove-all_dialects_but_,$(DIALECTS))
DIALECT_SUFFIXES     = $(addsuffix -strings,$(DIALECT_PREFIXES))
DIALECT_REGEXES      = $(addsuffix .regex,$(DIALECT_SUFFIXES))
DIALECT_TARGETS_XFST = $(addsuffix .xfst,$(DIALECT_SUFFIXES))
DIALECT_TARGETS_HFST = $(addsuffix .hfst,$(DIALECT_SUFFIXES))
DIALECT_TARGETS_FOMA = $(addsuffix .foma,$(DIALECT_SUFFIXES))
endif # HAVE_DIALECTS

######## Build rules for extracting tags: ########
##
## There is one target per type of tag to be extracted. Each of them calls
## extract-tags.sh with the tag file as input, the target as output, and -
## as the last option - the string matching the tag set in question. This
## is safe even for languages not using a specific tag set, as a default
## dummy tag will be used instead, and the resulting filter will be
## harmless (unless a language uses the tag +Sem/DummyTag, +Dial/DummyTag,
## etc.).

dialect-tags.txt: $(GT_TAGS)
	$(AM_V_GEN)$(GTCORE)/scripts/extract-tags.sh $< $@ +Dial/

derivation-tags.txt: $(GT_TAGS)
	$(AM_V_GEN)$(GTCORE)/scripts/extract-tags.sh $< $@ +Der/

error-tags.txt: $(GT_TAGS)
	$(AM_V_GEN)$(GTCORE)/scripts/extract-tags.sh $< $@ +Err/

semantic-tags.txt: $(GT_TAGS)
	$(AM_V_GEN)$(GTCORE)/scripts/extract-tags.sh $< $@ +Sem/

####### Build rules for creating regexes from tag list: #######

# Create regex from tag list for making that tag list optional.
# A shell script specific to the tag list (named after the stem) is used when
# it exists, to allow for tags being optional only in certain contexts.
# Otherwise the default shell script is used. The chosen script reads the tag
# list and its output becomes the target.
make-optional-%-tags.regex: %-tags.txt
	$(AM_V_GEN)script=$(GTCORE)/scripts/taglist2make_optional_$*_tags_regex.sh ; \
	test -e $$script \
		|| script=$(GTCORE)/scripts/taglist2make_optional_tags_regex.sh ; \
	$$script $< > $@

# Create regex from tag list for removing the tags in the tag list from the fst.
# A shell script specific to the tag list is used when it exists, to allow for
# removal only in certain contexts. Otherwise the default shell script is used.
remove-%-tags.regex: %-tags.txt
	$(AM_V_GEN)script=$(GTCORE)/scripts/taglist2remove_$*_tags_regex.sh ; \
	test -e $$script \
		|| script=$(GTCORE)/scripts/taglist2remove_tags_regex.sh ; \
	$$script $< > $@

# Remove strings specific to other dialects than the requested one. The
# script receives the target name, the dialect (the stem) and the full list
# of dialects:
remove-all_dialects_but_%-strings.regex:
	$(AM_V_GEN)$(GTCORE)/scripts/generate-dialect-filter-regex.sh \
		$@ $* "$(DIALECTS)"

### Remove strings with tags as specified:
# In both variants the script is called with the target name, the stem and
# all prerequisites; a stem-specific script takes precedence over the
# default one.

# Specific variant - take into account an exception file:
remove-%-strings.regex: %-tags.txt \
			 remove-%-strings-modifications.regex
	$(AM_V_GEN)script=$(GTCORE)/scripts/taglist2remove_$*_strings_regex.sh ; \
	test -e $$script \
		|| script=$(GTCORE)/scripts/taglist2remove_strings_regex.sh ; \
	$$script $@ $* $^

# General variant - remove all strings without exception:
remove-%-strings.regex: %-tags.txt
	$(AM_V_GEN)script=$(GTCORE)/scripts/taglist2remove_$*_strings_regex.sh ; \
	test -e $$script \
		|| script=$(GTCORE)/scripts/taglist2remove_strings_regex.sh ; \
	$$script $@ $* $^

### Reorder tags:
# There is no default script here: the script named after the stem is always
# the one being called, with the target name followed by the prerequisites.
reorder-%-tags.regex: %-tags.txt
	$(AM_V_GEN)$(GTCORE)/scripts/taglist2reorder_$*_tags_regex.sh \
		$@ $^

####### Copy regex files from the GTD core if not found locally: ########
%.regex: $(GTCORE)/giella-shared/common/src/filters/%.regex
	$(AM_V_CP)cp -f $< $@

####### Other targets: ###########
# Besides the compiled filters and all *.txt files, the core regex files are
# deleted as well.
clean-local:
	-rm -f *.hfst *.xfst *.foma *.txt $(GIELLA_FILTER_CORE_REGEX_SRCS)

# Include files for the actual build instructions and for verbosity control:
include $(top_srcdir)/am-shared/regex-include.am
include $(top_srcdir)/am-shared/xfscript-include.am
include $(top_srcdir)/am-shared/lexc-include.am
include $(top_srcdir)/am-shared/silent_build-include.am

# vim: set ft=automake: