## Process this file with automake to produce Makefile.in

## Copyright (C) 2011 Samediggi

## This program is free software: you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation, either version 3 of the License, or
## (at your option) any later version.

## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.

## You should have received a copy of the GNU General Public License
## along with this program.  If not, see <http://www.gnu.org/licenses/>.

# Commands needed to build:
# For Xerox: abbr.txt to be used with preprocess
# For Hfst:  a pmatch fst to do tokenisation+lookup in one go
#
# The two tool sets (preprocess + fst & pmatch fst) should ideally give the
# same output. More importantly, when combined with either lookup2cg or
# cg-cont the output should be the same.

# GENERATOR names the generator fst (relative to ../../src/) handed to
# extract-abbr.pl below. The Xerox fst is preferred, then the Hfst one; when
# neither tool set is available the placeholder value 'false' is used.
if CAN_XFST
GENERATOR=generator-gt-desc.xfst
else
if CAN_HFST
GENERATOR=generator-gt-desc.hfstol
else
GENERATOR=false
endif # CAN_HFST
endif # CAN_XFST

# GIELLA_PMHFSTS collects the pmatch tokeniser fst's to build and install.
GIELLA_PMHFSTS=
if CAN_HFST
# Only build the pmatch/hfst-proc2 fst if analysers are enabled.
if WANT_MORPHOLOGY
if WANT_TOKENISERS
GIELLA_PMHFSTS+=tokeniser-disamb-gt-desc.pmhfst
endif # WANT_TOKENISERS
if WANT_GRAMCHECK
GIELLA_PMHFSTS+=tokeniser-gramcheck-gt-desc.pmhfst
endif # WANT_GRAMCHECK
endif # WANT_MORPHOLOGY

# Install the tokenisers and the abbr file (if any) in the language specific
# data directory:
hfstdatadir=$(datadir)/giella/$(GTLANG)
hfstdata_DATA=$(GIELLA_PMHFSTS) $(ABBR)
endif # CAN_HFST

###########################################################################
### Build the abbr.txt file used with preprocess:

# ABBR is only set when abbr extraction is requested; otherwise it expands to
# nothing and no abbr file is built or installed.
if WANT_ABBR
ABBR=abbr.txt
endif # WANT_ABBR

# Source files for abbr extraction: all stem lexc files except the proper
# nouns, and all top-level morphology lexc files except lexicon.lexc.
STEMSRCS:=$(filter-out \
    %propernouns.lexc,$(wildcard $(top_srcdir)/src/morphology/stems/*.lexc))
OTHRSRCS:=$(filter-out \
    %lexicon.lexc,$(wildcard $(top_srcdir)/src/morphology/*.lexc))

# Convert source files to comma-separated list:
# NOTE(review): GENRSRCS is not defined anywhere in this file, so it expands
# to nothing unless an included fragment or the environment sets it - confirm
# whether it is still needed.
empty:=
comma:=,
space:=$(empty) $(empty)
ABBRSRCS:=$(subst \
    $(space),$(comma),$(strip $(STEMSRCS) $(GENRSRCS) $(OTHRSRCS)))

SCRIPTDIR=$(GTCORE)/scripts

# 'abbr' is a convenience alias, not a file; declare it phony so that a stray
# file with that name can never mask it.
.PHONY: abbr

# Build the abbr.txt file:
# extract-abbr.pl writes its result to $@.tmp; the result is then stripped of
# the first +MWE tag on each line, run through uniq to drop adjacent duplicate
# lines, and stored as the real target. $< is the abbreviations.lexc file.
abbr: $(ABBR)
$(ABBR): \
    $(top_srcdir)/src/morphology/stems/abbreviations.lexc \
    $(top_srcdir)/tools/data/paradigm.abbr.txt \
    $(top_srcdir)/tools/data/corpustags.txt \
    $(STEMSRCS) $(GENRSRCS) $(OTHRSRCS) \
    ../../src/$(GENERATOR)
	$(AM_V_GEN)perl -I $(SCRIPTDIR) $(SCRIPTDIR)/extract-abbr.pl \
		--paradigm=$(top_srcdir)/tools/data/paradigm.abbr.txt \
		--tags=$(top_srcdir)/tools/data/corpustags.txt \
		--fst=../../src/$(GENERATOR) \
		--output=$@.tmp \
		--abbr_lex=$< \
		--lex=$(ABBRSRCS)
	$(AM_V_at)sed -e 's/\+MWE//' < $@.tmp | uniq > $@
	$(AM_V_at)rm -f $@.tmp

###########################################################################
### Build a tokenising analyser based on a pmatch script and the
### regular disamb-analyser - and the same applies to the grammar checker:

# Take the analyser file from src/*.hfst, and replace the Giella tags with
# CG compatible ones:
# The analyser is composed with the inverse of the make-CG-tags filter and the
# result is saved as the target.
analyser-%-gt-desc.hfst: $(top_builddir)/src/analyser-%-gt-desc.hfst \
    filters/make-CG-tags.hfst
	$(AM_V_HXFST)$(PRINTF) "read regex \
	    @\"$<\" \
	    .o. @\"filters/make-CG-tags.hfst\".i \
	    ;\n\
	save stack $@\n\
	quit\n" | $(HFST_XFST)

# Prepare analysers used for building the tokenisers:
# Two chained substitutions replace the @P.Pmatch.Backtrack@ and
# @P.Pmatch.Loc@ symbols with the helper transducers built further down.
analyser_relabelled-%-gt-desc.hfst: analyser-%-gt-desc.hfst \
    btrelabeller.hfst \
    relabeller.hfst
	$(AM_V_HSUBST)$(HFST_SUBSTITUTE) \
		-f '@P.Pmatch.Backtrack@' -T btrelabeller.hfst $< \
		| $(HFST_SUBSTITUTE) \
		-f '@P.Pmatch.Loc@' -T relabeller.hfst \
		-o $@

# This is the real tokeniser build target:
# The pmatch script is compiled into a temporary file that is moved into place
# only when the compilation succeeded, so a failed build never leaves a
# truncated target behind.
tokeniser-%.pmhfst: tokeniser-%.pmscript \
    analyser_relabelled-%.hfst \
    ../../src/morphology/url.hfst
	$(AM_V_PM2FST)$(HFST_PMATCH2FST) < $< > $@.tmp
	$(AM_V_at)mv -f $@.tmp $@

# Helper fst one:
# Maps the empty string to @PMATCH_BACKTRACK@ followed by @PMATCH_INPUT_MARK@.
btrelabeller.hfst:
	$(AM_V_TXT2FST)$(PRINTF) \
		'0\t1\t@0@\t@PMATCH_BACKTRACK@\n1\t2\t@0@\t@PMATCH_INPUT_MARK@\n2\n' \
		| $(HFST_TXT2FST) -e '@0@' $(HFST_FORMAT) \
		-o $@

# Helper fst two:
# Maps the empty string to @PMATCH_INPUT_MARK@.
relabeller.hfst:
	$(AM_V_TXT2FST)$(PRINTF) '0\t1\t@0@\t@PMATCH_INPUT_MARK@\n1\n' \
		| $(HFST_TXT2FST) -e '@0@' $(HFST_FORMAT) \
		-o $@

# Fallback pattern target for no local processing cases:
%.hfst: %.tmp.hfst
	$(AM_V_CP)cp -f $< $@

####### Includes: ###########
include $(top_srcdir)/am-shared/silent_build-include.am
include $(top_srcdir)/am-shared/hfst-format-include.am