## Include this file in top-level srcdir to compile FSTs
## Copyright: Sámediggi/Divvun/UiT
## Licence: GPL v3+

####### Automake targets: ########

# Initialise every list to empty so that later "+=" appends and references
# are well defined also when WANT_APERTIUM is false:
APERTIUM_ANALYSER_HFST=
APERTIUM_ANALYSERS_GT_HFST=
APERTIUM_ANALYSERS_AP_HFST=
APERTIUM_ANALYSERS_ATTGZ=
APERTIUM_GENERATORS_HFST=
APERTIUM_GENERATOR_ATTGZ=
APERTIUM_CG_BIN=
APERTIUM_GENERATED_TAGSET_RELABEL_SRCS=

if WANT_APERTIUM
# The multiple parallel pattern rules further down are fragile (or: make is
# fragile). We need to specify here as the first target to build the dependency
# of the first pattern target (ie the non-last pattern dependency). Without
# this make will give up. DO NOT REMOVE analyser-mt-gt-desc.hfst BELOW!!!
APERTIUM_ANALYSER_HFST+=analyser-mt-gt-desc.hfst analyser-mt-gt-desc.hfstol

# Build list of target language transducers from the list of target languages:
# => analyser-mt-gt-desc.nob.hfstol etc.
APERTIUM_ANALYSERS_GT_HFST+=\
    $(addsuffix .hfstol,$(addprefix analyser-mt-gt-desc.,$(APERTIUM_TARGET_LANGS)))

# Build list of apertium tagged files by replacing -gt- with -apertium-:
# analyser-mt-gt-desc.nob.hfstol => analyser-mt-apertium-desc.nob.hfstol etc.
APERTIUM_ANALYSERS_AP_HFST+=\
    $(subst -gt-,-apertium-,$(APERTIUM_ANALYSERS_GT_HFST))

# Build list of att.gz files by adding the suffix .att.gz to the basenames
# of the filenames:
# analyser-mt-apertium-desc.nob.hfstol => analyser-mt-apertium-desc.nob.att.gz
APERTIUM_ANALYSERS_ATTGZ+=\
    $(addsuffix .att.gz,$(basename $(APERTIUM_ANALYSERS_AP_HFST)))

# Build a list of Apertium style target filenames:
#
# Analysers:
# apertium-sme-nob.sme-nob.LR.att.gz
# apertium-smn-sme.sme-smn.LR.att.gz
#
# Generators:
# apertium-sme-nob.sme-nob.RL.att.gz
#
# Apertium filename pattern:
#
# dirname-with-lang-pair.lang-pair.LR
# - dirname-with-lang-pair = fixed string, same as containing AP dir name
# - lang-pair = the pair in the direction being used, as in: from-to
# - LR = analyser
# - RL = generator

# Loop over target languages and pair names and build the final target names
# except the suffix part. A pair name is used for a target language when the
# language code occurs (grep substring match) in the pair name:
APERTIUM_PAIRS=$(shell for ll in $(APERTIUM_TARGET_LANGS); do\
    for ld in $(APERTIUM_TARGET_LANG_NAME); do\
        if test $$(echo $$ld | grep $$ll); then\
            echo "$$ld.$(GTLANG)-$$ll" ;\
        fi\
    done ;\
done)

# Add the suffixes:
# "apertium-sme-nob.sme-nob" -> "apertium-sme-nob.sme-nob.LR.att.gz":
APERTIUM_MORPH_PAIRS=$(addsuffix .LR.att.gz,$(APERTIUM_PAIRS))

# Apertium style target filenames with stem variable % instead of target
# language, used in a pattern rule further down:
APERTIUM_PAIRS_VAR=$(shell for ll in $(APERTIUM_TARGET_LANGS); do\
    for ld in $(APERTIUM_TARGET_LANG_NAME); do\
        if test $$(echo $$ld | grep $$ll); then\
            echo "$$ld.$(GTLANG)-%" ;\
        fi\
    done ;\
done)

# "apertium-sme-nob.sme-%" -> "apertium-sme-nob.sme-%.LR.att.gz":
APERTIUM_MORPH_PAIRS_V=$(addsuffix .LR.att.gz,$(APERTIUM_PAIRS_VAR))

APERTIUM_GENERATORS_HFST+=generator-mt-gt-norm.hfstol \
                          generator-mt-apertium-norm.hfstol
APERTIUM_GENERATOR_ATTGZ+=apertium-$(GTLANG).$(GTLANG).RL.att.gz
APERTIUM_GENERATED_TAGSET_RELABEL_SRCS+=tagsets/apertium.relabel

if WANT_SYNTAX
APERTIUM_CG_BIN+=$(GTLANG).mor.rlx.bin \
                 $(GTLANG).syn.rlx.bin
# Currently disabled (not part of APERTIUM_CG_BIN):
#                $(GTLANG).dep.rlx.bin
endif # WANT_SYNTAX

# Apertium - GTDivvun filename correspondences:
# apertium-sme-nob.sme-nob.LR.att.gz = analyser-mt-apertium-desc.nob.att.gz
# apertium-sme.sme.RL.att.gz         = generator-mt-apertium-norm.att.gz
# sme.mor.rlx.bin                    = disambiguation-mt-apertium.bin
# sme.syn.rlx.bin                    = functions-mt-apertium.bin
# sme.dep.rlx.bin                    = dependency-mt-apertium.bin

endif # WANT_APERTIUM

if CAN_HFST
hfstdatadir=$(datadir)/giella/$(GTLANG)

# Built but not installed:
noinst_DATA=$(APERTIUM_ANALYSER_HFST) \
            $(APERTIUM_ANALYSERS_GT_HFST) \
            $(APERTIUM_ANALYSERS_AP_HFST) \
            $(APERTIUM_GENERATORS_HFST) \
            $(APERTIUM_CG_SRC) \
            $(APERTIUM_GENERATED_TAGSET_RELABEL_SRCS)

# Built and installed in $(hfstdatadir), using the Apertium file names:
hfstdata_DATA=$(APERTIUM_MORPH_PAIRS) \
              $(APERTIUM_GENERATOR_ATTGZ) \
              $(APERTIUM_CG_BIN)
endif # CAN_HFST

####### Build rules: ########

# Variable to hold filename for file containing all tags used
# (all symbols starting or ending with '+')
TAG_FILE=tagsets/gttags.txt

# Variable to hold filename for fst used as source for extracting tags,
# sans extension
TAG_EXTRACTION_FST=analyser-mt-gt-desc

# Convert GT-tags to Apertium-tags: delete '+', map '/' to '_', lowercase
# everything and wrap each line as %<tag%>:
tagsets/apertiumtags.txt: tagsets/gttags.txt
	$(AM_V_GEN)tr -d '+' < $< \
		| tr '/[:upper:]' '_[:lower:]' \
		| awk '{print "%<"$$0"%>"}' > $@

# Generate relabel file for replacing Giella tags with Apertium tags
# (the two tag lists pasted side by side, with all '%' characters removed):
tagsets/apertium.relabel: tagsets/gttags.txt \
                          tagsets/apertiumtags.txt
	$(AM_V_GEN)paste $^ | tr -d '%' > $@

# Apertium generator - GTD tags.
# All filtering for the generator happens here, except the removal of dialect
# tags, which is done in the generator-mt-gt%.hfst rule.
# The first prerequisite ($<) is the raw transducer; the filters listed before
# it in the regex are composed onto one side of it, those after it onto the
# other.
generator-mt-gt-norm.tmp.hfst: $(top_builddir)/src/generator-raw-gt-desc.hfst \
        $(top_builddir)/src/filters/make-optional-transitivity-tags.hfst \
        $(top_builddir)/src/filters/make-optional-semantic-tags.hfst \
        $(top_builddir)/src/filters/make-optional-v1-tags.hfst \
        $(top_builddir)/src/filters/remove-number-string-tags.hfst \
        $(top_builddir)/src/filters/remove-NG-strings.hfst \
        $(top_builddir)/src/filters/remove-usage-tags.hfst \
        $(top_builddir)/src/filters/remove-error-strings.hfst \
        $(top_builddir)/src/filters/remove-orthography-tags.hfst \
        $(top_builddir)/src/filters/remove-Orth_IPA-strings.hfst \
        $(top_builddir)/src/filters/remove-orig_lang-tags.hfst \
        $(top_builddir)/src/filters/remove-hyphenation-marks.hfst \
        $(top_builddir)/src/filters/remove-infl_deriv-borders.hfst \
        $(top_builddir)/src/filters/remove-word-boundary.hfst \
        $(top_builddir)/src/orthography/downcase-derived_proper-strings.compose.hfst
	$(AM_V_HXFST)$(PRINTF) "set xerox-composition ON \n\
	read regex \
	    @\"$(top_builddir)/src/filters/make-optional-semantic-tags.hfst\" \
	.o. @\"$(top_builddir)/src/filters/make-optional-transitivity-tags.hfst\" \
	.o. @\"$(top_builddir)/src/filters/make-optional-v1-tags.hfst\" \
	.o. @\"$(top_builddir)/src/filters/remove-number-string-tags.hfst\" \
	.o. @\"$(top_builddir)/src/filters/remove-usage-tags.hfst\" \
	.o. @\"$(top_builddir)/src/filters/remove-error-strings.hfst\" \
	.o. @\"$(top_builddir)/src/filters/remove-NG-strings.hfst\" \
	.o. @\"$(top_builddir)/src/filters/remove-orig_lang-tags.hfst\" \
	.o. @\"$(top_builddir)/src/filters/remove-orthography-tags.hfst\" \
	.o. @\"$(top_builddir)/src/filters/remove-Orth_IPA-strings.hfst\" \
	.o. @\"$<\" \
	.o. @\"$(top_builddir)/src/orthography/downcase-derived_proper-strings.compose.hfst\" \
	.o. @\"$(top_builddir)/src/filters/remove-word-boundary.hfst\" \
	.o. @\"$(top_builddir)/src/filters/remove-hyphenation-marks.hfst\" \
	.o. @\"$(top_builddir)/src/filters/remove-infl_deriv-borders.hfst\" \
	;\n\
	save stack $@\n\
	quit\n" | $(HFST_XFST) -p $(MORE_VERBOSITY) $(HFST_FORMAT)

# Apertium analyser - GTD tags. Target language independent filtering
# happens here. The analyser is built from the same raw transducer as the
# generator above; unlike the generator, dialect and variant tags are removed
# already in this step:
analyser-mt-gt-desc.tmp.hfst: $(top_builddir)/src/generator-raw-gt-desc.hfst \
        $(top_builddir)/src/filters/remove-dialect-tags.hfst \
        $(top_builddir)/src/filters/remove-variant-tags.hfst \
        $(top_builddir)/src/filters/remove-number-string-tags.hfst \
        $(top_builddir)/src/filters/remove-usage-tags.hfst \
        $(top_builddir)/src/filters/remove-hyphenation-marks.hfst \
        $(top_builddir)/src/filters/remove-infl_deriv-borders.hfst \
        $(top_builddir)/src/filters/remove-orthography-tags.hfst \
        $(top_builddir)/src/filters/remove-orig_lang-tags.hfst \
        $(top_builddir)/src/filters/remove-Orth_IPA-strings.hfst \
        $(top_builddir)/src/filters/remove-word-boundary.hfst \
        $(top_builddir)/src/orthography/downcase-derived_proper-strings.compose.hfst
	$(AM_V_HXFST)$(PRINTF) "set xerox-composition ON \n\
	read regex \
	    @\"$(top_builddir)/src/filters/remove-dialect-tags.hfst\" \
	.o. @\"$(top_builddir)/src/filters/remove-variant-tags.hfst\" \
	.o. @\"$(top_builddir)/src/filters/remove-number-string-tags.hfst\" \
	.o. @\"$(top_builddir)/src/filters/remove-usage-tags.hfst\" \
	.o. @\"$(top_builddir)/src/filters/remove-orig_lang-tags.hfst\" \
	.o. @\"$(top_builddir)/src/filters/remove-orthography-tags.hfst\" \
	.o. @\"$(top_builddir)/src/filters/remove-Orth_IPA-strings.hfst\" \
	.o. @\"$<\" \
	.o. @\"$(top_builddir)/src/orthography/downcase-derived_proper-strings.compose.hfst\" \
	.o. @\"$(top_builddir)/src/filters/remove-word-boundary.hfst\" \
	.o. @\"$(top_builddir)/src/filters/remove-hyphenation-marks.hfst\" \
	.o. @\"$(top_builddir)/src/filters/remove-infl_deriv-borders.hfst\" \
	;\n\
	save stack $@\n\
	quit\n" | $(HFST_XFST) -p $(MORE_VERBOSITY) $(HFST_FORMAT)

# Target language specific filtering - build depends on the availability
# of a filter file.
# WITH filter file available (the source file is inverted with the .i modifier):
# NOTE(review): this file mixes $(HFST_FLAGS) and $(HFSTFLAGS); neither is
# defined here - confirm which spelling the included *.am files / configure
# actually provide.
analyser-mt-gt-desc.%.tmp.hfst: analyser-mt-gt-desc.hfst \
        $(top_builddir)/src/filters/remove-derivation-strings.%.hfst
	$(AM_V_RGX2FST)$(PRINTF) "\
	    @\"$(top_builddir)/src/filters/remove-derivation-strings.$*.hfst\" \
	.o. @\"$<\".i \
	;" \
		| $(HFST_REGEXP2FST) $(HFST_FLAGS) $(HFST_FORMAT) \
			-S --xerox-composition=ON \
		| $(HFST_FST2FST) $(HFSTFLAGS) -t \
			-o $@

# With NO filter file available, invert the file to allow the general pattern
# *.tmp.hfst -> invert -> *.hfst
# to apply correctly:
analyser-mt-gt-desc.%.tmp.hfst: analyser-mt-gt-desc.hfst
	$(AM_V_INVERT)$(HFST_INVERT) -i $< \
		| $(HFST_FST2FST) $(HFSTFLAGS) -t \
			-o $@

# Relabel GTDivvun tags to Apertium tags - generator, step 1:
generator-mt-apertium-norm.tmp1.hfst: \
        generator-mt-gt-norm.hfst \
        tagsets/apertium.relabel
	$(AM_V_HSUBST)$(HFST_SUBSTITUTE) $(HFSTFLAGS) -i $< \
			-F tagsets/apertium.relabel \
		| $(HFST_FST2FST) $(HFSTFLAGS) -t \
			-o $@

# Relabel GTDivvun tags to Apertium tags - generator, step 2.
# In the three alternative rules below the stem % matches "tmp", so that the
# prerequisite "%1" becomes the "tmp1" file built in step 1. Which alternative
# applies depends on which of the optional prerequisites can be made.
#
# Step 2a:
# * relabel file
# AND
# * general tag change regex file
generator-mt-apertium-norm.%.hfst: \
        generator-mt-apertium-norm.%1.hfst \
        tagsets/modify-tags.hfst \
        $(srcdir)/tagsets/apertium.postproc.relabel
	$(AM_V_RGX2FST)$(PRINTF) "\
	    @\"tagsets/modify-tags.hfst\".i \
	.o. @\"$<\" \
	;" \
		| $(HFST_REGEXP2FST) $(HFST_FLAGS) -S --xerox-composition=ON \
		| $(HFST_SUBSTITUTE) $(HFSTFLAGS) \
			-F $(srcdir)/tagsets/apertium.postproc.relabel \
			-o $@

# Relabel GTDivvun tags to Apertium tags - generator, step 2b:
# * relabel file ONLY
generator-mt-apertium-norm.%.hfst: \
        generator-mt-apertium-norm.%1.hfst \
        $(srcdir)/tagsets/apertium.postproc.relabel
	$(AM_V_HSUBST)$(HFST_SUBSTITUTE) $(HFSTFLAGS) -i $< \
			-F $(srcdir)/tagsets/apertium.postproc.relabel \
			-o $@

# Relabel GTDivvun tags to Apertium tags - generator, step 2c:
# * general tag change regex file ONLY
generator-mt-apertium-norm.%.hfst: \
        generator-mt-apertium-norm.%1.hfst \
        tagsets/modify-tags.hfst
	$(AM_V_RGX2FST)$(PRINTF) "\
	    @\"tagsets/modify-tags.hfst\".i \
	.o. @\"$<\" \
	;" \
		| $(HFST_REGEXP2FST) $(HFST_FLAGS) -S --xerox-composition=ON \
			-o $@

# Relabel GTD tags to Apertium tags - analyser:
# % = TARGETLANG
# Step 1a - gt -> apertium.tmp1
analyser-mt-apertium-desc.%.tmp1.hfst: \
        analyser-mt-gt-desc.%.hfst \
        tagsets/apertium.relabel
	$(AM_V_HSUBST)$(HFST_SUBSTITUTE) $(HFSTFLAGS) -i $< \
			-F tagsets/apertium.relabel \
			-o $@

# Relabel GTD tags to Apertium tags - analyser:
# % = TARGETLANG
# Step 1b.a - apertium.tmp1 -> apertium.tmp
# * relabel file
# AND
# * general tag change regex file
analyser-mt-apertium-desc.%.tmp.hfst: \
        analyser-mt-apertium-desc.%.tmp1.hfst \
        tagsets/modify-tags.hfst \
        $(srcdir)/tagsets/apertium.postproc.relabel
	$(AM_V_RGX2FST)$(PRINTF) "\
	    @\"$<\" \
	.o. @\"tagsets/modify-tags.hfst\" \
	;" \
		| $(HFST_REGEXP2FST) $(HFST_FLAGS) -S --xerox-composition=ON \
		| $(HFST_SUBSTITUTE) $(HFSTFLAGS) \
			-F $(srcdir)/tagsets/apertium.postproc.relabel \
			-o $@

# Step 1b.b - apertium.tmp1 -> apertium.tmp
# * relabel file ONLY
analyser-mt-apertium-desc.%.tmp.hfst: \
        analyser-mt-apertium-desc.%.tmp1.hfst \
        $(srcdir)/tagsets/apertium.postproc.relabel
	$(AM_V_HSUBST)$(HFST_SUBSTITUTE) $(HFSTFLAGS) -i $< \
			-F $(srcdir)/tagsets/apertium.postproc.relabel \
			-o $@

# Step 1b.c - apertium.tmp1 -> apertium.tmp
# * general tag change regex file ONLY
analyser-mt-apertium-desc.%.tmp.hfst: \
        analyser-mt-apertium-desc.%.tmp1.hfst \
        tagsets/modify-tags.hfst
	$(AM_V_RGX2FST)$(PRINTF) "\
	    @\"$<\" \
	.o. @\"tagsets/modify-tags.hfst\" \
	;" \
		| $(HFST_REGEXP2FST) $(HFST_FLAGS) -S --xerox-composition=ON \
			-o $@

# Relabel GTD tags to Apertium tags - analyser:
# % = TARGETLANG
# Step 2a - apertium.tmp -> apertium
# - BOTH target language specific modifying AND relabeling
analyser-mt-apertium-desc.%.hfst: analyser-mt-apertium-desc.%.tmp.hfst \
        tagsets/modify-tags.%.hfst \
        $(srcdir)/tagsets/apertium.%.relabel
	$(AM_V_RGX2FST)$(PRINTF) "\
	    @\"$<\" \
	.o. @\"tagsets/modify-tags.$*.hfst\" \
	;" \
		| $(HFST_REGEXP2FST) $(HFST_FLAGS) -S --xerox-composition=ON \
		| $(HFST_SUBSTITUTE) $(HFSTFLAGS) \
			-F $(srcdir)/tagsets/apertium.$*.relabel \
		| $(HFST_FST2FST) $(HFSTFLAGS) -t \
			-o $@

# Relabel GTD tags to Apertium tags - analyser:
# % = TARGETLANG
# Step 2b - apertium.tmp -> apertium
# - ONLY target language specific relabeling
analyser-mt-apertium-desc.%.hfst: analyser-mt-apertium-desc.%.tmp.hfst \
        $(srcdir)/tagsets/apertium.%.relabel
	$(AM_V_HSUBST)$(HFST_SUBSTITUTE) $(HFSTFLAGS) -i $< \
			-F $(srcdir)/tagsets/apertium.$*.relabel \
		| $(HFST_FST2FST) $(HFSTFLAGS) -t \
			-o $@

# Relabel GTD tags to Apertium tags - analyser:
# % = TARGETLANG
# Step 2c - apertium.tmp -> apertium
# - ONLY target language specific modifying
analyser-mt-apertium-desc.%.hfst: analyser-mt-apertium-desc.%.tmp.hfst \
        tagsets/modify-tags.%.hfst
	$(AM_V_RGX2FST)$(PRINTF) "\
	    @\"$<\" \
	.o. @\"tagsets/modify-tags.$*.hfst\" \
	;" \
		| $(HFST_REGEXP2FST) $(HFST_FLAGS) -S --xerox-composition=ON \
		| $(HFST_FST2FST) $(HFSTFLAGS) -t \
			-o $@

# Relabel GTD tags to Apertium tags - analyser:
# % = TARGETLANG
# Step 2d - apertium.tmp -> apertium
# - NO target language specific processing - just copy
analyser-mt-apertium-desc.%.hfst: analyser-mt-apertium-desc.%.tmp.hfst
	$(AM_V_CP)cp -f $< $@

# Print the transducer in AT&T text format:
%.att: %.hfst
	$(AM_V_FST2TXT)$(HFST_FST2FST) $(HFSTFLAGS) -w -i $< \
		| $(HFST_FST2TXT) -o $@

# Compress the AT&T text file, keeping the uncompressed source:
%.att.gz: %.att
	$(AM_V_GZIP)$(GZIP) -9 -c $< > $@

##### Disambiguation and syntactic parsing targets

# Copy files from the src dirs:
dependency-mt-gt.cg3: \
        $(GIELLA_SHARED)/smi/src/syntax/dependency.cg3
	$(AM_V_CP)cp -f $< $@

functions-mt-gt.cg3: \
        $(GIELLA_SHARED)/smi/src/syntax/functions.cg3
	$(AM_V_CP)cp -f $< $@

# Language specific CG sources: copy them, dropping every line that contains
# the marker '#RemoveFromApertium':
%-mt-gt.cg3: $(top_srcdir)/src/syntax/%.cg3
	$(AM_V_GEN)grep -v '#RemoveFromApertium' < $< > $@

%.cg3: $(top_srcdir)/src/syntax/%.cg3
	$(AM_V_GEN)grep -v '#RemoveFromApertium' < $< > $@

# Make sure included files are copied in before starting to compile:
$(APERTIUM_CG_SRC): $(APERTIUM_CG_INCLUDE_SRC)

# Change the tags from GTDivvun format to Apertium format:
%-apertium.bin: %-gt.bin $(srcdir)/tagsets/gt2apertium.cg3relabel
	$(AM_V_GEN)$(CG_RELABEL) $< $(srcdir)/tagsets/gt2apertium.cg3relabel $@

#### Copy to Apertium filenames

# The stem % is the target language code.
# NOTE(review): when APERTIUM_MORPH_PAIRS_V holds more than one pattern, GNU
# make reads this as a single pattern rule with several targets, i.e. it
# assumes one run of the recipe creates all of them - confirm this is ok for
# setups with more than one target language.
$(APERTIUM_MORPH_PAIRS_V): \
        analyser-mt-apertium-desc.%.att.gz
	$(AM_V_CP)cp -f $< $@

apertium-$(GTLANG).$(GTLANG).RL.att.gz: generator-mt-apertium-norm.att.gz
	$(AM_V_CP)cp -f $< $@

$(GTLANG).mor.rlx.bin: disambiguation-mt-apertium.bin
	$(AM_V_CP)cp -f $< $@

$(GTLANG).syn.rlx.bin: functions-mt-apertium.bin
	$(AM_V_CP)cp -f $< $@

#### HFST transducer fallback target:
#### - inversion is needed FOR THE ANALYSER because the hfst model is upside
####   down compared to Xerox AT APPLICATION TIME. The other command makes the
####   final transducer ready for use.
analyser%.hfst: analyser%.tmp.hfst
	$(AM_V_INVERT)$(HFST_INVERT) $< -o $@

# If you override this goal, you have to remove dialect tags in the overriding goal!
generator-mt-gt%.hfst: generator-mt-gt%.tmp.hfst \
        $(top_builddir)/src/filters/remove-dialect-tags.hfst
	$(AM_V_HXFST)$(PRINTF) "set xerox-composition ON \n\
	read regex \
	    @\"$(top_builddir)/src/filters/remove-dialect-tags.hfst\" \
	.o. @\"$<\" \
	;\n\
	save stack $@\n\
	quit\n" | $(HFST_XFST) -p $(MORE_VERBOSITY)

# General fallback target for generators:
generator%.hfst: generator%.tmp.hfst
	$(AM_V_CP)cp -f $< $@

#### Include files:
include $(top_srcdir)/am-shared/lookup-include.am
include $(top_srcdir)/am-shared/vislcg3-include.am
include $(top_srcdir)/am-shared/hfst-format-include.am
include $(top_srcdir)/am-shared/tag-extraction-include.am
include $(top_srcdir)/am-shared/silent_build-include.am