## Include this file in top-level srcdir to compile FSTs
## Copyright: Sámediggi/Divvun/UiT
## Licence: GPL v3+

# ---------------------------------------------------------------------------
# Disambiguation analyser, default writing system.
#
# Built from the raw analyser ($<, always the first prerequisite). The tag
# filters are composed to the left of the raw analyser; the border/boundary
# filters and the downcasing transducer to its right. The result is stored as
# "fst", flag diacritics are switched to epsilon, and the orthographic
# transducers (inituppercase, spellrelax) are composed last.
#
# No filter for semantic, homonym or Err/Orth tags is composed here, so those
# tags are left in place to help choosing forms.
# NOTE(review): the previous description said variant tags are kept as well,
# but the recipe composes filters/remove-variant-tags - confirm the intent.
# ---------------------------------------------------------------------------
analyser-disamb-gt-desc.tmp.%: analyser-raw-gt-desc.% \
        filters/remove-area-tags.% \
        filters/remove-Orth_IPA-strings.% \
        filters/remove-hyphenation-marks.% \
        filters/remove-infl_deriv-borders.% \
        filters/remove-word-boundary.% \
        filters/remove-dialect-tags.% \
        filters/remove-orig_lang-tags.% \
        filters/remove-orthography-tags.% \
        filters/remove-usage-tags.% \
        filters/remove-variant-tags.% \
        orthography/inituppercase.compose.% \
        orthography/spellrelax.compose.% \
        orthography/downcase-derived_proper-strings.compose.%
	$(AM_V_XFST_TOOL)$(PRINTF) "read regex \
		    @\"filters/remove-dialect-tags.$*\" \
		.o. @\"filters/remove-area-tags.$*\" \
		.o. @\"filters/remove-orig_lang-tags.$*\" \
		.o. @\"filters/remove-usage-tags.$*\" \
		.o. @\"filters/remove-variant-tags.$*\" \
		.o. @\"filters/remove-orthography-tags.$*\" \
		.o. @\"filters/remove-Orth_IPA-strings.$*\" \
		.o. @\"$<\" \
		.o. @\"orthography/downcase-derived_proper-strings.compose.$*\" \
		.o. @\"filters/remove-hyphenation-marks.$*\" \
		.o. @\"filters/remove-infl_deriv-borders.$*\" \
		.o. @\"filters/remove-word-boundary.$*\" \
		; \n\
		define fst \n\
		set flag-is-epsilon ON\n\
		read regex fst \
		.o. @\"orthography/inituppercase.compose.$*\" \
		.o. @\"orthography/spellrelax.compose.$*\" \
		;\n\
		save stack $@\n\
		quit\n" | $(XFST_TOOL)

# ---------------------------------------------------------------------------
# Disambiguation analysers for alternative writing systems.
#
# Template instantiated once per entry of ALT_WSS; $(1) is the writing system
# name. Compared with the default rule it does not compose the
# downcase-derived_proper-strings or inituppercase transducers, and finishes
# with the $(DEFAULT_WS)-to-$(1) conversion followed by the writing-system
# specific spellrelax. Everything that must survive until the generated rule
# runs is written with a doubled dollar sign.
# ---------------------------------------------------------------------------
define alt_ws_disamb_analysers
analyser-disamb-gt-desc.$(1).tmp.%: analyser-raw-gt-desc.% \
        filters/remove-area-tags.% \
        filters/remove-Orth_IPA-strings.% \
        filters/remove-hyphenation-marks.% \
        filters/remove-infl_deriv-borders.% \
        filters/remove-word-boundary.% \
        filters/remove-dialect-tags.% \
        filters/remove-orig_lang-tags.% \
        filters/remove-orthography-tags.% \
        filters/remove-usage-tags.% \
        filters/remove-variant-tags.% \
        orthography/spellrelax.$(1).compose.% \
        orthography/$(DEFAULT_WS)-to-$(1).compose.%
	$$(AM_V_XFST_TOOL)$$(PRINTF) "read regex \
		    @\"filters/remove-dialect-tags.$$*\" \
		.o. @\"filters/remove-area-tags.$$*\" \
		.o. @\"filters/remove-orig_lang-tags.$$*\" \
		.o. @\"filters/remove-usage-tags.$$*\" \
		.o. @\"filters/remove-variant-tags.$$*\" \
		.o. @\"filters/remove-orthography-tags.$$*\" \
		.o. @\"filters/remove-Orth_IPA-strings.$$*\" \
		.o. @\"$$<\" \
		.o. @\"filters/remove-hyphenation-marks.$$*\" \
		.o. @\"filters/remove-infl_deriv-borders.$$*\" \
		.o. @\"filters/remove-word-boundary.$$*\" \
		; \n\
		define fst \n\
		set flag-is-epsilon ON\n\
		read regex fst \
		.o. @\"orthography/$(DEFAULT_WS)-to-$(1).compose.$$*\" \
		.o. @\"orthography/spellrelax.$(1).compose.$$*\" \
		;\n\
		save stack $$@\n\
		quit\n" | $$(XFST_TOOL)
endef
$(foreach alt_ws,$(ALT_WSS),$(eval $(call alt_ws_disamb_analysers,$(alt_ws))))

# ---------------------------------------------------------------------------
# Disambiguation analysers for alternative orthographies.
#
# Template instantiated once per entry of ALT_ORTHS; $(1) is the orthography
# name. The surface side is converted with $(DEFAULT_ORTH)-to-$(1) and then
# relaxed with the orthography specific spellrelax.
# NOTE(review): orthography/inituppercase.compose.% is a prerequisite but is
# never composed in the recipe - confirm whether that is intended.
# ---------------------------------------------------------------------------
define alt_orth_disamb_analysers
analyser-disamb-gt-desc.$(1).tmp.%: analyser-raw-gt-desc.% \
        filters/remove-area-tags.% \
        filters/remove-hyphenation-marks.% \
        filters/remove-infl_deriv-borders.% \
        filters/remove-word-boundary.% \
        filters/remove-dialect-tags.% \
        filters/remove-orig_lang-tags.% \
        filters/remove-orthography-tags.% \
        filters/remove-usage-tags.% \
        filters/remove-variant-tags.% \
        filters/remove-Orth_IPA-strings.% \
        orthography/spellrelax.$(1).compose.% \
        orthography/inituppercase.compose.% \
        orthography/downcase-derived_proper-strings.compose.% \
        orthography/$(DEFAULT_ORTH)-to-$(1).compose.%
	$$(AM_V_XFST_TOOL)$$(PRINTF) "read regex \
		    @\"filters/remove-dialect-tags.$$*\" \
		.o. @\"filters/remove-area-tags.$$*\" \
		.o. @\"filters/remove-orig_lang-tags.$$*\" \
		.o. @\"filters/remove-orthography-tags.$$*\" \
		.o. @\"filters/remove-usage-tags.$$*\" \
		.o. @\"filters/remove-variant-tags.$$*\" \
		.o. @\"filters/remove-Orth_IPA-strings.$$*\" \
		.o. @\"$$<\" \
		.o. @\"orthography/downcase-derived_proper-strings.compose.$$*\" \
		.o. @\"filters/remove-hyphenation-marks.$$*\" \
		.o. @\"filters/remove-infl_deriv-borders.$$*\" \
		.o. @\"filters/remove-word-boundary.$$*\" \
		; \n\
		define fst \n\
		set flag-is-epsilon ON\n\
		read regex fst \
		.o. @\"orthography/$(DEFAULT_ORTH)-to-$(1).compose.$$*\" \
		.o. @\"orthography/spellrelax.$(1).compose.$$*\" \
		;\n\
		save stack $$@\n\
		quit\n" | $$(XFST_TOOL)
endef
$(foreach alt_orth,$(ALT_ORTHS),$(eval $(call alt_orth_disamb_analysers,$(alt_orth))))

# ---------------------------------------------------------------------------
# ... alternatively, the same target pattern built with a converter that goes
# straight from the RAW representation to the target orthography
# (orthography/raw-to-$(1).compose.%) instead of $(DEFAULT_ORTH)-to-$(1).
# Presumably only one of the two converters exists for a given orthography,
# which decides which of the two pattern rules can be used - verify per
# language.
# NOTE(review): as in the template above, inituppercase.compose.% is a
# prerequisite only and is not composed in the recipe.
# ---------------------------------------------------------------------------
define alt_orth_disamb_analysers_from_raw
analyser-disamb-gt-desc.$(1).tmp.%: analyser-raw-gt-desc.% \
        filters/remove-area-tags.% \
        filters/remove-hyphenation-marks.% \
        filters/remove-infl_deriv-borders.% \
        filters/remove-word-boundary.% \
        filters/remove-dialect-tags.% \
        filters/remove-orig_lang-tags.% \
        filters/remove-orthography-tags.% \
        filters/remove-usage-tags.% \
        filters/remove-variant-tags.% \
        filters/remove-Orth_IPA-strings.% \
        orthography/spellrelax.$(1).compose.% \
        orthography/inituppercase.compose.% \
        orthography/downcase-derived_proper-strings.compose.% \
        orthography/raw-to-$(1).compose.%
	$$(AM_V_XFST_TOOL)$$(PRINTF) "read regex \
		    @\"filters/remove-dialect-tags.$$*\" \
		.o. @\"filters/remove-area-tags.$$*\" \
		.o. @\"filters/remove-orig_lang-tags.$$*\" \
		.o. @\"filters/remove-orthography-tags.$$*\" \
		.o. @\"filters/remove-usage-tags.$$*\" \
		.o. @\"filters/remove-variant-tags.$$*\" \
		.o. @\"filters/remove-Orth_IPA-strings.$$*\" \
		.o. @\"$$<\" \
		.o. @\"orthography/downcase-derived_proper-strings.compose.$$*\" \
		.o. @\"filters/remove-hyphenation-marks.$$*\" \
		.o. @\"filters/remove-infl_deriv-borders.$$*\" \
		.o. @\"filters/remove-word-boundary.$$*\" \
		; \n\
		define fst \n\
		set flag-is-epsilon ON\n\
		read regex fst \
		.o. @\"orthography/raw-to-$(1).compose.$$*\" \
		.o. @\"orthography/spellrelax.$(1).compose.$$*\" \
		;\n\
		save stack $$@\n\
		quit\n" | $$(XFST_TOOL)
endef
$(foreach alt_orth,$(ALT_ORTHS),$(eval $(call alt_orth_disamb_analysers_from_raw,$(alt_orth))))

# ---------------------------------------------------------------------------
# Grammar checker analyser, default writing system.
#
# Same construction as the disambiguation analyser, but potentially/most
# likely with language-specific adaptations. As written here it differs in
# that no dialect or variant tag filter is composed, the word boundary is not
# removed at this stage (that happens in the final gramcheck step at the end
# of this file), and only inituppercase is composed after "define fst".
# NOTE(review): orthography/spellrelax.compose.% is a prerequisite but is not
# composed in the recipe - confirm whether that is intended.
# ---------------------------------------------------------------------------
analyser-gramcheck-gt-desc.tmp.%: analyser-raw-gt-desc.% \
        filters/remove-area-tags.% \
        filters/remove-Orth_IPA-strings.% \
        filters/remove-hyphenation-marks.% \
        filters/remove-infl_deriv-borders.% \
        filters/remove-word-boundary.% \
        filters/remove-orig_lang-tags.% \
        filters/remove-orthography-tags.% \
        filters/remove-usage-tags.% \
        orthography/inituppercase.compose.% \
        orthography/spellrelax.compose.% \
        orthography/downcase-derived_proper-strings.compose.%
	$(AM_V_XFST_TOOL)$(PRINTF) "read regex \
		    @\"filters/remove-orig_lang-tags.$*\" \
		.o. @\"filters/remove-area-tags.$*\" \
		.o. @\"filters/remove-usage-tags.$*\" \
		.o. @\"filters/remove-orthography-tags.$*\" \
		.o. @\"filters/remove-Orth_IPA-strings.$*\" \
		.o. @\"$<\" \
		.o. @\"orthography/downcase-derived_proper-strings.compose.$*\" \
		.o. @\"filters/remove-hyphenation-marks.$*\" \
		.o. @\"filters/remove-infl_deriv-borders.$*\" \
		; \n\
		define fst \n\
		set flag-is-epsilon ON\n\
		read regex fst \
		.o. @\"orthography/inituppercase.compose.$*\" \
		;\n\
		save stack $@\n\
		quit\n" | $(XFST_TOOL)

# And the same for alternate writing systems etc.: the HFST grammar checker
# tmp analyser is a plain copy of the corresponding disambiguation tmp
# analyser (the stem % is the writing system / orthography name).
analyser-gramcheck-gt-desc.%.tmp.hfst: analyser-disamb-gt-desc.%.tmp.hfst
	$(AM_V_CP)cp -f $< $@

# ---------------------------------------------------------------------------
# Default fallback processing for the grammar checker analysers.
#
# Template instantiated for each FST format (hfst, xfst, foma); $(1) is the
# file suffix. It turns analyser-gramcheck%.tmp.$(1) into the final
# analyser-gramcheck%.$(1) by composing the word boundary removal filter on
# the right. The contents of INVERT_HFST (defined elsewhere; presumably an
# inversion command needed for HFST only) are inserted into the script just
# before the result is saved.
# ---------------------------------------------------------------------------
define giella_gramcheck_analysers
analyser-gramcheck%.$(1): analyser-gramcheck%.tmp.$(1) \
        filters/remove-word-boundary.$(1)
	$$(AM_V_XFST_TOOL)$$(PRINTF) "read regex \
		    @\"$$<\" \
		.o. @\"filters/remove-word-boundary.$(1)\" \
		;\n\
		$$(INVERT_HFST)\
		save stack $$@\n\
		quit\n" | $$(XFST_TOOL)
endef
$(foreach fst,hfst xfst foma,$(eval $(call giella_gramcheck_analysers,$(fst))))