## Include this file in top-level srcdir to compile FSTs
## Copyright: Sámediggi/Divvun/UiT
## Licence: GPL v3+

# Each target list below appends the disambiguation analyser for one FST
# backend to GT_ANALYSERS, guarded by the matching Automake conditionals.

##### BEGIN Hfst target list #####
if CAN_HFST
if WANT_MORPHOLOGY
GT_ANALYSERS+=analyser-disamb-gt-desc.hfstol
if WANT_TOKENISERS
GT_ANALYSERS+=analyser-pmatchdisamb-gt-desc.hfst
endif # WANT_TOKENISERS
endif # WANT_MORPHOLOGY
endif # CAN_HFST
##### END Hfst target list #####

##### BEGIN Xerox target list #####
if CAN_XFST
if WANT_MORPHOLOGY
GT_ANALYSERS+=analyser-disamb-gt-desc.xfst
endif # WANT_MORPHOLOGY
endif # CAN_XFST
##### END Xerox target list #####

##### BEGIN Foma target list #####
if CAN_FOMA
if WANT_MORPHOLOGY
GT_ANALYSERS+=analyser-disamb-gt-desc.foma
endif # WANT_MORPHOLOGY
endif # CAN_FOMA
##### END Foma target list #####

# Disambiguation analysers.
#
# According to the original description, these differ from the regular
# analyser by removing NEITHER variant and homonym tags, NOR semantic tags or
# Err/Orth, because all of those help when choosing between forms.
# NOTE(review): the recipes below do compose filters/remove-variant-tags and
# filters/remove-orthography-tags - confirm that the description above still
# matches what those filters actually remove.
#
# The rule works in two steps inside a single xfst script:
#   1. compose the tag/string filters, the raw analyser (first prerequisite)
#      and the border/boundary-removing filters, and name the result "fst";
#   2. switch flag-is-epsilon ON and compose "fst" with the orthography
#      transducers (inituppercase, spellrelax), then save the stack as the
#      target file.
# The stem matched by % is the FST backend suffix and is used as the file
# extension of every prerequisite.
analyser-disamb-gt-desc.tmp1.%: analyser-raw-gt-desc.% \
		filters/remove-Orth_IPA-strings.% \
		filters/remove-hyphenation-marks.% \
		filters/remove-infl_deriv-borders.% \
		filters/remove-word-boundary.% \
		filters/remove-orthography-tags.% \
		filters/remove-usage-tags.% \
		filters/remove-variant-tags.% \
		filters/block-mwe-compounds.% \
		orthography/inituppercase.compose.% \
		orthography/spellrelax.compose.% \
		orthography/downcase-derived_proper-strings.compose.%
	$(AM_V_XFST_TOOL)$(PRINTF) "read regex \
		    @\"filters/remove-usage-tags.$*\" \
		.o. @\"filters/remove-variant-tags.$*\" \
		.o. @\"filters/remove-orthography-tags.$*\" \
		.o. @\"filters/remove-Orth_IPA-strings.$*\" \
		.o. @\"filters/block-mwe-compounds.$*\" \
		.o. @\"$<\" \
		.o. @\"orthography/downcase-derived_proper-strings.compose.$*\" \
		.o. @\"filters/remove-hyphenation-marks.$*\" \
		.o. @\"filters/remove-infl_deriv-borders.$*\" \
		.o. @\"filters/remove-word-boundary.$*\" \
		; \n\
		define fst \n\
		set flag-is-epsilon ON\n\
		read regex fst \
		.o. @\"orthography/inituppercase.compose.$*\" \
		.o. @\"orthography/spellrelax.compose.$*\" \
		;\n\
		save stack $@\n\
		quit\n" | $(XFST_TOOL)

# Languages with alternative writing systems get one extra rule per entry in
# ALT_WSS. The template takes the writing system name as its only argument;
# the generated rule composes the DEFAULT_WS-to-target converter and the
# target-specific spellrelax in the second step.
define alt_ws_disamb_analysers
analyser-disamb-gt-desc.$(1).tmp1.%: analyser-raw-gt-desc.% \
		filters/remove-Orth_IPA-strings.% \
		filters/remove-hyphenation-marks.% \
		filters/remove-infl_deriv-borders.% \
		filters/remove-word-boundary.% \
		filters/remove-orthography-tags.% \
		filters/remove-usage-tags.% \
		filters/remove-variant-tags.% \
		filters/block-mwe-compounds.% \
		orthography/spellrelax.$(1).compose.% \
		orthography/$(DEFAULT_WS)-to-$(1).compose.%
	$$(AM_V_XFST_TOOL)$$(PRINTF) "read regex \
		    @\"filters/remove-usage-tags.$$*\" \
		.o. @\"filters/remove-variant-tags.$$*\" \
		.o. @\"filters/remove-orthography-tags.$$*\" \
		.o. @\"filters/remove-Orth_IPA-strings.$$*\" \
		.o. @\"filters/block-mwe-compounds.$$*\" \
		.o. @\"$$<\" \
		.o. @\"filters/remove-hyphenation-marks.$$*\" \
		.o. @\"filters/remove-infl_deriv-borders.$$*\" \
		.o. @\"filters/remove-word-boundary.$$*\" \
		; \n\
		define fst \n\
		set flag-is-epsilon ON\n\
		read regex fst \
		.o. @\"orthography/$(DEFAULT_WS)-to-$(1).compose.$$*\" \
		.o. @\"orthography/spellrelax.$(1).compose.$$*\" \
		;\n\
		save stack $$@\n\
		quit\n" | $$(XFST_TOOL)
endef
$(foreach alt_ws,$(ALT_WSS),$(eval $(call alt_ws_disamb_analysers,$(alt_ws))))

# Languages with alternative orthographies get two extra rules per entry in
# ALT_ORTHS, both for the same target pattern: the first converts from
# DEFAULT_ORTH to the target orthography, the second uses a raw-to-target
# converter instead.
# NOTE(review): orthography/inituppercase.compose is listed as a prerequisite
# of both rules but is not referenced in either recipe - confirm whether that
# is intended.
define alt_orth_disamb_fst
analyser-disamb-gt-desc.$(1).tmp1.%: analyser-raw-gt-desc.% \
		filters/remove-hyphenation-marks.% \
		filters/remove-infl_deriv-borders.% \
		filters/remove-word-boundary.% \
		filters/remove-orthography-tags.% \
		filters/remove-usage-tags.% \
		filters/remove-variant-tags.% \
		filters/remove-Orth_IPA-strings.% \
		filters/block-mwe-compounds.% \
		orthography/spellrelax.$(1).compose.% \
		orthography/inituppercase.compose.% \
		orthography/downcase-derived_proper-strings.compose.% \
		orthography/$(DEFAULT_ORTH)-to-$(1).compose.%
	$$(AM_V_XFST_TOOL)$$(PRINTF) "read regex \
		    @\"filters/remove-orthography-tags.$$*\" \
		.o. @\"filters/remove-usage-tags.$$*\" \
		.o. @\"filters/remove-variant-tags.$$*\" \
		.o. @\"filters/remove-Orth_IPA-strings.$$*\" \
		.o. @\"filters/block-mwe-compounds.$$*\" \
		.o. @\"$$<\" \
		.o. @\"orthography/downcase-derived_proper-strings.compose.$$*\" \
		.o. @\"filters/remove-hyphenation-marks.$$*\" \
		.o. @\"filters/remove-infl_deriv-borders.$$*\" \
		.o. @\"filters/remove-word-boundary.$$*\" \
		; \n\
		define fst \n\
		set flag-is-epsilon ON\n\
		read regex fst \
		.o. @\"orthography/$(DEFAULT_ORTH)-to-$(1).compose.$$*\" \
		.o. @\"orthography/spellrelax.$(1).compose.$$*\" \
		;\n\
		save stack $$@\n\
		quit\n" | $$(XFST_TOOL)

# Second rule for the same target pattern, using a filter from RAW to target:
analyser-disamb-gt-desc.$(1).tmp1.%: analyser-raw-gt-desc.% \
		filters/remove-hyphenation-marks.% \
		filters/remove-infl_deriv-borders.% \
		filters/remove-word-boundary.% \
		filters/remove-orthography-tags.% \
		filters/remove-usage-tags.% \
		filters/remove-variant-tags.% \
		filters/remove-Orth_IPA-strings.% \
		filters/block-mwe-compounds.% \
		orthography/spellrelax.$(1).compose.% \
		orthography/inituppercase.compose.% \
		orthography/downcase-derived_proper-strings.compose.% \
		orthography/raw-to-$(1).compose.%
	$$(AM_V_XFST_TOOL)$$(PRINTF) "read regex \
		    @\"filters/remove-orthography-tags.$$*\" \
		.o. @\"filters/remove-usage-tags.$$*\" \
		.o. @\"filters/remove-variant-tags.$$*\" \
		.o. @\"filters/remove-Orth_IPA-strings.$$*\" \
		.o. @\"filters/block-mwe-compounds.$$*\" \
		.o. @\"$$<\" \
		.o. @\"orthography/downcase-derived_proper-strings.compose.$$*\" \
		.o. @\"filters/remove-hyphenation-marks.$$*\" \
		.o. @\"filters/remove-infl_deriv-borders.$$*\" \
		.o. @\"filters/remove-word-boundary.$$*\" \
		; \n\
		define fst \n\
		set flag-is-epsilon ON\n\
		read regex fst \
		.o. @\"orthography/raw-to-$(1).compose.$$*\" \
		.o. @\"orthography/spellrelax.$(1).compose.$$*\" \
		;\n\
		save stack $$@\n\
		quit\n" | $$(XFST_TOOL)
endef
$(foreach alt_orth,$(ALT_ORTHS),$(eval $(call alt_orth_disamb_fst,$(alt_orth))))

# Final step for the regular disamb analyser: take the tmp1 file and compose
# the filters that drop everything related to pmatch (together with the
# Use_GC / Use_minusGC filters). The template is instantiated once per FST
# backend suffix; the % in the target matches whatever sits between the
# analyser base name and "tmp".
define disambs
analyser-disamb-gt-desc%tmp.$(1): analyser-disamb-gt-desc%tmp1.$(1) \
		filters/remove-Use_minusGC-tags.$(1) \
		filters/remove-Use_GC-strings.$(1) \
		filters/remove-Use_minus_PMatch-tags.$(1) \
		filters/remove-Use_PMatch-strings.$(1)
	$$(AM_V_XFST_TOOL)$$(PRINTF) "read regex \
		    @\"filters/remove-Use_minus_PMatch-tags.$(1)\" \
		.o. @\"filters/remove-Use_minusGC-tags.$(1)\" \
		.o. @\"filters/remove-Use_GC-strings.$(1)\" \
		.o. @\"filters/remove-Use_PMatch-strings.$(1)\" \
		.o. @\"$$<\" \
		;\n\
		save stack $$@\n\
		quit\n" | $$(XFST_TOOL)
endef
$(foreach fst,hfst xfst foma,$(eval $(call disambs,$(fst))))

# Variant of the disamb analyser for use with pmatch. It is built from the
# same tmp1 file as the regular disamb analyser; the only difference is which
# pmatch filters are composed: here the strings and tags not relevant to the
# pmatch version are removed, and the pmatch counterpart is kept without its
# tags.
define pmatch_disambs
analyser-pmatchdisamb-gt-desc%tmp.$(1): analyser-disamb-gt-desc%tmp1.$(1) \
		filters/remove-Use_minusGC-tags.$(1) \
		filters/remove-Use_GC-strings.$(1) \
		filters/remove-Use_minus_PMatch-strings.$(1) \
		filters/remove-Use_PMatch-tags.$(1)
	$$(AM_V_XFST_TOOL)$$(PRINTF) "read regex \
		    @\"filters/remove-Use_PMatch-tags.$(1)\" \
		.o. @\"filters/remove-Use_minusGC-tags.$(1)\" \
		.o. @\"filters/remove-Use_GC-strings.$(1)\" \
		.o. @\"filters/remove-Use_minus_PMatch-strings.$(1)\" \
		.o. @\"$$<\" \
		;\n\
		save stack $$@\n\
		quit\n" | $$(XFST_TOOL)
endef
$(foreach fst,hfst xfst foma,$(eval $(call pmatch_disambs,$(fst))))