## Include this file in top-level srcdir to compile FSTs
## Copyright: Sámediggi/Divvun/UiT
## Licence: GPL v3+

####### Automake targets: ########

# Register the dictionary analysers and generators with the build, once per
# available FST backend. Nothing is added unless WANT_DICTIONARIES is set.
if WANT_DICTIONARIES
if CAN_HFST
GT_ANALYSERS+=analyser-dict-gt-desc.hfstol \
              analyser-dict-gt-desc-mobile.hfstol
GT_GENERATORS+=generator-dict-gt-norm.hfstol
endif # CAN_HFST

if CAN_XFST
GT_ANALYSERS+=analyser-dict-gt-desc.xfst \
              analyser-dict-gt-desc-mobile.xfst
GT_GENERATORS+=generator-dict-gt-norm.xfst
endif # CAN_XFST
endif # WANT_DICTIONARIES

##### END Xerox target list #####

# Building dictionary fsts. The dictionary analyser differs from the
# regular analyser by NOT removing variant and homonym tags, so that we can
# generate proper (mini)paradigms for the given lemmas and variants (the
# analysis string is used as input to the generator).
# Tags to be removed:
# - Sem/ tags
# - OLang/*
# - MWE
#
# The recipe pipes an xfst script into $(XFST_TOOL): the tag filters are
# composed in front of the raw analyser ($<), the surface-side cleanup
# filters behind it, and the result is stored as 'fst'. With flag-is-epsilon
# switched ON, 'fst' is then composed with the inituppercase and spellrelax
# transducers and saved as the target. The stem ($*) is the FST file suffix.
analyser-dict-gt-desc.tmp.%: analyser-raw-gt-desc.% \
                 filters/remove-area-tags.% \
                 filters/remove-dialect-tags.% \
                 filters/remove-orig_lang-tags.% \
                 filters/remove-usage_except_NGminip-tags.% \
                 filters/remove-semantic-tags.% \
                 filters/remove-hyphenation-marks.% \
                 filters/remove-infl_deriv-borders.% \
                 filters/remove-word-boundary.% \
                 filters/remove-orthography-tags.% \
                 filters/remove-Orth_IPA-strings.% \
                 orthography/inituppercase.compose.% \
                 orthography/spellrelax.compose.% \
                 orthography/downcase-derived_proper-strings.compose.%
	$(AM_V_XFST_TOOL)$(PRINTF) "read regex \
	    @\"filters/remove-area-tags.$*\" \
	.o. @\"filters/remove-dialect-tags.$*\" \
	.o. @\"filters/remove-orig_lang-tags.$*\" \
	.o. @\"filters/remove-usage_except_NGminip-tags.$*\" \
	.o. @\"filters/remove-semantic-tags.$*\" \
	.o. @\"filters/remove-orthography-tags.$*\" \
	.o. @\"filters/remove-Orth_IPA-strings.$*\" \
	.o. @\"$<\" \
	.o. @\"orthography/downcase-derived_proper-strings.compose.$*\" \
	.o. @\"filters/remove-hyphenation-marks.$*\" \
	.o. @\"filters/remove-infl_deriv-borders.$*\" \
	.o. @\"filters/remove-word-boundary.$*\" \
	; \n\
	 define fst \n\
	 set flag-is-epsilon ON\n\
	 read regex fst \
	.o. @\"orthography/inituppercase.compose.$*\" \
	.o. @\"orthography/spellrelax.compose.$*\" \
	;\n\
	 save stack $@\n\
	 quit\n" | $(XFST_TOOL)

# The mobile analyser is just like the regular dictionary analyser,
# except for one additional set of spellrelaxes:
analyser-dict-gt-desc-mobile.tmp.%: analyser-dict-gt-desc.tmp.% \
                 orthography/spellrelax-mobile-keyboard.compose.%
	$(AM_V_XFST_TOOL)$(PRINTF) "read regex \
	    @\"$<\" \
	.o. @\"orthography/spellrelax-mobile-keyboard.compose.$*\" \
	;\n\
	 save stack $@\n\
	 quit\n" | $(XFST_TOOL)

# The generator is meant NOT to contain NGminip tagged strings, since we do
# not want them in the miniparadigms generated by this transducer. At the same
# time, this transducer DOES CONTAIN the homonym and variant tags, to govern
# proper paradigm generation.
# NOTE(review): the only usage filter applied below is
# remove-usage_except_NGminip-tags; confirm that this really excludes the
# NGminip strings as intended.
# Obligatory tags:
# - homonymy tags
# - variant tags
# - Use/NGminip ??? - check with Lene!
generator-dict-gt-norm.tmp.%: analyser-raw-gt-desc.% \
                 filters/remove-area-tags.% \
                 filters/make-optional-hyph-tags.% \
                 filters/make-optional-transitivity-tags.% \
                 filters/make-optional-semantic-tags.% \
                 filters/make-optional-v1-tags.% \
                 filters/make-optional-adv_comp-tags.% \
                 filters/remove-dialect-tags.% \
                 filters/remove-orig_lang-tags.% \
                 filters/remove-usage_except_NGminip-tags.% \
                 filters/remove-orthography-tags.% \
                 filters/remove-Orth_IPA-strings.% \
                 filters/remove-error-strings.% \
                 filters/remove-hyphenation-marks.% \
                 filters/remove-infl_deriv-borders.% \
                 filters/remove-word-boundary.% \
                 orthography/downcase-derived_proper-strings.compose.%
	$(AM_V_XFST_TOOL)$(PRINTF) "read regex \
	    @\"filters/make-optional-transitivity-tags.$*\" \
	.o. @\"filters/make-optional-hyph-tags.$*\" \
	.o. @\"filters/make-optional-semantic-tags.$*\" \
	.o. @\"filters/make-optional-v1-tags.$*\" \
	.o. @\"filters/make-optional-adv_comp-tags.$*\" \
	.o. @\"filters/remove-area-tags.$*\" \
	.o. @\"filters/remove-dialect-tags.$*\" \
	.o. @\"filters/remove-orig_lang-tags.$*\" \
	.o. @\"filters/remove-usage_except_NGminip-tags.$*\" \
	.o. @\"filters/remove-error-strings.$*\" \
	.o. @\"filters/remove-orthography-tags.$*\" \
	.o. @\"filters/remove-Orth_IPA-strings.$*\" \
	.o. @\"$<\" \
	.o. @\"orthography/downcase-derived_proper-strings.compose.$*\" \
	.o. @\"filters/remove-hyphenation-marks.$*\" \
	.o. @\"filters/remove-infl_deriv-borders.$*\" \
	.o. @\"filters/remove-word-boundary.$*\" \
	;\n\
	 save stack $@\n\
	 quit\n" | $(XFST_TOOL)

###############################################################
# Additional targets to support multiple writing systems:
#
# Each template below takes the FST file suffix as $(1) and is instantiated
# for hfst, xfst and foma via $(eval $(call ...)). Everything that must
# survive until the generated rule is parsed or run is written with '$$'.
# The pattern stem ($$*) is the name of the alternative writing system.
# The analyser appends the $(DEFAULT_WS)-to-<stem> conversion and the
# writing-system specific spellrelax on the surface side.

define giella_dict_alt_ws_analysers
analyser-dict-gt-desc.%.tmp.$(1): analyser-raw-gt-desc.$(1) \
                 filters/remove-area-tags.$(1) \
                 filters/remove-dialect-tags.$(1) \
                 filters/remove-orig_lang-tags.$(1) \
                 filters/remove-usage_except_NGminip-tags.$(1) \
                 filters/remove-semantic-tags.$(1) \
                 filters/remove-hyphenation-marks.$(1) \
                 filters/remove-infl_deriv-borders.$(1) \
                 filters/remove-word-boundary.$(1) \
                 filters/remove-orthography-tags.$(1) \
                 filters/remove-Orth_IPA-strings.$(1) \
                 orthography/spellrelax.%.compose.$(1) \
                 orthography/$$(DEFAULT_WS)-to-%.compose.$(1)
	$$(AM_V_XFST_TOOL)$$(PRINTF) "read regex \
	    @\"filters/remove-area-tags.$(1)\" \
	.o. @\"filters/remove-dialect-tags.$(1)\" \
	.o. @\"filters/remove-orig_lang-tags.$(1)\" \
	.o. @\"filters/remove-usage_except_NGminip-tags.$(1)\" \
	.o. @\"filters/remove-semantic-tags.$(1)\" \
	.o. @\"filters/remove-orthography-tags.$(1)\" \
	.o. @\"filters/remove-Orth_IPA-strings.$(1)\" \
	.o. @\"$$<\" \
	.o. @\"filters/remove-hyphenation-marks.$(1)\" \
	.o. @\"filters/remove-infl_deriv-borders.$(1)\" \
	.o. @\"filters/remove-word-boundary.$(1)\" \
	.o. @\"orthography/$$(DEFAULT_WS)-to-$$*.compose.$(1)\"\
	.o. @\"orthography/spellrelax.$$*.compose.$(1)\" \
	;\n\
	 save stack $$@\n\
	 quit\n" | $$(XFST_TOOL)
endef
$(foreach fst,hfst xfst foma,$(eval $(call giella_dict_alt_ws_analysers,$(fst))))

define giella_dict_alt_ws_generators
generator-dict-gt-norm.%.tmp.$(1): analyser-raw-gt-desc.$(1) \
                 filters/make-optional-hyph-tags.$(1) \
                 filters/make-optional-transitivity-tags.$(1) \
                 filters/make-optional-semantic-tags.$(1) \
                 filters/make-optional-v1-tags.$(1) \
                 filters/make-optional-adv_comp-tags.$(1) \
                 filters/remove-area-tags.$(1) \
                 filters/remove-dialect-tags.$(1) \
                 filters/remove-orig_lang-tags.$(1) \
                 filters/remove-usage_except_NGminip-tags.$(1) \
                 filters/remove-orthography-tags.$(1) \
                 filters/remove-Orth_IPA-strings.$(1) \
                 filters/remove-error-strings.$(1) \
                 filters/remove-hyphenation-marks.$(1) \
                 filters/remove-infl_deriv-borders.$(1) \
                 filters/remove-word-boundary.$(1) \
                 orthography/$$(DEFAULT_WS)-to-%.compose.$(1)
	$$(AM_V_XFST_TOOL)$$(PRINTF) "read regex \
	    @\"filters/make-optional-transitivity-tags.$(1)\" \
	.o. @\"filters/make-optional-hyph-tags.$(1)\" \
	.o. @\"filters/make-optional-semantic-tags.$(1)\" \
	.o. @\"filters/make-optional-v1-tags.$(1)\" \
	.o. @\"filters/make-optional-adv_comp-tags.$(1)\" \
	.o. @\"filters/remove-area-tags.$(1)\" \
	.o. @\"filters/remove-dialect-tags.$(1)\" \
	.o. @\"filters/remove-orig_lang-tags.$(1)\" \
	.o. @\"filters/remove-usage_except_NGminip-tags.$(1)\" \
	.o. @\"filters/remove-error-strings.$(1)\" \
	.o. @\"filters/remove-orthography-tags.$(1)\" \
	.o. @\"filters/remove-Orth_IPA-strings.$(1)\" \
	.o. @\"$$<\" \
	.o. @\"filters/remove-hyphenation-marks.$(1)\" \
	.o. @\"filters/remove-infl_deriv-borders.$(1)\" \
	.o. @\"filters/remove-word-boundary.$(1)\" \
	.o. @\"orthography/$$(DEFAULT_WS)-to-$$*.compose.$(1)\"\
	;\n\
	 save stack $$@\n\
	 quit\n" | $$(XFST_TOOL)
endef
$(foreach fst,hfst xfst foma,$(eval $(call giella_dict_alt_ws_generators,$(fst))))

###############################################################
# Additional targets to support alternative orthographies:
#
# These templates define pattern rules for the SAME target patterns as the
# writing-system templates above (and two rules each: one converting via
# raw-to-<stem>, one via $(DEFAULT_ORTH)-to-<stem>). Make uses the first
# rule whose prerequisites exist or can be made, so the order of the rules
# is significant and must not be changed.
#
# The orthography converter is applied twice: inverted (.i) in front of the
# tag filters, and as-is on the surface side after the raw analyser.
#
# The analyser recipes read filters/make-optional-transitivity-tags.$(1),
# so that file is listed as a prerequisite as well; otherwise it would
# neither be built on demand nor trigger a rebuild when it changes.
# $$(DEFAULT_ORTH) is escaped in the prerequisite lists exactly as in the
# recipes and as $$(DEFAULT_WS) is in the writing-system templates.

define giella_dict_alt_orth_analysers
analyser-dict-gt-desc.%.tmp.$(1): analyser-raw-gt-desc.$(1) \
                 filters/make-optional-transitivity-tags.$(1) \
                 filters/remove-area-tags.$(1) \
                 filters/remove-dialect-tags.$(1) \
                 filters/remove-orig_lang-tags.$(1) \
                 filters/remove-usage_except_NGminip-tags.$(1) \
                 filters/remove-semantic-tags.$(1) \
                 filters/remove-hyphenation-marks.$(1) \
                 filters/remove-infl_deriv-borders.$(1) \
                 filters/remove-word-boundary.$(1) \
                 filters/remove-orthography-tags.$(1) \
                 filters/remove-Orth_IPA-strings.$(1) \
                 orthography/spellrelax.%.compose.$(1) \
                 orthography/raw-to-%.compose.$(1)
	$$(AM_V_XFST_TOOL)$$(PRINTF) "read regex \
	    @\"orthography/raw-to-$$*.compose.$(1)\".i \
	.o. @\"filters/make-optional-transitivity-tags.$(1)\" \
	.o. @\"filters/remove-area-tags.$(1)\" \
	.o. @\"filters/remove-dialect-tags.$(1)\" \
	.o. @\"filters/remove-orig_lang-tags.$(1)\" \
	.o. @\"filters/remove-usage_except_NGminip-tags.$(1)\" \
	.o. @\"filters/remove-semantic-tags.$(1)\" \
	.o. @\"filters/remove-orthography-tags.$(1)\" \
	.o. @\"filters/remove-Orth_IPA-strings.$(1)\" \
	.o. @\"$$<\" \
	.o. @\"filters/remove-hyphenation-marks.$(1)\" \
	.o. @\"filters/remove-infl_deriv-borders.$(1)\" \
	.o. @\"filters/remove-word-boundary.$(1)\" \
	.o. @\"orthography/raw-to-$$*.compose.$(1)\"\
	.o. @\"orthography/spellrelax.$$*.compose.$(1)\" \
	;\n\
	 save stack $$@\n\
	 quit\n" | $$(XFST_TOOL)

analyser-dict-gt-desc.%.tmp.$(1): analyser-raw-gt-desc.$(1) \
                 filters/make-optional-transitivity-tags.$(1) \
                 filters/remove-area-tags.$(1) \
                 filters/remove-dialect-tags.$(1) \
                 filters/remove-orig_lang-tags.$(1) \
                 filters/remove-usage_except_NGminip-tags.$(1) \
                 filters/remove-semantic-tags.$(1) \
                 filters/remove-hyphenation-marks.$(1) \
                 filters/remove-infl_deriv-borders.$(1) \
                 filters/remove-word-boundary.$(1) \
                 filters/remove-orthography-tags.$(1) \
                 filters/remove-Orth_IPA-strings.$(1) \
                 orthography/spellrelax.%.compose.$(1) \
                 orthography/$$(DEFAULT_ORTH)-to-%.compose.$(1)
	$$(AM_V_XFST_TOOL)$$(PRINTF) "read regex \
	    @\"orthography/$$(DEFAULT_ORTH)-to-$$*.compose.$(1)\".i \
	.o. @\"filters/make-optional-transitivity-tags.$(1)\" \
	.o. @\"filters/remove-area-tags.$(1)\" \
	.o. @\"filters/remove-dialect-tags.$(1)\" \
	.o. @\"filters/remove-orig_lang-tags.$(1)\" \
	.o. @\"filters/remove-usage_except_NGminip-tags.$(1)\" \
	.o. @\"filters/remove-semantic-tags.$(1)\" \
	.o. @\"filters/remove-orthography-tags.$(1)\" \
	.o. @\"filters/remove-Orth_IPA-strings.$(1)\" \
	.o. @\"$$<\" \
	.o. @\"filters/remove-hyphenation-marks.$(1)\" \
	.o. @\"filters/remove-infl_deriv-borders.$(1)\" \
	.o. @\"filters/remove-word-boundary.$(1)\" \
	.o. @\"orthography/$$(DEFAULT_ORTH)-to-$$*.compose.$(1)\"\
	.o. @\"orthography/spellrelax.$$*.compose.$(1)\" \
	;\n\
	 save stack $$@\n\
	 quit\n" | $$(XFST_TOOL)
endef
$(foreach fst,hfst xfst foma,$(eval $(call giella_dict_alt_orth_analysers,$(fst))))

define giella_dict_alt_orth_generators
generator-dict-gt-norm.%.tmp.$(1): analyser-raw-gt-desc.$(1) \
                 filters/make-optional-hyph-tags.$(1) \
                 filters/make-optional-transitivity-tags.$(1) \
                 filters/make-optional-semantic-tags.$(1) \
                 filters/make-optional-v1-tags.$(1) \
                 filters/make-optional-adv_comp-tags.$(1) \
                 filters/remove-area-tags.$(1) \
                 filters/remove-dialect-tags.$(1) \
                 filters/remove-orig_lang-tags.$(1) \
                 filters/remove-usage_except_NGminip-tags.$(1) \
                 filters/remove-orthography-tags.$(1) \
                 filters/remove-Orth_IPA-strings.$(1) \
                 filters/remove-error-strings.$(1) \
                 filters/remove-hyphenation-marks.$(1) \
                 filters/remove-infl_deriv-borders.$(1) \
                 filters/remove-word-boundary.$(1) \
                 orthography/$$(DEFAULT_ORTH)-to-%.compose.$(1)
	$$(AM_V_XFST_TOOL)$$(PRINTF) "read regex \
	    @\"orthography/$$(DEFAULT_ORTH)-to-$$*.compose.$(1)\".i \
	.o. @\"filters/make-optional-transitivity-tags.$(1)\" \
	.o. @\"filters/make-optional-hyph-tags.$(1)\" \
	.o. @\"filters/make-optional-semantic-tags.$(1)\" \
	.o. @\"filters/make-optional-v1-tags.$(1)\" \
	.o. @\"filters/make-optional-adv_comp-tags.$(1)\" \
	.o. @\"filters/remove-area-tags.$(1)\" \
	.o. @\"filters/remove-dialect-tags.$(1)\" \
	.o. @\"filters/remove-orig_lang-tags.$(1)\" \
	.o. @\"filters/remove-usage_except_NGminip-tags.$(1)\" \
	.o. @\"filters/remove-error-strings.$(1)\" \
	.o. @\"filters/remove-orthography-tags.$(1)\" \
	.o. @\"filters/remove-Orth_IPA-strings.$(1)\" \
	.o. @\"$$<\" \
	.o. @\"filters/remove-hyphenation-marks.$(1)\" \
	.o. @\"filters/remove-infl_deriv-borders.$(1)\" \
	.o. @\"filters/remove-word-boundary.$(1)\" \
	.o. @\"orthography/$$(DEFAULT_ORTH)-to-$$*.compose.$(1)\"\
	;\n\
	 save stack $$@\n\
	 quit\n" | $$(XFST_TOOL)

generator-dict-gt-norm.%.tmp.$(1): analyser-raw-gt-desc.$(1) \
                 filters/make-optional-hyph-tags.$(1) \
                 filters/make-optional-transitivity-tags.$(1) \
                 filters/make-optional-semantic-tags.$(1) \
                 filters/make-optional-v1-tags.$(1) \
                 filters/make-optional-adv_comp-tags.$(1) \
                 filters/remove-area-tags.$(1) \
                 filters/remove-dialect-tags.$(1) \
                 filters/remove-orig_lang-tags.$(1) \
                 filters/remove-usage_except_NGminip-tags.$(1) \
                 filters/remove-orthography-tags.$(1) \
                 filters/remove-Orth_IPA-strings.$(1) \
                 filters/remove-error-strings.$(1) \
                 filters/remove-hyphenation-marks.$(1) \
                 filters/remove-infl_deriv-borders.$(1) \
                 filters/remove-word-boundary.$(1) \
                 orthography/raw-to-%.compose.$(1)
	$$(AM_V_XFST_TOOL)$$(PRINTF) "read regex \
	    @\"orthography/raw-to-$$*.compose.$(1)\".i \
	.o. @\"filters/make-optional-transitivity-tags.$(1)\" \
	.o. @\"filters/make-optional-hyph-tags.$(1)\" \
	.o. @\"filters/make-optional-semantic-tags.$(1)\" \
	.o. @\"filters/make-optional-v1-tags.$(1)\" \
	.o. @\"filters/make-optional-adv_comp-tags.$(1)\" \
	.o. @\"filters/remove-area-tags.$(1)\" \
	.o. @\"filters/remove-dialect-tags.$(1)\" \
	.o. @\"filters/remove-orig_lang-tags.$(1)\" \
	.o. @\"filters/remove-usage_except_NGminip-tags.$(1)\" \
	.o. @\"filters/remove-error-strings.$(1)\" \
	.o. @\"filters/remove-orthography-tags.$(1)\" \
	.o. @\"filters/remove-Orth_IPA-strings.$(1)\" \
	.o. @\"$$<\" \
	.o. @\"filters/remove-hyphenation-marks.$(1)\" \
	.o. @\"filters/remove-infl_deriv-borders.$(1)\" \
	.o. @\"filters/remove-word-boundary.$(1)\" \
	.o. @\"orthography/raw-to-$$*.compose.$(1)\"\
	;\n\
	 save stack $$@\n\
	 quit\n" | $$(XFST_TOOL)
endef
$(foreach fst,hfst xfst foma,$(eval $(call giella_dict_alt_orth_generators,$(fst))))

###############################################################
# Fallback target for mobile variants of the alternating orth, ws analysers:
# the mobile analyser is a plain copy of the corresponding non-mobile one.
define giella_dict_alt_ws_mobile_analysers
analyser-dict-gt-desc-mobile.%.tmp.$(1): analyser-dict-gt-desc.%.$(1)
	$$(AM_V_CP)cp -f $$< $$@
endef
$(foreach fst,hfst xfst foma,$(eval $(call giella_dict_alt_ws_mobile_analysers,$(fst))))