## Include this file in top-level srcdir to compile FSTs
## Copyright: Sámediggi/Divvun/UiT
## Licence: GPL v3+

# *.gt.* designates the tagset being used.
# At the end of the makefile, there is support for automatic compilation of
# other tagsets, given that the proper tagset relabeling files are defined,
# and that the target files are defined as part of the 'all' target.
#
# Filenames are built as follows:
# basictype-application-tagset-normativity[-dialect].fsttype
#
# 'application' is not specified for the regular/default morphological
# analysis/generation.
#
# Examples:
# analyser-oahpa-gt-desc.hfst
# generator-apertium-apertium-norm_single.hfst
# analyser-gt-desc.xfst
#
# Full details regarding transducer filenames can be found at:
#
# http://divvun.no/doc/infra/infraremake/TransducerNamesInTheNewInfra.html

####### Automake targets: ########

# The following two variables are used to force Make to choose one of two
# alternative paths to reach the same end target for pattern rules, where the
# wrong path builds on hfstol files all the way. This is overridden by
# specifying the non-hfstol intermediate files as non-installed targets.
# Raw transducers; filled in per FST backend below and listed in noinst_DATA.
GT_RAW=

# Variable to hold intermediate *.hfst targets to force make to go via them
# before building *.hfstol target. Only the .hfstol suffix is rewritten
# (patsubst), so an 'hfstol' substring elsewhere in a file name is left alone:
GT_FSTs_NO_INSTALL=$(patsubst %.hfstol,%.hfst,$(filter %.hfstol, \
                        $(GT_ANALYSERS) $(GT_GENERATORS)))

##### BEGIN Hfst target list #####
if CAN_HFST

# Always build the raw generator if hfst is enabled:
GT_RAW+=generator-raw-gt-desc.hfst analyser-raw-gt-desc.hfst

# Extra option passed to $(HFST_COMPOSE_INTERSECT) in the tmp1 build rule;
# left undefined (expands to nothing) unless two-step intersect is wanted.
if WANT_TWOSTEP_INTERSECT
COMPOSE_INTERSECT_FLAG=--fast
endif # WANT_TWOSTEP_INTERSECT

if WANT_MORPHOLOGY
GT_ANALYSERS+=analyser-gt-desc.hfstol \
              analyser-gt-norm.hfstol \
              analyser-disamb-gt-desc.hfstol
if WANT_GRAMCHECK
GT_ANALYSERS += analyser-gramcheck-gt-desc.hfstol
endif # WANT_GRAMCHECK
endif # WANT_MORPHOLOGY

if WANT_GENERATION
GT_GENERATORS+=generator-gt-desc.hfstol \
               generator-gt-norm.hfstol
endif # WANT_GENERATION

if WANT_GLOSSERS
GT_RAW+=analyser-raw-gt-glossing.hfst
GT_ANALYSERS+=analyser-gt-glossing.hfstol
GT_GENERATORS+=generator-gt-glossing.hfstol
endif # WANT_GLOSSERS

if WANT_MORPHER
GT_ANALYSERS+=morpher-gt-desc.hfstol
endif # WANT_MORPHER

endif # CAN_HFST
##### END Hfst target list #####

##### BEGIN Xerox target list #####
if CAN_XFST

# Always build the raw analyser if Xerox is enabled:
GT_RAW+=analyser-raw-gt-desc.xfst

if WANT_MORPHOLOGY
GT_ANALYSERS+=analyser-gt-desc.xfst \
              analyser-gt-norm.xfst \
              analyser-disamb-gt-desc.xfst
endif # WANT_MORPHOLOGY

if WANT_GENERATION
GT_GENERATORS+=generator-gt-desc.xfst \
               generator-gt-norm.xfst
endif # WANT_GENERATION

if WANT_GLOSSERS
GT_RAW+=analyser-raw-gt-glossing.xfst
GT_ANALYSERS+=analyser-gt-glossing.xfst
GT_GENERATORS+=generator-gt-glossing.xfst
endif # WANT_GLOSSERS

if WANT_MORPHER
GT_ANALYSERS+=morpher-gt-desc.xfst
endif # WANT_MORPHER

endif # CAN_XFST
##### END Xerox target list #####

##### BEGIN Foma target list #####
if CAN_FOMA

# Always build the raw analyser if Foma is enabled:
GT_RAW+=analyser-raw-gt-desc.foma

if WANT_MORPHOLOGY
GT_ANALYSERS+=analyser-gt-desc.foma \
              analyser-gt-norm.foma \
              analyser-disamb-gt-desc.foma
endif # WANT_MORPHOLOGY

if WANT_GENERATION
GT_GENERATORS+=generator-gt-desc.foma \
               generator-gt-norm.foma
endif # WANT_GENERATION

if WANT_GLOSSERS
GT_RAW+=analyser-raw-gt-glossing.foma
GT_ANALYSERS+=analyser-gt-glossing.foma
GT_GENERATORS+=generator-gt-glossing.foma
endif # WANT_GLOSSERS

if WANT_MORPHER
GT_ANALYSERS+=morpher-gt-desc.foma
endif # WANT_MORPHER

endif # CAN_FOMA
##### END Foma target list #####

# Installed transducers go to $(datadir)/giella/$(GTLANG).
# NOTE(review): the ALT_WS_* and ALT_ORTH_* variables are not set in this
# file; presumably they come from the alt_ws/alt_orth include files - confirm.
hfstdatadir=$(datadir)/giella/$(GTLANG)
hfstdata_DATA=$(GT_ANALYSERS) \
              $(GT_GENERATORS) \
              $(ALT_WS_ANALYSERS) \
              $(ALT_WS_GENERATORS) \
              $(ALT_ORTH_ANALYSERS) \
              $(ALT_ORTH_GENERATORS)

# Built but not installed: the raw transducers and the intermediate *.hfst
# files behind the *.hfstol targets.
noinst_DATA=$(GT_RAW) \
            $(GT_FSTs_NO_INSTALL)

##################################################
######## Build rules for Xerox and HFST: #########
#
# Each target transducer is defined together
# for both transducer types,
# first for HFST then for Xerox. This
# makes it easy to check that the build
# is parallel for the two transducer
# types.
##################################################

# The "raw" transducer contains all tags and symbols available on the analysis
# side. Some of them are optional for generation, some are only needed for
# special tools. On the lower side, the raw transducer contains all
# morphological boundaries and hyphenation symbols. As such, this transducer
# can NOT be used for morphological analysis, and hardly for generation.
# First build a tmp1 raw file as the intersection between lexicon and rules:

# HFST
# Both variants below run the lexicon ($<) through $(HFST_DETERMINIZE) and
# $(HFST_MINIMIZE), pipe it into $(HFST_COMPOSE_INTERSECT) with the phonology
# file given via -2, and write the output of a final $(HFST_MINIMIZE) to $@.
# $(COMPOSE_INTERSECT_FLAG) is only set when WANT_TWOSTEP_INTERSECT is true.
if WANT_REVERSED_INTERSECT
# Do this if reversed intersect is enabled. This variant reads the *.rev.hfst
# lexicon and phonology files and adds a $(HFST_REVERSE) step before the final
# minimisation:
generator-raw-gt-desc.tmp1.hfst: morphology/lexicon.rev.hfst \
                                 phonology/$(GTLANG)-phon.rev.hfst
	$(AM_V_INTRSCT)\
		  $(HFST_DETERMINIZE) $(MORE_VERBOSITY) $(HFST_FLAGS) $< \
		| $(HFST_MINIMIZE) $(MORE_VERBOSITY) $(HFST_FLAGS) \
		| $(HFST_COMPOSE_INTERSECT) $(COMPOSE_INTERSECT_FLAG) \
			$(MORE_VERBOSITY) $(HFST_FLAGS) \
			-2 phonology/$(GTLANG)-phon.rev.hfst \
		| $(HFST_REVERSE) \
		| $(HFST_MINIMIZE) $(MORE_VERBOSITY) $(HFST_FLAGS) \
		-o $@
else
# Otherwise do this:
generator-raw-gt-desc.tmp1.hfst: morphology/lexicon.hfst \
                                 phonology/$(GTLANG)-phon.compose.hfst
	$(AM_V_INTRSCT)\
		  $(HFST_DETERMINIZE) $(MORE_VERBOSITY) $(HFST_FLAGS) $<\
		| $(HFST_MINIMIZE) $(MORE_VERBOSITY) $(HFST_FLAGS) \
		| $(HFST_COMPOSE_INTERSECT) $(COMPOSE_INTERSECT_FLAG) \
			$(MORE_VERBOSITY) $(HFST_FLAGS) \
			-2 phonology/$(GTLANG)-phon.compose.hfst \
		| $(HFST_MINIMIZE) $(MORE_VERBOSITY) $(HFST_FLAGS) \
		-o $@
endif # WANT_REVERSED_INTERSECT

# XEROX
# The script piped to $(LEXC) reads the lexicon as source and the phonology
# file as rules, composes them and saves the result as $@.
analyser-raw-gt-desc.tmp1.xfst: morphology/lexicon.xfst \
                                phonology/$(GTLANG)-phon.compose.xfst
	$(AM_V_LEXC)$(PRINTF) \
		"read-source morphology/lexicon.xfst\nread-rules phonology/$(GTLANG)-phon.compose.xfst\ncompose-result\nsave-result $@\nquit\n" \
		| $(LEXC) $(VERBOSITY)

# FOMA
# The script piped to $(FOMA) composes the lexicon ($<) with the phonology
# file and saves the stack as $@.
analyser-raw-gt-desc.tmp1.foma: morphology/lexicon.foma \
                                phonology/$(GTLANG)-phon.compose.foma
	$(AM_V_FOMA)$(PRINTF) \
		"read regex \
		    @\"$<\" \
		.o. @\"phonology/$(GTLANG)-phon.compose.foma\" \
		; \n\
		save stack $@\n\
		quit\n" \
		| $(FOMA) $(VERBOSITY)

# ... then apply tag reordering to tmp1 to ensure the same tag order in all
# subsequent processing; this creates the regular tmp file, which can further
# get local processing to produce the final raw file:
#
# Each target pattern below is defined twice (variant a and variant b). The
# two variants differ only in the filters/reorder-tags.$(GTLANG).% prerequisite
# and composition step; variant a is listed first so that make considers it
# before variant b. Keep this order.

### HFST - the raw files are generators, thus we need a special target (but
### we try to keep the code as identical as possible, thus pattern rule):
# Variant a) - with a language-specific tag reordering script applied:
generator-raw-gt-desc.tmp.%: generator-raw-gt-desc.tmp1.% \
                     filters/reorder-tags.$(GTLANG).% \
                     filters/reorder-semantic-tags.% \
                     filters/reorder-subpos-tags.% \
                     filters/remove-mwe-tags.%
	$(AM_V_XFST_TOOL)$(PRINTF) "read regex \
		    @\"filters/reorder-tags.$(GTLANG).$*\"\
		.o. @\"filters/reorder-subpos-tags.$*\" \
		.o. @\"filters/reorder-semantic-tags.$*\" \
		.o. @\"filters/remove-mwe-tags.$*\" \
		.o. @\"$<\" \
		;\n\
		save stack $@\n\
		quit\n" | $(XFST_TOOL)

# Variant b) - without a language-specific tag reordering script:
generator-raw-gt-desc.tmp.%: generator-raw-gt-desc.tmp1.% \
                     filters/reorder-semantic-tags.% \
                     filters/reorder-subpos-tags.% \
                     filters/remove-mwe-tags.%
	$(AM_V_XFST_TOOL)$(PRINTF) "read regex \
		    @\"filters/reorder-subpos-tags.$*\" \
		.o. @\"filters/reorder-semantic-tags.$*\" \
		.o. @\"filters/remove-mwe-tags.$*\" \
		.o. @\"$<\" \
		;\n\
		save stack $@\n\
		quit\n" | $(XFST_TOOL)

#### Xerox & FOMA - no special treatment:
# Variant a) - with a language-specific tag reordering script applied:
analyser-raw-gt-desc.tmp.%: analyser-raw-gt-desc.tmp1.% \
                     filters/reorder-tags.$(GTLANG).% \
                     filters/reorder-semantic-tags.% \
                     filters/reorder-subpos-tags.% \
                     filters/remove-mwe-tags.%
	$(AM_V_XFST_TOOL)$(PRINTF) "read regex \
		    @\"filters/reorder-tags.$(GTLANG).$*\"\
		.o. @\"filters/reorder-subpos-tags.$*\" \
		.o. @\"filters/reorder-semantic-tags.$*\" \
		.o. @\"filters/remove-mwe-tags.$*\" \
		.o. @\"$<\" \
		;\n\
		save stack $@\n\
		quit\n" | $(XFST_TOOL)

# Variant b) - without a language-specific tag reordering script:
analyser-raw-gt-desc.tmp.%: analyser-raw-gt-desc.tmp1.% \
                     filters/reorder-semantic-tags.% \
                     filters/reorder-subpos-tags.% \
                     filters/remove-mwe-tags.%
	$(AM_V_XFST_TOOL)$(PRINTF) "read regex \
		    @\"filters/reorder-subpos-tags.$*\" \
		.o. @\"filters/reorder-semantic-tags.$*\" \
		.o. @\"filters/remove-mwe-tags.$*\" \
		.o. @\"$<\" \
		;\n\
		save stack $@\n\
		quit\n" | $(XFST_TOOL)

# Target to build glossing raw analysers, ie one where all tags are in the
# position specified in lexc (the tmp1 file is copied unchanged, so none of
# the reordering filters above are applied):
# Hfst:
generator-raw-gt-glossing.tmp.%: generator-raw-gt-desc.tmp1.%
	$(AM_V_CP)cp $< $@

# Xerox/Foma:
analyser-raw-gt-glossing.tmp.%: analyser-raw-gt-desc.tmp1.%
	$(AM_V_CP)cp $< $@

# Target required for the pattern rules below to work for HFST builds: the
# rules below all start from analyser-raw-gt-*.%, whereas the HFST raw file is
# built under the name generator-raw-gt-*.hfst. This rule is a plain copy.
analyser-raw-gt-%.hfst: generator-raw-gt-%.hfst
	$(AM_V_CP)cp $< $@

# To get a transducer usable for a certain application, the raw transducer needs
# to be filtered and manipulated to get the target tag set and surface symbols.
# That is done in all subsequent targets, which builds on the raw transducer
# built above.

# This is the default, descriptive analyser:
# Visible tags (ie do NOT remove):
# - variant tags
# - the Err/Orth tag
# Invisible tags (ie to be removed):
# - semantic tags
# - homonymy tags
#
# The script is run in two steps: first the tag filters, the raw transducer
# ($<), the downcasing step and the border/boundary filters are composed and
# stored as 'fst'; then, with flag-is-epsilon switched ON, 'fst' is composed
# with the inituppercase and spellrelax steps.
analyser-gt-desc.tmp.%: analyser-raw-gt-desc.% \
                     filters/remove-area-tags.% \
                     filters/remove-dialect-tags.% \
                     filters/remove-number-string-tags.% \
                     filters/remove-usage-tags.% \
                     filters/remove-semantic-tags.% \
                     filters/remove-hyphenation-marks.% \
                     filters/remove-infl_deriv-borders.% \
                     filters/remove-word-boundary.% \
                     filters/remove-orthography-tags.% \
                     filters/remove-Orth_IPA-strings.% \
                     filters/remove-orig_lang-tags.% \
                     orthography/inituppercase.compose.% \
                     orthography/spellrelax.compose.% \
                     orthography/downcase-derived_proper-strings.compose.%
	$(AM_V_XFST_TOOL)$(PRINTF) "read regex \
		    @\"filters/remove-area-tags.$*\" \
		.o. @\"filters/remove-dialect-tags.$*\" \
		.o. @\"filters/remove-number-string-tags.$*\" \
		.o. @\"filters/remove-usage-tags.$*\" \
		.o. @\"filters/remove-semantic-tags.$*\" \
		.o. @\"filters/remove-orig_lang-tags.$*\" \
		.o. @\"filters/remove-orthography-tags.$*\" \
		.o. @\"filters/remove-Orth_IPA-strings.$*\" \
		.o. @\"$<\" \
		.o. @\"orthography/downcase-derived_proper-strings.compose.$*\" \
		.o. @\"filters/remove-hyphenation-marks.$*\" \
		.o. @\"filters/remove-infl_deriv-borders.$*\" \
		.o. @\"filters/remove-word-boundary.$*\" \
		; \n\
		define fst \n\
		set flag-is-epsilon ON\n\
		read regex fst \
		.o. @\"orthography/inituppercase.compose.$*\" \
		.o. @\"orthography/spellrelax.compose.$*\" \
		;\n\
		save stack $@\n\
		quit\n" | $(XFST_TOOL)

# This is the default, descriptive generating transducer.
# The make-optional-* filters are composed first, then the remove-* tag
# filters, the raw transducer ($<), the downcasing step and the
# border/boundary filters. No inituppercase or spellrelax step is applied.
generator-gt-desc.tmp.%: analyser-raw-gt-desc.% \
                     filters/make-optional-transitivity-tags.% \
                     filters/make-optional-homonymy-tags.% \
                     filters/make-optional-hyph-tags.% \
                     filters/make-optional-variant-tags.% \
                     filters/make-optional-semantic-tags.% \
                     filters/make-optional-error-tags.% \
                     filters/make-optional-adv_comp-tags.% \
                     filters/remove-area-tags.% \
                     filters/remove-dialect-tags.% \
                     filters/remove-hyphenation-marks.% \
                     filters/remove-infl_deriv-borders.% \
                     filters/remove-word-boundary.% \
                     filters/remove-number-string-tags.% \
                     filters/remove-orthography-tags.% \
                     filters/remove-Orth_IPA-strings.% \
                     filters/remove-orig_lang-tags.% \
                     filters/remove-usage-tags.% \
                     orthography/downcase-derived_proper-strings.compose.%
	$(AM_V_XFST_TOOL)$(PRINTF) "read regex \
		    @\"filters/make-optional-transitivity-tags.$*\" \
		.o. @\"filters/make-optional-homonymy-tags.$*\" \
		.o. @\"filters/make-optional-hyph-tags.$*\" \
		.o. @\"filters/make-optional-variant-tags.$*\" \
		.o. @\"filters/make-optional-semantic-tags.$*\" \
		.o. @\"filters/make-optional-error-tags.$*\" \
		.o. @\"filters/make-optional-adv_comp-tags.$*\" \
		.o. @\"filters/remove-area-tags.$*\" \
		.o. @\"filters/remove-dialect-tags.$*\" \
		.o. @\"filters/remove-number-string-tags.$*\" \
		.o. @\"filters/remove-usage-tags.$*\" \
		.o. @\"filters/remove-orig_lang-tags.$*\" \
		.o. @\"filters/remove-orthography-tags.$*\" \
		.o. @\"filters/remove-Orth_IPA-strings.$*\" \
		.o. @\"$<\" \
		.o. @\"orthography/downcase-derived_proper-strings.compose.$*\" \
		.o. @\"filters/remove-hyphenation-marks.$*\" \
		.o. @\"filters/remove-infl_deriv-borders.$*\" \
		.o. @\"filters/remove-word-boundary.$*\" \
		;\n\
		save stack $@\n\
		quit\n" | $(XFST_TOOL)

# This is the glossing analyser: the same prerequisites and script as for
# analyser-gt-desc.tmp.%, but starting from the glossing raw transducer.
analyser-gt-glossing.tmp.%: analyser-raw-gt-glossing.% \
                     filters/remove-area-tags.% \
                     filters/remove-dialect-tags.% \
                     filters/remove-number-string-tags.% \
                     filters/remove-usage-tags.% \
                     filters/remove-semantic-tags.% \
                     filters/remove-hyphenation-marks.% \
                     filters/remove-infl_deriv-borders.% \
                     filters/remove-word-boundary.% \
                     filters/remove-orthography-tags.% \
                     filters/remove-Orth_IPA-strings.% \
                     filters/remove-orig_lang-tags.% \
                     orthography/inituppercase.compose.% \
                     orthography/spellrelax.compose.% \
                     orthography/downcase-derived_proper-strings.compose.%
	$(AM_V_XFST_TOOL)$(PRINTF) "read regex \
		    @\"filters/remove-area-tags.$*\" \
		.o. @\"filters/remove-dialect-tags.$*\" \
		.o. @\"filters/remove-number-string-tags.$*\" \
		.o. @\"filters/remove-usage-tags.$*\" \
		.o. @\"filters/remove-semantic-tags.$*\" \
		.o. @\"filters/remove-orig_lang-tags.$*\" \
		.o. @\"filters/remove-orthography-tags.$*\" \
		.o. @\"filters/remove-Orth_IPA-strings.$*\" \
		.o. @\"$<\" \
		.o. @\"orthography/downcase-derived_proper-strings.compose.$*\" \
		.o. @\"filters/remove-hyphenation-marks.$*\" \
		.o. @\"filters/remove-infl_deriv-borders.$*\" \
		.o. @\"filters/remove-word-boundary.$*\" \
		; \n\
		define fst \n\
		set flag-is-epsilon ON\n\
		read regex fst \
		.o. @\"orthography/inituppercase.compose.$*\" \
		.o. @\"orthography/spellrelax.compose.$*\" \
		;\n\
		save stack $@\n\
		quit\n" | $(XFST_TOOL)

# This is the glossing generating transducer.
# Glossing generator: same prerequisites and composition order as
# generator-gt-desc.tmp.%, but starting from the glossing raw transducer ($<).
generator-gt-glossing.tmp.%: analyser-raw-gt-glossing.% \
                     filters/make-optional-transitivity-tags.% \
                     filters/make-optional-homonymy-tags.% \
                     filters/make-optional-hyph-tags.% \
                     filters/make-optional-variant-tags.% \
                     filters/make-optional-semantic-tags.% \
                     filters/make-optional-error-tags.% \
                     filters/make-optional-adv_comp-tags.% \
                     filters/remove-area-tags.% \
                     filters/remove-dialect-tags.% \
                     filters/remove-hyphenation-marks.% \
                     filters/remove-infl_deriv-borders.% \
                     filters/remove-word-boundary.% \
                     filters/remove-number-string-tags.% \
                     filters/remove-orthography-tags.% \
                     filters/remove-Orth_IPA-strings.% \
                     filters/remove-orig_lang-tags.% \
                     filters/remove-usage-tags.% \
                     orthography/downcase-derived_proper-strings.compose.%
	$(AM_V_XFST_TOOL)$(PRINTF) "read regex \
		    @\"filters/make-optional-transitivity-tags.$*\" \
		.o. @\"filters/make-optional-homonymy-tags.$*\" \
		.o. @\"filters/make-optional-hyph-tags.$*\" \
		.o. @\"filters/make-optional-variant-tags.$*\" \
		.o. @\"filters/make-optional-semantic-tags.$*\" \
		.o. @\"filters/make-optional-error-tags.$*\" \
		.o. @\"filters/make-optional-adv_comp-tags.$*\" \
		.o. @\"filters/remove-area-tags.$*\" \
		.o. @\"filters/remove-dialect-tags.$*\" \
		.o. @\"filters/remove-number-string-tags.$*\" \
		.o. @\"filters/remove-usage-tags.$*\" \
		.o. @\"filters/remove-orig_lang-tags.$*\" \
		.o. @\"filters/remove-orthography-tags.$*\" \
		.o. @\"filters/remove-Orth_IPA-strings.$*\" \
		.o. @\"$<\" \
		.o. @\"orthography/downcase-derived_proper-strings.compose.$*\" \
		.o. @\"filters/remove-hyphenation-marks.$*\" \
		.o. @\"filters/remove-infl_deriv-borders.$*\" \
		.o. @\"filters/remove-word-boundary.$*\" \
		;\n\
		save stack $@\n\
		quit\n" | $(XFST_TOOL)

# This is the normative analyser:
# Remove sub-standard/error forms to get a normative transducer
#
# Compared with analyser-gt-desc.tmp.%, this rule additionally composes
# filters/remove-variant-tags and filters/remove-error-strings, and its second
# step applies only inituppercase (no spellrelax).
analyser-gt-norm.tmp.%: analyser-raw-gt-desc.% \
                     filters/remove-area-tags.% \
                     filters/remove-dialect-tags.% \
                     filters/remove-variant-tags.% \
                     filters/remove-number-string-tags.% \
                     filters/remove-usage-tags.% \
                     filters/remove-error-strings.% \
                     filters/remove-semantic-tags.% \
                     filters/remove-hyphenation-marks.% \
                     filters/remove-infl_deriv-borders.% \
                     filters/remove-word-boundary.% \
                     filters/remove-orthography-tags.% \
                     filters/remove-Orth_IPA-strings.% \
                     filters/remove-orig_lang-tags.% \
                     orthography/inituppercase.compose.% \
                     orthography/downcase-derived_proper-strings.compose.%
	$(AM_V_XFST_TOOL)$(PRINTF) "read regex \
		    @\"filters/remove-area-tags.$*\" \
		.o. @\"filters/remove-dialect-tags.$*\" \
		.o. @\"filters/remove-variant-tags.$*\" \
		.o. @\"filters/remove-number-string-tags.$*\" \
		.o. @\"filters/remove-usage-tags.$*\" \
		.o. @\"filters/remove-semantic-tags.$*\" \
		.o. @\"filters/remove-error-strings.$*\" \
		.o. @\"filters/remove-orig_lang-tags.$*\" \
		.o. @\"filters/remove-orthography-tags.$*\" \
		.o. @\"filters/remove-Orth_IPA-strings.$*\" \
		.o. @\"$<\" \
		.o. @\"orthography/downcase-derived_proper-strings.compose.$*\" \
		.o. @\"filters/remove-hyphenation-marks.$*\" \
		.o. @\"filters/remove-infl_deriv-borders.$*\" \
		.o. @\"filters/remove-word-boundary.$*\" \
		; \n\
		define fst \n\
		set flag-is-epsilon ON\n\
		read regex fst \
		.o. @\"orthography/inituppercase.compose.$*\" \
		;\n\
		save stack $@\n\
		quit\n" | $(XFST_TOOL)

# This is the normative generating transducer.
# Compared with generator-gt-desc.tmp.%, this rule composes
# filters/remove-error-strings and has no make-optional-error-tags step.
generator-gt-norm.tmp.%: analyser-raw-gt-desc.% \
                     filters/make-optional-transitivity-tags.% \
                     filters/make-optional-homonymy-tags.% \
                     filters/make-optional-hyph-tags.% \
                     filters/make-optional-variant-tags.% \
                     filters/make-optional-semantic-tags.% \
                     filters/make-optional-adv_comp-tags.% \
                     filters/remove-area-tags.% \
                     filters/remove-dialect-tags.% \
                     filters/remove-hyphenation-marks.% \
                     filters/remove-infl_deriv-borders.% \
                     filters/remove-word-boundary.% \
                     filters/remove-number-string-tags.% \
                     filters/remove-usage-tags.% \
                     filters/remove-orthography-tags.% \
                     filters/remove-Orth_IPA-strings.% \
                     filters/remove-orig_lang-tags.% \
                     filters/remove-error-strings.% \
                     orthography/downcase-derived_proper-strings.compose.%
	$(AM_V_XFST_TOOL)$(PRINTF) "read regex \
		    @\"filters/make-optional-transitivity-tags.$*\" \
		.o. @\"filters/make-optional-homonymy-tags.$*\" \
		.o. @\"filters/make-optional-hyph-tags.$*\" \
		.o. @\"filters/make-optional-variant-tags.$*\" \
		.o. @\"filters/make-optional-semantic-tags.$*\" \
		.o. @\"filters/make-optional-adv_comp-tags.$*\" \
		.o. @\"filters/remove-area-tags.$*\" \
		.o. @\"filters/remove-dialect-tags.$*\" \
		.o. @\"filters/remove-number-string-tags.$*\" \
		.o. @\"filters/remove-usage-tags.$*\" \
		.o. @\"filters/remove-error-strings.$*\" \
		.o. @\"filters/remove-orig_lang-tags.$*\" \
		.o. @\"filters/remove-orthography-tags.$*\" \
		.o. @\"filters/remove-Orth_IPA-strings.$*\" \
		.o. @\"$<\" \
		.o. @\"orthography/downcase-derived_proper-strings.compose.$*\" \
		.o. @\"filters/remove-hyphenation-marks.$*\" \
		.o. @\"filters/remove-infl_deriv-borders.$*\" \
		.o. @\"filters/remove-word-boundary.$*\" \
		;\n\
		save stack $@\n\
		quit\n" | $(XFST_TOOL)

# Create generic tagset variants of *analyzers*
#
# For HFST every target pattern below has two rules: one that needs a
# tagsets/%.relabel file (passed to $(HFST_SUBSTITUTE) via -F) and one that
# needs a compiled tagsets/%.hfst (composed with the gt transducer through
# $(HFST_REGEXP2FST)). The .relabel rule is listed first in each pair.
# Note that the two toolchains compose the tagset file on opposite sides:
# HFST analysers use '$< .o. tagset.i' and HFST generators 'tagset .o. $<',
# whereas XFST analysers use 'tagset .o. $<' and XFST generators
# '$< .o. tagset.i'.

# HFST:
# Standard descriptive
analyser-%-desc.hfst: analyser-gt-desc.hfst tagsets/%.relabel
	$(AM_V_FST2FST)$(HFST_FST2FST) $(HFST_FLAGS) $(HFST_FORMAT) -t $< \
		| $(HFST_SUBSTITUTE) $(HFST_FLAGS) $(HFST_FORMAT) \
			-F $(srcdir)/tagsets/$*.relabel \
			-o $@

# Relabeling using compiled regex:
analyser-%-desc.hfst: analyser-gt-desc.hfst tagsets/%.hfst
	$(AM_V_RGX2FST)$(PRINTF) "\
		    @\"$<\" \
		.o. @\"tagsets/$*.hfst\".i \
		;" \
		| $(HFST_REGEXP2FST) $(HFST_FLAGS) $(HFST_FORMAT) \
			-S --xerox-composition=ON \
			-o $@

# Standard normative
analyser-%-norm.hfst: analyser-gt-norm.hfst tagsets/%.relabel
	$(AM_V_FST2FST)$(HFST_FST2FST) $(HFST_FLAGS) $(HFST_FORMAT) -t $< \
		| $(HFST_SUBSTITUTE) $(HFST_FLAGS) $(HFST_FORMAT) \
			-F $(srcdir)/tagsets/$*.relabel \
			-o $@

# Relabeling using compiled regex:
analyser-%-norm.hfst: analyser-gt-norm.hfst tagsets/%.hfst
	$(AM_V_RGX2FST)$(PRINTF) "\
		    @\"$<\" \
		.o. @\"tagsets/$*.hfst\".i \
		;" \
		| $(HFST_REGEXP2FST) $(HFST_FLAGS) $(HFST_FORMAT) \
			-S --xerox-composition=ON \
			-o $@

# Create tagset variants of *generators*
# Standard descriptive
generator-%-desc.hfst: generator-gt-desc.hfst tagsets/%.relabel
	$(AM_V_FST2FST)$(HFST_FST2FST) $(HFST_FLAGS) $(HFST_FORMAT) -t $< \
		| $(HFST_SUBSTITUTE) $(HFST_FLAGS) $(HFST_FORMAT) \
			-F $(srcdir)/tagsets/$*.relabel \
			-o $@

# Relabeling using compiled regex:
generator-%-desc.hfst: generator-gt-desc.hfst tagsets/%.hfst
	$(AM_V_RGX2FST)$(PRINTF) "\
		    @\"tagsets/$*.hfst\" \
		.o. @\"$<\" \
		;" \
		| $(HFST_REGEXP2FST) $(HFST_FLAGS) $(HFST_FORMAT) \
			-S --xerox-composition=ON \
			-o $@

# Standard normative
generator-%-norm.hfst: generator-gt-norm.hfst tagsets/%.relabel
	$(AM_V_FST2FST)$(HFST_FST2FST) $(HFST_FLAGS) $(HFST_FORMAT) -t $< \
		| $(HFST_SUBSTITUTE) $(HFST_FLAGS) $(HFST_FORMAT) \
			-F $(srcdir)/tagsets/$*.relabel \
			-o $@

# Relabeling using compiled regex:
generator-%-norm.hfst: generator-gt-norm.hfst tagsets/%.hfst
	$(AM_V_RGX2FST)$(PRINTF) "\
		    @\"tagsets/$*.hfst\" \
		.o. @\"$<\" \
		;" \
		| $(HFST_REGEXP2FST) $(HFST_FLAGS) $(HFST_FORMAT) \
			-S --xerox-composition=ON \
			-o $@

# XEROX/XFST:
# Standard descriptive
analyser-%-desc.xfst: analyser-gt-desc.xfst tagsets/%.xfst
	$(AM_V_XFST)$(PRINTF) "read regex \
		    @\"tagsets/$*.xfst\" \
		.o. @\"$<\" \
		;\n\
		save stack $@\n\
		quit\n" | $(XFST) $(VERBOSITY)

# Standard normative
analyser-%-norm.xfst: analyser-gt-norm.xfst tagsets/%.xfst
	$(AM_V_XFST)$(PRINTF) "read regex \
		    @\"tagsets/$*.xfst\" \
		.o. @\"$<\" \
		;\n\
		save stack $@\n\
		quit\n" | $(XFST) $(VERBOSITY)

# Create tagset variants of *generators*
# Standard descriptive
generator-%-desc.xfst: generator-gt-desc.xfst tagsets/%.xfst
	$(AM_V_XFST)$(PRINTF) "read regex \
		    @\"$<\" \
		.o. @\"tagsets/$*.xfst\".i \
		;\n\
		save stack $@\n\
		quit\n" | $(XFST) $(VERBOSITY)

# Standard normative
generator-%-norm.xfst: generator-gt-norm.xfst tagsets/%.xfst
	$(AM_V_XFST)$(PRINTF) "read regex \
		    @\"$<\" \
		.o. @\"tagsets/$*.xfst\".i \
		;\n\
		save stack $@\n\
		quit\n" | $(XFST) $(VERBOSITY)

# lemmatization is a special case of tagset variant: the descriptive HFST
# analyser is passed through $(HFST_SUBSTITUTE) with tagsets/lemmatize.relabel.
lemmatize.default.hfst: analyser-gt-desc.hfst tagsets/lemmatize.relabel
	$(AM_V_HSUBST)$(HFST_SUBSTITUTE) $(HFST_FLAGS) $(HFST_FORMAT) \
		-F $(srcdir)/tagsets/lemmatize.relabel -i $< -o $@

# morpher is a morph segmenting variant: taloautoissani -> talo#auto>i>ssa>ni
# The raw transducer is composed with inituppercase and spellrelax, the lower
# side of that result is taken (.l), and the hyphenation, border and
# word-boundary filters are then composed onto it.
morpher-gt-desc.tmp.%: analyser-raw-gt-desc.% \
                     orthography/inituppercase.compose.% \
                     orthography/spellrelax.compose.% \
                     filters/remove-hyphenation-marks.% \
                     filters/remove-infl_deriv-borders.% \
                     filters/remove-word-boundary.%
	$(AM_V_XFST_TOOL)$(PRINTF) "read regex \
		[ [@\"$<\" \
		.o. @\"orthography/inituppercase.compose.$*\" \
		.o. @\"orthography/spellrelax.compose.$*\" ].l \
		.o. @\"filters/remove-hyphenation-marks.$*\" \
		.o. @\"filters/remove-infl_deriv-borders.$*\" \
		.o. @\"filters/remove-word-boundary.$*\" ] \
		;\n\
		save stack $@\n\
		quit\n" | $(XFST_TOOL)

####### Other targets: ###########
# Copy the raw transducer. This enables language-specific processing of the raw
# transducer, if required.
# Each template below takes the FST file suffix as $(1) and is instantiated
# once per suffix (hfst, xfst, foma) through foreach/eval/call. Recipe
# variables are written with '$$' so that they are expanded when the recipe
# runs rather than when the template is evaluated. The templates are kept in
# their original order, since some of the generated patterns can match the
# same file name (e.g. analyser-raw-gt-desc.hfst).

# Raw transducer: plain copy of the corresponding tmp file.
define giella_raw_fsts
%raw-gt-desc.$(1): %raw-gt-desc.tmp.$(1)
	$$(AM_V_CP)cp -f $$< $$@
endef
$(foreach fsttype,hfst xfst foma,$(eval $(call giella_raw_fsts,$(fsttype))))

# Raw error transducer: plain copy of the corresponding tmp file.
define giella_raw_error_fsts
%raw-gt-error.$(1): %raw-gt-error.tmp.$(1)
	$$(AM_V_CP)cp -f $$< $$@
endef
$(foreach fsttype,hfst xfst foma,$(eval $(call giella_raw_error_fsts,$(fsttype))))

# Default fallback processing for the analysers: load the tmp file, insert
# whatever $(INVERT_HFST) expands to, and save the stack as the final file.
# NOTE(review): INVERT_HFST is not defined in this file - presumably set by
# one of the files included below; confirm.
define giella_default_analysers
analyser%.$(1): analyser%.tmp.$(1)
	$$(AM_V_XFST_TOOL)$$(PRINTF) "\
		load stack $$<\n\
		$$(INVERT_HFST)\
		save stack $$@\n\
		quit\n" | $$(XFST_TOOL)
endef
$(foreach fsttype,hfst xfst foma,$(eval $(call giella_default_analysers,$(fsttype))))

# Default fallback processing for the generators: as for the analysers, but
# inserting $(INVERT_XFST)$(INVERT_FOMA) instead of $(INVERT_HFST).
define giella_default_generators
generator%.$(1): generator%.tmp.$(1)
	$$(AM_V_XFST_TOOL)$$(PRINTF) "\
		load stack $$<\n\
		$$(INVERT_XFST)$$(INVERT_FOMA)\
		save stack $$@\n\
		quit\n" | $$(XFST_TOOL)
endef
$(foreach fsttype,hfst xfst foma,$(eval $(call giella_default_generators,$(fsttype))))

# Default fallback processing for the morpher: same script as for the
# analysers.
define giella_default_morphers
morpher%.$(1): morpher%.tmp.$(1)
	$$(AM_V_XFST_TOOL)$$(PRINTF) "\
		load stack $$<\n\
		$$(INVERT_HFST)\
		save stack $$@\n\
		quit\n" | $$(XFST_TOOL)
endef
$(foreach fsttype,hfst xfst foma,$(eval $(call giella_default_morphers,$(fsttype))))

# cleaning: remove every transducer file from the current build directory.
clean-local:
	-rm -f *.hfst *.xfst *.foma *.hfstol

# Shared rule sets and build helpers:
include $(top_srcdir)/am-shared/src_oahpa-include.am
include $(top_srcdir)/am-shared/src_dictionary-include.am
include $(top_srcdir)/am-shared/src_disamb-include.am
include $(top_srcdir)/am-shared/src_alt_ws-include.am
include $(top_srcdir)/am-shared/src_alt_orth-include.am
include $(top_srcdir)/am-shared/lookup-include.am
include $(top_srcdir)/am-shared/regex-include.am
include $(top_srcdir)/am-shared/xfscript-include.am
include $(top_srcdir)/am-shared/hfst-format-include.am
include $(top_srcdir)/am-shared/silent_build-include.am

# vim: set ft=automake: