# ******************************************************************** #
# This is a common makefile that builds the Sami morphological parsers #
# ******************************************************************** #

# Version: $Id$

# =============================== #
# Variable definitions            #
# =============================== #

# The following variables should come from the environment, and should never be
# commented in. Please run script/gtsetup.sh to add them to .bashrc or .profile.
# They are here for human reference only:
#
# GTHOME =
# GTPRIV =
# GTBIG  =

# The language to build. A GTLANG given on the make command line
# (make GTLANG=sme) takes precedence over this assignment.
GTLANG=$(TARGET)

ifeq ($(strip $(GTLANG)),)
$(error Error: GTLANG is not defined!)
endif

# Short language code; identical to GTLANG except for sme, which uses "se".
GTSHORTLANG=$(GTLANG)
ifeq ($(strip $(GTLANG)),sme)
GTSHORTLANG=se
endif

QUIET = -q
TEST = false
SP = false

M4 = m4
M4FLAGS =

# Tools used when compiling the transducers
UFST = /opt/sami/xerox/c-fsm/ix86-linux2.6-gcc3.4/bin/fst -utf8 $(QUIET)
CFST = xfst $(QUIET)
XFST = xfst $(QUIET)
TWOLC = twolc $(QUIET)
LEXC = lexc $(QUIET)
LOOKUP = lookup $(QUIET)
GUNZIP = /usr/bin/gunzip
SCP = scp -p
SSH = ssh
VISLCG3 = vislcg3
MKDIR = mkdir -p

# Directories:
SCRIPTDIR = $(GTHOME)/gt/script

# Dialect used by the restrictive (-restr) targets; defaults to GG.
ifeq ($(strip $(DIALECT)),)
DIALECT=GG
endif

# Extra xfst arguments spliced into the save-lexc recipe. With SP=false
# (the default) this is an empty -e command.
ifneq "$(SP)" "false"
SPFST = -e "read regex [ 0 @< [%+G3 | %+G7] ] ;" -e "compose net"
else
SPFST = -e ""
endif

# Version-related info:
# Both are recursive (=) on purpose: the shell commands only run when the
# variable is actually expanded.
DATE = $(shell date +%Y%m%d)
VERSION = $(shell cat $(GTLANG)/polderland/version.txt | tr -d " ")

#ifeq (victorio.uit.no, $(shell hostname))
#CFST = /opt/sami/xerox/c-fsm/ix86-linux2.6-gcc3.4/bin/fst -utf8
#XFST = /opt/sami/xerox/bin/xfst -utf8
#LEXC = /opt/sami/xerox/bin/lexc -utf8
#TWOLC = /opt/sami/xerox/bin/twolc -utf8
#endif

ifeq (giellatekno.uit.no, $(shell hostname))
CFST = xfst.2.15.5 $(QUIET)
endif

#############################################################################
# LexC source file definitions for all languages - additions below if needed:
#############################################################################

# The basic source file definition:
SRCS = $(GTLANG)/src/$(GTLANG)-lex.txt \
       $(GTLANG)/src/verb-$(GTLANG)-lex.txt \
       $(GTLANG)/src/pp-$(GTLANG)-lex.txt \
       $(GTLANG)/src/pronoun-$(GTLANG)-lex.txt \
       $(GTLANG)/src/interjection-$(GTLANG)-lex.txt \
       $(GTLANG)/src/conjunction-$(GTLANG)-lex.txt \
       $(GTLANG)/src/subjunction-$(GTLANG)-lex.txt \
       $(GTLANG)/src/particle-$(GTLANG)-lex.txt \
       $(GTLANG)/src/noun-$(GTLANG)-lex.txt \
       $(GTLANG)/src/numeral-$(GTLANG)-lex.txt \
       $(GTLANG)/src/adj-$(GTLANG)-lex.txt \
       $(GTLANG)/src/adv-$(GTLANG)-lex.txt \
       $(GTLANG)/src/punct-$(GTLANG)-lex.txt

##################################################
# Language-specific additions to the source files:
##################################################

##############
# GTLANG=sme #
##############
ifeq ($(GTLANG), sme)
SRCS += $(GTLANG)/src/acro-$(GTLANG)-lex.txt \
        $(GTLANG)/src/abbr-$(GTLANG)-lex.txt \
        $(GTLANG)/src/clitics-$(GTLANG)-lex.txt \
        $(GTLANG)/src/verb-$(GTLANG)-morph.txt \
        $(GTLANG)/src/adj-$(GTLANG)-morph.txt \
        $(GTLANG)/src/noun-$(GTLANG)-morph.txt \
        $(GTLANG)/src/poss-$(GTLANG)-morph.txt \
        $(GTLANG)/src/compound-$(GTLANG)-lex.txt \
        $(GTLANG)/src/pronoun-$(GTLANG)-morph.txt \
        $(GTLANG)/src/propernoun-$(GTLANG)-lex-tmp.txt
endif

##############
# GTLANG=smj #
##############
ifeq ($(GTLANG), smj)
SRCS += $(GTLANG)/src/abbr-$(GTLANG)-lex.txt \
        $(GTLANG)/src/acro-$(GTLANG)-lex.txt \
        $(GTLANG)/src/propernoun-$(GTLANG)-lex-tmp.txt
endif

##############
# GTLANG=sma #
##############
ifeq ($(GTLANG), sma)
SRCS += $(GTLANG)/src/abbr-$(GTLANG)-lex.txt \
        $(GTLANG)/src/acro-$(GTLANG)-lex.txt \
        $(GTLANG)/src/propernoun-$(GTLANG)-lex.txt \
        $(GTLANG)/src/propernoun-$(GTLANG)-morph.txt
endif

##############
# GTLANG=smn #
##############
ifeq ($(GTLANG), smn)
# (propernoun -tmp still not in place)
SRCS += $(GTLANG)/src/propernoun-$(GTLANG)-lex.txt \
        $(GTLANG)/src/propernoun-$(GTLANG)-morph.txt
endif

##############
# GTLANG=sms #
##############
ifeq ($(GTLANG), sms)
SRCS += $(GTLANG)/src/propernoun-$(GTLANG)-lex.txt \
        $(GTLANG)/src/propernoun-$(GTLANG)-morph.txt \
        $(GTLANG)/src/noun-$(GTLANG)-morph.txt \
        $(GTLANG)/src/verb-$(GTLANG)-morph.txt
endif

##############
# GTLANG=sjd #
##############
ifeq ($(GTLANG), sjd)
SRCS += $(GTLANG)/src/propernoun-$(GTLANG)-lex.txt
endif

##############
# GTLANG=sje #
##############
ifeq ($(GTLANG), sje)
SRCS += $(GTLANG)/src/abbr-$(GTLANG)-lex.txt \
        $(GTLANG)/src/propernoun-$(GTLANG)-lex.txt
endif

# Default goal: build everything listed for the selected language.
all: $(GTLANG)

# The umbrella targets below share their names with directories of the source
# tree ($(GTLANG)/..., common/...). They are commands, not files, so declare
# them phony to keep make from ever comparing against those directories.
.PHONY: all sme smj sma smn sms sjd sje common

sme: \
        common \
        ifst-norm \
        ifst-restr \
        oahpa-ifst-norm \
        dict-ifst-norm \
        fst-norm \
        n-sme.fst \
        site.fst \
        corr \
        allcaps \
        typos.fst \
        pos.fst \
        errdown \
        s-tag.fst \
        properr.fst \
        ped-fst \
        ped-tol-fst \
        smi-dep \
        val.fst \
        foreign.fst \
        num.fst \
        inum.fst \
        clock.fst \
        iclock.fst \
        date.fst \
        idate.fst
# Not part of the standard sme target:
#   abbr            (compiles in 15 min: make abbr GTLANG=sme)
#   sme-dis.rle
#   s-sme.fst
#   d-sme.fst
#   hyph-sme.fst
# n-sme Norw tags, s-sme Sámi tags, d-sme dis-style tags
# we don't need the hyph as part of the standard target

smj: \
        common \
        fst-norm \
        ifst-norm \
        corr \
        allcaps \
        typos.fst \
        pos.fst \
        dis-bin \
        smi-dep \
        clock.fst \
        iclock.fst
# Not part of the standard smj target:
#   abbr            (compiles in 15 min: make abbr GTLANG=smj)

sma: \
        common \
        ifst-norm \
        ifst-restr \
        corr \
        fst-norm \
        typos.fst \
        ped-fst \
        dis-bin \
        smi-dep \
        phon.fst \
        num.fst \
        inum.fst \
        clock.fst \
        iclock.fst \
        date.fst \
        idate.fst \
        hyph-sma.fst
# Not part of the standard sma target:
#   abbr            (compiles in 15 min: make abbr GTLANG=sma)

smn: \
        common \
        ifst-norm \
        fst-norm \
        date.fst \
        idate.fst \
        clock.fst \
        iclock.fst

sms: \
        common

sjd: \
        common \
        s-tag.fst \
        hyphrules \
        num.fst \
        inum.fst \
        clock.fst \
        iclock.fst \
        date.fst \
        idate.fst

sje: \
        common \
        ifst-norm \
        fst-norm

# Targets shared by every language.
common: \
        fst \
        inverse.fst \
        missing

# Included makefiles:
include Makefile.hfst
include Makefile.plx

# Target for number and clock automata (note inconsistent i-naming)
#           text > number          number > text
# clock     clock = usage-tags     iclock = use-NG-filter
# number    inum  = usage-tags     num    = use-NG-filter

# Intermediate automaton without filters
int-clock.fst: $(GTLANG)/int/int-clock-$(GTLANG).fst
$(GTLANG)/int/int-clock-$(GTLANG).fst: $(GTLANG)/src/clock-$(GTLANG).lexc
	@echo
	@echo "*** $(@F) ***"
	@echo
	@$(MKDIR) $(@D)
	$(XFST) -e "read lexc $<" \
		-e "save stack $@" \
		-stop

# 04:00 to njieljie. string removal: use-NG-filter
iclock.fst: $(GTLANG)/bin/iclock-$(GTLANG).fst
$(GTLANG)/bin/iclock-$(GTLANG).fst: \
        $(GTLANG)/int/int-clock-$(GTLANG).fst \
        common/src/usage-tags-remove.regex \
        common/src/use-NG-filter.regex \
        common/src/use-NA-filter.regex
	@echo
	@echo "*** Building $@ ***"
	@echo
	$(XFST) -e "load < $<" \
		-e "read regex @re\"common/src/usage-tags-remove.regex\";" \
		-e "read regex @re\"common/src/use-NG-filter.regex\";" \
		-e "read regex @re\"common/src/use-NA-filter.regex\";" \
		-e "compose net" \
		-e "invert net" \
		-e "save stack $@" \
		-stop

# njieljie AND njielje to 04:00. tag removal: usage-tags-remove
# use-NA-filter.regex is still listed as a prerequisite although the current
# recipe no longer reads it (see the commented "original version" below).
clock.fst: $(GTLANG)/bin/clock-$(GTLANG).fst
$(GTLANG)/bin/clock-$(GTLANG).fst: \
        $(GTLANG)/int/int-clock-$(GTLANG).fst \
        common/src/usage-tags-remove.regex \
        common/src/use-NA-filter.regex
	@echo
	@echo "*** Building $@ ***"
	@echo
	$(XFST) -e "read regex @\"$<\" ;" \
		-e "read regex @re\"common/src/usage-tags-remove.regex\";" \
		-e "compose net" \
		-e "save stack $@" \
		-stop

# original version, with NA-line
#	$(XFST) -e "read regex @\"$<\" ;" \
#		-e "read regex @re\"common/src/use-NA-filter.regex\";" \
#		-e "read regex @re\"common/src/usage-tags-remove.regex\";" \
#		-e "compose net" \
#		-e "save stack $@" \
#		-stop

# Target for number > text conversion: 4:njieljie. use-NG-filter.regex (remove string)
# The two filter regexes are read by the recipe, so they are prerequisites:
# editing either of them must rebuild the transducer.
num.fst: $(GTLANG)/bin/$(GTLANG)-num.fst
$(GTLANG)/bin/$(GTLANG)-num.fst: $(GTLANG)/src/$(GTLANG)-num.txt \
        common/src/NumNum-filter.regex \
        common/src/use-NG-filter.regex
	@echo
	@echo "*** Building $@ ***"
	@echo
	$(XFST) -e "read regex @re\"common/src/NumNum-filter.regex\".i;" \
		-e "read lexc $<" \
		-e "read regex @re\"common/src/NumNum-filter.regex\";" \
		-e "read regex @re\"common/src/use-NG-filter.regex\";" \
		-e "compose net" \
		-e "save stack $@" \
		-stop

# Target for text > number conversion: njeljie:4, nieljie:4, usage-tags-remove.fst
inum.fst: $(GTLANG)/bin/$(GTLANG)-inum.fst
$(GTLANG)/bin/$(GTLANG)-inum.fst: $(GTLANG)/src/$(GTLANG)-num.txt \
        common/src/String-filter.regex \
        common/src/usage-tags-remove.regex \
        common/src/NumNum-filter.regex \
        common/src/spellrelax.regex
	@echo
	@echo "*** Building $@ ***"
	@echo
	$(XFST) -e "read regex @re\"common/src/String-filter.regex\";" \
		-e "read regex @re\"common/src/NumNum-filter.regex\".i;" \
		-e "read lexc $<" \
		-e "read regex @re\"common/src/NumNum-filter.regex\";" \
		-e "compose net" \
		-e "invert net" \
		-e "read regex @re\"common/src/usage-tags-remove.regex\".i;" \
		-e "read regex @re\"common/src/spellrelax.regex\";" \
		-e "turn stack" \
		-e "compose net" \
		-e "save stack $@" \
		-stop

# Intermediate automaton without filters
int-date.fst: $(GTLANG)/int/int-date-$(GTLANG).fst
$(GTLANG)/int/int-date-$(GTLANG).fst: $(GTLANG)/src/date-$(GTLANG).lexc
	@echo
	@echo "*** Building $@ ***"
	@echo
	@$(MKDIR) $(@D)
	$(XFST) -e "read lexc $<" \
		-e "save stack $@" \
		-stop

# Date generator, letter to number (12.3. etc.)
date.fst: $(GTLANG)/bin/date-$(GTLANG).fst
$(GTLANG)/bin/date-$(GTLANG).fst: $(GTLANG)/int/int-date-$(GTLANG).fst \
        common/src/usage-tags-remove.regex
	@echo
	@echo "*** Building $@ ***"
	@echo
	$(XFST) -e "read regex @\"$<\" ;" \
		-e "read regex @re\"common/src/usage-tags-remove.regex\";" \
		-e "compose net" \
		-e "save stack $@" \
		-stop

# Inverse date generator, number (12.3. etc.) to letter
idate.fst: $(GTLANG)/bin/idate-$(GTLANG).fst
$(GTLANG)/bin/idate-$(GTLANG).fst: $(GTLANG)/int/int-date-$(GTLANG).fst \
        common/src/use-NG-filter.regex
	@echo
	@echo "*** Building $@ ***"
	@echo
	@$(XFST) -e "load < $<" \
		-e "invert net" \
		-e "read regex @re\"common/src/use-NG-filter.regex\";" \
		-e "compose net" \
		-e "save stack $@" \
		-stop

# ======================================================= #
#   Building different versions of the basic fst tagger   #
# ======================================================= #

###########################################################
#              Common utility transducers                 #
###########################################################

###########################################################
#      Utility transducers made of single regexes:        #
###########################################################

# In order to make pos.fst we need a binary tag-pos.fst
# All tags except the POS one are deleted.
# (Points at common/bin, like every other alias here and like the
# prerequisite of the pos.fst rule; the binary is built by the
# common/bin/%.fst pattern rule below.)
tag-pos.fst: common/bin/tag-pos.fst

# We want to delete the +TV +IV and other tags for the generator
tag-not-save.fst: common/bin/tag-not-save.fst

# We want to delete Actor and G3 for generators other than the oahpa one
tag-not-save-but-oahpa.fst: common/bin/tag-not-save-but-oahpa.fst

# We want to delete the +v1 etc. that are used in dict generation
remove-variant-homonym-tags.fst: common/bin/remove-variant-homonym-tags.fst

usage-tags-remove.fst: common/bin/usage-tags-remove.fst

speller-tags-remove.fst: common/bin/speller-tags-remove.fst

# Some hyphen conversion:
hyphen-convert.fst: common/bin/hyphen-convert.fst

# Some hyphen deletion:
hyphen-remove.fst: common/bin/hyphen-remove.fst

# In order to make n-$(GTLANG).fst we need a binary tag-no.fst
tag-no.fst: common/bin/tag-no.fst

# Builds the errdown.fst file, to downcase initial uppercase letters
# It allows us to detect mistyped names (lowercase instead of upper)
errdown: common/bin/errdown.fst

# This goal is to build the inituppercase.fst file, it depends on
# inituppercase.regex. The resulting fst allows for viessu / Viessu, i.e.
# initial casing of all the words in the lexicon
inituppercase: common/bin/inituppercase.fst

# This goal is to allow for Scandinavian ä/æ and ö/ø mix, etc.
spellrelax: common/bin/spellrelax.fst

# Actual target to compile regex files into Xerox transducers:
common/bin/%.fst: common/src/%.regex
	@echo
	@echo "*** Building $@ ***" ;
	@echo
	$(CFST) -e "read regex < $< " \
		-e "save stack $@ " \
		-stop

###########################################################
#      Utility transducers made of xfst scripts:          #
###########################################################

# This goal is to make a transducer for filenames, urls and mail addresses
webadr: common/bin/webadr.fst

# This goal builds downcase-derived-proper.fst
# The resulting transducer allows for downcasing of derived names,
# such as oslolaš < Oslo
downcase: common/bin/downcase-derived-proper.fst

# This transducer allows for all-caps words, such as VIESSU, OSLO
# but not for e.g. VieSu, OslO
# It is used in an xfst script (bin/cap-sme), but seldom, since it is slow.
allcaps: common/bin/allcaps.fst

# Xfst script to remove digraphs of the X7, X8 etc type
digraph-infl.fst: common/bin/digraph-infl.fst

# This goal is to remove hyphens to make the spellers work
remove-hyphen.fst: common/bin/remove-hyphen.fst

# Pattern rule: run a common xfst script and save the resulting stack as a
# Xerox transducer.
common/bin/%.fst: common/src/%.xfst
	@printf '\n*** Building %s ***\n\n' '$@'
	$(XFST) -e "source $<" \
		-e "save stack $@" \
		-stop

###########################################################
#         Language-specific utility transducers           #
###########################################################

###########################################################
#      Utility transducers made of single regexes:        #
###########################################################

# This goal builds derivation-filter.fst
# The resulting transducer will only allow derivations following
# a certain pattern as described in $(GTLANG)-lex.txt
# NOTE(review): this alias resolves to $(GTLANG)/bin/derivation-filter.fst,
# whereas the generator rules in this file list common/bin/derivation-filter.fst
# as their prerequisite - confirm which location is intended.
derivation-filter: derivation-filter.fst
derivation-filter.fst: $(GTLANG)/bin/derivation-filter.fst

# Pattern rule: compile a language-specific regex file into a Xerox transducer.
$(GTLANG)/bin/%.fst: $(GTLANG)/src/%.regex
	@printf '\n*** Building %s ***\n\n' '$@'
	$(XFST) -e "read regex < $<" \
		-e "save stack $@" \
		-stop

###########################################################
#               Common LexC resources:                    #
###########################################################

# This goal is to make a regex for dates
common/bin/dates-as-digits.fst: common/src/dates-as-digits.lexc
	@printf '\n*** Building %s ***\n\n' '$@'
	$(XFST) -e "read lexc $<" \
		-e "save stack $@" \
		-stop

###########################################################
#           Real transducers coming here:                 #
###########################################################

# Target for building a temporary propernoun lexicon
# that combines north sámi lexicon with the lule sámi one.
propernoun-$(GTLANG)-lex-tmp.txt: $(GTLANG)/src/propernoun-$(GTLANG)-lex-tmp.txt

# The temporary lexicon is the morphology file (2nd prerequisite) followed by
# the language's own propernoun lexicon (1st prerequisite), a marker comment,
# and finally the shared SMI propernoun stems (3rd prerequisite). For sme the
# shared file is appended as it is; for every other language it is passed
# through smesmjdump.pl first.
$(GTLANG)/src/propernoun-$(GTLANG)-lex-tmp.txt: \
        $(GTLANG)/src/propernoun-$(GTLANG)-lex.txt \
        $(GTLANG)/src/propernoun-$(GTLANG)-morph.txt \
        $(GTHOME)/giella-shared/smi/src/morphology/stems/smi-propernouns.lexc
	@printf '\n*** Building %s ***\n\n' '$@'
	cat $(word 2,$^) $< > $@
	printf '\n! <--- Dump from SMI -->\n\n' >> $@
ifeq ($(GTLANG), sme)
	cat $(word 3,$^) >> $@
else
	smesmjdump.pl $(word 3,$^) >> $@
endif

# NEW Targets by Tomi
#####################

# New targets to build final fst's !!

# Presently, the TWOLC targets are not covered here, since they still depend
# on the use of m4 macros to split the source code in different ways to produce
# the alternations we need (normative, descriptive, hyphenating). Further down
# the line we will stop using m4, and instead use a three-step approach:
# 1. build a descriptive transducer, with non-normative forms tagged, and
#    including all hyphenation points as visible symbols
# 2. in a copy of the result in 1, remove all non-normative paths - this
#    becomes the normative transducer, with visible hyphenation points, to be
#    used for hyphenation
# 3. remove (i.e. turn to zero) all hyphenation points in the original in 1) -
#    this will be our regular descriptive transducer; and in a copy of the
#    result in 2), do the same with the normative, hyphenating transducer, to
#    turn it into a regular, normative transducer.
# This way we will achieve the same as using m4 today, but doing it all in a
# clean, transducer-based way.

# Here we build the final generator, an inverted transducer of the analyzer.
# It is dependent upon sm*.save
ifst: inverse.fst
i$(GTLANG).fst: inverse.fst
inverse.fst: $(GTLANG)/bin/i$(GTLANG).fst
$(GTLANG)/bin/i$(GTLANG).fst: \
        common/bin/tag-not-save.fst \
        common/bin/remove-variant-homonym-tags.fst \
        common/bin/tag-not-save-but-oahpa.fst \
        common/bin/usage-tags-remove.fst \
        common/bin/downcase-derived-proper.fst \
        $(GTLANG)/bin/$(GTLANG).save
	@echo
	@echo "*** Building the inverse $@ ***"
	@echo
	$(XFST) -e "read regex @\"common/bin/tag-not-save.fst\" \
		.o. @\"common/bin/tag-not-save-but-oahpa.fst\" \
		.o. @\"common/bin/remove-variant-homonym-tags.fst\" \
		.o. @\"common/bin/usage-tags-remove.fst\" \
		.o. @\"$(GTLANG)/bin/$(GTLANG).save\" \
		.o. @\"common/bin/downcase-derived-proper.fst\" ; " \
		-e "invert net" \
		-e "save stack $@" \
		-stop

# Here we build a normative generator, an inverted transducer of the normative
# analyzer. It is dependent upon sm*-norm.save
ifst-norm: inverse-norm.fst
i$(GTLANG)-norm.fst: inverse-norm.fst
inverse-norm.fst: $(GTLANG)/bin/i$(GTLANG)-norm.fst
$(GTLANG)/bin/i$(GTLANG)-norm.fst: \
        common/bin/tag-not-save.fst \
        common/bin/remove-variant-homonym-tags.fst \
        common/bin/tag-not-save-but-oahpa.fst \
        common/bin/usage-tags-remove.fst \
        common/bin/downcase-derived-proper.fst \
        common/bin/derivation-filter.fst \
        $(GTLANG)/bin/focus-filter.fst \
        $(GTLANG)/bin/$(GTLANG)-norm.save
	@echo
	@echo "*** Building the normative, inverse $@ ***"
	@echo
	$(XFST) -e "read regex @\"common/bin/tag-not-save.fst\" \
		.o. @\"common/bin/tag-not-save-but-oahpa.fst\" \
		.o. @\"common/bin/remove-variant-homonym-tags.fst\" \
		.o. @\"common/bin/usage-tags-remove.fst\" \
		.o. @\"common/bin/derivation-filter.fst\" \
		.o. @\"$(GTLANG)/bin/focus-filter.fst\" \
		.o. @\"$(GTLANG)/bin/$(GTLANG)-norm.save\" \
		.o. @\"common/bin/downcase-derived-proper.fst\" ; " \
		-e "invert net" \
		-e "save stack $@" \
		-stop

# Here we build a normative generator, as ifst-norm above, but tailored
# for oahpa generation. It is dependent upon sm*-norm.save
# (Unlike ifst-norm, tag-not-save-but-oahpa.fst is not composed in.)
oahpa-ifst-norm: oahpa-inverse-norm.fst
oahpa-i$(GTLANG)-norm.fst: oahpa-inverse-norm.fst
oahpa-inverse-norm.fst: $(GTLANG)/bin/oahpa-i$(GTLANG)-norm.fst
$(GTLANG)/bin/oahpa-i$(GTLANG)-norm.fst: \
        common/bin/tag-not-save.fst \
        common/bin/remove-variant-homonym-tags.fst \
        common/bin/usage-tags-remove.fst \
        common/bin/downcase-derived-proper.fst \
        common/bin/derivation-filter.fst \
        $(GTLANG)/bin/focus-filter.fst \
        $(GTLANG)/bin/$(GTLANG)-norm.save
	@echo
	@echo "*** Building the normative, inverse $@ ***"
	@echo
	$(XFST) -e "read regex @\"common/bin/tag-not-save.fst\" \
		.o. @\"common/bin/usage-tags-remove.fst\" \
		.o. @\"common/bin/remove-variant-homonym-tags.fst\" \
		.o. @\"common/bin/derivation-filter.fst\" \
		.o. @\"$(GTLANG)/bin/focus-filter.fst\" \
		.o. @\"$(GTLANG)/bin/$(GTLANG)-norm.save\" \
		.o. @\"common/bin/downcase-derived-proper.fst\" ; " \
		-e "invert net" \
		-e "save stack $@" \
		-stop

# Here we build a normative generator, as ifst-norm above, but tailored
# for dictionary generation. It is dependent upon sm*-norm.save
# (Neither tag-not-save-but-oahpa.fst nor remove-variant-homonym-tags.fst is
# composed in here.)
dict-ifst-norm: dict-inverse-norm.fst
dict-i$(GTLANG)-norm.fst: dict-inverse-norm.fst
dict-inverse-norm.fst: $(GTLANG)/bin/dict-i$(GTLANG)-norm.fst
$(GTLANG)/bin/dict-i$(GTLANG)-norm.fst: \
        common/bin/tag-not-save.fst \
        common/bin/usage-tags-remove.fst \
        common/bin/downcase-derived-proper.fst \
        common/bin/derivation-filter.fst \
        $(GTLANG)/bin/focus-filter.fst \
        $(GTLANG)/bin/$(GTLANG)-norm.save
	@echo
	@echo "*** Building the normative, inverse $@ ***"
	@echo
	$(XFST) -e "read regex @\"common/bin/tag-not-save.fst\" \
		.o. @\"common/bin/usage-tags-remove.fst\" \
		.o. @\"common/bin/derivation-filter.fst\" \
		.o. @\"$(GTLANG)/bin/focus-filter.fst\" \
		.o. @\"$(GTLANG)/bin/$(GTLANG)-norm.save\" \
		.o. @\"common/bin/downcase-derived-proper.fst\" ; " \
		-e "invert net" \
		-e "save stack $@" \
		-stop

# Here we build a restrictive generator, an inverted transducer of the restrictive
# analyzer, which gives only ONE form for each analysis. The philosophy is that this shall
# be used for speech generation. It is dependent upon sm*-restr.save
## ifst-restr: inverse-$(DIALECT).restr.fst
## i$(GTLANG)-$(DIALECT).restr.fst: inverse-$(DIALECT).restr.fst
## inverse-$(DIALECT).restr.fst: $(GTLANG)/bin/i$(GTLANG)-$(DIALECT).restr.fst
## $(GTLANG)/bin/i$(GTLANG)-$(DIALECT).restr.fst: \
##         common/bin/tag-not-save.fst \
##         common/bin/remove-variant-homonym-tags.fst \
##         common/bin/tag-not-save-but-oahpa.fst \
##         common/bin/downcase-derived-proper.fst \
##         common/bin/derivation-filter.fst \
##         $(GTLANG)/bin/focus-filter.fst \
##         $(GTLANG)/bin/$(GTLANG)-$(DIALECT).restr.save
## 	@echo
## 	@echo "*** Building the restrictive, inverse $@ ***"
## 	@echo
## 	$(XFST) -e "read regex @\"common/bin/tag-not-save.fst\" \
## 		.o. @\"common/bin/tag-not-save-but-oahpa.fst\" \
## 		.o. @\"common/bin/remove-variant-homonym-tags.fst\" \
## 		.o. @\"common/bin/derivation-filter.fst\" \
## 		.o. @\"$(GTLANG)/bin/focus-filter.fst\" \
## 		.o. @\"$(GTLANG)/bin/$(GTLANG)-$(DIALECT).restr.save\" \
## 		.o. @\"common/bin/downcase-derived-proper.fst\" ; " \
## 		-e "invert net" \
## 		-e "save stack $@" \
## 		-stop
##
## # Here we build a restrictive generator, an inverted transducer of the restrictive
## # analyzer, just as inf-restr above. In addition to ifst-restr, it also contains
## # tags Actor and G3 important for governing the precise behaviour of paradigms in Oahpa.
##
ifst-restr: inverse-$(DIALECT).restr.fst
i$(GTLANG)-$(DIALECT).restr.fst: inverse-$(DIALECT).restr.fst
inverse-$(DIALECT).restr.fst: $(GTLANG)/bin/i$(GTLANG)-$(DIALECT).restr.fst
$(GTLANG)/bin/i$(GTLANG)-$(DIALECT).restr.fst: \
        common/bin/tag-not-save.fst \
        common/bin/remove-variant-homonym-tags.fst \
        common/bin/downcase-derived-proper.fst \
        common/bin/derivation-filter.fst \
        $(GTLANG)/bin/focus-filter.fst \
        $(GTLANG)/bin/$(GTLANG)-$(DIALECT).restr.save
	@echo
	@echo "*** Building the restrictive, inverse $@ ***"
	@echo
	$(XFST) -e "read regex @\"common/bin/tag-not-save.fst\" \
		.o. @\"common/bin/remove-variant-homonym-tags.fst\" \
		.o. @\"common/bin/derivation-filter.fst\" \
		.o. @\"$(GTLANG)/bin/focus-filter.fst\" \
		.o. @\"$(GTLANG)/bin/$(GTLANG)-$(DIALECT).restr.save\" \
		.o. @\"common/bin/downcase-derived-proper.fst\" ; " \
		-e "invert net" \
		-e "save stack $@" \
		-stop

# Here we build a normative generator with hyphenation, an inverted transducer
# of the normative analyzer, used for paradigm generation.
hi-norm: hi-norm.fst
hi-norm.fst: h-inverse-norm.fst
hifst-norm: h-inverse-norm.fst
hi$(GTLANG)-norm: h-inverse-norm.fst
hi$(GTLANG)-norm.fst: h-inverse-norm.fst
h-inverse-norm.fst: $(GTLANG)/bin/hi$(GTLANG)-norm.fst
$(GTLANG)/bin/hi$(GTLANG)-norm.fst: \
        common/bin/tag-not-save.fst \
        common/bin/remove-variant-homonym-tags.fst \
        common/bin/tag-not-save-but-oahpa.fst \
        common/bin/usage-tags-remove.fst \
        common/bin/downcase-derived-proper.fst \
        common/bin/derivation-filter.fst \
        $(GTLANG)/bin/focus-filter.fst \
        $(GTLANG)/bin/hyph-$(GTLANG).save \
        $(GTLANG)/bin/hyphrules-$(GTLANG).fst
	@echo
	@echo "*** Building the normative, inverse, hyphenated $@ ***"
	@echo
	$(XFST) -e "read regex @\"common/bin/tag-not-save.fst\" \
		.o. @\"common/bin/tag-not-save-but-oahpa.fst\" \
		.o. @\"common/bin/remove-variant-homonym-tags.fst\" \
		.o. @\"common/bin/usage-tags-remove.fst\" \
		.o. @\"common/bin/derivation-filter.fst\" \
		.o. @\"$(GTLANG)/bin/focus-filter.fst\" \
		.o. @\"$(GTLANG)/bin/hyph-$(GTLANG).save\" \
		.o. @\"common/bin/downcase-derived-proper.fst\" ; " \
		-e "set flag-is-epsilon ON" \
		-e "read regex @\"$(GTLANG)/bin/hyphrules-$(GTLANG).fst\".i ; " \
		-e "turn stack" \
		-e "compose net" \
		-e "invert net" \
		-e "save stack $@" \
		-stop

# We want an analyzer with POS tags only. It takes the linguistic
# fst as input and gives us an alternate pos.fst.
# ($< is the tag filter, the first prerequisite.)
pos.fst: $(GTLANG)/bin/pos-$(GTLANG).fst
$(GTLANG)/bin/pos-$(GTLANG).fst: \
        common/bin/tag-pos.fst \
        $(GTLANG)/bin/$(GTLANG).fst
	@echo
	@echo "*** Building $@ with POS tags ***"
	@echo
	$(CFST) -e "read regex @\"$<\" \
		.o. @\"$(GTLANG)/bin/$(GTLANG).fst\" ; " \
		-e "save stack $@" \
		-stop

# We want an analyzer with valency information. It takes the ordinary fst
# as input and gives us an alternate val-LANG.fst with valency info
val.fst: $(GTLANG)/bin/val-$(GTLANG).fst
$(GTLANG)/bin/val-$(GTLANG).fst: \
        $(GTLANG)/bin/valency-$(GTLANG).fst \
        $(GTLANG)/bin/$(GTLANG).fst
	@echo
	@echo "*** Building $@ ***"
	@echo
	$(CFST) -e "read regex @\"$<\" \
		.o. @\"$(GTLANG)/bin/$(GTLANG).fst\" ; " \
		-e "save stack $@" \
		-stop

# In order to make val-$(GTLANG).fst we need a binary valency-$(GTLANG).fst
# This goal depends on valency-$(GTLANG).regex
valtag.fst: $(GTLANG)/bin/valency-$(GTLANG).fst
$(GTLANG)/bin/valency-$(GTLANG).fst: $(GTLANG)/src/valency-$(GTLANG).regex
	@echo
	@echo "*** Building $@ ***" ;
	@echo
	$(XFST) -e "read regex < $< " \
		-e "save stack $@" \
		-stop

# We want an analyzer with Norwegian tags. It takes the linguistic
# fst as input and gives us an alternate n-$(GTLANG).fst
n-$(GTLANG).fst: $(GTLANG)/bin/n-$(GTLANG).fst
$(GTLANG)/bin/n-$(GTLANG).fst: \
        common/bin/tag-no.fst \
        $(GTLANG)/bin/$(GTLANG).fst
	@echo
	@echo "*** Building $@ ***"
	@echo
	$(CFST) -e "read regex @\"$<\" \
		.o. @\"$(GTLANG)/bin/$(GTLANG).fst\" ; " \
		-e "save stack $@" \
		-stop

# We also want an analyzer with Sami tags. It takes the linguistic
# sme.fst as input and gives us an alternate s-sme.fst
s-tag.fst: $(GTLANG)/bin/s-$(GTLANG).fst
$(GTLANG)/bin/s-$(GTLANG).fst: \
        common/bin/tag-$(GTLANG).fst \
        $(GTLANG)/bin/$(GTLANG).fst
	@echo
	@echo "*** Building $@ ***"
	@echo
	$(CFST) -e "read regex @\"$<\" \
		.o. @\"$(GTLANG)/bin/$(GTLANG).fst\" ; " \
		-e "save stack $@" \
		-stop

# In order to make s-$(GTLANG).fst we need a binary tag-$(GTLANG).fst
# This goal depends on tag-$(GTLANG).regex
tag.fst: common/bin/tag-$(GTLANG).fst
common/bin/tag-$(GTLANG).fst: common/src/tag-$(GTLANG).regex
	@echo
	@echo "*** Building $@ ***" ;
	@echo
	$(XFST) -e "read regex < $< " \
		-e "save stack $@" \
		-stop

# This goal is to build the final analyser.
# $(GTLANG)-num.fst and webadr.fst are prerequisites although the recipe does
# not read them. use-NA-filter.regex is read by the recipe and is therefore
# listed, so that editing it rebuilds the analyser.
# NOTE(review): the regex is wrapped in ( ... ), which in xfst regular
# expressions denotes optionality rather than grouping - confirm this is
# intended.
fst: $(GTLANG)/bin/$(GTLANG).fst
$(GTLANG)/bin/$(GTLANG).fst: \
        $(GTLANG)/bin/$(GTLANG).save \
        $(GTLANG)/bin/$(GTLANG)-num.fst \
        common/bin/remove-variant-homonym-tags.fst \
        common/bin/usage-tags-remove.fst \
        common/bin/inituppercase.fst \
        common/bin/spellrelax.fst \
        common/bin/downcase-derived-proper.fst \
        common/bin/webadr.fst \
        common/src/use-NA-filter.regex
	@echo
	@echo "*** Building $@ ***" ;
	@echo
	$(CFST) -e "read regex ( \
		     @\"common/bin/usage-tags-remove.fst\" \
		.o. @\"common/bin/remove-variant-homonym-tags.fst\" \
		.o. @re\"common/src/use-NA-filter.regex\" \
		.o. @\"$(GTLANG)/bin/$(GTLANG).save\" \
		.o. @\"common/bin/inituppercase.fst\" \
		.o. @\"common/bin/downcase-derived-proper.fst\" \
		.o. @\"common/bin/spellrelax.fst\" ) ; " \
		-e "save stack $@" \
		-stop

# Here I added coseconv to the -norm.fst in order to analyse Mun etc.
fst-norm: $(GTLANG)/bin/$(GTLANG)-norm.fst
$(GTLANG)-norm.fst: $(GTLANG)/bin/$(GTLANG)-norm.fst
$(GTLANG)/bin/$(GTLANG)-norm.fst: \
        common/bin/usage-tags-remove.fst \
        common/bin/inituppercase.fst \
        common/bin/downcase-derived-proper.fst \
        $(GTLANG)/bin/$(GTLANG)-norm.save \
        common/src/use-NA-filter.regex
	@echo
	@echo "*** Building $@ ***" ;
	@echo
	$(CFST) -e "read regex \
		     @\"common/bin/usage-tags-remove.fst\" \
		.o. @re\"common/src/use-NA-filter.regex\" \
		.o. @\"$(GTLANG)/bin/$(GTLANG)-norm.save\" \
		.o. @\"common/bin/inituppercase.fst\" \
		.o. @\"common/bin/downcase-derived-proper.fst\" ; " \
		-e "save stack $@" \
		-stop

# Restrictive analyser for the selected DIALECT.
fst-restr: $(GTLANG)/bin/$(GTLANG)-$(DIALECT).restr.fst
$(GTLANG)/bin/$(GTLANG)-$(DIALECT).restr.fst: \
        common/bin/usage-tags-remove.fst \
        common/bin/downcase-derived-proper.fst \
        $(GTLANG)/bin/$(GTLANG)-$(DIALECT).restr.save
	@echo
	@echo "*** Building $@ ***" ;
	@echo
	$(CFST) -e "read regex \
		     @\"common/bin/usage-tags-remove.fst\" \
		.o. @\"$(GTLANG)/bin/$(GTLANG)-$(DIALECT).restr.save\" \
		.o. @\"common/bin/downcase-derived-proper.fst\" ; " \
		-e "save stack $@" \
		-stop

# Here we build an analyser that doesn't include semantic tags.
fst-site: site.fst
$(GTLANG)-site.fst: site.fst
site.fst: $(GTLANG)/bin/$(GTLANG)-site.fst
$(GTLANG)/bin/$(GTLANG)-site.fst: \
        common/bin/tag-not-save.fst \
        common/bin/remove-variant-homonym-tags.fst \
        common/bin/tag-not-save-but-oahpa.fst \
        common/bin/usage-tags-remove.fst \
        common/bin/downcase-derived-proper.fst \
        $(GTLANG)/bin/$(GTLANG).save
	@echo
	@echo "*** Building $@ ***"
	@echo
	$(XFST) -e "read regex @\"common/bin/tag-not-save.fst\" \
		.o. @\"common/bin/tag-not-save-but-oahpa.fst\" \
		.o. @\"common/bin/remove-variant-homonym-tags.fst\" \
		.o. @\"common/bin/usage-tags-remove.fst\" \
		.o. @\"$(GTLANG)/bin/$(GTLANG).save\" \
		.o. @\"common/bin/downcase-derived-proper.fst\" ; " \
		-e "save stack $@" \
		-stop

# Hyphenator: inverted hyph save file composed with the normative analyser,
# then composed with the hyphenation rules.
hyph: hyph-$(GTLANG).fst
hyph-$(GTLANG).fst: $(GTLANG)/bin/hyph-$(GTLANG).fst
$(GTLANG)/bin/hyph-$(GTLANG).fst: \
        $(GTLANG)/bin/hyph-$(GTLANG).save \
        $(GTLANG)/bin/$(GTLANG)-norm.fst \
        $(GTLANG)/bin/hyphrules-$(GTLANG).fst
	@echo
	@echo "*** Building $@ ***" ;
	@echo
	$(CFST) -e "read regex @\"$(GTLANG)/bin/hyph-$(GTLANG).save\".i \
		.o. @\"$(GTLANG)/bin/$(GTLANG)-norm.fst\" ; " \
		-e "set flag-is-epsilon ON" \
		-e "read regex @\"$(GTLANG)/bin/hyphrules-$(GTLANG).fst\" ; " \
		-e "compose net" \
		-e "save stack $@" \
		-stop

# This goal is to make a regex for hyphenator rules
hyphrules: hyphrules.fst
hyphrules.fst: $(GTLANG)/bin/hyphrules-$(GTLANG).fst
$(GTLANG)/bin/hyphrules-$(GTLANG).fst: $(GTLANG)/src/hyph-$(GTLANG).txt
	@echo
	@echo "*** Building $@ ***" ;
	@echo
	$(XFST) -e "source $<" \
		-e "save stack $@" \
		-stop

# The following goals are to make ped-$LANG.fst, a file
# marking certain errors as such.

# This goal makes the pedagogical fst based on the NORMATIVE fst:
# ($< is $(GTLANG)/int/properr.fst.)
ped-fst: $(GTLANG)/bin/ped-$(GTLANG).fst
$(GTLANG)/bin/ped-$(GTLANG).fst: \
        $(GTLANG)/int/properr.fst \
        $(GTLANG)/bin/$(GTLANG)-norm.fst \
        common/bin/speller-tags-remove.fst \
        common/bin/usage-tags-remove.fst
	@echo
	@echo "*** Building $@ ***" ;
	@echo
	$(CFST) -e "read regex [ \
		     @\"common/bin/speller-tags-remove.fst\" \
		.o. @\"common/bin/usage-tags-remove.fst\" \
		.o. @\"$<\" \
		.o. @\"common/bin/usage-tags-remove.fst\".i ] \
		| @\"$(GTLANG)/bin/$(GTLANG)-norm.fst\" ; " \
		-e "save stack $@ " \
		-stop

# This goal makes the pedagogical fst based on the DESCRIPTIVE fst:
ped-tol-fst: $(GTLANG)/bin/ped-tol-$(GTLANG).fst
$(GTLANG)/bin/ped-tol-$(GTLANG).fst: \
        $(GTLANG)/int/properr.fst \
        $(GTLANG)/bin/$(GTLANG).fst \
        common/bin/speller-tags-remove.fst \
        common/bin/usage-tags-remove.fst
	@echo
	@echo "*** Building $@ ***" ;
	@echo
	$(CFST) -e "read regex [ \
		     @\"common/bin/speller-tags-remove.fst\" \
		.o. @\"common/bin/usage-tags-remove.fst\" \
		.o. @\"$<\" \
		.o. @\"common/bin/usage-tags-remove.fst\".i ] \
		| @\"$(GTLANG)/bin/$(GTLANG).fst\" ; " \
		-e "save stack $@ " \
		-stop

# This transducer only finds downcased names
# The two regex sources read by the recipe are listed as prerequisites so
# that editing them rebuilds the transducer. $< stays $(GTLANG).save.
properr.fst: $(GTLANG)/int/properr.fst
$(GTLANG)/int/properr.fst: $(GTLANG)/bin/$(GTLANG).save \
        common/bin/errdown.fst \
        common/src/lowercaseerr.regex \
        common/src/prop-save-filter.regex
	@echo
	@echo "*** Building $@ ***" ;
	@echo
	$(CFST) -e "read regex @re\"common/src/lowercaseerr.regex\" \
		.o. @re\"common/src/prop-save-filter.regex\" \
		.o. @\"$<\" \
		.o. @\"common/bin/errdown.fst\" ; " \
		-e "save stack $@ " \
		-stop

# New targets to build save fst's with using filters

# The nonrec-$GTLANG.fst file combines the *.save file with a filter to
# remove unwanted derivational patterns.
nonrec: $(GTLANG)/bin/nonrec-$(GTLANG).fst
$(GTLANG)/bin/nonrec-$(GTLANG).fst: \
        common/bin/downcase-derived-proper.fst \
        common/bin/usage-tags-remove.fst \
        common/bin/derivation-filter.fst \
        common/src/use-circ-filter.regex \
        common/src/use-sub-filter.regex \
        $(GTLANG)/bin/focus-filter.fst \
        $(GTLANG)/bin/$(GTLANG).save
	@echo
	@echo "*** Building $@ ***" ;
	@echo
	$(CFST) -e "read regex \
		     @\"common/bin/derivation-filter.fst\" \
		.o. @\"common/bin/usage-tags-remove.fst\" \
		.o. @re\"common/src/use-circ-filter.regex\" \
		.o. @re\"common/src/use-sub-filter.regex\" \
		.o. @\"$(GTLANG)/bin/focus-filter.fst\" \
		.o. @\"$(GTLANG)/bin/$(GTLANG).save\" \
		.o. @\"common/bin/downcase-derived-proper.fst\" ; " \
		-e "save stack $@" \
		-stop

# This goal is to build a restrictive smX.save
# Note that usage-tags-remove.fst is placed __over__ the NG line,
# in order not to remove the NG tag that the NG filter shall use.
# Every file the recipe reads is listed as a prerequisite; $< stays
# $(GTLANG).save, which must therefore remain first.
save-restr: $(GTLANG)/bin/$(GTLANG)-$(DIALECT).restr.save
$(GTLANG)/bin/$(GTLANG)-$(DIALECT).restr.save: \
        $(GTLANG)/bin/$(GTLANG).save \
        common/bin/usage-tags-remove.fst \
        common/src/use-sub-filter.regex \
        common/src/use-NG-filter.regex \
        common/src/dial-$(DIALECT)-filter.regex
	@echo
	@echo "*** Building $@ ***" ;
	@echo
	$(XFST) -e "read regex @\"common/bin/usage-tags-remove.fst\" \
		.o. @re\"common/src/use-sub-filter.regex\" \
		.o. @re\"common/src/use-NG-filter.regex\" \
		.o. @re\"common/src/dial-$(DIALECT)-filter.regex\" \
		.o. @\"$<\" \
		.o. @re\"common/src/use-sub-filter.regex\" ; " \
		-e "save stack $@ " \
		-stop

# The goal is to build a normative smX.save
save-norm: $(GTLANG)/bin/$(GTLANG)-norm.save
$(GTLANG)/bin/$(GTLANG)-norm.save: \
        $(GTLANG)/bin/$(GTLANG).save \
        common/src/use-sub-filter.regex
	@echo
	@echo "*** Building $@ ***" ;
	@echo
	$(XFST) -e "read regex @re\"common/src/use-sub-filter.regex\" \
		.o. @\"$<\" \
		.o. @re\"common/src/use-sub-filter.regex\" ; " \
		-e "save stack $@ " \
		-stop

# This is a normative transducer with lexical hyphenation points, used
# for lower morphosyntactic representation and upper (hyphenated) wordform.
#save-ihyph: $(GTLANG)/bin/hyph-i$(GTLANG).save
#$(GTLANG)/bin/hyph-i$(GTLANG).save: \
#		$(GTLANG)/bin/hyph-$(GTLANG).save
#	@echo
#	@echo "*** Building $@ ***" ;
#	@echo
#	$(XFST) -e "load stack $<" \
#		-e "invert net" \
#		-e "save stack $@" \
#		-stop

# This is a non-inverted version of the previous one. It is used to
# generate wordforms with hyphenation mark for input to speller generation, used
# for upper morphosyntactic representation and lower wordform.
save-hyph: $(GTLANG)/bin/hyph-$(GTLANG).save
$(GTLANG)/bin/hyph-$(GTLANG).save: \
        $(GTLANG)/bin/$(GTLANG)-hyph-twolc.save
	@echo
	@echo "*** Building $@ ***" ;
	@echo
	$(XFST) -e "read regex @\"$<\" ; " \
		-e "save stack $@" \
		-stop

# Build a regular save file by removing the morph borders:
save: $(GTLANG)/bin/$(GTLANG).save
$(GTLANG)/bin/$(GTLANG).save: \
        $(GTLANG)/bin/hyph-$(GTLANG).save \
        common/src/remove-morph-borders.regex
	@echo
	@echo "*** Building $@ ***" ;
	@echo
	$(XFST) -e "read regex @\"$<\" \
		.o. @re\"common/src/remove-morph-borders.regex\" ; " \
		-e "save stack $@" \
		-stop

# Lex+twol transducer - this is the all-in-one transducer, the starting point
# for most everything - all hyphenation points and morph borders are visible:
# The recipe writes a small lexc command script into tmp/, feeds it to lexc
# on stdin and removes it again; tmp/ is created first in case it is missing.
save-hyph-twolc: $(GTLANG)/bin/$(GTLANG)-hyph-twolc.save
$(GTLANG)/bin/$(GTLANG)-hyph-twolc.save: $(GTLANG)/bin/$(GTLANG)-lexc.save \
        $(GTLANG)/bin/twol-$(GTLANG).bin
	@echo
	@echo "*** Building $@ ***" ;
	@echo
	@$(MKDIR) tmp
	printf "read-source $< \n\
		read-rules $(GTLANG)/bin/twol-$(GTLANG).bin \n\
		compose-result \n\
		save-result $@ \n\
		quit \n" > tmp/save-hyph-script-$(GTLANG)
	$(LEXC) < tmp/save-hyph-script-$(GTLANG)
	rm -f tmp/save-hyph-script-$(GTLANG)

# This is the new all inclusive lexical fst before the twolc rules are applied.
# The POS tag +N is moved in front of the sub-pos tag +G3.
# This solution is only a work-around in lack of a more general and language
# independent solution.
# The recipe concatenates all lexc sources into one file under
# $(GTLANG)/int and compiles that file; $(SPFST) injects the optional
# extra commands selected by the SP variable.
# The short goal names in this section are aliases for the real files; they
# are declared phony so that a stray file with the same name cannot mask them.
.PHONY: save-lexc
save-lexc: $(GTLANG)/bin/$(GTLANG)-lexc.save
$(GTLANG)/bin/$(GTLANG)-lexc.save: $(SRCS)
	@echo
	@echo "*** Building $@ ***" ;
	@echo
	@$(MKDIR) $(GTLANG)/int
	@cat $(SRCS) > $(GTLANG)/int/all-$(GTLANG)-lex.txt
	$(XFST) \
	    -e "read lexc $(GTLANG)/int/all-$(GTLANG)-lex.txt" \
	    -e "compose net" \
	    $(SPFST) \
	    -e "read regex [ 0 @< %+N || [%+G3 | %+G7] [ ? - %+Cmp ]* _ ] ;" \
	    -e "read regex [ %+N <- [. .] || _ [%+G3 | %+G7] ] ;" \
	    -e "compose net" \
	    -e "save stack $@ " \
	    -stop

# The second goal is to build twol-smX.bin
# This goal depends on twol-smX.txt
# The recipe writes a command script into tmp/, feeds it to twolc on stdin
# and removes it again; tmp/ is created first so the redirection cannot fail
# on a fresh checkout.
# The continuation lines inside the printf string start with exactly one TAB
# (which make strips), so no extra whitespace ends up in the script.
.PHONY: twol
twol: $(GTLANG)/bin/twol-$(GTLANG).bin
$(GTLANG)/bin/twol-$(GTLANG).bin: $(GTLANG)/src/twol-$(GTLANG).txt
	@echo
	@echo "*** Building $@ ***" ;
	@echo
	@$(MKDIR) tmp
	@printf "read-grammar $< \n\
	compile \n\
	save-binary $@ \n\
	quit \n" > tmp/twol-script-$(GTLANG)
	$(TWOLC) < tmp/twol-script-$(GTLANG)
	@rm -f tmp/twol-script-$(GTLANG)

## This target builds a binary CG file (currently disabled):
#dis-bin: $(GTLANG)/bin/$(GTLANG)-dis.bin
#$(GTLANG)/bin/$(GTLANG)-dis.bin: $(GTLANG)/src/$(GTLANG)-dis.rle
#	@echo
#	@echo "*** Building $@ ***"
#	@echo "Note: This requires a relatively new vislcg3 file."
#	@echo "The morphological parsers are not affected if you"
#	@echo "fail to build this file, it is for disambiguation."
#	@echo
#	@echo
#	$(VISLCG3) -g $^ \
#		--grammar-only --grammar-bin $@

# This is the common dep-bin target for sme, smj, sma
# The output directory is created first, since no rule visible in this
# section creates smi/bin.
.PHONY: smi-dep
smi-dep: smi/bin/smi-dep.bin
smi/bin/smi-dep.bin: ../giella-shared/smi/src/syntax/dependency.cg3
	@echo
	@echo "*** Building $@ ***"
	@echo "Note: This requires a relatively new vislcg3 file."
	@echo "The morphological parsers are not affected if you"
	@echo "fail to build this file, it's for dependency syntax."
	@echo
	@$(MKDIR) $(@D)
	$(VISLCG3) --grammar $^ \
	    --grammar-only --grammar-bin $@

# This is the dep-bin target for languages other than sme, smj, sma
# It is currently not in use, but awaits the inclusion of other lgs into this Makefile.
.PHONY: dep-bin
dep-bin: $(GTLANG)/bin/$(GTLANG)-dep.bin
$(GTLANG)/bin/$(GTLANG)-dep.bin: $(GTLANG)/src/$(GTLANG)-dep.rle
	@echo
	@echo "*** Building $@ ***"
	@echo "Note: This requires a relatively new vislcg3 file."
	@echo "The morphological parsers are not affected if you"
	@echo "fail to build this file, it's for dependency syntax."
	@echo
	$(VISLCG3) --grammar $^ \
	    --grammar-only --grammar-bin $@

# =========================== #
# Building preprocessor files #
# =========================== #

# Here we build d-sme.fst, the morphological tagger geared towards
# disambiguation.
# $< is the first prerequisite, i.e. common/bin/dis-tag.fst, which is
# composed on top of the plain analyser.
.PHONY: disamb.fst
disamb.fst: $(GTLANG)/bin/d-$(GTLANG).fst
$(GTLANG)/bin/d-$(GTLANG).fst: \
    common/bin/dis-tag.fst \
    $(GTLANG)/bin/$(GTLANG).fst
	@echo
	@echo "*** Building $@, fst w/ tags for disambiguation ***"
	@echo
	$(XFST) -e "read regex @\"$<\" \
	    .o. @\"$(GTLANG)/bin/$(GTLANG).fst\" ;" \
	    -e "save stack $@" \
	    -stop

# We want to make a parser with tags for parsing.
# In order to get that we make a tag modifier
# The short goal names in this section are aliases for the real files; they
# are declared phony so that a stray file with the same name cannot mask them.
.PHONY: dis-tag.fst
dis-tag.fst: common/bin/dis-tag.fst
common/bin/dis-tag.fst: common/src/dis-tag.txt
	@echo
	@echo "*** Building the tag manipulator $@ ***" ;
	@echo
	$(XFST) -e "source $<" \
	    -e "save stack $@" \
	    -stop

# Here we make the abbreviation file for our current preprocessor,
# the perl-based preprocess (located in the script catalogue)

# ABBRSRCS is $(SRCS) with the separating spaces replaced by commas, which is
# the list format passed to the --lex option below.
empty:=
comma:=,
space:=$(empty) $(empty)
ABBRSRCS=$(subst $(space),$(comma),$(SRCS))

# The extraction script writes to $@.tmp; the result is then stripped of the
# first "+MWE" on each line and of adjacent duplicate lines before it becomes $@.
# $< is the first prerequisite, i.e. the abbreviation lexicon.
.PHONY: abbr
abbr: $(GTLANG)/bin/abbr.txt
$(GTLANG)/bin/abbr.txt: \
    $(GTLANG)/src/abbr-$(GTLANG)-lex.txt \
    $(GTLANG)/src/$(GTLANG)-num.txt \
    $(GTLANG)/bin/i$(GTLANG).fst \
    $(SCRIPTDIR)/abbr-extract.pl \
    $(SCRIPTDIR)/langTools/Util.pm \
    cwb/paradigm.txt \
    cwb/korpustags.txt
	@echo
	@echo "*** Extracting abbreviations from $< to $@ ***" ;
	@echo
	@perl -I $(SCRIPTDIR) $(SCRIPTDIR)/abbr-extract.pl \
	    --paradigm=cwb/paradigm.txt \
	    --tags=cwb/korpustags.txt \
	    --fst=$(GTLANG)/bin/i$(GTLANG).fst \
	    --output=$@.tmp \
	    --abbr_lex=$< \
	    --lex=$(ABBRSRCS),$(GTLANG)/src/$(GTLANG)-num.txt,$(GTLANG)/src/abbr-$(GTLANG)-lex.txt
	@sed -e 's/\+MWE//' < $@.tmp | uniq > $@
	@rm -f $@.tmp

# Extract the first two fields of selected lines of the typos list.
# NOTE(review): the quoted grep patterns may originally have contained TAB
# characters (cut splits on TAB by default) - confirm against the repository.
.PHONY: corr
corr: $(GTLANG)/bin/corr.txt
$(GTLANG)/bin/corr.txt: $(GTLANG)/src/typos.txt
	grep -v ' ' $< | grep ' &' | cut -f1-2 > $@

# Here we build a transducer that gives us only the Sámi wordforms missing from
# our transducers. Non-Sámi words from Norwegian, Finnish, English, etc. are
# filtered out by this script, as are registered typos.
# The recipe only writes a small text file naming the transducers to use.
# The continuation lines inside the printf string start with exactly one TAB
# (which make strips), so no extra whitespace ends up in the file.
.PHONY: missing
missing: $(GTLANG)/bin/missing
$(GTLANG)/bin/missing:
	@echo
	@echo "*** Building $@ ***" ;
	@echo
	@printf "analyzer bin/$(GTLANG).fst\n\
	foreign ../common/bin/foreign.fst\n\
	typos ../common/bin/typos.fst\n\
	webadr ../common/bin/webadr.fst\n\n\
	analyzer\n\
	foreign\n\
	typos\n\
	webadr\n" > $@

# This target looks quite unfinished...
# The short goal names in this section are aliases for the real files; they
# are declared phony so that a stray file with the same name cannot mask them.
# NOTE(review): the prerequisite is common/src/allcaps.xfst, but the generated
# file refers to common/bin/allcaps.fst, which this rule does not build.
# The continuation lines inside the printf string start with exactly one TAB
# (which make strips), so no extra whitespace ends up in the file.
.PHONY: cap-$(GTLANG)
cap-$(GTLANG): $(GTLANG)/bin/cap-$(GTLANG)
$(GTLANG)/bin/cap-$(GTLANG): common/src/allcaps.xfst
	@echo
	@echo "*** Building $@ ***" ;
	@echo
	@printf "analyzer $(GTLANG)/bin/$(GTLANG).fst\n\
	allcaps common/bin/allcaps.fst\n\n\
	allcaps analyzer \n" > $@

# Union of the old and the new foreign word lists.
# $< is old-foreign.txt and $(word 2,$^) is new-foreign.txt, so the recipe
# cannot drift away from the prerequisite list.
.PHONY: foreign.fst
foreign.fst: common/bin/foreign.fst
common/bin/foreign.fst: $(SCRIPTDIR)/old-foreign.txt $(SCRIPTDIR)/new-foreign.txt
	@echo
	@echo "*** Building $@ ***" ;
	@echo
	$(XFST) -e "read text $<" \
	    -e "read text $(word 2,$^)" \
	    -e "union net" \
	    -e "save stack $@" \
	    -stop

# The new foreign word list on its own.
.PHONY: newforeign.fst
newforeign.fst: common/bin/new-foreign.fst
common/bin/new-foreign.fst: $(SCRIPTDIR)/new-foreign.txt
	@echo
	@echo "*** Building $@ ***" ;
	@echo
	$(XFST) -e "read text < $<" \
	    -e "save stack $@" \
	    -stop

# The old foreign word list on its own.
.PHONY: oldforeign.fst
oldforeign.fst: common/bin/old-foreign.fst
common/bin/old-foreign.fst: $(SCRIPTDIR)/old-foreign.txt
	@echo
	@echo "*** Building $@ ***" ;
	@echo
	$(XFST) -e "read text < $<" \
	    -e "save stack $@" \
	    -stop

# Note! This is an automaton, not a transducer.
.PHONY: typos typos.fst
typos: typos.fst
typos.fst: $(GTLANG)/bin/typos.fst
$(GTLANG)/bin/typos.fst: $(GTLANG)/bin/typoslist.txt
	@echo
	@echo "*** Building $@ ***" ;
	@echo
	$(XFST) -e "read text < $<" \
	    -e "save stack $@" \
	    -stop

# This document contains the list of typographical errors,
# __not__ the list of corrections.
# (The list is the first TAB-separated field of each line of typos.txt.)
# The short goal names in this section are aliases or commands, not files;
# they are declared phony so that a stray file with the same name (e.g. a file
# called "clean") cannot mask them.
.PHONY: typoslist.txt
typoslist.txt: $(GTLANG)/bin/typoslist.txt
$(GTLANG)/bin/typoslist.txt: $(GTLANG)/src/typos.txt
	@echo
	@echo "*** Building $@ ***" ;
	@echo
	@cut -f1 $< > $@

# Here we build a phonetic transducer
.PHONY: phon.fst
phon.fst: $(GTLANG)/bin/phon-$(GTLANG).fst
$(GTLANG)/bin/phon-$(GTLANG).fst: $(GTLANG)/src/phon-$(GTLANG).xfst
	@echo
	@echo "*** Building $@ ***" ;
	@echo
	$(XFST) -e "source $<" \
	    -e "save stack $@" \
	    -stop

# Here we build a hyphenator (currently disabled)
#hyph-$(GTLANG).fst: $(GTLANG)/bin/hyph-$(GTLANG).fst
#$(GTLANG)/bin/hyph-$(GTLANG).fst: $(GTLANG)/src/hyph-$(GTLANG).txt
#	@echo
#	@echo "*** Building $@ ***" ;
#	@echo
#	@printf "source $< \n\
#	save stack $@ \n\
#	quit \n" > tmp/hyph-script-$(GTLANG)
#	$(XFST) < tmp/hyph-script-$(GTLANG)
#	@rm -f tmp/hyph-script-$(GTLANG)

# Compile m4-rules in sme-dis.rle.
# Call with make dis GTLANG=sme or
# make dis M4FLAGS="-DFLAG_NAME" GTLANG=sme
# This is an obsolete M4 solution for cg2. Delete when dust settled.
# dis:dis.rle
# $(GTLANG)-dis.rle:dis.rle
# NAMEFLAGS=$(subst $(space),,$(M4FLAGS))
# dis.rle: $(GTLANG)/src/$(GTLANG)-dis.rle
#	$(M4) $(M4FLAGS) $< > $(GTLANG)/bin/$(GTLANG)-dis$(NAMEFLAGS).rle

###########################################
# Targets to test morphological transducers
###########################################

# Run the yaml test file ($<, the first prerequisite) through HfstTester.py.
# The two transducers are prerequisites only, so that they are up to date
# before the test runs.
.PHONY: fsttest
fsttest: $(GTLANG)/testing/$(GTLANG)-tests.yaml \
    $(GTLANG)/bin/$(GTLANG)-norm.fst \
    $(GTLANG)/bin/i$(GTLANG)-norm.fst
	@echo
	@echo "*** Testing $(GTLANG)-norm.fst using HfstTester.py ***" ;
	@echo
#	HfstTester.py -Cicv -S xerox $<
	HfstTester.py -icv -S xerox $<

# Analyse all baseforms and fail (exit status 2) if any output line
# contains a question mark.
# NOTE(review): the result file is left in place after a failure, so it is
# then newer than its prerequisites and a second "make bftest" will not
# re-run the check until a prerequisite changes or the file is removed.
.PHONY: bftest
bftest: tmp/$(GTLANG)-bf-analysed.txt
tmp/$(GTLANG)-bf-analysed.txt: \
    tmp/$(GTLANG)-baseforms.txt \
    $(GTLANG)/bin/$(GTLANG)-norm.fst
	@echo
	@echo "*** Testing all $(GTLANG) baseforms using the normative transducer ***" ;
	@echo
	$(LOOKUP) $(GTLANG)/bin/$(GTLANG)-norm.fst< $< > $@
	@if [ `grep '?' $@ | wc -l` -eq 0 ] ; then \
	    echo ; \
	    echo "All baseforms are accepted by the normative transducer!"; \
	    echo ; \
	else \
	    echo ; \
	    echo "ERROR!!!" ; \
	    echo "One or more baseforms were not accepted. Please have a look"; \
	    echo "at the file:"; \
	    echo "$@"; \
	    echo ; \
	    exit 2 ; \
	fi

#####################################
# Targets to test CG "products"
#####################################

# A general target, to run all CG tests at once (add more dependent
# targets as they become available):
.PHONY: cgtests cgtest
cgtests: cgtest
cgtest: distest deptest

# Disambiguate the lookup output of the test corpus ($<) and show the diff
# against the hand-corrected disambiguation. The CG compiler is called through
# $(VISLCG3), like the other vislcg3 rules in this file, so that it can be
# overridden in one place.
.PHONY: distest
distest: tmp/distest-$(GTLANG).txt
tmp/distest-$(GTLANG).txt: $(GTLANG)/corp/correct/testkorpus.lo.corr.txt \
    $(GTLANG)/corp/correct/testkorpus.dis.corr.txt \
    $(GTLANG)/src/$(GTLANG)-dis.rle
	cat $< | lookup2cg \
	    | $(VISLCG3) -g $(GTLANG)/src/$(GTLANG)-dis.rle > $@
	diff $(GTLANG)/corp/correct/testkorpus.dis.corr.txt $@ | see

# Same as distest, but with the dependency grammar applied after the
# disambiguation grammar; the diff is against the hand-corrected dependency
# analysis.
.PHONY: deptest
deptest: tmp/deptest-$(GTLANG).txt
tmp/deptest-$(GTLANG).txt: $(GTLANG)/corp/correct/testkorpus.lo.corr.txt \
    $(GTLANG)/corp/correct/testkorpus.dis.corr.txt \
    $(GTLANG)/corp/correct/testkorpus.dep.corr.txt \
    $(GTLANG)/src/$(GTLANG)-dis.rle \
    $(GTLANG)/src/$(GTLANG)-dep.rle
	lookup2cg $< \
	    | $(VISLCG3) -g $(GTLANG)/src/$(GTLANG)-dis.rle \
	    | $(VISLCG3) -g $(GTLANG)/src/$(GTLANG)-dep.rle > $@
	diff $(GTLANG)/corp/correct/testkorpus.dep.corr.txt $@ | see

# Finally an option to remove all the binary files
# (GTLANG is checked for emptiness at the top of this file, so the
# $(GTLANG)/... globs below cannot expand to paths under "/".)
.PHONY: clean
clean:
	rm -rf common/bin/*
#	rm -rf common/int/*
	rm -rf $(GTLANG)/bin/*
	rm -rf $(GTLANG)/int/*
	rm -f tmp/*-$(GTLANG)*
	rm -f tmp/$(GTLANG)-*
	rm -f $(GTLANG)/src/propernoun-$(GTLANG)-lex-tmp.txt
	rm -f $(GTLANG)/src/num-$(GTLANG)-lex.txt