sinclude $(GTLANG)/analyser.mk
# include phonrules.mk

# CFST   = xfst -utf8
# ======================================================= #
# Building the lexicon transducers (*.save, nonrec fst)   #
# ======================================================= #


# Build a temporary propernoun lexicon for $(GTLANG).
# The recipe concatenates the morphology file (2nd prerequisite) and the
# language's own propernoun lexicon (1st prerequisite). For every language
# other than sme it then appends a marker line and the output of
# script/smesmjdump.pl run on the North Sámi (sme) propernoun lexicon.
# The result is assembled in $@.tmp and renamed at the very end, so a failing
# dump script cannot leave a truncated but up-to-date-looking target behind.
propernoun-$(GTLANG)-lex-tmp.txt: $(GTLANG)/src/propernoun-$(GTLANG)-lex-tmp.txt
$(GTLANG)/src/propernoun-$(GTLANG)-lex-tmp.txt: \
		$(GTLANG)/src/propernoun-$(GTLANG)-lex.txt \
		$(GTLANG)/src/propernoun-$(GTLANG)-morph.txt \
		sme/src/propernoun-sme-lex.txt
	@echo
	@echo "*** Building $@ ***" ;
	@echo
	@cat $(word 2,$^) $< > $@.tmp
ifneq ($(GTLANG), sme)
	@echo "! <--- Dump from sme -->" >> $@.tmp
	script/smesmjdump.pl sme/src/propernoun-sme-lex.txt >> $@.tmp
endif
	@mv -f $@.tmp $@


# Build the descriptive save file $(GTLANG)/bin/$(GTLANG).save.
# Prerequisites: the descriptive twol rules binary (listed first, so it is $<)
# and the lexicon sources in $(SRCS).
# The recipe writes a command script and feeds it to $(LEXC) on stdin:
# compile-source $(SRCS), read-rules $<, compose-result, save-result $@, quit.
# The script name is derived from the target file name ($(@F)) so this rule
# does not share a scratch file with the -norm and -restr save rules when
# make runs with -j.
save: $(GTLANG)/bin/$(GTLANG).save
$(GTLANG)/bin/$(GTLANG).save: \
		$(GTLANG)/bin/twol-$(GTLANG)-descriptive.bin \
		$(SRCS)
	@echo
	@echo "*** Building $(GTLANG).save ***" ;
	@echo
	printf "compile-source $(SRCS) \n\
	read-rules $< \n\
	compose-result \n\
	save-result $@ \n\
	quit \n" > tmp/$(@F)-script
	$(LEXC) < tmp/$(@F)-script
	rm -f tmp/$(@F)-script


# Build the normative save file $(GTLANG)/bin/$(GTLANG)-norm.save.
# Prerequisites: the twol rules binary (listed first, so it is $<) and the
# lexicon sources in $(NORMFILES).
# The recipe writes a command script and feeds it to $(LEXC) on stdin:
# compile-source $(NORMFILES), read-rules $<, compose-result, save-result $@,
# quit.
# The script name is derived from the target file name ($(@F)) so this rule
# does not share a scratch file with the other save rules when make runs
# with -j.
save-norm: $(GTLANG)/bin/$(GTLANG)-norm.save
$(GTLANG)/bin/$(GTLANG)-norm.save: \
		$(GTLANG)/bin/twol-$(GTLANG).bin \
		$(NORMFILES)
	@echo
	@echo "*** Building $(GTLANG)-norm.save ***" ;
	@echo
	printf "compile-source $(NORMFILES) \n\
	read-rules $< \n\
	compose-result \n\
	save-result $@ \n\
	quit \n" > tmp/$(@F)-script
	$(LEXC) < tmp/$(@F)-script
	rm -f tmp/$(@F)-script


# Build the restrictive save file $(GTLANG)/bin/$(GTLANG)-restr.save.
# Prerequisites: the twol rules binary (listed first, so it is $<) and the
# lexicon sources in $(RESTRFILES).
# The recipe writes a command script and feeds it to $(LEXC) on stdin:
# compile-source $(RESTRFILES), read-rules $<, compose-result, save-result $@,
# quit.
# The script name is derived from the target file name ($(@F)) so this rule
# does not share a scratch file with the other save rules when make runs
# with -j.
save-restr: $(GTLANG)/bin/$(GTLANG)-restr.save
$(GTLANG)/bin/$(GTLANG)-restr.save: \
		$(GTLANG)/bin/twol-$(GTLANG).bin \
		$(RESTRFILES)
	@echo
	@echo "*** Building $(GTLANG)-restr.save ***" ;
	@echo
	printf "compile-source $(RESTRFILES) \n\
	read-rules $< \n\
	compose-result \n\
	save-result $@ \n\
	quit \n" > tmp/$(@F)-script
	$(LEXC) < tmp/$(@F)-script
	rm -f tmp/$(@F)-script


# Non-recursive variant of the save file: nonrec-$(GTLANG).save.
# Needs the twol rules binary ($<) and the lexicon files in $(NONRECFILES).
# A five-command script (compile-source, read-rules, compose-result,
# save-result, quit) is written to tmp/nonrec-script, fed to $(LEXC) on
# stdin, and removed afterwards.
nonrec.save: $(GTLANG)/bin/nonrec-$(GTLANG).save

$(GTLANG)/bin/nonrec-$(GTLANG).save: $(GTLANG)/bin/twol-$(GTLANG).bin $(NONRECFILES)
	@printf '\n%s\n\n' "*** Building nonrec-$(GTLANG).save ***"
	@printf "compile-source $(NONRECFILES) \nread-rules $< \n\
	compose-result \nsave-result $@ \nquit \n" > tmp/nonrec-script
	$(LEXC) < tmp/nonrec-script
	@rm -f tmp/nonrec-script

# nonrec-$(GTLANG).fst: the nonrec *.save file combined with a filter that
# removes unwanted derivational patterns.
# The xfst commands doing the combination come from $(NONREC), which is
# defined outside this section. The recipe writes them to a script, appends
# "save stack $@" and "quit", and runs the script through $(CFST).
nonrec: $(GTLANG)/bin/nonrec-$(GTLANG).fst

$(GTLANG)/bin/nonrec-$(GTLANG).fst: $(GTLANG)/bin/nonrec-$(GTLANG).save \
                                    $(GTLANG)/bin/derivation-filter.fst \
                                    common/bin/downcase.fst
	@printf '\n%s\n\n' "*** Building nonrec-$(GTLANG).fst ***"
	@printf "$(NONREC)" > tmp/nonrec-fst-script
	@printf "save stack $@ \nquit \n" >> tmp/nonrec-fst-script
	$(CFST) < tmp/nonrec-fst-script
	@rm -f tmp/nonrec-fst-script


# ======================================================= #
# Building different versions of the basic fst tagger     #
# ======================================================= #


# POS-only analyser: pos-$(GTLANG).fst.
# The script given to $(CFST) reads the regex
#   [@"common/bin/tag-pos.fst"] .o. [@"$(GTLANG)/bin/$(GTLANG).fst"]
# i.e. the tag-pos transducer ($<) composed with the linguistic fst, and
# saves the resulting stack as $@.
pos.fst: $(GTLANG)/bin/pos-$(GTLANG).fst

$(GTLANG)/bin/pos-$(GTLANG).fst: common/bin/tag-pos.fst $(GTLANG)/bin/$(GTLANG).fst
	@printf '\n%s\n\n' "*** Building pos-$(GTLANG).fst, $(GTLANG).fst with POS tags ***"
	@printf "read regex [[@\"$<\"] .o. [@\"$(GTLANG)/bin/$(GTLANG).fst\"]] ; \n\
	save stack $@ \nquit \n" > tmp/pos-fst-script
	$(CFST) < tmp/pos-fst-script
	@rm -f tmp/pos-fst-script

# Binary tag-pos.fst, needed by the pos.fst rule.
# Compiled from common/src/tag-pos.regex, a regex in which all tags except
# the POS one are deleted. The script for $(XFST) is:
# read regex < $<, save stack $@, quit.
tag-pos.fst: common/bin/tag-pos.fst

common/bin/tag-pos.fst: common/src/tag-pos.regex
	@printf '\n%s\n\n' "*** Building tag-pos.fst ***"
	@printf "read regex < $< \nsave stack $@ \nquit \n" > tmp/tag-pos-script
	$(XFST) < tmp/tag-pos-script
	@rm -f tmp/tag-pos-script


# Tag deleter for the generator: removes the +TV and +IV tags (and, later
# on, possibly other tags).
# Compiled by $(XFST) from common/src/tag-not-save.regex with the script:
# read regex < $<, save stack $@, quit.
tag-not-save.fst: common/bin/tag-not-save.fst

common/bin/tag-not-save.fst: common/src/tag-not-save.regex
	@printf '\n%s\n\n' "*** Building tag-not-save.fst ***"
	@printf "read regex < $< \nsave stack $@ \nquit \n" > tmp/tag-not-save-script
	$(XFST) < tmp/tag-not-save-script
	@rm -f tmp/tag-not-save-script


# tag-inclusion-filter.fst is compiled by $(XFST) from
# common/src/tag-inclusion-filter.regex with the script:
# read regex < $<, save stack $@, quit.
tag-inclusion-filter.fst: common/bin/tag-inclusion-filter.fst

common/bin/tag-inclusion-filter.fst: common/src/tag-inclusion-filter.regex
	@printf '\n%s\n\n' "*** Building tag-inclusion-filter.fst ***"
	@printf "read regex < $< \nsave stack $@ \nquit \n" > tmp/tag-inclusion-filter-script
	$(XFST) < tmp/tag-inclusion-filter-script
	@rm -f tmp/tag-inclusion-filter-script


# Analyser with Norwegian tags: n-$(GTLANG).fst.
# The script given to $(CFST) reads the regex
#   [@"common/bin/tag-no.fst"] .o. [@"$(GTLANG)/bin/$(GTLANG).fst"]
# i.e. the tag-no transducer ($<) composed with the linguistic fst, and
# saves the resulting stack as $@.
n-$(GTLANG).fst: $(GTLANG)/bin/n-$(GTLANG).fst

$(GTLANG)/bin/n-$(GTLANG).fst: common/bin/tag-no.fst $(GTLANG)/bin/$(GTLANG).fst
	@printf '\n%s\n\n' "*** Building n-$(GTLANG).fst, $(GTLANG).fst with Norwegian tags ***"
	@printf "read regex [[@\"$<\"] .o. [@\"$(GTLANG)/bin/$(GTLANG).fst\"]] ; \n\
	save stack $@ \nquit \n" > tmp/n-fst-script
	$(CFST) < tmp/n-fst-script
	@rm -f tmp/n-fst-script


# Binary tag-no.fst, needed by the n-$(GTLANG).fst rule.
# Compiled by $(XFST) from common/src/tag-no.regex with the script:
# read regex < $<, save stack $@, quit.
tag-no.fst: common/bin/tag-no.fst

common/bin/tag-no.fst: common/src/tag-no.regex
	@printf '\n%s\n\n' "*** Building tag-no.fst ***"
	@printf "read regex < $< \nsave stack $@ \nquit \n" > tmp/tag-no-script
	$(XFST) < tmp/tag-no-script
	@rm -f tmp/tag-no-script

# Analyser with Sami tags: s-$(GTLANG).fst.
# The script given to $(CFST) reads the regex
#   [@"common/bin/tag-$(GTLANG).fst"] .o. [@"$(GTLANG)/bin/$(GTLANG).fst"]
# i.e. the language's tag transducer ($<) composed with the linguistic fst,
# and saves the resulting stack as $@.
s-$(GTLANG).fst: $(GTLANG)/bin/s-$(GTLANG).fst

$(GTLANG)/bin/s-$(GTLANG).fst: common/bin/tag-$(GTLANG).fst $(GTLANG)/bin/$(GTLANG).fst
	@printf '\n%s\n\n' "*** Building s-$(GTLANG).fst, $(GTLANG).fst with Sami tags ***"
	@printf "read regex [[@\"$<\"] .o. [@\"$(GTLANG)/bin/$(GTLANG).fst\"]] ; \n\
	save stack $@ \nquit \n" > tmp/s-fst-script
	$(CFST) < tmp/s-fst-script
	@rm -f tmp/s-fst-script

# Binary tag-$(GTLANG).fst, needed by the s-$(GTLANG).fst rule.
# Compiled by $(XFST) from common/src/tag-$(GTLANG).regex with the script:
# read regex < $<, save stack $@, quit.
tag-$(GTLANG).fst: common/bin/tag-$(GTLANG).fst

common/bin/tag-$(GTLANG).fst: common/src/tag-$(GTLANG).regex
	@printf '\n%s\n\n' "*** Building tag-$(GTLANG).fst ***"
	@printf "read regex < $< \nsave stack $@ \nquit \n" > tmp/tag-script
	$(XFST) < tmp/tag-script
	@rm -f tmp/tag-script


# Build the final analyser $(GTLANG)/bin/$(GTLANG).fst.
# It depends on the helper transducers under common/bin, on the descriptive
# save file and on the language's number transducer.
# The xfst commands that combine them come from $(FST), which is defined
# outside this section. The recipe writes them to a script, appends
# "save stack $@" and "quit", and runs the script through $(CFST).
# The script name is derived from the target file name ($(@F)) so this rule
# does not share a scratch file with the -norm and -restr fst rules when
# make runs with -j.
fst: $(GTLANG)/bin/$(GTLANG).fst
$(GTLANG)/bin/$(GTLANG).fst: \
		common/bin/nohardhyphen.fst \
		common/bin/caseconv.fst \
		common/bin/spellrelax.fst \
		common/bin/downcase.fst \
		common/bin/webadr.fst \
		$(GTLANG)/bin/$(GTLANG).save \
		$(GTLANG)/bin/$(GTLANG)-num.fst
	@echo
	@echo "*** Building $(GTLANG).fst ***" ;
	@echo
	@printf "$(FST)" > tmp/$(@F)-script
	@printf "save stack $@ \n\
	quit \n" >> tmp/$(@F)-script
	$(CFST) < tmp/$(@F)-script
	@rm -f tmp/$(@F)-script


# Build the normative analyser $(GTLANG)/bin/$(GTLANG)-norm.fst from the
# normative save file plus the nohardhyphen and downcase transducers.
# The xfst commands come from $(FSTNORM), which is defined outside this
# section. The recipe writes them to a script, appends "save stack $@" and
# "quit", and runs the script through $(CFST).
# The script name is derived from the target file name ($(@F)) so this rule
# does not share a scratch file with the other fst rules when make runs
# with -j.
fst-norm: $(GTLANG)/bin/$(GTLANG)-norm.fst
$(GTLANG)/bin/$(GTLANG)-norm.fst: \
		common/bin/nohardhyphen.fst \
		common/bin/downcase.fst \
		$(GTLANG)/bin/$(GTLANG)-norm.save
	@echo
	@echo "*** Building $(GTLANG)-norm.fst ***" ;
	@echo
	@printf "$(FSTNORM)" > tmp/$(@F)-script
	@printf "save stack $@ \n\
	quit \n" >> tmp/$(@F)-script
	$(CFST) < tmp/$(@F)-script
	@rm -f tmp/$(@F)-script

# Build the restrictive analyser $(GTLANG)/bin/$(GTLANG)-restr.fst from the
# restrictive save file plus the nohardhyphen and downcase transducers.
# The xfst commands come from $(FSTRESTR), which is defined outside this
# section. The recipe writes them to a script, appends "save stack $@" and
# "quit", and runs the script through $(CFST).
# The script name is derived from the target file name ($(@F)) so this rule
# does not share a scratch file with the other fst rules when make runs
# with -j.
fst-restr: $(GTLANG)/bin/$(GTLANG)-restr.fst
$(GTLANG)/bin/$(GTLANG)-restr.fst: \
		common/bin/nohardhyphen.fst \
		common/bin/downcase.fst \
		$(GTLANG)/bin/$(GTLANG)-restr.save
	@echo
	@echo "*** Building $(GTLANG)-restr.fst ***" ;
	@echo
	@printf "$(FSTRESTR)" > tmp/$(@F)-script
	@printf "save stack $@ \n\
	quit \n" >> tmp/$(@F)-script
	$(CFST) < tmp/$(@F)-script
	@rm -f tmp/$(@F)-script


# Transducer for filenames, URLs and mail addresses.
# common/src/webadr.txt is run as an xfst script ("source $<") by $(XFST),
# and the resulting stack is saved as common/bin/webadr.fst.
webadr: webadr.fst
webadr.fst: common/bin/webadr.fst

common/bin/webadr.fst: common/src/webadr.txt
	@printf '\n%s\n\n' "*** Building webadr.fst ***"
	printf "source $< \n\
	save stack $@ \n\
	quit \n" > tmp/webadr-script
	$(XFST) < tmp/webadr-script
	rm -f tmp/webadr-script


# common/bin/num.fst, described in the original note as a regex for dates.
# $(LEXC) compiles common/src/num.txt with the script:
# compile-source $<, source-to-result, save-result $@, quit.
num: num.fst
num.fst: common/bin/num.fst

common/bin/num.fst: common/src/num.txt
	@printf '\n%s\n\n' "*** Building num.fst ***"
	printf "compile-source $< \n\
	source-to-result \n\
	save-result $@ \n\
	quit \n" > tmp/num-script
	$(LEXC) < tmp/num-script
	rm -f tmp/num-script


# spellrelax.fst allows for the Scandinavian ä/æ and ö/ø mix.
# Compiled by $(XFST) from common/src/spellrelax.regex with the script:
# read regex < $<, save stack $@, quit.
spellrelax: spellrelax.fst
spellrelax.fst: common/bin/spellrelax.fst

common/bin/spellrelax.fst: common/src/spellrelax.regex
	@printf '\n%s\n\n' "*** Building spellrelax.fst ***"
	@printf "read regex < $< \nsave stack $@ \nquit \n" > tmp/spellrelax-script
	$(XFST) < tmp/spellrelax-script
	@rm -f tmp/spellrelax-script


# caseconv.fst is built from common/src/case.regex.
# The resulting transducer allows for viessu / Viessu, i.e. initial casing
# of all the words in the lexicon.
# Script given to $(XFST): read regex < $<, save stack $@, quit.
caseconv: caseconv.fst
caseconv.fst: common/bin/caseconv.fst

common/bin/caseconv.fst: common/src/case.regex
	@printf '\n%s\n\n' "*** Building caseconv.fst ***"
	@printf "read regex < $< \nsave stack $@ \nquit \n" > tmp/caseconv-script
	$(XFST) < tmp/caseconv-script
	@rm -f tmp/caseconv-script


# downcase.fst allows for downcasing of derived names, such as
# oslolaš < Oslo.
# common/src/downcase.regex is run as an xfst script ("source $<") by
# $(XFST), and the resulting stack is saved as common/bin/downcase.fst.
downcase: downcase.fst
downcase.fst: common/bin/downcase.fst

common/bin/downcase.fst: common/src/downcase.regex
	@printf '\n%s\n\n' "*** Building downcase.fst ***"
	@printf "source $< \nsave stack $@ \nquit \n" > tmp/downcase-script
	$(XFST) < tmp/downcase-script
	@rm -f tmp/downcase-script


# allcaps.fst is built from common/src/allcaps.regex.
# The resulting transducer allows for all-caps words, such as VIESSU, OSLO,
# but not for e.g. VieSu, OslO.
# It is used in an xfst script (bin/cap-sme), but seldom, since it is slow.
# $(GTLANG)/bin/cap-$(GTLANG) is listed as a prerequisite, but the recipe
# only sources $< (the regex file).
allcaps: allcaps.fst
allcaps.fst: common/bin/allcaps.fst

common/bin/allcaps.fst: common/src/allcaps.regex $(GTLANG)/bin/cap-$(GTLANG)
	@printf '\n%s\n\n' "*** Building allcaps.fst ***"
	@printf "source $< \nsave stack $@ \nquit \n" > tmp/allcaps-script
	$(XFST) < tmp/allcaps-script
	@rm -f tmp/allcaps-script


# common/src/digraph-infl.regex is run as an xfst script ("source $<") by
# $(XFST), and the resulting stack is saved as common/bin/digraph-infl.fst.
digraph-infl.fst: common/bin/digraph-infl.fst

common/bin/digraph-infl.fst: common/src/digraph-infl.regex
	@printf '\n%s\n\n' "*** Building digraph-infl.fst ***"
	@printf "source $< \nsave stack $@ \nquit \n" > tmp/digraph-infl-script
	$(XFST) < tmp/digraph-infl-script
	@rm -f tmp/digraph-infl-script

# Target for the clock simulator: the inverted clock transducer.
# The script given to $(XFST) loads clock-$(GTLANG).fst, runs "invert net"
# and saves the stack as iclock-$(GTLANG).fst.
# The script is created with ">" (truncate), not ">>", so a script left
# over from an earlier failed run is never appended to and replayed.

iclock.fst: $(GTLANG)/bin/iclock-$(GTLANG).fst
$(GTLANG)/bin/iclock-$(GTLANG).fst: $(GTLANG)/bin/clock-$(GTLANG).fst
	@echo "*** iclock-$(GTLANG).fst ***"
	@printf "load < $(GTLANG)/bin/clock-$(GTLANG).fst \n\
	invert net \n\
	save stack $@ \n\
	quit \n" > tmp/iclock-script
	$(XFST) < tmp/iclock-script
	@rm -f tmp/iclock-script


# clock-$(GTLANG).fst is compiled by $(LEXC) from
# $(GTLANG)/src/clock-$(GTLANG).lexc with the script:
# compile-source $<, source-to-result, save-result $@, quit.
# The cleanup step removes the script from tmp/, where it was written.
clock.fst: $(GTLANG)/bin/clock-$(GTLANG).fst
$(GTLANG)/bin/clock-$(GTLANG).fst: $(GTLANG)/src/clock-$(GTLANG).lexc
	@echo "*** clock-$(GTLANG).fst ***"
	@printf "compile-source $< \n\
	source-to-result \n\
	save-result $@ \n\
	quit \n" > tmp/generate-clock-script
	$(LEXC) < tmp/generate-clock-script
	@rm -f tmp/generate-clock-script

# Language-specific number transducer $(GTLANG)-num.fst.
# $(LEXC) compiles the first prerequisite, $(GTLANG)/src/$(GTLANG)-num.txt,
# with the script: compile-source $<, source-to-result, save-result $@, quit.
# generated_nums-plx.txt is a prerequisite only; the recipe does not read it.
# Note that this adds a further prerequisite to the num.fst alias, which
# also depends on common/bin/num.fst.
# The cleanup step removes the script from tmp/, where it was written.
num.fst: $(GTLANG)/bin/$(GTLANG)-num.fst
$(GTLANG)/bin/$(GTLANG)-num.fst: \
		$(GTLANG)/src/$(GTLANG)-num.txt \
		$(GTLANG)/polderland/generated_nums-plx.txt
	@printf "compile-source $< \n\
	source-to-result \n\
	save-result $@ \n\
	quit \n" > tmp/generate-num-script
	$(LEXC) < tmp/generate-num-script
	@rm -f tmp/generate-num-script

# Build the final generator i$(GTLANG).fst, an inverted transducer of the
# analyser. It depends on $(GTLANG).save plus the tag-not-save and downcase
# transducers.
# The xfst commands that build the net come from $(INVERTNET), which is
# defined outside this section. The recipe writes them to a script, appends
# "invert net", "save stack $@" and "quit", and runs it through $(XFST).
# The script name is derived from the target file name ($(@F)) so this rule
# does not share a scratch file with the -norm and -restr generator rules
# when make runs with -j.

ifst: inverse.fst
inverse.fst: $(GTLANG)/bin/i$(GTLANG).fst
$(GTLANG)/bin/i$(GTLANG).fst: \
		common/bin/tag-not-save.fst \
		common/bin/downcase.fst \
		$(GTLANG)/bin/$(GTLANG).save
	@echo
	@echo "*** Building the inverse i$(GTLANG).fst ***"
	@echo
	@printf "$(INVERTNET)" > tmp/$(@F)-script
	@printf "invert net \n\
	save stack $@ \n\
	quit \n" >> tmp/$(@F)-script
	$(XFST) < tmp/$(@F)-script
	@rm -f tmp/$(@F)-script


# Build the normative generator i$(GTLANG)-norm.fst, an inverted transducer
# of the normative analyser. It depends on $(GTLANG)-norm.save plus the
# tag-not-save, downcase and derivation-filter transducers.
# The xfst commands that build the net come from $(INVERTNORM), which is
# defined outside this section. The recipe writes them to a script, appends
# "invert net", "save stack $@" and "quit", and runs it through $(XFST).
# The script name is derived from the target file name ($(@F)) so this rule
# does not share a scratch file with the other generator rules when make
# runs with -j.

ifst-norm: inverse-norm.fst
inverse-norm.fst: $(GTLANG)/bin/i$(GTLANG)-norm.fst
$(GTLANG)/bin/i$(GTLANG)-norm.fst: \
		common/bin/tag-not-save.fst \
		common/bin/downcase.fst \
		$(GTLANG)/bin/$(GTLANG)-norm.save \
		$(GTLANG)/bin/derivation-filter.fst
	@echo
	@echo "*** Building the normative, inverse i$(GTLANG)-norm.fst ***"
	@echo
	@printf "$(INVERTNORM)" > tmp/$(@F)-script
	@printf "invert net \n\
	save stack $@ \n\
	quit \n" >> tmp/$(@F)-script
	$(XFST) < tmp/$(@F)-script
	@rm -f tmp/$(@F)-script


# Build the restrictive generator i$(GTLANG)-restr.fst, an inverted
# transducer of the restrictive analyser, which gives only ONE form for each
# analysis. The philosophy is that this shall be used for speech generation.
# It depends on $(GTLANG)-restr.save plus the tag-not-save, downcase and
# derivation-filter transducers.
# This file will be parametrized later on, for $VARIANT.
# The xfst commands that build the net come from $(INVERTRESTR), which is
# defined outside this section. The recipe writes them to a script, appends
# "invert net", "save stack $@" and "quit", and runs it through $(XFST).
# The script name is derived from the target file name ($(@F)) so this rule
# does not share a scratch file with the other generator rules when make
# runs with -j.

ifst-restr: inverse-restr.fst
inverse-restr.fst: $(GTLANG)/bin/i$(GTLANG)-restr.fst
$(GTLANG)/bin/i$(GTLANG)-restr.fst: \
		common/bin/tag-not-save.fst \
		common/bin/downcase.fst \
		$(GTLANG)/bin/$(GTLANG)-restr.save \
		$(GTLANG)/bin/derivation-filter.fst
	@echo
	@echo "*** Building the restrictive, inverse i$(GTLANG)-restr.fst ***"
	@echo
	@printf "$(INVERTRESTR)" > tmp/$(@F)-script
	@printf "invert net \n\
	save stack $@ \n\
	quit \n" >> tmp/$(@F)-script
	$(XFST) < tmp/$(@F)-script
	@rm -f tmp/$(@F)-script


# Here we build a normative generator with hyphenation, an inverted transducer
# of the normative analyzer, used for paradigm generation.
#
# The five short names below are all aliases that lead to the one real file,
# $(GTLANG)/bin/hi$(GTLANG)-norm.fst.
#
# The xfst script is assembled in tmp/hi-norm-script in four steps:
#   1. read regex: tag-not-save .o. derivation-filter .o. hyph-$(GTLANG).save
#      .o. downcase
#   2. set flag-is-epsilon ON
#   3. read regex: the hyphrules-$(GTLANG).fst transducer with the .i operator
#   4. turn stack, compose net, invert net, save stack $@, quit
# It is then fed to $(XFST) on stdin and removed.

hi-norm: hi-norm.fst
hi-norm.fst: h-inverse-norm.fst
hifst-norm: h-inverse-norm.fst
hi$(GTLANG)-norm: h-inverse-norm.fst
hi$(GTLANG)-norm.fst: h-inverse-norm.fst
h-inverse-norm.fst: $(GTLANG)/bin/hi$(GTLANG)-norm.fst
$(GTLANG)/bin/hi$(GTLANG)-norm.fst: \
	common/bin/tag-not-save.fst \
	common/bin/downcase.fst \
	$(GTLANG)/bin/hyph-$(GTLANG).save \
	$(GTLANG)/bin/derivation-filter.fst \
	$(GTLANG)/bin/hyphrules-$(GTLANG).fst
	@echo
	@echo "*** Building the normative, inverse, hyphenated ***"
	@echo "***            hi$(GTLANG)-norm.fst                   ***"
	@echo 
# Step 1: create the script (">" truncates) with the four-way composition.
	@printf "read regex [ [@\"common/bin/tag-not-save.fst\"        ] .o. \n\
	[@\"$(GTLANG)/bin/derivation-filter.fst\"] .o. \n\
	[@\"$(GTLANG)/bin/hyph-$(GTLANG).save\"  ] .o. \n\
	[@\"common/bin/downcase.fst\"            ]	 \n\
	] ; \n" >tmp/hi-norm-script
# Steps 2 and 3: append the flag setting and the second regex.
	@printf "set flag-is-epsilon ON \n" >> tmp/hi-norm-script
	@printf "read regex [@\"$(GTLANG)/bin/hyphrules-$(GTLANG).fst\".i ] ; \n"\
	>> tmp/hi-norm-script
# Step 4: append the stack operations; this printf is not silenced with @,
# so make echoes the command.
	printf "turn stack \n\
	compose net \n\
	invert net \n\
	save stack $@ \n\
	quit \n" >> tmp/hi-norm-script
	$(XFST) < tmp/hi-norm-script
	@rm -f tmp/hi-norm-script

# derivation-filter.fst only allows derivations that follow a certain
# pattern; the original note says the pattern is described in
# $(GTLANG)-lex.txt.
# $(GTLANG)/src/derivation-filter.regex is run as an xfst script
# ("source $<") by $(XFST), and the resulting stack is saved as $@.
derivation-filter: derivation-filter.fst
derivation-filter.fst: $(GTLANG)/bin/derivation-filter.fst

$(GTLANG)/bin/derivation-filter.fst: $(GTLANG)/src/derivation-filter.regex
	@printf '\n%s\n\n' "*** Building derivation-filter.fst ***"
	@printf "source $< \nsave stack $@ \nquit \n" > tmp/derivation-filter-script
	$(XFST) < tmp/derivation-filter-script
	@rm -f tmp/derivation-filter-script


# remove-hyphen.fst removes hyphens so that the spellers work.
# Compiled by $(XFST) from common/src/remove-hyphen.regex with the script:
# read regex < $<, save stack $@, quit.
remove-hyphen.fst: common/bin/remove-hyphen.fst

common/bin/remove-hyphen.fst: common/src/remove-hyphen.regex
	@printf '\n%s\n\n' "*** Building $@ ***"
	@printf "read regex < $< \nsave stack $@ \nquit \n" > tmp/remove-hyphen-script
	$(XFST) < tmp/remove-hyphen-script
	@rm -f tmp/remove-hyphen-script


# =========================== #
# Building preprocessor files #
# =========================== #

# Transducer for foreign words.
# The script given to $(XFST) reads script/old-foreign.txt and
# script/new-foreign.txt with "read text", runs "union net" and saves the
# stack as common/bin/foreign.fst.
foreign.fst: common/bin/foreign.fst

common/bin/foreign.fst: script/old-foreign.txt script/new-foreign.txt
	@printf '\n%s\n\n' "*** Building a transducer for foreign words ***"
	@printf "read text script/old-foreign.txt \nread text script/new-foreign.txt  \n\
	union net \nsave stack $@ \nquit \n" > tmp/foreign-script
	$(XFST) < tmp/foreign-script
	@rm -f tmp/foreign-script

# Transducer for new foreign words only.
# The script given to $(XFST) is: read text < $<, save stack $@, quit,
# where $< is script/new-foreign.txt.
newforeign.fst: common/bin/new-foreign.fst

common/bin/new-foreign.fst: script/new-foreign.txt
	@printf '\n%s\n\n' "*** Our transducer for new foreign words ***"
	@printf "read text < $< \nsave stack $@ \nquit \n" > tmp/new-foreign-script
	$(XFST) < tmp/new-foreign-script
	@rm -f tmp/new-foreign-script

# Transducer for the old (ready-built) foreign word list only.
# The script given to $(XFST) is: read text < $<, save stack $@, quit,
# where $< is script/old-foreign.txt.
oldforeign.fst: common/bin/old-foreign.fst

common/bin/old-foreign.fst: script/old-foreign.txt
	@printf '\n%s\n\n' "*** Our ready-built transducer for foreign words ***"
	@printf "read text < $< \nsave stack $@ \nquit \n" > tmp/old-foreign-script
	$(XFST) < tmp/old-foreign-script
	@rm -f tmp/old-foreign-script


# Transducer for typographical errors.
# The script given to $(XFST) is: read text < $<, save stack $@, quit.
# NOTE(review): the source list is common/bin/typoslist.txt, i.e. it lives
# in bin/ rather than src/ - confirm that this is intended.
typos: typos.fst
typos.fst: common/bin/typos.fst

common/bin/typos.fst: common/bin/typoslist.txt
	@printf '\n%s\n\n' "*** Our transducer for typographical errors ***"
	@printf "read text < $< \nsave stack $@ \nquit \n" > tmp/typos-script
	$(XFST) < tmp/typos-script
	@rm -f tmp/typos-script

# Here we build a phonetic transducer, phon-$(GTLANG).fst.
# $(GTLANG)/src/phon-$(GTLANG).xfst is run as an xfst script ("source $<")
# by $(XFST), and the resulting stack is saved as $@.
# The cleanup step removes the script from tmp/, where it was written.

phon.fst: $(GTLANG)/bin/phon-$(GTLANG).fst
$(GTLANG)/bin/phon-$(GTLANG).fst: $(GTLANG)/src/phon-$(GTLANG).xfst
	@echo
	@echo "*** Building phon-$(GTLANG).fst ***" ;
	@echo
	@printf "source $< \n\
	save stack $@ \n\
	quit \n" > tmp/phon-script
	$(XFST) < tmp/phon-script
	@rm -f tmp/phon-script