# This is a makefile that builds the cor morphological parser
# *****************************************************************
# This is a dummy file. cor refers to the name of the language, xxx to the 
# ISO code of the language, for use in file names.

# This is a preliminary file that builds a cor parser based upon
# xfst and not twolc.

# Command lines for the Xerox tools; every tool is started with -utf8.
# On the host victorio.uit.no the tools are named by absolute path,
# everywhere else they are looked up on PATH.
ifeq ($(shell hostname),victorio.uit.no)
XFST = /opt/sami/xerox/c-fsm/ix86-linux2.6-gcc3.4/bin/fst -utf8
CFST = /opt/sami/xerox/c-fsm/ix86-linux2.6-gcc3.4/bin/fst -utf8
LEXC = /opt/sami/xerox/bin/lexc -utf8
else
XFST = xfst -utf8
CFST = xfst -utf8
LEXC = lexc -utf8
endif


# Lexc source files. They are both the prerequisites of ../bin/cor-lex.save
# and, in exactly this order, the argument list of lexc's compile-source.
# NOTE(review): the order is kept as it was; presumably lexc needs
# cor-lex.txt first -- confirm before reordering.
# No trailing whitespace after the last entry: make would keep it as part
# of the value.
SRCS  = cor-lex.txt adv-cor-lex.txt
SRCS += noun-cor-lex.txt noun-cor-morph.txt
SRCS += propernoun-cor-lex.txt propernoun-cor-morph.txt
SRCS += pp-cor-lex.txt cc-cor-lex.txt
SRCS += adj-cor-lex.txt adj-cor-morph.txt
SRCS += verb-cor-lex.txt verb-cor-morph.txt
SRCS += cs-cor-lex.txt punct-cor-lex.txt pron-cor-lex.txt


# Converter from KK to SWF. Not done.
# The recipe writes a three-line xfst script (source the first prerequisite,
# save the stack as the target, quit), runs it through $(XFST) and then
# deletes the script.
# NOTE(review): this is the first rule in the file, so a bare `make` builds
# orth-cor.fst -- confirm that this is the intended default goal.
# NOTE(review): the script is written to ../tmp/ here while most other
# rules use ../../tmp/ -- confirm which directory is meant.
orth-cor.fst: ../bin/orth-cor.fst
../bin/orth-cor.fst: orth-cor.xfst ../bin/icor.fst ../bin/tok.fst #../bin/kor-dis.bin
	@printf '\n*** Building orth-cor.fst ***\n\n'
	@printf '%s\n' 'source $< ' 'save stack $@ ' 'quit ' > ../tmp/orth-script
	$(XFST) < ../tmp/orth-script
	@rm -f ../tmp/orth-script


# Build the generator icor.fst: load ../bin/cor.save, invert the network and
# save the result.
# The recipe reads cor.save (not cor.fst), so cor.save is the prerequisite;
# this way the declared dependency matches the file that is actually loaded.
icor.fst: ../bin/icor.fst
../bin/icor.fst: ../bin/cor.save
	@echo
	@echo "*** Building the inverse icor.fst ***"
	@echo
	@printf '%s\n' 'load $< ' 'invert net ' 'save stack $@ ' 'quit ' \
		> ../../tmp/icor-fst-script
	$(XFST) < ../../tmp/icor-fst-script
	@rm -f ../../tmp/icor-fst-script


# Final analyser: cor.save composed with (caseconv.fst composed with
# spellrelax.fst), saved as ../bin/cor.fst.
# The xfst script is written to ../../tmp/, fed to $(XFST) and removed again.
cor.fst: ../bin/cor.fst
../bin/cor.fst: ../bin/cor.save ../bin/caseconv.fst ../bin/spellrelax.fst
	@printf '\n*** Building cor.fst ***\n\n'
	@printf '%s\n' \
		'read regex  [[@"../bin/cor.save"] .o. [@"../bin/caseconv.fst" .o. @"../bin/spellrelax.fst"]] ; ' \
		'save stack $@ ' \
		'quit ' > ../../tmp/cor-fst-script
	$(XFST) < ../../tmp/cor-fst-script
	@rm -f ../../tmp/cor-fst-script


# Binary disambiguator: vislcg3 compiles the grammar cor-dis.rle into
# ../bin/cor-dis.bin, which can be used instead of the source file.
dis-bin: ../bin/cor-dis.bin
../bin/cor-dis.bin: cor-dis.rle
	@printf '\n*** Building a binary disambiguator cor-dis.bin ***\n\n'
	@vislcg3 --grammar $< --grammar-only --grammar-bin $@ -C UTF-8


# Compile spellrelax.regex into ../bin/spellrelax.fst, one of the networks
# composed into cor.fst.
# The script is run through $(XFST), like every other xfst rule in this file,
# so that the host-specific tool path is honoured here as well.
spellrelax.fst: ../bin/spellrelax.fst
../bin/spellrelax.fst: spellrelax.regex
	@echo
	@echo "*** Building spellrelax.fst ***"
	@echo
	@printf '%s\n' 'read regex < $< ' 'save stack $@ ' 'quit ' \
		> ../../tmp/spellrelax-script
	$(XFST) < ../../tmp/spellrelax-script
	@rm -f ../../tmp/spellrelax-script


# Compile case.regex into ../bin/caseconv.fst.
# A temporary xfst script (read the regex, save the stack, quit) is written
# to ../../tmp/, run through $(XFST) and deleted afterwards.
caseconv.fst: ../bin/caseconv.fst
../bin/caseconv.fst: case.regex
	@printf '\n*** Building caseconv.fst ***\n\n'
	@printf '%s\n' 'read regex < $< ' 'save stack $@ ' 'quit ' > ../../tmp/caseconv-script
	$(XFST) < ../../tmp/caseconv-script
	@rm -f ../../tmp/caseconv-script


# Build the preprocessor (tokenizer) ../bin/tok.fst from tok.txt.
# tok.txt is sourced as an xfst script and the resulting stack is saved.
tok.fst: ../bin/tok.fst
../bin/tok.fst: tok.txt
	@printf '\n*** Building the tokenizer tok.fst ***\n\n'
	@printf '%s\n' 'source $< ' 'save stack $@ ' 'quit ' > ../../tmp/tok-script
	$(XFST) < ../../tmp/tok-script
	@rm -f ../../tmp/tok-script


# Build the parser ../bin/cor.save: the lexicon network cor-lex.save
# composed with the network in xfst-cor.bin.
cor.save: ../bin/cor.save
../bin/cor.save: ../bin/xfst-cor.bin ../bin/cor-lex.save
	@printf '\n*** Building the parser cor.save ***\n\n'
	@printf '%s\n' \
		'read regex [[@"../bin/cor-lex.save"] .o. [@"../bin/xfst-cor.bin"]] ; ' \
		'save stack $@ ' \
		'quit ' > ../../tmp/cor-save-script
	$(XFST) < ../../tmp/cor-save-script
	@rm -f ../../tmp/cor-save-script


# Build ../bin/cor-lex.save from the lexicon files listed in $(SRCS).
# lexc gets a script that compiles all sources and saves the result; the
# script commands are deliberately echoed (no leading @) as before.
cor-lex.save: ../bin/cor-lex.save
../bin/cor-lex.save: $(SRCS)
	@printf '\n*** Building cor-lex.save ***\n\n'
	printf '%s\n' 'compile-source $(SRCS) ' 'save-source $@ ' 'quit ' > ../../tmp/cor-lex-save-script
	$(LEXC) < ../../tmp/cor-lex-save-script
	rm -f ../../tmp/cor-lex-save-script


# Build ../bin/xfst-cor.bin by sourcing xfst-cor.txt in xfst and saving
# the resulting stack.
# $(XFST) already carries -utf8, so the flag is not repeated on the
# command line.
xfst-cor.bin: ../bin/xfst-cor.bin
../bin/xfst-cor.bin: xfst-cor.txt
	@echo
	@echo "*** Building xfst-cor.bin ***"
	@echo
	@printf '%s\n' 'source $< ' 'save stack $@ ' 'quit ' \
		> ../../tmp/xfst-cor-script
	$(XFST) < ../../tmp/xfst-cor-script
	@rm -f ../../tmp/xfst-cor-script


# ##################################################### #
# 														#
#                    Speller section                    #
#														#
# ##################################################### #

# commands to compile:
#
# make wordlist
# make sorted
# make hunspellspeller
# cd ../hunspell/
# hunspell -d ./cor
# (and enter words)
# hunspell -d ./cor -l ../corp/kk.news.txt 
# (and have a look)


# Assemble the hunspell files ../hunspell/cor.dic and ../hunspell/cor.aff
# from the sorted word list.
# This target must be built with M4FLAGS=-DHUNSPELL
# NOTE(review): filter_plx_file is presumably what produces
# ../hunspell/dics_tmp.dic and ../hunspell/dics_tmp.aff -- confirm.
# NOTE(review): $(TARGET) is not set anywhere in this file; presumably it is
# supplied on the command line or by the environment.
.PHONY: hunspellspeller
hunspellspeller: sorted
	@echo
	@echo "*** Making $(TARGET) ../hunspell dictionary ***"
	filter_plx_file ../hunspell/dics ../hunspell/sorted-list.txt
# The first line of cor.dic is the bare line count. wc reads from stdin so
# that it does not print the file name after the number, and the unquoted
# substitution drops any padding wc adds.
	echo $$(wc -l < ../hunspell/dics_tmp.dic) > ../hunspell/cor.dic
	cat ../hunspell/dics_tmp.dic >> ../hunspell/cor.dic
# The file ../hunspell/aff_intro has to be created by hand.
	cat ../hunspell/aff_intro > ../hunspell/cor.aff
# The phonetic rules in ../hunspell/phonrules have to be written as well;
# they are language specific, and the file may be left empty.
	cat ../hunspell/phonrules >> ../hunspell/cor.aff
	cat ../hunspell/dics_tmp.aff >> ../hunspell/cor.aff


# Umbrella target with no recipe of its own: it only requests its two
# prerequisites.
# NOTE(review): neither ifst-norm nor lexc2xspell is defined in the part of
# the file visible here -- confirm they exist, or this target cannot be built.
hsp-conversion: ifst-norm lexc2xspell #$(HSPSRCjava) $(HSPSRCprefix)


# Here we make full-form data files.
# The fst tool prints the generated word list to stdout, which this recipe
# redirects to ../tmp/wordlist.txt (it is not piped through gzip).
# NOTE(review): an earlier comment mentioned a 2Gb file size limit in the
# Xerox tools -- confirm that writing a plain file is safe for this lexicon.
# THIS COMMAND CAN ONLY BE RUN SUCCESSFULLY ON VICTORIO!
# It requires the commercial fst tool to be able to print all upper-words.
# (note: in sme we did lower-words, but here upper, as we use icor.fst)
# The script ../tmp/wordlist-script is not removed afterwards.
.PHONY: wordlist
wordlist: ../bin/icor.fst
	@echo
	@echo "*** Building full-form wordlist ***"
	@echo
	@printf '%s\n' 'load stack < $< ' 'upper-words ' 'quit ' \
		> ../tmp/wordlist-script
	$(CFST) -f ../tmp/wordlist-script -q > ../tmp/wordlist.txt

# Sort ../tmp/wordlist.txt bytewise (LC_ALL=C), drop duplicate lines and
# write the result to ../hunspell/sorted-list.txt; sort's temporary files
# go to ../tmp.
# Declared phony so that a stray file called "sorted" cannot mask the target.
.PHONY: sorted
sorted: wordlist
	@echo "*** Sorting wordlist ***"
	@echo
	@LC_ALL=C sort -T ../tmp -u -o ../hunspell/sorted-list.txt ../tmp/wordlist.txt


# ##################################################### #
# 														#
#                   Clean target                        #
#														#
# ##################################################### #


# Remove the compiled files (*.bin, *.fst, *.save) from ../bin.
# Declared phony so that a file called "clean" cannot stop the recipe
# from running.
.PHONY: clean
clean:
	@$(RM) ../bin/*.bin ../bin/*.fst ../bin/*.save