# ******************************************************************** #
# This is a common makefile that connects to all the other makefiles   #
# ******************************************************************** #
# Version: $Id$

sinclude $(TARGET)/Makefile   # language-specific variables esp. SRCS files.
include analyser.mk
include mk-files/disamb.mk
include mk-files/speller.mk
include mk-files/hyph.mk
include mk-files/phonrules.mk

# =============================== #
# Variable definitions		  #
# =============================== #

# m4 macro processor and its flag list (empty by default).
M4 = m4
M4FLAGS =
# DOC names a document file.  It is empty here; presumably it is supplied by
# the caller (e.g. `make DOC=...`) when a rule needs it.
DOC =
# SHORTDOC is the file name of DOC cut at the first "." and with every "-"
# removed.  The $(if ...) guard keeps an empty DOC from running `basename`
# without an operand, which would only produce an error message.
SHORTDOC = $(if $(strip $(DOC)),$(shell basename $(DOC) | cut -d "." -f1 | tr -d '-' ))

# Tools used when compiling the transducers
#
# On the lines that end in `#-utf8` the flag is commented out: make treats
# everything from `#` on as a comment, so the tool runs without -utf8.
# The blank in front of the `#` remains part of the variable's value.

# UFST is the only tool here that is given an absolute path and -utf8.
UFST    = /opt/sami/xerox/c-fsm/ix86-linux2.6-gcc3.4/bin/fst -utf8
CFST   = xfst #-utf8
XFST   = xfst #-utf8
#TWOLC  = twolc #-utf8
FSTTOOL = xfst #-utf8
TWOLCTOOL = twolc #-utf8
LEXC   = lexc #-utf8
# COMPILER is FSTTOOL when MORPHOPHONTOOL equals "xfst" and TWOLCTOOL in every
# other case.  MORPHOPHONTOOL is not assigned in the visible part of this
# file; presumably it comes from the language Makefile or the command line.
# Each branch also defines the `tust` target, which only prints a marker
# (***hoi*** for the xfst branch, ***hei*** otherwise) showing which branch
# was taken.
ifeq ($(MORPHOPHONTOOL), xfst)
tust:
	@echo "***hoi***"
COMPILER = $(FSTTOOL)
else
tust:
	@echo "***hei***"
COMPILER = $(TWOLCTOOL)
endif


# Decompression, remote copy/login and constraint-grammar tools
GUNZIP   = /usr/bin/gunzip
SCP      = scp -p
SSH      = ssh
VISLCG3  = vislcg3

# Further helper tools; SORT, REZ and SETFILE are fixed absolute paths,
# HYPHCORR is a script path relative to the directory make runs in.
SORT     = /sw/bin/sort
REZ      = /Developer/Tools/Rez
SETFILE  = /Developer/Tools/SetFile
HYPHCORR = script/hyphenConverter.sh

# Version-related info:
# DATE is the current date as YYYYMMDD.  It is assigned with := so that
# `date` runs once per make invocation and every use sees the same stamp.
DATE := $(shell date +%Y%m%d)
# VERSION is the content of $(TARGET)/polderland/version.txt with all blanks
# removed.  It deliberately stays recursively expanded (=): the file is only
# read when VERSION is actually used, so goals that never touch it do not
# fail or complain when the file is absent.
VERSION = $(shell cat $(TARGET)/polderland/version.txt | tr -d " ")

# Host-specific override: when `hostname` reports victorio.uit.no, the Xerox
# tools are taken from /opt/sami/xerox and run with -utf8.
# NOTE(review): TWOLC is assigned here, whereas the generic tool variables
# defined earlier in this file are TWOLCTOOL and FSTTOOL (TWOLC itself is
# commented out there) - confirm which variables the rules really use.
ifeq ($(shell hostname),victorio.uit.no)
  CFST  = /opt/sami/xerox/c-fsm/ix86-linux2.6-gcc3.4/bin/fst -utf8
  XFST  = /opt/sami/xerox/bin/xfst -utf8
  LEXC  = /opt/sami/xerox/bin/lexc -utf8
  TWOLC = /opt/sami/xerox/bin/twolc -utf8
endif

# aSpell tools
ASPELL       = aspell
ASPELL_FLAGS = --encoding=utf-8
PREZIP       = prezip-bin

# Language code and package version used in the aspell file names below.
lang           = se
aspell_version = 0.1-1

# File groups of the aspell package; all of them live in $(TARGET)/aspell.
cwl_files = $(TARGET)/aspell/$(lang).cwl

data_files = \
	$(TARGET)/aspell/$(lang)_affix.dat \
	$(TARGET)/aspell/$(lang).dat \
	$(TARGET)/aspell/l_$(lang).cset \
	$(TARGET)/aspell/l_$(lang).cmap

doc_files =

extra_files = \
	$(TARGET)/aspell/configure \
	$(TARGET)/aspell/info \
	$(TARGET)/aspell/Makefile.pre

multi_files = $(TARGET)/aspell/$(lang).multi
rws_files = $(TARGET)/aspell/$(lang).rws

# Name of the distribution directory: aspell6-<lang>-<version>.
aspell_distdir = aspell6-$(lang)-$(aspell_version)

# Xerox regular-expression fragments.
# In this notation @"file" reads a stored network from a binary file and
# .o. is composition.  The double quotes are backslash-escaped, presumably
# because the values are expanded inside double-quoted shell strings (printf)
# in the makefiles that use them - TODO confirm against the callers.

# FSTBASE: nohardhyphen.fst composed with downcase.fst (both in common/bin).
FSTBASE =	@\"common/bin/nohardhyphen.fst\"  .o. \
		@\"common/bin/downcase.fst\"

# FST: the target's .save network composed with caseconv.fst, FSTBASE and
# spellrelax.fst, in union (|) with webadr.fst.
# NOTE(review): in Xerox regex syntax round parentheses ( ) mark optionality,
# while square brackets [ ] group.  If plain grouping was intended, the
# parenthesised part also admits the empty string - confirm.
FST = (	@\"$(TARGET)/bin/$(TARGET).save\" .o. \
		@\"common/bin/caseconv.fst\"      .o. $(FSTBASE) .o. \
		@\"common/bin/spellrelax.fst\" )      \
		| @\"common/bin/webadr.fst\"

# FSTNORM: the target's -norm.save network composed with downcase.fst and
# FSTBASE (which itself ends in downcase.fst).
FSTNORM = @\"$(TARGET)/bin/$(TARGET)-norm.save\" .o. \
		@\"common/bin/downcase.fst\" .o. $(FSTBASE)

# FSTRESTR: same composition as FSTNORM, starting from the -restr.save network.
FSTRESTR = @\"$(TARGET)/bin/$(TARGET)-restr.save\" .o. \
		@\"common/bin/downcase.fst\" .o. $(FSTBASE)

# INVERTNET: a complete `read regex ... ;` command composing tag-not-save.fst,
# the target's .save network and downcase.fst.  The value ends in a literal
# backslash-n, presumably turned into a newline by the printf that consumes it.
INVERTNET = read regex [ [@\"common/bin/tag-not-save.fst\" ] .o. \
		[@\"$(TARGET)/bin/$(TARGET).save\"] .o. \
		[@\"common/bin/downcase.fst\"     ]     \
		] ; \n

# TAGINCL lists six .spel files in $(TARGET)/int: the general lexicon, the
# (temporary) proper-noun lexicon, and the noun, verb, adjective and
# abbreviation lexicons.
TAGINCL = \
	$(TARGET)/int/$(TARGET)-lex.spel \
	$(TARGET)/int/propernoun-$(TARGET)-lex-tmp.spel \
	$(TARGET)/int/noun-$(TARGET)-lex.spel \
	$(TARGET)/int/verb-$(TARGET)-lex.spel \
	$(TARGET)/int/adj-$(TARGET)-lex.spel \
	$(TARGET)/int/abbr-$(TARGET)-lex.spel

# The variables below each hold one complete xfst `read regex ... ;` command.
# @"file" reads a stored network, .o. is composition and the .i suffix takes
# the inverse of a network.  Every value ends in a literal backslash-n,
# presumably expanded to a newline by the printf that writes the xfst script.

# INVERTHYPH: tag-not-save, derivation-filter, the hyph-<target> network,
# downcase and the inverted hyphenation rules, composed in that order.
INVERTHYPH = read regex [ [@\"common/bin/tag-not-save.fst\"        ] .o. \
		[@\"$(TARGET)/bin/derivation-filter.fst\"] .o. \
		[@\"$(TARGET)/bin/hyph-$(TARGET).save\"  ] .o. \
		[@\"common/bin/downcase.fst\"            ] .o. \
		[@\"$(TARGET)/bin/hyphrules-$(TARGET).fst\".i ] \
		] ; \n

# INVERTNORM: tag-not-save, derivation-filter, the -norm network and downcase.
INVERTNORM = read regex [ [@\"common/bin/tag-not-save.fst\"        ] .o. \
		[@\"$(TARGET)/bin/derivation-filter.fst\"] .o. \
		[@\"$(TARGET)/bin/$(TARGET)-norm.save\"  ] .o. \
		[@\"common/bin/downcase.fst\"            ]     \
		] ; \n

# INVERTRESTR: same chain as INVERTNORM, built on the -restr network.
INVERTRESTR = read regex [ [@\"common/bin/tag-not-save.fst\"        ] .o. \
		[@\"$(TARGET)/bin/derivation-filter.fst\"] .o. \
		[@\"$(TARGET)/bin/$(TARGET)-restr.save\"  ] .o. \
		[@\"common/bin/downcase.fst\"            ]     \
		] ; \n

# SPLRNONREC: derivation-filter, the spellernonrec network, downcase,
# remove-hyphen and the inverted hyphenation rules.
SPLRNONREC = read regex [ [@\"$(TARGET)/bin/derivation-filter.fst\"     ] .o. \
		[@\"$(TARGET)/bin/spellernonrec-$(TARGET).save\"  ] .o. \
		[@\"common/bin/downcase.fst\"                 ] .o. \
		[@\"common/bin/remove-hyphen.fst\"            ] .o. \
		[@\"$(TARGET)/bin/hyphrules-$(TARGET).fst\".i ]     \
		] ; \n

# NONREC: derivation-filter, the nonrec network and downcase.
NONREC = read regex [ [@\"$(TARGET)/bin/derivation-filter.fst\"] .o. \
		[@\"$(TARGET)/bin/nonrec-$(TARGET).save\"] .o. \
		[@\"common/bin/downcase.fst\"            ]     \
		] ; \n

# HYPH: the hyphenation rules composed with hyph-i<target>.save and the
# target's -norm.fst.
# NOTE(review): this uses round parentheses (optionality in Xerox regex
# syntax) where the other commands use square brackets, and it refers to
# hyph-i$(TARGET).save and $(TARGET)-norm.fst while the definitions above use
# hyph-$(TARGET).save and $(TARGET)-norm.save - confirm these are intended.
HYPH = read regex ( @\"$(TARGET)/bin/hyphrules-$(TARGET).fst\"  .o. \
		@\"$(TARGET)/bin/hyph-i$(TARGET).save\"     .o. \
		@\"$(TARGET)/bin/$(TARGET)-norm.fst\" ) ; \n


# `common` is an umbrella goal: it has no recipe of its own and only pulls in
# the prerequisites listed here.
common: fst inverse.fst foreign.fst num.fst webadr.fst missing


# Full-form word list.
# The generated word list is written to stdout and piped straight into gzip;
# this avoids hitting a 2 GB file size limit in the Xerox tools.
# THIS CAN ONLY BE RUN SUCCESSFULLY ON VICTORIO: it needs the commercial fst
# tool, which is able to print all lower-side words.
# NOTE(review): the rule's target is the .txt file, but the recipe only
# writes $@.gz.  Unless something else creates the .txt file, the target never
# exists and the recipe runs on every request - confirm whether the target
# should be the .gz file instead.
wordlist: $(TARGET)/wordlist-$(TARGET).txt

$(TARGET)/wordlist-$(TARGET).txt: $(TARGET)/bin/nonrec-$(TARGET).fst
	@echo
	@echo "*** Building $(TARGET) full-form wordlist ***"
	@echo
	@printf '%s \n' "load stack < $<" 'lower-words' 'quit' > tmp/wordlist-script
	$(CFST) -f tmp/wordlist-script -q | gzip -f > $@.gz
	@rm -f tmp/wordlist-script


# printlarge writes one gzipped word list for the speller.  The generated fst
# script loads the four -plx networks (verbs, nouns, adjectives, proper
# nouns), unites them, composes the result with hyphen-convert.fst, keeps the
# lower side, sorts it and prints its words; the output is piped into gzip.
# The fst tool is run with LANG set to the empty string.
printlarge: $(TARGET)/polderland/large-$(TARGET)-plx.txt.gz

$(TARGET)/polderland/large-$(TARGET)-plx.txt.gz: \
	$(TARGET)/bin/spellerverbs-$(TARGET)-plx.fst \
	$(TARGET)/bin/spellernouns-$(TARGET)-plx.fst \
	$(TARGET)/bin/spelleradjs-$(TARGET)-plx.fst \
	$(TARGET)/bin/spellerproper-$(TARGET)-plx.fst \
	common/bin/hyphen-convert.fst
	@echo
	@echo "*** Printing ONE large (sorted) file, $(TARGET) ***"
	@echo
	@printf '%s \n' \
		"load stack < $(TARGET)/bin/spellerverbs-$(TARGET)-plx.fst" \
		"load stack < $(TARGET)/bin/spellernouns-$(TARGET)-plx.fst" \
		"load stack < $(TARGET)/bin/spelleradjs-$(TARGET)-plx.fst" \
		"load stack < $(TARGET)/bin/spellerproper-$(TARGET)-plx.fst" \
		'union net' \
		'load stack < common/bin/hyphen-convert.fst' \
		'turn stack' \
		'compose net' \
		'lower-side' \
		'sort' \
		'print words' \
		'quit' > tmp/largelist-script
	LANG= $(CFST) -f tmp/largelist-script -q | gzip -f > $@
	@rm -f tmp/largelist-script


# Downloads the gzipped PLX word lists (adj, noun, propernoun, verb) from
# $(SRCSITE) into the target's polderland folder, using $(LYNX) as a command
# that prints the fetched document on stdout.  Neither LYNX nor SRCSITE is
# defined in the visible part of this file; presumably they come from an
# included makefile or the command line.
#
# Each list is first written to <file>.part and renamed only when the
# download command succeeded.  Redirecting straight into the final name would
# truncate an existing list before the transfer even starts, so a failed
# download would destroy it.  The first failure stops the recipe.
.PHONY: plx-download
plx-download:
	@echo
	@echo "*** Downloading $(TARGET) PLX files ***"
	@echo
	@for pos in adj noun propernoun verb; do \
		name=$$pos-$(TARGET)-plx.txt.gz ; \
		dest=$(TARGET)/polderland/$$name ; \
		echo "Fetching $$name" ; \
		$(LYNX) $(SRCSITE)/$$name > $$dest.part \
			&& mv -f $$dest.part $$dest \
			|| { rm -f $$dest.part ; exit 1 ; } ; \
	done


# Here we make the abbreviation file for our current preprocessor,
# the perl-based preprocess (located in the script catalogue)

# Helper values for turning a blank-separated list into a comma-separated one.
empty:=
comma:=,
space:=$(empty) $(empty)
# ABBRSRCS is LEXICALSRCS joined with commas, as passed to --lex below.
# $(strip ...) first trims the list and squeezes every run of whitespace to a
# single blank, so stray spaces cannot turn into empty ",," entries.
ABBRSRCS=$(subst $(space),$(comma),$(strip $(LEXICALSRCS)))

abbr: $(TARGET)/bin/abbr.txt
# abbr.txt is produced by script/abbr-extract.  The abbreviation lexicon
# $(TARGET)/src/abbr-$(TARGET)-lex.txt is listed as a prerequisite because the
# recipe reads it through both --abbr_lex and --lex; without it, editing that
# lexicon would not trigger a rebuild.
$(TARGET)/bin/abbr.txt: script/abbr-extract script/langTools/Util.pm $(LEXICALSRCS) \
		$(TARGET)/src/$(TARGET)-num.txt \
		$(TARGET)/src/abbr-$(TARGET)-lex.txt \
		$(TARGET)/bin/i$(TARGET).fst \
		cwb/paradigm.txt cwb/korpustags.txt
	@echo
	@echo "*** Extracting abbreviations from abbr-$(TARGET)-lex.txt to abbr.txt ***"
	@echo
	@perl -I script script/abbr-extract \
		--paradigm=cwb/paradigm.txt \
		--tags=cwb/korpustags.txt \
		--fst=$(TARGET)/bin/i$(TARGET).fst \
		--output=$@ \
		--abbr_lex=$(TARGET)/src/abbr-$(TARGET)-lex.txt \
		--lex=$(ABBRSRCS),$(TARGET)/src/$(TARGET)-num.txt,$(TARGET)/src/abbr-$(TARGET)-lex.txt


# Here we build a transducer that gives us only the Sámi wordforms missing from
# our transducers. Non-Sámi words from Norwegian, Finnish, English, etc. are
# filtered out by this script, as are registered typos.

# `missing` is an alias for $(TARGET)/bin/missing, a small text file written
# by printf.  It first binds the names analyzer, foreign, typos and webadr to
# .fst paths and then, after a blank line, lists those four names again, one
# per line.  The rule has no prerequisites, so the file is only written when
# it does not exist yet.
missing: $(TARGET)/bin/missing

$(TARGET)/bin/missing:
	@echo
	@echo "*** Generating missing ***"
	@echo
	@printf '%s\n' \
		"analyzer bin/$(TARGET).fst" \
		'foreign ../common/bin/foreign.fst' \
		'typos ../common/bin/typos.fst' \
		'webadr ../common/bin/webadr.fst' \
		'' \
		'analyzer' \
		'foreign' \
		'typos' \
		'webadr' > $@


# cap-<target> is an alias for $(TARGET)/bin/cap-$(TARGET), a small text file
# written by printf: it binds `analyzer` to the target's .fst and `allcaps` to
# common/bin/allcaps.fst and ends, after a blank line, with the line
# "allcaps analyzer ".  No prerequisites: written only when the file is absent.
cap-$(TARGET): $(TARGET)/bin/cap-$(TARGET)

$(TARGET)/bin/cap-$(TARGET):
	@echo
	@echo "*** Generating cap-$(TARGET) ***"
	@echo
	@printf '%s\n' \
		"analyzer $(TARGET)/bin/$(TARGET).fst" \
		'allcaps common/bin/allcaps.fst' \
		'' \
		'allcaps analyzer ' > $@

# CHECK THIS!! Move this file
# typoslist.txt is an alias for common/bin/typoslist.txt, which holds the
# first tab-separated column of $(TARGET)/src/typos.txt.
typoslist.txt: common/bin/typoslist.txt

common/bin/typoslist.txt: $(TARGET)/src/typos.txt
	@echo
	@echo "*** Our list of common typographical errors ***"
	@echo
	@cut -f 1 $< > $@


# Finally an option to remove all the binary files
#
# The files in common/bin are always removed.  The target-specific files are
# only removed when TARGET is non-empty: with an empty TARGET the patterns
# would expand to /bin/*.fst, /bin/missing and so on, i.e. point at the
# system's /bin directory.
.PHONY: clean
clean:
	@rm -f common/bin/*.fst common/bin/*.save common/bin/*.bin common/bin/*.txt
#	@rm -f common/int/*.fst
ifneq ($(strip $(TARGET)),)
	@rm -f $(TARGET)/bin/*.fst $(TARGET)/bin/*.save $(TARGET)/bin/*.bin \
		 $(TARGET)/bin/*.rle
	@rm -f $(TARGET)/bin/cap-$(TARGET) $(TARGET)/bin/missing
	@rm -f $(TARGET)/bin/abbr.txt
endif

# rec-clean deletes the files listed in NONRECFILES, a variable that is not
# defined in the visible part of this file.
rec-clean: ; @rm -f $(NONRECFILES)

# speller-clean runs `clean` first and then removes the speller products:
# the Windows/Mac speller files, intermediate PLX lists, the per-target
# scratch directory tmp/$(TARGET), loose files in tmp, and the generated
# .spel and hunspell word lists.  WINSPELL, MACSPELL, PLXSRCfst and PLXSRCjava
# are not defined in the visible part of this file; presumably they come from
# an included makefile.
.PHONY: speller-clean
speller-clean: clean
	@rm -f $(TARGET)/polderland/$(WINSPELL)
	@rm -f $(TARGET)/polderland/$(MACSPELL)
	@rm -f $(TARGET)/polderland/middle-noun-$(TARGET)-plx.txt
	@rm -f $(TARGET)/polderland/generated_nums-plx.txt
	@rm -f $(PLXSRCfst)
	@rm -f $(PLXSRCjava)
# Remove the per-target scratch directory in one step.  `rm -rf` does not
# fail when the directory is empty or missing, so the remaining cleanup always
# runs.  The guard keeps an empty TARGET from wiping everything under tmp/.
ifneq ($(strip $(TARGET)),)
	@rm -rf tmp/$(TARGET)
endif
# Best effort: plain `rm -f` exits non-zero when tmp/ still holds
# subdirectories (e.g. scratch directories of other targets); that must not
# abort the rest of the cleanup.
	@rm -f tmp/* 2>/dev/null || true
	@rm -f $(TARGET)/int/*spel
	@rm -f $(TARGET)/hunspell/*sme.txt