#############################################
# Makefile for the proofing tools test bench.
#############################################

# Find out whether we have access to the bound corpus. BOUNDTEST is 'yes' when
# the output of `hostname` is one of the two machines listed below, and empty
# otherwise:
BOUNDTEST =
ifneq (,$(filter victorio.uit.no divvun.no,$(shell hostname)))
BOUNDTEST = yes
endif

# The $DOC variable should always be specified. The below string is just a dummy
# placeholder to get rid of error messages. The variable is only used in certain
# speller test targets, you can ignore it in all other cases.
DOC = tmp/GoldstandardTexts.txt
# File name of $(DOC) without directory and without a trailing .xml:
SHORTDOC = $(shell basename $(DOC) .xml )

# Same as DOC/SHORTDOC, but for the bound corpus:
BDOC = tmp/GoldstandardBoundTexts.txt
SHORTBDOC = $(shell basename $(BDOC) .xml )

REVISION = $(shell cat $(GTLANG)/polderland/revision.txt)
# Simply expanded (:=) on purpose: the time stamp is part of target file names
# and of the --date argument passed to the result scripts. With a recursive
# assignment `date` would be re-run on every expansion, so that a test run
# crossing a minute boundary would use different values in different places.
TESTTIME := $(shell date +%H%M)

# Tools - generic helper commands and the location of the lexc2xspell tool:
LN           = ln -sf
MV           = mv -f
LXC2SPELLDIR = ../tools/lexc2xspell

# Language-dependent Polderland tools. SPELL and HYPHEN are only set for the
# languages listed here (sme, smj, sma):
ifeq ($(GTLANG), sme)
SPELL  = $(PLTOOLSDIR)/spellSamiNort
HYPHEN = $(PLTOOLSDIR)/SamiNortHyphMac
else ifeq ($(GTLANG), smj)
SPELL  = $(PLTOOLSDIR)/spellSamiLule
HYPHEN = $(PLTOOLSDIR)/SamiLuleHyphMac
else ifeq ($(GTLANG), sma)
SPELL  = $(PLTOOLSDIR)/spellSamiSout
HYPHEN = $(PLTOOLSDIR)/SamiSoutHyphMac
endif

# Speller testing.
# The test reports are stored below $(GTBIG), except when LIC is 'bound', in
# which case they are stored below $(GTPRIV):
ifeq ($(LIC),bound)
PROOFTESTBASE   = $(GTPRIV)/techdoc/proof
else
PROOFTESTBASE   = $(GTBIG)/techdoc/proof
endif
# Report directories for speller and hyphenator tests:
SPLTESTREPDIR   = $(PROOFTESTBASE)/spelling/testing
HYPTESTREPDIR   = $(PROOFTESTBASE)/hyph/testing
# Default test tool; selects which tmp/sp-%-<tool>-... run rule is used:
TESTTOOL        = pl
TESTXSLDIR      = ../xtdoc/sd/src/documentation/resources/stylesheets
PROOFDISTDIR    = ../prooftools
PROOFDISTSHARED = $(PROOFDISTDIR)/toollibs/shared
# Passed as the last argument to the MS Word AppleScript:
WORDVERSION     = 2004
# The speller lexicon is taken from the prooftools dir. To make sure it is the
# latest one, do:
#   cd $GTHOME/prooftools
#   make mslex-download
# then come back here and run the tests you want.
SPELLERLEX      = $(PROOFDISTDIR)/tmp/download/$(WINSPELL)

# Regression file location depends on the test tool. All voikko variants share
# one directory; any tool not listed (including pl and mw) uses the polderland
# directory:
ifeq ($(TESTTOOL), hu)
SPTOOLDIR = hunspell
else ifeq ($(TESTTOOL), hfst)
SPTOOLDIR = hfst
else ifeq ($(TESTTOOL), vk)
SPTOOLDIR = voikko
else ifeq ($(TESTTOOL), vkmalaga)
SPTOOLDIR = voikko
else ifeq ($(TESTTOOL), vkhfst)
SPTOOLDIR = voikko
else
SPTOOLDIR = polderland
endif

##### Default target: ######
#all: spelltest - not possible as long as we are part of the main makefile - it
# will default all gt/ making to testing the proofing  tools!!!

############################
# Fetch the latest speller lexicons by delegating to the prooftools makefile.
# Both names are commands, not files, hence .PHONY. The sub-make is started via
# $(MAKE) so that options such as -n and the jobserver are handed on correctly.
.PHONY: download download-pl
download: download-pl
download-pl:
	cd ../prooftools && $(MAKE) mslex-download

#####################################
# Targets to test proofing tools
#####################################

# A general target, to run all speller tests at once (add more dependent
# targets as they become available). The two variants list the same tests and
# differ only in the order of regression-test and correct-test.
# Declared phony: spelltest is a command name, not a file to be built.
.PHONY: spelltest
ifeq (xyes, x$(BOUNDTEST))
spelltest: svnup typos-test wordtype-test baseform-test regression-test correct-test
else
spelltest: svnup typos-test wordtype-test baseform-test correct-test regression-test
endif

# Update the working copy of this directory and of the free and bound
# goldstandard corpora (locations taken from the environment variables GTFREE
# and GTBOUND). Declared phony: svnup is a command name, not a file.
.PHONY: svnup
svnup:
	svn -q up . $$GTFREE/stable/goldstandard/ $$GTBOUND/stable/goldstandard/

#####################################
# Short-hand targets:

# All short-hand targets below are command names, not files:
.PHONY: typos-test regression-test baseform-test wordtype-test paradigm-test

# Test speller on typos.txt:
typos-test: \
	$(SPLTESTREPDIR)/$(GTLANG)/$(TESTTOOL)/typos/$(DATE)-$(TESTTIME)-typos.xml

# Check that all words in past and present bug reports are correctly dealt with:
regression-test: \
	$(SPLTESTREPDIR)/$(GTLANG)/$(TESTTOOL)/regression/$(DATE)-$(TESTTIME)-regression.xml

# Run the baseform self test - check that all baseforms in our lexicons are
# correctly recognised:
baseform-test: \
	$(SPLTESTREPDIR)/$(GTLANG)/$(TESTTOOL)/baseform/$(DATE)-$(TESTTIME)-baseform.xml

# Run the wordtype test - check that a number of word construction types are
# correctly recognised or corrected (in case of spelling errors):
wordtype-test: \
	$(SPLTESTREPDIR)/$(GTLANG)/$(TESTTOOL)/wordtype/$(DATE)-$(TESTTIME)-wordtype.xml

# Run the paradigm self test - check that all inflected forms of a selected
# set of words are recognised:
paradigm-test: \
	$(SPLTESTREPDIR)/$(GTLANG)/$(TESTTOOL)/paradigm/$(DATE)-$(TESTTIME)-paradigm.xml

# Test speller on correct-marked corpus docs. The result for $(SHORTDOC) is
# always requested; the result for the bound corpus ($(SHORTBDOC)) is added
# only when BOUNDTEST is 'yes'.
# Declared phony: correct-test is a command name, not a file.
.PHONY: correct-test
correct-test: $(SPLTESTREPDIR)/$(GTLANG)/$(TESTTOOL)/goldstandard/$(DATE)-$(TESTTIME)-$(SHORTDOC).xml
ifeq (xyes, x$(BOUNDTEST))
correct-test: $(SPLTESTREPDIR)/$(GTLANG)/$(TESTTOOL)/goldstandard/$(DATE)-$(TESTTIME)-$(SHORTBDOC).xml
endif

#####################################
# Target to create graphs of some of the speller test results data. Runs the
# dvchart.py script with two directory arguments: the speller test report
# directory below $GTBIG and a scripts directory below $GTHOME.
# Declared phony: spellergraphs is a command name, not a file.
.PHONY: spellergraphs
spellergraphs:
	@python3.2  $$GTHOME/gt/script/dvchart.py \
				$$GTBIG/techdoc/proof/spelling/testing/ \
				$$GTHOME/xtdoc/techdoc/src/documentation/skins/pelt/scripts/

#####################################
# Final step of every speller test: convert the raw test data to a simple but
# sufficient xml format. The stem (%) is the test name (typos, baseform, ...)
# or the short document name. Inputs are the prepared test data, the speller
# output (<data>.out), the lexicon version file and the conversion script.
# After the conversion, an existing latest-<stem>.xml in the report directory
# is renamed to previous-<stem>.xml, and latest-<stem>.xml is made to point at
# the new report.
# NOTE(review): GNU make regards all targets of a multi-target pattern rule as
# made by one run of the recipe, while this recipe only writes $@ - confirm
# that this is fine for the make versions in use.
$(SPLTESTREPDIR)/$(GTLANG)/$(TESTTOOL)/typos/$(DATE)-$(TESTTIME)-%.xml \
$(SPLTESTREPDIR)/$(GTLANG)/$(TESTTOOL)/baseform/$(DATE)-$(TESTTIME)-%.xml \
$(SPLTESTREPDIR)/$(GTLANG)/$(TESTTOOL)/wordtype/$(DATE)-$(TESTTIME)-%.xml \
$(SPLTESTREPDIR)/$(GTLANG)/$(TESTTOOL)/paradigm/$(DATE)-$(TESTTIME)-%.xml \
$(SPLTESTREPDIR)/$(GTLANG)/$(TESTTOOL)/regression/$(DATE)-$(TESTTIME)-%.xml \
$(SPLTESTREPDIR)/$(GTLANG)/$(TESTTOOL)/goldstandard/$(DATE)-$(TESTTIME)-%.xml : \
	tmp/sp-%-$(TESTTOOL)-$(GTLANG).txt \
	tmp/sp-%-$(TESTTOOL)-$(GTLANG).txt.out \
	$(GTLANG)/$(SPTOOLDIR)/version.txt \
	$(SCRIPTDIR)/speller-testres.pl
	@echo ; echo "*** Collecting $* results, transforming to XML. Output:  ***" ; echo
	@echo "$@" ; echo
	mkdir -p $(@D)
# The tool version file read below is written by the rule that ran the speller:
	$(SCRIPTDIR)/speller-testres.pl --$(TESTTOOL) \
		--input=$< --output=$<.out --document=$(<F) \
		--date=$(DATE)-$(TESTTIME) \
		--version="`cat $(GTLANG)/$(SPTOOLDIR)/version.txt`" \
		--toolversion="`cat tmp/sp-$(TESTTOOL)-version.txt`" \
		--xml=$@
# Rotate the latest/previous pointers in the report directory:
	if [ -f $(@D)/latest-$*.xml ] ; then \
		cd $(@D) && $(MV) latest-$*.xml previous-$*.xml ; \
	fi
	cd $(@D) && $(LN) $(@F) latest-$*.xml

#####################################
# Run whatever test was requested using the ...

# ... Polderland command line speller.
# Input is <prerequisite>.c1 (the first column of the prepared test data). An
# empty file 'userdict' is created in the current directory for the duration
# of the run and removed afterwards.
# (A timed variant, 'time -p $(SPELL) -u8 -0 -m $(SPELLERLEX)' with stderr sent
# to $@.time, used to be here but is disabled.)
.PRECIOUS: tmp/sp-%-pl-$(GTLANG).txt.out
tmp/sp-%-pl-$(GTLANG).txt.out: tmp/sp-%-pl-$(GTLANG).txt
	@echo ; echo "*** Running $* $(TESTTOOL) test - $(GTLANG)  ***" ; echo
	touch userdict
	$(SPELL) -u8 -0 -d -m $(SPELLERLEX) < $<.c1 > $@
	rm -f userdict
# Record the speller tool version: whatever follows the last '/' of the
# version output is stored.
	$(SPELL) --version 2>&1 | rev | cut -d'/' -f1 | rev > tmp/sp-$(TESTTOOL)-version.txt

# ... speller in MS Word through AppleScript.
# Due to MS Word's definition of a word, words containing any of the
# characters - . : cannot be part of the input data :-(
#
# NB! This target can only run on Macs with MS Word installed!
# For best behaviour, uncheck Preferences>General>Show Gallery at Startup
# in MS Word.
.PRECIOUS: tmp/sp-%-mw-$(GTLANG).txt.out
tmp/sp-%-mw-$(GTLANG).txt.out: tmp/sp-%-mw-$(GTLANG).txt \
		$(SCRIPTDIR)/spellcheckWithMSWord.applescript
	@echo ; echo "*** Running $* $(TESTTOOL) test - $(GTLANG)  ***" ; echo
# Drop the words MS Word cannot handle, put all words on one space-separated
# line and convert to UTF-16:
	@grep -v '[-.:]'  $<.c1 | tr '\n' ' ' | iconv -f UTF-8 -t UTF-16 > $<.c1.utf16
# Put the same text (as UTF-8) on the clipboard:
	iconv -f UTF-16 -t UTF-8 < $<.c1.utf16 | pbcopy
# Arguments: language, input file, output file, tool version file, Word version:
	osascript $(SCRIPTDIR)/spellcheckWithMSWord.applescript $(GTLANG) \
		`pwd`/$<.c1.utf16 `pwd`/$@.utf16 \
		`pwd`/tmp/sp-$(TESTTOOL)-version.txt $(WORDVERSION)
# Convert the result back to UTF-8 and remove the UTF-16 intermediate:
	@iconv -f UTF-16 -t UTF-8 $@.utf16 > $@
	@rm -f $@.utf16

# ...hunspell command line speller. The dictionary is
# $(GTLANG)/hunspell/$(GTLANG), the input is <prerequisite>.c1 (the first
# column of the prepared test data).
.PRECIOUS: tmp/sp-%-hu-$(GTLANG).txt.out
tmp/sp-%-hu-$(GTLANG).txt.out: tmp/sp-%-hu-$(GTLANG).txt
	@echo ; echo "*** Running $* $(TESTTOOL) test - $(GTLANG)  ***" ; echo
# Record the speller tool version (first line of the version output):
	hunspell --version | head -n 1 > tmp/sp-$(TESTTOOL)-version.txt
# Run the actual speller:
	hunspell -a --check-url -d $(GTLANG)/hunspell/$(GTLANG) $<.c1 > $@

# ...voikko command line speller, malaga backend (Finnish only).
# Input is <prerequisite>.c1 (the first column of the prepared test data).
.PRECIOUS: tmp/sp-%-vkmalaga-$(GTLANG).txt.out
tmp/sp-%-vkmalaga-$(GTLANG).txt.out: tmp/sp-%-vkmalaga-$(GTLANG).txt
	@echo ; echo "*** Running $* $(TESTTOOL) test - $(GTLANG)  ***" ; echo
# Run the actual speller:
	@voikkospell -s ignore_dot=1 < $<.c1 > $@
# Record the speller tool version:
	@voikkospell --version > tmp/sp-$(TESTTOOL)-version.txt

# ...voikko command line speller, hfst backend (any hfst language).
# The dictionary for $(GTLANG) is looked up in $(GTLANG)/voikko/; input is
# <prerequisite>.c1 (the first column of the prepared test data).
.PRECIOUS: tmp/sp-%-vkhfst-$(GTLANG).txt.out
tmp/sp-%-vkhfst-$(GTLANG).txt.out: tmp/sp-%-vkhfst-$(GTLANG).txt
	@echo ; echo "*** Running $* $(TESTTOOL) test - $(GTLANG)  ***" ; echo
# Run the actual speller:
	@voikkospell -s -d $(GTLANG) -p $(GTLANG)/voikko/ ignore_dot=1 < $<.c1 > $@
# Record the speller tool version:
	@voikkospell --version > tmp/sp-$(TESTTOOL)-version.txt

# ...hfst command-line speller tool (library front end).
# The speller archive is $(GTLANG)/hfst/$(GTLANG)-speller.zhfst; input is
# <prerequisite>.c1 (the first column of the prepared test data).
.PRECIOUS: tmp/sp-%-hfst-$(GTLANG).txt.out
tmp/sp-%-hfst-$(GTLANG).txt.out: tmp/sp-%-hfst-$(GTLANG).txt
	@echo ; echo "*** Running $* $(TESTTOOL) test - $(GTLANG)  ***" ; echo
# Run the actual speller:
	hfst-ospell $(GTLANG)/hfst/$(GTLANG)-speller.zhfst < $<.c1 > $@
# Record the speller tool version. Only stderr is captured here.
# NOTE(review): presumably the version is printed on stderr - confirm.
	@hfst-ospell --version 2> tmp/sp-$(TESTTOOL)-version.txt

#####################################
# Prepare input data for spell-checking - these sections are specific to each
# test type.

#####################################
# Typos-test input preprocessing.
# Produces $@ (the test data) and $@.c1 (its first column, which is what the
# speller rules read as input).
tmp/sp-typos-$(TESTTOOL)-$(GTLANG).txt: $(GTLANG)/src/typos.txt
	@echo ; echo "*** Preparing typos test - $(GTLANG)  ***" ; echo
# Put the easter egg trigger first, then drop unwanted lines: lines starting
# with alphanumerics followed by a space, lines starting with # or !, and
# empty lines.
	@echo "nuvviDspeller	Divvun" | cat - $< \
		| grep -v -e '^[[:alnum:]]* ' -e '^#' -e '^!' -e '^$$' > $@
# Take the second column, split it on spaces into one word per line, skip
# single-character lines, and append each word followed by a tab (i.e. with an
# empty second column) as correct input:
	@grep -v -e '^#' -e '^$$' $< \
		| cut -f2 | tr ' ' '\n' | grep -v '^.$$' \
		| perl -ple 's/^(.*)$$/$$1\t/;' >> $@
# Extract the first column to use as input for spell checker:
	@cut -f1 $@ > $@.c1

#####################################
# Correct-corpus test input preprocessing.
# The target is declared phony and has no prerequisites, so the recipe runs
# every time the target is requested. Which corpus is read is decided when the
# makefile is parsed, from the values of DOC and BDOC (see the branches below).
# The result is $@ (easter egg trigger + preprocessed corpus words) and $@.c1
# (its first column, which is what the speller rules read as input).
#
# All sorting is done with LC_ALL=C. (This used to say LOCALE=C, but LOCALE is
# not an environment variable that influences sort, so the user's locale was
# silently used instead.)
#
# NOTE(review): since BDOC has a default value (see top of this file), a run
# with only DOC overridden takes the BDOC branch; the final 'else' branch is
# reached only when both DOC and BDOC are overridden - verify that this is the
# intended selection logic.
.PHONY: tmp/sp-$(SHORTDOC)-$(TESTTOOL)-$(GTLANG).txt
tmp/sp-$(SHORTDOC)-$(TESTTOOL)-$(GTLANG).txt:
	@echo
	@echo "*** Preparing corpus test - $(GTLANG)  ***" ;
	@echo
# If $DOC is the reserved name (see top of this file), collect all correct docs:
ifeq "x$(DOC)" "xtmp/GoldstandardTexts.txt"
	@echo "*** on ALL stable correct-texts ***" ;
	@echo
	ccat -l $(GTLANG) -a -S -ort -C -r $$GTFREE/stable/goldstandard/converted/$(GTLANG)/ \
		| LC_ALL=C sort \
		| $(SCRIPTDIR)/spell-preprocess.pl \
		> $@.tmp
# If $BDOC is the reserved name (see top of this file), collect all correct
# docs in the bound corpus:
else ifeq "x$(BDOC)" "xtmp/GoldstandardBoundTexts.txt"
	@echo "*** on ALL stable correct-texts in $$GTBOUND ***" ;
	@echo
# Make sure the dir exists before attempting to do further work:
	@if [ ! -d "$$GTBOUND/stable/goldstandard/converted/$(GTLANG)/" ] ; then \
		echo ; \
		echo "*** There is no stable goldstandard dir for $(GTLANG)! ***" ; \
		echo "*** Bailing out.                                 ***" ; \
		echo ; \
		exit 1 ; \
	fi
	ccat -l $(GTLANG) -a -S -ort -C -r $$GTBOUND/stable/goldstandard/converted/$(GTLANG)/ \
		| LC_ALL=C sort \
		| $(SCRIPTDIR)/spell-preprocess.pl \
		> $@.tmp
# Make sure we got some content, if not - bail out:
	@if [ ! -s "$@.tmp" ] ; then \
		echo ; \
		echo "*** No useful content in $$GTBOUND! ***" ; \
		echo "*** Bailing out.              ***" ; \
		echo ; \
		exit 1 ; \
	fi
# else use the specified $DOC only
else
	@echo "*** on $(DOC) only ***" ;
	@echo
	ccat -l $(GTLANG) -a -S -ort -C $(DOC) \
		| LC_ALL=C sort \
		| $(SCRIPTDIR)/spell-preprocess.pl \
		> $@.tmp
endif
# Put the easter egg trigger first, extract the first column as speller input,
# and clean up the intermediate file:
	@echo "nuvviDspeller	Divvun" | cat - $@.tmp > $@
	@cut -f1 $@ > $@.c1
	@rm -f $@.tmp

#####################################
# Regression-test input preprocessing.
# Produces $@ (the test data) and $@.c1 (its first column, which is what the
# speller rules read as input).
tmp/sp-regression-$(TESTTOOL)-$(GTLANG).txt: $(GTLANG)/$(SPTOOLDIR)/regression.txt
	@echo ; echo "*** Preparing regression test - $(GTLANG)  ***" ; echo
# Put the easter egg trigger first, then drop lines starting with
# alphanumerics followed by a space, comment lines (#) and empty lines:
	@echo "nuvviDspeller	Divvun" | cat - $< \
		| grep -v -e '^[[:alnum:]]* ' -e '^#' -e '^$$' > $@
	@cut -f1 $@ > $@.c1

#####################################
# Baseform-test input preprocessing.
# All baseforms are first run through the Polderland speller with the lexicon
# $(GTLANG)/polderland/$(MACSPELL); the words picked out of that output are
# then used as the actual test data. Produces $@ and $@.c1 (first column);
# $@.tmp and $@.unrec are left behind as intermediate files.
tmp/sp-baseform-$(TESTTOOL)-$(GTLANG).txt: tmp/$(GTLANG)-baseforms.txt
	@echo ; echo "*** Spell-checking all baseforms in the $(GTLANG) lexicon.  ***" ; echo
# Start from scratch:
	@rm -f $@ $@.tmp $@.unrec
# An empty 'userdict' file exists in the current directory during the run:
	@touch userdict
	@$(SPELL) -u8 -0 -m $(GTLANG)/polderland/$(MACSPELL) < $< > $@.tmp
	@rm -f userdict
	@echo ; echo "*** Extracting all unrecognised baseforms - $(GTLANG)       ***" ; echo
# Collect the words from the 'Gett...' lines (4th space-separated field, with
# '...' removed) and from the 'Check returns' lines (the text between the
# first pair of single quotes after the '@'):
	@grep '^Gett' $@.tmp | cut -d ' ' -f4 | perl -pe 's/\.\.\.//' > $@.unrec
	@grep 'Check returns' $@.tmp | cut -d '@' -f2 | cut -d "'" -f2 >> $@.unrec
# Unique words, each followed by a tab (empty second column); $@.tmp is reused:
	@sort -u $@.unrec | perl -ple 's/^(.*)$$/$$1\t/;' > $@.tmp
# Put the easter egg trigger first and extract the first column as speller input:
	@echo "nuvviDspeller	Divvun" | cat - $@.tmp > $@
	@cut -f1 $@ > $@.c1

# Check that all baseforms are recognised by the normative transducer.
# Every baseform is looked up in $(GTLANG)-norm.fst; the first column of each
# output line containing a question mark is written to the target file, and
# the number of such lines is printed at the end.
# norm-selftest itself is a command name, not a file, hence .PHONY.
.PHONY: norm-selftest
norm-selftest: tmp/$(GTLANG)-norm-unrec.txt
tmp/$(GTLANG)-norm-unrec.txt: tmp/$(GTLANG)-baseforms.txt \
		$(GTLANG)/bin/$(GTLANG)-norm.fst
	@echo
	@echo "*** Analyzing all baseforms in the $(GTLANG) lexicon.  ***" ;
	@echo
# grep -F matches the question mark as a fixed string; the previous pattern
# '\?' is not a portable basic regular expression.
	@lookup -q -flags mbTT -utf8 $(GTLANG)/bin/$(GTLANG)-norm.fst \
	< $< | grep -F '?' | cut -f1 > $@
	@echo
	@echo "*** Ready. Unrecognised baseforms in $@  ***" ;
	@echo
	wc -l $@

# Extract all baseforms from the LexC files.
# The Baseforms java class (run from $(LXC2SPELLDIR)/build) is applied to each
# file in $(SRCS) in turn; the collected output is sorted, made unique and
# stripped of tab characters.
# 'baseforms' itself is a command name, not a file, hence .PHONY.
# NOTE(review): the prerequisite is src/Baseforms.class while java is given the
# class path build/ - confirm where the ant build actually puts the class.
.PHONY: baseforms
baseforms: tmp/$(GTLANG)-baseforms.txt
tmp/$(GTLANG)-baseforms.txt: $(LXC2SPELLDIR)/src/Baseforms.class \
		$(SRCS)
	@echo
	@echo "*** Building list of baseforms.                       ***" ;
	@echo
# Start from scratch - the loop below appends to $@.tmp:
	@rm -f $@ $@.tmp
	@for file in $(SRCS) ; do \
		java -cp $(LXC2SPELLDIR)/build Baseforms \
			$$file >> $@.tmp ; \
	done
	@sort -u $@.tmp | tr -d '\t' > $@

# Build the baseform extraction tool by running ant on the build file in
# $(LXC2SPELLDIR):
$(LXC2SPELLDIR)/src/Baseforms.class:
	@echo ; echo "*** Building baseform extraction tool.                ***" ; echo
	ant -buildfile $(LXC2SPELLDIR)/build.xml

#####################################
# Paradigm testing input preprocessing.
# The generated paradigm file is converted by paradigms2speller.pl; the result
# gets the easter egg trigger as its first line. Produces $@ and $@.c1 (its
# first column, which is what the speller rules read as input).
# 'paradigms' itself is a command name, not a file, hence .PHONY.
.PHONY: paradigms
paradigms: tmp/sp-paradigm-$(TESTTOOL)-$(GTLANG).txt
tmp/sp-paradigm-$(TESTTOOL)-$(GTLANG).txt: \
		$(GTLANG)/testing/paradigm-$(GTLANG).txt
	@echo
	@echo "*** Preparing paradigm test - $(GTLANG)  ***" ;
	@echo
	@perl $(SCRIPTDIR)/paradigms2speller.pl $< > $@.tmp
	@echo "nuvviDspeller	Divvun" | cat - $@.tmp > $@
	@rm -f $@.tmp
	@cut -f1 $@ > $@.c1

# Generate the paradigm file. It is regenerated whenever the generator
# scripts, the word list, one of the code lists or the inverted normative
# transducer is newer. The generator script is run from within the testing
# directory:
$(GTLANG)/testing/paradigm-$(GTLANG).txt: \
		$(GTLANG)/testing/gen-paradigms.sh \
		$(GTLANG)/testing/gen-paradigms.pl \
		$(GTLANG)/testing/parawlist.txt \
		$(GTLANG)/testing/adj-codes.txt \
		$(GTLANG)/testing/noun-codes.txt \
		$(GTLANG)/testing/num-codes.txt \
		$(GTLANG)/testing/prop-codes.txt \
		$(GTLANG)/testing/verb-codes.txt \
		$(GTLANG)/bin/i$(GTLANG)-norm.fst
	@echo ; echo "*** Generating paradigms - $(GTLANG)  ***" ; echo
	@cd $(GTLANG)/testing/ && ./gen-paradigms.sh

#####################################
# Wordtype-test input preprocessing.
# Produces $@ (the test data) and $@.c1 (its first column, which is what the
# speller rules read as input).
tmp/sp-wordtype-$(TESTTOOL)-$(GTLANG).txt: $(GTLANG)/testing/speller-testbed-$(GTLANG).txt
	@echo ; echo "*** Preparing wordtype test - $(GTLANG)  ***" ; echo
# Put the easter egg trigger first, then drop lines starting with
# alphanumerics followed by a space, comment lines (#) and empty lines:
	@echo "nuvviDspeller	Divvun" | cat - $< \
		| grep -v -e '^[[:alnum:]]* ' -e '^#' -e '^$$' > $@
# Take the second column, split it on spaces into one word per line, skip
# single-character lines, and append each word followed by a tab (i.e. with an
# empty second column) as correct input:
	@grep -v -e '^#' -e '^$$' $< \
		| cut -f2 | tr ' ' '\n' | grep -v '^.$$' \
		| perl -ple 's/^(.*)$$/$$1\t/;' >> $@
# Extract the first column to use as input for spell checker:
	@cut -f1 $@ > $@.c1

#####################################
# Hyphenation testing starts here.
# hyphtest runs both hyphenation tests; each short-hand target requests the
# corresponding Forrest report. All three are command names, not files:
.PHONY: hyphtest hyphregression hyphwordtypes
hyphtest: hyphregression hyphwordtypes

hyphregression: \
	$(HYPTESTREPDIR)/regression-$(TESTTOOL)-forrest-$(GTLANG)-$(DATE)-$(TESTTIME).xml

hyphwordtypes: \
	$(HYPTESTREPDIR)/wordtypes-$(TESTTOOL)-forrest-$(GTLANG)-$(DATE)-$(TESTTIME).xml

#####################################
# The final target is a Forrest doc containing the results and some calculated
# statistics. The plain XML test result is transformed with
# hyphtest2document.xsl (the language and the test type, i.e. the stem, are
# passed as string parameters) and the output is formatted by xmllint:
$(HYPTESTREPDIR)/%-$(TESTTOOL)-forrest-$(GTLANG)-$(DATE)-$(TESTTIME).xml: \
		$(HYPTESTREPDIR)/%-$(TESTTOOL)-$(GTLANG)-$(DATE)-$(TESTTIME).xml \
		$(TESTXSLDIR)/hyphtest2document.xsl
	@echo ; echo "*** Converting to ForrestDoc. Output in:              ***" ; echo
	@echo "$@" ; echo
	@xsltproc --param testlang "'$(GTLANG)'" --param testtype "'$*'" \
		$(TESTXSLDIR)/hyphtest2document.xsl $< \
		| xmllint --encode UTF-8 --output $@ --format -

#####################################
# Convert the hyphenation test results to a simple but sufficient xml format.
# Inputs are the prepared test data and the hyphenator output (<data>.out).
# The lexicon version (<data>.out.version) and the tool version file read
# below are written by the rule that ran the hyphenator.
.PRECIOUS: $(HYPTESTREPDIR)/%-$(TESTTOOL)-$(GTLANG)-$(DATE)-$(TESTTIME).xml
$(HYPTESTREPDIR)/%-$(TESTTOOL)-$(GTLANG)-$(DATE)-$(TESTTIME).xml: \
		tmp/hy-%-$(TESTTOOL)-$(GTLANG).txt \
		tmp/hy-%-$(TESTTOOL)-$(GTLANG).txt.out
	@echo ; echo "*** Collecting $* results, transforming to XML. Output:  ***" ; echo
	@echo "$@" ; echo
	$(SCRIPTDIR)/hyphen-testres.pl --$(TESTTOOL) \
		--input=$< --output=$<.out --document=$< \
		--date=$(DATE) \
		--version="`cat $<.out.version`" \
		--toolversion="`cat tmp/hyph-$(TESTTOOL)-version.txt`" \
		--xml=$@

#####################################
# Run the actual hyphenation test here.
# Besides the hyphenator output ($@), this rule writes $@.version (the lexicon
# version) and tmp/hyph-$(TESTTOOL)-version.txt (the tool version).
.PRECIOUS: tmp/hy-%-pl-$(GTLANG).txt.out
tmp/hy-%-pl-$(GTLANG).txt.out: tmp/hy-%-pl-$(GTLANG).txt
	@echo ; echo "*** Hyphenation testing - $(GTLANG)  ***" ; echo
# Hyphenate the words in <prerequisite>.f1 using the pattern file:
	cat $<.f1 | $(HYPHEN) -w -m $(GTLANG)/polderland/$(HYPHPATT) > $@
# Lexicon signature/version: feed the easter egg trigger to the speller (with
# the hyphenation lexicon) and keep the second field of the output line(s)
# containing 'version':
	@echo "nuvviDspeller	Divvun" > $@.verstmp
	@touch userdict
	$(SPELL) -u8 -0 -m $(GTLANG)/polderland/$(HYPHDIC) < $@.verstmp > $@.verstmp2
	grep 'version' $@.verstmp2 | cut -f2 > $@.version
	@rm -f userdict $@.verstmp $@.verstmp2
# Hyphenator tool version (stderr of --version is what gets stored):
	$(HYPHEN) --version 2> tmp/hyph-$(TESTTOOL)-version.txt

#####################################
# Prepare the hyphenation data -
# Wordtypes data (check hyphenation of a number of different word constructs).
# Produces $@ (the filtered test data) and $@.f1 (first column with all '^'
# characters removed, used as hyphenator input).
# (prepare-hyphentest used to be a prerequisite here, but is disabled.)
tmp/hy-wordtypes-$(TESTTOOL)-$(GTLANG).txt: \
		$(GTLANG)/testing/hyphenation.txt \
		$(GTLANG)/polderland/$(HYPHPATT)
	@echo ; echo "*** Preparing hyphenation test data - $(GTLANG)  ***" ; echo
# Drop comment lines, blank lines, and lines with a space somewhere before a tab:
	grep -v -e "^#" -e "^\s*$$" -e ' .*	' $< > $@
	cut -f1 $@ | tr -d '^' > $@.f1

#####################################
# Prepare the hyphenation data -
# Regression data.
# Produces $@ (the filtered test data) and $@.f1 (first column with all '^'
# characters removed, used as hyphenator input). prepare-hyphentest is listed
# as a prerequisite, so the hyphenation lexicon copy is attempted first.
tmp/hy-regression-$(TESTTOOL)-$(GTLANG).txt: \
		$(GTLANG)/polderland/hyph-regressions.txt \
		$(GTLANG)/polderland/$(HYPHPATT) \
		prepare-hyphentest
	@echo ; echo "*** Preparing hyphenation test data - $(GTLANG)  ***" ; echo
# Drop comment lines, blank lines, and lines with a space somewhere before a tab:
	grep -v -e "^#" -e "^\s*$$" -e ' .*	' $< > $@
	cut -f1 $@ | tr -d '^' > $@.f1

#####################################
# Make sure the latest lexicon hyphenation file is used: copy the downloaded
# speller lexicon to $(GTLANG)/polderland/$(HYPHDIC) if it exists. If it does
# not exist, only a hint is printed and the build continues.
# Declared phony: prepare-hyphentest is a command name, not a file.
.PHONY: prepare-hyphentest
prepare-hyphentest:
	@echo
	@echo "*** Copying hyphenation lexicon file - $(GTLANG)  ***" ;
	@echo
	if [ -f $(SPELLERLEX) ] ; then \
		cp $(SPELLERLEX) $(GTLANG)/polderland/$(HYPHDIC) ; \
	else \
		echo "No speller lexicon found. Please do: cd $$GTHOME/prooftools/ && make mslex-download" ;\
	fi

#####################################
# Make sure the latest pattern hyphenation file is used: whenever the copy in
# the shared prooftools directory is newer, copy it into the polderland
# directory of the language.
$(GTLANG)/polderland/$(HYPHPATT): $(PROOFDISTSHARED)/$(GTLANG)/hyph/$(HYPHPATT)
	@echo ; echo "*** Copying hyphenation pattern lexicon - $(GTLANG)  ***" ; echo
	cp $< $@