# This is a makefile that builds the sme-nob translation parser # It should be rewritten with Twig or something similar instead of # the shellscript we have now # =========== Tools: ============= # LEXC = lexc -utf8 XFST = xfst -utf8 XSLT = /opt/local/share/java/saxon8.jar XQL = java net.sf.saxon.Query JARF = -jar SSH = /usr/bin/ssh # =========== Paths & files: ============= # GTHOME = ../../../gt SMETESTING = $(GTHOME)/sme/testing SMENOBMAC = deliv/macdict/objects SMENOBNAME = Nordsamisk-norsk ordbok.dictionary SMENOBZIP = smenob-mac.dictionary.tgz UPLOADDIR = sd@giellatekno.uit.no:xtdoc/sd/src/documentation/content/xdocs DOWNLOADDIR = http://www.divvun.no/static_files ADJ = adjective_smenob.xml ADV = adverb_smenob.xml NOUNC = nounCommon_smenob.xml NOUNP = nounProper_smenob.xml OTHER = other_smenob.xml VERB = verb_smenob.xml SN_XML = smenob.xml SN_XSL = smenob.xsl SN_LEXC = smenob.lexc SN_HTML = smenob.html SN_FST = smenob.fst S_DIC = sme.dic S_FST = smedic.fst SRC = src BIN = bin SCRIPTS = scripts BEGIN = @echo "*** Generating the $@-file ***" END = @echo "Done." # =========== Other: ============= # DATE = $(shell date +%Y%m%d) # fst for the sme-nob dictionary #Pseudocode: #Make a lexc file: #Print the first line: LEXICON Root #Then, for each entry, make lines of the format smelemma:firstnobtranslation # ; #Then print the result to file. #Then make xfst read it with the command read lexc. # The trick is that only the first ../tmp/smenob-save-script $(XFST) < ../tmp/smenob-save-script @rm -rf ../tmp/smenob-save-script @echo $(END) @echo # # fst for the Sámi words in the dictionary # # Pseudocode: # # Pick the lemmas, and print them to list # # Read the list into xfst # # Save as an automaton. # # The perlscript for glossing should use smenob.lexc or something similar. # # # # Create the smedic.fst-file $(S_FST): $(S_DIC) @echo $(BEGIN) @echo @printf "read text < $(BIN)/$< \n\ save stack $> \n\ quit \n" > ../tmp/smedic-save-script $(XFST) < ../tmp/smedic-save-script @rm -f ../tmp/smedic-save-script @rm -f $(BIN)/$< # # Create the sme.dic file from the smenob.xml-file $(S_DIC): $(SN_XML) @echo $(BEGIN) @java $(JARF) $(XSLT) $(BIN)/$(SN_XML) $(SCRIPTS)/get-lemma.xsl > $(BIN)/$@ @echo $(END) @echo # Create the lexc file from the smenob.xml-file (using XQuery 1.0) $(SN_LEXC): $(SN_XML) @echo $(BEGIN) @$(XQL) $(SCRIPTS)/smenob-pairs.xql smenob=../$(BIN)/$< > $(BIN)/$@ @echo $(END) @echo # Create the lexc file from the smenob.xml-file (using XSLT 2.0, which is not as nice as XQuery) # $(SN_LEXC): $(SN_XML) # @echo # $(BEGIN) # @java $(JARF) $(XSLT) $(BIN)/$< $(SCRIPTS)/smenob-pairs.xsl > $(BIN)/$@ # @echo # $(END) # @echo # Create a simple HTML file for local browsing of the whole dictionary $(SN_HTML): $(SN_XML) $(SCRIPTS)/$(SN_XSL) @echo $(BEGIN) @java $(JARF) $(XSLT) $(BIN)/$(SN_XML) $(SCRIPTS)/$(SN_XSL) > $(BIN)/$@ @echo $(END) @echo # Create the smenob.xml-file by unifing the individual parts (using XQuery 1.0) $(SN_XML): @echo $(BEGIN) @$(XQL) $(SCRIPTS)/collect-smenob-parts.xql \ adj=../$(SRC)/$(ADJ) adv=../$(SRC)/$(ADV) nounc=../$(SRC)/$(NOUNC) \ nounp=../$(SRC)/$(NOUNP) other=../$(SRC)/$(OTHER) verb=../$(SRC)/$(VERB) > $(BIN)/$@ @echo $(END) @echo # Create the smenob.xml-file by unifing the individual parts (using XSLT 2.0, which is not as nice as XQuery) # $(SN_XML): # @echo # $(BEGIN) # @java $(JARF) $(XSLT) $(SCRIPTS)/dummy.xml $(SCRIPTS)/collect-smenob-parts.xsl \ # adj=../$(SRC)/$(ADJ) adv=../$(SRC)/$(ADV) nounc=../$(SRC)/$(NOUNC) \ # nounp=../$(SRC)/$(NOUNP) other=../$(SRC)/$(OTHER) verb=../$(SRC)/$(VERB) > $(BIN)/$@ # @echo # $(END) # @echo # Target to make a MacOS X/Dictionary.app compatible dictionary bundle: macdict: macdict/smenob.xml macdict/smenob.xml: src/smenob.xml \ scripts/smenob2macdict.xsl \ scripts/add-paradigm.xsl @echo @echo "*** Extracting words from dictionary file. ***" @echo grep '(.+)<.*$$/\2 \1/' | \ grep ' [nav]$$' | grep -v '^-' | sort -u \ > $(SMETESTING)/dictwords.txt @echo @echo "*** Generating paradigms. ***" @echo cd $(SMETESTING) && ./gen-paradigms.sh dictwords.txt @echo @echo "*** Building smenob.xml for MacOS X Dictionary ***" @echo java -Xmx1024m \ org.apache.xalan.xslt.Process \ -in $< \ -out $@.tmp \ -xsl scripts/add-paradigm.xsl \ -param gtpath $(SMETESTING) @xsltproc scripts/smenob2macdict.xsl $@.tmp > $@ rm -f $@.tmp @cd deliv/macdict && make # Target to upload a MacOSX dictionary module upload-macdict: @echo @echo "*** TAR-ing and ZIP-ing smenob Mac dictionary ***" @echo tar -czf $(SMENOBZIP) "$(SMENOBMAC)/$(SMENOBNAME)" @echo @echo "*** Uploading smenob Mac dictionary to www.divvun.no ***" @echo @scp $(SMENOBZIP) $(UPLOADDIR)/static_files/$(DATE)-$(SMENOBZIP) @$(SSH) sd@divvun.no \ "cd staticfiles/ && ln -sf $(DATE)-$(SMENOBZIP) $(SMENOBZIP)" @echo @echo "*** New zip file for smenob Mac dictionary now available at: ***" @echo @echo "$(DOWNLOADDIR)/$(DATE)-$(SMENOBZIP)" @echo @echo "*** Permlink to newest version is always: ***" @echo @echo "$(DOWNLOADDIR)/$(SMENOBZIP)" @echo clean: @rm -f $(BIN)/*fst $(BIN)/*dic $(BIN)/*lexc $(BIN)/*list $(BIN)/*html $(BIN)/*xml $(BIN)/*txt