// SET WORD CLASS
//
// Map the requested word class onto its one-letter code. Anything that is
// neither "noun" nor "verb" (including a missing parameter) counts as a particle.
if (isset($_GET['word_class']) && $_GET['word_class'] === 'noun') {
    $word_class = 'N';
} elseif (isset($_GET['word_class']) && $_GET['word_class'] === 'verb') {
    $word_class = 'V';
} else {
    // particle
    $word_class = 'P';
}
?>
<?php
// NOTE(review): the closing tag above is present in the original text; the
// matching opening tag was missing from the reviewed copy and is restored here
// because everything below is PHP code. Confirm against the real file.

// Helpers are wrapped in function_exists() guards so that this script can be
// included more than once without a "cannot redeclare" fatal error.
if (!function_exists('kal_get_param')) {
    /**
     * Read one query-string parameter as a string.
     *
     * @param string $name Key to read from $_GET.
     * @return string The value, or '' when the key is missing or is not a
     *                string (e.g. the client sent "name[]=x" and PHP built an array).
     */
    function kal_get_param($name)
    {
        return (isset($_GET[$name]) && is_string($_GET[$name])) ? $_GET[$name] : '';
    }
}

if (!function_exists('kal_run_lookup')) {
    /**
     * Run a shell command, feed it the given tokens on stdin (one per line)
     * and return its standard output as a list of lines.
     *
     * The tokens never become part of the command line, so shell
     * metacharacters, glob characters and option-like words in user input are
     * passed to the program verbatim instead of being interpreted by the shell.
     * Standard error is not redirected.
     *
     * @param string $command Command line; must be built from trusted
     *                        configuration values only.
     * @param array  $tokens  Words to write to the command's stdin.
     * @return array Output lines with trailing whitespace removed, in the same
     *               shape exec() would have produced; empty array on failure.
     */
    function kal_run_lookup($command, array $tokens)
    {
        $descriptors = array(
            0 => array('pipe', 'r'), // child's stdin: we write the tokens
            1 => array('pipe', 'w'), // child's stdout: we read the results
        );

        $process = proc_open($command, $descriptors, $pipes);
        if ($process === false) {
            return array();
        }

        // Always terminate the input with a newline, as "echo" did.
        fwrite($pipes[0], implode("\n", $tokens) . "\n");
        fclose($pipes[0]);

        $raw = stream_get_contents($pipes[1]);
        fclose($pipes[1]);
        proc_close($process);

        if ($raw === false || $raw === '') {
            return array();
        }

        // The final newline terminates the last line; it is not an extra empty line.
        if (substr($raw, -1) === "\n") {
            $raw = substr($raw, 0, -1);
        }

        return array_map('rtrim', explode("\n", $raw));
    }
}

//
// Program variables
//
$xfstdir = $global['PATH_TO_XEROX'];
$fstdir  = $global['PATH_TO_FST_BIN'];
$fstchar = $global['XEROX_CHARSET'];

//
// Files for analyzer
//
// Only the transducer with linguistic tags is used. The Greenlandic-tag
// (g-kal.fst) and Danish-tag (d-kal.fst) variants, formerly selected through
// the "fst" request parameter, are disabled.
$fst = $fstdir . "/kal.fst"; // Linguistic tags

// File for generator
$ifst = $fstdir . "/ikal.fst";

//
// How to create string from form:
//
$text = kal_get_param('text');

//
// Character encoding:
//
// PHP has already undone the CGI percent-encoding when it filled $_GET, so no
// manual decoding is needed here.

// Replace plus signs with spaces. The original note says the pluses are
// wanted when generating, but analysis is always performed first, so the
// replacement is applied unconditionally.
$text = preg_replace('/\+/',' ',$text);
$text = preg_replace('/\s\s+/',' ', $text); // collapse runs of whitespace

// Remove characters regarded as unsafe from the input.
$text = preg_replace('/[;<>\*\|`&\$!#\(\)\[\]\{\}:\'\"]/',' ',$text);

// Put a space in front of punctuation so that each mark becomes its own token.
$text = preg_replace('/\?/',' ?',$text); // question mark (?)
$text = preg_replace('/\./',' .',$text); // dot (.)
$text = preg_replace('/\,/',' ,',$text); // comma (,)

// Split the text into words crudely on whitespace, then join the words back
// into a single string with exactly one space between them.
$words = preg_split('/\s+/', $text);
$allwords = implode(" ", $words);

//
// And here is where the actual lookup/lookdown gets done:
//
// 1. $allwords is split into tokens, one word per line
// 2. the tokens are written to the stdin of the lookup application
//    (which has some flags set, and which accesses kal.fst or ikal.fst)
// 3. the output lines of lookup are appended to $output and joined into $result
//
// exec() appended to an existing $output; keep that, but make sure the
// variable is an array even when no branch below runs.
if (!isset($output) || !is_array($output)) {
    $output = array();
}

$pid = kal_get_param('pid');

if ($pid === 'analyse') {
    // IF ANALYSE --> Lookup
    $output = array_merge(
        $output,
        kal_run_lookup(
            "$xfstdir/lookup -flags L\" => \"LTT -d $fst $fstchar",
            preg_split('/\s+/', $allwords, -1, PREG_SPLIT_NO_EMPTY)
        )
    );
} elseif ($pid === 'generate') {
    // WORD IN PUBLIC FORM
    // ===================
    // NOTE(review): the included script presumably performs the analysis and
    // leaves the base form in $an_word - confirm against that file.
    include($global['PATH_TO_ANALYSE']);

    // WORD IN BASEFORM
    // ================
    $baseForm = isset($an_word) ? $an_word : '';

    // FORM OF GENERATOR
    // =================
    // Append the tags chosen in the form to the base form.
    $requestedClass = kal_get_param('word_class');
    if ($requestedClass === 'noun') {
        $allwords = $baseForm . '+N'
            . kal_get_param('case')
            . kal_get_param('form')
            . kal_get_param('genitive')
            . kal_get_param('clitic');
    } elseif ($requestedClass === 'verb') {
        $allwords = $baseForm
            . kal_get_param('affix')
            . '+V'
            . kal_get_param('mood')
            . kal_get_param('person')
            . kal_get_param('objective')
            . kal_get_param('clitic');
    } elseif ($requestedClass === 'particle') {
        $allwords = $baseForm . kal_get_param('clitic');
    } else {
        $allwords = $baseForm;
    }

    // AND FINALLY WE GENERATE IT
    // IF GENERATE --> Lookdown
    // The included script may have replaced $output; make sure it is still an array.
    if (!isset($output) || !is_array($output)) {
        $output = array();
    }
    $output = array_merge(
        $output,
        kal_run_lookup(
            "$xfstdir/lookup -flags mbL\" => \"LTT -d $ifst $fstchar",
            preg_split('/\s+/', $allwords, -1, PREG_SPLIT_NO_EMPTY)
        )
    );
}

$result = implode("\n", $output);

// Now we need to parse the $result string to output the information as HTML
// This information will be directed automatically back to the user's browser for display
//
// first split the $result into solutiongroups (one solutiongroup for each input word)
// given the way that 'lookup' formats its
results, solutiongroups are separated by // two newline characters $solutiongroups = preg_split('/\n\n/', $result); // the following is basically a loop over the original input words, now // associated with their solutions foreach ($solutiongroups as $solutiongroup) { echo "\n