xquery version "3.0";
(:~
: Defines all the RestXQ endpoints used by the XForms.
:)
module namespace iskkan="http://localhost:8080/apps/restxq/iskkan";
import module namespace config="http://divvun.no/iskkan/config" at "config.xqm";
import module namespace display="http://divvun.no/iskkan/display" at "display.xql";
import module namespace functx = "http://www.functx.com" ;
import module namespace xmldb="http://exist-db.org/xquery/xmldb";
import module namespace dbutil="http://exist-db.org/xquery/dbutil" at "/db/apps/shared-resources/content/dbutils.xql";
declare namespace rest="http://exquery.org/ns/restxq";
declare namespace output="http://www.w3.org/2010/xslt-xquery-serialization";
declare namespace json="http://www.json.org";
declare option exist:serialize "method=json media-type=application/json encoding=UTF-8";
(:~
 : RestXQ endpoint: GET /iskkan/error-types, annotated as producing
 : application/json.
 :
 : Selects the spelltestresult elements under collection($config:data-root)
 : whose header/timestamp equals $timestamp, keeps the words for which both
 : original/@status and speller/@status are "error", and iterates over the
 : distinct values of their errors/error/@type attributes.
 :
 : NOTE(review): the body reads as if the XML element constructors that wrap
 : the enclosed expressions are missing from this text. Confirm against the
 : repository copy before changing any code here.
 :
 : @param $timestamp value of the "timestamp" query parameter; the annotation
 :        supplies the string "null" when the parameter is absent
 : @return one item per distinct error type
 :)
declare
%rest:GET
%rest:path("/iskkan/error-types")
%rest:query-param("timestamp", "{$timestamp}", "null")
%rest:produces("application/json")
%output:media-type("application/json")
%output:method("json")
function iskkan:types($timestamp as xs:string*) {
{
let $result := collection($config:data-root)//spelltestresult[./header/timestamp = $timestamp]
let $types := distinct-values(
$result/results/word
[./original/@status = "error"]
[./speller/@status = "error"]
/errors/error/@type)
return
for $type in $types
return
{$type}
}
};
(:~
 : RestXQ endpoint: GET /iskkan/summary-table, annotated as producing
 : application/json.
 :
 : Selects the words of the spelltestresult whose header/timestamp equals
 : $timestamp, restricted to words for which both original/@status and
 : speller/@status are "error". For every such word it emits the string value
 : of original, expected and edit_dist, followed by the string value of each
 : suggestions/suggestion child.
 :
 : The $edit_dist parameter is bound from the query string but is never read
 : in the body; the edit_dist that is emitted is the word's child element.
 :
 : NOTE(review): the body reads as if the XML element constructors that wrap
 : the enclosed expressions are missing from this text. Confirm against the
 : repository copy before changing any code here.
 :
 : @param $timestamp value of the "timestamp" query parameter; defaults to the
 :        string "null"
 : @param $edit_dist value of the "edit_dist" query parameter (unused)
 : @return one group of values per selected word
 :)
declare
%rest:GET
%rest:path("/iskkan/summary-table")
%rest:query-param("timestamp", "{$timestamp}", "null")
%rest:query-param("edit_dist", "{$edit_dist}")
%rest:produces("application/json")
%output:media-type("application/json")
%output:method("json")
function iskkan:table-json($timestamp as xs:string*, $edit_dist as xs:integer*) {
{
let $result := collection($config:data-root)
//spelltestresult[./header/timestamp = $timestamp]
/results/word
[./original/@status = "error"]
[./speller/@status = "error"]
return
for $word in $result
return (
{string($word/original)},
{string($word/expected)},
{string($word/edit_dist)},
{
for $sugg in $word/suggestions/suggestion
return
{string($sugg)}
}
)
}
};
(:~
 : RestXQ endpoint: GET /iskkan/selector, annotated as producing
 : application/json (an XML media-type annotation is commented out).
 :
 : Delegates to iskkan:ls() to describe the contents of a database collection.
 :
 : NOTE(review): the body reads as if the XML element constructor that wraps
 : the call is missing from this text. Confirm against the repository copy
 : before changing any code here.
 :
 : @param $collection value of the "collection" query parameter; defaults to
 :        "/db/apps/iskkan/data"
 : @return whatever iskkan:ls() yields for $collection
 :)
declare
%rest:GET
%rest:path("/iskkan/selector")
%rest:query-param("collection", "{$collection}", "/db/apps/iskkan/data")
(:%rest:produces("application/xml", "text/xml"):)
%rest:produces("application/json")
%output:media-type("application/json")
%output:method("json")
function iskkan:selector($collection as xs:string*) {
{
iskkan:ls($collection)
}
};
(:~
 : Recursively lists the child collections of a database collection.
 :
 : When xmldb:collection-available() reports that $collection does not exist,
 : the empty sequence is returned. Otherwise, for every name returned by
 : xmldb:get-child-collections() the function emits the child's name, the
 : result of calling itself on "$collection/$child", and, for every resource
 : returned by xmldb:get-child-resources() on that child, the resource name
 : and its full path "$collection/$child/$resource".
 :
 : Resources stored directly in $collection are not listed; only resources of
 : child collections are.
 :
 : NOTE(review): the body reads as if the XML element constructors that wrap
 : the enclosed expressions are missing from this text. Confirm against the
 : repository copy before changing any code here.
 :
 : @param $collection database path of the collection to list
 : @return the listing, declared as element()*
 :)
declare function iskkan:ls($collection as xs:string*) as element()* {
if (xmldb:collection-available($collection)) then
(
for $col in xmldb:get-child-collections($collection)
return
(
{$col},
{
iskkan:ls($collection || '/' || $col),
for $document in xmldb:get-child-resources($collection || '/' || $col)
return
{$document}
{$collection || '/' || $col || '/' || $document}
}
)
)
else ()
};
(:~
 : RestXQ endpoint: GET /graph, annotated as producing application/json.
 :
 : For every spelltestresult whose header/timestamp equals $timestamp, takes
 : the words for which both original/@status and speller/@status are "error"
 : and the distinct errors/error/@type values of those words. For each
 : position bucket 1 to 6 and each error type it counts the words that carry
 : an error of that type and whose position equals the bucket; bucket 6
 : instead collects every word whose position is 6 or greater.
 :
 : NOTE(review): the path is "/graph", while the other endpoints in this
 : module are registered under "/iskkan/...". Confirm whether that is
 : intentional.
 : NOTE(review): the body reads as if the XML element constructors that wrap
 : the enclosed expressions are missing from this text. Confirm against the
 : repository copy before changing any code here.
 :
 : @param $timestamp value of the "timestamp" query parameter; defaults to the
 :        string "null"
 : @return per position bucket, a label and one count per error type
 :)
declare
%rest:GET
%rest:path("/graph")
%rest:query-param("timestamp", "{$timestamp}", "null")
%rest:produces("application/json")
%output:media-type("application/json")
%output:method("json")
function iskkan:graph($timestamp as xs:string*) {
{
for $result in collection($config:data-root)//spelltestresult[./header/timestamp = $timestamp]
let $words := $result/results/word[./original/@status = "error"][./speller/@status = "error"]
let $types := distinct-values($words/errors/error/@type)
return
for $pos in 1 to 6
return
Position {$pos},
{
for $type in $types
let $hits :=
if ($pos ne 6) then $words[./errors/error/@type=$type][./position=$pos]
else $words[./errors/error/@type=$type][./position>=$pos]
return
T {string($type)}
{count($hits)}
}
}
};
(:~
 : RestXQ endpoint: GET /iskkan/results, annotated as producing
 : application/json.
 :
 : Iterates over every header element found under
 : collection($config:data-root) and, for each one, emits its timestamp, lang
 : and testtype children, the string value of engine/@abbreviation, and its
 : lexicon and document children.
 :
 : The outer loop binds one header at a time, so the inner
 : "for $header in $headers" runs exactly once per outer iteration. $lang is
 : bound but never read.
 :
 : NOTE(review): the body reads as if the XML element constructors that wrap
 : the enclosed expressions are missing from this text. Confirm against the
 : repository copy before changing any code here.
 :
 : @return one group of header fields per header element
 :)
declare
%rest:GET
%rest:path("/iskkan/results")
%rest:produces("application/json")
%output:media-type("application/json")
%output:method("json")
function iskkan:results() {
{
for $headers in collection($config:data-root)//header
return
{
for $header in $headers
let $lang := $header/lang,
$engine := string($header/engine/@abbreviation)
return (
$header/timestamp,
$header/lang,
$header/testtype,
{$engine},
$header/lexicon,
$header/document)
}
}
};
(:~
 : RestXQ endpoint: GET /iskkan/summary-json, annotated as producing
 : application/json.
 :
 : Calls local:summary($timestamp) and reads these entries from the returned
 : map: 'precision', 'recall', 'accuracy', 'words', the 'correct' and 'error'
 : entries of the nested 'original' and 'speller' maps, and the 'true' and
 : 'false' entries of the nested 'positives' and 'negatives' maps. It then
 : appends the results of local:suggestionsummary() and
 : local:suggestionsummary-nosugg(), both applied to the 'tp' entry.
 :
 : $invocation is used as a function, which requires it to be exactly one
 : map, whereas local:summary() yields one map per matching spelltestresult.
 : NOTE(review): this presumably relies on timestamps being unique - confirm.
 :
 : NOTE(review): the body reads as if the XML element constructors that wrap
 : the enclosed expressions are missing from this text. Confirm against the
 : repository copy before changing any code here.
 :
 : @param $timestamp value of the "timestamp" query parameter; defaults to the
 :        string "null"
 : @return the summary figures followed by the suggestion statistics
 :)
declare
%rest:GET
%rest:path("/iskkan/summary-json")
%rest:query-param("timestamp", "{$timestamp}", "null")
%rest:produces("application/json")
%output:media-type("application/json")
%output:method("json")
function iskkan:summary-json($timestamp as xs:string*) {
let $invocation := local:summary($timestamp)
return (
precision{$invocation('precision')}
recall{$invocation('recall')}
accuracy{$invocation('accuracy')}
{$invocation('words')}
{$invocation('original')('correct')}
{$invocation('original')('error')}
{$invocation('speller')('correct')}
{$invocation('speller')('error')}
{$invocation('positives')('true')}
{$invocation('positives')('false')}
{$invocation('negatives')('true')}
{$invocation('negatives')('false')}
{local:suggestionsummary($invocation('tp')),
local:suggestionsummary-nosugg($invocation('tp'))}
)
};
(:~
 : RestXQ endpoint: GET /iskkan/suggestion-graph, annotated as producing
 : application/json.
 :
 : For the results element of every spelltestresult whose header/timestamp
 : equals $timestamp, takes the words for which both original/@status and
 : speller/@status are "error". For each position 1 to 6 and each edit
 : distance 1 to 3 it emits the number of those words whose position and
 : edit_dist equal the pair exactly, together with the edit distance. The
 : collected sequence is handed to local:return-resource().
 :
 : Every bucket here is an exact match; no bucket collects values above 6 or
 : above 3.
 :
 : NOTE(review): local:return-resource() is not defined in the part of the
 : module visible here - confirm where it comes from.
 : NOTE(review): the body reads as if the XML element constructors that wrap
 : the enclosed expressions are missing from this text. Confirm against the
 : repository copy before changing any code here.
 :
 : @param $timestamp value of the "timestamp" query parameter; defaults to the
 :        string "null"
 : @return the result of local:return-resource() for each matching test result
 :)
declare
%rest:GET
%rest:path("/iskkan/suggestion-graph")
%rest:query-param("timestamp", "{$timestamp}", "null")
%rest:produces("application/json")
%output:media-type("application/json")
%output:method("json")
function iskkan:suggestion-graph($timestamp as xs:string*) {
for $testresult in collection($config:data-root)//spelltestresult[./header/timestamp = $timestamp]/results
let $tp := $testresult/word[./original/@status = "error"][./speller/@status = "error"]
(:return {count($tp)}:)
let $result := (
for $pos in 1 to 6
let $pos_set := $tp[./position = $pos]
return
(
for $ed in 1 to 3
let $hits := $pos_set[./edit_dist = $ed]
return
{count($hits)}
{$ed}
)
)
return local:return-resource($result)
};
(:~
 : Breaks a set of word elements down by edit distance and suggestion
 : position and reports each bucket as a percentage of the whole set.
 :
 : Edit distances 1 and 2 and positions 1 to 5 are exact-match buckets; edit
 : distance 3 collects every word with edit_dist of 3 or more, and position 6
 : collects every word with position of 6 or more.
 :
 : When $el is empty every percentage is reported as 0. Dividing by the zero
 : word count would otherwise raise err:FOAR0001.
 :
 : NOTE(review): the three output lines inside the final return are kept
 : exactly as found; they read as if the XML element constructors that wrap
 : them are missing from this text. Confirm against the repository copy.
 :
 : @param $el the word elements to summarise
 : @return for each of the 3 x 6 buckets: a position label, an edit-distance
 :         label and the percentage rounded to two decimals
 :)
declare
%private
function local:suggestionsummary($el as element()*) {
let $word-count := count($el)
return
for $dist in 1 to 3
return
for $p in 1 to 6
let $hits :=
    if ($dist ne 3 and $p ne 6) then
        (: 1-2 1-5:)
        $el[./edit_dist=$dist][./position=$p]
    else if ($dist eq 3 and $p eq 6) then
        (: 3 6:)
        $el[./edit_dist>=$dist][./position>=$p]
    else if ($dist eq 3) then
        (: 3 1-5:)
        $el[./edit_dist>=$dist][./position=$p]
    else
        (: 1-2 6:)
        $el[./edit_dist=$dist][./position>=$p]
(: computed outside the output lines so an empty input cannot divide by zero :)
let $percentage :=
    if ($word-count eq 0) then 0
    else round-half-to-even((count($hits) div $word-count) * 100, 2)
return (
{"Position " || $p} (: 6 => more than 5 :)
{"Editing distance=" || $dist}
{$percentage}
)
};
(:~
 : Reports, per edit distance, the share of word elements for which the
 : expected form was not among the suggestions: words that have no position
 : child at all, or whose position is 0.
 :
 : Edit distances 1 and 2 are exact-match buckets; edit distance 3 collects
 : every word with edit_dist of 3 or more. A bucket is labelled
 : "Wrong suggestions" when any of its words has a suggestions child and
 : "No suggestions" otherwise.
 :
 : The missing-position test uses empty(./position). The earlier form,
 : exists(./position) eq false, compared against a child element named
 : "false" rather than the boolean false(), so it never matched and words
 : without a position were silently left out.
 :
 : When $el is empty every percentage is reported as 0. Dividing by the zero
 : word count would otherwise raise err:FOAR0001.
 :
 : NOTE(review): the three output lines inside the final return are kept
 : exactly as found; they read as if the XML element constructors that wrap
 : them are missing from this text. Confirm against the repository copy.
 :
 : @param $el the word elements to summarise
 : @return for each of the 3 edit-distance buckets: the label, an
 :         edit-distance label and the percentage rounded to two decimals
 :)
declare
%private
function local:suggestionsummary-nosugg($el as element()*) {
let $word-count := count($el)
(: words without a usable suggestion position; the same for every bucket :)
let $unplaced := $el[empty(./position) or xs:integer(./position) eq 0]
return
for $dist in 1 to 3
let $hits :=
    if ($dist ne 3) then
        $unplaced[./edit_dist=$dist]
    else
        $unplaced[./edit_dist>=$dist]
let $sugg := if(exists($hits/suggestions))
    then "Wrong suggestions"
    else "No suggestions"
let $percentage :=
    if ($word-count eq 0) then 0
    else round-half-to-even((count($hits) div $word-count) * 100, 2)
return (
{$sugg}
{"Editing distance=" || $dist}
{$percentage}
)
};
(:
:)
(:~
 : Builds the summary statistics for every spelltestresult whose
 : header/timestamp equals $timestamp.
 :
 : Words are classified by comparing original/@status with speller/@status:
 :   true positive  - original "error",   speller "error"
 :   true negative  - original "correct", speller "correct"
 :   false negative - original "error",   speller "correct"
 :   false positive - original "correct", speller "error"
 :
 : Each returned map holds:
 :   'words'     - number of word elements
 :   'tp'        - the true-positive word elements themselves
 :   'original'  - map with the 'correct' and 'error' counts by original status
 :   'speller'   - map with the 'correct' and 'error' counts by speller status
 :   'positives' - map with the 'true' and 'false' positive counts
 :   'negatives' - map with the 'true' and 'false' negative counts
 :   'precision', 'recall', 'accuracy' - percentages rounded to two decimals
 :
 : A percentage whose denominator is zero is reported as 0; dividing by the
 : zero count would otherwise raise err:FOAR0001 and fail the whole request.
 :
 : NOTE(review): the map constructors use the ':=' entry separator that the
 : rest of this module relies on; XQuery 3.1 specifies ':'. Confirm which
 : form the deployed eXist-db version expects before changing it.
 :
 : @param $timestamp timestamp identifying the test run
 : @return one map per matching spelltestresult
 :)
declare
%private
function local:summary($timestamp as xs:string*) {
for $result in collection($config:data-root)//spelltestresult[./header/timestamp = $timestamp]
let $hits := $result/results,
    $words := count($hits/word),
    $origcorr := $hits/word[./original/@status = "correct"],
    $origerr := $hits/word[./original/@status = "error"],
    $spcorr := $hits/word[./speller/@status = "correct"],
    $sperr := $hits/word[./speller/@status = "error"],
    $tp := $hits/word[./original/@status = "error"][./speller/@status = "error"],
    $tn := $hits/word[./original/@status = "correct"][./speller/@status = "correct"],
    $fn := $hits/word[./original/@status = "error"][./speller/@status = "correct"],
    $fp := $hits/word[./speller/@status = "error"][./original/@status = "correct"],
    $tp-count := count($tp),
    $tn-count := count($tn),
    $fn-count := count($fn),
    $fp-count := count($fp),
    (: each ratio is guarded so that a run without flagged, erroneous or any
       words yields 0 instead of a division-by-zero error :)
    $precision :=
        if (($tp-count + $fp-count) eq 0) then 0
        else round-half-to-even($tp-count div ($tp-count + $fp-count) * 100, 2),
    $recall :=
        if (($tp-count + $fn-count) eq 0) then 0
        else round-half-to-even($tp-count div ($tp-count + $fn-count) * 100, 2),
    $accuracy :=
        if ($words eq 0) then 0
        else round-half-to-even((($tp-count + $tn-count) div $words) * 100, 2),
    $invocation := map
    {
        'words' := $words,
        'tp' := $tp,
        'original' := map
        {
            'correct' := count($origcorr),
            'error' := count($origerr)
        },
        'speller' := map
        {
            'correct' := count($spcorr),
            'error' := count($sperr)
        },
        'positives' := map
        {
            'true' := $tp-count,
            'false' := $fp-count
        },
        'negatives' := map
        {
            'true' := $tn-count,
            'false' := $fn-count
        },
        'precision' := $precision,
        'recall' := $recall,
        'accuracy' := $accuracy
    }
return $invocation
};