# Dictionary application configuration for the Skolt Saami (sms) instance.
#
# Top-level sections:
#   ApplicationSettings - application name, locales and HTML meta text
#   Tools               - anchors for lookup binaries and the install prefix
#   Morphology          - analyser/generator files per language
#   Languages           - language codes known to this instance
#   Dictionaries        - source/target pairs and their lexicon files
#   ReaderConfig        - per-language settings sent to the reader

ApplicationSettings:
  app_name: "Neahttadigisánit"
  app_meta_title: "Neahttadigisánit"
  short_name: "saan"
  default_locale: "fi"
  default_pair: ["sms", "fin"]
  mobile_default_pair: false
  polyglot_lookup: true
  locales_available:
    # NOTE(review): "sms" is taken to be a commented-out entry here;
    # confirm against the deployed configuration.
    # - "sms"
    - "fi"
    - "en"
    # Keep "no" quoted: a bare `no` is read as boolean false by YAML 1.1
    # parsers.
    - "no"
    - "ru"
  # TODO: language-specific sets
  meta_description: >
    Free online, mobile friendly dictionaries for Skolt Saami
  meta_keywords: >
    skolt saami, koltansaame, mobile, dictionary, free, russian

# Anchors defined here are referenced by alias in the Morphology section.
Tools:
  xfst_lookup: &LOOKUP '/usr/bin/lookup'
  hfst_lookup: &HLOOKUP '/usr/local/bin/hfst-optimized-lookup'
  opt: &OPT '/opt/smi/'

# One entry per analyser name. `file` (analyser) and `inverse_file`
# (generator) are two-element lists: the *OPT prefix followed by a
# language-specific path. How the two parts are joined is up to the
# consuming application.
Morphology:
  sms:
    tool: *LOOKUP
    file: [*OPT, '/sms/bin/analyser-dict-gt-desc.xfst']
    inverse_file: [*OPT, '/sms/bin/generator-dict-gt-norm.xfst']
    format: 'xfst'
    options:
      compoundBoundary: "+Cmp#"
      derivationMarker: "+Der"
      tagsep: '+'
      inverse_tagsep: '+'

  # Same as `sms`, except that the analyser is the "-mobile" file. This
  # name is referenced by the "mobile" input variant's short_name below.
  smsM:
    tool: *LOOKUP
    file: [*OPT, '/sms/bin/analyser-dict-gt-desc-mobile.xfst']
    inverse_file: [*OPT, '/sms/bin/generator-dict-gt-norm.xfst']
    format: 'xfst'
    options:
      compoundBoundary: "+Cmp#"
      derivationMarker: "+Der"
      tagsep: '+'
      inverse_tagsep: '+'

  fin:
    tool: *LOOKUP
    file: [*OPT, '/fin/bin/analyser-dict-gt-desc.xfst']
    inverse_file: [*OPT, '/fin/bin/generator-dict-gt-norm.xfst']
    format: 'xfst'
    options:
      compoundBoundary: "+Use/Circ#"
      derivationMarker: "+Der"
      tagsep: '+'
      inverse_tagsep: '+'

  nob:
    tool: *LOOKUP
    file: [*OPT, '/nob/bin/analyser-dict-gt-desc.xfst']
    inverse_file: [*OPT, '/nob/bin/generator-dict-gt-norm.xfst']
    format: 'xfst'
    options:
      compoundBoundary: "+Use/Circ#"
      derivationMarker: "+Der"
      tagsep: '+'
      inverse_tagsep: '+'

  # NOTE(review): unlike the other languages, rus points at
  # analyser-gt-desc / generator-gt-norm (no "-dict-" in the file name).
  # Confirm that this is intentional.
  rus:
    tool: *LOOKUP
    file: [*OPT, '/rus/bin/analyser-gt-desc.xfst']
    inverse_file: [*OPT, '/rus/bin/generator-gt-norm.xfst']
    format: 'xfst'
    options:
      compoundBoundary: "+Use/Circ#"
      derivationMarker: "+Der"
      tagsep: '+'
      inverse_tagsep: '+'

Languages:
  - iso: sms
    minority_lang: true
  - iso: eng
  - iso: fin
  - iso: rus
  - iso: nob

Dictionaries:
  - source: sms
    target: fin
    path: 'dicts/sms-all.xml'
    show_korp_search: true
    korp_search_host: "http://gtweb.uit.no/korp"
    # The variant list is anchored so that the other sms -> X pairs can
    # reuse it through the *spell_relax alias.
    input_variants: &spell_relax
      - type: "standard"
        description: !gettext "Standard"
        example: "(âčǯđǧǥǩŋ̌šž)"
        onscreen_keyboard: &SMS_KEYS
          - "ʹ"
          - "ʼ"
          - "â"
          - "č"
          - "ʒ"
          - "ǯ"
          - "đ"
          - "ǧ"
          - "ǥ"
          - "ǩ"
          - "ŋ"
          - "õ"
          - "š"
          - "ž"
          - "å"
          - "ä"
          - "ö"
        # NB: must be same as analyser name
        short_name: "sms"
      - type: "mobile"
        description: !gettext "Mobile friendly"
        example: "(aczdggknsz)"
        short_name: "smsM"
        onscreen_keyboard: *SMS_KEYS
        # aczdggknsz
        # âčǯđǧǥǩŋ̌šž

  # NOTE(review): this pair reads a lexicon under dicts/test_lexica/ while
  # the other sms -> X pairs use dicts/sms-all.xml. Confirm that this is
  # intentional.
  - source: sms
    target: eng
    path: 'dicts/test_lexica/sms.xml'
    input_variants: *spell_relax

  - source: sms
    target: rus
    show_korp_search: true
    korp_search_host: "http://gtweb.uit.no/korp"
    path: 'dicts/sms-all.xml'
    input_variants: *spell_relax

  - source: sms
    target: nob
    show_korp_search: true
    korp_search_host: "http://gtweb.uit.no/korp"
    path: 'dicts/sms-all.xml'
    input_variants: *spell_relax

  - source: fin
    target: sms
    path: 'dicts/finsms.xml'

  - source: nob
    target: sms
    path: 'dicts/nobsms.xml'

  - source: rus
    target: sms
    path: 'dicts/russms.xml'

ReaderConfig:
  sms:
    multiword_lookups: false
    # NB: this is JavaScript format, since it will be sent to the reader via
    # config. It must also be stored as a string.
    #
    # JavaScript is problematic with unicode, and \w in a regex ends up
    # meaning just the ASCII range, thus it's necessary to be very explicit.
    #
    # The strip indicator (`|-`) keeps the final line break out of the
    # pattern; with a plain `|` the stored string would end in a newline.
    word_regex: |-
      [\u00C0-\u1FFF\u2C00-\uD7FF\w´]+
    word_regex_opts: "g"