ApplicationSettings:
  app_name: 'Neahttadigisánit'
  app_meta_title: 'Neahttadigisánit'
  short_name: 'saan'
  default_locale: 'fi'
  # Source/target pair; both codes appear as a source/target entry under
  # Dictionaries.
  default_pair:
    - 'sms'
    - 'fin'
  mobile_default_pair: false
  polyglot_lookup: true
  # Locale codes stay quoted: a plain `no` would be read as boolean false by
  # YAML 1.1 loaders. The 'sms' locale is currently commented out.
  locales_available:
    # - 'sms'
    - 'fi'
    - 'en'
    - 'no'
    - 'ru'
  # TODO: language-specific sets
  # Folded scalars: each value is one line of text followed by a single
  # trailing newline.
  meta_description: >
    Free online, mobile friendly dictionaries for Skolt Saami
  meta_keywords: >
    skolt saami, koltansaame, mobile, dictionary, free, russian

Tools:
  # Anchors defined here are reused through aliases in the Morphology section.
  # Aliased as *LOOKUP by the `tool` key of every analyser under Morphology.
  xfst_lookup: &LOOKUP '/usr/bin/lookup'
  # NOTE(review): &HLOOKUP is not aliased anywhere in the visible part of this
  # file — confirm whether it is still needed.
  hfst_lookup: &HLOOKUP '/usr/local/bin/hfst-optimized-lookup'
  # Aliased as *OPT: the first element of each Morphology `file` /
  # `inverse_file` list.
  opt: &OPT '/opt/smi/'

Morphology:
  # One entry per analyser. Per the note in Dictionaries, an input variant's
  # short_name must equal one of these keys (sms, smsM).
  #
  # `file` and `inverse_file` are two-element lists: the shared *OPT prefix
  # followed by a language-specific path.
  # NOTE(review): *OPT ends with '/' and every second element starts with '/';
  # if the consumer simply concatenates them the path contains '//' (harmless
  # on POSIX) — confirm how the parts are joined.
  sms:
    tool: *LOOKUP
    file:
      - *OPT
      - '/sms/bin/analyser-dict-gt-desc.xfst'
    inverse_file:
      - *OPT
      - '/sms/bin/generator-dict-gt-norm.xfst'
    format: 'xfst'
    options:
      compoundBoundary: '+Cmp#'
      derivationMarker: '+Der'
      tagsep: '+'
      inverse_tagsep: '+'
  # Same as sms except for the analyser file (…-mobile.xfst); the generator
  # file is shared with sms.
  smsM:
    tool: *LOOKUP
    file:
      - *OPT
      - '/sms/bin/analyser-dict-gt-desc-mobile.xfst'
    inverse_file:
      - *OPT
      - '/sms/bin/generator-dict-gt-norm.xfst'
    format: 'xfst'
    options:
      compoundBoundary: '+Cmp#'
      derivationMarker: '+Der'
      tagsep: '+'
      inverse_tagsep: '+'
  # fin, nob and rus use '+Use/Circ#' as compoundBoundary instead of '+Cmp#'.
  fin:
    tool: *LOOKUP
    file:
      - *OPT
      - '/fin/bin/analyser-dict-gt-desc.xfst'
    inverse_file:
      - *OPT
      - '/fin/bin/generator-dict-gt-norm.xfst'
    format: 'xfst'
    options:
      compoundBoundary: '+Use/Circ#'
      derivationMarker: '+Der'
      tagsep: '+'
      inverse_tagsep: '+'
  nob:
    tool: *LOOKUP
    file:
      - *OPT
      - '/nob/bin/analyser-dict-gt-desc.xfst'
    inverse_file:
      - *OPT
      - '/nob/bin/generator-dict-gt-norm.xfst'
    format: 'xfst'
    options:
      compoundBoundary: '+Use/Circ#'
      derivationMarker: '+Der'
      tagsep: '+'
      inverse_tagsep: '+'
  # Unlike the other languages, the rus file names have no 'dict-' component.
  rus:
    tool: *LOOKUP
    file:
      - *OPT
      - '/rus/bin/analyser-gt-desc.xfst'
    inverse_file:
      - *OPT
      - '/rus/bin/generator-gt-norm.xfst'
    format: 'xfst'
    options:
      compoundBoundary: '+Use/Circ#'
      derivationMarker: '+Der'
      tagsep: '+'
      inverse_tagsep: '+'

Languages:
  # Three-letter language codes; the same codes are used as source/target
  # under Dictionaries. Only sms carries the minority_lang flag.
  - iso: 'sms'
    minority_lang: true
  - iso: 'eng'
  - iso: 'fin'
  - iso: 'rus'
  - iso: 'nob'

Dictionaries:
  # One entry per source -> target pair. Key order within each entry is kept
  # uniform: source, target, path, Korp settings, input_variants.
  #
  # The first entry also defines the anchors &spell_relax and &SMS_KEYS, which
  # the other sms-source entries reuse through aliases, so it must stay ahead
  # of them in the file.
  - source: sms
    target: fin
    path: 'dicts/sms-all.xml'
    show_korp_search: true
    korp_search_host: "http://gtweb.uit.no/korp"
    input_variants: &spell_relax
      - type: "standard"
        description: !gettext "Standard"
        example: "(âčǯđǧǥǩŋ̌šž)"
        onscreen_keyboard: &SMS_KEYS
          - "ʹ"
          - "ʼ"
          - "â"
          - "č"
          - "ʒ"
          - "ǯ"
          - "đ"
          - "ǧ"
          - "ǥ"
          - "ǩ"
          - "ŋ"
          - "õ"
          - "š"
          - "ž"
          - "å"
          - "ä"
          - "ö"
        # NB: short_name must be the same as the analyser name, i.e. a key
        # under Morphology (sms / smsM).
        short_name: "sms"
      - type: "mobile"
        description: !gettext "Mobile friendly"
        example: "(aczdggknsz)"
        short_name: "smsM"
        onscreen_keyboard: *SMS_KEYS
        # Plain-letter input and the standard letters it presumably stands for:
        # aczdggknsz
        # âčǯđǧǥǩŋ̌šž

  # NOTE(review): this pair loads a test lexicon, whereas the other sms-source
  # pairs load dicts/sms-all.xml, and it has no Korp settings — confirm this
  # is intended.
  - source: sms
    target: eng
    path: 'dicts/test_lexica/sms.xml'
    input_variants: *spell_relax

  - source: sms
    target: rus
    path: 'dicts/sms-all.xml'
    show_korp_search: true
    korp_search_host: "http://gtweb.uit.no/korp"
    input_variants: *spell_relax

  - source: sms
    target: nob
    path: 'dicts/sms-all.xml'
    show_korp_search: true
    korp_search_host: "http://gtweb.uit.no/korp"
    input_variants: *spell_relax

  # Reverse directions: no input variants and no Korp search configured.
  - source: fin
    target: sms
    path: 'dicts/finsms.xml'

  - source: nob
    target: sms
    path: 'dicts/nobsms.xml'

  - source: rus
    target: sms
    path: 'dicts/russms.xml'


ReaderConfig:
  sms:
    multiword_lookups: false
    # word_regex is sent to the reader via config, so it is written in
    # JavaScript regex syntax and must be stored as a string.
    #
    # In a JavaScript regex, \w ends up covering only the ASCII range, so the
    # non-ASCII character ranges are listed explicitly.
    #
    # NOTE(review): the literal block scalar (|) leaves one trailing newline on
    # the value — confirm the consumer strips it before building the regex.
    word_regex: |
      [\u00C0-\u1FFF\u2C00-\uD7FF\w´]+
    word_regex_opts: 'g'