# This is a sample configuration file, with some actual values to give a
# sense of what should be provided.

# As a general note, if any strings provided here need to be localized
# to individual project locales, they must be marked with the !gettext
# flag, e.g.:
#
#   some_setting: !gettext "String to translate"
#
# These will thus be extracted to the translation files.

# Non-linguistic settings.
ApplicationSettings:
  # `app_name` is what is displayed in the upper-left corner on every page.
  app_name: "Neahttadigisánit"

  # This will be the same name as PROJNAME, which is also the file name, and
  # potentially the subdomain.
  short_name: "sample"

  # This will be the default bookmark title suggested to the user. It does
  # not have to resemble the app name; the following is just a suggestion.
  mobile_bookmark_name: "NDS: Sánit"

  # The default locale must be a 2-character ISO code if one exists for the
  # language; otherwise a 3-character ISO code may be used.
  # NOTE(review): "se" does not appear in `locales_available` below --
  # confirm whether the default locale is expected to be listed there.
  default_locale: "se"

  # This is the default language pair that will be displayed to the
  # user, as a list, where the first element is the source and the
  # second element is the target language.
  default_pair: ["sme", "nob"]

  # This is the same, but will be displayed to users who navigate to the
  # page on a mobile browser.
  mobile_default_pair: ["SoMe", "nob"]

  # ## Some optional display settings. Setting these to `true` enables them.
  #
  # `grouped_nav` enables navigation to be sorted into groups by minority
  # language. This is recommended for projects with lots of language pairs and
  # dictionaries.
  grouped_nav: false

  # Also recommended for larger projects is the following setting. This uses a
  # different mobile menu style, which has separate submenus for language
  # groups, dictionaries, and subsequently variants (if there are any).
  new_mobile_nav: false

  # These are the people who will be emailed when there is an error. NB:
  # it may be useful to use aliases, e.g., my.email+alias@gmail.com, in
  # order to make filtering errors easier.
  admins_to_email:
    - "someone.goes.here@email.com"

  # Use three-character ISO codes only when there is no corresponding
  # two-character ISO code for the language.
  locales_available:
    - "sma"
    - "no"
    - "fi"

  # Meta title, description and keywords for Google search results.
  # The `>-` folded style joins wrapped lines with spaces and strips the
  # trailing newline, so the values contain no stray trailing whitespace.
  app_meta_title: >-
    Digibaakoeh gaskeviermesne
  meta_description: >-
    Free, mobile-friendly dictionaries for Olonetsian, Livonian, and Kven.
  meta_keywords: >-
    kven, olonets, olonetsian, livonian, livvi, liivi

# For default locales, these must correspond with what exists in the
# translations/ directory. In the case of languages with a 2-character ISO
# code, use that instead of the 3-character ISO code. There is no connection
# between these settings and language pair names, however, so those may be
# kept in 3-character format.


##
### Linguistic settings
##

# 
# Some helpful aliases for paths below.
Tools:
  # Location of the xfst `lookup` executable; anchored as LOOKUP so that
  # other sections can refer to it with `*LOOKUP`.
  xfst_lookup: &LOOKUP "/usr/bin/lookup"
  # Path prefix anchored as OPT; other sections refer to it with `*OPT` as
  # the first element of their file path lists.
  opt: &OPT "/opt/smi"

# Note that YAML doesn't support concatenating strings directly,
# so in this case I use list notation. See the examples below.
#
# To avoid repetition, feel free to set anchors for things like
# shared FST format settings (as demonstrated with `&sme_fst_options` and
# `<<: *sme_fst_options`). This is the YAML anchor/alias pattern.

Morphology:
  # Default setup for sme. `file` and `inverse_file` are lists of path
  # fragments: the OPT alias first, then the path beneath it.
  sme:
    tool: *LOOKUP
    file:
      - *OPT
      - "/sme/bin/analyser-dict-gt-desc.xfst"
    inverse_file:
      - *OPT
      - "/sme/bin/generator-dict-gt-norm.xfst"
    format: "xfst"
    options: &sme_fst_options
      # A string that separates compounds in analyses
      compoundBoundary: '+Cmp#'
      # A string that marks words as derivations
      derivationMarker: '+Der'
      # The character that separates parts of a tag
      tagsep: "+"
      # The character that separates parts of a tag in the inverse analyser
      inverse_tagsep: "+"

  # An example of an input variant: a different analyser file, but the same
  # generator file as sme. The `options` are merged in from the
  # `sme_fst_options` anchor rather than repeated.
  SoMe:
    tool: *LOOKUP
    file:
      - *OPT
      - "/sme/bin/analyser-dict-gt-desc-mobile.xfst"
    inverse_file:
      - *OPT
      - "/sme/bin/generator-dict-gt-norm.xfst"
    format: "xfst"
    options:
      <<: *sme_fst_options

  # nob spells out its own options instead of reusing the sme anchor; note
  # that its compound boundary string differs from the sme one.
  nob:
    tool: *LOOKUP
    file:
      - *OPT
      - "/nob/bin/analyser-dict-gt-desc.xfst"
    inverse_file:
      - *OPT
      - "/nob/bin/generator-dict-gt-norm.xfst"
    format: "xfst"
    options:
      compoundBoundary: '#+Cmp+'
      derivationMarker: '+Der'
      tagsep: "+"
      inverse_tagsep: "+"

# For now, make sure that all languages that will be available are
# defined here. For example, if only one dictionary file is defined,
# `sme-nob`, there will still need to be a definition for language names
# for `nob`.
#
Languages:
  - iso: sme
    # This ensures that, if there are multiple minority languages in a
    # project, they will be sorted into groups by this status. Input variants
    # do not need this marking.
    minority_lang: true

  - iso: SoMe
    # Flags this entry as a variant; this makes the fab process skip
    # attempting `svn up` here.
    variant: true

  - iso: nob
  - iso: fin
  - iso: sma

# Here are the actual definitions of what dictionaries are available in the
# system. What is presented in the UI also adheres to the order that is here.
#
# The first pair example here is very complex, and includes mobile spell-relax
# and Korp integration. A minimal example follows in the next definition.
#
Dictionaries:
  - source: sme
    target: nob
    path: 'dicts/sme-nob.all.xml'

    # If there are any input variants, specify them here. NB: "standard" and
    # "mobile" are special values for `type`, used in presenting mobile users
    # the correct default pair; otherwise templates and swapping on mobile
    # devices won't work. If a variant is anything other than the standard
    # or mobile, use something else.
    #
    # Note that it's also a good idea to mark the strings here with the
    # gettext marker, so that they're translated.
    input_variants:
      - type: "standard"
        # Marking this with gettext ensures that it will be extracted to .po
        # files. This will be displayed to users.
        description: !gettext "Standard"
        # Provide a _brief_ example to users of what this standard means
        example: "(<em>áčđŋšŧž</em>)"
        # This is the name for the analyzer (defined in Morphology)
        short_name: "sme"

      - type: "mobile"
        description: !gettext "Social media"
        example: !gettext "(incl. <em>acdnstz</em>)"
        # This is the name for the analyzer (defined in Morphology)
        short_name: "SoMe"

    # Following are some optional settings for integration with Korp.
    # `show_korp_search` defaults to false.
    show_korp_search: true

    # The URLs below are percent-encoded; use
    # http://meyerweb.com/eric/tools/dencoder/ if things are unreadable or
    # do not work.
    #
    # Here, whatever the user input is will be substituted into the
    # following string, at the position marked by USER_INPUT.
    wordform_search_url: &alt_korp_search
      "http://gtweb.uit.no/korp/#search=word%7CUSER_INPUT&page=0"

    # Here, whatever the input lemma is will be substituted into the
    # following string, at the position marked by INPUT_LEMMA. Decoded, the
    # search expression reads:
    #
    #   cqp|[lemma = "INPUT_LEMMA"]
    lemma_search_url: &korp_lemma_search
      "http://gtweb.uit.no/korp/#page=0&search-tab=2&search=cqp%7C%5Blemma%20%3D%20%22INPUT_LEMMA%22%5D"

    # Decoded, this delimiter reads:
    #
    #   "] [word = "
    lemma_multiword_delimiter: &korp_lemma_delim
      "%22%5D%20%5Bword%20%3D%20%22"

  # This is the most minimal example of what is needed.
  - source: nob
    target: sme
    path: 'dicts/nob-sme.all.xml'

# Everything after this point is optional. You may delete them if you do not
# need them.

# These settings are for the client-side reader bookmarklet. 
ReaderConfig:
  Settings:
    # This controls the location that lookups are performed from, if
    # it is different from the path that the project will be served
    # out of; for example, if the API must be run from another location
    # for reasons like providing SSL.
    #
    # Each path here must not include the scheme, and must have no trailing
    # slash.
    api_host: "localhost:5000/itwewina"
    media_host: "localhost:5000/itwewina"
    # Define the symbol that appears for the bookmarklet settings menu
    # (default is Á)
    reader_symbol: "ʔ"
  # NOTE(review): presumably a per-language section keyed by language code
  # (here `hdn`) -- confirm against the code that reads ReaderConfig.
  hdn:
    multiword_lookups: true
    multiword_list:
      # File format: plain text, but line comments are allowed.
      # Use %WORD% as a marker for any word these may co-occur with, i.e., if
      # the user clicks on a %WORD%, then search for these environments too.
      file: "configs/language_specific_rules/reader/hdn_multiwords.txt"
    # NB: this is JavaScript format, since it will be sent to the reader via
    # config. It must also be stored as a string.
    #
    # JavaScript is problematic with unicode, and \w in a regex ends up meaning
    # just the ASCII range, thus it's necessary to be very explicit.
    #
    # Also this example shows how to include periods and apostrophes in
    # word unit detection.
    #
    # http://regexpal.com/
    #
    # The literal block style keeps the backslashes as-is without extra
    # escaping; the `-` chomping indicator strips the final newline so that
    # the pattern does not end in a line break.
    word_regex: |-
      [\u00C0-\u1FFF\u2C00-\uD7FF\w\.']+
    word_regex_opts: "g"