# This is a sample configuration file, with some actual values to give a
# sense of what should be provided.
# As a general note, if any strings provided here need to be localized
# to individual project locales, they must be marked with the !gettext
# flag, e.g.:
#
# some_setting: !gettext "String to translate"
#
# These will thus be extracted to the translation files.
# Non-linguistic settings.
ApplicationSettings:
  # `app_name` is what is displayed in the upper-left corner on every page.
  app_name: "Neahttadigisánit"

  # This will be the same name as PROJNAME, which is also the file name, and
  # potentially the subdomain.
  short_name: "sample"

  # This will be the default bookmark title suggested to the user. It does
  # not have to resemble the app name; the following is just a suggestion.
  mobile_bookmark_name: "NDS: Sánit"

  # The default locale must be a 2-character ISO code if one exists for the
  # language; otherwise a 3-character ISO code may be used.
  # NOTE(review): "se" does not appear in `locales_available` below --
  # confirm whether the default locale is expected to be listed there.
  default_locale: "se"

  # The default language pair that will be displayed to the user, as a
  # list: the first element is the source language and the second element
  # is the target language.
  default_pair: ["sme", "nob"]

  # The same, but displayed to users who navigate to the page on a mobile
  # browser.
  mobile_default_pair: ["SoMe", "nob"]

  # ## Some optional display settings. Setting these to `true` enables them.
  #
  # `grouped_nav` enables navigation to be sorted into groups by minority
  # language. This is recommended for projects with lots of language pairs
  # and dictionaries.
  grouped_nav: false

  # Also recommended for larger projects is the following setting. This uses
  # a different mobile menu style, which has separate submenus for language
  # groups, dictionaries, and subsequently variants (if there are any).
  new_mobile_nav: false

  # These are the people who will be emailed when there is an error. NB:
  # it may be useful to use aliases, e.g., my.email+alias@gmail.com, in
  # order to make filtering errors easier.
  admins_to_email:
    - "someone.goes.here@email.com"

  # Use three-character ISO codes only when there is no corresponding
  # two-character ISO code for the language. The values are quoted so that
  # "no" stays a string instead of being read as a boolean.
  locales_available:
    - "sma"
    - "no"
    - "fi"

  # Meta description and keywords for Google search results. These are
  # folded scalars (`>`): each value is a single line followed by one
  # trailing newline.
  app_meta_title: >
    Digibaakoeh gaskeviermesne
  meta_description: >
    Free, mobile-friendly dictionaries for Olonetsian, Livonian, and Kven.
  meta_keywords: >
    kven, olonets, olonetsian, livonian, livvi, liivi
# For default locales, these must correspond with what exists in the
# translations/ directory. In the case of languages with a 2-character ISO
# code, use that instead of the 3-character ISO code. There is no connection
# between these settings and language pair names, however, so those may be
# kept in 3-character format.
##
### Linguistic settings
##
#
# Some helpful aliases for paths below.
Tools:
  # Path of the lookup tool; anchored as LOOKUP so that the `tool` entries
  # under Morphology can alias it.
  xfst_lookup: &LOOKUP '/usr/bin/lookup'
  # Path prefix anchored as OPT; the `file` and `inverse_file` lists under
  # Morphology use it as their first element.
  opt: &OPT '/opt/smi'
# Note that YAML doesn't support concatenating strings directly,
# so in this case I use list notation. See the examples below.
#
# To avoid repetition, feel free to set aliases for things like
# shared FST format settings (as demonstrated with `&sme_fst_options` and
# `<<: *sme_fst_options`). This is a YAML alias pattern.
Morphology:
  # This is the default setup for sme.
  sme:
    tool: *LOOKUP
    # Path pieces are written as a list, because YAML cannot concatenate
    # strings directly.
    file: [*OPT, '/sme/bin/analyser-dict-gt-desc.xfst']
    inverse_file: [*OPT, '/sme/bin/generator-dict-gt-norm.xfst']
    format: 'xfst'
    # Anchored so that other entries can copy these options with a merge key.
    options: &sme_fst_options
      # A string that separates compounds in analyses.
      compoundBoundary: "+Cmp#"
      # A string that marks words as derivations.
      derivationMarker: "+Der"
      # The character that separates parts of a tag.
      tagsep: '+'
      # The character that separates parts of a tag in the inverse analyser.
      inverse_tagsep: '+'

  # This is an example of an input variant. Here we use a different analyzer,
  # but the same generator. Also, `options` are copied from the sme entry.
  SoMe:
    tool: *LOOKUP
    file: [*OPT, '/sme/bin/analyser-dict-gt-desc-mobile.xfst']
    inverse_file: [*OPT, '/sme/bin/generator-dict-gt-norm.xfst']
    format: 'xfst'
    options:
      <<: *sme_fst_options

  # nob spells out its own options; note the different compound boundary.
  nob:
    tool: *LOOKUP
    file: [*OPT, '/nob/bin/analyser-dict-gt-desc.xfst']
    inverse_file: [*OPT, '/nob/bin/generator-dict-gt-norm.xfst']
    format: 'xfst'
    options:
      compoundBoundary: "#+Cmp+"
      derivationMarker: "+Der"
      tagsep: '+'
      inverse_tagsep: '+'
# For now, make sure that all languages that will be available are
# defined here. For example, if only one dictionary file is defined,
# `sme-nob`, there will still need to be a definition for language names
# for `nob`.
#
Languages:
  - iso: sme
    # This ensures that if there are multiple minority languages in a
    # project, they will be sorted into groups by this status. Input
    # variants do not need this marking.
    minority_lang: true
  - iso: SoMe
    # This makes the fab process skip attempting `svn up` here.
    variant: true
  - iso: nob
  - iso: fin
  - iso: sma
# Here are the actual definitions of what dictionaries are available in the
# system. What is presented in the UI also adheres to the order that is here.
#
# The first pair example here is very complex, and includes mobile spell-relax
# and Korp integration. A minimal example follows in the next definition.
#
Dictionaries:
  # Complex example: input variants plus Korp integration.
  - source: sme
    target: nob
    path: 'dicts/sme-nob.all.xml'

    # If there are any input variants, specify them here. NB: "standard" and
    # "mobile" are special values for `type`, used in presenting mobile
    # users the correct default pair; otherwise templates and swapping on
    # mobile devices won't work. If a variant is anything other than the
    # standard or mobile one, use something else.
    #
    # Note that it's also a good idea to mark the strings here with the
    # gettext marker, so that they're translated.
    input_variants:
      - type: "standard"
        # Marking this with gettext ensures that it will be extracted to
        # .po files. This will be displayed to users.
        description: !gettext "Standard"
        # Provide a _brief_ example to users of what this standard means.
        example: "(áčđŋšŧž)"
        # This is the name for the analyzer (defined in Morphology).
        short_name: "sme"
      - type: "mobile"
        description: !gettext "Social media"
        example: !gettext "(incl. acdnstz)"
        # This is the name for the analyzer (defined in Morphology).
        short_name: "SoMe"

    # Following are some optional settings for integration with Korp.
    # `show_korp_search` defaults to false.
    show_korp_search: true

    # The URLs below are percent-encoded; use
    # http://meyerweb.com/eric/tools/dencoder/ if things are unreadable or
    # do not work.
    #
    # Here, whatever the user input is will be replaced into the following
    # string, marked by USER_INPUT.
    wordform_search_url: &alt_korp_search
      "http://gtweb.uit.no/korp/#search=word%7CUSER_INPUT&page=0"

    # Here, whatever the input lemma is will be replaced into the following
    # string, marked by INPUT_LEMMA. The decoded `search` value is:
    #
    #   cqp|[lemma = "INPUT_LEMMA"]
    lemma_search_url: &korp_lemma_search
      "http://gtweb.uit.no/korp/#page=0&search-tab=2&search=cqp%7C%5Blemma%20%3D%20%22INPUT_LEMMA%22%5D"

    # Decoded, this delimiter is:  "] [word = "
    lemma_multiword_delimiter: &korp_lemma_delim
      "%22%5D%20%5Bword%20%3D%20%22"

  # This is the most minimal example of what is needed.
  - source: nob
    target: sme
    path: 'dicts/nob-sme.all.xml'
# Everything after this point is optional. You may delete them if you do not
# need them.
# These settings are for the client-side reader bookmarklet.
ReaderConfig:
  # NOTE(review): the nesting below (`Settings` and `hdn` as siblings, with
  # `word_regex` inside `hdn`) is reconstructed from context -- confirm
  # against what the reader expects.
  Settings:
    # This controls the location that lookups are performed from, if
    # they are different from the path that the project will be served
    # out of, for example, if the API must be run from another location
    # for reasons like providing SSL.
    #
    # Each path here must not include the scheme, and must have no trailing
    # slash.
    api_host: "localhost:5000/itwewina"
    media_host: "localhost:5000/itwewina"

    # Define the symbol that appears for the bookmarklet settings menu
    # (default is Á).
    reader_symbol: "ʔ"

  # Settings keyed by `hdn` (presumably a language code -- verify).
  hdn:
    multiword_lookups: true
    multiword_list:
      # File format: plain text, but line comments are allowed.
      # Use %WORD% as a marker for any word these may co-occur with, i.e.,
      # if the user clicks on a %WORD%, then search for these environments
      # too.
      file: "configs/language_specific_rules/reader/hdn_multiwords.txt"

    # NB: this is JavaScript format, since it will be sent to the reader via
    # config. It must also be stored as a string.
    #
    # JavaScript is problematic with unicode, and \w in a regex ends up
    # meaning just the ASCII range, thus it's necessary to be very explicit.
    #
    # Also, this example shows how to include periods and apostrophes in
    # word unit detection.
    #
    # http://regexpal.com/
    #
    # `|-` keeps the backslashes literal and strips the final newline, so
    # the pattern string is exactly the line below.
    word_regex: |-
      [\u00C0-\u1FFF\u2C00-\uD7FF\w\.']+
    word_regex_opts: "g"