# -*- encoding: utf-8 -*-
import os, sys

# Tags which, in addition to anything starting with 'PV/', are treated by
# process_crk_analysis as material that precedes the lemma.
pre_lemma_tags = [
    'RdplW',
    'RdplS',
]

# TODO: replace PV/ with user-defined regex or something
def process_crk_analysis(analysis_line):
    r""" Take an analysis line of the form "wordform<TAB>analysis", and
    return a tuple of the wordform, followed by a reformatted analysis
    in which the lemma comes first and any tags that preceded the lemma
    ('PV/...' preverbs and the tags in `pre_lemma_tags`) are moved
    directly after it.

    NB: For the purposes of redisplaying the stem in NDS, +Tpl/Lemma is
    inserted in front of the tags that followed the lemma, whenever
    there are any such tags.

        >>> process_crk_analysis("wordform\tPV/asdf+PV/bbq+lemma+POS+Type+Sg1")
        ('wordform', 'lemma+PV/asdf+PV/bbq+Tpl/Lemma+POS+Type+Sg1')
        >>> process_crk_analysis("wordform\tlemma+POS+Type+Sg1")
        ('wordform', 'lemma+Tpl/Lemma+POS+Type+Sg1')
        >>> process_crk_analysis("wordform\tPV/asdf+PV/bbq+lemma")
        ('wordform', 'lemma+PV/asdf+PV/bbq')
        >>> process_crk_analysis("wordform\tlemma")
        ('wordform', 'lemma')
        >>> result = process_crk_analysis("ninahnipan\tRdplS+nipâw+V+AI+Ind+Prs+1Sg")
        >>> result == ('ninahnipan', 'nipâw+RdplS+Tpl/Lemma+V+AI+Ind+Prs+1Sg')
        True
        >>> result = process_crk_analysis("ninanahnipan\tRdplW+RdplS+nipâw+V+AI+Ind+Prs+1Sg")
        >>> result == ('ninanahnipan', 'nipâw+RdplW+RdplS+Tpl/Lemma+V+AI+Ind+Prs+1Sg')
        True

    When every part of the analysis is pre-lemma material, no lemma can
    be identified and the analysis is returned unchanged:

        >>> process_crk_analysis("wordform\tPV/asdf+PV/bbq")
        ('wordform', 'PV/asdf+PV/bbq')

    A line without a tab is taken to be all wordform, with an empty
    analysis:

        >>> process_crk_analysis("wordform")
        ('wordform', '')

    """
    # The docstring is a raw string so that the doctest examples contain
    # the two characters `\t` rather than a literal tab, which doctest
    # would expand to spaces before running them.

    wordform, _, analysis_string = analysis_line.partition('\t')

    tag_sep = '+'

    parts = analysis_string.split(tag_sep)

    # The lemma is the first part that is neither a preverb nor one of
    # the known pre-lemma tags.
    lemma_index = None
    for index, part in enumerate(parts):
        if not (part.startswith('PV/') or part in pre_lemma_tags):
            lemma_index = index
            break

    # Nothing but pre-lemma material: there is no lemma to move to the
    # front, so hand the analysis back as it came in.
    if lemma_index is None:
        return (wordform, analysis_string)

    # Work on the split parts by position instead of searching the joined
    # string for the lemma text, so that a later tag (or a piece of a
    # pre-lemma tag) which happens to match the lemma is never mistaken
    # for it.
    reformatted_tag_parts = [parts[lemma_index]]
    reformatted_tag_parts.extend(parts[:lemma_index])

    tag = tag_sep.join(parts[lemma_index + 1:])
    if tag:
        reformatted_tag_parts.append('Tpl/Lemma')
        reformatted_tag_parts.append(tag)

    reformatted_tag = tag_sep.join(reformatted_tag_parts)

    return (wordform, reformatted_tag)

def main():
    """ Print the result of process_crk_analysis for a handful of sample
    analysis lines, as a quick manual check.

    Each print call is given a single parenthesised argument, so the
    output is the same whether print is a statement or a function.
    """
    print('--')
    # Every sample is "wordform<TAB>analysis": without the tab, the whole
    # string is taken as the wordform and the analysis comes back empty.
    print(process_crk_analysis("wordform\tPV/asdf+PV/bbq+lemma+POS+Type+Sg1"))
    # returns ('wordform', 'lemma+PV/asdf+PV/bbq+Tpl/Lemma+POS+Type+Sg1')
    print(process_crk_analysis("wordform\tlemma+POS+Type+Sg1"))
    # returns ('wordform', 'lemma+Tpl/Lemma+POS+Type+Sg1')
    print(process_crk_analysis("wordform\tPV/asdf+PV/bbq+lemma"))
    # returns ('wordform', 'lemma+PV/asdf+PV/bbq')

    print(process_crk_analysis("ninahnipan\tRdplS+nipâw+V+AI+Ind+Prs+1Sg"))
    # returns ('ninahnipan', 'nipâw+RdplS+Tpl/Lemma+V+AI+Ind+Prs+1Sg')
    print(process_crk_analysis("ninanahnipan\tRdplW+RdplS+nipâw+V+AI+Ind+Prs+1Sg"))
    # returns ('ninanahnipan', 'nipâw+RdplW+RdplS+Tpl/Lemma+V+AI+Ind+Prs+1Sg')

# Script entry point: run the demo and use whatever main() returns as the
# process exit status.
if "__main__" == __name__:
    exit_status = main()
    sys.exit(exit_status)

