# An AWK preprocessor to pull doc comments out of Xerox lexc, twolc and VISL CG3
# files. 
#
# The doc comments are recognised by sequence of /^!! /, that is, two comment
# signs at the beginning of a line.
#
# The doc comments are assumed to be already in jspwiki markup with
# following special additions:
# - Multichar_Symbols block is handled specially and its contents are used in
#   output as lists
# - LEXICONs and Rules blocks are handled specially and their names are saved
#   for use in headings that directly follow them with @LEXNAME@ and @RULENAME@
# - a doc comment starting with $ or € and space
#   is used to denote an example, and a test case
# - a doc comment starting with $ or € without space is used to change test
#   settings
BEGIN {
    # Initialise the referable variables
    LEXNAME="@OUTSIDE_LEXICONS@";
    RULENAME="@OUTSIDE RULES@";
    CODE="@NO CODE@";
    SOMETHING_WRONG="TRUE";
}
function expand_variables(s) {
    # expand all our doc comment variables
    return gensub("@CODE@", CODE, "g", 
              gensub("@RULENAME@", RULENAME, "g",
                     gensub("@LEXNAME@", LEXNAME, "g", s)));
}
/^[[:space:]]*$/ {
    # retaining empty lines of code will greatly help excessive squeezing
    # of subsequent paragraphs
    printf("\n");
}
/^!![€$][^ ]/ {
    printf("\n__%s examples:__\n", 
           gensub("!![€$][^ ]* *", "", 1));
}
/^!!€ / {
    if (NF >= 4) {
        printf("* __%s __ {{%s }} (Eng.", $2, $3);
        for (i = 4; i <= NF; i++) {
            printf(" %s", $i);
        }
        printf(")\n");
    }
    else if (NF == 3) {
        printf("* __%s __ {{%s }}\n", $2, $3);
    }
    else if (NF == 2) {
        printf("* __%s __\n", $2);
    }
    else {
        print("* ???");
    }
}
/^!!\$ / {
    if (NF >= 4) {
        printf("* __*%s __ {{%s }} (is not standard language", $2, $3);
        for (i = 4; i <= NF; i++) {
            printf(" %s", $i);
        }
        printf(")\n");
    }
    else if (NF == 3) {
        printf("* __*%s __ {{%s }} (is not standard language)\n", $2, $3);
    }
    else if (NF == 2) {
        printf("* __*%s __ (is not standard language)\n", $2);
    }
    else {
        print("* ???");
    }
}
/^!!¥ / {
    printf("This construct is not supported anymore:\n {{{%s}}} ", $0);
}
/^[^!].*!!= / {
    CODE=gensub("!!=.*", "", 1);
    if ($0 ~ /@CODE@/)
    {
        print(expand_variables(gensub(".*!!=", "", 1)));
    }
    else
    {
        print(expand_variables(gensub("!!=", " ", 1)));
    }
}
/^[^!].*!!≈ / {
    CODE=gensub("  *", " ", "g",
           gensub("^ *", "", 1,
           gensub(" *!!≈.*", "", 1)));
    if ($0 ~ /@CODE@/)
    {
        print(expand_variables(gensub(".*!!≈", "", 1)));
    }
    else
    {
        printf("%s ", CODE);
        print(expand_variables(gensub("!!≈", " ", 1)));
    }
}
/^!! / {print(expand_variables(gensub(".*!! ", "", 1))); }
/^[^!]+!! / {print(expand_variables(gensub(".*!! ", "", 1))); }
/!!/ {SOMETHING_WRONG="FALSE";}
/^Multichar_Symbols/ {LEXNAME=$1;}
/^LEXICON / {LEXNAME=$2;}
/^"[^"]*"/ {
    RULENAME=gensub("!.*", "", 1, gensub("\"", "", "g"));
}
END {
    if (SOMETHING_WRONG=="TRUE") {
        print("There was no content!");
        exit(1);
    }
}