cat Muitalus.dis.corr.txt | grep '"<' |egrep -v '"<(\.|,|\(|\)|\-|:|;|\?|\!)>"' | cat Muitalus.dis.corr.txt | grep -v '"<' |grep '".*"' |cut -d '"' -f2- |cut -d ' ' -f1,2 |tr -d '"' |egrep -v '(PUNCT|CLB)' | wc -l 48254 grep -v '"<' Muitalus.dis.corr.txt |sed 's/^$/¢/' |tr '\n' ' ' |tr '¢' '\n' | tr '\t' ' ' | sed 's/^ //' > an_text/muitalusAnSnt.txt #alfabetisk cat Muitalus.dis.corr.txt | grep -v '"<' |grep '".*"' |cut -d '"' -f2- |egrep -v '(PUNCT|CLB)' | sed 's/ N / N¢ /' | sed 's/ V / V¢ /' | sed 's/ A / A¢ /' | sed 's/ Num / Num¢ /' | sed 's/ Adv / Adv¢ /' | sed 's/ Pron / Pron¢ /' | sed 's/ Interj / Interj¢ /' | sed 's/ Pcle / Pcle¢ /' | sed 's/ CC / CC¢ /' | sed 's/ CS / CS¢ /' | sed 's/ Po / Po¢ /' |sed 's/ Pr / Pr¢ /' |cut -d '¢' -f1 |sed 's/ TV\*//' |sed 's/ IV\*//' |sed 's/ TV//' |sed 's/ IV//' |tr -d '#"' |sort -u > an_text/lemma_alph.txt cat Muitalus.dis.corr.txt | grep '"<' |egrep -v '"<(\.|,|\(|\)|\-|:|;|\?|\!)>"' |cut -d '"' -f2 |tr -d '<>' |sort -u > an_text/wordform_alph.txt cat an_text/lemma_alph.txt |grep ' N$' > an_text/noun_alph.txt cat an_text/lemma_alph.txt |grep ' V$' > an_text/verb_alph.txt cat an_text/lemma_alph.txt |grep ' A$' > an_text/adj_alph.txt cat an_text/lemma_alph.txt |grep ' Adv$' > an_text/adv_alph.txt cat an_text/lemma_alph.txt |grep ' CC$' > an_text/cc_alph.txt cat an_text/lemma_alph.txt |grep ' CS$' > an_text/cs_alph.txt cat an_text/lemma_alph.txt |grep ' Interj$' > an_text/interj_alph.txt cat an_text/lemma_alph.txt |grep ' Num$' > an_text/num_alph.txt cat an_text/lemma_alph.txt |grep ' Pcle$' > an_text/pcle_alph.txt cat an_text/lemma_alph.txt |grep ' Po$' > an_text/po_alph.txt cat an_text/lemma_alph.txt |grep ' Pr$' > an_text/pr_alph.txt cat an_text/lemma_alph.txt |egrep ' (Pr|Po)$' | cut -d ' ' -f1 |sort -u > an_text/po_pr_alph.txt cat an_text/lemma_alph.txt |grep ' Pron$' > an_text/pron_alph.txt #frekvens cat Muitalus.dis.corr.txt | grep -v '"<' |grep '".*"' |cut -d '"' -f2- |egrep -v '(PUNCT|CLB)' | sed 's/ N / N¢ /' | sed 's/ V / V¢ /' | sed 's/ A / A¢ /' | sed 's/ Num / Num¢ /' | sed 's/ Adv / Adv¢ /' | sed 's/ Pron / Pron¢ /' | sed 's/ Interj / Interj¢ /' | sed 's/ Pcle / Pcle¢ /' | sed 's/ CC / CC¢ /' | sed 's/ CS / CS¢ /' | sed 's/ Po / Po¢ /' |sed 's/ Pr / Pr¢ /' |cut -d '¢' -f1 |sed 's/ TV\*//' |sed 's/ IV\*//' |sed 's/ TV//' |sed 's/ IV//' |tr -d '#"' |sort | uniq -c | sort -nr > an_text/lemma_freq.txt cat Muitalus.dis.corr.txt | grep '"<' |egrep -v '"<(\.|,|\(|\)|\-|:|;|\?|\!)>"' |cut -d '"' -f2 |tr -d '<>' |sort | uniq -c | sort -nr > an_text/wordform_freq.txt cat an_text/lemma_freq.txt |grep ' N$' > an_text/noun_freq.txt cat an_text/lemma_freq.txt |grep ' V$' > an_text/verb_freq.txt cat an_text/lemma_freq.txt |grep ' A$' > an_text/adj_freq.txt cat an_text/lemma_freq.txt |grep ' Adv$' > an_text/adv_freq.txt cat an_text/lemma_freq.txt |grep ' CC$' > an_text/cc_freq.txt cat an_text/lemma_freq.txt |grep ' CS$' > an_text/cs_freq.txt cat an_text/lemma_freq.txt |grep ' Interj$' > an_text/interj_freq.txt cat an_text/lemma_freq.txt |grep ' Num$' > an_text/num_freq.txt cat an_text/lemma_freq.txt |grep ' Pcle$' > an_text/pcle_freq.txt cat an_text/lemma_freq.txt |grep ' Pron$' > an_text/pron_freq.txt cat an_text/lemma_freq.txt |grep ' Po$' > an_text/po_freq.txt cat an_text/lemma_freq.txt |grep ' Pr$' > an_text/pr_freq.txt cat Muitalus.dis.corr.txt | grep -v '"<' |grep '".*"' |cut -d '"' -f2- |egrep -v '(PUNCT|CLB)' | egrep ' (Pr|Po) ' |cut -d ' ' -f1 |tr -d '"' |sort | uniq -c | sort -nr > an_text/po_pr_freq.txt #a tergo cat an_text/wordform_alph.txt |rev | sort | rev > an_text/wordform_ater.txt cat an_text/lemma_alph.txt |cut -d ' ' -f2- > two cat an_text/lemma_alph.txt |cut -d ' ' -f1 > one paste two one |rev |sort |rev |cut -f2- > lemma paste two one |rev |sort |rev |cut -f1 > pos paste lemma pos > an_text/lemma_ater.txt rm one two lemma pos cat an_text/lemma_ater.txt |grep 'N$' > an_text/noun_ater.txt cat an_text/lemma_ater.txt |grep 'V$' > an_text/verb_ater.txt cat an_text/lemma_ater.txt |grep 'A$' > an_text/adj_ater.txt cat an_text/lemma_ater.txt |grep 'Adv$' > an_text/adv_ater.txt cat an_text/lemma_ater.txt |grep 'CC$' > an_text/cc_ater.txt cat an_text/lemma_ater.txt |grep 'CS$' > an_text/cs_ater.txt cat an_text/lemma_ater.txt |grep 'Interj$' > an_text/interj_ater.txt cat an_text/lemma_ater.txt |grep 'Num$' > an_text/num_ater.txt cat an_text/lemma_ater.txt |grep 'Pcle$' > an_text/pcle_ater.txt cat an_text/lemma_ater.txt |grep 'Pron$' > an_text/pron_ater.txt cat an_text/lemma_ater.txt |grep 'Po$' > an_text/po_ater.txt cat an_text/lemma_ater.txt |grep 'Pr$' > an_text/pr_ater.txt cat an_text/po_pr_freq.txt |rev | cut -d ' ' -f1 | sort | rev > an_text/po_pr_ater.txt