Made an overview of the comments containing errorinfo:

  for i in `find $GTBIG -name goldstandard -type d`; do fgrep '<!--' $i/*.xml; done | grep errorinfo | sed -e 's_.*<!--__' -e 's_-->__' | sort -u > sorted-errorinfo.txt

Process these strings:

  ./handle_comments.pl < sorted-errorinfo.txt

This results in the file huff.xml.