package werti.uima.ae; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.LogManager; import org.apache.uima.analysis_component.JCasAnnotator_ImplBase; import org.apache.uima.analysis_engine.AnalysisEngineProcessException; import org.apache.uima.jcas.JCas; import werti.uima.types.annot.EnhanceXML; /** * Annotate all <e> tags. * * @author Adriane Boyd */ public class EnhanceXMLAnnotator extends JCasAnnotator_ImplBase { private static final Logger log = LogManager.GetLogger(EnhanceXMLAnnotator.class); /** * Mark up all werti spans. */ public void process(JCas cas) throws AnalysisEngineProcessException { log.debug("Starting markup recognition"); String s = cas.getDocumentText().toLowerCase(); // was: final String s //Set fullstops after heading texts. /*s = s.replace("","."); s = s.replace("",".");*/ // regex to match the enhance spans Pattern enhancePatt = Pattern.compile("]*)?>(.*?)", Pattern.DOTALL); Matcher enhanceMatcher = enhancePatt.matcher(s); while (enhanceMatcher.find()) { // create tag for enhance start tag final EnhanceXML starttag = new EnhanceXML(cas); starttag.setBegin(enhanceMatcher.start()); starttag.setEnd(enhanceMatcher.start(2)); starttag.setTag_name("spanwertiview"); starttag.addToIndexes(); // create tag for enhance end tag final EnhanceXML endtag = new EnhanceXML(cas); endtag.setBegin(enhanceMatcher.end(2)); endtag.setEnd(enhanceMatcher.end()); endtag.setTag_name("spanwertiview"); endtag.setClosing(true); endtag.addToIndexes(); } log.debug("Finished markup recognition"); } }