package werti.util;

import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.log4j.Logger;
import org.apache.uima.jcas.JCas;

import com.google.gson.Gson;

/**
 * JSONEnhancer produces a JSON string containing each enhanced span from a CAS
 * containing a sequence of {@code <e>} spans and Enhancements.
 *
 * @author Adriane Boyd
 *
 */
public class JSONEnhancer {

    private static final Logger log = Logger.getLogger(JSONEnhancer.class);

    // TODO: the regex at least should be moved somewhere where it can be shared
    // between this class and the EnhanceXMLAnnotator (the EnhanceXML UIMA
    // annotations are no longer useful because we've inserted the enhancements)
    //
    // NOTE(review): the previous literal, "]*)>(.*?)", was not a valid regex
    // (unmatched ')') and made Pattern.compile throw PatternSyntaxException.
    // It has been reconstructed from how the groups are used below: group 1 is
    // the attribute text of the opening <e> tag, group 2 is the span content.
    // The \b keeps tags that merely start with "e" (e.g. <em>) from matching.
    // Confirm against the markup emitted by EnhancerUtils.casToEnhanced.
    /** Matches one enhanced span; DOTALL lets the content run across line breaks. */
    private static final Pattern ENHANCE_PATTERN =
            Pattern.compile("<e\\b([^>]*)>(.*?)</e>", Pattern.DOTALL);

    /** Extracts the numeric value of an {@code id="..."} attribute. */
    private static final Pattern COUNTER_PATTERN = Pattern.compile("id=\"(\\d+)\"");

    private final JCas cas;
    private final String activity;

    /**
     * @param cCas CAS with annotations for the topic
     * @param aActivity Activity name
     */
    public JSONEnhancer(final JCas cCas, String aActivity) {
        cas = cCas;
        activity = aActivity;
    }

    /**
     * Converts a CAS with Enhancements to a JSON string of enhanced spans.
     *
     * @return JSON string of CAS including enhancements
     */
    public String enhance() {
        final String enhanced = EnhancerUtils.casToEnhanced(cas, activity);
        return enhancedToJSON(enhanced);
    }

    /**
     * Converts a string containing a sequence of enhanced spans into JSON.
     * Each span's content is stored under the integer found in the span's
     * {@code id} attribute, and the resulting map is serialized with Gson.
     *
     * <p>A span without an {@code id} attribute is stored under key 0, and a
     * later span with the same key replaces an earlier one.
     *
     * @param enhanced string to enhance
     * @return JSON string of CAS including enhancements
     */
    private String enhancedToJSON(String enhanced) {
        log.debug("Starting conversion of to HTML");

        final Map<Integer, String> newNodes = new HashMap<Integer, String>();
        final Matcher enhanceMatcher = ENHANCE_PATTERN.matcher(enhanced);

        while (enhanceMatcher.find()) {
            // find index; spans without an id attribute fall back to 0
            final Matcher counterMatcher = COUNTER_PATTERN.matcher(enhanceMatcher.group(1));
            int counter = 0;
            if (counterMatcher.find()) {
                counter = Integer.parseInt(counterMatcher.group(1));
            }

            // TODO: calling EnhancerUtils.addSpanStyle for each group is probably
            // very inefficient, need to fix preserveWhitespace problem with Jsoup
            //
            // NOTE(review): the span content is stored as-is. Earlier documentation
            // spoke of wrapping each element in an outer span, but the prefix and
            // suffix literals used here were empty strings; confirm the intended
            // wrapper markup.
            newNodes.put(counter, enhanceMatcher.group(2));
        }

        // convert the map of new nodes to JSON and return
        final Gson gson = new Gson();
        final String result = gson.toJson(newNodes);

        log.debug("Finished conversion of to HTML");
        return result;
    }
}