/* * XmlTools.java * * Oystein * ... not Johan's XmlTools.java * source: */ package aksis.alignment; import org.w3c.dom.*; import java.util.*; import javax.swing.*; // ### not xml utility, but... import java.util.regex.*; import javax.xml.transform.Transformer; import javax.xml.transform.TransformerFactory; import javax.xml.transform.OutputKeys; import javax.xml.transform.TransformerException; import javax.xml.transform.TransformerConfigurationException; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; //import javax.xml.transform.OutputKeys; import java.io.*; import org.w3c.dom.*; class XmlTools { /* ### bruker standard getTextContent() isteden. fantes kanskje ikke i johan sin tid public static String getText(Node node) { // We need to retrieve the text from elements, entity // references, CDATA sections, and text nodes; but not // comments or processing instructions int type = node.getNodeType(); if (type == Node.COMMENT_NODE || type == Node.PROCESSING_INSTRUCTION_NODE) { return ""; } StringBuffer text = new StringBuffer(); String value = node.getNodeValue(); if (value != null) text.append(value); if (node.hasChildNodes()) { NodeList children = node.getChildNodes(); for (int i = 0; i < children.getLength(); i++) { Node child = children.item(i); text.append(getText(child)); } } return text.toString(); } */ public static String getXmlContent(Node node) { // use a Transformer to convert element to xml string String xmlString = "*** error in method getXmlContent() ***"; try { // ### en Transformer kan gjenbrukes. legge den i model? TransformerFactory tFactory = TransformerFactory.newInstance(); Transformer transformer = tFactory.newTransformer(); //transformer.setOutputProperty(OutputKeys.INDENT, "yes"); transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes"); // 2006-09-19 StreamResult result = new StreamResult(new StringWriter()); DOMSource source = new DOMSource(node); transformer.transform(source, result); xmlString = result.getWriter().toString(); //System.out.println(xmlString); /* // 2006-09-19. bruker heller OMIT_XML_DECLARATION // strip away header int pos = xmlString.indexOf("?>"); if (pos > -1 ) { xmlString = xmlString.substring(pos+2); } */ // 2006-09-19. ###flyttet fra AElement sin toString(). og endret //Pattern pattern = Pattern.compile("[\\n\\r]+"); Pattern pattern = Pattern.compile("[ \\n\\r]+"); // 2006-09-19 Matcher matcher = pattern.matcher(xmlString); xmlString = matcher.replaceAll(" "); } catch (TransformerConfigurationException tce) { // Error generated by the parser System.out.println ("*** Transformer Factory error: " + tce.getMessage()); // Use the contained exception, if any Throwable x = tce; if (tce.getException() != null) { x = tce.getException(); } x.printStackTrace(); } catch (TransformerException te) { // Error generated by the parser System.out.println ("*** Transformation error: " + te.getMessage()); // Use the contained exception, if any Throwable x = te; if (te.getException() != null) { x = te.getException(); } x.printStackTrace(); } return xmlString; } //public static NodeList getElementsByTagNames(Document doc, String[] tags) { //public static NodeList getElementsByTagNames(Document doc, String[] tags) throws EmptyElementException { //public static NodeList getElementsByTagNames(Document doc, String[] tags) throws Exception { // 2006-09-22 public static NodeList getElementsByTagNames(Document doc, String[] tags, String specialCharacters) throws Exception { // 2006-09-22. 2006-10-03 // 2006-10-03. // specialCharacters contains the characters that should be stripped off words // when segmenting text into words.. // this method must know these characters to be able to discover empty alignable elements // = alignable elements containing no words // the standard Element method getElementsByTagName() // returns a NodeList of all descendant Elements // with a given tag name, in document order. // the special tag value "*" matches all tags. // so it can be used to get a list of all elements, // or all elements with a certain tag. // this method gets a list of elements satisfying more than one tag. // but it can only be used on a Document //System.out.println("getElementsByTagNames() called"); Pattern pattern = Pattern.compile("[ \\n\\r\\t]+"); // 2006-09-19. ### \t for sikkerhets skyld ### // first get a list of all elements NodeList list = doc.getElementsByTagName("*"); // then extract the desired elements to a new list List newList = new ArrayList(); //newList = null; Element nextElement; List tagList = Arrays.asList(tags); //System.out.println("list.getLength()=" + list.getLength()); for (int i = 0; i < list.getLength(); i++) { //System.out.println("i=" + i); nextElement = (Element)(list.item(i)); String tag = nextElement.getTagName(); //System.out.println("tag=" + tag); if (tagList.contains(tag)) { //System.out.println("desired"); // 2006-09-19. 2006-10-03 // test if empty, i.e, if contains no words Matcher matcher = pattern.matcher(nextElement.getTextContent()); String test = matcher.replaceAll(""); //System.out.println("test=" + test); // now after all whitespace has been removed check for remaining special characters boolean empty = true; for (int j = 0; j < test.length(); j++) { //System.out.println("j=" + j); //System.out.println("test.charAt(j)=" + test.charAt(j)); if (specialCharacters.indexOf(test.charAt(j)) == -1) { // found a non-special character empty = false; break; } } //System.out.println("test='" + test + "'"); //if (test.equals("")) { if (empty) { // the element is empty //ErrorMessage.error("Empty alignable element - not allowed"); //throw new EmptyElementException(); //throw new EmptyElementException(nextElement.getXmlContent()); // 2006-09-22 throw new Exception("empty element " + getXmlContent(nextElement)); // 2006-09-22 } // end 2006-09-19. 2006-10-03 newList.add(nextElement); // ### hvorfor går dette i bøtta? og hvordan? //System.out.println("i repeat - desired"); } } // ... //System.out.println("newList.size()=" + newList.size()); // ### slik? //return (NodeList)newList; // ### eller slik? final List finalList = newList; NodeList nodeList = new NodeList() { // return # of items in the list public int getLength() { return finalList.size(); } // return the i-th item public Node item(int index) { return (index < finalList.size()) ? (Node)finalList.get(index) : null; } }; return nodeList; } public static Node getNextRelevantSiblingElement(Node node, HashMap relevantNames) { // get nearest next sibling that // (1) is an Element, and // (2) has a name among those in relevantNames Node next = node.getNextSibling(); while (next != null) { if (next.getNodeType() == Node.ELEMENT_NODE) { if (relevantNames.containsKey(next.getNodeName())) { return next; } } next = next.getNextSibling(); } return null; } public static Node getPreviousRelevantSiblingElement(Node node, HashMap relevantNames) { // get nearest previous sibling that // (1) is an Element, and // (2) has a name among those in relevantNames Node prev = node.getPreviousSibling(); while (prev != null) { if (prev.getNodeType() == Node.ELEMENT_NODE) { if (relevantNames.containsKey(prev.getNodeName())) { return prev; } } prev = prev.getPreviousSibling(); } return null; } public static Node getRelevantAncestorElement(Node node, HashMap relevantNames) { // get the nearest ancestor that // (1) is an Element, and // (2) has a name among those in relevantNames Node parent = node.getParentNode(); while (parent != null) { if (parent.getNodeType() == Node.ELEMENT_NODE) { if (relevantNames.containsKey(parent.getNodeName())) { return parent; } } parent = parent.getParentNode(); } return null; } public static Node getRightMostDescendant(Node node) { Node temp = node.getLastChild(); // 2006-10-04. gikk feil omkring her. ser suspekt ut. setter inn test if (temp == null) { // node is leaf node. no descendants at all return null; } // end 2006-10-04 while (temp.getLastChild() != null) { temp = temp.getLastChild(); } return temp; } public static Node getRelevantLastDescendantElement(Node node, HashMap relevantNames) { // get the last (rightmost) descendant that // (1) is an Element, and // (2) has a name among those in relevantNames Node current = getRightMostDescendant(node); // 2006-10-04. gikk feil omkring her. ser suspekt ut. setter inn test if (current == null) { // node is leaf node. no descendants at all return null; } // end 2006-10-04 while (current != node) { if (current == current.getParentNode().getLastChild()) { // inspect current's parent Node parent = current.getParentNode(); if (parent.getNodeType() == Node.ELEMENT_NODE) { if (relevantNames.containsKey(parent.getNodeName())) { return parent; } } } if (current.getPreviousSibling() != null) { Node sibling = current.getPreviousSibling(); if (sibling.hasChildNodes()) { current = getRightMostDescendant(sibling); } else { current = sibling; } // inspect current if (current.getNodeType() == Node.ELEMENT_NODE) { if (relevantNames.containsKey(current.getNodeName())) { return current; } } } else { current = current.getParentNode(); } } return null; } // ### not xml utility, but... // used to search in unaligned.elements[t] public static Element getElementByIdInDefaultListModel(DefaultListModel elements, String id) { //System.out.println("-1"); for (int i=0; i