Accepts a String (or a char array) as an input and stores the data in a tree structure.
For example, given the input:
<foo> <bar>baz</bar> <qux fox="jump"/> </foo>
Output will be:
XMLElement{elementName='foo', children=[XMLElement{elementName='bar', elementValue='baz'}, XMLElement{elementName='qux', attributes=[ElementAttribute{name='fox', value='jump'}]}]}
I would like to hear your criticism on design principles (SRP, DRY, KISS, etc.), readability (naming of variables, methods) and maintainability (code structure, methods) of the code you see.
As already noted in the comments of the code:
- XML provided as input must not contain any XML comments.
- Mixed data such as:
<MyElement>Some <b>Mixed</b> Data</MyElement>
is not supported.
Without further ado, let's jump into the code.
Entity classes
XMLElement.java
package xml2json2;

import java.util.ArrayList;
import java.util.List;

/**
 * One element of a parsed XML document: a tag name, an optional text value,
 * and the element's attributes and child elements in document order.
 *
 * <p>The attribute and child lists are never {@code null}; they are returned
 * live, so callers populate them via {@code getAttributes().add(...)} and
 * {@code getChildren().add(...)}.
 */
public class XMLElement {

    /** Tag name. Expected to be set by whoever builds the element. */
    private String elementName;

    /** Text content of the element; the empty string means "no text". */
    private String elementValue = "";

    /** Attributes in the order they were added; may be empty. */
    private final List<ElementAttribute> attributes = new ArrayList<>();

    /** Child elements in the order they were added; may be empty. */
    private final List<XMLElement> children = new ArrayList<>();

    /** @return the tag name, or {@code null} if it has not been set yet */
    public String getElementName() {
        return elementName;
    }

    /** @param elementName the tag name of this element */
    public void setElementName(String elementName) {
        this.elementName = elementName;
    }

    /** @return the text content; the empty string unless a value was set */
    public String getElementValue() {
        return elementValue;
    }

    /** @param elementValue the text content of this element */
    public void setElementValue(String elementValue) {
        this.elementValue = elementValue;
    }

    /** @return the live, mutable list of attributes (never {@code null}) */
    public List<ElementAttribute> getAttributes() {
        return attributes;
    }

    /** @return the live, mutable list of child elements (never {@code null}) */
    public List<XMLElement> getChildren() {
        return children;
    }

    /**
     * Renders the element as {@code XMLElement{elementName='...', ...}}.
     * The value, attributes and children are only printed when present, so a
     * bare element prints as {@code XMLElement{elementName='foo'}}.
     */
    @Override
    public String toString() {
        final StringBuilder text = new StringBuilder("XMLElement{");
        text.append("elementName='").append(elementName).append('\'');
        // Null-safe: a caller may have passed null to setElementValue().
        if (elementValue != null && !elementValue.isEmpty()) {
            text.append(", elementValue='").append(elementValue).append('\'');
        }
        if (!attributes.isEmpty()) {
            text.append(", attributes=").append(attributes);
        }
        if (!children.isEmpty()) {
            text.append(", children=").append(children);
        }
        text.append('}');
        return text.toString();
    }
}
ElementAttribute.java
package xml2json2; public class ElementAttribute { private String name; private String value; public String getName() { return name; } public void setName(String name) { this.name = name; } public String getValue() { return value; } public void setValue(String value) { this.value = value; } @Override public String toString() { final StringBuffer sb = new StringBuffer("ElementAttribute{"); sb.append("name='").append(name).append('\''); sb.append(", value='").append(value).append('\''); sb.append('}'); return sb.toString(); } }
Processor
XMLElementTreeBuilderImpl.java
package xml2json2; // References: // XML Spec : https://www.liquid-technologies.com/XML // Regex : https://regexone.com /* This tree builder does not support elements with mixed data such as: <MyElement>Some <b>Mixed</b> Data</MyElement>. Mixed data can contain text and child elements within the containing element. This is typically only used to mark up data (HTML etc). Its typically only used to hold mark-up/formatted text entered by a person, it is typically not he best choice for storing machine readable data as adds significant complexity to the parser. */ /* XML to be processed must not contain any comments! */ public class XMLElementTreeBuilderImpl { private char[] xmlArray; private int currentIndex = 0; // This class has only 2 public methods: public XMLElement buildTreeFromXML(String xml) { return buildTreeFromXML(xml.toCharArray()); } public XMLElement buildTreeFromXML(char[] arr) { this.xmlArray = arr; XMLElement root = nodeFromStringRecursively(); return root; } // Everything else below here is private, i.e. inner workings of the class.. private XMLElement nodeFromStringRecursively() { final XMLElement xmlElement = new XMLElement(); clearWhiteSpace(); if (tagStart()) { // A new XML Element is starting.. currentIndex++; final String elementName = parseStartingTagElement(); // finishes element name.. xmlElement.setElementName(elementName); } clearWhiteSpace(); // We have not closed our tag yet.. // At this point we might have attributes.. Lets add them if they exist.. while (isLetter()) { addAttribute(xmlElement); clearWhiteSpace(); } // At this point we will have one of the following in current index: // [/] -> Self closing tag.. // [>] -> Tag ending - (Data or children or starting or immediately followed by an ending tag..) if (selfClosingTagEnd()) { return xmlElement; } // At this point we are sure this element was not a self closing element.. currentIndex++; // skipping the tag close character, i.e. 
'>' // At this point we are facing one of the following cases: // Assume our starting tag was <foo> for the examples.. // 1 - [</] : Immediate tag end. "</foo>" // 2 - [\sw]+[</] : Any whitespace or any alphanumeric character, one or more repetitions, followed by tag end. "sample</foo>" // 3 - [\s]*(<![CDATA[...]]>)[\s]*[</] : Zero or more white space, followed by CDATA. followed by zero or more white space. "<![CDATA[...]]></foo> // 4 - [\s]*[<]+ : Zero or more white space, followed by one or more child start.. int currentCase = currentCase(); switch (currentCase) { case 1: // Immediate closing tag, no data to set, no children to add.. Do nothing. break; case 2: setData(xmlElement); break; case 3: setCData(xmlElement); case 4: while (currentCase() == 4) { // Add children recursively. final XMLElement childToken = nodeFromStringRecursively(); xmlElement.getChildren().add(childToken); } } walkClosingTag(); return xmlElement; } private String parseStartingTagElement() { final StringBuilder elementNameBuilder = new StringBuilder(); while (!isWhiteSpace() && !selfClosingTagEnd() && !tagEnd()) { elementNameBuilder.append(charAtCurrentIndex()); currentIndex++; } final String elementName = elementNameBuilder.toString(); return elementName; } private void addAttribute(XMLElement xmlElement) { // Attribute name.. final StringBuilder attributeNameBuilder = new StringBuilder(); while (!isWhiteSpace() && charAtCurrentIndex() != '=') { attributeNameBuilder.append(charAtCurrentIndex()); currentIndex++; } // Everything in between that is not much of interest to us.. clearWhiteSpace(); currentIndex++; // Passing the '=' clearWhiteSpace(); currentIndex++; // Passing the '"' // Attribute value.. final StringBuilder attributeValueBuilder = new StringBuilder(); while (charAtCurrentIndex() != '"') { attributeValueBuilder.append(charAtCurrentIndex()); currentIndex++; } currentIndex++; // Passing the final '"' clearWhiteSpace(); // Build the attribute object and.. 
final ElementAttribute elementAttribute = new ElementAttribute(); elementAttribute.setName(attributeNameBuilder.toString()); elementAttribute.setValue(attributeValueBuilder.toString()); // ..add the attribute to the xmlElement xmlElement.getAttributes().add(elementAttribute); } private int currentCase() { if (endTagStart()) { return 1; } if (cDataStart()) { return 3; } if (tagStart() && !endTagStart()) { return 4; } // Here we will look forward, so we need to keep track of where we actually started.. int currentIndexRollBackPoint = currentIndex; while (!endTagStart() && !cDataStart() && !tagStart()) { currentIndex++; if (endTagStart()) { currentIndex = currentIndexRollBackPoint; return 2; } if (cDataStart()) { currentIndex = currentIndexRollBackPoint; return 3; } if (tagStart() && !endTagStart()) { currentIndex = currentIndexRollBackPoint; return 4; } } throw new UnsupportedOperationException("Encountered an unsupported XML."); } private void setData(XMLElement xmlElement) { final StringBuilder dataBuilder = new StringBuilder(); while (!tagStart()) { dataBuilder.append(charAtCurrentIndex()); currentIndex++; } String data = dataBuilder.toString(); data = data.replaceAll("<", "<"); data = data.replaceAll(">", ">"); data = data.replaceAll(""", "\""); data = data.replaceAll("'", "\'"); data = data.replaceAll("&", "&"); xmlElement.setElementValue(data); } private void setCData(XMLElement xmlElement) { final StringBuilder cdataBuilder = new StringBuilder(); while (!endTagStart()) { cdataBuilder.append(charAtCurrentIndex()); currentIndex++; } String cdata = cdataBuilder.toString(); cdata = cdata.trim(); // cutting 9 chars because: <![CDATA[ cdata = cdata.substring(9, cdata.indexOf(']')); xmlElement.setElementValue(cdata); } private void walkClosingTag() { while (!tagEnd()) { currentIndex++; } currentIndex++; } // Convenience methods private void clearWhiteSpace() { while (isWhiteSpace()) { currentIndex++; } } private boolean isLetter() { return 
Character.isLetter(charAtCurrentIndex()); } private boolean isWhiteSpace() { return Character.isWhitespace(charAtCurrentIndex()); } private boolean tagStart() { return charAtCurrentIndex() == '<'; } private boolean tagEnd() { return charAtCurrentIndex() == '>'; } private boolean endTagStart() { return charAtCurrentIndex() == '<' && charAtNextIndex() == '/'; } private boolean selfClosingTagEnd() { return charAtCurrentIndex() == '/' && charAtNextIndex() == '>'; } private boolean cDataStart() { return charAtCurrentIndex() == '<' && charAtNextIndex() == '!' && xmlArray[currentIndex + 2] == '['; } private char charAtCurrentIndex() { return xmlArray[currentIndex]; } private char charAtNextIndex() { return xmlArray[currentIndex + 1]; } }
Unit Tests
package xml2json2;

import java.util.List;

/**
 * Framework-free tests for {@link XMLElementTreeBuilderImpl}.
 *
 * <p>The checks use the Java {@code assert} statement, so the JVM must be
 * started with {@code -ea}; {@link #main(String[])} refuses to run otherwise
 * instead of silently passing.
 */
public class TreeFromXMLBuilderImplTest {

    public static void main(String[] args) {
        requireAssertionsEnabled();

        selfClosingTagWithoutSpace();
        selfClosingTagWithSpace();
        selfClosingTagWithNewLine();
        emptyElementNoSpace();
        emptyElementWithSpace();
        emptyElementWithNewLine();
        selfClosingTagWithAttributeNoSpace();
        selfClosingTagWithAttributeWithSpace();
        selfClosingTagWithMultipleAttributes();
        xmlElementWithData();
        xmlElementWithAttributeAndWithData();
        xmlElementWithChild();
        sampleXMLNote();
        sampleXmlWithGrandChildren();
        withCharacterData();
        dataWithPreDefinedEntities();
    }

    /** Fails fast when assertions are disabled, because every check below would be skipped. */
    private static void requireAssertionsEnabled() {
        boolean assertionsEnabled = false;
        assert assertionsEnabled = true; // intentional side effect: only runs with -ea
        if (!assertionsEnabled) {
            throw new IllegalStateException("Assertions are disabled; run the tests with -ea.");
        }
    }

    /** Parses {@code xml} with a fresh builder so no state leaks between tests. */
    private static XMLElement parse(String xml) {
        return new XMLElementTreeBuilderImpl().buildTreeFromXML(xml);
    }

    private static void selfClosingTagWithoutSpace() {
        final XMLElement xmlElement = parse("<foo/>");
        assert xmlElement.getElementName().equals("foo") : "was: " + xmlElement.getElementName();
    }

    private static void selfClosingTagWithSpace() {
        final XMLElement xmlElement = parse("<foo />");
        assert xmlElement.getElementName().equals("foo") : "was: " + xmlElement.getElementName();
    }

    private static void selfClosingTagWithNewLine() {
        final XMLElement xmlElement = parse("<foo\n\n/>");
        assert xmlElement.getElementName().equals("foo") : "was: " + xmlElement.getElementName();
    }

    private static void emptyElementNoSpace() {
        final XMLElement xmlElement = parse("<foo></foo>");
        assert xmlElement.getElementName().equals("foo") : "was: " + xmlElement.getElementName();
    }

    private static void emptyElementWithSpace() {
        final XMLElement xmlElement = parse("<foo></foo >");
        assert xmlElement.getElementName().equals("foo") : "was: " + xmlElement.getElementName();
    }

    private static void emptyElementWithNewLine() {
        final XMLElement xmlElement = parse("<foo></foo \n\n\n>");
        assert xmlElement.getElementName().equals("foo") : "was: " + xmlElement.getElementName();
    }

    private static void selfClosingTagWithAttributeNoSpace() {
        final XMLElement xmlElement = parse("<foo bar=\"baz\"/>");
        assert xmlElement.getElementName().equals("foo") : "was: " + xmlElement.getElementName();
        final ElementAttribute attribute = xmlElement.getAttributes().iterator().next();
        assert attribute.getName().equals("bar") : "was: " + attribute.getName();
        assert attribute.getValue().equals("baz") : "was: " + attribute.getValue();
    }

    private static void selfClosingTagWithAttributeWithSpace() {
        final XMLElement xmlElement = parse("<foo bar = \"baz\" />");
        assert xmlElement.getElementName().equals("foo") : "was: " + xmlElement.getElementName();
        final ElementAttribute attribute = xmlElement.getAttributes().iterator().next();
        assert attribute.getName().equals("bar") : "was: " + attribute.getName();
        assert attribute.getValue().equals("baz") : "was: " + attribute.getValue();
    }

    private static void selfClosingTagWithMultipleAttributes() {
        final XMLElement xmlElement = parse("<foo bar=\"baz\" qux=\"booze\"/>");
        assert xmlElement.getElementName().equals("foo") : "was: " + xmlElement.getElementName();
        assert xmlElement.getAttributes().size() == 2 : "was: " + xmlElement.getAttributes().size();
        final ElementAttribute first = xmlElement.getAttributes().get(0);
        assert first.getName().equals("bar") : "was: " + first.getName();
        assert first.getValue().equals("baz") : "was: " + first.getValue();
        final ElementAttribute second = xmlElement.getAttributes().get(1);
        assert second.getName().equals("qux") : "was: " + second.getName();
        assert second.getValue().equals("booze") : "was: " + second.getValue();
    }

    private static void xmlElementWithData() {
        final XMLElement xmlElement = parse("<foo>bar</foo>");
        assert xmlElement.getElementName().equals("foo") : "was: " + xmlElement.getElementName();
        assert xmlElement.getElementValue().equals("bar") : "was: " + xmlElement.getElementValue();
    }

    private static void xmlElementWithAttributeAndWithData() {
        final XMLElement xmlElement = parse("<foo baz = \"baz\" > bar </foo>");
        assert xmlElement.getElementName().equals("foo") : "was: " + xmlElement.getElementName();
        assert xmlElement.getElementValue().equals(" bar ") : "was: " + xmlElement.getElementValue();
        final ElementAttribute attribute = xmlElement.getAttributes().get(0);
        assert attribute.getName().equals("baz") : "was: " + attribute.getName();
        assert attribute.getValue().equals("baz") : "was: " + attribute.getValue();
    }

    private static void xmlElementWithChild() {
        final XMLElement xmlElement = parse("<foo><bar></bar><tar>rat</tar><baz/></foo>");
        assert xmlElement.getElementName().equals("foo");
        assert xmlElement.getAttributes().isEmpty();
        final List<XMLElement> children = xmlElement.getChildren();
        assert children.size() == 3 : "was: " + children.size();
        assert children.get(0).getElementName().equals("bar");
        assert children.get(0).getElementValue().equals("");
        assert children.get(1).getElementName().equals("tar");
        assert children.get(1).getElementValue().equals("rat");
        assert children.get(2).getElementName().equals("baz");
        assert children.get(2).getElementValue().equals("");
    }

    private static void sampleXMLNote() {
        final String note = "<note>\n"
                + "<to>Tove</to>\n"
                + "<from>Jani</from>\n"
                + "<heading>Reminder</heading>\n"
                + "<body>Don't forget me this weekend!</body>\n"
                + "</note>";
        final XMLElement xmlElement = parse(note);

        // For visual inspection..
        System.out.println(xmlElement);

        assert xmlElement.getElementName().equals("note") : "was: " + xmlElement.getElementName();
        final List<XMLElement> children = xmlElement.getChildren();
        assert children.size() == 4 : "was: " + children.size();
        assert children.get(0).getElementName().equals("to");
        assert children.get(0).getElementValue().equals("Tove");
        assert children.get(1).getElementName().equals("from");
        assert children.get(1).getElementValue().equals("Jani");
        assert children.get(2).getElementName().equals("heading");
        assert children.get(2).getElementValue().equals("Reminder");
        assert children.get(3).getElementName().equals("body");
        assert children.get(3).getElementValue().equals("Don't forget me this weekend!");
    }

    /*
       <foo>
           <bar>
               <baz>test</baz>
           </bar>
           <qux att="tta">
               <fox>jumped</fox>
           </qux>
       </foo>
    */
    private static void sampleXmlWithGrandChildren() {
        final String sampleWithGrandChildren =
                "<foo><bar><baz>test</baz></bar><qux att=\"tta\"><fox>jumped</fox></qux></foo>";
        final XMLElement foo = parse(sampleWithGrandChildren);
        assert foo.getElementName().equals("foo");

        final List<XMLElement> children = foo.getChildren();
        assert children.size() == 2; // bar and qux

        final XMLElement bar = children.get(0);
        assert bar.getElementName().equals("bar");
        assert bar.getElementValue().equals("");
        final List<XMLElement> barChildren = bar.getChildren();
        assert barChildren.size() == 1;
        final XMLElement baz = barChildren.get(0);
        assert baz.getElementName().equals("baz");
        assert baz.getElementValue().equals("test");

        final XMLElement qux = children.get(1);
        assert qux.getAttributes().size() == 1;
        assert qux.getAttributes().get(0).getName().equals("att");
        assert qux.getAttributes().get(0).getValue().equals("tta");
        final List<XMLElement> quxChildren = qux.getChildren();
        assert quxChildren.size() == 1;
        final XMLElement fox = quxChildren.get(0);
        assert fox.getElementName().equals("fox");
        assert fox.getElementValue().equals("jumped");
    }

    private static void withCharacterData() {
        final String sampleXMLWithCData = "<foo><![CDATA[ This must be preserved!!! ]]></foo>";
        final XMLElement root = parse(sampleXMLWithCData);
        assert root.getElementName().equals("foo");
        assert root.getElementValue().equals(" This must be preserved!!! ") : "was: " + root.getElementValue();
    }

    private static void dataWithPreDefinedEntities() {
        // The five predefined entities, in order: less-than, greater-than,
        // double quote, apostrophe, ampersand.
        final String withPredefinedEntities = "<foo>&lt;&gt;&quot;&apos;&amp;</foo>";
        final XMLElement root = parse(withPredefinedEntities);
        assert root.getElementValue().equals("<>\"'&") : "was: " + root.getElementValue();
    }
}
Document doc = new SAXBuilder().build(new StringReader(mystringvar));
will parse an XML document held in a string.