StAXとDOMのパフォーマンスを計測してみた
今まで、JavaでXMLを利用するときは専らDOM系、必要に応じてSAXにチューニングしてたんだけど、今さらながらStAXを使ってみた。
StAXはpull型のパーサーで比較的記述性が高く、かつ高速と評判。試しに書いてみたけど、少なくともJDKの素のDOMと比べるなら記述性も悪くない感じ。
一応こんな感じで書いた.
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.stream.FactoryConfigurationError;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

/**
 * Small benchmark that parses the same in-memory XML document with StAX and
 * with DOM, printing the elapsed time of every run.
 */
public class XMLReader {

    /** Number of {@code <item>} elements in the generated benchmark document. */
    private static final int ITEM_COUNT = 500000;

    /** Number of timed parses performed with each parser. */
    private static final int RUNS = 10;

    /** Value object filled from one {@code <item>} element. */
    public static class Item {
        private int id;
        private String title;
        private String date;

        public int getId() {
            return id;
        }

        public void setId(int id) {
            this.id = id;
        }

        public String getTitle() {
            return title;
        }

        public void setTitle(String title) {
            this.title = title;
        }

        public String getDate() {
            return date;
        }

        public void setDate(String date) {
            this.date = date;
        }
    }

    /**
     * Builds the benchmark document once, then parses it {@code RUNS} times
     * with StAX and {@code RUNS} times with DOM. Each parse prints its own
     * elapsed time; the two groups are separated by a line of {@code =}.
     *
     * @param args unused
     */
    public static void main(String[] args) throws XMLStreamException,
            FactoryConfigurationError, ParserConfigurationException,
            SAXException, IOException, InterruptedException {
        // Materialise the document once so the large buffer is not re-copied
        // on every iteration.
        final String xml = buildXml(ITEM_COUNT);

        // StAX
        for (int run = 0; run < RUNS; run++) {
            toItems1(xml);
        }
        System.out.println("====================");
        // DOM
        for (int run = 0; run < RUNS; run++) {
            toItems2(xml);
        }
    }

    /**
     * Generates the document consumed by both parsers.
     *
     * <p>NOTE(review): the markup literals were missing from the source this
     * was taken from. The element and attribute names below are the ones the
     * two parse methods look up ({@code items}, {@code item}, {@code id},
     * {@code title}, {@code date}); the outer {@code root} wrapper is an
     * assumption - confirm against the original document.
     *
     * @param count number of {@code <item>} elements to emit
     * @return the document as a string
     */
    private static String buildXml(int count) {
        StringBuilder xml = new StringBuilder();
        xml.append("<root>");
        xml.append("<items>");
        for (int i = 0; i < count; i++) {
            xml.append("<item id=\"").append(i).append("\">");
            xml.append("<title>item").append(i).append("</title>");
            xml.append("<date>2012/07/01</date>");
            xml.append("</item>");
        }
        xml.append("</items>");
        xml.append("</root>");
        return xml.toString();
    }

    /**
     * Parses {@code xml} with StAX and prints the elapsed time as
     * {@code StAX:<n> ms}.
     *
     * @param xml document text
     * @return one {@link Item} per {@code <item>} element, in document order;
     *         empty (never {@code null}) when the document contains none
     * @throws XMLStreamException if the document cannot be read
     * @throws NumberFormatException if an {@code id} attribute is missing or
     *         not an integer
     */
    private static List<Item> toItems1(String xml)
            throws FactoryConfigurationError, XMLStreamException {
        long startNanos = System.nanoTime();
        XMLInputFactory factory = XMLInputFactory.newInstance();
        XMLStreamReader reader =
                factory.createXMLStreamReader(new StringReader(xml));
        List<Item> items = new ArrayList<>();
        Item current = null;
        // XMLStreamReader is not AutoCloseable, hence the explicit finally.
        try {
            while (reader.hasNext()) {
                reader.next();
                if (reader.isStartElement()) {
                    String name = reader.getLocalName();
                    if ("item".equals(name)) {
                        current = new Item();
                        current.setId(Integer.parseInt(
                                reader.getAttributeValue("", "id")));
                    } else if (current != null && "title".equals(name)) {
                        // getElementText() consumes up to the matching end tag.
                        current.setTitle(reader.getElementText());
                    } else if (current != null && "date".equals(name)) {
                        current.setDate(reader.getElementText());
                    }
                } else if (reader.isEndElement()
                        && current != null
                        && "item".equals(reader.getLocalName())) {
                    items.add(current);
                    // Reset so stray elements after </item> cannot touch an
                    // item that has already been added.
                    current = null;
                }
            }
        } finally {
            reader.close();
        }
        long elapsedMs = (System.nanoTime() - startNanos) / 1_000_000L;
        System.out.println("StAX:" + elapsedMs + " ms");
        return items;
    }

    /**
     * Parses {@code xml} into a DOM tree, converts every {@code <item>}
     * element and prints the elapsed time as {@code DOM:<n> ms}.
     *
     * @param xml document text
     * @return one {@link Item} per {@code <item>} element, in document order
     * @throws ParserConfigurationException if no DOM builder can be created
     * @throws SAXException if the document is not well-formed
     * @throws IOException if reading the input fails
     * @throws NumberFormatException if an {@code id} attribute is missing or
     *         not an integer
     */
    private static List<Item> toItems2(String xml)
            throws FactoryConfigurationError, ParserConfigurationException,
            SAXException, IOException {
        long startNanos = System.nanoTime();
        InputSource source = new InputSource(new StringReader(xml));
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        // NOTE(review): the generated document uses neither XInclude nor
        // namespaces; both flags are kept so the DOM configuration stays the
        // one the published timings were taken with. Consider dropping
        // XInclude for a like-for-like comparison with StAX.
        factory.setXIncludeAware(true);
        factory.setNamespaceAware(true);
        DocumentBuilder builder = factory.newDocumentBuilder();
        Document doc = builder.parse(source);

        List<Item> items = new ArrayList<>();
        NodeList nodes = doc.getElementsByTagName("item");
        for (int i = 0; i < nodes.getLength(); i++) {
            Element node = (Element) nodes.item(i);
            Item item = new Item();
            item.setId(Integer.parseInt(node.getAttribute("id")));
            item.setTitle(
                    node.getElementsByTagName("title").item(0).getTextContent());
            item.setDate(
                    node.getElementsByTagName("date").item(0).getTextContent());
            items.add(item);
        }
        long elapsedMs = (System.nanoTime() - startNanos) / 1_000_000L;
        System.out.println("DOM:" + elapsedMs + " ms");
        return items;
    }
}
実行結果のベンチマークは下記の通り.
StAX:1137 ms StAX:926 ms StAX:898 ms StAX:792 ms StAX:651 ms StAX:659 ms StAX:739 ms StAX:673 ms StAX:780 ms StAX:656 ms ==================== DOM:2447 ms DOM:2743 ms DOM:4237 ms DOM:1904 ms DOM:1679 ms DOM:1958 ms DOM:1730 ms DOM:2041 ms DOM:2628 ms DOM:1843 ms
およそ2倍の性能差。この記述性でその性能なら悪くないなぁ.