読者です 読者をやめる 読者になる 読者になる

StAXとDOMのパフォーマンスを計測してみた

java

今まで、JavaでXMLを利用するときは専らDOM系を使い、必要に応じてSAXでチューニングしていたんだけど、今さらながらStAXを使ってみた。

StAXはpull型のパーサーで比較的記述性が高く、かつ高速と評判。試しに書いてみたけど、少なくともJDKの素のDOMと比べるなら記述性も悪くない感じ。

 

一応こんな感じで書いた.

import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.stream.FactoryConfigurationError;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;

import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

/**
 * Micro-benchmark comparing StAX (pull parsing) with DOM (tree parsing).
 *
 * <p>A synthetic XML document containing {@link #ITEM_COUNT} {@code <item>}
 * elements is generated in memory, then converted to a list of {@link Item}
 * objects {@link #RUNS} times with each API. Every run prints its elapsed
 * time in milliseconds to standard output.
 */
public class XMLReader {
	/** Number of {@code <item>} elements in the generated document. */
	private static final int ITEM_COUNT = 500000;

	/** Number of timed runs performed per parser. */
	private static final int RUNS = 10;

	/** Simple mutable bean holding the data of one {@code <item>} element. */
	public static class Item {
		private int id;
		private String title;
		private String date;

		/** @return the value of the {@code id} attribute */
		public int getId() {
			return id;
		}

		/** @param id the value of the {@code id} attribute */
		public void setId(int id) {
			this.id = id;
		}

		/** @return the text of the {@code <title>} child, or {@code null} if unset */
		public String getTitle() {
			return title;
		}

		/** @param title the text of the {@code <title>} child */
		public void setTitle(String title) {
			this.title = title;
		}

		/** @return the text of the {@code <date>} child, or {@code null} if unset */
		public String getDate() {
			return date;
		}

		/** @param date the text of the {@code <date>} child */
		public void setDate(String date) {
			this.date = date;
		}

	}

	/**
	 * Runs the benchmark: {@link #RUNS} StAX passes, a separator line, then
	 * {@link #RUNS} DOM passes over the same document.
	 *
	 * @param args ignored
	 */
	public static void main(String[] args) throws XMLStreamException,
			FactoryConfigurationError, ParserConfigurationException,
			SAXException, IOException, InterruptedException {

		// Materialize the document once; it is large, and copying it out of
		// the builder before every run is pure overhead.
		String xml = buildXml(ITEM_COUNT);

		// StAX
		for (int i = 0; i < RUNS; i++) {
			toItems1(xml);
		}
		System.out.println("====================");
		// DOM
		for (int i = 0; i < RUNS; i++) {
			toItems2(xml);
		}
	}

	/**
	 * Builds the synthetic test document.
	 *
	 * <p>NOTE(review): the markup inside the original string literals was
	 * lost (every literal had become empty), so this is a reconstruction
	 * based on the element and attribute names the two parsers look for:
	 * {@code items}, {@code item} with an {@code id} attribute,
	 * {@code title} and {@code date}. The name of the outer wrapper element
	 * is a guess; neither parser depends on it.
	 *
	 * @param count number of {@code <item>} elements to generate
	 * @return the document as a string
	 */
	private static String buildXml(int count) {
		StringBuilder xml = new StringBuilder();
		xml.append("<root>");
		xml.append("<items>");
		for (int i = 0; i < count; i++) {
			xml.append("<item id=\"").append(i).append("\">");
			xml.append("<title>item").append(i).append("</title>");
			xml.append("<date>2012/07/01</date>");
			xml.append("</item>");
		}
		xml.append("</items>");
		xml.append("</root>");
		return xml.toString();
	}

	/**
	 * Parses the document with StAX and prints the elapsed time as
	 * {@code StAX:<n> ms}.
	 *
	 * @param xml the XML document text
	 * @return every {@code <item>} in document order; empty (never
	 *         {@code null}) when the document contains none
	 * @throws XMLStreamException if the document is not well-formed
	 */
	private static List<Item> toItems1(String xml)
			throws FactoryConfigurationError, XMLStreamException {
		// nanoTime is monotonic; currentTimeMillis can jump with clock changes.
		long start = System.nanoTime();

		XMLInputFactory factory = XMLInputFactory.newInstance();
		XMLStreamReader reader = factory
				.createXMLStreamReader(new StringReader(xml));

		List<Item> items = new ArrayList<>();
		Item item = null;
		try {
			while (reader.hasNext()) {
				reader.next();
				if (reader.isStartElement()) {
					String name = reader.getLocalName();
					if ("item".equals(name)) {
						item = new Item();
						// A null namespace means "do not check the namespace".
						item.setId(Integer.parseInt(reader.getAttributeValue(null, "id")));
					} else if (item != null && "title".equals(name)) {
						// getElementText() leaves the reader on </title>.
						item.setTitle(reader.getElementText());
					} else if (item != null && "date".equals(name)) {
						item.setDate(reader.getElementText());
					}
				} else if (reader.isEndElement()) {
					if (item != null && "item".equals(reader.getLocalName())) {
						items.add(item);
						item = null;
					}
				}
			}
		} finally {
			reader.close();
		}

		long elapsedMs = (System.nanoTime() - start) / 1000000L;
		System.out.println("StAX:" + elapsedMs + " ms");

		return items;
	}

	/**
	 * Parses the document with DOM and prints the elapsed time as
	 * {@code DOM:<n> ms}.
	 *
	 * @param xml the XML document text
	 * @return every {@code <item>} in document order; empty when the
	 *         document contains none
	 * @throws SAXException if the document is not well-formed
	 */
	private static List<Item> toItems2(String xml)
			throws ParserConfigurationException, SAXException, IOException {
		long start = System.nanoTime();

		InputSource is = new InputSource(new StringReader(xml));
		DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
		// NOTE(review): XInclude processing is not needed for this document
		// and has no counterpart on the StAX side, so it may bias the
		// comparison against DOM. Kept so results stay comparable with the
		// published numbers.
		factory.setXIncludeAware(true);
		factory.setNamespaceAware(true);
		DocumentBuilder docBuilder = factory.newDocumentBuilder();
		Document doc = docBuilder.parse(is);

		NodeList nodes = doc.getElementsByTagName("item");
		// The NodeList is live; read its length once instead of on every
		// loop iteration.
		int count = nodes.getLength();
		List<Item> items = new ArrayList<>(count);
		for (int i = 0; i < count; i++) {
			Element node = (Element) nodes.item(i);
			Item item = new Item();
			item.setId(Integer.parseInt(node.getAttribute("id")));
			item.setTitle(firstDescendantText(node, "title"));
			item.setDate(firstDescendantText(node, "date"));
			items.add(item);
		}

		long elapsedMs = (System.nanoTime() - start) / 1000000L;
		System.out.println("DOM:" + elapsedMs + " ms");

		return items;
	}

	/**
	 * Returns the text content of the first descendant element with the
	 * given tag name, or {@code null} when there is no such element.
	 */
	private static String firstDescendantText(Element parent, String tagName) {
		NodeList matches = parent.getElementsByTagName(tagName);
		if (matches.getLength() == 0) {
			return null;
		}
		return matches.item(0).getTextContent();
	}

}
    

実行結果のベンチマークは下記の通り.

StAX:1137 ms
StAX:926 ms
StAX:898 ms
StAX:792 ms
StAX:651 ms
StAX:659 ms
StAX:739 ms
StAX:673 ms
StAX:780 ms
StAX:656 ms
====================
DOM:2447 ms
DOM:2743 ms
DOM:4237 ms
DOM:1904 ms
DOM:1679 ms
DOM:1958 ms
DOM:1730 ms
DOM:2041 ms
DOM:2628 ms
DOM:1843 ms

およそ2倍の性能差。この記述性でその性能なら悪くないなぁ.