XMLのパース(解析)
xml_parserを使ったXMLのパース
XML パーサ
PHP
// Read the whole XML document; file_get_contents() returns false on failure.
$xml = file_get_contents('foo.xml');
if ($xml === false) {
    throw new RuntimeException('Failed to read foo.xml');
}
// Create the XML parser.
$xml_parser = xml_parser_create();
// Skip values that consist only of whitespace characters.
xml_parser_set_option($xml_parser, XML_OPTION_SKIP_WHITE, 1);
// Parse the document; $vals is filled by reference with one entry per tag event.
$vals = array();
$parsed = xml_parse_into_struct($xml_parser, $xml, $vals);
// Capture the error details while the parser still exists.
$parse_error = null;
if ($parsed !== 1) {
    $parse_error = sprintf(
        'Failed to parse foo.xml: %s (line %d)',
        xml_error_string(xml_get_error_code($xml_parser)),
        xml_get_current_line_number($xml_parser)
    );
}
// Release the XML parser (also on the failure path).
xml_parser_free($xml_parser);
if ($parse_error !== null) {
    throw new RuntimeException($parse_error);
}
テスト用に、Google アラートの配信先を「フィード」に設定して XML を生成した。
元のXML
<?xml version="1.0"?>
<feed
xmlns:media="http://search.yahoo.com/mrss/"
xmlns:gr="http://www.google.com/schemas/reader/atom/"
xmlns:idx="urn:atom-extension:indexing"
xmlns="http://www.w3.org/2005/Atom"
idx:index="no"
gr:dir="ltr">
<!-- Content-type: Preventing XSRF in IE. -->
<generator uri="http://www.google.com/reader">Google Reader</generator>
<id>tag:google.com,2005:reader/user/000000000000000000001/state/com.google/alerts/00000000000000000000</id>
<link rel="hub" href="http://pubsubhubbub.appspot.com/" />
<title>Google アラート - 私的雑録</title>
<link rel="self" href="http://www.google.com/alerts/feeds/000000000000000000001/00000000000000000000" />
<updated>2012-11-06T00:00:00Z</updated>
<entry gr:crawl-timestamp-msec="1352101396554">
<id gr:original-id="http://www.google.com/url?sa=X&q=http://php.o0o0.jp/article/2967174473163794&ct=ga&cad=CAcQARgBIAEoATAAOABAk9zdhAVIAlgAYgVqYS1VUw&cd=KRVMUHPLtFs&usg=AFQjCNH0tbwBUkwdniLlTEla6_VZjXkd2g">tag:google.com,2005:reader/item/e276e2a793830e88</id>
<title type="html">Fiddler HTTP デバッガ | <b>私的雑録</b></title>
<published>2012-11-06T00:00:00Z</published>
<updated>2012-11-06T00:00:00Z</updated>
<link
rel="alternate"
href="http://www.google.com/url?sa=X&q=http://php.o0o0.jp/article/2967174473163794&ct=ga&cad=CAcQARgBIAEoATAAOABAk9zdhAVIAlgAYgVqYS1VUw&cd=KRVMUHPLtFs&usg=AFQjCNH0tbwBUkwdniLlTEla6_VZjXkd2g"
type="text/html" />
<content type="html">Fiddler HTTP デバッガ:Fiddlerのインストールと基本的な使い方 -
<b>私的雑録</b> PHPをよく 書いている人の備忘録.<br><a
style="color:#228822"
href="http://www.google.com/url?sa=X&q=http://php.o0o0.jp/article/2967174473163794&ct=ga&cad=CAcQARgBIAEoBDAAOABAk9zdhAVIAlgAYgVqYS1VUw&cd=KRVMUHPLtFs&usg=AFQjCNH0tbwBUkwdniLlTEla6_VZjXkd2g"
title="http://php.o0o0.jp/article/2967174473163794">php.o0o0.jp/article/2967174473163794</a></content>
<author gr:unknown-author="true">
<name>(投稿者不明)</name>
</author>
<source gr:stream-id="user/000000000000000000001/state/com.google/alerts/00000000000000000000">
<id>tag:google.com,2005:reader/user/000000000000000000001/state/com.google/alerts/00000000000000000000</id>
<title type="html">Google アラート - 私的雑録</title>
</source>
</entry>
(略)
</feed>
パース後($valsをreturn)
Array
(
[0] => Array
(
[tag] => FEED
[type] => open
[level] => 1
[attributes] => Array
(
[XMLNS:MEDIA] => http://search.yahoo.com/mrss/
[XMLNS:GR] => http://www.google.com/schemas/reader/atom/
[XMLNS:IDX] => urn:atom-extension:indexing
[XMLNS] => http://www.w3.org/2005/Atom
[IDX:INDEX] => no
[GR:DIR] => ltr
)
)
[1] => Array
(
[tag] => GENERATOR
[type] => complete
[level] => 2
[attributes] => Array
(
[URI] => http://www.google.com/reader
)
[value] => Google Reader
)
[2] => Array
(
[tag] => ID
[type] => complete
[level] => 2
[value] => tag:google.com,2005:reader/user/00000000000000000000/state/com.google/alerts/00000000000000000000
)
[3] => Array
(
[tag] => LINK
[type] => complete
[level] => 2
[attributes] => Array
(
[REL] => hub
[HREF] => http://pubsubhubbub.appspot.com/
)
)
[4] => Array
(
[tag] => TITLE
[type] => complete
[level] => 2
[value] => Google アラート - 私的雑録
)
[5] => Array
(
[tag] => LINK
[type] => complete
[level] => 2
[attributes] => Array
(
[REL] => self
[HREF] => http://www.google.com/alerts/feeds/00000000000000000000/00000000000000000000
)
)
[6] => Array
(
[tag] => UPDATED
[type] => complete
[level] => 2
[value] => 2012-11-05T00:00:00Z
)
[7] => Array
(
[tag] => ENTRY
[type] => open
[level] => 2
[attributes] => Array
(
[GR:CRAWL-TIMESTAMP-MSEC] => 1352101396554
)
)
[8] => Array
(
[tag] => ID
[type] => complete
[level] => 3
[attributes] => Array
(
[GR:ORIGINAL-ID] => http://www.google.com/url?sa=X&q=http://php.o0o0.jp/article/2967174473163794&ct=ga&cad=CAcQARgBIAEoATAAOABAk9zdhAVIAlgAYgVqYS1VUw&cd=KRVMUHPLtFs&usg=AFQjCNH0tbwBUkwdniLlTEla6_VZjXkd2g
)
[value] => tag:google.com,2005:reader/item/e276e2a793830e88
)
[9] => Array
(
[tag] => TITLE
[type] => complete
[level] => 3
[attributes] => Array
(
[TYPE] => html
)
[value] => Fiddler HTTP デバッガ | <b>私的雑録</b>
)
[10] => Array
(
[tag] => PUBLISHED
[type] => complete
[level] => 3
[value] => 2012-11-05T00:00:00Z
)
[11] => Array
(
[tag] => UPDATED
[type] => complete
[level] => 3
[value] => 2012-11-05T00:00:00Z
)
[12] => Array
(
[tag] => LINK
[type] => complete
[level] => 3
[attributes] => Array
(
[REL] => alternate
[HREF] => http://www.google.com/url?sa=X&q=http://php.o0o0.jp/article/2967174473163794&ct=ga&cad=CAcQARgBIAEoATAAOABAk9zdhAVIAlgAYgVqYS1VUw&cd=KRVMUHPLtFs&usg=AFQjCNH0tbwBUkwdniLlTEla6_VZjXkd2g
[TYPE] => text/html
)
)
[13] => Array
(
[tag] => CONTENT
[type] => complete
[level] => 3
[attributes] => Array
(
[TYPE] => html
)
[value] => Fiddler HTTP デバッガ:Fiddlerのインストールと基本的な使い方 - <b>私的雑録</b> PHPをよく 書いている人の備忘録.<br><a style="color:#228822" href="http://www.google.com/url?sa=X&q=http://php.o0o0.jp/article/2967174473163794&ct=ga&cad=CAcQARgBIAEoBDAAOABAk9zdhAVIAlgAYgVqYS1VUw&cd=KRVMUHPLtFs&usg=AFQjCNH0tbwBUkwdniLlTEla6_VZjXkd2g" title="http://php.o0o0.jp/article/2967174473163794">php.o0o0.jp/article/2967174473163794</a>
)
[14] => Array
(
[tag] => AUTHOR
[type] => open
[level] => 3
[attributes] => Array
(
[GR:UNKNOWN-AUTHOR] => true
)
)
[15] => Array
(
[tag] => NAME
[type] => complete
[level] => 4
[value] => (author unknown)
)
[16] => Array
(
[tag] => AUTHOR
[type] => close
[level] => 3
)
[17] => Array
(
[tag] => SOURCE
[type] => open
[level] => 3
[attributes] => Array
(
[GR:STREAM-ID] => user/00000000000000000000/state/com.google/alerts/00000000000000000000
)
)
[18] => Array
(
[tag] => ID
[type] => complete
[level] => 4
[value] => tag:google.com,2005:reader/user/00000000000000000000/state/com.google/alerts/00000000000000000000
)
[19] => Array
(
[tag] => TITLE
[type] => complete
[level] => 4
[attributes] => Array
(
[TYPE] => html
)
[value] => Google アラート - 私的雑録
)
[20] => Array
(
[tag] => SOURCE
[type] => close
[level] => 3
)
[21] => Array
(
[tag] => ENTRY
[type] => close
[level] => 2
)
(略)
[37] => Array
(
[tag] => FEED
[type] => close
[level] => 1
)
)
$vals[9]['value'] で「Fiddler HTTP デバッガ | <b>私的雑録</b>」を取得できる。
SimpleXML
PHP
// Load and parse the XML file; simplexml_load_file() returns false on failure.
$xml = simplexml_load_file('foo.xml');
if ($xml === false) {
    throw new RuntimeException('Failed to load or parse foo.xml');
}
パース後($xmlをreturn)
SimpleXMLElement Object
(
[comment] => SimpleXMLElement Object
(
)
[generator] => Google Reader
[id] => tag:google.com,2005:reader/user/00000000000000000000/state/com.google/alerts/14593991743914576000
[link] => Array
(
[0] => SimpleXMLElement Object
(
[@attributes] => Array
(
[rel] => hub
[href] => http://pubsubhubbub.appspot.com/
)
)
[1] => SimpleXMLElement Object
(
[@attributes] => Array
(
[rel] => self
[href] => http://www.google.com/alerts/feeds/00000000000000000000/14593991743914576000
)
)
)
[title] => Google アラート - 私的雑録
[updated] => 2012-11-06T00:00:00Z
[entry] => Array
(
[0] => SimpleXMLElement Object
(
[id] => tag:google.com,2005:reader/item/e276e2a793830e88
[title] => Fiddler HTTP デバッガ | 私的雑録
[published] => 2012-11-06T00:00:00Z
[updated] => 2012-11-06T00:00:00Z
[link] => SimpleXMLElement Object
(
[@attributes] => Array
(
[rel] => alternate
[href] => http://www.google.com/url?sa=X&q=http://php.o0o0.jp/article/2967174473163794&ct=ga&cad=CAcQARgBIAEoATAAOABAk9zdhAVIAlgAYgVqYS1VUw&cd=KRVMUHPLtFs&usg=AFQjCNH0tbwBUkwdniLlTEla6_VZjXkd2g
[type] => text/html
)
)
[content] => Fiddler HTTP デバッガ:Fiddlerのインストールと基本的な使い方 - 私的雑録 PHPをよく 書いている人の備忘録.php.o0o0.jp/article/2967174473163794
[author] => SimpleXMLElement Object
(
[name] => (author unknown)
)
[source] => SimpleXMLElement Object
(
[id] => tag:google.com,2005:reader/user/00000000000000000000/state/com.google/alerts/00000000000000000000
[title] => Google アラート - 私的雑録
)
)
[1] => SimpleXMLElement Object
(
[id] => tag:google.com,2005:reader/item/cc84f31299324f50
[title] => Google アラート フィード
[published] => 2012-11-06T00:00:00Z
[updated] => 2012-11-06T00:00:00Z
[link] => SimpleXMLElement Object
(
[@attributes] => Array
(
[rel] => alternate
[href] => http://www.google.com/alerts/manage?hl=ja&gl=us
[type] => text/html
)
)
[content] => Google アラート フィードへようこそ。すべて検索キーワード「私的雑録」について新しいアラートをお届けします。キーワードによっては、すぐにアラートが表示されないものもあります。その場合は、アラート管理コンソールで、アラート用のキーワードを変更するか削除してください。Google アラートをご利用いただきありがとうございます。
[author] => SimpleXMLElement Object
(
[name] => Google アラート
)
[source] => SimpleXMLElement Object
(
[id] => tag:google.com,2005:reader/user/00000000000000000000/state/com.google/alerts/00000000000000000000
[title] => Google アラート - 私的雑録
)
)
)
)
$xml->entry[0]->title で「Fiddler HTTP デバッガ | 私的雑録」を取得できる(値は SimpleXMLElement オブジェクトなので、文字列として扱う場合は (string) でキャストする)。