Shift_JISやらUTF-8やらのマルチバイト文字列をパーセントエンコーディングせずにparse_url()へ渡すと、戻って来た時に壊れているのはなぜでしょうか?
以下の環境で確認しました。
- PHP5.0.4, PHP5.1.6 (CentOS4)
- PHP5.2.0 (MacOSX; 10.4.8)
内部エンコーディングがEUC-JPだからか、EUC-JPが特別だからか、EUC-JPのマルチバイト文字列を渡した場合はぼちぼち上手く行く模様。
この関数は、指定された URL が有効かどうかを調べるためのものではなく、単に URL を上で示した要素に分解するだけのものです。不完全な URL であっても受け入れられますし、そのような場合でも parse_url() は可能な限り正しく解析しようとします。
PHP: parse_url - Manual
分解目的なら、[RFC3986]Appendix B. Parsing a URI Reference with a Regular Expressionにある正規表現を利用したら良いのかな。
ちなみに、PEAR::Net_URLの中でparse_urlが使われているので、Net_URL::getURLを使ってURLを組み立てようとした場合など、注意が必要かもしれません。
php.iniや環境変数などに依存する問題なのかもしれませんが、詳しく調査していません。
Uri.php
<?php
/**
 * Splits a URI (or URI reference) into its generic components using a
 * single regular expression, without validating the URI.
 *
 * @author  koshigoe<KoshigoeBushou@gmail.com>
 * @since   2007/01/18
 * @version $Id$
 * @see     RFC3986<http://www.ietf.org/rfc/rfc3986.txt>
 */
class Uri
{
    /**
     * Pattern used to split a URI; each component lives in its own capture group.
     *
     * @var string
     */
    private $_regex;

    /**
     * Component name => index of the capture group that holds its value.
     *
     * @var array
     */
    private $_map;

    /**
     * Prepares the splitting pattern and the component/group lookup table.
     */
    public function __construct()
    {
        $this->_map = array(
            'scheme'    => 2,
            'authority' => 4,
            'path'      => 5,
            'query'     => 7,
            'fragment'  => 9,
        );

        // '|' is the pattern delimiter, so '/' needs no escaping inside the pattern.
        $this->_regex = '|^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?|';
    }

    /**
     * Break a well-formed URI (or URI reference) into named components.
     *
     * A component appears in the result only when its capture group index is
     * present in the preg_match() result; the value is copied as-is, with no
     * decoding or normalisation.
     *
     * @param  String $uri URI string
     * @return Array|false component name => value, or false when the pattern
     *                     does not match (or preg_match() reports an error)
     */
    public function parse($uri)
    {
        // preg_match() yields 0 (no match) or false (error); both are falsy.
        if (!preg_match($this->_regex, $uri, $groups)) {
            return false;
        }

        $parts = array();
        foreach ($this->_map as $label => $index) {
            if (!isset($groups[$index])) {
                continue;
            }
            $parts[$label] = $groups[$index];
        }

        return $parts;
    }
}
?>
UriTest.php
<?php
// Call UriTest::main() if this source file is executed directly.
if (!defined("PHPUnit_MAIN_METHOD")) {
    define("PHPUnit_MAIN_METHOD", "UriTest::main");
}

require_once "PHPUnit/Framework/TestCase.php";
require_once "PHPUnit/Framework/TestSuite.php";

require_once '/path/to/Uri.php';

/**
 * Test class for Uri.
 * Generated by PHPUnit_Util_Skeleton on 2007-01-18 at 23:18:14.
 */
class UriTest extends PHPUnit_Framework_TestCase
{
    /**
     * Parser under test; recreated before every test method.
     *
     * @var Uri
     */
    protected $uri;

    /**
     * Runs the test methods of this class.
     *
     * @access public
     * @static
     */
    public static function main()
    {
        require_once "PHPUnit/TextUI/TestRunner.php";

        $suite = new PHPUnit_Framework_TestSuite("UriTest");
        PHPUnit_TextUI_TestRunner::run($suite);
    }

    /**
     * Sets up the fixture: a fresh Uri instance.
     * This method is called before a test is executed.
     *
     * @access protected
     */
    protected function setUp()
    {
        $this->uri = new Uri();
    }

    /**
     * Tears down the fixture. Nothing to release.
     * This method is called after a test is executed.
     *
     * @access protected
     */
    protected function tearDown()
    {
    }

    /**
     * Checks Uri::parse() against a table of URIs and their expected components.
     *
     * Each entry maps "uri" to "components": either the exact array that
     * parse() must return, or a non-array value meaning parse() must return
     * false. The whole expected array is compared with the whole result, so a
     * component that is missing from (or unexpectedly present in) the result
     * fails the test instead of being silently skipped.
     */
    public function testParse()
    {
        $patternSet = array();
        $patternSet[] = array(
            "uri" => "http://www.example.com",
            "components" => array(
                "scheme" => "http",
                "authority" => "www.example.com",
                "path" => "",
            )
        );
        $patternSet[] = array(
            "uri" => "http://www.example.com:80",
            "components" => array(
                "scheme" => "http",
                "authority" => "www.example.com:80",
                "path" => "",
            )
        );
        $patternSet[] = array(
            "uri" => "http://www.example.com/",
            "components" => array(
                "scheme" => "http",
                "authority" => "www.example.com",
                "path" => "/",
            )
        );
        $patternSet[] = array(
            "uri" => "http://www.example.com?q",
            "components" => array(
                "scheme" => "http",
                "authority" => "www.example.com",
                "path" => "",
                "query" => "q",
            )
        );
        $patternSet[] = array(
            "uri" => "http://www.example.com?q=value#page",
            "components" => array(
                "scheme" => "http",
                "authority" => "www.example.com",
                "path" => "",
                "query" => "q=value",
                "fragment" => "page"
            )
        );
        $patternSet[] = array(
            "uri" => "/",
            "components" => array(
                "scheme" => "",
                "authority" => "",
                "path" => "/",
            )
        );
        $patternSet[] = array(
            "uri" => "?q=value",
            "components" => array(
                "scheme" => "",
                "authority" => "",
                "path" => "",
                "query" => "q=value"
            )
        );
        $patternSet[] = array(
            "uri" => "#sec1",
            "components" => array(
                "scheme" => "",
                "authority" => "",
                "path" => "",
                "query" => "",
                "fragment" => "sec1",
            )
        );
        $patternSet[] = array(
            "uri" => "",
            "components" => array(
                "scheme" => "",
                "authority" => "",
                "path" => "",
            )
        );
        $patternSet[] = array(
            "uri" => "http://jp.example.com/パス?クエリ#フラグメント",
            "components" => array(
                "scheme" => "http",
                "authority" => "jp.example.com",
                "path" => "/パス",
                "query" => "クエリ",
                "fragment" => "フラグメント",
            )
        );

        foreach ($patternSet as $pattern) {
            $expectedComponents = $pattern["components"];
            $actualComponents = $this->uri->parse($pattern["uri"]);
            // Name the offending URI so a failing entry can be found in the table.
            $message = sprintf('URI: "%s"', $pattern["uri"]);

            if (is_array($expectedComponents)) {
                // Compare the full arrays: iterating over the actual result
                // alone would let a missing component go unnoticed.
                $this->assertEquals($expectedComponents, $actualComponents, $message);
            } else {
                $this->assertFalse($actualComponents, $message);
            }
        }
    }
}

// Call UriTest::main() if this source file is executed directly.
if (PHPUnit_MAIN_METHOD == "UriTest::main") {
    UriTest::main();
}
?>

