diff --git a/README.md b/README.md index 55f3f3d..f4ea265 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ other words, this library simplifies getting the different components from URLs. PHP already provides a built in function `parse_url()`. However, that function behaves somewhat differently from the RFC definition, since it is more lenient -towards urls that do not exactly fit the specification. This library provides +towards URLs that do not exactly fit the specification. This library provides a more accurate implementation for parsing and even for validating URLs. While this library is called *URL* parser, it does, in fact, conform to the diff --git a/apigen.neon b/apigen.neon index 765b66a..be8b7e2 100644 --- a/apigen.neon +++ b/apigen.neon @@ -1,10 +1,10 @@ source: - src - + destination: api charset: - UTF-8 - + main: Riimu\Kit\UrlParser title: UrlParser php: false diff --git a/src/UrlInfo.php b/src/UrlInfo.php index f640cce..437c307 100644 --- a/src/UrlInfo.php +++ b/src/UrlInfo.php @@ -10,10 +10,10 @@ */ class UrlInfo { - /** @var string The original parsed url */ + /** @var string The original parsed URL */ private $url; - /** @var string[] All the nonempty parts parsed from the url */ + /** @var string[] All the nonempty parts parsed from the URL */ private $parts; /** @var integer[] List of known default ports */ @@ -25,8 +25,8 @@ class UrlInfo /** * Creates a new UrlInfo instance. - * @param string $url The original url that has been parsed - * @param string[] $parts The parts the have been parsed from the url + * @param string $url The original parsed URL + * @param string[] $parts The parts that have been parsed from the URL */ public function __construct($url, array $parts) { @@ -40,7 +40,7 @@ public function __construct($url, array $parts) /** * Returns the original URL. - * @return string The original URL that has been parsed. 
+ * @return string The original parsed URL */ public function getUrl() { @@ -51,7 +51,7 @@ public function getUrl() * Returns the nonempty named parts from the parsed URL. * * The array contains all the nonempty parts that have been parsed from the - * url. The URL may consist of following parts: + * URL. The URL may consist of following parts: * * - scheme : Scheme defined in the URL (the part before '://') * - hier_part : Part that may consist of authority and path @@ -70,8 +70,8 @@ public function getUrl() * - path_noscheme : Path that begins with non empty segment without : * - path_rootless : Path that begins with non empty segment * - path_empty : This part is always empty, so it should never be returned - * - query : Query part of the url (the part after '?') - * - fragment : Fragment part of the url (the part after '#') + * - query : Query part of the URL (the part after '?') + * - fragment : Fragment part of the URL (the part after '#') * * @return string[] Named nonempty parts from the parsed URL */ @@ -103,8 +103,8 @@ private function findPart(array $list) } /** - * Returns the scheme part of the url. - * @return string|false Scheme part of the url or false if not defined + * Returns the scheme part of the URL. + * @return string|false Scheme part of the URL or false if not defined */ public function getScheme() { @@ -112,7 +112,7 @@ public function getScheme() } /** - * Returns the username from the userinfo part of the url. + * Returns the username from the userinfo part of the URL. * * Username is defined in the userinfo part, which is separated from the * host with '@'. If the userinfo part contains a colon, the username is @@ -127,7 +127,7 @@ public function getUsername() } /** - * Returns the password from the userinfo part of the url. + * Returns the password from the userinfo part of the URL. * * Password is defined in the userinfo part, which is separated from the * host with '@'. 
Password is the part of userinfo that comes after the @@ -158,8 +158,8 @@ private function getAuth($username) } /** - * Returns the host part of the url. - * @return string|false Host part from the url or false if not defined + * Returns the host part of the URL. + * @return string|false Host part from the URL or false if not defined */ public function getHostname() { @@ -172,7 +172,7 @@ public function getHostname() * If the IP address is defined in the URL itself, that IP address is * returned (without any enclosing characters or version information). If * the URL has a hostname instead, the IP address will be determined by - * gethostbyname() (unless the optional parameter is set to false). + * `gethostbyname()` (unless the optional parameter is set to false). * * @param boolean $resolve Whether to determine IP address for hostnames or not * @return string|false IP address for the URL or false if not defined @@ -202,7 +202,7 @@ private function resolveHost($hostname) /** * Returns the port from the URL or default one for the scheme. * - * If no port is present in the url and the first parameter is not set to + * If no port is present in the URL and the first parameter is not set to * false, this method will return the default port of the scheme for known * schemes. * @@ -244,8 +244,8 @@ public function getDefaultPort() } /** - * Returns the path part of the url. - * @return string Path part of the url or empty string if none defined + * Returns the path part of the URL. + * @return string Path part of the URL or empty string if none defined */ public function getPath() { @@ -276,8 +276,8 @@ public function getFileExtension() } /** - * Returns the query part of the url. - * @return string|false Query part of the url or false if not defined + * Returns the query part of the URL. 
+ * @return string|false Query part of the URL or false if not defined */ public function getQuery() { @@ -288,8 +288,8 @@ public function getQuery() * Returns an array containing variables parsed from the query. * * The variables are parsed from the string returned by getQuery() using - * PHP's built in parse_str() function. Thus, the parsing is identical to - * parsing of $_GET variables. If the Query is empty, and empty array will + * PHP's built in `parse_str()` function. Thus, the parsing is identical to + * parsing of $_GET variables. If the query is empty, an empty array will * be returned. * * @return array Variables parsed from the query or empty array if none @@ -302,8 +302,8 @@ public function getVariables() } /** - * Returns the fragment part of the url. - * @return string|false Fragment part of the url or false if not defined + * Returns the fragment part of the URL. + * @return string|false Fragment part of the URL or false if not defined */ public function getFragment() { diff --git a/src/UrlParser.php b/src/UrlParser.php index 90c0125..6eb900f 100644 --- a/src/UrlParser.php +++ b/src/UrlParser.php @@ -5,22 +5,24 @@ /** * Provides a RFC 3986 compliant solution to URL parsing. * - * UrlParser provides a more accurate solution to parsing URLs compared to PHP's - * built in parse_url(). The URLs are parsed only as defined in the spec. This - * however, means that this class will not parse URLs that are incomplete or - * otherwise invalid according to the spec despite the fact that people commonly - * use these urls. + * UrlParser provides URL parsing methods that accurately comply with the + * specification. Unlike the built in function `parse_url()`, this library will + * not parse incomplete or invalid URLs in an attempt to at least provide some + * information. While this library should parse all valid URLs, it does not + * mean that other applications always produce URLs that are valid according to + * the specification. 
* + * @see http://www.ietf.org/rfc/rfc3986.txt * @author Riikka Kalliomäki * @copyright Copyright (c) 2013, Riikka Kalliomäki * @license http://opensource.org/licenses/mit-license.php MIT License */ class UrlParser { - /** @var string PCRE pattern conforming the URI specification */ + /** @var string PCRE pattern conforming to the URI specification */ private $urlPattern; - /** @var string PCRE pattern conforming the relative-ref specification */ + /** @var string PCRE pattern conforming to the relative-ref specification */ private $relativePattern; /** @@ -37,16 +39,13 @@ public function __construct() /** * Parses the URL according to the URI specification. * - * This method will basically parse complete URLs. Essentially, the real - * requirement is that the URL must have the scheme defined. In other words - * 'www.example.com' will return null, but 'http://www.example.com' will - * return an UrlInfo object. - * - * Any string that cannot be parsed as an URL according to the specification - * will return a null value. + * URLs parsed according to the URI specification must have a scheme. Any URL + * that does not define the scheme is considered invalid. For example, + * 'www.example.com' is not a valid URL, because it does not start with a + * scheme such as 'http://'. * * @param string $url URL to parse - * @return UrlInfo|null UrlInfo object from the URL or null on failure + * @return UrlInfo|null URL information object or null if the URL is invalid */ public function parseUrl($url) { @@ -60,14 +59,14 @@ public function parseUrl($url) /** * Parses the URL according to relative-ref specification. * - * The relative-ref spec differs from URI spec in that relative-ref never - * has the scheme part defined. Note that while 'www.example.com' can be - * parsed as relative url, it's actually part of the path and not the - * hostname. It will only be recognized as hostname if prefixed with two - * forward slashes, e.g. '//www.example.com'. + * Relative URLs cannot define a scheme. 
For example, '//www.example.com' is + * a valid relative URL, because it's relative to the scheme. It is good to + * note that while 'www.example.com' is a valid relative URL, it is parsed + * as the path and not the hostname. A relative URL must start with '//' in + * order to define a hostname. * - * @param string $url Relative URL to parse - * @return UrlInfo|null UrlInfo object from the URL or null on failure + * @param string $url URL to parse + * @return UrlInfo|null URL information object or null if the URL is invalid */ public function parseRelative($url) {