src/java/net/URLDecoder.java

/*
 * Copyright (c) 1998, 2017, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

package java.net;

import java.io.*;
import java.nio.charset.Charset;
import java.nio.charset.IllegalCharsetNameException;
import java.nio.charset.UnsupportedCharsetException;
import java.util.Objects;

/**
 * Utility class for HTML form decoding. This class contains static methods
 * for decoding a String from the <CODE>application/x-www-form-urlencoded</CODE>
 * MIME format.
 * <p>
 * The conversion process is the reverse of that used by the URLEncoder class. It is assumed
 * that all characters in the encoded string are one of the following:
 * &quot;{@code a}&quot; through &quot;{@code z}&quot;,
 * &quot;{@code A}&quot; through &quot;{@code Z}&quot;,
 * &quot;{@code 0}&quot; through &quot;{@code 9}&quot;, and
 * &quot;{@code -}&quot;, &quot;{@code _}&quot;,
 * &quot;{@code .}&quot;, and &quot;{@code *}&quot;. The
 * character &quot;{@code %}&quot; is allowed but is interpreted
 * as the start of a special escaped sequence.
 * <p>
 * The following rules are applied in the conversion:
 *
 * <ul>
 * <li>The alphanumeric characters &quot;{@code a}&quot; through
 *     &quot;{@code z}&quot;, &quot;{@code A}&quot; through
 *     &quot;{@code Z}&quot; and &quot;{@code 0}&quot;
 *     through &quot;{@code 9}&quot; remain the same.
 * <li>The special characters &quot;{@code .}&quot;,
 *     &quot;{@code -}&quot;, &quot;{@code *}&quot;, and
 *     &quot;{@code _}&quot; remain the same.
 * <li>The plus sign &quot;{@code +}&quot; is converted into a
 *     space character &quot; &nbsp; &quot; .
 * <li>A sequence of the form "<i>{@code %xy}</i>" will be
 *     treated as representing a byte where <i>xy</i> is the two-digit
 *     hexadecimal representation of the 8 bits. Then, all substrings
 *     that contain one or more of these byte sequences consecutively
 *     will be replaced by the character(s) whose encoding would result
 *     in those consecutive bytes.
 *     The encoding scheme used to decode these characters may be specified,
 *     or if unspecified, the default encoding of the platform will be used.
 * </ul>
 * <p>
 * There are two possible ways in which this decoder could deal with
 * illegal strings.  It could either leave illegal characters alone or
 * it could throw an {@link java.lang.IllegalArgumentException}.
 * Which approach the decoder takes is left to the
 * implementation.
 *
 * @author Mark Chamness
 * @author Michael McCloskey
 * @since 1.2
 */
// Decodes application/x-www-form-urlencoded strings (the inverse of URLEncoder).
public class URLDecoder {
    
    // Name of the platform default encoding, shared with URLEncoder.
    static String dfltEncName = URLEncoder.dfltEncName;
    
    /**
     * Decodes a {@code x-www-form-urlencoded} string.
     * The platform's default encoding is used to determine what characters
     * are represented by any consecutive sequences of the form
     * "<i>{@code %xy}</i>".
     *
     * @param url the {@code String} to decode
     *
     * @return the newly decoded {@code String}; {@code null} only if the
     * default encoding name is rejected as unsupported, which is not
     * expected to happen
     *
     * @deprecated The resulting string may vary depending on the platform's
     * default encoding. Instead, use {@link #decode(String, String)} or
     * {@link #decode(String, Charset)} to specify the encoding.
     */
    // Decodes url with the platform default charset; deprecated, pass the charset explicitly instead.
    @Deprecated
    public static String decode(String url) {
        String str = null;
        
        try {
            str = decode(url, dfltEncName);
        } catch (UnsupportedEncodingException ignored) {
            // The system should always have the platform default,
            // so this is not expected to be reachable; str stays null.
        }
        
        return str;
    }
    
    /**
     * Decodes an {@code application/x-www-form-urlencoded} string using
     * a specific encoding scheme.
     *
     * <p>
     * This method behaves the same as {@linkplain #decode(String, Charset)}
     * except that it will {@linkplain java.nio.charset.Charset#forName look up the charset}
     * using the given encoding name.
     *
     * @param url         the {@code String} to decode
     * @param charsetName The name of a supported
     *                    <a href="../lang/package-summary.html#charenc">character
     *                    encoding</a>.
     *
     * @return the newly decoded {@code String}
     *
     * @throws UnsupportedEncodingException if {@code charsetName} is empty, is not a legal
     *                                      charset name, or names a charset that is not supported
     * @throws NullPointerException         if {@code url} or {@code charsetName} is {@code null}
     * @throws IllegalArgumentException     if the string contains an illegal or incomplete
     *                                      escape sequence
     * @implNote This implementation will throw an {@link java.lang.IllegalArgumentException}
     * when illegal strings are encountered.
     * @see URLEncoder#encode(java.lang.String, java.lang.String)
     * @since 1.4
     */
    // Decodes url with the charset identified by charsetName.
    public static String decode(String url, String charsetName) throws UnsupportedEncodingException {
        if (charsetName.isEmpty()) {
            throw new UnsupportedEncodingException("URLDecoder: empty string enc parameter");
        }
        
        final Charset charset;
        try {
            charset = Charset.forName(charsetName);
        } catch (IllegalCharsetNameException | UnsupportedCharsetException e) {
            // UnsupportedEncodingException has no cause-taking constructor,
            // so attach the original failure explicitly.
            UnsupportedEncodingException unsupported = new UnsupportedEncodingException(charsetName);
            unsupported.initCause(e);
            throw unsupported;
        }
        
        return decode(url, charset);
    }
    
    /**
     * Decodes an {@code application/x-www-form-urlencoded} string using
     * a specific {@linkplain java.nio.charset.Charset Charset}.
     * The supplied charset is used to determine
     * what characters are represented by any consecutive sequences of the
     * form "<i>{@code %xy}</i>".
     * <p>
     * <em><strong>Note:</strong> The <a href=
     * "http://www.w3.org/TR/html40/appendix/notes.html#non-ascii-chars">
     * World Wide Web Consortium Recommendation</a> states that
     * UTF-8 should be used. Not doing so may introduce
     * incompatibilities.</em>
     *
     * @param url     the {@code String} to decode
     * @param charset the given charset
     *
     * @return the newly decoded {@code String}; the very same {@code url}
     * instance is returned when it contains neither {@code '+'} nor {@code '%'}
     *
     * @throws NullPointerException     if {@code url} or {@code charset} is {@code null}
     * @throws IllegalArgumentException if an escape is incomplete, or if the two
     *                                  characters following a {@code '%'} are not both
     *                                  ASCII hexadecimal digits
     * @implNote This implementation will throw an {@link java.lang.IllegalArgumentException}
     * when illegal strings are encountered.
     * @see URLEncoder#encode(java.lang.String, java.nio.charset.Charset)
     * @since 10
     */
    // Decodes url with the given charset.
    public static String decode(String url, Charset charset) {
        Objects.requireNonNull(charset, "Charset");
        
        int numChars = url.length(); // also acts as the null check for url
        StringBuilder sb = new StringBuilder(numChars > 500 ? numChars / 2 : numChars);
        boolean needToChange = false;
        byte[] bytes = null; // scratch buffer, allocated lazily on the first '%'
        int i = 0;
        
        while (i < numChars) {
            char c = url.charAt(i);
            switch (c) {
                case '+':
                    sb.append(' ');
                    i++;
                    needToChange = true;
                    break;
                case '%': {
                    /*
                     * Starting with this instance of %, process all
                     * consecutive substrings of the form %xy. Each
                     * substring %xy will yield a byte. Convert all
                     * consecutive bytes obtained this way to whatever
                     * character(s) they represent in the provided
                     * encoding.
                     */
                    
                    // (numChars-i)/3 is an upper bound for the number of
                    // remaining bytes; later runs start further right, so
                    // the buffer sized here is large enough for them too.
                    if (bytes == null) {
                        bytes = new byte[(numChars - i) / 3];
                    }
                    int pos = 0;
                    
                    while ((i + 2) < numChars && c == '%') {
                        bytes[pos++] = decodeEscape(url, i + 1);
                        i += 3;
                        if (i < numChars) {
                            c = url.charAt(i);
                        }
                    }
                    
                    // A trailing, incomplete byte encoding such as
                    // "%x" will cause an exception to be thrown
                    if (i < numChars && c == '%') {
                        throw new IllegalArgumentException("URLDecoder: Incomplete trailing escape (%) pattern");
                    }
                    
                    sb.append(new String(bytes, 0, pos, charset));
                    needToChange = true;
                    break;
                }
                default:
                    sb.append(c);
                    i++;
                    break;
            }
        }
        
        return needToChange ? sb.toString() : url;
    }
    
    /*
     * Converts the two characters at url[start] and url[start + 1] into the
     * byte they spell in hexadecimal. Both must be ASCII hex digits; a sign
     * character such as '+' or '-' is rejected rather than being parsed as
     * part of a number.
     */
    private static byte decodeEscape(String url, int start) {
        int hi = hexDigit(url.charAt(start));
        int lo = hexDigit(url.charAt(start + 1));
        if (hi < 0 || lo < 0) {
            throw new IllegalArgumentException("URLDecoder: Illegal hex characters in escape (%) pattern - \""
                + url.substring(start, start + 2) + "\"");
        }
        return (byte) ((hi << 4) | lo);
    }
    
    /*
     * Returns the value (0-15) of an ASCII hexadecimal digit, or -1 for any
     * other character. Character.digit alone would also accept non-ASCII
     * digits, hence the explicit range check.
     */
    private static int hexDigit(char ch) {
        return ch < 0x80 ? Character.digit(ch, 16) : -1;
    }
}