alvinalexander.com | career | drupal | java | mac | mysql | perl | scala | uml | unix  

Java example source code file (URIUtils.java)

This example Java source code file (URIUtils.java) is included in the alvinalexander.com "Java Source Code Warehouse" project. The intent of this project is to help you "Learn Java by Example" TM.

Learn more about this Java project at its project page.

Java - Java tags/keywords

object, string, stringbuilder, uri_reserved, uriutils

The URIUtils.java Java example source code

/*
 * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

package jdk.nashorn.internal.runtime;

import static jdk.nashorn.internal.runtime.ECMAErrors.uriError;

/**
 * URI handling global functions. ECMA 15.1.3 URI Handling Function Properties
 *
 */
public final class URIUtils {

    private URIUtils() {
    }

    static String encodeURI(final Object self, final String string) {
        return encode(self, string, false);
    }

    static String encodeURIComponent(final Object self, final String string) {
        return encode(self, string, true);
    }

    static String decodeURI(final Object self, final String string) {
        return decode(self, string, false);
    }

    static String decodeURIComponent(final Object self, final String string) {
        return decode(self, string, true);
    }

    // abstract encode function
    private static String encode(final Object self, final String string, final boolean component) {
        if (string.isEmpty()) {
            return string;
        }

        final int len = string.length();
        final StringBuilder sb = new StringBuilder();

        for (int k = 0; k < len; k++) {
            final char C = string.charAt(k);
            if (isUnescaped(C, component)) {
                sb.append(C);
                continue;
            }

            if (C >= 0xDC00 && C <= 0xDFFF) {
                return error(string, k);
            }

            int V;
            if (C < 0xD800 || C > 0xDBFF) {
                V = C;
            } else {
                k++;
                if (k == len) {
                    return error(string, k);
                }

                final char kChar = string.charAt(k);
                if (kChar < 0xDC00 || kChar > 0xDFFF) {
                    return error(string, k);
                }
                V = ((C - 0xD800) * 0x400 + (kChar - 0xDC00) + 0x10000);
            }

            try {
                sb.append(toHexEscape(V));
            } catch (final Exception e) {
                throw uriError(e, "bad.uri", string, Integer.toString(k));
            }
        }

        return sb.toString();
    }

    // abstract decode function
    private static String decode(final Object self, final String string, final boolean component) {
        if (string.isEmpty()) {
            return string;
        }

        final int           len = string.length();
        final StringBuilder sb  = new StringBuilder();

        for (int k = 0; k < len; k++) {
            final char ch = string.charAt(k);
            if (ch != '%') {
                sb.append(ch);
                continue;
            }
            final int start = k;
            if (k + 2 >= len) {
                return error(string, k);
            }

            int B = toHexByte(string.charAt(k + 1), string.charAt(k + 2));
            if (B < 0) {
                return error(string, k + 1);
            }

            k += 2;
            char C;
            // Most significant bit is zero
            if ((B & 0x80) == 0) {
                C = (char) B;
                if (!component && URI_RESERVED.indexOf(C) >= 0) {
                    for (int j = start; j <= k; j++) {
                        sb.append(string.charAt(j));
                    }
                } else {
                    sb.append(C);
                }
            } else {
                // n is utf8 length, V is codepoint and minV is lower bound
                int n, V, minV;

                if ((B & 0xC0) == 0x80) {
                    // 10xxxxxx - illegal first byte
                    return error(string, k);
                } else if ((B & 0x20) == 0) {
                    // 110xxxxx 10xxxxxx
                    n = 2;
                    V = B & 0x1F;
                    minV = 0x80;
                } else if ((B & 0x10) == 0) {
                    // 1110xxxx 10xxxxxx 10xxxxxx
                    n = 3;
                    V = B & 0x0F;
                    minV = 0x800;
                } else if ((B & 0x08) == 0) {
                    // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
                    n = 4;
                    V = B & 0x07;
                    minV = 0x10000;
                } else if ((B & 0x04) == 0) {
                    // 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
                    n = 5;
                    V =  B & 0x03;
                    minV = 0x200000;
                } else if ((B & 0x02) == 0) {
                    // 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
                    n = 6;
                    V = B & 0x01;
                    minV = 0x4000000;
                } else {
                    return error(string, k);
                }

                // check bound for sufficient chars
                if (k + (3*(n-1)) >= len) {
                    return error(string, k);
                }

                for (int j = 1; j < n; j++) {
                    k++;
                    if (string.charAt(k) != '%') {
                        return error(string, k);
                    }

                    B = toHexByte(string.charAt(k + 1), string.charAt(k + 2));
                    if (B < 0 || (B & 0xC0) != 0x80) {
                        return error(string, k + 1);
                    }

                    V = (V << 6) | (B & 0x3F);
                    k += 2;
                }

                // Check for overlongs and invalid codepoints.
                // The high and low surrogate halves used by UTF-16
                // (U+D800 through U+DFFF) are not legal Unicode values.
                if ((V < minV) || (V >= 0xD800 && V <= 0xDFFF)) {
                    V = Integer.MAX_VALUE;
                }

                if (V < 0x10000) {
                    C = (char) V;
                    if (!component && URI_RESERVED.indexOf(C) >= 0) {
                        for (int j = start; j != k; j++) {
                            sb.append(string.charAt(j));
                        }
                    } else {
                        sb.append(C);
                    }
                } else { // V >= 0x10000
                    if (V > 0x10FFFF) {
                        return error(string, k);
                    }
                    final int L = ((V - 0x10000) & 0x3FF) + 0xDC00;
                    final int H = (((V - 0x10000) >> 10) & 0x3FF) + 0xD800;
                    sb.append((char) H);
                    sb.append((char) L);
                }
            }
        }

        return sb.toString();
    }

    private static int hexDigit(final char ch) {
        final char chu = Character.toUpperCase(ch);
        if (chu >= '0' && chu <= '9') {
            return (chu - '0');
        } else if (chu >= 'A' && chu <= 'F') {
            return (chu - 'A' + 10);
        } else {
            return -1;
        }
    }

    private static int toHexByte(final char ch1, final char ch2) {
        final int i1 = hexDigit(ch1);
        final int i2 = hexDigit(ch2);
        if (i1 >= 0 && i2 >= 0) {
            return (i1 << 4) | i2;
        }
        return -1;
    }

    private static String toHexEscape(final int u0) {
        int u = u0;
        int len;
        final byte[] b = new byte[6];

        if (u <= 0x7f) {
            b[0] = (byte) u;
            len = 1;
        } else {
            // > 0x7ff -> length 2
            // > 0xffff -> length 3
            // and so on. each new length is an additional 5 bits from the
            // original 11
            // the final mask is 8-len zeros in the low part.
            len = 2;
            for (int mask = u >>> 11; mask != 0; mask >>>= 5) {
                len++;
            }
            for (int i = len - 1; i > 0; i--) {
                b[i] = (byte) (0x80 | (u & 0x3f));
                u >>>= 6; // 64 bits per octet.
            }

            b[0] = (byte) (~((1 << (8 - len)) - 1) | u);
        }

        final StringBuilder sb = new StringBuilder();
        for (int i = 0; i < len; i++) {
            sb.append('%');
            if ((b[i] & 0xff) < 0x10) {
                sb.append('0');
            }
            sb.append(Integer.toHexString(b[i] & 0xff).toUpperCase());
        }

        return sb.toString();
    }

    private static String error(final String string, final int index) {
        throw uriError("bad.uri", string, Integer.toString(index));
    }

    // 'uriEscaped' except for alphanumeric chars
    private static final String URI_UNESCAPED_NONALPHANUMERIC = "-_.!~*'()";
    // 'uriReserved' + '#'
    private static final String URI_RESERVED = ";/?:@&=+$,#";

    private static boolean isUnescaped(final char ch, final boolean component) {
        if (('A' <= ch && ch <= 'Z') || ('a' <= ch && ch <= 'z')
                || ('0' <= ch && ch <= '9')) {
            return true;
        }

        if (URI_UNESCAPED_NONALPHANUMERIC.indexOf(ch) >= 0) {
            return true;
        }

        if (!component) {
            return URI_RESERVED.indexOf(ch) >= 0;
        }

        return false;
    }
}

Other Java examples (source code examples)

Here is a short list of links related to this Java URIUtils.java source code file:

... this post is sponsored by my books ...

#1 New Release!

FP Best Seller

 

new blog posts

 

Copyright 1998-2021 Alvin Alexander, alvinalexander.com
All Rights Reserved.

A percentage of advertising revenue from
pages under the /java/jwarehouse URI on this website is
paid back to open source projects.