|
Play Framework/Scala example source code file (UriEncoding.scala)
The UriEncoding.scala Play Framework example source code
/*
* Copyright (C) 2009-2013 Typesafe Inc. <http://www.typesafe.com>
*/
package play.utils
import java.util.BitSet
import java.io.ByteArrayOutputStream
/**
* Provides support for correctly encoding pieces of URIs.
*
* @see http://www.ietf.org/rfc/rfc3986.txt
*/
object UriEncoding {

  /**
   * Encode a string so that it can be used safely in the "path segment"
   * part of a URI. A path segment is defined in RFC 3986. In a URI such
   * as `http://www.example.com/abc/def?a=1&b=2` both `abc` and `def`
   * are path segments.
   *
   * Path segment encoding differs from encoding for other parts of a URI.
   * For example, the "&" character is permitted in a path segment, but
   * has special meaning in query parameters. On the other hand, the "/"
   * character cannot appear in a path segment, as it is the path delimiter,
   * so it must be encoded as "%2F". These are just two examples of the
   * differences between path segment and query string encoding; there are
   * other differences too.
   *
   * When encoding path segments the `encodePathSegment` method should always
   * be used in preference to the [[java.net.URLEncoder.encode(String,String)]]
   * method. `URLEncoder.encode`, despite its name, actually provides encoding
   * in the `application/x-www-form-urlencoded` MIME format which is the encoding
   * used for form data in HTTP GET and POST requests. This encoding is suitable
   * for inclusion in the query part of a URI. But `URLEncoder.encode` should not
   * be used for path segment encoding. (Also note that `URLEncoder.encode` is
   * not quite spec compliant. For example, it percent-encodes the `~` character when
   * really it should leave it unencoded.)
   *
   * @param s The string to encode.
   * @param inputCharset The name of the encoding that the string `s` is encoded with.
   *     The string `s` will be converted to octets (bytes) using this character encoding.
   * @return An encoded string in the US-ASCII character set.
   */
  def encodePathSegment(s: String, inputCharset: String): String = {
    val in = s.getBytes(inputCharset)
    val out = new ByteArrayOutputStream()
    for (b <- in) {
      // Bytes are signed on the JVM; mask to get the unsigned octet value for the lookup.
      val allowed = segmentChars.get(b & 0xFF)
      if (allowed) {
        out.write(b)
      } else {
        // Percent-encode the octet as '%' followed by two uppercase hex digits.
        out.write('%')
        out.write(upperHex((b >> 4) & 0xF))
        out.write(upperHex(b & 0xF))
      }
    }
    out.toString("US-ASCII")
  }

  /**
   * Decode a string according to the rules for the "path segment"
   * part of a URI. A path segment is defined in RFC 3986. In a URI such
   * as `http://www.example.com/abc/def?a=1&b=2` both `abc` and `def`
   * are path segments.
   *
   * Path segment encoding differs from encoding for other parts of a URI.
   * For example, the "&" character is permitted in a path segment, but
   * has special meaning in query parameters. On the other hand, the "/"
   * character cannot appear in a path segment, as it is the path delimiter,
   * so it must be encoded as "%2F". These are just two examples of the
   * differences between path segment and query string encoding; there are
   * other differences too.
   *
   * When decoding path segments the `decodePathSegment` method should always
   * be used in preference to the [[java.net.URLDecoder.decode(String,String)]]
   * method. `URLDecoder.decode`, despite its name, actually decodes
   * the `application/x-www-form-urlencoded` MIME format which is the encoding
   * used for form data in HTTP GET and POST requests. This format is suitable
   * for inclusion in the query part of a URI. But `URLDecoder.decode` should not
   * be used for path segment encoding or decoding.
   *
   * @param s The string to decode. Must use the US-ASCII character set.
   * @param outputCharset The name of the encoding that the output should be encoded with.
   *     The output string will be converted from octets (bytes) using this character encoding.
   * @throws InvalidUriEncodingException If the input is not a valid encoded path segment:
   *     a `%` that is not followed by two hexadecimal digits, or a character that is
   *     not permitted in a path segment.
   * @return A decoded string in the `outputCharset` character set.
   */
  def decodePathSegment(s: String, outputCharset: String): String = {
    val in = s.getBytes("US-ASCII")
    val out = new ByteArrayOutputStream()
    var inPos = 0
    // Read the next input octet as an unsigned value and advance the cursor.
    def next(): Int = {
      val b = in(inPos) & 0xFF
      inPos += 1
      b
    }
    while (inPos < in.length) {
      val b = next()
      if (b == '%') {
        // Read high digit
        if (inPos >= in.length) throw new InvalidUriEncodingException(s"Cannot decode $s: % at end of string")
        val high = fromHex(next())
        if (high == -1) throw new InvalidUriEncodingException(s"Cannot decode $s: expected hex digit at position $inPos.")
        // Read low digit
        if (inPos >= in.length) throw new InvalidUriEncodingException(s"Cannot decode $s: incomplete percent encoding at end of string")
        val low = fromHex(next())
        if (low == -1) throw new InvalidUriEncodingException(s"Cannot decode $s: expected hex digit at position $inPos.")
        // Write decoded byte
        out.write((high << 4) + low)
      } else if (segmentChars.get(b)) {
        // This character is allowed
        out.write(b)
      } else {
        throw new InvalidUriEncodingException(s"Cannot decode $s: illegal character at position $inPos.")
      }
    }
    out.toString(outputCharset)
  }

  /**
   * Decode the path part of a URI. Each path segment will be decoded
   * using the same rules as `decodePathSegment`. No normalization is performed:
   * leading, trailing and duplicated slashes, if present, are left as they are and
   * if absent remain absent; dot-segments (".." and ".") are ignored.
   *
   * Encoded slash characters will appear as slashes in the output, thus "a/b"
   * will be indistinguishable from "a%2Fb".
   *
   * @param s The string to decode. Must use the US-ASCII character set.
   * @param outputCharset The name of the encoding that the output should be encoded with.
   *     The output string will be converted from octets (bytes) using this character encoding.
   * @throws InvalidUriEncodingException If the input is not a valid encoded path.
   * @return A decoded string in the `outputCharset` character set.
   */
  def decodePath(s: String, outputCharset: String): String = {
    // Note: Could easily expose a method to return the decoded path as a Seq[String].
    // This would allow better handling of paths segments with encoded slashes in them.
    // However, there is no need for this yet, so the method hasn't been added yet.
    splitString(s, '/').map(decodePathSegment(_, outputCharset)).mkString("/")
  }

  // RFC 3986, 3.3. Path
  // segment       = *pchar
  // segment-nz    = 1*pchar
  // segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
  //               ; non-zero-length segment without any colon ":"

  /** The set of ASCII character codes that are allowed in a URI path segment. */
  private val segmentChars: BitSet = membershipTable(pchar)

  /** The characters allowed in a path segment; defined in RFC 3986 */
  private def pchar: Seq[Char] = {
    // RFC 3986, 2.3. Unreserved Characters
    // unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
    val alphaDigit = for ((min, max) <- Seq(('a', 'z'), ('A', 'Z'), ('0', '9')); c <- min to max) yield c
    val unreserved = alphaDigit ++ Seq('-', '.', '_', '~')
    // RFC 3986, 2.2. Reserved Characters
    // sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
    //            / "*" / "+" / "," / ";" / "="
    val subDelims = Seq('!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=')
    // RFC 3986, 3.3. Path
    // pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
    unreserved ++ subDelims ++ Seq(':', '@')
  }

  /** Create a BitSet to act as a membership lookup table for the given characters. */
  private def membershipTable(chars: Seq[Char]): BitSet = {
    val bits = new BitSet(256)
    for (c <- chars) { bits.set(c.toInt) }
    bits
  }

  /**
   * Given a number from 0 to 15, return the ASCII character code corresponding
   * to its uppercase hexadecimal representation.
   */
  private def upperHex(x: Int): Int = {
    // Assume 0 <= x < 16
    if (x < 10) (x + '0') else (x - 10 + 'A')
  }

  /**
   * Given the ASCII value of a character, return its value as a hex digit.
   * Only `0`-`9`, `A`-`F` and `a`-`f` are hex digits; for any other
   * character -1 is returned instead.
   */
  private def fromHex(b: Int): Int = {
    if (b >= '0' && b <= '9') {
      b - '0'
    } else if (b >= 'A' && b <= 'F') {
      // The upper bound must be 'F': accepting 'G'..'Z' would yield values >= 16
      // and let malformed escapes such as "%GG" decode to garbage bytes.
      10 + b - 'A'
    } else if (b >= 'a' && b <= 'f') {
      10 + b - 'a'
    } else {
      -1
    }
  }

  /**
   * Split a string on a character. Similar to `String.split` except, for this method,
   * the invariant {{{splitString(s, '/').mkString("/") == s}}} holds.
   *
   * For example:
   * {{{
   * splitString("//a//", '/') == Seq("", "", "a", "", "")
   * String.split("//a//", '/') == Seq("", "", "a")
   * }}}
   *
   * @param s The string to split.
   * @param c The separator character.
   * @return The pieces of `s` between occurrences of `c`, as an immutable sequence;
   *     always contains at least one element.
   */
  private[utils] def splitString(s: String, c: Char): Seq[String] = {
    import scala.annotation.tailrec
    val result = scala.collection.mutable.ListBuffer.empty[String]
    @tailrec
    def splitLoop(start: Int): Unit = if (start < s.length) {
      val end = s.indexOf(c, start)
      if (end == -1) {
        result += s.substring(start)
      } else {
        result += s.substring(start, end)
        splitLoop(end + 1)
      }
    } else if (start == s.length) {
      // Either the input is empty or it ended with the separator: in both
      // cases a final empty piece is needed to preserve the mkString invariant.
      result += ""
    }
    splitLoop(0)
    // Hand back an immutable snapshot rather than the internal mutable buffer.
    result.toList
  }
}
/**
 * Thrown when a value that is expected to be URI-encoded turns out
 * not to be encoded correctly.
 *
 * @param msg A description of what was wrong with the input.
 */
class InvalidUriEncodingException(msg: String)
  extends RuntimeException(msg)
Other Play Framework source code examples
Here is a short list of links related to this Play Framework UriEncoding.scala source code file: |
| ... this post is sponsored by my books ... | |
#1 New Release! |
FP Best Seller |
Copyright 1998-2024 Alvin Alexander, alvinalexander.com
All Rights Reserved.
A percentage of advertising revenue from
pages under the /java/jwarehouse
URI on this website is
paid back to open source projects.