|
Play Framework/Scala example source code file (UriEncodingSpec.scala)
// The UriEncodingSpec.scala Play Framework example source code

/*
 * Copyright (C) 2009-2013 Typesafe Inc. <http://www.typesafe.com>
 */
package play.utils

import org.specs2.mutable._

/**
 * Tests for the UriEncoding object.
 *
 * Covers path-segment encoding/decoding, whole-path decoding and the
 * internal `splitString` helper. Most encoding expectations go through
 * [[UriEncodingSpec.encodingFor]], which also round-trips the value through
 * the decoder so that encoder and decoder are checked against each other.
 */
object UriEncodingSpec extends Specification {
  import UriEncoding._

  /** Outcome of encoding an input and decoding the result again. */
  sealed trait EncodingResult

  // Good behaviour

  /** The encoder returned the input unchanged and it decodes back to itself. */
  case object NotEncoded extends EncodingResult

  /** The encoder changed the input to `encoded`, which decodes back to the input. */
  final case class PercentEncoded(encoded: String) extends EncodingResult

  // Bad behaviour

  /** The encoder returned the input unchanged, but decoding it gave `decodedEncoded`. */
  final case class NotEncodedButDecodeDifferent(decodedEncoded: String) extends EncodingResult

  /** The encoder produced `encoded`, but decoding that gave `decodedEncoded` instead of the input. */
  final case class PercentEncodedButDecodeDifferent(encoded: String, decodedEncoded: String) extends EncodingResult

  /** The input needed encoding, yet the decoder accepted the raw input without complaint. */
  final case class PercentEncodedButDecodedInvalid(encoded: String) extends EncodingResult

  /**
   * Encodes `in` as a path segment and classifies the result.
   *
   * The encoded form is always decoded again and compared with the input.
   * When the encoder changed the input, the raw input is additionally passed
   * to the decoder, which is expected to reject it with an
   * `InvalidUriEncodingException`.
   *
   * @param in the raw path segment to encode
   * @param inCharset name of the charset passed to the encoder and decoder
   * @return the [[EncodingResult]] describing what happened
   */
  def encodingFor(in: String, inCharset: String): EncodingResult = {
    val encoded = encodePathSegment(in, inCharset)
    // Both outcomes need the round-tripped value, so compute it once up front.
    val decodedEncoded = decodePathSegment(encoded, inCharset)
    if (encoded == in) {
      if (decodedEncoded != in) NotEncodedButDecodeDifferent(decodedEncoded)
      else NotEncoded
    } else if (decodedEncoded != in) {
      PercentEncodedButDecodeDifferent(encoded, decodedEncoded)
    } else {
      try {
        decodePathSegment(in, inCharset)
        PercentEncodedButDecodedInvalid(encoded) // Decoding should have failed
      } catch {
        case _: InvalidUriEncodingException => PercentEncoded(encoded) // This is expected behaviour
      }
    }
  }

  "Path segment encoding and decoding" should {

    /*
      RFC 3986 - Uniform Resource Identifier (URI): Generic Syntax

      2.2. Reserved Characters
      ...
         reserved    = gen-delims / sub-delims

         gen-delims  = ":" / "/" / "?" / "#" / "[" / "]" / "@"

         sub-delims  = "!" / "$" / "&" / "'" / "(" / ")"
                     / "*" / "+" / "," / ";" / "="
      ...
      URI producing applications should percent-encode data octets that
      correspond to characters in the reserved set unless these characters
      are specifically allowed by the URI scheme to represent data in that
      component.
      ...
      3.3. Path
      ...
         segment       = *pchar
         segment-nz    = 1*pchar
         segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
                       ; non-zero-length segment without any colon ":"

         pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
    */

    "percent-encode reserved characters that aren't allowed in a path segment" in {
      // Not allowed (gen-delims, except ":" / "@")
      encodingFor("/", "utf-8") must_== PercentEncoded("%2F")
      encodingFor("?", "utf-8") must_== PercentEncoded("%3F")
      encodingFor("#", "utf-8") must_== PercentEncoded("%23")
      encodingFor("[", "utf-8") must_== PercentEncoded("%5B")
      encodingFor("]", "utf-8") must_== PercentEncoded("%5D")
    }

    "not percent-encode reserved characters that are allowed in a path segment" in {
      // Allowed (sub-delims / ":" / "@")
      encodingFor("!", "utf-8") must_== NotEncoded
      encodingFor("$", "utf-8") must_== NotEncoded
      encodingFor("&", "utf-8") must_== NotEncoded
      encodingFor("'", "utf-8") must_== NotEncoded
      encodingFor("(", "utf-8") must_== NotEncoded
      encodingFor(")", "utf-8") must_== NotEncoded
      encodingFor("*", "utf-8") must_== NotEncoded
      encodingFor("+", "utf-8") must_== NotEncoded
      encodingFor(",", "utf-8") must_== NotEncoded
      encodingFor(";", "utf-8") must_== NotEncoded
      encodingFor("=", "utf-8") must_== NotEncoded
      encodingFor(":", "utf-8") must_== NotEncoded
      encodingFor("@", "utf-8") must_== NotEncoded
    }

    /*
      2.3. Unreserved Characters
      ...
         unreserved  = ALPHA / DIGIT / "-" / "." / "_" / "~"
      ...
      For consistency, percent-encoded octets in the ranges of ALPHA
      (%41-%5A and %61-%7A), DIGIT (%30-%39), hyphen (%2D), period (%2E),
      underscore (%5F), or tilde (%7E) should not be created by URI
      producers and, when found in a URI, should be decoded to their
      corresponding unreserved characters by URI normalizers.
    */

    "not percent-encode unreserved characters" in {
      encodingFor("a", "utf-8") must_== NotEncoded
      encodingFor("z", "utf-8") must_== NotEncoded
      encodingFor("A", "utf-8") must_== NotEncoded
      encodingFor("Z", "utf-8") must_== NotEncoded
      encodingFor("0", "utf-8") must_== NotEncoded
      encodingFor("9", "utf-8") must_== NotEncoded
      encodingFor("-", "utf-8") must_== NotEncoded
      encodingFor(".", "utf-8") must_== NotEncoded
      encodingFor("_", "utf-8") must_== NotEncoded
      encodingFor("~", "utf-8") must_== NotEncoded
    }

    /*
      2.1. Percent-Encoding

      A percent-encoding mechanism is used to represent a data octet in a
      component when that octet's corresponding character is outside the
      allowed set...
    */

    // Control and high characters are written as Unicode escapes rather than
    // octal escapes, which newer Scala compilers no longer accept.
    "percent-encode any characters that aren't specifically allowed in a path segment" in {
      encodingFor("\u0000", "US-ASCII") must_== PercentEncoded("%00")
      encodingFor("\u001F", "US-ASCII") must_== PercentEncoded("%1F")
      encodingFor(" ", "US-ASCII") must_== PercentEncoded("%20")
      encodingFor("\"", "US-ASCII") must_== PercentEncoded("%22")
      encodingFor("%", "US-ASCII") must_== PercentEncoded("%25")
      encodingFor("<", "US-ASCII") must_== PercentEncoded("%3C")
      encodingFor(">", "US-ASCII") must_== PercentEncoded("%3E")
      encodingFor("\\", "US-ASCII") must_== PercentEncoded("%5C")
      encodingFor("^", "US-ASCII") must_== PercentEncoded("%5E")
      encodingFor("`", "US-ASCII") must_== PercentEncoded("%60")
      encodingFor("{", "US-ASCII") must_== PercentEncoded("%7B")
      encodingFor("|", "US-ASCII") must_== PercentEncoded("%7C")
      encodingFor("}", "US-ASCII") must_== PercentEncoded("%7D")
      encodingFor("\u007F", "ISO-8859-1") must_== PercentEncoded("%7F")
      encodingFor("\u00FF", "ISO-8859-1") must_== PercentEncoded("%FF")
    }

    "percent-encode UTF-8 strings by encoding each octet not allowed in a path segment" in {
      encodingFor("£0.25", "UTF-8") must_== PercentEncoded("%C2%A30.25")
      encodingFor("€100", "UTF-8") must_== PercentEncoded("%E2%82%AC100")
      encodingFor("«küßî»", "UTF-8") must_== PercentEncoded("%C2%ABk%C3%BC%C3%9F%C3%AE%C2%BB")
      encodingFor("“ЌύБЇ”", "UTF-8") must_== PercentEncoded("%E2%80%9C%D0%8C%CF%8D%D0%91%D0%87%E2%80%9D")
    }

    /*
      2.1. Percent-Encoding
      ...
      A percent-encoded octet is encoded as a character triplet, consisting
      of the percent character "%" followed by the two hexadecimal digits
      representing that octet's numeric value.
      ...
         pct-encoded = "%" HEXDIG HEXDIG

      The uppercase hexadecimal digits 'A' through 'F' are equivalent to
      the lowercase digits 'a' through 'f', respectively. If two URIs
      differ only in the case of hexadecimal digits used in percent-encoded
      octets, they are equivalent. For consistency, URI producers and
      normalizers should use uppercase hexadecimal digits for all percent-
      encodings.
    */

    "percent-encode to triplets with upper-case hex" in {
      encodingFor("\u0000", "ISO-8859-1") must_== PercentEncoded("%00")
      encodingFor("\u0099", "ISO-8859-1") must_== PercentEncoded("%99")
      encodingFor("\u00AA", "ISO-8859-1") must_== PercentEncoded("%AA")
      encodingFor("\u00FF", "ISO-8859-1") must_== PercentEncoded("%FF")
    }

    // Misc tests

    "handle strings of different lengths" in {
      encodingFor("", "UTF-8") must_== NotEncoded
      encodingFor("1", "UTF-8") must_== NotEncoded
      encodingFor("12", "UTF-8") must_== NotEncoded
      encodingFor("123", "UTF-8") must_== NotEncoded
      encodingFor("1234567890", "UTF-8") must_== NotEncoded
    }

    "handle strings needing partial percent-encoding" in {
      encodingFor("Hello world", "US-ASCII") must_== PercentEncoded("Hello%20world")
      encodingFor("/home/foo", "US-ASCII") must_== PercentEncoded("%2Fhome%2Ffoo")
    }

    // Path segment encoding differs from query string encoding, which is
    // "application/x-www-form-urlencoded". One difference is the encoding
    // of the "+" and space characters.
    "percent-encode spaces, but not + characters" in {
      encodingFor(" ", "US-ASCII") must_== PercentEncoded("%20") // vs "+" for query strings
      encodingFor("+", "US-ASCII") must_== NotEncoded // vs "%2B" for query strings
      encodingFor(" +", "US-ASCII") must_== PercentEncoded("%20+") // vs "+%2B" for query strings
      encodingFor("1+2=3", "US-ASCII") must_== NotEncoded
      encodingFor("1 + 2 = 3", "US-ASCII") must_== PercentEncoded("1%20+%202%20=%203")
    }

    "decode characters percent-encoded with upper and lowercase hex digits" in {
      decodePathSegment("%aa", "ISO-8859-1") must_== "\u00AA"
      decodePathSegment("%aA", "ISO-8859-1") must_== "\u00AA"
      decodePathSegment("%Aa", "ISO-8859-1") must_== "\u00AA"
      decodePathSegment("%AA", "ISO-8859-1") must_== "\u00AA"
      decodePathSegment("%ff", "ISO-8859-1") must_== "\u00FF"
      decodePathSegment("%fF", "ISO-8859-1") must_== "\u00FF"
      decodePathSegment("%Ff", "ISO-8859-1") must_== "\u00FF"
      decodePathSegment("%FF", "ISO-8859-1") must_== "\u00FF"
    }

    "decode percent-encoded characters that don't really need to be encoded" in {
      decodePathSegment("%21", "utf-8") must_== "!"
      decodePathSegment("%61", "utf-8") must_== "a"
      decodePathSegment("%31%32%33", "UTF-8") must_== "123"
      // Encoded by MIME type "application/x-www-form-urlencoded"
      decodePathSegment("%2b", "US-ASCII") must_== "+"
      decodePathSegment("%7e", "US-ASCII") must_== "~"
    }

  }

  "Path decoding" should {

    "decode basic paths" in {
      decodePath("", "utf-8") must_== ""
      decodePath("/", "utf-8") must_== "/"
      decodePath("/abc", "utf-8") must_== "/abc"
      decodePath("/css/stylesheet.css", "utf-8") must_== "/css/stylesheet.css"
    }

    "decode paths with encoded characters" in {
      decodePath("/hello%20world", "utf-8") must_== "/hello world"
    }

    "decode encoded slashes (although they can't be distinguished from unencoded slashes)" in {
      decodePath("/a%2fb", "utf-8") must_== "/a/b"
      decodePath("/a%2fb/c%2fd", "utf-8") must_== "/a/b/c/d"
    }

    "not decode badly encoded paths" in {
      decodePath("/a|b/", "utf-8") must throwA[InvalidUriEncodingException]
      decodePath("/hello world", "utf-8") must throwA[InvalidUriEncodingException]
    }

    "not perform normalization of dot-segments" in {
      decodePath("a/..", "utf-8") must_== "a/.."
      decodePath("a/.", "utf-8") must_== "a/."
    }

    "not perform normalization of duplicate slashes" in {
      decodePath("//a", "utf-8") must_== "//a"
      decodePath("a//b", "utf-8") must_== "a//b"
      decodePath("a//", "utf-8") must_== "a//"
    }

    "decode complex UTF-8 octets" in {
      decodePath("/path/%C2%ABk%C3%BC%C3%9F%C3%AE%C2%BB", "UTF-8") must_== "/path/«küßî»"
      decodePath("/path/%E2%80%9C%D0%8C%CF%8D%D0%91%D0%87%E2%80%9D", "UTF-8") must_== "/path/“ЌύБЇ”"
    }

  }

  // Internal methods

  "Internal UriEncoding methods" should {

    "know how to split strings" in {
      splitString("", '/') must_== Seq("")
      splitString("/", '/') must_== Seq("", "")
      splitString("a", '/') must_== Seq("a")
      splitString("a/b", '/') must_== Seq("a", "b")
      splitString("a//b", '/') must_== Seq("a", "", "b")
      splitString("/a", '/') must_== Seq("", "a")
      splitString("/a/b", '/') must_== Seq("", "a", "b")
      splitString("/a/b/", '/') must_== Seq("", "a", "b", "")
      splitString("/abc/xyz", '/') must_== Seq("", "abc", "xyz")
    }

  }

}

// Other Play Framework source code examples
// Here is a short list of links related to this Play Framework UriEncodingSpec.scala source code file: |
... this post is sponsored by my books ... | |
#1 New Release! |
FP Best Seller |
Copyright 1998-2021 Alvin Alexander, alvinalexander.com
All Rights Reserved.
A percentage of advertising revenue from
pages under the /java/jwarehouse
URI on this website is
paid back to open source projects.