Skip to content

Commit

Permalink
feat: decode uri path components correctly (#913)
Browse files Browse the repository at this point in the history
* feat: decode uri path components correctly

The old implementation was incorrecly treating '+' as a space. Now
the only things that get decoded in the path are uri escaped sequences.

Fixes #398

* tweak javadoc

* remove hardcoded string
  • Loading branch information
codyoss authored and chingor13 committed Dec 17, 2019
1 parent 853ab4b commit 7d4a048
Show file tree
Hide file tree
Showing 5 changed files with 106 additions and 22 deletions.
Expand Up @@ -81,10 +81,10 @@ public class GenericUrl extends GenericData {
private String fragment;

/**
* If true, the URL string originally given is used as is (without encoding, decoding and
* escaping) whenever referenced; otherwise, part of the URL string may be encoded or decoded as
* deemed appropriate or necessary.
*/
* If true, the URL string originally given is used as is (without encoding, decoding and
* escaping) whenever referenced; otherwise, part of the URL string may be encoded or decoded as
* deemed appropriate or necessary.
*/
private boolean verbatim;

public GenericUrl() {}
Expand Down Expand Up @@ -112,20 +112,20 @@ public GenericUrl(String encodedUrl) {
/**
* Constructs from an encoded URL.
*
* <p>Any known query parameters with pre-defined fields as data keys are parsed based on
* their data type. Any unrecognized query parameter are always parsed as a string.
* <p>Any known query parameters with pre-defined fields as data keys are parsed based on their
* data type. Any unrecognized query parameter are always parsed as a string.
*
* <p>Any {@link MalformedURLException} is wrapped in an {@link IllegalArgumentException}.
*
* @param encodedUrl encoded URL, including any existing query parameters that should be parsed
* @param verbatim flag, to specify if URL should be used as is (without encoding, decoding and escaping)
* @param verbatim flag, to specify if URL should be used as is (without encoding, decoding and
* escaping)
* @throws IllegalArgumentException if URL has a syntax error
*/
public GenericUrl(String encodedUrl, boolean verbatim) {
this(parseURL(encodedUrl), verbatim);
}


/**
* Constructs from a URI.
*
Expand All @@ -140,7 +140,8 @@ public GenericUrl(URI uri) {
* Constructs from a URI.
*
* @param uri URI
* @param verbatim flag, to specify if URL should be used as is (without encoding, decoding and escaping)
* @param verbatim flag, to specify if URL should be used as is (without encoding, decoding and
* escaping)
*/
public GenericUrl(URI uri, boolean verbatim) {
this(
Expand Down Expand Up @@ -168,7 +169,8 @@ public GenericUrl(URL url) {
* Constructs from a URL.
*
* @param url URL
* @param verbatim flag, to specify if URL should be used as is (without encoding, decoding and escaping)
* @param verbatim flag, to specify if URL should be used as is (without encoding, decoding and
* escaping)
* @since 1.14
*/
public GenericUrl(URL url, boolean verbatim) {
Expand Down Expand Up @@ -209,7 +211,7 @@ private GenericUrl(
UrlEncodedParser.parse(query, this);
}
this.userInfo = userInfo != null ? CharEscapers.decodeUri(userInfo) : null;
}
}
}

@Override
Expand Down Expand Up @@ -567,10 +569,11 @@ public static List<String> toPathParts(String encodedPath) {
*
* @param encodedPath slash-prefixed encoded path, for example {@code
* "/m8/feeds/contacts/default/full"}
* @param verbatim flag, to specify if URL should be used as is (without encoding, decoding and escaping)
* @return path parts (decoded if not {@code verbatim}), with each part assumed to be preceded by a {@code '/'}, for example
* {@code "", "m8", "feeds", "contacts", "default", "full"}, or {@code null} for {@code null}
* or {@code ""} input
* @param verbatim flag, to specify if URL should be used as is (without encoding, decoding and
* escaping)
* @return path parts (decoded if not {@code verbatim}), with each part assumed to be preceded by
* a {@code '/'}, for example {@code "", "m8", "feeds", "contacts", "default", "full"}, or
* {@code null} for {@code null} or {@code ""} input
*/
public static List<String> toPathParts(String encodedPath, boolean verbatim) {
if (encodedPath == null || encodedPath.length() == 0) {
Expand All @@ -588,7 +591,7 @@ public static List<String> toPathParts(String encodedPath, boolean verbatim) {
} else {
sub = encodedPath.substring(cur);
}
result.add(verbatim ? sub : CharEscapers.decodeUri(sub));
result.add(verbatim ? sub : CharEscapers.decodeUriPath(sub));

This comment has been minimized.

Copy link
@dkocher

dkocher Jan 27, 2020

This change breaks accessing objects using the google-api-services-storage with whitespace in object names. Input parameter encodedPath is like /storage/v1/b/test.cyberduck.ch/o/msIwJ6Ac+ZQ8U7otu. Google Storage API does not accept + but key names must be percent encoded. In googleapis/java-storage#57

cur = slash + 1;
}
return result;
Expand All @@ -608,13 +611,17 @@ private void appendRawPathFromParts(StringBuilder buf) {
}

/** Adds query parameters from the provided entrySet into the buffer. */
static void addQueryParams(Set<Entry<String, Object>> entrySet, StringBuilder buf, boolean verbatim) {
static void addQueryParams(
Set<Entry<String, Object>> entrySet, StringBuilder buf, boolean verbatim) {
// (similar to UrlEncodedContent)
boolean first = true;
for (Map.Entry<String, Object> nameValueEntry : entrySet) {
Object value = nameValueEntry.getValue();
if (value != null) {
String name = verbatim ? nameValueEntry.getKey() : CharEscapers.escapeUriQuery(nameValueEntry.getKey());
String name =
verbatim
? nameValueEntry.getKey()
: CharEscapers.escapeUriQuery(nameValueEntry.getKey());
if (value instanceof Collection<?>) {
Collection<?> collectionValue = (Collection<?>) value;
for (Object repeatedValue : collectionValue) {
Expand All @@ -627,15 +634,17 @@ static void addQueryParams(Set<Entry<String, Object>> entrySet, StringBuilder bu
}
}

private static boolean appendParam(boolean first, StringBuilder buf, String name, Object value, boolean verbatim) {
private static boolean appendParam(
boolean first, StringBuilder buf, String name, Object value, boolean verbatim) {
if (first) {
first = false;
buf.append('?');
} else {
buf.append('&');
}
buf.append(name);
String stringValue = verbatim ? value.toString() : CharEscapers.escapeUriQuery(value.toString());
String stringValue =
verbatim ? value.toString() : CharEscapers.escapeUriQuery(value.toString());
if (stringValue.length() != 0) {
buf.append('=').append(stringValue);
}
Expand Down
Expand Up @@ -16,6 +16,8 @@

import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;

/**
* Utility functions for dealing with {@code CharEscaper}s, and some commonly used {@code
Expand Down Expand Up @@ -83,7 +85,29 @@ public static String escapeUri(String value) {
*/
public static String decodeUri(String uri) {
try {
return URLDecoder.decode(uri, "UTF-8");
return URLDecoder.decode(uri, StandardCharsets.UTF_8.name());
} catch (UnsupportedEncodingException e) {
// UTF-8 encoding guaranteed to be supported by JVM
throw new RuntimeException(e);
}
}

/**
* Decodes the path component of a URI. This must be done via a method that does not try to
* convert + into spaces(the behavior of {@link java.net.URLDecoder#decode(String, String)}). This
* method transforms URI encoded values into their decoded symbols.
*
* <p>i.e: {@code decodePath("%3Co%3E")} would return {@code "<o>"}
*
* @param path the value to be decoded
* @return decoded version of {@code path}
*/
public static String decodeUriPath(String path) {
if (path == null) {
return null;
}
try {
return URLDecoder.decode(path.replace("+", "%2B"), StandardCharsets.UTF_8.name());
} catch (UnsupportedEncodingException e) {
// UTF-8 encoding guaranteed to be supported by JVM
throw new RuntimeException(e);
Expand Down
Expand Up @@ -62,7 +62,7 @@ public class PercentEscaper extends UnicodeEscaper {
* specified in RFC 3986. Note that some of these characters do need to be escaped when used in
* other parts of the URI.
*/
public static final String SAFEPATHCHARS_URLENCODER = "-_.!~*'()@:$&,;=";
public static final String SAFEPATHCHARS_URLENCODER = "-_.!~*'()@:$&,;=+";

/**
* Contains the save characters plus all reserved characters. This happens to be the safe path
Expand Down
Expand Up @@ -480,6 +480,8 @@ public void testToPathParts() {
subtestToPathParts("/path/to/resource", "", "path", "to", "resource");
subtestToPathParts("/path/to/resource/", "", "path", "to", "resource", "");
subtestToPathParts("/Go%3D%23%2F%25%26%20?%3Co%3Egle/2nd", "", "Go=#/%& ?<o>gle", "2nd");
subtestToPathParts("/plus+test/resource", "", "plus+test", "resource");
subtestToPathParts("/plus%2Btest/resource", "", "plus+test", "resource");
}

private void subtestToPathParts(String encodedPath, String... expectedDecodedParts) {
Expand Down
@@ -0,0 +1,49 @@
/*
* Copyright 2019 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
* in compliance with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing permissions and limitations under
* the License.
*/

package com.google.api.client.util.escape;

import junit.framework.TestCase;

public class CharEscapersTest extends TestCase {

public void testDecodeUriPath() {
subtestDecodeUriPath(null, null);
subtestDecodeUriPath("", "");
subtestDecodeUriPath("abc", "abc");
subtestDecodeUriPath("a+b%2Bc", "a+b+c");
subtestDecodeUriPath("Go%3D%23%2F%25%26%20?%3Co%3Egle", "Go=#/%& ?<o>gle");
}

private void subtestDecodeUriPath(String input, String expected) {
String actual = CharEscapers.decodeUriPath(input);
assertEquals(expected, actual);
}

public void testDecodeUri_IllegalArgumentException() {
subtestDecodeUri_IllegalArgumentException("abc%-1abc");
subtestDecodeUri_IllegalArgumentException("%JJ");
subtestDecodeUri_IllegalArgumentException("abc%0");
}

private void subtestDecodeUri_IllegalArgumentException(String input) {
boolean thrown = false;
try {
CharEscapers.decodeUriPath(input);
} catch (IllegalArgumentException e) {
thrown = true;
}
assertTrue(thrown);
}
}

1 comment on commit 7d4a048

@SanoKriss
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This commit added incompatibility with Android API 17 because java.nio.charset.StandardCharsets;
It is possible fix it without using that? I know it is old Android, but still there is lot of tablets with that version of Android.
Thanks

Please sign in to comment.