/
CharEscapers.java
223 lines (207 loc) · 9.48 KB
/
CharEscapers.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
/*
* Copyright (c) 2010 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
* in compliance with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing permissions and limitations under
* the License.
*/
package com.google.api.client.util.escape;
import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
/**
 * Utility functions for dealing with {@code CharEscaper}s, and some commonly used {@code
 * CharEscaper} instances.
 *
 * <p>This class only exposes static helpers and cannot be instantiated.
 *
 * @since 1.0
 */
public final class CharEscapers {

  /** Escaper behind {@link #escapeUri(String)}. */
  private static final Escaper URI_ESCAPER =
      new PercentEscaper(PercentEscaper.SAFECHARS_URLENCODER, true);

  /** Escaper behind {@link #escapeUriPath(String)}. */
  private static final Escaper URI_PATH_ESCAPER =
      new PercentEscaper(PercentEscaper.SAFEPATHCHARS_URLENCODER, false);

  /** Escaper behind {@link #escapeUriPathWithoutReserved(String)}. */
  private static final Escaper URI_RESERVED_ESCAPER =
      new PercentEscaper(PercentEscaper.SAFE_PLUS_RESERVED_CHARS_URLENCODER, false);

  /** Escaper behind {@link #escapeUriUserInfo(String)}. */
  private static final Escaper URI_USERINFO_ESCAPER =
      new PercentEscaper(PercentEscaper.SAFEUSERINFOCHARS_URLENCODER, false);

  /** Escaper behind {@link #escapeUriQuery(String)}. */
  private static final Escaper URI_QUERY_STRING_ESCAPER =
      new PercentEscaper(PercentEscaper.SAFEQUERYSTRINGCHARS_URLENCODER, false);

  /**
   * Escapes the string value so it can be safely included in URIs. For details on escaping URIs,
   * see <a href="http://tools.ietf.org/html/rfc3986#section-2.4">RFC 3986 - section 2.4</a>.
   *
   * <p>When encoding a String, the following rules apply:
   *
   * <ul>
   *   <li>The alphanumeric characters "a" through "z", "A" through "Z" and "0" through "9" remain
   *       the same.
   *   <li>The special characters ".", "-", "*", and "_" remain the same.
   *   <li>The space character " " is converted into a plus sign "+".
   *   <li>All other characters are converted into one or more bytes using UTF-8 encoding and each
   *       byte is then represented by the 3-character string "%XY", where "XY" is the two-digit,
   *       uppercase, hexadecimal representation of the byte value.
   * </ul>
   *
   * <p><b>Note</b>: Unlike other escapers, URI escapers produce uppercase hexadecimal sequences.
   * From <a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a>:<br>
   * <i>"URI producers and normalizers should use uppercase hexadecimal digits for all
   * percent-encodings."</i>
   *
   * <p>This escaper has identical behavior to (but is potentially much faster than):
   *
   * <ul>
   *   <li>{@link java.net.URLEncoder#encode(String, String)} with the encoding name "UTF-8"
   * </ul>
   *
   * @param value the string to escape
   * @return the escaped form of {@code value}
   */
  public static String escapeUri(String value) {
    return URI_ESCAPER.escape(value);
  }

  /**
   * Percent-decodes a US-ASCII string into a Unicode string. UTF-8 encoding is used to determine
   * what characters are represented by any consecutive sequences of the form "%<i>XX</i>".
   *
   * <p>This replaces each occurrence of '+' with a space, ' '. So this method should not be used
   * for non application/x-www-form-urlencoded strings such as host and path; use {@link
   * #decodeUriPath(String)} for those.
   *
   * @param uri a percent-encoded US-ASCII string, may be {@code null}
   * @return a Unicode string, or {@code null} if {@code uri} is {@code null} (consistent with
   *     {@link #decodeUriPath(String)})
   * @throws IllegalArgumentException may be thrown by {@link URLDecoder} when {@code uri}
   *     contains a malformed percent-escape
   */
  public static String decodeUri(String uri) {
    if (uri == null) {
      return null;
    }
    return decodeUtf8(uri);
  }

  /**
   * Decodes the path component of a URI. This must be done via a method that does not try to
   * convert + into spaces (the behavior of {@link java.net.URLDecoder#decode(String, String)}).
   * This method transforms URI encoded values into their decoded symbols.
   *
   * <p>i.e: {@code decodeUriPath("%3Co%3E")} would return {@code "<o>"}
   *
   * @param path the value to be decoded, may be {@code null}
   * @return decoded version of {@code path}, or {@code null} if {@code path} is {@code null}
   * @throws IllegalArgumentException may be thrown by {@link URLDecoder} when {@code path}
   *     contains a malformed percent-escape
   */
  public static String decodeUriPath(String path) {
    if (path == null) {
      return null;
    }
    // Pre-encode literal '+' so that URLDecoder turns it back into '+' instead of a space.
    return decodeUtf8(path.replace("+", "%2B"));
  }

  /**
   * Escapes the string value so it can be safely included in URI path segments. For details on
   * escaping URIs, see <a href="http://tools.ietf.org/html/rfc3986#section-2.4">RFC 3986 - section
   * 2.4</a>.
   *
   * <p>When encoding a String, the following rules apply:
   *
   * <ul>
   *   <li>The alphanumeric characters "a" through "z", "A" through "Z" and "0" through "9" remain
   *       the same.
   *   <li>The unreserved characters ".", "-", "~", and "_" remain the same.
   *   <li>The general delimiters "@" and ":" remain the same.
   *   <li>The subdelimiters "!", "$", "&amp;", "'", "(", ")", "*", ",", ";", and "=" remain the
   *       same.
   *   <li>The space character " " is converted into %20.
   *   <li>All other characters are converted into one or more bytes using UTF-8 encoding and each
   *       byte is then represented by the 3-character string "%XY", where "XY" is the two-digit,
   *       uppercase, hexadecimal representation of the byte value.
   * </ul>
   *
   * <p><b>Note</b>: Unlike other escapers, URI escapers produce uppercase hexadecimal sequences.
   * From <a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a>:<br>
   * <i>"URI producers and normalizers should use uppercase hexadecimal digits for all
   * percent-encodings."</i>
   *
   * @param value the string to escape
   * @return the escaped form of {@code value}
   */
  public static String escapeUriPath(String value) {
    return URI_PATH_ESCAPER.escape(value);
  }

  /**
   * Escapes a URI path but retains all reserved characters, including all general delimiters. That
   * is the same as {@link #escapeUriPath(String)} except that it keeps '?', '+', and '/' unescaped.
   *
   * <p><b>Note</b>: despite what the method name may suggest, reserved characters are
   * <em>not</em> escaped by this method; they are passed through unchanged.
   *
   * @param value the string to escape
   * @return the escaped form of {@code value}
   */
  public static String escapeUriPathWithoutReserved(String value) {
    return URI_RESERVED_ESCAPER.escape(value);
  }

  /**
   * Escapes the string value so it can be safely included in URI user info part. For details on
   * escaping URIs, see <a href="http://tools.ietf.org/html/rfc3986#section-2.4">RFC 3986 - section
   * 2.4</a>.
   *
   * <p>When encoding a String, the following rules apply:
   *
   * <ul>
   *   <li>The alphanumeric characters "a" through "z", "A" through "Z" and "0" through "9" remain
   *       the same.
   *   <li>The unreserved characters ".", "-", "~", and "_" remain the same.
   *   <li>The general delimiter ":" remains the same.
   *   <li>The subdelimiters "!", "$", "&amp;", "'", "(", ")", "*", ",", ";", and "=" remain the
   *       same.
   *   <li>The space character " " is converted into %20.
   *   <li>All other characters are converted into one or more bytes using UTF-8 encoding and each
   *       byte is then represented by the 3-character string "%XY", where "XY" is the two-digit,
   *       uppercase, hexadecimal representation of the byte value.
   * </ul>
   *
   * <p><b>Note</b>: Unlike other escapers, URI escapers produce uppercase hexadecimal sequences.
   * From <a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a>:<br>
   * <i>"URI producers and normalizers should use uppercase hexadecimal digits for all
   * percent-encodings."</i>
   *
   * @param value the string to escape
   * @return the escaped form of {@code value}
   * @since 1.15
   */
  public static String escapeUriUserInfo(String value) {
    return URI_USERINFO_ESCAPER.escape(value);
  }

  /**
   * Escapes the string value so it can be safely included in URI query string segments. When the
   * query string consists of a sequence of name=value pairs separated by &amp;, the names and
   * values should be individually encoded. If you escape an entire query string in one pass with
   * this escaper, then the "=" and "&amp;" characters used as separators will also be escaped.
   *
   * <p>This escaper is also suitable for escaping fragment identifiers.
   *
   * <p>For details on escaping URIs, see <a
   * href="http://tools.ietf.org/html/rfc3986#section-2.4">RFC 3986 - section 2.4</a>.
   *
   * <p>When encoding a String, the following rules apply:
   *
   * <ul>
   *   <li>The alphanumeric characters "a" through "z", "A" through "Z" and "0" through "9" remain
   *       the same.
   *   <li>The unreserved characters ".", "-", "~", and "_" remain the same.
   *   <li>The general delimiters "@" and ":" remain the same.
   *   <li>The path delimiters "/" and "?" remain the same.
   *   <li>The subdelimiters "!", "$", "'", "(", ")", "*", ",", and ";", remain the same.
   *   <li>The space character " " is converted into %20.
   *   <li>The equals sign "=" is converted into %3D.
   *   <li>The ampersand "&amp;" is converted into %26.
   *   <li>All other characters are converted into one or more bytes using UTF-8 encoding and each
   *       byte is then represented by the 3-character string "%XY", where "XY" is the two-digit,
   *       uppercase, hexadecimal representation of the byte value.
   * </ul>
   *
   * <p><b>Note</b>: Unlike other escapers, URI escapers produce uppercase hexadecimal sequences.
   * From <a href="http://tools.ietf.org/html/rfc3986">RFC 3986</a>:<br>
   * <i>"URI producers and normalizers should use uppercase hexadecimal digits for all
   * percent-encodings."</i>
   *
   * @param value the string to escape
   * @return the escaped form of {@code value}
   */
  public static String escapeUriQuery(String value) {
    return URI_QUERY_STRING_ESCAPER.escape(value);
  }

  /**
   * Runs {@link URLDecoder#decode(String, String)} with UTF-8 on a non-null input, shared by
   * {@link #decodeUri(String)} and {@link #decodeUriPath(String)}.
   */
  private static String decodeUtf8(String encoded) {
    try {
      return URLDecoder.decode(encoded, StandardCharsets.UTF_8.name());
    } catch (UnsupportedEncodingException e) {
      // UTF-8 encoding guaranteed to be supported by JVM
      throw new RuntimeException(e);
    }
  }

  /** Not instantiable: static utility holder. */
  private CharEscapers() {}
}