From 052fabee68b792256313a64e3f5c25f0ce714855 Mon Sep 17 00:00:00 2001 From: Rohit Vaidya Date: Thu, 5 Aug 2021 16:11:31 -0700 Subject: [PATCH 1/2] Fix: Defaulting to UTF-8 if charset is missing Some servers don't return the charset. This causes German characters to be decoded incorrectly, because the ISO_8859_1 fallback does not work well in such cases. Default to UTF-8 for text/csv when the charset is missing. https://www.iana.org/assignments/media-types/text/csv --- .../google/api/client/http/HttpResponse.java | 4 +++ .../api/client/http/HttpResponseTest.java | 28 +++++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/google-http-client/src/main/java/com/google/api/client/http/HttpResponse.java b/google-http-client/src/main/java/com/google/api/client/http/HttpResponse.java index 2ffa4652b..b0ad3e20b 100644 --- a/google-http-client/src/main/java/com/google/api/client/http/HttpResponse.java +++ b/google-http-client/src/main/java/com/google/api/client/http/HttpResponse.java @@ -534,6 +534,10 @@ public Charset getContentCharset() { // https://tools.ietf.org/html/rfc4627 - JSON must be encoded with UTF-8 return StandardCharsets.UTF_8; } + // fallback to well-kown charset for text/csv + if ("text".equals(mediaType.getType()) && "csv".equals(mediaType.getSubType())) { + return StandardCharsets.UTF_8; + } } return StandardCharsets.ISO_8859_1; } diff --git a/google-http-client/src/test/java/com/google/api/client/http/HttpResponseTest.java b/google-http-client/src/test/java/com/google/api/client/http/HttpResponseTest.java index bed019f42..fbe56be13 100644 --- a/google-http-client/src/test/java/com/google/api/client/http/HttpResponseTest.java +++ b/google-http-client/src/test/java/com/google/api/client/http/HttpResponseTest.java @@ -68,6 +68,8 @@ public void testParseAsString_none() throws Exception { private static final String VALID_CONTENT_TYPE = "text/plain"; private static final String VALID_CONTENT_TYPE_WITH_PARAMS = "application/vnd.com.google.datastore.entity+json; charset=utf-8; 
version=v1; q=0.9"; + private static final String VALID_CONTENT_TYPE_WITHOUT_CHARSET = + "text/csv; version=v1; q=0.9"; private static final String INVALID_CONTENT_TYPE = "!!!invalid!!!"; private static final String JSON_CONTENT_TYPE = "application/json"; @@ -194,6 +196,32 @@ public LowLevelHttpResponse execute() throws IOException { assertEquals("ISO-8859-1", response.getContentCharset().name()); } + public void testParseAsString_validContentTypeWithoutCharSetWithParams() throws Exception { + HttpTransport transport = + new MockHttpTransport() { + @Override + public LowLevelHttpRequest buildRequest(String method, String url) throws IOException { + return new MockLowLevelHttpRequest() { + @Override + public LowLevelHttpResponse execute() throws IOException { + MockLowLevelHttpResponse result = new MockLowLevelHttpResponse(); + result.setContent(SAMPLE2); + result.setContentType(VALID_CONTENT_TYPE_WITHOUT_CHARSET); + return result; + } + }; + } + }; + HttpRequest request = + transport.createRequestFactory().buildGetRequest(HttpTesting.SIMPLE_GENERIC_URL); + + HttpResponse response = request.execute(); + assertEquals(SAMPLE2, response.parseAsString()); + assertEquals(VALID_CONTENT_TYPE_WITHOUT_CHARSET, response.getContentType()); + assertNotNull(response.getMediaType()); + assertEquals("UTF-8", response.getContentCharset().name()); + } + public void testParseAsString_jsonContentType() throws IOException { HttpTransport transport = new MockHttpTransport() { From d32902fc58c3952d99036570d17e6bcb8342ade1 Mon Sep 17 00:00:00 2001 From: Rohit Vaidya Date: Tue, 10 Aug 2021 10:21:10 -0700 Subject: [PATCH 2/2] Fix: Defaulting to UTF-8 if charset is missing Fixed review comments. 
Thanks --- .../src/main/java/com/google/api/client/http/HttpResponse.java | 3 ++- .../test/java/com/google/api/client/http/HttpResponseTest.java | 3 +-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/google-http-client/src/main/java/com/google/api/client/http/HttpResponse.java b/google-http-client/src/main/java/com/google/api/client/http/HttpResponse.java index b0ad3e20b..f7bf0b42c 100644 --- a/google-http-client/src/main/java/com/google/api/client/http/HttpResponse.java +++ b/google-http-client/src/main/java/com/google/api/client/http/HttpResponse.java @@ -534,8 +534,9 @@ public Charset getContentCharset() { // https://tools.ietf.org/html/rfc4627 - JSON must be encoded with UTF-8 return StandardCharsets.UTF_8; } - // fallback to well-kown charset for text/csv + // fallback to well-kown charset for text/csv if ("text".equals(mediaType.getType()) && "csv".equals(mediaType.getSubType())) { + // https://www.iana.org/assignments/media-types/text/csv - CSV must be encoded with UTF-8 return StandardCharsets.UTF_8; } } diff --git a/google-http-client/src/test/java/com/google/api/client/http/HttpResponseTest.java b/google-http-client/src/test/java/com/google/api/client/http/HttpResponseTest.java index fbe56be13..ef7599197 100644 --- a/google-http-client/src/test/java/com/google/api/client/http/HttpResponseTest.java +++ b/google-http-client/src/test/java/com/google/api/client/http/HttpResponseTest.java @@ -68,8 +68,7 @@ public void testParseAsString_none() throws Exception { private static final String VALID_CONTENT_TYPE = "text/plain"; private static final String VALID_CONTENT_TYPE_WITH_PARAMS = "application/vnd.com.google.datastore.entity+json; charset=utf-8; version=v1; q=0.9"; - private static final String VALID_CONTENT_TYPE_WITHOUT_CHARSET = - "text/csv; version=v1; q=0.9"; + private static final String VALID_CONTENT_TYPE_WITHOUT_CHARSET = "text/csv; version=v1; q=0.9"; private static final String INVALID_CONTENT_TYPE = "!!!invalid!!!"; private static 
final String JSON_CONTENT_TYPE = "application/json";