Skip to content

Commit

Permalink
test_file.py: extended TestSameContent TestCase with test_utf8_bom_co…
Browse files Browse the repository at this point in the history
…ntent_decoding

file.py: added encodings params, so it can be tested
test_importer.py: fix broken asserts
  • Loading branch information
Mate Laszlo Valko committed Apr 27, 2024
1 parent 31e0238 commit 23e1336
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 8 deletions.
3 changes: 0 additions & 3 deletions frappe/core/doctype/data_import/test_importer.py
Expand Up @@ -52,8 +52,6 @@ def test_data_import_from_file(self):

def test_data_validation_semicolon_success(self):
import_file = get_import_file("sample_import_file_semicolon")
self.assertIsNotNone(frappe.flags.delimiter_options)
frappe.flags.delimiter_options = ",;"
data_import = self.get_importer(doctype_name, import_file, update=True)

doc = data_import.get_preview_from_template().get("data", [{}])
Expand Down Expand Up @@ -167,7 +165,6 @@ def get_importer_semicolon(self, doctype, import_file, update=False):
# deliberately overwrite the default delimiter options here, causing parsing of ";" to fail
data_import.delimiter_options = ","
data_import.insert()
# Commit so that the first import failure does not rollback the Data Import insert.
frappe.db.commit()

return data_import
Expand Down
6 changes: 4 additions & 2 deletions frappe/core/doctype/file/file.py
Expand Up @@ -515,7 +515,7 @@ def unzip(self) -> list["File"]:
def exists_on_disk(self):
    """Return whether the path reported by ``get_full_path()`` exists on the filesystem."""
    full_path = self.get_full_path()
    return os.path.exists(full_path)

def get_content(self) -> bytes:
def get_content(self, encodings=None) -> bytes | str:
if self.is_folder:
frappe.throw(_("Cannot get file contents of a Folder"))

Expand All @@ -531,10 +531,12 @@ def get_content(self) -> bytes:
self.validate_file_url()
file_path = self.get_full_path()

if encodings is None:
encodings = ["utf-8-sig", "utf-8", "windows-1250", "windows-1252"]
# read file with proper encoding
with open(file_path, mode="rb") as f:
self._content = f.read()
encodings = ["utf-8-sig", "utf-8", "windows-1250", "windows-1252"]

for encoding in encodings:
try:
# for plain text files
Expand Down
24 changes: 21 additions & 3 deletions frappe/core/doctype/file/test_file.py
@@ -1,7 +1,6 @@
# Copyright (c) 2022, Frappe Technologies Pvt. Ltd. and Contributors
# License: MIT. See LICENSE
import base64
import json
import os
import shutil
import tempfile
Expand Down Expand Up @@ -111,7 +110,7 @@ class TestBase64File(FrappeTestCase):
def setUp(self):
self.attached_to_doctype, self.attached_to_docname = make_test_doc()
self.test_content = base64.b64encode(test_content1.encode("utf-8"))
_file: "File" = frappe.get_doc(
_file: frappe.Document = frappe.get_doc(
{
"doctype": "File",
"file_name": "test_base64.txt",
Expand All @@ -125,7 +124,7 @@ def setUp(self):
self.saved_file_url = _file.file_url

def test_saved_content(self):
# Looks up the File document whose file_url equals self.saved_file_url, reads its
# content via get_content(), and asserts that it equals test_content1.
# NOTE(review): the next two statements perform the same lookup; they appear to be the
# removed and the added side of a single diff line (the second only adds a local type
# annotation) - confirm which one belongs in the real file.
_file = frappe.get_doc("File", {"file_url": self.saved_file_url})
_file: frappe.Document = frappe.get_doc("File", {"file_url": self.saved_file_url})
content = _file.get_content()
self.assertEqual(content, test_content1)

Expand Down Expand Up @@ -255,6 +254,25 @@ def test_attachment_limit(self):
limit_property.delete()
frappe.clear_cache(doctype="ToDo")

def test_utf8_bom_content_decoding(self):
    """Save BOM-prefixed UTF-8 bytes as a File and check how get_content() decodes them.

    With only "utf-8" in ``encodings`` the test expects the BOM to remain as the
    first character; with "utf-8-sig" listed first it expects the decoded text to
    equal ``test_content1``.
    """
    bom_prefixed_bytes = test_content1.encode("utf-8-sig")
    file_fields = {
        "doctype": "File",
        "file_name": "utf8bom.txt",
        "attached_to_doctype": self.attached_to_doctype1,
        "attached_to_name": self.attached_to_docname1,
        "content": bom_prefixed_bytes,
        "decode": False,
    }
    new_file = frappe.get_doc(file_fields)
    new_file.save()

    # Re-fetch the document by name so the content is read through a fresh File object.
    reloaded = frappe.get_doc("File", new_file.name)

    # Plain "utf-8" keeps the byte order mark, which shows up as U+FEFF at index 0.
    naive_text = reloaded.get_content(encodings=["utf-8"])
    self.assertEqual(naive_text[0], "\ufeff")

    # Trying "utf-8-sig" first consumes the BOM, leaving exactly the original text.
    bom_aware_text = reloaded.get_content(encodings=["utf-8-sig", "utf-8"])
    self.assertEqual(bom_aware_text, test_content1)


class TestFile(FrappeTestCase):
def setUp(self):
Expand Down

0 comments on commit 23e1336

Please sign in to comment.