diff --git a/sdv/io/local/local.py b/sdv/io/local/local.py index 56aa12080..0d81ab634 100644 --- a/sdv/io/local/local.py +++ b/sdv/io/local/local.py @@ -64,13 +64,19 @@ class CSVHandler(BaseLocalHandler): The character used to denote the decimal point. Defaults to ``.``. float_format (str or None): The formatting string for floating-point numbers. Optional. + quotechar (str): + Character used to denote the start and end of a quoted item. + Delimiters inside a quoted item are not treated as separators. Defaults to ``"``. + quoting (int or None): + Control field quoting behavior. Defaults to ``0`` (``csv.QUOTE_MINIMAL``). Raises: ValueError: If the provided encoding is not available in the system. """ - def __init__(self, sep=',', encoding='UTF', decimal='.', float_format=None): + def __init__(self, sep=',', encoding='UTF', decimal='.', float_format=None, + quotechar='"', quoting=0): super().__init__(decimal, float_format) try: codecs.lookup(encoding) @@ -81,6 +87,8 @@ def __init__(self, sep=',', encoding='UTF', decimal='.', float_format=None): self.sep = sep self.encoding = encoding + self.quotechar = quotechar + self.quoting = quoting def read(self, folder_name, file_names=None): """Read data from CSV files and returns it along with metadata. 
@@ -131,7 +139,9 @@ def read(self, folder_name, file_names=None): 'parse_dates': False, 'low_memory': False, 'decimal': self.decimal, - 'on_bad_lines': 'warn' + 'on_bad_lines': 'warn', + 'quotechar': self.quotechar, + 'quoting': self.quoting } args = inspect.getfullargspec(pd.read_csv) @@ -178,5 +188,7 @@ def write(self, synthetic_data, folder_name, file_name_suffix=None, mode='x'): encoding=self.encoding, index=False, float_format=self.float_format, + quotechar=self.quotechar, + quoting=self.quoting, mode=mode, ) diff --git a/tests/unit/io/local/test_local.py b/tests/unit/io/local/test_local.py index eb81d52c7..e69d18636 100644 --- a/tests/unit/io/local/test_local.py +++ b/tests/unit/io/local/test_local.py @@ -22,17 +22,28 @@ def test___init__(self): assert instance.float_format is None assert instance.encoding == 'UTF' assert instance.sep == ',' + assert instance.quotechar == '"' + assert instance.quoting == 0 def test___init___custom(self): """Test custom initialization of the class.""" # Run - instance = CSVHandler(sep=';', encoding='utf-8', decimal=',', float_format='%.2f') + instance = CSVHandler( + sep=';', + encoding='utf-8', + decimal=',', + float_format='%.2f', + quotechar="'", + quoting=2 + ) # Assert assert instance.decimal == ',' assert instance.float_format == '%.2f' assert instance.encoding == 'utf-8' assert instance.sep == ';' + assert instance.quotechar == "'" + assert instance.quoting == 2 def test___init___error_encoding(self): """Test custom initialization of the class."""