-
Notifications
You must be signed in to change notification settings - Fork 287
/
test_local.py
212 lines (177 loc) · 7.3 KB
/
test_local.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
"""Unit tests for local file handlers."""
import os
import re
from pathlib import Path
from unittest.mock import patch

import pandas as pd
import pytest

from sdv.io.local.local import CSVHandler
from sdv.metadata.multi_table import MultiTableMetadata
class TestCSVHandler:
    """Unit tests for ``CSVHandler``: construction, reading and writing CSV files."""

    def test___init__(self):
        """Test the default initialization of the class."""
        # Run
        instance = CSVHandler()

        # Assert
        assert instance.decimal == '.'
        assert instance.float_format is None
        assert instance.encoding == 'UTF'
        assert instance.sep == ','
        assert instance.quotechar == '"'
        assert instance.quoting == 0

    def test___init___custom(self):
        """Test custom initialization of the class."""
        # Run
        instance = CSVHandler(
            sep=';',
            encoding='utf-8',
            decimal=',',
            float_format='%.2f',
            quotechar="'",
            quoting=2,
        )

        # Assert
        assert instance.decimal == ','
        assert instance.float_format == '%.2f'
        assert instance.encoding == 'utf-8'
        assert instance.sep == ';'
        assert instance.quotechar == "'"
        assert instance.quoting == 2

    def test___init___error_encoding(self):
        """Test that initializing with an unknown encoding raises a ``ValueError``."""
        # Run and Assert
        # ``match`` is applied as a regular expression, so the literal message is
        # escaped to keep ``.`` from matching arbitrary characters.
        error_msg = re.escape(
            "The provided encoding 'sdvutf-8' is not available in your system."
        )
        with pytest.raises(ValueError, match=error_msg):
            CSVHandler(sep=';', encoding='sdvutf-8', decimal=',', float_format='%.2f')

    @patch('sdv.io.local.local.Path.glob')
    @patch('pandas.read_csv')
    def test_read(self, mock_read_csv, mock_glob):
        """Test the read method of CSVHandler class with a folder."""
        # Setup
        # ``Path.glob`` is mocked to report two CSV files and ``pandas.read_csv``
        # returns one frame per call, in the same order.
        mock_glob.return_value = [
            Path('/path/to/data/parent.csv'),
            Path('/path/to/data/child.csv'),
        ]
        mock_read_csv.side_effect = [
            pd.DataFrame({'col1': [1, 2, 3], 'col2': ['a', 'b', 'c']}),
            pd.DataFrame({'col3': [4, 5, 6], 'col4': ['d', 'e', 'f']}),
        ]
        handler = CSVHandler()

        # Run
        data, metadata = handler.read('/path/to/data')

        # Assert
        assert len(data) == 2
        assert 'parent' in data
        assert 'child' in data
        assert isinstance(metadata, MultiTableMetadata)
        assert mock_read_csv.call_count == 2
        pd.testing.assert_frame_equal(
            data['parent'],
            pd.DataFrame({'col1': [1, 2, 3], 'col2': ['a', 'b', 'c']}),
        )
        pd.testing.assert_frame_equal(
            data['child'],
            pd.DataFrame({'col3': [4, 5, 6], 'col4': ['d', 'e', 'f']}),
        )

    def test_read_files(self, tmpdir):
        """Test the read method of CSVHandler class with given ``file_names``."""
        # Setup
        file_path = Path(tmpdir)
        pd.DataFrame({'col1': [1, 2, 3], 'col2': ['a', 'b', 'c']}).to_csv(
            file_path / 'parent.csv',
            index=False,
        )
        pd.DataFrame({'col3': [4, 5, 6], 'col4': ['d', 'e', 'f']}).to_csv(
            file_path / 'child.csv',
            index=False,
        )
        handler = CSVHandler()

        # Run
        data, metadata = handler.read(tmpdir, file_names=['parent.csv'])

        # Assert
        assert 'parent' in data
        pd.testing.assert_frame_equal(
            data['parent'],
            pd.DataFrame({'col1': [1, 2, 3], 'col2': ['a', 'b', 'c']}),
        )

    def test_read_files_missing(self, tmpdir):
        """Test the read method of CSVHandler with missing ``file_names``."""
        # Setup
        file_path = Path(tmpdir)
        pd.DataFrame({'col1': [1, 2, 3], 'col2': ['a', 'b', 'c']}).to_csv(
            file_path / 'parent.csv',
            index=False,
        )
        pd.DataFrame({'col3': [4, 5, 6], 'col4': ['d', 'e', 'f']}).to_csv(
            file_path / 'child.csv',
            index=False,
        )
        handler = CSVHandler()

        # Run and Assert
        # Neither requested name exists in the folder ('parents.csv' differs from
        # 'parent.csv'). The message is escaped because ``match`` is a regex.
        error_msg = re.escape(
            'The following files do not exist in the folder: grandchild.csv, parents.csv.'
        )
        with pytest.raises(FileNotFoundError, match=error_msg):
            handler.read(tmpdir, file_names=['grandchild.csv', 'parents.csv'])

    def test_write(self, tmpdir):
        """Test the write functionality of a CSVHandler."""
        # Setup
        synthetic_data = {
            'table1': pd.DataFrame({'col1': [1, 2, 3], 'col2': ['a', 'b', 'c']}),
            'table2': pd.DataFrame({'col3': [4, 5, 6], 'col4': ['d', 'e', 'f']}),
        }
        handler = CSVHandler()

        # The output folder must not exist beforehand.
        assert os.path.exists(tmpdir / 'synthetic_data') is False

        # Run
        handler.write(synthetic_data, tmpdir / 'synthetic_data', file_name_suffix='_synthetic')

        # Assert
        assert 'table1_synthetic.csv' in os.listdir(tmpdir / 'synthetic_data')
        assert 'table2_synthetic.csv' in os.listdir(tmpdir / 'synthetic_data')

    def test_write_file_exists(self, tmpdir):
        """Test that an error is raised when the file exists and the mode is `x`.

        No ``mode`` is passed to ``write``, so the handler's default mode is the
        one being exercised.
        """
        # Setup
        synthetic_data = {
            'table1': pd.DataFrame({'col1': [1, 2, 3], 'col2': ['a', 'b', 'c']}),
            'table2': pd.DataFrame({'col3': [4, 5, 6], 'col4': ['d', 'e', 'f']}),
        }
        os.makedirs(tmpdir / 'synthetic_data')
        synthetic_data['table1'].to_csv(tmpdir / 'synthetic_data' / 'table1.csv', index=False)
        handler = CSVHandler()

        # Run and Assert
        with pytest.raises(FileExistsError):
            handler.write(synthetic_data, tmpdir / 'synthetic_data')

    def test_write_file_exists_mode_is_a(self, tmpdir):
        """Test the write functionality of a CSVHandler when the mode is ``a``."""
        # Setup
        synthetic_data = {
            'table1': pd.DataFrame({'col1': [1, 2, 3], 'col2': ['a', 'b', 'c']}),
            'table2': pd.DataFrame({'col3': [4, 5, 6], 'col4': ['d', 'e', 'f']}),
        }
        os.makedirs(tmpdir / 'synthetic_data')
        synthetic_data['table1'].to_csv(tmpdir / 'synthetic_data' / 'table1.csv', index=False)
        handler = CSVHandler()

        # Run
        handler.write(synthetic_data, tmpdir / 'synthetic_data', mode='a')

        # Assert
        # The expected frame holds the original rows, then a repeated header row
        # read back as data, then the appended rows; ``col1`` is therefore strings.
        dataframe = pd.read_csv(tmpdir / 'synthetic_data' / 'table1.csv')
        expected_dataframe = pd.DataFrame({
            'col1': ['1', '2', '3', 'col1', '1', '2', '3'],
            'col2': ['a', 'b', 'c', 'col2', 'a', 'b', 'c'],
        })
        pd.testing.assert_frame_equal(dataframe, expected_dataframe)

    def test_write_file_exists_mode_is_w(self, tmpdir):
        """Test the write functionality of a CSVHandler when the mode is ``w``."""
        # Setup
        synthetic_data = {
            'table1': pd.DataFrame({'col1': [1, 2, 3], 'col2': ['a', 'b', 'c']}),
            'table2': pd.DataFrame({'col3': [4, 5, 6], 'col4': ['d', 'e', 'f']}),
        }
        os.makedirs(tmpdir / 'synthetic_data')
        synthetic_data['table1'].to_csv(tmpdir / 'synthetic_data' / 'table1.csv', index=False)
        handler = CSVHandler()

        # Run
        handler.write(synthetic_data, tmpdir / 'synthetic_data', mode='w')

        # Assert
        # The expected frame equals the original table exactly once.
        dataframe = pd.read_csv(tmpdir / 'synthetic_data' / 'table1.csv')
        expected_dataframe = pd.DataFrame({
            'col1': [1, 2, 3],
            'col2': ['a', 'b', 'c'],
        })
        pd.testing.assert_frame_equal(dataframe, expected_dataframe)