/
test_excel_import.txt
313 lines (291 loc) · 4.87 KB
/
test_excel_import.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
# First block should never be escaped, since they are valid Excel numbers.
1.23456789E7
-1.23456789E7
0.00999999E6
-0.00999999E6
3.1415926535897
-3.1415926535897
3.14E99
-3.14E-99
0.12345
0.12345E-99
0.1234567890123456789
1.
1.E-99
.12345
.12345E-99
.1234567890123456789
+.12345
-.12345
+.12345E-99
-.12345E-99
1,234
12,345
123,456
1,234,567
12,345,678
123,456,789
1,234,567,890
1234,567
1234,567,8901
1234,567,8901.E-99
1234,567,8901.2E-99
1
12
123
1234
12345
123456
1234567
12345678
123456789
1234567890
12345678901
# Next block should only be escaped with --paranoid,
# since they aren't numbers, and Excel doesn't try to treat them as non-text.
,123,456
1,235,6
1,235,67
6" 2'
6" 2"
O00116:347
5710 266971
05710 266971
Nova-1
19,Feb
19,Feb
19,Feb 2013
19,Feb2013
19,Feb,2013
Feb,19
Feb,19,2013
1,08
1,08,88
1,08
1,88
December 1st
December 2nd
December 3rd
December 4th
December 1rd
December 1st 2013
December 1st, 2013
December 1st, 13
1st December
1st December 2013
3:00 long rambling junk in the middle Sep7
filler at beginning 3:00 Sep7
3:00 Sep7 filler at end
3:00 x Sep7
Sep7 x 3:00
3:00 x Sep7
3 AM x Sep7
Sep7 x 3:00 AM
3:00AM x 01/08
3:00 AM x Sep7
3:00 AM x 01/08
01/08 x 3:00
01/08 x 3:00 AM
3:00 ; Sep7
Sep7 ; 3:00
3:00 ; Sep7
3 AM ; Sep7
Sep7 ; 3:00 AM
3:00 AM ; Sep7
3:00 AM ; 01/08
01/08 ; 3:00
01/08 ; 3:00 AM
3:00 -- Sep7
Sep7 -- 3:00
3:00 -- Sep7
3 AM -- Sep7
Sep7 -- 3:00 AM
3:00 AM -- Sep7
3:00 AM -- 01/08
01/08 -- 3:00
01/08 -- 3:00 AM
-
--
---
+
++
+++
+
+-
-+
+-+
-+-
# Excel doesn't see these as numbers, so they are OK to leave as-is
1,2
12,34
12,34,56
123,4
123,45
1,2,3,4567
12,3456,78
12,34,5678,9
# Excel sees these as numbers, so they need to be escaped
12,3456
1234,567
1234,5678
12345,67890
# these should already be caught by the big number check
123,456,789,012,345
1234,5678,9012,3456
# Excel has some odd spacing related parsing behavior
SEP7
SEP7 space at end is treated as date by Excel
" SEP7"
"SEP7 " space at end is treated as date by Excel
"SEP7"
"SEP7" space after "" is still treated as date by Excel
"SEP7"
"SEP7 " also treated as date by Excel
1234567890123456789 Excel truncates
1234567890123456789 Excel truncates
" 1234567890123456789" Excel truncates
"1234567890123456789 " Excel truncates
"1234567890123456789" space before "" protects numbers too
"1234567890123456789 " Excel truncates
# Examples below are test cases for stripping various characters from the
# original input, such as leading/trailing spaces, equals signs,
# enclosing double quotes, and nested occurrences of these
"SEP7
"SEP7" Excel line-wraps this into the previous line
""SEP7""
Sep7
Sep7
="SEP7"
="="SEP7""
# Current behavior won't strip ="" unless it is at the beginning or nested
# immediately inside a previously stripped ="".
"="SEP7""
# This used to be a test for the old script behavior of recursively stripping
# enclosing "", which would strip out both nested ="", as well as nested
# enclosing "". The current "" handling only strips them once, if they
# enclose the entire field (allowing for spaces after them, though).
# So, this specifically crafted test case isn't so useful anymore.
="" =" ""SEP7"" " ""
# Excel treats the following as the start of a line wrap if the file is loaded
# directly into Excel, or if escape_excel.pl is run with the --no-dq option.
# Current script default "smart" double quote handling will escape the
# double quotes so as to not trigger a line wrap when Excel loads its output.
"SEP7""
this line is here to terminate the Excel line wrap"
# Everything below should be escaped, either because Excel does corrupt
# them, or it looks suspiciously like something Excel might corrupt.
00123456
123456789012
1234567890123
12345678901234
123456789012345
1234567890123456
12345678901234567
123456789012345678
1234567890123456789
2310009E13
SEP7
April 31
April 99
19-Feb
19Feb
19Feb2013
19-Feb2013
19-Feb-2013
Feb-19
Feb-19-2013
1-08
1-08-88
1/08
1/08/88
1-88
1/88
'night
'1234
'SEP7
=1+2
3:00
3 P
3 A
3 PM
3 AM
3:00:00
3:00:00 PM
3:00:00 AM
3:00:00 P
3:00:00 A
3:00 Sep7
Sep7 3:00
3:00Sep7
3 AM Sep7
Sep7 3:00 AM
3:00 AM01/08
3:00 AM Sep7
3:00 AM 01/08
01/08 3:00
01/08 3:00 AM
3:00 Sep7
Sep7 3:00
3:00 Sep7
3 AM Sep7
Sep7 3:00 AM
3:00 AM Sep7
3:00 AM 01/08
01/08 3:00
01/08 3:00 AM
3:00/Sep7
Sep7/3:00
3:00/Sep7
3 AM/Sep7
Sep7/3:00 AM
3:00 AM/Sep7
3:00 AM/01/08
01/08/3:00
01/08/3:00 AM
3:00-Sep7
Sep7-3:00
3:00-Sep7
3 AM-Sep7
Sep7-3:00 AM
3:00 AM-Sep7
3:00 AM-01/08
01/08-3:00
01/08-3:00 AM
3:00,Sep7
Sep7,3:00
3:00,Sep7
3 AM,Sep7
Sep7,3:00 AM
3:00 / Sep7
Sep7 / 3:00
3:00 / Sep7
3 AM / Sep7
Sep7 / 3:00 AM
3:00 AM / Sep7
3:00 AM / 01/08
01/08 / 3:00
01/08 / 3:00 AM
3:00 - Sep7
Sep7 - 3:00
3:00 - Sep7
3 AM - Sep7
Sep7 - 3:00 AM
3:00 AM - Sep7
3:00 AM - 01/08
01/08 - 3:00
01/08 - 3:00 AM
3:00 , Sep7
Sep7 , 3:00
3:00 , Sep7
3 AM , Sep7
Sep7 , 3:00 AM
3:00 AM , Sep7
3:00 AM , 01/08
01/08 , 3:00
01/08 , 3:00 AM
-- sometext
- sometext
-sometext
++ sometext
+ sometext
+sometext