/
unformatted-string-to-table.sql
89 lines (67 loc) · 2.56 KB
/
unformatted-string-to-table.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
-- Description:
-- In this example we load raw data from a
-- txt file into a Luminesce table. The
-- data is in a comma separated format
/*
====================================================
1. Load comma separated string from txt file
====================================================
*/
@@txtDataOriginal = use Drive.RawText
--file=/luminesce-examples/prices-in-txt-file.txt
enduse;
@@txtData_RemoveRedundantTxt = select regexp_replace(@@txtDataOriginal, '"|ADD| ', '');
@@txtData_CommaSeparatedString = select regexp_match(@@txtData_RemoveRedundantTxt, '(?<=ENDRECORD)[.|\n|\W|\w]*');
/*
======================================
2. Parse out the columns names
======================================
*/
@@columnsWithNoSepCommas = select regexp_match(@@txtDataOriginal , '(?<=RECORD)([.|\n|\W|\w]*)(?=ENDRECORD)');
@@columnsWithTrailingComma = select regexp_replace(@@columnsWithNoSepCommas, '\r\n', ',');
@columnsWithSepCommas = select substr(@@columnsWithTrailingComma, 2, length(@@columnsWithTrailingComma) -2) as ColumnNames;
/*
==========================================
3. Split string into separate rows
==========================================
*/
-- Note: This is how you split by endlines in Luminesce
@txtSplitByRow = select Value from Tools.Split where DelimiterString = '
'
and Original = @@txtData_CommaSeparatedString;
@tableRowsAsOneCol = select distinct Value as OriginalColumnValue from @txtSplitByRow
where Value != '';
/*
====================================================================
4. Use the Tools.Split provider to split the data into cells
====================================================================
*/
-- Split the table body first
@tableRowsAsOneColWithIndex = select r.OriginalColumnValue, t.*
from @tableRowsAsOneCol r
inner join Tools.Split t
on t.Original = r.OriginalColumnValue;
-- Then split column names
@columnsNamesToTable = select r.ColumnNames, t.*
from @columnsWithSepCommas r
inner join Tools.Split t
on t.Original = r.ColumnNames;
-- Join the table body and column names together
@formattedTable = select
tr.Value as [RowValue], tr.[OriginalIndex], ct.Value as [ColumnName]
from @columnsNamesToTable ct
join @tableRowsAsOneColWithIndex tr on (ct.[Index] = tr.[Index])
;
/*
=======================================
5. Pivot and format the results
=======================================
*/
@pivoted =
use Tools.Pivot with @formattedTable
--key=ColumnName
--aggregateColumns=RowValue
enduse;
select
*
from @pivoted;