ricklupton · neiljp · May 11, 2018 · May 11, 2018
diff --git a/floweaver/dataset.py b/floweaver/dataset.py
@@ -126,6 +126,26 @@ def read(filename):
         dim_time = read(dim_time_filename)
         return cls(flows, dim_process, dim_material, dim_time)
 
+    @classmethod
+    def from_excel(cls,
+                   flows_filename,
+                   dim_process_filename=None,
+                   dim_material_filename=None,
+                   dim_time_filename=None):
+        """Build an instance from Excel workbooks via ``pd.read_excel``.
+
+        The optional process/material/time workbooks are indexed by their
+        'id' column; a filename left as None passes None for that table.
+        """
+        def read(filename):
+            if filename is None:  # optional table not supplied
+                return None
+            return pd.read_excel(filename).set_index('id')
+
+        flows = pd.read_excel(flows_filename)  # flows are not re-indexed
+        return cls(flows, read(dim_process_filename),
+                   read(dim_material_filename), read(dim_time_filename))
+
 
 def find_flows(flows,
                source_query,

diff --git a/test/fixtures/fruit_flows.csv b/test/fixtures/fruit_flows.csv
diff --git a/test/fixtures/fruit_flows_MSexcel2007_2013XML.xlsx b/test/fixtures/fruit_flows_MSexcel2007_2013XML.xlsx
diff --git a/test/fixtures/fruit_flows_officeopenxml.xlsx b/test/fixtures/fruit_flows_officeopenxml.xlsx
diff --git a/test/fixtures/fruit_processes.csv b/test/fixtures/fruit_processes.csv
@@ -0,0 +1,40 @@
+id,type,location,function,sector
+inputs,stock,*,inputs,
+farm1,process,Cambridge,small farm,farming
+farm2,process,Cambridge,small farm,farming
+farm3,process,Ely,small farm,farming
+farm4,process,Ely,allotment,farming
+farm5,process,Newmarket,allotment,farming
+farm6,process,Ely,allotment,farming
+farm7,process,Ely,allotment,farming
+farm8,process,Newmarket,large farm,farming
+farm9,process,Newmarket,large farm,farming
+farm10,process,Ely,large farm,farming
+farm11,process,Cambridge,allotment,farming
+farm12,process,Newmarket,small farm,farming
+farm13,process,Newmarket,small farm,farming
+farm14,process,Cambridge,large farm,farming
+farm15,process,Ely,small farm,farming
+eat1,process,Cambridge,consumers,domestic
+eat2,process,Ely,consumers,domestic
+eat3,process,Ely,consumers,government
+eat4,process,Ely,consumers,industry
+eat5,process,Cambridge,consumers,domestic
+eat6,process,Cambridge,consumers,industry
+eat7,process,Cambridge,consumers,industry
+eat8,process,Cambridge,consumers,domestic
+eat9,process,London,consumers,industry
+eat10,process,Newmarket,consumers,domestic
+eat11,process,Newmarket,consumers,industry
+landfill Cambridge,stock,Cambridge,landfill,
+composting Cambridge,process,Cambridge,composting process,
+compost Cambridge,stock,Cambridge,composting stock,
+landfill Ely,stock,Ely,landfill,
+composting Ely,process,Ely,composting process,
+compost Ely,stock,Ely,composting stock,
+landfill Newmarket,stock,Newmarket,landfill,
+composting Newmarket,process,Newmarket,composting process,
+compost Newmarket,stock,Newmarket,composting stock,
+landfill London,stock,London,landfill,
+composting London,process,London,composting process,
+compost London,stock,London,composting stock,
diff --git a/test/fixtures/fruit_processes_MSExcel2007_2013XML.xlsx b/test/fixtures/fruit_processes_MSExcel2007_2013XML.xlsx
diff --git a/test/fixtures/fruit_processes_OfficeOpenXML.xlsx b/test/fixtures/fruit_processes_OfficeOpenXML.xlsx
diff --git a/test/test_dataset.py b/test/test_dataset.py
@@ -6,6 +6,38 @@
 from floweaver.sankey_definition import ProcessGroup, Bundle, Elsewhere
 
 
+def test_dataset_from_csv_simple():  # flows file alone: only the flow columns
+    flow_columns = {'source', 'target', 'material', 'time', 'value'}
+    assert set(Dataset.from_csv('./fixtures/fruit_flows.csv')._table.columns) == flow_columns
+
+
+def test_dataset_from_csv_with_process_table():
+    # Supplying a process table should add source.*/target.* attribute columns.
+    dataset = Dataset.from_csv('./fixtures/fruit_flows.csv', './fixtures/fruit_processes.csv')
+    assert set(dataset._table.columns) == {'source', 'target', 'material', 'time', 'value',
+        'source.type', 'source.location', 'source.function', 'source.sector',
+        'target.type', 'target.location', 'target.function', 'target.sector'}
+
+def test_dataset_from_excel_simple():  # flows workbook only, both .xlsx fixtures
+    flow_columns = {'source', 'target', 'material', 'time', 'value'}
+    for workbook in ('./fixtures/fruit_flows_MSexcel2007_2013XML.xlsx',
+                     './fixtures/fruit_flows_officeopenxml.xlsx'):
+        assert set(Dataset.from_excel(workbook)._table.columns) == flow_columns
+
+
+def test_dataset_from_excel_with_process_table():
+    """Each .xlsx flows/process fixture pair yields the source.*/target.* columns."""
+    expected = {'source', 'target', 'material', 'time', 'value',
+                'source.type', 'source.location', 'source.function', 'source.sector',
+                'target.type', 'target.location', 'target.function', 'target.sector'}
+    d = Dataset.from_excel('./fixtures/fruit_flows_MSexcel2007_2013XML.xlsx',
+                           './fixtures/fruit_processes_MSExcel2007_2013XML.xlsx')
+    assert set(d._table.columns) == expected
+    d2 = Dataset.from_excel('./fixtures/fruit_flows_officeopenxml.xlsx',
+                            './fixtures/fruit_processes_OfficeOpenXML.xlsx')
+    assert set(d2._table.columns) == expected
+
+
 def _dataset():
     dim_process = pd.DataFrame.from_records(
         [