Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Add minimal Excel loader and supporting tests for Excel & CSV #46

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
20 changes: 20 additions & 0 deletions floweaver/dataset.py
Expand Up @@ -126,6 +126,26 @@ def read(filename):
dim_time = read(dim_time_filename)
return cls(flows, dim_process, dim_material, dim_time)

@classmethod
def from_excel(cls,
               flows_filename,
               dim_process_filename=None,
               dim_material_filename=None,
               dim_time_filename=None):
    """Build an instance from Excel workbooks.

    Args:
        flows_filename: Workbook holding the flows table; read with
            ``pd.read_excel`` and passed on without re-indexing.
        dim_process_filename: Optional workbook for the process dimension
            table.
        dim_material_filename: Optional workbook for the material dimension
            table.
        dim_time_filename: Optional workbook for the time dimension table.

    Each dimension workbook that is supplied must contain an ``id`` column,
    which becomes the index of the resulting table. Dimensions that are not
    supplied are passed to the constructor as ``None``.

    Returns:
        The result of ``cls(flows, dim_process, dim_material, dim_time)``.
    """

    def read(filename):
        # Dimension tables are optional: no filename means no table.
        if filename is None:
            return None
        return pd.read_excel(filename).set_index('id')

    flows = pd.read_excel(flows_filename)
    dim_process = read(dim_process_filename)
    # Previously the material/time filenames were reported as unsupported
    # and silently dropped; load them like the process table instead.
    dim_material = read(dim_material_filename)
    dim_time = read(dim_time_filename)
    return cls(flows, dim_process, dim_material, dim_time)


def find_flows(flows,
source_query,
Expand Down
11,392 changes: 11,392 additions & 0 deletions test/fixtures/fruit_flows.csv

Large diffs are not rendered by default.

Binary file not shown.
Binary file added test/fixtures/fruit_flows_officeopenxml.xlsx
Binary file not shown.
40 changes: 40 additions & 0 deletions test/fixtures/fruit_processes.csv
@@ -0,0 +1,40 @@
id,type,location,function,sector
inputs,stock,*,inputs,
farm1,process,Cambridge,small farm,farming
farm2,process,Cambridge,small farm,farming
farm3,process,Ely,small farm,farming
farm4,process,Ely,allotment,farming
farm5,process,Newmarket,allotment,farming
farm6,process,Ely,allotment,farming
farm7,process,Ely,allotment,farming
farm8,process,Newmarket,large farm,farming
farm9,process,Newmarket,large farm,farming
farm10,process,Ely,large farm,farming
farm11,process,Cambridge,allotment,farming
farm12,process,Newmarket,small farm,farming
farm13,process,Newmarket,small farm,farming
farm14,process,Cambridge,large farm,farming
farm15,process,Ely,small farm,farming
eat1,process,Cambridge,consumers,domestic
eat2,process,Ely,consumers,domestic
eat3,process,Ely,consumers,government
eat4,process,Ely,consumers,industry
eat5,process,Cambridge,consumers,domestic
eat6,process,Cambridge,consumers,industry
eat7,process,Cambridge,consumers,industry
eat8,process,Cambridge,consumers,domestic
eat9,process,London,consumers,industry
eat10,process,Newmarket,consumers,domestic
eat11,process,Newmarket,consumers,industry
landfill Cambridge,stock,Cambridge,landfill,
composting Cambridge,process,Cambridge,composting process,
compost Cambridge,stock,Cambridge,composting stock,
landfill Ely,stock,Ely,landfill,
composting Ely,process,Ely,composting process,
compost Ely,stock,Ely,composting stock,
landfill Newmarket,stock,Newmarket,landfill,
composting Newmarket,process,Newmarket,composting process,
compost Newmarket,stock,Newmarket,composting stock,
landfill London,stock,London,landfill,
composting London,process,London,composting process,
compost London,stock,London,composting stock,
Binary file not shown.
Binary file added test/fixtures/fruit_processes_OfficeOpenXML.xlsx
Binary file not shown.
32 changes: 32 additions & 0 deletions test/test_dataset.py
Expand Up @@ -6,6 +6,38 @@
from floweaver.sankey_definition import ProcessGroup, Bundle, Elsewhere


def test_dataset_from_csv_simple():
    """A dataset loaded from the flows CSV alone has only the flow columns."""
    expected_columns = {'source', 'target', 'material', 'time', 'value'}
    dataset = Dataset.from_csv('./fixtures/fruit_flows.csv')
    actual_columns = set(dataset._table.columns)
    assert actual_columns == expected_columns


def test_dataset_from_csv_with_process_table():
    """Loading flows with a process CSV yields source.* and target.* columns."""
    expected_columns = {
        # Columns of the flows table itself.
        'source', 'target', 'material', 'time', 'value',
        # Process attributes expected for the source end of each flow.
        'source.type', 'source.location', 'source.function', 'source.sector',
        # Process attributes expected for the target end of each flow.
        'target.type', 'target.location', 'target.function', 'target.sector',
    }
    flows_path = './fixtures/fruit_flows.csv'
    processes_path = './fixtures/fruit_processes.csv'
    dataset = Dataset.from_csv(flows_path, processes_path)
    assert set(dataset._table.columns) == expected_columns

def test_dataset_from_excel_simple():
    """Both Excel flavours of the flows fixture load with the flow columns."""
    expected_columns = {'source', 'target', 'material', 'time', 'value'}
    workbooks = (
        './fixtures/fruit_flows_MSexcel2007_2013XML.xlsx',
        './fixtures/fruit_flows_officeopenxml.xlsx',
    )
    # Same check for each workbook variant, in the original order.
    for workbook in workbooks:
        dataset = Dataset.from_excel(workbook)
        assert set(dataset._table.columns) == expected_columns


def test_dataset_from_excel_with_process_table():
    """Flows plus a process workbook yield source.* and target.* columns.

    The check is run once per Excel flavour: the MS Excel 2007-2013 XML
    fixtures and the OfficeOpenXML fixtures.
    """
    expected_columns = {
        'source', 'target', 'material', 'time', 'value',
        'source.type', 'source.location', 'source.function', 'source.sector',
        'target.type', 'target.location', 'target.function', 'target.sector',
    }

    d = Dataset.from_excel('./fixtures/fruit_flows_MSexcel2007_2013XML.xlsx',
                           './fixtures/fruit_processes_MSExcel2007_2013XML.xlsx')
    assert set(d._table.columns) == expected_columns

    # The second case previously re-loaded the MS Excel fixtures, so the
    # OfficeOpenXML workbooks were never tested with a process table.
    d2 = Dataset.from_excel('./fixtures/fruit_flows_officeopenxml.xlsx',
                            './fixtures/fruit_processes_OfficeOpenXML.xlsx')
    assert set(d2._table.columns) == expected_columns


def _dataset():
dim_process = pd.DataFrame.from_records(
[
Expand Down