From 170394d5260244f040ec63480443c614e1ee9219 Mon Sep 17 00:00:00 2001 From: Vanessa Sochat Date: Tue, 12 Feb 2019 09:13:30 -0500 Subject: [PATCH] updating parser --- data/oshpd-ca/parse.py | 19 ++++++++++++++++++- .../the-methodist-hospital-(houston)/parse.py | 6 ++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/data/oshpd-ca/parse.py b/data/oshpd-ca/parse.py index 03fca7f..ec176b8 100644 --- a/data/oshpd-ca/parse.py +++ b/data/oshpd-ca/parse.py @@ -41,7 +41,7 @@ df = pandas.DataFrame(columns=columns) seen = [] -for r in range(420 ,len(results)): +for r in range(423 ,len(results)): result = results[r] filename = os.path.join(latest, result['filename']) if not os.path.exists(filename): @@ -87,6 +87,23 @@ price_key = 'Price' code_key = 'Charge Code' + # ['Description', 'Code', 'Unnamed: 2', 'Unnamed: 3', 'Price', 'Tier Code', 'Dept', 'Subd', 'Elem', 'Stat'] + # Writing over row of dashes ---- + elif "106420491_CDM" in filename: + content = pandas.read_excel(filename, skiprows=2) + content.columns = ['Description', + 'Code', + 'Unnamed: 2', 'Unnamed: 3', + 'Price', + 'Tier Code', + 'Dept', + 'Subd', + 'Elem', + 'Stat'] + description_key = 'Description' + price_key = 'Price' + code_key = 'Code' + # ['Fac', 'Charge #', 'Description', 'Price', 'GL Key'] elif "106301357_CDM" in filename: content = pandas.read_excel(filename, skiprows=5) diff --git a/data/the-methodist-hospital-(houston)/parse.py b/data/the-methodist-hospital-(houston)/parse.py index 7efc01d..6c4e8a3 100644 --- a/data/the-methodist-hospital-(houston)/parse.py +++ b/data/the-methodist-hospital-(houston)/parse.py @@ -114,3 +114,9 @@ output_data = os.path.join(here, 'data-latest-2.tsv') output_year = os.path.join(here, 'data-%s-2.tsv' % year) df = pandas.DataFrame(columns=columns) + + +# Final Save +print(df.shape) +df.to_csv(output_data, sep='\t', index=False) +df.to_csv(output_year, sep='\t', index=False)