Skip to content

Commit

Permalink
cleanup data checking
Browse files Browse the repository at this point in the history
  • Loading branch information
droumis committed Apr 25, 2024
1 parent 7130a39 commit c0f6f05
Show file tree
Hide file tree
Showing 3 changed files with 73 additions and 14 deletions.
19 changes: 5 additions & 14 deletions examples/tutorial/00_Setup.ipynb
Expand Up @@ -75,12 +75,12 @@
"metadata": {},
"outputs": [],
"source": [
"import version_checker\n",
"import datashader, panel, hvplot, param, holoviews as hv # noqa\n",
"from package_checker import check_packages\n",
"hv.extension('bokeh', 'matplotlib')\n",
"\n",
"packages = ['datashader', 'holoviews', 'panel', 'hvplot', 'param']\n",
"version_checker.check_packages(packages)"
"check_packages(packages)"
]
},
{
Expand All @@ -96,19 +96,10 @@
"metadata": {},
"outputs": [],
"source": [
"import pathlib\n",
"import pandas as pd\n",
"from data_checker import check_data\n",
"\n",
"print('Data exists:', pathlib.Path('../data/earthquakes-projected.parq').is_file())\n",
"\n",
"try: \n",
" columns = ['depth', 'id', 'latitude', 'longitude', 'mag', 'place', 'time', 'type']\n",
" path = pathlib.Path('../data/earthquakes-projected.parq')\n",
" data = pd.read_parquet(path, columns=columns, engine='fastparquet')\n",
" data.head()\n",
" print('Data check successful!')\n",
"except RuntimeError as e:\n",
" print('The data cannot be read: %s' % e)\n"
"data_path = '../data/earthquakes-projected.parq'\n",
"check_data(data_path)"
]
},
{
Expand Down
23 changes: 23 additions & 0 deletions examples/tutorial/data_checker.py
@@ -0,0 +1,23 @@
import pathlib
import pandas as pd

def check_data(file_path='../data/earthquakes-projected.parq'):
    """
    Check that the earthquake parquet file exists and is readable.

    Prints a diagnostic message for each outcome instead of raising, so
    the notebook cell that calls this never errors out.

    Args:
        file_path (str, optional): Path to the parquet file.
            Default is '../data/earthquakes-projected.parq'.
    """
    path = pathlib.Path(file_path)

    # Return early on a missing file: without this, pd.read_parquet below
    # would raise an uncaught FileNotFoundError (which is not a
    # RuntimeError, so the except clause would never catch it).
    if not path.is_file():
        print(f"Data file does not exist at {file_path}")
        return

    try:
        columns = ['depth', 'id', 'latitude', 'longitude', 'mag', 'place', 'time', 'type']
        data = pd.read_parquet(path, columns=columns, engine='fastparquet')
        data.head()  # force a small read to prove the file is decodable
        print("Data exists and is readable!")
    except Exception as e:
        # Catching only RuntimeError missed common read failures
        # (OSError, ValueError from fastparquet); report any of them
        # rather than letting the notebook cell crash.
        print(f"The data cannot be read: {str(e)}")
45 changes: 45 additions & 0 deletions examples/tutorial/package_checker.py
@@ -0,0 +1,45 @@
import yaml
import pathlib
import sys
from packaging.version import Version

def get_required_versions(yaml_path):
    """
    Parse the project YAML configuration and collect pinned package versions.

    Only entries pinned with '==' are recorded; bare package names and
    other specifiers are skipped.

    Returns:
        dict: Mapping of package name to its required version string.
    """
    config = yaml.safe_load(pathlib.Path(yaml_path).read_text())
    pinned = {}
    for spec in config['packages']:
        name, sep, required = spec.partition('==')
        if sep:
            pinned[name] = required
    return pinned

def check_packages(packages, yaml_path='../anaconda-project.yml'):
    """
    Check that each listed package meets the version required by the
    YAML configuration.

    Packages are looked up in sys.modules, so they must already have
    been imported by the caller (as the tutorial notebook does).

    Args:
        packages (list): Package (module) names to check.
        yaml_path (str, optional): Path to the project YAML file holding
            the pinned versions. Default is '../anaconda-project.yml'.
    """
    required_versions = get_required_versions(yaml_path)
    error_found = False
    for pkg in packages:
        try:
            req_version = required_versions[pkg]
            installed_version = sys.modules[pkg].__version__
            if Version(installed_version) < Version(req_version):
                print(f"Error: {pkg} expected version {req_version}, got {installed_version}")
                error_found = True
        except KeyError:
            # Raised either by required_versions[pkg] (not pinned in the
            # YAML) or sys.modules[pkg] (never imported).
            print(f"{pkg} is not installed or not specified in the YAML configuration.")
            error_found = True
        except AttributeError:
            # A module without a __version__ attribute would otherwise
            # crash the whole check with an uncaught AttributeError.
            print(f"Cannot determine the installed version of {pkg}.")
            error_found = True

    if not error_found:
        print("All specified packages are correctly installed.")

0 comments on commit c0f6f05

Please sign in to comment.