Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow reading beliefs from xlsx files, and from data files with multiple headers #103

Merged
merged 6 commits into from Apr 29, 2021
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
@@ -0,0 +1,39 @@
"""add source id as primary key for timed beliefs

Revision ID: 04f0e2d2924a
Revises: e62ac5f519d7
Create Date: 2021-04-10 13:53:22.561718

"""
from alembic import op


# revision identifiers, used by Alembic.
revision = "04f0e2d2924a"  # unique id of this migration
down_revision = "e62ac5f519d7"  # migration applied immediately before this one
branch_labels = None  # no named branch for this revision
depends_on = None  # no cross-branch dependency


def upgrade():
    """Recreate the timed_belief primary key so that source_id is part of it."""
    # Postgres requires the existing key to be dropped before a replacement
    # with the same name can be created.
    op.drop_constraint("timed_belief_pkey", "timed_belief")
    key_columns = [
        "event_start",
        "belief_horizon",
        "cumulative_probability",
        "sensor_id",
        "source_id",
    ]
    op.create_primary_key("timed_belief_pkey", "timed_belief", key_columns)


def downgrade():
    """Restore the previous timed_belief primary key, without source_id."""
    # Revert to the four-column key that predates this revision.
    previous_key = [
        "event_start",
        "belief_horizon",
        "cumulative_probability",
        "sensor_id",
    ]
    op.drop_constraint("timed_belief_pkey", "timed_belief")
    op.create_primary_key("timed_belief_pkey", "timed_belief", previous_key)
101 changes: 80 additions & 21 deletions flexmeasures/data/scripts/cli_tasks/data_add.py
Expand Up @@ -220,6 +220,12 @@ def add_initial_structure():
type=click.IntRange(min=1),
help="Sensor to which the beliefs pertain.",
)
@click.option(
"--source",
required=True,
type=str,
help="Source of the beliefs (an existing source id or name, or a new name).",
)
@click.option(
"--horizon",
required=False,
Expand All @@ -238,16 +244,55 @@ def add_initial_structure():
help="Allow overwriting possibly already existing data.\n"
"Not allowing overwriting can be much more efficient",
)
@click.option(
"--skiprows",
required=False,
default=1,
type=int,
help="Number of rows to skip from the top. Set to >1 to skip additional headers.",
)
@click.option(
"--nrows",
required=False,
type=int,
help="Number of rows to read (from the top, after possibly skipping rows). Leave out to read all rows.",
)
@click.option(
"--datecol",
required=False,
default=0,
type=int,
help="Column number with datetimes (0 is 1st column, the default)",
)
@click.option(
"--valuecol",
required=False,
default=1,
type=int,
help="Column number with values (1 is 2nd column, the default)",
)
@click.option(
"--sheet_number",
required=False,
type=int,
help="[For xls or xlsx files] Sheet number with the data (0 is 1st sheet)",
)
def add_beliefs(
file: str,
sensor_id: int,
source: str,
horizon: Optional[int] = None,
cp: Optional[float] = None,
allow_overwrite: bool = False,
skiprows: int = 1,
nrows: Optional[int] = None,
datecol: int = 0,
valuecol: int = 1,
sheet_number: Optional[int] = None,
):
"""Add sensor data from a csv file.
"""Add sensor data from a csv file (also accepts xls or xlsx).

Structure your csv file as follows:
To use default settings, structure your csv file as follows:

- One header line (will be ignored!)
- UTC datetimes in 1st column
Expand All @@ -267,30 +312,44 @@ def add_beliefs(
if sensor is None:
print(f"Failed to create beliefs: no sensor found with id {sensor_id}.")
return
source = (
DataSource.query.filter(DataSource.name == "Seita")
.filter(DataSource.type == "CLI script")
.one_or_none()
)
if not source:
print("SETTING UP CLI SCRIPT AS NEW DATA SOURCE...")
source = DataSource(name="Seita", type="CLI script")
db.session.add(source)
db.session.flush() # assigns id
query = DataSource.query.filter(DataSource.type == "CLI script")
Flix6x marked this conversation as resolved.
Show resolved Hide resolved
if source.isdigit():
query = query.filter(DataSource.id == int(source))
_source = query.one_or_none()
if not _source:
print(f"Failed to find source {source}.")
return
else:
query = query.filter(DataSource.name == source)
_source = query.one_or_none()
if not _source:
print(f"Setting up '{source}' as new data source...")
_source = DataSource(name=source, type="CLI script")
db.session.add(_source)
db.session.flush() # assigns id

# Set up optional parameters for read_csv
kwargs = dict()
if file.split(".")[-1].lower() == "csv":
kwargs["infer_datetime_format"] = True
if sheet_number is not None:
kwargs["sheet_name"] = sheet_number
if horizon is not None:
kwargs["belief_horizon"] = timedelta(minutes=horizon)
else:
kwargs["belief_time"] = server_now().astimezone(pytz.timezone(sensor.timezone))

bdf = tb.read_csv(
file,
sensor,
source=source,
source=_source,
cumulative_probability=cp,
header=None,
skiprows=skiprows,
nrows=nrows,
usecols=[datecol, valuecol],
parse_dates=True,
infer_datetime_format=True,
**(
dict(belief_horizon=timedelta(minutes=horizon))
if horizon is not None
else dict(
belief_time=server_now().astimezone(pytz.timezone(sensor.timezone))
)
),
**kwargs,
)
try:
TimedBelief.add(
Expand Down
2 changes: 1 addition & 1 deletion requirements/app.in
Expand Up @@ -32,7 +32,7 @@ netCDF4
siphon
tables
timetomodel>=0.6.8
timely-beliefs>=1.4.0
timely-beliefs>=1.4.3
python-dotenv
# a backport, not needed in Python3.8
importlib_metadata
Expand Down
2 changes: 1 addition & 1 deletion requirements/app.txt
Expand Up @@ -317,7 +317,7 @@ tables==3.6.1
# via -r requirements/app.in
threadpoolctl==2.1.0
# via scikit-learn
timely-beliefs==1.4.0
timely-beliefs==1.4.3
# via -r requirements/app.in
timetomodel==0.6.9
# via -r requirements/app.in
Expand Down