Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix panic when normalizing invalid data #1698

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
6 changes: 4 additions & 2 deletions qlib/workflow/cli.py
Expand Up @@ -68,7 +68,8 @@ def workflow(config_path, experiment_name="workflow", uri_folder="mlruns"):

"""
with open(config_path) as fp:
config = yaml.safe_load(fp)
yml = yaml.YAML(typ='safe', pure=True)
config = yml.load(fp)

base_config_path = config.get("BASE_CONFIG_PATH", None)
if base_config_path:
Expand All @@ -90,7 +91,8 @@ def workflow(config_path, experiment_name="workflow", uri_folder="mlruns"):
raise FileNotFoundError(f"Can't find the BASE_CONFIG file: {base_config_path}")

with open(path) as fp:
base_config = yaml.safe_load(fp)
yml = yaml.YAML(typ='safe', pure=True)
base_config = yml.load(fp)
logger.info(f"Load BASE_CONFIG_PATH succeed: {path.resolve()}")
config = update_config(base_config, config)

Expand Down
15 changes: 9 additions & 6 deletions scripts/data_collector/base.py
Expand Up @@ -301,12 +301,15 @@ def _executor(self, file_path: Path):
na_values={col: symbol_na if col == self._symbol_field_name else default_na for col in columns},
)

df = self._normalize_obj.normalize(df)
if df is not None and not df.empty:
if self._end_date is not None:
_mask = pd.to_datetime(df[self._date_field_name]) <= pd.Timestamp(self._end_date)
df = df[_mask]
df.to_csv(self._target_dir.joinpath(file_path.name), index=False)
try:
df = self._normalize_obj.normalize(df)
if df is not None and not df.empty:
if self._end_date is not None:
_mask = pd.to_datetime(df[self._date_field_name]) <= pd.Timestamp(self._end_date)
df = df[_mask]
df.to_csv(self._target_dir.joinpath(file_path.name), index=False)
except Exception as e:
logger.warning(f"normalize {file_path.name} failed, error: {e}")

def normalize(self):
logger.info("normalize data......")
Expand Down