isort, black
arminwitte committed Sep 24, 2023
1 parent ce4e10d commit 806b3e4
Showing 6 changed files with 155 additions and 69 deletions.
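
(A diff of this shape is what the two formatters produce with default settings, e.g. running `isort binarybeech/` followed by `black binarybeech/`; the exact invocation is an assumption, it is not recorded in the commit.)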
49 changes: 31 additions & 18 deletions binarybeech/attributehandler.py
@@ -77,21 +77,24 @@ def split(self, df):
         ]
         N = len(df.index)
         n = [len(df_.index) for df_ in split_df]
 
-        loss_args = {key: self.algorithm_kwargs[key] for key in ["lambda_l1", "lambda_l2"]}
-
+        loss_args = {
+            key: self.algorithm_kwargs[key] for key in ["lambda_l1", "lambda_l2"]
+        }
         loss_args = [loss_args.copy(), loss_args.copy()]
         if "__weights__" in df:
             for i, df_ in enumerate(split_df):
                 loss_args[i]["weights"] = df_["__weights__"].values
-
-
 
         val = [
             self.metrics.node_value(df_[self.y_name], **loss_args[i])
             for i, df_ in enumerate(split_df)
         ]
         loss = n[0] / N * self.metrics.loss(
             split_df[0][self.y_name], val[0], **loss_args[0]
-        ) + n[1] / N * self.metrics.loss(split_df[1][self.y_name], val[1], **loss_args[1])
+        ) + n[1] / N * self.metrics.loss(
+            split_df[1][self.y_name], val[1], **loss_args[1]
+        )
         if loss < self.loss:
             success = True
             self.loss = loss
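
For orientation: the quantity assigned to `loss` in this hunk (and recomputed in the other hunks of this file) is the size-weighted sum of the two child losses. With child sample counts $n_0$, $n_1$, parent count $N$, and child node values $v_0$, $v_1$:

$$
L_{\mathrm{split}} = \frac{n_0}{N}\, L(y_0, v_0) + \frac{n_1}{N}\, L(y_1, v_1)
$$

black only re-wraps this expression; the computation is unchanged.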
@@ -165,19 +168,23 @@ def fun(x):
             if min(n) == 0:
                 return np.Inf
 
-            loss_args = {key: self.algorithm_kwargs[key] for key in ["lambda_l1", "lambda_l2"]}
+            loss_args = {
+                key: self.algorithm_kwargs[key] for key in ["lambda_l1", "lambda_l2"]
+            }
             loss_args = [loss_args.copy(), loss_args.copy()]
             if "__weights__" in df:
                 for i, df_ in enumerate(split_df):
                     loss_args[i]["weights"] = df_["__weights__"].values
 
             val = [
                 self.metrics.node_value(df_[self.y_name], **loss_args[i])
                 for i, df_ in enumerate(split_df)
             ]
             return n[0] / N * self.metrics.loss(
                 split_df[0][self.y_name], val[0], **loss_args[0]
-            ) + n[1] / N * self.metrics.loss(split_df[1][self.y_name], val[1], **loss_args[1])
+            ) + n[1] / N * self.metrics.loss(
+                split_df[1][self.y_name], val[1], **loss_args[1]
+            )
 
         return fun

@@ -218,21 +225,24 @@ def split(self, df):
         ]
         N = len(df.index)
         n = [len(df_.index) for df_ in self.split_df]
 
-        loss_args = {key: self.algorithm_kwargs[key] for key in ["lambda_l1", "lambda_l2"]}
-
+        loss_args = {
+            key: self.algorithm_kwargs[key] for key in ["lambda_l1", "lambda_l2"]
+        }
         loss_args = [loss_args.copy(), loss_args.copy()]
         if "__weights__" in df:
             for i, df_ in enumerate(self.split_df):
                 loss_args[i]["weights"] = df_["__weights__"].values
-
 
         val = [
             self.metrics.node_value(df_[self.y_name], **loss_args[i])
             for i, df_ in enumerate(self.split_df)
         ]
         self.loss = n[0] / N * self.metrics.loss(
             self.split_df[0][self.y_name], val[0], **loss_args[0]
-        ) + n[1] / N * self.metrics.loss(self.split_df[1][self.y_name], val[1], **loss_args[1])
+        ) + n[1] / N * self.metrics.loss(
+            self.split_df[1][self.y_name], val[1], **loss_args[1]
+        )
 
         return success

@@ -302,21 +312,24 @@ def _opt_fun(self, df):
         def fun(x):
             split_df = [df[df[split_name] < x], df[df[split_name] >= x]]
             n = [len(df_.index) for df_ in split_df]
-
-
-            loss_args = {key: self.algorithm_kwargs[key] for key in ["lambda_l1", "lambda_l2"]}
 
+            loss_args = {
+                key: self.algorithm_kwargs[key] for key in ["lambda_l1", "lambda_l2"]
+            }
             loss_args = [loss_args.copy(), loss_args.copy()]
             if "__weights__" in df:
                 for i, df_ in enumerate(split_df):
                     loss_args[i]["weights"] = df_["__weights__"].values
 
             val = [
                 self.metrics.node_value(df_[self.y_name], **loss_args[i])
                 for i, df_ in enumerate(split_df)
             ]
             return n[0] / N * self.metrics.loss(
                 split_df[0][self.y_name], val[0], **loss_args[0]
-            ) + n[1] / N * self.metrics.loss(split_df[1][self.y_name], val[1], **loss_args[1])
+            ) + n[1] / N * self.metrics.loss(
+                split_df[1][self.y_name], val[1], **loss_args[1]
+            )
 
         return fun
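
As an aside, `fun` is a closure mapping a candidate threshold `x` to the weighted split loss, so the callable returned by `_opt_fun` is presumably handed to a scalar optimizer. A minimal self-contained sketch of that pattern; the toy data, the variance-based loss, and the use of `scipy.optimize.minimize_scalar` are illustrative assumptions, not binarybeech's actual code:

    # Illustrative sketch only: minimize a split-loss closure over a threshold x.
    import numpy as np
    from scipy.optimize import minimize_scalar

    values = np.array([1.0, 2.0, 3.0, 10.0, 11.0, 12.0])  # stand-in attribute column

    def fun(x):
        # toy split loss: size-weighted within-group variance of the two halves
        left, right = values[values < x], values[values >= x]
        if min(len(left), len(right)) == 0:
            return np.inf  # degenerate split, reject
        N = len(values)
        return len(left) / N * left.var() + len(right) / N * right.var()

    res = minimize_scalar(fun, bounds=(values.min(), values.max()), method="bounded")
    print(res.x, res.fun)  # some threshold between 3 and 10; loss about 0.67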

37 changes: 22 additions & 15 deletions binarybeech/binarybeech.py
@@ -99,9 +99,9 @@ def __init__(
         min_leaf_samples=1,
         min_split_samples=1,
         max_depth=10,
-        min_split_loss = 0.,
-        lambda_l1 = 0.,
-        lambda_l2 = 0.,
+        min_split_loss=0.0,
+        lambda_l1=0.0,
+        lambda_l2=0.0,
         method="regression",
         handle_missings="simple",
         attribute_handlers=None,
@@ -128,7 +128,6 @@ def __init__(
         self.max_depth = max_depth
         self.min_split_loss = min_split_loss
 
-
         self.depth = 0
         self.seed = seed
 
@@ -230,7 +229,9 @@ def create_tree(self, leaf_loss_threshold=1e-12):
     def _node_or_leaf(self, df):
         y = df[self.y_name]
 
-        loss_args = {key:self.algorithm_kwargs[key] for key in ["lambda_l1", "lambda_l2"]}
+        loss_args = {
+            key: self.algorithm_kwargs[key] for key in ["lambda_l1", "lambda_l2"]
+        }
         if "__weights__" in df:
             loss_args["weights"] = df["__weights__"].values
 
@@ -274,7 +275,9 @@ def _node_or_leaf(self, df):
                 decision_fun=self.dmgr[split_name].decide,
             )
             item.pinfo["N"] = len(df.index)
-            loss_args = {key:self.algorithm_kwargs[key] for key in ["lambda_l1", "lambda_l2"]}
+            loss_args = {
+                key: self.algorithm_kwargs[key] for key in ["lambda_l1", "lambda_l2"]
+            }
             item.pinfo["r"] = self.dmgr.metrics.loss_prune(y, y_hat, **loss_args)
             item.pinfo["R"] = (
                 item.pinfo["N"] / len(self.training_data.df.index) * item.pinfo["r"]
@@ -290,7 +293,9 @@ def _leaf(self, y, y_hat):
         leaf = Node(value=y_hat)
 
         leaf.pinfo["N"] = y.size
-        loss_args = {key:self.algorithm_kwargs[key] for key in ["lambda_l1","lambda_l2"]}
+        loss_args = {
+            key: self.algorithm_kwargs[key] for key in ["lambda_l1", "lambda_l2"]
+        }
         leaf.pinfo["r"] = self.dmgr.metrics.loss_prune(y, y_hat, **loss_args)
         leaf.pinfo["R"] = (
             leaf.pinfo["N"] / len(self.training_data.df.index) * leaf.pinfo["r"]
@@ -406,8 +411,8 @@ def __init__(
         sample_frac=1,
         n_attributes=None,
         learning_rate=0.1,
-        lambda_l1 = 0.,
-        lambda_l2 = 0.,
+        lambda_l1=0.0,
+        lambda_l2=0.0,
         cart_settings={},
         init_method="logistic",
         gamma=None,
@@ -551,8 +556,8 @@ def _opt_fun(self, tree):
         for i, x in enumerate(self.df.iloc):
             delta[i] = tree.traverse(x).value
         y = self.df[self.y_name].values
 
-        loss_args = {key:self.algorithm_kwargs[key] for key in ["lambda_l1", "lambda_l2"]}
-
+        loss_args = {
+            key: self.algorithm_kwargs[key] for key in ["lambda_l1", "lambda_l2"]
+        }
         if "__weights__" in self.df:
             loss_args["weights"] = self.df["__weights__"].values
 
@@ -648,8 +655,8 @@ def __init__(
         X_names=None,
         sample_frac=1,
         n_attributes=None,
-        lambda_l1 = 0.,
-        lambda_l2 = 0.,
+        lambda_l1=0.0,
+        lambda_l2=0.0,
         cart_settings={},
         method="classification",
         handle_missings="simple",
@@ -813,8 +820,8 @@ def __init__(
         verbose=False,
         sample_frac=1,
         n_attributes=None,
-        lambda_l1 = 0.,
-        lambda_l2 = 0.,
+        lambda_l1=0.0,
+        lambda_l2=0.0,
         cart_settings={},
         method="regression",
         handle_missings="simple",
30 changes: 14 additions & 16 deletions binarybeech/metrics.py
@@ -112,7 +112,7 @@ def goodness_of_fit(self, y, y_hat):
 
     def bins(self, df, y_name, attribute):
         y = df[y_name]
 
         kwargs = {}
         if "__weights__" in df:
             kwargs["weights"] = df["__weights__"].values
@@ -122,7 +122,7 @@ def bins(self, df, y_name, attribute):
         unique = np.unique(df[attribute])
         for u in unique:
             y_u = df[df[attribute] == u][y_name]
 
             kwargs = {}
             if "__weights__" in df:
                 kwargs["weights"] = df[df[attribute] == u]["__weights__"].values
@@ -138,27 +138,25 @@ def bins(self, df, y_name, attribute):
     @staticmethod
     def check(x):
         return math.check_interval(x)
 
 
 class RegressionMetricsRegularized(RegressionMetrics):
     def __init__(self):
         super().__init__()
 
     def node_value(self, y, **kwargs):
         y = np.array(y).ravel()
         n = y.shape[0]
         lambda_l1 = kwargs.get("lambda_l1")
         lambda_l2 = kwargs.get("lambda_l2")
         y_sum = np.sum(y)
 
         if y_sum < -lambda_l1:
-            return (y_sum + lambda_l1)/(n + lambda_l2)
+            return (y_sum + lambda_l1) / (n + lambda_l2)
         elif y_sum > lambda_l1:
-            return (y_sum - lambda_l1)/(n + lambda_l2)
+            return (y_sum - lambda_l1) / (n + lambda_l2)
         else:
-            return 0.
-
-
+            return 0.0
 
 
 class LogisticMetrics(Metrics):
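
Aside from the operator spacing that black fixed, `RegressionMetricsRegularized.node_value` above is the usual soft-thresholded, L1/L2-regularized leaf value. Writing $S = \sum_i y_i$ for the target sum, $n$ for the sample count, and $\lambda_1$, $\lambda_2$ for `lambda_l1` and `lambda_l2`:

$$
v =
\begin{cases}
\dfrac{S + \lambda_1}{n + \lambda_2} & \text{if } S < -\lambda_1, \\
\dfrac{S - \lambda_1}{n + \lambda_2} & \text{if } S > \lambda_1, \\
0 & \text{otherwise,}
\end{cases}
$$

the same shrinkage rule used for leaf weights in gradient-boosting libraries.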
@@ -204,17 +202,17 @@ def inverse_transform(arr):
 
     def bins(self, df, y_name, attribute):
         y = df[y_name]
 
         kwargs = {}
         if "__weights__" in df:
-            kwargs ["weights"] = df["__weights__"].values
+            kwargs["weights"] = df["__weights__"].values
 
         y_hat = self.node_value(y, **kwargs)
         bins = [[], []]
         unique = np.unique(df[attribute])
         for u in unique:
             y_u = df[df[attribute] == u][y_name]
 
             kwargs = {}
             if "__weights__" in df:
                 kwargs["weights"] = df[df[attribute] == u]["__weights__"].values
@@ -293,7 +291,7 @@ def goodness_of_fit(self, y, y_hat):
 
     def bins(self, df, y_name, attribute):
         y = df[y_name]
 
         kwargs = {}
         if "__weights__" in df:
             kwargs["weights"] = df["__weights__"].values
@@ -303,7 +301,7 @@ def bins(self, df, y_name, attribute):
         unique = np.unique(df[attribute])
         for u in unique:
             y_u = df[df[attribute] == u][y_name]
 
             kwargs = {}
             if "__weights__" in df:
                 kwargs["weights"] = df[df[attribute] == u]["__weights__"].values
