Skip to content

Commit

Permalink
Bug in StackBoWBP
Browse files Browse the repository at this point in the history
  • Loading branch information
mgraffg committed Feb 1, 2024
1 parent 62f94a1 commit 6d5071c
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 5 deletions.
2 changes: 1 addition & 1 deletion EvoMSA/__init__.py
Expand Up @@ -11,7 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
__version__ = '2.0.6'
__version__ = '2.0.7'

try:
from EvoMSA.text_repr import BoW, TextRepresentations, StackGeneralization, DenseBoW
Expand Down
24 changes: 22 additions & 2 deletions EvoMSA/back_prop.py
Expand Up @@ -65,6 +65,25 @@ def stack_model_binary(params, X, df):
return Y * pesos[0] + nn.sigmoid(df) * pesos[1] - 0.5


def initial_parameters(df, df2, y, score=None):
    """Estimate initial parameters :py:class:`~EvoMSA.back_prop.StackBoWBP`

    Both decision-function matrices are converted with a softmax along
    ``axis=1`` and then mixed as ``w * df + (1 - w) * df2`` for 100 evenly
    spaced values of ``w`` in ``[0, 1]``. The mixture whose row-wise
    ``argmax`` obtains the highest ``score`` determines the weights.

    :param df: Decision-function values of the first model (indexed on ``axis=1``; presumably one column per class -- confirm against the caller).
    :param df2: Decision-function values of the second model; it must be combinable element-wise with ``df``.
    :param y: Reference labels, given as the first argument to ``score``. They are compared against ``argmax`` indices, so they are presumably integer-encoded -- confirm against the caller.
    :param score: Callable ``score(y, hy)`` where higher is better. When it is ``None``, macro-averaged F1 from scikit-learn is used.
    :return: ``jnp.array([w, 1 - w])`` for the best ``w``; on ties the smallest ``w`` wins.
    """
    from scipy.special import softmax

    metric = score
    if metric is None:
        # Imported only when needed, so a caller-supplied score does not
        # require scikit-learn to be importable.
        from sklearn.metrics import f1_score

        def metric(y_true, y_pred):
            return f1_score(y_true, y_pred, average='macro')

    # Work on local copies of the names instead of rebinding the arguments.
    prob = softmax(df, axis=1)
    prob2 = softmax(df2, axis=1)
    weights = np.linspace(0, 1, 100)
    performance = [metric(y, (w * prob + (1 - w) * prob2).argmax(axis=1))
                   for w in weights]
    # np.argmax returns the first maximum, hence the smallest weight on ties.
    best = weights[np.argmax(performance)]
    return jnp.array([best, 1 - best])


class BoWBP(BoW):
"""BoWBP is a :py:class:`~EvoMSA.text_repr.BoW` with the difference that the parameters are fine-tuned using jax
Expand Down Expand Up @@ -289,7 +308,8 @@ def initial_parameters(self, X, y, df):

def model_args(self, D: List[Union[dict, list]]):
if not hasattr(self, '_bow_ins'):
hy = BoW.train_predict_decision_function(self, D)
X = self._transform(D)
hy = self.train_predict_decision_function(D, X=X)
else:
X = super(StackBoWBP, self)._transform(D)
hy = getattr(self._bow_ins, self.decision_function_name)(X)
Expand All @@ -303,4 +323,4 @@ def fit(self, D: List[Union[dict, list]],
_ = self._transform(D)
labels = self.dependent_variable(D, y=y)
self._bow_ins = self.estimator_class(**self.estimator_kwargs).fit(_, labels)
return self
return self
7 changes: 5 additions & 2 deletions EvoMSA/text_repr.py
Expand Up @@ -379,14 +379,16 @@ def b4msa_fit(self, D: List[Union[List, dict]]):
return self.bow.fit(_)

def train_predict_decision_function(self, D: List[Union[dict, list]],
y: Union[np.ndarray, None]=None) -> np.ndarray:
y: Union[np.ndarray, None]=None,
X=None) -> np.ndarray:
"""
Method to compute the kfold predictions on dataset `D` with response `y`
:param D: Texts to be transformed. When it is a list of dictionaries, the text is taken from the key :py:attr:`BoW.key`
:type D: List of texts or dictionaries.
:param y: Response variable
:type y: Array or None
:param X: Transformed dataset
For example, the following code computes the accuracy using k-fold cross-validation on the dataset found on `TWEETS`
Expand All @@ -411,7 +413,8 @@ def train_predict(tr, vs):
y = self.dependent_variable(D, y=y)
kf = self.kfold_class(**self.kfold_kwargs)
kfolds = [x for x in kf.split(D, y)]
X = self.transform(D, y=y)
if X is None:
X = self.transform(D, y=y)
hys = Parallel(n_jobs=self.n_jobs)(delayed(train_predict)(tr, vs)
for tr, vs in kfolds)
K = np.unique(y).shape[0]
Expand Down

0 comments on commit 6d5071c

Please sign in to comment.