Skip to content

Commit

Permalink
Quickfix memory usage due to broken filelocks on distributed systems (#…
Browse files (browse the repository at this point in the history)
  • Loading branch information
cskaandorp committed May 13, 2024
1 parent 73590f7 commit f6774b7
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 14 deletions.
4 changes: 3 additions & 1 deletion asreview/webapp/api/projects.py
Original file line number Diff line number Diff line change
Expand Up @@ -1470,6 +1470,7 @@ def api_classify_instance(project, doc_id): # noqa: F401

retrain_model = False if is_prior == "1" else True
prior = True if is_prior == "1" else False
file_lock_path = Path(project.project_path, "trainings.lock")

if request.method == "POST":
with open_state(project.project_path, read_only=False) as state:
Expand All @@ -1485,7 +1486,8 @@ def api_classify_instance(project, doc_id): # noqa: F401
elif label == -1:
state.delete_record_labeling_data(record_id)

if retrain_model:
# retrain only if there is no lock
if retrain_model and not file_lock_path.exists():
# retrain model
subprocess.Popen(
[
Expand Down
6 changes: 6 additions & 0 deletions asreview/webapp/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,12 @@ def create_app(config_path=None):
if config_fp := (config_path or app.config.get("CONFIG_PATH", None)):
app.config.from_file(Path(config_fp).absolute(), load=tomllib.load, text=False)

# remove all lock files per project folder
for f in asreview_path().iterdir():
if f.is_dir():
# remove lock files
[lockfile.unlink(missing_ok=True) for lockfile in f.glob("*.lock")]

# if there are no cors and config is in debug mode, add default cors
if app.debug and not app.config.get("CORS_ORIGINS", None):
app.config["CORS_ORIGINS"] = ["http://localhost:3000", "http://127.0.0.1:3000"]
Expand Down
20 changes: 7 additions & 13 deletions asreview/webapp/entry_points/run_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,8 @@
# limitations under the License.

import argparse
import logging
from pathlib import Path

from filelock import FileLock
from filelock import Timeout

from asreview.models.balance import get_balance_model
from asreview.models.classifiers import get_classifier
from asreview.models.feature_extraction import get_feature_model
Expand All @@ -42,6 +38,7 @@ def run_model_entry_point(argv):
args = parser.parse_args(argv)

project = ASReviewProject(args.project_path)
file_lock_path = Path(project.project_path, "trainings.lock")

try:

Expand All @@ -51,9 +48,7 @@ def run_model_entry_point(argv):
return

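# Create a marker file while training runs; the API skips starting a new
# training run while this file exists. Note: this is a plain file, not an
# atomic lock, so it is a best-effort guard only.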
lock = FileLock(Path(project.project_path, "training.lock"), timeout=0)

with lock:
with open(file_lock_path, "w"):

with open_state(project) as state:
settings = state.settings
Expand All @@ -70,14 +65,13 @@ def run_model_entry_point(argv):

reviewer.train()

project.update_review(status="review")

except Timeout:
logging.debug("Another iteration is training")
# Update status
project.update_review(status="review")

except Exception as err:
project.set_error(err, save_error_message=args.output_error)
raise err

else:
project.update_review(status="review")
finally:
# Ensure removal of lock file
file_lock_path.unlink(missing_ok=True)

0 comments on commit f6774b7

Please sign in to comment.