Merge branch 'release-1.7.2'
ilan-gold committed Nov 19, 2019
Parents: 5a4630b + 7bfcc97. Commit: dd4287c
Showing 36 changed files with 578 additions and 380 deletions.
27 changes: 27 additions & 0 deletions CHANGELOG.md
@@ -1,5 +1,32 @@
# Change Log

## [v1.7.2](https://github.com/refinery-platform/refinery-platform/tree/v1.7.2) (2019-11-18)
[Full Changelog](https://github.com/refinery-platform/refinery-platform/compare/v1.7.1...v1.7.2)

**Fixed bugs:**

- File-Browser: Select All Bug [\#3467](https://github.com/refinery-platform/refinery-platform/issues/3467)
- Analysis import fails due to celery SoftTimeLimitExceeded [\#3435](https://github.com/refinery-platform/refinery-platform/issues/3435)

**Closed issues:**

- Bam File IGV Not Working [\#3465](https://github.com/refinery-platform/refinery-platform/issues/3465)
- Botocore incompatibility with v1.12.98 [\#3388](https://github.com/refinery-platform/refinery-platform/issues/3388)

**Merged pull requests:**

- upgrade postgresql version [\#3489](https://github.com/refinery-platform/refinery-platform/pull/3489) ([ilan-gold](https://github.com/ilan-gold))
- Adding some tests and better handling for non-ascii throughout [\#3488](https://github.com/refinery-platform/refinery-platform/pull/3488) ([ilan-gold](https://github.com/ilan-gold))
- Fixing the Provenance Graph [\#3487](https://github.com/refinery-platform/refinery-platform/pull/3487) ([ilan-gold](https://github.com/ilan-gold))
- Ilan gold/user registration bug [\#3486](https://github.com/refinery-platform/refinery-platform/pull/3486) ([ilan-gold](https://github.com/ilan-gold))
- Fix Excessive File Downloading [\#3485](https://github.com/refinery-platform/refinery-platform/pull/3485) ([ilan-gold](https://github.com/ilan-gold))
- Fix .bam file handling for visualizations [\#3484](https://github.com/refinery-platform/refinery-platform/pull/3484) ([ilan-gold](https://github.com/ilan-gold))
- Set Up CloudWatch Alarms for CPU Utilization [\#3483](https://github.com/refinery-platform/refinery-platform/pull/3483) ([ilan-gold](https://github.com/ilan-gold))
- Fix Dict/Unicode bug [\#3475](https://github.com/refinery-platform/refinery-platform/pull/3475) ([ilan-gold](https://github.com/ilan-gold))
- Update AWS CLI to the latest version [\#3472](https://github.com/refinery-platform/refinery-platform/pull/3472) ([hackdna](https://github.com/hackdna))
- Rename Files in Flight [\#3471](https://github.com/refinery-platform/refinery-platform/pull/3471) ([ilan-gold](https://github.com/ilan-gold))
- Restructure Analysis Metadata Fetching for Celery [\#3461](https://github.com/refinery-platform/refinery-platform/pull/3461) ([ilan-gold](https://github.com/ilan-gold))

## [v1.7.1](https://github.com/refinery-platform/refinery-platform/tree/v1.7.1) (2019-10-21)
[Full Changelog](https://github.com/refinery-platform/refinery-platform/compare/v1.7.0...v1.7.1)

2 changes: 1 addition & 1 deletion deployment/puppet/refinery/manifests/postgresql.pp
@@ -7,7 +7,7 @@
$rds_endpoint_address = $refinery::params::rds_endpoint_address,
) inherits refinery::params {
$server_version = '10'
$package_version = "${server_version}.10-1.pgdg16.04+1"
$package_version = "${server_version}.11-1.pgdg16.04+1"

if $deployment_platform == 'aws' {
$rds_settings = {
2 changes: 1 addition & 1 deletion deployment/requirements.txt
@@ -3,6 +3,6 @@ ecdsa==0.11
fabtools==0.19.0
paramiko==2.5.0
pycrypto==2.6.1
awscli==1.16.108
awscli==1.16.263
aws-shell==0.2.1
pre-commit==0.7.6
2 changes: 2 additions & 0 deletions deployment/terraform/live/main.tf
Expand Up @@ -97,6 +97,7 @@ module "database" {
snapshot_id = "${var.rds_snapshot_id}"
vpc_id = "${module.vpc.vpc_id}"
tags = "${local.tags}"
alarm_sns_arn = "${var.alarm_sns_arn}"
}

module "web" {
@@ -138,4 +139,5 @@ module "web" {
data_volume_snapshot_id = "${var.data_volume_snapshot_id}"
resource_name_prefix = "${terraform.workspace}"
tags = "${local.tags}"
alarm_sns_arn = "${var.alarm_sns_arn}"
}
5 changes: 5 additions & 0 deletions deployment/terraform/live/vars.tf
@@ -195,3 +195,8 @@ variable "data_volume_snapshot_id" {
description = "A snapshot to base the EBS data volume off of"
default = ""
}

variable "alarm_sns_arn" {
description = "The ARN of an SNS topic to publish CloudWatch alarms"
default = ""
}
22 changes: 21 additions & 1 deletion deployment/terraform/modules/ec2/main.tf
@@ -133,7 +133,8 @@ resource "aws_instance" "app_server" {
subnet_id = "${var.subnet_id}"
iam_instance_profile = "${aws_iam_instance_profile.app_server.name}"
root_block_device {
volume_type = "gp2"
volume_type = "gp2",
volume_size = 12
}
ebs_block_device {
delete_on_termination = false
@@ -347,3 +348,22 @@ resource "aws_elb" "https" {
unhealthy_threshold = 4
}
}

resource "aws_cloudwatch_metric_alarm" "app_server_cpu_utilization" {
count = "${var.alarm_sns_arn == "" ? 0 : 1}"
alarm_name = "${var.resource_name_prefix}-app-server-cpu-utilization"
comparison_operator = "GreaterThanOrEqualToThreshold"
evaluation_periods = "5"
metric_name = "CPUUtilization"
namespace = "AWS/EC2"
period = "300"
statistic = "Average"
threshold = "25"
alarm_description = "Monitors CPU utilization of ${aws_instance.app_server.tags.Name}"
alarm_actions = [ "${var.alarm_sns_arn}" ]
dimensions {
InstanceId = "${aws_instance.app_server.id}"
}

}
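The alarm is created only when an SNS topic ARN is supplied: the count ternary (0 when `alarm_sns_arn` is empty) is the Terraform 0.11 idiom for a conditional resource. As a rough illustration of what this resource provisions, here is a hedged boto3 sketch of an equivalent CPU alarm; the region, alarm name, SNS topic ARN, and instance ID are placeholders, not values taken from this deployment.

```python
import boto3

cloudwatch = boto3.client("cloudwatch", region_name="us-east-1")  # hypothetical region

cloudwatch.put_metric_alarm(
    AlarmName="dev-app-server-cpu-utilization",  # stands in for the prefixed alarm_name
    ComparisonOperator="GreaterThanOrEqualToThreshold",
    EvaluationPeriods=5,   # five consecutive periods...
    Period=300,            # ...of five minutes each
    MetricName="CPUUtilization",
    Namespace="AWS/EC2",
    Statistic="Average",
    Threshold=25.0,        # percent CPU, as in the Terraform resource
    AlarmDescription="Monitors CPU utilization of the app server",
    AlarmActions=["arn:aws:sns:us-east-1:123456789012:alarms"],  # placeholder SNS topic
    Dimensions=[{"Name": "InstanceId", "Value": "i-0123456789abcdef0"}],  # placeholder
)
```

The same parameters map onto the RDS alarm further down, with the `AWS/RDS` namespace and a `DBInstanceIdentifier` dimension instead.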

1 change: 1 addition & 0 deletions deployment/terraform/modules/ec2/vars.tf
@@ -44,3 +44,4 @@ variable "data_volume_device_name" {
description = "Device name for the EBS data volume"
default = "/dev/xvdr"
}
variable "alarm_sns_arn" {}
18 changes: 18 additions & 0 deletions deployment/terraform/modules/rds/main.tf
@@ -45,3 +45,21 @@ resource "aws_db_instance" "default" {
ignore_changes = ["final_snapshot_identifier"]
}
}

resource "aws_cloudwatch_metric_alarm" "default_rds_cpu_utilization" {
count = "${var.alarm_sns_arn == "" ? 0 : 1}"
alarm_name = "${var.resource_name_prefix}-default-rds-cpu-utilization"
comparison_operator = "GreaterThanOrEqualToThreshold"
evaluation_periods = "5"
metric_name = "CPUUtilization"
namespace = "AWS/RDS"
period = "300"
statistic = "Average"
threshold = "5"
alarm_description = "Monitors CPU utilization of default RDS instance"
alarm_actions = [ "${var.alarm_sns_arn}" ]
dimensions {
DBInstanceIdentifier = "${aws_db_instance.default.id}"
}

}
1 change: 1 addition & 0 deletions deployment/terraform/modules/rds/vars.tf
@@ -10,3 +10,4 @@ variable "availability_zone" {}
variable "snapshot_id" {}
variable "master_user_password" {}
variable "vpc_id" {}
variable "alarm_sns_arn" {}
31 changes: 21 additions & 10 deletions refinery/analysis_manager/tasks.py
@@ -145,16 +145,26 @@ def _attach_workflow_outputs(analysis_uuid):
our Analysis
"""
analysis = _get_analysis(analysis_uuid)
analysis_status = _get_analysis_status(analysis_uuid)

if analysis.workflow.type == Workflow.ANALYSIS_TYPE:
analysis.attach_derived_nodes_to_dataset()
tasks = analysis.attach_derived_nodes_to_dataset()
logger.info(
"Starting auxiliary file creation for analysis %s'", analysis
)
TaskSet(tasks=tasks).apply_async()
elif analysis.workflow.type == Workflow.DOWNLOAD_TYPE:
analysis.attach_outputs_downloads()
else:
logger.warning("Unknown workflow type '%s' in analysis '%s'",
analysis.workflow.type, analysis.name)


def _finalize_analysis(analysis_uuid):
"""
finalize analysis after attaching outputs from galaxy to the refinery file
system
"""
analysis = _get_analysis(analysis_uuid)
analysis_status = _get_analysis_status(analysis_uuid)
analysis.set_status(Analysis.SUCCESS_STATUS)
analysis.send_email()
logger.info("Analysis '%s' finished successfully", analysis)
@@ -302,6 +312,7 @@ def run_analysis(analysis_uuid):
_check_galaxy_history_state(analysis_uuid)
_galaxy_file_export(analysis_uuid)
_attach_workflow_outputs(analysis_uuid)
_finalize_analysis(analysis_uuid)


def _run_galaxy_file_import(analysis_uuid):
@@ -470,34 +481,33 @@ def _get_galaxy_download_task_ids(analysis):
task_id_list = []
# retrieving list of files to download for workflow
tool = _get_workflow_tool(analysis.uuid)
tool.create_analysis_output_node_connections()
galaxy_instance = analysis.workflow.workflow_engine.instance
try:
download_list = tool.get_galaxy_dataset_download_list()
download_list = tool.create_analysis_output_node_connections()
except galaxy.client.ConnectionError as exc:
error_msg = \
"Error downloading Galaxy history files for analysis '%s': %s"
logger.error(error_msg, analysis.name, exc.message)
analysis.set_status(Analysis.FAILURE_STATUS, error_msg)
analysis.galaxy_cleanup()
return task_id_list
galaxy_instance = analysis.workflow.workflow_engine.instance

# Iterating through files in current galaxy history
for results in download_list:
# download file if result state is "ok"
if results['state'] == 'ok':
file_extension = results["type"]
result_name = "{}.{}".format(results['name'], file_extension)
file_extension = results['file_ext']
# size of file defined by galaxy
file_size = results['file_size']
file_store_item = FileStoreItem(source=urlparse.urljoin(
galaxy_instance.base_url,
"datasets/{}/display?to_ext=txt".format(results['dataset_id'])
"datasets/{}/display?to_ext=txt".format(results['id'])
))
# workaround to set the correct file type for zip archives of
# FastQC HTML reports produced by Galaxy dynamically
if file_extension == 'html':
file_extension = 'zip'
result_name = "{}.{}".format(results['name'], file_extension)
# assign file type manually since it cannot be inferred from source
try:
extension = FileExtension.objects.get(name=file_extension)
@@ -520,7 +530,8 @@ def _get_galaxy_download_task_ids(analysis):
# downloading analysis results into file_store
# only download files if size is greater than 1
if file_size > 0:
task_id = FileImportTask().subtask((file_store_item.uuid,))
task_id = FileImportTask().subtask((file_store_item.uuid,
result_name,))
task_id_list.append(task_id)

return task_id_list
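These hunks switch the download list to the output of `create_analysis_output_node_connections()`, whose entries carry `id`, `name`, `file_ext`, `file_size`, and `state`, and they pass the intended result name into `FileImportTask` so files are renamed in flight rather than after download. A minimal sketch of the per-dataset bookkeeping, assuming a hypothetical Galaxy base URL and dataset entry (Python 2, matching the module's `urlparse` usage):

```python
import urlparse  # Python 2 stdlib module, as used by this file

galaxy_base_url = "https://galaxy.example.org/"  # hypothetical Galaxy instance
dataset = {  # shaped like one download_list entry; all values are placeholders
    "id": "f2db41e1fa331b3e",
    "name": "Map with BWA-MEM on data 1",
    "file_ext": "bam",
    "file_size": 1048576,
    "state": "ok",
}

if dataset["state"] == "ok" and dataset["file_size"] > 0:
    # Galaxy serves the raw bytes from the dataset's "display" endpoint.
    source = urlparse.urljoin(
        galaxy_base_url,
        "datasets/{}/display?to_ext=txt".format(dataset["id"])
    )
    # The name the file should carry once imported, built from Galaxy metadata.
    result_name = "{}.{}".format(dataset["name"], dataset["file_ext"])
    print(source)       # .../datasets/f2db41e1fa331b3e/display?to_ext=txt
    print(result_name)  # Map with BWA-MEM on data 1.bam
    # In the real code this pair feeds FileImportTask().subtask(
    #     (file_store_item.uuid, result_name,)) and is appended to task_id_list.
```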
5 changes: 4 additions & 1 deletion refinery/config/settings/base.py
@@ -313,7 +313,10 @@ def get_setting(name, settings=local_settings, default=None):
'%(funcName)s[%(task_id)s] - %(message)s'
# for system stability
CELERYD_MAX_TASKS_PER_CHILD = get_setting("CELERYD_MAX_TASKS_PER_CHILD")
CELERY_ROUTES = {"file_store.tasks.FileImportTask": {"queue": "file_import"}}
CELERY_ROUTES = {
    "file_store.tasks.FileImportTask": {"queue": "file_import"},
    "data_set_manager.tasks.generate_auxiliary_file": {"queue": "file_import"}
}
CELERY_ACCEPT_CONTENT = ['pickle']
CELERYD_TASK_SOFT_TIME_LIMIT = 60 # seconds
CELERYBEAT_SCHEDULE = {
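Both the file import task and `generate_auxiliary_file` are now routed to the dedicated `file_import` queue, so only workers subscribed to that queue pick up long-running file work. A minimal sketch of how an old-style `CELERY_ROUTES` entry behaves, assuming a hypothetical broker URL and task name:

```python
from celery import Celery

app = Celery("refinery_sketch", broker="amqp://localhost")  # hypothetical broker URL

# Old-style (Celery 3.x) setting name, matching the CELERY_ROUTES dict above.
app.conf.CELERY_ROUTES = {
    "sketch.generate_auxiliary_file": {"queue": "file_import"},
}


@app.task(name="sketch.generate_auxiliary_file")
def generate_auxiliary_file(node_uuid):
    # Placeholder body; the real task builds auxiliary files (e.g. BAM indexes) for a node.
    return node_uuid


if __name__ == "__main__":
    # Routes are looked up by task name at publish time, so this message is published
    # to the "file_import" queue; only workers started with `-Q file_import` consume it.
    generate_auxiliary_file.delay("some-node-uuid")
```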
84 changes: 31 additions & 53 deletions refinery/core/models.py
@@ -53,7 +53,7 @@
from data_set_manager.search_indexes import NodeIndex
from data_set_manager.utils import (add_annotated_nodes_selection,
index_annotated_nodes_selection)
from file_store.models import FileStoreItem, FileType
from file_store.models import FileStoreItem
from file_store.tasks import FileImportTask
from galaxy_connector.models import Instance
import tool_manager
@@ -92,8 +92,8 @@ class UserProfile(models.Model):
login_count = models.IntegerField(default=0)

def __unicode__(self):
return self.user.first_name + " " + self.user.last_name + \
" (" + self.affiliation + "): " + self.user.email
return self.user.first_name + u" " + self.user.last_name + \
u" (" + self.affiliation + u"): " + self.user.email

def has_viewed_data_upload_tut(self):
try:
@@ -1067,9 +1067,9 @@ def __str__(self):
self.summary)

def get_expanded_workflow_graph(self):
return tool_manager.utils.create_expanded_workflow_graph(
ast.literal_eval(self.workflow_copy)
)
self.refresh_from_db(fields=['workflow_copy'])
workflow_copy = ast.literal_eval(self.workflow_copy)
return tool_manager.utils.create_expanded_workflow_graph(workflow_copy)

def has_nodes_used_in_downstream_analyses(self):
"""
@@ -1346,43 +1346,6 @@ def send_email(self):
"'%s' with UUID '%s'",
self.get_status(), user.email, name, self.uuid)

def rename_results(self):
"""Rename files in file_store after download"""
logger.debug("Renaming analysis results")
# rename file_store items to new name updated from galaxy file_ids
for result in self.results.all():
try:
item = FileStoreItem.objects.get(uuid=result.file_store_uuid)
except (FileStoreItem.DoesNotExist,
FileStoreItem.MultipleObjectsReturned) as exc:
logger.error("Error renaming analysis result '%s': %s",
result, exc)
break

# workaround for FastQC reports downloaded from Galaxy as zip
# archives
(root, ext) = os.path.splitext(result.file_name)
if ext == '.html':
try:
zipfile = FileType.objects.get(name='ZIP')
except (FileType.DoesNotExist,
FileType.MultipleObjectsReturned) as exc:
logger.error("Error renaming HTML to zip: %s", exc)
else:
if item.filetype == zipfile:
item.rename_datafile(''.join([root, '.zip']))
else:
item.rename_datafile(result.file_name)

try:
node = Node.objects.get(file_item=item)
except (Node.DoesNotExist, Node.MultipleObjectsReturned) as exc:
logger.error("Error retrieving Node with file UUID '%s': %s",
item.uuid, exc)
else:
if node.is_derived():
node.run_generate_auxiliary_node_task()

def attach_derived_nodes_to_dataset(self):
graph_with_data_transformation_nodes = (
self._create_data_transformation_nodes(
@@ -1395,7 +1358,7 @@ def attach_derived_nodes_to_dataset(self):
)
)
self._create_derived_data_file_nodes(graph_with_input_nodes_linked)
self._create_annotated_nodes()
return self._create_annotated_nodes()

def attach_outputs_downloads(self):
if self.results.all().count() == 0:
@@ -1440,18 +1403,33 @@ def terminate_file_import_tasks(self):

def _prepare_annotated_nodes(self, node_uuids):
"""
Wrapper method to ensure that `rename_results` is called before
index_annotated_nodes_selection.
If `rename_results` isn't executed before
`index_annotated_nodes_selection` we end up indexing incorrect
information.
Wrapper method to ensure that auxiliary nodes are generated before
indexing annotated nodes
Call order is ensured through:
core.tests.test__prepare_annotated_nodes_calls_methods_in_proper_order
"""
self.rename_results()
auxiliary_file_tasks = []
for result in self.results.all():
try:
item = FileStoreItem.objects.get(uuid=result.file_store_uuid)
except (FileStoreItem.DoesNotExist,
FileStoreItem.MultipleObjectsReturned) as exc:
logger.error("Error renaming analysis result '%s': %s",
result, exc)
break
try:
node = Node.objects.get(file_item=item)
except (Node.DoesNotExist, Node.MultipleObjectsReturned) as exc:
logger.error("Error retrieving Node with file UUID '%s': %s",
item.uuid, exc)
else:
if node.is_derived() and node.is_auxiliary_node_needed():
auxiliary_file_tasks += [
node.generate_auxiliary_node_task()
]
index_annotated_nodes_selection(node_uuids)
return auxiliary_file_tasks

def _get_output_connection_to_analysis_result_mapping(self):
"""Create and return a dict mapping each "output" type
@@ -1653,7 +1631,7 @@ def _create_annotated_nodes(self):
self.get_input_node_study().uuid,
self.get_input_node_assay().uuid
)
self._prepare_annotated_nodes(node_uuids)
return self._prepare_annotated_nodes(node_uuids)

def get_refinery_import_task_signatures(self):
"""Create and return a list of file import task signatures for the
@@ -2012,7 +1990,7 @@ def custom_send_activation_email(self, site):
'site': site.domain,
'registered_user_email': self.user.email,
'registered_user_username': self.user.username,
'registered_user_full_name': "{} {}".format(
'registered_user_full_name': u"{} {}".format(
self.user.first_name, self.user.last_name),
'registered_user_affiliation':
self.user.profile.affiliation
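In core/models.py, `_prepare_annotated_nodes` no longer calls the removed `rename_results`; it now collects `generate_auxiliary_node_task()` signatures for derived nodes that need auxiliary files and returns them, and that list travels back up through `_create_annotated_nodes` and `attach_derived_nodes_to_dataset` so that `_attach_workflow_outputs` can dispatch the whole batch with `TaskSet(tasks=tasks).apply_async()`. A minimal sketch of that collect-then-dispatch shape, assuming a hypothetical broker URL and task (`celery.group` is the current spelling of the legacy `TaskSet` API used in this codebase):

```python
from celery import Celery, group

app = Celery("refinery_sketch", broker="amqp://localhost")  # hypothetical broker URL


@app.task
def generate_auxiliary_file(node_uuid):
    # Placeholder body; stands in for the task behind Node.generate_auxiliary_node_task().
    return node_uuid


def collect_auxiliary_tasks(node_uuids):
    # Mirrors the new _prepare_annotated_nodes: build signatures, do not run them yet.
    return [generate_auxiliary_file.subtask((uuid,)) for uuid in node_uuids]


if __name__ == "__main__":
    tasks = collect_auxiliary_tasks(["uuid-1", "uuid-2"])
    # _attach_workflow_outputs dispatches the returned signatures in one shot;
    # group() is the modern equivalent of TaskSet(tasks=tasks).apply_async().
    group(tasks).apply_async()
```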
