Issue289/fix-region-requirement and add Changelog (#299)
* Check for no region on barcoded config

GH-289: using a barcode-only config can result in a crash when attempting
to look up the non-existent region.

* Bump version + drive-by pyproject.toml updates

* Add Changelog

* Return new action if action overridden;
pass it to the stats module in the `log_read` call as a kwarg

* Fix logic for overridden-action counting:
now counts the new action name but still logs the old name.
Move counting for barcode and region conditions into the same function.
Add doctest for `_update_condition_counter`

* Update src/readfish/_statistics.py

* Add unreleased section to changelog

* Add section on changelog

* Add changelog to index of documentation

* Add changelog to built docs

* Move changelog to its own page;
make the dynamically sectioned changelog load there
---------

Co-authored-by: Alexander Payne <alex.payne22@gmail.com>
Co-authored-by: mattloose <matt.loose@nottingham.ac.uk>
3 people committed Oct 12, 2023
1 parent 820f8b3 commit beb3dff
Showing 8 changed files with 121 additions and 33 deletions.
10 changes: 10 additions & 0 deletions README.md
@@ -548,3 +548,13 @@ And for our Awesome Logo please check out [@tim_bassford](https://twitter.com
[bulk - R9]: https://s3.amazonaws.com/nanopore-human-wgs/bulkfile/PLSP57501_20170308_FNFAF14035_MN16458_sequencing_run_NOTT_Hum_wh1rs2_60428.fast5
[bulk - R10 5khz]: https://s3.amazonaws.com/nanopore-human-wgs/bulkfile/GXB02001_20230509_1250_FAW79338_X3_sequencing_run_NA12878_B1_19382aa5_ef4362cd.fast5
[ONT]: https://nanoporetech.com

<!-- start-changelog -->
# Changelog
## Unreleased changes
None currently.
## 2023.1.1
1. Fix Readme Logo link 🥳 (#296)
1. Fix bug where we had accidentally started requiring barcoded TOMLs to specify a region. Thanks to @jamesemery for catching this. (#299)
1. Correctly handle overriding a decision in internal statistics tracking. (#299)
<!-- end-changelog -->
5 changes: 5 additions & 0 deletions docs/changelog.md
@@ -0,0 +1,5 @@

```{include} ../README.md
:start-after: <!-- start-changelog -->
:end-before: <!-- end-changelog -->
```
8 changes: 8 additions & 0 deletions docs/developers-guide.md
@@ -69,6 +69,14 @@ conda env create -f docs/development.yml
Readfish uses [calver](https://calver.org/) for versioning. Specifically, the format should be
`YYYY.MINOR.MICRO.Modifier`, where `MINOR` is the feature addition, `MICRO` is any hotfix/bugfix, and `Modifier` is the modifier (e.g. `rc` for release candidate, `dev` for development, empty for stable).
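
For example, the bump made in this commit, `2023.1.0` to `2023.1.1`, is a `MICRO` (bugfix) increment. The sketch below is purely illustrative (it is not part of readfish) and shows how version strings under this scheme break down:

```python
# Illustrative only: decompose a calver string of the form YYYY.MINOR.MICRO[.Modifier].
from __future__ import annotations


def split_calver(version: str) -> tuple[int, int, int, str | None]:
    """Split a readfish-style calver string into its numeric parts and optional modifier."""
    year, minor, micro, *modifier = version.split(".")
    return int(year), int(minor), int(micro), (modifier[0] if modifier else None)


print(split_calver("2023.1.0"))      # (2023, 1, 0, None)  -- previous release
print(split_calver("2023.1.1"))      # (2023, 1, 1, None)  -- this bugfix release
print(split_calver("2024.1.0.rc1"))  # (2024, 1, 0, 'rc1') -- hypothetical release candidate
```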

## Changelog

We are generally trying to follow the guidance at https://keepachangelog.com/en/1.0.0/.

Notably, the Unreleased section should be kept up to date with changes added in PRs between releases.

If possible, link the PR that introduced the change, and add a **brief** description of the change.

## Adding a simulated position for testing
```{include} ../README.md
:start-after: <!-- begin-simulate -->
1 change: 1 addition & 0 deletions docs/index.md
@@ -11,6 +11,7 @@ FAQ
post-analysis
developers-guide
readfish
changelog
```

# Welcome to the readfish documentation!
5 changes: 3 additions & 2 deletions pyproject.toml
@@ -24,6 +24,7 @@ classifiers = [
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: Implementation :: CPython",
"Topic :: Scientific/Engineering :: Bio-Informatics",
]
@@ -48,15 +49,15 @@ readfish = "readfish._cli_base:main"
docs = ["sphinx-copybutton", "furo", "myst-parser", "faqtory"]
tests = ["pytest", "coverage[toml]"]
tests-mappy = ["readfish[tests,mappy,guppy]"]
dev = ["readfish[all,docs,tests]"]
dev = ["readfish[all,docs,tests]", "pre-commit"]
# Running dependencies, this is a little bit clunky but works for now
mappy = ["mappy"]
mappy-rs = ["mappy-rs >= 0.0.6"]
guppy = ["ont_pyguppy_client_lib"]
all = ["readfish[mappy,mappy-rs,guppy]"]

[project.urls]
# Documentation = "https://looselab.github.io/readfish"
Documentation = "https://looselab.github.io/readfish"
"Bug Tracker" = "https://github.com/LooseLab/readfish/issues"
"Source Code" = "https://github.com/LooseLab/readfish"

2 changes: 1 addition & 1 deletion src/readfish/__about__.py
@@ -1,4 +1,4 @@
"""__about__.py
Version of the read until software
"""
__version__ = "2023.1.0"
__version__ = "2023.1.1"
91 changes: 69 additions & 22 deletions src/readfish/_statistics.py
@@ -17,7 +17,9 @@ class is able to update and query various statistics and counters regarding the
>>> from readfish._statistics import ReadfishStatistics, DEBUG_STATS_LOG_FIELDS
>>> stats = ReadfishStatistics(None)
>>> stats.add_batch_performance(1,1)
>>> stats.log_read(**dict(zip(DEBUG_STATS_LOG_FIELDS, (1, 2, "test_read_id", 7, 1, 100, 3, "single_on", "stop_receiving", "exp_region", None, None, False, 0.0))), region_name="naff")
>>> stats.log_read(**dict(zip(DEBUG_STATS_LOG_FIELDS, (1, 2, "test_read_id", 7, 1, 100, 3,\
"single_on", "stop_receiving", "exp_region", None, None, False, 0.0))), region_name="naff",\
overridden_action_name=None)
>>> print(stats.get_batch_performance())
0001R/1.0000s; Avg: 0001R/1.0000s; Seq:1; Unb:0; Pro:0; Slow batches (>1.00s): 0/1
>>> print(stats.decisions)
@@ -74,7 +76,7 @@ class ReadfishStatistics:
>>> stats.add_batch_performance(1, 1)
>>> stats.log_read(**dict(zip(DEBUG_STATS_LOG_FIELDS, (1, 2, "test_read_id",\
7, 1, 100, 3, "single_on", "stop_receiving", "exp_region", None, None,\
False, 0.0))), region_name="naff")
False, 0.0))), region_name="naff", overridden_action_name=None)
>>> print(stats.get_batch_performance())
0001R/1.0000s; Avg: 0001R/1.0000s; Seq:1; Unb:0; Pro:0; Slow batches (>1.00s): 0/1
>>> print(stats.decisions)
@@ -97,7 +99,7 @@ class ReadfishStatistics:
... # Use the log_read method to log a sample read
... stats.log_read(**dict(zip(DEBUG_STATS_LOG_FIELDS,\
(1, 2, "test_read_id", 7, 1, 100, 3, "single_on", "stop_receiving", "exp_region",\
None, None, False, 0.0))), region_name="naff")
None, None, False, 0.0))), region_name="naff", overridden_action_name=None)
... # in this test, we need a small amount of time to allow the logger to write the file
... time.sleep(0.1)
... # Read the content of the file
@@ -326,31 +328,49 @@ def add_batch_performance(self, number_of_reads: int, batch_time: float) -> None
else:
self.batch_statistics["consecutive_lagging_batches"] = 0

def log_read(self, region_name, **kwargs) -> None:
"""Add a new read chunk record into the collected statistics,
and log it to the debug logger."""
def log_read(
self, region_name: str, overridden_action_name: str | None, **kwargs
) -> None:
"""
Add a new read chunk record into the collected statistics,
and log it to the debug logger.
The following terms are used in this function:
decision is expected to be one of Unblock, stop_receiving etc.
mode is expected to be one of single_on, single_off, multi_on etc.
The term "action" is used to describe what the sequencer actually did.
#ToDo: see and address issue #298
:param region_name: The name of the region on the flow cell.
:param overridden_action_name: Optional, if the originally determined action
was overridden, the name of the NEW action.
"""

with self._lock:
self.total_chunks += 1
decision = kwargs.get("decision")
action_overridden = kwargs.get("action_overridden")
# Use the overridden action name for readfish stats counters
decision_name = (
kwargs.get("decision")
if not action_overridden
else overridden_action_name
)

# Increment total actions count, Unblock, stop_receiving etc.
self.actions[decision] += 1
# Increment total hits for this condition
self.actions[decision_name] += 1
# Increment total hits for this condition and condition.action.mode
condition_name = kwargs.get("condition")
self._update_condition_counter(
condition_name, region_name, kwargs.get("decision"), kwargs.get("mode")
condition_name,
region_name,
decision_name,
kwargs.get("mode"),
)
# increment count for this decision - single_off, single_on etc.
self.decisions[kwargs.get("mode")] += 1
# Count if read was skipped because it was mid translocation
first_read_key = f"{'first_read_skipped' if not kwargs.get('previous_action') and action_overridden else 'read_analysed'}"
self.first_read_overrides[first_read_key] += 1
# Count the actions for each condition
if not action_overridden:
self.actions_conditions[
(condition_name, decision, kwargs.get("mode"))
] += 1
# Log the read to the debug logger
debug_log_record = "\t".join(map(str, kwargs.values()))
self.debug_logger.debug(debug_log_record)
@@ -360,7 +380,7 @@ def _update_condition_counter(
condition_name: str,
region_name: str,
decision_name: str,
action_name: str,
mode_name: str,
) -> None:
"""
Update the condition and action condition counters with the provided parameters.
@@ -373,15 +393,42 @@ def _update_condition_counter(
:param condition_name: The name of the condition being updated.
:param region_name: The name of the region related to the condition.
:param decision_name: The name of the decision made in relation to the condition.
:param action_name: The name of the action taken in relation to the condition.
:param decision_name: The name of the decision made in relation to the condition, e.g. unblock, proceed, etc.
:param mode_name: The name of the mode of the read in relation to the condition, e.g. single_off, single_on, etc.
Usage:
Called internally when there's a need to update the condition counters based
on the received parameters, usually after processing a read or a batch of reads.
Examples:
Condition and region being different i.e a barcoded experiment:
>>> stats = ReadfishStatistics(None)
>>> stats._update_condition_counter("barcode_a", "region_a", "unblock", "action_a")
>>> stats.conditions
Counter({'barcode_a': 1, 'region_a': 1})
>>> stats.actions_conditions
Counter({('region_a', 'unblock', 'action_a'): 1, ('barcode_a', 'unblock', 'action_a'): 1})
Condition and region being the same i.e not a barcoded experiment:
>>> stats._update_condition_counter("barcode_a", "barcode_a", "decision_b", "action_b")
>>> stats.conditions
Counter({'barcode_a': 2, 'region_a': 1})
>>> stats.actions_conditions
Counter({('region_a', 'unblock', 'action_a'): 1, ('barcode_a', 'unblock', 'action_a'): 1, ('barcode_a', 'decision_b', 'action_b'): 1})
With action overridden:
>>> stats._update_condition_counter("barcode_b", "region_b", "decision_c", "action_c")
>>> stats.conditions
Counter({'barcode_a': 2, 'region_a': 1, 'barcode_b': 1, 'region_b': 1})
>>> stats.actions_conditions
Counter({('region_a', 'unblock', 'action_a'): 1, ('barcode_a', 'unblock', 'action_a'): 1,\
('barcode_a', 'decision_b', 'action_b'): 1, ('region_b', 'decision_c', 'action_c'): 1, ('barcode_b',\
'decision_c', 'action_c'): 1})
"""
with self._lock:
self.conditions[condition_name] += 1
# We have a region for this barcoded read, increment the count for the region
if condition_name != region_name:
self.conditions[region_name] += 1
self.actions_conditions[(region_name, decision_name, action_name)] += 1
self.actions_conditions[(region_name, decision_name, mode_name)] += 1
self.actions_conditions[(condition_name, decision_name, mode_name)] += 1
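
The behavioural change above, counting an overridden read under its new action name while still logging the originally chosen decision, can be sketched in isolation. This is a simplified standalone illustration, not readfish's actual `ReadfishStatistics` implementation:

```python
from __future__ import annotations

from collections import Counter

actions: Counter[str] = Counter()
debug_log: list[str] = []


def record_read(decision: str, action_overridden: bool, overridden_action_name: str | None) -> None:
    # Count under the NEW action name when the original decision was overridden...
    counted = overridden_action_name if action_overridden else decision
    actions[counted] += 1
    # ...but the debug log still records the originally chosen decision.
    debug_log.append(decision)


record_read("unblock", action_overridden=False, overridden_action_name=None)
record_read("unblock", action_overridden=True, overridden_action_name="stop_receiving")

print(actions)    # Counter({'unblock': 1, 'stop_receiving': 1})
print(debug_log)  # ['unblock', 'unblock']
```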
32 changes: 24 additions & 8 deletions src/readfish/entry_points/targets.py
@@ -255,7 +255,7 @@ def check_override_action(
condition: _Condition,
stop_receiving_action_list: list[tuple[int, int]],
unblock_batch_action_list: list[tuple[int, int]],
) -> tuple[Action, bool]:
) -> tuple[Action, bool, str | None]:
"""
Check the chosen Action and amend it based on conditional checks.
The action lists are appended to in place, so no return is required.
@@ -275,7 +275,8 @@ def check_override_action(
:param stop_receiving_action_list: List to append channels and read numbers for which 'stop receiving' action is decided.
:param unblock_batch_action_list: List to append channels, read numbers, and read IDs for which 'unblock' action is decided.
:return: A tuple containing the previous action taken for this read, and a boolean indicating if the action was overridden.
:return: A tuple containing the previous action taken for this read,
a boolean indicating if the action was overridden, and the name of the action it was overridden to (or None).
"""

@@ -328,7 +329,11 @@ def check_override_action(
)
# Add decided Action
self.previous_action_tracker.add_action(result.channel, action)
return previous_action, action_overridden
return (
previous_action,
action_overridden,
action.name if action_overridden else None,
)

def run(self):
"""Run the read until loop, in one continuous while loop."""
@@ -391,7 +396,11 @@ def run(self):
action = condition.get_action(result.decision)
seen_count = self.chunk_tracker.seen(result.channel, result.read_number)
# Check if there are any conditions that override the chosen action, exceed_max_chunks etc...
previous_action, action_overridden = self.check_override_action(
(
previous_action,
action_overridden,
overridden_action_name,
) = self.check_override_action(
control,
action,
result,
@@ -412,13 +421,20 @@ def run(self):
decision=action.name,
condition=condition.name,
barcode=result.barcode,
previous_action=previous_action.name
if previous_action is not None
else previous_action,
previous_action=(
previous_action.name
if previous_action is not None
else previous_action
),
action_overridden=action_overridden,
timestamp=time.time(),
# Anything below here is not included in the Debug log
region_name=self.conf.get_region(result.channel).name,
region_name=(
_region.name
if (_region := self.conf.get_region(result.channel))
else "flowcell"
),
overridden_action_name=overridden_action_name,
)

#######################################################################
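
The `get_region` fallback introduced above is the heart of the GH-289 fix: a barcode-only TOML defines no regions, so `conf.get_region(channel)` can return `None`, and the statistics now fall back to a generic "flowcell" label instead of crashing. A minimal sketch of the pattern, using a hypothetical stand-in for readfish's `Conf` object:

```python
from __future__ import annotations

from dataclasses import dataclass


@dataclass
class Region:
    name: str


class FakeConf:
    """Hypothetical stand-in for readfish's Conf; illustration only."""

    def __init__(self, regions: dict[int, Region]) -> None:
        self._regions = regions

    def get_region(self, channel: int) -> Region | None:
        # A barcode-only TOML configures no regions, so this returns None.
        return self._regions.get(channel)


conf = FakeConf({})  # barcode-only run: no regions configured
region_name = _region.name if (_region := conf.get_region(1)) else "flowcell"
print(region_name)  # "flowcell" rather than an AttributeError on None
```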
