From 8d5d963ec3a50e8d9cd4ddd0a2888462209f4867 Mon Sep 17 00:00:00 2001
From: h1alexbel <hialexbel@gmail.com>
Date: Mon, 15 Apr 2024 11:36:42 +0300
Subject: [PATCH 1/8] doc(#227): fetch topics too, document /steps in
 steps/README.md

---
 steps/README.md         | 15 +++++++++++++++
 steps/discover-repos.rb |  3 ++-
 2 files changed, 17 insertions(+), 1 deletion(-)
 create mode 100644 steps/README.md

diff --git a/steps/README.md b/steps/README.md
new file mode 100644
index 00000000..2dca8c0b
--- /dev/null
+++ b/steps/README.md
@@ -0,0 +1,15 @@
+### How it Works?
+The entire process of gathering metrics from GitHub repos consists of these
+steps:
+* **Discovering**. Get the list of repos from GitHub and then create directories for them.
+* **Polishing**. Delete directories that don't exist in the list of required repositories.
+* **Unregistering**. Clean directories from the CSV register if their clones are absent.
+* **Cloning**. In this step we run `git clone` on found repositories.
+* **JPEEK**. Here, we build those gathered repositories and run
+[jpeek](https://github.com/cqfn/jpeek) on them.
+* **Filtering**. Here we apply [all the filters](https://github.com/yegor256/cam/tree/master/filters)
+in order to get rid of irrelevant classes (such as `*Test`, `*ITCase`, invalid
+files and so on). The whole filtering process is printed in the final report;
+you can check it [here](http://cam.yegor256.com/cam-2024-03-02.pdf).
+* **Measuring**. We calculate metrics for each file using these [metrics](https://github.com/yegor256/cam/tree/master/metrics).
+* **Aggregating**. Finally, we aggregate all metrics in summary CSV files.
diff --git a/steps/discover-repos.rb b/steps/discover-repos.rb
index acae423f..28cc9c39 100755
--- a/steps/discover-repos.rb
+++ b/steps/discover-repos.rb
@@ -88,7 +88,8 @@
       created_at: i[:created_at].iso8601,
       size: i[:size],
       open_issues_count: i[:open_issues_count],
-      description: i[:description]
+      description: i[:description],
+      topics: i[:topics]
     }
     puts "Found #{i[:full_name].inspect} GitHub repo ##{found.count} \
 (#{i[:forks_count]} forks, #{i[:stargazers_count]} stars)"

From e7e826284fd9485027eabcb44f513ede881dea2b Mon Sep 17 00:00:00 2001
From: h1alexbel <hialexbel@gmail.com>
Date: Mon, 15 Apr 2024 11:39:09 +0300
Subject: [PATCH 2/8] doc: more pretty

---
 steps/README.md | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/steps/README.md b/steps/README.md
index 2dca8c0b..b187a3d6 100644
--- a/steps/README.md
+++ b/steps/README.md
@@ -1,9 +1,12 @@
 ### How it Works?
 The entire process of gathering metrics from GitHub repos consists of these
 steps:
-* **Discovering**. Get the list of repos from GitHub and then create directories for them.
-* **Polishing**. Delete directories that don't exist in the list of required repositories.
-* **Unregistering**. Clean directories from the CSV register if their clones are absent.
+* **Discovering**. Here we fetch the list of repos from GitHub and then create
+directories for them.
+* **Polishing**. Then we delete directories that don't exist in the list of
+required repositories.
+* **Unregistering**. During this step, we clean directories from the CSV
+register if their clones are absent.
 * **Cloning**. In this step we run `git clone` on found repositories.
 * **JPEEK**. Here, we build those gathered repositories and run
 [jpeek](https://github.com/cqfn/jpeek) on them.

From 122c7ffc181203ae9de0bc738248d6769ec4d6df Mon Sep 17 00:00:00 2001
From: h1alexbel <hialexbel@gmail.com>
Date: Mon, 15 Apr 2024 11:47:28 +0300
Subject: [PATCH 3/8] feat(#227): topics csv arg

---
 tests/steps/test-discover-repos.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/steps/test-discover-repos.sh b/tests/steps/test-discover-repos.sh
index c26850fd..0df2891b 100755
--- a/tests/steps/test-discover-repos.sh
+++ b/tests/steps/test-discover-repos.sh
@@ -37,7 +37,7 @@ tex=${TARGET}/foo.tex
     test -e "${csv}"
     test -s "${tex}"
     test "$(wc -l < "${csv}" | xargs)" = '4'
-    test "$(head -1 "${csv}" | tr "," "\n" | wc -l | xargs)" = '8'
+    test "$(head -1 "${csv}" | tr "," "\n" | wc -l | xargs)" = '9'
 } > "${stdout}" 2>&1
 echo "👍🏻 Small repositories discovery test is succeed"
 
@@ -50,6 +50,6 @@ echo "👍🏻 Small repositories discovery test is succeed"
     test -e "${csv}"
     test -s "${tex}"
     test "$(wc -l < "${csv}" | xargs)" = '36'
-    test "$(head -1 "${csv}" | tr "," "\n" | wc -l | xargs)" = '8'
+    test "$(head -1 "${csv}" | tr "," "\n" | wc -l | xargs)" = '9'
 } > "${stdout}" 2>&1
 echo "👍🏻 Medium repositories discovery test is succeed"

From 06988eeb853c57b599749af9edf38220b1484dd3 Mon Sep 17 00:00:00 2001
From: h1alexbel <hialexbel@gmail.com>
Date: Mon, 15 Apr 2024 13:22:37 +0300
Subject: [PATCH 4/8] doc: clean for mdlint

---
 steps/README.md | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/steps/README.md b/steps/README.md
index b187a3d6..69fd99ee 100644
--- a/steps/README.md
+++ b/steps/README.md
@@ -1,6 +1,10 @@
-### How it Works?
+# Gathering Steps
+
+## How it Works?
+
 The entire process of gathering metrics from GitHub repos consists of these
 steps:
+
 * **Discovering**. Here we fetch the list of repos from GitHub and then create
 directories for them.
 * **Polishing**. Then we delete directories that don't exist in the list of

From ed282a8f0aa3290cbedb6d546fff737545c0b905 Mon Sep 17 00:00:00 2001
From: h1alexbel <hialexbel@gmail.com>
Date: Mon, 15 Apr 2024 14:29:17 +0300
Subject: [PATCH 5/8] doc: fix typo (intend -> intent)

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 5d1bc3c0..3c21887b 100644
--- a/README.md
+++ b/README.md
@@ -143,7 +143,7 @@ docker run -v $(pwd):/c --rm yegor256/cam:0.9.2 make -C /c test
 
 ## How to Calculate Additional Metrics
 
-You may want to use this dataset as a basis, with an intend of adding your own
+You may want to use this dataset as a basis, with an intent of adding your own
 metrics on top of it. It should be easy:
 
 * Clone this repo into `cam/` directory

From 0c0571e807b18379f3f135541a1c04bbba49bf58 Mon Sep 17 00:00:00 2001
From: h1alexbel <hialexbel@gmail.com>
Date: Mon, 15 Apr 2024 14:29:47 +0300
Subject: [PATCH 6/8] doc: heading at ## level

---
 steps/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/steps/README.md b/steps/README.md
index 69fd99ee..955eef99 100644
--- a/steps/README.md
+++ b/steps/README.md
@@ -1,4 +1,4 @@
-# Gathering Steps
+## Gathering Steps
 
 ## How it Works?
 

From 26e479babb7e2622dec3973ce17721527e9cab0d Mon Sep 17 00:00:00 2001
From: h1alexbel <hialexbel@gmail.com>
Date: Mon, 15 Apr 2024 14:32:06 +0300
Subject: [PATCH 7/8] doc: revert typo fix (intent -> intend)

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 3c21887b..5d1bc3c0 100644
--- a/README.md
+++ b/README.md
@@ -143,7 +143,7 @@ docker run -v $(pwd):/c --rm yegor256/cam:0.9.2 make -C /c test
 
 ## How to Calculate Additional Metrics
 
-You may want to use this dataset as a basis, with an intent of adding your own
+You may want to use this dataset as a basis, with an intend of adding your own
 metrics on top of it. It should be easy:
 
 * Clone this repo into `cam/` directory

From 1e921dd76b1c0fd2cecbf450fb3b8e7712829f27 Mon Sep 17 00:00:00 2001
From: h1alexbel <hialexbel@gmail.com>
Date: Mon, 15 Apr 2024 15:05:38 +0300
Subject: [PATCH 8/8] doc: one heading

---
 steps/README.md | 2 --
 1 file changed, 2 deletions(-)

diff --git a/steps/README.md b/steps/README.md
index 955eef99..d4625b28 100644
--- a/steps/README.md
+++ b/steps/README.md
@@ -1,7 +1,5 @@
 ## Gathering Steps
 
-## How it Works?
-
 The entire process of gathering metrics from GitHub repos consists of these
 steps: