From 8d5d963ec3a50e8d9cd4ddd0a2888462209f4867 Mon Sep 17 00:00:00 2001 From: h1alexbel Date: Mon, 15 Apr 2024 11:36:42 +0300 Subject: [PATCH 1/8] doc(#227): fetch topics too, document /steps in steps/README.md --- steps/README.md | 15 +++++++++++++++ steps/discover-repos.rb | 3 ++- 2 files changed, 17 insertions(+), 1 deletion(-) create mode 100644 steps/README.md diff --git a/steps/README.md b/steps/README.md new file mode 100644 index 00000000..2dca8c0b --- /dev/null +++ b/steps/README.md @@ -0,0 +1,15 @@ +### How it Works? +The entire process of gathering metrics from GitHub repos consists of these +steps: +* **Discovering**. Get the list of repos from GitHub and then create directories for them. +* **Polishing**. Delete directories that don't exist in the list of required repositories. +* **Unregistering**. Clean directories from the CSV register if their clones are absent. +* **Cloning**. In this step we run `git clone` on found repositories. +* **JPEEK**. Here, we build those gathered repositories and run +[jpeek](https://github.com/cqfn/jpeek) on them. +* **Filtering**. Is where we apply [all the filters](https://github.com/yegor256/cam/tree/master/filters) +in order to get rid of irrelevant classes (such as `*Test`, `*ITCase`, invalid +files and so on). Whole filtering process will be printed in the final report, +you can check it [here](http://cam.yegor256.com/cam-2024-03-02.pdf). +* **Measuring**. We calculate metrics for each file using these [metrics](https://github.com/yegor256/cam/tree/master/metrics). +* **Aggregating**. Finally, we aggregate all metrics in summary CSV files. diff --git a/steps/discover-repos.rb b/steps/discover-repos.rb index acae423f..28cc9c39 100755 --- a/steps/discover-repos.rb +++ b/steps/discover-repos.rb @@ -88,7 +88,8 @@ created_at: i[:created_at].iso8601, size: i[:size], open_issues_count: i[:open_issues_count], - description: i[:description] + description: i[:description], + topics: i[:topics] } puts "Found #{i[:full_name].inspect} GitHub repo ##{found.count} \ (#{i[:forks_count]} forks, #{i[:stargazers_count]} stars)" From e7e826284fd9485027eabcb44f513ede881dea2b Mon Sep 17 00:00:00 2001 From: h1alexbel Date: Mon, 15 Apr 2024 11:39:09 +0300 Subject: [PATCH 2/8] doc: more pretty --- steps/README.md | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/steps/README.md b/steps/README.md index 2dca8c0b..b187a3d6 100644 --- a/steps/README.md +++ b/steps/README.md @@ -1,9 +1,12 @@ ### How it Works? The entire process of gathering metrics from GitHub repos consists of these steps: -* **Discovering**. Get the list of repos from GitHub and then create directories for them. -* **Polishing**. Delete directories that don't exist in the list of required repositories. -* **Unregistering**. Clean directories from the CSV register if their clones are absent. +* **Discovering**. Here we fetch the list of repos from GitHub and then create +directories for them. +* **Polishing**. Then we delete directories that don't exist in the list of +required repositories. +* **Unregistering**. During this step, we clean directories from the CSV +register if their clones are absent. * **Cloning**. In this step we run `git clone` on found repositories. * **JPEEK**. Here, we build those gathered repositories and run [jpeek](https://github.com/cqfn/jpeek) on them. From 122c7ffc181203ae9de0bc738248d6769ec4d6df Mon Sep 17 00:00:00 2001 From: h1alexbel Date: Mon, 15 Apr 2024 11:47:28 +0300 Subject: [PATCH 3/8] feat(#227): topics csv arg --- tests/steps/test-discover-repos.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/steps/test-discover-repos.sh b/tests/steps/test-discover-repos.sh index c26850fd..0df2891b 100755 --- a/tests/steps/test-discover-repos.sh +++ b/tests/steps/test-discover-repos.sh @@ -37,7 +37,7 @@ tex=${TARGET}/foo.tex test -e "${csv}" test -s "${tex}" test "$(wc -l < "${csv}" | xargs)" = '4' - test "$(head -1 "${csv}" | tr "," "\n" | wc -l | xargs)" = '8' + test "$(head -1 "${csv}" | tr "," "\n" | wc -l | xargs)" = '9' } > "${stdout}" 2>&1 echo "👍🏻 Small repositories discovery test is succeed" @@ -50,6 +50,6 @@ echo "👍🏻 Small repositories discovery test is succeed" test -e "${csv}" test -s "${tex}" test "$(wc -l < "${csv}" | xargs)" = '36' - test "$(head -1 "${csv}" | tr "," "\n" | wc -l | xargs)" = '8' + test "$(head -1 "${csv}" | tr "," "\n" | wc -l | xargs)" = '9' } > "${stdout}" 2>&1 echo "👍🏻 Medium repositories discovery test is succeed" From 06988eeb853c57b599749af9edf38220b1484dd3 Mon Sep 17 00:00:00 2001 From: h1alexbel Date: Mon, 15 Apr 2024 13:22:37 +0300 Subject: [PATCH 4/8] doc: clean for mdlint --- steps/README.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/steps/README.md b/steps/README.md index b187a3d6..69fd99ee 100644 --- a/steps/README.md +++ b/steps/README.md @@ -1,6 +1,10 @@ -### How it Works? +# Gathering Steps + +## How it Works? + The entire process of gathering metrics from GitHub repos consists of these steps: + * **Discovering**. Here we fetch the list of repos from GitHub and then create directories for them. * **Polishing**. Then we delete directories that don't exist in the list of From ed282a8f0aa3290cbedb6d546fff737545c0b905 Mon Sep 17 00:00:00 2001 From: h1alexbel Date: Mon, 15 Apr 2024 14:29:17 +0300 Subject: [PATCH 5/8] doc: heading at ## level --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 5d1bc3c0..3c21887b 100644 --- a/README.md +++ b/README.md @@ -143,7 +143,7 @@ docker run -v $(pwd):/c --rm yegor256/cam:0.9.2 make -C /c test ## How to Calculate Additional Metrics -You may want to use this dataset as a basis, with an intend of adding your own +You may want to use this dataset as a basis, with an intent of adding your own metrics on top of it. It should be easy: * Clone this repo into `cam/` directory From 0c0571e807b18379f3f135541a1c04bbba49bf58 Mon Sep 17 00:00:00 2001 From: h1alexbel Date: Mon, 15 Apr 2024 14:29:47 +0300 Subject: [PATCH 6/8] doc: heading at ## level --- steps/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/steps/README.md b/steps/README.md index 69fd99ee..955eef99 100644 --- a/steps/README.md +++ b/steps/README.md @@ -1,4 +1,4 @@ -# Gathering Steps +## Gathering Steps ## How it Works? From 26e479babb7e2622dec3973ce17721527e9cab0d Mon Sep 17 00:00:00 2001 From: h1alexbel Date: Mon, 15 Apr 2024 14:32:06 +0300 Subject: [PATCH 7/8] doc: typo --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 3c21887b..5d1bc3c0 100644 --- a/README.md +++ b/README.md @@ -143,7 +143,7 @@ docker run -v $(pwd):/c --rm yegor256/cam:0.9.2 make -C /c test ## How to Calculate Additional Metrics -You may want to use this dataset as a basis, with an intent of adding your own +You may want to use this dataset as a basis, with an intend of adding your own metrics on top of it. It should be easy: * Clone this repo into `cam/` directory From 1e921dd76b1c0fd2cecbf450fb3b8e7712829f27 Mon Sep 17 00:00:00 2001 From: h1alexbel Date: Mon, 15 Apr 2024 15:05:38 +0300 Subject: [PATCH 8/8] doc: one heading --- steps/README.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/steps/README.md b/steps/README.md index 955eef99..d4625b28 100644 --- a/steps/README.md +++ b/steps/README.md @@ -1,7 +1,5 @@ ## Gathering Steps -## How it Works? - The entire process of gathering metrics from GitHub repos consists of these steps: