diff --git a/steps/README.md b/steps/README.md new file mode 100644 index 00000000..d4625b28 --- /dev/null +++ b/steps/README.md @@ -0,0 +1,20 @@ +## Gathering Steps + +The entire process of gathering metrics from GitHub repos consists of these +steps: + +* **Discovering**. Here we fetch the list of repos from GitHub and then create +directories for them. +* **Polishing**. Then we delete directories that don't exist in the list of +required repositories. +* **Unregistering**. During this step, we remove directories from the CSV +register if their clones are absent. +* **Cloning**. In this step we run `git clone` on the discovered repositories. +* **JPEEK**. Here, we build the gathered repositories and run +[jpeek](https://github.com/cqfn/jpeek) on them. +* **Filtering**. Here we apply [all the filters](https://github.com/yegor256/cam/tree/master/filters) +in order to get rid of irrelevant classes (such as `*Test`, `*ITCase`, invalid +files and so on). The whole filtering process is printed in the final report, +which you can check [here](http://cam.yegor256.com/cam-2024-03-02.pdf). +* **Measuring**. We calculate metrics for each file using these [metrics](https://github.com/yegor256/cam/tree/master/metrics). +* **Aggregating**. Finally, we aggregate all metrics in summary CSV files. 
diff --git a/steps/discover-repos.rb b/steps/discover-repos.rb index acae423f..28cc9c39 100755 --- a/steps/discover-repos.rb +++ b/steps/discover-repos.rb @@ -88,7 +88,8 @@ created_at: i[:created_at].iso8601, size: i[:size], open_issues_count: i[:open_issues_count], - description: i[:description] + description: i[:description], + topics: i[:topics] } puts "Found #{i[:full_name].inspect} GitHub repo ##{found.count} \ (#{i[:forks_count]} forks, #{i[:stargazers_count]} stars)" diff --git a/tests/steps/test-discover-repos.sh b/tests/steps/test-discover-repos.sh index c26850fd..0df2891b 100755 --- a/tests/steps/test-discover-repos.sh +++ b/tests/steps/test-discover-repos.sh @@ -37,7 +37,7 @@ tex=${TARGET}/foo.tex test -e "${csv}" test -s "${tex}" test "$(wc -l < "${csv}" | xargs)" = '4' - test "$(head -1 "${csv}" | tr "," "\n" | wc -l | xargs)" = '8' + test "$(head -1 "${csv}" | tr "," "\n" | wc -l | xargs)" = '9' } > "${stdout}" 2>&1 echo "👍🏻 Small repositories discovery test is succeed" @@ -50,6 +50,6 @@ echo "👍🏻 Small repositories discovery test is succeed" test -e "${csv}" test -s "${tex}" test "$(wc -l < "${csv}" | xargs)" = '36' - test "$(head -1 "${csv}" | tr "," "\n" | wc -l | xargs)" = '8' + test "$(head -1 "${csv}" | tr "," "\n" | wc -l | xargs)" = '9' } > "${stdout}" 2>&1 echo "👍🏻 Medium repositories discovery test is succeed"