Fix: sort delays by the most specific rule
* based on the length of the rule's user-agent pattern
stevenbarragan committed Aug 15, 2023
1 parent a478bee commit ed34610
Showing 2 changed files with 22 additions and 3 deletions.
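Why sorting by pattern length approximates specificity: the string form of a named-agent regex (e.g. /usasearch/) is longer than that of the catch-all /.*/, so sorting the collected [agent, delay] pairs by agent.to_s.size in descending order puts the most specific rule first. A minimal sketch of the ordering, with illustrative values not taken from the diff:

    # The catch-all pattern stringifies shorter than a named-agent pattern,
    # so the descending sort puts the named rule first.
    delays = [[/.*/, 10], [/usasearch/, 2]]
    delays.sort_by! { |(agent, _)| agent.to_s.size }.reverse!
    delays.first # => [/usasearch/, 2]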
8 changes: 5 additions & 3 deletions lib/robotex.rb
@@ -27,7 +27,7 @@ def initialize(uri, user_agent)
 
       @disallows = {}
       @allows = {}
-      @delays = {}
+      @delays = []
       @sitemaps = []
       agent = /.*/
       io.each do |line|
@@ -46,15 +46,17 @@ def initialize(uri, user_agent)
           @disallows[agent] ||= []
           @disallows[agent] << to_regex(value)
         when "crawl-delay"
-          @delays[agent] = value.to_i
+          @delays << [agent, value.to_i]
         when "sitemap"
           @sitemaps << URI.join(uri, value).to_s
         end
       end
 
+      @delays.sort_by! { |(agent, rule)| agent.to_s.size }.reverse!
+
       @parsed = true
     end
 
     def allowed?(uri, user_agent)
       return true unless @parsed
       allowed = true
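The diff does not show how the sorted list is consumed. A minimal sketch of what a matching lookup over the new array could look like (a hypothetical method body; the gem's actual ParsedRobots#delay is outside these hunks):

    # Hypothetical lookup: @delays is the [agent_regex, seconds] array built
    # above, already sorted longest-pattern-first, so the first regex that
    # matches the caller's user agent is the most specific rule.
    def delay(user_agent)
      @delays.each do |agent, seconds|
        return seconds if agent =~ user_agent
      end
      nil
    end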
17 changes: 17 additions & 0 deletions spec/robotex_spec.rb
@@ -106,6 +106,23 @@
         robotex.delay(SPEC_DOMAIN).should == 20
       end
     end
+
+    context 'when a more specific rule is declared' do
+      let(:robots) do
+        <<~ROBOTS
+          User-agent: *
+          Crawl-delay: 10
+
+          User-agent: usasearch
+          Crawl-delay: 2
+        ROBOTS
+      end
+
+      it 'overrides more generic ones' do
+        robotex = Robotex.new('usasearch')
+        robotex.delay(SPEC_DOMAIN).should == 2
+      end
+    end
   end
 
   describe '#sitemaps' do
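From a client's perspective, the new spec exercises the behavior roughly like this (a minimal sketch; SPEC_DOMAIN is the host stubbed by the suite, and 'SomeOtherBot' is an invented agent name):

    # The "usasearch" agent gets the specific 2-second delay; any other
    # agent falls through to the wildcard 10-second rule.
    specific = Robotex.new('usasearch')
    specific.delay(SPEC_DOMAIN) # => 2

    generic = Robotex.new('SomeOtherBot')
    generic.delay(SPEC_DOMAIN)  # => 10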
