GSA · stevenbarragan · Aug 15, 2023 · Aug 10, 2023 · Aug 10, 2023 · Aug 10, 2023
diff --git a/lib/robotex.rb b/lib/robotex.rb
@@ -27,7 +27,7 @@ def initialize(uri, user_agent)
 
       @disallows = {}
       @allows = {}
-      @delays = {}
+      @delays = []
       @sitemaps = []
       agent = /.*/
       io.each do |line|
@@ -46,15 +46,17 @@ def initialize(uri, user_agent)
             @disallows[agent] ||= []
             @disallows[agent] << to_regex(value)
           when "crawl-delay"
-            @delays[agent] = value.to_i
+            @delays << [agent, value.to_i]
           when "sitemap"
             @sitemaps << URI.join(uri, value).to_s
         end
       end
+
+      @delays.sort_by! { |agent, _delay| -agent.to_s.size } # longest (most specific) agent pattern first
 
       @parsed = true
     end
-    
+
     def allowed?(uri, user_agent)
       return true unless @parsed
       allowed = true

diff --git a/robotex.gemspec b/robotex.gemspec
@@ -15,7 +15,7 @@ spec = Gem::Specification.new do |s|
   s.add_development_dependency "rake", ">=0.9.2"
   s.add_development_dependency "rdoc", ">=3.12"
   s.add_development_dependency "rspec", ">=2.8.0"
-  s.add_development_dependency "fakeweb", ">=1.3.0"
+  s.add_development_dependency "webmock", "~> 3.18"
 
   s.files = %w[
     VERSION

diff --git a/spec/robotex_spec.rb b/spec/robotex_spec.rb
@@ -19,13 +19,11 @@
     ROBOTS
   end
 
-  let(:response) do
-    { body: robots, content_type: 'text/plain', status: [200, "OK"] }
-  end
+  let(:robots_url) { "#{ SPEC_DOMAIN }robots.txt" }
 
   before do
-    FakeWeb.allow_net_connect = false
-    FakeWeb.register_uri(:get, SPEC_DOMAIN + 'robots.txt', response)
+    stub_request(:get, robots_url)
+      .to_return(status: [200, "OK"], headers: { 'Content-Type' => 'text/plain' }, body: robots)
   end
 
   describe '#initialize' do
@@ -47,51 +45,49 @@
     context 'when the robots.txt disallows the user-agent to the url' do
       it 'returns false' do
         robotex = Robotex.new('bender')
-        robotex.allowed?(SPEC_DOMAIN + 'my_shiny_metal_ass').should be_false
+        robotex.allowed?(SPEC_DOMAIN + 'my_shiny_metal_ass').should be_falsey
       end
     end
 
     context 'when the robots.txt disallows the user-agent to some urls, but allows this one' do
       it 'returns true' do
         robotex = Robotex.new('bender')
-        robotex.allowed?(SPEC_DOMAIN + 'cigars').should be_true
+        robotex.allowed?(SPEC_DOMAIN + 'cigars').should be_truthy
       end
     end
 
     context 'when the robots.txt disallows any user-agent to the url' do
       it 'returns false' do
         robotex = Robotex.new
-        robotex.allowed?(SPEC_DOMAIN + 'login').should be_false
+        robotex.allowed?(SPEC_DOMAIN + 'login').should be_falsey
       end
     end
 
     context 'when the robots.txt disallows and then allows the url' do
       it 'returns false' do
         robotex = Robotex.new
-        robotex.allowed?(SPEC_DOMAIN + 'locked').should be_false
+        robotex.allowed?(SPEC_DOMAIN + 'locked').should be_falsey
       end
     end
 
     context 'when a rule includes a comment' do
       it 'returns false' do
         robotex = Robotex.new
-        robotex.allowed?(SPEC_DOMAIN + 'archive/old').should be_false
+        robotex.allowed?(SPEC_DOMAIN + 'archive/old').should be_falsey
       end
     end
 
     context 'when the robots.txt url is redirected' do
-      let(:redirection) do
-        { status: [301], location: 'https://example.com/robots.txt' }
-      end
+      let(:robots_url) { "#{ SPEC_DOMAIN }real-robots.txt" }
 
       before do
-        FakeWeb.register_uri(:get, SPEC_DOMAIN + 'robots.txt', redirection)
-        FakeWeb.register_uri(:get, 'https://example.com/robots.txt', response)
+        stub_request(:get, "#{ SPEC_DOMAIN }robots.txt")
+          .to_return(status: [301], headers: { 'Location' => robots_url })
       end
 
       it 'returns false' do
         robotex = Robotex.new
-        robotex.allowed?(SPEC_DOMAIN + 'locked').should be_false
+        robotex.allowed?(SPEC_DOMAIN + 'locked').should be_falsey
       end
     end
   end
@@ -110,6 +106,23 @@
         robotex.delay(SPEC_DOMAIN).should == 20
       end
     end
+
+    context 'when a more specific rule is declared' do
+      let(:robots) do
+        <<~ROBOTS
+          User-agent: *
+          Crawl-delay: 10
+
+          User-agent: usasearch
+          Crawl-delay: 2
+        ROBOTS
+      end
+
+      it 'overrides more generic ones' do
+        robotex = Robotex.new('usasearch')
+        robotex.delay(SPEC_DOMAIN).should == 2
+      end
+    end
   end
 
   describe '#sitemaps' do

diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb
@@ -2,7 +2,7 @@
 require 'bundler/setup'
 $:.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
 require 'robotex'
-require 'fakeweb'
+require 'webmock/rspec'
 
 SPEC_DOMAIN = 'http://www.example.com/'