Skip to content

Commit

Permalink
Url matchers
Browse files Browse the repository at this point in the history
  • Loading branch information
Vladimir Tikhonov committed Mar 13, 2015
1 parent 5b0ed61 commit eff104f
Show file tree
Hide file tree
Showing 4 changed files with 94 additions and 0 deletions.
7 changes: 7 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,13 @@ r1 = Regexy::Web::IPv6.new # matches '::1', '2001:DB8::8:800:200C:41
r1 = Regexy::Web::IPv6.new(:with_port) # matches '[::1]:80' and so on
any_ipv6 = Regexy::Web::IPv6.new(:normal) | Regexy::Web::IPv6.new(:with_port) # matches IPs with and without a port
```
### Regexy::Web::Url

Generates regular expressions for matching URLs (with Unicode support).

```ruby
r1 = Regexy::Web::Url.new # matches 'http://foo.com', 'www.foo.com' and 'foo.com'
```

## Contributing
Have an idea for a new regular expression? Create an [issue](https://github.com/vladimir-tikhonov/regexy/issues) (some test cases will be much appreciated) or open a [pull request](https://github.com/vladimir-tikhonov/regexy/pulls).
1 change: 1 addition & 0 deletions lib/regexy/web.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,6 @@ module Web
autoload :Email, 'regexy/web/email'
autoload :IPv4, 'regexy/web/ip'
autoload :IPv6, 'regexy/web/ip'
autoload :Url, 'regexy/web/url'
end
end
20 changes: 20 additions & 0 deletions lib/regexy/web/url.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# encoding: UTF-8

module Regexy
  module Web
    # Regular expression for matching URLs.
    #
    # Accepts scheme-prefixed URLs ('http://foo.com') as well as bare hosts
    # ('www.foo.com', 'foo.com'), with optional user info, port, path, query
    # string and fragment. Unicode letters are allowed via \p{L}.
    class Url < ::Regexy::Regexp
      # The whole-string URL pattern.
      #
      # Anchored with \A / \z instead of ^ / $: in Ruby ^ and $ match at every
      # line boundary, so a multi-line string whose first line looks like a URL
      # (e.g. "http://foo.com\n<anything>") would otherwise be accepted.
      #
      # Flags: i - case-insensitive; x - extended mode, which is what allows the
      # pattern to be wrapped across several lines (whitespace outside character
      # classes is ignored, and the literal '#' of the fragment is escaped).
      URL = /\A([a-z][a-z\d+\-.]*:(\/\/([\p{L}\d\-._~%!$&'()*+,;=]+@)?([\p{L}\d\-._~%]+|
        \[[\p{L}\d:.]+\]|\[v[a-f0-9][\p{L}\d\-._~%!$&'()*+,;=:]+\])(:[0-9]+)?
        (\/[\p{L}\d\-._~%!$&'()*+,;=:@]+)*\/?|(\/?[\p{L}\d\-._~%!$&'()*+,;=:@]+
        (\/[\p{L}\d\-._~%!$&'()*+,;=:@]+)*\/?)?)|([\p{L}\d\-._~%!$&'()*+,;=@]+
        (\/[\p{L}\d\-._~%!$&'()*+,;=:@]+)*\/?|(\/[\p{L}\d\-._~%!$&'()*+,;=:@]+)
        +\/?))
        (\?[\p{L}\d\-._~%!$&'()*+,;=:@\/?]*)?(\#[\p{L}\d\-._~%!$&'()*+,;=:@\/?]*)?\z
      /ix.freeze

      # Builds the URL matcher.
      #
      # @param args extra arguments forwarded unchanged to Regexy::Regexp#initialize
      #   after the URL pattern (their meaning is defined by the superclass —
      #   NOTE(review): confirm against Regexy::Regexp).
      def initialize(*args)
        super(URL, *args)
      end
    end
  end
end
66 changes: 66 additions & 0 deletions spec/web/url_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# encoding: UTF-8

describe Regexy::Web::Url do
  # Strings the URL matcher is expected to accept.
  VALID_URL = [
    'http://foo.com/blah_blah',
    'http://foo.com/blah_blah/',
    'http://foo.com/blah_blah_(wikipedia)',
    'http://foo.com/blah_blah_(wikipedia)_(again)',
    'http://www.example.com/wpstyle/?p=364',
    'https://www.example.com/foo/?bar=baz&inga=42&quux',
    'http://userid@example.com',
    'http://userid@example.com/',
    'http://userid@example.com:8080',
    'http://userid@example.com:8080/',
    'http://142.42.1.1/',
    'http://foo.com/blah_(wikipedia)#cite-1',
    'http://foo.com/blah_(wikipedia)_blah#cite-1',
    'http://foo.com/(something)?after=parens',
    'http://code.google.com/events/#&product=browser',
    'http://j.mp',
    'ftp://foo.bar/baz',
    'http://foo.bar/?q=Test%20URL-encoded%20stuff',
    'http://مثال.إختبار',
    'http://例子.测试',
    'http://1337.net',
    'http://a.b-c.de',
    'http://223.255.255.254',
    'http://киррилический/адрес.рф',
    'www.foo.bar',
    'foo.bar',
    'foo.bar#anchor'
  ].freeze

  # Strings the URL matcher is expected to reject.
  INVALID_URL = [
    'http://',
    'http://?',
    'http://??',
    'http://??/',
    'http://#',
    'http://##',
    'http://##/',
    'http://foo.bar?q=Spaces should be encoded',
    '//',
    '//a',
    '///a',
    '///',
    'http:///a',
    'http:// shouldfail.com',
    ':// should fail',
    'http://foo.bar/foo(bar)baz quux'
  ].freeze

  let(:r) { Regexy::Web::Url.new }

  it 'accepts valid url' do
    # Collect every offender so a failure names all of them instead of
    # stopping silently at the first one.
    rejected = VALID_URL.reject { |url| url =~ r }
    expect(rejected).to be_empty, "expected these URLs to match: #{rejected.inspect}"
  end

  it 'declines invalid url' do
    accepted = INVALID_URL.select { |url| url =~ r }
    expect(accepted).to be_empty, "expected these URLs not to match: #{accepted.inspect}"
  end
end

0 comments on commit eff104f

Please sign in to comment.