/
archive_api.rb
40 lines (35 loc) · 1017 Bytes
/
archive_api.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
require 'json'
require 'uri'
# Mixin that queries the web.archive.org CDX search endpoint for the list of
# captures of a URL.
#
# The including object may set the instance variables @all, @from_timestamp
# and @to_timestamp to tune the query; unset (nil) values are treated as
# "not specified".
#
# NOTE: URI#open is provided by open-uri, which this file does not require
# itself; it is presumably loaded by the including code.
module ArchiveAPI
  # Fetches one page of capture rows for +url+.
  #
  # @param url [String] value sent as the "url" query parameter
  # @param page_index [Integer, nil] page to request; nil omits the parameter
  # @return [Array] the parsed JSON response with the leading
  #   ["timestamp", "original"] header row removed when present, or an empty
  #   array when the response body is not valid JSON
  def get_raw_list_from_api(url, page_index)
    request_url = URI("https://web.archive.org/cdx/search/xd")
    params = [["output", "json"], ["url", url]] + parameters_for_api(page_index)
    request_url.query = URI.encode_www_form(params)

    # Block form so the underlying handle is closed as soon as the body has
    # been read, instead of lingering until garbage collection.
    json = JSON.parse(request_url.open(&:read))
    # The first row only echoes the requested field names; drop it.
    json.shift if json[0] == ["timestamp", "original"]
    json
  rescue JSON::ParserError
    []
  end

  # Builds the query parameters shared by every request.
  #
  # @param page_index [Integer, nil] page to request; nil omits the parameter
  # @return [Array<Array>] list of [name, value] pairs
  def parameters_for_api(page_index)
    parameters = [["fl", "timestamp,original"], ["collapse", "digest"], ["gzip", "false"]]
    # Unless @all is set, only captures with HTTP status 200 are requested.
    parameters.push(["filter", "statuscode:200"]) unless @all
    # A timestamp of 0 is treated the same as an unset one.
    parameters.push(["from", @from_timestamp.to_s]) if @from_timestamp && @from_timestamp != 0
    parameters.push(["to", @to_timestamp.to_s]) if @to_timestamp && @to_timestamp != 0
    parameters.push(["page", page_index]) if page_index
    parameters
  end
end