-
Notifications
You must be signed in to change notification settings - Fork 0
/
vege.rb
89 lines (78 loc) · 2.62 KB
/
vege.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# encoding:utf-8
require 'rubygems'
require 'active_record'
require 'nokogiri'
require 'date'
require 'open-uri'
require 'pp'
require 'yaml'
# ActiveRecord models used by the scraper. None of them declares behaviour of
# its own; everything comes from ActiveRecord::Base.

# Not referenced by the code visible in this file.
class Catalog < ActiveRecord::Base
end

# A vegetable record. This file reads `id` and `serial`, and writes `serial`,
# `name` and `r_name` when it meets a serial it has not seen before.
class Vegetable < ActiveRecord::Base
end

# A dated price record. This file orders it by `log_date` to find the most
# recent entry already stored.
class VegetableLog < ActiveRecord::Base
end
# Scraper for the daily vegetable price table served by www.tapmc.com.tw.
#
# Robot.go! connects to the database and then fetches, one day at a time,
# every date after the most recent VegetableLog entry (or after 2002-01-01
# when no entry exists) up to and including today. Each table line becomes
# one row in vegetable_logs.
module Robot
  # Address of the daily price page; the query string is added by price_url.
  PRICE_PAGE = 'http://www.tapmc.com.tw/tapmc_new16/price1.asp'.freeze

  # Connects ActiveRecord to the database described in database.yml.
  #
  # The configuration section is chosen by ENV['RACK_ENV'] and falls back to
  # 'development'. database.yml is read relative to the current working
  # directory; change the path here if yours lives elsewhere.
  def self.initialize
    environment = ENV.fetch('RACK_ENV', 'development')
    dbconfig = YAML.load(File.read('database.yml'))
    ActiveRecord::Base.establish_connection dbconfig[environment]
  end

  # Entry point: set up the database connection, then import missing days.
  def self.go!
    initialize
    vegetable
  end

  # Imports every day that is not in the database yet.
  #
  # Reloads the serial => id cache from the vegetables table, then walks from
  # the day after the last logged date up to today, calling vegetable_filter
  # for each date and pausing 0.1 s between requests.
  def self.vegetable
    last_item = VegetableLog.order('log_date DESC').first
    start_at = last_item ? last_item.log_date : Date.parse('2002-1-1')
    end_at = Date.today

    @@all_vegetable_set = {}
    Vegetable.select('id , serial').each do |v|
      @@all_vegetable_set[v.serial.strip] = v.id
    end

    puts 'Already done the parsing' if start_at == end_at

    while start_at < end_at
      start_at = start_at.next_day
      vegetable_filter(start_at)
      sleep(0.1) # pause between two reads of the target website
    end
  end

  # Returns the id of the vegetable with the given serial, creating the
  # record (with +name+ and +r_name+) when the serial is not cached yet.
  #
  # @param serial [String] serial as scraped; surrounding whitespace is stripped
  # @param name [String, nil] stored as Vegetable#name for a new record
  # @param r_name [String, nil] stored as Vegetable#r_name for a new record
  # @return [Integer, nil] the record id (nil if the new record was not saved)
  def self.get_vegetable_id(serial, name, r_name)
    serial = serial.strip
    cached = @@all_vegetable_set[serial]
    return cached if cached

    v = Vegetable.new
    v.serial = serial
    v.name = name
    v.r_name = r_name
    v.save
    @@all_vegetable_set[serial] = v.id
  end

  # Downloads the price page for +date+ and bulk-inserts its rows into
  # vegetable_logs. Prints "skip" when the page has no price table or the
  # table holds no data rows.
  #
  # @param date [Date] the day to import
  # @return [nil]
  def self.vegetable_filter(date)
    url = price_url(date)
    day = date.strftime('%Y-%m-%d')
    puts url

    doc = Nokogiri::HTML(fetch_page(url))
    target = doc.css('form table')[1]
    rows = target ? price_rows(target) : []

    # An INSERT with an empty VALUES list is invalid SQL, so treat a table
    # without data rows the same way as a missing table.
    if rows.empty?
      puts "skip => #{day}"
      return
    end

    # Cells 0-2 identify the vegetable, cells 4-6 are the three prices;
    # cell 3 is not stored.
    values = rows.map do |cells|
      "(#{get_vegetable_id(cells[0], cells[1], cells[2])},#{cells[4].to_i},#{cells[5].to_i},#{cells[6].to_i},'#{day}')"
    end.join(',')
    ActiveRecord::Base.connection.execute("INSERT INTO vegetable_logs (vegetable_id,price1,price2,price3,log_date) VALUES #{values}")
    puts "go!! => #{day} : count => #{rows.length}"
  end

  # Builds the price-page URL for +date+. The YEARS parameter is the
  # Gregorian year minus 1911, zero-padded to three digits (presumably the
  # ROC calendar year expected by the site -- confirm against the site).
  def self.price_url(date)
    year = (date.year - 1911).to_s.rjust(3, '0')
    "#{PRICE_PAGE}?YEARS=#{year}&MONTHS=#{date.strftime('%m')}&DAYS=#{date.strftime('%d')}&FV_CODE=A&MARKET=1&temp=Z"
  end

  # Opens +url+ through open-uri. Kernel#open no longer accepts URLs since
  # Ruby 3.0; URI.open is public from Ruby 2.5 on, older versions fall back
  # to the patched Kernel#open.
  def self.fetch_page(url)
    URI.respond_to?(:open) ? URI.open(url) : open(url)
  end

  # Extracts the text of every `td font` cell, row by row, from the price
  # table. The first two rows are skipped (presumably headers), and rows
  # without any such cell are dropped because they carry no serial.
  def self.price_rows(table)
    data_rows = table.css('tr')[2..-1] || [] # nil when fewer than two rows
    data_rows.map { |row| row.css('td font').map(&:text) }
             .reject(&:empty?)
  end

  private_class_method :price_url, :fetch_page, :price_rows
end
# Runs the import as soon as this file is loaded: Robot.go! sets up the
# database connection and then fetches the days that are not stored yet.
Robot.go!