This repository has been archived by the owner on Aug 7, 2019. It is now read-only.
/
script.py
115 lines (97 loc) · 4.74 KB
/
script.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
# -*- coding: utf-8 -*-
import urllib2
import datetime
from bs4 import BeautifulSoup
def clean():
    """Scroll earlier terminal output out of view.

    Performs forty prints of a single newline character. Each print also
    appends its own line ending, so eighty line breaks are emitted in total.
    Nothing is returned.
    """
    blank_prints = 40
    for _ in range(blank_prints):
        print('\n')
# Runs once when the script starts: push earlier terminal output out of view
# so the banner below appears on an empty-looking screen.
clean()
# Welcome message (Swedish): "Python Scraper. Returns a list of the latest
# auctions from http://tradera.com".
print('Python Scraper. Returnerar en lista på de senaste auktionerna från http://tradera.com')
# Search URL prefix and on-screen label for each seller-type menu choice.
_SELLER_FILTERS = {
    '0': ('https://www.tradera.com/search?q=', 'Privat och Företag'),
    '1': ('https://www.tradera.com/search?sellerType=Private&q=', 'Enbart Privat'),
    '2': ('https://www.tradera.com/search?sellerType=Company&q=', 'Enbart Företag'),
}

# Horizontal rule drawn above and below the result table.
_RULE = '------------------------------------------------------------------------------------------------------------------------------------'

# The table lists at most this many auctions.
_MAX_ROWS = 15


def _texts(soup, tag, css_class):
    """Return the stripped text of every ``tag`` element with ``css_class``, in page order."""
    return [node.text.strip()
            for node in soup.find_all(tag, attrs={'class': css_class})]


def _report_filename(search_text):
    """Return the report file name for ``search_text``.

    Spaces become underscores (the name the completion message has always
    announced); path separators are replaced too so the report is always
    created in the current directory.
    """
    safe = search_text
    for unsafe_char in (' ', '/', '\\'):
        safe = safe.replace(unsafe_char, '_')
    return safe + '.txt'


def main():
    """Run one interactive Tradera search and save the result table.

    Prompts for a search term and a seller-type filter, downloads the
    matching search page from tradera.com, prints up to 15 auctions (time
    left, number of bids, price, title) as a table and writes the same
    table to a text file named after the search term.

    Written for Python 2 (``raw_input``, ``urllib2``, byte-string literals).

    Returns:
        None. An invalid filter choice or a failed download is reported on
        the terminal and the function returns without creating a file.
    """
    import urllib  # Python 2 stdlib; the file itself only imports urllib2
    from contextlib import closing

    search_text = raw_input('Sökord: ')
    # The user chooses between private sellers, companies, or both.
    choice = raw_input('Filtrera efter typ av säljare: Båda (0), Privat (1) eller Företag (2). ')
    if choice not in _SELLER_FILTERS:
        # Any other answer used to crash later with UnboundLocalError.
        print('Ogiltigt val. Ange 0, 1 eller 2.')
        return
    url_prefix, seller = _SELLER_FILTERS[choice]

    clean()
    now = datetime.datetime.now()

    # Percent-encode the whole term, not just spaces, so '&', '#' or
    # non-ASCII letters cannot corrupt the query string.
    url = url_prefix + urllib.quote(search_text, safe='')
    try:
        # closing() releases the HTTP response even if parsing fails.
        with closing(urllib2.urlopen(url)) as page:
            soup = BeautifulSoup(page, 'html.parser')
    except urllib2.URLError as err:
        print('Kunde inte hämta sidan: %s' % err)
        return

    names = _texts(soup, 'h3', 'item-card-details-header')
    prices = _texts(soup, 'span', 'item-card-details-price-amount')
    # Drop the literal word 'bud' so only the count remains.
    bids = [text.replace('bud', '')
            for text in _texts(soup, 'span', 'item-card-details-bids')]
    times = _texts(soup, 'span', 'item-card-details-time-left')
    # NOTE(review): the four lists are paired purely by position; if a result
    # card lacks one of these elements the columns shift. Confirm against the
    # live markup.
    rows = list(zip(times, bids, prices, names))[:_MAX_ROWS]

    file_name = _report_filename(search_text)
    with open(file_name, 'w') as report:
        print('Sökord: ' + search_text)
        print('Typ av säljare: ' + seller + '.')
        report.write('Sökord: ' + search_text + '\n')
        report.write('Typ av säljare: ' + seller + '.\n')
        print('')
        report.write('\n')
        print('%0s %16s %18s %12s %24s' % ('#', 'Tid Kvar', 'Antal Bud', 'Pris', 'Artikelnamn'))
        report.write('%0s %16s %18s %12s %24s' % ('#', 'Tid Kvar', 'Antal Bud', 'Pris', 'Artikelnamn\n'))
        print(_RULE)
        report.write(_RULE + '\n')

        for number, (time_left, bid_count, price, name) in enumerate(rows, 1):
            row = u'%0s %16s %16s %16s %5s' % (
                '%d.' % number, time_left, bid_count, price, name)
            try:
                print(row)
            except UnicodeEncodeError:
                # stdout cannot represent the text (e.g. output is piped):
                # fall back to UTF-8 bytes instead of losing the row.
                print(row.encode('utf-8'))
            # The file is opened in byte mode, so encode explicitly. Writing
            # the unicode row directly raised UnicodeEncodeError for
            # non-ASCII titles and the row was silently dropped.
            report.write(row.encode('utf-8') + ' \n')

        print(_RULE)
        print('Sökning utfördes:' + now.strftime(" %H:%M %d-%m-%Y"))
        report.write(_RULE + '\n')
        report.write('Sökning utfördes:' + now.strftime(" %H:%M %d-%m-%Y"))
    # Announce the file only after it has been flushed and closed.
    print('Filen ' + file_name + ' har skapats.')
# Keep offering new searches until the user presses Ctrl-C or stdin is closed
# (Ctrl-D / end of piped input). Both are the normal ways to leave this loop,
# so exit quietly instead of dumping a traceback.
try:
    while True:
        main()
except (KeyboardInterrupt, EOFError):
    print('')