-
Notifications
You must be signed in to change notification settings - Fork 1
/
parlament.js
126 lines (99 loc) · 2.98 KB
/
parlament.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
var XDate = require('xdate');
var fs = require('fs');
// Headless browser driver shared by every step of the scrape.
var casper = require('casper').create({
    verbose: true,
    // Client-side copy of XDate, made available to the code that runs inside the page.
    clientScripts: ['xdate.js'],
    // Images and plugins are not needed for scraping, so skip loading them.
    pageSettings: {
        loadImages: false,
        loadPlugins: false
    },
    // A desktop-sized viewport keeps the site from redirecting us to its
    // mobile variant based on screen real estate.
    viewportSize: {
        width: 1024,
        height: 768
    }
});

// Holds the scraped entries; assigned inside run().
var data;

/*
 * Pagination plan:
 * - check for a "more pages" selector
 * - if there are more pages: increase num_steps
 * - compare num_steps to the current step
 * - switch to the second page and reset the step count if there are more pages
 */
var config = {
    num_steps: 6,
    add_steps: 0, // number of steps added to navigate to the next page
    current_num_steps: 0,
    current_step: 0
};

// Function declarations are hoisted, so run() can be called before its definition.
run();
/**
 * Scrapes the "Drucksachen" search of parlament-berlin.de for entries whose
 * description mentions a "Bebauungsplan" and writes the result to
 * data/json/parlament.json (JSON) and data/text/parlament.txt (plain text).
 *
 * Steps: open the search page, wait for the loading indicator to disappear,
 * fill the description filter and apply it, wait again, extract the result
 * table inside the page, then write both output files and exit.
 *
 * Side effects: assigns the file-level `data` variable, writes two files,
 * and terminates the casper process (exit status 1 if nothing was scraped).
 */
function run()
{
    var SEARCH_URL = "http://www.parlament-berlin.de/de/Dokumente/Drucksachen";

    // waitFor predicate: true once the page's loading indicator is hidden.
    // Called by casper with the casper instance as `this`.
    function isIndicatorHidden()
    {
        return this.evaluate(function() {
            return $('#indicator').css('display') === 'none';
        });
    }

    // Renders one scraped entry as a block of the plain-text report.
    function formatEntry(entry)
    {
        var docs = "";
        for (var j = 0; j < entry.documents.length; j++)
        {
            docs += "- " + entry.documents[j] + "\n";
        }
        return "Datum: " + (entry.date === null ? "" : entry.date)
            + "\nLink: " + entry.link
            + "\nBezeichner: " + entry.id
            + "\n\nBeschreibung:\n\n" + entry.description
            + "\n\nDokumente:\n\n" + docs
            + "\n\n---\n\n";
    }

    casper.start(SEARCH_URL);
    casper.waitFor(isIndicatorHidden);

    casper.thenEvaluate(function() {
        // Runs inside the page: fill the description filter and trigger the
        // site's own applyFilter() to reload the result table.
        var sel = document.querySelector('#Beschreibung');
        sel.value = 'Bebauungsplan';
        applyFilter();
    });
    casper.waitFor(isIndicatorHidden);

    casper.then(function() {
        // Everything inside this callback executes in the page context, so it
        // must be self-contained (no access to variables of this script).
        data = this.evaluate(function () {
            // Converts a "DD.MM.YYYY" string to an ISO string, or null if it
            // cannot be parsed.
            // NOTE(review): the date is built in local time and XDate's
            // toISOString() is expected to normalize to UTC, so the calendar
            // day may shift depending on the machine's time zone - confirm.
            function toIsoDate(text)
            {
                var parts = text.split('.');
                var day = parseInt(parts[0], 10);
                var month = parseInt(parts[1], 10);
                var year = parseInt(parts[2], 10);
                if (isNaN(day) || isNaN(month) || isNaN(year))
                {
                    return null;
                }
                // Date months are 0-based, the scraped month is 1-based.
                return new XDate(new Date(year, month - 1, day)).toISOString();
            }

            var rows = document.querySelectorAll('#vorgaenge tbody tr');
            var result = [];
            for (var i = 0; i < rows.length; i++)
            {
                var row = rows[i];
                var title = row.querySelector('td > strong');
                if (!title)
                {
                    continue; // row without a description, nothing to identify
                }
                var description = title.innerText;
                var id = description.match(/Bebauungsplans? ([\w-]+)/i);
                if (!id)
                {
                    continue; // not a development-plan entry
                }

                // The date is taken from the fifth child node of the row; it is
                // the dotted number sequence following "e:" or "ü:".
                var dateNode = row.childNodes[4];
                var date = dateNode ? dateNode.textContent.match(/(e|ü):([0-9.]+) /i) : null;

                // Collect the links of THIS row. (A ':nth-child(i)' selector is
                // 1-based and would pick up the previous row's links.)
                var documents = [];
                var anchors = row.querySelectorAll('a');
                for (var k = 0; k < anchors.length; k++)
                {
                    documents.push(anchors[k].href);
                }

                var firstNode = row.childNodes[0];
                var link = firstNode ? firstNode.textContent : "";

                result.push({
                    "description": description,
                    "documents": documents,
                    // null when the row carries no parsable date; the entry is
                    // kept rather than aborting the whole extraction.
                    "date": date ? toIsoDate(date[2]) : null,
                    "id": id[1],
                    "link": "http://www.parlament-berlin.de/de/Dokumente/Drucksachen?Vorgang="+link
                });
            }
            return result;
        });
    });

    casper.run(function() {
        // evaluate() yields no array if the in-page code failed; do not
        // overwrite previous output with garbage in that case.
        if (!Array.isArray(data))
        {
            casper.echo("parlament: scraping returned no data, output files left untouched", 'ERROR');
            casper.exit(1);
            return;
        }

        fs.write("data/json/parlament.json", JSON.stringify(data, null, '\t'), 'w');

        var out = "";
        for (var i = 0; i < data.length; i++)
        {
            out += formatEntry(data[i]);
        }
        fs.write("data/text/parlament.txt", out, 'w');
        casper.exit();
    });
}