/
titlebot.py
executable file
·144 lines (135 loc) · 6.22 KB
/
titlebot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
#!/usr/bin/env python2
# coding: utf-8
import os
import sys
import socket
import string
import time
import urllib2
import HTMLParser
import zlib
import re
import libirc
# IRC server address; the connection below is made with use_ssl=True.
HOST = "irc.freenode.net"
PORT = 6697
# Identity sent to the server: nickname, then the user ident and real name.
NICK = "titlebot"
IDENT = "titlebot"
REALNAME = "titlebot"
# Channels joined right after connecting.
CHANS = ["##Orz"]
def ParseURL(s):
    """Return the tail of *s* beginning at its first 'http:' or 'https:'.

    Whichever marker occurs earliest wins.  Returns None when *s* contains
    neither marker.
    """
    starts = [pos for pos in (s.find('http:'), s.find('https:')) if pos != -1]
    if not starts:
        return None
    return s[min(starts):]
# Connect over SSL, register the bot's identity and join every configured
# channel.  If any step fails, wait ten seconds and re-exec the script so the
# bot keeps retrying.
try:
    c = libirc.IRCConnection()
    c.connect((HOST, PORT), use_ssl=True)
    c.setnick(NICK)
    c.setuser(IDENT, REALNAME)
    for CHAN in CHANS:
        c.join(CHAN)
except Exception:
    # Deliberately not a bare "except:": KeyboardInterrupt and SystemExit
    # must stop the bot rather than trigger a restart.
    time.sleep(10)
    sys.stderr.write("Restarting...\n")
    os.execlp("python2", "python2", __file__)
    raise  # not reached when the exec succeeds
# Default reply channel; the main loop overwrites it with the destination of
# each channel message it handles.
CHAN = CHANS[0]
# Default timeout, in seconds, for sockets created from here on.
socket.setdefaulttimeout(10)
# Only its unescape() method is used, to decode HTML entities.
html_parser = HTMLParser.HTMLParser()
# Set to True to make the main loop exit.
quiting = False
def _decode_title(raw):
    """Decode the raw bytes found between <title> and </title> to unicode.

    The candidate encodings are tried in order.  iso-8859-1 accepts any byte
    sequence, so the lossy UTF-8 decode at the end is only a safety net.
    """
    for enc in ("utf-8", "gbk", "gb18030", "iso-8859-1"):
        try:
            return raw.decode(enc)
        except UnicodeDecodeError:
            pass
    return raw.decode("utf-8", "replace")


def _read_head(resp):
    """Read the start of *resp*: up to four reads, stopping once 16 KiB is in."""
    buf = resp.read(16384)
    reads = 1
    while len(buf) < 16384 and reads < 4:
        reads += 1
        chunk = resp.read(16384)
        if not chunk:
            break
        buf += chunk
    return buf


def _announce_magnet(conn, chan, link):
    """Say the dn= (display name) parameter of a magnet link, if present."""
    time.sleep(3)
    # NOTE(review): both alternatives of the first group are identical as
    # written; the second was presumably "&amp;" - confirm against upstream.
    found = re.findall(u'(&|&)dn=([^&]+)', link)
    if len(found) < 1 or len(found[0]) < 2:
        return
    raw = found[0][1].replace(u'+', u' ').encode('utf-8', 'replace')
    name = html_parser.unescape(urllib2.unquote(raw).decode('utf-8', 'replace'))
    name = name.replace(u'\n', u' ').replace(u'\r', u'')
    conn.say(chan, u'⇪文件名: %s' % name)


def _announce_url(conn, chan, url):
    """Fetch *url* and tell *chan* its HTML title, or its type and size.

    Exceptions raised while fetching propagate to the caller; the response
    is always closed before returning.
    """
    time.sleep(3)
    # Cut the URL at the first '>' or '"'.
    url = url.split(">", 1)[0].split('"', 1)[0]
    if re.match("https?:/*git.io(/|$)", url):
        return  # Fix for git.io
    opener = urllib2.build_opener()
    # The Range header asks for the first 16 KiB only.
    opener.addheaders = [
        ("Accept", "text/html, image/png, image/webp, image/jpeg, image/gif, */*"),
        ("Accept-Charset", "utf-8, iso-8859-1"),
        ("Accept-Language", "zh-cn, zh-hans, zh-tw, zh-hant, zh, en-us, en-gb, en"),
        ("Range", "bytes=0-16383"),
        ("User-Agent", "Mozilla/5.0 (compatible; Titlebot; like IRCbot; +https://github.com/m13253/titlebot)"),
        ("X-Forwarded-For", "10.2.0.101"),
        ("X-moz", "prefetch"),
        ("X-Prefetch", "yes"),
        ("X-Requested-With", "Titlebot"),
    ]
    resp = opener.open(url.encode("utf-8", "replace"))
    try:
        if resp.code != 200 and resp.code != 206:
            conn.say(chan, u"⇪HTTP %d 错误\r\n" % resp.code)
            return
        headers = resp.info()
        if "Content-Type" in headers and headers["Content-Type"].split(";")[0] != "text/html":
            # Not HTML: report the media type and, when known, the size.
            if "Content-Range" in headers:
                # The part after "/" in Content-Range is the full length.
                conn.say(chan, u"⇪文件类型: %s, 文件大小: %s 字节\r\n" % (headers["Content-Type"], headers["Content-Range"].split("/")[1]))
            elif "Content-Length" in headers:
                conn.say(chan, u"⇪文件类型: %s, 文件大小: %s 字节\r\n" % (headers["Content-Type"], headers["Content-Length"]))
            else:
                conn.say(chan, u"⇪文件类型: %s\r\n" % headers["Content-Type"])
            return
        body = _read_head(resp)
        if "Content-Encoding" in headers and headers["Content-Encoding"] == "gzip":  # Fix buggy www.bilibili.tv
            try:
                # 16 + MAX_WBITS makes zlib expect a gzip header.
                body = zlib.decompressobj(16 + zlib.MAX_WBITS).decompress(body)
            except zlib.error:
                # Best effort: fall back to the bytes as received.
                pass
        if body.find("<title>") != -1:
            raw_title = body.split("<title>")[1].split("</title>")[0]
            title = html_parser.unescape(_decode_title(raw_title))
            title = title.replace("\r", "").replace("\n", " ").strip()
            conn.say(chan, u"⇪标题: %s" % title)
        else:
            conn.say(chan, u"⇪无标题网页")
    finally:
        resp.close()


# Main loop: read IRC lines and announce every magnet link or HTTP(S) URL
# seen in a channel message.
while not quiting:
    if not c.sock:
        # The connection is gone: wait, then replace this process with a
        # fresh copy of the script.
        quiting = True
        time.sleep(10)
        sys.stderr.write("Restarting...\n")
        os.execlp("python2", "python2", __file__)
        break
    try:
        line = c.recvline(block=True)
        if not line:
            continue
        sys.stderr.write("%s\n" % line.encode('utf-8', 'replace'))
        line = c.parse(line=line)
        if not line or line["cmd"] != "PRIVMSG":
            continue
        if line["dest"] == NICK:
            # Message addressed to the bot itself: only the quit phrase is
            # acted on.
            if line["msg"] == u"Get out of this channel!":  # A small hack
                c.quit(u"%s asked to leave." % line["nick"])
                quiting = True
            continue
        CHAN = line["dest"]
        for w in line["msg"].split():
            if w.startswith(u'magnet:?'):
                # A magnet link ends processing of this message.
                _announce_magnet(c, CHAN, w)
                break
            w = ParseURL(w)
            if w:
                _announce_url(c, CHAN, w)
    except Exception as e:
        try:
            c.say(CHAN, u"哎呀,%s 好像出了点问题: %s" % (NICK, e))
        except Exception:
            # Best effort: reporting the failure must not kill the loop.
            pass
    except socket.error as e:
        # NOTE(review): socket.error is a subclass of Exception, so the
        # clause above always wins and this one never runs.  Moving it first
        # would also catch socket timeouts raised while reading HTTP
        # responses and disconnect the bot - decide on the intended scope
        # before reordering.
        sys.stderr.write("Error: %s\n" % e)
        c.quit("Network error.")
# vim: et ft=python sts=4 sw=4 ts=4