# subtitle_downloader.py -- forked from jellyang/shooter-subtitle-downloader
# -*- coding: utf-8 -*-
import glob
import hashlib
import json
import math
import os
import re
import sys
import urllib
import urllib2

from g2butf8 import g2butf8
# TODO: add support for searching other subtitle sites in the future, e.g.:
# http://www.opensubtitles.org/zh
# http://www.zimuku.net/
# Folder that is scanned for video files.  The first command-line argument,
# when present, takes precedence over the built-in default.
# (Example of a Windows location: 'D:\\video\\movies\\test')
dir_path = sys.argv[1] if len(sys.argv) > 1 else '/var/lib/transmission/Downloads/'

# File-name suffixes that identify a video file.
extensions = (
    ".avi", ".mp4", ".mkv", ".mpg", ".mpeg", ".mov",
    ".rm", ".vob", ".wmv", ".flv", ".3gp",
)
def get_hash(name):
    """Return the four MD5 digests that identify a video file.

    Up to 4096 bytes are read at each of four offsets and hashed separately.
    The offsets, in the order the digests are returned, are:

        1. 4096 bytes into the file (clamped to the file size),
        2. two thirds of the file size,
        3. one third of the file size,
        4. 8192 bytes before the end of the file (clamped to 0).

    Args:
        name: path of the file to fingerprint.

    Returns:
        A list of four hexadecimal MD5 digest strings.  A chunk that starts
        at or beyond the end of a short file is empty, so its entry is the
        MD5 of the empty string.
    """
    chunk_size = 4096
    hash_val = []
    with open(name, 'rb') as f:
        f.seek(0, os.SEEK_END)
        size = f.tell()
        # Floor division keeps the offsets integral on every Python version;
        # "/" would silently switch to float maths under true division and
        # move the two-thirds offset.
        offsets = (
            min(size, 4096),
            size // 3 * 2,
            size // 3,
            max(0, size - 8192),
        )
        for start in offsets:
            # Never read past the end of the file.
            end = min(start + chunk_size, size)
            f.seek(start)
            data = f.read(end - start)
            hash_val.append(hashlib.md5(data).hexdigest())
    return hash_val
def _fetch_with_retry(request, attempts=5, timeout=30):
    """Return the body of *request*, or None once every attempt has failed."""
    for attempt in range(1, attempts + 1):
        try:
            handle = urllib2.urlopen(request, timeout=timeout)
            try:
                return handle.read()
            finally:
                handle.close()
        except (urllib2.URLError, IOError) as err:
            # Only I/O failures are retried; anything else (including
            # KeyboardInterrupt) propagates to the caller.
            print('shooter api timeout, retry... (%d/%d: %s)' % (attempt, attempts, err))
    return None


def sub_downloader(path):
    """Download subtitles for one video file from the shooter.cn API.

    The file is fingerprinted with get_hash(), the API is asked for matching
    subtitles, and the first file of each of the first five results is saved
    next to the video as "<video path without extension>-<index>.zh.srt".
    Every saved file is made world-accessible (mode 0777) and passed to
    g2butf8.translate().

    Args:
        path: path of the video file.

    Returns:
        None in every case.  The function gives up quietly when the API
        reports no subtitle, answers with something that is not a JSON list,
        or stays unreachable after the retry budget is used up.
    """
    hash_val = get_hash(path)
    # For privacy, only the bare file name is sent, never the full path.
    name = path.split('\\')[-1].split('/')[-1]

    # Remove the video extension from the END of the path only; a blanket
    # str.replace() would also eat ".rm", ".avi", ... appearing in the middle
    # of a folder or file name.
    base_path = path
    for ext in extensions:
        if path.endswith(ext):
            base_path = path[:-len(ext)]
            break

    headers = {'User-Agent': 'SubDB/1.0 (shooter-subtitle-downloader/1.0; '
                             'http://github.com/marksylee/shooter-subtitle-downloader)'}

    # step 1. find subtitle list
    # urlencode() escapes the ';' separators (as %3B) and any special
    # characters in the file name, so the query string is always valid.
    query = urllib.urlencode([
        ('filehash', ';'.join(hash_val)),
        ('format', 'json'),
        ('pathinfo', name),
        ('lang', 'Chn'),
    ])
    url = 'http://www.shooter.cn/api/subapi.php?' + query
    print('get list url: %s' % url)
    listing = _fetch_with_retry(urllib2.Request(url, '', headers))
    if listing is None:
        print('shooter api unreachable, skip: %s' % name)
        return None

    # The API signals "no subtitle found" with one of these bodies.
    if listing == '\xff' or listing == '0xff(-1)':
        return None

    # SECURITY: the reply is untrusted network data, so it is parsed as JSON
    # instead of being handed to eval().  JSON decoding also turns the
    # "\u0026" escapes in the links back into "&".
    try:
        candidates = json.loads(listing)
    except ValueError:
        print('unexpected shooter api response, skip: %s' % name)
        return None
    if not isinstance(candidates, list):
        return None

    # step 2. get first 5 subtitle from subtitle list
    # In testing the API only ever returned three results; the cap of five is
    # a safety limit.
    for index, res in enumerate(candidates[:5]):
        files = res.get('Files') or []
        if not files:
            continue
        link = files[0]['Link']
        print('download file url: %s' % link)
        payload = _fetch_with_retry(urllib2.Request(link, '', headers))
        if payload is None:
            print('download failed, skip: %s' % link)
            continue

        srt_file_name = base_path + '-' + str(index) + ".zh.srt"
        with open(srt_file_name, "wb") as subtitle_file:
            subtitle_file.write(payload)
        os.chmod(srt_file_name, 0o777)
        # Convert Simplified Chinese to Traditional Chinese.
        g2butf8.translate(srt_file_name)
    return None
# Rename the entries directly inside dir_path so their names contain only
# letters, digits and dots: every run of other characters becomes a single
# '.'.  The original motivation was that names containing [] or () could not
# be matched with glob.
for filename in os.listdir(dir_path):
    if re.match('^[A-Za-z0-9_.]+$', filename) is not None:
        continue  # already a safe name
    # The substitution collapses runs, so at most one trailing '.' remains.
    new_filename = re.sub('[^0-9a-zA-Z]+', '.', filename).rstrip('.')
    if not new_filename:
        # Nothing usable is left (the name had no letters or digits).
        print('skip rename of %s: no valid characters' % filename)
        continue
    source = os.path.join(dir_path, filename)
    target = os.path.join(dir_path, new_filename)
    if os.path.exists(target):
        # os.rename() may silently replace an existing entry; never do that.
        print('skip rename of %s: %s already exists' % (filename, new_filename))
        continue
    print('rename from %s to %s' % (filename, new_filename))
    try:
        os.rename(source, target)
    except OSError as err:
        print('rename failed for %s: %s' % (filename, err))

for root, subFolders, files in os.walk(dir_path):
    print('now in folder: %s' % root)
    # Match against the names os.walk() already listed instead of globbing:
    # only the top level was renamed above, so a nested folder may still have
    # glob metacharacters in its path.  Dot-files are ignored, as glob('*...')
    # would ignore them.
    visible = [entry for entry in files if not entry.startswith('.')]
    srt_count = sum(1 for entry in visible if entry.endswith('.srt'))
    print('num of srt files: %d' % srt_count)
    # A single .srt may be a subtitle that shipped with the video; more than
    # one means this folder was already processed, so it is skipped.
    if srt_count > 1:
        continue
    for ext in extensions:
        for entry in visible:
            if entry.endswith(ext):
                video = os.path.join(root, entry)
                print('video file: %s' % video)
                sub_downloader(video)