From 16781eb31f007cd28c2a3b4577056a7901e7ed31 Mon Sep 17 00:00:00 2001 From: hanul93 Date: Fri, 9 Mar 2018 17:24:08 +0900 Subject: [PATCH 01/46] Added a CVE-2017-0199 pattern --- Engine/plugins/dde.py | 90 ++++++++++++++++++++++++++++--------------- 1 file changed, 58 insertions(+), 32 deletions(-) diff --git a/Engine/plugins/dde.py b/Engine/plugins/dde.py index 0c86a32..9d0db42 100644 --- a/Engine/plugins/dde.py +++ b/Engine/plugins/dde.py @@ -113,37 +113,14 @@ def scan(self, filehandle, filename, fileformat, filename_ex): # 악성코드 data = get_zip_data(filename, 'word/document.xml') if data: - # TEXT 영역을 추출한다. - texts = self.p_dde_text.findall(data) - if len(texts): - buf = '' - for text in texts: - # 앞쪽 begin Tag 제거 - off = text.find('>') - text = text[off+1:] - - # 뒤쪽 end Tag 제거 - off = text.rfind('<') - text = text[:off] - - # instr를 처리한다. - text = self.p_instr.sub(InstrSub, text) - - # 모든 Tag 삭제 - buf += self.p_tag.sub('', text) + '\n' - - # print buf - if len(buf): - if self.p_dde.search(buf) and self.p_cmd.search(buf): - return True, 'Exploit.MSWord.DDE.a', 0, kernel.INFECTED + if self.__scan_dde_docx(data): + return True, 'Exploit.MSWord.DDE.a', 0, kernel.INFECTED + elif self.__scan_cve_2017_0199_docx(data): + return True, 'Exploit.MSWord.CVE-2017-0199', 0, kernel.INFECTED elif filename_ex.lower() == 'worddocument': data = filehandle - s = self.p_dde2.search(data) - if s: - buf = s.group() - if len(buf): - if self.p_dde.search(buf) and self.p_cmd.search(buf): - return True, 'Exploit.MSWord.DDE.b', 0, kernel.INFECTED + if self.__scan_dde_doc(data): + return True, 'Exploit.MSWord.DDE.b', 0, kernel.INFECTED except IOError: pass @@ -179,11 +156,11 @@ def listvirus(self): # 진단 가능한 악성코드 리스트 # 진단/치료하는 악성코드 이름 등록 vlist.append('Exploit.MSWord.DDE.a') vlist.append('Exploit.MSWord.DDE.b') + vlist.append('Exploit.MSWord.CVE-2017-0199') return vlist - # --------------------------------------------------------------------- - + # --------------------------------------------------------------------- # getinfo(self) # 플러그인 엔진의 주요 정보를 알려준다. (제작자, 버전, ...) # 리턴값 : 플러그인 엔진 정보 @@ -195,6 +172,55 @@ def getinfo(self): # 플러그인 엔진의 주요 정보 info['version'] = '1.0' # 버전 info['title'] = 'DDE Scan Engine' # 엔진 설명 info['kmd_name'] = 'dde' # 엔진 파일 이름 - info['sig_num'] = 2 # 진단/치료 가능한 악성코드 수 + info['sig_num'] = len(self.listvirus()) # 진단/치료 가능한 악성코드 수 return info + + # --------------------------------------------------------------------- + # DDE 악성코드를 진단한다. + # --------------------------------------------------------------------- + def __scan_dde_docx(self, data): + # TEXT 영역을 추출한다. + texts = self.p_dde_text.findall(data) + if len(texts): + buf = '' + for text in texts: + # 앞쪽 begin Tag 제거 + off = text.find('>') + text = text[off + 1:] + + # 뒤쪽 end Tag 제거 + off = text.rfind('<') + text = text[:off] + + # instr를 처리한다. + text = self.p_instr.sub(InstrSub, text) + + # 모든 Tag 삭제 + buf += self.p_tag.sub('', text) + '\n' + + # print buf + if len(buf): + if self.p_dde.search(buf) and self.p_cmd.search(buf): + return True + + return False + + def __scan_dde_doc(self, data): + s = self.p_dde2.search(data) + if s: + buf = s.group() + if len(buf): + if self.p_dde.search(buf) and self.p_cmd.search(buf): + return True + + return False + + # --------------------------------------------------------------------- + # CVE-2017-0199 악성코드를 진단한다. + # --------------------------------------------------------------------- + def __scan_cve_2017_0199_docx(self, data): + if data.find(' Date: Mon, 12 Mar 2018 12:21:12 +0900 Subject: [PATCH 02/46] Added PE's resource parse when type id is zero --- Engine/plugins/pe.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Engine/plugins/pe.py b/Engine/plugins/pe.py index d4ce75b..8b9e746 100644 --- a/Engine/plugins/pe.py +++ b/Engine/plugins/pe.py @@ -279,14 +279,16 @@ def parse(self): name_id_off = kavutil.get_uint32(mm, rsrc_off + 0x14 + (i * 8)) # Type이 사용자가 정의한 이름 or RCDATA? - if type_id & 0x80000000 == 0x80000000 or type_id == 0xA: + if type_id & 0x80000000 == 0x80000000 or type_id == 0xA or type_id == 0: if type_id & 0x80000000 == 0x80000000: # 사용자가 정의한 이름 추출 string_off = (type_id & 0x7FFFFFFF) + rsrc_off len_name = kavutil.get_uint16(mm, string_off) rsrc_type_name = mm[string_off + 2:string_off + 2 + (len_name * 2):2] - else: + elif type_id == 0xA: rsrc_type_name = 'RCDATA' + else: + rsrc_type_name = '%d' % type_id # Name ID name_id_off = (name_id_off & 0x7FFFFFFF) + rsrc_off From 7d991c4c6d3b42766d5e89d20175e67f98f4bc78 Mon Sep 17 00:00:00 2001 From: hanul93 Date: Tue, 13 Mar 2018 10:34:04 +0900 Subject: [PATCH 03/46] Added PDB patterns --- Engine/plugins/emalware.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Engine/plugins/emalware.py b/Engine/plugins/emalware.py index 564ffab..3d93fe7 100644 --- a/Engine/plugins/emalware.py +++ b/Engine/plugins/emalware.py @@ -61,7 +61,7 @@ def getinfo(self): # 플러그인 엔진의 주요 정보 info['version'] = '1.1' # 버전 info['title'] = 'eMalware Engine' # 엔진 설명 info['kmd_name'] = 'emalware' # 엔진 파일 이름 - info['sig_num'] = kavutil.handle_pattern_md5.get_sig_num('emalware') + 2 # 진단/치료 가능한 악성코드 수 + info['sig_num'] = kavutil.handle_pattern_md5.get_sig_num('emalware') + 4 # 진단/치료 가능한 악성코드 수 return info @@ -142,6 +142,8 @@ def scan(self, filehandle, filename, fileformat, filename_ex): # 악성코드 pdb_sigs = { ':\\pz_git\\bin\\': 'AdWare.Win32.Sokuxuan.gen', ':\\CODE\\vitruvian\\': 'AdWare.Win32.Vitruvian.gen', + '\\bin\\Release\\WebSparkle.': 'AdWare.MSIL.BrowseFox.gen', + ':\\TeamCity\\BuildAgent1\\work\\': 'WebToolbar.Win32.Agent.avi', } for pat in pdb_sigs.keys(): From a2243ab0f991d806874f25daeaf1e6ee9c659661 Mon Sep 17 00:00:00 2001 From: hanul93 Date: Fri, 16 Mar 2018 12:28:25 +0900 Subject: [PATCH 04/46] Supported many patterns --- Engine/plugins/kavutil.py | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/Engine/plugins/kavutil.py b/Engine/plugins/kavutil.py index 5556647..82df63b 100644 --- a/Engine/plugins/kavutil.py +++ b/Engine/plugins/kavutil.py @@ -57,6 +57,10 @@ def __init__(self, plugins_path): self.sig_times = {} # 메모리 관리를 위해 시간 정보를 가짐 self.plugins = plugins_path + # 각 악성코드별 시그너처의 개수 : 예) adware:1개, emalware:39개 + # 이는 향후 (size % 개수) + 1을 해서 size가 어느 그룹에 존재하는지를 확인하게 됨 + self.sig_group_count = {} + fl = glob.glob(os.path.join(plugins_path, '*.s??')) fl.sort() for name in fl: @@ -69,14 +73,22 @@ def __init__(self, plugins_path): continue if len(sp): # 로딩된 패턴이 1개 이상이면... - if not (sig_key in self.sig_sizes): - self.sig_sizes[sig_key] = {} + # 그룹 개수 추가 + self.sig_group_count[sig_key] = self.sig_group_count.get(sig_key, 0) + 1 + + # 악성코드 패턴 크기를 담는다. + if sig_key in self.sig_sizes: + self.sig_sizes[sig_key].update(dict.fromkeys(sp)) + else: + self.sig_sizes[sig_key] = dict.fromkeys(sp) + ''' for psize in list(sp): if psize in self.sig_sizes[sig_key]: self.sig_sizes[sig_key][psize].append(idx) else: self.sig_sizes[sig_key][psize] = [idx] + ''' # --------------------------------------------------------------------- # match_size(self, sig_key, sig_size) @@ -106,7 +118,8 @@ def scan(self, sig_key, sig_size, sig_md5): sig_key = sig_key.lower() # 대문자로 입력될 가능성 때문에 모두 소문자로 변환 if self.match_size(sig_key, sig_size): # 크기가 존재하는가? - idxs = self.sig_sizes[sig_key][sig_size] # 어떤 파일에 1차 패턴이 존재하는지 확인 + # idxs = self.sig_sizes[sig_key][sig_size] # 어떤 파일에 1차 패턴이 존재하는지 확인 + idxs = ['%02d' % ((sig_size % self.sig_group_count[sig_key]) + 1)] fmd5 = sig_md5.decode('hex') sig_p1 = fmd5[:6] # 1차 패턴 @@ -167,7 +180,10 @@ def __load_sig(self, fname): # 리턴값 : 악성코드 패턴 로딩 성공 여부 # --------------------------------------------------------------------- def __load_sig_ex(self, sig_dict, sig_prefix, sig_key, idx): # (self.sig_names, 'n', 'script', '01') - if not (sig_key in sig_dict) or not (idx in sig_dict[sig_key]): + if not (sig_key in sig_dict): + sig_dict[sig_key] = {} + + if not (idx in sig_dict[sig_key]): # 패턴 로딩 try: name_fname = os.path.join(self.plugins, '%s.%s%s' % (sig_key, sig_prefix, idx)) @@ -177,7 +193,7 @@ def __load_sig_ex(self, sig_dict, sig_prefix, sig_key, idx): # (self.sig_names, except IOError: return False - sig_dict[sig_key] = {idx: sp} + sig_dict[sig_key][idx] = sp # 현재 시간을 sig_time에 기록한다. if not (sig_key in self.sig_times): @@ -201,7 +217,7 @@ def __save_mem(self): for sig_prefix in self.sig_times[sig_key].keys(): for idx in self.sig_times[sig_key][sig_prefix].keys(): # print '[-]', n - self.sig_times[sig_key][sig_prefix][idx] - if n - self.sig_times[sig_key][sig_prefix][idx] > 4: # (3 * 60) : + if n - self.sig_times[sig_key][sig_prefix][idx] > 3 * 60: # print '[*] Delete sig : %s.%s%s' % (sig_key, sig_prefix, idx) if sig_prefix == 'i': # 1차 패턴 self.sig_p1s[sig_key].pop(idx) From 517fc1bc45c30d1801b610b53f4b34ed0a7cce5e Mon Sep 17 00:00:00 2001 From: hanul93 Date: Fri, 16 Mar 2018 17:48:49 +0900 Subject: [PATCH 05/46] Fixed RVA to Offset convert --- Engine/plugins/pe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Engine/plugins/pe.py b/Engine/plugins/pe.py index 8b9e746..4569e77 100644 --- a/Engine/plugins/pe.py +++ b/Engine/plugins/pe.py @@ -497,7 +497,7 @@ def parse(self): def rva_to_off(self, t_rva): for section in self.sections: - size = section['VirtualSize'] + size = section['SizeRawData'] rva = section['RVA'] if rva <= t_rva < rva + size: From d9403781b15490888cdfd22ed2f9171be7bf75eb Mon Sep 17 00:00:00 2001 From: hanul93 Date: Fri, 16 Mar 2018 18:03:33 +0900 Subject: [PATCH 06/46] Added a Worm.Win32.Allaple.gen pattern --- Engine/plugins/emalware.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/Engine/plugins/emalware.py b/Engine/plugins/emalware.py index 3d93fe7..08dda6c 100644 --- a/Engine/plugins/emalware.py +++ b/Engine/plugins/emalware.py @@ -21,6 +21,10 @@ class KavMain: # 리턴값 : 0 - 성공, 0 이외의 값 - 실패 # --------------------------------------------------------------------- def init(self, plugins_path, verbose=False): # 플러그인 엔진 초기화 + # Worm.Win32.Allaple.gen 패턴 + pat = r'\xC7\x44\x24.+\x40\x00.+\x8B.\x24.+\x33..\x04\x01.\x24.+\x8B.\x24' + self.p_allaple = re.compile(pat) + pat = r'POST /cdn-cgi/\x00\x00 HTTP/1.1\r\nUser-Agent: \x00\r\nHost:' + \ r'[\d\D]+?GET\x00+/\x00+Cookie:[\d\D]+?http[\d\D]+?url=[\d\D]+?POST' self.p_linux_mirai = re.compile(pat) @@ -61,7 +65,7 @@ def getinfo(self): # 플러그인 엔진의 주요 정보 info['version'] = '1.1' # 버전 info['title'] = 'eMalware Engine' # 엔진 설명 info['kmd_name'] = 'emalware' # 엔진 파일 이름 - info['sig_num'] = kavutil.handle_pattern_md5.get_sig_num('emalware') + 4 # 진단/치료 가능한 악성코드 수 + info['sig_num'] = kavutil.handle_pattern_md5.get_sig_num('emalware') + 5 # 진단/치료 가능한 악성코드 수 return info @@ -151,6 +155,12 @@ def scan(self, filehandle, filename, fileformat, filename_ex): # 악성코드 vname = kavutil.normal_vname(pdb_sigs[pat]) return True, vname, 0, kernel.INFECTED + # case 4. Worm.Win32.Allaple.gen 검사 + ep_off = ff['pe']['EntryPointRaw'] + data = mm[ep_off:ep_off+0x80] + if self.p_allaple.search(data): + return True, 'Worm.Win32.Allaple.gen', 0, kernel.INFECTED + # 미리 분석된 파일 포맷중에 ELF 포맷이 있는가? elif 'ff_elf' in fileformat: ff = fileformat['ff_elf'] From da983640954ca36e400ffa5a231d39a5665d7669 Mon Sep 17 00:00:00 2001 From: hanul93 Date: Fri, 23 Mar 2018 14:25:29 +0900 Subject: [PATCH 07/46] Modified a pattern for Worm.Win32.Allaple.gen --- Engine/plugins/emalware.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Engine/plugins/emalware.py b/Engine/plugins/emalware.py index 08dda6c..7631196 100644 --- a/Engine/plugins/emalware.py +++ b/Engine/plugins/emalware.py @@ -22,7 +22,7 @@ class KavMain: # --------------------------------------------------------------------- def init(self, plugins_path, verbose=False): # 플러그인 엔진 초기화 # Worm.Win32.Allaple.gen 패턴 - pat = r'\xC7\x44\x24.+\x40\x00.+\x8B.\x24.+\x33..\x04\x01.\x24.+\x8B.\x24' + pat = r'\xC7\x44\x24.+[\x40\x41]\x00.+\x8B.\x24.+\x33..\x04\x01.\x24.+\x8B.\x24' self.p_allaple = re.compile(pat) pat = r'POST /cdn-cgi/\x00\x00 HTTP/1.1\r\nUser-Agent: \x00\r\nHost:' + \ From ef99af9484840552423eb4e5190b4534653436d7 Mon Sep 17 00:00:00 2001 From: hanul93 Date: Fri, 23 Mar 2018 14:26:36 +0900 Subject: [PATCH 08/46] Added a scan function for Virus.Win32.Small.a --- Engine/plugins/ve.py | 39 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 37 insertions(+), 2 deletions(-) diff --git a/Engine/plugins/ve.py b/Engine/plugins/ve.py index 6ca19b3..e9d86c4 100644 --- a/Engine/plugins/ve.py +++ b/Engine/plugins/ve.py @@ -85,7 +85,7 @@ def getinfo(self): # 플러그인 엔진의 주요 정보 info['version'] = '1.0' # 버전 info['title'] = 'Virus Engine' # 엔진 설명 info['kmd_name'] = 've' # 엔진 파일 이름 - info['sig_num'] = kavutil.handle_pattern_vdb.get_sig_num('ve') # 진단/치료 가능한 악성코드 수 + info['sig_num'] = kavutil.handle_pattern_vdb.get_sig_num('ve') + 1 # 진단/치료 가능한 악성코드 수 return info @@ -96,12 +96,14 @@ def getinfo(self): # 플러그인 엔진의 주요 정보 # --------------------------------------------------------------------- def listvirus(self): # 진단 가능한 악성코드 리스트 vlist = kavutil.handle_pattern_vdb.get_sig_vlist('ve') - vlist.sort() vlists = [] + vlists.append('Virus.Win32.Small.a') + for vname in vlist: vlists.append(kavutil.normal_vname(vname)) + vlists.sort() return vlists # --------------------------------------------------------------------- @@ -119,6 +121,10 @@ def scan(self, filehandle, filename, fileformat, filename_ex): # 악성코드 flags = [] mm = filehandle + # Virus.Win32.Small.a 검사 + if self.__scan_virus_win32_small_a(filehandle, fileformat): + return True, 'Virus.Win32.Small.a', 0, kernel.INFECTED + # Flag별 Signature를 만든다. # Flag - 0 : 파일의 처음 flags.append([int('0000' + mm[0:2].encode('hex'), 16), gen_checksums(mm[0:0x80])]) @@ -262,3 +268,32 @@ def __scan_cs2(self, mm, ve_id, idx): return kavutil.normal_vname(vname) return None + + # --------------------------------------------------------------------- + # Virus.Win32.Small.a 검사한다. + # 리턴값 : True(발견) or False(미발견) + # --------------------------------------------------------------------- + def __scan_virus_win32_small_a(self, mm, fileformat): + if 'ff_pe' in fileformat: + ff = fileformat['ff_pe']['pe'] + ep_off = ff['EntryPointRaw'] + + if cryptolib.crc32(mm[ep_off:ep_off + 12]) == '4d49a25f': + v_rva = kavutil.get_uint32(mm, ep_off + 12) + 1 # 악성코드 RVA + v_rva -= ff['ImageBase'] + + # v_rva가 마지막 섹션에 속하는 값인지 확인한다. + sec = ff['Sections'][-1] + if sec['RVA'] <= v_rva <= sec['RVA'] + sec['VirtualSize']: + pe_file_align = ff['FileAlignment'] + if pe_file_align: + foff = (sec['PointerRawData'] / pe_file_align) * pe_file_align + else: + foff = sec['PointerRawData'] + + v_off = v_rva - sec['RVA'] + foff + + if cryptolib.crc32(mm[v_off:v_off + 0x30]) == '8d964738': + return True + + return False From 7a924409184ef7963bc5eb1cc98d602baa4e9aa7 Mon Sep 17 00:00:00 2001 From: hanul93 Date: Mon, 2 Apr 2018 15:34:04 +0900 Subject: [PATCH 09/46] Added scan function for Trojan.HTML.IFrame.a --- Engine/plugins/html.py | 59 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/Engine/plugins/html.py b/Engine/plugins/html.py index 7c6abe4..bb4cd88 100644 --- a/Engine/plugins/html.py +++ b/Engine/plugins/html.py @@ -4,6 +4,7 @@ import re import os +import kernel import kavutil @@ -29,6 +30,9 @@ def init(self, plugins_path, verbose=False): # 플러그인 엔진 초기화 pat = r'[\d\D]*?|[\d\D]*?|<\?(php\b)?[\d\D]*?\?>' self.p_script = re.compile(pat, re.IGNORECASE) + # HTML. + self.p_html_malware = re.compile(r'\?ob_start.+?>\s* Date: Mon, 2 Apr 2018 15:35:08 +0900 Subject: [PATCH 10/46] Fixed a _OleNative Stream name --- Engine/plugins/olenative.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Engine/plugins/olenative.py b/Engine/plugins/olenative.py index c5d2e84..07e7fd0 100644 --- a/Engine/plugins/olenative.py +++ b/Engine/plugins/olenative.py @@ -121,7 +121,7 @@ def format(self, filehandle, filename, filename_ex): mm = filehandle - if mm[:2] == '\x02\x00' and filename_ex.find('\x01Ole10Native') != -1: + if mm[:2] == '\x02\x00' and filename_ex.find('_Ole10Native') != -1: fileformat = analysis_ole10native(mm, self.verbose) if fileformat: ret = {'ff_ole10native': fileformat} From 753bb624a11e03918e7bf8408fd449c6f753bd16 Mon Sep 17 00:00:00 2001 From: hanul93 Date: Mon, 2 Apr 2018 15:36:21 +0900 Subject: [PATCH 11/46] Added a scan funtion of Virus.Win32.SuperThreat.b --- Engine/plugins/ve.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/Engine/plugins/ve.py b/Engine/plugins/ve.py index e9d86c4..6aa16d1 100644 --- a/Engine/plugins/ve.py +++ b/Engine/plugins/ve.py @@ -85,7 +85,7 @@ def getinfo(self): # 플러그인 엔진의 주요 정보 info['version'] = '1.0' # 버전 info['title'] = 'Virus Engine' # 엔진 설명 info['kmd_name'] = 've' # 엔진 파일 이름 - info['sig_num'] = kavutil.handle_pattern_vdb.get_sig_num('ve') + 1 # 진단/치료 가능한 악성코드 수 + info['sig_num'] = kavutil.handle_pattern_vdb.get_sig_num('ve') + 2 # 진단/치료 가능한 악성코드 수 return info @@ -99,6 +99,7 @@ def listvirus(self): # 진단 가능한 악성코드 리스트 vlists = [] vlists.append('Virus.Win32.Small.a') + vlists.append('Virus.Win32.SuperThreat.b') for vname in vlist: vlists.append(kavutil.normal_vname(vname)) @@ -122,8 +123,9 @@ def scan(self, filehandle, filename, fileformat, filename_ex): # 악성코드 mm = filehandle # Virus.Win32.Small.a 검사 - if self.__scan_virus_win32_small_a(filehandle, fileformat): - return True, 'Virus.Win32.Small.a', 0, kernel.INFECTED + ret, vname = self.__scan_virus_win32_small_a(filehandle, fileformat) + if ret: + return True, vname, 0, kernel.INFECTED # Flag별 Signature를 만든다. # Flag - 0 : 파일의 처음 @@ -293,7 +295,10 @@ def __scan_virus_win32_small_a(self, mm, fileformat): v_off = v_rva - sec['RVA'] + foff - if cryptolib.crc32(mm[v_off:v_off + 0x30]) == '8d964738': - return True + x = cryptolib.crc32(mm[v_off:v_off + 0x30]) + if x == '8d964738': + return True, 'Virus.Win32.Small.a' + elif x == '00000000' or x == 'f288b395': # 파일이 깨진 경우이거나 모든 값이 0인 경우이다. + return True, 'Virus.Win32.SuperThreat.b' - return False + return False, None From 50f01e996828856d1966c455de3ff0f218493ed6 Mon Sep 17 00:00:00 2001 From: hanul93 Date: Mon, 2 Apr 2018 15:37:39 +0900 Subject: [PATCH 12/46] Added extraction of Ole's attach data --- Engine/plugins/ole.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/Engine/plugins/ole.py b/Engine/plugins/ole.py index 87a9089..ffe1769 100644 --- a/Engine/plugins/ole.py +++ b/Engine/plugins/ole.py @@ -1551,10 +1551,24 @@ def listvirus(self): # 진단 가능한 악성코드 리스트 def format(self, filehandle, filename, filename_ex): ret = {} + mm = filehandle + # OLE 헤더와 동일 - if filehandle[:8] == '\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1': + if mm[:8] == '\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1': ret['ff_ole'] = 'OLE' + # OLE 뒤에 첨부된 파일이 있는지를 조사한다. + fsize = len(mm) + + bsize = 1 << kavutil.get_uint16(mm, 0x1e) + rsize = (fsize / bsize) * bsize + if fsize > rsize: + fileformat = { # 포맷 정보를 담을 공간 + 'Attached_Pos': rsize, + 'Attached_Size': fsize - rsize + } + ret['ff_attach'] = fileformat + return ret # --------------------------------------------------------------------- From f94878d1d75e5c171ddc179fd0b46169184b43d2 Mon Sep 17 00:00:00 2001 From: hanul93 Date: Wed, 4 Apr 2018 08:59:59 +0900 Subject: [PATCH 13/46] Added new options (--copy, --qname) --- Engine/k2.py | 22 +++++++++++++--- Engine/kavcore/k2engine.py | 54 +++++++++++++++++++++++++++++++------- 2 files changed, 63 insertions(+), 13 deletions(-) diff --git a/Engine/k2.py b/Engine/k2.py index ff71ac3..8bf5406 100644 --- a/Engine/k2.py +++ b/Engine/k2.py @@ -308,6 +308,9 @@ def define_options(): default=False) parser.add_option("-F", "--infp", metavar="PATH", dest="infp_path") + parser.add_option("", "--qname", # 격리시 악성코드 이름 부여 + action="store_true", dest="opt_qname", + default=False) parser.add_option("-R", "--nor", action="store_true", dest="opt_nor", default=False) @@ -329,6 +332,9 @@ def define_options(): parser.add_option("", "--move", action="store_true", dest="opt_move", default=False) + parser.add_option("", "--copy", + action="store_true", dest="opt_copy", + default=False) parser.add_option("", "--update", action="store_true", dest="opt_update", default=False) @@ -404,6 +410,7 @@ def print_options(): -l, --del delete infected files --no-color don't print with color --move move infected files in quarantine folder + --copy copy infected files in quarantine folder --update update --verbose enabling verbose mode (only Developer Edition) --sigtool make files for malware signatures @@ -861,14 +868,23 @@ def update_callback(ret_file_info, is_success): # ------------------------------------------------------------------------- # quarantine 콜백 함수 # ------------------------------------------------------------------------- -def quarantine_callback(filename, is_success): +def quarantine_callback(filename, is_success, q_type): + import kernel + + q_message = { + kavcore.k2const.K2_QUARANTINE_MOVE: ['quarantined', 'quarantine failed'], + kavcore.k2const.K2_QUARANTINE_COPY: ['copied', 'copy failed'], + } + + msg = q_message[q_type] + disp_name = filename if is_success: - message = 'quarantined' + message = msg[0] # 성공 message_color = FOREGROUND_GREEN | FOREGROUND_INTENSITY else: - message = 'quarantine failed' + message = msg[1] # 실패 message_color = FOREGROUND_RED | FOREGROUND_INTENSITY display_line(disp_name, message, message_color) diff --git a/Engine/kavcore/k2engine.py b/Engine/kavcore/k2engine.py index 73a4f57..be3ce43 100644 --- a/Engine/kavcore/k2engine.py +++ b/Engine/kavcore/k2engine.py @@ -52,7 +52,7 @@ def __init__(self, verbose=False): # 플러그 엔진의 가장 최신 시간 값을 가진다. # 초기값으로는 1980-01-01을 지정한다. self.max_datetime = datetime.datetime(1980, 1, 1, 0, 0, 0, 0) - + # 키콤백신이 만든 임시 파일 모두 제거 (운영체제의 임시 폴더를 초기화) k2file.K2Tempfile().removetempdir() @@ -256,6 +256,8 @@ def __init__(self, plugins_path, temp_path, max_datetime, verbose=False): self.identified_virus = set() # 유니크한 악성코드 개수를 구하기 위해 사용 self.set_result() # 악성코드 검사 결과를 초기화한다. + self.quarantine_name = {} # 격리소로 파일 이동시 악성코드 이름 폴더로 이동시 사용 + self.disinfect_callback_fn = None # 악성코드 치료 콜백 함수 self.update_callback_fn = None # 악성코드 압축 최종 치료 콜백 함수 self.quarantine_callback_fn = None # 악성코드 격리 콜백 함수 @@ -304,7 +306,7 @@ def init(self, callback_fn=None): ret = inst.init(self.plugins_path, self.options['opt_verbose']) else: ret = inst.init(self.plugins_path, False) - + if not ret: # 성공 t_kavmain_inst.append(inst) @@ -552,17 +554,22 @@ def scan(self, filename, *callback): move_master_file = False if ret_value['result']: # 악성코드 발견인가? + t_master_file = t_file_info.get_master_filename() + + # 격리소에 생성시 악성코드 이름 부여할 경우 사용할 목적임 + if not self.quarantine_name.get(t_master_file, None): + self.quarantine_name[t_master_file] = ret_value['virus_name'] + action_type = self.call_scan_callback_fn(scan_callback_fn, ret_value) - if self.options['opt_move']: + if self.options['opt_move'] or self.options['opt_copy']: if t_file_info.get_additional_filename() == '': - # print 'move 1 :', t_file_info.get_master_filename() + # print 'move 1 :', t_master_file self.__arcclose() - self.__quarantine_file(t_file_info.get_master_filename()) + self.__quarantine_file(t_master_file) move_master_file = False else: move_master_file = True - t_master_file = t_file_info.get_master_filename() else: # 격리 옵션이 치료 옵션보다 우선 적용 if action_type == k2const.K2_ACTION_QUIT: # 종료인가? return 0 @@ -645,13 +652,26 @@ def call_scan_callback_fn(self, a_scan_callback_fn, ret_value): # 입력값 : filename - 격리 대상 파일 이름 # --------------------------------------------------------------------- def __quarantine_file(self, filename): - if self.options['infp_path']: + if self.options['infp_path'] and (self.options['opt_move'] or self.options['opt_copy']): is_success = False try: + if self.options['opt_qname']: + x = self.quarantine_name.get(filename, None) + if x: + q_path = os.path.join(self.options['infp_path'], x) + self.quarantine_name.pop(filename) + else: + q_path = self.options['infp_path'] + else: + q_path = self.options['infp_path'] + + if not os.path.exists(q_path): + os.makedirs(q_path) # 다중 폴더 생성 + t_filename = os.path.split(filename)[-1] # 격리소에 동일한 파일 이름이 존재하는지 체크 - fname = os.path.join(self.options['infp_path'], t_filename) + fname = os.path.join(q_path, t_filename) t_quarantine_fname = fname count = 1 while True: @@ -661,13 +681,23 @@ def __quarantine_file(self, filename): else: break - shutil.move(filename, t_quarantine_fname) # 격리소로 이동 + if self.options['opt_move']: + shutil.move(filename, t_quarantine_fname) # 격리소로 이동 + elif self.options['opt_copy']: + shutil.copy(filename, t_quarantine_fname) # 격리소로 복사 + q_type = k2const.K2_QUARANTINE_COPY + is_success = True except (shutil.Error, OSError) as e: pass if isinstance(self.quarantine_callback_fn, types.FunctionType): - self.quarantine_callback_fn(filename, is_success) + if self.options['opt_copy']: + q_type = k2const.K2_QUARANTINE_COPY + else: + q_type = k2const.K2_QUARANTINE_MOVE + + self.quarantine_callback_fn(filename, is_success, q_type) # --------------------------------------------------------------------- # __update_process(self, file_struct, immediately_flag=False) @@ -1213,23 +1243,27 @@ def set_options(self, options=None): self.options['opt_nor'] = options.opt_nor self.options['opt_list'] = options.opt_list self.options['opt_move'] = options.opt_move + self.options['opt_copy'] = options.opt_copy self.options['opt_dis'] = options.opt_dis self.options['infp_path'] = options.infp_path self.options['opt_verbose'] = options.opt_verbose self.options['opt_sigtool'] = options.opt_sigtool self.options['opt_debug'] = options.opt_debug self.options['opt_feature'] = options.opt_feature + self.options['opt_qname'] = options.opt_qname else: # 기본값 설정 self.options['opt_arc'] = False self.options['opt_nor'] = False self.options['opt_list'] = False self.options['opt_move'] = False + self.options['opt_copy'] = False self.options['opt_dis'] = False self.options['infp_path'] = None self.options['opt_verbose'] = False self.options['opt_sigtool'] = False self.options['opt_debug'] = False self.options['opt_feature'] = 0xffffffff + self.options['opt_qname'] = False return True # ----------------------------------------------------------------- From 01397d054822b21a267ca8d3caa9d32671b6ae85 Mon Sep 17 00:00:00 2001 From: hanul93 Date: Wed, 4 Apr 2018 09:15:18 +0900 Subject: [PATCH 14/46] Added new options (--copy, --qname) --- Engine/kavcore/k2const.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/Engine/kavcore/k2const.py b/Engine/kavcore/k2const.py index bf0a895..ba77aca 100644 --- a/Engine/kavcore/k2const.py +++ b/Engine/kavcore/k2const.py @@ -5,7 +5,7 @@ # ------------------------------------------------------------------------- # 디버깅용 여부 설정하기 # ------------------------------------------------------------------------- -K2DEBUG = False +K2DEBUG = True # ------------------------------------------------------------------------- # 악성코드 치료를 지시하는 상수 @@ -15,3 +15,9 @@ K2_ACTION_DISINFECT = 1 K2_ACTION_DELETE = 2 K2_ACTION_QUIT = 3 + +# ------------------------------------------------------------------------- +# 악성코드 격리 상태 관련 상수 +# ------------------------------------------------------------------------- +K2_QUARANTINE_MOVE = 0 +K2_QUARANTINE_COPY = 1 From 86e4700e64495e32803cb377e68155968e689b1b Mon Sep 17 00:00:00 2001 From: hanul93 Date: Wed, 4 Apr 2018 09:16:36 +0900 Subject: [PATCH 15/46] Added new status (IDENTIFIED) --- Engine/k2.py | 8 +++++++- Engine/plugins/kernel.py | 1 + 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/Engine/k2.py b/Engine/k2.py index 8bf5406..0426729 100644 --- a/Engine/k2.py +++ b/Engine/k2.py @@ -693,16 +693,22 @@ def scan_callback(ret_value): if ret_value['result']: if ret_value['scan_state'] == kernel.INFECTED: state = 'infected' + message_color = FOREGROUND_RED | FOREGROUND_INTENSITY elif ret_value['scan_state'] == kernel.SUSPECT: state = 'suspect' + message_color = FOREGROUND_RED | FOREGROUND_INTENSITY elif ret_value['scan_state'] == kernel.WARNING: state = 'warning' + message_color = FOREGROUND_RED | FOREGROUND_INTENSITY + elif ret_value['scan_state'] == kernel.IDENTIFIED: + state = 'identified' + message_color = FOREGROUND_GREEN | FOREGROUND_INTENSITY else: state = 'unknown' + message_color = FOREGROUND_RED | FOREGROUND_INTENSITY vname = ret_value['virus_name'] message = '%s : %s' % (state, vname) - message_color = FOREGROUND_RED | FOREGROUND_INTENSITY else: if ret_value['scan_state'] == kernel.ERROR: message = ret_value['virus_name'] diff --git a/Engine/plugins/kernel.py b/Engine/plugins/kernel.py index 92ee8f3..dc7a1cb 100644 --- a/Engine/plugins/kernel.py +++ b/Engine/plugins/kernel.py @@ -7,6 +7,7 @@ INFECTED = 1 # 감염 SUSPECT = 2 # 의심 WARNING = 3 # 경고 +IDENTIFIED = 4 # 식별 ERROR = 99 # 에러 메시지 처리 From 290ae5e362f9d51bb0ad2076128b8af60fdfbd74 Mon Sep 17 00:00:00 2001 From: hanul93 Date: Thu, 5 Apr 2018 08:36:32 +0900 Subject: [PATCH 16/46] Added Yara rule's count --- Engine/plugins/yaraex.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Engine/plugins/yaraex.py b/Engine/plugins/yaraex.py index 7926091..790eddf 100644 --- a/Engine/plugins/yaraex.py +++ b/Engine/plugins/yaraex.py @@ -31,6 +31,10 @@ def init(self, plugins_path, verbose=False): # 플러그인 엔진 초기화 try: self.rules = yara.compile(os.path.join(plugins_path, 'yaraex.yar')) + + self.rule_count = 0 # Rule 개수 + for t in self.rules: + self.rule_count += 1 except: if self.verbose: print '[*] ERROR : YARA Rule compile' @@ -58,6 +62,7 @@ def getinfo(self): # 플러그인 엔진의 주요 정보 info['version'] = '1.0' # 버전 info['title'] = 'Yara Engine' # 엔진 설명 info['kmd_name'] = 'yaraex' # 엔진 파일 이름 + info['sig_num'] = self.rule_count # 진단/치료 가능한 악성코드 수 return info From 5710cb83b9138afd3c399fcb9a375a795877f903 Mon Sep 17 00:00:00 2001 From: hanul93 Date: Thu, 5 Apr 2018 08:38:00 +0900 Subject: [PATCH 17/46] Fixed detect yara rule name --- Engine/plugins/yaraex.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Engine/plugins/yaraex.py b/Engine/plugins/yaraex.py index 790eddf..9fcb764 100644 --- a/Engine/plugins/yaraex.py +++ b/Engine/plugins/yaraex.py @@ -78,8 +78,10 @@ def getinfo(self): # 플러그인 엔진의 주요 정보 def scan(self, filehandle, filename, fileformat, filename_ex): # 악성코드 검사 ret = self.rules.match(filename) if len(ret): - vname = ret[0].meta.get('KicomAV', ret[0].rule) # KicomAV meta 정보 확인 - return True, vname, 0, kernel.INFECTED + for t in ret: + vname = t.meta.get('KicomAV', None) # KicomAV meta 정보 확인 + if vname: + return True, vname, 0, kernel.INFECTED # 악성코드를 발견하지 못했음을 리턴한다. return False, '', -1, kernel.NOT_FOUND From f709ed7d8c0ff3806583efc8e72a027f4c9c472c Mon Sep 17 00:00:00 2001 From: hanul93 Date: Fri, 6 Apr 2018 16:07:35 +0900 Subject: [PATCH 18/46] Fixed file name assembly --- Engine/k2.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Engine/k2.py b/Engine/k2.py index 0426729..d3bc1cc 100644 --- a/Engine/k2.py +++ b/Engine/k2.py @@ -686,7 +686,8 @@ def scan_callback(ret_value): fs = ret_value['file_struct'] if len(fs.get_additional_filename()) != 0: - disp_name = '%s (%s)' % (fs.get_master_filename(), fs.get_additional_filename()) + f2 = convert_display_filename(fs.get_additional_filename()) + disp_name = '%s (%s)' % (fs.get_master_filename(), f2) else: disp_name = '%s' % (fs.get_master_filename()) From dd0f52b1e4fc32d5a3f8be4e60a22767bf58db54 Mon Sep 17 00:00:00 2001 From: hanul93 Date: Tue, 10 Apr 2018 07:23:01 +0900 Subject: [PATCH 19/46] Fixed check to resource size --- Engine/plugins/pe.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Engine/plugins/pe.py b/Engine/plugins/pe.py index 4569e77..bd7e4a3 100644 --- a/Engine/plugins/pe.py +++ b/Engine/plugins/pe.py @@ -262,12 +262,14 @@ def parse(self): if rsrc_rva: # 리소스가 존재한가? try: - rsrc_off, _ = self.rva_to_off(rsrc_rva) # 리소스 위치 변환 + rsrc_off, rsrc_idx = self.rva_to_off(rsrc_rva) # 리소스 위치 변환 if rsrc_off > self.filesize: raise ValueError - if len(mm[rsrc_off:rsrc_off + rsrc_size]) != rsrc_size: # 충분한 리소스가 존재하지 않음 + t_size = self.sections[rsrc_idx]['SizeRawData'] + if not (len(mm[rsrc_off:rsrc_off + rsrc_size]) == rsrc_size or \ + len(mm[rsrc_off:rsrc_off + t_size]) == t_size): # 충분한 리소스가 존재하지 않음 raise ValueError # Type 체크 From b97940c1c5f2a14a8979e12202b15b8c096af591 Mon Sep 17 00:00:00 2001 From: hanul93 Date: Tue, 10 Apr 2018 07:25:10 +0900 Subject: [PATCH 20/46] Fixed missing import API names --- Engine/plugins/pe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Engine/plugins/pe.py b/Engine/plugins/pe.py index bd7e4a3..0617082 100644 --- a/Engine/plugins/pe.py +++ b/Engine/plugins/pe.py @@ -134,7 +134,7 @@ def enum(*sequential, **named): 'IAT', 'DELAY_IMPORT', 'COM_DESCRIPTOR', 'RESERVED') -p_str = re.compile(r'[^\x00]+') # NULL 문자 직전까지 복사 +p_str = re.compile(r'[^\x00]*') # NULL 문자 직전까지 복사 class PE: From 79d59d7b6c98fec1436b1a0e2fe7f79fae6f87c6 Mon Sep 17 00:00:00 2001 From: hanul93 Date: Wed, 11 Apr 2018 09:33:56 +0900 Subject: [PATCH 21/46] Added a new scan area --- Engine/plugins/ve.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/Engine/plugins/ve.py b/Engine/plugins/ve.py index 6aa16d1..edd1fc4 100644 --- a/Engine/plugins/ve.py +++ b/Engine/plugins/ve.py @@ -19,6 +19,7 @@ # 1 : 실행 위치 (DOS-EP) # 2 : 실행 위치 (PE-EP) # 3 : 각 섹션의 처음 (PE, ELF 등) +# 4 : Attach의 처음 # Checksum1 : Flag, Offset, Length, CRC32 # Checksum2 : Flag, Offset, Length, CRC32 # MalwareName @@ -154,6 +155,16 @@ def scan(self, filehandle, filename, fileformat, filename_ex): # 악성코드 flag3_off.append(foff) self.flags_off[3] = flag3_off + # Attach 영역이 존재하는가? + if 'ff_attach' in fileformat: + # Flag - 4 : Attach 영역 + pos = fileformat['ff_attach']['Attached_Pos'] + size = fileformat['ff_attach']['Attached_Size'] + if size > 0x80: + flags.append([int('0004' + mm[pos:pos+2].encode('hex'), 16), + gen_checksums(mm[pos:pos + 0x80])]) + self.flags_off[4] = [pos] + cs_size = [6, 7, 8, 9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0x10, 0x18, 0x20, 0x28, 0x30, 0x38, 0x40, 0x48, 0x50, 0x58, 0x60, 0x68, 0x70, 0x78, 0x80] From 54a80306fe59992e135569d5cf1e7a81076823a3 Mon Sep 17 00:00:00 2001 From: hanul93 Date: Wed, 11 Apr 2018 09:35:41 +0900 Subject: [PATCH 22/46] Added the struct.error exception --- Engine/plugins/pe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Engine/plugins/pe.py b/Engine/plugins/pe.py index 0617082..6daae54 100644 --- a/Engine/plugins/pe.py +++ b/Engine/plugins/pe.py @@ -492,7 +492,7 @@ def parse(self): kavutil.vprint(None, 'Name', '%s' % repr(pe_format['PDB_Name'])) print - except ValueError: + except (ValueError, struct.error) as e: return None return pe_format From 2ee7e24b48dde8cd019e4be15423d8f47f49cf31 Mon Sep 17 00:00:00 2001 From: hanul93 Date: Mon, 16 Apr 2018 09:33:24 +0900 Subject: [PATCH 23/46] Improved speed for cab file extract --- Engine/plugins/cab.py | 41 ++++++++++++++++++++++++++++++----------- 1 file changed, 30 insertions(+), 11 deletions(-) diff --git a/Engine/plugins/cab.py b/Engine/plugins/cab.py index 259e2c1..eeee1a7 100644 --- a/Engine/plugins/cab.py +++ b/Engine/plugins/cab.py @@ -36,6 +36,8 @@ """ # from __future__ import print_function +import tempfile +import shutil import sys import os.path try: @@ -745,10 +747,14 @@ class KavMain: # --------------------------------------------------------------------- def init(self, plugins_path, verbose=False): # 플러그인 엔진 초기화 self.handle = {} + self.temp_path = {} if not LOAD_WINTYPES: return -1 + pid = os.getpid() + self.root_temp_path = os.path.join(tempfile.gettempdir(), 'ktmp%05x' % pid) + return 0 # 플러그인 엔진 초기화 성공 # --------------------------------------------------------------------- @@ -768,10 +774,10 @@ def getinfo(self): # 플러그인 엔진의 주요 정보 info = dict() # 사전형 변수 선언 info['author'] = 'Kei Choi' # 제작자 - info['version'] = '1.0' # 버전 + info['version'] = '1.1' # 버전 info['title'] = 'Cab Archive Engine' # 엔진 설명 info['kmd_name'] = 'cab' # 엔진 파일 이름 - info['engine_type'] = kernel.ARCHIVE_ENGINE # 엔진 타입 + info['engine_type'] = kernel.ARCHIVE_ENGINE # 엔진 타입 return info @@ -787,6 +793,7 @@ def __get_handle(self, filename): else: zfile = CabinetFile(filename) # cab 파일 열기 self.handle[filename] = zfile + self.temp_path[filename] = tempfile.mktemp(prefix='ktmp', dir=self.root_temp_path) return zfile @@ -824,11 +831,16 @@ def arclist(self, filename, fileformat): if 'ff_cab' in fileformat: zfile = self.__get_handle(filename) - try: - for name in zfile.namelist(): - file_scan_list.append(['arc_cab', name]) - except CabinetError: - pass + cab_extract_path = self.temp_path.get(filename, None) + if cab_extract_path: + zfile.extract(cab_extract_path) + + try: + for name in zfile.namelist(): + file_scan_list.append(['arc_cab', name]) + + except CabinetError: + pass return file_scan_list @@ -841,10 +853,12 @@ def arclist(self, filename, fileformat): # --------------------------------------------------------------------- def unarc(self, arc_engine_id, arc_name, fname_in_arc): if arc_engine_id == 'arc_cab': - zfile = self.__get_handle(arc_name) - data = zfile.read(fname_in_arc) - - return data + cab_extract_path = self.temp_path.get(arc_name, None) + tname = os.path.join(cab_extract_path, fname_in_arc) + if os.path.exists(tname): + data = open(tname, 'rb').read() + os.remove(tname) + return data return None @@ -856,4 +870,9 @@ def arcclose(self): for fname in self.handle.keys(): zfile = self.handle[fname] zfile.close() + + cab_extract_path = self.temp_path.get(fname, None) + shutil.rmtree(cab_extract_path) + self.handle.pop(fname) + self.temp_path.pop(fname) From 0350ece8a340211da3f9e003143bd65f54652b61 Mon Sep 17 00:00:00 2001 From: hanul93 Date: Mon, 16 Apr 2018 16:21:56 +0900 Subject: [PATCH 24/46] Fixed crc32 of base offset --- Engine/plugins/ve.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Engine/plugins/ve.py b/Engine/plugins/ve.py index edd1fc4..90706a9 100644 --- a/Engine/plugins/ve.py +++ b/Engine/plugins/ve.py @@ -253,7 +253,9 @@ def disinfect(self, filename, malware_id): # 악성코드 치료 # --------------------------------------------------------------------- def __get_data_crc32(self, buf, flag, off, size): crc32s = [] - for base_off in self.flags_off[flag]: + + base_offs = self.flags_off.get(flag, []) + for base_off in base_offs: crc32s.append(int(gen_checksum(buf, base_off + off, size), 16)) return crc32s From 8887cdbaa201ae468f016a48cfc49c40571d0233 Mon Sep 17 00:00:00 2001 From: hanul93 Date: Mon, 16 Apr 2018 16:28:11 +0900 Subject: [PATCH 25/46] Added new scan areas --- Engine/plugins/ve.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Engine/plugins/ve.py b/Engine/plugins/ve.py index 90706a9..e6cdbdf 100644 --- a/Engine/plugins/ve.py +++ b/Engine/plugins/ve.py @@ -35,7 +35,7 @@ def gen_checksums(buf): patterns = [] # 처음 10개는 앞쪽 6, 7, 8, 9 ... 0xF - for i in range(6, 0x10): + for i in range(1, 0x10): patterns.append(int(gen_checksum(buf, 0, i), 16)) # 나머지 15개는 0x10, 0x18, 0x20 ... 0x80 @@ -165,7 +165,7 @@ def scan(self, filehandle, filename, fileformat, filename_ex): # 악성코드 gen_checksums(mm[pos:pos + 0x80])]) self.flags_off[4] = [pos] - cs_size = [6, 7, 8, 9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0x10, 0x18, + cs_size = [1, 2, 3, 4, 5, 6, 7, 8, 9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0x10, 0x18, 0x20, 0x28, 0x30, 0x38, 0x40, 0x48, 0x50, 0x58, 0x60, 0x68, 0x70, 0x78, 0x80] From 6c58f18caf79c7a83d61592ee8751b07e3f42427 Mon Sep 17 00:00:00 2001 From: hanul93 Date: Thu, 19 Apr 2018 17:55:59 +0900 Subject: [PATCH 26/46] Fixed remove tree if it exists --- Engine/plugins/cab.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Engine/plugins/cab.py b/Engine/plugins/cab.py index eeee1a7..c32cc1e 100644 --- a/Engine/plugins/cab.py +++ b/Engine/plugins/cab.py @@ -872,7 +872,8 @@ def arcclose(self): zfile.close() cab_extract_path = self.temp_path.get(fname, None) - shutil.rmtree(cab_extract_path) + if os.path.exists(cab_extract_path): + shutil.rmtree(cab_extract_path) self.handle.pop(fname) self.temp_path.pop(fname) From 43d19941dc710a9559ce0ece509b5b058da418d6 Mon Sep 17 00:00:00 2001 From: hanul93 Date: Tue, 1 May 2018 08:58:56 +0900 Subject: [PATCH 27/46] Changed NSIS format process in nsis.py to pe.py --- Engine/plugins/nsis.py | 49 ++++++++++++++++++++-------------------- Engine/plugins/pe.py | 51 +++++++++++++++++++++++++++++++++++++----- 2 files changed, 70 insertions(+), 30 deletions(-) diff --git a/Engine/plugins/nsis.py b/Engine/plugins/nsis.py index 0906eca..1d4af01 100644 --- a/Engine/plugins/nsis.py +++ b/Engine/plugins/nsis.py @@ -94,7 +94,7 @@ class NSIS: TYPE_ZLIB = 2 TYPE_COPY = 3 - def __init__(self, filename, verbose): + def __init__(self, filename, offset=0, verbose=False): self.verbose = verbose self.filename = filename self.fp = None @@ -104,9 +104,22 @@ def __init__(self, filename, verbose): self.body_data = None self.case_type = 0 + self.temp_name = None + self.start_offset = offset + def parse(self): - self.fp = open(self.filename, 'rb') - fsize = os.path.getsize(self.filename) + self.temp_name = tempfile.mktemp(prefix='knsf') + + # NSIS 위치 읽기 + fp = open(self.filename, 'rb') + fp.seek(self.start_offset) + data = fp.read() + fp.close() + + open(self.temp_name, 'wb').write(data) + + self.fp = open(self.temp_name, 'rb') + fsize = os.path.getsize(self.temp_name) if fsize == 0: return False @@ -261,6 +274,10 @@ def __get_comp_type(self, data_size): def __del(self): self.close() + if self.temp_name: + os.unlink(self.temp_name) + + class NSISHeader: def __init__(self, data): self.mm = data @@ -497,35 +514,16 @@ def getinfo(self): # 플러그인 엔진의 주요 정보 # 입력값 : filename - 파일 이름 # 리턴값 : 압축 파일 핸들 # --------------------------------------------------------------------- - def __get_handle(self, filename): + def __get_handle(self, filename, offset=0): if filename in self.handle: # 이전에 열린 핸들이 존재하는가? zfile = self.handle.get(filename, None) else: - zfile = NSIS(filename, self.verbose) # nsis 파일 열기 + zfile = NSIS(filename, offset, self.verbose) # nsis 파일 열기 if zfile.parse(): self.handle[filename] = zfile - else: - None return zfile - # --------------------------------------------------------------------- - # format(self, filehandle, filename, filename_ex) - # 파일 포맷을 분석한다. - # 입력값 : filehandle - 파일 핸들 - # filename - 파일 이름 - # filename_ex - 압축 파일 내부 파일 이름 - # 리턴값 : {파일 포맷 분석 정보} or None - # --------------------------------------------------------------------- - def format(self, filehandle, filename, filename_ex): - ret = {} - - mm = filehandle - if mm[4:20] == '\xEF\xBE\xAD\xDENullsoftInst': - ret = {'ff_nsis': 'NSIS'} - - return ret - # --------------------------------------------------------------------- # arclist(self, filename, fileformat) # 압축 파일 내부의 파일 목록을 얻는다. @@ -538,7 +536,8 @@ def arclist(self, filename, fileformat): # 미리 분석된 파일 포맷중에 ff_nsis 포맷이 있는가? if 'ff_nsis' in fileformat: - zfile = self.__get_handle(filename) + off = fileformat['ff_nsis']['Offset'] + zfile = self.__get_handle(filename, off) for name in zfile.namelist(): file_scan_list.append(['arc_nsis', name]) diff --git a/Engine/plugins/pe.py b/Engine/plugins/pe.py index 6daae54..6ed2433 100644 --- a/Engine/plugins/pe.py +++ b/Engine/plugins/pe.py @@ -527,6 +527,20 @@ class KavMain: # --------------------------------------------------------------------- def init(self, plugins_path, verbose=False): # 플러그인 엔진 초기화 self.verbose = verbose + + # NSIS 코드 패턴 + ''' + 81 7D DC EF BE AD DE cmp [ebp+var_24], 0DEADBEEFh + 75 69 jnz short loc_402D79 + 81 7D E8 49 6E 73 74 cmp [ebp+var_18], 'tsnI' + 75 60 jnz short loc_402D79 + 81 7D E4 73 6F 66 74 cmp [ebp+var_1C], 'tfos' + 75 57 jnz short loc_402D79 + 81 7D E0 4E 75 6C 6C cmp [ebp+var_20], 'lluN' + ''' + + self.p_nsis = '817DDCEFBEADDE7569817DE8496E7374'.decode('hex') + return 0 # 플러그인 엔진 초기화 성공 # --------------------------------------------------------------------- @@ -606,11 +620,38 @@ def format(self, filehandle, filename, filename_ex): attach_size = file_size - pe_size if pe_size < file_size and pe_size != 0: - fileformat = { # 포맷 정보를 담을 공간 - 'Attached_Pos': pe_size, - 'Attached_Size': attach_size - } - ret['ff_attach'] = fileformat + mm = filehandle + + # NSIS 코드가 .text 영역에 존재하는지 체크한다. + text_sec = pe_format['Sections'][0] + if pe_file_align: + off = (text_sec['PointerRawData'] / pe_file_align) * pe_file_align + else: + off = text_sec['PointerRawData'] + size = text_sec['SizeRawData'] + + if size: + if mm[off:off + size].find(self.p_nsis) != -1: + # PE 파일에 뒤쪽에 데이터가 있다면 NSIS 파일인지 분석하기 + i = 1 + while True: + t = mm[i * 0x200 + 4:i * 0x200 + 20] + if len(t) != 16: + break + + if t == '\xEF\xBE\xAD\xDENullsoftInst': + ret['ff_nsis'] = {'Offset': i * 0x200} + break + + i += 1 + + # Attach 처리하기 (단 NSIS가 존재하면 처리하지 않음) + if not('ff_nsis' in ret): + fileformat = { # 포맷 정보를 담을 공간 + 'Attached_Pos': pe_size, + 'Attached_Size': attach_size + } + ret['ff_attach'] = fileformat return ret From efff8b2454b9544b62111979aea17c739baef343 Mon Sep 17 00:00:00 2001 From: hanul93 Date: Thu, 3 May 2018 16:38:07 +0900 Subject: [PATCH 28/46] Added patterns --- Engine/plugins/yaraex.yar | 62 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 60 insertions(+), 2 deletions(-) diff --git a/Engine/plugins/yaraex.yar b/Engine/plugins/yaraex.yar index d927a1e..9696fb4 100644 --- a/Engine/plugins/yaraex.yar +++ b/Engine/plugins/yaraex.yar @@ -1,10 +1,20 @@ +rule IsPeFile { +meta: + ref = "https://github.com/godaddy/yara-rules/blob/master/example.yara" +strings: + $mz = "MZ" + + condition: + $mz at 0 and uint32(uint32(0x3C)) == 0x4550 +} + rule Hwp_Malware1 { meta: author = "Kei Choi (hanul93@gmail.com)" date = "2017-08-23" KicomAV = "Trojan.PS.Agent.yra" - strings: +strings: $regex1 = /<[0-9A-Fa-f]{500,}/ $string1 = "1 bitshift add" nocase $string2 = "(KERNEL32.DLL)" nocase @@ -21,7 +31,7 @@ meta: author = "Kei Choi (hanul93@gmail.com)" date = "2017-08-23" KicomAV = "Trojan.PS.Agent.yrb" - strings: +strings: $regex1 = /<[0-9A-Fa-f]{500,}/ $regex2 = "90909090" $string1 = "putinterval def" nocase @@ -44,3 +54,51 @@ meta: condition: all of them } + + +rule APT34_Malware_Exeruner { + meta: + description = "Detects APT 34 malware" + author = "Florian Roth" + reference = "https://www.fireeye.com/blog/threat-research/2017/12/targeted-attack-in-middle-east-by-apt34.html" + date = "2017-12-07" + hash1 = "c75c85acf0e0092d688a605778425ba4cb2a57878925eee3dc0f4dd8d636a27a" + KicomAV = "Trojan-Dropper.MSIL.Agent.gen" + strings: + $x1 = "\\obj\\Debug\\exeruner.pdb" ascii + $x2 = "\"wscript.shell`\")`nShell0.run" wide + $x3 = "powershell.exe -exec bypass -enc \" + ${global:$http_ag} +" wide + $x4 = "/c powershell -exec bypass -window hidden -nologo -command " fullword wide + $x5 = "\\UpdateTasks\\JavaUpdatesTasksHosts\\" wide + $x6 = "schtasks /create /F /ru SYSTEM /sc minute /mo 1 /tn" wide + $x7 = "UpdateChecker.ps1 & ping 127.0.0.1" wide + $s8 = "exeruner.exe" fullword wide + $s9 = "${global:$address1} = $env:ProgramData + \"\\Windows\\Microsoft\\java\";" fullword wide + $s10 = "C:\\ProgramData\\Windows\\Microsoft\\java" fullword wide + $s11 = "function runByVBS" fullword wide + $s12 = "$84e31856-683b-41c0-81dd-a02d8b795026" fullword ascii + $s13 = "${global:$dns_ag} = \"aQBmACAAKAAoAEcAZQB0AC0AVwBtAGk" wide + condition: + IsPeFile and filesize < 100KB and 1 of them +} + +rule APT34_Malware_HTA { + meta: + description = "Detects APT 34 malware" + author = "Florian Roth" + reference = "https://www.fireeye.com/blog/threat-research/2017/12/targeted-attack-in-middle-east-by-apt34.html" + date = "2017-12-07" + hash1 = "f6fa94cc8efea0dbd7d4d4ca4cf85ac6da97ee5cf0c59d16a6aafccd2b9d8b9a" + KicomAV = "Trojan.VBS.Powbow.gen" + strings: + $x1 = "WshShell.run \"cmd.exe /C C:\\ProgramData\\" ascii + $x2 = ".bat&ping 127.0.0.1 -n 6 > nul&wscript /b" ascii + $x3 = "cmd.exe /C certutil -f -decode C:\\ProgramData\\" ascii + $x4 = "a.WriteLine(\"set Shell0 = CreateObject(" ascii + $x5 = "& vbCrLf & \"Shell0.run" ascii + + $s1 = "Blog.tkacprow.pl: HTA Hello World!" fullword ascii + $s2 = "" fullword ascii + condition: + filesize < 60KB and ( 1 of ($x*) or all of ($s*) ) +} \ No newline at end of file From 68f5724cfa0c9b28f613f626b445ffe55eb84c20 Mon Sep 17 00:00:00 2001 From: hanul93 Date: Tue, 8 May 2018 17:36:06 +0900 Subject: [PATCH 29/46] Added rar archive engine --- Engine/plugins/kicom.lst | 1 + Engine/plugins/rar.py | 3163 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 3164 insertions(+) create mode 100644 Engine/plugins/rar.py diff --git a/Engine/plugins/kicom.lst b/Engine/plugins/kicom.lst index fee353f..cf5444e 100644 --- a/Engine/plugins/kicom.lst +++ b/Engine/plugins/kicom.lst @@ -21,6 +21,7 @@ pyz.kmd carch.kmd olenative.kmd attach.kmd +rar.kmd zip.kmd alz.kmd egg.kmd diff --git a/Engine/plugins/rar.py b/Engine/plugins/rar.py new file mode 100644 index 0000000..98e19de --- /dev/null +++ b/Engine/plugins/rar.py @@ -0,0 +1,3163 @@ +# -*- coding:utf-8 -*- +# 출처 : https://github.com/markokr/rarfile + +# rarfile.py +# +# Copyright (c) 2005-2016 Marko Kreen +# +# Permission to use, copy, modify, and/or distribute this software for any +# purpose with or without fee is hereby granted, provided that the above +# copyright notice and this permission notice appear in all copies. +# +# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +r"""RAR archive reader. + +This is Python module for Rar archive reading. The interface +is made as :mod:`zipfile`-like as possible. + +Basic logic: + - Parse archive structure with Python. + - Extract non-compressed files with Python + - Extract compressed files with unrar. + - Optionally write compressed data to temp file to speed up unrar, + otherwise it needs to scan whole archive on each execution. + +Example:: + + import rarfile + + rf = rarfile.RarFile('myarchive.rar') + for f in rf.infolist(): + print f.filename, f.file_size + if f.filename == 'README': + print(rf.read(f)) + +Archive files can also be accessed via file-like object returned +by :meth:`RarFile.open`:: + + import rarfile + + with rarfile.RarFile('archive.rar') as rf: + with rf.open('README') as f: + for ln in f: + print(ln.strip()) + +There are few module-level parameters to tune behaviour, +here they are with defaults, and reason to change it:: + + import rarfile + + # Set to full path of unrar.exe if it is not in PATH + rarfile.UNRAR_TOOL = "unrar" + + # Set to '\\' to be more compatible with old rarfile + rarfile.PATH_SEP = '/' + +For more details, refer to source. + +""" + +from __future__ import division, print_function + +## +## Imports and compat - support both Python 2.x and 3.x +## + +import sys +import os +import errno +import struct + +from struct import pack, unpack, Struct +from binascii import crc32, hexlify +from tempfile import mkstemp +from subprocess import Popen, PIPE, STDOUT +from io import RawIOBase +from hashlib import sha1, sha256 +from hmac import HMAC +from datetime import datetime, timedelta, tzinfo + +# fixed offset timezone, for UTC +try: + from datetime import timezone +except ImportError: + class timezone(tzinfo): + """Compat timezone.""" + __slots__ = ('_ofs', '_name') + _DST = timedelta(0) + + def __init__(self, offset, name): + super(timezone, self).__init__() + self._ofs, self._name = offset, name + + def utcoffset(self, dt): + return self._ofs + + def tzname(self, dt): + return self._name + + def dst(self, dt): + return self._DST + +# only needed for encryped headers +try: + try: + from cryptography.hazmat.primitives.ciphers import algorithms, modes, Cipher + from cryptography.hazmat.backends import default_backend + from cryptography.hazmat.primitives import hashes + from cryptography.hazmat.primitives.kdf import pbkdf2 + + class AES_CBC_Decrypt(object): + """Decrypt API""" + def __init__(self, key, iv): + ciph = Cipher(algorithms.AES(key), modes.CBC(iv), default_backend()) + self.decrypt = ciph.decryptor().update + + def pbkdf2_sha256(password, salt, iters): + """PBKDF2 with HMAC-SHA256""" + ctx = pbkdf2.PBKDF2HMAC(hashes.SHA256(), 32, salt, iters, default_backend()) + return ctx.derive(password) + + except ImportError: + from Crypto.Cipher import AES + from Crypto.Protocol import KDF + + class AES_CBC_Decrypt(object): + """Decrypt API""" + def __init__(self, key, iv): + self.decrypt = AES.new(key, AES.MODE_CBC, iv).decrypt + + def pbkdf2_sha256(password, salt, iters): + """PBKDF2 with HMAC-SHA256""" + return KDF.PBKDF2(password, salt, 32, iters, hmac_sha256) + + _have_crypto = 1 +except ImportError: + _have_crypto = 0 + +try: + try: + from hashlib import blake2s + _have_blake2 = True + except ImportError: + from pyblake2 import blake2s + _have_blake2 = True +except ImportError: + _have_blake2 = False + +# compat with 2.x +if sys.hexversion < 0x3000000: + def rar_crc32(data, prev=0): + """CRC32 with unsigned values. + """ + if (prev > 0) and (prev & 0x80000000): + prev -= (1 << 32) + res = crc32(data, prev) + if res < 0: + res += (1 << 32) + return res + tohex = hexlify + _byte_code = ord +else: # pragma: no cover + def tohex(data): + """Return hex string.""" + return hexlify(data).decode('ascii') + rar_crc32 = crc32 + unicode = str + _byte_code = int # noqa + +# don't break 2.6 completely +if sys.hexversion < 0x2070000: + memoryview = lambda x: x # noqa + +__version__ = '3.0' + +# export only interesting items +__all__ = ['is_rarfile', 'RarInfo', 'RarFile', 'RarExtFile'] + +## +## Module configuration. Can be tuned after importing. +## + +#: default fallback charset +DEFAULT_CHARSET = "windows-1252" + +#: list of encodings to try, with fallback to DEFAULT_CHARSET if none succeed +TRY_ENCODINGS = ('utf8', 'utf-16le') + +#: 'unrar', 'rar' or full path to either one +UNRAR_TOOL = "unrar" + +#: Command line args to use for opening file for reading. +OPEN_ARGS = ('p', '-inul') + +#: Command line args to use for extracting file to disk. +EXTRACT_ARGS = ('x', '-y', '-idq') + +#: args for testrar() +TEST_ARGS = ('t', '-idq') + +# +# Allow use of tool that is not compatible with unrar. +# +# By default use 'bsdtar' which is 'tar' program that +# sits on top of libarchive. +# +# Problems with libarchive RAR backend: +# - Does not support solid archives. +# - Does not support password-protected archives. +# + +ALT_TOOL = 'bsdtar' +ALT_OPEN_ARGS = ('-x', '--to-stdout', '-f') +ALT_EXTRACT_ARGS = ('-x', '-f') +ALT_TEST_ARGS = ('-t', '-f') +ALT_CHECK_ARGS = ('--help',) + +#ALT_TOOL = 'unar' +#ALT_OPEN_ARGS = ('-o', '-') +#ALT_EXTRACT_ARGS = () +#ALT_TEST_ARGS = ('-test',) # does not work +#ALT_CHECK_ARGS = ('-v',) + +#: whether to speed up decompression by using tmp archive +USE_EXTRACT_HACK = 1 + +#: limit the filesize for tmp archive usage +HACK_SIZE_LIMIT = 20 * 1024 * 1024 + +#: Separator for path name components. RAR internally uses '\\'. +#: Use '/' to be similar with zipfile. +PATH_SEP = '/' + +## +## rar constants +## + +# block types +RAR_BLOCK_MARK = 0x72 # r +RAR_BLOCK_MAIN = 0x73 # s +RAR_BLOCK_FILE = 0x74 # t +RAR_BLOCK_OLD_COMMENT = 0x75 # u +RAR_BLOCK_OLD_EXTRA = 0x76 # v +RAR_BLOCK_OLD_SUB = 0x77 # w +RAR_BLOCK_OLD_RECOVERY = 0x78 # x +RAR_BLOCK_OLD_AUTH = 0x79 # y +RAR_BLOCK_SUB = 0x7a # z +RAR_BLOCK_ENDARC = 0x7b # { + +# flags for RAR_BLOCK_MAIN +RAR_MAIN_VOLUME = 0x0001 +RAR_MAIN_COMMENT = 0x0002 +RAR_MAIN_LOCK = 0x0004 +RAR_MAIN_SOLID = 0x0008 +RAR_MAIN_NEWNUMBERING = 0x0010 +RAR_MAIN_AUTH = 0x0020 +RAR_MAIN_RECOVERY = 0x0040 +RAR_MAIN_PASSWORD = 0x0080 +RAR_MAIN_FIRSTVOLUME = 0x0100 +RAR_MAIN_ENCRYPTVER = 0x0200 + +# flags for RAR_BLOCK_FILE +RAR_FILE_SPLIT_BEFORE = 0x0001 +RAR_FILE_SPLIT_AFTER = 0x0002 +RAR_FILE_PASSWORD = 0x0004 +RAR_FILE_COMMENT = 0x0008 +RAR_FILE_SOLID = 0x0010 +RAR_FILE_DICTMASK = 0x00e0 +RAR_FILE_DICT64 = 0x0000 +RAR_FILE_DICT128 = 0x0020 +RAR_FILE_DICT256 = 0x0040 +RAR_FILE_DICT512 = 0x0060 +RAR_FILE_DICT1024 = 0x0080 +RAR_FILE_DICT2048 = 0x00a0 +RAR_FILE_DICT4096 = 0x00c0 +RAR_FILE_DIRECTORY = 0x00e0 +RAR_FILE_LARGE = 0x0100 +RAR_FILE_UNICODE = 0x0200 +RAR_FILE_SALT = 0x0400 +RAR_FILE_VERSION = 0x0800 +RAR_FILE_EXTTIME = 0x1000 +RAR_FILE_EXTFLAGS = 0x2000 + +# flags for RAR_BLOCK_ENDARC +RAR_ENDARC_NEXT_VOLUME = 0x0001 +RAR_ENDARC_DATACRC = 0x0002 +RAR_ENDARC_REVSPACE = 0x0004 +RAR_ENDARC_VOLNR = 0x0008 + +# flags common to all blocks +RAR_SKIP_IF_UNKNOWN = 0x4000 +RAR_LONG_BLOCK = 0x8000 + +# Host OS types +RAR_OS_MSDOS = 0 +RAR_OS_OS2 = 1 +RAR_OS_WIN32 = 2 +RAR_OS_UNIX = 3 +RAR_OS_MACOS = 4 +RAR_OS_BEOS = 5 + +# Compression methods - '0'..'5' +RAR_M0 = 0x30 +RAR_M1 = 0x31 +RAR_M2 = 0x32 +RAR_M3 = 0x33 +RAR_M4 = 0x34 +RAR_M5 = 0x35 + +# +# RAR5 constants +# + +RAR5_BLOCK_MAIN = 1 +RAR5_BLOCK_FILE = 2 +RAR5_BLOCK_SERVICE = 3 +RAR5_BLOCK_ENCRYPTION = 4 +RAR5_BLOCK_ENDARC = 5 + +RAR5_BLOCK_FLAG_EXTRA_DATA = 0x01 +RAR5_BLOCK_FLAG_DATA_AREA = 0x02 +RAR5_BLOCK_FLAG_SKIP_IF_UNKNOWN = 0x04 +RAR5_BLOCK_FLAG_SPLIT_BEFORE = 0x08 +RAR5_BLOCK_FLAG_SPLIT_AFTER = 0x10 +RAR5_BLOCK_FLAG_DEPENDS_PREV = 0x20 +RAR5_BLOCK_FLAG_KEEP_WITH_PARENT = 0x40 + +RAR5_MAIN_FLAG_ISVOL = 0x01 +RAR5_MAIN_FLAG_HAS_VOLNR = 0x02 +RAR5_MAIN_FLAG_SOLID = 0x04 +RAR5_MAIN_FLAG_RECOVERY = 0x08 +RAR5_MAIN_FLAG_LOCKED = 0x10 + +RAR5_FILE_FLAG_ISDIR = 0x01 +RAR5_FILE_FLAG_HAS_MTIME = 0x02 +RAR5_FILE_FLAG_HAS_CRC32 = 0x04 +RAR5_FILE_FLAG_UNKNOWN_SIZE = 0x08 + +RAR5_COMPR_SOLID = 0x40 + +RAR5_ENC_FLAG_HAS_CHECKVAL = 0x01 + +RAR5_ENDARC_FLAG_NEXT_VOL = 0x01 + +RAR5_XFILE_ENCRYPTION = 1 +RAR5_XFILE_HASH = 2 +RAR5_XFILE_TIME = 3 +RAR5_XFILE_VERSION = 4 +RAR5_XFILE_REDIR = 5 +RAR5_XFILE_OWNER = 6 +RAR5_XFILE_SERVICE = 7 + +RAR5_XTIME_UNIXTIME = 0x01 +RAR5_XTIME_HAS_MTIME = 0x02 +RAR5_XTIME_HAS_CTIME = 0x04 +RAR5_XTIME_HAS_ATIME = 0x08 + +RAR5_XENC_CIPHER_AES256 = 0 + +RAR5_XENC_CHECKVAL = 0x01 +RAR5_XENC_TWEAKED = 0x02 + +RAR5_XHASH_BLAKE2SP = 0 + +RAR5_XREDIR_UNIX_SYMLINK = 1 +RAR5_XREDIR_WINDOWS_SYMLINK = 2 +RAR5_XREDIR_WINDOWS_JUNCTION = 3 +RAR5_XREDIR_HARD_LINK = 4 +RAR5_XREDIR_FILE_COPY = 5 + +RAR5_XREDIR_ISDIR = 0x01 + +RAR5_XOWNER_UNAME = 0x01 +RAR5_XOWNER_GNAME = 0x02 +RAR5_XOWNER_UID = 0x04 +RAR5_XOWNER_GID = 0x08 + +RAR5_OS_WINDOWS = 0 +RAR5_OS_UNIX = 1 + +## +## internal constants +## + +RAR_ID = b"Rar!\x1a\x07\x00" +RAR5_ID = b"Rar!\x1a\x07\x01\x00" +ZERO = b'\0' +EMPTY = b'' +UTC = timezone(timedelta(0), 'UTC') +BSIZE = 32 * 1024 + +def _get_rar_version(xfile): + """Check quickly whether file is rar archive. + """ + with XFile(xfile) as fd: + buf = fd.read(len(RAR5_ID)) + if buf.startswith(RAR_ID): + return 3 + elif buf.startswith(RAR5_ID): + return 5 + return 0 + +## +## Public interface +## + +def is_rarfile(xfile): + """Check quickly whether file is rar archive. + """ + return _get_rar_version(xfile) > 0 + +class Error(Exception): + """Base class for rarfile errors.""" + +class BadRarFile(Error): + """Incorrect data in archive.""" + +class NotRarFile(Error): + """The file is not RAR archive.""" + +class BadRarName(Error): + """Cannot guess multipart name components.""" + +class NoRarEntry(Error): + """File not found in RAR""" + +class PasswordRequired(Error): + """File requires password""" + +class NeedFirstVolume(Error): + """Need to start from first volume.""" + +class NoCrypto(Error): + """Cannot parse encrypted headers - no crypto available.""" + +class RarExecError(Error): + """Problem reported by unrar/rar.""" + +class RarWarning(RarExecError): + """Non-fatal error""" + +class RarFatalError(RarExecError): + """Fatal error""" + +class RarCRCError(RarExecError): + """CRC error during unpacking""" + +class RarLockedArchiveError(RarExecError): + """Must not modify locked archive""" + +class RarWriteError(RarExecError): + """Write error""" + +class RarOpenError(RarExecError): + """Open error""" + +class RarUserError(RarExecError): + """User error""" + +class RarMemoryError(RarExecError): + """Memory error""" + +class RarCreateError(RarExecError): + """Create error""" + +class RarNoFilesError(RarExecError): + """No files that match pattern were found""" + +class RarUserBreak(RarExecError): + """User stop""" + +class RarWrongPassword(RarExecError): + """Incorrect password""" + +class RarUnknownError(RarExecError): + """Unknown exit code""" + +class RarSignalExit(RarExecError): + """Unrar exited with signal""" + +class RarCannotExec(RarExecError): + """Executable not found.""" + + +class RarInfo(object): + r"""An entry in rar archive. + + RAR3 extended timestamps are :class:`datetime.datetime` objects without timezone. + RAR5 extended timestamps are :class:`datetime.datetime` objects with UTC timezone. + + Attributes: + + filename + File name with relative path. + Path separator is '/'. Always unicode string. + + date_time + File modification timestamp. As tuple of (year, month, day, hour, minute, second). + RAR5 allows archives where it is missing, it's None then. + + file_size + Uncompressed size. + + compress_size + Compressed size. + + compress_type + Compression method: one of :data:`RAR_M0` .. :data:`RAR_M5` constants. + + extract_version + Minimal Rar version needed for decompressing. As (major*10 + minor), + so 2.9 is 29. + + RAR3: 10, 20, 29 + + RAR5 does not have such field in archive, it's simply set to 50. + + host_os + Host OS type, one of RAR_OS_* constants. + + RAR3: :data:`RAR_OS_WIN32`, :data:`RAR_OS_UNIX`, :data:`RAR_OS_MSDOS`, + :data:`RAR_OS_OS2`, :data:`RAR_OS_BEOS`. + + RAR5: :data:`RAR_OS_WIN32`, :data:`RAR_OS_UNIX`. + + mode + File attributes. May be either dos-style or unix-style, depending on host_os. + + mtime + File modification time. Same value as :attr:`date_time` + but as :class:`datetime.datetime` object with extended precision. + + ctime + Optional time field: creation time. As :class:`datetime.datetime` object. + + atime + Optional time field: last access time. As :class:`datetime.datetime` object. + + arctime + Optional time field: archival time. As :class:`datetime.datetime` object. + (RAR3-only) + + CRC + CRC-32 of uncompressed file, unsigned int. + + RAR5: may be None. + + blake2sp_hash + Blake2SP hash over decompressed data. (RAR5-only) + + comment + Optional file comment field. Unicode string. (RAR3-only) + + file_redir + If not None, file is link of some sort. Contains tuple of (type, flags, target). + (RAR5-only) + + Type is one of constants: + + :data:`RAR5_XREDIR_UNIX_SYMLINK` + unix symlink to target. + :data:`RAR5_XREDIR_WINDOWS_SYMLINK` + windows symlink to target. + :data:`RAR5_XREDIR_WINDOWS_JUNCTION` + windows junction. + :data:`RAR5_XREDIR_HARD_LINK` + hard link to target. + :data:`RAR5_XREDIR_FILE_COPY` + current file is copy of another archive entry. + + Flags may contain :data:`RAR5_XREDIR_ISDIR` bit. + + volume + Volume nr, starting from 0. + + volume_file + Volume file name, where file starts. + + """ + + # zipfile-compatible fields + filename = None + file_size = None + compress_size = None + date_time = None + comment = None + CRC = None + volume = None + orig_filename = None + + # optional extended time fields, datetime() objects. + mtime = None + ctime = None + atime = None + + extract_version = None + mode = None + host_os = None + compress_type = None + + # rar3-only fields + comment = None + arctime = None + + # rar5-only fields + blake2sp_hash = None + file_redir = None + + # internal fields + flags = 0 + type = None + + def isdir(self): + """Returns True if entry is a directory. + """ + if self.type == RAR_BLOCK_FILE: + return (self.flags & RAR_FILE_DIRECTORY) == RAR_FILE_DIRECTORY + return False + + def needs_password(self): + """Returns True if data is stored password-protected. + """ + if self.type == RAR_BLOCK_FILE: + return (self.flags & RAR_FILE_PASSWORD) > 0 + return False + + +class RarFile(object): + """Parse RAR structure, provide access to files in archive. + """ + + #: Archive comment. Unicode string or None. + comment = None + + def __init__(self, rarfile, mode="r", charset=None, info_callback=None, + crc_check=True, errors="stop"): + """Open and parse a RAR archive. + + Parameters: + + rarfile + archive file name + mode + only 'r' is supported. + charset + fallback charset to use, if filenames are not already Unicode-enabled. + info_callback + debug callback, gets to see all archive entries. + crc_check + set to False to disable CRC checks + errors + Either "stop" to quietly stop parsing on errors, + or "strict" to raise errors. Default is "stop". + """ + self._rarfile = rarfile + self._charset = charset or DEFAULT_CHARSET + self._info_callback = info_callback + self._crc_check = crc_check + self._password = None + self._file_parser = None + + if errors == "stop": + self._strict = False + elif errors == "strict": + self._strict = True + else: + raise ValueError("Invalid value for 'errors' parameter.") + + if mode != "r": + raise NotImplementedError("RarFile supports only mode=r") + + self._parse() + + def __enter__(self): + """Open context.""" + return self + + def __exit__(self, typ, value, traceback): + """Exit context""" + self.close() + + def setpassword(self, password): + """Sets the password to use when extracting. + """ + self._password = password + if self._file_parser: + if self._file_parser.has_header_encryption(): + self._file_parser = None + if not self._file_parser: + self._parse() + else: + self._file_parser.setpassword(self._password) + + def needs_password(self): + """Returns True if any archive entries require password for extraction. + """ + return self._file_parser.needs_password() + + def namelist(self): + """Return list of filenames in archive. + """ + return [f.filename for f in self.infolist()] + + def infolist(self): + """Return RarInfo objects for all files/directories in archive. + """ + return self._file_parser.infolist() + + def volumelist(self): + """Returns filenames of archive volumes. + + In case of single-volume archive, the list contains + just the name of main archive file. + """ + return self._file_parser.volumelist() + + def getinfo(self, fname): + """Return RarInfo for file. + """ + return self._file_parser.getinfo(fname) + + def open(self, fname, mode='r', psw=None): + """Returns file-like object (:class:`RarExtFile`) from where the data can be read. + + The object implements :class:`io.RawIOBase` interface, so it can + be further wrapped with :class:`io.BufferedReader` + and :class:`io.TextIOWrapper`. + + On older Python where io module is not available, it implements + only .read(), .seek(), .tell() and .close() methods. + + The object is seekable, although the seeking is fast only on + uncompressed files, on compressed files the seeking is implemented + by reading ahead and/or restarting the decompression. + + Parameters: + + fname + file name or RarInfo instance. + mode + must be 'r' + psw + password to use for extracting. + """ + + if mode != 'r': + raise NotImplementedError("RarFile.open() supports only mode=r") + + # entry lookup + inf = self.getinfo(fname) + if inf.isdir(): + raise TypeError("Directory does not have any data: " + inf.filename) + + # check password + if inf.needs_password(): + psw = psw or self._password + if psw is None: + raise PasswordRequired("File %s requires password" % inf.filename) + else: + psw = None + + return self._file_parser.open(inf, psw) + + def read(self, fname, psw=None): + """Return uncompressed data for archive entry. + + For longer files using :meth:`RarFile.open` may be better idea. + + Parameters: + + fname + filename or RarInfo instance + psw + password to use for extracting. + """ + + with self.open(fname, 'r', psw) as f: + return f.read() + + def close(self): + """Release open resources.""" + pass + + def printdir(self): + """Print archive file list to stdout.""" + for f in self.infolist(): + print(f.filename) + + def extract(self, member, path=None, pwd=None): + """Extract single file into current directory. + + Parameters: + + member + filename or :class:`RarInfo` instance + path + optional destination path + pwd + optional password to use + """ + if isinstance(member, RarInfo): + fname = member.filename + else: + fname = member + self._extract([fname], path, pwd) + + def extractall(self, path=None, members=None, pwd=None): + """Extract all files into current directory. + + Parameters: + + path + optional destination path + members + optional filename or :class:`RarInfo` instance list to extract + pwd + optional password to use + """ + fnlist = [] + if members is not None: + for m in members: + if isinstance(m, RarInfo): + fnlist.append(m.filename) + else: + fnlist.append(m) + self._extract(fnlist, path, pwd) + + def testrar(self): + """Let 'unrar' test the archive. + """ + cmd = [UNRAR_TOOL] + list(TEST_ARGS) + add_password_arg(cmd, self._password) + cmd.append('--') + with XTempFile(self._rarfile) as rarfile: + cmd.append(rarfile) + p = custom_popen(cmd) + output = p.communicate()[0] + check_returncode(p, output) + + def strerror(self): + """Return error string if parsing failed or None if no problems. + """ + if not self._file_parser: + return "Not a RAR file" + return self._file_parser.strerror() + + ## + ## private methods + ## + + def _parse(self): + ver = _get_rar_version(self._rarfile) + if ver == 3: + p3 = RAR3Parser(self._rarfile, self._password, self._crc_check, + self._charset, self._strict, self._info_callback) + self._file_parser = p3 # noqa + elif ver == 5: + p5 = RAR5Parser(self._rarfile, self._password, self._crc_check, + self._charset, self._strict, self._info_callback) + self._file_parser = p5 # noqa + else: + raise BadRarFile("Not a RAR file") + + self._file_parser.parse() + self.comment = self._file_parser.comment + + # call unrar to extract a file + def _extract(self, fnlist, path=None, psw=None): + cmd = [UNRAR_TOOL] + list(EXTRACT_ARGS) + + # pasoword + psw = psw or self._password + add_password_arg(cmd, psw) + cmd.append('--') + + # rar file + with XTempFile(self._rarfile) as rarfn: + cmd.append(rarfn) + + # file list + for fn in fnlist: + if os.sep != PATH_SEP: + fn = fn.replace(PATH_SEP, os.sep) + cmd.append(fn) + + # destination path + if path is not None: + cmd.append(path + os.sep) + + # call + p = custom_popen(cmd) + output = p.communicate()[0] + check_returncode(p, output) + +# +# File format parsing +# + +class CommonParser(object): + """Shared parser parts.""" + _main = None + _hdrenc_main = None + _needs_password = False + _fd = None + _expect_sig = None + _parse_error = None + _password = None + comment = None + + def __init__(self, rarfile, password, crc_check, charset, strict, info_cb): + self._rarfile = rarfile + self._password = password + self._crc_check = crc_check + self._charset = charset + self._strict = strict + self._info_callback = info_cb + self._info_list = [] + self._info_map = {} + self._vol_list = [] + + def has_header_encryption(self): + """Returns True if headers are encrypted + """ + if self._hdrenc_main: + return True + if self._main: + if self._main.flags & RAR_MAIN_PASSWORD: + return True + return False + + def setpassword(self, psw): + """Set cached password.""" + self._password = psw + + def volumelist(self): + """Volume files""" + return self._vol_list + + def needs_password(self): + """Is password required""" + return self._needs_password + + def strerror(self): + """Last error""" + return self._parse_error + + def infolist(self): + """List of RarInfo records. + """ + return self._info_list + + def getinfo(self, member): + """Return RarInfo for filename + """ + if isinstance(member, RarInfo): + fname = member.filename + else: + fname = member + + # accept both ways here + if PATH_SEP == '/': + fname2 = fname.replace("\\", "/") + else: + fname2 = fname.replace("/", "\\") + + try: + return self._info_map[fname] + except KeyError: + try: + return self._info_map[fname2] + except KeyError: + raise NoRarEntry("No such file: %s" % fname) + + # read rar + def parse(self): + """Process file.""" + self._fd = None + try: + self._parse_real() + finally: + if self._fd: + self._fd.close() + self._fd = None + + def _parse_real(self): + fd = XFile(self._rarfile) + self._fd = fd + sig = fd.read(len(self._expect_sig)) + if sig != self._expect_sig: + if isinstance(self._rarfile, (str, unicode)): + raise NotRarFile("Not a Rar archive: {}".format(self._rarfile)) + raise NotRarFile("Not a Rar archive") + + volume = 0 # first vol (.rar) is 0 + more_vols = False + endarc = False + volfile = self._rarfile + self._vol_list = [self._rarfile] + while 1: + if endarc: + h = None # don't read past ENDARC + else: + h = self._parse_header(fd) + if not h: + if more_vols: + volume += 1 + fd.close() + try: + volfile = self._next_volname(volfile) + fd = XFile(volfile) + except IOError: + self._set_error("Cannot open next volume: %s", volfile) + break + self._fd = fd + sig = fd.read(len(self._expect_sig)) + if sig != self._expect_sig: + self._set_error("Invalid volume sig: %s", volfile) + break + more_vols = False + endarc = False + self._vol_list.append(volfile) + continue + break + h.volume = volume + h.volume_file = volfile + + if h.type == RAR_BLOCK_MAIN and not self._main: + self._main = h + if h.flags & RAR_MAIN_NEWNUMBERING: + # RAR 2.x does not set FIRSTVOLUME, + # so check it only if NEWNUMBERING is used + if (h.flags & RAR_MAIN_FIRSTVOLUME) == 0: + raise NeedFirstVolume("Need to start from first volume") + if h.flags & RAR_MAIN_PASSWORD: + self._needs_password = True + if not self._password: + break + elif h.type == RAR_BLOCK_ENDARC: + more_vols = (h.flags & RAR_ENDARC_NEXT_VOLUME) > 0 + endarc = True + elif h.type == RAR_BLOCK_FILE: + # RAR 2.x does not write RAR_BLOCK_ENDARC + if h.flags & RAR_FILE_SPLIT_AFTER: + more_vols = True + # RAR 2.x does not set RAR_MAIN_FIRSTVOLUME + if volume == 0 and h.flags & RAR_FILE_SPLIT_BEFORE: + raise NeedFirstVolume("Need to start from first volume") + + if h.needs_password(): + self._needs_password = True + + # store it + self.process_entry(fd, h) + + if self._info_callback: + self._info_callback(h) + + # go to next header + if h.add_size > 0: + fd.seek(h.data_offset + h.add_size, 0) + + def process_entry(self, fd, item): + """Examine item, add into lookup cache.""" + raise NotImplementedError() + + def _decrypt_header(self, fd): + raise NotImplementedError('_decrypt_header') + + def _parse_block_header(self, fd): + raise NotImplementedError('_parse_block_header') + + def _open_hack(self, inf, psw): + raise NotImplementedError('_open_hack') + + # read single header + def _parse_header(self, fd): + try: + # handle encrypted headers + if (self._main and self._main.flags & RAR_MAIN_PASSWORD) or self._hdrenc_main: + if not self._password: + return + fd = self._decrypt_header(fd) + + # now read actual header + return self._parse_block_header(fd) + except struct.error: + self._set_error('Broken header in RAR file') + return None + + # given current vol name, construct next one + def _next_volname(self, volfile): + if is_filelike(volfile): + raise IOError("Working on single FD") + if self._main.flags & RAR_MAIN_NEWNUMBERING: + return _next_newvol(volfile) + return _next_oldvol(volfile) + + def _set_error(self, msg, *args): + if args: + msg = msg % args + self._parse_error = msg + if self._strict: + raise BadRarFile(msg) + + def open(self, inf, psw): + """Return stream object for file data.""" + + if inf.file_redir: + # cannot leave to unrar as it expects copied file to exist + if inf.file_redir[0] in (RAR5_XREDIR_FILE_COPY, RAR5_XREDIR_HARD_LINK): + inf = self.getinfo(inf.file_redir[2]) + if not inf: + raise BadRarFile('cannot find copied file') + + if inf.flags & RAR_FILE_SPLIT_BEFORE: + raise NeedFirstVolume("Partial file, please start from first volume: " + inf.filename) + + # is temp write usable? + use_hack = 1 + if not self._main: + use_hack = 0 + elif self._main._must_disable_hack(): + use_hack = 0 + elif inf._must_disable_hack(): + use_hack = 0 + elif is_filelike(self._rarfile): + pass + elif inf.file_size > HACK_SIZE_LIMIT: + use_hack = 0 + elif not USE_EXTRACT_HACK: + use_hack = 0 + + # now extract + if inf.compress_type == RAR_M0 and (inf.flags & RAR_FILE_PASSWORD) == 0 and inf.file_redir is None: + return self._open_clear(inf) + elif use_hack: + return self._open_hack(inf, psw) + elif is_filelike(self._rarfile): + return self._open_unrar_membuf(self._rarfile, inf, psw) + else: + return self._open_unrar(self._rarfile, inf, psw) + + def _open_clear(self, inf): + return DirectReader(self, inf) + + def _open_hack_core(self, inf, psw, prefix, suffix): + + size = inf.compress_size + inf.header_size + rf = XFile(inf.volume_file, 0) + rf.seek(inf.header_offset) + + tmpfd, tmpname = mkstemp(suffix='.rar') + tmpf = os.fdopen(tmpfd, "wb") + + try: + tmpf.write(prefix) + while size > 0: + if size > BSIZE: + buf = rf.read(BSIZE) + else: + buf = rf.read(size) + if not buf: + raise BadRarFile('read failed: ' + inf.filename) + tmpf.write(buf) + size -= len(buf) + tmpf.write(suffix) + tmpf.close() + rf.close() + except: + rf.close() + tmpf.close() + os.unlink(tmpname) + raise + + return self._open_unrar(tmpname, inf, psw, tmpname) + + # write in-memory archive to temp file - needed for solid archives + def _open_unrar_membuf(self, memfile, inf, psw): + tmpname = membuf_tempfile(memfile) + return self._open_unrar(tmpname, inf, psw, tmpname, force_file=True) + + # extract using unrar + def _open_unrar(self, rarfile, inf, psw=None, tmpfile=None, force_file=False): + cmd = [UNRAR_TOOL] + list(OPEN_ARGS) + add_password_arg(cmd, psw) + cmd.append("--") + cmd.append(rarfile) + + # not giving filename avoids encoding related problems + if not tmpfile or force_file: + fn = inf.filename + if PATH_SEP != os.sep: + fn = fn.replace(PATH_SEP, os.sep) + cmd.append(fn) + + # read from unrar pipe + return PipeReader(self, inf, cmd, tmpfile) + +# +# RAR3 format +# + +class Rar3Info(RarInfo): + """RAR3 specific fields.""" + extract_version = 15 + salt = None + add_size = 0 + header_crc = None + header_size = None + header_offset = None + data_offset = None + _md_class = None + _md_expect = None + + # make sure some rar5 fields are always present + file_redir = None + blake2sp_hash = None + + def _must_disable_hack(self): + if self.type == RAR_BLOCK_FILE: + if self.flags & RAR_FILE_PASSWORD: + return True + elif self.flags & (RAR_FILE_SPLIT_BEFORE | RAR_FILE_SPLIT_AFTER): + return True + elif self.type == RAR_BLOCK_MAIN: + if self.flags & (RAR_MAIN_SOLID | RAR_MAIN_PASSWORD): + return True + return False + + +class RAR3Parser(CommonParser): + """Parse RAR3 file format. + """ + _expect_sig = RAR_ID + _last_aes_key = (None, None, None) # (salt, key, iv) + + def _decrypt_header(self, fd): + if not _have_crypto: + raise NoCrypto('Cannot parse encrypted headers - no crypto') + salt = fd.read(8) + if self._last_aes_key[0] == salt: + key, iv = self._last_aes_key[1:] + else: + key, iv = rar3_s2k(self._password, salt) + self._last_aes_key = (salt, key, iv) + return HeaderDecrypt(fd, key, iv) + + # common header + def _parse_block_header(self, fd): + h = Rar3Info() + h.header_offset = fd.tell() + + # read and parse base header + buf = fd.read(S_BLK_HDR.size) + if not buf: + return None + t = S_BLK_HDR.unpack_from(buf) + h.header_crc, h.type, h.flags, h.header_size = t + + # read full header + if h.header_size > S_BLK_HDR.size: + hdata = buf + fd.read(h.header_size - S_BLK_HDR.size) + else: + hdata = buf + h.data_offset = fd.tell() + + # unexpected EOF? + if len(hdata) != h.header_size: + self._set_error('Unexpected EOF when reading header') + return None + + pos = S_BLK_HDR.size + + # block has data assiciated with it? + if h.flags & RAR_LONG_BLOCK: + h.add_size, pos = load_le32(hdata, pos) + else: + h.add_size = 0 + + # parse interesting ones, decide header boundaries for crc + if h.type == RAR_BLOCK_MARK: + return h + elif h.type == RAR_BLOCK_MAIN: + pos += 6 + if h.flags & RAR_MAIN_ENCRYPTVER: + pos += 1 + crc_pos = pos + if h.flags & RAR_MAIN_COMMENT: + self._parse_subblocks(h, hdata, pos) + elif h.type == RAR_BLOCK_FILE: + pos = self._parse_file_header(h, hdata, pos - 4) + crc_pos = pos + if h.flags & RAR_FILE_COMMENT: + pos = self._parse_subblocks(h, hdata, pos) + elif h.type == RAR_BLOCK_SUB: + pos = self._parse_file_header(h, hdata, pos - 4) + crc_pos = h.header_size + elif h.type == RAR_BLOCK_OLD_AUTH: + pos += 8 + crc_pos = pos + elif h.type == RAR_BLOCK_OLD_EXTRA: + pos += 7 + crc_pos = pos + else: + crc_pos = h.header_size + + # check crc + if h.type == RAR_BLOCK_OLD_SUB: + crcdat = hdata[2:] + fd.read(h.add_size) + else: + crcdat = hdata[2:crc_pos] + + calc_crc = rar_crc32(crcdat) & 0xFFFF + + # return good header + if h.header_crc == calc_crc: + return h + + # header parsing failed. + self._set_error('Header CRC error (%02x): exp=%x got=%x (xlen = %d)', + h.type, h.header_crc, calc_crc, len(crcdat)) + + # instead panicing, send eof + return None + + # read file-specific header + def _parse_file_header(self, h, hdata, pos): + fld = S_FILE_HDR.unpack_from(hdata, pos) + pos += S_FILE_HDR.size + + h.compress_size = fld[0] + h.file_size = fld[1] + h.host_os = fld[2] + h.CRC = fld[3] + h.date_time = parse_dos_time(fld[4]) + h.mtime = to_datetime(h.date_time) + h.extract_version = fld[5] + h.compress_type = fld[6] + name_size = fld[7] + h.mode = fld[8] + + h._md_class = CRC32Context + h._md_expect = h.CRC + + if h.flags & RAR_FILE_LARGE: + h1, pos = load_le32(hdata, pos) + h2, pos = load_le32(hdata, pos) + h.compress_size |= h1 << 32 + h.file_size |= h2 << 32 + h.add_size = h.compress_size + + name, pos = load_bytes(hdata, name_size, pos) + if h.flags & RAR_FILE_UNICODE: + nul = name.find(ZERO) + h.orig_filename = name[:nul] + u = UnicodeFilename(h.orig_filename, name[nul + 1:]) + h.filename = u.decode() + + # if parsing failed fall back to simple name + if u.failed: + h.filename = self._decode(h.orig_filename) + else: + h.orig_filename = name + h.filename = self._decode(name) + + # change separator, if requested + if PATH_SEP != '\\': + h.filename = h.filename.replace('\\', PATH_SEP) + + if h.flags & RAR_FILE_SALT: + h.salt, pos = load_bytes(hdata, 8, pos) + else: + h.salt = None + + # optional extended time stamps + if h.flags & RAR_FILE_EXTTIME: + pos = _parse_ext_time(h, hdata, pos) + else: + h.mtime = h.atime = h.ctime = h.arctime = None + + return pos + + # find old-style comment subblock + def _parse_subblocks(self, h, hdata, pos): + while pos < len(hdata): + # ordinary block header + t = S_BLK_HDR.unpack_from(hdata, pos) + ___scrc, stype, sflags, slen = t + pos_next = pos + slen + pos += S_BLK_HDR.size + + # corrupt header + if pos_next < pos: + break + + # followed by block-specific header + if stype == RAR_BLOCK_OLD_COMMENT and pos + S_COMMENT_HDR.size <= pos_next: + declen, ver, meth, crc = S_COMMENT_HDR.unpack_from(hdata, pos) + pos += S_COMMENT_HDR.size + data = hdata[pos : pos_next] + cmt = rar3_decompress(ver, meth, data, declen, sflags, + crc, self._password) + if not self._crc_check: + h.comment = self._decode_comment(cmt) + elif rar_crc32(cmt) & 0xFFFF == crc: + h.comment = self._decode_comment(cmt) + + pos = pos_next + return pos + + def _read_comment_v3(self, inf, psw=None): + + # read data + with XFile(inf.volume_file) as rf: + rf.seek(inf.data_offset) + data = rf.read(inf.compress_size) + + # decompress + cmt = rar3_decompress(inf.extract_version, inf.compress_type, data, + inf.file_size, inf.flags, inf.CRC, psw, inf.salt) + + # check crc + if self._crc_check: + crc = rar_crc32(cmt) + if crc != inf.CRC: + return None + + return self._decode_comment(cmt) + + def _decode(self, val): + for c in TRY_ENCODINGS: + try: + return val.decode(c) + except UnicodeError: + pass + return val.decode(self._charset, 'replace') + + def _decode_comment(self, val): + return self._decode(val) + + def process_entry(self, fd, item): + if item.type == RAR_BLOCK_FILE: + # use only first part + if (item.flags & RAR_FILE_SPLIT_BEFORE) == 0: + self._info_map[item.filename] = item + self._info_list.append(item) + elif len(self._info_list) > 0: + # final crc is in last block + old = self._info_list[-1] + old.CRC = item.CRC + old._md_expect = item._md_expect + old.compress_size += item.compress_size + + # parse new-style comment + if item.type == RAR_BLOCK_SUB and item.filename == 'CMT': + if item.flags & (RAR_FILE_SPLIT_BEFORE | RAR_FILE_SPLIT_AFTER): + pass + elif item.flags & RAR_FILE_SOLID: + # file comment + cmt = self._read_comment_v3(item, self._password) + if len(self._info_list) > 0: + old = self._info_list[-1] + old.comment = cmt + else: + # archive comment + cmt = self._read_comment_v3(item, self._password) + self.comment = cmt + + if item.type == RAR_BLOCK_MAIN: + if item.flags & RAR_MAIN_COMMENT: + self.comment = item.comment + if item.flags & RAR_MAIN_PASSWORD: + self._needs_password = True + + # put file compressed data into temporary .rar archive, and run + # unrar on that, thus avoiding unrar going over whole archive + def _open_hack(self, inf, psw): + # create main header: crc, type, flags, size, res1, res2 + prefix = RAR_ID + S_BLK_HDR.pack(0x90CF, 0x73, 0, 13) + ZERO * (2 + 4) + return self._open_hack_core(inf, psw, prefix, EMPTY) + +# +# RAR5 format +# + +class Rar5Info(RarInfo): + """Shared fields for RAR5 records. + """ + extract_version = 50 + header_crc = None + header_size = None + header_offset = None + data_offset = None + + # type=all + block_type = None + block_flags = None + add_size = 0 + block_extra_size = 0 + + # type=MAIN + volume_number = None + _md_class = None + _md_expect = None + + def _must_disable_hack(self): + return False + + +class Rar5BaseFile(Rar5Info): + """Shared sturct for file & service record. + """ + type = -1 + file_flags = None + file_encryption = (0, 0, 0, EMPTY, EMPTY, EMPTY) + file_compress_flags = None + file_redir = None + file_owner = None + file_version = None + blake2sp_hash = None + + def _must_disable_hack(self): + if self.flags & RAR_FILE_PASSWORD: + return True + if self.block_flags & (RAR5_BLOCK_FLAG_SPLIT_BEFORE | RAR5_BLOCK_FLAG_SPLIT_AFTER): + return True + if self.file_compress_flags & RAR5_COMPR_SOLID: + return True + if self.file_redir: + return True + return False + + +class Rar5FileInfo(Rar5BaseFile): + """RAR5 file record. + """ + type = RAR_BLOCK_FILE + + +class Rar5ServiceInfo(Rar5BaseFile): + """RAR5 service record. + """ + type = RAR_BLOCK_SUB + + +class Rar5MainInfo(Rar5Info): + """RAR5 archive main record. + """ + type = RAR_BLOCK_MAIN + main_flags = None + main_volume_number = None + + def _must_disable_hack(self): + if self.main_flags & RAR5_MAIN_FLAG_SOLID: + return True + return False + + +class Rar5EncryptionInfo(Rar5Info): + """RAR5 archive header encryption record. + """ + type = RAR5_BLOCK_ENCRYPTION + encryption_algo = None + encryption_flags = None + encryption_kdf_count = None + encryption_salt = None + encryption_check_value = None + + def needs_password(self): + return True + + +class Rar5EndArcInfo(Rar5Info): + """RAR5 end of archive record. + """ + type = RAR_BLOCK_ENDARC + endarc_flags = None + + +class RAR5Parser(CommonParser): + """Parse RAR5 format. + """ + _expect_sig = RAR5_ID + _hdrenc_main = None + + # AES encrypted headers + _last_aes256_key = (-1, None, None) # (kdf_count, salt, key) + + def _gen_key(self, kdf_count, salt): + if self._last_aes256_key[:2] == (kdf_count, salt): + return self._last_aes256_key[2] + if kdf_count > 24: + raise BadRarFile('Too large kdf_count') + psw = self._password + if isinstance(psw, unicode): + psw = psw.encode('utf8') + key = pbkdf2_sha256(psw, salt, 1 << kdf_count) + self._last_aes256_key = (kdf_count, salt, key) + return key + + def _decrypt_header(self, fd): + if not _have_crypto: + raise NoCrypto('Cannot parse encrypted headers - no crypto') + h = self._hdrenc_main + key = self._gen_key(h.encryption_kdf_count, h.encryption_salt) + iv = fd.read(16) + return HeaderDecrypt(fd, key, iv) + + # common header + def _parse_block_header(self, fd): + header_offset = fd.tell() + + preload = 4 + 3 + start_bytes = fd.read(preload) + header_crc, pos = load_le32(start_bytes, 0) + hdrlen, pos = load_vint(start_bytes, pos) + if hdrlen > 2 * 1024 * 1024: + return None + header_size = pos + hdrlen + + # read full header, check for EOF + hdata = start_bytes + fd.read(header_size - len(start_bytes)) + if len(hdata) != header_size: + self._set_error('Unexpected EOF when reading header') + return None + data_offset = fd.tell() + + calc_crc = rar_crc32(memoryview(hdata)[4:]) + if header_crc != calc_crc: + # header parsing failed. + self._set_error('Header CRC error: exp=%x got=%x (xlen = %d)', + header_crc, calc_crc, len(hdata)) + return None + + block_type, pos = load_vint(hdata, pos) + + if block_type == RAR5_BLOCK_MAIN: + h, pos = self._parse_block_common(Rar5MainInfo(), hdata) + h = self._parse_main_block(h, hdata, pos) + elif block_type == RAR5_BLOCK_FILE: + h, pos = self._parse_block_common(Rar5FileInfo(), hdata) + h = self._parse_file_block(h, hdata, pos) + elif block_type == RAR5_BLOCK_SERVICE: + h, pos = self._parse_block_common(Rar5ServiceInfo(), hdata) + h = self._parse_file_block(h, hdata, pos) + elif block_type == RAR5_BLOCK_ENCRYPTION: + h, pos = self._parse_block_common(Rar5EncryptionInfo(), hdata) + h = self._parse_encryption_block(h, hdata, pos) + elif block_type == RAR5_BLOCK_ENDARC: + h, pos = self._parse_block_common(Rar5EndArcInfo(), hdata) + h = self._parse_endarc_block(h, hdata, pos) + else: + h = None + if h: + h.header_offset = header_offset + h.data_offset = data_offset + return h + + def _parse_block_common(self, h, hdata): + h.header_crc, pos = load_le32(hdata, 0) + hdrlen, pos = load_vint(hdata, pos) + h.header_size = hdrlen + pos + h.block_type, pos = load_vint(hdata, pos) + h.block_flags, pos = load_vint(hdata, pos) + + if h.block_flags & RAR5_BLOCK_FLAG_EXTRA_DATA: + h.block_extra_size, pos = load_vint(hdata, pos) + if h.block_flags & RAR5_BLOCK_FLAG_DATA_AREA: + h.add_size, pos = load_vint(hdata, pos) + + h.compress_size = h.add_size + + if h.block_flags & RAR5_BLOCK_FLAG_SKIP_IF_UNKNOWN: + h.flags |= RAR_SKIP_IF_UNKNOWN + if h.block_flags & RAR5_BLOCK_FLAG_DATA_AREA: + h.flags |= RAR_LONG_BLOCK + return h, pos + + def _parse_main_block(self, h, hdata, pos): + h.main_flags, pos = load_vint(hdata, pos) + if h.main_flags & RAR5_MAIN_FLAG_HAS_VOLNR: + h.main_volume_number = load_vint(hdata, pos) + + h.flags |= RAR_MAIN_NEWNUMBERING + if h.main_flags & RAR5_MAIN_FLAG_SOLID: + h.flags |= RAR_MAIN_SOLID + if h.main_flags & RAR5_MAIN_FLAG_ISVOL: + h.flags |= RAR_MAIN_VOLUME + if h.main_flags & RAR5_MAIN_FLAG_RECOVERY: + h.flags |= RAR_MAIN_RECOVERY + if self._hdrenc_main: + h.flags |= RAR_MAIN_PASSWORD + if h.main_flags & RAR5_MAIN_FLAG_HAS_VOLNR == 0: + h.flags |= RAR_MAIN_FIRSTVOLUME + + return h + + def _parse_file_block(self, h, hdata, pos): + h.file_flags, pos = load_vint(hdata, pos) + h.file_size, pos = load_vint(hdata, pos) + h.mode, pos = load_vint(hdata, pos) + + if h.file_flags & RAR5_FILE_FLAG_HAS_MTIME: + h.mtime, pos = load_unixtime(hdata, pos) + h.date_time = h.mtime.timetuple()[:6] + if h.file_flags & RAR5_FILE_FLAG_HAS_CRC32: + h.CRC, pos = load_le32(hdata, pos) + h._md_class = CRC32Context + h._md_expect = h.CRC + + h.file_compress_flags, pos = load_vint(hdata, pos) + h.file_host_os, pos = load_vint(hdata, pos) + h.orig_filename, pos = load_vstr(hdata, pos) + h.filename = h.orig_filename.decode('utf8', 'replace') + + # use compatible values + if h.file_host_os == RAR5_OS_WINDOWS: + h.host_os = RAR_OS_WIN32 + else: + h.host_os = RAR_OS_UNIX + h.compress_type = RAR_M0 + ((h.file_compress_flags >> 7) & 7) + + if h.block_extra_size: + # allow 1 byte of garbage + while pos < len(hdata) - 1: + xsize, pos = load_vint(hdata, pos) + xdata, pos = load_bytes(hdata, xsize, pos) + self._process_file_extra(h, xdata) + + if h.block_flags & RAR5_BLOCK_FLAG_SPLIT_BEFORE: + h.flags |= RAR_FILE_SPLIT_BEFORE + if h.block_flags & RAR5_BLOCK_FLAG_SPLIT_AFTER: + h.flags |= RAR_FILE_SPLIT_AFTER + if h.file_flags & RAR5_FILE_FLAG_ISDIR: + h.flags |= RAR_FILE_DIRECTORY + if h.file_compress_flags & RAR5_COMPR_SOLID: + h.flags |= RAR_FILE_SOLID + + return h + + def _parse_endarc_block(self, h, hdata, pos): + h.endarc_flags, pos = load_vint(hdata, pos) + if h.endarc_flags & RAR5_ENDARC_FLAG_NEXT_VOL: + h.flags |= RAR_ENDARC_NEXT_VOLUME + return h + + def _parse_encryption_block(self, h, hdata, pos): + h.encryption_algo, pos = load_vint(hdata, pos) + h.encryption_flags, pos = load_vint(hdata, pos) + h.encryption_kdf_count, pos = load_byte(hdata, pos) + h.encryption_salt, pos = load_bytes(hdata, 16, pos) + if h.encryption_flags & RAR5_ENC_FLAG_HAS_CHECKVAL: + h.encryption_check_value = load_bytes(hdata, 12, pos) + if h.encryption_algo != RAR5_XENC_CIPHER_AES256: + raise BadRarFile('Unsupported header encryption cipher') + self._hdrenc_main = h + return h + + # file extra record + def _process_file_extra(self, h, xdata): + xtype, pos = load_vint(xdata, 0) + if xtype == RAR5_XFILE_TIME: + self._parse_file_xtime(h, xdata, pos) + elif xtype == RAR5_XFILE_ENCRYPTION: + self._parse_file_encryption(h, xdata, pos) + elif xtype == RAR5_XFILE_HASH: + self._parse_file_hash(h, xdata, pos) + elif xtype == RAR5_XFILE_VERSION: + self._parse_file_version(h, xdata, pos) + elif xtype == RAR5_XFILE_REDIR: + self._parse_file_redir(h, xdata, pos) + elif xtype == RAR5_XFILE_OWNER: + self._parse_file_owner(h, xdata, pos) + elif xtype == RAR5_XFILE_SERVICE: + pass + else: + pass + + # extra block for file time record + def _parse_file_xtime(self, h, xdata, pos): + tflags, pos = load_vint(xdata, pos) + ldr = load_windowstime + if tflags & RAR5_XTIME_UNIXTIME: + ldr = load_unixtime + if tflags & RAR5_XTIME_HAS_MTIME: + h.mtime, pos = ldr(xdata, pos) + h.date_time = h.mtime.timetuple()[:6] + if tflags & RAR5_XTIME_HAS_CTIME: + h.ctime, pos = ldr(xdata, pos) + if tflags & RAR5_XTIME_HAS_ATIME: + h.atime, pos = ldr(xdata, pos) + + # just remember encryption info + def _parse_file_encryption(self, h, xdata, pos): + algo, pos = load_vint(xdata, pos) + flags, pos = load_vint(xdata, pos) + kdf_count, pos = load_byte(xdata, pos) + salt, pos = load_bytes(xdata, 16, pos) + iv, pos = load_bytes(xdata, 16, pos) + checkval = None + if flags & RAR5_XENC_CHECKVAL: + checkval, pos = load_bytes(xdata, 12, pos) + if flags & RAR5_XENC_TWEAKED: + h._md_expect = None + h._md_class = NoHashContext + + h.file_encryption = (algo, flags, kdf_count, salt, iv, checkval) + h.flags |= RAR_FILE_PASSWORD + + def _parse_file_hash(self, h, xdata, pos): + hash_type, pos = load_vint(xdata, pos) + if hash_type == RAR5_XHASH_BLAKE2SP: + h.blake2sp_hash, pos = load_bytes(xdata, 32, pos) + if _have_blake2 and (h.file_encryption[1] & RAR5_XENC_TWEAKED) == 0: + h._md_class = Blake2SP + h._md_expect = h.blake2sp_hash + + def _parse_file_version(self, h, xdata, pos): + flags, pos = load_vint(xdata, pos) + version, pos = load_vint(xdata, pos) + h.file_version = (flags, version) + + def _parse_file_redir(self, h, xdata, pos): + redir_type, pos = load_vint(xdata, pos) + redir_flags, pos = load_vint(xdata, pos) + redir_name, pos = load_vstr(xdata, pos) + redir_name = redir_name.decode('utf8', 'replace') + h.file_redir = (redir_type, redir_flags, redir_name) + + def _parse_file_owner(self, h, xdata, pos): + user_name = group_name = user_id = group_id = None + + flags, pos = load_vint(xdata, pos) + if flags & RAR5_XOWNER_UNAME: + user_name, pos = load_vstr(xdata, pos) + if flags & RAR5_XOWNER_GNAME: + group_name, pos = load_vstr(xdata, pos) + if flags & RAR5_XOWNER_UID: + user_id, pos = load_vint(xdata, pos) + if flags & RAR5_XOWNER_GID: + group_id, pos = load_vint(xdata, pos) + + h.file_owner = (user_name, group_name, user_id, group_id) + + def process_entry(self, fd, item): + if item.block_type == RAR5_BLOCK_FILE: + # use only first part + if (item.block_flags & RAR5_BLOCK_FLAG_SPLIT_BEFORE) == 0: + self._info_map[item.filename] = item + self._info_list.append(item) + elif len(self._info_list) > 0: + # final crc is in last block + old = self._info_list[-1] + old.CRC = item.CRC + old._md_expect = item._md_expect + old.blake2sp_hash = item.blake2sp_hash + old.compress_size += item.compress_size + elif item.block_type == RAR5_BLOCK_SERVICE: + if item.filename == 'CMT': + self._load_comment(fd, item) + + def _load_comment(self, fd, item): + if item.block_flags & (RAR5_BLOCK_FLAG_SPLIT_BEFORE | RAR5_BLOCK_FLAG_SPLIT_AFTER): + return None + if item.compress_type != RAR_M0: + return None + + if item.flags & RAR_FILE_PASSWORD: + algo, ___flags, kdf_count, salt, iv, ___checkval = item.file_encryption + if algo != RAR5_XENC_CIPHER_AES256: + return None + key = self._gen_key(kdf_count, salt) + f = HeaderDecrypt(fd, key, iv) + cmt = f.read(item.file_size) + else: + # archive comment + with self._open_clear(item) as cmtstream: + cmt = cmtstream.read() + + # rar bug? - appends zero to comment + cmt = cmt.split(ZERO, 1)[0] + self.comment = cmt.decode('utf8') + + def _open_hack(self, inf, psw): + # len, type, blk_flags, flags + main_hdr = b'\x03\x01\x00\x00' + endarc_hdr = b'\x03\x05\x00\x00' + main_hdr = S_LONG.pack(rar_crc32(main_hdr)) + main_hdr + endarc_hdr = S_LONG.pack(rar_crc32(endarc_hdr)) + endarc_hdr + return self._open_hack_core(inf, psw, RAR5_ID + main_hdr, endarc_hdr) + +## +## Utility classes +## + +class UnicodeFilename(object): + """Handle RAR3 unicode filename decompression. + """ + def __init__(self, name, encdata): + self.std_name = bytearray(name) + self.encdata = bytearray(encdata) + self.pos = self.encpos = 0 + self.buf = bytearray() + self.failed = 0 + + def enc_byte(self): + """Copy encoded byte.""" + try: + c = self.encdata[self.encpos] + self.encpos += 1 + return c + except IndexError: + self.failed = 1 + return 0 + + def std_byte(self): + """Copy byte from 8-bit representation.""" + try: + return self.std_name[self.pos] + except IndexError: + self.failed = 1 + return ord('?') + + def put(self, lo, hi): + """Copy 16-bit value to result.""" + self.buf.append(lo) + self.buf.append(hi) + self.pos += 1 + + def decode(self): + """Decompress compressed UTF16 value.""" + hi = self.enc_byte() + flagbits = 0 + while self.encpos < len(self.encdata): + if flagbits == 0: + flags = self.enc_byte() + flagbits = 8 + flagbits -= 2 + t = (flags >> flagbits) & 3 + if t == 0: + self.put(self.enc_byte(), 0) + elif t == 1: + self.put(self.enc_byte(), hi) + elif t == 2: + self.put(self.enc_byte(), self.enc_byte()) + else: + n = self.enc_byte() + if n & 0x80: + c = self.enc_byte() + for _ in range((n & 0x7f) + 2): + lo = (self.std_byte() + c) & 0xFF + self.put(lo, hi) + else: + for _ in range(n + 2): + self.put(self.std_byte(), 0) + return self.buf.decode("utf-16le", "replace") + + +class RarExtFile(RawIOBase): + """Base class for file-like object that :meth:`RarFile.open` returns. + + Provides public methods and common crc checking. + + Behaviour: + - no short reads - .read() and .readinfo() read as much as requested. + - no internal buffer, use io.BufferedReader for that. + """ + + #: Filename of the archive entry + name = None + + def __init__(self, parser, inf): + """Open archive entry. + """ + super(RarExtFile, self).__init__() + + # standard io.* properties + self.name = inf.filename + self.mode = 'rb' + + self._parser = parser + self._inf = inf + self._fd = None + self._remain = 0 + self._returncode = 0 + + self._md_context = None + + self._open() + + def _open(self): + if self._fd: + self._fd.close() + md_class = self._inf._md_class or NoHashContext + self._md_context = md_class() + self._fd = None + self._remain = self._inf.file_size + + def read(self, cnt=None): + """Read all or specified amount of data from archive entry.""" + + # sanitize cnt + if cnt is None or cnt < 0: + cnt = self._remain + elif cnt > self._remain: + cnt = self._remain + if cnt == 0: + return EMPTY + + # actual read + data = self._read(cnt) + if data: + self._md_context.update(data) + self._remain -= len(data) + if len(data) != cnt: + raise BadRarFile("Failed the read enough data") + + # done? + if not data or self._remain == 0: + # self.close() + self._check() + return data + + def _check(self): + """Check final CRC.""" + final = self._md_context.digest() + exp = self._inf._md_expect + if exp is None: + return + if final is None: + return + if self._returncode: + check_returncode(self, '') + if self._remain != 0: + raise BadRarFile("Failed the read enough data") + if final != exp: + raise BadRarFile("Corrupt file - CRC check failed: %s - exp=%r got=%r" % ( + self._inf.filename, exp, final)) + + def _read(self, cnt): + """Actual read that gets sanitized cnt.""" + + def close(self): + """Close open resources.""" + + super(RarExtFile, self).close() + + if self._fd: + self._fd.close() + self._fd = None + + def __del__(self): + """Hook delete to make sure tempfile is removed.""" + self.close() + + def readinto(self, buf): + """Zero-copy read directly into buffer. + + Returns bytes read. + """ + raise NotImplementedError('readinto') + + def tell(self): + """Return current reading position in uncompressed data.""" + return self._inf.file_size - self._remain + + def seek(self, ofs, whence=0): + """Seek in data. + + On uncompressed files, the seeking works by actual + seeks so it's fast. On compresses files its slow + - forward seeking happends by reading ahead, + backwards by re-opening and decompressing from the start. + """ + + # disable crc check when seeking + self._md_context = NoHashContext() + + fsize = self._inf.file_size + cur_ofs = self.tell() + + if whence == 0: # seek from beginning of file + new_ofs = ofs + elif whence == 1: # seek from current position + new_ofs = cur_ofs + ofs + elif whence == 2: # seek from end of file + new_ofs = fsize + ofs + else: + raise ValueError('Invalid value for whence') + + # sanity check + if new_ofs < 0: + new_ofs = 0 + elif new_ofs > fsize: + new_ofs = fsize + + # do the actual seek + if new_ofs >= cur_ofs: + self._skip(new_ofs - cur_ofs) + else: + # reopen and seek + self._open() + self._skip(new_ofs) + return self.tell() + + def _skip(self, cnt): + """Read and discard data""" + while cnt > 0: + if cnt > 8192: + buf = self.read(8192) + else: + buf = self.read(cnt) + if not buf: + break + cnt -= len(buf) + + def readable(self): + """Returns True""" + return True + + def writable(self): + """Returns False. + + Writing is not supported. + """ + return False + + def seekable(self): + """Returns True. + + Seeking is supported, although it's slow on compressed files. + """ + return True + + def readall(self): + """Read all remaining data""" + # avoid RawIOBase default impl + return self.read() + + +class PipeReader(RarExtFile): + """Read data from pipe, handle tempfile cleanup.""" + + def __init__(self, rf, inf, cmd, tempfile=None): + self._cmd = cmd + self._proc = None + self._tempfile = tempfile + super(PipeReader, self).__init__(rf, inf) + + def _close_proc(self): + if not self._proc: + return + if self._proc.stdout: + self._proc.stdout.close() + if self._proc.stdin: + self._proc.stdin.close() + if self._proc.stderr: + self._proc.stderr.close() + self._proc.wait() + self._returncode = self._proc.returncode + self._proc = None + + def _open(self): + super(PipeReader, self)._open() + + # stop old process + self._close_proc() + + # launch new process + self._returncode = 0 + self._proc = custom_popen(self._cmd) + self._fd = self._proc.stdout + + # avoid situation where unrar waits on stdin + if self._proc.stdin: + self._proc.stdin.close() + + def _read(self, cnt): + """Read from pipe.""" + + # normal read is usually enough + data = self._fd.read(cnt) + if len(data) == cnt or not data: + return data + + # short read, try looping + buf = [data] + cnt -= len(data) + while cnt > 0: + data = self._fd.read(cnt) + if not data: + break + cnt -= len(data) + buf.append(data) + return EMPTY.join(buf) + + def close(self): + """Close open resources.""" + + self._close_proc() + super(PipeReader, self).close() + + if self._tempfile: + try: + os.unlink(self._tempfile) + except OSError: + pass + self._tempfile = None + + def readinto(self, buf): + """Zero-copy read directly into buffer.""" + cnt = len(buf) + if cnt > self._remain: + cnt = self._remain + vbuf = memoryview(buf) + res = got = 0 + while got < cnt: + res = self._fd.readinto(vbuf[got : cnt]) + if not res: + break + self._md_context.update(vbuf[got : got + res]) + self._remain -= res + got += res + return got + + +class DirectReader(RarExtFile): + """Read uncompressed data directly from archive. + """ + _cur = None + _cur_avail = None + _volfile = None + + def _open(self): + super(DirectReader, self)._open() + + self._volfile = self._inf.volume_file + self._fd = XFile(self._volfile, 0) + self._fd.seek(self._inf.header_offset, 0) + self._cur = self._parser._parse_header(self._fd) + self._cur_avail = self._cur.add_size + + def _skip(self, cnt): + """RAR Seek, skipping through rar files to get to correct position + """ + + while cnt > 0: + # next vol needed? + if self._cur_avail == 0: + if not self._open_next(): + break + + # fd is in read pos, do the read + if cnt > self._cur_avail: + cnt -= self._cur_avail + self._remain -= self._cur_avail + self._cur_avail = 0 + else: + self._fd.seek(cnt, 1) + self._cur_avail -= cnt + self._remain -= cnt + cnt = 0 + + def _read(self, cnt): + """Read from potentially multi-volume archive.""" + + buf = [] + while cnt > 0: + # next vol needed? + if self._cur_avail == 0: + if not self._open_next(): + break + + # fd is in read pos, do the read + if cnt > self._cur_avail: + data = self._fd.read(self._cur_avail) + else: + data = self._fd.read(cnt) + if not data: + break + + # got some data + cnt -= len(data) + self._cur_avail -= len(data) + buf.append(data) + + if len(buf) == 1: + return buf[0] + return EMPTY.join(buf) + + def _open_next(self): + """Proceed to next volume.""" + + # is the file split over archives? + if (self._cur.flags & RAR_FILE_SPLIT_AFTER) == 0: + return False + + if self._fd: + self._fd.close() + self._fd = None + + # open next part + self._volfile = self._parser._next_volname(self._volfile) + fd = open(self._volfile, "rb", 0) + self._fd = fd + sig = fd.read(len(self._parser._expect_sig)) + if sig != self._parser._expect_sig: + raise BadRarFile("Invalid signature") + + # loop until first file header + while 1: + cur = self._parser._parse_header(fd) + if not cur: + raise BadRarFile("Unexpected EOF") + if cur.type in (RAR_BLOCK_MARK, RAR_BLOCK_MAIN): + if cur.add_size: + fd.seek(cur.add_size, 1) + continue + if cur.orig_filename != self._inf.orig_filename: + raise BadRarFile("Did not found file entry") + self._cur = cur + self._cur_avail = cur.add_size + return True + + def readinto(self, buf): + """Zero-copy read directly into buffer.""" + got = 0 + vbuf = memoryview(buf) + while got < len(buf): + # next vol needed? + if self._cur_avail == 0: + if not self._open_next(): + break + + # length for next read + cnt = len(buf) - got + if cnt > self._cur_avail: + cnt = self._cur_avail + + # read into temp view + res = self._fd.readinto(vbuf[got : got + cnt]) + if not res: + break + self._md_context.update(vbuf[got : got + res]) + self._cur_avail -= res + self._remain -= res + got += res + return got + + +class HeaderDecrypt(object): + """File-like object that decrypts from another file""" + def __init__(self, f, key, iv): + self.f = f + self.ciph = AES_CBC_Decrypt(key, iv) + self.buf = EMPTY + + def tell(self): + """Current file pos - works only on block boundaries.""" + return self.f.tell() + + def read(self, cnt=None): + """Read and decrypt.""" + if cnt > 8 * 1024: + raise BadRarFile('Bad count to header decrypt - wrong password?') + + # consume old data + if cnt <= len(self.buf): + res = self.buf[:cnt] + self.buf = self.buf[cnt:] + return res + res = self.buf + self.buf = EMPTY + cnt -= len(res) + + # decrypt new data + blklen = 16 + while cnt > 0: + enc = self.f.read(blklen) + if len(enc) < blklen: + break + dec = self.ciph.decrypt(enc) + if cnt >= len(dec): + res += dec + cnt -= len(dec) + else: + res += dec[:cnt] + self.buf = dec[cnt:] + cnt = 0 + + return res + + +# handle (filename|filelike) object +class XFile(object): + """Input may be filename or file object. + """ + __slots__ = ('_fd', '_need_close') + + def __init__(self, xfile, bufsize=1024): + if is_filelike(xfile): + self._need_close = False + self._fd = xfile + self._fd.seek(0) + else: + self._need_close = True + self._fd = open(xfile, 'rb', bufsize) + + def read(self, n=None): + """Read from file.""" + return self._fd.read(n) + + def tell(self): + """Return file pos.""" + return self._fd.tell() + + def seek(self, ofs, whence=0): + """Move file pos.""" + return self._fd.seek(ofs, whence) + + def readinto(self, dst): + """Read into buffer.""" + return self._fd.readinto(dst) + + def close(self): + """Close file object.""" + if self._need_close: + self._fd.close() + + def __enter__(self): + return self + + def __exit__(self, typ, val, tb): + self.close() + + +class NoHashContext(object): + """No-op hash function.""" + def __init__(self, data=None): + """Initialize""" + def update(self, data): + """Update data""" + def digest(self): + """Final hash""" + def hexdigest(self): + """Hexadecimal digest.""" + + +class CRC32Context(object): + """Hash context that uses CRC32.""" + __slots__ = ['_crc'] + + def __init__(self, data=None): + self._crc = 0 + if data: + self.update(data) + + def update(self, data): + """Process data.""" + self._crc = rar_crc32(data, self._crc) + + def digest(self): + """Final hash.""" + return self._crc + + def hexdigest(self): + """Hexadecimal digest.""" + return '%08x' % self.digest() + + +class Blake2SP(object): + """Blake2sp hash context. + """ + __slots__ = ['_thread', '_buf', '_cur', '_digest'] + digest_size = 32 + block_size = 64 + parallelism = 8 + + def __init__(self, data=None): + self._buf = b'' + self._cur = 0 + self._digest = None + self._thread = [] + + for i in range(self.parallelism): + ctx = self._blake2s(i, 0, i == (self.parallelism - 1)) + self._thread.append(ctx) + + if data: + self.update(data) + + def _blake2s(self, ofs, depth, is_last): + return blake2s(node_offset=ofs, node_depth=depth, last_node=is_last, + depth=2, inner_size=32, fanout=self.parallelism) + + def _add_block(self, blk): + self._thread[self._cur].update(blk) + self._cur = (self._cur + 1) % self.parallelism + + def update(self, data): + """Hash data. + """ + view = memoryview(data) + bs = self.block_size + if self._buf: + need = bs - len(self._buf) + if len(view) < need: + self._buf += view.tobytes() + return + self._add_block(self._buf + view[:need].tobytes()) + view = view[need:] + while len(view) >= bs: + self._add_block(view[:bs]) + view = view[bs:] + self._buf = view.tobytes() + + def digest(self): + """Return final digest value. + """ + if self._digest is None: + if self._buf: + self._add_block(self._buf) + self._buf = EMPTY + ctx = self._blake2s(0, 1, True) + for t in self._thread: + ctx.update(t.digest()) + self._digest = ctx.digest() + return self._digest + + def hexdigest(self): + """Hexadecimal digest.""" + return tohex(self.digest()) + + +class Rar3Sha1(object): + """Bug-compat for SHA1 + """ + digest_size = 20 + block_size = 64 + + _BLK_BE = struct.Struct(b'>16L') + _BLK_LE = struct.Struct(b'<16L') + + __slots__ = ('_nbytes', '_md', '_rarbug') + + def __init__(self, data=b'', rarbug=False): + self._md = sha1() + self._nbytes = 0 + self._rarbug = rarbug + self.update(data) + + def update(self, data): + """Process more data.""" + self._md.update(data) + bufpos = self._nbytes & 63 + self._nbytes += len(data) + + if self._rarbug and len(data) > 64: + dpos = self.block_size - bufpos + while dpos + self.block_size <= len(data): + self._corrupt(data, dpos) + dpos += self.block_size + + def digest(self): + """Return final state.""" + return self._md.digest() + + def hexdigest(self): + """Return final state as hex string.""" + return self._md.hexdigest() + + def _corrupt(self, data, dpos): + """Corruption from SHA1 core.""" + ws = list(self._BLK_BE.unpack_from(data, dpos)) + for t in range(16, 80): + tmp = ws[(t - 3) & 15] ^ ws[(t - 8) & 15] ^ ws[(t - 14) & 15] ^ ws[(t - 16) & 15] + ws[t & 15] = ((tmp << 1) | (tmp >> (32 - 1))) & 0xFFFFFFFF + self._BLK_LE.pack_into(data, dpos, *ws) + + +## +## Utility functions +## + +S_LONG = Struct(' len(buf): + raise BadRarFile('cannot load byte') + return S_BYTE.unpack_from(buf, pos)[0], end + + +def load_le32(buf, pos): + """Load little-endian 32-bit integer""" + end = pos + 4 + if end > len(buf): + raise BadRarFile('cannot load le32') + return S_LONG.unpack_from(buf, pos)[0], pos + 4 + + +def load_bytes(buf, num, pos): + """Load sequence of bytes""" + end = pos + num + if end > len(buf): + raise BadRarFile('cannot load bytes') + return buf[pos : end], end + + +def load_vstr(buf, pos): + """Load bytes prefixed by vint length""" + slen, pos = load_vint(buf, pos) + return load_bytes(buf, slen, pos) + + +def load_dostime(buf, pos): + """Load LE32 dos timestamp""" + stamp, pos = load_le32(buf, pos) + tup = parse_dos_time(stamp) + return to_datetime(tup), pos + + +def load_unixtime(buf, pos): + """Load LE32 unix timestamp""" + secs, pos = load_le32(buf, pos) + dt = datetime.fromtimestamp(secs, UTC) + return dt, pos + + +def load_windowstime(buf, pos): + """Load LE64 windows timestamp""" + # unix epoch (1970) in seconds from windows epoch (1601) + unix_epoch = 11644473600 + val1, pos = load_le32(buf, pos) + val2, pos = load_le32(buf, pos) + secs, n1secs = divmod((val2 << 32) | val1, 10000000) + dt = datetime.fromtimestamp(secs - unix_epoch, UTC) + dt = dt.replace(microsecond=n1secs // 10) + return dt, pos + + +# new-style next volume +def _next_newvol(volfile): + i = len(volfile) - 1 + while i >= 0: + if volfile[i] >= '0' and volfile[i] <= '9': + return _inc_volname(volfile, i) + i -= 1 + raise BadRarName("Cannot construct volume name: " + volfile) + + +# old-style next volume +def _next_oldvol(volfile): + # rar -> r00 + if volfile[-4:].lower() == '.rar': + return volfile[:-2] + '00' + return _inc_volname(volfile, len(volfile) - 1) + + +# increase digits with carry, otherwise just increment char +def _inc_volname(volfile, i): + fn = list(volfile) + while i >= 0: + if fn[i] != '9': + fn[i] = chr(ord(fn[i]) + 1) + break + fn[i] = '0' + i -= 1 + return ''.join(fn) + + +# rar3 extended time fields +def _parse_ext_time(h, data, pos): + # flags and rest of data can be missing + flags = 0 + if pos + 2 <= len(data): + flags = S_SHORT.unpack_from(data, pos)[0] + pos += 2 + + mtime, pos = _parse_xtime(flags >> 3 * 4, data, pos, h.mtime) + h.ctime, pos = _parse_xtime(flags >> 2 * 4, data, pos) + h.atime, pos = _parse_xtime(flags >> 1 * 4, data, pos) + h.arctime, pos = _parse_xtime(flags >> 0 * 4, data, pos) + if mtime: + h.mtime = mtime + h.date_time = mtime.timetuple()[:6] + return pos + + +# rar3 one extended time field +def _parse_xtime(flag, data, pos, basetime=None): + res = None + if flag & 8: + if not basetime: + basetime, pos = load_dostime(data, pos) + + # load second fractions + rem = 0 + cnt = flag & 3 + for _ in range(cnt): + b, pos = load_byte(data, pos) + rem = (b << 16) | (rem >> 8) + + # convert 100ns units to microseconds + usec = rem // 10 + if usec > 1000000: + usec = 999999 + + # dostime has room for 30 seconds only, correct if needed + if flag & 4 and basetime.second < 59: + res = basetime.replace(microsecond=usec, second=basetime.second + 1) + else: + res = basetime.replace(microsecond=usec) + return res, pos + + +def is_filelike(obj): + """Filename or file object? + """ + if isinstance(obj, (bytes, unicode)): + return False + res = True + for a in ('read', 'tell', 'seek'): + res = res and hasattr(obj, a) + if not res: + raise ValueError("Invalid object passed as file") + return True + + +def rar3_s2k(psw, salt): + """String-to-key hash for RAR3. + """ + if not isinstance(psw, unicode): + psw = psw.decode('utf8') + seed = bytearray(psw.encode('utf-16le') + salt) + h = Rar3Sha1(rarbug=True) + iv = EMPTY + for i in range(16): + for j in range(0x4000): + cnt = S_LONG.pack(i * 0x4000 + j) + h.update(seed) + h.update(cnt[:3]) + if j == 0: + iv += h.digest()[19:20] + key_be = h.digest()[:16] + key_le = pack("LLLL", key_be)) + return key_le, iv + + +def rar3_decompress(vers, meth, data, declen=0, flags=0, crc=0, psw=None, salt=None): + """Decompress blob of compressed data. + + Used for data with non-standard header - eg. comments. + """ + # already uncompressed? + if meth == RAR_M0 and (flags & RAR_FILE_PASSWORD) == 0: + return data + + # take only necessary flags + flags = flags & (RAR_FILE_PASSWORD | RAR_FILE_SALT | RAR_FILE_DICTMASK) + flags |= RAR_LONG_BLOCK + + # file header + fname = b'data' + date = 0 + mode = 0x20 + fhdr = S_FILE_HDR.pack(len(data), declen, RAR_OS_MSDOS, crc, + date, vers, meth, len(fname), mode) + fhdr += fname + if flags & RAR_FILE_SALT: + if not salt: + return EMPTY + fhdr += salt + + # full header + hlen = S_BLK_HDR.size + len(fhdr) + hdr = S_BLK_HDR.pack(0, RAR_BLOCK_FILE, flags, hlen) + fhdr + hcrc = rar_crc32(hdr[2:]) & 0xFFFF + hdr = S_BLK_HDR.pack(hcrc, RAR_BLOCK_FILE, flags, hlen) + fhdr + + # archive main header + mh = S_BLK_HDR.pack(0x90CF, RAR_BLOCK_MAIN, 0, 13) + ZERO * (2 + 4) + + # decompress via temp rar + tmpfd, tmpname = mkstemp(suffix='.rar') + tmpf = os.fdopen(tmpfd, "wb") + try: + tmpf.write(RAR_ID + mh + hdr + data) + tmpf.close() + + cmd = [UNRAR_TOOL] + list(OPEN_ARGS) + add_password_arg(cmd, psw, (flags & RAR_FILE_PASSWORD)) + cmd.append(tmpname) + + p = custom_popen(cmd) + return p.communicate()[0] + finally: + tmpf.close() + os.unlink(tmpname) + + +def to_datetime(t): + """Convert 6-part time tuple into datetime object. + """ + if t is None: + return None + + # extract values + year, mon, day, h, m, s = t + + # assume the values are valid + try: + return datetime(year, mon, day, h, m, s) + except ValueError: + pass + + # sanitize invalid values + mday = (0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31) + if mon < 1: + mon = 1 + if mon > 12: + mon = 12 + if day < 1: + day = 1 + if day > mday[mon]: + day = mday[mon] + if h > 23: + h = 23 + if m > 59: + m = 59 + if s > 59: + s = 59 + if mon == 2 and day == 29: + try: + return datetime(year, mon, day, h, m, s) + except ValueError: + day = 28 + return datetime(year, mon, day, h, m, s) + + +def parse_dos_time(stamp): + """Parse standard 32-bit DOS timestamp. + """ + sec, stamp = stamp & 0x1F, stamp >> 5 + mn, stamp = stamp & 0x3F, stamp >> 6 + hr, stamp = stamp & 0x1F, stamp >> 5 + day, stamp = stamp & 0x1F, stamp >> 5 + mon, stamp = stamp & 0x0F, stamp >> 4 + yr = (stamp & 0x7F) + 1980 + return (yr, mon, day, hr, mn, sec * 2) + + +def custom_popen(cmd): + """Disconnect cmd from parent fds, read only from stdout. + """ + # needed for py2exe + creationflags = 0 + if sys.platform == 'win32': + creationflags = 0x08000000 # CREATE_NO_WINDOW + + # run command + try: + p = Popen(cmd, bufsize=0, stdout=PIPE, stdin=PIPE, stderr=STDOUT, + creationflags=creationflags) + except OSError as ex: + if ex.errno == errno.ENOENT: + raise RarCannotExec("Unrar not installed? (rarfile.UNRAR_TOOL=%r)" % UNRAR_TOOL) + if ex.errno == errno.EACCES or ex.errno == errno.EPERM: + raise RarCannotExec("Cannot execute unrar (rarfile.UNRAR_TOOL=%r)" % UNRAR_TOOL) + raise + return p + + +def custom_check(cmd, ignore_retcode=False): + """Run command, collect output, raise error if needed. + """ + p = custom_popen(cmd) + out, _ = p.communicate() + if p.returncode and not ignore_retcode: + raise RarExecError("Check-run failed") + return out + + +def add_password_arg(cmd, psw, ___required=False): + """Append password switch to commandline. + """ + if UNRAR_TOOL == ALT_TOOL: + return + if psw is not None: + cmd.append('-p' + psw) + else: + cmd.append('-p-') + + +def check_returncode(p, out): + """Raise exception according to unrar exit code. + """ + code = p.returncode + if code == 0: + return + + # map return code to exception class, codes from rar.txt + errmap = [None, + RarWarning, RarFatalError, RarCRCError, RarLockedArchiveError, # 1..4 + RarWriteError, RarOpenError, RarUserError, RarMemoryError, # 5..8 + RarCreateError, RarNoFilesError, RarWrongPassword] # 9..11 + if UNRAR_TOOL == ALT_TOOL: + errmap = [None] + if 0 < code < len(errmap): + exc = errmap[code] + elif code == 255: + exc = RarUserBreak + elif code < 0: + exc = RarSignalExit + else: + exc = RarUnknownError + + # format message + if out: + msg = "%s [%d]: %s" % (exc.__doc__, p.returncode, out) + else: + msg = "%s [%d]" % (exc.__doc__, p.returncode) + + raise exc(msg) + + +def hmac_sha256(key, data): + """HMAC-SHA256""" + return HMAC(key, data, sha256).digest() + + +def membuf_tempfile(memfile): + """Write in-memory file object to real file.""" + memfile.seek(0, 0) + + tmpfd, tmpname = mkstemp(suffix='.rar') + tmpf = os.fdopen(tmpfd, "wb") + + try: + while True: + buf = memfile.read(BSIZE) + if not buf: + break + tmpf.write(buf) + tmpf.close() + except: + tmpf.close() + os.unlink(tmpname) + raise + return tmpname + + +class XTempFile(object): + """Real file for archive. + """ + __slots__ = ('_tmpfile', '_filename') + + def __init__(self, rarfile): + if is_filelike(rarfile): + self._tmpfile = membuf_tempfile(rarfile) + self._filename = self._tmpfile + else: + self._tmpfile = None + self._filename = rarfile + + def __enter__(self): + return self._filename + + def __exit__(self, exc_type, exc_value, tb): + if self._tmpfile: + try: + os.unlink(self._tmpfile) + except OSError: + pass + self._tmpfile = None + +# +# Check if unrar works +# + +ORIG_UNRAR_TOOL = UNRAR_TOOL +ORIG_OPEN_ARGS = OPEN_ARGS +ORIG_EXTRACT_ARGS = EXTRACT_ARGS +ORIG_TEST_ARGS = TEST_ARGS + +def _check_unrar_tool(): + global UNRAR_TOOL, OPEN_ARGS, EXTRACT_ARGS, TEST_ARGS + try: + # does UNRAR_TOOL work? + custom_check([ORIG_UNRAR_TOOL], True) + + UNRAR_TOOL = ORIG_UNRAR_TOOL + OPEN_ARGS = ORIG_OPEN_ARGS + EXTRACT_ARGS = ORIG_EXTRACT_ARGS + TEST_ARGS = ORIG_TEST_ARGS + except RarCannotExec: + try: + # does ALT_TOOL work? + custom_check([ALT_TOOL] + list(ALT_CHECK_ARGS), True) + # replace config + UNRAR_TOOL = ALT_TOOL + OPEN_ARGS = ALT_OPEN_ARGS + EXTRACT_ARGS = ALT_EXTRACT_ARGS + TEST_ARGS = ALT_TEST_ARGS + except RarCannotExec: + # no usable tool, only uncompressed archives work + return False + return True + +_check_unrar_tool() + + +import kernel + +# ------------------------------------------------------------------------- +# KavMain 클래스 +# ------------------------------------------------------------------------- +class KavMain: + # --------------------------------------------------------------------- + # init(self, plugins_path) + # 플러그인 엔진을 초기화 한다. + # 인력값 : plugins_path - 플러그인 엔진의 위치 + # verbose - 디버그 모드 (True or False) + # 리턴값 : 0 - 성공, 0 이외의 값 - 실패 + # --------------------------------------------------------------------- + def init(self, plugins_path, verbose=False): # 플러그인 엔진 초기화 + self.handle = {} # 압축 파일 핸들 + return 0 # 플러그인 엔진 초기화 성공 + + # --------------------------------------------------------------------- + # uninit(self) + # 플러그인 엔진을 종료한다. + # 리턴값 : 0 - 성공, 0 이외의 값 - 실패 + # --------------------------------------------------------------------- + def uninit(self): # 플러그인 엔진 종료 + return 0 # 플러그인 엔진 종료 성공 + + # --------------------------------------------------------------------- + # getinfo(self) + # 플러그인 엔진의 주요 정보를 알려준다. (제작자, 버전, ...) + # 리턴값 : 플러그인 엔진 정보 + # --------------------------------------------------------------------- + def getinfo(self): # 플러그인 엔진의 주요 정보 + info = dict() # 사전형 변수 선언 + + info['author'] = 'Kei Choi' # 제작자 + info['version'] = '1.0' # 버전 + info['title'] = 'Rar Archive Engine' # 엔진 설명 + info['kmd_name'] = 'rar' # 엔진 파일 이름 + info['engine_type'] = kernel.ARCHIVE_ENGINE # 엔진 타입 + + return info + + # --------------------------------------------------------------------- + # format(self, filehandle, filename, filename_ex) + # 파일 포맷을 분석한다. + # 입력값 : filehandle - 파일 핸들 + # filename - 파일 이름 + # filename_ex - 압축 파일 내부 파일 이름 + # 리턴값 : {파일 포맷 분석 정보} or None + # --------------------------------------------------------------------- + def format(self, filehandle, filename, filename_ex): + ret = {} + + mm = filehandle + if mm[0:4] == 'Rar!': # 헤더 체크 + ret['ff_rar'] = 'rar' + return ret + + return None + + # --------------------------------------------------------------------- + # __get_handle(self, filename) + # 압축 파일의 핸들을 얻는다. + # 입력값 : filename - 파일 이름 + # 리턴값 : 압축 파일 핸들 + # --------------------------------------------------------------------- + def __get_handle(self, filename): + if filename in self.handle: # 이전에 열린 핸들이 존재하는가? + zfile = self.handle.get(filename, None) + else: + zfile = RarFile(filename) # rar 파일 열기 + self.handle[filename] = zfile + + return zfile + + # --------------------------------------------------------------------- + # arclist(self, filename, fileformat) + # 압축 파일 내부의 파일 목록을 얻는다. + # 입력값 : filename - 파일 이름 + # fileformat - 파일 포맷 분석 정보 + # 리턴값 : [[압축 엔진 ID, 압축된 파일 이름]] + # --------------------------------------------------------------------- + def arclist(self, filename, fileformat): + file_scan_list = [] # 검사 대상 정보를 모두 가짐 + + # 미리 분석된 파일 포맷중에 RAR 포맷이 있는가? + if 'ff_rar' in fileformat: + zfile = self.__get_handle(filename) + + for name in zfile.namelist(): + file_scan_list.append(['arc_rar', name]) + # zfile.close() + + return file_scan_list + + # --------------------------------------------------------------------- + # unarc(self, arc_engine_id, arc_name, fname_in_arc) + # 입력값 : arc_engine_id - 압축 엔진 ID + # arc_name - 압축 파일 + # fname_in_arc - 압축 해제할 파일 이름 + # 리턴값 : 압축 해제된 내용 or None + # --------------------------------------------------------------------- + def unarc(self, arc_engine_id, arc_name, fname_in_arc): + if arc_engine_id == 'arc_rar': + zfile = self.__get_handle(arc_name) + try: + data = zfile.read(fname_in_arc) + return data + except BadRarFile: + # import traceback + # print (traceback.format_exc()) + pass + + return None + + # --------------------------------------------------------------------- + # arcclose(self) + # 압축 파일 핸들을 닫는다. + # --------------------------------------------------------------------- + def arcclose(self): + for fname in self.handle.keys(): + zfile = self.handle[fname] + zfile.close() + self.handle.pop(fname) From c13f61942dad8db6200a6cee4cacaffb50ebc959 Mon Sep 17 00:00:00 2001 From: hanul93 Date: Wed, 9 May 2018 09:05:56 +0900 Subject: [PATCH 30/46] Added 7z archive --- Engine/plugins/zip.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/Engine/plugins/zip.py b/Engine/plugins/zip.py index 7d8e97e..bef6ea3 100644 --- a/Engine/plugins/zip.py +++ b/Engine/plugins/zip.py @@ -5,6 +5,7 @@ import struct import zlib import os +import py7zlib import zipfile import kernel @@ -297,6 +298,15 @@ def __get_handle(self, filename): return zfile + def __get_handle_7z(self, filename): + if filename in self.handle: # 이전에 열린 핸들이 존재하는가? + zfile = self.handle.get(filename, None) + else: + zfile = py7zlib.Archive7z(open(filename, 'rb')) # 7z 파일 열기 + self.handle[filename] = zfile + + return zfile + # --------------------------------------------------------------------- # format(self, filehandle, filename, filename_ex) # 파일 포맷을 분석한다. @@ -346,6 +356,9 @@ def format(self, filehandle, filename, filename_ex): pass return ret + elif mm[0:4] == '7z\xbc\xaf': + ret['ff_7z'] = '7z' + return ret return None @@ -370,6 +383,10 @@ def arclist(self, filename, fileformat): off, zsize = fileformat['ff_attach_zip'] file_scan_list.append(['arc_attach_zip:0:%d' % off, '#1']) file_scan_list.append(['arc_attach_zip:%d:%d' % (off, zsize), '#2']) + elif 'ff_7z' in fileformat: + zfile = self.__get_handle_7z(filename) + for name in zfile.filenames: + file_scan_list.append(['arc_7z', name]) return file_scan_list @@ -397,6 +414,16 @@ def unarc(self, arc_engine_id, arc_name, fname_in_arc): fp.seek(off) data = fp.read(size) return data + elif arc_engine_id == 'arc_7z': + zfile = self.__get_handle_7z(arc_name) + cf = zfile.getmember(fname_in_arc) + try: + data = cf.read() + return data + except (ValueError, py7zlib.UnsupportedCompressionMethodError) as e: + # BCJ LZMA, BCJ2 LZMA를 py7zlib가 아직 지원하지 못함 (ver 0.4.9) + # LZMA 지원 체크 완료 + pass return None From 457020c5be5cf8546c3a6fca0e1fc78b55900354 Mon Sep 17 00:00:00 2001 From: hanul93 Date: Thu, 10 May 2018 23:19:19 +0900 Subject: [PATCH 31/46] Inserted images to README.md --- README.md | 135 +++--------------------------------------------------- 1 file changed, 6 insertions(+), 129 deletions(-) diff --git a/README.md b/README.md index 73009c7..dd45f8a 100644 --- a/README.md +++ b/README.md @@ -35,147 +35,24 @@ C:\kicomav\Release> python k2.py [path] [options] **Example 1 :** KicomAV help Options -``` -C:\kicomav\Release> python k2.py ------------------------------------------------------------- -KICOM Anti-Virus II (for WIN32) Ver 0.30 (May 07 2018) -Copyright (C) 1995-2018 Kei Choi. All rights reserved. ------------------------------------------------------------- - -Usage: k2.py path[s] [options] -Options: - -f, --files scan files * - -r, --arc scan archives - -G, --log=file create log file - -I, --list display all files - -e, --app append to log file - -F, --infp=path set infected quarantine folder - -R, --nor do not recurse into folders - -V, --vlist display virus list - -p, --prompt prompt for action - -d, --dis disinfect files - -l, --del delete infected files - --no-color don't print with color - --move move infected files in quarantine folder - --update update - --verbose enabling verbose mode (only Developer Edition) - --sigtool make files for malware signatures - -?, --help this help - * = default option - -C:\kicomav\Release> _ -``` +![KicomAV_Run](http://www.hanul93.com/images/kicomav/k2_run.gif) **Example 2 :** Update for malware signatures -``` -C:\kicomav\Release>k2.py --update ------------------------------------------------------------- -KICOM Anti-Virus II (for WIN32) Ver 0.30 (May 07 2018) -Copyright (C) 1995-2018 Kei Choi. All rights reserved. ------------------------------------------------------------- - -plugins/emalware.c01 .... update -plugins/emalware.i01 ..... update -plugins/emalware.n01 ........ update -plugins/emalware.s01 .. update - -[Update complete] - -C:\kicomav\Release> _ -``` +![KicomAV_Update](http://www.hanul93.com/images/kicomav/k2_update.gif) **Example 3 :** Scan for current path -``` -C:\kicomav\Release> python k2.py . ------------------------------------------------------------- -KICOM Anti-Virus II (for WIN32) Ver 0.30 (May 07 2018) -Copyright (C) 1995-2018 Kei Choi. All rights reserved. ------------------------------------------------------------- -Last updated Wed Mar 7 00:14:58 2018 UTC -Signature number: 2,052 - -C:\kicomav\Relea ... 08ecba90d0cd778 infected : Trojan-Ransom.Win32.Snocry.cxu -C:\kicomav\Release\ ... 218e8a8d7eb93df1003 infected : Trojan.Win32.Agent.icgh - - -Results: -Folders :4 -Files :37 -Packed :0 -Infected files :2 -Suspect files :0 -Warnings :0 -Identified viruses:2 -I/O errors :0 - - -C:\kicomav\Release> _ -``` +![KicomAV_Scan](http://www.hanul93.com/images/kicomav/k2_scan.gif) **Example 4 :** Scan for ZIP files -``` -C:\kicomav\Release> python k2.py sample\test.zip -r -I ------------------------------------------------------------- -KICOM Anti-Virus II (for WIN32) Ver 0.30 (May 07 2018) -Copyright (C) 1995-2018 Kei Choi. All rights reserved. ------------------------------------------------------------- -Last updated Wed Mar 7 00:14:58 2018 UTC -Signature number: 2,052 - -C:\kicomav\Release\sample\test.zip ok -C:\kicomav\Relea ... .zip (dummy.txt) infected : Dummy-Test-File (not a virus) - - -Results: -Folders :0 -Files :2 -Packed :1 -Infected files :1 -Suspect files :0 -Warnings :0 -Identified viruses:1 -I/O errors :0 - - -C:\kicomav\Release> _ -``` +![KicomAV_Scan_Zip](http://www.hanul93.com/images/kicomav/k2_scan_zip.gif) **Example 5 :** Display Virus list -``` -C:\kicomav\Release> python k2.py -V ------------------------------------------------------------- -KICOM Anti-Virus II (for WIN32) Ver 0.30 (May 07 2018) -Copyright (C) 1995-2018 Kei Choi. All rights reserved. ------------------------------------------------------------- -Last updated Wed Mar 7 00:14:58 2018 UTC -Signature number: 2,052 - -Dummy-Test-File (not a virus) [dummy.kmd] -EICAR-Test-File (not a virus) [eicar.kmd] -Backdoor.Linux.Mirai.a.gen [emalware.kmd] -Trojan-Ransom.NSIS.MyxaH.niz [emalware.kmd] -Trojan-Ransom.NSIS.Onion.afvz [emalware.kmd] -Trojan-Ransom.Win32.Agent.aagy [emalware.kmd] -Trojan-Ransom.Win32.Agent.aahp [emalware.kmd] - -... - -Trojan.Win32.Inject.adnta [emalware.kmd] -Trojan.Win32.Inject.wnfq [emalware.kmd] -Trojan.Win32.Invader [emalware.kmd] -Trojan.Win32.KillDisk.gen [emalware.kmd] -Trojan.Win32.Menti.gen [emalware.kmd] -Worm.Script.Generic [emalware.kmd] -Virus.MSExcel.Laroux.Gen [macro.kmd] -Exploit.HWP.Generic [hwp.kmd] - - -C:\kicomav\Release> _ -``` +![KicomAV_Virus_list](http://www.hanul93.com/images/kicomav/k2_vlist.gif) + ## Author From 4c10ee30acb4c10390d67a1b7cb39f9015fd66fa Mon Sep 17 00:00:00 2001 From: hanul93 Date: Fri, 11 May 2018 08:06:05 +0900 Subject: [PATCH 32/46] Added examples for command line --- README.md | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/README.md b/README.md index dd45f8a..47fff26 100644 --- a/README.md +++ b/README.md @@ -35,22 +35,42 @@ C:\kicomav\Release> python k2.py [path] [options] **Example 1 :** KicomAV help Options +``` +C:\kicomav\Release> python k2.py +``` + ![KicomAV_Run](http://www.hanul93.com/images/kicomav/k2_run.gif) **Example 2 :** Update for malware signatures +``` +C:\kicomav\Release> python k2.py --update +``` + ![KicomAV_Update](http://www.hanul93.com/images/kicomav/k2_update.gif) **Example 3 :** Scan for current path +``` +C:\kicomav\Release> python k2.py . -I +``` + ![KicomAV_Scan](http://www.hanul93.com/images/kicomav/k2_scan.gif) **Example 4 :** Scan for ZIP files +``` +C:\kicomav\Release> python k2.py sample -r -I +``` + ![KicomAV_Scan_Zip](http://www.hanul93.com/images/kicomav/k2_scan_zip.gif) **Example 5 :** Display Virus list +``` +C:\kicomav\Release> python k2.py -V +``` + ![KicomAV_Virus_list](http://www.hanul93.com/images/kicomav/k2_vlist.gif) From f96e5eb34e722e28c88a9b9688a073122e892a8c Mon Sep 17 00:00:00 2001 From: hanul93 Date: Fri, 11 May 2018 11:07:49 +0900 Subject: [PATCH 33/46] Fixed malware detection against yaraex.yar --- Engine/plugins/yaraex.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/Engine/plugins/yaraex.py b/Engine/plugins/yaraex.py index 9fcb764..b855696 100644 --- a/Engine/plugins/yaraex.py +++ b/Engine/plugins/yaraex.py @@ -76,12 +76,13 @@ def getinfo(self): # 플러그인 엔진의 주요 정보 # 리턴값 : (악성코드 발견 여부, 악성코드 이름, 악성코드 ID) 등등 # --------------------------------------------------------------------- def scan(self, filehandle, filename, fileformat, filename_ex): # 악성코드 검사 - ret = self.rules.match(filename) - if len(ret): - for t in ret: - vname = t.meta.get('KicomAV', None) # KicomAV meta 정보 확인 - if vname: - return True, vname, 0, kernel.INFECTED + if filename.lower().find('yaraex.yar') == -1: # yara rule을 검사할 가능성 있음 + ret = self.rules.match(filename) + if len(ret): + for t in ret: + vname = t.meta.get('KicomAV', None) # KicomAV meta 정보 확인 + if vname: + return True, vname, 0, kernel.INFECTED # 악성코드를 발견하지 못했음을 리턴한다. return False, '', -1, kernel.NOT_FOUND From d62015553067f8e4a7e7bacc08cef0b229c6bf14 Mon Sep 17 00:00:00 2001 From: hanul93 Date: Fri, 11 May 2018 11:47:52 +0900 Subject: [PATCH 34/46] Added 7z module --- Engine/k2.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Engine/k2.py b/Engine/k2.py index d3bc1cc..6571b3f 100644 --- a/Engine/k2.py +++ b/Engine/k2.py @@ -15,6 +15,7 @@ try: from backports import lzma import yara + import py7zlib except ImportError: pass @@ -48,7 +49,7 @@ # 주요 상수 # ------------------------------------------------------------------------- KAV_VERSION = '0.30' -KAV_BUILDDATE = 'May 07 2018' +KAV_BUILDDATE = 'May 11 2018' KAV_LASTYEAR = KAV_BUILDDATE[len(KAV_BUILDDATE)-4:] g_options = None # 옵션 From 38bd7381fa310a5dcee4be84107d82d144fd1fed Mon Sep 17 00:00:00 2001 From: hanul93 Date: Wed, 16 May 2018 09:30:03 +0900 Subject: [PATCH 35/46] Fixed the problem that file name inside egg file is broken --- Engine/plugins/egg.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Engine/plugins/egg.py b/Engine/plugins/egg.py index 2b4a888..51537cb 100644 --- a/Engine/plugins/egg.py +++ b/Engine/plugins/egg.py @@ -3,6 +3,7 @@ import os +import sys import mmap import zlib import bz2 @@ -351,7 +352,8 @@ def __EGG_Filename_Header__(self, data): except: pass - return size, fname + fsencoding = sys.getfilesystemencoding() or sys.getdefaultencoding() + return size, fname.decode('utf-8').encode(fsencoding) # ----------------------------------------------------------------- # __EGG_Block_Header_Size__(self, data) From aef994d2bcf1c753437735a389d6825618951100 Mon Sep 17 00:00:00 2001 From: hanul93 Date: Sat, 9 Jun 2018 12:28:15 +0900 Subject: [PATCH 36/46] Added InstallShield Engine --- Engine/plugins/ishield.py | 206 ++++++++++++++++++++++++++++++++++++++ Engine/plugins/kicom.lst | 1 + 2 files changed, 207 insertions(+) create mode 100644 Engine/plugins/ishield.py diff --git a/Engine/plugins/ishield.py b/Engine/plugins/ishield.py new file mode 100644 index 0000000..13b8fff --- /dev/null +++ b/Engine/plugins/ishield.py @@ -0,0 +1,206 @@ +# -*- coding:utf-8 -*- +# Author: Kei Choi(hanul93@gmail.com) + + +import struct +import zlib +import os +import py7zlib + +import zipfile +import kernel +import kavutil + + +# --------------------------------------------------------------------- +# InstallShield 클래스 +# --------------------------------------------------------------------- +class InstallShield: + def __init__(self, fname): + self.fname = fname + self.fp = None + self.fsize = 0 + self.install_name = [] + + def __del__(self): + if self.fp: + self.close() + + def close(self): + if self.fp: + self.fp.close() + self.fp = None + + def parse(self): + try: + self.fp = open(self.fname, 'rb') + self.fsize = os.fstat(self.fp.fileno()).st_size + + cur_pos = 0 + + # Magic 체크 + if self.fp.read(0xe) != 'InstallShield\x00': + raise ValueError + + cur_pos += 0xe + + # InstallShield에 첨부된 파일 수 + data = self.fp.read(0x20) + num_file = kavutil.get_uint32(data, 0) + + cur_pos += 0x20 + + for i in range(num_file): + data = self.fp.read(0x138) + fname = data[:0x10b].replace('\x00', '') + fsize = kavutil.get_uint32(data, 0x10c) + foff = cur_pos + 0x138 + self.install_name.append((foff, fsize, fname)) + + cur_pos += 0x138 + fsize + self.fp.seek(cur_pos) + + return True + except (IOError, OSError, ValueError) as e: + pass + + return False + + def namelist(self): + flist = [] + + for f in self.install_name: + flist.append(f[2]) + + return flist + + def read(self, fname): + for f in self.install_name: + if f[2] == fname: + foff = f[0] + fsize = f[1] + + if self.fp: + self.fp.seek(foff) + data = self.fp.read(fsize) + return data + + return None + + +# ------------------------------------------------------------------------- +# KavMain 클래스 +# ------------------------------------------------------------------------- +class KavMain: + # --------------------------------------------------------------------- + # init(self, plugins_path) + # 플러그인 엔진을 초기화 한다. + # 인력값 : plugins_path - 플러그인 엔진의 위치 + # verbose - 디버그 모드 (True or False) + # 리턴값 : 0 - 성공, 0 이외의 값 - 실패 + # --------------------------------------------------------------------- + def init(self, plugins_path, verbose=False): # 플러그인 엔진 초기화 + self.handle = {} # 압축 파일 핸들 + return 0 # 플러그인 엔진 초기화 성공 + + # --------------------------------------------------------------------- + # uninit(self) + # 플러그인 엔진을 종료한다. + # 리턴값 : 0 - 성공, 0 이외의 값 - 실패 + # --------------------------------------------------------------------- + def uninit(self): # 플러그인 엔진 종료 + return 0 # 플러그인 엔진 종료 성공 + + # --------------------------------------------------------------------- + # getinfo(self) + # 플러그인 엔진의 주요 정보를 알려준다. (제작자, 버전, ...) + # 리턴값 : 플러그인 엔진 정보 + # --------------------------------------------------------------------- + def getinfo(self): # 플러그인 엔진의 주요 정보 + info = dict() # 사전형 변수 선언 + + info['author'] = 'Kei Choi' # 제작자 + info['version'] = '1.0' # 버전 + info['title'] = 'InstallShield Engine' # 엔진 설명 + info['kmd_name'] = 'ishield' # 엔진 파일 이름 + + return info + + # --------------------------------------------------------------------- + # __get_handle(self, filename) + # 압축 파일의 핸들을 얻는다. + # 입력값 : filename - 파일 이름 + # 리턴값 : 압축 파일 핸들 + # --------------------------------------------------------------------- + def __get_handle(self, filename): + if filename in self.handle: # 이전에 열린 핸들이 존재하는가? + zfile = self.handle.get(filename, None) + else: + zfile = InstallShield(filename) # InstallShield 파일 열기 + self.handle[filename] = zfile + + return zfile + + # --------------------------------------------------------------------- + # format(self, filehandle, filename, filename_ex) + # 파일 포맷을 분석한다. + # 입력값 : filehandle - 파일 핸들 + # filename - 파일 이름 + # filename_ex - 압축 파일 내부 파일 이름 + # 리턴값 : {파일 포맷 분석 정보} or None + # --------------------------------------------------------------------- + def format(self, filehandle, filename, filename_ex): + ret = {} + + mm = filehandle + data = mm[0:0xe] + if data == 'InstallShield\x00': # 헤더 체크 + ret['ff_installshield'] = 'InstallShield' + return ret + + return None + + # --------------------------------------------------------------------- + # arclist(self, filename, fileformat) + # 압축 파일 내부의 파일 목록을 얻는다. + # 입력값 : filename - 파일 이름 + # fileformat - 파일 포맷 분석 정보 + # 리턴값 : [[압축 엔진 ID, 압축된 파일 이름]] + # --------------------------------------------------------------------- + def arclist(self, filename, fileformat): + file_scan_list = [] # 검사 대상 정보를 모두 가짐 + + # 미리 분석된 파일 포맷중에 InstallShield 포맷이 있는가? + if 'ff_installshield' in fileformat: + zfile = self.__get_handle(filename) + + if zfile.parse(): + for name in zfile.namelist(): + file_scan_list.append(['arc_installshield', name]) + + return file_scan_list + + # --------------------------------------------------------------------- + # unarc(self, arc_engine_id, arc_name, fname_in_arc) + # 입력값 : arc_engine_id - 압축 엔진 ID + # arc_name - 압축 파일 + # fname_in_arc - 압축 해제할 파일 이름 + # 리턴값 : 압축 해제된 내용 or None + # --------------------------------------------------------------------- + def unarc(self, arc_engine_id, arc_name, fname_in_arc): + if arc_engine_id == 'arc_installshield': + zfile = self.__get_handle(arc_name) + data = zfile.read(fname_in_arc) + return data + + return None + + # --------------------------------------------------------------------- + # arcclose(self) + # 압축 파일 핸들을 닫는다. + # --------------------------------------------------------------------- + def arcclose(self): + for fname in self.handle.keys(): + zfile = self.handle[fname] + zfile.close() + self.handle.pop(fname) diff --git a/Engine/plugins/kicom.lst b/Engine/plugins/kicom.lst index cf5444e..b125d0c 100644 --- a/Engine/plugins/kicom.lst +++ b/Engine/plugins/kicom.lst @@ -31,5 +31,6 @@ gz.kmd xz.kmd tar.kmd nsis.kmd +ishield.kmd unpack.kmd upx.kmd From bbc33ce7e79a0b954e931114ea430dc7157f87c4 Mon Sep 17 00:00:00 2001 From: hanul93 Date: Sun, 10 Jun 2018 12:32:23 +0900 Subject: [PATCH 37/46] Removed Parentheses --- Engine/plugins/unpack.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/Engine/plugins/unpack.py b/Engine/plugins/unpack.py index 0482d31..8428951 100644 --- a/Engine/plugins/unpack.py +++ b/Engine/plugins/unpack.py @@ -2,6 +2,7 @@ # Author: Kei Choi(hanul93@gmail.com) +import os import zlib import struct import kavutil @@ -88,10 +89,12 @@ def arclist(self, filename, fileformat): # 미리 분석된 파일 포맷중에 특정 포맷이 있는가? if 'ff_zlib' in fileformat: - file_scan_list.append(['arc_zlib', '']) + # file_scan_list.append(['arc_zlib', '']) + file_scan_list.append(['arc_zlib', 'Zlib']) if 'ff_embed_ole' in fileformat: - file_scan_list.append(['arc_embed_ole', '']) + # file_scan_list.append(['arc_embed_ole', '']) + file_scan_list.append(['arc_embed_ole', 'Embed']) return file_scan_list From 070db2ffb991ee2c65647434807693fb2b3ebe97 Mon Sep 17 00:00:00 2001 From: hanul93 Date: Wed, 13 Jun 2018 12:37:43 +0900 Subject: [PATCH 38/46] Added display PDB info in debug mode --- Engine/plugins/pe.py | 1 + 1 file changed, 1 insertion(+) diff --git a/Engine/plugins/pe.py b/Engine/plugins/pe.py index 6ed2433..a216911 100644 --- a/Engine/plugins/pe.py +++ b/Engine/plugins/pe.py @@ -490,6 +490,7 @@ def parse(self): if 'PDB_Name' in pe_format: kavutil.vprint('PDB Information') kavutil.vprint(None, 'Name', '%s' % repr(pe_format['PDB_Name'])) + print repr(pe_format['PDB_Name']) print except (ValueError, struct.error) as e: From 7f5375e3bd56b9a925304315cbaa52b59fa51718 Mon Sep 17 00:00:00 2001 From: hanul93 Date: Thu, 14 Jun 2018 12:41:01 +0900 Subject: [PATCH 39/46] Fixed parse of dir_referencesrecord in macro engine --- Engine/plugins/macro.py | 34 ++++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/Engine/plugins/macro.py b/Engine/plugins/macro.py index 3606aec..1a1abaa 100644 --- a/Engine/plugins/macro.py +++ b/Engine/plugins/macro.py @@ -469,12 +469,15 @@ def dir_informationrecord(data, off, verbose=False): def dir_referencesrecord(data, off, verbose=False): while True: + _follow = False # modify by sungho # NameRecord에는 2개의 레코드가 존재함 t_data = get_record_size32(data, off) val = get_uint16(t_data, 0) + if val != 0x0016: # raise Error('dir:ReferencesRecord:NameRecord #1') break + off += len(t_data) if verbose: @@ -492,25 +495,31 @@ def dir_referencesrecord(data, off, verbose=False): t_data = get_record_size32(data, off) off += len(t_data) t_data = get_record_size32(data, off) - if get_uint16(t_data, 0) == 0x002F: # REFERENCECONTROL + val = get_uint16(data, off) # modify by sungho + _follow = True + + # modify by sungho + """ REFERENCEREGISTERED field is optional """ + if val == 0x002F: # REFERENCECONTROL + if _follow is False: + t_data = get_record_size32(data, off) + off += len(t_data) + t_data = get_record_size32(data, off) + if get_uint16(t_data, 0) == 0x0016: # NameRecordExtended off += len(t_data) t_data = get_record_size32(data, off) - if get_uint16(t_data, 0) == 0x0016: # NameRecordExtended + if get_uint16(t_data, 0) == 0x003E: # NameRecordExtended:Reserved off += len(t_data) t_data = get_record_size32(data, off) - if get_uint16(t_data, 0) == 0x003E: # NameRecordExtended:Reserved + if get_uint16(t_data, 0) == 0x0030: # Reserved3 off += len(t_data) - t_data = get_record_size32(data, off) - if get_uint16(t_data, 0) == 0x0030: # Reserved3 - off += len(t_data) - else: - raise Error('dir:ReferencesRecord:ReferenceRecord:REFERENCECONTROL:Reserved3') else: - raise Error('dir:ReferencesRecord:ReferenceRecord:REFERENCECONTROL:NameRecordExtended:Reserved') + raise Error('dir:ReferencesRecord:ReferenceRecord:REFERENCECONTROL:Reserved3') else: - raise Error('dir:ReferencesRecord:ReferenceRecord:REFERENCECONTROL:NameRecordExtended') + raise Error('dir:ReferencesRecord:ReferenceRecord:REFERENCECONTROL:NameRecordExtended:Reserved') else: - raise Error('dir:ReferencesRecord:ReferenceRecord:REFERENCECONTROL') + raise Error('dir:ReferencesRecord:ReferenceRecord:REFERENCECONTROL:NameRecordExtended') + elif val == 0x000D or val == 0x000E: # REFERENCEPROJECT t_data = get_record_size32(data, off) off += len(t_data) @@ -542,7 +551,8 @@ def dir_modulesrecord(data, off, verbose=False): t_data = get_record_size32(data, off) val = get_uint16(t_data, 0) if val != 0x0047: - raise Error('dir:ModulesRecord:NameUnicodeRecord') + continue # modify by sungho + # raise Error('dir:ModulesRecord:NameUnicodeRecord') off += len(t_data) t_data = get_record_size32(data, off) From 9cf295e21ebc41853da0897ad623f8d0c6a79bb5 Mon Sep 17 00:00:00 2001 From: hanul93 Date: Mon, 18 Jun 2018 12:43:39 +0900 Subject: [PATCH 40/46] Modified the display filename --- Engine/k2.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Engine/k2.py b/Engine/k2.py index 6571b3f..62a248c 100644 --- a/Engine/k2.py +++ b/Engine/k2.py @@ -647,8 +647,11 @@ def convert_display_filename(real_filename): display_filename = real_filename.encode(sys.stdout.encoding, 'replace') else: display_filename = unicode(real_filename, fsencoding).encode(sys.stdout.encoding, 'replace') - return display_filename + if display_filename[0] == '/' or display_filename[0] == '\\': + return display_filename[1:] + else: + return display_filename def display_line(filename, message, message_color): max_sizex = get_terminal_sizex() - 1 From 527490f20f520ae0f1c9248c299aedc95024beee Mon Sep 17 00:00:00 2001 From: hanul93 Date: Fri, 15 Jun 2018 12:45:54 +0900 Subject: [PATCH 41/46] Modified the create temp files in sigtool mode --- Engine/kavcore/k2engine.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Engine/kavcore/k2engine.py b/Engine/kavcore/k2engine.py index be3ce43..e55de93 100644 --- a/Engine/kavcore/k2engine.py +++ b/Engine/kavcore/k2engine.py @@ -1075,7 +1075,10 @@ def unarc(self, file_struct): shutil.copy(rname, sig_fname) # sigtool.log 파일을 생성한다. - msg = '%s : %s\n' % (sig_fname, rname_struct.get_additional_filename()) + t = rname_struct.get_additional_filename() + if t[0] == '/' or t[0] == '\\': + t = t[1:] + msg = '%s : %s\n' % (sig_fname, t) fp = open('sigtool.log', 'at') fp.write(msg) fp.close() From c3841d127ee5fefb661be3447f0275b7f5f4bc58 Mon Sep 17 00:00:00 2001 From: hanul93 Date: Sat, 16 Jun 2018 12:47:07 +0900 Subject: [PATCH 42/46] Removed input mode --- Engine/kavcore/k2engine.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Engine/kavcore/k2engine.py b/Engine/kavcore/k2engine.py index e55de93..759dec5 100644 --- a/Engine/kavcore/k2engine.py +++ b/Engine/kavcore/k2engine.py @@ -945,7 +945,7 @@ def __scan_file(self, file_struct, fileformat): if k2const.K2DEBUG: import traceback print traceback.format_exc() - raw_input('>>') + # raw_input('>>') self.result['IO_errors'] += 1 # 파일 I/O 오류 발생 수 if mm: From a969556c5d0abca5dadc606dce1877356fefdf4a Mon Sep 17 00:00:00 2001 From: hanul93 Date: Sun, 17 Jun 2018 12:49:18 +0900 Subject: [PATCH 43/46] Modified version --- Engine/kavcore/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Engine/kavcore/__init__.py b/Engine/kavcore/__init__.py index 9959142..0eee95c 100644 --- a/Engine/kavcore/__init__.py +++ b/Engine/kavcore/__init__.py @@ -2,4 +2,4 @@ # Author: Kei Choi(hanul93@gmail.com) -__version__ = '0.30' +__version__ = '0.31' From 52efd92a66be5de76cbec1b92d05a04e3aceaab0 Mon Sep 17 00:00:00 2001 From: hanul93 Date: Mon, 18 Jun 2018 12:52:22 +0900 Subject: [PATCH 44/46] Modified version --- Engine/k2.py | 4 ++-- Engine/kavcore/k2const.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Engine/k2.py b/Engine/k2.py index 62a248c..2c595ea 100644 --- a/Engine/k2.py +++ b/Engine/k2.py @@ -48,8 +48,8 @@ # ------------------------------------------------------------------------- # 주요 상수 # ------------------------------------------------------------------------- -KAV_VERSION = '0.30' -KAV_BUILDDATE = 'May 11 2018' +KAV_VERSION = '0.31' +KAV_BUILDDATE = 'Jun 18 2018' KAV_LASTYEAR = KAV_BUILDDATE[len(KAV_BUILDDATE)-4:] g_options = None # 옵션 diff --git a/Engine/kavcore/k2const.py b/Engine/kavcore/k2const.py index ba77aca..2e6678d 100644 --- a/Engine/kavcore/k2const.py +++ b/Engine/kavcore/k2const.py @@ -5,7 +5,7 @@ # ------------------------------------------------------------------------- # 디버깅용 여부 설정하기 # ------------------------------------------------------------------------- -K2DEBUG = True +K2DEBUG = False # ------------------------------------------------------------------------- # 악성코드 치료를 지시하는 상수 From 90052baf1d85c634de6dce726e7ee4621cbda781 Mon Sep 17 00:00:00 2001 From: hanul93 Date: Mon, 18 Jun 2018 13:14:26 +0900 Subject: [PATCH 45/46] Changed log filed --- CHANGELOG.md | 32 +++++++++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cb70383..f63b2a7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,30 @@ -## v0.30 (May 07, 2018) +## v0.30 (Jun 18, 2018) + +* **Plugins Modules :** + * cab: Improved speed for cab file extract + * dde: Added a CVE-2017-0199 pattern + * egg: Fixed the problem that file name inside egg file is broken + * emalware: Added a Worm.Win32.Allaple.gen pattern + * html: Added scan function for Trojan.HTML.IFrame.a + * ishield: New support + * macro: Fixed parse of dir_referencesrecord + * ole: Added extraction of Ole's attach data + * olenative: Fixed a _OleNative Stream name + * pe: Fixed missing import API names + * pe: Fixed check to resource size + * rar: New support + * ve: Added New scan areas + * ve: Fixed crc32 of base offset + * yaraex: Fixed detect yara rule name + * yaraex: Fixed malware detection against yaraex.yar + * zip: Added 7z archive + +* **Command Line Interface :** + * k2: Added New status (IDENTIFIED) + * k2: Added New options (--copy, --qname) + * k2: Fixed file name assembly + +## v0.30 (Mar 07, 2018) * **Engine :** * k2engine: Changed WindowsError exception handling to OSError exception handling @@ -41,12 +67,12 @@ * k2file: Add a class to process temporary folders by process * **Plugins Modules :** - * adware: new support + * adware: New support * attach: process to add size information of an attached image to newly extract an attached image * bz: New support * carch: New support * dde: New support - * egg: new support + * egg: New support * elf: verbose processing on ELF 64bit * emalware: Handle MD5 calculations if section size is 0 * emalware: Handle malicious code in addition to .text area From f7be08247be2b9b93e4a14d326fb4da70d5a35d2 Mon Sep 17 00:00:00 2001 From: hanul93 Date: Mon, 18 Jun 2018 13:16:52 +0900 Subject: [PATCH 46/46] Modified README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 47fff26..f6f04ac 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ -# KicomAV v0.30 +# KicomAV v0.31 [![License](https://img.shields.io/badge/license-gpl2-blue.svg)](LICENSE) ![Platform](https://img.shields.io/badge/platform-windows-lightgrey.svg)