Skip to content

Commit

Permalink
Feature/rdj assembler limit (#13)
Browse files Browse the repository at this point in the history
Simplify data format, remove hex data from flow item.

The old format is terribly inefficient, and moving it to gridFS is
slow and clunky, even if you were to manually handle appends.

Data is capped at 15 MB, to stay well under the document limit
of 16MB. Any data beyond that is discarded, but the start
of the session will remain searchable.
  • Loading branch information
RickdeJager committed Aug 21, 2022
1 parent fd63d22 commit b286bd3
Show file tree
Hide file tree
Showing 8 changed files with 60 additions and 18 deletions.
1 change: 0 additions & 1 deletion frontend/src/api.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ export interface Id {
export interface FlowData {
from: string
data: string
hex: string
time: number
}

Expand Down
5 changes: 1 addition & 4 deletions frontend/src/pages/FlowView.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -52,10 +52,7 @@ function FlowContainer({
}

function HexFlow({ flow }: { flow: FlowData }) {
const data = flow.hex;
// make hex view here, use Buffer or maybe not.
const buffer = Buffer.from(data, "hex");
const hex = hexy(buffer);
const hex = hexy(flow.data);
return <FlowContainer copyText={hex}>{hex}</FlowContainer>;
}

Expand Down
3 changes: 1 addition & 2 deletions services/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,7 @@ Each document will have:
"starred": //if the flow is starred
"flow": [
{
"data": "...", //printable data
"hex": //original data encoded in hex
"data": "...", // session data (capped at 15 MB)
"from": "c" // "c" for client, "s" for server
"time": //timestamp
},
Expand Down
10 changes: 10 additions & 0 deletions services/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import sys
import pprint
from configurations import mongo_server
import gridfs


class DB:
Expand All @@ -39,6 +40,7 @@ def __init__(self):
mongo_server, serverSelectionTimeoutMS=200, unicode_decode_error_handler='ignore')
self.client.server_info()
self.db = self.client.pcap
self.fs = gridfs.GridFS(self.db)
self.pcap_coll = self.db.pcap
self.file_coll = self.db.filesImported
self.signature_coll = self.db.signatures
Expand Down Expand Up @@ -89,8 +91,16 @@ def getFlowDetail(self, id):
tmp = self.signature_coll.find_one({"_id": ObjectId(sig_id)})
if tmp:
ret["signatures"].append(tmp)

return ret

def getRawFile(self, raw_id):
print("raw id was", raw_id)
res = self.fs.get(ObjectId(raw_id))
if res:
return res.read()
return None

def setStar(self, flow_id, star):
self.pcap_coll.find_one_and_update({"_id": ObjectId(flow_id)}, {"$set": {"starred": star}})

Expand Down
3 changes: 2 additions & 1 deletion services/flow2pwn.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,14 @@
import string

def escape(i):
i = ord(i)
ret = chr(i) if 0x20 <= i and i < 0x7f else f'\\x{i:02x}'
if ret in '\\"':
ret = '\\' + ret
return ret

def convert(message):
data = bytes.fromhex(message["hex"])
data = message["data"]
return ''.join([escape(i) for i in data])

#convert a flow into pwn script
Expand Down
22 changes: 16 additions & 6 deletions services/go-importer/cmd/assembler/tcp.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
package main

import (
"encoding/hex"
"go-importer/internal/pkg/db"

"sync"
Expand All @@ -27,6 +26,7 @@ var quiet = true

const closeTimeout time.Duration = time.Hour * 24 // Closing inactive: TODO: from CLI
const timeout time.Duration = time.Minute * 5 // Pending bytes: TODO: from CLI
const streamdoc_limit int = 15_000_000

/*
* The TCP factory: returns a new Stream
Expand Down Expand Up @@ -89,6 +89,7 @@ type tcpStream struct {
FlowItems []db.FlowItem
src_port layers.TCPPort
dst_port layers.TCPPort
total_size int
}

func (t *tcpStream) Accept(tcp *layers.TCP, ci gopacket.CaptureInfo, dir reassembly.TCPFlowDirection, nextSeq reassembly.Sequence, start *bool, ac reassembly.AssemblerContext) bool {
Expand Down Expand Up @@ -123,6 +124,17 @@ func (t *tcpStream) ReassembledSG(sg reassembly.ScatterGather, ac reassembly.Ass

data := sg.Fetch(length)

// We have to make sure to stay under the document limit
t.total_size += length
bytes_available := streamdoc_limit - t.total_size
if length > bytes_available {
length = bytes_available
}
if length < 0 {
length = 0
}
string_data := string(data[:length])

var from string
if dir == reassembly.TCPDirClientToServer {
from = "c"
Expand All @@ -134,17 +146,15 @@ func (t *tcpStream) ReassembledSG(sg reassembly.ScatterGather, ac reassembly.Ass
l := len(t.FlowItems)
if l > 0 {
if t.FlowItems[l-1].From == from {
t.FlowItems[l-1].Data += string(data)
t.FlowItems[l-1].Hex += hex.EncodeToString(data)
t.FlowItems[l-1].Data += string_data
// All done, no need to add a new item
return
}
}

// Add a FlowItem based on the data we just reassembled
t.FlowItems = append(t.FlowItems, db.FlowItem{
Data: string(data),
Hex: hex.EncodeToString(data),
Data: string_data,
From: from,
Time: int(timestamp.UnixNano() / 1000000), // TODO; maybe use int64?
})
Expand Down Expand Up @@ -199,7 +209,7 @@ func (t *tcpStream) ReassemblyComplete(ac reassembly.AssemblerContext) bool {
Starred: false,
Blocked: false,
Tags: make([]string, 0),
Suricata: make([]int, 0),
Suricata: make([]string, 0),
Filename: t.source,
Flow: t.FlowItems,
}
Expand Down
6 changes: 2 additions & 4 deletions services/go-importer/internal/pkg/db/db.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,8 @@ import (
type FlowItem struct {
/// From: "s" / "c" for server or client
From string
/// Data, in a somewhat reachable format
/// Data, in a somewhat readable format
Data string
/// Data, as hex string
Hex string
/// Timestamp of the first packet in the flow (Epoch / ms)
Time int
}
Expand All @@ -35,7 +33,7 @@ type FlowEntry struct {
Starred bool
Blocked bool
Filename string
Suricata []int
Suricata []string
Flow []FlowItem
Tags []string
}
Expand Down
28 changes: 28 additions & 0 deletions services/go-importer/test_data/generate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
from pwn import *
import threading

l = listen(1337)
r = remote("localhost", 1337)

r.sendline(b"This should be searchable")
l.sendlineafter(b"This should be searchable", b"And so should this line")

def listen(p):
p.recvall()
def send(p):
g = cyclic_gen(n=8)
payload = g.get(6*1024*1024)
p.sendlineafter(b"line", payload)

sender = threading.Thread(target=send, args=(r,))
listener = threading.Thread(target=listen, args=(l,))
sender.start()
listener.start()


print("sent")


sender.join()
listener.join()

0 comments on commit b286bd3

Please sign in to comment.