Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix: Spider golint errors #119

Open
wants to merge 8 commits into
base: onionscan-0.3
Choose a base branch
from
19 changes: 11 additions & 8 deletions spider/onionspider.go
Expand Up @@ -13,10 +13,12 @@ import (
"strings"
)

// OnionSpider is the main interface for web crawling in OnionScan.
type OnionSpider struct {
client *http.Client // HTTP client used for all page fetches; NOTE(review): presumably configured with the Tor SOCKS5 dialer built in Crawl — confirm
}

// Crawl walks the site, following links and adding spider entries to the database.
func (os *OnionSpider) Crawl(hiddenservice string, osc *config.OnionScanConfig, report *report.OnionScanReport) {

torDialer, err := proxy.SOCKS5("tcp", osc.TorProxyAddress, nil, proxy.Direct)
Expand Down Expand Up @@ -129,17 +131,17 @@ func (os *OnionSpider) Crawl(hiddenservice string, osc *config.OnionScanConfig,

// Grab Server Status if it Exists
// We add it as a resource so we can pull any information out of it later.
mod_status, _ := url.Parse("http://" + hiddenservice + "/server-status")
osc.LogInfo(fmt.Sprintf("Scanning URI: %s", mod_status.String()))
id, err = os.GetPage(mod_status.String(), base, osc, true)
addCrawl(mod_status.String(), id, err)
modStatus, _ := url.Parse("http://" + hiddenservice + "/server-status")
osc.LogInfo(fmt.Sprintf("Scanning URI: %s", modStatus.String()))
id, err = os.GetPage(modStatus.String(), base, osc, true)
addCrawl(modStatus.String(), id, err)

// Grab Private Key if it Exists
// This would be a major security fail
private_key, _ := url.Parse("http://" + hiddenservice + "/private_key")
osc.LogInfo(fmt.Sprintf("Scanning URI: %s", private_key.String()))
id, err = os.GetPage(private_key.String(), base, osc, true)
addCrawl(private_key.String(), id, err)
privateKey, _ := url.Parse("http://" + hiddenservice + "/private_key")
osc.LogInfo(fmt.Sprintf("Scanning URI: %s", privateKey.String()))
id, err = os.GetPage(privateKey.String(), base, osc, true)
addCrawl(privateKey.String(), id, err)

processed := make(map[string]bool)

Expand Down Expand Up @@ -188,6 +190,7 @@ func (os *OnionSpider) Crawl(hiddenservice string, osc *config.OnionScanConfig,
}
}

// GetPage retrieves the page, inserts a new spider entry in the database, and returns the record id.
func (os *OnionSpider) GetPage(uri string, base *url.URL, osc *config.OnionScanConfig, snapshot bool) (int, error) {
response, err := os.client.Get(uri)

Expand Down
8 changes: 8 additions & 0 deletions spider/pageparser.go
Expand Up @@ -10,6 +10,8 @@ import (
"strings"
)

// NormalizeURI resolves relative URIs and returns the resolved URI.
// All data: URIs are resolved to the same value.
func NormalizeURI(uri string, base *url.URL) string {

if strings.HasPrefix("data:", uri) {
Expand All @@ -24,6 +26,8 @@ func NormalizeURI(uri string, base *url.URL) string {
return res.String()
}

// SnapshotResource reads and returns a snapshot page.
// Snapshots have a maximum size of 0.5 MB.
func SnapshotResource(response io.Reader) model.Page {
page := model.Page{}
buf := make([]byte, 1024*512) // Read Max 0.5 MB
Expand All @@ -32,6 +36,8 @@ func SnapshotResource(response io.Reader) model.Page {
return page
}

// SnapshotBinaryResource reads and returns a raw page.
// Snapshots have a maximum size of 0.5 MB.
func SnapshotBinaryResource(response io.Reader) model.Page {
page := model.Page{}
buf := make([]byte, 1024*512) // Read Max 0.5 MB
Expand All @@ -40,6 +46,8 @@ func SnapshotBinaryResource(response io.Reader) model.Page {
return page
}

// ParsePage parses HTML input and returns a page.
// The following HTML tags are parsed: title, form, input, a, img, link, script
func ParsePage(response io.Reader, base *url.URL, snapshot bool) model.Page {

page := model.Page{}
Expand Down