Skip to content

Commit

Permalink
Improve Search Result Parsing / Show Errors In Web UI (#43)
Browse files Browse the repository at this point in the history
* WIP

* Add testify for better test output.

* Don't share a logger in the irc / dcc mock servers.

* Rewrite search parser to be more maintainable.

- Don't mutate the line as we parse. It makes
  it difficult to reason about the current
  state.
- Split each piece into a separate function
  that takes the original line and extracts
  only what we are looking for
- Handle and add tests for special cases like
  when the file size isn't present. Fixes #41.

* Handle cases when author has weird %\w% text.

- Example: %F77FE9FF1CCD%
- Look for the above format and remove it if present.
- Fixes #42.

* Add additional file extensions.

* Display parse errors in Web UI.

- Allow users to manually copy and paste the
  result string so they can still download
  the files.
- When there are parse errors, a button appears
  at the top of the grid, allowing users to
  view all errors and the raw search result
  string in a separate errors grid. They can
  copy the search result manually to the top
  input box and click download.

* Remove fmt.Println() call.

* Update packages.
  • Loading branch information
evan-buss committed Nov 1, 2021
1 parent ef086d3 commit 1f66178
Show file tree
Hide file tree
Showing 29 changed files with 1,302 additions and 471 deletions.
8 changes: 8 additions & 0 deletions .idea/.gitignore

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

48 changes: 48 additions & 0 deletions .idea/codeStyles/Project.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 5 additions & 0 deletions .idea/codeStyles/codeStyleConfig.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions .idea/modules.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 12 additions & 0 deletions .idea/openbooks.iml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 7 additions & 0 deletions .idea/prettier.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions .idea/vcs.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

155 changes: 147 additions & 8 deletions core/search_parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package core

import (
"bufio"
"encoding/json"
"errors"
"fmt"
"io"
Expand All @@ -11,7 +12,9 @@ import (
"strings"
)

// List of common file extensions
// List of file extensions that I've encountered.
// Some of them aren't eBooks, but they were returned
// in previous search results.
var fileTypes = [...]string{
"epub",
"mobi",
Expand All @@ -20,7 +23,13 @@ var fileTypes = [...]string{
"rtf",
"pdf",
"cdr",
"rar",
"lit",
"cbr",
"doc",
"htm",
"jpg",
"txt",
"rar", // Compressed extensions should always be last 2 items
"zip",
}

Expand All @@ -35,8 +44,19 @@ type BookDetail struct {
}

type ParseError struct {
Line string
Error error
Line string `json:"line"`
Error error `json:"error"`
}

// MarshalJSON implements json.Marshaler. The parse error is encoded as an
// object with two string members: "line" holds the raw result line and
// "error" holds the message returned by the wrapped error.
//
// The receiver is a value (matching String) so the method is in the method
// set of both ParseError and *ParseError; with a pointer receiver a
// non-addressable ParseError value handed to json.Marshal would skip it.
// A nil Error is encoded as an empty message instead of panicking.
func (p ParseError) MarshalJSON() ([]byte, error) {
	message := ""
	if p.Error != nil {
		message = p.Error.Error()
	}

	return json.Marshal(struct {
		Line  string `json:"line"`
		Error string `json:"error"`
	}{
		Line:  p.Line,
		Error: message,
	})
}

func (p ParseError) String() string {
Expand All @@ -51,20 +71,20 @@ func ParseSearchFile(filePath string) ([]BookDetail, []ParseError) {
}
defer file.Close()

return ParseSearch(file)
return ParseSearchV2(file)
}

func ParseSearch(reader io.Reader) ([]BookDetail, []ParseError) {
var books []BookDetail
var errors []ParseError
var parseErrors []ParseError

scanner := bufio.NewScanner(reader)
for scanner.Scan() {
line := scanner.Text()
if strings.HasPrefix(line, "!") {
dat, err := parseLine(line)
if err != nil {
errors = append(errors, ParseError{Line: line, Error: err})
parseErrors = append(parseErrors, ParseError{Line: line, Error: err})
} else {
books = append(books, dat)
}
Expand All @@ -73,7 +93,7 @@ func ParseSearch(reader io.Reader) ([]BookDetail, []ParseError) {

sort.Slice(books, func(i, j int) bool { return books[i].Server < books[j].Server })

return books, errors
return books, parseErrors
}

// parseLine extracts the book details from a single search result line.
Expand Down Expand Up @@ -138,3 +158,122 @@ func parseLine(line string) (BookDetail, error) {

return book, nil
}

// ParseSearchV2 reads search results from reader one line at a time.
//
// Only lines that start with "!" are treated as results; every other line is
// ignored. Each result line is handed to parseLineV2: a successful parse is
// added to the returned books, a failed one is recorded as a ParseError that
// carries the raw line and the error.
//
// If reading stops early (a failing reader, or a line longer than the
// scanner's buffer) the scanner's error is wrapped and appended as a final
// ParseError with an empty Line, so the caller can see that the results are
// incomplete instead of them being silently truncated.
//
// The returned books are sorted by their Server field.
func ParseSearchV2(reader io.Reader) ([]BookDetail, []ParseError) {
	var books []BookDetail
	var parseErrors []ParseError

	scanner := bufio.NewScanner(reader)
	for scanner.Scan() {
		line := scanner.Text()
		if !strings.HasPrefix(line, "!") {
			continue
		}

		book, err := parseLineV2(line)
		if err != nil {
			parseErrors = append(parseErrors, ParseError{Line: line, Error: err})
			continue
		}
		books = append(books, book)
	}

	// Scan returning false does not distinguish EOF from a failure; Err does.
	if err := scanner.Err(); err != nil {
		parseErrors = append(parseErrors, ParseError{
			Error: fmt.Errorf("reading search results: %w", err),
		})
	}

	sort.Slice(books, func(i, j int) bool { return books[i].Server < books[j].Server })

	return books, parseErrors
}

// parseLineV2 extracts the details of a single search result line. The line
// is never mutated while parsing; each helper below reads the original line
// and pulls out one piece. The layout the helpers look for is
//
//	!<server> <author> - <title>.<ext> ::INFO:: <size> [more info]
//
// where the " ::INFO:: <size>" part is optional.
//
// It returns an error when the line does not start with '!', or when the
// server, author or title cannot be located. Malformed lines (empty line, no
// author text, a lone "%...%" author, an extension-like string ahead of the
// title) produce an error or a best-effort value rather than a panic.
func parseLineV2(line string) (BookDetail, error) {
	const (
		separator     = " - "        // sits between the author and the title
		infoDelimiter = " ::INFO:: " // introduces the file size
	)

	// getServer returns the text between the leading '!' and the first space.
	getServer := func(line string) (string, error) {
		// The length check keeps an empty line from panicking on line[0].
		if len(line) == 0 || line[0] != '!' {
			return "", errors.New("result lines must start with '!'")
		}

		firstSpace := strings.Index(line, " ")
		if firstSpace == -1 {
			return "", errors.New("unable parse server name")
		}

		return line[1:firstSpace], nil
	}

	// getAuthor returns the text between the first space and the first " - ".
	getAuthor := func(line string) (string, error) {
		firstSpace := strings.Index(line, " ")
		dashChar := strings.Index(line, separator)
		// If the first space is the one that opens " - " there is no author
		// text at all ("!server - title.epub") and slicing would be out of
		// range, so report it like any other unparsable author.
		if dashChar == -1 || dashChar <= firstSpace {
			return "", errors.New("unable to parse author")
		}
		author := line[firstSpace+len(" ") : dashChar]

		// Handles case with weird author characters %\w% ("%F77FE9FF1CCD% Michael Haag"):
		// everything up to and including the first space is dropped. When
		// there is no space there is nothing to keep after the token, so the
		// author text is returned untouched.
		if strings.Contains(author, "%") {
			split := strings.SplitAfterN(author, " ", 2)
			if len(split) == 2 {
				return split[1], nil
			}
		}

		return author, nil
	}

	// getTitle returns the title, the file format and the index in line where
	// the title ends (-1 when no known extension follows the separator).
	// When several known extensions occur, the one latest in fileTypes wins.
	getTitle := func(line string) (string, string, int) {
		title := ""
		fileFormat := ""
		endIndex := -1

		sepIndex := strings.Index(line, separator)
		if sepIndex == -1 {
			return title, fileFormat, endIndex
		}
		titleStart := sepIndex + len(separator)

		for _, ext := range fileTypes { // Loop through each possible file extension we've got on record
			// Only look after the separator: an extension-like string inside
			// the server or author ("john.doctor") must not end the title
			// before it has started.
			offset := strings.Index(line[titleStart:], "."+ext)
			if offset == -1 {
				continue
			}
			endTitle := titleStart + offset

			fileFormat = ext
			if ext == "rar" || ext == "zip" { // For archives the actual format is named elsewhere in the line
				lowered := strings.ToLower(line[:endTitle])
				for _, ext2 := range fileTypes[:len(fileTypes)-2] { // Range over the eBook formats (exclude archives)
					if strings.Contains(lowered, ext2) {
						fileFormat = ext2
					}
				}
			}
			title = line[titleStart:endTitle]
			endIndex = endTitle
		}

		return title, fileFormat, endIndex
	}

	// getSize returns the file size and the index where the downloadable part
	// of the line ends. Without an " ::INFO:: " section the size is "N/A" and
	// the whole line is the downloadable part.
	getSize := func(line string) (string, int) {
		infoIndex := strings.LastIndex(line, infoDelimiter)
		if infoIndex == -1 {
			return "N/A", len(line)
		}

		// Handle cases when there is additional info after the file size (ex ::HASH:: )
		parts := strings.Split(line[infoIndex+len(infoDelimiter):], " ")
		return parts[0], infoIndex
	}

	server, err := getServer(line)
	if err != nil {
		return BookDetail{}, err
	}

	author, err := getAuthor(line)
	if err != nil {
		return BookDetail{}, err
	}

	title, format, titleIndex := getTitle(line)
	if titleIndex == -1 {
		return BookDetail{}, errors.New("unable to parse title")
	}

	size, endIndex := getSize(line)

	return BookDetail{
		Server: server,
		Author: author,
		Title:  title,
		Format: format,
		Size:   size,
		Full:   strings.TrimSpace(line[:endIndex]),
	}, nil
}

0 comments on commit 1f66178

Please sign in to comment.