Skip to content

Commit

Permalink
basic server working
Browse files Browse the repository at this point in the history
  • Loading branch information
stringertheory committed Jun 6, 2023
1 parent 0b000f9 commit 3b7abc5
Show file tree
Hide file tree
Showing 7 changed files with 191 additions and 61 deletions.
2 changes: 1 addition & 1 deletion json_explorer/__main__.py
@@ -1,3 +1,3 @@
from .explore import main
from .cli import main

main()
51 changes: 39 additions & 12 deletions json_explorer/cli.py
@@ -1,28 +1,55 @@
import argparse
import json
import sys
import threading
import webbrowser
from http.server import BaseHTTPRequestHandler, HTTPServer


class RequestHandler(BaseHTTPRequestHandler):
    """Answer every GET request with the contents of the HTML template."""

    def do_GET(self):
        """Send a 200 ``text/html`` response whose body is the template file.

        The template path is relative, so it is resolved against the
        current working directory of the process.
        """
        self.send_response(200)
        self.send_header("Content-type", "text/html")
        self.end_headers()
        # The file is re-read on every request, after the headers are sent.
        with open("json_explorer/template.html") as template_file:
            page = template_file.read()
        payload = bytes(page, "utf-8")
        self.wfile.write(payload)
from .explore import TripleCounter


def start_browser(server_ready_event, url):
    """Open *url* in a web browser once the server has signalled readiness.

    Blocks the calling thread until ``server_ready_event`` is set, then
    hands *url* to :func:`webbrowser.open`.
    """
    # Wait first so the browser is not launched before the event is set.
    server_ready_event.wait()
    webbrowser.open(url)


def main(ip="localhost", port=8001):
def jsonl_iterator(filename):
    """Yield one decoded JSON value per non-blank line of a JSON Lines file.

    Args:
        filename: Path (``str`` or path-like) of the file to read.

    Yields:
        The result of ``json.loads`` for each line that contains content.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # JSON Lines files are UTF-8; do not depend on the platform locale.
    with open(filename, encoding="utf-8") as infile:
        for line in infile:
            # Skip blank / whitespace-only lines (e.g. an extra trailing
            # newline) rather than failing with JSONDecodeError on "".
            if not line.strip():
                continue
            yield json.loads(line)


def main():
parser = argparse.ArgumentParser(
prog="ProgramName",
description="What the program does",
epilog="Text at the bottom of help",
)
parser.add_argument(
"filename",
help="filename of a file in JSON Lines format",
)
parser.add_argument("-p", "--port", default=8001, type=int, help="port for server (default 8001)")
parser.add_argument("-n", "--no-serve", help="write HTML to stdout instead of serving", action="store_true")
args = parser.parse_args()

counter = TripleCounter.from_objects(jsonl_iterator(args.filename))
response_text = counter.html()

if args.no_serve:
print(response_text)
return

class RequestHandler(BaseHTTPRequestHandler):
def do_GET(self):
self.send_response(200)
self.send_header("Content-type", "text/html")
self.end_headers()
self.wfile.write(bytes(response_text, "utf-8"))

name = "JSON Explorer"
instruction = "press Control+C to stop"
url = f"http://{ip}:{port}"
url = f"http://localhost:{args.port}"

server_ready = threading.Event()
browser_thread = threading.Thread(
Expand All @@ -31,7 +58,7 @@ def main(ip="localhost", port=8001):
)
browser_thread.start()

server = HTTPServer((ip, port), RequestHandler)
server = HTTPServer(("localhost", args.port), RequestHandler)
print(f"started {name} at {url}", file=sys.stderr)
print(f"\033[1m{instruction}\033[0m\n", file=sys.stderr)
server_ready.set()
Expand Down
41 changes: 7 additions & 34 deletions json_explorer/explore.py
@@ -1,8 +1,6 @@
import collections
import html
import json
import pathlib
import sys

INDEX = object()
INDEX_STRING = "[*]"
Expand Down Expand Up @@ -31,13 +29,6 @@ def oxford_join(items, pluralize=False):
return "{}, or {}".format(", ".join(items[:-1]), items[-1])


def format_type(value, pluralize=False):
    """Format a type name, or a collection of type names, for display.

    A single string is returned as-is, with a trailing ``"s"`` when
    *pluralize* is true. Any other value is passed to ``oxford_join``
    together with the *pluralize* flag.
    """
    if not isinstance(value, str):
        return oxford_join(value, pluralize=pluralize)
    suffix = "s" if pluralize else ""
    return f"{value}{suffix}"


class TripleCounter(dict):
def increment(self, keys, amount):
try:
Expand Down Expand Up @@ -88,6 +79,13 @@ def add_object(self, d):
for keys in iter_object(d, []):
self.increment(keys, 1)

@classmethod
def from_objects(cls, objects):
    """Build a new counter from an iterable of objects.

    Creates an empty instance of ``cls``, passes every item of *objects*
    to ``add_object`` in iteration order, and returns the instance.
    """
    instance = cls()
    for item in objects:
        instance.add_object(item)
    return instance

def html(self):
with open(TEMPLATE_FILENAME) as infile:
html = infile.read()
Expand Down Expand Up @@ -137,17 +135,6 @@ def __init__(self, path, *args, **kwargs):
self.parent = {}
self.children = []

def set_parent(self, parent_node):
    """Record *parent_node* as this node's ``parent`` attribute."""
    self.parent = parent_node

def add_child(self, child_node):
    """Append *child_node* to the end of this node's ``children`` list."""
    offspring = self.children
    offspring.append(child_node)

def traverse(self):
    """Yield this node, then everything each child's ``traverse()`` yields.

    Children are visited in the order they appear in ``self.children``,
    so a node is always produced before the nodes beneath it.
    """
    yield self
    for subtree_root in self.children:
        for node in subtree_root.traverse():
            yield node

def escape(self, string, truncate_length=140):
raw = repr(string)
if len(raw) > truncate_length:
Expand Down Expand Up @@ -394,17 +381,3 @@ def main2():

# tree = counter.tree()
print(counter.html())


def main(args=None):
    """Count the objects in a JSON Lines file and print the HTML report.

    Args:
        args: Argument list whose first item is the filename. When
            ``None``, ``sys.argv[1:]`` is used instead.
    """
    argv = sys.argv[1:] if args is None else args
    filename = argv[0]

    counter = TripleCounter()
    with open(filename) as infile:
        # Each line of the file is decoded as one JSON document.
        for record in map(json.loads, infile):
            counter.add_object(record)

    print(counter.html())
31 changes: 30 additions & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Expand Up @@ -23,6 +23,7 @@ deptry = "^0.6.4"
mypy = "^0.981"
pre-commit = "^2.20.0"
tox = "^3.25.1"
beautifulsoup4 = "^4.12.2"

[tool.poetry.group.docs.dependencies]
mkdocs = "^1.4.2"
Expand Down
13 changes: 0 additions & 13 deletions tests/test_cli.py

This file was deleted.

113 changes: 113 additions & 0 deletions tests/test_explore.py
@@ -0,0 +1,113 @@
from pathlib import Path

import bs4
import pytest

from json_explorer.cli import jsonl_iterator
from json_explorer.explore import INDEX, TripleCounter

FIXTURE_DIR = Path(__file__).parent.resolve() / "data"


@pytest.fixture
def example1():
    """Return a ``TripleCounter`` built from ``data/example1.jsonl``."""
    fixture_path = FIXTURE_DIR / "example1.jsonl"
    return TripleCounter.from_objects(jsonl_iterator(fixture_path))


@pytest.fixture
def example1_render(example1):
    """Return the HTML produced by the ``example1`` counter's ``html()``."""
    rendered = example1.html()
    return rendered


@pytest.fixture
def example1_soup(example1_render):
    """Return the rendered HTML parsed by BeautifulSoup's ``html.parser``."""
    soup = bs4.BeautifulSoup(example1_render, features="html.parser")
    return soup


def test_counts_simple(example1):
    """``refresh`` is always the integer 20, seen three times."""
    types_seen = example1[("refresh",)]

    # exactly one type was recorded for "refresh", and it is int
    assert len(types_seen) == 1
    assert types_seen.get("int")

    # a single distinct int value was recorded ...
    int_values = types_seen["int"]
    assert len(int_values) == 1

    # ... namely 20, counted three times
    assert int_values.get(20) == 3


def test_counts_medium(example1):
    """``d.rows[*].id`` is always an int, with 89 distinct values."""
    types_seen = example1[("d", "rows", INDEX, "id")]

    # exactly one type was recorded for d.rows[*].id, and it is int
    assert len(types_seen) == 1
    assert types_seen.get("int")

    # 89 distinct int values were recorded
    id_values = types_seen["int"]
    assert len(id_values) == 89

    # the value 3490233 was counted exactly once
    assert id_values.get(3490233) == 1


def test_counts_complicated(example1):
    """``d.rows[*].stream`` is seen as null, object, and array."""
    by_type = example1[("d", "rows", INDEX, "stream")]

    # three different types were recorded: null, object, and array
    assert len(by_type) == 3
    for type_name in ("NoneType", "dict", "list"):
        assert by_type.get(type_name)

    # null was counted 61 times
    assert by_type["NoneType"][None] == 61

    # every array seen was empty (the only recorded length is 0)
    list_lengths = by_type["list"]
    assert len(list_lengths) == 1
    assert list_lengths.get(0)

    # objects had between 2 and 7 properties
    dict_lengths = by_type["dict"]
    assert min(dict_lengths) == 2
    assert max(dict_lengths) == 7


def test_render_simple(example1_render):
    """The rendered document begins with an HTML5 doctype."""
    expected_prefix = "<!doctype html>"
    assert example1_render.startswith(expected_prefix)


def test_document_tree(example1_soup):
    """The rendered tree has one root item with three direct children."""
    # the tree list exists
    tree = example1_soup.find("ul", {"class": "tree"})
    assert tree

    # it has exactly one root element
    top_items = tree.find_all("li", recursive=False)
    assert len(top_items) == 1
    root = top_items[0]

    # this example has three objects, each always with exactly three
    # properties
    type_span = root.find("summary").find("span", {"class": "type"})
    assert type_span.text == "object 3, always 3 properties"

    child_items = root.find("ul").find_all("li", recursive=False)
    assert len(child_items) == 3


@pytest.mark.parametrize(
    "jmespath",
    ["refresh", "d.rows[*].id", "d.rows[*].stream"],
)
def test_document_dialog_jmespath(example1_soup, jmespath):
    """Each path has a ``<dialog>`` whose heading shows that path.

    Parametrized so every path is reported as its own test case instead
    of the first failure hiding the remaining paths.
    """
    # the dialog element's id is the JMESPath expression itself
    dialog = example1_soup.find("dialog", {"id": jmespath})
    assert dialog

    h2 = dialog.find("h2", {"class": "jmespath"})
    assert h2

    assert h2.text.strip() == jmespath

0 comments on commit 3b7abc5

Please sign in to comment.