Add additional filters to page list endpoints #1622

Merged: 2 commits, Mar 22, 2024
45 changes: 43 additions & 2 deletions backend/btrixcloud/pages.py
@@ -24,7 +24,7 @@
PageNoteDelete,
)
from .pagination import DEFAULT_PAGE_SIZE, paginated_format
-from .utils import from_k8s_date
+from .utils import from_k8s_date, str_list_to_bools

if TYPE_CHECKING:
from .crawls import CrawlOps
@@ -360,13 +360,16 @@ async def list_pages(
qa_gt: Optional[float] = None,
qa_lte: Optional[float] = None,
qa_lt: Optional[float] = None,
reviewed: Optional[bool] = None,
approved: Optional[List[Union[bool, None]]] = None,
has_notes: Optional[bool] = None,
page_size: int = DEFAULT_PAGE_SIZE,
page: int = 1,
sort_by: Optional[str] = None,
sort_direction: Optional[int] = -1,
) -> Tuple[Union[List[PageOut], List[PageOutWithSingleQA]], int]:
"""List all pages in crawl"""
-# pylint: disable=duplicate-code, too-many-locals, too-many-branches
+# pylint: disable=duplicate-code, too-many-locals, too-many-branches, too-many-statements
# Zero-index page for query
page = page - 1
skip = page_size * page
@@ -377,6 +380,24 @@
if org:
query["oid"] = org.id

if reviewed:
query["$or"] = [
{"approved": {"$ne": None}},
{"notes.0": {"$exists": True}},
]

if reviewed is False:
query["$and"] = [
{"approved": {"$eq": None}},
{"notes.0": {"$exists": False}},
]

if approved:
query["approved"] = {"$in": approved}

if has_notes is not None:
query["notes.0"] = {"$exists": has_notes}

if qa_run_id:
query[f"qa.{qa_run_id}"] = {"$exists": True}

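For reference, a minimal sketch (not part of the diff) of the MongoDB filters these branches produce, using the field names from the code above:

# reviewed=True: the page has an approval decision or at least one note
{"$or": [{"approved": {"$ne": None}}, {"notes.0": {"$exists": True}}]}

# reviewed=False: no approval decision and no notes
{"$and": [{"approved": {"$eq": None}}, {"notes.0": {"$exists": False}}]}

# approved=[True, False, None]: match any of the listed decisions
{"approved": {"$in": [True, False, None]}}

# hasNotes: test for the presence of a first note
{"notes.0": {"$exists": True}}
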
@@ -576,15 +597,25 @@ async def delete_page_notes(
async def get_pages_list(
crawl_id: str,
org: Organization = Depends(org_crawl_dep),
reviewed: Optional[bool] = None,
approved: Optional[str] = None,
hasNotes: Optional[bool] = None,
pageSize: int = DEFAULT_PAGE_SIZE,
page: int = 1,
sortBy: Optional[str] = None,
sortDirection: Optional[int] = -1,
):
"""Retrieve paginated list of pages"""
formatted_approved: Optional[List[Union[bool, None]]] = None
if approved:
formatted_approved = str_list_to_bools(approved.split(","))

pages, total = await ops.list_pages(
crawl_id=crawl_id,
org=org,
reviewed=reviewed,
approved=formatted_approved,
has_notes=hasNotes,
page_size=pageSize,
page=page,
sort_by=sortBy,
@@ -605,13 +636,20 @@ async def get_pages_list_with_qa(
gt: Optional[float] = None,
lte: Optional[float] = None,
lt: Optional[float] = None,
reviewed: Optional[bool] = None,
approved: Optional[str] = None,
hasNotes: Optional[bool] = None,
org: Organization = Depends(org_crawl_dep),
pageSize: int = DEFAULT_PAGE_SIZE,
page: int = 1,
sortBy: Optional[str] = None,
sortDirection: Optional[int] = -1,
):
"""Retrieve paginated list of pages"""
formatted_approved: Optional[List[Union[bool, None]]] = None
if approved:
formatted_approved = str_list_to_bools(approved.split(","))

pages, total = await ops.list_pages(
crawl_id=crawl_id,
org=org,
@@ -621,6 +659,9 @@
qa_gt=gt,
qa_lte=lte,
qa_lt=lt,
reviewed=reviewed,
approved=formatted_approved,
has_notes=hasNotes,
page_size=pageSize,
page=page,
sort_by=sortBy,
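As a usage sketch of the new query parameters (the base URL, IDs, and token below are placeholders, not values from this PR):

import requests

API_PREFIX = "https://example.com/api"  # placeholder deployment URL
headers = {"Authorization": "Bearer <token>"}  # placeholder auth header
org_id, crawl_id = "<org-id>", "<crawl-id>"  # placeholder IDs

# Pages with an approval decision (either True or False) that also have notes
r = requests.get(
    f"{API_PREFIX}/orgs/{org_id}/crawls/{crawl_id}/pages",
    params={"approved": "true,false", "hasNotes": "true"},
    headers=headers,
)
print(r.json()["total"])

Note that reviewed and hasNotes are plain Optional[bool] parameters, so FastAPI coerces true/false query values directly, while approved is accepted as an Optional[str] and split manually: a comma-separated mix of booleans and none cannot be expressed as a single bool parameter.
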
22 changes: 21 additions & 1 deletion backend/btrixcloud/utils.py
@@ -94,10 +94,30 @@ def parse_jsonl_error_messages(errors):
def is_bool(stri: Optional[str]) -> bool:
"""Check if the string parameter is stringly true"""
if stri:
return stri.lower() in ("true", "1", "yes")
return stri.lower() in ("true", "1", "yes", "on")
return False


def is_falsy_bool(stri: Optional[str]) -> bool:
"""Check if the string parameter is stringly false"""
if stri:
return stri.lower() in ("false", "0", "no", "off")
return False


def str_list_to_bools(str_list: List[str], allow_none: bool = True) -> List[Union[bool, None]]:
"""Return the input string list cast to bools or None, ignoring unrecognized values"""
output: List[Union[bool, None]] = []
for val in str_list:
if is_bool(val):
output.append(True)
elif is_falsy_bool(val):
output.append(False)
elif allow_none and val.lower() in ("none", "null"):
output.append(None)
return output


def slug_from_name(name: str) -> str:
"""Generate slug from name"""
return slugify(name.replace("'", ""))
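A quick behavioral sketch of the new helper, following the definitions above:

str_list_to_bools(["true", "FALSE", "none", "maybe"])  # -> [True, False, None]; "maybe" is ignored
str_list_to_bools(["1", "off", "YES", "on"])           # -> [True, False, True, True]
str_list_to_bools(["null"], allow_none=False)          # -> []
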
181 changes: 181 additions & 0 deletions backend/test/test_run_crawl.py
@@ -459,6 +459,21 @@ def test_crawl_pages(crawler_auth_headers, default_org_id, crawler_crawl_id):
assert page.get("modified") is None
assert page.get("approved") is None

# Test reviewed filter (page has no notes and no approval, so it should show up under reviewed=False)
r = requests.get(
f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/pages?reviewed=False",
headers=crawler_auth_headers,
)
assert r.status_code == 200
assert r.json()["total"] == 1

r = requests.get(
f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/pages?reviewed=True",
headers=crawler_auth_headers,
)
assert r.status_code == 200
assert r.json()["total"] == 0

# Update page with approval
r = requests.patch(
f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/pages/{page_id}",
@@ -470,6 +485,57 @@ def test_crawl_pages(crawler_auth_headers, default_org_id, crawler_crawl_id):
assert r.status_code == 200
assert r.json()["updated"]

r = requests.get(
f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/pages/{page_id}",
headers=crawler_auth_headers,
)
assert r.status_code == 200
assert r.json()["approved"]

# Test approval filter
r = requests.get(
f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/pages?approved=True",
headers=crawler_auth_headers,
)
assert r.status_code == 200
assert r.json()["total"] == 1

r = requests.get(
f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/pages?approved=False",
headers=crawler_auth_headers,
)
assert r.status_code == 200
assert r.json()["total"] == 0

r = requests.get(
f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/pages?approved=True,False",
headers=crawler_auth_headers,
)
assert r.status_code == 200
assert r.json()["total"] == 1

r = requests.get(
f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/pages?approved=None",
headers=crawler_auth_headers,
)
assert r.status_code == 200
assert r.json()["total"] == 0

# Test reviewed filter (page is now approved, so it should show up under reviewed=True)
r = requests.get(
f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/pages?reviewed=False",
headers=crawler_auth_headers,
)
assert r.status_code == 200
assert r.json()["total"] == 0

r = requests.get(
f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/pages?reviewed=True",
headers=crawler_auth_headers,
)
assert r.status_code == 200
assert r.json()["total"] == 1

r = requests.get(
f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/pages/{page_id}",
headers=crawler_auth_headers,
@@ -490,6 +556,44 @@ def test_crawl_pages(crawler_auth_headers, default_org_id, crawler_crawl_id):
assert page["modified"]
assert page["approved"]

# Set approved to False and test filter again
r = requests.patch(
f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/pages/{page_id}",
headers=crawler_auth_headers,
json={
"approved": False,
},
)
assert r.status_code == 200

r = requests.get(
f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/pages?approved=True",
headers=crawler_auth_headers,
)
assert r.status_code == 200
assert r.json()["total"] == 0

r = requests.get(
f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/pages?approved=False",
headers=crawler_auth_headers,
)
assert r.status_code == 200
assert r.json()["total"] == 1

r = requests.get(
f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/pages?approved=True,False",
headers=crawler_auth_headers,
)
assert r.status_code == 200
assert r.json()["total"] == 1

r = requests.get(
f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/pages?approved=None",
headers=crawler_auth_headers,
)
assert r.status_code == 200
assert r.json()["total"] == 0


def test_re_add_crawl_pages(crawler_auth_headers, default_org_id, crawler_crawl_id):
# Re-add pages and verify they were correctly added
@@ -565,6 +669,83 @@ def test_crawl_page_notes(crawler_auth_headers, default_org_id, crawler_crawl_id):
assert first_note["userName"]
assert first_note["text"] == note_text

# Make sure page approval is set to None and re-test filters
r = requests.patch(
f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/pages/{page_id}",
headers=crawler_auth_headers,
json={
"approved": None,
},
)
assert r.status_code == 200
assert r.json()["updated"]

# Test approved filter
r = requests.get(
f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/pages?approved=True",
headers=crawler_auth_headers,
)
assert r.status_code == 200
assert r.json()["total"] == 0

r = requests.get(
f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/pages?approved=False",
headers=crawler_auth_headers,
)
assert r.status_code == 200
assert r.json()["total"] == 0

r = requests.get(
f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/pages?approved=True,False",
headers=crawler_auth_headers,
)
assert r.status_code == 200
assert r.json()["total"] == 0

r = requests.get(
f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/pages?approved=None",
headers=crawler_auth_headers,
)
assert r.status_code == 200
assert r.json()["total"] == 1

r = requests.get(
f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/pages?approved=true,false,none",
headers=crawler_auth_headers,
)
assert r.status_code == 200
assert r.json()["total"] == 1

# Test reviewed filter (page now has notes, so it should show up under reviewed=True)
r = requests.get(
f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/pages?reviewed=False",
headers=crawler_auth_headers,
)
assert r.status_code == 200
assert r.json()["total"] == 0

r = requests.get(
f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/pages?reviewed=True",
headers=crawler_auth_headers,
)
assert r.status_code == 200
assert r.json()["total"] == 1

# Test notes filter
r = requests.get(
f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/pages?hasNotes=False",
headers=crawler_auth_headers,
)
assert r.status_code == 200
assert r.json()["total"] == 0

r = requests.get(
f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/pages?hasNotes=True",
headers=crawler_auth_headers,
)
assert r.status_code == 200
assert r.json()["total"] == 1

# Add second note to test selective updates/deletes
r = requests.post(
f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/pages/{page_id}/notes",