Skip to content

Commit

Permalink
Merge pull request #49 from hhslepicka/fixes-and-features
Browse files Browse the repository at this point in the history
New Utilities, Improved Docs and Bug Fix
  • Loading branch information
hhslepicka committed Mar 14, 2022
2 parents 0e63174 + 5339c5f commit 6a10d75
Show file tree
Hide file tree
Showing 8 changed files with 349 additions and 21 deletions.
2 changes: 2 additions & 0 deletions botcity/web/__init__.py
@@ -1,4 +1,6 @@
from .bot import WebBot, Browser, BROWSER_CONFIGS, By # noqa: F401, F403
from .parsers import table_to_dict, data_from_row, sanitize_header # noqa: F401, F403
from .util import element_as_select # noqa: F401, F403

from botcity.web._version import get_versions
__version__ = get_versions()['version']
Expand Down
105 changes: 97 additions & 8 deletions botcity/web/bot.py
Expand Up @@ -10,6 +10,7 @@
import shutil
import time
from typing import List
from contextlib import contextmanager

from botcity.base import BaseBot, State
from botcity.base.utils import only_if_element
Expand All @@ -21,7 +22,8 @@
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.remote.webelement import WebElement
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support.wait import WebDriverWait, TimeoutException, NoSuchElementException
from selenium.webdriver.support import expected_conditions as EC

from . import config, cv2find
from .browsers import BROWSER_CONFIGS, Browser
Expand Down Expand Up @@ -233,11 +235,16 @@ def check_driver():
def stop_browser(self):
    """
    Stop the browser by closing the current window and quitting the driver.

    The stored options, capabilities and driver reference are always reset,
    even when closing or quitting the driver raises, so a failed shutdown
    does not leave a dead driver attached to the bot. Any exception raised
    by the driver is still propagated to the caller.

    Warning:
        After invoking this method, you will need to reassign your custom options and capabilities.
    """
    if not self._driver:
        # Nothing to stop: there is no driver attached.
        return

    try:
        try:
            self._driver.close()
        finally:
            # Always end the session, even if the window could not be closed;
            # otherwise the driver would be left running.
            self._driver.quit()
    finally:
        self.options = None
        self.capabilities = None
        self._driver = None

def set_screen_resolution(self, width=None, height=None):
Expand Down Expand Up @@ -854,6 +861,26 @@ def browse(self, url):
"""
self.navigate_to(url)

@contextmanager
def wait_for_new_page(self, waiting_time=10000, activate=True):
    """Context manager that waits for the list of tabs to change after its body runs.

    The tabs are captured on entry. When the `with` body finishes, the tabs are
    polled until they differ from that snapshot or until the waiting time expires.

    Args:
        waiting_time (int, optional): The maximum waiting time, in milliseconds.
            Defaults to 10000.
        activate (bool, optional): Whether or not to activate the new page
            (the last tab reported). Defaults to True.
    """
    known_tabs = self.get_tabs()
    yield
    began_at = time.time()
    while True:
        if self.get_tabs() != known_tabs:
            # The set of tabs changed: stop polling.
            break
        if (time.time() - began_at) * 1000 > waiting_time:
            # Timed out without any change; leave the active tab untouched.
            return None
        time.sleep(0.1)
    if activate:
        newest_tab = self.get_tabs()[-1]
        self.activate_tab(newest_tab)

def execute_javascript(self, code):
"""
Execute the given javascript code.
Expand Down Expand Up @@ -1032,15 +1059,19 @@ def wait_for_downloads(self, timeout: int = 120000):

wait_method = BROWSER_CONFIGS.get(self.browser).get("wait_for_downloads")
# waits for all the files to be completed
WebDriverWait(self._driver, timeout/1000, 1).until(wait_method)
WebDriverWait(self._driver, timeout/1000.0, 1).until(wait_method)

def find_elements(self, selector: str, by: By = By.CSS_SELECTOR) -> List[WebElement]:
def find_elements(self, selector: str, by: By = By.CSS_SELECTOR,
waiting_time=10000, ensure_visible: bool = True) -> List[WebElement]:
"""Find elements using the specified selector with selector type specified by `by`.
Args:
selector (str): The selector string to be used.
by (str, optional): Selector type. Defaults to By.CSS_SELECTOR.
[See more](https://selenium-python.readthedocs.io/api.html#selenium.webdriver.common.by.By)
waiting_time (int, optional): Maximum wait time (ms) to search for a hit.
Defaults to 10000ms (10s).
ensure_visible (bool, optional): Whether to wait for the element to be visible. Defaults to True.
Returns:
List[WebElement]: List of elements found.
Expand All @@ -1054,16 +1085,36 @@ def find_elements(self, selector: str, by: By = By.CSS_SELECTOR) -> List[WebElem
...
```
"""
return self._driver.find_elements(by, selector)
if ensure_visible:
condition = EC.visibility_of_all_elements_located
else:
condition = EC.presence_of_all_elements_located

def find_element(self, selector: str, by: str = By.CSS_SELECTOR) -> WebElement:
try:
elements = WebDriverWait(
self._driver, timeout=waiting_time / 1000.0
).until(
condition((by, selector))
)
return elements
except (TimeoutException, NoSuchElementException) as ex:
print("Exception on find_elements", ex)
return None

def find_element(self, selector: str, by: str = By.CSS_SELECTOR, waiting_time=10000,
ensure_visible: bool = False, ensure_clickable: bool = False) -> WebElement:
"""Find an element using the specified selector with selector type specified by `by`.
If more than one element is found, the first instance is returned.
Args:
selector (str): The selector string to be used.
by (str, optional): Selector type. Defaults to By.CSS_SELECTOR.
[See more](https://selenium-python.readthedocs.io/api.html#selenium.webdriver.common.by.By)
waiting_time (int, optional): Maximum wait time (ms) to search for a hit.
Defaults to 10000ms (10s).
ensure_visible (bool, optional): Whether to wait for the element to be visible. Defaults to False.
ensure_clickable (bool, optional): Whether to wait for the element to be clickable. Defaults to False.
If True, `ensure_clickable` takes precedence over `ensure_visible`.
Returns:
WebElement: The element found.
Expand All @@ -1079,9 +1130,47 @@ def find_element(self, selector: str, by: str = By.CSS_SELECTOR) -> WebElement:
...
```
"""
out = self.find_elements(selector=selector, by=by)
if out:
return out[0]
condition = EC.visibility_of_element_located if ensure_visible else EC.presence_of_element_located
condition = EC.element_to_be_clickable if ensure_clickable else condition

try:
element = WebDriverWait(
self._driver, timeout=waiting_time/1000.0
).until(
condition((by, selector))
)
return element
except (TimeoutException, NoSuchElementException):
return None

def wait_for_stale_element(self, element: WebElement, timeout: int = 10000):
    """
    Wait until the WebElement element becomes stale (outdated).

    This is a best-effort wait: if the wait times out, the method returns
    silently instead of raising.

    Args:
        element (WebElement): The element to monitor for staleness.
        timeout (int, optional): Timeout in millis. Defaults to 10000.
    """
    became_stale = EC.staleness_of(element)
    try:
        # WebDriverWait expects seconds, the public API uses milliseconds.
        WebDriverWait(self._driver, timeout=timeout / 1000.0).until(became_stale)
    except (TimeoutException, NoSuchElementException):
        # Deliberately ignored: a timeout is not treated as an error here.
        pass

def wait_for_element_visibility(self, element: WebElement, visible: bool = True, waiting_time=10000):
    """Block until the element reaches the requested visibility state.

    Exceptions raised by the underlying wait are not caught here.

    Args:
        element (WebElement): The element to wait for.
        visible (bool, optional): When True, wait for the element to be visible;
            when False, wait for it to be hidden. Defaults to True.
        waiting_time (int, optional): Maximum wait time (ms).
            Defaults to 10000ms (10s).
    """
    expected_state = EC.visibility_of if visible else EC.invisibility_of_element

    # WebDriverWait expects seconds, the public API uses milliseconds.
    waiter = WebDriverWait(self._driver, timeout=waiting_time / 1000.0)
    waiter.until(expected_state(element))

def set_file_input_element(self, element: WebElement, filepath: str):
"""Configure the filepath for upload in a file element.
Expand Down
90 changes: 90 additions & 0 deletions botcity/web/parsers.py
@@ -0,0 +1,90 @@
import collections
import string
from typing import Dict, List
from selenium.webdriver.remote.webelement import WebElement


def data_from_row(row: WebElement, cell_tag="td") -> List[str]:
    """Extract data from a row and return it as a list.

    Args:
        row (WebElement): The row element.
        cell_tag (str, optional): The HTML tag associated with the row cells. Defaults to "td".

    Returns:
        list: List of strings with the text of each cell, in document order.
    """
    # "tag name" is the value of selenium's `By.TAG_NAME` locator strategy.
    # The generic `find_elements` is used because the
    # `find_elements_by_tag_name` shortcut was deprecated in Selenium 4 and
    # later removed.
    cells = row.find_elements("tag name", cell_tag)
    return [cell.text for cell in cells]


def sanitize_header(labels: List[str]):
    """Sanitize header labels so they can be used as dictionary keys.

    Each label is lowercased, stripped of punctuation and has its spaces
    replaced with underscores. Labels that end up with no usable content
    (blank, or made only of punctuation and spaces) are replaced with
    `col_<index>`. Repeated labels are made unique by appending `_1`, `_2`, ...
    in order of appearance.

    Args:
        labels (List[str]): The raw header labels. The list is modified in place.

    Returns:
        List[str]: The same list object, holding the sanitized labels.
    """
    strip_punctuation = str.maketrans("", "", string.punctuation)

    for idx, label in enumerate(labels):
        cleaned = ""
        if label.strip():
            cleaned = label.lower().translate(strip_punctuation).replace(" ", "_")
        # Underscores can only come from spaces at this point ("_" itself is
        # punctuation), so an underscore-only result carries no information.
        if not cleaned.strip("_"):
            cleaned = f"col_{idx}"
        labels[idx] = cleaned

    # Deduplicate by adding _1, _2, _3 to repeated labels
    repeated = {name for name, count in collections.Counter(labels).items() if count > 1}
    occurrences = collections.Counter()
    for idx, label in enumerate(labels):
        if label in repeated:
            occurrences[label] += 1
            labels[idx] = f"{label}_{occurrences[label]}"

    return labels


def table_to_dict(table: WebElement, has_header: bool = True,
                  skip_rows: int = 0, header_tag: str = "th") -> List[Dict]:
    """Convert a table WebElement to a list of dicts, one per data row.

    Args:
        table (WebElement): The table element.
        has_header (bool, optional): Whether or not to parse a header. Defaults to True.
        skip_rows (int, optional): Number of rows to skip from the top. Defaults to 0.
        header_tag (str, optional): The HTML tag associated with the header cell. Defaults to "th".

    Returns:
        list: List with dict for each row, mapping column label to cell text.
            Empty when the table has no rows left after skipping.
    """
    # Collect all rows from table. "tag name" is the value of selenium's
    # `By.TAG_NAME`; the `find_elements_by_tag_name` shortcut was deprecated
    # in Selenium 4 and later removed.
    rows = table.find_elements("tag name", "tr")

    # Skip rows if informed
    if skip_rows:
        rows = rows[skip_rows:]

    # Nothing to parse: avoid indexing into an empty list below.
    if not rows:
        return []

    if has_header:
        # The first remaining row carries the labels; it is not a data row.
        labels = sanitize_header(data_from_row(rows[0], cell_tag=header_tag))
        rows = rows[1:]
    else:
        # Make up header labels from the width of the first row
        num_cols = len(rows[0].find_elements("tag name", "td"))
        labels = [f"col_{i}" for i in range(num_cols)]

    # Assemble output: one dict per row
    return [dict(zip(labels, data_from_row(row))) for row in rows]
24 changes: 23 additions & 1 deletion botcity/web/util.py
@@ -1,9 +1,31 @@
import shutil
import tempfile

from selenium.webdriver.remote.webelement import WebElement
from selenium.webdriver.support.select import Select

def cleanup_temp_dir(temp_dir):

def cleanup_temp_dir(temp_dir: tempfile.TemporaryDirectory) -> None:
    """
    Remove a temporary directory together with everything inside it.

    Args:
        temp_dir (tempfile.TemporaryDirectory): The temporary directory to delete.
            A falsy value (e.g. None) is ignored.
    """
    if not temp_dir:
        return

    try:
        temp_dir.cleanup()
    except OSError:
        # Regular cleanup failed: fall back to a removal that ignores errors.
        shutil.rmtree(temp_dir.name, ignore_errors=True)


def element_as_select(element: WebElement) -> Select:
    """Build a selenium `Select` helper around the given element.

    Args:
        element (WebElement): The element to wrap.

    Returns:
        Select: The Select object created from `element`.
    """
    select = Select(element)
    return select
44 changes: 44 additions & 0 deletions docs/forms.md
@@ -0,0 +1,44 @@
# Interacting with Forms

When dealing with forms, we often need to fill in the form and submit it.

While most operations are trivial, some are not, such as choosing an option in a select element or dealing with file uploads.

For those cases, we developed some utility functions that you can use.

## Select Element

After grabbing the element via the `find_element` or `find_elements` functions, we can use `element_as_select` to convert it into a `Select` object.

::: botcity.web.util.element_as_select

### Example usage

```python
# Import the function
from botcity.web.util import element_as_select
...
# Fetch the select element
element = self.find_element("select", By.TAG_NAME)
# Convert the element into a Select object
select_element = element_as_select(element)
# Select the option based on visible text
select_element.select_by_visible_text("Option 1")
...
```

## File Upload

After grabbing the element via the `find_element` or `find_elements` functions, we can use `set_file_input_element` to assign the file path to the element.

### Example usage

```python
from botcity.web import By
...
# Find the input element of type `file` using CSS_SELECTOR.
elem = self.find_element("body > form > input[type=file]", By.CSS_SELECTOR)
# Configure the file to be used when processing the upload
self.set_file_input_element(elem, "./test.txt")
...
```

0 comments on commit 6a10d75

Please sign in to comment.