Merge pull request #49 from hhslepicka/fixes-and-features

New Utilities, Improved Docs and Bug Fix
botcity-dev · Mar 14, 2022 · 6a10d75 · 6a10d75
2 parents 0e63174 + 5339c5f
commit 6a10d75
Show file tree

Hide file tree

Showing 8 changed files with 349 additions and 21 deletions.
diff --git a/botcity/web/__init__.py b/botcity/web/__init__.py
@@ -1,4 +1,6 @@
 from .bot import WebBot, Browser, BROWSER_CONFIGS, By  # noqa: F401, F403
+from .parsers import table_to_dict, data_from_row, sanitize_header  # noqa: F401, F403
+from .util import element_as_select  # noqa: F401, F403
 
 from botcity.web._version import get_versions
 __version__ = get_versions()['version']

diff --git a/botcity/web/bot.py b/botcity/web/bot.py
@@ -10,6 +10,7 @@
 import shutil
 import time
 from typing import List
+from contextlib import contextmanager
 
 from botcity.base import BaseBot, State
 from botcity.base.utils import only_if_element
@@ -21,7 +22,8 @@
 from selenium.webdriver.common.by import By
 from selenium.webdriver.common.keys import Keys
 from selenium.webdriver.remote.webelement import WebElement
-from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.support.wait import WebDriverWait, TimeoutException, NoSuchElementException
+from selenium.webdriver.support import expected_conditions as EC
 
 from . import config, cv2find
 from .browsers import BROWSER_CONFIGS, Browser
@@ -233,11 +235,16 @@ def check_driver():
     def stop_browser(self):
         """
         Stops the Chrome browser and cleans up the User Data Directory.
+
+        Warning:
+            After invoking this method, you will need to reassign your custom options and capabilities.
         """
         if not self._driver:
             return
         self._driver.close()
         self._driver.quit()
+        self.options = None
+        self.capabilities = None
         self._driver = None
 
     def set_screen_resolution(self, width=None, height=None):
@@ -854,6 +861,26 @@ def browse(self, url):
         """
         self.navigate_to(url)
 
+    @contextmanager
+    def wait_for_new_page(self, waiting_time=10000, activate=True):
+        """Context manager to wait for a new page to load and activate it.
+
+        Args:
+            waiting_time (int, optional): The maximum waiting time. Defaults to 10000.
+            activate (bool, optional): Whether or not to activate the new page. Defaults to True.
+
+        """
+        tabs = self.get_tabs()
+        yield
+        start_time = time.time()
+        while tabs == self.get_tabs():
+            elapsed_time = (time.time() - start_time) * 1000
+            if elapsed_time > waiting_time:
+                return None
+            time.sleep(0.1)
+        if activate:
+            self.activate_tab(self.get_tabs()[-1])
+
     def execute_javascript(self, code):
         """
         Execute the given javascript code.
@@ -1032,15 +1059,19 @@ def wait_for_downloads(self, timeout: int = 120000):
 
         wait_method = BROWSER_CONFIGS.get(self.browser).get("wait_for_downloads")
         # waits for all the files to be completed
-        WebDriverWait(self._driver, timeout/1000, 1).until(wait_method)
+        WebDriverWait(self._driver, timeout/1000.0, 1).until(wait_method)
 
-    def find_elements(self, selector: str, by: By = By.CSS_SELECTOR) -> List[WebElement]:
+    def find_elements(self, selector: str, by: By = By.CSS_SELECTOR,
+                      waiting_time=10000, ensure_visible: bool = True) -> List[WebElement]:
         """Find elements using the specified selector with selector type specified by `by`.
 
         Args:
             selector (str): The selector string to be used.
             by (str, optional): Selector type. Defaults to By.CSS_SELECTOR.
                 [See more](https://selenium-python.readthedocs.io/api.html#selenium.webdriver.common.by.By)
+            waiting_time (int, optional): Maximum wait time (ms) to search for a hit.
+                Defaults to 10000ms (10s).
+            ensure_visible (bool, optional): Whether to wait for the element to be visible. Defaults to True.
 
         Returns:
             List[WebElement]: List of elements found, or None if the wait times out before any match.
@@ -1054,16 +1085,36 @@ def find_elements(self, selector: str, by: By = By.CSS_SELECTOR) -> List[WebElem
         ...
         ```
         """
-        return self._driver.find_elements(by, selector)
+        if ensure_visible:
+            condition = EC.visibility_of_all_elements_located
+        else:
+            condition = EC.presence_of_all_elements_located
 
-    def find_element(self, selector: str, by: str = By.CSS_SELECTOR) -> WebElement:
+        try:
+            elements = WebDriverWait(
+                self._driver, timeout=waiting_time / 1000.0
+            ).until(
+                condition((by, selector))
+            )
+            return elements
+        except (TimeoutException, NoSuchElementException) as ex:
+            print("Exception on find_elements", ex)
+            return None
+
+    def find_element(self, selector: str, by: str = By.CSS_SELECTOR, waiting_time=10000,
+                     ensure_visible: bool = False, ensure_clickable: bool = False) -> WebElement:
         """Find an element using the specified selector with selector type specified by `by`.
         If more than one element is found, the first instance is returned.
 
         Args:
             selector (str): The selector string to be used.
             by (str, optional): Selector type. Defaults to By.CSS_SELECTOR.
                 [See more](https://selenium-python.readthedocs.io/api.html#selenium.webdriver.common.by.By)
+            waiting_time (int, optional): Maximum wait time (ms) to search for a hit.
+                Defaults to 10000ms (10s).
+            ensure_visible (bool, optional): Whether to wait for the element to be visible. Defaults to False.
+            ensure_clickable (bool, optional): Whether to wait for the element to be clickable. Defaults to False.
+                If True, `ensure_clickable` takes precedence over `ensure_visible`.
 
         Returns:
             WebElement: The element found, or None if no match is found within `waiting_time`.
@@ -1079,9 +1130,47 @@ def find_element(self, selector: str, by: str = By.CSS_SELECTOR) -> WebElement:
         ...
         ```
         """
-        out = self.find_elements(selector=selector, by=by)
-        if out:
-            return out[0]
+        condition = EC.visibility_of_element_located if ensure_visible else EC.presence_of_element_located
+        condition = EC.element_to_be_clickable if ensure_clickable else condition
+
+        try:
+            element = WebDriverWait(
+                self._driver, timeout=waiting_time/1000.0
+            ).until(
+                condition((by, selector))
+            )
+            return element
+        except (TimeoutException, NoSuchElementException):
+            return None
+
+    def wait_for_stale_element(self, element: WebElement, timeout: int = 10000):
+        """
+        Wait until the WebElement element becomes stale (outdated).
+
+        Args:
+            element (WebElement): The element to monitor for staleness.
+            timeout (int, optional): Timeout in millis. Defaults to 10000.
+        """
+        try:
+            WebDriverWait(self._driver, timeout=timeout/1000.0).until(EC.staleness_of(element))
+        except (TimeoutException, NoSuchElementException):
+            pass
+
+    def wait_for_element_visibility(self, element: WebElement, visible: bool = True, waiting_time=10000):
+        """Wait for the element to be visible or hidden.
+
+        Args:
+            element (WebElement): The element to wait for.
+            visible (bool, optional): Whether to wait for the element to be visible. Defaults to True.
+            waiting_time (int, optional): Maximum wait time (ms) to search for a hit.
+                Defaults to 10000ms (10s).
+        """
+        if visible:
+            wait_method = EC.visibility_of
+        else:
+            wait_method = EC.invisibility_of_element
+
+        WebDriverWait(self._driver, timeout=waiting_time/1000.0).until(wait_method(element))
 
     def set_file_input_element(self, element: WebElement, filepath: str):
         """Configure the filepath for upload in a file element.

diff --git a/botcity/web/parsers.py b/botcity/web/parsers.py
@@ -0,0 +1,90 @@
+import collections
+import string
+from typing import Dict, List
+from selenium.webdriver.remote.webelement import WebElement
+
+
+def data_from_row(row: WebElement, cell_tag="td") -> List[str]:
+    """Extract data from a row and return it as a list.
+
+    Args:
+        row (WebElement): The row element.
+        cell_tag (str, optional): The HTML tag associated with the row cells. Defaults to "td".
+
+    Returns:
+        list: List of strings with the contents.
+    """
+    return [
+        col.text for col in row.find_elements_by_tag_name(cell_tag)
+    ]
+
+
+def sanitize_header(labels: List[str]):
+    """Sanitize header labels."""
+    # Normalize each label; empty headers are replaced with a positional "col_<idx>" name
+    for idx, label in enumerate(labels):
+        if label.strip():
+            # make it lowercase
+            label = label.lower()
+
+            # remove punctuations
+            label = ''.join([l for l in label if l not in string.punctuation])  # noqa: E741
+
+            # replace spaces with underscores
+            label = label.replace(" ", "_")
+        else:
+            label = f"col_{idx}"
+        labels[idx] = label
+
+    # Deduplicate by adding _1, _2, _3 to repeated labels
+    counts = {k: v for k, v in collections.Counter(labels).items() if v > 1}
+    for i in reversed(range(len(labels))):
+        item = labels[i]
+        if item in counts and counts[item]:
+            labels[i] = f"{item}_{counts[item]}"
+            counts[item] -= 1
+
+    return labels
+
+
+def table_to_dict(table: WebElement, has_header: bool = True,
+                  skip_rows: int = 0, header_tag: str = "th") -> List[Dict]:
+    """Convert a table WebElement to a list of dicts, one per row.
+
+    Args:
+        table (WebElement): The table element.
+        has_header (bool, optional): Whether or not to parse a header. Defaults to True.
+        skip_rows (int, optional): Number of rows to skip from the top. Defaults to 0.
+        header_tag (str, optional): The HTML tag associated with the header cell. Defaults to "th".
+
+    Returns:
+        list: List with dict for each row.
+    """
+
+    # Collect all rows from table
+    rows = table.find_elements_by_tag_name("tr")
+
+    # Skip rows if informed
+    if skip_rows:
+        rows = rows[skip_rows:]
+
+    # Parse header labels
+    if has_header:
+        # Read header labels
+        labels = data_from_row(rows[0], cell_tag=header_tag)
+        # Sanitize headers
+        labels = sanitize_header(labels)
+        # Skip the header
+        rows = rows[1:]
+    else:
+        # Make up header labels
+        num_cols = len(rows[0].find_elements_by_tag_name("td"))
+        labels = [f"col_{i}" for i in range(num_cols)]
+
+    # Assemble output dictionary
+    out_list = []
+    for row in rows:
+        row_data = data_from_row(row)
+        out_list.append(dict(zip(labels, row_data)))
+
+    return out_list
diff --git a/botcity/web/util.py b/botcity/web/util.py
@@ -1,9 +1,31 @@
 import shutil
+import tempfile
 
+from selenium.webdriver.remote.webelement import WebElement
+from selenium.webdriver.support.select import Select
 
-def cleanup_temp_dir(temp_dir):
+
+def cleanup_temp_dir(temp_dir: tempfile.TemporaryDirectory) -> None:
+    """
+    Deletes the temporary directory and all its contents.
+
+    Args:
+        temp_dir (tempfile.TemporaryDirectory): The temporary directory to delete.
+    """
     if temp_dir:
         try:
             temp_dir.cleanup()
         except OSError:
             shutil.rmtree(temp_dir.name, ignore_errors=True)
+
+
+def element_as_select(element: WebElement) -> Select:
+    """Wraps a WebElement in a Select object.
+
+    Args:
+        element (WebElement): The element to wrap.
+
+    Returns:
+        Select: The Select object.
+    """
+    return Select(element)
diff --git a/docs/forms.md b/docs/forms.md
@@ -0,0 +1,44 @@
+# Interacting with Forms
+
+When dealing with forms, we often need to fill in the form and submit it.
+
+While most of the operations are trivial, some are not, such as choosing an option in a select element or dealing with file uploads.
+
+For that, we developed some utility functions that you can use.
+
+## Select Element
+
+After grabbing the element via the `find_element` or `find_elements` functions, we can use `element_as_select` to convert it into a `Select` object.
+
+::: botcity.web.util.element_as_select
+
+### Example usage
+
+```python
+# Import the function
+from botcity.web.util import element_as_select
+...
+# Fetch the select element
+element = self.find_element("select", By.TAG_NAME)
+# Convert the element into a Select object
+select_element = element_as_select(element)
+# Select the option based on visible text
+select_element.select_by_visible_text("Option 1")
+...
+```
+
+## File Upload
+
+After grabbing the element via the `find_element` or `find_elements` functions, we can use `set_file_input_element` to assign the file path to the element.
+
+### Example usage
+
+```python
+from botcity.web import By
+...
+# Find the input element of type `file` using CSS_SELECTOR.
+elem = self.find_element("body > form > input[type=file]", By.CSS_SELECTOR)
+# Configure the file to be used when processing the upload
+self.set_file_input_element(elem, "./test.txt")
+...
+```