/
screenshotter.py
83 lines (67 loc) · 2.9 KB
/
screenshotter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
from constants import *
from image_size import calculate_optimal_segments
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException
from PIL import Image
import logging
import os
def get_screenshot(entry):
    """Capture a screenshot of ``entry`` and save it as one or more PNG files.

    The element is screenshotted to an intermediate file, pasted onto a
    light-grey canvas with ``MARGIN`` pixels of padding on every side, and
    written to ``OUTPUT_DIR`` as ``FILENAME_ROOT + "<index>.png"``. An entry
    taller than ``MAX_IMAGE_HEIGHT`` is cut into two segments (three when it
    is taller than twice that height) at split points chosen by
    ``calculate_optimal_segments`` from the tops of the entry's ``<p>``
    elements. Paragraph tops that are not below the bottom of the first
    ``captioned-image`` element are not offered as split points.

    Args:
        entry: Selenium WebElement to capture.

    Returns:
        List of split records: whatever ``calculate_optimal_segments``
        returned (empty for a short entry), followed by a final
        ``{"y": <padded image height>}`` record marking the bottom of the
        last segment. NOTE(review): the previous docstring said each record
        holds the split's y coordinate and a paragraph count; only the "y"
        key is used here -- confirm the rest against the helper.
    """
    logger = logging.getLogger(__name__)
    splits = []
    num_segments = None
    # Each access to .size / .location queries the element again, so read
    # them once up front instead of once per paragraph.
    entry_height = entry.size["height"]

    if entry_height > MAX_IMAGE_HEIGHT:
        # This is a tall entry that we'll want to split into multiple screenshots
        num_segments = 3 if entry_height > MAX_IMAGE_HEIGHT * 2 else 2
        entry_top = entry.location["y"]

        # Avoid splitting through an image if there is one. find_element
        # raises when nothing matches, so no truthiness check is needed.
        image_bottom = None
        try:
            captioned = entry.find_element(By.CLASS_NAME, "captioned-image")
        except NoSuchElementException:
            pass
        else:
            rect = captioned.rect
            image_bottom = (rect["y"] + rect["height"] - entry_top) * SCALING_FACTOR

        # Candidate split coordinates: top of each <p>, relative to the top
        # of the entry and multiplied by SCALING_FACTOR.
        heights = [
            (p.rect["y"] - entry_top) * SCALING_FACTOR
            for p in entry.find_elements(By.TAG_NAME, "p")
        ]
        if image_bottom is not None:
            heights = [y for y in heights if y > image_bottom]

        # Decide which of the split possibilities to go with. Copy the result
        # so the append below never mutates the helper's own list.
        splits = list(
            calculate_optimal_segments(
                entry_height * SCALING_FACTOR, heights, num_segments
            )
        )

    # Grab screenshot
    logger.debug("Capturing screenshot")
    screenshot_path = os.path.join(OUTPUT_DIR, "screenshot.png")
    entry.screenshot(screenshot_path)
    try:
        with Image.open(screenshot_path) as raw:
            entry_with_margin = Image.new(
                "RGB",
                (raw.width + MARGIN * 2, raw.height + MARGIN * 2),
                (238, 238, 238),
            )
            entry_with_margin.paste(raw, (MARGIN, MARGIN))
    finally:
        # The intermediate file is not needed once the padded copy exists;
        # remove it even if opening or compositing failed.
        if os.path.exists(screenshot_path):
            os.remove(screenshot_path)

    # The bottom edge of the padded image closes the final segment.
    splits.append({"y": entry_with_margin.height})

    if len(splits) > 1:
        logger.debug("Splitting screenshot into target %s segments.", num_segments)
        # NOTE(review): split["y"] is applied to the padded canvas, whose
        # content sits MARGIN pixels lower than in the raw screenshot that the
        # candidate heights were measured against -- confirm this is intended.
        last_crop = 0
        for ind, split in enumerate(splits):
            filename = os.path.join(OUTPUT_DIR, FILENAME_ROOT + str(ind) + ".png")
            # crop() already returns a new image, so no copy() is needed first.
            segment = entry_with_margin.crop(
                (0, last_crop, entry_with_margin.width, split["y"])
            )
            segment.save(filename)
            last_crop = split["y"]
    else:
        entry_with_margin.save(os.path.join(OUTPUT_DIR, FILENAME_ROOT + "0.png"))

    return splits