1
+ import os
2
+ import sys
3
+ import pytest
4
+ import asyncio
5
+ import base64
6
+ from PIL import Image
7
+ import io
8
+
9
# Append the directory one level above this file's own directory to the
# import path (presumably so the `crawl4ai` import that follows resolves
# from a source checkout).
_this_dir = os.path.dirname(os.path.abspath(__file__))
parent_dir = os.path.dirname(_this_dir)
sys.path.append(parent_dir)
12
+
13
+ from crawl4ai .async_webcrawler import AsyncWebCrawler
14
+
15
@pytest.mark.asyncio
async def test_basic_screenshot():
    """Crawl a static page with ``screenshot=True`` and check the image.

    The test expects ``result.screenshot`` to be a base64-encoded image: the
    value is decoded and opened with Pillow, and the detected format must be
    ``"PNG"``. Requires network access.
    """
    async with AsyncWebCrawler(verbose=True) as crawler:
        # Request the site directly rather than through a mirror/proxy host.
        url = "https://example.com"  # A static website
        result = await crawler.arun(url=url, bypass_cache=True, screenshot=True)

        assert result.success
        assert result.screenshot is not None

        # Verify the screenshot is a valid image
        image_data = base64.b64decode(result.screenshot)
        # Open as a context manager so Pillow releases the image when done.
        with Image.open(io.BytesIO(image_data)) as image:
            assert image.format == "PNG"
28
+
29
@pytest.mark.asyncio
async def test_screenshot_with_wait_for():
    """Take a screenshot while passing a CSS ``wait_for`` condition.

    Crawls a page with dynamic content using ``wait_for="css:#content"`` and
    checks that the crawl succeeds and that the base64-decoded screenshot is
    recognised by Pillow as a PNG. Requires network access.
    """
    async with AsyncWebCrawler(verbose=True) as crawler:
        # Using a website with dynamic content; request it directly rather
        # than through a mirror/proxy host.
        url = "https://www.youtube.com"
        wait_for = "css:#content"  # Wait for the main content to load

        result = await crawler.arun(
            url=url,
            bypass_cache=True,
            screenshot=True,
            wait_for=wait_for,
        )

        assert result.success
        assert result.screenshot is not None

        # Verify the screenshot is a valid image
        image_data = base64.b64decode(result.screenshot)
        # Open as a context manager so Pillow releases the image when done.
        with Image.open(io.BytesIO(image_data)) as image:
            assert image.format == "PNG"

        # You might want to add more specific checks here, like image dimensions
        # or even use image recognition to verify certain elements are present
53
+
54
@pytest.mark.asyncio
async def test_screenshot_with_js_wait_for():
    """Take a screenshot while passing a JavaScript ``wait_for`` condition.

    The ``js:`` condition is an arrow function that is true once an element
    matching ``#nav-logo-sprites`` exists in the document. The test checks
    that the crawl succeeds and that the base64-decoded screenshot is
    recognised by Pillow as a PNG. Requires network access.
    """
    async with AsyncWebCrawler(verbose=True) as crawler:
        # Request the site directly rather than through a mirror/proxy host.
        url = "https://www.amazon.com"
        wait_for = "js:() => document.querySelector('#nav-logo-sprites') !== null"

        result = await crawler.arun(
            url=url,
            bypass_cache=True,
            screenshot=True,
            wait_for=wait_for,
        )

        assert result.success
        assert result.screenshot is not None

        image_data = base64.b64decode(result.screenshot)
        # Open as a context manager so Pillow releases the image when done.
        with Image.open(io.BytesIO(image_data)) as image:
            assert image.format == "PNG"
73
+
74
@pytest.mark.asyncio
async def test_screenshot_without_wait_for():
    """Take a screenshot of a dynamic page without any ``wait_for`` condition.

    Checks that the crawl succeeds and that the base64-decoded screenshot is
    recognised by Pillow as a PNG. Requires network access.
    """
    async with AsyncWebCrawler(verbose=True) as crawler:
        # A website with lots of dynamic content; request it directly rather
        # than through a mirror/proxy host.
        url = "https://www.nytimes.com"

        result = await crawler.arun(url=url, bypass_cache=True, screenshot=True)

        assert result.success
        assert result.screenshot is not None

        image_data = base64.b64decode(result.screenshot)
        # Open as a context manager so Pillow releases the image when done.
        with Image.open(io.BytesIO(image_data)) as image:
            assert image.format == "PNG"
87
+
88
@pytest.mark.asyncio
async def test_screenshot_comparison():
    """Compare screenshots of one page taken with and without ``wait_for``.

    The same URL is crawled twice with the same crawler: first with no wait
    condition, then with ``wait_for="css:#SHORTCUT_FOCUSABLE_DIV"``. Both
    crawls must succeed and yield a screenshot, and the screenshot taken with
    the wait condition must be at least as wide and as tall as the one taken
    without it. Requires network access.
    """
    async with AsyncWebCrawler(verbose=True) as crawler:
        # Request the site directly rather than through a mirror/proxy host.
        url = "https://www.reddit.com"
        wait_for = "css:#SHORTCUT_FOCUSABLE_DIV"

        # Take screenshot without wait_for
        result_without_wait = await crawler.arun(
            url=url,
            bypass_cache=True,
            screenshot=True,
        )

        # Take screenshot with wait_for
        result_with_wait = await crawler.arun(
            url=url,
            bypass_cache=True,
            screenshot=True,
            wait_for=wait_for,
        )

        # Checked separately so a failure identifies which crawl went wrong.
        assert result_without_wait.success
        assert result_with_wait.success
        assert result_without_wait.screenshot is not None
        assert result_with_wait.screenshot is not None

        # Compare the two screenshots
        data_without_wait = io.BytesIO(base64.b64decode(result_without_wait.screenshot))
        data_with_wait = io.BytesIO(base64.b64decode(result_with_wait.screenshot))

        # Open both as context managers so Pillow releases them when done.
        with Image.open(data_without_wait) as image_without_wait:
            with Image.open(data_with_wait) as image_with_wait:
                # This is a simple size comparison. In a real-world scenario,
                # you might want to use more sophisticated image comparison
                # techniques.
                assert image_with_wait.size[0] >= image_without_wait.size[0]
                assert image_with_wait.size[1] >= image_without_wait.size[1]
121
+
122
# Entry point for debugging: run this file's tests verbosely when executed
# directly, and exit with pytest's return code so that failures are visible
# to the calling shell instead of always exiting with status 0.
if __name__ == "__main__":
    raise SystemExit(pytest.main([__file__, "-v"]))
0 commit comments