-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathWeb_To_Format.py
More file actions
67 lines (51 loc) · 2.06 KB
/
Web_To_Format.py
File metadata and controls
67 lines (51 loc) · 2.06 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import asyncio
from playwright.async_api import async_playwright
async def url_to_pdf(url, output_path):
    """Save the web page at ``url`` as a PDF file at ``output_path``.

    The page is opened in a Chromium instance launched through Playwright.
    Once the network has gone idle, the document's scroll width and height
    are read and used to size the PDF page, so the PDF page dimensions
    follow the measured document rather than a fixed paper format.

    Args:
        url: Address passed to ``page.goto``.
        output_path: Destination path handed to ``page.pdf``.
    """
    # CSS pixels per inch, used to turn the measured pixel sizes into inches.
    px_per_inch = 96

    # JavaScript evaluated in the page; returns {width, height} taken from
    # document.documentElement's scroll dimensions.
    measure_js = '''() => {
return {
width: document.documentElement.scrollWidth,
height: document.documentElement.scrollHeight
}
}'''

    async with async_playwright() as pw:
        chromium = await pw.chromium.launch()
        tab = await chromium.new_page()

        await tab.goto(url)
        # Hold off measuring until network activity has settled, so content
        # that loads late is included in the measured size.
        await tab.wait_for_load_state('networkidle')

        size = await tab.evaluate(measure_js)

        # 2.5 inches are added on top of the measured width; the code does
        # not state why (NOTE(review): presumably slack against horizontal
        # clipping - confirm before changing).
        width_in = (size['width'] / px_per_inch) + 2.5
        height_in = size['height'] / px_per_inch

        # Page size is passed as strings with an explicit "in" unit.
        pdf_options = {
            "path": output_path,
            "print_background": True,
            "width": f"{width_in}in",
            "height": f"{height_in}in",
            "scale": 1,
        }
        await tab.pdf(**pdf_options)

        await chromium.close()
async def url_to_png(url, output_path):
    """Save a full-page screenshot of ``url`` to ``output_path``.

    The page is opened in a Chromium instance launched through Playwright,
    and the screenshot is taken with ``full_page=True`` after the network
    has gone idle.

    Args:
        url: Address passed to ``page.goto``.
        output_path: Destination path handed to ``page.screenshot``.
    """
    async with async_playwright() as pw:
        chromium = await pw.chromium.launch()
        tab = await chromium.new_page()

        await tab.goto(url)
        # Give late-loading resources a chance to finish before capturing.
        await tab.wait_for_load_state('networkidle')

        # full_page=True captures the whole scrollable page, not only the
        # visible viewport.
        shot_options = {"path": output_path, "full_page": True}
        await tab.screenshot(**shot_options)

        await chromium.close()
# Example usage: prompt for the URL, the output format and the destination
# file, then run the matching converter.
# NOTE: these statements sit at module top level (there is no
# `if __name__ == "__main__":` guard), so they also run when the module is
# imported.
url = input("Full url: ")
temp = input(
    "Convert to PDF (Default yes) / the other option is .png screenshot of the website? "
)
temp = temp.replace(" ", "").lower()
# An empty answer, "yes" or "y" (spaces and letter case ignored) selects PDF;
# any other answer selects the PNG screenshot.
isPDF = temp in ("", "yes", "y")
output_path = input("Full path for output file: ")
convert = url_to_pdf if isPDF else url_to_png
asyncio.run(convert(url, output_path))
# Setup (run once before using this script):
#   python -m pip install playwright
#   python -m playwright install
# Removal (browser binaries first, then the package):
#   python -m playwright uninstall
#   python -m pip uninstall playwright