Skip to content

Commit 535ee7a

Browse files
fixed finally lol
1 parent 40a7cfd commit 535ee7a

File tree

4 files changed

+5519
-27
lines changed

4 files changed

+5519
-27
lines changed

app.py

Lines changed: 57 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
from typing import List, Tuple
1818
from playwright.async_api import async_playwright, Response
1919
from playwright._impl._errors import Error as PlaywrightError
20+
from bs4 import BeautifulSoup
21+
import json
2022

2123

2224
# Initialize FastAPI application
@@ -126,7 +128,7 @@ def is_valid_url(url):
126128

127129
async def execute_async_code(code):
128130
"""Executes the dynamic code asynchronously and returns a result."""
129-
exec_globals = {"fetch_data": fetch_data, "__builtins__": safe_builtins}
131+
exec_globals = {"fetch_data": fetch_data, "html_parser": html_parser, "fetch": Fetch, "__builtins__": safe_builtins}
130132
try:
131133
# Execute the dynamic code
132134
exec(code, exec_globals)
@@ -153,6 +155,9 @@ async def execute_async_code(code):
153155
# Raise a custom exception with the traceback details
154156
raise RuntimeError("Error executing dynamic code.") from e
155157

158+
async def html_parser(html):
    """Parse *html* into a BeautifulSoup tree using the ``html5lib`` parser.

    Declared as a coroutine, so callers must ``await`` it to get the tree.
    """
    soup = BeautifulSoup(html, "html5lib")
    return soup
160+
156161
async def fetch_data(url):
157162
if not BROWSER_WS:
158163
async with aiohttp.ClientSession() as session:
@@ -168,6 +173,7 @@ async def fetch_data(url):
168173
async with async_playwright() as playwright:
169174
browser = await playwright.chromium.connect(BROWSER_WS)
170175
async with browser:
176+
context = await browser.new_context()
171177
page = await browser.new_page()
172178
try:
173179
response = await page.goto(url)
@@ -181,7 +187,27 @@ async def fetch_data(url):
181187
finally:
182188
await page.close()
183189
await browser.close()
184-
190+
191+
class Fetch:
    """Async context manager that opens a URL in a remote Chromium browser.

    Entering the context starts Playwright, connects to the browser at
    ``BROWSER_WS``, opens a page inside a fresh browser context, navigates
    to ``url`` and returns the navigation response. Leaving the context
    closes the page, context and browser and stops Playwright.

    Usage::

        async with Fetch(url) as response:
            body = await response.body()
    """

    def __init__(self, url: str):
        """Remember the target *url*; nothing is opened until ``__aenter__``."""
        self.url = url
        self.playwright = None
        self.browser = None
        self.context = None
        self.page = None

    async def __aenter__(self):
        """Open the page and return the response of navigating to ``self.url``.

        Raises whatever Playwright raises while connecting or navigating;
        anything acquired before the failure is released first.
        """
        try:
            self.playwright = await async_playwright().start()
            self.browser = await self.playwright.chromium.connect(BROWSER_WS)
            self.context = await self.browser.new_context()
            # Open the page in the context created above rather than via
            # browser.new_page(), which would create a second context and
            # leave this one unused and never closed.
            self.page = await self.context.new_page()
            return await self.page.goto(self.url)
        except BaseException:
            # __aexit__ is not invoked when __aenter__ raises, so release
            # the partially acquired resources here before propagating.
            await self._cleanup()
            raise

    async def __aexit__(self, exc_type, exc_value, traceback):
        """Release all browser resources; exceptions from the body propagate."""
        await self._cleanup()

    async def _cleanup(self):
        """Release whatever was acquired, innermost resource first.

        Safe to call on an instance that was never (or only partly) entered,
        and safe to call more than once.
        """
        page, context = self.page, self.context
        browser, playwright = self.browser, self.playwright
        self.page = self.context = self.browser = self.playwright = None
        try:
            if page is not None:
                await page.close()
            if context is not None:
                await context.close()
        finally:
            # Always drop the browser connection and stop the driver, even
            # if closing the page or context failed.
            try:
                if browser is not None:
                    await browser.close()
            finally:
                if playwright is not None:
                    await playwright.stop()
185211

186212
@app.get("/", response_class=HTMLResponse)
187213
async def home(request: Request):
@@ -268,9 +294,10 @@ async def dynamic_download(request: Request, key: str, file_name: str = Query(..
268294
if code:
269295
try:
270296
result = await execute_async_code(code)
297+
print(result)
271298

272-
content: bytes
273-
response: Response
299+
content: bytes = bytes()
300+
media_type: str = 'application/octet-stream'
274301

275302
if not BROWSER_WS:
276303
async with aiohttp.ClientSession() as session:
@@ -280,31 +307,35 @@ async def dynamic_download(request: Request, key: str, file_name: str = Query(..
280307
else:
281308
async with async_playwright() as playwright:
282309
browser = await playwright.chromium.connect(BROWSER_WS)
283-
async with browser:
284-
page = await browser.new_page()
310+
try:
311+
context = await browser.new_context()
312+
page = await context.new_page()
313+
response = await page.goto(result)
314+
if response.ok:
315+
content = await response.body()
316+
media_type = response.headers.get('Content-Type', 'application/octet-stream')
317+
except PlaywrightError:
318+
context = await browser.new_context(accept_downloads=True)
319+
page = await context.new_page()
285320
try:
286-
response = await page.goto(result)
287-
if response.ok:
288-
content = await response.body()
289-
except PlaywrightError:
290-
try:
291-
download_future = page.wait_for_event("download")
292-
response = await page.goto(result)
293-
if response.ok:
294-
stream = await (await download_future).create_read_stream() or await response.body()
295-
buffer = io.BytesIO()
296-
async for chunk in stream:
297-
buffer.write(chunk)
298-
content = buffer.getvalue()
299-
except:
300-
pass
301-
finally:
302-
await page.close()
303-
await browser.close()
304-
321+
async with page.expect_download() as download_info:
322+
await page.goto(result)
323+
download = await download_info.value
324+
stream = await download.create_read_stream()
325+
buffer = io.BytesIO()
326+
async for chunk in stream:
327+
buffer.write(chunk)
328+
content = buffer.getvalue()
329+
media_type = response.headers.get('Content-Type', 'application/octet-stream')
330+
except:
331+
pass
332+
finally:
333+
await page.close()
334+
await browser.close()
335+
305336
return StreamingResponse(
306337
iter([content]),
307-
media_type=response.headers.get('Content-Type', 'application/octet-stream'),
338+
media_type=media_type,
308339
headers={'Content-Disposition': f'attachment; filename="{file_name}"'}
309340
)
310341
except Exception as e:

docker-compose.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,3 +9,7 @@ services:
99
ports:
1010
- "8000:8000"
1111
restart: always
12+
browser:
13+
image: ghcr.io/browserless/chromium:latest
14+
ports:
15+
- 3000:3000

requirements.txt

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,4 +8,7 @@ bcrypt
88
python-multipart
99
itsdangerous
1010
pydantic
11-
playwright
11+
playwright
12+
beautifulsoup4
13+
html5lib
14+
lxml

0 commit comments

Comments
 (0)