17
17
from typing import List , Tuple
18
18
from playwright .async_api import async_playwright , Response
19
19
from playwright ._impl ._errors import Error as PlaywrightError
20
+ from bs4 import BeautifulSoup
21
+ import json
20
22
21
23
22
24
# Initialize FastAPI application
@@ -126,7 +128,7 @@ def is_valid_url(url):
126
128
127
129
async def execute_async_code (code ):
128
130
"""Executes the dynamic code asynchronously and returns a result."""
129
- exec_globals = {"fetch_data" : fetch_data , "__builtins__" : safe_builtins }
131
+ exec_globals = {"fetch_data" : fetch_data , "html_parser" : html_parser , "fetch" : Fetch , " __builtins__" : safe_builtins }
130
132
try :
131
133
# Execute the dynamic code
132
134
exec (code , exec_globals )
@@ -153,6 +155,9 @@ async def execute_async_code(code):
153
155
# Raise a custom exception with the traceback details
154
156
raise RuntimeError ("Error executing dynamic code." ) from e
155
157
158
async def html_parser(html):
    """Parse ``html`` into a BeautifulSoup tree using the "html5lib" parser.

    This is a coroutine function, so callers must ``await`` it to obtain the
    soup; the body itself awaits nothing.
    """
    soup = BeautifulSoup(html, "html5lib")
    return soup
160
+
156
161
async def fetch_data (url ):
157
162
if not BROWSER_WS :
158
163
async with aiohttp .ClientSession () as session :
@@ -168,6 +173,7 @@ async def fetch_data(url):
168
173
async with async_playwright () as playwright :
169
174
browser = await playwright .chromium .connect (BROWSER_WS )
170
175
async with browser :
176
+ context = await browser .new_context ()
171
177
page = await browser .new_page ()
172
178
try :
173
179
response = await page .goto (url )
@@ -181,7 +187,27 @@ async def fetch_data(url):
181
187
finally :
182
188
await page .close ()
183
189
await browser .close ()
184
-
190
+
191
class Fetch:
    """Async context manager that opens ``url`` in a remote Playwright browser.

    Entering the context starts Playwright, connects to the Chromium instance
    at the module-level ``BROWSER_WS`` endpoint, opens a page inside a fresh
    browser context and navigates it to ``url``.  The value bound by
    ``async with Fetch(url) as response`` is whatever ``page.goto`` returned.
    Leaving the context closes everything that was opened.

    Attributes:
        url: Address the page is navigated to on entry.
        playwright, browser, context, page: Playwright handles; ``None``
            until ``__aenter__`` has created them.
    """

    def __init__(self, url: str):
        self.url = url
        self.playwright = None
        self.browser = None
        self.context = None
        self.page = None

    async def __aenter__(self):
        """Open the browser page, navigate to ``self.url`` and return the response.

        Raises:
            Any exception raised while starting Playwright, connecting or
            navigating.  Resources opened before the failure are released
            first, because ``__aexit__`` is not invoked when ``__aenter__``
            raises.
        """
        self.playwright = await async_playwright().start()
        try:
            self.browser = await self.playwright.chromium.connect(BROWSER_WS)
            self.context = await self.browser.new_context()
            # Open the page inside the context created above so that
            # ``self.context`` really is the page's context, instead of the
            # implicit extra context ``browser.new_page()`` would create.
            self.page = await self.context.new_page()
            return await self.page.goto(self.url)
        except BaseException:
            try:
                await self._release()
            except Exception:
                # The setup failure is the error callers need to see; a
                # secondary cleanup failure must not replace it.
                pass
            raise

    async def __aexit__(self, exc_type, exc_value, traceback):
        """Release the page, context, browser and Playwright driver.

        Returns ``None``, so an exception raised inside the ``async with``
        body is never suppressed.
        """
        await self._release()

    async def _release(self):
        """Close every opened handle in reverse order of creation.

        Handles that were never created (still ``None``) are skipped.  Every
        remaining handle is attempted even if an earlier one fails to close;
        the first failure is re-raised once all of them have been tried.
        """
        first_error = None
        for handle, closer_name in (
            (self.page, "close"),
            (self.context, "close"),
            (self.browser, "close"),
            (self.playwright, "stop"),
        ):
            if handle is None:
                continue
            try:
                await getattr(handle, closer_name)()
            except Exception as exc:
                if first_error is None:
                    first_error = exc
        if first_error is not None:
            raise first_error
185
211
186
212
@app .get ("/" , response_class = HTMLResponse )
187
213
async def home (request : Request ):
@@ -268,9 +294,10 @@ async def dynamic_download(request: Request, key: str, file_name: str = Query(..
268
294
if code :
269
295
try :
270
296
result = await execute_async_code (code )
297
+ print (result )
271
298
272
- content : bytes
273
- response : Response
299
+ content : bytes = bytes ()
300
+ media_type : str = 'application/octet-stream'
274
301
275
302
if not BROWSER_WS :
276
303
async with aiohttp .ClientSession () as session :
@@ -280,31 +307,35 @@ async def dynamic_download(request: Request, key: str, file_name: str = Query(..
280
307
else :
281
308
async with async_playwright () as playwright :
282
309
browser = await playwright .chromium .connect (BROWSER_WS )
283
- async with browser :
284
- page = await browser .new_page ()
310
+ try :
311
+ context = await browser .new_context ()
312
+ page = await context .new_page ()
313
+ response = await page .goto (result )
314
+ if response .ok :
315
+ content = await response .body ()
316
+ media_type = response .headers .get ('Content-Type' , 'application/octet-stream' )
317
+ except PlaywrightError :
318
+ context = await browser .new_context (accept_downloads = True )
319
+ page = await context .new_page ()
285
320
try :
286
- response = await page .goto (result )
287
- if response .ok :
288
- content = await response .body ()
289
- except PlaywrightError :
290
- try :
291
- download_future = page .wait_for_event ("download" )
292
- response = await page .goto (result )
293
- if response .ok :
294
- stream = await (await download_future ).create_read_stream () or await response .body ()
295
- buffer = io .BytesIO ()
296
- async for chunk in stream :
297
- buffer .write (chunk )
298
- content = buffer .getvalue ()
299
- except :
300
- pass
301
- finally :
302
- await page .close ()
303
- await browser .close ()
304
-
321
+ async with page .expect_download () as download_info :
322
+ await page .goto (result )
323
+ download = await download_info .value
324
+ stream = await download .create_read_stream ()
325
+ buffer = io .BytesIO ()
326
+ async for chunk in stream :
327
+ buffer .write (chunk )
328
+ content = buffer .getvalue ()
329
+ media_type = response .headers .get ('Content-Type' , 'application/octet-stream' )
330
+ except :
331
+ pass
332
+ finally :
333
+ await page .close ()
334
+ await browser .close ()
335
+
305
336
return StreamingResponse (
306
337
iter ([content ]),
307
- media_type = response . headers . get ( 'Content-Type' , 'application/octet-stream' ) ,
338
+ media_type = media_type ,
308
339
headers = {'Content-Disposition' : f'attachment; filename="{ file_name } "' }
309
340
)
310
341
except Exception as e :
0 commit comments