Skip to content

first regression test #88

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 16 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 60 additions & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,66 @@ jobs:
name: smoke-test-results
path: junit-smoke.xml

test-regression:
  name: Regression Tests
  runs-on: ubuntu-latest
  needs: test-unit
  # Opt-in job: it only runs when the pull request carries the
  # 'test-regression' or 'regression' label.
  if: |
    contains(github.event.pull_request.labels.*.name, 'test-regression') ||
    contains(github.event.pull_request.labels.*.name, 'regression')

  steps:
    - uses: actions/checkout@v4

    - name: Set up Python 3.11
      uses: actions/setup-python@v4
      with:
        python-version: "3.11"

    # xvfb supplies the virtual display that `xvfb-run` uses in the test step.
    - name: Install system dependencies
      run: |
        sudo apt-get update
        sudo apt-get install -y xvfb

    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        pip install -e ".[dev]"
        pip install jsonschema
        playwright install chromium
        playwright install-deps chromium

    # Only tests carrying the `regression` marker are selected (-m "regression").
    - name: Run regression tests
      run: |
        xvfb-run -a pytest tests/ -v \
          --cov=stagehand \
          --cov-report=xml \
          --junit-xml=junit-regression.xml \
          -m "regression" \
          --tb=short \
          --maxfail=10
      env:
        # Each value falls back to a mock/default when the secret is not set.
        BROWSERBASE_API_KEY: ${{ secrets.BROWSERBASE_API_KEY || 'mock-api-key' }}
        BROWSERBASE_PROJECT_ID: ${{ secrets.BROWSERBASE_PROJECT_ID || 'mock-project-id' }}
        MODEL_API_KEY: ${{ secrets.MODEL_API_KEY || 'mock-model-key' }}
        STAGEHAND_API_URL: ${{ secrets.STAGEHAND_API_URL || 'http://localhost:3000' }}

    # `if: always()` keeps the reports available even when the test step fails.
    - name: Upload regression test results
      uses: actions/upload-artifact@v4
      if: always()
      with:
        name: regression-test-results
        path: junit-regression.xml

    - name: Upload coverage data
      uses: actions/upload-artifact@v4
      if: always()
      with:
        name: coverage-data-regression
        # `.coverage` is a dotfile; upload-artifact v4 skips hidden files
        # unless this is enabled, which would silently drop it.
        include-hidden-files: true
        path: |
          .coverage
          coverage.xml

test-e2e:
name: End-to-End Tests
runs-on: ubuntu-latest
Expand Down
1 change: 1 addition & 0 deletions pytest.ini
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ markers =
local: marks tests as local integration tests
api: marks tests as API integration tests
e2e: marks tests as end-to-end tests
regression: marks tests as regression tests

log_cli = true
log_cli_level = INFO
Empty file added tests/regression/__init__.py
Empty file.
103 changes: 103 additions & 0 deletions tests/regression/test_act_timeout.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
"""
Regression test for act timeout functionality.

This test verifies that the timeout mechanism works correctly for act operations,
based on the TypeScript expect_act_timeout evaluation.

NOTE: Act timeout functionality has not been implemented in the Python library yet.
These tests are skipped until timeout support is implemented.
"""

import os
import pytest
import pytest_asyncio

from stagehand import Stagehand, StagehandConfig


class TestActTimeout:
    """Regression tests covering the act timeout behaviour.

    Both test cases in this class carry an unconditional ``skip`` marker, so
    they act as placeholders; the fixtures are only exercised once those
    markers are removed.
    """

    @pytest.fixture(scope="class")
    def local_config(self):
        """Return the ``StagehandConfig`` used for LOCAL-mode runs."""
        # MODEL_API_KEY wins; OPENAI_API_KEY is used when it is unset or empty.
        model_api_key = os.getenv("MODEL_API_KEY") or os.getenv("OPENAI_API_KEY")
        return StagehandConfig(
            env="LOCAL",
            model_name="gpt-4o-mini",
            headless=True,
            verbose=1,
            dom_settle_timeout_ms=2000,
            model_client_options={"apiKey": model_api_key},
        )

    @pytest.fixture(scope="class")
    def browserbase_config(self):
        """Return the ``StagehandConfig`` used for BROWSERBASE-mode runs."""
        # MODEL_API_KEY wins; OPENAI_API_KEY is used when it is unset or empty.
        model_api_key = os.getenv("MODEL_API_KEY") or os.getenv("OPENAI_API_KEY")
        return StagehandConfig(
            env="BROWSERBASE",
            api_key=os.getenv("BROWSERBASE_API_KEY"),
            project_id=os.getenv("BROWSERBASE_PROJECT_ID"),
            model_name="gpt-4o",
            headless=False,
            verbose=2,
            model_client_options={"apiKey": model_api_key},
        )

    @pytest_asyncio.fixture
    async def local_stagehand(self, local_config):
        """Yield an initialised LOCAL ``Stagehand`` and close it afterwards."""
        client = Stagehand(config=local_config)
        await client.init()
        yield client
        await client.close()

    @pytest_asyncio.fixture
    async def browserbase_stagehand(self, browserbase_config):
        """Yield an initialised BROWSERBASE ``Stagehand`` and close it afterwards.

        The requesting test is skipped when either Browserbase credential
        environment variable is missing or empty.
        """
        if not os.getenv("BROWSERBASE_API_KEY") or not os.getenv("BROWSERBASE_PROJECT_ID"):
            pytest.skip("Browserbase credentials not available")

        client = Stagehand(config=browserbase_config)
        await client.init()
        yield client
        await client.close()

    @pytest.mark.asyncio
    @pytest.mark.regression
    @pytest.mark.local
    @pytest.mark.skip(reason="Act timeout functionality has been removed from the Python implementation")
    async def test_expect_act_timeout_local(self, local_stagehand):
        """Placeholder for the ``expect_act_timeout`` regression (LOCAL).

        Always skipped: per the original author's note, the ``timeout_ms``
        parameter in ``ActOptions`` is not currently handled by the act
        handler, so there is nothing to assert yet.

        Intended scenario once timeouts are supported:
        - Navigate to docs.stagehand.dev
        - Attempt an action with a 1 second timeout
        - Expect the action to fail because of the timeout
        """

    @pytest.mark.asyncio
    @pytest.mark.regression
    @pytest.mark.api
    @pytest.mark.skip(reason="Act timeout functionality has been removed from the Python implementation")
    @pytest.mark.skipif(
        not os.getenv("BROWSERBASE_API_KEY") or not os.getenv("BROWSERBASE_PROJECT_ID"),
        reason="Browserbase credentials not available"
    )
    async def test_expect_act_timeout_browserbase(self, browserbase_stagehand):
        """Placeholder for the ``expect_act_timeout`` regression (Browserbase).

        Always skipped: per the original author's note, the ``timeout_ms``
        parameter in ``ActOptions`` is not currently handled by the act
        handler, so there is nothing to assert yet.

        Intended scenario once timeouts are supported:
        - Navigate to docs.stagehand.dev
        - Attempt an action with a 1 second timeout
        - Expect the action to fail because of the timeout
        """
Loading