Skip to content

Commit

Permalink
Merge pull request #5 from quantile-development/feature/3-retry-mechanism
Browse files Browse the repository at this point in the history

Feature/3 retry mechanism
  • Loading branch information
paulorijnberg authored Feb 2, 2024
2 parents ddb94c2 + ae604dc commit dc6949e
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 52 deletions.
68 changes: 16 additions & 52 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ packages = [{include = "pyspark_cdm"}]
python = "<4.0,>=3.8"
commondatamodel-objectmodel = "^1.7.3"
nest-asyncio = "^1.5.6"
tenacity = "^8.2.3"

[tool.poetry.group.dev.dependencies]
black = "^23.7.0"
Expand Down
9 changes: 9 additions & 0 deletions pyspark_cdm/entity.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,11 @@
from cdm.persistence.modeljson import LocalEntityDeclarationPersistence
from pyspark.sql.types import StructField, StructType
from pyspark.sql import DataFrame
from tenacity import retry, stop_after_attempt, wait_random_exponential

def log_attempt_number(retry_state):
    """Emit a console message reporting the attempt number after each retry.

    Intended for use as a tenacity ``after=`` callback: tenacity invokes it
    with the current ``RetryCallState`` once an attempt has finished.
    """
    message = f"Retrying: {retry_state.attempt_number}..."
    print(message)

class Entity:
def __init__(
Expand Down Expand Up @@ -135,6 +139,11 @@ def schema(self) -> StructType:
catalog = catalog_factory(self)
return catalog.schema

@retry(
stop=stop_after_attempt(2),
wait=wait_random_exponential(multiplier=3, max=60),
after=log_attempt_number,
)
def get_dataframe(self, spark) -> DataFrame:
return spark.read.csv(
list(self.file_paths),
Expand Down

0 comments on commit dc6949e

Please sign in to comment.