Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Finance #124

Merged
merged 6 commits into from
Sep 25, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file.
31 changes: 31 additions & 0 deletions dags/app/finance_bot/dag.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
"""
Send Google Search Report to Discord
"""
from datetime import datetime, timedelta

from airflow import DAG
from airflow.operators.python_operator import PythonOperator
from app.finance import udf

DEFAULT_ARGS = {
"owner": "qchwan",
"depends_on_past": False,
"start_date": datetime(2023, 8, 27),
"retries": 2,
"retry_delay": timedelta(minutes=5),
"on_failure_callback": lambda x: "Need to send notification to Discord",
}
dag = DAG(
"DISCORD_CHORES_REMINDER",
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

might need to rename this~

default_args=DEFAULT_ARGS,
schedule_interval="@daily",
max_active_runs=1,
catchup=False,
)
with dag:
REMINDER_OF_THIS_TEAM = PythonOperator(
task_id="FINANCE_REMINDER", python_callable=udf.main
)

if __name__ == "__main__":
dag.cli()
98 changes: 98 additions & 0 deletions dags/app/finance_bot/udf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
import os

import numpy as np
import pandas as pd
import pygsheets
import requests
from app import discord
from google.cloud import bigquery

session = requests.session()


def main() -> None:
# read xls from google doc to df.
df_xls = read_google_xls_to_df()
# read bigquery to df.
df_bigquery = read_bigquery_to_df()
Comment on lines +14 to +17
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

thx for writing comments~

# check difference between 2 df
df_diff = df_difference(df_xls, df_bigquery)
# link to bigquery and write xls file
write_to_bigquery(df_diff)
# push to discord
webhook_url = os.getenv("discord_data_stratagy_webhook")
username = "財務機器人"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

💯

msg = refine_diff_df_to_string(df_diff)
if msg != "no data":
discord.send_webhook_message(webhook_url, username, msg)


def df_difference(df_xls, df_bigquery) -> pd.DataFrame:
merged = pd.merge(df_xls, df_bigquery, how="outer", indicator=True)
return merged[merged["_merge"] == "left_only"].drop("_merge", axis=1)
Comment on lines +30 to +32
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍



def read_bigquery_to_df() -> pd.DataFrame:
client = bigquery.Client()
query = """
SELECT *
FROM `pycontw-225217.test.pycontw_finance`
"""
query_job = client.query(query)
results = query_job.result()
schema = results.schema
column_names = [field.name for field in schema]
data = [list(row.values()) for row in results]
df = pd.DataFrame(data=data, columns=column_names)

return df


def read_google_xls_to_df() -> pd.DataFrame:
gc = pygsheets.authorize(service_file=os.getenv("GOOGLE_APPLICATION_CREDENTIALS"))
sheet = gc.open_by_url(os.getenv("finance_xls_path"))
wks = sheet.sheet1
df = wks.get_as_df(include_tailing_empty=False)
df.replace("", np.nan, inplace=True)
df.dropna(inplace=True)
df = df.astype(str)
df.columns = [
"Reason",
"Price",
"Remarks",
"Team_name",
"Details",
"To_who",
"Yes_or_No",
]
return df


def write_to_bigquery(df) -> None:
project_id = "pycontw-225217"
dataset_id = "test"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

remember to update the dataset id~

table_id = "pycontw_finance"
client = bigquery.Client(project=project_id)
table = client.dataset(dataset_id).table(table_id)
schema = [
bigquery.SchemaField("Reason", "STRING", mode="REQUIRED"),
bigquery.SchemaField("Price", "STRING", mode="REQUIRED"),
bigquery.SchemaField("Remarks", "STRING", mode="REQUIRED"),
bigquery.SchemaField("Team_name", "STRING", mode="REQUIRED"),
bigquery.SchemaField("Details", "STRING", mode="REQUIRED"),
bigquery.SchemaField("To_who", "STRING", mode="REQUIRED"),
bigquery.SchemaField("Yes_or_No", "STRING", mode="REQUIRED"),
]
job_config = bigquery.LoadJobConfig(schema=schema)
job = client.load_table_from_dataframe(df, table, job_config=job_config)
job.result()


def refine_diff_df_to_string(df) -> str:
msg = ""
if df.empty:
return "no data"
else:
for row in df.itertuples(index=False):
msg += f"{row[0]}, 花費: {row[1]}, {row[3]}, {row[4]}\n"
Comment on lines +95 to +97
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

💯. What do you think about calculating the total expenses as well as the expenses per group in your next PR?

return msg
Loading