Skip to content
This repository has been archived by the owner on Jan 18, 2025. It is now read-only.

Commit

Permalink
fix(engines/csgt.vn): it works now
Browse files Browse the repository at this point in the history
  • Loading branch information
KevinNitroG committed Jan 15, 2025
1 parent 9d886e6 commit 301be0d
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 15 deletions.
6 changes: 4 additions & 2 deletions src/check_phat_nguoi/constants/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
from .config import CONFIG_PATHS, DETAIL_LOG_MESSAGE, SIMPLE_LOG_MESSAGE
from .get_data import (
API_URL_CSGT_CAPTCHA,
API_URL_CSGT_QUERY,
API_URL_CSGT_QUERY_1,
API_URL_CSGT_QUERY_2,
API_URL_PHATNGUOI,
DATETIME_FORMAT_CHECKPHATNGUOI,
GET_DATA_API_URL_CHECKPHATNGUOI,
Expand All @@ -14,7 +15,8 @@
)

__all__ = [
"API_URL_CSGT_QUERY",
"API_URL_CSGT_QUERY_1",
"API_URL_CSGT_QUERY_2",
"API_URL_CSGT_CAPTCHA",
"CONFIG_PATHS",
"DETAIL_LOG_MESSAGE",
Expand Down
3 changes: 2 additions & 1 deletion src/check_phat_nguoi/constants/get_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,10 @@
# Captcha endpoint on csgt.vn (imported by the csgt engine as API_CAPTCHA).
API_URL_CSGT_CAPTCHA: LiteralString = (
"https://www.csgt.vn/lib/captcha/captcha.class.php"
)
# First query step: the csgt engine POSTs the plate, vehicle type and captcha
# answer to this URL as form data.
API_URL_CSGT_QUERY: LiteralString = (
API_URL_CSGT_QUERY_1: LiteralString = (
"https://www.csgt.vn/?mod=contact&task=tracuu_post&ajax"
)
# Second query step: a URL template whose {vehicle_type} and {plate}
# placeholders are filled with str.format() by the csgt engine.
API_URL_CSGT_QUERY_2: LiteralString = "https://www.csgt.vn/tra-cuu-phuong-tien-vi-pham.html?&LoaiXe={vehicle_type}&BienKiemSoat={plate}"

DATETIME_FORMAT_CHECKPHATNGUOI: LiteralString = "%H:%M, %d/%m/%Y"

Expand Down
39 changes: 27 additions & 12 deletions src/check_phat_nguoi/get_data/engines/csgt.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@

from check_phat_nguoi.config import PlateInfo
from check_phat_nguoi.constants import API_URL_CSGT_CAPTCHA as API_CAPTCHA
from check_phat_nguoi.constants import API_URL_CSGT_QUERY as API_QUERY
from check_phat_nguoi.constants import API_URL_CSGT_QUERY_1 as API_QUERY_1
from check_phat_nguoi.constants import API_URL_CSGT_QUERY_2 as API_QUERY_2
from check_phat_nguoi.constants import DATETIME_FORMAT_CHECKPHATNGUOI as DATETIME_FORMAT
from check_phat_nguoi.context import PlateDetail, ViolationDetail
from check_phat_nguoi.types import ApiEnum, VehicleTypeEnum, get_vehicle_enum
Expand All @@ -36,6 +37,7 @@ class _GetDataCsgtCoreEngine(HttpaioSession):

def __init__(self, plate_info: PlateInfo) -> None:
    """Store the plate to look up and set up per-instance state.

    Args:
        plate_info: Plate configuration. Its ``type`` is converted to a
            ``VehicleTypeEnum`` here, once, and kept on the instance.
    """
    self._plate_info: PlateInfo = plate_info
    # Resolved up front so the request methods can read it from the instance.
    self._vehicle_type: VehicleTypeEnum = get_vehicle_enum(plate_info.type)
    HttpaioSession.__init__(self)
    # Collected in a set, so equal ViolationDetail entries are kept only once.
    self._violations_details_set: set[ViolationDetail] = set()

Expand All @@ -56,28 +58,37 @@ async def _get_phpsessid_and_captcha(self) -> tuple[str, bytes]:
logger.debug(f"Plate {self._plate_info.plate} PHPSESSID: {phpsessid}")
return phpsessid, captcha_img

async def _get_html_check(self, captcha: str, phpsessid: str) -> str:
    """Submit the solved captcha together with the plate and return the reply body.

    Args:
        captcha: Text answer for the captcha image.
        phpsessid: Value sent back to the server as the ``PHPSESSID`` cookie.

    Returns:
        The response body as text.
    """
    vehicle_code = self._vehicle_type.value
    form_fields: dict[str, str | int] = {
        "BienKS": self._plate_info.plate,
        "Xe": vehicle_code,
        "captcha": captcha,
        "ipClient": "9.9.9.91",
        # The form sends the vehicle type code a second time under "cUrl".
        "cUrl": vehicle_code,
    }
    session_cookie: SimpleCookie = SimpleCookie(f"PHPSESSID={phpsessid}")
    async with self._session.post(
        url=API_QUERY_1,
        headers={"Content-Type": "application/x-www-form-urlencoded"},
        cookies=session_cookie,
        data=form_fields,
        ssl=SSL_CONTEXT,
    ) as response:
        return await response.text()

async def _get_plate_data(self) -> str:
    """Request the violation lookup page for this plate and return its body.

    The URL is ``API_QUERY_2`` with the vehicle type code and the plate
    substituted into its placeholders.

    Returns:
        The response body as text.
    """
    lookup_url: str = API_QUERY_2.format(
        vehicle_type=self._vehicle_type.value,
        plate=self._plate_info.plate,
    )
    async with self._session.post(url=lookup_url, ssl=SSL_CONTEXT) as response:
        page_text: str = await response.text()
    return page_text

def _parse_violation(self, violation_data: str) -> None:
soup: BeautifulSoup = BeautifulSoup(violation_data, "html.parser")
if not soup.css:
Expand All @@ -104,7 +115,7 @@ def _parse_violation(self, violation_data: str) -> None:
type_tag.text.strip()
if (
type_tag := soup.select_one(
".form-group:nth-child(2) > div > div:nth-child(2)"
".form-group:nth-child(3) > div > div:nth-child(2)"
)
)
else None
Expand Down Expand Up @@ -198,8 +209,8 @@ def _parse_violations(
self._parse_violation(violation_data)
return tuple(self._violations_details_set)

def _parse_html(self, html_data: str) -> PlateDetail | None:
soup: BeautifulSoup = BeautifulSoup(html_data, "html.parser")
def _parse_html(self, html: str) -> PlateDetail | None:
soup: BeautifulSoup = BeautifulSoup(html, "html.parser")
violation_group_tag: Tag | NavigableString | None = soup.find(
"div", id="bodyPrint123"
)
Expand All @@ -223,13 +234,17 @@ async def get_data(self) -> PlateDetail | None:
captcha: str = self._bypass_captcha(captcha_img)
logger.debug(f"Plate {self._plate_info.plate} captcha resolved: {captcha}")
logger.debug(
f"Plate {self._plate_info.plate}: Sending request again to get data..."
f"Plate {self._plate_info.plate}: Sending request again to get check..."
)
html_data: str = await self._get_html_data(captcha, phpsessid)
html_data: str = await self._get_html_check(captcha, phpsessid)
if html_data.strip() == "404":
logger.error(f"Plate {self._plate_info.plate}: Wrong captcha")
return
plate_detail: PlateDetail | None = self._parse_html(html_data)
logger.debug(
f"Plate {self._plate_info.plate}: Sending request again to get data..."
)
plate_data: str = await self._get_plate_data()
plate_detail: PlateDetail | None = self._parse_html(plate_data)
return plate_detail
except TimeoutError as e:
logger.error(
Expand Down

0 comments on commit 301be0d

Please sign in to comment.