Skip to content

Commit 8cb6f67

Browse files
committed
first commit
0 parents  commit 8cb6f67

File tree

7 files changed

+90
-0
lines changed

7 files changed

+90
-0
lines changed

.DS_Store

6 KB
Binary file not shown.

.github/workflows/save.yaml

+25
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
# Runs main.py on every push to main and commits the regenerated files in
# output/ back to the same branch.
name: Save latest data
on:
  push:
    branches:
      - main
# The last step pushes a commit, so the workflow token needs write access to
# the repository contents.
permissions:
  contents: write
jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          # Quoted so YAML keeps the version a string instead of a float
          # (an unquoted 3.10 would be read as 3.1).
          python-version: "3.11"
      - run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt
      - run: python main.py
      - run: |
          git config user.email "[email protected]"
          git config user.name "GitHub Actions"
          git add output/*
          # `git commit` exits non-zero when there is nothing to commit, which
          # would fail the job on days the data did not change; only commit
          # when the tree actually differs from HEAD.
          git diff --quiet HEAD || git commit -am "Update latest [skip ci]"
      - uses: ad-m/github-push-action@master
        with:
          # github.head_ref is only populated for pull-request events; this
          # workflow is triggered by push, so use the pushed ref instead.
          branch: ${{ github.ref }}

.gitignore

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
.idea/
2+
venv/

README.md

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# bik
2+
3+
Here I’m just trying to get a list of Russian banks in different formats.

main.py

+43
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
import os
2+
import cloudscraper
3+
import zipfile
4+
import codecs
5+
import pandas as pd
6+
from bs4 import BeautifulSoup
7+
from datetime import datetime, timedelta
8+
9+
# Base URL of the archive and the name of the file to fetch. The name is built
# from the local date two days before "now" (YYYYMMDD) plus a fixed suffix.
urlpath = "https://cbr.ru/vfs/mcirabis/BIKNew/"
filename = (datetime.now() - timedelta(days=2)).strftime('%Y%m%d') + "ED01OSBR.zip"
outdir = "output/"
outzip = outdir + filename

# Make sure the target directory exists before anything is written into it.
os.makedirs(outdir, exist_ok=True)

scraper = cloudscraper.create_scraper(disableCloudflareV1=True, browser="chrome", delay=10)

# Try the download up to three times; stop at the first HTTP 200.
for _ in range(3):
    try:
        response = scraper.get(urlpath + filename, allow_redirects=True)
    except OSError as e:
        # requests signals connection problems with RequestException, an
        # OSError subclass; treat them like a bad status and try again.
        print(f"Error downloading zip: {e}")
        continue
    if response.status_code != 200:
        print(f"Error downloading zip: {response.status_code}")
        continue
    # Context manager so the archive is flushed and closed before unzipping.
    with open(outzip, "wb") as archive:
        archive.write(response.content)
    break
else:
    # Every attempt failed: there is no archive on disk, so stop here instead
    # of falling through to a misleading "Error unpacking file" message.
    print("Error downloading zip: giving up after 3 attempts")
    raise SystemExit(1)

# Unpack the archive into the output directory and drop the zip afterwards.
# Broad catch is deliberate: this is the script's top-level boundary and any
# failure here ends the run with exit status 1.
try:
    with zipfile.ZipFile(outzip, 'r') as zr:
        zr.extractall(outdir)
    os.remove(outzip)
except Exception as e:
    print(f"Error unpacking file: {e}")
    raise SystemExit(1)

# Convert every extracted "*_full.xml" file into latest.csv / .tsv / .json /
# .xml and delete the source XML. If more than one such file is present, each
# one overwrites the previous "latest.*" outputs.
for fp in os.listdir(outdir):
    if not fp.endswith("_full.xml"):
        continue
    with codecs.open(outdir + fp, "r", encoding="windows-1251") as f:
        content = f.read()
    # NOTE(review): the "lxml" HTML parser lower-cases tag names, which is
    # presumably why the tag is looked up as "participantinfo" - confirm.
    soup = BeautifulSoup(content, "lxml")
    # One row per <participantinfo> element, one column per XML attribute.
    df = pd.DataFrame(data=[tag.attrs for tag in soup.find_all("participantinfo")])
    df.to_csv(outdir + "latest.csv", index=False, header=True)
    # quoting=3 is csv.QUOTE_NONE: fields are written without quote characters.
    df.to_csv(outdir + "latest.tsv", index=False, header=True, sep="\t", quoting=3)
    df.to_json(outdir + "latest.json", orient="records")
    df.to_xml(outdir + "latest.xml")
    os.remove(outdir + fp)

output/.gitkeep

Whitespace-only changes.

requirements.txt

+17
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
beautifulsoup4==4.12.2
2+
certifi==2023.11.17
3+
charset-normalizer==3.3.2
4+
cloudscraper==1.2.71
5+
idna==3.6
6+
lxml==4.9.3
7+
numpy==1.26.2
8+
pandas==2.1.3
9+
pyparsing==3.1.1
10+
python-dateutil==2.8.2
11+
pytz==2023.3.post1
12+
requests==2.31.0
13+
requests-toolbelt==1.0.0
14+
six==1.16.0
15+
soupsieve==2.5
16+
tzdata==2023.3
17+
urllib3==2.1.0

0 commit comments

Comments
 (0)