Skip to content

Commit

Permalink
Fixed pip install -e . + can be installed locally again
Browse files Browse the repository at this point in the history
  • Loading branch information
zdeneklapes committed Dec 25, 2023
1 parent ee47bd1 commit a26f58b
Show file tree
Hide file tree
Showing 9 changed files with 156 additions and 143 deletions.
71 changes: 40 additions & 31 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,54 +1,63 @@
FROM python:3.10 as base


FROM base as builder-chromedriver

ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1

# install all packages for chromedriver: https://gist.github.com/varyonic/dea40abcf3dd891d204ef235c6e8dd79
RUN apt-get update && \
apt-get install -y xvfb gnupg wget curl unzip --no-install-recommends && \
RUN set -ex && \
apt-get update && \
apt-get install -y \
xvfb \
gnupg \
wget \
curl \
unzip \
cron \
vim \
fish \
python3-dev \
bat \
--no-install-recommends && \
wget -q -O - https://dl-ssl.google.com/linux/linux_signing_key.pub | apt-key add - && \
echo "deb http://dl.google.com/linux/chrome/deb/ stable main" >> /etc/apt/sources.list.d/google.list && \
apt-get update -y

RUN apt-get install -y google-chrome-stable && \
CHROMEVER=$(google-chrome --product-version | grep -o "[^\.]*\.[^\.]*\.[^\.]*") && \
DRIVERVER=$(curl -s "https://chromedriver.storage.googleapis.com/LATEST_RELEASE_$CHROMEVER")

RUN wget -q --continue -P /chromedriver "http://chromedriver.storage.googleapis.com/$DRIVERVER/chromedriver_linux64.zip" && \
unzip /chromedriver/chromedriver* -d /chromedriver

# make the chromedriver executable and move it to default selenium path.
RUN chmod +x /chromedriver/chromedriver
RUN mv /chromedriver/chromedriver /usr/bin/chromedriver
RUN set -ex && \
apt-get install -y google-chrome-stable && \
export CHROMEVER=$(google-chrome --product-version | grep -o "[^\.]*\.[^\.]*\.[^\.]*") && \
export DRIVERVER=$(curl -s "https://chromedriver.storage.googleapis.com/LATEST_RELEASE_$CHROMEVER")

#RUN set -ex && \
# echo "Using chromedriver version: $DRIVERVER"
#
#RUN set -ex && \
# wget -q --continue -P /chromedriver "http://chromedriver.storage.googleapis.com/$DRIVERVER/chromedriver_linux64.zip" && \
# unzip /chromedriver/chromedriver* -d /chromedriver
#
## make the chromedriver executable and move it to default selenium path.
#RUN set -ex && \
# chmod +x /chromedriver/chromedriver
#
#RUN set -ex && \
# mv /chromedriver/chromedriver /usr/bin/chromedriver

#RUN curl -LO https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb
#RUN apt-get install -y ./google-chrome-stable_current_amd64.deb
#RUN rm google-chrome-stable_current_amd64.deb

FROM builder-chromedriver as builder-python

COPY requirements.txt /app/requirements.txt

RUN set -ex && \
apt-get update && \
apt-get -y install \
cron \
vim \
fish \
python3-dev \
bat \
--no-install-recommends && \
pip install --upgrade pip && \
pip install --no-cache-dir -r /app/requirements.txt && \
rm -rf /var/lib/apt/lists/* && \
service cron start
RUN pip install -e .
pip install -r /app/requirements.txt

FROM builder-python
#RUN set -ex && \
# pip install -e .

# set the proxy addresses
ENV HTTP_PROXY "http://134.209.29.120:8080"
ENV HTTPS_PROXY "https://45.77.71.140:9050"
#ENV HTTP_PROXY "http://134.209.29.120:8080"
#ENV HTTPS_PROXY "https://45.77.71.140:9050"

WORKDIR /app
CMD ["fish"]
120 changes: 32 additions & 88 deletions Dockerfile_selenium
Original file line number Diff line number Diff line change
@@ -1,99 +1,43 @@
# Use the official Selenium Grid Base image as the base image
FROM selenium/standalone-chrome:119.0 as base
#FROM UBUNTU:20.04 as base
FROM selenium/standalone-chrome:latest

# Set the desired Chrome and ChromeDriver versions
ENV CHROME_VERSION 119.0
ENV CHROMEDRIVER_VERSION 119.0
ENV SELENIUM_VERSION 4.9.0
# install google chrome
RUN wget -q -O - https://dl-ssl.google.com/linux/linux_signing_key.pub | apt-key add -
RUN sh -c 'echo "deb [arch=amd64] http://dl.google.com/linux/chrome/deb/ stable main" >> /etc/apt/sources.list.d/google-chrome.list'
RUN apt-get -y update
RUN apt-get install -y google-chrome-stable

# Install Chrome and ChromeDriver with the specified versions
#RUN wget -q -O /tmp/chrome.deb https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb && \
# dpkg -i /tmp/chrome.deb || apt-get -f install -y && \
# rm /tmp/chrome.deb
#RUN wget -q -O /tmp/chromedriver.zip https://chromedriver.storage.googleapis.com/${CHROMEDRIVER_VERSION}/chromedriver_linux64.zip && \
# unzip /tmp/chromedriver.zip -d /usr/bin && \
# chmod +x /usr/bin/chromedriver && \
# rm /tmp/chromedriver.zip
# Set the desired Chrome and ChromeDriver versions as environment variables
#ENV CHROME_VERSION 119.0
#ENV CHROMEDRIVER_VERSION 119.0
# install chromedriver
RUN apt-get install -yqq unzip
RUN wget -O /tmp/chromedriver.zip http://chromedriver.storage.googleapis.com/`curl -sS chromedriver.storage.googleapis.com/LATEST_RELEASE`/chromedriver_linux64.zip
RUN unzip /tmp/chromedriver.zip chromedriver -d /usr/local/bin/

FROM base as chromedriver
# set display port to avoid crash
ENV DISPLAY=:99

# Install Chrome
RUN wget -q -O /tmp/chrome.deb https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb && \
dpkg -i /tmp/chrome.deb || apt-get -f install -y && \
rm /tmp/chrome.deb
# install selenium
RUN pip install selenium==3.8.0

# Install ChromeDriver
RUN wget -q -O /tmp/chromedriver.zip https://chromedriver.storage.googleapis.com/${CHROMEDRIVER_VERSION}/chromedriver_linux64.zip && \
unzip /tmp/chromedriver.zip -d /usr/bin && \
chmod +x /usr/bin/chromedriver && \
rm /tmp/chromedriver.zip

RUN set -ex && \
apt-get update && \
apt-get install -y \
python3-dev \
--no-install-recommends

# wget \
# curl \
# unzip \
# cron \
# vim \
# fish \
# bat \
# xvfb \
# gnupg \
# rm -rf /var/lib/apt/lists/*

#RUN apt-get install -y google-chrome-stable && \

#RUN apt-get update && \
# apt-get install -y xvfb gnupg wget curl unzip --no-install-recommends && \
# wget -q -O - https://dl-ssl.google.com/linux/linux_signing_key.pub | apt-key add - && \
# echo "deb http://dl.google.com/linux/chrome/deb/ stable main" >> /etc/apt/sources.list.d/google.list && \
# apt-get update -y
#
#RUN apt-get install -y google-chrome-stable && \
# CHROMEVER=$(google-chrome --product-version | grep -o "[^\.]*\.[^\.]*\.[^\.]*") && \
# DRIVERVER=$(curl -s "https://chromedriver.storage.googleapis.com/LATEST_RELEASE_$CHROMEVER")
#
#RUN wget -q --continue -P /chromedriver "http://chromedriver.storage.googleapis.com/$DRIVERVER/chromedriver_linux64.zip" && \
# unzip /chromedriver/chromedriver* -d /chromedriver

## install google chrome
#RUN wget -q -O - https://dl-ssl.google.com/linux/linux_signing_key.pub | apt-key add -
#RUN sh -c 'echo "deb [arch=amd64] http://dl.google.com/linux/chrome/deb/ stable main" >> /etc/apt/sources.list.d/google-chrome.list'
#RUN apt-get -y update
#RUN apt-get install -y google-chrome-stable
#
## install chromedriver
#RUN apt-get install -yqq unzip
#RUN wget -O /tmp/chromedriver.zip http://chromedriver.storage.googleapis.com/`curl -sS chromedriver.storage.googleapis.com/LATEST_RELEASE`/chromedriver_linux64.zip
#RUN unzip /tmp/chromedriver.zip chromedriver -d /usr/local/bin/
#
## set display port to avoid crash
#ENV DISPLAY=:99
#
## install selenium
#RUN pip install selenium==3.8.0

# Set the Chrome and ChromeDriver versions as environment variables
#ENV CHROME_DRIVER_VERSION $CHROMEDRIVER_VERSION
#ENV CHROME_BROWSER_VERSION $CHROME_VERSION

# Expose the default Selenium Grid port
#EXPOSE 4444

# Start the Selenium Grid server
#CMD ["start-selenium-grid.sh"]

FROM chromedriver as project
RUN set -ex && \
apt-get update && \
apt-get -y install \
cron \
vim \
fish \
python3-dev \
bat \
--no-install-recommends && \
pip install --upgrade pip && \
pip install --no-cache-dir -r /app/requirements.txt && \
rm -rf /var/lib/apt/lists/* && \
service cron start
RUN pip install -e .
pip install --no-cache-dir -r /app/requirements.txt

FROM project
# set the proxy addresses
#ENV HTTP_PROXY "http://134.209.29.120:8080"
#ENV HTTPS_PROXY "https://45.77.71.140:9050"

CMD ["fish"]
RUN pip install -e .
5 changes: 5 additions & 0 deletions bazos/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,11 @@

from bazos.main import bazos as bz

__version__ = "0.1.0"
__apiversion__ = "0.1.0"
__author__ = 'Zdenek Lapes'
__license__ = 'MIT'


def parse_cli_argument() -> Dict[str, Any]:
parser = argparse.ArgumentParser()
Expand Down
5 changes: 3 additions & 2 deletions bazos/__main__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from bazos.__init__ import main
main()
# from bazos.__init__ import main
# main()

#
# from selenium.webdriver.chrome.options import Options
# from selenium import webdriver
Expand Down
14 changes: 7 additions & 7 deletions bazos/main.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import os
import pickle
import pickle # nosec
import sys
from os import path

Expand Down Expand Up @@ -34,7 +34,7 @@ class XPathsBazos:
select_rubrik = "//div[@class='maincontent']/div[1]/form/select"
select_category = "//div[@class='maincontent']/form/div[1]/select"
user_inputs = "//div[@class='maincontent']/form/input"
delete_pwd_input = "//div[@class='maincontent']/div[2]/form/input[1]"
delete_pwd_input = "//div[@class='maincontent']/div[2]/form/input[1]" # nosec
delete_submit = "//div[@class='maincontent']/div[2]/form/input[4]"

auth_phone_input = "//div[@class='maincontent']/form/input[2]"
Expand Down Expand Up @@ -93,8 +93,8 @@ def print_all_rubrics_and_categories(self):
print(_dict)

def check_user_files_available(self) -> None:
if (not os.path.isfile(f"{settings.COOKIES_FILE}_{self.bazos_country}.pkl")
or not os.path.isfile(f"{settings.LOCAL_STORAGE_FILE}_{self.bazos_country}.pkl")):
if (not os.path.isfile(f"{settings.COOKIES_FILE}_{self.bazos_country}.pkl") # nosec
or not os.path.isfile(f"{settings.LOCAL_STORAGE_FILE}_{self.bazos_country}.pkl")): # nosec
self.save_authentication(user=self.user)

def check_authentication(self) -> None:
Expand All @@ -106,7 +106,7 @@ def check_authentication(self) -> None:

def load_page_with_cookies(self) -> None:
self.driver.get(self.url_moje_inzeraty)
for cookie_dict in pickle.load(open(f"{settings.COOKIES_FILE}_{self.bazos_country}.pkl", 'rb')):
for cookie_dict in pickle.load(open(f"{settings.COOKIES_FILE}_{self.bazos_country}.pkl", 'rb')): # nosec
self.driver.add_cookie(cookie_dict)
self.driver.get(self.url_moje_inzeraty)

Expand All @@ -132,10 +132,10 @@ def save_authentication(self, user: User) -> None:

# Save cookies
pickle.dump(self.driver.get_cookies(),
file=open(f"{settings.COOKIES_FILE}_{self.bazos_country}.pkl", "wb"))
file=open(f"{settings.COOKIES_FILE}_{self.bazos_country}.pkl", "wb")) # nosec
# Save Local Storage
pickle.dump(self.driver.execute_script("return window.localStorage;"),
file=open(f"{settings.LOCAL_STORAGE_FILE}_{self.bazos_country}.pkl", "wb"))
file=open(f"{settings.LOCAL_STORAGE_FILE}_{self.bazos_country}.pkl", "wb")) # nosec

def remove_advertisment(self, user: User):
self.driver.find_element(By.CLASS_NAME, 'inzeratydetdel').find_element(
Expand Down
4 changes: 2 additions & 2 deletions bazos/shared/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def parse_yaml(filename: str) -> Optional[Union[Dict[str, str], FileNotFoundErro


def wait_random_time():
time.sleep(random.random() * 1)
time.sleep(random.random() * 1) # nosec


def wait_n_seconds(seconds: int):
Expand Down Expand Up @@ -56,7 +56,7 @@ def refactor_info_txt(_path: str):
for dir in next(os.walk(_path))[1]: # loop through all directories
file = path.join(_path, dir, 'info.txt')

assert path.isfile(file), 'File not exist: info.txt'
assert path.isfile(file), 'File not exist: info.txt' # nosec

with open(file=file, mode='r') as f:
lines = f.readlines()
Expand Down
10 changes: 6 additions & 4 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,11 @@ services:
dockerfile: Dockerfile
volumes:
- ./:/app/ # Mount the project directory to the container
- $HOME/Documents/photos-archive/bazos:/app/images/bazos/
# - ./tmp/fish/:/root/.local/share/fish/ # Mount the fish shell history so that it persists when the Docker container is rebuilt. This creates the ./tmp/fish/ folder in the project directory if it doesn't already exist
# - ./bazos/:/app/bazos/
# - ./scripts/:/app/scripts
- ./tmp/fish/:/root/.local/share/fish/ # Mount the fish shell history so that it persists when the Docker container is rebuilt. This creates the ./tmp/fish/ folder in the project directory if it doesn't already exist
- $HOME/Documents/photos-archive/bazos:/tmp/images/
ports:
- 4444:4444
- 8090:8090
- 9050:9050
stdin_open: true
tty: true
41 changes: 41 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,3 +1,44 @@
## Build
#[build-system]
#requires = ["setuptools"]
#build-backend = "setuptools.build_meta"
#
#[project]
#authors = [
# {name = "Zdenek Lapes", email = "[email protected]"}
#]
#name = "bazos"
#version = "0.1.0"
#description = "Bazos api (scraping) for python"
#readme = "README.md"
#keywords = [
# "bazos",
# "api",
# "scraping",
# "python",
# "bazos.cz",
# "bazos.sk",
# "bazos.at",
# "bazos.pl",
#]
#license = { file = "LICENSE" }
#requires-python = ">=3.10"
#dynamic = [
# "dependencies",
#]
#
## TODO: Fix executing the bazos command on the CLI
#[tool.setuptools.packages.find]
#where = ["."]
#include = ["bazos.*"]
#namespaces = false
#
#[tool.setuptools.dynamic]
#dependencies = { file = ["requirements.txt"] }
#
#[project.scripts]
#bazos = "bazos.__init__:main"

[tool.ruff]
line-length = 120
select = ["E", "F"] # Enable the pycodestyle (`E`) and Pyflakes (`F`) rule sets.
Expand Down
Loading

0 comments on commit a26f58b

Please sign in to comment.