Skip to content

Commit

Permalink
working version
Browse files Browse the repository at this point in the history
  • Loading branch information
kbkozlev committed Jan 5, 2024
1 parent dc62412 commit bb30102
Show file tree
Hide file tree
Showing 7 changed files with 64 additions and 51 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
/files/rawPictures/
/files/croppedPictures/
/files/finalPictures/
/files/sharpenedPictures/
24 changes: 13 additions & 11 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,17 @@

if __name__ == '__main__':
# Script entry point: drives the capture, crop, sharpen and compress stages.
# Each later stage is guarded by the success flag returned by the stage before it.

capture_success = capture_book_pages(book_url="https://library.sap-press.com/reader/main/43ev-bc9p-szan-r8ty/")
# Commented-out variant of the pipeline: it targets a different book URL and
# gates create_backup() between cropping and sharpening.
# capture_success = capture_book_pages(book_url="https://library.sap-press.com/reader/main/rhes-g69t-27nu-b8xd/")
#
# if capture_success:
# crop_success = crop_images()
#
# if crop_success:
# backup_success = create_backup()
#
# if backup_success:
# sharpen_success = sharpen_images_in_folder()
#
# if sharpen_success:
compress_png_folder()

# Cropping only makes sense once the raw screenshots were captured.
if capture_success:
crop_success = crop_images()

if crop_success:
sharpen_success = sharpen_images_in_folder()

if sharpen_success:
compress_png_folder()

# NOTE(review): indentation is not preserved in this listing, so whether the
# backup runs unconditionally cannot be told from here; its result is not stored.
create_backup()
12 changes: 9 additions & 3 deletions src/compress.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,24 @@


def compress_png_folder(input_folder: str = 'files/sharpenedPictures/',
                        output_folder: str = 'files/finalPictures/',
                        pngquant_path: str = 'src/pngquant/pngquant.exe') -> None:
    """Compress every PNG in *input_folder* with pngquant and write it to *output_folder*.

    The default paths are relative, so they resolve against the current
    working directory of the process.

    :param input_folder: folder holding the PNG files to compress
    :param output_folder: folder receiving the compressed files; created if missing
    :param pngquant_path: path of the pngquant executable to run
    :return: None
    :raises OSError: if the pngquant executable cannot be started
    """
    # Ensure the output folder exists, create if not
    os.makedirs(output_folder, exist_ok=True)

    if not os.path.isdir(input_folder):
        print(f"Folder '{input_folder}' does not exist.")
        return

    def page_order(name: str) -> tuple:
        # Numbered pages sort numerically (2 before 10); any other name sorts
        # after them alphabetically instead of crashing int().
        stem = os.path.splitext(name)[0]
        if stem.isascii() and stem.isdigit():
            return 0, int(stem), name
        return 1, 0, name

    # Filter before sorting so stray files (Thumbs.db, .DS_Store, ...) never
    # reach the numeric sort key.
    png_files = [name for name in os.listdir(input_folder) if name.endswith('.png')]

    failed = []
    for filename in sorted(png_files, key=page_order):
        input_path = os.path.join(input_folder, filename)
        output_path = os.path.join(output_folder, filename)

        result = subprocess.run([pngquant_path, "--force", "--output", output_path, input_path])
        if result.returncode == 0:
            print(f"Image '{filename}' compressed.")
        else:
            # Keep going: one bad page should not discard the rest of the book.
            failed.append(filename)
            print(f"Image '{filename}' could not be compressed "
                  f"(pngquant exit code {result.returncode}).")

    if failed:
        print(f"{len(failed)} picture(s) could not be compressed.")
    else:
        print(f"All Pictures compressed and saved in folder {output_folder}")
8 changes: 4 additions & 4 deletions src/cropPic.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def crop_images() -> bool | None:
os.makedirs(output_folder)

# Get a list of files in the folder and sort them numerically
input_folder = 'src/rawPictures'
input_folder = 'files/rawPictures'
files = os.listdir(input_folder)
files.sort(key=lambda x: int(os.path.splitext(x)[0]))

Expand All @@ -33,14 +33,14 @@ def crop_images() -> bool | None:
cropped_image = image.crop(coordinates)
cropped_image.save(output_image_path)

print(f"Image {filename} has been cropped.")
print(f"Image '{filename}' has been cropped.")

except Exception as e:
print(f"Error: {e}")
return False

print(f"All pictures have been cropped and saved to folder {output_folder}.")
print(f"All pictures have been cropped and saved to folder '{output_folder}'.")
return True
else:
print(f"No Files in {input_folder}")
print(f"No Files in '{input_folder}'.")
return False
Binary file added src/pngquant/pngquant.exe
Binary file not shown.
61 changes: 32 additions & 29 deletions src/seleniumImage.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as ec
from selenium.common.exceptions import TimeoutException
from selenium.common.exceptions import TimeoutException, NoSuchElementException
import time
import os

Expand All @@ -13,13 +13,7 @@ def __init__(self, message="Login failed!"):
super().__init__(self.message)


def capture_book_pages(book_url: str) -> bool | None:
"""
This function makes a screenshot of every page of a provided book
:param book_url: URL in the SAP Press Library
:return:
"""

def capture_book_pages(book_url: str) -> bool:
directory = 'files/rawPictures'
if not os.path.exists(directory):
os.makedirs(directory)
Expand All @@ -34,7 +28,6 @@ def capture_book_pages(book_url: str) -> bool | None:
driver = webdriver.Chrome(options=options)
driver.get("https://www.sap-press.com/accounts/login/?next=/")

# Login
try:
email_el = driver.find_element(By.ID, "id_login-username")
email_el.send_keys(email)
Expand All @@ -49,51 +42,61 @@ def capture_book_pages(book_url: str) -> bool | None:
raise LoginFailedError()
except TimeoutException:
print("Logged in successfully!")
get_book_pages(driver=driver, book_url=book_url)

except LoginFailedError as e:
print(e)
driver.quit()
return False

# Navigate to book

def get_book_pages(driver, book_url, page_nr=1, max_attempts=3) -> bool | None:
    """Open *book_url* in the reader and screenshot page after page.

    Screenshots are written to ``files/rawPictures/`` and named after the
    running page number. The loop ends when the next-page control does not
    become visible within the wait timeout. The driver is quit on every
    return path.

    :param driver: Selenium WebDriver to use; the caller in this file passes
        one that has just logged in
    :param book_url: reader URL of the book to capture
    :param page_nr: number used for the next screenshot file name
    :param max_attempts: retries left after a NoSuchElementException
    :return: True when the last page was reached, False on any failure
    """
    driver.get(book_url)

    # find_elements returns an empty list when nothing matches, whereas
    # find_element raises NoSuchElementException and would abort every
    # successful page load.
    if driver.find_elements(By.CLASS_NAME, "errorpage"):
        print("Page not found!")
        driver.quit()
        return False

    # Remove last position message and go full screen
    time.sleep(2)
    try:
        WebDriverWait(driver, 10).until(
            ec.element_to_be_clickable((By.XPATH, '//*[@id="lastReadPanel"]/a[2]'))).click()
    except TimeoutException:
        # The control did not become clickable; carry on without it.
        pass

    time.sleep(2)
    try:
        WebDriverWait(driver, 10).until(
            ec.element_to_be_clickable((By.XPATH, '//*[@id="reader_nav"]/ul/li[3]/ul/li[4]/a'))).click()
    except TimeoutException:
        pass

    driver.set_window_size(1500, 1800)

    try:
        while True:
            # Wait for the next-page control to be present and visible
            element = WebDriverWait(driver, 10).until(
                ec.visibility_of_element_located(
                    (By.XPATH, '/html/body/div[5]/div/div/div[4]/div[1]/div[1]/ul/li[8]/a[2]/span')))

            # timer to wait for the full loading of the page
            time.sleep(5)
            driver.save_screenshot(f'files/rawPictures/{str(page_nr).zfill(2)}.png')
            print(f"Page: '{page_nr}' Copied")
            page_nr += 1

            # Execute JavaScript to click the next page element
            driver.execute_script("arguments[0].click();", element)

    except TimeoutException:
        # No next-page control appeared in time: treated as the end of the book.
        print("Reached the last page.")

    except NoSuchElementException:
        if max_attempts > 0:
            print("Element not found. Retrying...")
            # NOTE(review): the retry reloads book_url while keeping page_nr;
            # whether the reader resumes at the page last shown is not visible
            # here. Confirm before relying on it.
            return get_book_pages(driver, book_url, page_nr, max_attempts - 1)
        print("Max attempts reached. Exiting.")
        driver.quit()
        return False

    except Exception as e:
        # Boundary handler: report, release the browser, signal failure.
        print(f"Error: {e}")
        driver.quit()
        return False

    driver.quit()
    return True
9 changes: 5 additions & 4 deletions src/zip_backup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@
import os


def create_backup():
def create_backup() -> bool:

# Check if the folder exists
input_folder = 'files/croppedPictures'
if not os.path.exists(input_folder):
print(f"Folder '{input_folder}' does not exist.")
return
return False

output_7z_file = "croppedPictures_backup.7z"

Expand All @@ -22,11 +22,12 @@ def create_backup():
# Add sorted files to the archive
for file in files:
file_path = os.path.join(input_folder, file)
print(f"Adding {file} to archive")
print(f"Adding '{file}' to archive")
archive.write(file_path, os.path.relpath(file_path, input_folder))

print(f"Folder '{input_folder}' successfully compressed to '{output_7z_file}'.")
return True

else:
print(f"No Files in {input_folder}, nothing to archive")
print(f"No Files in '{input_folder}', nothing to archive")
return False

0 comments on commit bb30102

Please sign in to comment.