Description
This is my code. I am applying two types of thresholding to make the grayscale image to binary.
CODE:
import os
import numpy as np
import cv2
import matplotlib.pyplot as plt
def get_ocr_data(image_path, save_folder):
y_line = []
rows = []
image_read = cv2.imread(image_path)
image_read_gray = cv2.cvtColor(image_read, cv2.COLOR_BGR2GRAY)
image_read = image_read_gray[105:550, 20:590]
image_read1 = image_read_gray[105:550, 20:590]
image_read = cv2.resize(image_read, (0,0), fx=2, fy=2)
image_read1 = cv2.resize(image_read1, (0,0), fx=2, fy=2)
# gray = cv2.cvtColor(image_read, cv2.COLOR_BGR2GRAY)
height, width = image_read1.shape
image_read[image_read<200] = 255
image_read[image_read>230] = 255
image_read[image_read<230] = 0
cv2.imshow("image", image_read)
cv2.waitKey(0)
cv2.destroyAllWindows()
#sl.no
image_read = image_read[:, 0:50]
image_read = cv2.bitwise_not(image_read)
r_sum = np.sum(image_read,axis=1).tolist()
plt.plot(r_sum)
for i in range(0, len(r_sum)):
if r_sum[i] > 8000:
y_line.append(i)
line = [y_line[0]]
for i in range(0, len(y_line)-1):
if y_line[i+1] - y_line[i] > 50:
line.append(y_line[i+1])
for i in range(1,len(line)):
row = image_read1[line[0]:line[1],0:width].copy() # Make a copy of the row
rows.append(row)
# Save the cropped row
filename = f"row_{i}.png"
cv2.imwrite(os.path.join(save_folder, filename), row)
# Check if it's the last row
if i == len(line) - 1:
apply_threshold1(row, target_bgr_1, threshold_1)
else:
apply_threshold2(row, target_bgr_2, threshold_2)
del line[0]
print(f"{len(rows)} rows saved in {save_folder}")
def apply_threshold1(row, target_bgr_1, threshold_1):
target_bgr_1 = [147, 147, 147]
threshold_1= 35
black = [0, 0, 0]
for y in range(row.shape[0]):
for x in range(row.shape[1]):
bgr_values = row[y, x]
if (bgr_values[0] >= target_bgr_1[0] and bgr_values[1] >= target_bgr_1[1] and
bgr_values[2] >= target_bgr_1[2] and (bgr_values[0] - target_bgr_1[0]) <= threshold_1 and
(bgr_values[1] - target_bgr_1[1]) <= threshold_1 and (bgr_values[2] - target_bgr_1[2]) <= threshold_1):
row[y, x] = black
def apply_threshold2(row, target_bgr_2, threshold_2):
black = [0, 0, 0]
#Light
target_bgr_2= [200, 200, 200]
threshold_2= 58
for y in range(row.shape[0]):
for x in range(row.shape[1]):
bgr_values = row[y, x]
# Check if pixel has maximum value for target_bgr_2
if (bgr_values[0] >= target_bgr_2[0] and bgr_values[1] >= target_bgr_2[1] and
bgr_values[2] >= target_bgr_2[2] and (bgr_values[0] - target_bgr_2[0]) <= threshold_2 and
(bgr_values[1] - target_bgr_2[1]) <= threshold_2 and (bgr_values[2] - target_bgr_2[2]) <= threshold_2):
row[y, x] = black
_, binary = cv2.threshold(gray, 1, 255, cv2.THRESH_BINARY)
contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
bound_box_image = cv2.cvtColor(gray, cv2.COLOR_GRAY2BGR)
for contour in contours:
x, y, w, h = cv2.boundingRect(contour)
x -= 1
y -= 2
w += 2
h += 3
cv2.rectangle(bound_box_image, (x, y), (x + w, y + h), (0, 255, 0), 1)
cv2.imshow("Bounding Boxes", bound_box_image)
cv2.waitKey(0)
cv2.destroyAllWindows()
Define the image path and folder to save cropped rows
image_path = r"C:\Users\charlote\Documents\OCR_Akash\images\test02.png"
save_folder = r"C:\Users\charlote\Documents\OCR_Akash\output"
Define target bgr values for light and dark thresholds
target_bgr_1 = np.array([147, 147, 147]) # Dark
target_bgr_2 = np.array([200, 200, 200]) # Light
threshold_1 = 35
threshold_2 = 58
Create the folder if it doesn't exist
if not os.path.exists(save_folder):
os.makedirs(save_folder)
Call the function
get_ocr_data(image_path, save_folder)
Traceback (most recent call last):
File "c:\visu_ai\OCR_Akash\code\line_threshold.py", line 120, in
get_ocr_data(image_path, save_folder)
File "c:\visu_ai\OCR_Akash\code\line_threshold.py", line 51, in get_ocr_data
apply_threshold2(row, target_bgr_2, threshold_2)
File "c:\visu_ai\OCR_Akash\code\line_threshold.py", line 82, in apply_threshold2
if (bgr_values[0] >= target_bgr_2[0] and bgr_values[1] >= target_bgr_2[1] and
IndexError: invalid index to scalar variable.
ERROR: