Skip to content

VictorAriz/Datasets-Costelaciones-Emisferio-Sur-datasets

Folders and files

Name
Last commit message
Last commit date

Latest commit

 

History

2 Commits
 
 
 
 
 
 

Repository files navigation

This section contains Python scripts used to organize, process and analyze datasets for YOLO model training. The available scripts and their purpose are listed below.

  1. Script: File and Annotation Count by Class. This script counts how many images and annotation files exist in each set (train, val, test) and also reports the number of annotations per class.

What does it do? Checks the distribution of the dataset.

Helps to identify imbalances between classes.

Confirms that the number of images and labels is as expected.

Code:

import os from collections import Counter

def contar_archivos_y_clases(ruta_base):
    """Print file counts and per-class annotation counts for each split.

    For each of the ``train``, ``val`` and ``test`` splits this looks at
    ``<ruta_base>/images/<split>`` and ``<ruta_base>/labels/<split>`` and
    prints:

    * the number of regular files in the images folder (every file is
      counted, whatever its extension),
    * the number of ``.txt`` files in the labels folder,
    * how many annotation lines start with each class token.

    Blank lines inside a label file are ignored instead of raising
    ``IndexError``.

    Args:
        ruta_base: Root folder containing the ``images`` and ``labels``
            sub-folders.

    Raises:
        OSError: Propagated from ``os.listdir``/``open`` when a split
            folder or label file cannot be read.
    """
    for conjunto in ('train', 'val', 'test'):
        ruta_images = os.path.join(ruta_base, 'images', conjunto)
        ruta_labels = os.path.join(ruta_base, 'labels', conjunto)

        # Only regular files count as images; sub-directories are skipped.
        num_imagenes = sum(
            1
            for nombre in os.listdir(ruta_images)
            if os.path.isfile(os.path.join(ruta_images, nombre))
        )
        print(f"Total de imágenes en {conjunto}: {num_imagenes}")

        clases = Counter()
        num_labels = 0
        for archivo in os.listdir(ruta_labels):
            if not archivo.endswith('.txt'):
                continue
            num_labels += 1
            with open(os.path.join(ruta_labels, archivo), 'r') as file:
                for linea in file:
                    campos = linea.split()
                    if not campos:
                        # Blank line (e.g. trailing newline): no annotation.
                        continue
                    # The first column of an annotation line is the class.
                    clases[campos[0]] += 1

        print(f"Total de archivos de anotación en {conjunto}: {num_labels}")
        print(f"Conteo de anotaciones por clase en {conjunto}:")
        for clase, conteo in clases.items():
            print(f"Clase {clase}: {conteo} anotaciones")
        print()

# Report on the ready-to-train YOLO dataset that lives in the current
# working directory.
ruta_base = os.path.join(
    os.getcwd(),
    '003 - Dataset YOLO - Listo Para Entrenar',
)
contar_archivos_y_clases(ruta_base)

  2. Script: Dataset Organization in YOLO Structure. This script takes a set of mixed images and annotations, groups them by class and then divides them into train, val and test sets following an approximate 70%-20%-10% ratio. Finally, it moves them into the YOLO-compatible folder structure.

What does it do? It prepares the dataset automatically for direct use with YOLO.

It groups the images by class, using the class of the first annotation in each label file.

Performs a stratified split (maintaining proportions per class).

Code:

import os import shutil import random from collections import defaultdict

# Folder holding the mixed images and label files, and the root of the
# YOLO-style output tree.
base_path = '003 - Dataset Listo Para Entrenamiento'
output_path = 'Dataset YOLO'

# Create <output_path>/<images|labels>/<train|val|test>; folders that
# already exist are left untouched.
for _kind in ('images', 'labels'):
    for _split in ('train', 'val', 'test'):
        os.makedirs(os.path.join(output_path, _kind, _split), exist_ok=True)

# File extensions recognised for images and for annotation files.
img_extensions = ['.jpg', '.jpeg', '.png']
label_extension = '.txt'

# Maps a class id to the label files whose first annotation has that class.
class_files = defaultdict(list)

print("Leyendo y agrupando archivos por clase...")
for label_file in os.listdir(base_path):
    if not label_file.endswith(label_extension):
        continue
    with open(os.path.join(base_path, label_file), 'r') as file:
        for line in file:
            fields = line.split()
            if not fields:
                # A blank line carries no annotation; indexing it would
                # raise IndexError, so keep looking for the first real one.
                continue
            class_id = fields[0]
            class_files[class_id].append(label_file)
            break  # Each file is registered once, under its first class.

def _find_image(label_file):
    """Return the name of the image in ``base_path`` paired with *label_file*.

    Every extension in ``img_extensions`` is tried in order, so ``.jpeg``
    and ``.png`` images are found as well as ``.jpg`` ones. Only the
    trailing label extension is stripped from the label file name.

    Raises:
        FileNotFoundError: If no image with a known extension exists.
    """
    stem = label_file[:-len(label_extension)]
    for extension in img_extensions:
        candidate = stem + extension
        if os.path.isfile(os.path.join(base_path, candidate)):
            return candidate
    raise FileNotFoundError(
        f"No image found for label file {label_file!r} in {base_path!r}"
    )


def _move_pair(label_file, split):
    """Move *label_file* and its image into the folders of *split*."""
    img_file = _find_image(label_file)
    shutil.move(
        os.path.join(base_path, img_file),
        os.path.join(output_path, 'images', split, img_file),
    )
    shutil.move(
        os.path.join(base_path, label_file),
        os.path.join(output_path, 'labels', split, label_file),
    )


# Running totals of label files assigned to each split.
total_train = 0
total_val = 0
total_test = 0

for class_id, files in class_files.items():
    print(f"Procesando clase {class_id} con {len(files)} archivos...")
    random.shuffle(files)
    num_total = len(files)
    # int() truncates, so the remainder of the 70/20 cut goes to test.
    num_train = int(0.7 * num_total)
    num_val = int(0.2 * num_total)

    train_files = files[:num_train]
    val_files = files[num_train:num_train + num_val]
    test_files = files[num_train + num_val:]

    total_train += len(train_files)
    total_val += len(val_files)
    total_test += len(test_files)

    for split, split_files in (
        ('train', train_files),
        ('val', val_files),
        ('test', test_files),
    ):
        for file in split_files:
            _move_pair(file, split)

    print(f"Clase {class_id} procesada y archivos movidos.")

print("\nResumen final del dataset:")
print(f"Total de archivos en train: {total_train}")
print(f"Total de archivos en val: {total_val}")
print(f"Total de archivos en test: {total_test}")
print("Dataset organizado en la estructura recomendada para YOLO con train, val y test.")

About

Dataset: Constellations of the Southern Hemisphere

Resources

Stars

Watchers

Forks

Releases

No releases published

Packages

No packages published