Skip to content

Commit

Permalink
Merge pull request #241 from Living-with-machines/update_file_saving
Browse files: browse the repository at this point in the history
Update file saving - save as csv files
  • Loading branch information
rwood-97 authored Jul 24, 2023
2 parents 445618e + e375dc5 commit 9ed7c86
Show file tree
Hide file tree
Showing 17 changed files with 152 additions and 101 deletions.
2 changes: 1 addition & 1 deletion docs/source/Worked-examples/mnist_pipeline.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -1237,7 +1237,7 @@
"metadata": {},
"outputs": [],
"source": [
"predictions_df.to_csv(\"./predictions_df.csv\", sep=\"\\t\", index_label=\"image_id\")"
"predictions_df.to_csv(\"./predictions_df.csv\", sep=\",\", index_label=\"image_id\")"
]
}
],
Expand Down
2 changes: 1 addition & 1 deletion docs/source/Worked-examples/plant_pipeline.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2296,7 +2296,7 @@
"metadata": {},
"outputs": [],
"source": [
"predictions_df.to_csv(\"./predictions_df.csv\", sep=\"\\t\", index_label=\"image_id\")"
"predictions_df.to_csv(\"./predictions_df.csv\", sep=\",\", index_label=\"image_id\")"
]
},
{
Expand Down
8 changes: 4 additions & 4 deletions mapreader/annotate/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -529,7 +529,7 @@ def prepare_annotation(
metadata=annot_file,
index_col=0,
ignore_mismatch=True,
delimiter="\t",
delimiter=",",
tree_level=tree_level,
)

Expand Down Expand Up @@ -584,7 +584,7 @@ def prepare_annotation(
metadata=annot_file,
index_col=0,
ignore_mismatch=True,
delimiter="\t",
delimiter=",",
tree_level=tree_level,
)
# convert images to dataframe
Expand Down Expand Up @@ -659,7 +659,7 @@ def save_annotation(

# Read an existing annotation file (for the same task and userID)
try:
image_df = pd.read_csv(annot_file, sep="\t", index_col=0)
image_df = pd.read_csv(annot_file, index_col=0)
except:
image_df = pd.DataFrame(columns=["image_id", "image_path", "label"])

Expand All @@ -684,7 +684,7 @@ def save_annotation(

if len(image_df) > 0:
#image_df = image_df.set_index("image_id")
image_df.to_csv(annot_file, mode="w", sep="\t")
image_df.to_csv(annot_file, mode="w")
print(f"[INFO] Save {newly_annotated} new annotations to {annot_file}")
print(f"[INFO] {new_labels} labels were not already stored")
print(f"[INFO] Total number of saved annotations: {len(image_df)}")
Expand Down
8 changes: 4 additions & 4 deletions mapreader/classify/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def __init__(
self,
patch_df: Union[pd.DataFrame, str],
transform: Union[str, transforms.Compose, Callable],
delimiter: str = "\t",
delimiter: str = ",",
patch_paths_col: Optional[str] = "image_path",
label_col: Optional[str] = None,
label_index_col: Optional[str] = None,
Expand All @@ -47,7 +47,7 @@ def __init__(
and performs image transformations can be used.
At minimum, transform should be ``torchvision.transforms.ToTensor()``.
delimiter : str, optional
The delimiter to use when reading the dataframe. By default ``"\t"``.
The delimiter to use when reading the dataframe. By default ``","``.
patch_paths_col : str, optional
The name of the column in the DataFrame containing the image paths. Default is "image_path".
label_col : str, optional
Expand Down Expand Up @@ -329,7 +329,7 @@ def __init__(
patch_df: Union[pd.DataFrame, str],
transform1: str,
transform2: str,
delimiter: str = "\t",
delimiter: str = ",",
patch_paths_col: Optional[str] = "image_path",
label_col: Optional[str] = None,
label_index_col: Optional[str] = None,
Expand All @@ -356,7 +356,7 @@ def __init__(
Torchvision transform to be applied to target images.
Either "train" or "val".
delimiter : str
The delimiter to use when reading the csv file. By default ``"\t"``.
The delimiter to use when reading the csv file. By default ``","``.
patch_paths_col : str, optional
The name of the column in the DataFrame containing the image paths. Default is "image_path".
label_col : str, optional
Expand Down
25 changes: 12 additions & 13 deletions mapreader/classify/load_annotations.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def __init__(self):
def load(
self,
annotations: Union[str, pd.DataFrame],
delimiter: Optional[str] = "\t",
delimiter: Optional[str] =",",
id_col: Optional[str] = "image_id",
patch_paths_col: Optional[str] = "image_path",
label_col: Optional[str] = "label",
Expand All @@ -48,7 +48,7 @@ def load(
The annotations.
Can either be the path to a csv file or a pandas.DataFrame.
delimiter : Optional[str], optional
The delimiter to use when loading the csv file as a dataframe, by default "\t".
The delimiter to use when loading the csv file as a dataframe, by default ",".
id_col : Optional[str], optional
The name of the column which contains the image IDs, by default "image_id".
patch_paths_col : Optional[str], optional
Expand Down Expand Up @@ -123,7 +123,7 @@ def load(
def _load_annotations_csv(
self,
annotations: str,
delimiter: Optional[str] = "\t",
delimiter: Optional[str] = ",",
scramble_frame: Optional[bool] = False,
reset_index: Optional[bool] = False,
) -> pd.DataFrame:
Expand All @@ -134,7 +134,7 @@ def _load_annotations_csv(
annotations : str
The path to the annotations csv file.
delimiter : Optional[str], optional
The delimiter to use when loading the csv file as a dataframe, by default "\t".
The delimiter to use when loading the csv file as a dataframe, by default ",".
scramble_frame : Optional[bool], optional
Whether to shuffle the rows of the dataframe, by default False.
reset_index : Optional[bool], optional
Expand Down Expand Up @@ -488,8 +488,8 @@ def create_datasets(
assert len(self.annotations) == len(df_train) + len(df_val) + len(df_test)

else:
df_val = labels_temp
df_test = None
df_val = df_temp
df_test = pd.DataFrame(columns=self.annotations.columns)
assert len(self.annotations) == len(df_train) + len(df_val)

train_dataset = PatchDataset(
Expand All @@ -506,13 +506,12 @@ def create_datasets(
label_col=self.label_col,
label_index_col="label_index",
)
if df_test is not None:
test_dataset = PatchDataset(
df_test,
test_transform,
patch_paths_col=self.patch_paths_col,
label_col=self.label_col,
label_index_col="label_index",
test_dataset = PatchDataset(
df_test,
test_transform,
patch_paths_col=self.patch_paths_col,
label_col=self.label_col,
label_index_col="label_index",
)

datasets = {"train": train_dataset, "val": val_dataset, "test": test_dataset}
Expand Down
4 changes: 2 additions & 2 deletions mapreader/download/sheet_downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -555,13 +555,13 @@ def _save_metadata(
).T

if os.path.exists(out_filepath):
existing_metadata_df = pd.read_csv(out_filepath, sep="\t", index_col=0)
existing_metadata_df = pd.read_csv(out_filepath, sep=",", index_col=0)
metadata_df = pd.concat([existing_metadata_df, new_metadata_df], ignore_index=True)
metadata_df.drop_duplicates(subset=["grid_bb"], keep="first", inplace=True)
else:
metadata_df = new_metadata_df

metadata_df.to_csv(out_filepath, sep="\t")
metadata_df.to_csv(out_filepath, sep=",")

def _download_map_sheets(self, features: list, path_save: Optional[str] = "maps", metadata_fname: Optional[str] = "metadata.csv", overwrite: Optional[bool] = False):
"""Download map sheets from a list of features.
Expand Down
51 changes: 31 additions & 20 deletions mapreader/load/images.py
Original file line number Diff line number Diff line change
Expand Up @@ -260,7 +260,7 @@ def add_metadata(
self,
metadata: Union[str, pd.DataFrame],
index_col: Optional[Union[int, str]] = 0,
delimiter: Optional[str] = "\t",
delimiter: Optional[str] = ",",
columns: Optional[List[str]] = None,
tree_level: Optional[str] = "parent",
ignore_mismatch: Optional[bool] = False,
Expand All @@ -271,7 +271,7 @@ def add_metadata(
Parameters
----------
metadata : str or pandas.DataFrame
Path to a ``csv``, ``xls`` or ``xlsx`` file or a pandas DataFrame that contains the metadata information.
Path to a ``csv`` (or similar), ``xls`` or ``xlsx`` file or a pandas DataFrame that contains the metadata information.
index_col : int or str, optional
Column to use as the index when reading the file and converting into a pandas.DataFrame.
Accepts column indices or column names.
Expand All @@ -280,7 +280,7 @@ def add_metadata(
Only used if a file path is provided as the ``metadata`` parameter.
Ignored if ``columns`` parameter is passed.
delimiter : str, optional
Delimiter used in the ``csv`` file, by default ``"\t"``.
Delimiter used in the ``csv`` file, by default ``","``.
Only used if a ``csv`` file path is provided as
the ``metadata`` parameter.
Expand Down Expand Up @@ -323,31 +323,32 @@ def add_metadata(

else: #if not df
if os.path.isfile(metadata):
if metadata.endswith('csv'):
if metadata.endswith(('xls', 'xlsx')):
if columns:
metadata_df = pd.read_csv(
metadata, usecols=columns, delimiter=delimiter
metadata_df = pd.read_excel(
metadata, usecols=columns,
)
else:
metadata_df = pd.read_csv(
metadata, index_col=index_col, delimiter=delimiter
metadata_df = pd.read_excel(
metadata, index_col=index_col,
)
columns=list(metadata_df.columns)
elif metadata.endswith(('xls', 'xlsx')):

elif metadata.endswith('sv'): #csv, tsv, etc
if columns:
metadata_df = pd.read_excel(
metadata, usecols=columns,
metadata_df = pd.read_csv(
metadata, usecols=columns, delimiter=delimiter
)
else:
metadata_df = pd.read_excel(
metadata, index_col=index_col,
metadata_df = pd.read_csv(
metadata, index_col=index_col, delimiter=delimiter
)
columns=list(metadata_df.columns)


else:
raise ValueError(
"[ERROR] ``metadata`` should either be the path to a ``csv``, ``xls`` or ``xlsx`` file or a pandas DataFrame." # noqa
"[ERROR] ``metadata`` should either be the path to a ``csv`` (or similar), ``xls`` or ``xlsx`` file or a pandas DataFrame." # noqa
)

# identify image_id column
Expand Down Expand Up @@ -1225,7 +1226,12 @@ def calc_pixel_stats(
# Calculate std pixel values
self.patches[patch][f"std_pixel_{band}"] = img_std[i] / 255

def convert_images(self, save: Optional[bool] = False, save_format: Optional[str] ="csv") -> Tuple[pd.DataFrame, pd.DataFrame]:
def convert_images(
self,
save: Optional[bool] = False,
save_format: Optional[str] ="csv",
delimiter: Optional[str]=",",
) -> Tuple[pd.DataFrame, pd.DataFrame]:
"""
Convert the ``MapImages`` instance's ``images`` dictionary into pandas
DataFrames for easy manipulation.
Expand All @@ -1239,6 +1245,8 @@ def convert_images(self, save: Optional[bool] = False, save_format: Optional[str
If ``save = True``, the file format to use when saving the dataframes.
Options of csv ("csv") or excel ("excel" or "xlsx").
By default, "csv".
delimiter : str, optional
The delimiter to use when saving the dataframe. By default ``","``.
Returns
-------
Expand All @@ -1255,9 +1263,9 @@ def convert_images(self, save: Optional[bool] = False, save_format: Optional[str
if save:

if save_format == "csv":
parent_df.to_csv("parent_df.csv", sep="\t")
parent_df.to_csv("parent_df.csv", sep=delimiter)
print('[INFO] Saved parent dataframe as "parent_df.csv"')
patch_df.to_csv("patch_df.csv", sep="\t")
patch_df.to_csv("patch_df.csv", sep=delimiter)
print('[INFO] Saved patch dataframe as "patch_df.csv"')
elif save_format in ["excel", "xlsx"]:
parent_df.to_excel("parent_df.xlsx")
Expand Down Expand Up @@ -1872,6 +1880,7 @@ def load_csv(
clear_images: Optional[bool] = False,
index_col_patch: Optional[int] = 0,
index_col_parent: Optional[int] = 0,
delimiter: Optional[str] = ",",
) -> None:
"""
Load CSV files containing information about parent and patches,
Expand All @@ -1891,6 +1900,8 @@ def load_csv(
Column to set as index for the patch DataFrame, by default ``0``.
index_col_parent : int, optional
Column to set as index for the parent DataFrame, by default ``0``.
delimiter : str, optional
The delimiter to use when reading the dataframe. By default ``","``.
Returns
-------
Expand All @@ -1905,12 +1916,12 @@ def load_csv(
raise ValueError("[ERROR] Please pass ``patch_path`` as string.")

if os.path.isfile(parent_path):
parent_df = pd.read_csv(parent_path, index_col=index_col_parent)
parent_df = pd.read_csv(parent_path, index_col=index_col_parent, sep=delimiter)
else:
raise ValueError(f"[ERROR] {parent_path} cannot be found.")

if os.path.isfile(patch_path):
patch_df = pd.read_csv(patch_path, index_col=index_col_patch)
patch_df = pd.read_csv(patch_path, index_col=index_col_patch, sep=delimiter)
else:
raise ValueError(f"[ERROR] {patch_path} cannot be found.")

Expand Down
Loading

0 comments on commit 9ed7c86

Please sign in to comment.