Skip to content

Commit

Permalink
merge conflict
Browse files Browse the repository at this point in the history
Merge branch 'master' of github.com:decryptr/captcha

# Conflicts:
#	data-raw/trt.R
  • Loading branch information
jtrecenti committed Mar 1, 2023
2 parents 7503651 + 64e95c7 commit fd7be8b
Show file tree
Hide file tree
Showing 54 changed files with 815 additions and 171 deletions.
2 changes: 2 additions & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,5 @@
^_pkgdown\.yml$
^docs$
^pkgdown$
^doc$
^Meta$
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,5 @@ data-raw/*
!data-raw/trt.R
docs
inst/doc
/doc/
/Meta/
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ Suggests:
covr,
knitr,
rmarkdown,
testthat (>= 3.0.0)
testthat (>= 3.0.0),
withr
Config/testthat/edition: 3
VignetteBuilder: knitr
2 changes: 1 addition & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@ S3method(decrypt,default)
S3method(length,captcha)
S3method(plot,captcha)
S3method(print,captcha)
export(available_models)
export(captcha_accuracy)
export(captcha_annotate)
export(captcha_available_models)
export(captcha_dataset)
export(captcha_fit_model)
export(captcha_generate)
Expand Down
23 changes: 17 additions & 6 deletions R/annotate.R
Original file line number Diff line number Diff line change
@@ -1,23 +1,34 @@
#' @title Annotate captchas with their labels
#'
#' @description Given one or more Captchas, this function
#' prompts you to solve them mannually so that later you can train
#' a model with those labels. Annotated captchas are saved at `path`
#' prompts the user to solve them manually to train a model.
#' Annotated captchas are saved at `path`
#' with their labels in the filename separated by an underscore.
#'
#' @param files Either an object of class `captcha` or a character vector
#' with the paths to captcha files
#' @param labels Either `NULL` (for interactive classification) or
#' a character vector with labels for the Captchas
#' a character vector with labels for the Captchas. See details.
#' @param path Where to save the annotated captcha files.
#' If `NULL`, saves the files in the same folder as the unanswered counterparts.
#' @param rm_old Whether or not to delete unanswered captchas after
#' copying and renaming them
#' copying and renaming them.
#'
#' @return A character vector with the paths to the newly created files
#' @details
#' The `labels=`
#' parameter can handle situations where one knows the Captcha label.
#' For example, a workflow that uses an oracle might provide the
#' label automatically. When the label doesn't exist,
#' the `captcha_annotate()` function opens the prompt for classification
#' and shows the image using `plot()`.
#'
#' @return A vector with the paths of the modified files.
#'
#' @export
captcha_annotate <- function(files, labels = NULL, path = NULL, rm_old = FALSE) {
captcha_annotate <- function(files,
labels = NULL,
path = NULL,
rm_old = FALSE) {

if ("captcha" %in% class(files)) {
files <- files$path
Expand Down
85 changes: 80 additions & 5 deletions R/dataset.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,28 @@
#' File to torch tensor
#'
#' @param x file path
#' @param input_dim resize image to dimension
#' This function uses the `torchvision` package to read and transform the
#' image into a torch tensor. The function tries to adjust the dimensions to
#' deal with black and white or coloured images.
#'
#' @param x character vector with the paths to image files.
#' @param input_dim resize image to dimension. Defaults to 32x192, which is
#' a good default for many Captcha applications.
#'
#' @return torch tensor with dimensions `length(x)`x`3`x`input_dim`.
#'
#' @examples
#'
#' if (!torch::torch_is_installed()) {
#' torch::install_torch()
#' }
#'
#' captcha_file <- fs::dir_ls(
#' system.file("examples/captcha/", package = "captcha"
#' ))
#'
#' result <- captcha_transform_image(captcha_file)
#' class(result)
#' dim(result)
#'
#' @export
captcha_transform_image <- function(x, input_dim = c(32L, 192L)) {
Expand All @@ -23,9 +44,27 @@ adjust_dimensions <- function(img) {

#' File to response matrix (tensor)
#'
#' This function performs a one-hot encoding of the label, transforming a label
#' with `N` letters into a matrix of dimensions `N`x`length(vocab)`. All the
#' labels must have the same length.
#'
#' @param all_letters list of tokens for all files
#' @param vocab unique tokens
#'
#' @return torch tensor with dimensions `length(all_letters)`x`length(vocab)`
#' containing only zeros and ones. All rows sum to exactly one.
#'
#' @examples
#'
#' if (!torch::torch_is_installed()) {
#' torch::install_torch()
#' }
#'
#' vocab <- letters
#' resp <- captcha_transform_label(c("a","b","c","d","e"), vocab)
#' class(resp)
#' dim(resp)
#'
#' @export
captcha_transform_label <- function(all_letters, vocab) {

Expand All @@ -48,17 +87,53 @@ captcha_transform_label <- function(all_letters, vocab) {
torch::torch_stack()
}

#' Captcha datasets
#' Captcha dataset
#'
#' This object implements a dataset using the [torch::dataset()] framework.
#' It loads all the images in torch tensors, as well as the labels.
#'
#' @param root (string): root directory where the files are stored
#' @param transform_image (callable, optional): A function/transform
#' that takes in a file path and returns a torch tensor prepared
#' to feed the model.
#' to feed the model. By default, uses the [captcha_transform_image()]
#' function.
#' @param transform_label (callable, optional): A function/transform
#' that takes in the file paths and transform them.
#' that takes in the file paths and transforms them. By default, uses the
#' [captcha_transform_label()] function.
#' @param augmentation (function, optional) If not `NULL`, applies a
#' function to augment data with randomized preprocessing layers.
#'
#' This is an object of class `dataset_generator` created using
#' [torch::dataset()] function. It has an `initialize()` method that
#' takes a directory containing the input images,
#' then assigns all the information in-memory with the array data
#' structure for the response variable. It also has a `.getitem()` method that
#' correctly extracts one observation of the dataset in this data
#' structure, and a `.length()` method that correctly calculates the
#' number of Captchas of the dataset.
#'
#' The function calculates the vocabulary based on the identified values in
#' the dataset.
#'
#' @examples
#'
#' if (!torch::torch_is_installed()) {
#' torch::install_torch()
#' }
#'
#' annotated_folder <- system.file(
#' "examples/annotated_captcha",
#' package = "captcha"
#' )
#'
#' suppressMessages({
#' ds <- captcha_dataset(annotated_folder)
#' })
#'
#' # gets the first item (the only item in the example)
#' # returns a list with x and y torch tensors.
#' ds$.getitem(1)
#'
#' @export
captcha_dataset <- torch::dataset(
name = "my_captcha",
Expand Down
28 changes: 27 additions & 1 deletion R/decrypt.R
Original file line number Diff line number Diff line change
@@ -1,8 +1,34 @@
#' Function to solve Captchas
#'
#' @param files files to read. Can be a character vector or an object of class `captcha`.
#' Returns a label for an image using a fitted model. The image can be either a
#' character vector (of length one or more) or an object of class `captcha`.
#'
#' @param files files to read. Can be either a character vector
#' or an object of class `captcha`.
#' @param model model of class `luz_module_fitted`
#'
#' @return character vector of the predicted labels.
#'
#' @examples
#'
#' captcha_file <- system.file(
#' "examples/captcha/cadesp.jpg",
#' package = "captcha"
#' )
#'
#' cap <- read_captcha(captcha_file)
#'
#' if (interactive()) {
#' plot(cap)
#' }
#'
#' # the code below uses access to the internet. If you want to run locally,
#' # download the model object from the releases site.
#' if (interactive()) {
#' model <- captcha_load_model("cadesp")
#' decrypt(cap, model)
#' }
#'
#' @name decrypt
#' @export
decrypt <- function(files, model) {
Expand Down
33 changes: 30 additions & 3 deletions R/fit.R
Original file line number Diff line number Diff line change
@@ -1,17 +1,44 @@
#' Fit Captcha model
#'
#' Provides a basic interface for fitting custom models from fully labeled
#' data. Annotation can be done manually using the [captcha_annotate()]
#' function or with another method developed by the user.
#' The model uses a convolutional neural network architecture, similar
#' to the LeNet-5 model.
#'
#' @param dir directory where the classified images are
#' @param dir_valid (optional) directory to validation files
#' @param prop_valid proportion of total images considered to validation. Default 0.2.
#' @param prop_valid proportion of total images used for validation.
#' Defaults to 0.2.
#' @param dropout dropout hyperparameter. Default 0.25.
#' @param dense_units number of dense units to use after convolution steps. Default 200.
#' @param dense_units number of dense units to use after convolution steps.
#' Defaults to 200.
#' @param decay Weight decay applied each epoch.
#' @param batch_size Minibatch size. Default 40.
#' @param epochs Number of epochs to use. Default 100. The model uses early
#' stopping, so it is possible that the procedure ends before all
#' epochs have actually run.
#'
#' @return fitted model of class `luz_module_fitted`
#' @return fitted model of class `luz_module_fitted`.
#'
#' The modeling step has some assumptions about the file names.
#' Images must be in a folder and have the pattern
#' `path/to/file/<id>_<lab>.<ext>`, where:
#' * `<id>`: can be any name, preferably without accents or other
#' special characters, to avoid encoding issues. It usually contains a
#' name for the type and a hash to identify the image uniquely.
#' __Note__: When annotating a file, the id must be unique, as two
#' Captchas can have the same label.
#' * `<lab>`: is the Captcha label. It is a string of characters between
#' `[a-zA-Z0-9]`, which can be case-sensitive if necessary.
#' All labels must have the same length.
#' * `<ext>`: file extension. It can be `.png`, `.jpeg` or `.jpg`.
#' The operations also work for the `.svg` format, but it may have
#' problems due to the image's transparency.
#'
#' An important note is that the model stops fitting after 20 iterations
#' without a significant increase in accuracy (chosen as 1%; for more
#' details, see `vignette("advanced")`).
#'
#' @export
captcha_fit_model <- function(dir,
Expand Down
9 changes: 5 additions & 4 deletions R/generate.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#' Generate captcha
#' Generate R-Captcha
#'
#' Generates random captcha image
#' Generates a custom captcha image using the `magick` package. We call this
#' captcha R-Captcha.
#'
#' @param write_disk write image to disk? Defaults to `FALSE`.
#' @param path path to save images. Defaults to current directory.
Expand All @@ -17,8 +18,8 @@
#' @param p_noise probability to add random noise to image. Defaults to 40%.
#' @param p_lat probability to add LAT algorithm to image. Defaults to 0.
#'
#' @return list containing two elements: imagemagick object and captcha
#' value.
#' @return object of class `captcha`, which is a list containing three elements:
#' `image-magick` object and the label.
#'
#' @examples
#'
Expand Down
Loading

0 comments on commit fd7be8b

Please sign in to comment.