This repository was archived by the owner on Oct 31, 2019. It is now read-only.

Commit 1900063

R SDK For ADLS: Rename all APIs similar to rhdfs (#15)
https://github.com/RevolutionAnalytics/RHadoop/wiki/user%3Erhdfs%3EHome
1 parent 090a4d9 commit 1900063

File tree

64 files changed: +2082 −2182 lines


NAMESPACE

+25 −25
@@ -12,22 +12,33 @@ S3method(str,azureActiveContext)
 S3method(summary,azureScriptActionHistory)
 export()
 export(AzureListRG)
-export(adlFileInputStreamAvailable)
 export(adlFileInputStreamCheck)
-export(adlFileInputStreamClose)
-export(adlFileInputStreamGetPos)
-export(adlFileInputStreamLength)
-export(adlFileInputStreamMark)
-export(adlFileInputStreamMarkSupported)
-export(adlFileInputStreamRead)
-export(adlFileInputStreamReadBuffered)
-export(adlFileInputStreamReset)
-export(adlFileInputStreamSeek)
-export(adlFileInputStreamSkip)
 export(adlFileOutputStreamCheck)
-export(adlFileOutputStreamClose)
-export(adlFileOutputStreamFlush)
-export(adlFileOutputStreamWrite)
+export(adls.append)
+export(adls.append.direct)
+export(adls.concat)
+export(adls.create)
+export(adls.delete)
+export(adls.file.info)
+export(adls.fileinputstream.available)
+export(adls.fileinputstream.close)
+export(adls.fileinputstream.getpos)
+export(adls.fileinputstream.length)
+export(adls.fileinputstream.mark)
+export(adls.fileinputstream.marksupported)
+export(adls.fileinputstream.read)
+export(adls.fileinputstream.readfully)
+export(adls.fileinputstream.reset)
+export(adls.fileinputstream.seek)
+export(adls.fileinputstream.skip)
+export(adls.fileoutputstream.close)
+export(adls.fileoutputstream.flush)
+export(adls.fileoutputstream.write)
+export(adls.ls)
+export(adls.mkdir)
+export(adls.read)
+export(adls.read.direct)
+export(adls.rename)
 export(as.adlFileInputStream)
 export(as.adlFileOutputStream)
 export(as.adlRetryPolicy)
@@ -46,17 +57,6 @@ export(azureCreateResourceGroup)
 export(azureCreateStorageAccount)
 export(azureCreateStorageContainer)
 export(azureDataConsumption)
-export(azureDataLakeAppend)
-export(azureDataLakeAppendBOS)
-export(azureDataLakeConcat)
-export(azureDataLakeCreate)
-export(azureDataLakeDelete)
-export(azureDataLakeGetFileStatus)
-export(azureDataLakeListStatus)
-export(azureDataLakeMkdirs)
-export(azureDataLakeOpenBIS)
-export(azureDataLakeRead)
-export(azureDataLakeRename)
 export(azureDeleteBatchAccount)
 export(azureDeleteBlob)
 export(azureDeleteDeploy)
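The change is a mechanical rename: every camelCase azureDataLake*/adlFile*Stream* export is replaced by an rhdfs-style dotted adls.* name. A minimal migration sketch, assuming an existing azureActiveContext `az` and assuming adls.ls takes the same (azureActiveContext, accountName, relativePath) arguments that adls.file.info does in this commit; the account and path values are invented:

    # azureDataLakeListStatus(az, "myadlsaccount", "tmp")    becomes:
    files <- adls.ls(az, "myadlsaccount", "tmp")
    # azureDataLakeGetFileStatus(az, "myadlsaccount", "tmp") becomes:
    info <- adls.file.info(az, "myadlsaccount", "tmp")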

R/AzureDataLake.R

+55 −345
Large diffs are not rendered by default.

R/AzureSMR-package.R

+20 −7
@@ -42,13 +42,26 @@
 #' - [azurePricingRates()]
 #' - [azureExpenseCalculator()]
 #' * Azure Data Lake Store functions:
-#' - [azureDataLakeListStatus()]
-#' - [azureDataLakeGetFileStatus()]
-#' - [azureDataLakeMkdirs()]
-#' - [azureDataLakeCreate()]
-#' - [azureDataLakeAppend()]
-#' - [azureDataLakeRead()]
-#' - [azureDataLakeDelete()]
+#' - [adls.ls()]
+#' - [adls.file.info()]
+#' - [adls.mkdir()]
+#' - [adls.create()]
+#' - [adls.delete()]
+#' - [adls.rename()]
+#' - [adls.concat()]
+#' - [adls.append()]
+#' - [adls.fileoutputstream.write()]
+#' - [adls.fileoutputstream.flush()]
+#' - [adls.fileoutputstream.close()]
+#' - [adls.read()]
+#' - [adls.fileinputstream.read()]
+#' - [adls.fileinputstream.seek()]
+#' - [adls.fileinputstream.skip()]
+#' - [adls.fileinputstream.available()]
+#' - [adls.fileinputstream.length()]
+#' - [adls.fileinputstream.getpos()]
+#' - [adls.fileinputstream.close()]
+#'
 #'
 #' @name AzureSMR
 #' @aliases AzureSMR-package
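The listing groups the renamed surface into metadata calls (adls.ls, adls.file.info), whole-file convenience calls (adls.create, adls.append, adls.read, adls.delete), and stream-level calls (adls.fileinputstream.*, adls.fileoutputstream.*). A round-trip sketch under stated assumptions: createAzureContext() is AzureSMR's existing context constructor, and the convenience calls are assumed to follow the (azureActiveContext, accountName, relativePath, ...) shape that adls.file.info has in this commit; all account and path values are invented:

    az <- createAzureContext(tenantID = "...", clientID = "...", authKey = "...")
    adls.mkdir(az, "myadlsaccount", "tmp/demo")                # signature assumed
    adls.create(az, "myadlsaccount", "tmp/demo/a.txt")         # signature assumed
    adls.append(az, "myadlsaccount", "tmp/demo/a.txt",
                contents = charToRaw("hello adls"))            # signature assumed
    bytes <- adls.read(az, "myadlsaccount", "tmp/demo/a.txt")  # signature assumed
    rawToChar(bytes)
    adls.delete(az, "myadlsaccount", "tmp/demo")               # signature assumed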

R/internal.R

+290 −0
@@ -281,6 +281,296 @@ printADLSMessage <- function(fileName, functionName, message, error = NULL) {
   print(msg)
 }
 
+# ADLS Ingress - AdlFileOutputStream ----
+
+#' Create an adlFileOutputStream.
+#' Create a container (`adlFileOutputStream`) for holding variables used by the Azure Data Lake Store data functions.
+#'
+#' @inheritParams setAzureContext
+#' @param accountName the account name
+#' @param relativePath Relative path of a file/directory
+#' @param verbose Print tracing information (default FALSE).
+#' @return An `adlFileOutputStream` object
+#'
+#' @family Azure Data Lake Store functions
+adls.fileoutputstream.create <- function(azureActiveContext, accountName, relativePath, verbose = FALSE) {
+  azEnv <- new.env(parent = emptyenv())
+  azEnv <- as.adlFileOutputStream(azEnv)
+  list2env(
+    list(azureActiveContext = "", accountName = "", relativePath = ""),
+    envir = azEnv
+  )
+  if (!missing(azureActiveContext)) azEnv$azureActiveContext <- azureActiveContext
+  if (!missing(accountName)) azEnv$accountName <- accountName
+  if (!missing(relativePath)) azEnv$relativePath <- relativePath
+  azEnv$leaseId <- uuid()
+  azEnv$blockSize <- getAzureDataLakeDefaultBufferSize()
+  azEnv$buffer <- raw(0)
+  # cursors/indices/offsets in R should start from 1 and NOT 0.
+  # Because of this there are many adjustments that need to be done throughout the code!
+  azEnv$cursor <- 1L
+  res <- adls.file.info(azureActiveContext, accountName, relativePath, verbose)
+  azEnv$remoteCursor <- as.integer(res$FileStatus.length) # this remote cursor starts from 0
+  azEnv$streamClosed <- FALSE
+  azEnv$lastFlushUpdatedMetadata <- FALSE
+
+  # additional param required to implement bad-offset handling
+  azEnv$numRetries <- 0
+
+  return(azEnv)
+}
+
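The environment above is the mutable write-side state: buffer accumulates bytes locally, cursor is the 1-based write position within buffer, and remoteCursor tracks the 0-based length already on the server. The intended lifecycle, sketched with the exported wrappers whose bodies live in R/AzureDataLake.R (not rendered in this commit view), so their argument shapes are assumptions:

    out <- adls.fileoutputstream.create(az, "myadlsaccount", "tmp/demo/a.txt")
    adls.fileoutputstream.write(out, charToRaw("chunk 1"))  # assumed: buffers until blockSize is hit
    adls.fileoutputstream.flush(out)                        # assumed: pushes the buffer via adls.append.core
    adls.fileoutputstream.close(out)                        # assumed: final flush, sets out$streamClosed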
+adls.fileoutputstream.addtobuffer <- function(adlFileOutputStream, contents, off, len) {
+  bufferlen <- getContentSize(adlFileOutputStream$buffer)
+  cursor <- adlFileOutputStream$cursor
+  if (len > bufferlen - (cursor - 1)) { # requesting to copy more than the remaining space in buffer
+    stop("IllegalArgumentException: invalid buffer copy requested in adls.fileoutputstream.addtobuffer")
+  }
+  # optimized arraycopy
+  adlFileOutputStream$buffer[cursor : (cursor + len - 1)] <- contents[off : (off + len - 1)]
+  adlFileOutputStream$cursor <- as.integer(cursor + len)
+}
+
+adls.fileoutputstream.dozerolengthappend <- function(adlFileOutputStream, azureDataLakeAccount, relativePath, offset, verbose = FALSE) {
+  resHttp <- adls.append.core(adlFileOutputStream$azureActiveContext, adlFileOutputStream,
+                              azureDataLakeAccount, relativePath,
+                              4194304L, contents = raw(0), contentSize = 0L,
+                              leaseId = adlFileOutputStream$leaseId, sessionId = adlFileOutputStream$leaseId,
+                              syncFlag = syncFlagEnum$METADATA, offsetToAppendTo = 0, verbose = verbose)
+  stopWithAzureError(resHttp)
+  # return TRUE (void-style return)
+  return(TRUE)
+}
+
+#' The Core Append API.
+#'
+#' @inheritParams setAzureContext
+#' @param azureDataLakeAccount Name of the Azure Data Lake account.
+#' @param adlFileOutputStream The adlFileOutputStream object to operate with.
+#' @param relativePath Relative path of a file.
+#' @param bufferSize Size of the buffer to be used.
+#' @param contents raw contents to be written to the file.
+#' @param contentSize size of `contents` to be written to the file.
+#' @param leaseId a String containing the lease ID (generated by client). Can be null.
+#' @param sessionId a String containing the session ID (generated by client). Can be null.
+#' @param syncFlag
+#'     Use `DATA` when writing more bytes to the same file path; the most performant option.
+#'     Use `METADATA` when the file's metadata (especially the file length retrieved
+#'     by the `adls.file.info` or `adls.ls` API calls) must also be updated;
+#'     carries the overhead of the metadata-update operation.
+#'     Use `CLOSE` when no more data is expected to be written to this path; the ADL backend
+#'     updates the metadata, closes the stream handle, and releases the lease on the
+#'     path if a valid leaseId is passed. An expensive operation that should be used
+#'     only when the last bytes are written.
+#' @param offsetToAppendTo offset at which to append to the file.
+#'     To let the server choose the offset, pass `-1`.
+#' @param verbose Print tracing information (default FALSE).
+#' @return response object
+#' @details Exceptions - IOException
+#'
+#' @family Azure Data Lake Store functions
+#'
+#' @references \url{https://docs.microsoft.com/en-us/azure/data-lake-store/data-lake-store-data-operations-rest-api#upload-data}
+#' @seealso \url{https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/WebHDFS.html#Append_to_a_File}
+#' @seealso \url{https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/WebHDFS.html#Buffer_Size}
+#' @seealso \url{https://hadoop.apache.org/docs/current/api/org/apache/hadoop/fs/FileSystem.html#append-org.apache.hadoop.fs.Path-int-org.apache.hadoop.util.Progressable-}
+adls.append.core <- function(azureActiveContext, adlFileOutputStream = NULL, azureDataLakeAccount, relativePath, bufferSize,
+                             contents, contentSize = -1L,
+                             leaseId = NULL, sessionId = NULL, syncFlag = NULL,
+                             offsetToAppendTo = -1,
+                             verbose = FALSE) {
+  if (!missing(azureActiveContext) && !is.null(azureActiveContext)) {
+    assert_that(is.azureActiveContext(azureActiveContext))
+    azureCheckToken(azureActiveContext)
+  }
+  assert_that(is_adls_account(azureDataLakeAccount))
+  assert_that(is_relativePath(relativePath))
+  assert_that(is_bufferSize(bufferSize))
+  assert_that(is_content(contents))
+  assert_that(is_contentSize(contentSize))
+  if (contentSize == -1) {
+    contentSize <- getContentSize(contents)
+  }
+  # allow a zero byte append
+  URL <- paste0(
+    getAzureDataLakeBasePath(azureDataLakeAccount),
+    getAzureDataLakeURLEncodedString(relativePath),
+    "?op=APPEND", "&append=true",
+    getAzureDataLakeApiVersion()
+  )
+  if (!missing(bufferSize) && !is.null(bufferSize)) URL <- paste0(URL, "&buffersize=", bufferSize)
+  if (!is.null(leaseId)) URL <- paste0(URL, "&leaseid=", leaseId)
+  if (!is.null(sessionId)) URL <- paste0(URL, "&filesessionid=", sessionId)
+  if (!is.null(syncFlag)) URL <- paste0(URL, "&syncFlag=", syncFlag)
+  if (offsetToAppendTo >= 0) URL <- paste0(URL, "&offset=", offsetToAppendTo)
+  retryPolicy <- createAdlRetryPolicy(azureActiveContext, verbose = verbose)
+  resHttp <- callAzureDataLakeApi(URL, verb = "POST",
+                                  azureActiveContext = azureActiveContext,
+                                  adlRetryPolicy = retryPolicy,
+                                  content = contents[1:contentSize],
+                                  verbose = verbose)
+  # update retry count - required for bad-offset handling
+  if (!is.null(adlFileOutputStream)) {
+    adlFileOutputStream$numRetries <- retryPolicy$retryCount
+  }
+  return(resHttp)
+}
+
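The syncFlag argument maps onto a chunked upload as follows; a sketch, assuming syncFlagEnum also exposes DATA and CLOSE alongside the METADATA value used above, with `az` an existing azureActiveContext and invented account/path values:

    chunk <- charToRaw(strrep("x", 1024))  # stand-in payload
    # middle appends: fastest path, no metadata update
    adls.append.core(az, NULL, "myadlsaccount", "tmp/demo/a.txt", 4194304L,
                     contents = chunk, syncFlag = syncFlagEnum$DATA)
    # when adls.file.info()/adls.ls() must observe the new length
    adls.append.core(az, NULL, "myadlsaccount", "tmp/demo/a.txt", 4194304L,
                     contents = chunk, syncFlag = syncFlagEnum$METADATA)
    # last append: update metadata, close the handle, release any lease
    adls.append.core(az, NULL, "myadlsaccount", "tmp/demo/a.txt", 4194304L,
                     contents = chunk, syncFlag = syncFlagEnum$CLOSE)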
+# ADLS Egress - AdlFileInputStream ----
+
+#' Create an adlFileInputStream.
+#' Create a container (`adlFileInputStream`) for holding variables used by the Azure Data Lake Store data functions.
+#'
+#' @inheritParams setAzureContext
+#' @param accountName the account name
+#' @param relativePath Relative path of a file/directory
+#' @param verbose Print tracing information (default FALSE).
+#' @return An `adlFileInputStream` object
+#'
+#' @family Azure Data Lake Store functions
+adls.fileinputstream.create <- function(azureActiveContext, accountName, relativePath, verbose = FALSE) {
+  azEnv <- new.env(parent = emptyenv())
+  azEnv <- as.adlFileInputStream(azEnv)
+  list2env(
+    list(azureActiveContext = "", accountName = "", relativePath = ""),
+    envir = azEnv
+  )
+  if (!missing(azureActiveContext)) azEnv$azureActiveContext <- azureActiveContext
+  if (!missing(accountName)) azEnv$accountName <- accountName
+  if (!missing(relativePath)) azEnv$relativePath <- relativePath
+  azEnv$directoryEntry <- adls.file.info(azureActiveContext, accountName, relativePath, verbose)
+  if (azEnv$directoryEntry$FileStatus.type == "DIRECTORY") {
+    msg <- paste0("ADLException: relativePath is not a file: ", relativePath)
+    stop(msg)
+  }
+  azEnv$sessionId <- uuid()
+  azEnv$blockSize <- getAzureDataLakeDefaultBufferSize()
+  azEnv$buffer <- raw(0)
+  # cursors/indices/offsets in R should start from 1 and NOT 0.
+  # Because of this there are many adjustments that need to be done throughout the code!
+  azEnv$fCursor <- 0L # cursor of buffer within file - offset of next byte to read from remote server
+  azEnv$bCursor <- 1L # cursor of read within buffer - offset of next byte to be returned from buffer
+  azEnv$limit <- 1L   # offset of next byte to be read into buffer from service (i.e., upper marker+1 of valid bytes in buffer)
+  azEnv$streamClosed <- FALSE
+
+  return(azEnv)
+}
+
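The three cursors drive the buffered read path: fCursor is the 0-based position in the remote file, bCursor the 1-based position of the next byte to hand back from buffer, and limit one past the last valid byte in buffer. A small sketch against the internal reader defined below; only adls.fileinputstream.close is assumed (its body is in R/AzureDataLake.R):

    fis <- adls.fileinputstream.create(az, "myadlsaccount", "tmp/demo/a.txt")
    n <- adls.fileinputstream.readfromservice(fis)  # fills fis$buffer; bytes read, or -1 at EOF
    if (n > 0) firstBlock <- fis$buffer[1:n]
    adls.fileinputstream.close(fis)                 # assumed signature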
+#' Core function to open and read a file.
+#'
+#' @inheritParams setAzureContext
+#' @param azureDataLakeAccount Name of the Azure Data Lake account.
+#' @param relativePath Relative path of a file/directory.
+#' @param offset Provide the offset to read from.
+#' @param length Provide length of data to read.
+#' @param bufferSize Size of the buffer to be used (not honoured).
+#' @param verbose Print tracing information (default FALSE).
+#' @return raw contents of the file.
+#' @details Exceptions - IOException
+#'
+#' @family Azure Data Lake Store functions
+#'
+#' @references \url{https://docs.microsoft.com/en-us/azure/data-lake-store/data-lake-store-data-operations-rest-api#read-data}
+#' @seealso \url{https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/WebHDFS.html#Open_and_Read_a_File}
+#' @seealso \url{https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/WebHDFS.html#Offset}
+#' @seealso \url{https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/WebHDFS.html#Length}
+#' @seealso \url{https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/WebHDFS.html#Buffer_Size}
+#' @seealso \url{https://hadoop.apache.org/docs/current/api/org/apache/hadoop/fs/FileSystem.html#open-org.apache.hadoop.fs.Path-int-}
+adls.read.core <- function(azureActiveContext,
+                           azureDataLakeAccount, relativePath,
+                           offset, length, bufferSize = 4194304L,
+                           verbose = FALSE) {
+  if (!missing(azureActiveContext) && !is.null(azureActiveContext)) {
+    assert_that(is.azureActiveContext(azureActiveContext))
+    azureCheckToken(azureActiveContext)
+  }
+  assert_that(is_adls_account(azureDataLakeAccount))
+  assert_that(is_relativePath(relativePath))
+  if (!missing(offset) && !is.null(offset)) assert_that(is_offset(offset))
+  if (!missing(length) && !is.null(length)) assert_that(is_length(length))
+  if (!missing(bufferSize) && !is.null(bufferSize)) assert_that(is_bufferSize(bufferSize))
+  URL <- paste0(
+    getAzureDataLakeBasePath(azureDataLakeAccount),
+    getAzureDataLakeURLEncodedString(relativePath),
+    "?op=OPEN", "&read=true",
+    getAzureDataLakeApiVersion()
+  )
+  if (!missing(offset) && !is.null(offset)) URL <- paste0(URL, "&offset=", offset)
+  if (!missing(length) && !is.null(length)) URL <- paste0(URL, "&length=", length)
+  if (!missing(bufferSize) && !is.null(bufferSize)) URL <- paste0(URL, "&buffersize=", bufferSize)
+  retryPolicy <- createAdlRetryPolicy(azureActiveContext, verbose = verbose)
+  resHttp <- callAzureDataLakeApi(URL,
+                                  azureActiveContext = azureActiveContext,
+                                  adlRetryPolicy = retryPolicy,
+                                  verbose = verbose)
+  return(resHttp)
+}
+
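adls.read.core issues a WebHDFS-style ranged GET, e.g. <base>/<path>?op=OPEN&read=true&...&offset=0&length=1048576 for a 1 MiB read from the start of the file. A direct-call sketch that unwraps the response the same way the stream readers below do; account and path values are invented:

    resHttp <- adls.read.core(az, "myadlsaccount", "tmp/demo/a.txt",
                              offset = 0, length = 1048576L)
    stopWithAzureError(resHttp)
    bytes <- content(resHttp, "raw", encoding = "UTF-8")  # httr::content, as used in the readers below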
+#' Read from service attempts to read `blocksize` bytes from the service.
+#' Returns the number of bytes actually read, which can be less than blocksize.
+#'
+#' @param adlFileInputStream the `adlFileInputStream` object to read from
+#' @param verbose Print tracing information (default FALSE)
+#' @return number of bytes actually read
+#'
+#' @family Azure Data Lake Store functions
+adls.fileinputstream.readfromservice <- function(adlFileInputStream, verbose = FALSE) {
+  # if there's still unread data in the buffer, don't overwrite it
+  if (adlFileInputStream$bCursor < adlFileInputStream$limit) return(0)
+  # at or past end of file
+  if (adlFileInputStream$fCursor >= adlFileInputStream$directoryEntry$FileStatus.length) return(-1)
+  if (adlFileInputStream$directoryEntry$FileStatus.length <= adlFileInputStream$blockSize)
+    return(adls.fileinputstream.slurpfullfile(adlFileInputStream))
+
+  # reset buffer to initial state - i.e., throw away existing data
+  adlFileInputStream$bCursor <- 1L
+  adlFileInputStream$limit <- 1L
+  if (is.null(adlFileInputStream$buffer)) adlFileInputStream$buffer <- raw(getAzureDataLakeDefaultBufferSize())
+
+  resHttp <- adls.read.core(adlFileInputStream$azureActiveContext,
+                            adlFileInputStream$accountName, adlFileInputStream$relativePath,
+                            adlFileInputStream$fCursor, adlFileInputStream$blockSize,
+                            verbose = verbose)
+  stopWithAzureError(resHttp)
+  data <- content(resHttp, "raw", encoding = "UTF-8")
+  bytesRead <- getContentSize(data)
+  adlFileInputStream$buffer[1:bytesRead] <- data[1:bytesRead]
+  adlFileInputStream$limit <- adlFileInputStream$limit + bytesRead
+  adlFileInputStream$fCursor <- adlFileInputStream$fCursor + bytesRead
+  return(bytesRead)
+}
+
+#' Reads the whole file into buffer. Useful when reading small files.
+#'
+#' @param adlFileInputStream the adlFileInputStream object to read from
+#' @param verbose Print tracing information (default FALSE)
+#' @return number of bytes actually read
+adls.fileinputstream.slurpfullfile <- function(adlFileInputStream, verbose = FALSE) {
+  if (is.null(adlFileInputStream$buffer)) {
+    adlFileInputStream$blocksize <- adlFileInputStream$directoryEntry$FileStatus.length
+    adlFileInputStream$buffer <- raw(adlFileInputStream$directoryEntry$FileStatus.length)
+  }
+
+  # reset buffer to initial state - i.e., throw away existing data
+  adlFileInputStream$bCursor <- adls.fileinputstream.getpos(adlFileInputStream) + 1L # preserve current file offset (may not be 0 if app did a seek before first read)
+  adlFileInputStream$limit <- 1L
+  adlFileInputStream$fCursor <- 0L # read from beginning
+
+  resHttp <- adls.read.core(adlFileInputStream$azureActiveContext,
+                            adlFileInputStream$accountName, adlFileInputStream$relativePath,
+                            adlFileInputStream$fCursor, adlFileInputStream$directoryEntry$FileStatus.length,
+                            verbose = verbose)
+  stopWithAzureError(resHttp)
+  data <- content(resHttp, "raw", encoding = "UTF-8")
+  bytesRead <- getContentSize(data)
+  adlFileInputStream$buffer[1:bytesRead] <- data[1:bytesRead]
+  adlFileInputStream$limit <- adlFileInputStream$limit + bytesRead
+  adlFileInputStream$fCursor <- adlFileInputStream$fCursor + bytesRead
+  return(bytesRead)
+}
+
 # ADLS Retry Policies ----
 
 #' NOTE: Following points on ADLS AdlsRetryPolicy:
