Skip to content

Commit

Permalink
Provide a Number of fixes/improvements to NCZarr
Browse files Browse the repository at this point in the history
Primary changes:
* Add an improved cache system to speed up performance.
* Fix NCZarr to properly handle scalar variables.

Misc. Related Changes:
* Added unit tests for extendible hash and for the generic cache.
* Add config parameter to set size of the NCZarr cache.
* Add initial performance tests but leave them unused.
* Add CRC64 support.
* Move location of ncdumpchunks utility from /ncgen to /ncdump.
* Refactor auth support.

Misc. Unrelated Changes:
* More cleanup of the S3 support
* Add support for S3 authentication in .rc files: HTTP.S3.ACCESSID and HTTP.S3.SECRETKEY.
* Remove the hashkey from the struct OBJHDR since it is never used.
  • Loading branch information
DennisHeimbigner committed Nov 20, 2020
1 parent 8279a07 commit eb3d9eb
Show file tree
Hide file tree
Showing 92 changed files with 4,401 additions and 709 deletions.
5 changes: 4 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -314,6 +314,7 @@ SET(DEFAULT_CHUNKS_IN_CACHE 10 CACHE STRING "Default number of chunks in cache."
SET(CHUNK_CACHE_SIZE 16777216 CACHE STRING "Default Chunk Cache Size.")
SET(CHUNK_CACHE_NELEMS 4133 CACHE STRING "Default maximum number of elements in cache.")
SET(CHUNK_CACHE_PREEMPTION 0.75 CACHE STRING "Default file chunk cache preemption policy for HDf5 files(a number between 0 and 1, inclusive.")
SET(CHUNK_CACHE_SIZE_NCZARR 4194304 CACHE STRING "Default NCZarr Chunk Cache Size.")
SET(MAX_DEFAULT_CACHE_SIZE 67108864 CACHE STRING "Default maximum cache size.")
SET(NETCDF_LIB_NAME "" CACHE STRING "Default name of the netcdf library.")
SET(TEMP_LARGE "." CACHE STRING "Where to put large temp files if large file tests are run.")
Expand Down Expand Up @@ -1004,7 +1005,7 @@ ENDIF(ENABLE_NCZARR_S3)

IF(NOT ENABLE_S3_SDK)
IF(ENABLE_NCZARR_S3 OR ENABLE_NCZARR_S3_TESTS)
message(FATAL_ERROR "AWS S3 libraries not found; please specify options DENABLE_NCZARR_S3=NO AND -DENABLE-NCZARR-S3-TESTS=NO")
message(FATAL_ERROR "AWS S3 libraries not found; please specify option DENABLE_NCZARR_S3=NO")
SET(ENABLE_NCZARR_S3 OFF CACHE BOOL "NCZARR S3 support" FORCE)
SET(ENABLE_NCZARR_S3_TESTS OFF CACHE BOOL "S3 tests" FORCE)
ENDIF()
Expand Down Expand Up @@ -1412,6 +1413,7 @@ CHECK_INCLUDE_FILE("ftw.h" HAVE_FTW_H)
CHECK_INCLUDE_FILE("libgen.h" HAVE_LIBGEN_H)
CHECK_INCLUDE_FILE("execinfo.h" HAVE_EXECINFO_H)
CHECK_INCLUDE_FILE("dirent.h" HAVE_DIRENT_H)
CHECK_INCLUDE_FILE("time.h" HAVE_TIME_H)

# Symbol Exists
CHECK_SYMBOL_EXISTS(isfinite "math.h" HAVE_DECL_ISFINITE)
Expand Down Expand Up @@ -1493,6 +1495,7 @@ ENDIF(SIZEOF_UINTPTR_T)
# __int64 is used on Windows for large file support.
CHECK_TYPE_SIZE("__int64" SIZEOF___INT_64)
CHECK_TYPE_SIZE("int64_t" SIZEOF_INT64_T)
CHECK_TYPE_SIZE("uint64" SIZEOF_UINT64)
CHECK_TYPE_SIZE("uint64_t" SIZEOF_UINT64_T)
CHECK_TYPE_SIZE("unsigned char" SIZEOF_UCHAR)
CHECK_TYPE_SIZE("unsigned short int" SIZEOF_UNSIGNED_SHORT_INT)
Expand Down
4 changes: 3 additions & 1 deletion RELEASE_NOTES.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@ This file contains a high-level description of this package's evolution. Release

## 4.8.0 - TBD

* [Enhancement] Give the client control over what parts of a DAP2 URL are URL encoded (i.e. %xx). This is to support the different decoding rules that servers apply to incoming URLS.
* [Bug Fix] Implement a better chunk cache system for NCZarr. The cache now uses extendible hashing plus a linked list for provide a combination of expandibility, fast access, and LRU behavior. See [Github #1845](https://github.com/Unidata/netcdf-c/pull/1845) for more information.
* [Enhancement] Provide .rc fields for S3 authentication: HTTP.S3.ACCESSID and HTTP.S3.SECRETKEY.
* [Enhancement] Give the client control over what parts of a DAP2 URL are URL encoded (i.e. %xx). This is to support the different decoding rules that servers apply to incoming URLS. See [Github #1884](https://github.com/Unidata/netcdf-c/pull/1844) for more information.
* [Bug Fix] Fix incorrect time offsets from `ncdump -t`, in some cases when the time `units` attribute contains both a **non-zero** time-of-day, and a time zone suffix containing the letter "T", such as "UTC". See [Github #1866](https://github.com/Unidata/netcdf-c/pull/1866) for more information.
* [Bug Fix] Cleanup the NCZarr S3 build options. See [Github #1869](https://github.com/Unidata/netcdf-c/pull/1869) for more information.
* [Bug Fix] Support aligned access for selected ARM processors. See [Github #1871](https://github.com/Unidata/netcdf-c/pull/1871) for more information.
Expand Down
9 changes: 9 additions & 0 deletions config.h.cmake.in
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,9 @@ are set when opening a binary file on Windows. */
/* default file chunk cache size in bytes. */
#cmakedefine CHUNK_CACHE_SIZE ${CHUNK_CACHE_SIZE}

/* default nczarr chunk cache size in bytes. */
#cmakedefine CHUNK_CACHE_SIZE_NCZARR ${CHUNK_CACHE_SIZE_NCZARR}

/* Define to one of `_getb67', `GETB67', `getb67' for Cray-2 and Cray-YMP
systems. This function is required for `alloca.c' support on those systems.
*/
Expand Down Expand Up @@ -383,6 +386,9 @@ are set when opening a binary file on Windows. */
/* Define to 1 if you have the <sys/types.h> header file. */
#cmakedefine HAVE_SYS_TYPES_H 1

/* Define to 1 if you have the <time.h> header file. */
#cmakedefine HAVE_TIME_H 1

/* Define to 1 if the system has the type `uchar'. */
#cmakedefine HAVE_UCHAR 1

Expand All @@ -392,6 +398,9 @@ are set when opening a binary file on Windows. */
/* Define to 1 if the system has the type `uint64'. */
#cmakedefine HAVE_UINT64 1

/* Define to 1 if the system has the type `uint64_t'. */
#cmakedefine HAVE_UINT64_t 1

/* Define to 1 if you have the <unistd.h> header file. */
#cmakedefine HAVE_UNISTD_H 1
#cmakedefine YY_NO_UNISTD_H 1
Expand Down
15 changes: 12 additions & 3 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -347,7 +347,7 @@ AC_ARG_WITH([default-chunks-in-cache],
AC_MSG_RESULT([$DEFAULT_CHUNKS_IN_CACHE])
AC_DEFINE_UNQUOTED([DEFAULT_CHUNKS_IN_CACHE], [$DEFAULT_CHUNKS_IN_CACHE], [num chunks in default per-var chunk cache.])

# Did the user specify a default cache size?
# Did the user specify a default cache size for HDF5?
AC_MSG_CHECKING([whether a default file cache size for HDF5 was specified])
AC_ARG_WITH([chunk-cache-size],
[AS_HELP_STRING([--with-chunk-cache-size=<integer>],
Expand Down Expand Up @@ -625,6 +625,15 @@ if test "x$enable_nczarr_s3_tests" = xyes ; then
fi
AM_CONDITIONAL(ENABLE_NCZARR_S3_TESTS, [test "x$enable_nczarr_s3_tests" = xyes])

# Did the user specify a default cache size for NCZarr?
AC_MSG_CHECKING([whether a default file cache size for NCZarr was specified])
AC_ARG_WITH([chunk-cache-size-nczarr],
[AS_HELP_STRING([--with-chunk-cache-size-nczarr=<integer>],
[Specify default maximum space used by the chunk cache NCZarr.])],
[CHUNK_CACHE_SIZE_NCZARR=$with_chunk_cache_size_nczarr], [CHUNK_CACHE_SIZE_NCZARR=4194304])
AC_MSG_RESULT([$CHUNK_CACHE_SIZE_NCZARR])
AC_DEFINE_UNQUOTED([CHUNK_CACHE_SIZE_NCZARR], [$CHUNK_CACHE_SIZE_NCZARR], [default nczarr chunk cache size.])

# This has multiversion capability
AC_MSG_CHECKING([whether multi-filter support is enabled])
has_multifilters=yes
Expand Down Expand Up @@ -941,7 +950,7 @@ AC_CHECK_HEADERS([sys/param.h])
AC_CHECK_HEADERS([libgen.h])
#AC_CHECK_HEADERS([locale.h])
AC_HEADER_STDC
AC_CHECK_HEADERS([locale.h stdio.h stdarg.h fcntl.h malloc.h stdlib.h string.h strings.h unistd.h sys/stat.h getopt.h sys/time.h sys/types.h dirent.h])
AC_CHECK_HEADERS([locale.h stdio.h stdarg.h fcntl.h malloc.h stdlib.h string.h strings.h unistd.h sys/stat.h getopt.h sys/time.h sys/types.h time.h dirent.h])

# Do sys/resource.h separately
#AC_CHECK_HEADERS([sys/resource.h],[havesysresource=1],[havesysresource=0])
Expand Down Expand Up @@ -1036,7 +1045,7 @@ AC_FUNC_ALLOCA
AC_CHECK_DECLS([isnan, isinf, isfinite],,,[#include <math.h>])
AC_STRUCT_ST_BLKSIZE
UD_CHECK_IEEE
AC_CHECK_TYPES([size_t, ssize_t, schar, uchar, longlong, ushort, uint, int64, uint64, size64_t, ssize64_t, _off64_t])
AC_CHECK_TYPES([size_t, ssize_t, schar, uchar, longlong, ushort, uint, int64, uint64, size64_t, ssize64_t, _off64_t, uint64_t])
AC_TYPE_OFF_T
AC_TYPE_UINTPTR_T
AC_C_CHAR_UNSIGNED
Expand Down
Loading

0 comments on commit eb3d9eb

Please sign in to comment.