-
Notifications
You must be signed in to change notification settings - Fork 266
/
Copy pathchunkspec.c
389 lines (353 loc) · 11.3 KB
/
chunkspec.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
/*********************************************************************
* Copyright 2018, UCAR/Unidata
* See netcdf/COPYRIGHT file for copying and redistribution conditions.
* $Id $
*********************************************************************/
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <errno.h>
#include "netcdf.h"
#include "list.h"
#include "utils.h"
#include "chunkspec.h"
/*
 * Structure mapping dimension IDs to corresponding chunksizes.
 * Filled in by dimchunkspec_parse() from a spec of the form
 * "dim1/n1,dim2/n2,...": idimids[i] and chunksizes[i] describe the same
 * dimension, and both arrays hold ndims entries. The single file-scope
 * instance is zeroed by chunkspecinit().
 */
static struct DimChunkSpecs {
size_t ndims; /* number of dimensions in chunkspec string */
int *idimids; /* (input) ids for dimensions in chunkspec string */
size_t *chunksizes; /* corresponding chunk sizes */
bool_t omit; /* true if chunking to be turned off */
} dimchunkspecs;
/*
 * Per-variable chunk specification, built by varchunkspec_parse() from a
 * spec of the form "var:..." and stored in the varchunkspecs list.
 */
struct VarChunkSpec {
size_t rank; /* number of chunk sizes given in the chunkspec string */
size_t chunksizes[NC_MAX_VAR_DIMS]; /* corresponding chunk sizes */
bool_t omit; /* true if chunking to be turned off */
int kind; /* storage kind: NC_CHUNKED, NC_CONTIGUOUS or NC_COMPACT */
int igrpid; /* container of the (input) variable */
int ivarid; /* (input) Variable whose chunks are specified */
};
/* List<VarChunkSpec>: created by chunkspecinit(), appended to by varchunkspec_parse() */
static List* varchunkspecs = NULL;
/* Forward declarations of the two parsers that chunkspec_parse() dispatches to */
static int dimchunkspec_parse(int ncid, const char *spec);
static int varchunkspec_parse(int ncid, const char *spec);
/*
 * Set up the module's file-scope state: reset the per-dimension spec to
 * all zeros and create the per-variable spec list if it does not exist yet.
 */
void
chunkspecinit(void)
{
    /* The two steps are independent of each other */
    memset(&dimchunkspecs, 0, sizeof(dimchunkspecs));

    if(!varchunkspecs) {
        varchunkspecs = listnew();
    }
}
/*
 * Parse a chunkspec string of either kind.
 * A spec containing ':' is handed to the per-variable parser; any other
 * non-empty spec is handed to the per-dimension parser. A NULL or empty
 * spec means "use defaults" and is accepted without doing anything.
 * Returns NC_NOERR if no error, otherwise whatever the selected parser
 * returns (NC_EINVAL if spec was malformed).
 */
int
chunkspec_parse(int igrp, const char *spec)
{
    int is_per_variable;

    if(spec == NULL || spec[0] == '\0') {
        return NC_NOERR; /* Use defaults */
    }

    /* Only per-variable specs ("var:...") contain a colon */
    is_per_variable = (strchr(spec, ':') != NULL);

    return is_per_variable ? varchunkspec_parse(igrp, spec)
                           : dimchunkspec_parse(igrp, spec);
}
/*
 * Parse chunkspec string and convert into dimchunkspec structure.
 *   igrp: location ID of open netCDF file or group in an open file
 *   spec: string of form
 *           dim1/n1,dim2/n2,...,dimk/nk
 *         specifying chunk size (ni) to be used for dimension named
 *         dimi. Dimension names may be absolute,
 *         e.g. "/grp_a/grp_a1/dim". The "ni" part of the spec may be
 *         omitted (the '/' must still be present), in which case it is
 *         assumed to be the entire dimension size. That is also the
 *         default for dimensions not mentioned in the string. However,
 *         for unlimited dimensions, the default is a default size:
 *         4 megabytes or the existing unlimited size if smaller.
 *         If the chunkspec string is "/" (or "//"), specifying no
 *         dimensions or chunk sizes, it indicates chunking to be turned
 *         off on output.
 *         A comma preceded by a backslash is not treated as a separator.
 *
 * Any result of an earlier call is discarded first. On failure the stored
 * spec is left empty (ndims == 0), so the accessors never see partly
 * filled arrays.
 *
 * Returns NC_NOERR if no error; NC_EINVAL if spec has consecutive
 * unescaped commas, a part has no '/', or a chunk size is not a number
 * >= 1 that fits the integer type; otherwise the error returned by
 * nc_inq_dimid2() or nc_inq_dimlen().
 */
static int
dimchunkspec_parse(int igrp, const char *spec)
{
    const char *cp;        /* character cursor */
    const char *pp = spec; /* previous char cursor for detecting escapes */
    const char *np;        /* beginning of current dimension name */
    char *dimname = NULL;  /* copy of current dimension name; freed at done */
    size_t ndims = 0;
    size_t idim;
    int ret = NC_NOERR;
    int comma_seen = 0;

    /* Release whatever an earlier parse left behind (free(NULL) is a no-op) */
    free(dimchunkspecs.idimids);
    free(dimchunkspecs.chunksizes);
    dimchunkspecs.idimids = NULL;
    dimchunkspecs.chunksizes = NULL;
    dimchunkspecs.ndims = 0;
    dimchunkspecs.omit = false;

    if (!spec || *spec == '\0') /* default chunking */
        goto done;

    /* Special rule: // is treated as equivalent to / */
    if ((spec[0] == '/' && spec[1] == '\0')
        || (spec[0] == '/' && spec[1] == '/' && spec[2] == '\0')) { /* no chunking */
        dimchunkspecs.omit = true;
        goto done;
    }

    /* Count unescaped commas, handle consecutive unescaped commas as error */
    for(cp = spec; *cp; cp++) {
        if(*cp == ',' && *pp != '\\') {
            if(comma_seen) { /* consecutive commas detected */
                ret = NC_EINVAL;
                goto done;
            }
            comma_seen = 1;
            ndims++;
        } else {
            comma_seen = 0;
        }
        pp = cp;
    }
    ndims++; /* k-1 separators delimit k parts */

    dimchunkspecs.ndims = ndims;
    dimchunkspecs.idimids = (int *) emalloc(ndims * sizeof(int));
    dimchunkspecs.chunksizes = (size_t *) emalloc(ndims * sizeof(size_t));

    /* Look up dimension ids and assign chunksizes */
    pp = spec;
    np = spec;
    idim = 0;
    for(cp = spec; ; cp++) {
        if(*cp == '\0' || (*cp == ',' && *pp != '\\')) { /* found end of "dim/nn" part */
            char *dp;
            int dimid;
            size_t chunksize;

            for(; pp > np && *pp != '/'; pp--) { /* look backwards for "/" */
            }
            if(*pp != '/') { /* no '/' found, no chunksize specified for dimension */
                ret = NC_EINVAL;
                goto done;
            }

            /* extract dimension name: everything before the last '/' */
            dimname = (char *) emalloc((size_t)(pp - np + 1));
            dp = dimname;
            while(np < pp) {
                *dp++ = *np++;
            }
            *dp = '\0';

            /* look up dimension id from dimension pathname */
            ret = nc_inq_dimid2(igrp, dimname, &dimid);
            if(ret != NC_NOERR)
                goto done;
            dimchunkspecs.idimids[idim] = dimid;

            /* parse and assign corresponding chunksize */
            pp++; /* now points to first digit of chunksize, ',', or '\0' */
            if(*pp == ',' || *pp == '\0') { /* no size specified, use dim len */
                size_t dimlen;
                ret = nc_inq_dimlen(igrp, dimid, &dimlen);
                if(ret != NC_NOERR)
                    goto done;
                chunksize = dimlen;
            } else { /* convert nnn string to long long integer */
                char *ep;
                long long val;

                /* strtol/strtoll only set errno on failure, so clear it
                 * first; otherwise a stale ERANGE rejects a valid size */
                errno = 0;
#ifdef HAVE_STRTOLL
                val = strtoll(pp, &ep, 0);
#else
                val = strtol(pp, &ep, 0);
#endif
                if(ep == pp || errno == ERANGE || val < 1) { /* allow chunksize bigger than dimlen */
                    ret = NC_EINVAL;
                    goto done;
                }
                chunksize = (size_t)val;
            }
            dimchunkspecs.chunksizes[idim] = chunksize;
            idim++;

            free(dimname);
            dimname = NULL;

            if(*cp == '\0')
                break;
            /* set np to point to first char after comma */
            np = cp + 1;
        }
        pp = cp;
    }

done:
    free(dimname);
    if(ret != NC_NOERR) {
        /* Do not leave a half-initialized spec visible to the accessors */
        free(dimchunkspecs.idimids);
        free(dimchunkspecs.chunksizes);
        dimchunkspecs.idimids = NULL;
        dimchunkspecs.chunksizes = NULL;
        dimchunkspecs.ndims = 0;
    }
    return ret;
}
/*
 * Return size in chunkspec string specified for dimension corresponding
 * to indimid, 0 if that dimension was not mentioned in the string.
 */
size_t
dimchunkspec_size(int indimid)
{
    size_t idim; /* same type as dimchunkspecs.ndims: no signed/unsigned comparison */

    for(idim = 0; idim < dimchunkspecs.ndims; idim++) {
        if(dimchunkspecs.idimids[idim] == indimid) {
            return dimchunkspecs.chunksizes[idim];
        }
    }
    return 0;
}
/*
 * Report how many dimensions had chunking specified in the chunkspec
 * string on the command line; the result is 0 if no chunkspec string
 * was specified.
 */
size_t
dimchunkspec_ndims(void)
{
    const size_t count = dimchunkspecs.ndims;

    return count;
}
/*
 * Tell the caller whether the per-dimension chunkspec given on the
 * command line explicitly asked for chunking to be omitted.
 */
bool_t
dimchunkspec_omit(void)
{
    const bool_t omit_requested = dimchunkspecs.omit;

    return omit_requested;
}
/* Return whether chunking should be omitted, due to explicit
* command-line specification. */
bool_t
dimchunkspec_exists(int indimid) {
int idim;
for(idim = 0; idim < dimchunkspecs.ndims; idim++) {
if(indimid == dimchunkspecs.idimids[idim]) {
return 1;
}
}
return 0;
}
/*
 * Parse per-variable chunkspec string and convert into varchunkspec structure.
 *   igrp:  location ID of open netCDF file or group in an open file
 *   spec0: string of one of the forms
 *            var:n1,n2,...,nk   chunk size (ni) to be used for the ith
 *                               dimension of the variable named var
 *            var:compact        record storage kind NC_COMPACT
 *            var:contiguous     record storage kind NC_CONTIGUOUS
 *            var:               the variable is not chunked at all
 *          "compact" and "contiguous" are matched case-insensitively.
 *          Variable names may be absolute, e.g. "/grp_a/grp_a1/var".
 *
 * On success a new VarChunkSpec is appended to varchunkspecs; on failure
 * nothing is added.
 *
 * Returns NC_NOERR if no error;
 *   NC_ENOMEM    if a working copy could not be allocated;
 *   NC_EINVAL    if spec0 has no ':', a chunk size is not a number, more
 *                than NC_MAX_VAR_DIMS sizes are given, or the number of
 *                sizes differs from the rank of the variable;
 *   NC_EBADCHUNK if a chunk size exceeds the length of its dimension;
 *   otherwise the error returned by the netCDF lookup that failed.
 */
static int
varchunkspec_parse(int igrp, const char *spec0)
{
    int ret = NC_NOERR;
    int rank;
    int i;
    int dimids[NC_MAX_VAR_DIMS];
    struct VarChunkSpec* chunkspec = NULL;
    char* spec = NULL;
    char* p, *q; /* for walking strings */

    /* Copy spec so we can modify in place */
    spec = strdup(spec0);
    if(spec == NULL) {ret = NC_ENOMEM; goto done;}

    chunkspec = calloc(1,sizeof(struct VarChunkSpec));
    if(chunkspec == NULL) {ret = NC_ENOMEM; goto done;}

    chunkspec->igrpid = igrp;

    /* First, find the end of the variable part */
    p = strchr(spec,':');
    if(p == NULL)
        {ret = NC_EINVAL; goto done;}
    *p++ = '\0'; /* spec is now just the variable name, p the remainder */

    /* Lookup the variable by name; this also records its containing group */
    ret = nc_inq_varid2(igrp, spec, &chunkspec->ivarid, &chunkspec->igrpid);
    if(ret != NC_NOERR) goto done;

    if(*p == '\0') { /* we have -c var: => do not chunk var */
        chunkspec->omit = 1;
        goto notchunked;
    }

    /* See if the remainder matches 'compact' or 'contiguous' */
    if(strcasecmp(p,"compact")==0) {
        chunkspec->kind = NC_COMPACT;
        goto notchunked;
    } else if(strcasecmp(p,"contiguous")==0) {
        chunkspec->kind = NC_CONTIGUOUS;
        goto notchunked;
    }
    chunkspec->kind = NC_CHUNKED;

    /* Iterate over dimension sizes */
    while(*p) {
        unsigned long dimsize;

        q = strchr(p,',');
        if(q == NULL)
            q = p + strlen(p); /* Fake the endpoint */
        else
            *q++ = '\0';

        /* Scan as unsigned long */
        if(sscanf(p,"%lu",&dimsize) != 1)
            {ret = NC_EINVAL; goto done;} /* Apparently not a valid dimension size */
        if(chunkspec->rank >= NC_MAX_VAR_DIMS)
            {ret = NC_EINVAL; goto done;} /* too many chunk sizes */
        chunkspec->chunksizes[chunkspec->rank] = (size_t)dimsize;
        chunkspec->rank++;
        p = q;
    }

    /* Now do some validity checking */
    /* Get some info about the var (from input) */
    ret = nc_inq_var(chunkspec->igrpid,chunkspec->ivarid,NULL,NULL,&rank,dimids,NULL);
    if(ret != NC_NOERR) goto done;

    /* 1. check # chunksizes == rank of variable */
    if(rank < 0 || (size_t)rank != chunkspec->rank)
        {ret = NC_EINVAL; goto done;}

    /* 2. check that chunksizes are legal for the given dimension sizes.
     * Query through the variable's own group, as nc_inq_var() above does,
     * rather than through the caller's igrp. */
    for(i=0;i<rank;i++) {
        size_t len;
        ret = nc_inq_dimlen(chunkspec->igrpid,dimids[i],&len);
        if(ret != NC_NOERR) goto done;
        if(chunkspec->chunksizes[i] > len) {ret = NC_EBADCHUNK; goto done;}
    }

notchunked:
    /* add the chunkspec to our list; the list now owns it */
    listpush(varchunkspecs,chunkspec);
    chunkspec = NULL;

done:
    free(chunkspec); /* non-NULL only if it was never stored */
    free(spec);
    return ret;
}
/* Accessors */
/* Return NC_CHUNKED || NC_CONTIGUOUS || NC_COMPACT */
int
varchunkspec_kind(int grpid, int varid)
{
for(size_t i=0;i<listlength(varchunkspecs);i++) {
struct VarChunkSpec* spec = listget(varchunkspecs,i);
if(spec->igrpid == grpid && spec->ivarid == varid)
return spec->kind;
}
return NC_CONTIGUOUS; /* default */
}
bool_t
varchunkspec_exists(int igrpid, int ivarid)
{
for(size_t i=0;i<listlength(varchunkspecs);i++) {
struct VarChunkSpec* spec = listget(varchunkspecs,i);
if(spec->igrpid == igrpid && spec->ivarid == ivarid)
return true;
}
return false;
}
bool_t
varchunkspec_omit(int igrpid, int ivarid)
{
for(size_t i=0;i<listlength(varchunkspecs);i++) {
struct VarChunkSpec* spec = listget(varchunkspecs,i);
if(spec->igrpid == igrpid && spec->ivarid == ivarid)
return spec->omit;
}
return dimchunkspecs.omit;
}
size_t*
varchunkspec_chunksizes(int igrpid, int ivarid)
{
for(size_t i=0;i<listlength(varchunkspecs);i++) {
struct VarChunkSpec* spec = listget(varchunkspecs,i);
if(spec->igrpid == igrpid && spec->ivarid == ivarid)
return spec->chunksizes;
}
return NULL;
}
size_t
varchunkspec_rank(int igrpid, int ivarid)
{
for(size_t i=0;i<listlength(varchunkspecs);i++) {
struct VarChunkSpec* spec = listget(varchunkspecs,i);
if(spec->igrpid == igrpid && spec->ivarid == ivarid)
return spec->rank;
}
return 0;
}