-
Notifications
You must be signed in to change notification settings - Fork 266
/
Copy pathtst_unicode.c
156 lines (129 loc) · 4.65 KB
/
tst_unicode.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
/* This is part of the netCDF package.
Copyright 2018 University Corporation for Atmospheric Research/Unidata.
See COPYRIGHT file for conditions of use.
This is a very simple example which writes a netCDF file with
Unicode names encoded with UTF-8.
$Id: tst_unicode.c,v 1.12 2008/10/20 01:48:08 ed Exp $
*/
#include <nc_tests.h>
#include "err_macros.h"
#include <stdlib.h>
#include <stdio.h>
#include "netcdf.h"
#include "ncpathmgr.h"
#ifdef _WIN32
#include <windows.h>
#include <direct.h>
#endif
#ifdef HAVE_LOCALE_H
#include <locale.h>
#endif
#undef DEBUG
/* Leading part of the name of the data file we will create: the ASCII
   text "tst_utf8_" followed by the three UTF-8 bytes E6 B5 B7 (U+6D77),
   NUL-terminated.  test() appends "_<model>.nc" to it. */
static const unsigned char prefix[] = "tst_utf8_\xe6\xb5\xb7";
/* Other meta-data */
#define UNITS "units"
#define NDIMS 1

/* Unnormalized UTF-8 spelling of an 8-character Greek word
   (kappa alpha lambda eta mu epsilon rho alpha).  The sixth character is
   given as the 3-byte code point U+1F73, whose canonical (NFC) equivalent
   is the 2-byte U+03AD that appears in norm_utf8.  The array is never
   modified, so it is const like prefix. */
static const unsigned char name_utf8[] = {
    0xCE, 0x9A,       /* U+039A GREEK CAPITAL LETTER KAPPA           : 2-bytes utf8 */
    0xCE, 0xB1,       /* U+03B1 GREEK SMALL LETTER ALPHA             : 2-bytes utf8 */
    0xCE, 0xBB,       /* U+03BB GREEK SMALL LETTER LAMDA (lambda)    : 2-bytes utf8 */
    0xCE, 0xB7,       /* U+03B7 GREEK SMALL LETTER ETA               : 2-bytes utf8 */
    0xCE, 0xBC,       /* U+03BC GREEK SMALL LETTER MU                : 2-bytes utf8 */
    0xE1, 0xBD, 0xB3, /* U+1F73 GREEK SMALL LETTER EPSILON WITH OXIA : 3-bytes utf8 */
    0xCF, 0x81,       /* U+03C1 GREEK SMALL LETTER RHO               : 2-bytes utf8 */
    0xCE, 0xB1,       /* U+03B1 GREEK SMALL LETTER ALPHA             : 2-bytes utf8 */
    0x00              /* terminating NUL */
};

/* Length of the dimension defined in the file.  test() writes exactly this
   many bytes starting at name_utf8, so it is derived from the array (NUL
   included) instead of being counted by hand. */
#define UTF8_BYTES (sizeof name_utf8)

/* Name used for dimension, variable, and attribute value */
#define UNAME ((const char *) name_utf8)
/* Byte count of UNAME including the terminating NUL */
#define UNAMELEN (sizeof name_utf8)
/* The name is expected to come back from the file in normalized form, so
   the retrieved name will not match the original unnormalized bytes.
   test() therefore compares the retrieved variable name against this
   array instead. */
/* NFC normalized UTF-8 for the same 8-character Greek word: identical to
   name_utf8 except that the sixth character is the 2-byte U+03AD.  The
   array is never modified, so it is const like prefix. */
static const unsigned char norm_utf8[] = {
    0xCE, 0x9A, /* U+039A GREEK CAPITAL LETTER KAPPA            : 2-bytes utf8 */
    0xCE, 0xB1, /* U+03B1 GREEK SMALL LETTER ALPHA              : 2-bytes utf8 */
    0xCE, 0xBB, /* U+03BB GREEK SMALL LETTER LAMDA (lambda)     : 2-bytes utf8 */
    0xCE, 0xB7, /* U+03B7 GREEK SMALL LETTER ETA                : 2-bytes utf8 */
    0xCE, 0xBC, /* U+03BC GREEK SMALL LETTER MU                 : 2-bytes utf8 */
    0xCE, 0xAD, /* U+03AD GREEK SMALL LETTER EPSILON WITH TONOS : 2-bytes utf8 */
    0xCF, 0x81, /* U+03C1 GREEK SMALL LETTER RHO                : 2-bytes utf8 */
    0xCE, 0xB1, /* U+03B1 GREEK SMALL LETTER ALPHA              : 2-bytes utf8 */
    0x00        /* terminating NUL */
};
#define NNAME ((const char *) norm_utf8)
/* Byte count of NNAME including the terminating NUL */
#define NNAMELEN (sizeof norm_utf8)
/* Report a nonzero status on stderr and hand the status back.
 *
 * err  - status value to inspect; 0 is treated as success and prints nothing
 * line - source line number to show in the message
 * file - source file name to show in the message
 *
 * Returns err unchanged, so the call can sit inside a condition.
 */
static int
check(int err, int line, const char* file)
{
    if(err == 0) {
        return err;
    }
    /* Message carries the location, the numeric status and its
       nc_strerror() text. */
    fprintf(stderr,"ERR %s.%d (%d) %s\n",file,line,err,nc_strerror(err));
    fflush(stderr);
    return err;
}
/* Run check() on a status expression; on a nonzero result store it in `ret`
   and jump to `done`.  The enclosing function must declare `int ret` and
   provide a `done:` label.  Wrapped in do/while(0) so that `CHECK(x);`
   behaves as a single statement, including in an unbraced if/else. */
#define CHECK(err) do {if((ret=check((err),__LINE__,__FILE__))) goto done;} while(0)
/* Write a file whose dimension, variable, attribute value and data all use
 * the UTF-8 name UNAME, then reopen it and verify what comes back.
 *
 * The file is named "<prefix>_<model>.nc".  After reopening read-only, the
 * variable is looked up by UNAME, its reported name must compare equal to
 * NNAME, and the UNITS attribute must be NC_CHAR, UNAMELEN long and compare
 * equal to UNAME.
 *
 * flags - creation mode handed to nc_create()
 * model - label used in the file name and in the progress messages
 *
 * Returns NC_NOERR on success, otherwise the first nonzero status that a
 * CHECK saw.  The file is closed on every path, including failures.
 */
static int
test(int flags, const char* model)
{
    int ret = NC_NOERR;
    int ncid = -1, dimid, varid;
    int is_open = 0; /* nonzero while ncid names a file that still needs closing */
    int dimids[NDIMS];
    char name_in[UNAMELEN + 1], strings_in[UNAMELEN + 1];
    nc_type att_type;
    size_t att_len;
    char filename[4096];

    /* Construct the file name */
    snprintf(filename,sizeof(filename),"%s_%s.nc",prefix,model);

    printf("\n*** Testing UTF-8: %s model\n",model);
    printf("*** creating UTF-8 test file |%s|...", filename);
    CHECK(nc_create(filename, flags, &ncid));
    is_open = 1;

    /* Define dimension with Unicode UTF-8 encoded name */
    CHECK(nc_def_dim(ncid, UNAME, UTF8_BYTES, &dimid));
    dimids[0] = dimid;

    /* Define variable with same name */
    CHECK(nc_def_var(ncid, UNAME, NC_CHAR, NDIMS, dimids, &varid));

    /* Create string attribute with same value */
    CHECK(nc_put_att_text(ncid, varid, UNITS, UNAMELEN, UNAME));

    CHECK(nc_enddef(ncid));

    /* Write string data, UTF-8 encoded, to the file */
    CHECK(nc_put_var_text(ncid, varid, UNAME));

    /* Flag is cleared before the call so that a failing nc_close() is not
       attempted a second time at `done`. */
    is_open = 0;
    CHECK(nc_close(ncid));

    /* Check it out. */

    /* Reopen the file. */
    CHECK(nc_open(filename, NC_NOWRITE, &ncid));
    is_open = 1;

    /* Look the variable up by the name as written, then compare the name
       the library reports against the normalized spelling. */
    CHECK(nc_inq_varid(ncid, UNAME, &varid));
    CHECK(nc_inq_varname(ncid, varid, name_in));
    if (strncmp(NNAME, name_in, NNAMELEN) != 0) {
        CHECK(NC_EBADNAME);
    }

    /* The type/length test must precede nc_get_att_text(): strings_in is
       sized from UNAMELEN.  A mismatch makes the expression 1, which CHECK
       treats as a failure status. */
    CHECK(nc_inq_att(ncid, varid, UNITS, &att_type, &att_len));
    CHECK(att_type != NC_CHAR || att_len != UNAMELEN);
    CHECK(nc_get_att_text(ncid, varid, UNITS, strings_in));
    strings_in[att_len] = '\0'; /* null terminate, because nc_get_att_text doesn't */
    if (strncmp(UNAME, strings_in, UNAMELEN) != 0) {
        CHECK(NC_EBADNAME);
    }

    is_open = 0;
    CHECK(nc_close(ncid));

done:
    /* Reached with the file still open only after a failed CHECK; close it
       without disturbing the status already stored in ret. */
    if (is_open) {
        (void) nc_close(ncid);
    }
    return ret;
}
/* Program entry point: runs test() once per file format and reports the
   outcome through ERR, SUMMARIZE_ERR and FINAL_RESULTS, which are macros
   defined outside this file (presumably in err_macros.h -- confirm there
   for their exact effect).  argc and argv are not used. */
int
main(int argc, char **argv)
{
    int failed;

    /* Run the utf8 test both for netcdf-4 and netcdf-3 */

    /* netcdf-3: creation flags of 0 */
    failed = test(0,"classic");
    if(failed) ERR;

#ifdef USE_HDF5
    /* netcdf-4: only when the build defines USE_HDF5 */
    failed = test(NC_NETCDF4,"enhanced");
    if(failed) ERR;
#endif

    SUMMARIZE_ERR;
    FINAL_RESULTS;
}