forked from airbytehq/airbyte
-
Notifications
You must be signed in to change notification settings - Fork 0
/
spec.json
318 lines (318 loc) · 12.4 KB
/
spec.json
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
{
"documentationUrl": "https://docs.airbyte.io/integrations/destinations/s3",
"supportsIncremental": true,
"supportsNormalization": false,
"supportsDBT": false,
"supported_destination_sync_modes": ["overwrite", "append"],
"connectionSpecification": {
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "S3 Destination Spec",
"type": "object",
"required": [
"s3_bucket_name",
"s3_bucket_path",
"s3_bucket_region",
"format"
],
"additionalProperties": false,
"properties": {
"s3_endpoint": {
"title": "Endpoint",
"type": "string",
"default": "",
"description": "This is your S3 endpoint url.(if you are working with AWS S3, just leave empty).",
"examples": ["http://localhost:9000"]
},
"s3_bucket_name": {
"title": "S3 Bucket Name",
"type": "string",
"description": "The name of the S3 bucket.",
"examples": ["airbyte_sync"]
},
"s3_bucket_path": {
"description": "Directory under the S3 bucket where data will be written.",
"type": "string",
"examples": ["data_sync/test"]
},
"s3_bucket_region": {
"title": "S3 Bucket Region",
"type": "string",
"default": "",
"description": "The region of the S3 bucket.",
"enum": [
"",
"us-east-1",
"us-east-2",
"us-west-1",
"us-west-2",
"af-south-1",
"ap-east-1",
"ap-south-1",
"ap-northeast-1",
"ap-northeast-2",
"ap-northeast-3",
"ap-southeast-1",
"ap-southeast-2",
"ca-central-1",
"cn-north-1",
"cn-northwest-1",
"eu-central-1",
"eu-north-1",
"eu-south-1",
"eu-west-1",
"eu-west-2",
"eu-west-3",
"sa-east-1",
"me-south-1",
"us-gov-east-1",
"us-gov-west-1"
]
},
"access_key_id": {
"type": "string",
"description": "The access key id to access the S3 bucket. Airbyte requires Read and Write permissions to the given bucket, if not set, Airbyte will rely on Instance Profile.",
"title": "S3 Key Id",
"airbyte_secret": true,
"examples": ["A012345678910EXAMPLE"]
},
"secret_access_key": {
"type": "string",
"description": "The corresponding secret to the access key id, if S3 Key Id is set, then S3 Access Key must also be provided",
"title": "S3 Access Key",
"airbyte_secret": true,
"examples": ["a012345678910ABCDEFGH/AbCdEfGhEXAMPLEKEY"]
},
"format": {
"title": "Output Format",
"type": "object",
"description": "Output data format",
"oneOf": [
{
"title": "Avro: Apache Avro",
"required": ["format_type", "compression_codec"],
"properties": {
"format_type": {
"type": "string",
"enum": ["Avro"],
"default": "Avro"
},
"compression_codec": {
"title": "Compression Codec",
"description": "The compression algorithm used to compress data. Default to no compression.",
"type": "object",
"oneOf": [
{
"title": "no compression",
"required": ["codec"],
"properties": {
"codec": {
"type": "string",
"enum": ["no compression"],
"default": "no compression"
}
}
},
{
"title": "Deflate",
"required": ["codec", "compression_level"],
"properties": {
"codec": {
"type": "string",
"enum": ["Deflate"],
"default": "Deflate"
},
"compression_level": {
"title": "Deflate level",
"description": "0: no compression & fastest, 9: best compression & slowest.",
"type": "integer",
"default": 0,
"minimum": 0,
"maximum": 9
}
}
},
{
"title": "bzip2",
"required": ["codec"],
"properties": {
"codec": {
"type": "string",
"enum": ["bzip2"],
"default": "bzip2"
}
}
},
{
"title": "xz",
"required": ["codec", "compression_level"],
"properties": {
"codec": {
"type": "string",
"enum": ["xz"],
"default": "xz"
},
"compression_level": {
"title": "Compression level",
"description": "See <a href=\"https://commons.apache.org/proper/commons-compress/apidocs/org/apache/commons/compress/compressors/xz/XZCompressorOutputStream.html#XZCompressorOutputStream-java.io.OutputStream-int-\">here</a> for details.",
"type": "integer",
"default": 6,
"minimum": 0,
"maximum": 9
}
}
},
{
"title": "zstandard",
"required": ["codec", "compression_level"],
"properties": {
"codec": {
"type": "string",
"enum": ["zstandard"],
"default": "zstandard"
},
"compression_level": {
"title": "Compression level",
"description": "Negative levels are 'fast' modes akin to lz4 or snappy, levels above 9 are generally for archival purposes, and levels above 18 use a lot of memory.",
"type": "integer",
"default": 3,
"minimum": -5,
"maximum": 22
},
"include_checksum": {
"title": "Include checksum",
"description": "If true, include a checksum with each data block.",
"type": "boolean",
"default": false
}
}
},
{
"title": "snappy",
"required": ["codec"],
"properties": {
"codec": {
"type": "string",
"enum": ["snappy"],
"default": "snappy"
}
}
}
]
},
"part_size_mb": {
"title": "Block Size (MB) for Amazon S3 multipart upload",
"description": "This is the size of a \"Part\" being buffered in memory. It limits the memory usage when writing. Larger values will allow to upload a bigger files and improve the speed, but consumes9 more memory. Allowed values: min=5MB, max=525MB Default: 5MB.",
"type": "integer",
"default": 5,
"examples": [5]
}
}
},
{
"title": "CSV: Comma-Separated Values",
"required": ["format_type", "flattening"],
"properties": {
"format_type": {
"type": "string",
"enum": ["CSV"],
"default": "CSV"
},
"flattening": {
"type": "string",
"title": "Normalization (Flattening)",
"description": "Whether the input json data should be normalized (flattened) in the output CSV. Please refer to docs for details.",
"default": "No flattening",
"enum": ["No flattening", "Root level flattening"]
},
"part_size_mb": {
"title": "Block Size (MB) for Amazon S3 multipart upload",
"description": "This is the size of a \"Part\" being buffered in memory. It limits the memory usage when writing. Larger values will allow to upload a bigger files and improve the speed, but consumes9 more memory. Allowed values: min=5MB, max=525MB Default: 5MB.",
"type": "integer",
"default": 5,
"examples": [5]
}
}
},
{
"title": "JSON Lines: newline-delimited JSON",
"required": ["format_type"],
"properties": {
"format_type": {
"type": "string",
"enum": ["JSONL"],
"default": "JSONL"
},
"part_size_mb": {
"title": "Block Size (MB) for Amazon S3 multipart upload",
"description": "This is the size of a \"Part\" being buffered in memory. It limits the memory usage when writing. Larger values will allow to upload a bigger files and improve the speed, but consumes9 more memory. Allowed values: min=5MB, max=525MB Default: 5MB.",
"type": "integer",
"default": 5,
"examples": [5]
}
}
},
{
"title": "Parquet: Columnar Storage",
"required": ["format_type"],
"properties": {
"format_type": {
"type": "string",
"enum": ["Parquet"],
"default": "Parquet"
},
"compression_codec": {
"title": "Compression Codec",
"description": "The compression algorithm used to compress data pages.",
"type": "string",
"enum": [
"UNCOMPRESSED",
"SNAPPY",
"GZIP",
"LZO",
"BROTLI",
"LZ4",
"ZSTD"
],
"default": "UNCOMPRESSED"
},
"block_size_mb": {
"title": "Block Size (Row Group Size) (MB)",
"description": "This is the size of a row group being buffered in memory. It limits the memory usage when writing. Larger values will improve the IO when reading, but consume more memory when writing. Default: 128 MB.",
"type": "integer",
"default": 128,
"examples": [128]
},
"max_padding_size_mb": {
"title": "Max Padding Size (MB)",
"description": "Maximum size allowed as padding to align row groups. This is also the minimum size of a row group. Default: 8 MB.",
"type": "integer",
"default": 8,
"examples": [8]
},
"page_size_kb": {
"title": "Page Size (KB)",
"description": "The page size is for compression. A block is composed of pages. A page is the smallest unit that must be read fully to access a single record. If this value is too small, the compression will deteriorate. Default: 1024 KB.",
"type": "integer",
"default": 1024,
"examples": [1024]
},
"dictionary_page_size_kb": {
"title": "Dictionary Page Size (KB)",
"description": "There is one dictionary page per column per row group when dictionary encoding is used. The dictionary page size works like the page size but for dictionary. Default: 1024 KB.",
"type": "integer",
"default": 1024,
"examples": [1024]
},
"dictionary_encoding": {
"title": "Dictionary Encoding",
"description": "Default: true.",
"type": "boolean",
"default": true
}
}
}
]
}
}
}
}