Skip to content

Commit fdef7d2

Browse files
authored
Add Full Object Checksum API (#2026)
Add support for full object checksums, as described here: https://docs.aws.amazon.com/AmazonS3/latest/userguide/checking-object-integrity.html To enable them, use `ChecksumCRC64NVME`, `ChecksumFullObjectCRC32` or `ChecksumFullObjectCRC32C` as the checksum type when uploading. The Mint tests have been updated to cover full object checksums; these tests can be disabled by setting the `MINT_NO_FULL_OBJECT=anything` env var. This PR will fail against community MinIO unless the above env var is set.
1 parent 5e9a483 commit fdef7d2

11 files changed

+606
-151
lines changed

.github/workflows/go-windows.yml

+1
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ jobs:
3838
ENABLE_HTTPS: 1
3939
MINIO_KMS_MASTER_KEY: my-minio-key:6368616e676520746869732070617373776f726420746f206120736563726574
4040
MINIO_CI_CD: true
41+
MINT_NO_FULL_OBJECT: true
4142
run: |
4243
New-Item -ItemType Directory -Path "$env:temp/certs-dir"
4344
Copy-Item -Path testcerts\* -Destination "$env:temp/certs-dir"

.github/workflows/go.yml

+1
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ jobs:
3939
MINIO_KMS_MASTER_KEY: my-minio-key:6368616e676520746869732070617373776f726420746f206120736563726574
4040
SSL_CERT_FILE: /tmp/certs-dir/public.crt
4141
MINIO_CI_CD: true
42+
MINT_NO_FULL_OBJECT: true
4243
run: |
4344
sudo apt update -y
4445
sudo apt install devscripts -y

api-datatypes.go

+10-8
Original file line numberDiff line numberDiff line change
@@ -143,10 +143,11 @@ type UploadInfo struct {
143143
// Verified checksum values, if any.
144144
// Values are base64 (standard) encoded.
145145
// For multipart objects this is a checksum of the checksum of each part.
146-
ChecksumCRC32 string
147-
ChecksumCRC32C string
148-
ChecksumSHA1 string
149-
ChecksumSHA256 string
146+
ChecksumCRC32 string
147+
ChecksumCRC32C string
148+
ChecksumSHA1 string
149+
ChecksumSHA256 string
150+
ChecksumCRC64NVME string
150151
}
151152

152153
// RestoreInfo contains information of the restore operation of an archived object
@@ -215,10 +216,11 @@ type ObjectInfo struct {
215216
Restore *RestoreInfo
216217

217218
// Checksum values
218-
ChecksumCRC32 string
219-
ChecksumCRC32C string
220-
ChecksumSHA1 string
221-
ChecksumSHA256 string
219+
ChecksumCRC32 string
220+
ChecksumCRC32C string
221+
ChecksumSHA1 string
222+
ChecksumSHA256 string
223+
ChecksumCRC64NVME string
222224

223225
Internal *struct {
224226
K int // Data blocks

api-put-object-multipart.go

+22-26
Original file line numberDiff line numberDiff line change
@@ -83,10 +83,7 @@ func (c *Client) putObjectMultipartNoStream(ctx context.Context, bucketName, obj
8383
// HTTPS connection.
8484
hashAlgos, hashSums := c.hashMaterials(opts.SendContentMd5, !opts.DisableContentSha256)
8585
if len(hashSums) == 0 {
86-
if opts.UserMetadata == nil {
87-
opts.UserMetadata = make(map[string]string, 1)
88-
}
89-
opts.UserMetadata["X-Amz-Checksum-Algorithm"] = opts.AutoChecksum.String()
86+
addAutoChecksumHeaders(&opts)
9087
}
9188

9289
// Initiate a new multipart upload.
@@ -113,7 +110,6 @@ func (c *Client) putObjectMultipartNoStream(ctx context.Context, bucketName, obj
113110

114111
// Create checksums
115112
// CRC32C is ~50% faster on AMD64 @ 30GB/s
116-
var crcBytes []byte
117113
customHeader := make(http.Header)
118114
crc := opts.AutoChecksum.Hasher()
119115
for partNumber <= totalPartsCount {
@@ -154,7 +150,6 @@ func (c *Client) putObjectMultipartNoStream(ctx context.Context, bucketName, obj
154150
crc.Write(buf[:length])
155151
cSum := crc.Sum(nil)
156152
customHeader.Set(opts.AutoChecksum.Key(), base64.StdEncoding.EncodeToString(cSum))
157-
crcBytes = append(crcBytes, cSum...)
158153
}
159154

160155
p := uploadPartParams{bucketName: bucketName, objectName: objectName, uploadID: uploadID, reader: rd, partNumber: partNumber, md5Base64: md5Base64, sha256Hex: sha256Hex, size: int64(length), sse: opts.ServerSideEncryption, streamSha256: !opts.DisableContentSha256, customHeader: customHeader}
@@ -182,18 +177,21 @@ func (c *Client) putObjectMultipartNoStream(ctx context.Context, bucketName, obj
182177

183178
// Loop over total uploaded parts to save them in
184179
// Parts array before completing the multipart request.
180+
allParts := make([]ObjectPart, 0, len(partsInfo))
185181
for i := 1; i < partNumber; i++ {
186182
part, ok := partsInfo[i]
187183
if !ok {
188184
return UploadInfo{}, errInvalidArgument(fmt.Sprintf("Missing part number %d", i))
189185
}
186+
allParts = append(allParts, part)
190187
complMultipartUpload.Parts = append(complMultipartUpload.Parts, CompletePart{
191-
ETag: part.ETag,
192-
PartNumber: part.PartNumber,
193-
ChecksumCRC32: part.ChecksumCRC32,
194-
ChecksumCRC32C: part.ChecksumCRC32C,
195-
ChecksumSHA1: part.ChecksumSHA1,
196-
ChecksumSHA256: part.ChecksumSHA256,
188+
ETag: part.ETag,
189+
PartNumber: part.PartNumber,
190+
ChecksumCRC32: part.ChecksumCRC32,
191+
ChecksumCRC32C: part.ChecksumCRC32C,
192+
ChecksumSHA1: part.ChecksumSHA1,
193+
ChecksumSHA256: part.ChecksumSHA256,
194+
ChecksumCRC64NVME: part.ChecksumCRC64NVME,
197195
})
198196
}
199197

@@ -203,12 +201,8 @@ func (c *Client) putObjectMultipartNoStream(ctx context.Context, bucketName, obj
203201
ServerSideEncryption: opts.ServerSideEncryption,
204202
AutoChecksum: opts.AutoChecksum,
205203
}
206-
if len(crcBytes) > 0 {
207-
// Add hash of hashes.
208-
crc.Reset()
209-
crc.Write(crcBytes)
210-
opts.UserMetadata = map[string]string{opts.AutoChecksum.Key(): base64.StdEncoding.EncodeToString(crc.Sum(nil))}
211-
}
204+
applyAutoChecksum(&opts, allParts)
205+
212206
uploadInfo, err := c.completeMultipartUpload(ctx, bucketName, objectName, uploadID, complMultipartUpload, opts)
213207
if err != nil {
214208
return UploadInfo{}, err
@@ -354,10 +348,11 @@ func (c *Client) uploadPart(ctx context.Context, p uploadPartParams) (ObjectPart
354348
// Once successfully uploaded, return completed part.
355349
h := resp.Header
356350
objPart := ObjectPart{
357-
ChecksumCRC32: h.Get("x-amz-checksum-crc32"),
358-
ChecksumCRC32C: h.Get("x-amz-checksum-crc32c"),
359-
ChecksumSHA1: h.Get("x-amz-checksum-sha1"),
360-
ChecksumSHA256: h.Get("x-amz-checksum-sha256"),
351+
ChecksumCRC32: h.Get(ChecksumCRC32.Key()),
352+
ChecksumCRC32C: h.Get(ChecksumCRC32C.Key()),
353+
ChecksumSHA1: h.Get(ChecksumSHA1.Key()),
354+
ChecksumSHA256: h.Get(ChecksumSHA256.Key()),
355+
ChecksumCRC64NVME: h.Get(ChecksumCRC64NVME.Key()),
361356
}
362357
objPart.Size = p.size
363358
objPart.PartNumber = p.partNumber
@@ -457,9 +452,10 @@ func (c *Client) completeMultipartUpload(ctx context.Context, bucketName, object
457452
Expiration: expTime,
458453
ExpirationRuleID: ruleID,
459454

460-
ChecksumSHA256: completeMultipartUploadResult.ChecksumSHA256,
461-
ChecksumSHA1: completeMultipartUploadResult.ChecksumSHA1,
462-
ChecksumCRC32: completeMultipartUploadResult.ChecksumCRC32,
463-
ChecksumCRC32C: completeMultipartUploadResult.ChecksumCRC32C,
455+
ChecksumSHA256: completeMultipartUploadResult.ChecksumSHA256,
456+
ChecksumSHA1: completeMultipartUploadResult.ChecksumSHA1,
457+
ChecksumCRC32: completeMultipartUploadResult.ChecksumCRC32,
458+
ChecksumCRC32C: completeMultipartUploadResult.ChecksumCRC32C,
459+
ChecksumCRC64NVME: completeMultipartUploadResult.ChecksumCRC64NVME,
464460
}, nil
465461
}

api-put-object-streaming.go

+39-60
Original file line numberDiff line numberDiff line change
@@ -113,10 +113,7 @@ func (c *Client) putObjectMultipartStreamFromReadAt(ctx context.Context, bucketN
113113
}
114114
withChecksum := c.trailingHeaderSupport
115115
if withChecksum {
116-
if opts.UserMetadata == nil {
117-
opts.UserMetadata = make(map[string]string, 1)
118-
}
119-
opts.UserMetadata["X-Amz-Checksum-Algorithm"] = opts.AutoChecksum.String()
116+
addAutoChecksumHeaders(&opts)
120117
}
121118
// Initiate a new multipart upload.
122119
uploadID, err := c.newUploadID(ctx, bucketName, objectName, opts)
@@ -240,6 +237,7 @@ func (c *Client) putObjectMultipartStreamFromReadAt(ctx context.Context, bucketN
240237

241238
// Gather the responses as they occur and update any
242239
// progress bar.
240+
allParts := make([]ObjectPart, 0, totalPartsCount)
243241
for u := 1; u <= totalPartsCount; u++ {
244242
select {
245243
case <-ctx.Done():
@@ -248,16 +246,17 @@ func (c *Client) putObjectMultipartStreamFromReadAt(ctx context.Context, bucketN
248246
if uploadRes.Error != nil {
249247
return UploadInfo{}, uploadRes.Error
250248
}
251-
249+
allParts = append(allParts, uploadRes.Part)
252250
// Update the totalUploadedSize.
253251
totalUploadedSize += uploadRes.Size
254252
complMultipartUpload.Parts = append(complMultipartUpload.Parts, CompletePart{
255-
ETag: uploadRes.Part.ETag,
256-
PartNumber: uploadRes.Part.PartNumber,
257-
ChecksumCRC32: uploadRes.Part.ChecksumCRC32,
258-
ChecksumCRC32C: uploadRes.Part.ChecksumCRC32C,
259-
ChecksumSHA1: uploadRes.Part.ChecksumSHA1,
260-
ChecksumSHA256: uploadRes.Part.ChecksumSHA256,
253+
ETag: uploadRes.Part.ETag,
254+
PartNumber: uploadRes.Part.PartNumber,
255+
ChecksumCRC32: uploadRes.Part.ChecksumCRC32,
256+
ChecksumCRC32C: uploadRes.Part.ChecksumCRC32C,
257+
ChecksumSHA1: uploadRes.Part.ChecksumSHA1,
258+
ChecksumSHA256: uploadRes.Part.ChecksumSHA256,
259+
ChecksumCRC64NVME: uploadRes.Part.ChecksumCRC64NVME,
261260
})
262261
}
263262
}
@@ -275,15 +274,7 @@ func (c *Client) putObjectMultipartStreamFromReadAt(ctx context.Context, bucketN
275274
AutoChecksum: opts.AutoChecksum,
276275
}
277276
if withChecksum {
278-
// Add hash of hashes.
279-
crc := opts.AutoChecksum.Hasher()
280-
for _, part := range complMultipartUpload.Parts {
281-
cs, err := base64.StdEncoding.DecodeString(part.Checksum(opts.AutoChecksum))
282-
if err == nil {
283-
crc.Write(cs)
284-
}
285-
}
286-
opts.UserMetadata = map[string]string{opts.AutoChecksum.KeyCapitalized(): base64.StdEncoding.EncodeToString(crc.Sum(nil))}
277+
applyAutoChecksum(&opts, allParts)
287278
}
288279

289280
uploadInfo, err := c.completeMultipartUpload(ctx, bucketName, objectName, uploadID, complMultipartUpload, opts)
@@ -312,10 +303,7 @@ func (c *Client) putObjectMultipartStreamOptionalChecksum(ctx context.Context, b
312303
}
313304

314305
if !opts.SendContentMd5 {
315-
if opts.UserMetadata == nil {
316-
opts.UserMetadata = make(map[string]string, 1)
317-
}
318-
opts.UserMetadata["X-Amz-Checksum-Algorithm"] = opts.AutoChecksum.String()
306+
addAutoChecksumHeaders(&opts)
319307
}
320308

321309
// Calculate the optimal parts info for a given size.
@@ -342,7 +330,6 @@ func (c *Client) putObjectMultipartStreamOptionalChecksum(ctx context.Context, b
342330

343331
// Create checksums
344332
// CRC32C is ~50% faster on AMD64 @ 30GB/s
345-
var crcBytes []byte
346333
customHeader := make(http.Header)
347334
crc := opts.AutoChecksum.Hasher()
348335
md5Hash := c.md5Hasher()
@@ -389,7 +376,6 @@ func (c *Client) putObjectMultipartStreamOptionalChecksum(ctx context.Context, b
389376
crc.Write(buf[:length])
390377
cSum := crc.Sum(nil)
391378
customHeader.Set(opts.AutoChecksum.KeyCapitalized(), base64.StdEncoding.EncodeToString(cSum))
392-
crcBytes = append(crcBytes, cSum...)
393379
}
394380

395381
// Update progress reader appropriately to the latest offset
@@ -420,18 +406,21 @@ func (c *Client) putObjectMultipartStreamOptionalChecksum(ctx context.Context, b
420406

421407
// Loop over total uploaded parts to save them in
422408
// Parts array before completing the multipart request.
409+
allParts := make([]ObjectPart, 0, len(partsInfo))
423410
for i := 1; i < partNumber; i++ {
424411
part, ok := partsInfo[i]
425412
if !ok {
426413
return UploadInfo{}, errInvalidArgument(fmt.Sprintf("Missing part number %d", i))
427414
}
415+
allParts = append(allParts, part)
428416
complMultipartUpload.Parts = append(complMultipartUpload.Parts, CompletePart{
429-
ETag: part.ETag,
430-
PartNumber: part.PartNumber,
431-
ChecksumCRC32: part.ChecksumCRC32,
432-
ChecksumCRC32C: part.ChecksumCRC32C,
433-
ChecksumSHA1: part.ChecksumSHA1,
434-
ChecksumSHA256: part.ChecksumSHA256,
417+
ETag: part.ETag,
418+
PartNumber: part.PartNumber,
419+
ChecksumCRC32: part.ChecksumCRC32,
420+
ChecksumCRC32C: part.ChecksumCRC32C,
421+
ChecksumSHA1: part.ChecksumSHA1,
422+
ChecksumSHA256: part.ChecksumSHA256,
423+
ChecksumCRC64NVME: part.ChecksumCRC64NVME,
435424
})
436425
}
437426

@@ -442,12 +431,7 @@ func (c *Client) putObjectMultipartStreamOptionalChecksum(ctx context.Context, b
442431
ServerSideEncryption: opts.ServerSideEncryption,
443432
AutoChecksum: opts.AutoChecksum,
444433
}
445-
if len(crcBytes) > 0 {
446-
// Add hash of hashes.
447-
crc.Reset()
448-
crc.Write(crcBytes)
449-
opts.UserMetadata = map[string]string{opts.AutoChecksum.KeyCapitalized(): base64.StdEncoding.EncodeToString(crc.Sum(nil))}
450-
}
434+
applyAutoChecksum(&opts, allParts)
451435
uploadInfo, err := c.completeMultipartUpload(ctx, bucketName, objectName, uploadID, complMultipartUpload, opts)
452436
if err != nil {
453437
return UploadInfo{}, err
@@ -475,10 +459,7 @@ func (c *Client) putObjectMultipartStreamParallel(ctx context.Context, bucketNam
475459
opts.AutoChecksum = opts.Checksum
476460
}
477461
if !opts.SendContentMd5 {
478-
if opts.UserMetadata == nil {
479-
opts.UserMetadata = make(map[string]string, 1)
480-
}
481-
opts.UserMetadata["X-Amz-Checksum-Algorithm"] = opts.AutoChecksum.String()
462+
addAutoChecksumHeaders(&opts)
482463
}
483464

484465
// Cancel all when an error occurs.
@@ -510,7 +491,6 @@ func (c *Client) putObjectMultipartStreamParallel(ctx context.Context, bucketNam
510491

511492
// Create checksums
512493
// CRC32C is ~50% faster on AMD64 @ 30GB/s
513-
var crcBytes []byte
514494
crc := opts.AutoChecksum.Hasher()
515495

516496
// Total data read and written to server. should be equal to 'size' at the end of the call.
@@ -570,7 +550,6 @@ func (c *Client) putObjectMultipartStreamParallel(ctx context.Context, bucketNam
570550
crc.Write(buf[:length])
571551
cSum := crc.Sum(nil)
572552
customHeader.Set(opts.AutoChecksum.Key(), base64.StdEncoding.EncodeToString(cSum))
573-
crcBytes = append(crcBytes, cSum...)
574553
}
575554

576555
wg.Add(1)
@@ -630,18 +609,21 @@ func (c *Client) putObjectMultipartStreamParallel(ctx context.Context, bucketNam
630609

631610
// Loop over total uploaded parts to save them in
632611
// Parts array before completing the multipart request.
612+
allParts := make([]ObjectPart, 0, len(partsInfo))
633613
for i := 1; i < partNumber; i++ {
634614
part, ok := partsInfo[i]
635615
if !ok {
636616
return UploadInfo{}, errInvalidArgument(fmt.Sprintf("Missing part number %d", i))
637617
}
618+
allParts = append(allParts, part)
638619
complMultipartUpload.Parts = append(complMultipartUpload.Parts, CompletePart{
639-
ETag: part.ETag,
640-
PartNumber: part.PartNumber,
641-
ChecksumCRC32: part.ChecksumCRC32,
642-
ChecksumCRC32C: part.ChecksumCRC32C,
643-
ChecksumSHA1: part.ChecksumSHA1,
644-
ChecksumSHA256: part.ChecksumSHA256,
620+
ETag: part.ETag,
621+
PartNumber: part.PartNumber,
622+
ChecksumCRC32: part.ChecksumCRC32,
623+
ChecksumCRC32C: part.ChecksumCRC32C,
624+
ChecksumSHA1: part.ChecksumSHA1,
625+
ChecksumSHA256: part.ChecksumSHA256,
626+
ChecksumCRC64NVME: part.ChecksumCRC64NVME,
645627
})
646628
}
647629

@@ -652,12 +634,8 @@ func (c *Client) putObjectMultipartStreamParallel(ctx context.Context, bucketNam
652634
ServerSideEncryption: opts.ServerSideEncryption,
653635
AutoChecksum: opts.AutoChecksum,
654636
}
655-
if len(crcBytes) > 0 {
656-
// Add hash of hashes.
657-
crc.Reset()
658-
crc.Write(crcBytes)
659-
opts.UserMetadata = map[string]string{opts.AutoChecksum.KeyCapitalized(): base64.StdEncoding.EncodeToString(crc.Sum(nil))}
660-
}
637+
applyAutoChecksum(&opts, allParts)
638+
661639
uploadInfo, err := c.completeMultipartUpload(ctx, bucketName, objectName, uploadID, complMultipartUpload, opts)
662640
if err != nil {
663641
return UploadInfo{}, err
@@ -823,9 +801,10 @@ func (c *Client) putObjectDo(ctx context.Context, bucketName, objectName string,
823801
ExpirationRuleID: ruleID,
824802

825803
// Checksum values
826-
ChecksumCRC32: h.Get("x-amz-checksum-crc32"),
827-
ChecksumCRC32C: h.Get("x-amz-checksum-crc32c"),
828-
ChecksumSHA1: h.Get("x-amz-checksum-sha1"),
829-
ChecksumSHA256: h.Get("x-amz-checksum-sha256"),
804+
ChecksumCRC32: h.Get(ChecksumCRC32.Key()),
805+
ChecksumCRC32C: h.Get(ChecksumCRC32C.Key()),
806+
ChecksumSHA1: h.Get(ChecksumSHA1.Key()),
807+
ChecksumSHA256: h.Get(ChecksumSHA256.Key()),
808+
ChecksumCRC64NVME: h.Get(ChecksumCRC64NVME.Key()),
830809
}, nil
831810
}

0 commit comments

Comments
 (0)