From aa6b5dbd79a9f7086e6e3e568fd3148dc0c66b8a Mon Sep 17 00:00:00 2001 From: Klaus Post Date: Tue, 24 Sep 2024 01:40:12 -0700 Subject: [PATCH] Add Checksum to PutObject (#2002) Requires TrailingHeaders to be enabled on the client. Mint test updated. Verified against AWS: ``` {"time":"2024-09-19T12:54:20.5068834+02:00","level":"INFO","name":"minio-go: testPutObjectWithChecksums","duration":3504,"function":"PutObject(bucketName, objectName, reader,size, opts)","args":{"bucketName":"minio-go-test-qgn9l4slibugev06","checksum":"SHA256","objectName":"9oady6pvtp30mjatsc2141txeex5vc","opts":"minio.PutObjectOptions{UserMetadata: metadata, Progress: progress}"},"status":"PASS"} {"time":"2024-09-19T12:54:24.1432974+02:00","level":"INFO","name":"minio-go: testPutObjectWithTrailingChecksums","duration":2742,"function":"PutObject(bucketName, objectName, reader,size, opts)","args":{"bucketName":"minio-go-test-wxrdafbi5xofe9pq","checksum":"SHA256","objectName":"ih64zss0yjp4g2v5xt1atg6y9ouwwk","opts":"minio.PutObjectOptions{UserMetadata: metadata, Progress: progress, TrailChecksum: xxx}"},"status":"PASS"} {"time":"2024-09-19T12:58:44.9830104+02:00","level":"INFO","name":"minio-go: testPutMultipartObjectWithChecksums","duration":259866,"function":"PutObject(bucketName, objectName, reader,size, opts)","args":{"bucketName":"minio-go-test-40wbkvvk55n5whld","checksum":"SHA256","objectName":"c4kvdfsdcvzk94j1oudy9th6es6cvy","opts":"minio.PutObjectOptions{UserMetadata: metadata, Progress: progress Checksum: false}"},"status":"PASS"} {"time":"2024-09-19T12:59:22.7002636+02:00","level":"INFO","name":"minio-go: testPutMultipartObjectWithChecksums","duration":36731,"function":"PutObject(bucketName, objectName, reader,size, opts)","args":{"bucketName":"minio-go-test-mwfrpr43lfe6sc6o","checksum":"SHA256","objectName":"1m3fzred3ldar5gl253gspan0ltzma","opts":"minio.PutObjectOptions{UserMetadata: metadata, Progress: progress Checksum: true}"},"status":"PASS"} ``` - Fix up testGetObjectAttributes using non-explicit checksum - Fix TLS transports, with SKIP_CERT_VALIDATION --- api-put-object-streaming.go | 49 ++++--- api-put-object.go | 26 +++- api-put-object_test.go | 2 +- api-putobject-snowball.go | 2 +- functional_tests.go | 253 +++++++++++++++++++++++++++++++----- 5 files changed, 277 insertions(+), 55 deletions(-) diff --git a/api-put-object-streaming.go b/api-put-object-streaming.go index 7f316564b..eef976c8c 100644 --- a/api-put-object-streaming.go +++ b/api-put-object-streaming.go @@ -108,7 +108,9 @@ func (c *Client) putObjectMultipartStreamFromReadAt(ctx context.Context, bucketN if err != nil { return UploadInfo{}, err } - + if opts.Checksum.IsSet() { + opts.AutoChecksum = opts.Checksum + } withChecksum := c.trailingHeaderSupport if withChecksum { if opts.UserMetadata == nil { @@ -304,6 +306,11 @@ func (c *Client) putObjectMultipartStreamOptionalChecksum(ctx context.Context, b return UploadInfo{}, err } + if opts.Checksum.IsSet() { + opts.AutoChecksum = opts.Checksum + opts.SendContentMd5 = false + } + if !opts.SendContentMd5 { if opts.UserMetadata == nil { opts.UserMetadata = make(map[string]string, 1) @@ -463,7 +470,10 @@ func (c *Client) putObjectMultipartStreamParallel(ctx context.Context, bucketNam if err = s3utils.CheckValidObjectName(objectName); err != nil { return UploadInfo{}, err } - + if opts.Checksum.IsSet() { + opts.SendContentMd5 = false + opts.AutoChecksum = opts.Checksum + } if !opts.SendContentMd5 { if opts.UserMetadata == nil { opts.UserMetadata = make(map[string]string, 1) @@ -555,7 +565,7 @@ func (c *Client) putObjectMultipartStreamParallel(ctx context.Context, bucketNam // Calculate md5sum. customHeader := make(http.Header) if !opts.SendContentMd5 { - // Add CRC32C instead. + // Add Checksum instead. crc.Reset() crc.Write(buf[:length]) cSum := crc.Sum(nil) @@ -677,6 +687,9 @@ func (c *Client) putObject(ctx context.Context, bucketName, objectName string, r if opts.SendContentMd5 && s3utils.IsGoogleEndpoint(*c.endpointURL) && size < 0 { return UploadInfo{}, errInvalidArgument("MD5Sum cannot be calculated with size '-1'") } + if opts.Checksum.IsSet() { + opts.SendContentMd5 = false + } var readSeeker io.Seeker if size > 0 { @@ -746,17 +759,6 @@ func (c *Client) putObjectDo(ctx context.Context, bucketName, objectName string, // Set headers. customHeader := opts.Header() - // Add CRC when client supports it, MD5 is not set, not Google and we don't add SHA256 to chunks. - addCrc := c.trailingHeaderSupport && md5Base64 == "" && !s3utils.IsGoogleEndpoint(*c.endpointURL) && (opts.DisableContentSha256 || c.secure) - - if addCrc { - // If user has added checksums, don't add them ourselves. - for k := range opts.UserMetadata { - if strings.HasPrefix(strings.ToLower(k), "x-amz-checksum-") { - addCrc = false - } - } - } // Populate request metadata. reqMetadata := requestMetadata{ bucketName: bucketName, @@ -768,10 +770,23 @@ func (c *Client) putObjectDo(ctx context.Context, bucketName, objectName string, contentSHA256Hex: sha256Hex, streamSha256: !opts.DisableContentSha256, } - if addCrc { - opts.AutoChecksum.SetDefault(ChecksumCRC32C) - reqMetadata.addCrc = &opts.AutoChecksum + // Add CRC when client supports it, MD5 is not set, not Google and we don't add SHA256 to chunks. + addCrc := c.trailingHeaderSupport && md5Base64 == "" && !s3utils.IsGoogleEndpoint(*c.endpointURL) && (opts.DisableContentSha256 || c.secure) + if opts.Checksum.IsSet() { + reqMetadata.addCrc = &opts.Checksum + } else if addCrc { + // If user has added checksums, don't add them ourselves. + for k := range opts.UserMetadata { + if strings.HasPrefix(strings.ToLower(k), "x-amz-checksum-") { + addCrc = false + } + } + if addCrc { + opts.AutoChecksum.SetDefault(ChecksumCRC32C) + reqMetadata.addCrc = &opts.AutoChecksum + } } + if opts.Internal.SourceVersionID != "" { if opts.Internal.SourceVersionID != nullVersionID { if _, err := uuid.Parse(opts.Internal.SourceVersionID); err != nil { diff --git a/api-put-object.go b/api-put-object.go index dbb476a97..d769648a7 100644 --- a/api-put-object.go +++ b/api-put-object.go @@ -94,6 +94,13 @@ type PutObjectOptions struct { // If none is specified CRC32C is used, since it is generally the fastest. AutoChecksum ChecksumType + // Checksum will force a checksum of the specific type. + // This requires that the client was created with "TrailingHeaders:true" option, + // and that the destination server supports it. + // Unavailable with V2 signatures & Google endpoints. + // This will disable content MD5 checksums if set. + Checksum ChecksumType + // ConcurrentStreamParts will create NumThreads buffers of PartSize bytes, // fill them serially and upload them in parallel. // This can be used for faster uploads on non-seekable or slow-to-seek input. @@ -240,7 +247,7 @@ func (opts PutObjectOptions) Header() (header http.Header) { } // validate() checks if the UserMetadata map has standard headers or and raises an error if so. -func (opts PutObjectOptions) validate() (err error) { +func (opts PutObjectOptions) validate(c *Client) (err error) { for k, v := range opts.UserMetadata { if !httpguts.ValidHeaderFieldName(k) || isStandardHeader(k) || isSSEHeader(k) || isStorageClassHeader(k) || isMinioHeader(k) { return errInvalidArgument(k + " unsupported user defined metadata name") @@ -255,6 +262,17 @@ func (opts PutObjectOptions) validate() (err error) { if opts.LegalHold != "" && !opts.LegalHold.IsValid() { return errInvalidArgument(opts.LegalHold.String() + " unsupported legal-hold status") } + if opts.Checksum.IsSet() { + switch { + case !c.trailingHeaderSupport: + return errInvalidArgument("Checksum requires Client with TrailingHeaders enabled") + case c.overrideSignerType.IsV2(): + return errInvalidArgument("Checksum cannot be used with v2 signatures") + case s3utils.IsGoogleEndpoint(*c.endpointURL): + return errInvalidArgument("Checksum cannot be used with GCS endpoints") + } + } + return nil } @@ -291,7 +309,7 @@ func (c *Client) PutObject(ctx context.Context, bucketName, objectName string, r return UploadInfo{}, errors.New("object size must be provided with disable multipart upload") } - err = opts.validate() + err = opts.validate(c) if err != nil { return UploadInfo{}, err } @@ -362,6 +380,10 @@ func (c *Client) putObjectMultipartStreamNoLength(ctx context.Context, bucketNam return UploadInfo{}, err } + if opts.Checksum.IsSet() { + opts.SendContentMd5 = false + opts.AutoChecksum = opts.Checksum + } if !opts.SendContentMd5 { if opts.UserMetadata == nil { opts.UserMetadata = make(map[string]string, 1) diff --git a/api-put-object_test.go b/api-put-object_test.go index dde4777ee..7fb4f4b43 100644 --- a/api-put-object_test.go +++ b/api-put-object_test.go @@ -61,7 +61,7 @@ func TestPutObjectOptionsValidate(t *testing.T) { for i, testCase := range testCases { err := PutObjectOptions{UserMetadata: map[string]string{ testCase.name: testCase.value, - }}.validate() + }}.validate(nil) if testCase.shouldPass && err != nil { t.Errorf("Test %d - output did not match with reference results, %s", i+1, err) } diff --git a/api-putobject-snowball.go b/api-putobject-snowball.go index eb4da4147..6b6559bf7 100644 --- a/api-putobject-snowball.go +++ b/api-putobject-snowball.go @@ -107,7 +107,7 @@ type readSeekCloser interface { // Total size should be < 5TB. // This function blocks until 'objs' is closed and the content has been uploaded. func (c Client) PutObjectsSnowball(ctx context.Context, bucketName string, opts SnowballOptions, objs <-chan SnowballObject) (err error) { - err = opts.Opts.validate() + err = opts.Opts.validate(&c) if err != nil { return err } diff --git a/functional_tests.go b/functional_tests.go index 8a908e3fd..780dc8997 100644 --- a/functional_tests.go +++ b/functional_tests.go @@ -83,7 +83,7 @@ func createHTTPTransport() (transport *http.Transport) { return nil } - if mustParseBool(os.Getenv(skipCERTValidation)) { + if mustParseBool(os.Getenv(enableHTTPS)) && mustParseBool(os.Getenv(skipCERTValidation)) { transport.TLSClientConfig.InsecureSkipVerify = true } @@ -2334,7 +2334,7 @@ func testPutObjectWithChecksums() { } // Test PutObject with custom checksums. -func testPutMultipartObjectWithChecksums() { +func testPutObjectWithTrailingChecksums() { // initialize logging params startTime := time.Now() testName := getFuncName() @@ -2342,7 +2342,7 @@ func testPutMultipartObjectWithChecksums() { args := map[string]interface{}{ "bucketName": "", "objectName": "", - "opts": "minio.PutObjectOptions{UserMetadata: metadata, Progress: progress}", + "opts": "minio.PutObjectOptions{UserMetadata: metadata, Progress: progress, TrailChecksum: xxx}", } if !isFullMode() { @@ -2356,9 +2356,201 @@ func testPutMultipartObjectWithChecksums() { // Instantiate new minio client object. c, err := minio.New(os.Getenv(serverEndpoint), &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), + Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), + Transport: createHTTPTransport(), + Secure: mustParseBool(os.Getenv(enableHTTPS)), + TrailingHeaders: true, + }) + if err != nil { + logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) + return + } + + // Enable tracing, write to stderr. + // c.TraceOn(os.Stderr) + + // Set user agent. + c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) + + // Generate a new random bucket name. + bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") + args["bucketName"] = bucketName + + // Make a new bucket. + err = c.MakeBucket(context.Background(), bucketName, minio.MakeBucketOptions{Region: "us-east-1"}) + if err != nil { + logError(testName, function, args, startTime, "", "Make bucket failed", err) + return + } + + defer cleanupBucket(bucketName, c) + tests := []struct { + cs minio.ChecksumType + }{ + {cs: minio.ChecksumCRC32C}, + {cs: minio.ChecksumCRC32}, + {cs: minio.ChecksumSHA1}, + {cs: minio.ChecksumSHA256}, + } + + for _, test := range tests { + function := "PutObject(bucketName, objectName, reader,size, opts)" + bufSize := dataFileMap["datafile-10-kB"] + + // Save the data + objectName := randString(60, rand.NewSource(time.Now().UnixNano()), "") + args["objectName"] = objectName + + cmpChecksum := func(got, want string) { + if want != got { + logError(testName, function, args, startTime, "", "checksum mismatch", fmt.Errorf("want %s, got %s", want, got)) + return + } + } + + meta := map[string]string{} + reader := getDataReader("datafile-10-kB") + b, err := io.ReadAll(reader) + if err != nil { + logError(testName, function, args, startTime, "", "Read failed", err) + return + } + h := test.cs.Hasher() + h.Reset() + + // Test with Wrong CRC. + args["metadata"] = meta + args["range"] = "false" + args["checksum"] = test.cs.String() + + resp, err := c.PutObject(context.Background(), bucketName, objectName, bytes.NewReader(b), int64(bufSize), minio.PutObjectOptions{ + DisableMultipart: true, + DisableContentSha256: true, + UserMetadata: meta, + Checksum: test.cs, + }) + if err != nil { + logError(testName, function, args, startTime, "", "PutObject failed", err) + return + } + + h.Write(b) + meta[test.cs.Key()] = base64.StdEncoding.EncodeToString(h.Sum(nil)) + + cmpChecksum(resp.ChecksumSHA256, meta["x-amz-checksum-sha256"]) + cmpChecksum(resp.ChecksumSHA1, meta["x-amz-checksum-sha1"]) + cmpChecksum(resp.ChecksumCRC32, meta["x-amz-checksum-crc32"]) + cmpChecksum(resp.ChecksumCRC32C, meta["x-amz-checksum-crc32c"]) + + // Read the data back + gopts := minio.GetObjectOptions{Checksum: true} + + function = "GetObject(...)" + r, err := c.GetObject(context.Background(), bucketName, objectName, gopts) + if err != nil { + logError(testName, function, args, startTime, "", "GetObject failed", err) + return + } + + st, err := r.Stat() + if err != nil { + logError(testName, function, args, startTime, "", "Stat failed", err) + return + } + cmpChecksum(st.ChecksumSHA256, meta["x-amz-checksum-sha256"]) + cmpChecksum(st.ChecksumSHA1, meta["x-amz-checksum-sha1"]) + cmpChecksum(st.ChecksumCRC32, meta["x-amz-checksum-crc32"]) + cmpChecksum(st.ChecksumCRC32C, meta["x-amz-checksum-crc32c"]) + + if st.Size != int64(bufSize) { + logError(testName, function, args, startTime, "", "Number of bytes returned by PutObject does not match GetObject, expected "+string(bufSize)+" got "+string(st.Size), err) + return + } + + if err := r.Close(); err != nil { + logError(testName, function, args, startTime, "", "Object Close failed", err) + return + } + if err := r.Close(); err == nil { + logError(testName, function, args, startTime, "", "Object already closed, should respond with error", err) + return + } + + function = "GetObject( Range...)" + args["range"] = "true" + err = gopts.SetRange(100, 1000) + if err != nil { + logError(testName, function, args, startTime, "", "SetRange failed", err) + return + } + r, err = c.GetObject(context.Background(), bucketName, objectName, gopts) + if err != nil { + logError(testName, function, args, startTime, "", "GetObject failed", err) + return + } + + b, err = io.ReadAll(r) + if err != nil { + logError(testName, function, args, startTime, "", "Read failed", err) + return + } + st, err = r.Stat() + if err != nil { + logError(testName, function, args, startTime, "", "Stat failed", err) + return + } + + // Range requests should return empty checksums... + cmpChecksum(st.ChecksumSHA256, "") + cmpChecksum(st.ChecksumSHA1, "") + cmpChecksum(st.ChecksumCRC32, "") + cmpChecksum(st.ChecksumCRC32C, "") + + function = "GetObjectAttributes(...)" + s, err := c.GetObjectAttributes(context.Background(), bucketName, objectName, minio.ObjectAttributesOptions{}) + if err != nil { + logError(testName, function, args, startTime, "", "GetObjectAttributes failed", err) + return + } + cmpChecksum(s.Checksum.ChecksumSHA256, meta["x-amz-checksum-sha256"]) + cmpChecksum(s.Checksum.ChecksumSHA1, meta["x-amz-checksum-sha1"]) + cmpChecksum(s.Checksum.ChecksumCRC32, meta["x-amz-checksum-crc32"]) + cmpChecksum(s.Checksum.ChecksumCRC32C, meta["x-amz-checksum-crc32c"]) + + delete(args, "range") + delete(args, "metadata") + } + + logSuccess(testName, function, args, startTime) +} + +// Test PutObject with custom checksums. +func testPutMultipartObjectWithChecksums(trailing bool) { + // initialize logging params + startTime := time.Now() + testName := getFuncName() + function := "PutObject(bucketName, objectName, reader,size, opts)" + args := map[string]interface{}{ + "bucketName": "", + "objectName": "", + "opts": fmt.Sprintf("minio.PutObjectOptions{UserMetadata: metadata, Progress: progress Checksum: %v}", trailing), + } + + if !isFullMode() { + logIgnored(testName, function, args, startTime, "Skipping functional tests for short/quick runs") + return + } + + // Seed random based on current time. + rand.Seed(time.Now().Unix()) + + // Instantiate new minio client object. + c, err := minio.New(os.Getenv(serverEndpoint), + &minio.Options{ + Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), + Transport: createHTTPTransport(), + Secure: mustParseBool(os.Getenv(enableHTTPS)), + TrailingHeaders: trailing, }) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) @@ -2445,14 +2637,20 @@ func testPutMultipartObjectWithChecksums() { h.Reset() want := hashMultiPart(b, partSize, test.cs.Hasher()) + var cs minio.ChecksumType + rd := io.Reader(io.NopCloser(bytes.NewReader(b))) + if trailing { + cs = test.cs + rd = bytes.NewReader(b) + } // Set correct CRC. - - resp, err := c.PutObject(context.Background(), bucketName, objectName, io.NopCloser(bytes.NewReader(b)), int64(bufSize), minio.PutObjectOptions{ + resp, err := c.PutObject(context.Background(), bucketName, objectName, rd, int64(bufSize), minio.PutObjectOptions{ DisableContentSha256: true, DisableMultipart: false, UserMetadata: nil, PartSize: partSize, AutoChecksum: test.cs, + Checksum: cs, }) if err != nil { logError(testName, function, args, startTime, "", "PutObject failed", err) @@ -2982,6 +3180,7 @@ func testGetObjectAttributes() { testFiles[i].UploadInfo, err = c.PutObject(context.Background(), v.Bucket, v.Object, reader, int64(bufSize), minio.PutObjectOptions{ ContentType: v.ContentType, SendContentMd5: v.SendContentMd5, + Checksum: minio.ChecksumCRC32C, }) if err != nil { logError(testName, function, args, startTime, "", "PutObject failed", err) @@ -3063,7 +3262,7 @@ func testGetObjectAttributes() { test: objectAttributesTestOptions{ TestFileName: "file1", StorageClass: "STANDARD", - HasFullChecksum: false, + HasFullChecksum: true, }, } @@ -3152,9 +3351,10 @@ func testGetObjectAttributesSSECEncryption() { info, err := c.PutObject(context.Background(), bucketName, objectName, reader, int64(bufSize), minio.PutObjectOptions{ ContentType: "content/custom", - SendContentMd5: true, + SendContentMd5: false, ServerSideEncryption: sse, PartSize: uint64(bufSize) / 2, + Checksum: minio.ChecksumCRC32C, }) if err != nil { logError(testName, function, args, startTime, "", "PutObject failed", err) @@ -3174,9 +3374,9 @@ func testGetObjectAttributesSSECEncryption() { ETag: info.ETag, NumberOfParts: 2, ObjectSize: int(info.Size), - HasFullChecksum: false, + HasFullChecksum: true, HasParts: true, - HasPartChecksums: false, + HasPartChecksums: true, }) if err != nil { logError(testName, function, args, startTime, "", "Validating GetObjectsAttributes response failed", err) @@ -5594,18 +5794,12 @@ func testPresignedPostPolicy() { } writer.Close() - transport, err := minio.DefaultTransport(mustParseBool(os.Getenv(enableHTTPS))) - if err != nil { - logError(testName, function, args, startTime, "", "DefaultTransport failed", err) - return - } - httpClient := &http.Client{ // Setting a sensible time out of 30secs to wait for response // headers. Request is pro-actively canceled after 30secs // with no response. Timeout: 30 * time.Second, - Transport: transport, + Transport: createHTTPTransport(), } args["url"] = presignedPostPolicyURL.String() @@ -7519,7 +7713,7 @@ func testFunctional() { return } - transport, err := minio.DefaultTransport(mustParseBool(os.Getenv(enableHTTPS))) + transport := createHTTPTransport() if err != nil { logError(testName, function, args, startTime, "", "DefaultTransport failed", err) return @@ -12450,18 +12644,12 @@ func testFunctionalV2() { return } - transport, err := minio.DefaultTransport(mustParseBool(os.Getenv(enableHTTPS))) - if err != nil { - logError(testName, function, args, startTime, "", "DefaultTransport failed", err) - return - } - httpClient := &http.Client{ // Setting a sensible time out of 30secs to wait for response // headers. Request is pro-actively canceled after 30secs // with no response. Timeout: 30 * time.Second, - Transport: transport, + Transport: createHTTPTransport(), } req, err := http.NewRequest(http.MethodHead, presignedHeadURL.String(), nil) @@ -13556,14 +13744,9 @@ func testCors() { bucketURL := c.EndpointURL().String() + "/" + bucketName + "/" objectURL := bucketURL + objectName - transport, err := minio.DefaultTransport(mustParseBool(os.Getenv(enableHTTPS))) - if err != nil { - logError(testName, function, args, startTime, "", "DefaultTransport failed", err) - return - } httpClient := &http.Client{ Timeout: 30 * time.Second, - Transport: transport, + Transport: createHTTPTransport(), } errStrAccessForbidden := `AccessForbiddenCORSResponse: This CORS request is not allowed. This is usually because the evalution of Origin, request method / Access-Control-Request-Method or Access-Control-Request-Headers are not whitelisted` @@ -14757,7 +14940,9 @@ func main() { testCompose10KSourcesV2() testUserMetadataCopyingV2() testPutObjectWithChecksums() - testPutMultipartObjectWithChecksums() + testPutObjectWithTrailingChecksums() + testPutMultipartObjectWithChecksums(false) + testPutMultipartObjectWithChecksums(true) testPutObject0ByteV2() testPutObjectNoLengthV2() testPutObjectsUnknownV2()