diff --git a/google/cloud/storage/async/writer_connection.h b/google/cloud/storage/async/writer_connection.h index 43d6348f055ae..30eff974c6ef2 100644 --- a/google/cloud/storage/async/writer_connection.h +++ b/google/cloud/storage/async/writer_connection.h @@ -125,6 +125,12 @@ class AsyncWriterConnection { /// Returns the latest write handle, if any. virtual absl::optional WriteHandle() const = 0; + + /// Returns the latest persisted data checksums, if any. + virtual absl::optional + PersistedChecksums() const { + return absl::nullopt; + } }; /** diff --git a/google/cloud/storage/internal/async/connection_impl.cc b/google/cloud/storage/internal/async/connection_impl.cc index 7e123e61ce06c..8aff6b5fd4c9b 100644 --- a/google/cloud/storage/internal/async/connection_impl.cc +++ b/google/cloud/storage/internal/async/connection_impl.cc @@ -110,6 +110,47 @@ std::unique_ptr CreateHashFunction( return storage::internal::CreateNullHashFunction(); } +StatusOr> MakeAppendableWriter( + google::cloud::internal::ImmutableOptions const& current, + google::storage::v2::BidiWriteObjectRequest request, + std::int64_t persisted_size, + std::function>( + google::storage::v2::BidiWriteObjectRequest)> + factory, + StatusOr rpc) { + if (!rpc) return std::move(rpc).status(); + + std::shared_ptr hash; + std::unique_ptr impl; + + if (rpc->first_response.has_resource()) { + auto const& resource = rpc->first_response.resource(); + if (current->get() && + resource.has_checksums() && resource.checksums().has_crc32c()) { + hash = std::make_shared< + ::google::cloud::storage::internal::Crc32cHashFunction>( + resource.checksums().crc32c(), resource.size()); + } else { + hash = CreateHashFunction(*current); + } + impl = std::make_unique( + current, request, std::move(rpc->stream), hash, resource, false); + } else { + persisted_size = rpc->first_response.persisted_size(); + hash = CreateHashFunction(*current); + auto checksums = rpc->first_response.has_persisted_data_checksums() + ? absl::make_optional( + rpc->first_response.persisted_data_checksums()) + : absl::nullopt; + impl = std::make_unique( + current, request, std::move(rpc->stream), hash, persisted_size, false, + checksums); + } + return MakeWriterConnectionResumed(std::move(factory), std::move(impl), + std::move(request), std::move(hash), + rpc->first_response, *current); +} + std::unique_ptr CreateHashValidator( google::storage::v2::ReadObjectRequest const& request, Options const& options) { @@ -315,8 +356,6 @@ AsyncConnectionImpl::AppendableObjectUploadImpl(AppendableUploadParams p) { auto current = internal::MakeImmutableOptions(std::move(p.options)); auto request = p.request; std::int64_t persisted_size = 0; - std::shared_ptr hash_function = - CreateHashFunction(*current); auto retry = std::shared_ptr(retry_policy(*current)); auto backoff = @@ -404,24 +443,10 @@ AsyncConnectionImpl::AppendableObjectUploadImpl(AppendableUploadParams p) { auto pending = factory(std::move(request)); return pending.then( [current, request = std::move(p.request), persisted_size, - hash = std::move(hash_function), fa = std::move(factory)](auto f) mutable + fa = std::move(factory)](auto f) mutable -> StatusOr> { - auto rpc = f.get(); - if (!rpc) return std::move(rpc).status(); - std::unique_ptr impl; - if (rpc->first_response.has_resource()) { - impl = std::make_unique( - current, request, std::move(rpc->stream), hash, - rpc->first_response.resource(), false); - } else { - persisted_size = rpc->first_response.persisted_size(); - impl = std::make_unique( - current, request, std::move(rpc->stream), hash, persisted_size, - false); - } - return MakeWriterConnectionResumed(std::move(fa), std::move(impl), - std::move(request), std::move(hash), - rpc->first_response, *current); + return MakeAppendableWriter(current, std::move(request), persisted_size, + std::move(fa), f.get()); }); } diff --git a/google/cloud/storage/internal/async/connection_impl_appendable_upload_test.cc b/google/cloud/storage/internal/async/connection_impl_appendable_upload_test.cc index bb6833b1526a7..49b6348f4be3c 100644 --- a/google/cloud/storage/internal/async/connection_impl_appendable_upload_test.cc +++ b/google/cloud/storage/internal/async/connection_impl_appendable_upload_test.cc @@ -12,10 +12,14 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include "google/cloud/storage/async/options.h" #include "google/cloud/storage/async/retry_policy.h" #include "google/cloud/storage/async/writer_connection.h" #include "google/cloud/storage/internal/async/connection_impl.h" #include "google/cloud/storage/internal/async/default_options.h" +#include "google/cloud/storage/internal/async/write_object.h" +#include "google/cloud/storage/internal/async/writer_connection_impl.h" +#include "google/cloud/storage/internal/crc32c.h" #include "google/cloud/storage/testing/canonical_errors.h" #include "google/cloud/storage/testing/mock_storage_stub.h" #include "google/cloud/common_options.h" @@ -627,6 +631,257 @@ TEST_F(AsyncConnectionImplAppendableTest, AppendableUploadRedirectNoHandle) { next.first.set_value(true); } +TEST_F(AsyncConnectionImplAppendableTest, + StartAppendableObjectUploadWithChecksum) { + auto constexpr kRequestText = R"pb( + write_object_spec { + resource { + bucket: "projects/_/buckets/test-bucket" + name: "test-object" + content_type: "text/plain" + } + } + )pb"; + AsyncSequencer sequencer; + auto mock = std::make_shared(); + + google::storage::v2::Object initial_resource; + initial_resource.set_bucket("projects/_/buckets/test-bucket"); + initial_resource.set_name("test-object"); + initial_resource.set_size(1024); + initial_resource.mutable_checksums()->set_crc32c(12345); // Some dummy CRC + + auto stream = std::make_unique(); + EXPECT_CALL(*stream, Start).WillOnce([&] { + return sequencer.PushBack("Start"); + }); + + EXPECT_CALL(*stream, Read) + .WillOnce([&, initial_resource] { + return sequencer.PushBack("Read(Takeover)") + .then([initial_resource](auto) { + auto response = google::storage::v2::BidiWriteObjectResponse{}; + *response.mutable_resource() = initial_resource; + return absl::make_optional(std::move(response)); + }); + }) + .WillOnce([&, initial_resource] { + return sequencer.PushBack("Read(FinalObject)") + .then([initial_resource](auto) { + auto response = google::storage::v2::BidiWriteObjectResponse{}; + *response.mutable_resource() = initial_resource; + response.mutable_resource()->set_size( + initial_resource.size() + 9); // "some data" size is 9 + return absl::make_optional(std::move(response)); + }); + }); + + EXPECT_CALL(*stream, Cancel).Times(1); + EXPECT_CALL(*stream, Finish).WillOnce([&] { + return sequencer.PushBack("Finish").then([](auto) { return Status{}; }); + }); + + EXPECT_CALL(*stream, Write) + .WillOnce([&](google::storage::v2::BidiWriteObjectRequest const& request, + grpc::WriteOptions wopt) { + EXPECT_TRUE(request.state_lookup()); + EXPECT_FALSE(wopt.is_last_message()); + return sequencer.PushBack("Write(StateLookup)"); + }) + .WillOnce( + [&](google::storage::v2::BidiWriteObjectRequest const& /*request*/, + grpc::WriteOptions wopt) { + EXPECT_FALSE(wopt.is_last_message()); + return sequencer.PushBack("Write(data)"); + }) + .WillOnce([&](google::storage::v2::BidiWriteObjectRequest const& request, + grpc::WriteOptions wopt) { + EXPECT_TRUE(request.finish_write()); + EXPECT_TRUE(wopt.is_last_message()); + // Here we expect full checksums to be set because we had the resource + // in takeover. + EXPECT_TRUE(request.has_object_checksums()); + auto expected_crc = + google::cloud::storage_internal::ExtendCrc32c(12345, "some data"); + EXPECT_EQ(request.object_checksums().crc32c(), expected_crc); + return sequencer.PushBack("Write(Finalize)"); + }); + + EXPECT_CALL(*mock, AsyncBidiWriteObject).WillOnce([&] { + return std::unique_ptr(std::move(stream)); + }); + + internal::AutomaticallyCreatedBackgroundThreads pool(1); + // Enable CRC32C validation in options + auto options = TestOptions().set(true); + auto connection = MakeTestConnection(pool.cq(), mock, options); + + auto request = google::storage::v2::BidiWriteObjectRequest{}; + ASSERT_TRUE(TextFormat::ParseFromString(kRequestText, &request)); + request.mutable_write_object_spec()->set_appendable(true); + + auto pending = connection->StartAppendableObjectUpload( + {std::move(request), connection->options()}); + + auto next = sequencer.PopFrontWithName(); + EXPECT_EQ(next.second, "Start"); + next.first.set_value(true); + + next = sequencer.PopFrontWithName(); + EXPECT_EQ(next.second, "Write(StateLookup)"); + next.first.set_value(true); + + next = sequencer.PopFrontWithName(); + EXPECT_EQ(next.second, "Read(Takeover)"); + next.first.set_value(true); + + auto r = pending.get(); + ASSERT_STATUS_OK(r); + auto writer = *std::move(r); + + // Write some data. + auto w1 = writer->Write(storage::WritePayload("some data")); + next = sequencer.PopFrontWithName(); + EXPECT_EQ(next.second, "Write(data)"); + next.first.set_value(true); + EXPECT_STATUS_OK(w1.get()); + + // Finalize the upload. + auto w2 = writer->Finalize({}); + next = sequencer.PopFrontWithName(); + EXPECT_EQ(next.second, "Write(Finalize)"); + next.first.set_value(true); + next = sequencer.PopFrontWithName(); + EXPECT_EQ(next.second, "Read(FinalObject)"); + next.first.set_value(true); + + auto response = w2.get(); + ASSERT_STATUS_OK(response); + + writer.reset(); + next = sequencer.PopFrontWithName(); + EXPECT_EQ(next.second, "Finish"); + next.first.set_value(true); +} + +TEST_F(AsyncConnectionImplAppendableTest, + ResumeAppendableObjectUploadWithChecksum) { + auto constexpr kRequestText = R"pb( + append_object_spec { object: "test-object" } + )pb"; + AsyncSequencer sequencer; + auto mock = std::make_shared(); + + constexpr std::int64_t kPersistedSize = 16384; + constexpr std::uint32_t kPersistedCrc = 12345; + + auto stream = std::make_unique(); + EXPECT_CALL(*stream, Start).WillOnce([&] { + return sequencer.PushBack("Start"); + }); + + EXPECT_CALL(*stream, Read) + .WillOnce([&] { + return sequencer.PushBack("Read(PersistedSize)").then([](auto) { + auto response = google::storage::v2::BidiWriteObjectResponse{}; + response.set_persisted_size(kPersistedSize); + response.mutable_persisted_data_checksums()->set_crc32c( + kPersistedCrc); + return absl::make_optional(std::move(response)); + }); + }) + .WillOnce([&] { + return sequencer.PushBack("Read(FinalObject)").then([](auto) { + auto response = google::storage::v2::BidiWriteObjectResponse{}; + auto object = google::storage::v2::Object{}; + object.set_bucket("projects/_/buckets/test-bucket"); + object.set_name("test-object"); + object.set_size(kPersistedSize + 9); + *response.mutable_resource() = std::move(object); + return absl::make_optional(std::move(response)); + }); + }); + + EXPECT_CALL(*stream, Cancel).Times(1); + EXPECT_CALL(*stream, Finish).WillOnce([&] { + return sequencer.PushBack("Finish").then([](auto) { return Status{}; }); + }); + + EXPECT_CALL(*stream, Write) + .WillOnce([&](google::storage::v2::BidiWriteObjectRequest const& request, + grpc::WriteOptions wopt) { + EXPECT_TRUE(request.state_lookup()); + EXPECT_FALSE(wopt.is_last_message()); + return sequencer.PushBack("Write(StateLookup)"); + }) + .WillOnce( + [&](google::storage::v2::BidiWriteObjectRequest const& /*request*/, + grpc::WriteOptions wopt) { + EXPECT_FALSE(wopt.is_last_message()); + return sequencer.PushBack("Write(data)"); + }) + .WillOnce([&](google::storage::v2::BidiWriteObjectRequest const& request, + grpc::WriteOptions wopt) { + EXPECT_TRUE(request.finish_write()); + EXPECT_TRUE(wopt.is_last_message()); + EXPECT_TRUE(request.has_object_checksums()); + EXPECT_EQ(request.object_checksums().crc32c(), 2901820631); + return sequencer.PushBack("Write(Finalize)"); + }); + + EXPECT_CALL(*mock, AsyncBidiWriteObject) + .WillOnce([&](auto const&, auto, auto) { + return std::unique_ptr(std::move(stream)); + }); + + internal::AutomaticallyCreatedBackgroundThreads pool(1); + auto options = TestOptions().set(true); + auto connection = MakeTestConnection(pool.cq(), mock, options); + + auto request = google::storage::v2::BidiWriteObjectRequest{}; + ASSERT_TRUE(TextFormat::ParseFromString(kRequestText, &request)); + auto pending = connection->ResumeAppendableObjectUpload( + {std::move(request), connection->options()}); + + auto next = sequencer.PopFrontWithName(); + EXPECT_EQ(next.second, "Start"); + next.first.set_value(true); + + next = sequencer.PopFrontWithName(); + EXPECT_EQ(next.second, "Write(StateLookup)"); + next.first.set_value(true); + + next = sequencer.PopFrontWithName(); + EXPECT_EQ(next.second, "Read(PersistedSize)"); + next.first.set_value(true); + + auto r = pending.get(); + ASSERT_STATUS_OK(r); + auto writer = *std::move(r); + + auto w1 = writer->Write(storage::WritePayload("some data")); + next = sequencer.PopFrontWithName(); + EXPECT_EQ(next.second, "Write(data)"); + next.first.set_value(true); + EXPECT_STATUS_OK(w1.get()); + + auto w2 = writer->Finalize({}); + next = sequencer.PopFrontWithName(); + EXPECT_EQ(next.second, "Write(Finalize)"); + next.first.set_value(true); + next = sequencer.PopFrontWithName(); + EXPECT_EQ(next.second, "Read(FinalObject)"); + next.first.set_value(true); + + auto response = w2.get(); + ASSERT_STATUS_OK(response); + + writer.reset(); + next = sequencer.PopFrontWithName(); + EXPECT_EQ(next.second, "Finish"); + next.first.set_value(true); +} + } // namespace GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_END } // namespace storage_internal diff --git a/google/cloud/storage/internal/async/writer_connection_impl.cc b/google/cloud/storage/internal/async/writer_connection_impl.cc index a97a62960e3f6..140cd10463d3f 100644 --- a/google/cloud/storage/internal/async/writer_connection_impl.cc +++ b/google/cloud/storage/internal/async/writer_connection_impl.cc @@ -18,6 +18,7 @@ #include "google/cloud/storage/internal/async/write_payload_impl.h" #include "google/cloud/storage/internal/grpc/ctype_cord_workaround.h" #include "google/cloud/storage/internal/grpc/object_metadata_parser.h" +#include "google/cloud/storage/internal/hash_function_impl.h" #include "google/cloud/internal/make_status.h" namespace google { @@ -49,11 +50,14 @@ AsyncWriterConnectionImpl::AsyncWriterConnectionImpl( google::storage::v2::BidiWriteObjectRequest request, std::unique_ptr impl, std::shared_ptr hash_function, - std::int64_t persisted_size, bool first_request) + std::int64_t persisted_size, bool first_request, + absl::optional + persisted_data_checksums) : AsyncWriterConnectionImpl( std::move(options), std::move(request), std::move(impl), std::move(hash_function), PersistedStateType(persisted_size), - /*offset=*/persisted_size, std::move(first_request)) {} + /*offset=*/persisted_size, std::move(first_request), + std::move(persisted_data_checksums)) {} AsyncWriterConnectionImpl::AsyncWriterConnectionImpl( google::cloud::internal::ImmutableOptions options, @@ -64,14 +68,17 @@ AsyncWriterConnectionImpl::AsyncWriterConnectionImpl( : AsyncWriterConnectionImpl( std::move(options), std::move(request), std::move(impl), std::move(hash_function), PersistedStateType(metadata), - /*offset=*/metadata.size(), std::move(first_request)) {} + /*offset=*/metadata.size(), std::move(first_request), absl::nullopt) { +} AsyncWriterConnectionImpl::AsyncWriterConnectionImpl( google::cloud::internal::ImmutableOptions options, google::storage::v2::BidiWriteObjectRequest request, std::unique_ptr impl, std::shared_ptr hash_function, - PersistedStateType persisted_state, std::int64_t offset, bool first_request) + PersistedStateType persisted_state, std::int64_t offset, bool first_request, + absl::optional + persisted_data_checksums) : options_(std::move(options)), impl_(std::move(impl)), request_(std::move(request)), @@ -79,7 +86,8 @@ AsyncWriterConnectionImpl::AsyncWriterConnectionImpl( persisted_state_(std::move(persisted_state)), offset_(offset), first_request_(std::move(first_request)), - finished_(on_finish_.get_future()) { + finished_(on_finish_.get_future()), + persisted_data_checksums_(std::move(persisted_data_checksums)) { request_.clear_object_checksums(); request_.set_state_lookup(false); request_.set_flush(false); @@ -138,10 +146,15 @@ AsyncWriterConnectionImpl::Finalize(storage::WritePayload payload) { auto p = WritePayloadImpl::GetImpl(payload); auto size = p.size(); - auto action = request_.has_append_object_spec() || - request_.write_object_spec().appendable() - ? PartialUpload::kFinalize - : PartialUpload::kFinalizeWithChecksum; + auto action = PartialUpload::kFinalizeWithChecksum; + if (request_.has_append_object_spec() || + request_.write_object_spec().appendable()) { + if (!absl::holds_alternative( + persisted_state_) && + !persisted_data_checksums_.has_value()) { + action = PartialUpload::kFinalize; + } + } auto coro = PartialUpload::Call(impl_, hash_function_, std::move(write), std::move(p), std::move(action)); return coro->Start().then([coro, size, this](auto f) mutable { @@ -257,6 +270,13 @@ future> AsyncWriterConnectionImpl::OnQuery( } if (response->has_persisted_size()) { persisted_state_ = response->persisted_size(); + + if (response->has_persisted_data_checksums()) { + auto const& checksums = response->persisted_data_checksums(); + if (checksums.has_crc32c()) { + persisted_data_checksums_ = checksums; + } + } return make_ready_future(make_status_or(response->persisted_size())); } if (response->has_resource()) { diff --git a/google/cloud/storage/internal/async/writer_connection_impl.h b/google/cloud/storage/internal/async/writer_connection_impl.h index 4cc1e979f1fe0..49c7a94b357e5 100644 --- a/google/cloud/storage/internal/async/writer_connection_impl.h +++ b/google/cloud/storage/internal/async/writer_connection_impl.h @@ -42,7 +42,9 @@ class AsyncWriterConnectionImpl : public storage::AsyncWriterConnection { google::storage::v2::BidiWriteObjectRequest request, std::unique_ptr impl, std::shared_ptr hash_function, - std::int64_t persisted_size, bool first_request = true); + std::int64_t persisted_size, bool first_request = true, + absl::optional + persisted_data_checksums = absl::nullopt); explicit AsyncWriterConnectionImpl( google::cloud::internal::ImmutableOptions options, google::storage::v2::BidiWriteObjectRequest request, @@ -58,6 +60,10 @@ class AsyncWriterConnectionImpl : public storage::AsyncWriterConnection { const override { return latest_write_handle_; } + absl::optional PersistedChecksums() + const override { + return persisted_data_checksums_; + } absl::variant PersistedState() const override; @@ -77,7 +83,9 @@ class AsyncWriterConnectionImpl : public storage::AsyncWriterConnection { std::unique_ptr impl, std::shared_ptr hash_function, PersistedStateType persisted_state, std::int64_t offset, - bool first_request = true); + bool first_request = true, + absl::optional + persisted_data_checksums = absl::nullopt); google::storage::v2::BidiWriteObjectRequest MakeRequest(); @@ -113,6 +121,10 @@ class AsyncWriterConnectionImpl : public storage::AsyncWriterConnection { // Track the latest write handle seen in responses. absl::optional latest_write_handle_; + // Track the latest persisted data checksums seen in responses. + absl::optional + persisted_data_checksums_; + std::mutex mu_; }; diff --git a/google/cloud/storage/internal/async/writer_connection_resumed.cc b/google/cloud/storage/internal/async/writer_connection_resumed.cc index 5ae78d307bd59..0dd2a6ace9897 100644 --- a/google/cloud/storage/internal/async/writer_connection_resumed.cc +++ b/google/cloud/storage/internal/async/writer_connection_resumed.cc @@ -14,6 +14,8 @@ #include "google/cloud/storage/internal/async/writer_connection_resumed.h" #include "google/cloud/storage/internal/async/write_payload_impl.h" +#include "google/cloud/storage/internal/async/writer_connection_impl.h" +#include "google/cloud/storage/internal/hash_function_impl.h" #include "google/cloud/future.h" #include "google/cloud/internal/make_status.h" #include "google/cloud/status.h" @@ -447,9 +449,19 @@ class AsyncWriterConnectionResumedState } // Regular resume succeeded, object not finalized. Continue writing. auto persisted_offset = absl::get(state); + + auto checksums = impl_->PersistedChecksums(); + + auto hash = hash_function_; + if (checksums && checksums->has_crc32c()) { + hash = std::make_shared< + ::google::cloud::storage::internal::Crc32cHashFunction>( + checksums->crc32c(), persisted_offset); + } + impl_ = std::make_unique( - options_, initial_request_, std::move(res->stream), hash_function_, - persisted_offset, false); + options_, initial_request_, std::move(res->stream), std::move(hash), + persisted_offset, false, checksums); // OnQuery will restart the WriteLoop if necessary. OnQuery(std::move(lk), persisted_offset); } diff --git a/google/cloud/storage/internal/hash_function_impl.h b/google/cloud/storage/internal/hash_function_impl.h index b3a8e68eeebfe..6b028d3cf0bab 100644 --- a/google/cloud/storage/internal/hash_function_impl.h +++ b/google/cloud/storage/internal/hash_function_impl.h @@ -106,6 +106,9 @@ class MD5HashFunction : public HashFunction { class Crc32cHashFunction : public HashFunction { public: Crc32cHashFunction() = default; + explicit Crc32cHashFunction(std::uint32_t initial_crc, + std::int64_t initial_offset) + : current_(initial_crc), minimum_offset_(initial_offset) {} Crc32cHashFunction(Crc32cHashFunction const&) = delete; Crc32cHashFunction& operator=(Crc32cHashFunction const&) = delete;