35
35
#include " public/data_loading/filename_utils.h"
36
36
#include " public/data_loading/readers/delta_record_stream_reader.h"
37
37
#include " public/data_loading/riegeli_metadata.pb.h"
38
+ #include " public/sharding/sharding_function.h"
38
39
#include " src/cpp/telemetry/telemetry_provider.h"
39
40
40
41
namespace kv_server {
@@ -80,6 +81,14 @@ absl::Status ValidateRequiredParams(GenerateSnapshotCommand::Params& params) {
80
81
!IsDeltaFilename (params.ending_delta_file )) {
81
82
return absl::InvalidArgumentError (" Ending delta file is not valid." );
82
83
}
84
+ if (params.shard_number >= 0 &&
85
+ params.number_of_shards <= params.shard_number ) {
86
+ return absl::InvalidArgumentError (absl::StrCat (
87
+ " Shard metadata is invalid. shard_number is " , params.shard_number ,
88
+ " and number_of_shards is " , params.number_of_shards ,
89
+ " . Valid inputs must satisfy the requirement: 0 <= shard_number < "
90
+ " number_of_shards" ));
91
+ }
83
92
return absl::OkStatus ();
84
93
}
85
94
@@ -109,6 +118,10 @@ absl::StatusOr<KVFileMetadata> CreateSnapshotMetadata(
109
118
auto snapshot_metadata = metadata.mutable_snapshot ();
110
119
*snapshot_metadata->mutable_starting_file () = params.starting_file ;
111
120
*snapshot_metadata->mutable_ending_delta_file () = params.ending_delta_file ;
121
+ if (params.shard_number >= 0 ) {
122
+ auto * sharding_metadata = metadata.mutable_sharding_metadata ();
123
+ sharding_metadata->set_shard_num (params.shard_number );
124
+ }
112
125
return metadata;
113
126
}
114
127
@@ -117,6 +130,32 @@ void ResetInputStream(std::istream& istream) {
117
130
istream.seekg (0 , std::ios::beg);
118
131
}
119
132
133
+ absl::Status WriteRecordsToSnapshotStream (
134
+ const GenerateSnapshotCommand::Params& params,
135
+ DeltaRecordStreamReader<std::istream>& record_reader,
136
+ SnapshotStreamWriter<std::ostream>& snapshot_writer) {
137
+ ShardingFunction sharding_function (/* seed=*/ " " );
138
+ return record_reader.ReadRecords (
139
+ [¶ms, &snapshot_writer,
140
+ &sharding_function](DataRecordStruct data_record) {
141
+ if (params.shard_number >= 0 &&
142
+ std::holds_alternative<KeyValueMutationRecordStruct>(
143
+ data_record.record )) {
144
+ KeyValueMutationRecordStruct record_struct =
145
+ std::get<KeyValueMutationRecordStruct>(data_record.record );
146
+ auto record_shard_num = sharding_function.GetShardNumForKey (
147
+ record_struct.key , params.number_of_shards );
148
+ if (params.shard_number != record_shard_num) {
149
+ LOG (INFO) << " Skipping record with key: " << record_struct.key
150
+ << " . The record belongs to shard: " << record_shard_num
151
+ << " , but shard_number is " << params.shard_number ;
152
+ return absl::OkStatus ();
153
+ }
154
+ }
155
+ return snapshot_writer.WriteRecord (data_record);
156
+ });
157
+ }
158
+
120
159
absl::StatusOr<std::string> WriteBaseSnapshotData (
121
160
const GenerateSnapshotCommand::Params& params,
122
161
BlobStorageClient& blob_client,
@@ -129,13 +168,8 @@ absl::StatusOr<std::string> WriteBaseSnapshotData(
129
168
if (!metadata.ok ()) {
130
169
return metadata.status ();
131
170
}
132
- if (blob_reader->CanSeek ()) {
133
- ResetInputStream (blob_reader->Stream ());
134
- } else {
135
- blob_reader = blob_client.GetBlobReader (
136
- {.bucket = params.data_dir .data (), .key = params.starting_file .data ()});
137
- }
138
- if (auto status = snapshot_writer.WriteRecordStream (blob_reader->Stream ());
171
+ if (auto status =
172
+ WriteRecordsToSnapshotStream (params, record_reader, snapshot_writer);
139
173
!status.ok ()) {
140
174
return status;
141
175
}
@@ -163,17 +197,8 @@ absl::Status WriteDeltaFilesToSnapshot(
163
197
auto blob_reader = blob_client.GetBlobReader (
164
198
{.bucket = params.data_dir .data (), .key = delta_file});
165
199
DeltaRecordStreamReader record_reader (blob_reader->Stream ());
166
- auto metadata = record_reader.ReadMetadata ();
167
- if (!metadata.ok ()) {
168
- return metadata.status ();
169
- }
170
- if (blob_reader->CanSeek ()) {
171
- ResetInputStream (blob_reader->Stream ());
172
- } else {
173
- blob_reader = blob_client.GetBlobReader (
174
- {.bucket = params.data_dir .data (), .key = delta_file});
175
- }
176
- if (auto status = snapshot_writer.WriteRecordStream (blob_reader->Stream ());
200
+ if (auto status = WriteRecordsToSnapshotStream (params, record_reader,
201
+ snapshot_writer);
177
202
!status.ok ()) {
178
203
return status;
179
204
}
0 commit comments