Skip to content

Commit 365dd03

Browse files
committed
RDB modules values serialization format version 2.
The original RDB serialization format was not parsable without the module loaded, because the structure was managed only by the module itself. Moreover, RDB is a streaming protocol in the sense that it is both produced in an append-only fashion, and is also sometimes directly sent to the socket (in the case of diskless replication). The fact that module values cannot be parsed without the relevant module loaded is a problem in many ways: RDB checking tools must have the modules loaded even for doing things not involving the value at all, like splitting an RDB into N RDBs by key or the like, or just checking the RDB for sanity. In theory, module values could be just a blob of data with a prefixed length in order for us to be able to skip it. However, prefixing the values with a length would mean one of the following: 1. To be able to write some data at a previous offset. This breaks streaming. 2. To buffer values before outputting them. This hurts performance. 3. To have some chunked RDB output format. This breaks simplicity. Moreover, the above solution still makes module values a totally opaque matter, with the following problems: 1. The RDB check tool can just skip the value without being able to at least check the general structure. For datasets composed mostly of module values, this means just checking the outer level of the RDB, without actually doing any check on most of the data itself. 2. It is not possible to do any recovery or processing of data for which a module no longer exists in the future, or is unknown. So this commit implements a different solution. The modules RDB serialization API is composed of well-defined calls to store integers, floats, doubles or strings. After this commit, the parts generated by the module API have a one-byte prefix for each of the above emitted parts, and there is a final EOF byte as well.
So even if we don't know exactly how to interpret a module value, we can always parse it at a high level, check the overall structure, understand the types used to store the information, and easily skip the whole value. The change is backward compatible: older RDB files can still be loaded, since the new encoding has a new RDB type: MODULE_2 (of value 7). The commit also implements the ability to check RDB files for sanity, taking advantage of the new feature.
1 parent c399872 commit 365dd03

File tree

4 files changed

+127
-28
lines changed

4 files changed

+127
-28
lines changed

src/module.c

Lines changed: 51 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -2705,11 +2705,13 @@ moduleType *moduleTypeLookupModuleByID(uint64_t id) {
27052705
}
27062706

27072707
/* Turn an (unresolved) module ID into a type name, to show the user an
2708-
* error when RDB files contain module data we can't load. */
2708+
* error when RDB files contain module data we can't load.
2709+
* The buffer pointed by 'name' must be 10 bytes at least. The function will
2710+
* fill it with a null terminated module name. */
27092711
void moduleTypeNameByID(char *name, uint64_t moduleid) {
27102712
const char *cset = ModuleTypeNameCharSet;
27112713

2712-
name[0] = '\0';
2714+
name[9] = '\0';
27132715
char *p = name+8;
27142716
moduleid >>= 10;
27152717
for (int j = 0; j < 9; j++) {
@@ -2877,7 +2879,8 @@ void moduleRDBLoadError(RedisModuleIO *io) {
28772879
* data types. */
28782880
void RM_SaveUnsigned(RedisModuleIO *io, uint64_t value) {
28792881
if (io->error) return;
2880-
int retval = rdbSaveLen(io->rio, value);
2882+
int retval = rdbSaveLen(io->rio, RDB_MODULE_OPCODE_UINT);
2883+
if (retval != -1) rdbSaveLen(io->rio, value);
28812884
if (retval == -1) {
28822885
io->error = 1;
28832886
} else {
@@ -2889,13 +2892,18 @@ void RM_SaveUnsigned(RedisModuleIO *io, uint64_t value) {
28892892
* be called in the context of the rdb_load method of modules implementing
28902893
* new data types. */
28912894
uint64_t RM_LoadUnsigned(RedisModuleIO *io) {
2895+
if (io->ver == 2) {
2896+
uint64_t opcode = rdbLoadLen(io->rio,NULL);
2897+
if (opcode != RDB_MODULE_OPCODE_UINT) goto loaderr;
2898+
}
28922899
uint64_t value;
28932900
int retval = rdbLoadLenByRef(io->rio, NULL, &value);
2894-
if (retval == -1) {
2895-
moduleRDBLoadError(io);
2896-
return 0; /* Never reached. */
2897-
}
2901+
if (retval == -1) goto loaderr;
28982902
return value;
2903+
2904+
loaderr:
2905+
moduleRDBLoadError(io);
2906+
return 0; /* Never reached. */
28992907
}
29002908

29012909
/* Like RedisModule_SaveUnsigned() but for signed 64 bit values. */
@@ -2920,7 +2928,8 @@ int64_t RM_LoadSigned(RedisModuleIO *io) {
29202928
* the RDB file. */
29212929
void RM_SaveString(RedisModuleIO *io, RedisModuleString *s) {
29222930
if (io->error) return;
2923-
int retval = rdbSaveStringObject(io->rio,s);
2931+
int retval = rdbSaveLen(io->rio, RDB_MODULE_OPCODE_STRING);
2932+
if (retval != -1) retval = rdbSaveStringObject(io->rio,s);
29242933
if (retval == -1) {
29252934
io->error = 1;
29262935
} else {
@@ -2932,7 +2941,8 @@ void RM_SaveString(RedisModuleIO *io, RedisModuleString *s) {
29322941
* as input. */
29332942
void RM_SaveStringBuffer(RedisModuleIO *io, const char *str, size_t len) {
29342943
if (io->error) return;
2935-
int retval = rdbSaveRawString(io->rio,(unsigned char*)str,len);
2944+
int retval = rdbSaveLen(io->rio, RDB_MODULE_OPCODE_STRING);
2945+
if (retval != -1) retval = rdbSaveRawString(io->rio,(unsigned char*)str,len);
29362946
if (retval == -1) {
29372947
io->error = 1;
29382948
} else {
@@ -2942,13 +2952,18 @@ void RM_SaveStringBuffer(RedisModuleIO *io, const char *str, size_t len) {
29422952

29432953
/* Implements RM_LoadString() and RM_LoadStringBuffer() */
29442954
void *moduleLoadString(RedisModuleIO *io, int plain, size_t *lenptr) {
2955+
if (io->ver == 2) {
2956+
uint64_t opcode = rdbLoadLen(io->rio,NULL);
2957+
if (opcode != RDB_MODULE_OPCODE_STRING) goto loaderr;
2958+
}
29452959
void *s = rdbGenericLoadStringObject(io->rio,
29462960
plain ? RDB_LOAD_PLAIN : RDB_LOAD_NONE, lenptr);
2947-
if (s == NULL) {
2948-
moduleRDBLoadError(io);
2949-
return NULL; /* Never reached. */
2950-
}
2961+
if (s == NULL) goto loaderr;
29512962
return s;
2963+
2964+
loaderr:
2965+
moduleRDBLoadError(io);
2966+
return NULL; /* Never reached. */
29522967
}
29532968

29542969
/* In the context of the rdb_load method of a module data type, loads a string
@@ -2980,7 +2995,8 @@ char *RM_LoadStringBuffer(RedisModuleIO *io, size_t *lenptr) {
29802995
* It is possible to load back the value with RedisModule_LoadDouble(). */
29812996
void RM_SaveDouble(RedisModuleIO *io, double value) {
29822997
if (io->error) return;
2983-
int retval = rdbSaveBinaryDoubleValue(io->rio, value);
2998+
int retval = rdbSaveLen(io->rio, RDB_MODULE_OPCODE_DOUBLE);
2999+
if (retval != -1) retval = rdbSaveBinaryDoubleValue(io->rio, value);
29843000
if (retval == -1) {
29853001
io->error = 1;
29863002
} else {
@@ -2991,21 +3007,27 @@ void RM_SaveDouble(RedisModuleIO *io, double value) {
29913007
/* In the context of the rdb_save method of a module data type, loads back the
29923008
* double value saved by RedisModule_SaveDouble(). */
29933009
double RM_LoadDouble(RedisModuleIO *io) {
3010+
if (io->ver == 2) {
3011+
uint64_t opcode = rdbLoadLen(io->rio,NULL);
3012+
if (opcode != RDB_MODULE_OPCODE_DOUBLE) goto loaderr;
3013+
}
29943014
double value;
29953015
int retval = rdbLoadBinaryDoubleValue(io->rio, &value);
2996-
if (retval == -1) {
2997-
moduleRDBLoadError(io);
2998-
return 0; /* Never reached. */
2999-
}
3016+
if (retval == -1) goto loaderr;
30003017
return value;
3018+
3019+
loaderr:
3020+
moduleRDBLoadError(io);
3021+
return 0; /* Never reached. */
30013022
}
30023023

3003-
/* In the context of the rdb_save method of a module data type, saves a float
3024+
/* In the context of the rdb_save method of a module data type, saves a float
30043025
* value to the RDB file. The float can be a valid number, a NaN or infinity.
30053026
* It is possible to load back the value with RedisModule_LoadFloat(). */
30063027
void RM_SaveFloat(RedisModuleIO *io, float value) {
30073028
if (io->error) return;
3008-
int retval = rdbSaveBinaryFloatValue(io->rio, value);
3029+
int retval = rdbSaveLen(io->rio, RDB_MODULE_OPCODE_FLOAT);
3030+
if (retval != -1) retval = rdbSaveBinaryFloatValue(io->rio, value);
30093031
if (retval == -1) {
30103032
io->error = 1;
30113033
} else {
@@ -3016,13 +3038,18 @@ void RM_SaveFloat(RedisModuleIO *io, float value) {
30163038
/* In the context of the rdb_save method of a module data type, loads back the
30173039
* float value saved by RedisModule_SaveFloat(). */
30183040
float RM_LoadFloat(RedisModuleIO *io) {
3041+
if (io->ver == 2) {
3042+
uint64_t opcode = rdbLoadLen(io->rio,NULL);
3043+
if (opcode != RDB_MODULE_OPCODE_FLOAT) goto loaderr;
3044+
}
30193045
float value;
30203046
int retval = rdbLoadBinaryFloatValue(io->rio, &value);
3021-
if (retval == -1) {
3022-
moduleRDBLoadError(io);
3023-
return 0; /* Never reached. */
3024-
}
3047+
if (retval == -1) goto loaderr;
30253048
return value;
3049+
3050+
loaderr:
3051+
moduleRDBLoadError(io);
3052+
return 0; /* Never reached. */
30263053
}
30273054

30283055
/* --------------------------------------------------------------------------

src/rdb.c

Lines changed: 60 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -623,7 +623,7 @@ int rdbSaveObjectType(rio *rdb, robj *o) {
623623
else
624624
serverPanic("Unknown hash encoding");
625625
case OBJ_MODULE:
626-
return rdbSaveType(rdb,RDB_TYPE_MODULE);
626+
return rdbSaveType(rdb,RDB_TYPE_MODULE_2);
627627
default:
628628
serverPanic("Unknown object type");
629629
}
@@ -775,8 +775,12 @@ ssize_t rdbSaveObject(rio *rdb, robj *o) {
775775
if (retval == -1) return -1;
776776
io.bytes += retval;
777777

778-
/* Then write the module-specific representation. */
778+
/* Then write the module-specific representation + EOF marker. */
779779
mt->rdb_save(&io,mv->value);
780+
retval = rdbSaveLen(rdb,RDB_MODULE_OPCODE_EOF);
781+
if (retval == -1) return -1;
782+
io.bytes += retval;
783+
780784
if (io.ctx) {
781785
moduleFreeContext(io.ctx);
782786
zfree(io.ctx);
@@ -1102,6 +1106,45 @@ void rdbRemoveTempFile(pid_t childpid) {
11021106
unlink(tmpfile);
11031107
}
11041108

1109+
/* This function is called by rdbLoadObject() when the code is in RDB-check
1110+
* mode and we find a module value of type 2 that can be parsed without
1111+
* the need of the actual module. The value is parsed for errors, finally
1112+
* a dummy redis object is returned just to conform to the API. */
1113+
robj *rdbLoadCheckModuleValue(rio *rdb, char *modulename) {
1114+
uint64_t opcode;
1115+
while((opcode = rdbLoadLen(rdb,NULL)) != RDB_MODULE_OPCODE_EOF) {
1116+
if (opcode == RDB_MODULE_OPCODE_SINT ||
1117+
opcode == RDB_MODULE_OPCODE_UINT)
1118+
{
1119+
uint64_t len;
1120+
if (rdbLoadLenByRef(rdb,NULL,&len) == -1) {
1121+
rdbExitReportCorruptRDB(
1122+
"Error reading integer from module %s value", modulename);
1123+
}
1124+
} else if (opcode == RDB_MODULE_OPCODE_STRING) {
1125+
robj *o = rdbGenericLoadStringObject(rdb,RDB_LOAD_NONE,NULL);
1126+
if (o == NULL) {
1127+
rdbExitReportCorruptRDB(
1128+
"Error reading string from module %s value", modulename);
1129+
}
1130+
decrRefCount(o);
1131+
} else if (opcode == RDB_MODULE_OPCODE_FLOAT) {
1132+
float val;
1133+
if (rdbLoadBinaryFloatValue(rdb,&val) == -1) {
1134+
rdbExitReportCorruptRDB(
1135+
"Error reading float from module %s value", modulename);
1136+
}
1137+
} else if (opcode == RDB_MODULE_OPCODE_DOUBLE) {
1138+
double val;
1139+
if (rdbLoadBinaryDoubleValue(rdb,&val) == -1) {
1140+
rdbExitReportCorruptRDB(
1141+
"Error reading double from module %s value", modulename);
1142+
}
1143+
}
1144+
}
1145+
return createStringObject("module-dummy-value",18);
1146+
}
1147+
11051148
/* Load a Redis object of the specified type from the specified file.
11061149
* On success a newly allocated object is returned, otherwise NULL. */
11071150
robj *rdbLoadObject(int rdbtype, rio *rdb) {
@@ -1353,21 +1396,35 @@ robj *rdbLoadObject(int rdbtype, rio *rdb) {
13531396
rdbExitReportCorruptRDB("Unknown RDB encoding type %d",rdbtype);
13541397
break;
13551398
}
1356-
} else if (rdbtype == RDB_TYPE_MODULE) {
1399+
} else if (rdbtype == RDB_TYPE_MODULE || rdbtype == RDB_TYPE_MODULE_2) {
13571400
uint64_t moduleid = rdbLoadLen(rdb,NULL);
13581401
moduleType *mt = moduleTypeLookupModuleByID(moduleid);
13591402
char name[10];
13601403

1404+
if (rdbCheckMode && rdbtype == RDB_TYPE_MODULE_2)
1405+
return rdbLoadCheckModuleValue(rdb,name);
1406+
13611407
if (mt == NULL) {
13621408
moduleTypeNameByID(name,moduleid);
13631409
serverLog(LL_WARNING,"The RDB file contains module data I can't load: no matching module '%s'", name);
13641410
exit(1);
13651411
}
13661412
RedisModuleIO io;
13671413
moduleInitIOContext(io,mt,rdb);
1414+
io.ver = (rdbtype == RDB_TYPE_MODULE) ? 1 : 2;
13681415
/* Call the rdb_load method of the module providing the 10 bit
13691416
* encoding version in the lower 10 bits of the module ID. */
13701417
void *ptr = mt->rdb_load(&io,moduleid&1023);
1418+
1419+
/* Module v2 serialization has an EOF mark at the end. */
1420+
if (io.ver == 2) {
1421+
uint64_t eof = rdbLoadLen(rdb,NULL);
1422+
if (eof != RDB_MODULE_OPCODE_EOF) {
1423+
serverLog(LL_WARNING,"The RDB file contains module data for the module '%s' that is not terminated by the proper module value EOF marker", name);
1424+
exit(1);
1425+
}
1426+
}
1427+
13711428
if (ptr == NULL) {
13721429
moduleTypeNameByID(name,moduleid);
13731430
serverLog(LL_WARNING,"The RDB file contains module data for the module type '%s', that the responsible module is not able to load. Check for modules log above for additional clues.", name);

src/rdb.h

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,8 @@
7878
#define RDB_TYPE_HASH 4
7979
#define RDB_TYPE_ZSET_2 5 /* ZSET version 2 with doubles stored in binary. */
8080
#define RDB_TYPE_MODULE 6
81+
#define RDB_TYPE_MODULE_2 7 /* Module value with annotations for parsing without
82+
the generating module being loaded. */
8183
/* NOTE: WHEN ADDING NEW RDB TYPE, UPDATE rdbIsObjectType() BELOW */
8284

8385
/* Object types for encoded objects. */
@@ -90,7 +92,7 @@
9092
/* NOTE: WHEN ADDING NEW RDB TYPE, UPDATE rdbIsObjectType() BELOW */
9193

9294
/* Test if a type is an object type. */
93-
#define rdbIsObjectType(t) ((t >= 0 && t <= 6) || (t >= 9 && t <= 14))
95+
#define rdbIsObjectType(t) ((t >= 0 && t <= 7) || (t >= 9 && t <= 14))
9496

9597
/* Special RDB opcodes (saved/loaded with rdbSaveType/rdbLoadType). */
9698
#define RDB_OPCODE_AUX 250
@@ -100,6 +102,14 @@
100102
#define RDB_OPCODE_SELECTDB 254
101103
#define RDB_OPCODE_EOF 255
102104

105+
/* Module serialized values sub opcodes */
106+
#define RDB_MODULE_OPCODE_EOF 0 /* End of module value. */
107+
#define RDB_MODULE_OPCODE_SINT 1 /* Signed integer. */
108+
#define RDB_MODULE_OPCODE_UINT 2 /* Unsigned integer. */
109+
#define RDB_MODULE_OPCODE_FLOAT 3 /* Float. */
110+
#define RDB_MODULE_OPCODE_DOUBLE 4 /* Double. */
111+
#define RDB_MODULE_OPCODE_STRING 5 /* String. */
112+
103113
/* rdbLoad...() functions flags. */
104114
#define RDB_LOAD_NONE 0
105115
#define RDB_LOAD_ENC (1<<0)

src/server.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -530,14 +530,19 @@ typedef struct RedisModuleIO {
530530
rio *rio; /* Rio stream. */
531531
moduleType *type; /* Module type doing the operation. */
532532
int error; /* True if error condition happened. */
533+
int ver; /* Module serialization version: 1 (old),
534+
* 2 (current version with opcodes annotation). */
533535
struct RedisModuleCtx *ctx; /* Optional context, see RM_GetContextFromIO()*/
534536
} RedisModuleIO;
535537

538+
/* Macro to initialize an IO context. Note that the 'ver' field is populated
539+
* inside rdb.c according to the version of the value to load. */
536540
#define moduleInitIOContext(iovar,mtype,rioptr) do { \
537541
iovar.rio = rioptr; \
538542
iovar.type = mtype; \
539543
iovar.bytes = 0; \
540544
iovar.error = 0; \
545+
iovar.ver = 0; \
541546
iovar.ctx = NULL; \
542547
} while(0);
543548

0 commit comments

Comments
 (0)