Redis 输入输出的抽象(rio)
1. 概述
rio是Redis对IO操作的一个抽象,可以面向不同的输入输出设备,例如一个缓冲区IO、文件IO和socket IO。
一个rio对象提供一下四个方法:
- read:读操作
- write:写操作
- tell:读写的偏移量
- flush:冲洗缓冲区操作
使用C语言,实现面向对象的思想。
2. rio对象的抽象
struct _rio {
/* Backend functions.
* Since this functions do not tolerate short writes or reads the return
* value is simplified to: zero on error, non zero on complete success. */
// 读,写,读写偏移量、冲洗操作的函数指针,非0表示成功
size_t (*read)(struct _rio *, void *buf, size_t len);
size_t (*write)(struct _rio *, const void *buf, size_t len);
off_t (*tell)(struct _rio *);
int (*flush)(struct _rio *);
/* The update_cksum method if not NULL is used to compute the checksum of
* all the data that was read or written so far. The method should be
* designed so that can be called with the current checksum, and the buf
* and len fields pointing to the new block of data to add to the checksum
* computation. */
// 计算和校验函数
void (*update_cksum)(struct _rio *, const void *buf, size_t len);
/* The current checksum */
// 当前校验和
uint64_t cksum;
/* number of bytes read or written */
// 读或写的字节数
size_t processed_bytes;
/* maximum single read or write chunk size */
// 每次读或写的最大字节数
size_t max_processing_chunk;
/* Backend-specific vars. */
// 读写的各种对象
union {
/*内存缓冲区 In-memory buffer target. */
struct {
sds ptr; //缓冲区的指针,本质是char *
off_t pos; //缓冲区的偏移量
} buffer;
/*标准文件IO Stdio file pointer target. */
struct {
FILE *fp; // 文件指针,指向被打开的文件
off_t buffered; /* 最近一次同步之后所写的字节数 Bytes written since last fsync. */
off_t autosync; /* 写入设置的autosync字节后,会执行fsync()同步 fsync after 'autosync' bytes written. */
} file;
/*文件描述符 Multiple FDs target (used to write to N sockets). */
struct {
int *fds; /*文件描述符数组 File descriptors. */
int *state; /*每一个fd所对应的errno Error state of each fd. 0 (if ok) or errno. */
int numfds; // 数组长度,文件描述符个数
off_t pos; // 偏移量
sds buf; // 缓冲区
} fdset;
} io;
};
typedef struct _rio rio;
下面的函数是流对象的接口。
2.1 读操作
static inline size_t rioRead(rio *r, void *buf, size_t len) {
while (len) {
// 读的字节长度,不能超过每次读或写的最大字节数max_processing_chunk
size_t bytes_to_read = (r->max_processing_chunk && r->max_processing_chunk < len) ? r->max_processing_chunk : len;
// 调用自身的read方法读到buf中
if (r->read(r,buf,bytes_to_read) == 0)
return 0;
// 更新和校验
if (r->update_cksum) r->update_cksum(r,buf,bytes_to_read);
// 更新偏移量,指向下一个读的位置
buf = (char*)buf + bytes_to_read;
// 计算剩余要读的长度
len -= bytes_to_read;
// 更新读或写的字节数
r->processed_bytes += bytes_to_read;
}
return 1;
}
2.2 写操作
static inline size_t rioWrite(rio *r, const void *buf, size_t len) {
while (len) {
// 写的字节长度,不能超过每次读或写的最大字节数max_processing_chunk
size_t bytes_to_write = (r->max_processing_chunk && r->max_processing_chunk < len) ? r->max_processing_chunk : len;
// 更新和校验
if (r->update_cksum) r->update_cksum(r,buf,bytes_to_write);
// 调用自身的write方法写入
if (r->write(r,buf,bytes_to_write) == 0)
return 0;
// 更新偏移量,指向下一个写的位置
buf = (char*)buf + bytes_to_writ;
// 计算剩余写入的长度
len -= bytes_to_write;
// 更新读或写的字节数
r->processed_bytes += bytes_to_write;
}
return 1;
}
2.3 返回当前偏移量
static inline off_t rioTell(rio *r) {
return r->tell(r);
}
2.4 缓冲区冲洗函数
static inline int rioFlush(rio *r) {
return r->flush(r);
}
3. rio对象的实现
对rio抽象的结构体中,使用了一个共用体(union),它可能是三种不同的对象,分别是:
- 缓冲区 IO(Buffer I/O)
- 标准输入输出 IO(Stdio file pointer)
- 文件描述符集合(File descriptors set)
下面给出所有对象的实现:rio.c 和 rio.h 文件详细注释
3.1 缓冲区IO
/* ------------------------- Buffer I/O implementation ----------------------- */
// 缓冲区IO实现
/* Returns 1 or 0 for success/failure. */
// 将len长的buf写到一个缓冲区对象r中
static size_t rioBufferWrite(rio *r, const void *buf, size_t len) {
r->io.buffer.ptr = sdscatlen(r->io.buffer.ptr,(char*)buf,len); //追加操作
r->io.buffer.pos += len; //更新偏移量
return 1;
}
/* Returns 1 or 0 for success/failure. */
// 讲缓冲区对象r读到buf中,读len长
static size_t rioBufferRead(rio *r, void *buf, size_t len) {
// 缓冲区对象的长度小于len,不够读,返回0
if (sdslen(r->io.buffer.ptr)-r->io.buffer.pos < len)
return 0; /* not enough buffer to return len bytes. */
// 读到buf中
memcpy(buf,r->io.buffer.ptr+r->io.buffer.pos,len);
// 更新偏移量
r->io.buffer.pos += len;
return 1;
}
/* Returns read/write position in buffer. */
// 返回缓冲区对象r当前的偏移量
static off_t rioBufferTell(rio *r) {
return r->io.buffer.pos;
}
/* Flushes any buffer to target device if applicable. Returns 1 on success
* and 0 on failures. */
// 清洗缓冲区
static int rioBufferFlush(rio *r) {
UNUSED(r); //void r,强转成void类型对象,缓冲区就相当于释放
return 1; /* Nothing to do, our write just appends to the buffer. */
}
// 定义一个缓冲区对象并初始化方法和成员
static const rio rioBufferIO = {
rioBufferRead,
rioBufferWrite,
rioBufferTell,
rioBufferFlush,
NULL, /* update_checksum */
0, /* current checksum */
0, /* bytes read or written */
0, /* read/write chunk size */
{ { NULL, 0 } } /* union for io-specific vars */
};
// 初始化缓冲区对象r并设置缓冲区的地址
void rioInitWithBuffer(rio *r, sds s) {
*r = rioBufferIO;
r->io.buffer.ptr = s;
r->io.buffer.pos = 0;
}
3.2 标准输入输出 IO
/* --------------------- Stdio file pointer implementation ------------------- */
// 标准文件IO实现
/* Returns 1 or 0 for success/failure. */
// 将len长的buf写入一个文件流对象
static size_t rioFileWrite(rio *r, const void *buf, size_t len) {
size_t retval;
// 调用底层库函数
retval = fwrite(buf,len,1,r->io.file.fp);
// 更新写入的长度
r->io.file.buffered += len;
// 如果已经达到自动的同步autosync所设置的字节数
if (r->io.file.autosync &&
r->io.file.buffered >= r->io.file.autosync)
{
// 冲洗键盘缓冲区中的数据到文件中
fflush(r->io.file.fp);
// 同步操作
aof_fsync(fileno(r->io.file.fp));
// 长度初始化为0
r->io.file.buffered = 0;
}
return retval;
}
/* Returns 1 or 0 for success/failure. */
// 从文件流对象r中读出len长度的字节到buf中
static size_t rioFileRead(rio *r, void *buf, size_t len) {
return fread(buf,len,1,r->io.file.fp);
}
/* Returns read/write position in file. */
// 返回文件流对象的偏移量
static off_t rioFileTell(rio *r) {
return ftello(r->io.file.fp);
}
/* Flushes any buffer to target device if applicable. Returns 1 on success
* and 0 on failures. */
// 清洗文件流
static int rioFileFlush(rio *r) {
return (fflush(r->io.file.fp) == 0) ? 1 : 0;
}
// 初始化一个文件流对象
static const rio rioFileIO = {
rioFileRead,
rioFileWrite,
rioFileTell,
rioFileFlush,
NULL, /* update_checksum */
0, /* current checksum */
0, /* bytes read or written */
0, /* read/write chunk size */
{ { NULL, 0 } } /* union for io-specific vars */
};
// 初始化一个文件流对象且设置对应文件
void rioInitWithFile(rio *r, FILE *fp) {
*r = rioFileIO;
r->io.file.fp = fp;
r->io.file.buffered = 0;
r->io.file.autosync = 0;
}
3.3 文件描述符集合
/* ------------------- File descriptors set implementation ------------------- */
// 文件描述符合集合实现
/* Returns 1 or 0 for success/failure.
* The function returns success as long as we are able to correctly write
* to at least one file descriptor.
*
* When buf is NULL and len is 0, the function performs a flush operation
* if there is some pending buffer, so this function is also used in order
* to implement rioFdsetFlush(). */
// 将buf写入文件描述符集合对象
static size_t rioFdsetWrite(rio *r, const void *buf, size_t len) {
ssize_t retval;
int j;
unsigned char *p = (unsigned char*) buf;
int doflush = (buf == NULL && len == 0); //如果buf为空且len为0,相当于flush操作
/* To start we always append to our buffer. If it gets larger than
* a given size, we actually write to the sockets. */
// 将buf中的内容写到文件描述符集合对象的缓冲区中
if (len) {
r->io.fdset.buf = sdscatlen(r->io.fdset.buf,buf,len);
// 设置写完的标志
len = 0; /* Prevent entering the while below if we don't flush. */
if (sdslen(r->io.fdset.buf) > PROTO_IOBUF_LEN) doflush = 1; //如果缓冲区太大需要冲刷到socket中
}
// 冲洗文件描述符集合对象,设置集合缓冲区长度和集合缓冲区地址
if (doflush) {
p = (unsigned char*) r->io.fdset.buf;
len = sdslen(r->io.fdset.buf);
}
/* Write in little chunchs so that when there are big writes we
* parallelize while the kernel is sending data in background to
* the TCP socket. */
// 一次可能无法冲洗完,需要循环多次
while(len) {
// 一次最多冲洗1M字节
size_t count = len < 1024 ? len : 1024;
int broken = 0;
for (j = 0; j < r->io.fdset.numfds; j++) {
// errno为0表示ok,记录不为0的文件描述符个数
if (r->io.fdset.state[j] != 0) {
/* Skip FDs alraedy in error. */
broken++;
continue;
}
/* Make sure to write 'count' bytes to the socket regardless
* of short writes. */
size_t nwritten = 0;
// 新写的数据一次或多次写够count个字节往第一个文件描述符fd
while(nwritten != count) {
retval = write(r->io.fdset.fds[j],p+nwritten,count-nwritten);
// 写失败,判断是不是写阻塞,是则设置超时
if (retval <= 0) {
/* With blocking sockets, which is the sole user of this
* rio target, EWOULDBLOCK is returned only because of
* the SO_SNDTIMEO socket option, so we translate the error
* into one more recognizable by the user. */
if (retval == -1 && errno == EWOULDBLOCK) errno = ETIMEDOUT;
break;
}
nwritten += retval; //每次加上写成功的字节数
}
// 如果刚才写失败的情况,则将当前的文件描述符状态设置为错误的标记码
if (nwritten != count) {
/* Mark this FD as broken. */
r->io.fdset.state[j] = errno;
if (r->io.fdset.state[j] == 0) r->io.fdset.state[j] = EIO;
}
}
// 所有的文件描述符都出错返回0
if (broken == r->io.fdset.numfds) return 0; /* All the FDs in error. */
// 更新下次要写入的地址和长度
p += count;
len -= count;
r->io.fdset.pos += count; //已写入的偏移量
}
if (doflush) sdsclear(r->io.fdset.buf); //释放集合缓冲区
return 1;
}
/* Returns 1 or 0 for success/failure. */
// 文件描述符集合对象不支持读,直接返回0
static size_t rioFdsetRead(rio *r, void *buf, size_t len) {
UNUSED(r);
UNUSED(buf);
UNUSED(len);
return 0; /* Error, this target does not support reading. */
}
/* Returns read/write position in file. */
// 获取偏移量
static off_t rioFdsetTell(rio *r) {
return r->io.fdset.pos;
}
/* Flushes any buffer to target device if applicable. Returns 1 on success
* and 0 on failures. */
// 清洗缓冲区的值
static int rioFdsetFlush(rio *r) {
/* Our flush is implemented by the write method, that recognizes a
* buffer set to NULL with a count of zero as a flush request. */
return rioFdsetWrite(r,NULL,0);
}
// 初始化一个文件描述符集合对象
static const rio rioFdsetIO = {
rioFdsetRead,
rioFdsetWrite,
rioFdsetTell,
rioFdsetFlush,
NULL, /* update_checksum */
0, /* current checksum */
0, /* bytes read or written */
0, /* read/write chunk size */
{ { NULL, 0 } } /* union for io-specific vars */
};
// 初始化一个文件描述符集合对象并设置成员变量
void rioInitWithFdset(rio *r, int *fds, int numfds) {
int j;
*r = rioFdsetIO;
r->io.fdset.fds = zmalloc(sizeof(int)*numfds);
r->io.fdset.state = zmalloc(sizeof(int)*numfds);
memcpy(r->io.fdset.fds,fds,sizeof(int)*numfds);
for (j = 0; j < numfds; j++) r->io.fdset.state[j] = 0;
r->io.fdset.numfds = numfds;
r->io.fdset.pos = 0;
r->io.fdset.buf = sdsempty();
}
/* release the rio stream. */
// 释放文件描述符集合流对象
void rioFreeFdset(rio *r) {
zfree(r->io.fdset.fds);
zfree(r->io.fdset.state);
sdsfree(r->io.fdset.buf);
}
3.4 三种对象的共同实现
- 校验和函数
/* ---------------------------- Generic functions ---------------------------- */
// 通用函数
/* This function can be installed both in memory and file streams when checksum
* computation is needed. */
// 根据CRC64算法进行校验和
void rioGenericUpdateChecksum(rio *r, const void *buf, size_t len) {
r->cksum = crc64(r->cksum,buf,len);
}
- 设置自动同步的限制字节
/* Set the file-based rio object to auto-fsync every 'bytes' file written.
* By default this is set to zero that means no automatic file sync is
* performed.
*
* This feature is useful in a few contexts since when we rely on OS write
* buffers sometimes the OS buffers way too much, resulting in too many
* disk I/O concentrated in very little time. When we fsync in an explicit
* way instead the I/O pressure is more distributed across time. */
// 设置自动同步的字节数限制,如果bytes为0,则意味着不执行
void rioSetAutoSync(rio *r, off_t bytes) {
serverAssert(r->read == rioFileIO.read); //限制为文件流对象,不对其他对象设置限制
r->io.file.autosync = bytes;
}
4. 利用rio生成AOF协议
// 以"*<count>\r\n"格式为写如一个int整型的count
size_t rioWriteBulkCount(rio *r, char prefix, int count) {
char cbuf[128];
int clen;
// 构建一个 "*<count>\r\n"
cbuf[0] = prefix;
clen = 1+ll2string(cbuf+1,sizeof(cbuf)-1,count);
cbuf[clen++] = '\r';
cbuf[clen++] = '\n';
// 调用rio的接口,将cbuf写如r中
if (rioWrite(r,cbuf,clen) == 0) return 0;
return clen;
}
/* Write binary-safe string in the format: "$<count>\r\n<payload>\r\n". */
// 以"$<count>\r\n<payload>\r\n"为格式写入一个字符串
size_t rioWriteBulkString(rio *r, const char *buf, size_t len) {
size_t nwritten;
// 写入"$<len>\r\n"
if ((nwritten = rioWriteBulkCount(r,'$',len)) == 0) return 0;
// 追加写入一个buf,也就是<payload>部分
if (len > 0 && rioWrite(r,buf,len) == 0) return 0;
// 追加"\r\n"
if (rioWrite(r,"\r\n",2) == 0) return 0;
return nwritten+len+2; //返回长度
}
/* Write a long long value in format: "$<count>\r\n<payload>\r\n". */
// 以"$<count>\r\n<payload>\r\n"为格式写入一个longlong 值
size_t rioWriteBulkLongLong(rio *r, long long l) {
char lbuf[32];
unsigned int llen;
// 将longlong转为字符串,按字符串的格式写入
llen = ll2string(lbuf,sizeof(lbuf),l);
return rioWriteBulkString(r,lbuf,llen);
}
/* Write a double value in the format: "$<count>\r\n<payload>\r\n" */
// 以"$<count>\r\n<payload>\r\n"为格式写入一个 double 值
size_t rioWriteBulkDouble(rio *r, double d) {
char dbuf[128];
unsigned int dlen;
//以宽度为17位的方式写到dbuf中,17位的double双精度浮点数的长度最短且无损
dlen = snprintf(dbuf,sizeof(dbuf),"%.17g",d);
return rioWriteBulkString(r,dbuf,dlen);
}