我正面临一个非常奇怪的问题,我有一个简单的UNIX STREAM套接字服务器/客户端代码运行在Linux上。客户端偶尔会向服务器发送一条消息(我也测试过只发送一次),但是在收到第一条消息后,服务器会继续打印相同的消息,即使客户端没有发送任何消息。
每次发送时,整个消息都是重新构造的,没有使用任何可能导致同一条消息在多次调用之间残留的静态数据。
客户端代码:
/* Client-side socket descriptor for the server connection; -1 means no
 * socket is currently open and send_event() must call init_fd() first. */
int g_fd = -1;
/* Filesystem path of the server's UNIX-domain socket (used by both the
 * client's connect() and the server's bind()). */
#define SERVER_SOCK "/tmp/server_sock"
int init_fd(void) {
g_fd = socket(AF_UNIX, SOCK_STREAM, 0);
if (g_fd < 0) {
log_info("alloc", "socket() failed with error (%d:%s)", errno, strerror(errno));
return -1;
}
struct sockaddr_un sa;
memset(&sa, 0, sizeof(sa));
sa.sun_family = AF_UNIX;
snprintf(sa.sun_path, sizeof(sa.sun_path), SERVER_SOCK);
if (connect(g_fd, (struct sockaddr *) &sa, strlen(sa.sun_path) + sizeof(sa.sun_family)) < 0) {
log_info("alloc", "connect() failed with error (%d:%s)", errno, strerror(errno));
return -1;
}
int flags = fcntl(g_fd, F_GETFL, 0);
fcntl(g_fd, F_SETFL, flags | O_NONBLOCK);
return 0;
}
/*
 * Build the JSON event {"msgType": 650, "data": {"type": "MESSAGE_CHANGE"}}
 * and send it to the server as one length-prefixed message.
 *
 * Connects first (via init_fd()) when g_fd is not open. If sending fails,
 * the socket is closed and g_fd reset to -1 so that the next call reconnects.
 * The serialized string is sent including its terminating NUL byte.
 */
void send_event(void) {
    if (g_fd < 0 && init_fd() < 0) {
        log_info("alloc", "failed to connect to server");
        return;
    }

    json_t *jtc = json_object();
    json_object_set_new(jtc, "msgType", json_integer(650));
    json_t *jtype = json_object();
    json_object_set_new(jtype, "type", json_string("MESSAGE_CHANGE"));
    json_object_set_new(jtc, "data", jtype); /* jtc takes ownership of jtype */

    char *j_dump_string = json_dumps(jtc, JSON_PRESERVE_ORDER);
    if (j_dump_string == NULL) {
        /* Serialization failed (e.g. an allocation above returned NULL);
         * calling strlen() on the result would be undefined behavior. */
        log_info("alloc", "failed to serialize message");
        json_decref(jtc);
        return;
    }

    if (write_a_msg(g_fd, (uint8_t *) j_dump_string, strlen(j_dump_string) + 1) == -1) {
        close(g_fd);
        g_fd = -1;
        log_info("alloc", "failed to send message to server");
    } else {
        /* Only report the message as sent when it actually went out. */
        log_info("alloc", "GNA: da_send: %s", j_dump_string);
    }

    free(j_dump_string);
    json_decref(jtc);
}
/*
 * Send one framed message on fd: a 4-byte big-endian length header followed
 * by nbytes of payload copied from ptr.
 *
 * Returns 0 on success, -1 on failure with errno set:
 *   - EMSGSIZE if nbytes cannot be represented in the 32-bit header or the
 *     frame size would overflow size_t,
 *   - the malloc() error if the frame buffer cannot be allocated,
 *   - otherwise the error left by write_loop().
 *
 * NOTE(review): the header is written as exactly 4 bytes, so this assumes
 * MSG_LEN_SIZE == 4 -- confirm against its definition.
 */
int write_a_msg(int fd, const uint8_t *ptr, size_t nbytes) {
    /* Reject lengths that would be silently truncated in the header or make
     * the allocation size wrap around. */
    if ((uint64_t) nbytes > UINT32_MAX || nbytes > SIZE_MAX - MSG_LEN_SIZE) {
        errno = EMSGSIZE;
        return -1;
    }

    size_t frame_len = nbytes + MSG_LEN_SIZE;
    uint8_t *write_buf = malloc(frame_len);
    if (!write_buf)
        return -1;

    /* Big-endian payload length. */
    write_buf[0] = (uint8_t) (nbytes >> 24);
    write_buf[1] = (uint8_t) (nbytes >> 16);
    write_buf[2] = (uint8_t) (nbytes >> 8);
    write_buf[3] = (uint8_t) nbytes;
    memcpy(write_buf + MSG_LEN_SIZE, ptr, nbytes);

    int rc = write_loop(fd, write_buf, frame_len);

    /* free() may clobber errno; keep the value write_loop() left behind. */
    int save_err = errno;
    free(write_buf);
    errno = save_err;

    return rc < 0 ? -1 : 0;
}
/*
 * Write exactly nbytes from ptr to fd, retrying after short writes.
 *
 * Transient conditions (EINTR, and EAGAIN/EWOULDBLOCK on a non-blocking
 * descriptor) are retried immediately, so this spins until the descriptor
 * accepts more data.
 *
 * Returns 0 once everything has been written, -1 on any other error with
 * errno set by write(). A write() that reports 0 bytes for a non-zero request
 * is treated as an error (errno = EIO) instead of consulting a stale errno.
 */
static int write_loop(int fd, const uint8_t *ptr, size_t nbytes) {
    size_t remaining = nbytes;

    while (remaining > 0) {
        ssize_t nwritten = write(fd, ptr, remaining);
        if (nwritten < 0) {
            if (errno == EINTR || errno == EAGAIN || errno == EWOULDBLOCK)
                continue;
            return -1;
        }
        if (nwritten == 0) {
            /* errno is not set in this case; report a definite error. */
            errno = EIO;
            return -1;
        }
        remaining -= (size_t) nwritten;
        ptr += nwritten;
    }
    return 0;
}
客户端使用libev处理套接字,服务器使用select:
/*
 * Set up the listening UNIX-domain stream socket at SERVER_SOCK and register
 * it with the event loop so that connection_callback runs when it becomes
 * readable.
 *
 * Any existing file at SERVER_SOCK is unlinked before bind(). Returns RET_OK
 * on success, or -1 if socket(), bind() or listen() fails (the socket is
 * closed again in the latter two cases).
 */
static int server_init(void) {
    struct sockaddr_un addr;
    int listen_fd;

    memset(&addr, 0, sizeof(addr));

    listen_fd = socket(AF_UNIX, SOCK_STREAM, 0);
    if (listen_fd < 0) {
        DebugLog(ERROR, "Could not create socket - error (%d:%s)", errno, strerror(errno));
        return -1;
    }

    /* Remove a stale socket file, otherwise bind() would fail. */
    unlink(SERVER_SOCK);

    addr.sun_family = AF_UNIX;
    strcpy(addr.sun_path, SERVER_SOCK);

    if (bind(listen_fd, (struct sockaddr *) &addr, sizeof(addr)) < 0) {
        DebugLog(ERROR, "Bind failed with error (%d:%s)\n", errno, strerror(errno));
        goto fail;
    }

    if (listen(listen_fd, 5) != 0) {
        DebugLog(ERROR, "Listen failed with error (%d:%s)\n", errno, strerror(errno));
        goto fail;
    }

    DebugLog(INFO, "GNA: Create server ready to accept\n");
    ev_add_fd(listen_fd, EV_READ, connection_callback, NULL);
    return RET_OK;

fail:
    close(listen_fd);
    return -1;
}
/*
 * Event-loop callback for the listening socket: accept one pending
 * connection on fd and register the new descriptor for read events, handled
 * by request_callback. The flags and data arguments are not used.
 */
static void connection_callback(int fd, int flags, void *data) {
    struct sockaddr_un peer;
    socklen_t peer_len = sizeof(peer);

    int client_fd = accept(fd, (struct sockaddr *) &peer, &peer_len);
    if (client_fd >= 0) {
        DebugLog(INFO, "GNA: received new connection\n");
        ev_add_fd(client_fd, EV_READ, request_callback, NULL);
        return;
    }

    DebugLog(ERROR, "%s:Accept failed", __func__);
}
/*
 * Event-loop callback for a client connection: read one framed message from
 * fd, NUL-terminate it and pass it to handle_msg().
 *
 * If reading fails (including the peer closing the connection), the
 * descriptor is removed from the event loop and closed. The flags and data
 * arguments are not used.
 */
static void request_callback(int fd, int flags, void *data) {
    uint8_t *buf = NULL;
    size_t msglen = 0;

    if (read_a_msg(fd, &buf, &msglen) < 0) {
        DebugLog(ERROR, "%s:read failed (pid:%d). error %d:%s", __func__, getpid(), errno,
                 strerror(errno));
        ev_del_fd(fd);
        close(fd); /* close the FD; a new one is opened on the next connection */
        free(buf);
        return;
    }

    if (buf == NULL) {
        /* read_a_msg() can report success without a buffer when a zero-length
         * message arrives and its allocation failed; do not dereference. */
        DebugLog(ERROR, "%s:no message buffer returned", __func__);
        return;
    }

    /* read_a_msg() allocates one byte beyond msglen for this terminator. */
    buf[msglen] = '\0';
    handle_msg((char *) buf, msglen, fd);
    free(buf);
}
/*
 * Handle one received message. Currently this only logs the message text;
 * msg must be NUL-terminated because it is printed with %s. The msglen and
 * fd arguments are not used.
 */
static void handle_msg(char *msg, int msglen, int fd) {
    (void) msglen;
    (void) fd;

    DebugLog(INFO, "%s: GNA: Received msg: %s", __func__, msg);
}
/*
 * Read one framed message from fd: a 4-byte big-endian length header
 * followed by that many payload bytes.
 *
 * On success returns 0, stores a malloc()ed buffer in *ptr and the payload
 * length in *nbytes. The buffer is one byte larger than the payload so the
 * caller can append a NUL terminator; that extra byte is not counted in
 * *nbytes. The caller owns the buffer and must free() it.
 *
 * On failure returns -1. If the header cannot be read, *ptr and *nbytes are
 * left untouched. If the length is unrepresentable, allocation fails, or the
 * payload cannot be read completely, no buffer is left allocated, *ptr is set
 * to NULL and *nbytes to 0.
 */
int read_a_msg(int fd, uint8_t **ptr, size_t *nbytes) {
    uint8_t hd[4];
    if (read_loop(fd, hd, sizeof hd) < 0)
        return -1;

    /* Widen each byte to unsigned before shifting: shifting the promoted
     * (signed) int by 24 is undefined for header bytes >= 0x80. */
    uint32_t wire_len = ((uint32_t) hd[0] << 24) | ((uint32_t) hd[1] << 16) |
                        ((uint32_t) hd[2] << 8) | (uint32_t) hd[3];

    /* The "+ 1" for the terminator must not wrap on a 32-bit size_t. */
    if ((uint64_t) wire_len + 1 > SIZE_MAX) {
        errno = EMSGSIZE;
        *ptr = NULL;
        *nbytes = 0;
        return -1;
    }
    size_t payload_len = wire_len;

    uint8_t *buf = malloc(payload_len + 1);
    if (buf == NULL) {
        *ptr = NULL;
        *nbytes = 0;
        return -1;
    }

    if (read_loop(fd, buf, payload_len) < 0) {
        /* free() may clobber errno; keep the value read_loop() left behind. */
        int save_err = errno;
        free(buf);
        errno = save_err;
        *ptr = NULL;
        *nbytes = 0;
        return -1;
    }

    *ptr = buf;
    *nbytes = payload_len;
    return 0;
}
/*
 * Read exactly nbytes from fd into ptr, retrying after short reads.
 *
 * Transient conditions (EINTR, and EAGAIN/EWOULDBLOCK on a non-blocking
 * descriptor) are retried immediately.
 *
 * Returns 0 once nbytes have been read. Returns -1 on a read() error (errno
 * set by read()) or when end-of-file is reached before nbytes were read; in
 * the end-of-file case errno is not modified.
 */
static int read_loop(int fd, uint8_t *ptr, size_t nbytes) {
    size_t remaining = nbytes;

    while (remaining > 0) {
        ssize_t nread = read(fd, ptr, remaining);
        if (nread < 0) {
            if (errno == EINTR || errno == EAGAIN || errno == EWOULDBLOCK)
                continue;
            return -1;
        }
        if (nread == 0)
            return -1; /* peer closed before the full amount arrived */
        remaining -= (size_t) nread;
        ptr += nread;
    }
    return 0;
}
这个日志每隔几秒钟就打印一次,尽管没有人发送任何东西。
DebugLog(INFO, "%s: GNA: Received msg: %s", __func__, msg);
在客户端,只有一个send函数,没有其他人调用它,这个日志只出现一次。
log_info("alloc", "GNA: da_send: %s", j_dump_string);
有人能解释一下为什么会发生这种情况吗?如何避免?
**更新1:**按照建议在客户端和服务器上运行strace之后,我发现服务器在select中设置了该fd,并定期收到可读(read)事件:
08:19:26.031031 _newselect(16, [4 5 6 7 8 9 10 11 15], [], NULL, {tv_sec=16, tv_usec=1049544}) = 1 (in [11], left {tv_sec=17, tv_usec=44972})
08:19:26.035737 read(11, "\0\0\0005", 4) = 4
08:19:26.035814 read(11, "{\"msgType\": 650, \"data\": {\"type\""..., 53) = 53
08:19:38.982049 _newselect(16, [4 5 6 7 8 9 10 11 15], [], NULL, {tv_sec=3, tv_usec=1000564}) = 1 (in [11], left {tv_sec=3, tv_usec=946001})
08:19:39.036745 read(11, "\0\0\0005", 4) = 4
08:19:39.036816 read(11, "{\"msgType\": 650, \"data\": {\"type\""..., 53) = 53
08:19:50.117012 _newselect(18, [4 5 6 7 8 9 10 11 15 16 17], [], NULL, {tv_sec=4, tv_usec=1276752}) = 1 (in [17], left {tv_sec=5, tv_usec=276586})
08:19:50.117251 read(17, "\0\0\0005", 4) = 4
08:19:50.117308 read(17, "{\"msgType\": 650, \"data\": {\"type\""..., 53) = 53
08:19:51.910855 _newselect(16, [4 5 6 7 8 9 10 11 15], [], NULL, {tv_sec=2, tv_usec=1070595}) = 1 (in [11], left {tv_sec=2, tv_usec=943831})
08:19:52.037758 read(11, "\0\0\0005", 4) = 4
08:19:52.037841 read(11, "{\"msgType\": 650, \"data\": {\"type\""..., 53) = 53
08:20:05.031834 _newselect(16, [4 5 6 7 8 9 10 11 15], [], NULL, {tv_sec=0, tv_usec=1000335}) = 1 (in [11], left {tv_sec=0, tv_usec=993545})
08:20:05.038758 read(11, "\0\0\0005", 4) = 4
08:20:05.038838 read(11, "{\"msgType\": 650, \"data\": {\"type\""..., 53) = 53
我杀死了客户端并在开始时附加了strace,以便它可以获取套接字的fd,客户端除了一次之外没有在fd 14上写任何东西:
08:10:02.362615 socket(AF_UNIX, SOCK_STREAM, 0) = 14
08:10:02.363091 connect(14, {sa_family=AF_UNIX, sun_path="/tmp/server_sock"}, 21) = 0
08:10:02.363251 fcntl64(14, F_GETFL) = 0x2 (flags O_RDWR)
08:10:02.363297 fcntl64(14, F_SETFL, O_RDWR|O_NONBLOCK) = 0
08:10:02.363837 write(14, "\0\0\0005{\"msgType\": 650, \"data\": {\"t"..., 57) = 57
这是FD 14的唯一写入。
**更新2:**上面的send_event
函数被编译为libabc.so库的一部分。在我的消息中放入getpid()和gettimeofday()后,我发现另一个守护进程也在使用这个库,并调用了一个会调用send_event
函数的包装器(wrapper)函数,这就是为什么即使客户端没有发送任何消息,服务器也会定期收到消息。我现在创建一个.pid文件,存储客户端的pid,然后在send_event
中比较当前进程的pid和客户端的pid。
1条答案
按热度按时间xhv8bpkk1#
上面的
send_event
函数被编译为libabc.so
库的一部分。在我的消息中放入getpid()
和gettimeofday()
之后,我发现另一个守护进程正在使用这个库并调用一个包装器(wrapper)函数,该函数正在调用send_event
函数,这就是为什么我看到服务器定期接收消息,即使客户端没有发送任何消息。我现在创建一个.pid
文件,存储客户端的pid,然后在send_event
中比较当前进程的pid和客户端的pid。谢谢@pts的建议。