C语言 即使客户端不发送数据,也会发生UNIX套接字读取

zkure5ic  于 2023-10-16  发布在  Unix
关注(0)|答案(1)|浏览(168)

我正面临一个非常奇怪的问题,我有一个简单的UNIX STREAM套接字服务器/客户端代码运行在Linux上。客户端偶尔会向服务器发送一条消息(我也测试过只发送一次),但是在收到第一条消息后,服务器会继续打印相同的消息,即使客户端没有发送任何消息。
整条消息每次都是重新创建的,代码中不存在可能导致同一条消息在多次调用之间残留的静态数据等。
客户端代码:

/* Client-side socket descriptor used by init_fd() and send_event().
 * A negative value means "not connected": send_event() then calls init_fd()
 * before sending, and resets it to -1 after a failed write. */
int g_fd = -1;
/* Filesystem path of the UNIX-domain socket; the client connects to it and
 * the server binds to it. */
#define SERVER_SOCK   "/tmp/server_sock"

int init_fd(void) {
    g_fd = socket(AF_UNIX, SOCK_STREAM, 0);
    if (g_fd < 0) {
        log_info("alloc", "socket() failed with error (%d:%s)", errno, strerror(errno));
        return -1;
    }

    struct sockaddr_un sa;
    memset(&sa, 0, sizeof(sa));
    sa.sun_family = AF_UNIX;
    snprintf(sa.sun_path, sizeof(sa.sun_path), SERVER_SOCK);

    if (connect(g_fd, (struct sockaddr *) &sa, strlen(sa.sun_path) + sizeof(sa.sun_family)) < 0) {
        log_info("alloc", "connect() failed with error (%d:%s)", errno, strerror(errno));
        return -1;
    }
    int flags = fcntl(g_fd, F_GETFL, 0);
    fcntl(g_fd, F_SETFL, flags | O_NONBLOCK);
    return 0;
}

/*
 * Build the JSON event {"msgType": 650, "data": {"type": "MESSAGE_CHANGE"}}
 * and send it to the server over g_fd, connecting first if g_fd is not open.
 *
 * The payload is the serialized JSON text including its terminating NUL
 * (hence strlen + 1). If the write fails the socket is closed and g_fd is
 * reset to -1 so that the next call reconnects.
 */
void send_event(void) {
    if (g_fd < 0 && init_fd() < 0) {
        log_info("alloc", "failed to connect to server");
        return;
    }

    json_t *jtc = json_object();
    json_object_set_new(jtc, "msgType", json_integer(650));
    json_t *jtype = json_object();
    json_object_set_new(jtype, "type", json_string("MESSAGE_CHANGE"));
    json_object_set_new(jtc, "data", jtype);    // jtc now owns jtype

    char *j_dump_string = json_dumps(jtc, JSON_PRESERVE_ORDER);
    if (j_dump_string == NULL) {
        // Serialization failed (e.g. out of memory); strlen(NULL) would be
        // undefined behaviour, so bail out before touching the string.
        log_info("alloc", "failed to serialize message");
        json_decref(jtc);
        return;
    }

    size_t payload_len = strlen(j_dump_string) + 1;
    if (write_a_msg(g_fd, (uint8_t *) j_dump_string, payload_len) == -1) {
        close(g_fd);
        g_fd = -1;
        log_info("alloc", "failed to send message to server");
    }
    log_info("alloc", "GNA: da_send: %s", j_dump_string);
    free(j_dump_string);
    json_decref(jtc);
}

/*
 * Send one framed message on fd: a 4-byte big-endian length header followed
 * by nbytes of payload copied from ptr.
 *
 * NOTE(review): MSG_LEN_SIZE is defined outside this excerpt; the header
 * bytes written here are exactly 4, so it is presumably 4 -- confirm.
 *
 * Returns 0 on success, -1 on failure with errno describing the error
 * (EMSGSIZE when nbytes cannot be represented in the 32-bit header).
 */
int write_a_msg(int fd, const uint8_t *ptr, size_t nbytes) {
    // Refuse lengths that would be truncated in the 4-byte header or would
    // wrap the allocation size below.
    if (nbytes > UINT32_MAX || nbytes > SIZE_MAX - MSG_LEN_SIZE) {
        errno = EMSGSIZE;
        return -1;
    }

    uint8_t *write_buf = malloc(nbytes + MSG_LEN_SIZE);
    if (!write_buf)
        return -1;

    write_buf[0] = (uint8_t) (nbytes >> 24);
    write_buf[1] = (uint8_t) (nbytes >> 16);
    write_buf[2] = (uint8_t) (nbytes >> 8);
    write_buf[3] = (uint8_t) nbytes;
    if (nbytes > 0)             // memcpy with a NULL source is undefined even for 0 bytes
        memcpy(write_buf + MSG_LEN_SIZE, ptr, nbytes);

    int rc = (write_loop(fd, write_buf, nbytes + MSG_LEN_SIZE) < 0) ? -1 : 0;

    // free() may clobber errno; keep the value produced by the write.
    int save_err = errno;
    free(write_buf);
    errno = save_err;
    return rc;
}

/*
 * Write exactly nbytes from ptr to fd, retrying after short writes.
 *
 * Interrupted calls (EINTR) and "would block" conditions (EAGAIN /
 * EWOULDBLOCK) are retried; any other error aborts the transfer.
 * Returns 0 when everything was written, -1 on error with errno set.
 *
 * NOTE(review): on a non-blocking descriptor the EAGAIN retry is a busy
 * loop; waiting for writability with poll()/select() would avoid spinning.
 */
static int write_loop(int fd, const uint8_t *ptr, size_t nbytes) {
    size_t nleft = nbytes;

    while (nleft > 0) {
        ssize_t nwritten = write(fd, ptr, nleft);
        if (nwritten < 0) {
            if (errno == EINTR || errno == EAGAIN || errno == EWOULDBLOCK)
                continue;
            return -1;
        }
        if (nwritten == 0) {
            // No progress and no error reported: errno is stale here, so set
            // it explicitly instead of looping on whatever it last held.
            errno = EIO;
            return -1;
        }
        nleft -= (size_t) nwritten;
        ptr += nwritten;
    }
    return 0;
}

服务器端代码(服务器使用libev处理套接字,底层使用select):

/*
 * Create the listening UNIX stream socket at SERVER_SOCK and register it
 * with the event loop so that connection_callback() runs when it becomes
 * readable.
 *
 * Any stale socket file is unlinked before binding. Returns RET_OK on
 * success and -1 if the socket cannot be created, bound or put into
 * listening mode (the descriptor is closed in the latter two cases).
 */
static int server_init(void) {
    int listen_fd = socket(AF_UNIX, SOCK_STREAM, 0);
    if (listen_fd < 0) {
        DebugLog(ERROR, "Could not create socket - error (%d:%s)", errno, strerror(errno));
        return -1;
    }

    // Remove a leftover socket file from a previous run so bind() can succeed.
    unlink(SERVER_SOCK);

    struct sockaddr_un addr;
    memset(&addr, 0, sizeof(addr));
    addr.sun_family = AF_UNIX;
    strcpy(addr.sun_path, SERVER_SOCK);

    if (bind(listen_fd, (struct sockaddr *) &addr, sizeof(addr)) < 0) {
        DebugLog(ERROR, "Bind failed with error (%d:%s)\n", errno, strerror(errno));
        goto fail;
    }
    if (listen(listen_fd, 5) != 0) {
        DebugLog(ERROR, "Listen failed with error (%d:%s)\n", errno, strerror(errno));
        goto fail;
    }

    DebugLog(INFO, "GNA: Create server ready to accept\n");
    ev_add_fd(listen_fd, EV_READ, connection_callback, NULL);
    return RET_OK;

fail:
    close(listen_fd);
    return -1;
}

/*
 * Event-loop callback for the listening socket: accept one pending
 * connection on fd and register the new descriptor so request_callback()
 * runs when it becomes readable. The flags and data arguments are unused.
 */
static void connection_callback(int fd, int flags, void *data) {
    struct sockaddr_un peer;
    socklen_t peer_len = sizeof(peer);
    int client_fd = accept(fd, (struct sockaddr *) &peer, &peer_len);

    if (client_fd >= 0) {
        DebugLog(INFO, "GNA: received new connection\n");
        ev_add_fd(client_fd, EV_READ, request_callback, NULL);
        return;
    }

    DebugLog(ERROR, "%s:Accept failed", __func__);
}

/*
 * Event-loop callback for a client connection: read one framed message from
 * fd, NUL-terminate it and pass it to handle_msg().
 *
 * If reading fails (including the peer closing the connection) the
 * descriptor is removed from the event loop and closed. The flags and data
 * arguments are unused.
 */
static void request_callback(int fd, int flags, void *data) {
    uint8_t *buf = NULL;
    size_t msglen = 0;

    if (read_a_msg(fd, &buf, &msglen) < 0) {
        DebugLog(ERROR, "%s:read failed (pid:%d). error %d:%s", __func__, getpid(), errno,
                 strerror(errno));
        ev_del_fd(fd);
        close(fd);              // close the FD we'll reopen a new one next time
        free(buf);              // free(NULL) is a no-op
        return;
    }

    if (buf == NULL) {
        // Success without a buffer: nothing can be terminated or handled, so
        // check before the dereference below rather than after it.
        DebugLog(ERROR, "%s:no message buffer", __func__);
        return;
    }

    // read_a_msg() allocates one byte more than msglen, so this stays in bounds.
    buf[msglen] = '\0';         // ensure NULL termination
    handle_msg((char *) buf, (int) msglen, fd);
    free(buf);
}

/*
 * Handle one received message. At present this only logs the payload as a
 * string; msglen and fd are accepted but not used.
 */
static void handle_msg(char *msg, int msglen, int fd) {
    (void) msglen;
    (void) fd;
    DebugLog(INFO, "%s: GNA: Received msg: %s", __func__, msg);
}

/*
 * Read one framed message from fd: a 4-byte big-endian length header
 * followed by that many payload bytes.
 *
 * On success returns 0, stores a malloc()ed buffer in *ptr (the caller
 * frees it) and the payload length in *nbytes. The buffer holds one byte
 * more than *nbytes so the caller can append a NUL terminator; that extra
 * byte is not counted in the length.
 *
 * On failure returns -1 with *ptr == NULL and *nbytes == 0; nothing is left
 * allocated.
 *
 * NOTE(review): the length comes straight from the peer and is not capped;
 * consider enforcing a maximum message size.
 */
int read_a_msg(int fd, uint8_t **ptr, size_t *nbytes) {
    uint8_t hd[4];

    *ptr = NULL;
    *nbytes = 0;

    if (read_loop(fd, hd, sizeof hd) < 0)
        return -1;

    // Widen each byte to uint32_t before shifting: shifting a promoted int
    // into its sign bit is undefined and would sign-extend into size_t.
    uint32_t wire_len = ((uint32_t) hd[0] << 24) | ((uint32_t) hd[1] << 16) |
                        ((uint32_t) hd[2] << 8) | (uint32_t) hd[3];
    size_t payload_len = wire_len;

    // payload_len + 1 must not wrap (possible where size_t is 32 bits),
    // otherwise the buffer would be smaller than the data read into it.
    if (payload_len > SIZE_MAX - 1) {
        errno = EMSGSIZE;
        return -1;
    }

    uint8_t *buf = malloc(payload_len + 1);
    if (buf == NULL)
        return -1;

    if (read_loop(fd, buf, payload_len) < 0) {
        int save_err = errno;   // free() may clobber errno
        free(buf);
        errno = save_err;
        return -1;
    }

    *ptr = buf;
    *nbytes = payload_len;
    return 0;
}

/*
 * Read exactly nbytes from fd into ptr, retrying after short reads.
 *
 * Interrupted calls (EINTR) and "would block" conditions (EAGAIN /
 * EWOULDBLOCK) are retried; any other error aborts the transfer.
 * Returns 0 when all bytes were read. Returns -1 on a read error (errno
 * set by read) or when end-of-file arrives before nbytes were received;
 * in the end-of-file case errno is not set by this function.
 */
static int read_loop(int fd, uint8_t *ptr, size_t nbytes) {
    size_t nleft = nbytes;

    while (nleft > 0) {
        ssize_t nread = read(fd, ptr, nleft);
        if (nread < 0) {
            if (errno == EINTR || errno == EAGAIN || errno == EWOULDBLOCK)
                continue;
            return -1;
        }
        if (nread == 0)         // peer closed before the full amount arrived
            return -1;
        nleft -= (size_t) nread;
        ptr += nread;
    }
    return 0;
}

这个日志每隔几秒钟就打印一次,尽管没有人发送任何东西。

DebugLog(INFO, "%s: GNA: Received msg: %s", __func__, msg);

在客户端,只有一个send函数,没有其他人调用它,这个日志只出现一次。

log_info("alloc", "GNA: da_send: %s", j_dump_string);

有人能解释一下为什么会发生这种情况吗?如何避免?

**更新1:**按照建议在客户端和服务器上运行strace之后,我发现服务器在select中监视该fd,并且会定期收到可读(read)事件:

08:19:26.031031 _newselect(16, [4 5 6 7 8 9 10 11 15], [], NULL, {tv_sec=16, tv_usec=1049544}) = 1 (in [11], left {tv_sec=17, tv_usec=44972})
08:19:26.035737 read(11, "\0\0\0005", 4) = 4
08:19:26.035814 read(11, "{\"msgType\": 650, \"data\": {\"type\""..., 53) = 53
08:19:38.982049 _newselect(16, [4 5 6 7 8 9 10 11 15], [], NULL, {tv_sec=3, tv_usec=1000564}) = 1 (in [11], left {tv_sec=3, tv_usec=946001})
08:19:39.036745 read(11, "\0\0\0005", 4) = 4
08:19:39.036816 read(11, "{\"msgType\": 650, \"data\": {\"type\""..., 53) = 53
08:19:50.117012 _newselect(18, [4 5 6 7 8 9 10 11 15 16 17], [], NULL, {tv_sec=4, tv_usec=1276752}) = 1 (in [17], left {tv_sec=5, tv_usec=276586})
08:19:50.117251 read(17, "\0\0\0005", 4) = 4
08:19:50.117308 read(17, "{\"msgType\": 650, \"data\": {\"type\""..., 53) = 53
08:19:51.910855 _newselect(16, [4 5 6 7 8 9 10 11 15], [], NULL, {tv_sec=2, tv_usec=1070595}) = 1 (in [11], left {tv_sec=2, tv_usec=943831})
08:19:52.037758 read(11, "\0\0\0005", 4) = 4
08:19:52.037841 read(11, "{\"msgType\": 650, \"data\": {\"type\""..., 53) = 53
08:20:05.031834 _newselect(16, [4 5 6 7 8 9 10 11 15], [], NULL, {tv_sec=0, tv_usec=1000335}) = 1 (in [11], left {tv_sec=0, tv_usec=993545})
08:20:05.038758 read(11, "\0\0\0005", 4) = 4
08:20:05.038838 read(11, "{\"msgType\": 650, \"data\": {\"type\""..., 53) = 53

我终止了客户端,并在它重新启动时就附加strace,以便捕获套接字的fd;客户端在fd 14上只写入了一次,除此之外没有写入任何内容:

08:10:02.362615 socket(AF_UNIX, SOCK_STREAM, 0) = 14
08:10:02.363091 connect(14, {sa_family=AF_UNIX, sun_path="/tmp/server_sock"}, 21) = 0
08:10:02.363251 fcntl64(14, F_GETFL)    = 0x2 (flags O_RDWR)
08:10:02.363297 fcntl64(14, F_SETFL, O_RDWR|O_NONBLOCK) = 0
08:10:02.363837 write(14, "\0\0\0005{\"msgType\": 650, \"data\": {\"t"..., 57) = 57

这是FD 14的唯一写入。

**更新2:**上面的send_event函数被编译为libabc.so库的一部分。在我的消息中加入getpid()和gettimeofday()之后,我发现另一个守护进程也在使用这个库,并调用了一个会调用send_event的包装函数,这就是为什么即使客户端没有发送任何消息,服务器也会定期收到消息。我现在创建一个.pid文件来保存客户端的pid,然后在send_event中将当前进程的pid与客户端的pid进行比较。

xhv8bpkk

xhv8bpkk1#

上面的send_event函数被编译为libabc.so库的一部分。在我的消息中加入getpid()和gettimeofday()之后,我发现另一个守护进程也在使用这个库,并调用了一个会调用send_event的包装函数,这就是为什么即使客户端没有发送任何消息,服务器也会定期收到消息。我现在创建一个.pid文件来保存客户端的pid,然后在send_event中将当前进程的pid与客户端的pid进行比较。
谢谢@pts的建议。

相关问题