2 * IRC - Internet Relay Chat, ircd/engine_epoll.c
3 * Copyright (C) 2003 Michael Poole <mdpoole@troilus.org>
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 1, or (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 * @brief Linux epoll_*() event engine.
26 #include "ircd_events.h"
27 #include "ircd_alloc.h"
28 #include "ircd_features.h"
32 /* #include <assert.h> -- Now using assert in ircd_log.h */
34 #include <sys/types.h>
36 #include <stdint.h> /* bah */
39 #include <sys/epoll.h>
40 #include <sys/socket.h>
42 #include <linux/unistd.h>
44 /* The GNU C library may have a valid header but stub implementations
45 * of the epoll system calls. If so, provide our own. */
46 #if defined(__stub_epoll_create) || defined(__stub___epoll_create) || defined(EPOLL_NEED_BODY)
48 /* Oh, did we mention that some glibc releases do not even define the epoll system call numbers? */
50 #if !defined(__NR_epoll_create)
52 #define __NR_epoll_create 1243
53 #define __NR_epoll_ctl 1244
54 #define __NR_epoll_wait 1245
55 #elif defined(__x86_64__)
56 #define __NR_epoll_create 214
57 #define __NR_epoll_ctl 233
58 #define __NR_epoll_wait 232
59 #elif defined(__sparc64__) || defined(__sparc__)
60 #define __NR_epoll_create 193
61 #define __NR_epoll_ctl 194
62 #define __NR_epoll_wait 195
63 #elif defined(__s390__) || defined(__m68k__)
64 #define __NR_epoll_create 249
65 #define __NR_epoll_ctl 250
66 #define __NR_epoll_wait 251
67 #elif defined(__ppc64__) || defined(__ppc__)
68 #define __NR_epoll_create 236
69 #define __NR_epoll_ctl 237
70 #define __NR_epoll_wait 238
71 #elif defined(__parisc__) || defined(__arm26__) || defined(__arm__)
72 #define __NR_epoll_create 224
73 #define __NR_epoll_ctl 225
74 #define __NR_epoll_wait 226
75 #elif defined(__alpha__)
76 #define __NR_epoll_create 407
77 #define __NR_epoll_ctl 408
78 #define __NR_epoll_wait 409
79 #elif defined(__sh64__)
80 #define __NR_epoll_create 282
81 #define __NR_epoll_ctl 283
82 #define __NR_epoll_wait 284
83 #elif defined(__i386__) || defined(__sh__) || defined(__m32r__) || defined(__h8300__) || defined(__frv__)
84 #define __NR_epoll_create 254
85 #define __NR_epoll_ctl 255
86 #define __NR_epoll_wait 256
88 #error No system call numbers defined for epoll family.
89 #endif /* cpu types */
90 #endif /* !defined(__NR_epoll_create) */
92 _syscall1(int, epoll_create, int, size)
93 _syscall4(int, epoll_ctl, int, epfd, int, op, int, fd, struct epoll_event *, event)
94 _syscall4(int, epoll_wait, int, epfd, struct epoll_event *, pevents, int, maxevents, int, timeout)
96 #endif /* epoll_create defined as stub */
98 #define EPOLL_ERROR_THRESHOLD 20 /**< after 20 epoll errors, restart */
99 #define ERROR_EXPIRE_TIME 3600 /**< expire errors after an hour */
101 /** File descriptor for epoll pseudo-file. */
103 /** Number of recent epoll errors. */
105 /** Periodic timer to forget errors. */
106 static struct Timer clear_error;
107 /** Current array of event descriptors. */
108 static struct epoll_event *events;
109 /** Number of ::events elements that have been populated. */
110 static int events_used;
111 /** Current processing position in ::events. */
114 /** Decrement the error count (once per hour).
115 * @param[in] ev Expired timer event; its timer (ev_timer(ev)) is what gets passed to timer_del(). */
118 error_clear(struct Event *ev)
121 timer_del(ev_timer(ev));
124 /** Initialize the epoll engine.
125 * @param[in] max_sockets Maximum number of file descriptors to support.
126 * @return Non-zero on success, or zero on failure. */
129 engine_init(int max_sockets)
131 if ((epoll_fd = epoll_create(max_sockets)) < 0) {
132 log_write(LS_SYSTEM, L_WARNING, 0,
133 "epoll() engine cannot initialize: %m");
139 /** Set events for a particular socket.
140 * @param[in] sock Socket to calculate events for.
141 * @param[in] state Current socket state.
142 * @param[in] events User-specified event interest list.
143 * @param[out] evt epoll event structure for socket. */
146 set_events(struct Socket *sock, enum SocketState state, unsigned int events, struct epoll_event *evt)
149 assert(0 <= s_fd(sock));
150 memset(evt, 0, sizeof(*evt));
152 evt->data.ptr = sock;
156 evt->events = EPOLLOUT;
161 evt->events = EPOLLIN;
167 switch (events & SOCK_EVENT_MASK) {
171 case SOCK_EVENT_READABLE:
172 evt->events = EPOLLIN;
174 case SOCK_EVENT_WRITABLE:
175 evt->events = EPOLLOUT;
177 case SOCK_EVENT_READABLE|SOCK_EVENT_WRITABLE:
178 evt->events = EPOLLIN|EPOLLOUT;
185 /** Add a socket to the event engine.
186 * @param[in] sock Socket to add to engine.
187 * @return Non-zero on success, or zero on error. */
190 engine_add(struct Socket *sock)
192 struct epoll_event evt;
195 Debug((DEBUG_ENGINE, "epoll: Adding socket %d [%p], state %s, to engine",
196 s_fd(sock), sock, state_to_name(s_state(sock))));
197 set_events(sock, s_state(sock), s_events(sock), &evt);
198 if (epoll_ctl(epoll_fd, EPOLL_CTL_ADD, s_fd(sock), &evt) < 0) {
199 event_generate(ET_ERROR, sock, errno);
205 /** Handle state transition for a socket.
206 * @param[in] sock Socket changing state.
207 * @param[in] new_state New state for socket. */
210 engine_set_state(struct Socket *sock, enum SocketState new_state)
212 struct epoll_event evt;
215 Debug((DEBUG_ENGINE, "epoll: Changing state for socket %p to %s",
216 sock, state_to_name(new_state)));
217 set_events(sock, new_state, s_events(sock), &evt);
218 if (epoll_ctl(epoll_fd, EPOLL_CTL_MOD, s_fd(sock), &evt) < 0)
219 event_generate(ET_ERROR, sock, errno);
222 /** Handle change to preferred socket events.
223 * @param[in] sock Socket getting new interest list.
224 * @param[in] new_events New set of interesting events for socket. */
227 engine_set_events(struct Socket *sock, unsigned new_events)
229 struct epoll_event evt;
232 Debug((DEBUG_ENGINE, "epoll: Changing event mask for socket %p to [%s]",
233 sock, sock_flags(new_events)));
234 set_events(sock, s_state(sock), new_events, &evt);
235 if (epoll_ctl(epoll_fd, EPOLL_CTL_MOD, s_fd(sock), &evt) < 0)
236 event_generate(ET_ERROR, sock, errno);
239 /** Remove a socket from the event engine.
240 * @param[in] sock Socket being destroyed. */
243 engine_delete(struct Socket *sock)
248 Debug((DEBUG_ENGINE, "epoll: Deleting socket %d [%p], state %s",
249 s_fd(sock), sock, state_to_name(s_state(sock))));
250 /* Drop any unprocessed events citing this socket. */
251 for (ii = events_i; ii < events_used; ii++) {
252 if (events[ii].data.ptr == sock) {
253 events[ii] = events[--events_used];
258 /** Run engine event loop.
259 * @param[in] gen Lists of generators of various types. */
262 engine_loop(struct Generators *gen)
264 struct epoll_event *evt;
267 int events_count, tmp, wait, errcode;
269 if ((events_count = feature_int(FEAT_POLLS_PER_LOOP)) < 20)
271 events = MyMalloc(sizeof(events[0]) * events_count);
273 if ((tmp = feature_int(FEAT_POLLS_PER_LOOP)) >= 20 && tmp != events_count) {
274 events = MyRealloc(events, sizeof(events[0]) * tmp);
278 wait = timer_next(gen) ? (timer_next(gen) - CurrentTime) * 1000 : -1;
279 Debug((DEBUG_ENGINE, "epoll: delay: %d (%d) %d", timer_next(gen),
281 events_used = epoll_wait(epoll_fd, events, events_count, wait);
282 CurrentTime = time(0);
284 if (events_used < 0) {
285 if (errno != EINTR) {
286 log_write(LS_SOCKET, L_ERROR, 0, "epoll() error: %m");
288 timer_add(timer_init(&clear_error), error_clear, 0, TT_PERIODIC,
290 else if (errors > EPOLL_ERROR_THRESHOLD)
291 server_restart("too many epoll errors");
296 for (events_i = 0; events_i < events_used; ) {
297 evt = &events[events_i++];
298 if (!(sock = evt->data.ptr))
302 "epoll: Checking socket %p (fd %d) state %s, events %s",
303 sock, s_fd(sock), state_to_name(s_state(sock)),
304 sock_flags(s_events(sock))));
306 if (evt->events & EPOLLERR) {
308 codesize = sizeof(errcode);
309 if (getsockopt(s_fd(sock), SOL_SOCKET, SO_ERROR, &errcode,
313 event_generate(ET_ERROR, sock, errcode);
317 } else if (evt->events & EPOLLHUP) {
318 event_generate(ET_EOF, sock, 0);
319 } else switch (s_state(sock)) {
321 if (evt->events & EPOLLOUT) /* connection completed */
322 event_generate(ET_CONNECT, sock, 0);
326 if (evt->events & EPOLLIN) /* incoming connection */
327 event_generate(ET_ACCEPT, sock, 0);
334 if (evt->events & EPOLLIN)
335 event_generate(ET_READ, sock, 0);
336 if (evt->events & EPOLLOUT)
337 event_generate(ET_WRITE, sock, 0);
347 /** Descriptor for epoll event engine. */
348 struct Engine engine_epoll = {