2 * IRC - Internet Relay Chat, ircd/hash.c
3 * Copyright (C) 1998 Andrea Cocito, completely rewritten version.
4 * Previous version was Copyright (C) 1991 Darren Reed, the concept
5 * of linked lists for each hash bucket and the move-to-head
6 * optimization has been borrowed from there.
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 1, or (at your option)
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
27 #include "ircd_chattr.h"
28 #include "ircd_string.h"
42 /************************* Nemesi's hash alghoritm ***********************/
44 /* This hash function returns *exactly* N%HASHSIZE, where 'N'
45 * is the string itself (included the trailing '\0') seen as
46 * a baseHASHSHIFT number whose "digits" are the bytes of the
47 * number mapped through a "weight" transformation that gives
48 * the same "weight" to caseless-equal chars, example:
50 * Hashing the string "Nick\0" the result will be:
52 * | | | | `---> ( (hash_weight('\0') * (HASHSHIFT**0) +
53 * | | | `------> (hash_weight('k') * (HASHSHIFT**1) +
54 * | | `---------> (hash_weight('c') * (HASHSHIFT**2) +
55 * | `------------> (hash_weight('i') * (HASHSHIFT**3) +
56 * `---------------> (hash_weight('N') * (HASHSHIFT**4) ) % HASHSIZE
58 * It's actually a lot similar to a base transformation of the
59 * text representation of an integer.
60 * Looking at it this way seems slow and requiring unlimited integer
61 * precision, but we actually do it with a *very* fast loop, using only
62 * short integer arithmetic and by means of two memory accesses and
63 * 3 additions per each byte processed.. and nothing else, as a side
64 * note the distribution of real nicks over the hash table of this
65 * function is about 3 times better than the previous one, and the
66 * hash function itself is about 25% faster with a "normal" HASHSIZE
67 * (it gets slower with larger ones and faster for smallest ones
68 * because the hash table size affect the size of some maps and thus
69 * the effectiveness of RAM caches while accesing them).
70 * These two pages of macros are here to make the following code
71 * _more_ understandeable... I hope ;)
74 /* Internal stuff, think well before changing this, it's how
75 much the weights of two lexicograhically contiguous chars
76 differ, i.e. (hash_weight('b')-hash_weight('a')) == HASHSTEP
77 One seems to be fine but the alghoritm doesn't depend on it */
80 /* The smallest _prime_ int beeing HASHSTEP times bigger than a byte,
81 that is the first prime bigger than the maximum hash_weight
82 (since the maximum hash weight is gonne be the "biggest-byte * HASHSTEP")
86 /* Are we sure that HASHSHIFT is big enough ? */
87 #if !(HASHSHIFT > (HASHSTEP*(CHAR_MAX-CHAR_MIN)))
88 #error "No no, I cannot, please make HASHSHIFT a bigger prime !"
91 /* Now HASHSIZE doesn't need to be a prime, but we really don't want it
92 to be an exact multiple of HASHSHIFT, that would make the distribution
93 a LOT worse, once is not multiple of HASHSHIFT it can be anything */
94 #if ((HASHSIZE%HASHSHIFT)==0)
95 #error "Please set HASHSIZE to something not multiple of HASHSHIFT"
98 /* What type of integer do we need in our computations ? the largest
99 value we need to work on is (HASHSIZE+HASHSHIFT+1), for memory
100 operations we want to keep the tables compact (the cache will work
101 better and we will run faster) while for work variables we prefer
102 to roundup to 'int' if it is the case: on platforms where int!=short
103 int arithmetic is often faster than short arithmetic, we prefer signed
104 types if they are big enough since on some architectures they are faster
105 than unsigned, but we always keep signedness of mem and regs the same,
106 to avoid sign conversions that sometimes require time, the following
107 precompile stuff will set HASHMEMS to an appropriate integer type for
108 the tables stored in memory and HASHREGS to an appropriate int type
109 for the work registers/variables/return types. Everything of type
110 HASH???S will remain internal to this source file so I placed this stuff
111 here and not in the header file. */
116 #if ((!defined(HASHMEMS)) && (HASHSIZE < (SHRT_MAX-HASHSHIFT)))
117 #define HASHMEMS short
121 #if ((!defined(HASHMEMS)) && (HASHSIZE < (USHRT_MAX-HASHSHIFT)))
122 #define HASHMEMS unsigned short
123 #define HASHREGS unsigned int
126 #if ((!defined(HASHMEMS)) && (HASHSIZE < (INT_MAX-HASHSHIFT)))
131 #if ((!defined(HASHMEMS)) && (HASHSIZE < (UINT_MAX-HASHSHIFT)))
132 #define HASHMEMS unsigned int
133 #define HASHREGS unsigned int
136 #if ((!defined(HASHMEMS)) && (HASHSIZE < (LONG_MAX-HASHSHIFT)))
137 #define HASHMEMS long
138 #define HASHREGS long
141 #if ((!defined(HASHMEMS)) && (HASHSIZE < (ULONG_MAX-HASHSHIFT)))
142 #define HASHMEMS unsigned long
143 #define HASHREGS unsigned long
146 #if (!defined(HASHMEMS))
147 #error "Uh oh... I have a problem, do you want a 16GB hash table ? !"
150 /* Now we are sure that HASHMEMS and HASHREGS can contain the following */
151 #define HASHMAPSIZE (HASHSIZE+HASHSHIFT+1)
153 /* Static memory structures */
155 /* We need a first function that, given an integer h between 1 and
156 HASHSIZE+HASHSHIFT, returns ( (h * HASHSHIFT) % HASHSIZE ) )
157 We'll map this function in this table */
158 static HASHMEMS hash_map[HASHMAPSIZE];
160 /* Then we need a second function that "maps" a char to its weitgh,
161 changed to a table this one too, with this macro we can use a char
162 as index and not care if it is signed or not, no.. this will not
163 cause an addition to take place at each access, trust me, the
164 optimizer takes it out of the actual code and passes "label+shift"
165 to the linker, and the linker does the addition :) */
166 static HASHMEMS hash_weight_table[CHAR_MAX - CHAR_MIN + 1];
167 #define hash_weight(ch) hash_weight_table[ch-CHAR_MIN]
169 /* The actual hash tables, both MUST be of the same HASHSIZE, variable
170 size tables could be supported but the rehash routine should also
171 rebuild the transformation maps, I kept the tables of equal size
172 so that I can use one hash function and one transformation map */
173 static struct Client *clientTable[HASHSIZE];
174 static struct Channel *channelTable[HASHSIZE];
176 /* This is what the hash function will consider "equal" chars, this function
177 MUST be transitive, if HASHEQ(y,x)&&HASHEQ(y,z) then HASHEQ(y,z), and MUST
178 be symmetric, if HASHEQ(a,b) then HASHEQ(b,a), obvious ok but... :) */
179 #define HASHEQ(x,y) ((ToLower(x)) == (ToLower(y)))
182 * Initialize the maps used by hash functions and clear the tables */
190 /* Clear the hash tables first */
191 for (l = 0; l < HASHSIZE; l++)
197 /* Here is to what we "map" a char before working on it */
198 for (i = CHAR_MIN; i <= CHAR_MAX; i++)
199 hash_weight(i) = (HASHMEMS) (HASHSTEP * ((unsigned char)i));
201 /* Make them equal for case-independently equal chars, it's
202 lame to do it this way but I wanted the code flexible, it's
203 possible to change the HASHEQ macro and not touch here.
204 I don't actually care about the 32768 loops since it happens
205 only once at startup */
206 for (i = CHAR_MIN; i <= CHAR_MAX; i++) {
207 for (j = CHAR_MIN; j < i; j++) {
209 hash_weight(i) = hash_weight(j);
213 /* And this is our hash-loop "transformation" function,
214 basically it will be hash_map[x] == ((x*HASHSHIFT)%HASHSIZE)
215 defined for 0<=x<=(HASHSIZE+HASHSHIFT) */
216 for (m = 0; m < (unsigned long)HASHMAPSIZE; m++)
219 l *= (unsigned long)HASHSHIFT;
220 l %= (unsigned long)HASHSIZE;
221 hash_map[m] = (HASHMEMS) l;
225 /* These are the actual hash functions, since they are static
226 and very short any decent compiler at a good optimization level
227 WILL inline these in the following functions */
229 /* This is the string hash function,
230 WARNING: n must be a valid pointer to a _non-null_ string
231 this means that not only strhash(NULL) but also
232 strhash("") _will_ coredump, it's responsibility
233 the caller to eventually check BadPtr(nick). */
235 static HASHREGS strhash(const char *n)
237 HASHREGS hash = hash_weight(*n++);
239 hash = hash_map[hash] + hash_weight(*n++);
240 return hash_map[hash];
243 /* And this is the string hash function for limited lenght strings
244 WARNING: n must be a valid pointer to a non-null string
245 and i must be > 0 ! */
249 The time taken to decrement i makes the function
250 slower than strhash for the average of channel names (tested
251 on 16000 real channel names, 1000 loops. I left the code here
252 as a bookmark if a strnhash is evetually needed in the future.
254 static HASHREGS strnhash(const char *n, int i) {
255 HASHREGS hash = hash_weight(*n++);
258 hash = hash_map[hash] + hash_weight(*n++);
259 return hash_map[hash];
262 #define CHANHASHLEN 30
266 /************************** Externally visible functions ********************/
268 /* Optimization note: in these functions I supposed that the CSE optimization
269 * (Common Subexpression Elimination) does its work decently, this means that
270 * I avoided introducing new variables to do the work myself and I did let
271 * the optimizer play with more free registers, actual tests proved this
272 * solution to be faster than doing things like tmp2=tmp->hnext... and then
273 * use tmp2 myself wich would have given less freedom to the optimizer.
278 * Adds a client's name in the proper hash linked list, can't fail,
279 * cptr must have a non-null name or expect a coredump, the name is
280 * infact taken from cptr->name
282 int hAddClient(struct Client *cptr)
284 HASHREGS hashv = strhash(cli_name(cptr));
286 cli_hnext(cptr) = clientTable[hashv];
287 clientTable[hashv] = cptr;
294 * Adds a channel's name in the proper hash linked list, can't fail.
295 * chptr must have a non-null name or expect a coredump.
296 * As before the name is taken from chptr->name, we do hash its entire
297 * lenght since this proved to be statistically faster
299 int hAddChannel(struct Channel *chptr)
301 HASHREGS hashv = strhash(chptr->chname);
303 chptr->hnext = channelTable[hashv];
304 channelTable[hashv] = chptr;
311 * Removes a Client's name from the hash linked list
313 int hRemClient(struct Client *cptr)
315 HASHREGS hashv = strhash(cli_name(cptr));
316 struct Client *tmp = clientTable[hashv];
319 clientTable[hashv] = cli_hnext(cptr);
320 cli_hnext(cptr) = cptr;
325 if (cli_hnext(tmp) == cptr) {
326 cli_hnext(tmp) = cli_hnext(cli_hnext(tmp));
327 cli_hnext(cptr) = cptr;
330 tmp = cli_hnext(tmp);
337 * Removes the old name of a client from a linked list and adds
338 * the new one to another linked list, there is a slight chanche
339 * that this is useless if the two hashes are the same but it still
340 * would need to move the name to the top of the list.
341 * As always it's responsibility of the caller to check that
342 * both newname and cptr->name are valid names (not "" or NULL).
343 * Typically, to change the nick of an already hashed client:
344 * if (!BadPtr(newname) && ClearTheNameSomeHow(newname)) {
345 * hChangeClient(cptr, newname);
346 * strcpy(cptr->name, newname);
348 * There isn't an equivalent function for channels since they
351 int hChangeClient(struct Client *cptr, const char *newname)
353 HASHREGS newhash = strhash(newname);
358 cli_hnext(cptr) = clientTable[newhash];
359 clientTable[newhash] = cptr;
365 * Removes the channel's name from the corresponding hash linked list
367 int hRemChannel(struct Channel *chptr)
369 HASHREGS hashv = strhash(chptr->chname);
370 struct Channel *tmp = channelTable[hashv];
373 channelTable[hashv] = chptr->hnext;
374 chptr->hnext = chptr;
379 if (tmp->hnext == chptr) {
380 tmp->hnext = tmp->hnext->hnext;
381 chptr->hnext = chptr;
392 * New semantics: finds a client whose name is 'name' and whose
393 * status is one of those marked in TMask, if can't find one
394 * returns NULL. If it finds one moves it to the top of the list
397 struct Client* hSeekClient(const char *name, int TMask)
399 HASHREGS hashv = strhash(name);
400 struct Client *cptr = clientTable[hashv];
403 if (0 == (cli_status(cptr) & TMask) || 0 != ircd_strcmp(name, cli_name(cptr))) {
405 while (prev = cptr, cptr = cli_hnext(cptr)) {
406 if ((cli_status(cptr) & TMask) && (0 == ircd_strcmp(name, cli_name(cptr)))) {
407 cli_hnext(prev) = cli_hnext(cptr);
408 cli_hnext(cptr) = clientTable[hashv];
409 clientTable[hashv] = cptr;
420 * New semantics: finds a channel whose name is 'name',
421 * if can't find one returns NULL, if can find it moves
422 * it to the top of the list and returns it.
424 struct Channel* hSeekChannel(const char *name)
426 HASHREGS hashv = strhash(name);
427 struct Channel *chptr = channelTable[hashv];
430 if (0 != ircd_strcmp(name, chptr->chname)) {
431 struct Channel* prev;
432 while (prev = chptr, chptr = chptr->hnext) {
433 if (0 == ircd_strcmp(name, chptr->chname)) {
434 prev->hnext = chptr->hnext;
435 chptr->hnext = channelTable[hashv];
436 channelTable[hashv] = chptr;
446 /* I will add some useful(?) statistics here one of these days,
447 but not for DEBUGMODE: just to let the admins play with it,
448 coders are able to SIGCORE the server and look into what goes
451 int m_hash(struct Client *cptr, struct Client *sptr, int parc, char *parv[])
460 sendcmdto_one(&me, CMD_NOTICE, sptr, "%C :Hash Table Statistics", sptr);
462 for (i = 0; i < HASHSIZE; ++i) {
463 if ((cl = clientTable[i])) {
466 for ( ; cl; cl = cli_hnext(cl))
474 sendcmdto_one(&me, CMD_NOTICE, sptr, "%C :Client: entries: %d buckets: %d "
475 "max chain: %d", sptr, count, buckets, max_chain);
481 for (i = 0; i < HASHSIZE; ++i) {
482 if ((ch = channelTable[i])) {
485 for ( ; ch; ch = ch->hnext)
493 sendcmdto_one(&me, CMD_NOTICE, sptr, "%C :Channel: entries: %d buckets: %d "
494 "max chain: %d", sptr, count, buckets, max_chain);
498 /* Nick jupe utilities, these are in a static hash table with entry/bucket
499 ratio of one, collision shift up and roll in a circular fashion, the
500 lowest 12 bits of the hash value are used, deletion is not supported,
501 only addition, test for existence and cleanup of the table are.. */
503 #define JUPEHASHBITS 12 /* 4096 entries, 64 nick jupes allowed */
504 #define JUPEHASHSIZE (1<<JUPEHASHBITS)
505 #define JUPEHASHMASK (JUPEHASHSIZE-1)
506 #define JUPEMAX (1<<(JUPEHASHBITS-6))
508 static char jupeTable[JUPEHASHSIZE][NICKLEN + 1]; /* About 40k */
509 static int jupesCount;
513 * Tells if a nick is juped (nonzero returned) or not (zero)
515 int isNickJuped(const char *nick)
520 for (pos = strhash(nick); (pos &= JUPEHASHMASK), jupeTable[pos][0]; pos++) {
521 if (0 == ircd_strcmp(nick, jupeTable[pos]))
525 return 0; /* A bogus pointer is NOT a juped nick, right ? :) */
530 * Adds a (comma separated list of) nick jupes to the table
532 int addNickJupes(const char *nicks)
534 static char temp[BUFSIZE + 1];
541 ircd_strncpy(temp, nicks, BUFSIZE);
542 temp[BUFSIZE] = '\0';
544 for (one = ircd_strtok(&p, temp, ","); one; one = ircd_strtok(&p, NULL, ","))
551 if (!jupeTable[pos][0])
553 if (jupesCount == JUPEMAX)
554 return 1; /* Error: Jupe table is full ! */
556 ircd_strncpy(jupeTable[pos], one, NICKLEN);
557 jupeTable[pos][NICKLEN] = '\000'; /* Better safe than sorry :) */
560 if (0 == ircd_strcmp(one, jupeTable[pos]))
571 * Cleans up the juped nicks table
573 void clearNickJupes(void)
577 for (i = 0; i < JUPEHASHSIZE; i++)
578 jupeTable[i][0] = '\000';