2 * IRC - Internet Relay Chat, ircd/hash.c
3 * Copyright (C) 1998 Andrea Cocito, completely rewritten version.
4 * Previous version was Copyright (C) 1991 Darren Reed, the concept
5 * of linked lists for each hash bucket and the move-to-head
6 * optimization has been borrowed from there.
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 1, or (at your option)
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
29 #include "ircd_chattr.h"
30 #include "ircd_string.h"
44 /************************* Nemesi's hash alghoritm ***********************/
46 /* This hash function returns *exactly* N%HASHSIZE, where 'N'
47 * is the string itself (included the trailing '\0') seen as
48 * a baseHASHSHIFT number whose "digits" are the bytes of the
49 * number mapped through a "weight" transformation that gives
50 * the same "weight" to caseless-equal chars, example:
52 * Hashing the string "Nick\0" the result will be:
54 * | | | | `---> ( (hash_weight('\0') * (HASHSHIFT**0) +
55 * | | | `------> (hash_weight('k') * (HASHSHIFT**1) +
56 * | | `---------> (hash_weight('c') * (HASHSHIFT**2) +
57 * | `------------> (hash_weight('i') * (HASHSHIFT**3) +
58 * `---------------> (hash_weight('N') * (HASHSHIFT**4) ) % HASHSIZE
60 * It's actually a lot similar to a base transformation of the
61 * text representation of an integer.
62 * Looking at it this way seems slow and requiring unlimited integer
63 * precision, but we actually do it with a *very* fast loop, using only
64 * short integer arithmetic and by means of two memory accesses and
65 * 3 additions per each byte processed.. and nothing else, as a side
66 * note the distribution of real nicks over the hash table of this
67 * function is about 3 times better than the previous one, and the
68 * hash function itself is about 25% faster with a "normal" HASHSIZE
69 * (it gets slower with larger ones and faster for smallest ones
70 * because the hash table size affect the size of some maps and thus
71 * the effectiveness of RAM caches while accesing them).
72 * These two pages of macros are here to make the following code
73 * _more_ understandeable... I hope ;)
76 /* Internal stuff, think well before changing this, it's how
77 much the weights of two lexicograhically contiguous chars
78 differ, i.e. (hash_weight('b')-hash_weight('a')) == HASHSTEP
79 One seems to be fine but the alghoritm doesn't depend on it */
82 /* The smallest _prime_ int beeing HASHSTEP times bigger than a byte,
83 that is the first prime bigger than the maximum hash_weight
84 (since the maximum hash weight is gonne be the "biggest-byte * HASHSTEP")
88 /* Are we sure that HASHSHIFT is big enough ? */
89 #if !(HASHSHIFT > (HASHSTEP*(CHAR_MAX-CHAR_MIN)))
90 #error "No no, I cannot, please make HASHSHIFT a bigger prime !"
93 /* Now HASHSIZE doesn't need to be a prime, but we really don't want it
94 to be an exact multiple of HASHSHIFT, that would make the distribution
95 a LOT worse, once is not multiple of HASHSHIFT it can be anything */
96 #if ((HASHSIZE%HASHSHIFT)==0)
97 #error "Please set HASHSIZE to something not multiple of HASHSHIFT"
100 /* What type of integer do we need in our computations ? the largest
101 value we need to work on is (HASHSIZE+HASHSHIFT+1), for memory
102 operations we want to keep the tables compact (the cache will work
103 better and we will run faster) while for work variables we prefer
104 to roundup to 'int' if it is the case: on platforms where int!=short
105 int arithmetic is often faster than short arithmetic, we prefer signed
106 types if they are big enough since on some architectures they are faster
107 than unsigned, but we always keep signedness of mem and regs the same,
108 to avoid sign conversions that sometimes require time, the following
109 precompile stuff will set HASHMEMS to an appropriate integer type for
110 the tables stored in memory and HASHREGS to an appropriate int type
111 for the work registers/variables/return types. Everything of type
112 HASH???S will remain internal to this source file so I placed this stuff
113 here and not in the header file. */
118 #if ((!defined(HASHMEMS)) && (HASHSIZE < (SHRT_MAX-HASHSHIFT)))
119 #define HASHMEMS short
123 #if ((!defined(HASHMEMS)) && (HASHSIZE < (USHRT_MAX-HASHSHIFT)))
124 #define HASHMEMS unsigned short
125 #define HASHREGS unsigned int
128 #if ((!defined(HASHMEMS)) && (HASHSIZE < (INT_MAX-HASHSHIFT)))
133 #if ((!defined(HASHMEMS)) && (HASHSIZE < (UINT_MAX-HASHSHIFT)))
134 #define HASHMEMS unsigned int
135 #define HASHREGS unsigned int
138 #if ((!defined(HASHMEMS)) && (HASHSIZE < (LONG_MAX-HASHSHIFT)))
139 #define HASHMEMS long
140 #define HASHREGS long
143 #if ((!defined(HASHMEMS)) && (HASHSIZE < (ULONG_MAX-HASHSHIFT)))
144 #define HASHMEMS unsigned long
145 #define HASHREGS unsigned long
148 #if (!defined(HASHMEMS))
149 #error "Uh oh... I have a problem, do you want a 16GB hash table ? !"
152 /* Now we are sure that HASHMEMS and HASHREGS can contain the following */
153 #define HASHMAPSIZE (HASHSIZE+HASHSHIFT+1)
155 /* Static memory structures */
157 /* We need a first function that, given an integer h between 1 and
158 HASHSIZE+HASHSHIFT, returns ( (h * HASHSHIFT) % HASHSIZE ) )
159 We'll map this function in this table */
160 static HASHMEMS hash_map[HASHMAPSIZE];
162 /* Then we need a second function that "maps" a char to its weitgh,
163 changed to a table this one too, with this macro we can use a char
164 as index and not care if it is signed or not, no.. this will not
165 cause an addition to take place at each access, trust me, the
166 optimizer takes it out of the actual code and passes "label+shift"
167 to the linker, and the linker does the addition :) */
168 static HASHMEMS hash_weight_table[CHAR_MAX - CHAR_MIN + 1];
169 #define hash_weight(ch) hash_weight_table[ch-CHAR_MIN]
171 /* The actual hash tables, both MUST be of the same HASHSIZE, variable
172 size tables could be supported but the rehash routine should also
173 rebuild the transformation maps, I kept the tables of equal size
174 so that I can use one hash function and one transformation map */
175 static struct Client *clientTable[HASHSIZE];
176 static struct Channel *channelTable[HASHSIZE];
178 /* This is what the hash function will consider "equal" chars, this function
179 MUST be transitive, if HASHEQ(y,x)&&HASHEQ(y,z) then HASHEQ(y,z), and MUST
180 be symmetric, if HASHEQ(a,b) then HASHEQ(b,a), obvious ok but... :) */
181 #define HASHEQ(x,y) ((ToLower(x)) == (ToLower(y)))
184 * Initialize the maps used by hash functions and clear the tables */
192 /* Clear the hash tables first */
193 for (l = 0; l < HASHSIZE; l++)
199 /* Here is to what we "map" a char before working on it */
200 for (i = CHAR_MIN; i <= CHAR_MAX; i++)
201 hash_weight(i) = (HASHMEMS) (HASHSTEP * ((unsigned char)i));
203 /* Make them equal for case-independently equal chars, it's
204 lame to do it this way but I wanted the code flexible, it's
205 possible to change the HASHEQ macro and not touch here.
206 I don't actually care about the 32768 loops since it happens
207 only once at startup */
208 for (i = CHAR_MIN; i <= CHAR_MAX; i++) {
209 for (j = CHAR_MIN; j < i; j++) {
211 hash_weight(i) = hash_weight(j);
215 /* And this is our hash-loop "transformation" function,
216 basically it will be hash_map[x] == ((x*HASHSHIFT)%HASHSIZE)
217 defined for 0<=x<=(HASHSIZE+HASHSHIFT) */
218 for (m = 0; m < (unsigned long)HASHMAPSIZE; m++)
221 l *= (unsigned long)HASHSHIFT;
222 l %= (unsigned long)HASHSIZE;
223 hash_map[m] = (HASHMEMS) l;
227 /* These are the actual hash functions, since they are static
228 and very short any decent compiler at a good optimization level
229 WILL inline these in the following functions */
231 /* This is the string hash function,
232 WARNING: n must be a valid pointer to a _non-null_ string
233 this means that not only strhash(NULL) but also
234 strhash("") _will_ coredump, it's responsibility
235 the caller to eventually check BadPtr(nick). */
237 static HASHREGS strhash(const char *n)
239 HASHREGS hash = hash_weight(*n++);
241 hash = hash_map[hash] + hash_weight(*n++);
242 return hash_map[hash];
245 /* And this is the string hash function for limited lenght strings
246 WARNING: n must be a valid pointer to a non-null string
247 and i must be > 0 ! */
251 The time taken to decrement i makes the function
252 slower than strhash for the average of channel names (tested
253 on 16000 real channel names, 1000 loops. I left the code here
254 as a bookmark if a strnhash is evetually needed in the future.
256 static HASHREGS strnhash(const char *n, int i) {
257 HASHREGS hash = hash_weight(*n++);
260 hash = hash_map[hash] + hash_weight(*n++);
261 return hash_map[hash];
264 #define CHANHASHLEN 30
268 /************************** Externally visible functions ********************/
270 /* Optimization note: in these functions I supposed that the CSE optimization
271 * (Common Subexpression Elimination) does its work decently, this means that
272 * I avoided introducing new variables to do the work myself and I did let
273 * the optimizer play with more free registers, actual tests proved this
274 * solution to be faster than doing things like tmp2=tmp->hnext... and then
275 * use tmp2 myself wich would have given less freedom to the optimizer.
280 * Adds a client's name in the proper hash linked list, can't fail,
281 * cptr must have a non-null name or expect a coredump, the name is
282 * infact taken from cptr->name
284 int hAddClient(struct Client *cptr)
286 HASHREGS hashv = strhash(cli_name(cptr));
288 cli_hnext(cptr) = clientTable[hashv];
289 clientTable[hashv] = cptr;
296 * Adds a channel's name in the proper hash linked list, can't fail.
297 * chptr must have a non-null name or expect a coredump.
298 * As before the name is taken from chptr->name, we do hash its entire
299 * lenght since this proved to be statistically faster
301 int hAddChannel(struct Channel *chptr)
303 HASHREGS hashv = strhash(chptr->chname);
305 chptr->hnext = channelTable[hashv];
306 channelTable[hashv] = chptr;
313 * Removes a Client's name from the hash linked list
315 int hRemClient(struct Client *cptr)
317 HASHREGS hashv = strhash(cli_name(cptr));
318 struct Client *tmp = clientTable[hashv];
321 clientTable[hashv] = cli_hnext(cptr);
322 cli_hnext(cptr) = cptr;
327 if (cli_hnext(tmp) == cptr) {
328 cli_hnext(tmp) = cli_hnext(cli_hnext(tmp));
329 cli_hnext(cptr) = cptr;
332 tmp = cli_hnext(tmp);
339 * Removes the old name of a client from a linked list and adds
340 * the new one to another linked list, there is a slight chanche
341 * that this is useless if the two hashes are the same but it still
342 * would need to move the name to the top of the list.
343 * As always it's responsibility of the caller to check that
344 * both newname and cptr->name are valid names (not "" or NULL).
345 * Typically, to change the nick of an already hashed client:
346 * if (!BadPtr(newname) && ClearTheNameSomeHow(newname)) {
347 * hChangeClient(cptr, newname);
348 * strcpy(cptr->name, newname);
350 * There isn't an equivalent function for channels since they
353 int hChangeClient(struct Client *cptr, const char *newname)
355 HASHREGS newhash = strhash(newname);
360 cli_hnext(cptr) = clientTable[newhash];
361 clientTable[newhash] = cptr;
367 * Removes the channel's name from the corresponding hash linked list
369 int hRemChannel(struct Channel *chptr)
371 HASHREGS hashv = strhash(chptr->chname);
372 struct Channel *tmp = channelTable[hashv];
375 channelTable[hashv] = chptr->hnext;
376 chptr->hnext = chptr;
381 if (tmp->hnext == chptr) {
382 tmp->hnext = tmp->hnext->hnext;
383 chptr->hnext = chptr;
394 * New semantics: finds a client whose name is 'name' and whose
395 * status is one of those marked in TMask, if can't find one
396 * returns NULL. If it finds one moves it to the top of the list
399 struct Client* hSeekClient(const char *name, int TMask)
401 HASHREGS hashv = strhash(name);
402 struct Client *cptr = clientTable[hashv];
405 if (0 == (cli_status(cptr) & TMask) || 0 != ircd_strcmp(name, cli_name(cptr))) {
407 while (prev = cptr, cptr = cli_hnext(cptr)) {
408 if ((cli_status(cptr) & TMask) && (0 == ircd_strcmp(name, cli_name(cptr)))) {
409 cli_hnext(prev) = cli_hnext(cptr);
410 cli_hnext(cptr) = clientTable[hashv];
411 clientTable[hashv] = cptr;
422 * New semantics: finds a channel whose name is 'name',
423 * if can't find one returns NULL, if can find it moves
424 * it to the top of the list and returns it.
426 struct Channel* hSeekChannel(const char *name)
428 HASHREGS hashv = strhash(name);
429 struct Channel *chptr = channelTable[hashv];
432 if (0 != ircd_strcmp(name, chptr->chname)) {
433 struct Channel* prev;
434 while (prev = chptr, chptr = chptr->hnext) {
435 if (0 == ircd_strcmp(name, chptr->chname)) {
436 prev->hnext = chptr->hnext;
437 chptr->hnext = channelTable[hashv];
438 channelTable[hashv] = chptr;
448 /* I will add some useful(?) statistics here one of these days,
449 but not for DEBUGMODE: just to let the admins play with it,
450 coders are able to SIGCORE the server and look into what goes
453 int m_hash(struct Client *cptr, struct Client *sptr, int parc, char *parv[])
462 sendcmdto_one(&me, CMD_NOTICE, sptr, "%C :Hash Table Statistics", sptr);
464 for (i = 0; i < HASHSIZE; ++i) {
465 if ((cl = clientTable[i])) {
468 for ( ; cl; cl = cli_hnext(cl))
476 sendcmdto_one(&me, CMD_NOTICE, sptr, "%C :Client: entries: %d buckets: %d "
477 "max chain: %d", sptr, count, buckets, max_chain);
483 for (i = 0; i < HASHSIZE; ++i) {
484 if ((ch = channelTable[i])) {
487 for ( ; ch; ch = ch->hnext)
495 sendcmdto_one(&me, CMD_NOTICE, sptr, "%C :Channel: entries: %d buckets: %d "
496 "max chain: %d", sptr, count, buckets, max_chain);
500 /* Nick jupe utilities, these are in a static hash table with entry/bucket
501 ratio of one, collision shift up and roll in a circular fashion, the
502 lowest 12 bits of the hash value are used, deletion is not supported,
503 only addition, test for existence and cleanup of the table are.. */
505 #define JUPEHASHBITS 12 /* 4096 entries, 64 nick jupes allowed */
506 #define JUPEHASHSIZE (1<<JUPEHASHBITS)
507 #define JUPEHASHMASK (JUPEHASHSIZE-1)
508 #define JUPEMAX (1<<(JUPEHASHBITS-6))
510 static char jupeTable[JUPEHASHSIZE][NICKLEN + 1]; /* About 40k */
511 static int jupesCount;
515 * Tells if a nick is juped (nonzero returned) or not (zero)
517 int isNickJuped(const char *nick)
522 for (pos = strhash(nick); (pos &= JUPEHASHMASK), jupeTable[pos][0]; pos++) {
523 if (0 == ircd_strcmp(nick, jupeTable[pos]))
527 return 0; /* A bogus pointer is NOT a juped nick, right ? :) */
532 * Adds a (comma separated list of) nick jupes to the table
534 int addNickJupes(const char *nicks)
536 static char temp[BUFSIZE + 1];
543 ircd_strncpy(temp, nicks, BUFSIZE);
544 temp[BUFSIZE] = '\0';
546 for (one = ircd_strtok(&p, temp, ","); one; one = ircd_strtok(&p, NULL, ","))
553 if (!jupeTable[pos][0])
555 if (jupesCount == JUPEMAX)
556 return 1; /* Error: Jupe table is full ! */
558 ircd_strncpy(jupeTable[pos], one, NICKLEN);
559 jupeTable[pos][NICKLEN] = '\000'; /* Better safe than sorry :) */
562 if (0 == ircd_strcmp(one, jupeTable[pos]))
573 * Cleans up the juped nicks table
575 void clearNickJupes(void)
579 for (i = 0; i < JUPEHASHSIZE; i++)
580 jupeTable[i][0] = '\000';