2 * IRC - Internet Relay Chat, ircd/hash.c
3 * Copyright (C) 1998 Andrea Cocito, completely rewritten version.
4 * Previous version was Copyright (C) 1991 Darren Reed, the concept
5 * of linked lists for each hash bucket and the move-to-head
6 * optimization has been borrowed from there.
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 1, or (at your option)
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
38 /************************* Nemesi's hash alghoritm ***********************/
40 /* This hash function returns *exactly* N%HASHSIZE, where 'N'
41 * is the string itself (included the trailing '\0') seen as
42 * a baseHASHSHIFT number whose "digits" are the bytes of the
43 * number mapped through a "weight" transformation that gives
44 * the same "weight" to caseless-equal chars, example:
46 * Hashing the string "Nick\0" the result will be:
48 * | | | | `---> ( (hash_weight('\0') * (HASHSHIFT**0) +
49 * | | | `------> (hash_weight('k') * (HASHSHIFT**1) +
50 * | | `---------> (hash_weight('c') * (HASHSHIFT**2) +
51 * | `------------> (hash_weight('i') * (HASHSHIFT**3) +
52 * `---------------> (hash_weight('N') * (HASHSHIFT**4) ) % HASHSIZE
54 * It's actually a lot similar to a base transformation of the
55 * text representation of an integer.
56 * Looking at it this way seems slow and requiring unlimited integer
57 * precision, but we actually do it with a *very* fast loop, using only
58 * short integer arithmetic and by means of two memory accesses and
59 * 3 additions per each byte processed.. and nothing else, as a side
60 * note the distribution of real nicks over the hash table of this
61 * function is about 3 times better than the previous one, and the
62 * hash function itself is about 25% faster with a "normal" HASHSIZE
63 * (it gets slower with larger ones and faster for smallest ones
64 * because the hash table size affect the size of some maps and thus
65 * the effectiveness of RAM caches while accesing them).
66 * These two pages of macros are here to make the following code
67 * _more_ understandeable... I hope ;)
70 /* Internal stuff, think well before changing this, it's how
71 much the weights of two lexicograhically contiguous chars
72 differ, i.e. (hash_weight('b')-hash_weight('a')) == HASHSTEP
73 One seems to be fine but the alghoritm doesn't depend on it */
76 /* The smallest _prime_ int beeing HASHSTEP times bigger than a byte,
77 that is the first prime bigger than the maximum hash_weight
78 (since the maximum hash weight is gonne be the "biggest-byte * HASHSTEP")
82 /* Are we sure that HASHSHIFT is big enough ? */
83 #if !(HASHSHIFT > (HASHSTEP*(CHAR_MAX-CHAR_MIN)))
84 #error "No no, I cannot, please make HASHSHIFT a bigger prime !"
87 /* Now HASHSIZE doesn't need to be a prime, but we really don't want it
88 to be an exact multiple of HASHSHIFT, that would make the distribution
89 a LOT worse, once is not multiple of HASHSHIFT it can be anything */
90 #if ((HASHSIZE%HASHSHIFT)==0)
91 #error "Please set HASHSIZE to something not multiple of HASHSHIFT"
94 /* What type of integer do we need in our computations ? the largest
95 value we need to work on is (HASHSIZE+HASHSHIFT+1), for memory
96 operations we want to keep the tables compact (the cache will work
97 better and we will run faster) while for work variables we prefer
98 to roundup to 'int' if it is the case: on platforms where int!=short
99 int arithmetic is often faster than short arithmetic, we prefer signed
100 types if they are big enough since on some architectures they are faster
101 than unsigned, but we always keep signedness of mem and regs the same,
102 to avoid sign conversions that sometimes require time, the following
103 precompile stuff will set HASHMEMS to an appropriate integer type for
104 the tables stored in memory and HASHREGS to an appropriate int type
105 for the work registers/variables/return types. Everything of type
106 HASH???S will remain internal to this source file so I placed this stuff
107 here and not in the header file. */
112 #if ((!defined(HASHMEMS)) && (HASHSIZE < (SHRT_MAX-HASHSHIFT)))
113 #define HASHMEMS short
117 #if ((!defined(HASHMEMS)) && (HASHSIZE < (USHRT_MAX-HASHSHIFT)))
118 #define HASHMEMS unsigned short
119 #define HASHREGS unsigned int
122 #if ((!defined(HASHMEMS)) && (HASHSIZE < (INT_MAX-HASHSHIFT)))
127 #if ((!defined(HASHMEMS)) && (HASHSIZE < (UINT_MAX-HASHSHIFT)))
128 #define HASHMEMS unsigned int
129 #define HASHREGS unsigned int
132 #if ((!defined(HASHMEMS)) && (HASHSIZE < (LONG_MAX-HASHSHIFT)))
133 #define HASHMEMS long
134 #define HASHREGS long
137 #if ((!defined(HASHMEMS)) && (HASHSIZE < (ULONG_MAX-HASHSHIFT)))
138 #define HASHMEMS unsigned long
139 #define HASHREGS unsigned long
142 #if (!defined(HASHMEMS))
143 #error "Uh oh... I have a problem, do you want a 16GB hash table ? !"
146 /* Now we are sure that HASHMEMS and HASHREGS can contain the following */
147 #define HASHMAPSIZE (HASHSIZE+HASHSHIFT+1)
149 /* Static memory structures */
151 /* We need a first function that, given an integer h between 1 and
152 HASHSIZE+HASHSHIFT, returns ( (h * HASHSHIFT) % HASHSIZE ) )
153 We'll map this function in this table */
154 static HASHMEMS hash_map[HASHMAPSIZE];
156 /* Then we need a second function that "maps" a char to its weitgh,
157 changed to a table this one too, with this macro we can use a char
158 as index and not care if it is signed or not, no.. this will not
159 cause an addition to take place at each access, trust me, the
160 optimizer takes it out of the actual code and passes "label+shift"
161 to the linker, and the linker does the addition :) */
162 static HASHMEMS hash_weight_table[CHAR_MAX - CHAR_MIN + 1];
163 #define hash_weight(ch) hash_weight_table[ch-CHAR_MIN]
165 /* The actual hash tables, both MUST be of the same HASHSIZE, variable
166 size tables could be supported but the rehash routine should also
167 rebuild the transformation maps, I kept the tables of equal size
168 so that I can use one hash function and one transformation map */
169 static aClient *clientTable[HASHSIZE];
170 static aChannel *channelTable[HASHSIZE];
172 /* This is what the hash function will consider "equal" chars, this function
173 MUST be transitive, if HASHEQ(y,x)&&HASHEQ(y,z) then HASHEQ(y,z), and MUST
174 be symmetric, if HASHEQ(a,b) then HASHEQ(b,a), obvious ok but... :) */
175 #define HASHEQ(x,y) (((char) toLower((char) x)) == ((char) toLower((char) y)))
178 * Initialize the maps used by hash functions and clear the tables */
184 /* Clear the hash tables first */
185 for (l = 0; l < HASHSIZE; l++)
187 channelTable[l] = (aChannel *)NULL;
188 clientTable[l] = (aClient *)NULL;
191 /* Here is to what we "map" a char before working on it */
192 for (i = CHAR_MIN; i <= CHAR_MAX; i++)
193 hash_weight(i) = (HASHMEMS) (HASHSTEP * ((unsigned char)i));
195 /* Make them equal for case-independently equal chars, it's
196 lame to do it this way but I wanted the code flexible, it's
197 possible to change the HASHEQ macro and not touch here.
198 I don't actually care about the 32768 loops since it happens
199 only once at startup */
200 for (i = CHAR_MIN; i <= CHAR_MAX; i++)
201 for (j = CHAR_MIN; j < i; j++)
203 hash_weight(i) = hash_weight(j);
205 /* And this is our hash-loop "transformation" function,
206 basically it will be hash_map[x] == ((x*HASHSHIFT)%HASHSIZE)
207 defined for 0<=x<=(HASHSIZE+HASHSHIFT) */
208 for (m = 0; m < (unsigned long)HASHMAPSIZE; m++)
211 l *= (unsigned long)HASHSHIFT;
212 l %= (unsigned long)HASHSIZE;
213 hash_map[m] = (HASHMEMS) l;
217 /* These are the actual hash functions, since they are static
218 and very short any decent compiler at a good optimization level
219 WILL inline these in the following functions */
221 /* This is the string hash function,
222 WARNING: n must be a valid pointer to a _non-null_ string
223 this means that not only strhash(NULL) but also
224 strhash("") _will_ coredump, it's responsibility
225 the caller to eventually check BadPtr(nick). */
227 static HASHREGS strhash(register char *n)
229 register HASHREGS hash = hash_weight(*n++);
231 hash = hash_map[hash] + hash_weight(*n++);
232 return hash_map[hash];
235 /* And this is the string hash function for limited lenght strings
236 WARNING: n must be a valid pointer to a non-null string
237 and i must be > 0 ! */
241 The time taken to decrement i makes the function
242 slower than strhash for the average of channel names (tested
243 on 16000 real channel names, 1000 loops. I left the code here
244 as a bookmark if a strnhash is evetually needed in the future.
246 static HASHREGS strnhash(register char *n, register int i) {
247 register HASHREGS hash = hash_weight(*n++);
250 hash = hash_map[hash] + hash_weight(*n++);
251 return hash_map[hash];
254 #define CHANHASHLEN 30
258 /************************** Externally visible functions ********************/
260 /* Optimization note: in these functions I supposed that the CSE optimization
261 * (Common Subexpression Elimination) does its work decently, this means that
262 * I avoided introducing new variables to do the work myself and I did let
263 * the optimizer play with more free registers, actual tests proved this
264 * solution to be faster than doing things like tmp2=tmp->hnext... and then
265 * use tmp2 myself wich would have given less freedom to the optimizer.
270 * Adds a client's name in the proper hash linked list, can't fail,
271 * cptr must have a non-null name or expect a coredump, the name is
272 * infact taken from cptr->name
274 int hAddClient(aClient *cptr)
276 register HASHREGS hashv = strhash(cptr->name);
278 cptr->hnext = clientTable[hashv];
279 clientTable[hashv] = cptr;
286 * Adds a channel's name in the proper hash linked list, can't fail.
287 * chptr must have a non-null name or expect a coredump.
288 * As before the name is taken from chptr->name, we do hash its entire
289 * lenght since this proved to be statistically faster
291 int hAddChannel(aChannel *chptr)
293 register HASHREGS hashv = strhash(chptr->chname);
295 chptr->hnextch = channelTable[hashv];
296 channelTable[hashv] = chptr;
303 * Removes a Client's name from the hash linked list
305 int hRemClient(aClient *cptr)
307 register HASHREGS hashv = strhash(cptr->name);
308 register aClient *tmp = clientTable[hashv];
312 clientTable[hashv] = cptr->hnext;
318 if (tmp->hnext == cptr)
320 tmp->hnext = tmp->hnext->hnext;
332 * Removes the old name of a client from a linked list and adds
333 * the new one to another linked list, there is a slight chanche
334 * that this is useless if the two hashes are the same but it still
335 * would need to move the name to the top of the list.
336 * As always it's responsibility of the caller to check that
337 * both newname and cptr->name are valid names (not "" or NULL).
338 * Typically, to change the nick of an already hashed client:
339 * if (!BadPtr(newname) && ClearTheNameSomeHow(newname)) {
340 * hChangeClient(cptr, newname);
341 * strcpy(cptr->name, newname);
343 * There isn't an equivalent function for channels since they
346 int hChangeClient(aClient *cptr, char *newname)
348 register HASHREGS oldhash = strhash(cptr->name);
349 register HASHREGS newhash = strhash(newname);
350 register aClient *tmp;
352 tmp = clientTable[oldhash];
359 if (tmp->hnext == cptr)
361 tmp->hnext = cptr->hnext;
362 cptr->hnext = clientTable[newhash];
363 clientTable[newhash] = cptr;
369 /* Well... do our best anyway... link it to the correct list,
370 and then... Fail (and core if we are debugging) ! */
371 cptr->hnext = clientTable[newhash];
372 clientTable[newhash] = cptr;
378 * Removes the channel's name from the corresponding hash linked list
380 int hRemChannel(aChannel *chptr)
382 register HASHREGS hashv = strhash(chptr->chname);
383 register aChannel *tmp = channelTable[hashv];
387 channelTable[hashv] = chptr->hnextch;
393 if (tmp->hnextch == chptr)
395 tmp->hnextch = tmp->hnextch->hnextch;
406 * New semantics: finds a client whose name is 'name' and whose
407 * status is one of those marked in TMask, if can't find one
408 * returns NULL. If it finds one moves it to the top of the list
411 aClient *hSeekClient(char *name, int TMask)
413 register HASHREGS hashv = strhash(name);
414 register aClient *cptr = clientTable[hashv];
415 register aClient *prv;
418 if ((!IsStatMask(cptr, TMask)) || strCasediff(name, cptr->name))
419 while (prv = cptr, cptr = cptr->hnext)
420 if (IsStatMask(cptr, TMask) && (!strCasediff(name, cptr->name)))
422 prv->hnext = cptr->hnext;
423 cptr->hnext = clientTable[hashv];
424 clientTable[hashv] = cptr;
434 * New semantics: finds a channel whose name is 'name',
435 * if can't find one returns NULL, if can find it moves
436 * it to the top of the list and returns it.
438 aChannel *hSeekChannel(char *name)
440 register HASHREGS hashv = strhash(name);
441 register aChannel *chptr = channelTable[hashv];
442 register aChannel *prv;
445 if (strCasediff(name, chptr->chname))
446 while (prv = chptr, chptr = chptr->hnextch)
447 if (!strCasediff(name, chptr->chname))
449 prv->hnextch = chptr->hnextch;
450 chptr->hnextch = channelTable[hashv];
451 channelTable[hashv] = chptr;
459 /* I will add some useful(?) statistics here one of these days,
460 but not for DEBUGMODE: just to let the admins play with it,
461 coders are able to SIGCORE the server and look into what goes
464 int m_hash(aClient *UNUSED(cptr), aClient *sptr, int UNUSED(parc), char *parv[])
466 sendto_one(sptr, "NOTICE %s :SUSER SSERV", parv[0]);
467 sendto_one(sptr, "NOTICE %s :SBSDC IRCDC", parv[0]);
468 sendto_one(sptr, "NOTICE %s :CHANC SMISC", parv[0]);
469 sendto_one(sptr, "NOTICE %s :HASHC VERSH", parv[0]);
470 sendto_one(sptr, "NOTICE %s :MAKEF HOSTID", parv[0]);
474 /* Nick jupe utilities, these are in a static hash table with entry/bucket
475 ratio of one, collision shift up and roll in a circular fashion, the
476 lowest 12 bits of the hash value are used, deletion is not supported,
477 only addition, test for existence and cleanup of the table are.. */
479 #define JUPEHASHBITS 12 /* 4096 entries, 64 nick jupes allowed */
480 #define JUPEHASHSIZE (1<<JUPEHASHBITS)
481 #define JUPEHASHMASK (JUPEHASHSIZE-1)
482 #define JUPEMAX (1<<(JUPEHASHBITS-6))
484 static char jupeTable[JUPEHASHSIZE][NICKLEN + 1]; /* About 40k */
485 static int jupesCount;
489 * Tells if a nick is juped (nonzero returned) or not (zero)
491 int isNickJuped(char *nick)
496 for (pos = strhash(nick); (pos &= JUPEHASHMASK), jupeTable[pos][0]; pos++)
497 if (!strCasediff(nick, jupeTable[pos]))
499 return 0; /* A bogus pointer is NOT a juped nick, right ? :) */
504 * Adds a (comma separated list of) nick jupes to the table
506 int addNickJupes(char *nicks)
508 static char temp[512];
514 strncpy(temp, nicks, 512);
517 for (one = strtoken(&p, temp, ","); one; one = strtoken(&p, NULL, ","))
524 if (!jupeTable[pos][0])
526 if (jupesCount == JUPEMAX)
527 return 1; /* Error: Jupe table is full ! */
529 strncpy(jupeTable[pos], one, NICKLEN);
530 jupeTable[pos][NICKLEN] = '\000'; /* Better safe than sorry :) */
533 if (!strCasediff(one, jupeTable[pos]))
544 * Cleans up the juped nicks table
546 void clearNickJupes(void)
550 for (i = 0; i < JUPEHASHSIZE; i++)
551 jupeTable[i][0] = '\000';