2 * IRC - Internet Relay Chat, ircd/hash.c
3 * Copyright (C) 1998 Andrea Cocito, completely rewritten version.
4 * Previous version was Copyright (C) 1991 Darren Reed, the concept
5 * of linked lists for each hash bucket and the move-to-head
6 * optimization has been borrowed from there.
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 1, or (at your option)
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
29 #include "ircd_chattr.h"
30 #include "ircd_string.h"
43 /************************* Nemesi's hash alghoritm ***********************/
45 /* This hash function returns *exactly* N%HASHSIZE, where 'N'
46 * is the string itself (included the trailing '\0') seen as
47 * a baseHASHSHIFT number whose "digits" are the bytes of the
48 * number mapped through a "weight" transformation that gives
49 * the same "weight" to caseless-equal chars, example:
51 * Hashing the string "Nick\0" the result will be:
53 * | | | | `---> ( (hash_weight('\0') * (HASHSHIFT**0) +
54 * | | | `------> (hash_weight('k') * (HASHSHIFT**1) +
55 * | | `---------> (hash_weight('c') * (HASHSHIFT**2) +
56 * | `------------> (hash_weight('i') * (HASHSHIFT**3) +
57 * `---------------> (hash_weight('N') * (HASHSHIFT**4) ) % HASHSIZE
59 * It's actually a lot similar to a base transformation of the
60 * text representation of an integer.
61 * Looking at it this way seems slow and requiring unlimited integer
62 * precision, but we actually do it with a *very* fast loop, using only
63 * short integer arithmetic and by means of two memory accesses and
64 * 3 additions per each byte processed.. and nothing else, as a side
65 * note the distribution of real nicks over the hash table of this
66 * function is about 3 times better than the previous one, and the
67 * hash function itself is about 25% faster with a "normal" HASHSIZE
68 * (it gets slower with larger ones and faster for smallest ones
69 * because the hash table size affect the size of some maps and thus
70 * the effectiveness of RAM caches while accesing them).
71 * These two pages of macros are here to make the following code
72 * _more_ understandeable... I hope ;)
75 /* Internal stuff, think well before changing this, it's how
76 much the weights of two lexicograhically contiguous chars
77 differ, i.e. (hash_weight('b')-hash_weight('a')) == HASHSTEP
78 One seems to be fine but the alghoritm doesn't depend on it */
81 /* The smallest _prime_ int beeing HASHSTEP times bigger than a byte,
82 that is the first prime bigger than the maximum hash_weight
83 (since the maximum hash weight is gonne be the "biggest-byte * HASHSTEP")
87 /* Are we sure that HASHSHIFT is big enough ? */
88 #if !(HASHSHIFT > (HASHSTEP*(CHAR_MAX-CHAR_MIN)))
89 #error "No no, I cannot, please make HASHSHIFT a bigger prime !"
92 /* Now HASHSIZE doesn't need to be a prime, but we really don't want it
93 to be an exact multiple of HASHSHIFT, that would make the distribution
94 a LOT worse, once is not multiple of HASHSHIFT it can be anything */
95 #if ((HASHSIZE%HASHSHIFT)==0)
96 #error "Please set HASHSIZE to something not multiple of HASHSHIFT"
99 /* What type of integer do we need in our computations ? the largest
100 value we need to work on is (HASHSIZE+HASHSHIFT+1), for memory
101 operations we want to keep the tables compact (the cache will work
102 better and we will run faster) while for work variables we prefer
103 to roundup to 'int' if it is the case: on platforms where int!=short
104 int arithmetic is often faster than short arithmetic, we prefer signed
105 types if they are big enough since on some architectures they are faster
106 than unsigned, but we always keep signedness of mem and regs the same,
107 to avoid sign conversions that sometimes require time, the following
108 precompile stuff will set HASHMEMS to an appropriate integer type for
109 the tables stored in memory and HASHREGS to an appropriate int type
110 for the work registers/variables/return types. Everything of type
111 HASH???S will remain internal to this source file so I placed this stuff
112 here and not in the header file. */
117 #if ((!defined(HASHMEMS)) && (HASHSIZE < (SHRT_MAX-HASHSHIFT)))
118 #define HASHMEMS short
122 #if ((!defined(HASHMEMS)) && (HASHSIZE < (USHRT_MAX-HASHSHIFT)))
123 #define HASHMEMS unsigned short
124 #define HASHREGS unsigned int
127 #if ((!defined(HASHMEMS)) && (HASHSIZE < (INT_MAX-HASHSHIFT)))
132 #if ((!defined(HASHMEMS)) && (HASHSIZE < (UINT_MAX-HASHSHIFT)))
133 #define HASHMEMS unsigned int
134 #define HASHREGS unsigned int
137 #if ((!defined(HASHMEMS)) && (HASHSIZE < (LONG_MAX-HASHSHIFT)))
138 #define HASHMEMS long
139 #define HASHREGS long
142 #if ((!defined(HASHMEMS)) && (HASHSIZE < (ULONG_MAX-HASHSHIFT)))
143 #define HASHMEMS unsigned long
144 #define HASHREGS unsigned long
147 #if (!defined(HASHMEMS))
148 #error "Uh oh... I have a problem, do you want a 16GB hash table ? !"
151 /* Now we are sure that HASHMEMS and HASHREGS can contain the following */
152 #define HASHMAPSIZE (HASHSIZE+HASHSHIFT+1)
154 /* Static memory structures */
156 /* We need a first function that, given an integer h between 1 and
157 HASHSIZE+HASHSHIFT, returns ( (h * HASHSHIFT) % HASHSIZE ) )
158 We'll map this function in this table */
159 static HASHMEMS hash_map[HASHMAPSIZE];
161 /* Then we need a second function that "maps" a char to its weitgh,
162 changed to a table this one too, with this macro we can use a char
163 as index and not care if it is signed or not, no.. this will not
164 cause an addition to take place at each access, trust me, the
165 optimizer takes it out of the actual code and passes "label+shift"
166 to the linker, and the linker does the addition :) */
167 static HASHMEMS hash_weight_table[CHAR_MAX - CHAR_MIN + 1];
168 #define hash_weight(ch) hash_weight_table[ch-CHAR_MIN]
170 /* The actual hash tables, both MUST be of the same HASHSIZE, variable
171 size tables could be supported but the rehash routine should also
172 rebuild the transformation maps, I kept the tables of equal size
173 so that I can use one hash function and one transformation map */
174 static struct Client *clientTable[HASHSIZE];
175 static struct Channel *channelTable[HASHSIZE];
177 /* This is what the hash function will consider "equal" chars, this function
178 MUST be transitive, if HASHEQ(y,x)&&HASHEQ(y,z) then HASHEQ(y,z), and MUST
179 be symmetric, if HASHEQ(a,b) then HASHEQ(b,a), obvious ok but... :) */
180 #define HASHEQ(x,y) ((ToLower(x)) == (ToLower(y)))
183 * Initialize the maps used by hash functions and clear the tables */
191 /* Clear the hash tables first */
192 for (l = 0; l < HASHSIZE; l++)
198 /* Here is to what we "map" a char before working on it */
199 for (i = CHAR_MIN; i <= CHAR_MAX; i++)
200 hash_weight(i) = (HASHMEMS) (HASHSTEP * ((unsigned char)i));
202 /* Make them equal for case-independently equal chars, it's
203 lame to do it this way but I wanted the code flexible, it's
204 possible to change the HASHEQ macro and not touch here.
205 I don't actually care about the 32768 loops since it happens
206 only once at startup */
207 for (i = CHAR_MIN; i <= CHAR_MAX; i++) {
208 for (j = CHAR_MIN; j < i; j++) {
210 hash_weight(i) = hash_weight(j);
214 /* And this is our hash-loop "transformation" function,
215 basically it will be hash_map[x] == ((x*HASHSHIFT)%HASHSIZE)
216 defined for 0<=x<=(HASHSIZE+HASHSHIFT) */
217 for (m = 0; m < (unsigned long)HASHMAPSIZE; m++)
220 l *= (unsigned long)HASHSHIFT;
221 l %= (unsigned long)HASHSIZE;
222 hash_map[m] = (HASHMEMS) l;
226 /* These are the actual hash functions, since they are static
227 and very short any decent compiler at a good optimization level
228 WILL inline these in the following functions */
230 /* This is the string hash function,
231 WARNING: n must be a valid pointer to a _non-null_ string
232 this means that not only strhash(NULL) but also
233 strhash("") _will_ coredump, it's responsibility
234 the caller to eventually check BadPtr(nick). */
236 static HASHREGS strhash(const char *n)
238 HASHREGS hash = hash_weight(*n++);
240 hash = hash_map[hash] + hash_weight(*n++);
241 return hash_map[hash];
244 /* And this is the string hash function for limited lenght strings
245 WARNING: n must be a valid pointer to a non-null string
246 and i must be > 0 ! */
250 The time taken to decrement i makes the function
251 slower than strhash for the average of channel names (tested
252 on 16000 real channel names, 1000 loops. I left the code here
253 as a bookmark if a strnhash is evetually needed in the future.
255 static HASHREGS strnhash(const char *n, int i) {
256 HASHREGS hash = hash_weight(*n++);
259 hash = hash_map[hash] + hash_weight(*n++);
260 return hash_map[hash];
263 #define CHANHASHLEN 30
267 /************************** Externally visible functions ********************/
269 /* Optimization note: in these functions I supposed that the CSE optimization
270 * (Common Subexpression Elimination) does its work decently, this means that
271 * I avoided introducing new variables to do the work myself and I did let
272 * the optimizer play with more free registers, actual tests proved this
273 * solution to be faster than doing things like tmp2=tmp->hnext... and then
274 * use tmp2 myself wich would have given less freedom to the optimizer.
279 * Adds a client's name in the proper hash linked list, can't fail,
280 * cptr must have a non-null name or expect a coredump, the name is
281 * infact taken from cptr->name
283 int hAddClient(struct Client *cptr)
285 HASHREGS hashv = strhash(cli_name(cptr));
287 cli_hnext(cptr) = clientTable[hashv];
288 clientTable[hashv] = cptr;
295 * Adds a channel's name in the proper hash linked list, can't fail.
296 * chptr must have a non-null name or expect a coredump.
297 * As before the name is taken from chptr->name, we do hash its entire
298 * lenght since this proved to be statistically faster
300 int hAddChannel(struct Channel *chptr)
302 HASHREGS hashv = strhash(chptr->chname);
304 chptr->hnext = channelTable[hashv];
305 channelTable[hashv] = chptr;
312 * Removes a Client's name from the hash linked list
314 int hRemClient(struct Client *cptr)
316 HASHREGS hashv = strhash(cli_name(cptr));
317 struct Client *tmp = clientTable[hashv];
320 clientTable[hashv] = cli_hnext(cptr);
321 cli_hnext(cptr) = cptr;
326 if (cli_hnext(tmp) == cptr) {
327 cli_hnext(tmp) = cli_hnext(cli_hnext(tmp));
328 cli_hnext(cptr) = cptr;
331 tmp = cli_hnext(tmp);
338 * Removes the old name of a client from a linked list and adds
339 * the new one to another linked list, there is a slight chanche
340 * that this is useless if the two hashes are the same but it still
341 * would need to move the name to the top of the list.
342 * As always it's responsibility of the caller to check that
343 * both newname and cptr->name are valid names (not "" or NULL).
344 * Typically, to change the nick of an already hashed client:
345 * if (!BadPtr(newname) && ClearTheNameSomeHow(newname)) {
346 * hChangeClient(cptr, newname);
347 * strcpy(cptr->name, newname);
349 * There isn't an equivalent function for channels since they
352 int hChangeClient(struct Client *cptr, const char *newname)
354 HASHREGS newhash = strhash(newname);
359 cli_hnext(cptr) = clientTable[newhash];
360 clientTable[newhash] = cptr;
366 * Removes the channel's name from the corresponding hash linked list
368 int hRemChannel(struct Channel *chptr)
370 HASHREGS hashv = strhash(chptr->chname);
371 struct Channel *tmp = channelTable[hashv];
374 channelTable[hashv] = chptr->hnext;
375 chptr->hnext = chptr;
380 if (tmp->hnext == chptr) {
381 tmp->hnext = tmp->hnext->hnext;
382 chptr->hnext = chptr;
393 * New semantics: finds a client whose name is 'name' and whose
394 * status is one of those marked in TMask, if can't find one
395 * returns NULL. If it finds one moves it to the top of the list
398 struct Client* hSeekClient(const char *name, int TMask)
400 HASHREGS hashv = strhash(name);
401 struct Client *cptr = clientTable[hashv];
404 if (0 == (cli_status(cptr) & TMask) || 0 != ircd_strcmp(name, cli_name(cptr))) {
406 while (prev = cptr, cptr = cli_hnext(cptr)) {
407 if ((cli_status(cptr) & TMask) && (0 == ircd_strcmp(name, cli_name(cptr)))) {
408 cli_hnext(prev) = cli_hnext(cptr);
409 cli_hnext(cptr) = clientTable[hashv];
410 clientTable[hashv] = cptr;
421 * New semantics: finds a channel whose name is 'name',
422 * if can't find one returns NULL, if can find it moves
423 * it to the top of the list and returns it.
425 struct Channel* hSeekChannel(const char *name)
427 HASHREGS hashv = strhash(name);
428 struct Channel *chptr = channelTable[hashv];
431 if (0 != ircd_strcmp(name, chptr->chname)) {
432 struct Channel* prev;
433 while (prev = chptr, chptr = chptr->hnext) {
434 if (0 == ircd_strcmp(name, chptr->chname)) {
435 prev->hnext = chptr->hnext;
436 chptr->hnext = channelTable[hashv];
437 channelTable[hashv] = chptr;
447 /* I will add some useful(?) statistics here one of these days,
448 but not for DEBUGMODE: just to let the admins play with it,
449 coders are able to SIGCORE the server and look into what goes
452 int m_hash(struct Client *cptr, struct Client *sptr, int parc, char *parv[])
461 sendcmdto_one(&me, CMD_NOTICE, sptr, "%C :Hash Table Statistics", sptr);
463 for (i = 0; i < HASHSIZE; ++i) {
464 if ((cl = clientTable[i])) {
467 for ( ; cl; cl = cli_hnext(cl))
475 sendcmdto_one(&me, CMD_NOTICE, sptr, "%C :Client: entries: %d buckets: %d "
476 "max chain: %d", sptr, count, buckets, max_chain);
482 for (i = 0; i < HASHSIZE; ++i) {
483 if ((ch = channelTable[i])) {
486 for ( ; ch; ch = ch->hnext)
494 sendcmdto_one(&me, CMD_NOTICE, sptr, "%C :Channel: entries: %d buckets: %d "
495 "max chain: %d", sptr, count, buckets, max_chain);
499 /* Nick jupe utilities, these are in a static hash table with entry/bucket
500 ratio of one, collision shift up and roll in a circular fashion, the
501 lowest 12 bits of the hash value are used, deletion is not supported,
502 only addition, test for existence and cleanup of the table are.. */
504 #define JUPEHASHBITS 12 /* 4096 entries, 64 nick jupes allowed */
505 #define JUPEHASHSIZE (1<<JUPEHASHBITS)
506 #define JUPEHASHMASK (JUPEHASHSIZE-1)
507 #define JUPEMAX (1<<(JUPEHASHBITS-6))
509 static char jupeTable[JUPEHASHSIZE][NICKLEN + 1]; /* About 40k */
510 static int jupesCount;
514 * Tells if a nick is juped (nonzero returned) or not (zero)
516 int isNickJuped(const char *nick)
521 for (pos = strhash(nick); (pos &= JUPEHASHMASK), jupeTable[pos][0]; pos++) {
522 if (0 == ircd_strcmp(nick, jupeTable[pos]))
526 return 0; /* A bogus pointer is NOT a juped nick, right ? :) */
531 * Adds a (comma separated list of) nick jupes to the table
533 int addNickJupes(const char *nicks)
535 static char temp[BUFSIZE + 1];
542 ircd_strncpy(temp, nicks, BUFSIZE);
543 temp[BUFSIZE] = '\0';
545 for (one = ircd_strtok(&p, temp, ","); one; one = ircd_strtok(&p, NULL, ","))
552 if (!jupeTable[pos][0])
554 if (jupesCount == JUPEMAX)
555 return 1; /* Error: Jupe table is full ! */
557 ircd_strncpy(jupeTable[pos], one, NICKLEN);
558 jupeTable[pos][NICKLEN] = '\000'; /* Better safe than sorry :) */
561 if (0 == ircd_strcmp(one, jupeTable[pos]))
572 * Cleans up the juped nicks table
574 void clearNickJupes(void)
578 for (i = 0; i < JUPEHASHSIZE; i++)
579 jupeTable[i][0] = '\000';