2 * IRC - Internet Relay Chat, ircd/hash.c
3 * Copyright (C) 1998 Andrea Cocito, completely rewritten version.
4 * Previous version was Copyright (C) 1991 Darren Reed, the concept
5 * of linked lists for each hash bucket and the move-to-head
6 * optimization has been borrowed from there.
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 1, or (at your option)
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
27 #include "ircd_chattr.h"
28 #include "ircd_string.h"
41 /************************* Nemesi's hash alghoritm ***********************/
43 /* This hash function returns *exactly* N%HASHSIZE, where 'N'
44 * is the string itself (included the trailing '\0') seen as
45 * a baseHASHSHIFT number whose "digits" are the bytes of the
46 * number mapped through a "weight" transformation that gives
47 * the same "weight" to caseless-equal chars, example:
49 * Hashing the string "Nick\0" the result will be:
51 * | | | | `---> ( (hash_weight('\0') * (HASHSHIFT**0) +
52 * | | | `------> (hash_weight('k') * (HASHSHIFT**1) +
53 * | | `---------> (hash_weight('c') * (HASHSHIFT**2) +
54 * | `------------> (hash_weight('i') * (HASHSHIFT**3) +
55 * `---------------> (hash_weight('N') * (HASHSHIFT**4) ) % HASHSIZE
57 * It's actually a lot similar to a base transformation of the
58 * text representation of an integer.
59 * Looking at it this way seems slow and requiring unlimited integer
60 * precision, but we actually do it with a *very* fast loop, using only
61 * short integer arithmetic and by means of two memory accesses and
62 * 3 additions per each byte processed.. and nothing else, as a side
63 * note the distribution of real nicks over the hash table of this
64 * function is about 3 times better than the previous one, and the
65 * hash function itself is about 25% faster with a "normal" HASHSIZE
66 * (it gets slower with larger ones and faster for smallest ones
67 * because the hash table size affect the size of some maps and thus
68 * the effectiveness of RAM caches while accesing them).
69 * These two pages of macros are here to make the following code
70 * _more_ understandeable... I hope ;)
73 /* Internal stuff, think well before changing this, it's how
74 much the weights of two lexicograhically contiguous chars
75 differ, i.e. (hash_weight('b')-hash_weight('a')) == HASHSTEP
76 One seems to be fine but the alghoritm doesn't depend on it */
79 /* The smallest _prime_ int beeing HASHSTEP times bigger than a byte,
80 that is the first prime bigger than the maximum hash_weight
81 (since the maximum hash weight is gonne be the "biggest-byte * HASHSTEP")
85 /* Are we sure that HASHSHIFT is big enough ? */
86 #if !(HASHSHIFT > (HASHSTEP*(CHAR_MAX-CHAR_MIN)))
87 #error "No no, I cannot, please make HASHSHIFT a bigger prime !"
90 /* Now HASHSIZE doesn't need to be a prime, but we really don't want it
91 to be an exact multiple of HASHSHIFT, that would make the distribution
92 a LOT worse, once is not multiple of HASHSHIFT it can be anything */
93 #if ((HASHSIZE%HASHSHIFT)==0)
94 #error "Please set HASHSIZE to something not multiple of HASHSHIFT"
97 /* What type of integer do we need in our computations ? the largest
98 value we need to work on is (HASHSIZE+HASHSHIFT+1), for memory
99 operations we want to keep the tables compact (the cache will work
100 better and we will run faster) while for work variables we prefer
101 to roundup to 'int' if it is the case: on platforms where int!=short
102 int arithmetic is often faster than short arithmetic, we prefer signed
103 types if they are big enough since on some architectures they are faster
104 than unsigned, but we always keep signedness of mem and regs the same,
105 to avoid sign conversions that sometimes require time, the following
106 precompile stuff will set HASHMEMS to an appropriate integer type for
107 the tables stored in memory and HASHREGS to an appropriate int type
108 for the work registers/variables/return types. Everything of type
109 HASH???S will remain internal to this source file so I placed this stuff
110 here and not in the header file. */
115 #if ((!defined(HASHMEMS)) && (HASHSIZE < (SHRT_MAX-HASHSHIFT)))
116 #define HASHMEMS short
120 #if ((!defined(HASHMEMS)) && (HASHSIZE < (USHRT_MAX-HASHSHIFT)))
121 #define HASHMEMS unsigned short
122 #define HASHREGS unsigned int
125 #if ((!defined(HASHMEMS)) && (HASHSIZE < (INT_MAX-HASHSHIFT)))
130 #if ((!defined(HASHMEMS)) && (HASHSIZE < (UINT_MAX-HASHSHIFT)))
131 #define HASHMEMS unsigned int
132 #define HASHREGS unsigned int
135 #if ((!defined(HASHMEMS)) && (HASHSIZE < (LONG_MAX-HASHSHIFT)))
136 #define HASHMEMS long
137 #define HASHREGS long
140 #if ((!defined(HASHMEMS)) && (HASHSIZE < (ULONG_MAX-HASHSHIFT)))
141 #define HASHMEMS unsigned long
142 #define HASHREGS unsigned long
145 #if (!defined(HASHMEMS))
146 #error "Uh oh... I have a problem, do you want a 16GB hash table ? !"
149 /* Now we are sure that HASHMEMS and HASHREGS can contain the following */
150 #define HASHMAPSIZE (HASHSIZE+HASHSHIFT+1)
152 /* Static memory structures */
154 /* We need a first function that, given an integer h between 1 and
155 HASHSIZE+HASHSHIFT, returns ( (h * HASHSHIFT) % HASHSIZE ) )
156 We'll map this function in this table */
157 static HASHMEMS hash_map[HASHMAPSIZE];
159 /* Then we need a second function that "maps" a char to its weitgh,
160 changed to a table this one too, with this macro we can use a char
161 as index and not care if it is signed or not, no.. this will not
162 cause an addition to take place at each access, trust me, the
163 optimizer takes it out of the actual code and passes "label+shift"
164 to the linker, and the linker does the addition :) */
165 static HASHMEMS hash_weight_table[CHAR_MAX - CHAR_MIN + 1];
166 #define hash_weight(ch) hash_weight_table[ch-CHAR_MIN]
168 /* The actual hash tables, both MUST be of the same HASHSIZE, variable
169 size tables could be supported but the rehash routine should also
170 rebuild the transformation maps, I kept the tables of equal size
171 so that I can use one hash function and one transformation map */
172 static struct Client *clientTable[HASHSIZE];
173 static struct Channel *channelTable[HASHSIZE];
175 /* This is what the hash function will consider "equal" chars, this function
176 MUST be transitive, if HASHEQ(y,x)&&HASHEQ(y,z) then HASHEQ(y,z), and MUST
177 be symmetric, if HASHEQ(a,b) then HASHEQ(b,a), obvious ok but... :) */
178 #define HASHEQ(x,y) ((ToLower(x)) == (ToLower(y)))
181 * Initialize the maps used by hash functions and clear the tables */
189 /* Clear the hash tables first */
190 for (l = 0; l < HASHSIZE; l++)
196 /* Here is to what we "map" a char before working on it */
197 for (i = CHAR_MIN; i <= CHAR_MAX; i++)
198 hash_weight(i) = (HASHMEMS) (HASHSTEP * ((unsigned char)i));
200 /* Make them equal for case-independently equal chars, it's
201 lame to do it this way but I wanted the code flexible, it's
202 possible to change the HASHEQ macro and not touch here.
203 I don't actually care about the 32768 loops since it happens
204 only once at startup */
205 for (i = CHAR_MIN; i <= CHAR_MAX; i++) {
206 for (j = CHAR_MIN; j < i; j++) {
208 hash_weight(i) = hash_weight(j);
212 /* And this is our hash-loop "transformation" function,
213 basically it will be hash_map[x] == ((x*HASHSHIFT)%HASHSIZE)
214 defined for 0<=x<=(HASHSIZE+HASHSHIFT) */
215 for (m = 0; m < (unsigned long)HASHMAPSIZE; m++)
218 l *= (unsigned long)HASHSHIFT;
219 l %= (unsigned long)HASHSIZE;
220 hash_map[m] = (HASHMEMS) l;
224 /* These are the actual hash functions, since they are static
225 and very short any decent compiler at a good optimization level
226 WILL inline these in the following functions */
228 /* This is the string hash function,
229 WARNING: n must be a valid pointer to a _non-null_ string
230 this means that not only strhash(NULL) but also
231 strhash("") _will_ coredump, it's responsibility
232 the caller to eventually check BadPtr(nick). */
234 static HASHREGS strhash(const char *n)
236 HASHREGS hash = hash_weight(*n++);
238 hash = hash_map[hash] + hash_weight(*n++);
239 return hash_map[hash];
242 /* And this is the string hash function for limited lenght strings
243 WARNING: n must be a valid pointer to a non-null string
244 and i must be > 0 ! */
248 The time taken to decrement i makes the function
249 slower than strhash for the average of channel names (tested
250 on 16000 real channel names, 1000 loops. I left the code here
251 as a bookmark if a strnhash is evetually needed in the future.
253 static HASHREGS strnhash(const char *n, int i) {
254 HASHREGS hash = hash_weight(*n++);
257 hash = hash_map[hash] + hash_weight(*n++);
258 return hash_map[hash];
261 #define CHANHASHLEN 30
265 /************************** Externally visible functions ********************/
267 /* Optimization note: in these functions I supposed that the CSE optimization
268 * (Common Subexpression Elimination) does its work decently, this means that
269 * I avoided introducing new variables to do the work myself and I did let
270 * the optimizer play with more free registers, actual tests proved this
271 * solution to be faster than doing things like tmp2=tmp->hnext... and then
272 * use tmp2 myself wich would have given less freedom to the optimizer.
277 * Adds a client's name in the proper hash linked list, can't fail,
278 * cptr must have a non-null name or expect a coredump, the name is
279 * infact taken from cptr->name
281 int hAddClient(struct Client *cptr)
283 HASHREGS hashv = strhash(cptr->name);
285 cptr->hnext = clientTable[hashv];
286 clientTable[hashv] = cptr;
293 * Adds a channel's name in the proper hash linked list, can't fail.
294 * chptr must have a non-null name or expect a coredump.
295 * As before the name is taken from chptr->name, we do hash its entire
296 * lenght since this proved to be statistically faster
298 int hAddChannel(struct Channel *chptr)
300 HASHREGS hashv = strhash(chptr->chname);
302 chptr->hnext = channelTable[hashv];
303 channelTable[hashv] = chptr;
310 * Removes a Client's name from the hash linked list
312 int hRemClient(struct Client *cptr)
314 HASHREGS hashv = strhash(cptr->name);
315 struct Client *tmp = clientTable[hashv];
318 clientTable[hashv] = cptr->hnext;
324 if (tmp->hnext == cptr) {
325 tmp->hnext = tmp->hnext->hnext;
336 * Removes the old name of a client from a linked list and adds
337 * the new one to another linked list, there is a slight chanche
338 * that this is useless if the two hashes are the same but it still
339 * would need to move the name to the top of the list.
340 * As always it's responsibility of the caller to check that
341 * both newname and cptr->name are valid names (not "" or NULL).
342 * Typically, to change the nick of an already hashed client:
343 * if (!BadPtr(newname) && ClearTheNameSomeHow(newname)) {
344 * hChangeClient(cptr, newname);
345 * strcpy(cptr->name, newname);
347 * There isn't an equivalent function for channels since they
350 int hChangeClient(struct Client *cptr, const char *newname)
352 HASHREGS newhash = strhash(newname);
357 cptr->hnext = clientTable[newhash];
358 clientTable[newhash] = cptr;
364 * Removes the channel's name from the corresponding hash linked list
366 int hRemChannel(struct Channel *chptr)
368 HASHREGS hashv = strhash(chptr->chname);
369 struct Channel *tmp = channelTable[hashv];
372 channelTable[hashv] = chptr->hnext;
373 chptr->hnext = chptr;
378 if (tmp->hnext == chptr) {
379 tmp->hnext = tmp->hnext->hnext;
380 chptr->hnext = chptr;
391 * New semantics: finds a client whose name is 'name' and whose
392 * status is one of those marked in TMask, if can't find one
393 * returns NULL. If it finds one moves it to the top of the list
396 struct Client* hSeekClient(const char *name, int TMask)
398 HASHREGS hashv = strhash(name);
399 struct Client *cptr = clientTable[hashv];
402 if (0 == (cptr->status & TMask) || 0 != ircd_strcmp(name, cptr->name)) {
404 while (prev = cptr, cptr = cptr->hnext) {
405 if ((cptr->status & TMask) && (0 == ircd_strcmp(name, cptr->name))) {
406 prev->hnext = cptr->hnext;
407 cptr->hnext = clientTable[hashv];
408 clientTable[hashv] = cptr;
419 * New semantics: finds a channel whose name is 'name',
420 * if can't find one returns NULL, if can find it moves
421 * it to the top of the list and returns it.
423 struct Channel* hSeekChannel(const char *name)
425 HASHREGS hashv = strhash(name);
426 struct Channel *chptr = channelTable[hashv];
429 if (0 != ircd_strcmp(name, chptr->chname)) {
430 struct Channel* prev;
431 while (prev = chptr, chptr = chptr->hnext) {
432 if (0 == ircd_strcmp(name, chptr->chname)) {
433 prev->hnext = chptr->hnext;
434 chptr->hnext = channelTable[hashv];
435 channelTable[hashv] = chptr;
445 /* I will add some useful(?) statistics here one of these days,
446 but not for DEBUGMODE: just to let the admins play with it,
447 coders are able to SIGCORE the server and look into what goes
450 int m_hash(struct Client *cptr, struct Client *sptr, int parc, char *parv[])
459 sendto_one(sptr, "NOTICE %s :Hash Table Statistics", parv[0]);
461 for (i = 0; i < HASHSIZE; ++i) {
462 if ((cl = clientTable[i])) {
465 for ( ; cl; cl = cl->hnext)
473 sendto_one(sptr, "NOTICE %s :Client: entries: %d buckets: %d max chain: %d",
474 parv[0], count, buckets, max_chain);
480 for (i = 0; i < HASHSIZE; ++i) {
481 if ((ch = channelTable[i])) {
484 for ( ; ch; ch = ch->hnext)
492 sendto_one(sptr, "NOTICE %s :Channel: entries: %d buckets: %d max chain: %d",
493 parv[0], count, buckets, max_chain);
497 /* Nick jupe utilities, these are in a static hash table with entry/bucket
498 ratio of one, collision shift up and roll in a circular fashion, the
499 lowest 12 bits of the hash value are used, deletion is not supported,
500 only addition, test for existence and cleanup of the table are.. */
502 #define JUPEHASHBITS 12 /* 4096 entries, 64 nick jupes allowed */
503 #define JUPEHASHSIZE (1<<JUPEHASHBITS)
504 #define JUPEHASHMASK (JUPEHASHSIZE-1)
505 #define JUPEMAX (1<<(JUPEHASHBITS-6))
507 static char jupeTable[JUPEHASHSIZE][NICKLEN + 1]; /* About 40k */
508 static int jupesCount;
512 * Tells if a nick is juped (nonzero returned) or not (zero)
514 int isNickJuped(const char *nick)
519 for (pos = strhash(nick); (pos &= JUPEHASHMASK), jupeTable[pos][0]; pos++) {
520 if (0 == ircd_strcmp(nick, jupeTable[pos]))
524 return 0; /* A bogus pointer is NOT a juped nick, right ? :) */
529 * Adds a (comma separated list of) nick jupes to the table
531 int addNickJupes(const char *nicks)
533 static char temp[BUFSIZE + 1];
540 ircd_strncpy(temp, nicks, BUFSIZE);
541 temp[BUFSIZE] = '\0';
543 for (one = ircd_strtok(&p, temp, ","); one; one = ircd_strtok(&p, NULL, ","))
550 if (!jupeTable[pos][0])
552 if (jupesCount == JUPEMAX)
553 return 1; /* Error: Jupe table is full ! */
555 ircd_strncpy(jupeTable[pos], one, NICKLEN);
556 jupeTable[pos][NICKLEN] = '\000'; /* Better safe than sorry :) */
559 if (0 == ircd_strcmp(one, jupeTable[pos]))
570 * Cleans up the juped nicks table
572 void clearNickJupes(void)
576 for (i = 0; i < JUPEHASHSIZE; i++)
577 jupeTable[i][0] = '\000';