2 * IRC - Internet Relay Chat, ircd/hash.c
3 * Copyright (C) 1998 Andrea Cocito, completely rewritten version.
4 * Previous version was Copyright (C) 1991 Darren Reed, the concept
5 * of linked lists for each hash bucket and the move-to-head
6 * optimization has been borrowed from there.
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 1, or (at your option)
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
27 #include "ircd_chattr.h"
28 #include "ircd_string.h"
41 /************************* Nemesi's hash alghoritm ***********************/
43 * @brief Hash table management.
46 * This hash function returns *exactly* N%HASHSIZE, where 'N'
47 * is the string itself (included the trailing '\\0') seen as
48 * a baseHASHSHIFT number whose "digits" are the bytes of the
49 * number mapped through a "weight" transformation that gives
50 * the same "weight" to caseless-equal chars, example:
52 * Hashing the string "Nick\0" the result will be:
54 * | | | | `---> ( (hash_weight('\\0') * (HASHSHIFT**0) +
55 * | | | `------> (hash_weight('k') * (HASHSHIFT**1) +
56 * | | `---------> (hash_weight('c') * (HASHSHIFT**2) +
57 * | `------------> (hash_weight('i') * (HASHSHIFT**3) +
58 * `---------------> (hash_weight('N') * (HASHSHIFT**4) ) % HASHSIZE
60 * It's actually a lot similar to a base transformation of the
61 * text representation of an integer.
62 * Looking at it this way seems slow and requiring unlimited integer
63 * precision, but we actually do it with a *very* fast loop, using only
64 * short integer arithmetic and by means of two memory accesses and
65 * 3 additions per each byte processed.. and nothing else, as a side
66 * note the distribution of real nicks over the hash table of this
67 * function is about 3 times better than the previous one, and the
68 * hash function itself is about 25% faster with a "normal" HASHSIZE
69 * (it gets slower with larger ones and faster for smallest ones
70 * because the hash table size affect the size of some maps and thus
71 * the effectiveness of RAM caches while accesing them).
72 * These two pages of macros are here to make the following code
73 * _more_ understandeable... I hope ;)
75 * If you ask me, this whole mess is ungodly complicated for very
76 * little benefit. -Entrope
79 /** Internal stuff, think well before changing this, it's how
80 much the weights of two lexicograhically contiguous chars
81 differ, i.e.\ (hash_weight('b')-hash_weight('a')) == HASHSTEP.
82 One seems to be fine but the algorithm doesn't depend on it. */
85 /** The smallest _prime_ int beeing HASHSTEP times bigger than a byte.
86 That is, the first prime bigger than the maximum hash_weight
87 (since the maximum hash weight is gonna be the "biggest-byte * HASHSTEP")
91 /* Are we sure that HASHSHIFT is big enough ? */
92 #if !(HASHSHIFT > (HASHSTEP*(CHAR_MAX-CHAR_MIN)))
93 #error "No no, I cannot, please make HASHSHIFT a bigger prime !"
96 /* Now HASHSIZE doesn't need to be a prime, but we really don't want it
97 to be an exact multiple of HASHSHIFT, that would make the distribution
98 a LOT worse, once is not multiple of HASHSHIFT it can be anything */
99 #if ((HASHSIZE%HASHSHIFT)==0)
100 #error "Please set HASHSIZE to something not multiple of HASHSHIFT"
103 /* What type of integer do we need in our computations ? the largest
104 value we need to work on is (HASHSIZE+HASHSHIFT+1), for memory
105 operations we want to keep the tables compact (the cache will work
106 better and we will run faster) while for work variables we prefer
107 to roundup to 'int' if it is the case: on platforms where int!=short
108 int arithmetic is often faster than short arithmetic, we prefer signed
109 types if they are big enough since on some architectures they are faster
110 than unsigned, but we always keep signedness of mem and regs the same,
111 to avoid sign conversions that sometimes require time, the following
112 precompile stuff will set HASHMEMS to an appropriate integer type for
113 the tables stored in memory and HASHREGS to an appropriate int type
114 for the work registers/variables/return types. Everything of type
115 HASH???S will remain internal to this source file so I placed this stuff
116 here and not in the header file. */
121 #if ((!defined(HASHMEMS)) && (HASHSIZE < (SHRT_MAX-HASHSHIFT)))
122 #define HASHMEMS short
126 #if ((!defined(HASHMEMS)) && (HASHSIZE < (USHRT_MAX-HASHSHIFT)))
127 #define HASHMEMS unsigned short
128 #define HASHREGS unsigned int
131 #if ((!defined(HASHMEMS)) && (HASHSIZE < (INT_MAX-HASHSHIFT)))
136 #if ((!defined(HASHMEMS)) && (HASHSIZE < (UINT_MAX-HASHSHIFT)))
137 #define HASHMEMS unsigned int
138 #define HASHREGS unsigned int
141 #if ((!defined(HASHMEMS)) && (HASHSIZE < (LONG_MAX-HASHSHIFT)))
142 #define HASHMEMS long
143 #define HASHREGS long
146 #if ((!defined(HASHMEMS)) && (HASHSIZE < (ULONG_MAX-HASHSHIFT)))
147 #define HASHMEMS unsigned long
148 #define HASHREGS unsigned long
151 #if (!defined(HASHMEMS))
152 #error "Uh oh... I have a problem, do you want a 16GB hash table ? !"
155 /* Now we are sure that HASHMEMS and HASHREGS can contain the following */
156 /** Size of #hash_map array. */
157 #define HASHMAPSIZE (HASHSIZE+HASHSHIFT+1)
159 /* Static memory structures */
161 /** We need a first function that, given an integer h between 1 and
162 HASHSIZE+HASHSHIFT, returns ( (h * HASHSHIFT) % HASHSIZE ) ).
163 We'll map this function in this table. */
164 static HASHMEMS hash_map[HASHMAPSIZE];
166 /** Then we need a second function that "maps" a char to its weight.
167 Changed to a table this one too, with this macro we can use a char
168 as index and not care if it is signed or not, no.. this will not
169 cause an addition to take place at each access, trust me, the
170 optimizer takes it out of the actual code and passes "label+shift"
171 to the linker, and the linker does the addition :) */
172 static HASHMEMS hash_weight_table[CHAR_MAX - CHAR_MIN + 1];
173 /** Helper macro to look characters up in #hash_weight_table. */
174 #define hash_weight(ch) hash_weight_table[ch-CHAR_MIN]
176 /* The actual hash tables, both MUST be of the same HASHSIZE, variable
177 size tables could be supported but the rehash routine should also
178 rebuild the transformation maps, I kept the tables of equal size
179 so that I can use one hash function and one transformation map */
180 /** Hash table for clients. */
181 static struct Client *clientTable[HASHSIZE];
182 /** Hash table for channels. */
183 static struct Channel *channelTable[HASHSIZE];
185 /* This is what the hash function will consider "equal" chars, this function
186 MUST be transitive, if HASHEQ(y,x)&&HASHEQ(y,z) then HASHEQ(y,z), and MUST
187 be symmetric, if HASHEQ(a,b) then HASHEQ(b,a), obvious ok but... :) */
188 /** Helper macro for character comparison. */
189 #define HASHEQ(x,y) ((ToLower(x)) == (ToLower(y)))
191 /** Initialize the maps used by hash functions and clear the tables. */
199 /* Clear the hash tables first */
200 for (l = 0; l < HASHSIZE; l++)
206 /* Here is to what we "map" a char before working on it */
207 for (i = CHAR_MIN; i <= CHAR_MAX; i++)
208 hash_weight(i) = (HASHMEMS) (HASHSTEP * ((unsigned char)i));
210 /* Make them equal for case-independently equal chars, it's
211 lame to do it this way but I wanted the code flexible, it's
212 possible to change the HASHEQ macro and not touch here.
213 I don't actually care about the 32768 loops since it happens
214 only once at startup */
215 for (i = CHAR_MIN; i <= CHAR_MAX; i++) {
216 for (j = CHAR_MIN; j < i; j++) {
218 hash_weight(i) = hash_weight(j);
222 /* And this is our hash-loop "transformation" function,
223 basically it will be hash_map[x] == ((x*HASHSHIFT)%HASHSIZE)
224 defined for 0<=x<=(HASHSIZE+HASHSHIFT) */
225 for (m = 0; m < (unsigned long)HASHMAPSIZE; m++)
228 l *= (unsigned long)HASHSHIFT;
229 l %= (unsigned long)HASHSIZE;
230 hash_map[m] = (HASHMEMS) l;
234 /* These are the actual hash functions, since they are static
235 and very short any decent compiler at a good optimization level
236 WILL inline these in the following functions */
238 /* This is the string hash function,
239 WARNING: n must be a valid pointer to a _non-null_ string
240 this means that not only strhash(NULL) but also
241 strhash("") _will_ coredump, it's responsibility
242 the caller to eventually check BadPtr(nick). */
244 /** Calculate hash value for a string. */
245 static HASHREGS strhash(const char *n)
247 HASHREGS hash = hash_weight(*n++);
249 hash = hash_map[hash] + hash_weight(*n++);
250 return hash_map[hash];
253 /* And this is the string hash function for limited lenght strings
254 WARNING: n must be a valid pointer to a non-null string
255 and i must be > 0 ! */
259 The time taken to decrement i makes the function
260 slower than strhash for the average of channel names (tested
261 on 16000 real channel names, 1000 loops. I left the code here
262 as a bookmark if a strnhash is evetually needed in the future.
264 static HASHREGS strnhash(const char *n, int i) {
265 HASHREGS hash = hash_weight(*n++);
268 hash = hash_map[hash] + hash_weight(*n++);
269 return hash_map[hash];
272 #define CHANHASHLEN 30
276 /************************** Externally visible functions ********************/
278 /* Optimization note: in these functions I supposed that the CSE optimization
279 * (Common Subexpression Elimination) does its work decently, this means that
280 * I avoided introducing new variables to do the work myself and I did let
281 * the optimizer play with more free registers, actual tests proved this
282 * solution to be faster than doing things like tmp2=tmp->hnext... and then
283 * use tmp2 myself wich would have given less freedom to the optimizer.
286 /** Prepend a client to the appropriate hash bucket.
287 * @param[in] cptr Client to add to hash table.
290 int hAddClient(struct Client *cptr)
292 HASHREGS hashv = strhash(cli_name(cptr));
294 cli_hnext(cptr) = clientTable[hashv];
295 clientTable[hashv] = cptr;
300 /** Prepend a channel to the appropriate hash bucket.
301 * @param[in] chptr Channel to add to hash table.
304 int hAddChannel(struct Channel *chptr)
306 HASHREGS hashv = strhash(chptr->chname);
308 chptr->hnext = channelTable[hashv];
309 channelTable[hashv] = chptr;
314 /** Remove a client from its hash bucket.
315 * @param[in] cptr Client to remove from hash table.
316 * @return Zero if the client is found and removed, -1 if not found.
318 int hRemClient(struct Client *cptr)
320 HASHREGS hashv = strhash(cli_name(cptr));
321 struct Client *tmp = clientTable[hashv];
324 clientTable[hashv] = cli_hnext(cptr);
325 cli_hnext(cptr) = cptr;
330 if (cli_hnext(tmp) == cptr) {
331 cli_hnext(tmp) = cli_hnext(cli_hnext(tmp));
332 cli_hnext(cptr) = cptr;
335 tmp = cli_hnext(tmp);
340 /** Rename a client in the hash table.
341 * @param[in] cptr Client whose nickname is changing.
342 * @param[in] newname New nickname for client.
345 int hChangeClient(struct Client *cptr, const char *newname)
347 HASHREGS newhash = strhash(newname);
352 cli_hnext(cptr) = clientTable[newhash];
353 clientTable[newhash] = cptr;
357 /** Remove a channel from its hash bucket.
358 * @param[in] chptr Channel to remove from hash table.
359 * @return Zero if the channel is found and removed, -1 if not found.
361 int hRemChannel(struct Channel *chptr)
363 HASHREGS hashv = strhash(chptr->chname);
364 struct Channel *tmp = channelTable[hashv];
367 channelTable[hashv] = chptr->hnext;
368 chptr->hnext = chptr;
373 if (tmp->hnext == chptr) {
374 tmp->hnext = tmp->hnext->hnext;
375 chptr->hnext = chptr;
384 /** Find a client by name, filtered by status mask.
385 * If a client is found, it is moved to the top of its hash bucket.
386 * @param[in] name Client name to search for.
387 * @param[in] TMask Bitmask of status bits, any of which are needed to match.
388 * @return Matching client, or NULL if none.
390 struct Client* hSeekClient(const char *name, int TMask)
392 HASHREGS hashv = strhash(name);
393 struct Client *cptr = clientTable[hashv];
396 if (0 == (cli_status(cptr) & TMask) || 0 != ircd_strcmp(name, cli_name(cptr))) {
398 while (prev = cptr, cptr = cli_hnext(cptr)) {
399 if ((cli_status(cptr) & TMask) && (0 == ircd_strcmp(name, cli_name(cptr)))) {
400 cli_hnext(prev) = cli_hnext(cptr);
401 cli_hnext(cptr) = clientTable[hashv];
402 clientTable[hashv] = cptr;
411 /** Find a channel by name.
412 * If a channel is found, it is moved to the top of its hash bucket.
413 * @param[in] name Channel name to search for.
414 * @return Matching channel, or NULL if none.
416 struct Channel* hSeekChannel(const char *name)
418 HASHREGS hashv = strhash(name);
419 struct Channel *chptr = channelTable[hashv];
422 if (0 != ircd_strcmp(name, chptr->chname)) {
423 struct Channel* prev;
424 while (prev = chptr, chptr = chptr->hnext) {
425 if (0 == ircd_strcmp(name, chptr->chname)) {
426 prev->hnext = chptr->hnext;
427 chptr->hnext = channelTable[hashv];
428 channelTable[hashv] = chptr;
438 /* I will add some useful(?) statistics here one of these days,
439 but not for DEBUGMODE: just to let the admins play with it,
440 coders are able to SIGCORE the server and look into what goes
443 /** Report hash table statistics to a client.
444 * @param[in] cptr Client that sent us this message.
445 * @param[in] sptr Client that originated the message.
446 * @param[in] parc Number of arguments.
447 * @param[in] parv Argument array.
450 int m_hash(struct Client *cptr, struct Client *sptr, int parc, char *parv[])
459 sendcmdto_one(&me, CMD_NOTICE, sptr, "%C :Hash Table Statistics", sptr);
461 for (i = 0; i < HASHSIZE; ++i) {
462 if ((cl = clientTable[i])) {
465 for ( ; cl; cl = cli_hnext(cl))
473 sendcmdto_one(&me, CMD_NOTICE, sptr, "%C :Client: entries: %d buckets: %d "
474 "max chain: %d", sptr, count, buckets, max_chain);
480 for (i = 0; i < HASHSIZE; ++i) {
481 if ((ch = channelTable[i])) {
484 for ( ; ch; ch = ch->hnext)
492 sendcmdto_one(&me, CMD_NOTICE, sptr, "%C :Channel: entries: %d buckets: %d "
493 "max chain: %d", sptr, count, buckets, max_chain);
497 /* Nick jupe utilities, these are in a static hash table with entry/bucket
498 ratio of one, collision shift up and roll in a circular fashion, the
499 lowest 12 bits of the hash value are used, deletion is not supported,
500 only addition, test for existence and cleanup of the table are.. */
502 /** Number of bits in jupe hash value. */
503 #define JUPEHASHBITS 12 /* 4096 entries, 64 nick jupes allowed */
504 /** Size of jupe hash table. */
505 #define JUPEHASHSIZE (1<<JUPEHASHBITS)
506 /** Bitmask to select into jupe hash table. */
507 #define JUPEHASHMASK (JUPEHASHSIZE-1)
508 /** Maximum number of jupes allowed. */
509 #define JUPEMAX (1<<(JUPEHASHBITS-6))
511 /** Hash table for jupes. */
512 static char jupeTable[JUPEHASHSIZE][NICKLEN + 1]; /* About 40k */
513 /** Count of jupes. */
514 static int jupesCount;
516 /** Check whether a nickname is juped.
517 * @param[in] nick Nickname to check.
518 * @return Non-zero of the nickname is juped, zero if not.
520 int isNickJuped(const char *nick)
525 for (pos = strhash(nick); (pos &= JUPEHASHMASK), jupeTable[pos][0]; pos++) {
526 if (0 == ircd_strcmp(nick, jupeTable[pos]))
530 return 0; /* A bogus pointer is NOT a juped nick, right ? :) */
533 /** Add a comma-separated list of nick jupes.
534 * @param[in] nicks List of nicks to jupe, separated by commas.
535 * @return Zero on success, non-zero on error.
537 int addNickJupes(const char *nicks)
539 static char temp[BUFSIZE + 1];
546 ircd_strncpy(temp, nicks, BUFSIZE);
547 temp[BUFSIZE] = '\0';
549 for (one = ircd_strtok(&p, temp, ","); one; one = ircd_strtok(&p, NULL, ","))
556 if (!jupeTable[pos][0])
558 if (jupesCount == JUPEMAX)
559 return 1; /* Error: Jupe table is full ! */
561 ircd_strncpy(jupeTable[pos], one, NICKLEN);
562 jupeTable[pos][NICKLEN] = '\000'; /* Better safe than sorry :) */
565 if (0 == ircd_strcmp(one, jupeTable[pos]))
574 /** Empty the table of juped nicknames. */
575 void clearNickJupes(void)
579 for (i = 0; i < JUPEHASHSIZE; i++)
580 jupeTable[i][0] = '\000';