ircd/hash.c

   1 /*
   2  * IRC - Internet Relay Chat, ircd/hash.c
   3  * Copyright (C) 1998 Andrea Cocito, completely rewritten version.
   4  * Previous version was Copyright (C) 1991 Darren Reed, the concept
   5  * of linked lists for each hash bucket and the move-to-head
   6  * optimization has been borrowed from there.
   7  *
   8  * This program is free software; you can redistribute it and/or modify
   9  * it under the terms of the GNU General Public License as published by
  10  * the Free Software Foundation; either version 1, or (at your option)
  11  * any later version.
  12  *
  13  * This program is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16  * GNU General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU General Public License
  19  * along with this program; if not, write to the Free Software
  20  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  21  */
  22
  23 #include "sys.h"
  24 #include <stdlib.h>
  25 #include <limits.h>
  26 #include "h.h"
  27 #include "struct.h"
  28 #include "common.h"
  29 #include "hash.h"
  30 #include "channel.h"
  31 #include "send.h"
  32 #include "s_serv.h"
  33 #include "ircd.h"
  34 #include "support.h"
  35
  36 RCSTAG_CC("$Id$");
  37
  38 /************************* Nemesi's hash alghoritm ***********************/
  39
  40 /* This hash function returns *exactly* N%HASHSIZE, where 'N'
  41  * is the string itself (included the trailing '\0') seen as
  42  * a baseHASHSHIFT number whose "digits" are the bytes of the
  43  * number mapped through a "weight" transformation that gives
  44  * the same "weight" to caseless-equal chars, example:
  45  *
  46  * Hashing the string "Nick\0" the result will be:
  47  * N  i  c  k \0
  48  * |  |  |  |  `--->  ( (hash_weight('\0') * (HASHSHIFT**0) +
  49  * |  |  |  `------>    (hash_weight('k')  * (HASHSHIFT**1) +
  50  * |  |  `--------->    (hash_weight('c')  * (HASHSHIFT**2) +
  51  * |  `------------>    (hash_weight('i')  * (HASHSHIFT**3) +
  52  * `--------------->    (hash_weight('N')  * (HASHSHIFT**4)   ) % HASHSIZE
  53  *
  54  * It's actually a lot similar to a base transformation of the
  55  * text representation of an integer.
  56  * Looking at it this way seems slow and requiring unlimited integer
  57  * precision, but we actually do it with a *very* fast loop, using only
  58  * short integer arithmetic and by means of two memory accesses and
  59  * 3 additions per each byte processed.. and nothing else, as a side
  60  * note the distribution of real nicks over the hash table of this
  61  * function is about 3 times better than the previous one, and the
  62  * hash function itself is about 25% faster with a "normal" HASHSIZE
  63  * (it gets slower with larger ones and faster for smallest ones
  64  * because the hash table size affect the size of some maps and thus
  65  * the effectiveness of RAM caches while accesing them).
  66  * These two pages of macros are here to make the following code
  67  * _more_ understandeable... I hope ;)
  68  */
  69
  70 /* Internal stuff, think well before changing this, it's how
  71    much the weights of two lexicograhically contiguous chars
  72    differ, i.e. (hash_weight('b')-hash_weight('a')) == HASHSTEP
  73    One seems to be fine but the alghoritm doesn't depend on it */
  74 #define HASHSTEP 1
  75
  76 /* The smallest _prime_ int beeing HASHSTEP times bigger than a byte,
  77    that is the first prime bigger than the maximum hash_weight
  78    (since the maximum hash weight is gonne be the "biggest-byte * HASHSTEP")
  79  */
  80 #define HASHSHIFT 257
  81
  82 /* Are we sure that HASHSHIFT is big enough ? */
  83 #if !(HASHSHIFT > (HASHSTEP*(CHAR_MAX-CHAR_MIN)))
  84 #error "No no, I cannot, please make HASHSHIFT a bigger prime !"
  85 #endif
  86
  87 /* Now HASHSIZE doesn't need to be a prime, but we really don't want it
  88    to be an exact multiple of HASHSHIFT, that would make the distribution
  89    a LOT worse, once is not multiple of HASHSHIFT it can be anything */
  90 #if ((HASHSIZE%HASHSHIFT)==0)
  91 #error "Please set HASHSIZE to something not multiple of HASHSHIFT"
  92 #endif
  93
  94 /* What type of integer do we need in our computations ? the largest
  95    value we need to work on is (HASHSIZE+HASHSHIFT+1), for memory
  96    operations we want to keep the tables compact (the cache will work
  97    better and we will run faster) while for work variables we prefer
  98    to roundup to 'int' if it is the case: on platforms where int!=short
  99    int arithmetic is often faster than short arithmetic, we prefer signed
 100    types if they are big enough since on some architectures they are faster
 101    than unsigned, but we always keep signedness of mem and regs the same,
 102    to avoid sign conversions that sometimes require time, the following
 103    precompile stuff will set HASHMEMS to an appropriate integer type for
 104    the tables stored in memory and HASHREGS to an appropriate int type
 105    for the work registers/variables/return types. Everything of type
 106    HASH???S will remain internal to this source file so I placed this stuff
 107    here and not in the header file. */
 108
 109 #undef HASHMEMS
 110 #undef HASHREGS
 111
 112 #if ((!defined(HASHMEMS)) && (HASHSIZE < (SHRT_MAX-HASHSHIFT)))
 113 #define HASHMEMS short
 114 #define HASHREGS int
 115 #endif
 116
 117 #if ((!defined(HASHMEMS)) && (HASHSIZE < (USHRT_MAX-HASHSHIFT)))
 118 #define HASHMEMS unsigned short
 119 #define HASHREGS unsigned int
 120 #endif
 121
 122 #if ((!defined(HASHMEMS)) && (HASHSIZE < (INT_MAX-HASHSHIFT)))
 123 #define HASHMEMS int
 124 #define HASHREGS int
 125 #endif
 126
 127 #if ((!defined(HASHMEMS)) && (HASHSIZE < (UINT_MAX-HASHSHIFT)))
 128 #define HASHMEMS unsigned int
 129 #define HASHREGS unsigned int
 130 #endif
 131
 132 #if ((!defined(HASHMEMS)) && (HASHSIZE < (LONG_MAX-HASHSHIFT)))
 133 #define HASHMEMS long
 134 #define HASHREGS long
 135 #endif
 136
 137 #if ((!defined(HASHMEMS)) && (HASHSIZE < (ULONG_MAX-HASHSHIFT)))
 138 #define HASHMEMS unsigned long
 139 #define HASHREGS unsigned long
 140 #endif
 141
 142 #if (!defined(HASHMEMS))
 143 #error "Uh oh... I have a problem, do you want a 16GB hash table ? !"
 144 #endif
 145
 146 /* Now we are sure that HASHMEMS and HASHREGS can contain the following */
 147 #define HASHMAPSIZE (HASHSIZE+HASHSHIFT+1)
 148
 149 /* Static memory structures */
 150
 151 /* We need a first function that, given an integer h between 1 and
 152    HASHSIZE+HASHSHIFT, returns ( (h * HASHSHIFT) % HASHSIZE ) )
 153    We'll map this function in this table */
 154 static HASHMEMS hash_map[HASHMAPSIZE];
 155
 156 /* Then we need a second function that "maps" a char to its weitgh,
 157    changed to a table this one too, with this macro we can use a char
 158    as index and not care if it is signed or not, no.. this will not
 159    cause an addition to take place at each access, trust me, the
 160    optimizer takes it out of the actual code and passes "label+shift"
 161    to the linker, and the linker does the addition :) */
 162 static HASHMEMS hash_weight_table[CHAR_MAX - CHAR_MIN + 1];
 163 #define hash_weight(ch) hash_weight_table[ch-CHAR_MIN]
 164
 165 /* The actual hash tables, both MUST be of the same HASHSIZE, variable
 166    size tables could be supported but the rehash routine should also
 167    rebuild the transformation maps, I kept the tables of equal size
 168    so that I can use one hash function and one transformation map */
 169 static aClient *clientTable[HASHSIZE];
 170 static aChannel *channelTable[HASHSIZE];
 171
 172 /* This is what the hash function will consider "equal" chars, this function
 173    MUST be transitive, if HASHEQ(y,x)&&HASHEQ(y,z) then HASHEQ(y,z), and MUST
 174    be symmetric, if HASHEQ(a,b) then HASHEQ(b,a), obvious ok but... :) */
 175 #define HASHEQ(x,y) (((char) toLower((char) x)) == ((char) toLower((char) y)))
 176
 177 /* hash_init
 178  * Initialize the maps used by hash functions and clear the tables */
 179 void hash_init(void)
 180 {
 181   int i, j;
 182   unsigned long l, m;
 183
 184   /* Clear the hash tables first */
 185   for (l = 0; l < HASHSIZE; l++)
 186   {
 187     channelTable[l] = (aChannel *)NULL;
 188     clientTable[l] = (aClient *)NULL;
 189   };
 190
 191   /* Here is to what we "map" a char before working on it */
 192   for (i = CHAR_MIN; i <= CHAR_MAX; i++)
 193     hash_weight(i) = (HASHMEMS) (HASHSTEP * ((unsigned char)i));
 194
 195   /* Make them equal for case-independently equal chars, it's
 196      lame to do it this way but I wanted the code flexible, it's
 197      possible to change the HASHEQ macro and not touch here.
 198      I don't actually care about the 32768 loops since it happens
 199      only once at startup */
 200   for (i = CHAR_MIN; i <= CHAR_MAX; i++)
 201     for (j = CHAR_MIN; j < i; j++)
 202       if (HASHEQ(i, j))
 203         hash_weight(i) = hash_weight(j);
 204
 205   /* And this is our hash-loop "transformation" function,
 206      basically it will be hash_map[x] == ((x*HASHSHIFT)%HASHSIZE)
 207      defined for 0<=x<=(HASHSIZE+HASHSHIFT) */
 208   for (m = 0; m < (unsigned long)HASHMAPSIZE; m++)
 209   {
 210     l = m;
 211     l *= (unsigned long)HASHSHIFT;
 212     l %= (unsigned long)HASHSIZE;
 213     hash_map[m] = (HASHMEMS) l;
 214   };
 215 }
 216
 217 /* These are the actual hash functions, since they are static
 218    and very short any decent compiler at a good optimization level
 219    WILL inline these in the following functions */
 220
 221 /* This is the string hash function,
 222    WARNING: n must be a valid pointer to a _non-null_ string
 223    this means that not only strhash(NULL) but also
 224    strhash("") _will_ coredump, it's responsibility
 225    the caller to eventually check BadPtr(nick). */
 226
 227 static HASHREGS strhash(register char *n)
 228 {
 229   register HASHREGS hash = hash_weight(*n++);
 230   while (*n)
 231     hash = hash_map[hash] + hash_weight(*n++);
 232   return hash_map[hash];
 233 }
 234
 235 /* And this is the string hash function for limited lenght strings
 236    WARNING: n must be a valid pointer to a non-null string
 237    and i must be > 0 ! */
 238
 239 /* REMOVED
 240
 241    The time taken to decrement i makes the function
 242    slower than strhash for the average of channel names (tested
 243    on 16000 real channel names, 1000 loops. I left the code here
 244    as a bookmark if a strnhash is evetually needed in the future.
 245
 246    static HASHREGS strnhash(register char *n, register int i) {
 247    register HASHREGS hash = hash_weight(*n++);
 248    i--;
 249    while(*n && i--)
 250    hash = hash_map[hash] + hash_weight(*n++);
 251    return hash_map[hash];
 252    }
 253
 254    #define CHANHASHLEN 30
 255
 256    !REMOVED */
 257
 258 /************************** Externally visible functions ********************/
 259
 260 /* Optimization note: in these functions I supposed that the CSE optimization
 261  * (Common Subexpression Elimination) does its work decently, this means that
 262  * I avoided introducing new variables to do the work myself and I did let
 263  * the optimizer play with more free registers, actual tests proved this
 264  * solution to be faster than doing things like tmp2=tmp->hnext... and then
 265  * use tmp2 myself wich would have given less freedom to the optimizer.
 266  */
 267
 268 /*
 269  * hAddClient
 270  * Adds a client's name in the proper hash linked list, can't fail,
 271  * cptr must have a non-null name or expect a coredump, the name is
 272  * infact taken from cptr->name
 273  */
 274 int hAddClient(aClient *cptr)
 275 {
 276   register HASHREGS hashv = strhash(cptr->name);
 277
 278   cptr->hnext = clientTable[hashv];
 279   clientTable[hashv] = cptr;
 280
 281   return 0;
 282 }
 283
 284 /*
 285  * hAddChannel
 286  * Adds a channel's name in the proper hash linked list, can't fail.
 287  * chptr must have a non-null name or expect a coredump.
 288  * As before the name is taken from chptr->name, we do hash its entire
 289  * lenght since this proved to be statistically faster
 290  */
 291 int hAddChannel(aChannel *chptr)
 292 {
 293   register HASHREGS hashv = strhash(chptr->chname);
 294
 295   chptr->hnextch = channelTable[hashv];
 296   channelTable[hashv] = chptr;
 297
 298   return 0;
 299 }
 300
 301 /*
 302  * hRemClient
 303  * Removes a Client's name from the hash linked list
 304  */
 305 int hRemClient(aClient *cptr)
 306 {
 307   register HASHREGS hashv = strhash(cptr->name);
 308   register aClient *tmp = clientTable[hashv];
 309
 310   if (tmp == cptr)
 311   {
 312     clientTable[hashv] = cptr->hnext;
 313     return 0;
 314   }
 315   while (tmp)
 316   {
 317     if (tmp->hnext == cptr)
 318     {
 319       tmp->hnext = tmp->hnext->hnext;
 320       return 0;
 321     }
 322     tmp = tmp->hnext;
 323   }
 324   return -1;
 325 }
 326
 327 /*
 328  * hChangeClient
 329  * Removes the old name of a client from a linked list and adds
 330  * the new one to another linked list, there is a slight chanche
 331  * that this is useless if the two hashes are the same but it still
 332  * would need to move the name to the top of the list.
 333  * As always it's responsibility of the caller to check that
 334  * both newname and cptr->name are valid names (not "" or NULL).
 335  * Typically, to change the nick of an already hashed client:
 336  * if (!BadPtr(newname) && ClearTheNameSomeHow(newname)) {
 337  *   hChangeClient(cptr, newname);
 338  *   strcpy(cptr->name, newname);
 339  *   };
 340  * There isn't an equivalent function for channels since they
 341  * don't change name.
 342  */
 343 int hChangeClient(aClient *cptr, char *newname)
 344 {
 345   register HASHREGS newhash = strhash(newname);
 346
 347   hRemClient(cptr);
 348
 349   cptr->hnext = clientTable[newhash];
 350   clientTable[newhash] = cptr;
 351   return 0;
 352 }
 353
 354 /*
 355  * hRemChannel
 356  * Removes the channel's name from the corresponding hash linked list
 357  */
 358 int hRemChannel(aChannel *chptr)
 359 {
 360   register HASHREGS hashv = strhash(chptr->chname);
 361   register aChannel *tmp = channelTable[hashv];
 362
 363   if (tmp == chptr)
 364   {
 365     channelTable[hashv] = chptr->hnextch;
 366     return 0;
 367   };
 368
 369   while (tmp)
 370   {
 371     if (tmp->hnextch == chptr)
 372     {
 373       tmp->hnextch = tmp->hnextch->hnextch;
 374       return 0;
 375     };
 376     tmp = tmp->hnextch;
 377   };
 378
 379   return -1;
 380 }
 381
 382 /*
 383  * hSeekClient
 384  * New semantics: finds a client whose name is 'name' and whose
 385  * status is one of those marked in TMask, if can't find one
 386  * returns NULL. If it finds one moves it to the top of the list
 387  * and returns it.
 388  */
 389 aClient *hSeekClient(char *name, int TMask)
 390 {
 391   register HASHREGS hashv = strhash(name);
 392   register aClient *cptr = clientTable[hashv];
 393   register aClient *prv;
 394
 395   if (cptr)
 396     if ((!IsStatMask(cptr, TMask)) || strCasediff(name, cptr->name))
 397       while (prv = cptr, cptr = cptr->hnext)
 398         if (IsStatMask(cptr, TMask) && (!strCasediff(name, cptr->name)))
 399         {
 400           prv->hnext = cptr->hnext;
 401           cptr->hnext = clientTable[hashv];
 402           clientTable[hashv] = cptr;
 403           break;
 404         };
 405
 406   return cptr;
 407
 408 }
 409
 410 /*
 411  * hSeekChannel
 412  * New semantics: finds a channel whose name is 'name',
 413  * if can't find one returns NULL, if can find it moves
 414  * it to the top of the list and returns it.
 415  */
 416 aChannel *hSeekChannel(char *name)
 417 {
 418   register HASHREGS hashv = strhash(name);
 419   register aChannel *chptr = channelTable[hashv];
 420   register aChannel *prv;
 421
 422   if (chptr)
 423     if (strCasediff(name, chptr->chname))
 424       while (prv = chptr, chptr = chptr->hnextch)
 425         if (!strCasediff(name, chptr->chname))
 426         {
 427           prv->hnextch = chptr->hnextch;
 428           chptr->hnextch = channelTable[hashv];
 429           channelTable[hashv] = chptr;
 430           break;
 431         };
 432
 433   return chptr;
 434
 435 }
 436
 437 /* I will add some useful(?) statistics here one of these days,
 438    but not for DEBUGMODE: just to let the admins play with it,
 439    coders are able to SIGCORE the server and look into what goes
 440    on themselves :-) */
 441
 442 int m_hash(aClient *UNUSED(cptr), aClient *sptr, int UNUSED(parc), char *parv[])
 443 {
 444   sendto_one(sptr, "NOTICE %s :SUSER SSERV", parv[0]);
 445   sendto_one(sptr, "NOTICE %s :SBSDC IRCDC", parv[0]);
 446   sendto_one(sptr, "NOTICE %s :CHANC SMISC", parv[0]);
 447   sendto_one(sptr, "NOTICE %s :HASHC VERSH", parv[0]);
 448   sendto_one(sptr, "NOTICE %s :MAKEF HOSTID", parv[0]);
 449   return 0;
 450 }
 451
 452 /* Nick jupe utilities, these are in a static hash table with entry/bucket
 453    ratio of one, collision shift up and roll in a circular fashion, the
 454    lowest 12 bits of the hash value are used, deletion is not supported,
 455    only addition, test for existence and cleanup of the table are.. */
 456
 457 #define JUPEHASHBITS 12         /* 4096 entries, 64 nick jupes allowed */
 458 #define JUPEHASHSIZE (1<<JUPEHASHBITS)
 459 #define JUPEHASHMASK (JUPEHASHSIZE-1)
 460 #define JUPEMAX      (1<<(JUPEHASHBITS-6))
 461
 462 static char jupeTable[JUPEHASHSIZE][NICKLEN + 1];       /* About 40k */
 463 static int jupesCount;
 464
 465 /*
 466  * isNickJuped()
 467  * Tells if a nick is juped (nonzero returned) or not (zero)
 468  */
 469 int isNickJuped(char *nick)
 470 {
 471   register int pos;
 472
 473   if (nick && *nick)
 474     for (pos = strhash(nick); (pos &= JUPEHASHMASK), jupeTable[pos][0]; pos++)
 475       if (!strCasediff(nick, jupeTable[pos]))
 476         return 1;
 477   return 0;                     /* A bogus pointer is NOT a juped nick, right ? :) */
 478 }
 479
 480 /*
 481  * addNickJupes()
 482  * Adds a (comma separated list of) nick jupes to the table
 483  */
 484 int addNickJupes(char *nicks)
 485 {
 486   static char temp[512];
 487   char *one, *p;
 488   register int pos;
 489
 490   if (nicks && *nicks)
 491   {
 492     strncpy(temp, nicks, 512);
 493     temp[512] = '\000';
 494     p = NULL;
 495     for (one = strtoken(&p, temp, ","); one; one = strtoken(&p, NULL, ","))
 496     {
 497       if (!*one)
 498         continue;
 499       pos = strhash(one);
 500     loop:
 501       pos &= JUPEHASHMASK;
 502       if (!jupeTable[pos][0])
 503       {
 504         if (jupesCount == JUPEMAX)
 505           return 1;             /* Error: Jupe table is full ! */
 506         jupesCount++;
 507         strncpy(jupeTable[pos], one, NICKLEN);
 508         jupeTable[pos][NICKLEN] = '\000';       /* Better safe than sorry :) */
 509         continue;
 510       }
 511       if (!strCasediff(one, jupeTable[pos]))
 512         continue;
 513       ++pos;
 514       goto loop;
 515     }
 516   }
 517   return 0;
 518 }
 519
 520 /*
 521  * clearNickJupes()
 522  * Cleans up the juped nicks table
 523  */
 524 void clearNickJupes(void)
 525 {
 526   register int i;
 527   jupesCount = 0;
 528   for (i = 0; i < JUPEHASHSIZE; i++)
 529     jupeTable[i][0] = '\000';
 530 }