ircd/hash.c

   1 /*
   2  * IRC - Internet Relay Chat, ircd/hash.c
   3  * Copyright (C) 1998 Andrea Cocito, completely rewritten version.
   4  * Previous version was Copyright (C) 1991 Darren Reed, the concept
   5  * of linked lists for each hash bucket and the move-to-head
   6  * optimization has been borrowed from there.
   7  *
   8  * This program is free software; you can redistribute it and/or modify
   9  * it under the terms of the GNU General Public License as published by
  10  * the Free Software Foundation; either version 1, or (at your option)
  11  * any later version.
  12  *
  13  * This program is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16  * GNU General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU General Public License
  19  * along with this program; if not, write to the Free Software
  20  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  21  */
  22
  23 #include "sys.h"
  24 #include <stdlib.h>
  25 #include <limits.h>
  26 #include "h.h"
  27 #include "struct.h"
  28 #include "common.h"
  29 #include "hash.h"
  30 #include "channel.h"
  31 #include "send.h"
  32 #include "s_serv.h"
  33 #include "ircd.h"
  34 #include "support.h"
  35
  36 RCSTAG_CC("$Id$");
  37
  38 /************************* Nemesi's hash alghoritm ***********************/
  39
  40 /* This hash function returns *exactly* N%HASHSIZE, where 'N'
  41  * is the string itself (included the trailing '\0') seen as
  42  * a baseHASHSHIFT number whose "digits" are the bytes of the
  43  * number mapped through a "weight" transformation that gives
  44  * the same "weight" to caseless-equal chars, example:
  45  *
  46  * Hashing the string "Nick\0" the result will be:
  47  * N  i  c  k \0
  48  * |  |  |  |  `--->  ( (hash_weight('\0') * (HASHSHIFT**0) +
  49  * |  |  |  `------>    (hash_weight('k')  * (HASHSHIFT**1) +
  50  * |  |  `--------->    (hash_weight('c')  * (HASHSHIFT**2) +
  51  * |  `------------>    (hash_weight('i')  * (HASHSHIFT**3) +
  52  * `--------------->    (hash_weight('N')  * (HASHSHIFT**4)   ) % HASHSIZE
  53  *
  54  * It's actually a lot similar to a base transformation of the
  55  * text representation of an integer.
  56  * Looking at it this way seems slow and requiring unlimited integer
  57  * precision, but we actually do it with a *very* fast loop, using only
  58  * short integer arithmetic and by means of two memory accesses and
  59  * 3 additions per each byte processed.. and nothing else, as a side
  60  * note the distribution of real nicks over the hash table of this
  61  * function is about 3 times better than the previous one, and the
  62  * hash function itself is about 25% faster with a "normal" HASHSIZE
  63  * (it gets slower with larger ones and faster for smallest ones
  64  * because the hash table size affect the size of some maps and thus
  65  * the effectiveness of RAM caches while accesing them).
  66  * These two pages of macros are here to make the following code
  67  * _more_ understandeable... I hope ;)
  68  */
  69
  70 /* Internal stuff, think well before changing this, it's how
  71    much the weights of two lexicograhically contiguous chars
  72    differ, i.e. (hash_weight('b')-hash_weight('a')) == HASHSTEP
  73    One seems to be fine but the alghoritm doesn't depend on it */
  74 #define HASHSTEP 1
  75
  76 /* The smallest _prime_ int beeing HASHSTEP times bigger than a byte,
  77    that is the first prime bigger than the maximum hash_weight
  78    (since the maximum hash weight is gonne be the "biggest-byte * HASHSTEP")
  79  */
  80 #define HASHSHIFT 257
  81
  82 /* Are we sure that HASHSHIFT is big enough ? */
  83 #if !(HASHSHIFT > (HASHSTEP*(CHAR_MAX-CHAR_MIN)))
  84 #error "No no, I cannot, please make HASHSHIFT a bigger prime !"
  85 #endif
  86
  87 /* Now HASHSIZE doesn't need to be a prime, but we really don't want it
  88    to be an exact multiple of HASHSHIFT, that would make the distribution
  89    a LOT worse, once is not multiple of HASHSHIFT it can be anything */
  90 #if ((HASHSIZE%HASHSHIFT)==0)
  91 #error "Please set HASHSIZE to something not multiple of HASHSHIFT"
  92 #endif
  93
  94 /* What type of integer do we need in our computations ? the largest
  95    value we need to work on is (HASHSIZE+HASHSHIFT+1), for memory
  96    operations we want to keep the tables compact (the cache will work
  97    better and we will run faster) while for work variables we prefer
  98    to roundup to 'int' if it is the case: on platforms where int!=short
  99    int arithmetic is often faster than short arithmetic, we prefer signed
 100    types if they are big enough since on some architectures they are faster
 101    than unsigned, but we always keep signedness of mem and regs the same,
 102    to avoid sign conversions that sometimes require time, the following
 103    precompile stuff will set HASHMEMS to an appropriate integer type for
 104    the tables stored in memory and HASHREGS to an appropriate int type
 105    for the work registers/variables/return types. Everything of type
 106    HASH???S will remain internal to this source file so I placed this stuff
 107    here and not in the header file. */
 108
 109 #undef HASHMEMS
 110 #undef HASHREGS
 111
 112 #if ((!defined(HASHMEMS)) && (HASHSIZE < (SHRT_MAX-HASHSHIFT)))
 113 #define HASHMEMS short
 114 #define HASHREGS int
 115 #endif
 116
 117 #if ((!defined(HASHMEMS)) && (HASHSIZE < (USHRT_MAX-HASHSHIFT)))
 118 #define HASHMEMS unsigned short
 119 #define HASHREGS unsigned int
 120 #endif
 121
 122 #if ((!defined(HASHMEMS)) && (HASHSIZE < (INT_MAX-HASHSHIFT)))
 123 #define HASHMEMS int
 124 #define HASHREGS int
 125 #endif
 126
 127 #if ((!defined(HASHMEMS)) && (HASHSIZE < (UINT_MAX-HASHSHIFT)))
 128 #define HASHMEMS unsigned int
 129 #define HASHREGS unsigned int
 130 #endif
 131
 132 #if ((!defined(HASHMEMS)) && (HASHSIZE < (LONG_MAX-HASHSHIFT)))
 133 #define HASHMEMS long
 134 #define HASHREGS long
 135 #endif
 136
 137 #if ((!defined(HASHMEMS)) && (HASHSIZE < (ULONG_MAX-HASHSHIFT)))
 138 #define HASHMEMS unsigned long
 139 #define HASHREGS unsigned long
 140 #endif
 141
 142 #if (!defined(HASHMEMS))
 143 #error "Uh oh... I have a problem, do you want a 16GB hash table ? !"
 144 #endif
 145
 146 /* Now we are sure that HASHMEMS and HASHREGS can contain the following */
 147 #define HASHMAPSIZE (HASHSIZE+HASHSHIFT+1)
 148
 149 /* Static memory structures */
 150
 151 /* We need a first function that, given an integer h between 1 and
 152    HASHSIZE+HASHSHIFT, returns ( (h * HASHSHIFT) % HASHSIZE ) )
 153    We'll map this function in this table */
 154 static HASHMEMS hash_map[HASHMAPSIZE];
 155
 156 /* Then we need a second function that "maps" a char to its weitgh,
 157    changed to a table this one too, with this macro we can use a char
 158    as index and not care if it is signed or not, no.. this will not
 159    cause an addition to take place at each access, trust me, the
 160    optimizer takes it out of the actual code and passes "label+shift"
 161    to the linker, and the linker does the addition :) */
 162 static HASHMEMS hash_weight_table[CHAR_MAX - CHAR_MIN + 1];
 163 #define hash_weight(ch) hash_weight_table[ch-CHAR_MIN]
 164
 165 /* The actual hash tables, both MUST be of the same HASHSIZE, variable
 166    size tables could be supported but the rehash routine should also
 167    rebuild the transformation maps, I kept the tables of equal size
 168    so that I can use one hash function and one transformation map */
 169 static aClient *clientTable[HASHSIZE];
 170 static aChannel *channelTable[HASHSIZE];
 171
 172 /* This is what the hash function will consider "equal" chars, this function
 173    MUST be transitive, if HASHEQ(y,x)&&HASHEQ(y,z) then HASHEQ(y,z), and MUST
 174    be symmetric, if HASHEQ(a,b) then HASHEQ(b,a), obvious ok but... :) */
 175 #define HASHEQ(x,y) (((char) toLower((char) x)) == ((char) toLower((char) y)))
 176
 177 /* hash_init
 178  * Initialize the maps used by hash functions and clear the tables */
 179 void hash_init(void)
 180 {
 181   int i, j;
 182   unsigned long l, m;
 183
 184   /* Clear the hash tables first */
 185   for (l = 0; l < HASHSIZE; l++)
 186   {
 187     channelTable[l] = (aChannel *)NULL;
 188     clientTable[l] = (aClient *)NULL;
 189   };
 190
 191   /* Here is to what we "map" a char before working on it */
 192   for (i = CHAR_MIN; i <= CHAR_MAX; i++)
 193     hash_weight(i) = (HASHMEMS) (HASHSTEP * ((unsigned char)i));
 194
 195   /* Make them equal for case-independently equal chars, it's
 196      lame to do it this way but I wanted the code flexible, it's
 197      possible to change the HASHEQ macro and not touch here.
 198      I don't actually care about the 32768 loops since it happens
 199      only once at startup */
 200   for (i = CHAR_MIN; i <= CHAR_MAX; i++)
 201     for (j = CHAR_MIN; j < i; j++)
 202       if (HASHEQ(i, j))
 203         hash_weight(i) = hash_weight(j);
 204
 205   /* And this is our hash-loop "transformation" function,
 206      basically it will be hash_map[x] == ((x*HASHSHIFT)%HASHSIZE)
 207      defined for 0<=x<=(HASHSIZE+HASHSHIFT) */
 208   for (m = 0; m < (unsigned long)HASHMAPSIZE; m++)
 209   {
 210     l = m;
 211     l *= (unsigned long)HASHSHIFT;
 212     l %= (unsigned long)HASHSIZE;
 213     hash_map[m] = (HASHMEMS) l;
 214   };
 215 }
 216
 217 /* These are the actual hash functions, since they are static
 218    and very short any decent compiler at a good optimization level
 219    WILL inline these in the following functions */
 220
 221 /* This is the string hash function,
 222    WARNING: n must be a valid pointer to a _non-null_ string
 223    this means that not only strhash(NULL) but also
 224    strhash("") _will_ coredump, it's responsibility
 225    the caller to eventually check BadPtr(nick). */
 226
 227 static HASHREGS strhash(register char *n)
 228 {
 229   register HASHREGS hash = hash_weight(*n++);
 230   while (*n)
 231     hash = hash_map[hash] + hash_weight(*n++);
 232   return hash_map[hash];
 233 }
 234
 235 /* And this is the string hash function for limited lenght strings
 236    WARNING: n must be a valid pointer to a non-null string
 237    and i must be > 0 ! */
 238
 239 /* REMOVED
 240
 241    The time taken to decrement i makes the function
 242    slower than strhash for the average of channel names (tested
 243    on 16000 real channel names, 1000 loops. I left the code here
 244    as a bookmark if a strnhash is evetually needed in the future.
 245
 246    static HASHREGS strnhash(register char *n, register int i) {
 247    register HASHREGS hash = hash_weight(*n++);
 248    i--;
 249    while(*n && i--)
 250    hash = hash_map[hash] + hash_weight(*n++);
 251    return hash_map[hash];
 252    }
 253
 254    #define CHANHASHLEN 30
 255
 256    !REMOVED */
 257
 258 /************************** Externally visible functions ********************/
 259
 260 /* Optimization note: in these functions I supposed that the CSE optimization
 261  * (Common Subexpression Elimination) does its work decently, this means that
 262  * I avoided introducing new variables to do the work myself and I did let
 263  * the optimizer play with more free registers, actual tests proved this
 264  * solution to be faster than doing things like tmp2=tmp->hnext... and then
 265  * use tmp2 myself wich would have given less freedom to the optimizer.
 266  */
 267
 268 /*
 269  * hAddClient
 270  * Adds a client's name in the proper hash linked list, can't fail,
 271  * cptr must have a non-null name or expect a coredump, the name is
 272  * infact taken from cptr->name
 273  */
 274 int hAddClient(aClient *cptr)
 275 {
 276   register HASHREGS hashv = strhash(cptr->name);
 277
 278   cptr->hnext = clientTable[hashv];
 279   clientTable[hashv] = cptr;
 280
 281   return 0;
 282 }
 283
 284 /*
 285  * hAddChannel
 286  * Adds a channel's name in the proper hash linked list, can't fail.
 287  * chptr must have a non-null name or expect a coredump.
 288  * As before the name is taken from chptr->name, we do hash its entire
 289  * lenght since this proved to be statistically faster
 290  */
 291 int hAddChannel(aChannel *chptr)
 292 {
 293   register HASHREGS hashv = strhash(chptr->chname);
 294
 295   chptr->hnextch = channelTable[hashv];
 296   channelTable[hashv] = chptr;
 297
 298   return 0;
 299 }
 300
 301 /*
 302  * hRemClient
 303  * Removes a Client's name from the hash linked list
 304  */
 305 int hRemClient(aClient *cptr)
 306 {
 307   register HASHREGS hashv = strhash(cptr->name);
 308   register aClient *tmp = clientTable[hashv];
 309
 310   if (tmp == cptr)
 311   {
 312     clientTable[hashv] = cptr->hnext;
 313     return 0;
 314   };
 315
 316   while (tmp)
 317   {
 318     if (tmp->hnext == cptr)
 319     {
 320       tmp->hnext = tmp->hnext->hnext;
 321       return 0;
 322     };
 323     tmp = tmp->hnext;
 324   };
 325
 326   return -1;
 327
 328 }
 329
 330 /*
 331  * hChangeClient
 332  * Removes the old name of a client from a linked list and adds
 333  * the new one to another linked list, there is a slight chanche
 334  * that this is useless if the two hashes are the same but it still
 335  * would need to move the name to the top of the list.
 336  * As always it's responsibility of the caller to check that
 337  * both newname and cptr->name are valid names (not "" or NULL).
 338  * Typically, to change the nick of an already hashed client:
 339  * if (!BadPtr(newname) && ClearTheNameSomeHow(newname)) {
 340  *   hChangeClient(cptr, newname);
 341  *   strcpy(cptr->name, newname);
 342  *   };
 343  * There isn't an equivalent function for channels since they
 344  * don't change name.
 345  */
 346 int hChangeClient(aClient *cptr, char *newname)
 347 {
 348   register HASHREGS oldhash = strhash(cptr->name);
 349   register HASHREGS newhash = strhash(newname);
 350   register aClient *tmp;
 351
 352   tmp = clientTable[oldhash];
 353
 354   if (tmp == cptr)
 355     return 0;
 356
 357   while (tmp)
 358   {
 359     if (tmp->hnext == cptr)
 360     {
 361       tmp->hnext = cptr->hnext;
 362       cptr->hnext = clientTable[newhash];
 363       clientTable[newhash] = cptr;
 364       return 0;
 365     };
 366     tmp = tmp->hnext;
 367   };
 368
 369   /* Well... do our best anyway... link it to the correct list,
 370      and then... Fail (and core if we are debugging) ! */
 371   cptr->hnext = clientTable[newhash];
 372   clientTable[newhash] = cptr;
 373   return -1;
 374 }
 375
 376 /*
 377  * hRemChannel
 378  * Removes the channel's name from the corresponding hash linked list
 379  */
 380 int hRemChannel(aChannel *chptr)
 381 {
 382   register HASHREGS hashv = strhash(chptr->chname);
 383   register aChannel *tmp = channelTable[hashv];
 384
 385   if (tmp == chptr)
 386   {
 387     channelTable[hashv] = chptr->hnextch;
 388     return 0;
 389   };
 390
 391   while (tmp)
 392   {
 393     if (tmp->hnextch == chptr)
 394     {
 395       tmp->hnextch = tmp->hnextch->hnextch;
 396       return 0;
 397     };
 398     tmp = tmp->hnextch;
 399   };
 400
 401   return -1;
 402 }
 403
 404 /*
 405  * hSeekClient
 406  * New semantics: finds a client whose name is 'name' and whose
 407  * status is one of those marked in TMask, if can't find one
 408  * returns NULL. If it finds one moves it to the top of the list
 409  * and returns it.
 410  */
 411 aClient *hSeekClient(char *name, int TMask)
 412 {
 413   register HASHREGS hashv = strhash(name);
 414   register aClient *cptr = clientTable[hashv];
 415   register aClient *prv;
 416
 417   if (cptr)
 418     if ((!IsStatMask(cptr, TMask)) || strCasediff(name, cptr->name))
 419       while (prv = cptr, cptr = cptr->hnext)
 420         if (IsStatMask(cptr, TMask) && (!strCasediff(name, cptr->name)))
 421         {
 422           prv->hnext = cptr->hnext;
 423           cptr->hnext = clientTable[hashv];
 424           clientTable[hashv] = cptr;
 425           break;
 426         };
 427
 428   return cptr;
 429
 430 }
 431
 432 /*
 433  * hSeekChannel
 434  * New semantics: finds a channel whose name is 'name',
 435  * if can't find one returns NULL, if can find it moves
 436  * it to the top of the list and returns it.
 437  */
 438 aChannel *hSeekChannel(char *name)
 439 {
 440   register HASHREGS hashv = strhash(name);
 441   register aChannel *chptr = channelTable[hashv];
 442   register aChannel *prv;
 443
 444   if (chptr)
 445     if (strCasediff(name, chptr->chname))
 446       while (prv = chptr, chptr = chptr->hnextch)
 447         if (!strCasediff(name, chptr->chname))
 448         {
 449           prv->hnextch = chptr->hnextch;
 450           chptr->hnextch = channelTable[hashv];
 451           channelTable[hashv] = chptr;
 452           break;
 453         };
 454
 455   return chptr;
 456
 457 }
 458
 459 /* I will add some useful(?) statistics here one of these days,
 460    but not for DEBUGMODE: just to let the admins play with it,
 461    coders are able to SIGCORE the server and look into what goes
 462    on themselves :-) */
 463
 464 int m_hash(aClient *UNUSED(cptr), aClient *sptr, int UNUSED(parc), char *parv[])
 465 {
 466   sendto_one(sptr, "NOTICE %s :SUSER SSERV", parv[0]);
 467   sendto_one(sptr, "NOTICE %s :SBSDC IRCDC", parv[0]);
 468   sendto_one(sptr, "NOTICE %s :CHANC SMISC", parv[0]);
 469   sendto_one(sptr, "NOTICE %s :HASHC VERSH", parv[0]);
 470   sendto_one(sptr, "NOTICE %s :MAKEF HOSTID", parv[0]);
 471   return 0;
 472 }
 473
 474 /* Nick jupe utilities, these are in a static hash table with entry/bucket
 475    ratio of one, collision shift up and roll in a circular fashion, the
 476    lowest 12 bits of the hash value are used, deletion is not supported,
 477    only addition, test for existence and cleanup of the table are.. */
 478
 479 #define JUPEHASHBITS 12         /* 4096 entries, 64 nick jupes allowed */
 480 #define JUPEHASHSIZE (1<<JUPEHASHBITS)
 481 #define JUPEHASHMASK (JUPEHASHSIZE-1)
 482 #define JUPEMAX      (1<<(JUPEHASHBITS-6))
 483
 484 static char jupeTable[JUPEHASHSIZE][NICKLEN + 1];       /* About 40k */
 485 static int jupesCount;
 486
 487 /*
 488  * isNickJuped()
 489  * Tells if a nick is juped (nonzero returned) or not (zero)
 490  */
 491 int isNickJuped(char *nick)
 492 {
 493   register int pos;
 494
 495   if (nick && *nick)
 496     for (pos = strhash(nick); (pos &= JUPEHASHMASK), jupeTable[pos][0]; pos++)
 497       if (!strCasediff(nick, jupeTable[pos]))
 498         return 1;
 499   return 0;                     /* A bogus pointer is NOT a juped nick, right ? :) */
 500 }
 501
 502 /*
 503  * addNickJupes()
 504  * Adds a (comma separated list of) nick jupes to the table
 505  */
 506 int addNickJupes(char *nicks)
 507 {
 508   static char temp[512];
 509   char *one, *p;
 510   register int pos;
 511
 512   if (nicks && *nicks)
 513   {
 514     strncpy(temp, nicks, 512);
 515     temp[512] = '\000';
 516     p = NULL;
 517     for (one = strtoken(&p, temp, ","); one; one = strtoken(&p, NULL, ","))
 518     {
 519       if (!*one)
 520         continue;
 521       pos = strhash(one);
 522     loop:
 523       pos &= JUPEHASHMASK;
 524       if (!jupeTable[pos][0])
 525       {
 526         if (jupesCount == JUPEMAX)
 527           return 1;             /* Error: Jupe table is full ! */
 528         jupesCount++;
 529         strncpy(jupeTable[pos], one, NICKLEN);
 530         jupeTable[pos][NICKLEN] = '\000';       /* Better safe than sorry :) */
 531         continue;
 532       }
 533       if (!strCasediff(one, jupeTable[pos]))
 534         continue;
 535       ++pos;
 536       goto loop;
 537     }
 538   }
 539   return 0;
 540 }
 541
 542 /*
 543  * clearNickJupes()
 544  * Cleans up the juped nicks table
 545  */
 546 void clearNickJupes(void)
 547 {
 548   register int i;
 549   jupesCount = 0;
 550   for (i = 0; i < JUPEHASHSIZE; i++)
 551     jupeTable[i][0] = '\000';
 552 }