ircd/hash.c

   1 /*
   2  * IRC - Internet Relay Chat, ircd/hash.c
   3  * Copyright (C) 1998 Andrea Cocito, completely rewritten version.
   4  * Previous version was Copyright (C) 1991 Darren Reed, the concept
   5  * of linked lists for each hash bucket and the move-to-head
   6  * optimization has been borrowed from there.
   7  *
   8  * This program is free software; you can redistribute it and/or modify
   9  * it under the terms of the GNU General Public License as published by
  10  * the Free Software Foundation; either version 1, or (at your option)
  11  * any later version.
  12  *
  13  * This program is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16  * GNU General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU General Public License
  19  * along with this program; if not, write to the Free Software
  20  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  21  *
  22  * $Id$
  23  */
  24 #include "hash.h"
  25 #include "client.h"
  26 #include "channel.h"
  27 #include "ircd_chattr.h"
  28 #include "ircd_string.h"
  29 #include "ircd.h"
  30 #include "send.h"
  31 #include "struct.h"
  32 #include "support.h"
  33 #include "sys.h"
  34
  35 #include <assert.h>
  36 #include <limits.h>
  37 #include <stdlib.h>
  38 #include <string.h>
  39
  40
  41 /************************* Nemesi's hash alghoritm ***********************/
  42
  43 /* This hash function returns *exactly* N%HASHSIZE, where 'N'
  44  * is the string itself (included the trailing '\0') seen as
  45  * a baseHASHSHIFT number whose "digits" are the bytes of the
  46  * number mapped through a "weight" transformation that gives
  47  * the same "weight" to caseless-equal chars, example:
  48  *
  49  * Hashing the string "Nick\0" the result will be:
  50  * N  i  c  k \0
  51  * |  |  |  |  `--->  ( (hash_weight('\0') * (HASHSHIFT**0) +
  52  * |  |  |  `------>    (hash_weight('k')  * (HASHSHIFT**1) +
  53  * |  |  `--------->    (hash_weight('c')  * (HASHSHIFT**2) +
  54  * |  `------------>    (hash_weight('i')  * (HASHSHIFT**3) +
  55  * `--------------->    (hash_weight('N')  * (HASHSHIFT**4)   ) % HASHSIZE
  56  *
  57  * It's actually a lot similar to a base transformation of the
  58  * text representation of an integer.
  59  * Looking at it this way seems slow and requiring unlimited integer
  60  * precision, but we actually do it with a *very* fast loop, using only
  61  * short integer arithmetic and by means of two memory accesses and
  62  * 3 additions per each byte processed.. and nothing else, as a side
  63  * note the distribution of real nicks over the hash table of this
  64  * function is about 3 times better than the previous one, and the
  65  * hash function itself is about 25% faster with a "normal" HASHSIZE
  66  * (it gets slower with larger ones and faster for smallest ones
  67  * because the hash table size affect the size of some maps and thus
  68  * the effectiveness of RAM caches while accesing them).
  69  * These two pages of macros are here to make the following code
  70  * _more_ understandeable... I hope ;)
  71  */
  72
  73 /* Internal stuff, think well before changing this, it's how
  74    much the weights of two lexicograhically contiguous chars
  75    differ, i.e. (hash_weight('b')-hash_weight('a')) == HASHSTEP
  76    One seems to be fine but the alghoritm doesn't depend on it */
  77 #define HASHSTEP 1
  78
  79 /* The smallest _prime_ int beeing HASHSTEP times bigger than a byte,
  80    that is the first prime bigger than the maximum hash_weight
  81    (since the maximum hash weight is gonne be the "biggest-byte * HASHSTEP")
  82  */
  83 #define HASHSHIFT 257
  84
  85 /* Are we sure that HASHSHIFT is big enough ? */
  86 #if !(HASHSHIFT > (HASHSTEP*(CHAR_MAX-CHAR_MIN)))
  87 #error "No no, I cannot, please make HASHSHIFT a bigger prime !"
  88 #endif
  89
  90 /* Now HASHSIZE doesn't need to be a prime, but we really don't want it
  91    to be an exact multiple of HASHSHIFT, that would make the distribution
  92    a LOT worse, once is not multiple of HASHSHIFT it can be anything */
  93 #if ((HASHSIZE%HASHSHIFT)==0)
  94 #error "Please set HASHSIZE to something not multiple of HASHSHIFT"
  95 #endif
  96
  97 /* What type of integer do we need in our computations ? the largest
  98    value we need to work on is (HASHSIZE+HASHSHIFT+1), for memory
  99    operations we want to keep the tables compact (the cache will work
 100    better and we will run faster) while for work variables we prefer
 101    to roundup to 'int' if it is the case: on platforms where int!=short
 102    int arithmetic is often faster than short arithmetic, we prefer signed
 103    types if they are big enough since on some architectures they are faster
 104    than unsigned, but we always keep signedness of mem and regs the same,
 105    to avoid sign conversions that sometimes require time, the following
 106    precompile stuff will set HASHMEMS to an appropriate integer type for
 107    the tables stored in memory and HASHREGS to an appropriate int type
 108    for the work registers/variables/return types. Everything of type
 109    HASH???S will remain internal to this source file so I placed this stuff
 110    here and not in the header file. */
 111
 112 #undef HASHMEMS
 113 #undef HASHREGS
 114
 115 #if ((!defined(HASHMEMS)) && (HASHSIZE < (SHRT_MAX-HASHSHIFT)))
 116 #define HASHMEMS short
 117 #define HASHREGS int
 118 #endif
 119
 120 #if ((!defined(HASHMEMS)) && (HASHSIZE < (USHRT_MAX-HASHSHIFT)))
 121 #define HASHMEMS unsigned short
 122 #define HASHREGS unsigned int
 123 #endif
 124
 125 #if ((!defined(HASHMEMS)) && (HASHSIZE < (INT_MAX-HASHSHIFT)))
 126 #define HASHMEMS int
 127 #define HASHREGS int
 128 #endif
 129
 130 #if ((!defined(HASHMEMS)) && (HASHSIZE < (UINT_MAX-HASHSHIFT)))
 131 #define HASHMEMS unsigned int
 132 #define HASHREGS unsigned int
 133 #endif
 134
 135 #if ((!defined(HASHMEMS)) && (HASHSIZE < (LONG_MAX-HASHSHIFT)))
 136 #define HASHMEMS long
 137 #define HASHREGS long
 138 #endif
 139
 140 #if ((!defined(HASHMEMS)) && (HASHSIZE < (ULONG_MAX-HASHSHIFT)))
 141 #define HASHMEMS unsigned long
 142 #define HASHREGS unsigned long
 143 #endif
 144
 145 #if (!defined(HASHMEMS))
 146 #error "Uh oh... I have a problem, do you want a 16GB hash table ? !"
 147 #endif
 148
 149 /* Now we are sure that HASHMEMS and HASHREGS can contain the following */
 150 #define HASHMAPSIZE (HASHSIZE+HASHSHIFT+1)
 151
 152 /* Static memory structures */
 153
 154 /* We need a first function that, given an integer h between 1 and
 155    HASHSIZE+HASHSHIFT, returns ( (h * HASHSHIFT) % HASHSIZE ) )
 156    We'll map this function in this table */
 157 static HASHMEMS hash_map[HASHMAPSIZE];
 158
 159 /* Then we need a second function that "maps" a char to its weitgh,
 160    changed to a table this one too, with this macro we can use a char
 161    as index and not care if it is signed or not, no.. this will not
 162    cause an addition to take place at each access, trust me, the
 163    optimizer takes it out of the actual code and passes "label+shift"
 164    to the linker, and the linker does the addition :) */
 165 static HASHMEMS hash_weight_table[CHAR_MAX - CHAR_MIN + 1];
 166 #define hash_weight(ch) hash_weight_table[ch-CHAR_MIN]
 167
 168 /* The actual hash tables, both MUST be of the same HASHSIZE, variable
 169    size tables could be supported but the rehash routine should also
 170    rebuild the transformation maps, I kept the tables of equal size
 171    so that I can use one hash function and one transformation map */
 172 static struct Client *clientTable[HASHSIZE];
 173 static struct Channel *channelTable[HASHSIZE];
 174
 175 /* This is what the hash function will consider "equal" chars, this function
 176    MUST be transitive, if HASHEQ(y,x)&&HASHEQ(y,z) then HASHEQ(y,z), and MUST
 177    be symmetric, if HASHEQ(a,b) then HASHEQ(b,a), obvious ok but... :) */
 178 #define HASHEQ(x,y) ((ToLower(x)) == (ToLower(y)))
 179
 180 /* hash_init
 181  * Initialize the maps used by hash functions and clear the tables */
 182 void hash_init(void)
 183 {
 184   int           i;
 185   int           j;
 186   unsigned long l;
 187   unsigned long m;
 188
 189   /* Clear the hash tables first */
 190   for (l = 0; l < HASHSIZE; l++)
 191   {
 192     channelTable[l] = 0;
 193     clientTable[l]  = 0;
 194   }
 195
 196   /* Here is to what we "map" a char before working on it */
 197   for (i = CHAR_MIN; i <= CHAR_MAX; i++)
 198     hash_weight(i) = (HASHMEMS) (HASHSTEP * ((unsigned char)i));
 199
 200   /* Make them equal for case-independently equal chars, it's
 201      lame to do it this way but I wanted the code flexible, it's
 202      possible to change the HASHEQ macro and not touch here.
 203      I don't actually care about the 32768 loops since it happens
 204      only once at startup */
 205   for (i = CHAR_MIN; i <= CHAR_MAX; i++) {
 206     for (j = CHAR_MIN; j < i; j++) {
 207       if (HASHEQ(i, j))
 208         hash_weight(i) = hash_weight(j);
 209     }
 210   }
 211
 212   /* And this is our hash-loop "transformation" function,
 213      basically it will be hash_map[x] == ((x*HASHSHIFT)%HASHSIZE)
 214      defined for 0<=x<=(HASHSIZE+HASHSHIFT) */
 215   for (m = 0; m < (unsigned long)HASHMAPSIZE; m++)
 216   {
 217     l = m;
 218     l *= (unsigned long)HASHSHIFT;
 219     l %= (unsigned long)HASHSIZE;
 220     hash_map[m] = (HASHMEMS) l;
 221   }
 222 }
 223
 224 /* These are the actual hash functions, since they are static
 225    and very short any decent compiler at a good optimization level
 226    WILL inline these in the following functions */
 227
 228 /* This is the string hash function,
 229    WARNING: n must be a valid pointer to a _non-null_ string
 230    this means that not only strhash(NULL) but also
 231    strhash("") _will_ coredump, it's responsibility
 232    the caller to eventually check BadPtr(nick). */
 233
 234 static HASHREGS strhash(const char *n)
 235 {
 236   HASHREGS hash = hash_weight(*n++);
 237   while (*n)
 238     hash = hash_map[hash] + hash_weight(*n++);
 239   return hash_map[hash];
 240 }
 241
 242 /* And this is the string hash function for limited lenght strings
 243    WARNING: n must be a valid pointer to a non-null string
 244    and i must be > 0 ! */
 245
 246 /* REMOVED
 247
 248    The time taken to decrement i makes the function
 249    slower than strhash for the average of channel names (tested
 250    on 16000 real channel names, 1000 loops. I left the code here
 251    as a bookmark if a strnhash is evetually needed in the future.
 252
 253    static HASHREGS strnhash(const char *n, int i) {
 254    HASHREGS hash = hash_weight(*n++);
 255    i--;
 256    while(*n && i--)
 257    hash = hash_map[hash] + hash_weight(*n++);
 258    return hash_map[hash];
 259    }
 260
 261    #define CHANHASHLEN 30
 262
 263    !REMOVED */
 264
 265 /************************** Externally visible functions ********************/
 266
 267 /* Optimization note: in these functions I supposed that the CSE optimization
 268  * (Common Subexpression Elimination) does its work decently, this means that
 269  * I avoided introducing new variables to do the work myself and I did let
 270  * the optimizer play with more free registers, actual tests proved this
 271  * solution to be faster than doing things like tmp2=tmp->hnext... and then
 272  * use tmp2 myself wich would have given less freedom to the optimizer.
 273  */
 274
 275 /*
 276  * hAddClient
 277  * Adds a client's name in the proper hash linked list, can't fail,
 278  * cptr must have a non-null name or expect a coredump, the name is
 279  * infact taken from cptr->name
 280  */
 281 int hAddClient(struct Client *cptr)
 282 {
 283   HASHREGS hashv = strhash(cptr->name);
 284
 285   cptr->hnext = clientTable[hashv];
 286   clientTable[hashv] = cptr;
 287
 288   return 0;
 289 }
 290
 291 /*
 292  * hAddChannel
 293  * Adds a channel's name in the proper hash linked list, can't fail.
 294  * chptr must have a non-null name or expect a coredump.
 295  * As before the name is taken from chptr->name, we do hash its entire
 296  * lenght since this proved to be statistically faster
 297  */
 298 int hAddChannel(struct Channel *chptr)
 299 {
 300   HASHREGS hashv = strhash(chptr->chname);
 301
 302   chptr->hnext = channelTable[hashv];
 303   channelTable[hashv] = chptr;
 304
 305   return 0;
 306 }
 307
 308 /*
 309  * hRemClient
 310  * Removes a Client's name from the hash linked list
 311  */
 312 int hRemClient(struct Client *cptr)
 313 {
 314   HASHREGS hashv = strhash(cptr->name);
 315   struct Client *tmp = clientTable[hashv];
 316
 317   if (tmp == cptr) {
 318     clientTable[hashv] = cptr->hnext;
 319     cptr->hnext = cptr;
 320     return 0;
 321   }
 322
 323   while (tmp) {
 324     if (tmp->hnext == cptr) {
 325       tmp->hnext = tmp->hnext->hnext;
 326       cptr->hnext = cptr;
 327       return 0;
 328     }
 329     tmp = tmp->hnext;
 330   }
 331   return -1;
 332 }
 333
 334 /*
 335  * hChangeClient
 336  * Removes the old name of a client from a linked list and adds
 337  * the new one to another linked list, there is a slight chanche
 338  * that this is useless if the two hashes are the same but it still
 339  * would need to move the name to the top of the list.
 340  * As always it's responsibility of the caller to check that
 341  * both newname and cptr->name are valid names (not "" or NULL).
 342  * Typically, to change the nick of an already hashed client:
 343  * if (!BadPtr(newname) && ClearTheNameSomeHow(newname)) {
 344  *   hChangeClient(cptr, newname);
 345  *   strcpy(cptr->name, newname);
 346  *   };
 347  * There isn't an equivalent function for channels since they
 348  * don't change name.
 349  */
 350 int hChangeClient(struct Client *cptr, const char *newname)
 351 {
 352   HASHREGS newhash = strhash(newname);
 353
 354   assert(0 != cptr);
 355   hRemClient(cptr);
 356
 357   cptr->hnext = clientTable[newhash];
 358   clientTable[newhash] = cptr;
 359   return 0;
 360 }
 361
 362 /*
 363  * hRemChannel
 364  * Removes the channel's name from the corresponding hash linked list
 365  */
 366 int hRemChannel(struct Channel *chptr)
 367 {
 368   HASHREGS hashv = strhash(chptr->chname);
 369   struct Channel *tmp = channelTable[hashv];
 370
 371   if (tmp == chptr) {
 372     channelTable[hashv] = chptr->hnext;
 373     chptr->hnext = chptr;
 374     return 0;
 375   }
 376
 377   while (tmp) {
 378     if (tmp->hnext == chptr) {
 379       tmp->hnext = tmp->hnext->hnext;
 380       chptr->hnext = chptr;
 381       return 0;
 382     }
 383     tmp = tmp->hnext;
 384   }
 385
 386   return -1;
 387 }
 388
 389 /*
 390  * hSeekClient
 391  * New semantics: finds a client whose name is 'name' and whose
 392  * status is one of those marked in TMask, if can't find one
 393  * returns NULL. If it finds one moves it to the top of the list
 394  * and returns it.
 395  */
 396 struct Client* hSeekClient(const char *name, int TMask)
 397 {
 398   HASHREGS hashv      = strhash(name);
 399   struct Client *cptr = clientTable[hashv];
 400
 401   if (cptr) {
 402     if (0 == (cptr->status & TMask) || 0 != ircd_strcmp(name, cptr->name)) {
 403       struct Client* prev;
 404       while (prev = cptr, cptr = cptr->hnext) {
 405         if ((cptr->status & TMask) && (0 == ircd_strcmp(name, cptr->name))) {
 406           prev->hnext = cptr->hnext;
 407           cptr->hnext = clientTable[hashv];
 408           clientTable[hashv] = cptr;
 409           break;
 410         }
 411       }
 412     }
 413   }
 414   return cptr;
 415 }
 416
 417 /*
 418  * hSeekChannel
 419  * New semantics: finds a channel whose name is 'name',
 420  * if can't find one returns NULL, if can find it moves
 421  * it to the top of the list and returns it.
 422  */
 423 struct Channel* hSeekChannel(const char *name)
 424 {
 425   HASHREGS hashv = strhash(name);
 426   struct Channel *chptr = channelTable[hashv];
 427
 428   if (chptr) {
 429     if (0 != ircd_strcmp(name, chptr->chname)) {
 430       struct Channel* prev;
 431       while (prev = chptr, chptr = chptr->hnext) {
 432         if (0 == ircd_strcmp(name, chptr->chname)) {
 433           prev->hnext = chptr->hnext;
 434           chptr->hnext = channelTable[hashv];
 435           channelTable[hashv] = chptr;
 436           break;
 437         }
 438       }
 439     }
 440   }
 441   return chptr;
 442
 443 }
 444
 445 /* I will add some useful(?) statistics here one of these days,
 446    but not for DEBUGMODE: just to let the admins play with it,
 447    coders are able to SIGCORE the server and look into what goes
 448    on themselves :-) */
 449
 450 int m_hash(struct Client *cptr, struct Client *sptr, int parc, char *parv[])
 451 {
 452   int max_chain = 0;
 453   int buckets   = 0;
 454   int count     = 0;
 455   struct Client*  cl;
 456   struct Channel* ch;
 457   int i;
 458
 459   sendto_one(sptr, "NOTICE %s :Hash Table Statistics", parv[0]);
 460
 461   for (i = 0; i < HASHSIZE; ++i) {
 462     if ((cl = clientTable[i])) {
 463       int len = 0;
 464       ++buckets;
 465       for ( ; cl; cl = cl->hnext)
 466         ++len;
 467       if (len > max_chain)
 468         max_chain = len;
 469       count += len;
 470     }
 471   }
 472
 473   sendto_one(sptr, "NOTICE %s :Client: entries: %d buckets: %d max chain: %d",
 474              parv[0], count, buckets, max_chain);
 475
 476   buckets = 0;
 477   count   = 0;
 478   max_chain = 0;
 479
 480   for (i = 0; i < HASHSIZE; ++i) {
 481     if ((ch = channelTable[i])) {
 482       int len = 0;
 483       ++buckets;
 484       for ( ; ch; ch = ch->hnext)
 485         ++len;
 486       if (len > max_chain)
 487         max_chain = len;
 488       count += len;
 489     }
 490   }
 491
 492   sendto_one(sptr, "NOTICE %s :Channel: entries: %d buckets: %d max chain: %d",
 493              parv[0], count, buckets, max_chain);
 494   return 0;
 495 }
 496
 497 /* Nick jupe utilities, these are in a static hash table with entry/bucket
 498    ratio of one, collision shift up and roll in a circular fashion, the
 499    lowest 12 bits of the hash value are used, deletion is not supported,
 500    only addition, test for existence and cleanup of the table are.. */
 501
 502 #define JUPEHASHBITS 12         /* 4096 entries, 64 nick jupes allowed */
 503 #define JUPEHASHSIZE (1<<JUPEHASHBITS)
 504 #define JUPEHASHMASK (JUPEHASHSIZE-1)
 505 #define JUPEMAX      (1<<(JUPEHASHBITS-6))
 506
 507 static char jupeTable[JUPEHASHSIZE][NICKLEN + 1];       /* About 40k */
 508 static int jupesCount;
 509
 510 /*
 511  * isNickJuped()
 512  * Tells if a nick is juped (nonzero returned) or not (zero)
 513  */
 514 int isNickJuped(const char *nick)
 515 {
 516   int pos;
 517
 518   if (nick && *nick) {
 519     for (pos = strhash(nick); (pos &= JUPEHASHMASK), jupeTable[pos][0]; pos++) {
 520       if (0 == ircd_strcmp(nick, jupeTable[pos]))
 521         return 1;
 522     }
 523   }
 524   return 0;                     /* A bogus pointer is NOT a juped nick, right ? :) */
 525 }
 526
 527 /*
 528  * addNickJupes()
 529  * Adds a (comma separated list of) nick jupes to the table
 530  */
 531 int addNickJupes(const char *nicks)
 532 {
 533   static char temp[BUFSIZE + 1];
 534   char* one;
 535   char* p;
 536   int   pos;
 537
 538   if (nicks && *nicks)
 539   {
 540     ircd_strncpy(temp, nicks, BUFSIZE);
 541     temp[BUFSIZE] = '\0';
 542     p = NULL;
 543     for (one = ircd_strtok(&p, temp, ","); one; one = ircd_strtok(&p, NULL, ","))
 544     {
 545       if (!*one)
 546         continue;
 547       pos = strhash(one);
 548 loop:
 549       pos &= JUPEHASHMASK;
 550       if (!jupeTable[pos][0])
 551       {
 552         if (jupesCount == JUPEMAX)
 553           return 1;             /* Error: Jupe table is full ! */
 554         jupesCount++;
 555         ircd_strncpy(jupeTable[pos], one, NICKLEN);
 556         jupeTable[pos][NICKLEN] = '\000';       /* Better safe than sorry :) */
 557         continue;
 558       }
 559       if (0 == ircd_strcmp(one, jupeTable[pos]))
 560         continue;
 561       ++pos;
 562       goto loop;
 563     }
 564   }
 565   return 0;
 566 }
 567
 568 /*
 569  * clearNickJupes()
 570  * Cleans up the juped nicks table
 571  */
 572 void clearNickJupes(void)
 573 {
 574   int i;
 575   jupesCount = 0;
 576   for (i = 0; i < JUPEHASHSIZE; i++)
 577     jupeTable[i][0] = '\000';
 578 }