ircd/hash.c

   1 /*
   2  * IRC - Internet Relay Chat, ircd/hash.c
   3  * Copyright (C) 1998 Andrea Cocito, completely rewritten version.
   4  * Previous version was Copyright (C) 1991 Darren Reed, the concept
   5  * of linked lists for each hash bucket and the move-to-head
   6  * optimization has been borrowed from there.
   7  *
   8  * This program is free software; you can redistribute it and/or modify
   9  * it under the terms of the GNU General Public License as published by
  10  * the Free Software Foundation; either version 1, or (at your option)
  11  * any later version.
  12  *
  13  * This program is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16  * GNU General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU General Public License
  19  * along with this program; if not, write to the Free Software
  20  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  21  *
  22  * $Id$
  23  */
  24 #include "hash.h"
  25 #include "client.h"
  26 #include "channel.h"
  27 #include "ircd_chattr.h"
  28 #include "ircd_string.h"
  29 #include "ircd.h"
  30 #include "msg.h"
  31 #include "send.h"
  32 #include "struct.h"
  33 #include "support.h"
  34 #include "sys.h"
  35
  36 #include <assert.h>
  37 #include <limits.h>
  38 #include <stdlib.h>
  39 #include <string.h>
  40
  41
  42 /************************* Nemesi's hash alghoritm ***********************/
  43
  44 /* This hash function returns *exactly* N%HASHSIZE, where 'N'
  45  * is the string itself (included the trailing '\0') seen as
  46  * a baseHASHSHIFT number whose "digits" are the bytes of the
  47  * number mapped through a "weight" transformation that gives
  48  * the same "weight" to caseless-equal chars, example:
  49  *
  50  * Hashing the string "Nick\0" the result will be:
  51  * N  i  c  k \0
  52  * |  |  |  |  `--->  ( (hash_weight('\0') * (HASHSHIFT**0) +
  53  * |  |  |  `------>    (hash_weight('k')  * (HASHSHIFT**1) +
  54  * |  |  `--------->    (hash_weight('c')  * (HASHSHIFT**2) +
  55  * |  `------------>    (hash_weight('i')  * (HASHSHIFT**3) +
  56  * `--------------->    (hash_weight('N')  * (HASHSHIFT**4)   ) % HASHSIZE
  57  *
  58  * It's actually a lot similar to a base transformation of the
  59  * text representation of an integer.
  60  * Looking at it this way seems slow and requiring unlimited integer
  61  * precision, but we actually do it with a *very* fast loop, using only
  62  * short integer arithmetic and by means of two memory accesses and
  63  * 3 additions per each byte processed.. and nothing else, as a side
  64  * note the distribution of real nicks over the hash table of this
  65  * function is about 3 times better than the previous one, and the
  66  * hash function itself is about 25% faster with a "normal" HASHSIZE
  67  * (it gets slower with larger ones and faster for smallest ones
  68  * because the hash table size affect the size of some maps and thus
  69  * the effectiveness of RAM caches while accesing them).
  70  * These two pages of macros are here to make the following code
  71  * _more_ understandeable... I hope ;)
  72  */
  73
  74 /* Internal stuff, think well before changing this, it's how
  75    much the weights of two lexicograhically contiguous chars
  76    differ, i.e. (hash_weight('b')-hash_weight('a')) == HASHSTEP
  77    One seems to be fine but the alghoritm doesn't depend on it */
  78 #define HASHSTEP 1
  79
  80 /* The smallest _prime_ int beeing HASHSTEP times bigger than a byte,
  81    that is the first prime bigger than the maximum hash_weight
  82    (since the maximum hash weight is gonne be the "biggest-byte * HASHSTEP")
  83  */
  84 #define HASHSHIFT 257
  85
  86 /* Are we sure that HASHSHIFT is big enough ? */
  87 #if !(HASHSHIFT > (HASHSTEP*(CHAR_MAX-CHAR_MIN)))
  88 #error "No no, I cannot, please make HASHSHIFT a bigger prime !"
  89 #endif
  90
  91 /* Now HASHSIZE doesn't need to be a prime, but we really don't want it
  92    to be an exact multiple of HASHSHIFT, that would make the distribution
  93    a LOT worse, once is not multiple of HASHSHIFT it can be anything */
  94 #if ((HASHSIZE%HASHSHIFT)==0)
  95 #error "Please set HASHSIZE to something not multiple of HASHSHIFT"
  96 #endif
  97
  98 /* What type of integer do we need in our computations ? the largest
  99    value we need to work on is (HASHSIZE+HASHSHIFT+1), for memory
 100    operations we want to keep the tables compact (the cache will work
 101    better and we will run faster) while for work variables we prefer
 102    to roundup to 'int' if it is the case: on platforms where int!=short
 103    int arithmetic is often faster than short arithmetic, we prefer signed
 104    types if they are big enough since on some architectures they are faster
 105    than unsigned, but we always keep signedness of mem and regs the same,
 106    to avoid sign conversions that sometimes require time, the following
 107    precompile stuff will set HASHMEMS to an appropriate integer type for
 108    the tables stored in memory and HASHREGS to an appropriate int type
 109    for the work registers/variables/return types. Everything of type
 110    HASH???S will remain internal to this source file so I placed this stuff
 111    here and not in the header file. */
 112
 113 #undef HASHMEMS
 114 #undef HASHREGS
 115
 116 #if ((!defined(HASHMEMS)) && (HASHSIZE < (SHRT_MAX-HASHSHIFT)))
 117 #define HASHMEMS short
 118 #define HASHREGS int
 119 #endif
 120
 121 #if ((!defined(HASHMEMS)) && (HASHSIZE < (USHRT_MAX-HASHSHIFT)))
 122 #define HASHMEMS unsigned short
 123 #define HASHREGS unsigned int
 124 #endif
 125
 126 #if ((!defined(HASHMEMS)) && (HASHSIZE < (INT_MAX-HASHSHIFT)))
 127 #define HASHMEMS int
 128 #define HASHREGS int
 129 #endif
 130
 131 #if ((!defined(HASHMEMS)) && (HASHSIZE < (UINT_MAX-HASHSHIFT)))
 132 #define HASHMEMS unsigned int
 133 #define HASHREGS unsigned int
 134 #endif
 135
 136 #if ((!defined(HASHMEMS)) && (HASHSIZE < (LONG_MAX-HASHSHIFT)))
 137 #define HASHMEMS long
 138 #define HASHREGS long
 139 #endif
 140
 141 #if ((!defined(HASHMEMS)) && (HASHSIZE < (ULONG_MAX-HASHSHIFT)))
 142 #define HASHMEMS unsigned long
 143 #define HASHREGS unsigned long
 144 #endif
 145
 146 #if (!defined(HASHMEMS))
 147 #error "Uh oh... I have a problem, do you want a 16GB hash table ? !"
 148 #endif
 149
 150 /* Now we are sure that HASHMEMS and HASHREGS can contain the following */
 151 #define HASHMAPSIZE (HASHSIZE+HASHSHIFT+1)
 152
 153 /* Static memory structures */
 154
 155 /* We need a first function that, given an integer h between 1 and
 156    HASHSIZE+HASHSHIFT, returns ( (h * HASHSHIFT) % HASHSIZE ) )
 157    We'll map this function in this table */
 158 static HASHMEMS hash_map[HASHMAPSIZE];
 159
 160 /* Then we need a second function that "maps" a char to its weitgh,
 161    changed to a table this one too, with this macro we can use a char
 162    as index and not care if it is signed or not, no.. this will not
 163    cause an addition to take place at each access, trust me, the
 164    optimizer takes it out of the actual code and passes "label+shift"
 165    to the linker, and the linker does the addition :) */
 166 static HASHMEMS hash_weight_table[CHAR_MAX - CHAR_MIN + 1];
 167 #define hash_weight(ch) hash_weight_table[ch-CHAR_MIN]
 168
 169 /* The actual hash tables, both MUST be of the same HASHSIZE, variable
 170    size tables could be supported but the rehash routine should also
 171    rebuild the transformation maps, I kept the tables of equal size
 172    so that I can use one hash function and one transformation map */
 173 static struct Client *clientTable[HASHSIZE];
 174 static struct Channel *channelTable[HASHSIZE];
 175
 176 /* This is what the hash function will consider "equal" chars, this function
 177    MUST be transitive, if HASHEQ(y,x)&&HASHEQ(y,z) then HASHEQ(y,z), and MUST
 178    be symmetric, if HASHEQ(a,b) then HASHEQ(b,a), obvious ok but... :) */
 179 #define HASHEQ(x,y) ((ToLower(x)) == (ToLower(y)))
 180
 181 /* init_hash
 182  * Initialize the maps used by hash functions and clear the tables */
 183 void init_hash(void)
 184 {
 185   int           i;
 186   int           j;
 187   unsigned long l;
 188   unsigned long m;
 189
 190   /* Clear the hash tables first */
 191   for (l = 0; l < HASHSIZE; l++)
 192   {
 193     channelTable[l] = 0;
 194     clientTable[l]  = 0;
 195   }
 196
 197   /* Here is to what we "map" a char before working on it */
 198   for (i = CHAR_MIN; i <= CHAR_MAX; i++)
 199     hash_weight(i) = (HASHMEMS) (HASHSTEP * ((unsigned char)i));
 200
 201   /* Make them equal for case-independently equal chars, it's
 202      lame to do it this way but I wanted the code flexible, it's
 203      possible to change the HASHEQ macro and not touch here.
 204      I don't actually care about the 32768 loops since it happens
 205      only once at startup */
 206   for (i = CHAR_MIN; i <= CHAR_MAX; i++) {
 207     for (j = CHAR_MIN; j < i; j++) {
 208       if (HASHEQ(i, j))
 209         hash_weight(i) = hash_weight(j);
 210     }
 211   }
 212
 213   /* And this is our hash-loop "transformation" function,
 214      basically it will be hash_map[x] == ((x*HASHSHIFT)%HASHSIZE)
 215      defined for 0<=x<=(HASHSIZE+HASHSHIFT) */
 216   for (m = 0; m < (unsigned long)HASHMAPSIZE; m++)
 217   {
 218     l = m;
 219     l *= (unsigned long)HASHSHIFT;
 220     l %= (unsigned long)HASHSIZE;
 221     hash_map[m] = (HASHMEMS) l;
 222   }
 223 }
 224
 225 /* These are the actual hash functions, since they are static
 226    and very short any decent compiler at a good optimization level
 227    WILL inline these in the following functions */
 228
 229 /* This is the string hash function,
 230    WARNING: n must be a valid pointer to a _non-null_ string
 231    this means that not only strhash(NULL) but also
 232    strhash("") _will_ coredump, it's responsibility
 233    the caller to eventually check BadPtr(nick). */
 234
 235 static HASHREGS strhash(const char *n)
 236 {
 237   HASHREGS hash = hash_weight(*n++);
 238   while (*n)
 239     hash = hash_map[hash] + hash_weight(*n++);
 240   return hash_map[hash];
 241 }
 242
 243 /* And this is the string hash function for limited lenght strings
 244    WARNING: n must be a valid pointer to a non-null string
 245    and i must be > 0 ! */
 246
 247 /* REMOVED
 248
 249    The time taken to decrement i makes the function
 250    slower than strhash for the average of channel names (tested
 251    on 16000 real channel names, 1000 loops. I left the code here
 252    as a bookmark if a strnhash is evetually needed in the future.
 253
 254    static HASHREGS strnhash(const char *n, int i) {
 255    HASHREGS hash = hash_weight(*n++);
 256    i--;
 257    while(*n && i--)
 258    hash = hash_map[hash] + hash_weight(*n++);
 259    return hash_map[hash];
 260    }
 261
 262    #define CHANHASHLEN 30
 263
 264    !REMOVED */
 265
 266 /************************** Externally visible functions ********************/
 267
 268 /* Optimization note: in these functions I supposed that the CSE optimization
 269  * (Common Subexpression Elimination) does its work decently, this means that
 270  * I avoided introducing new variables to do the work myself and I did let
 271  * the optimizer play with more free registers, actual tests proved this
 272  * solution to be faster than doing things like tmp2=tmp->hnext... and then
 273  * use tmp2 myself wich would have given less freedom to the optimizer.
 274  */
 275
 276 /*
 277  * hAddClient
 278  * Adds a client's name in the proper hash linked list, can't fail,
 279  * cptr must have a non-null name or expect a coredump, the name is
 280  * infact taken from cptr->name
 281  */
 282 int hAddClient(struct Client *cptr)
 283 {
 284   HASHREGS hashv = strhash(cli_name(cptr));
 285
 286   cli_hnext(cptr) = clientTable[hashv];
 287   clientTable[hashv] = cptr;
 288
 289   return 0;
 290 }
 291
 292 /*
 293  * hAddChannel
 294  * Adds a channel's name in the proper hash linked list, can't fail.
 295  * chptr must have a non-null name or expect a coredump.
 296  * As before the name is taken from chptr->name, we do hash its entire
 297  * lenght since this proved to be statistically faster
 298  */
 299 int hAddChannel(struct Channel *chptr)
 300 {
 301   HASHREGS hashv = strhash(chptr->chname);
 302
 303   chptr->hnext = channelTable[hashv];
 304   channelTable[hashv] = chptr;
 305
 306   return 0;
 307 }
 308
 309 /*
 310  * hRemClient
 311  * Removes a Client's name from the hash linked list
 312  */
 313 int hRemClient(struct Client *cptr)
 314 {
 315   HASHREGS hashv = strhash(cli_name(cptr));
 316   struct Client *tmp = clientTable[hashv];
 317
 318   if (tmp == cptr) {
 319     clientTable[hashv] = cli_hnext(cptr);
 320     cli_hnext(cptr) = cptr;
 321     return 0;
 322   }
 323
 324   while (tmp) {
 325     if (cli_hnext(tmp) == cptr) {
 326       cli_hnext(tmp) = cli_hnext(cli_hnext(tmp));
 327       cli_hnext(cptr) = cptr;
 328       return 0;
 329     }
 330     tmp = cli_hnext(tmp);
 331   }
 332   return -1;
 333 }
 334
 335 /*
 336  * hChangeClient
 337  * Removes the old name of a client from a linked list and adds
 338  * the new one to another linked list, there is a slight chanche
 339  * that this is useless if the two hashes are the same but it still
 340  * would need to move the name to the top of the list.
 341  * As always it's responsibility of the caller to check that
 342  * both newname and cptr->name are valid names (not "" or NULL).
 343  * Typically, to change the nick of an already hashed client:
 344  * if (!BadPtr(newname) && ClearTheNameSomeHow(newname)) {
 345  *   hChangeClient(cptr, newname);
 346  *   strcpy(cptr->name, newname);
 347  *   };
 348  * There isn't an equivalent function for channels since they
 349  * don't change name.
 350  */
 351 int hChangeClient(struct Client *cptr, const char *newname)
 352 {
 353   HASHREGS newhash = strhash(newname);
 354
 355   assert(0 != cptr);
 356   hRemClient(cptr);
 357
 358   cli_hnext(cptr) = clientTable[newhash];
 359   clientTable[newhash] = cptr;
 360   return 0;
 361 }
 362
 363 /*
 364  * hRemChannel
 365  * Removes the channel's name from the corresponding hash linked list
 366  */
 367 int hRemChannel(struct Channel *chptr)
 368 {
 369   HASHREGS hashv = strhash(chptr->chname);
 370   struct Channel *tmp = channelTable[hashv];
 371
 372   if (tmp == chptr) {
 373     channelTable[hashv] = chptr->hnext;
 374     chptr->hnext = chptr;
 375     return 0;
 376   }
 377
 378   while (tmp) {
 379     if (tmp->hnext == chptr) {
 380       tmp->hnext = tmp->hnext->hnext;
 381       chptr->hnext = chptr;
 382       return 0;
 383     }
 384     tmp = tmp->hnext;
 385   }
 386
 387   return -1;
 388 }
 389
 390 /*
 391  * hSeekClient
 392  * New semantics: finds a client whose name is 'name' and whose
 393  * status is one of those marked in TMask, if can't find one
 394  * returns NULL. If it finds one moves it to the top of the list
 395  * and returns it.
 396  */
 397 struct Client* hSeekClient(const char *name, int TMask)
 398 {
 399   HASHREGS hashv      = strhash(name);
 400   struct Client *cptr = clientTable[hashv];
 401
 402   if (cptr) {
 403     if (0 == (cli_status(cptr) & TMask) || 0 != ircd_strcmp(name, cli_name(cptr))) {
 404       struct Client* prev;
 405       while (prev = cptr, cptr = cli_hnext(cptr)) {
 406         if ((cli_status(cptr) & TMask) && (0 == ircd_strcmp(name, cli_name(cptr)))) {
 407           cli_hnext(prev) = cli_hnext(cptr);
 408           cli_hnext(cptr) = clientTable[hashv];
 409           clientTable[hashv] = cptr;
 410           break;
 411         }
 412       }
 413     }
 414   }
 415   return cptr;
 416 }
 417
 418 /*
 419  * hSeekChannel
 420  * New semantics: finds a channel whose name is 'name',
 421  * if can't find one returns NULL, if can find it moves
 422  * it to the top of the list and returns it.
 423  */
 424 struct Channel* hSeekChannel(const char *name)
 425 {
 426   HASHREGS hashv = strhash(name);
 427   struct Channel *chptr = channelTable[hashv];
 428
 429   if (chptr) {
 430     if (0 != ircd_strcmp(name, chptr->chname)) {
 431       struct Channel* prev;
 432       while (prev = chptr, chptr = chptr->hnext) {
 433         if (0 == ircd_strcmp(name, chptr->chname)) {
 434           prev->hnext = chptr->hnext;
 435           chptr->hnext = channelTable[hashv];
 436           channelTable[hashv] = chptr;
 437           break;
 438         }
 439       }
 440     }
 441   }
 442   return chptr;
 443
 444 }
 445
 446 /* I will add some useful(?) statistics here one of these days,
 447    but not for DEBUGMODE: just to let the admins play with it,
 448    coders are able to SIGCORE the server and look into what goes
 449    on themselves :-) */
 450
 451 int m_hash(struct Client *cptr, struct Client *sptr, int parc, char *parv[])
 452 {
 453   int max_chain = 0;
 454   int buckets   = 0;
 455   int count     = 0;
 456   struct Client*  cl;
 457   struct Channel* ch;
 458   int i;
 459
 460   sendcmdto_one(&me, CMD_NOTICE, sptr, "%C :Hash Table Statistics", sptr);
 461
 462   for (i = 0; i < HASHSIZE; ++i) {
 463     if ((cl = clientTable[i])) {
 464       int len = 0;
 465       ++buckets;
 466       for ( ; cl; cl = cli_hnext(cl))
 467         ++len;
 468       if (len > max_chain)
 469         max_chain = len;
 470       count += len;
 471     }
 472   }
 473
 474   sendcmdto_one(&me, CMD_NOTICE, sptr, "%C :Client: entries: %d buckets: %d "
 475                 "max chain: %d", sptr, count, buckets, max_chain);
 476
 477   buckets = 0;
 478   count   = 0;
 479   max_chain = 0;
 480
 481   for (i = 0; i < HASHSIZE; ++i) {
 482     if ((ch = channelTable[i])) {
 483       int len = 0;
 484       ++buckets;
 485       for ( ; ch; ch = ch->hnext)
 486         ++len;
 487       if (len > max_chain)
 488         max_chain = len;
 489       count += len;
 490     }
 491   }
 492
 493   sendcmdto_one(&me, CMD_NOTICE, sptr, "%C :Channel: entries: %d buckets: %d "
 494                 "max chain: %d", sptr, count, buckets, max_chain);
 495   return 0;
 496 }
 497
 498 /* Nick jupe utilities, these are in a static hash table with entry/bucket
 499    ratio of one, collision shift up and roll in a circular fashion, the
 500    lowest 12 bits of the hash value are used, deletion is not supported,
 501    only addition, test for existence and cleanup of the table are.. */
 502
 503 #define JUPEHASHBITS 12         /* 4096 entries, 64 nick jupes allowed */
 504 #define JUPEHASHSIZE (1<<JUPEHASHBITS)
 505 #define JUPEHASHMASK (JUPEHASHSIZE-1)
 506 #define JUPEMAX      (1<<(JUPEHASHBITS-6))
 507
 508 static char jupeTable[JUPEHASHSIZE][NICKLEN + 1];       /* About 40k */
 509 static int jupesCount;
 510
 511 /*
 512  * isNickJuped()
 513  * Tells if a nick is juped (nonzero returned) or not (zero)
 514  */
 515 int isNickJuped(const char *nick)
 516 {
 517   int pos;
 518
 519   if (nick && *nick) {
 520     for (pos = strhash(nick); (pos &= JUPEHASHMASK), jupeTable[pos][0]; pos++) {
 521       if (0 == ircd_strcmp(nick, jupeTable[pos]))
 522         return 1;
 523     }
 524   }
 525   return 0;                     /* A bogus pointer is NOT a juped nick, right ? :) */
 526 }
 527
 528 /*
 529  * addNickJupes()
 530  * Adds a (comma separated list of) nick jupes to the table
 531  */
 532 int addNickJupes(const char *nicks)
 533 {
 534   static char temp[BUFSIZE + 1];
 535   char* one;
 536   char* p;
 537   int   pos;
 538
 539   if (nicks && *nicks)
 540   {
 541     ircd_strncpy(temp, nicks, BUFSIZE);
 542     temp[BUFSIZE] = '\0';
 543     p = NULL;
 544     for (one = ircd_strtok(&p, temp, ","); one; one = ircd_strtok(&p, NULL, ","))
 545     {
 546       if (!*one)
 547         continue;
 548       pos = strhash(one);
 549 loop:
 550       pos &= JUPEHASHMASK;
 551       if (!jupeTable[pos][0])
 552       {
 553         if (jupesCount == JUPEMAX)
 554           return 1;             /* Error: Jupe table is full ! */
 555         jupesCount++;
 556         ircd_strncpy(jupeTable[pos], one, NICKLEN);
 557         jupeTable[pos][NICKLEN] = '\000';       /* Better safe than sorry :) */
 558         continue;
 559       }
 560       if (0 == ircd_strcmp(one, jupeTable[pos]))
 561         continue;
 562       ++pos;
 563       goto loop;
 564     }
 565   }
 566   return 0;
 567 }
 568
 569 /*
 570  * clearNickJupes()
 571  * Cleans up the juped nicks table
 572  */
 573 void clearNickJupes(void)
 574 {
 575   int i;
 576   jupesCount = 0;
 577   for (i = 0; i < JUPEHASHSIZE; i++)
 578     jupeTable[i][0] = '\000';
 579 }