ircd/hash.c

   1 /*
   2  * IRC - Internet Relay Chat, ircd/hash.c
   3  * Copyright (C) 1998 Andrea Cocito, completely rewritten version.
   4  * Previous version was Copyright (C) 1991 Darren Reed, the concept
   5  * of linked lists for each hash bucket and the move-to-head
   6  * optimization has been borrowed from there.
   7  *
   8  * This program is free software; you can redistribute it and/or modify
   9  * it under the terms of the GNU General Public License as published by
  10  * the Free Software Foundation; either version 1, or (at your option)
  11  * any later version.
  12  *
  13  * This program is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16  * GNU General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU General Public License
  19  * along with this program; if not, write to the Free Software
  20  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  21  *
  22  * $Id$
  23  */
  24 #include "config.h"
  25
  26 #include "hash.h"
  27 #include "client.h"
  28 #include "channel.h"
  29 #include "ircd_chattr.h"
  30 #include "ircd_string.h"
  31 #include "ircd.h"
  32 #include "msg.h"
  33 #include "send.h"
  34 #include "struct.h"
  35 #include "support.h"
  36 #include "sys.h"
  37
  38 #include <assert.h>
  39 #include <limits.h>
  40 #include <stdlib.h>
  41 #include <string.h>
  42
  43
  44 /************************* Nemesi's hash alghoritm ***********************/
  45
  46 /* This hash function returns *exactly* N%HASHSIZE, where 'N'
  47  * is the string itself (included the trailing '\0') seen as
  48  * a baseHASHSHIFT number whose "digits" are the bytes of the
  49  * number mapped through a "weight" transformation that gives
  50  * the same "weight" to caseless-equal chars, example:
  51  *
  52  * Hashing the string "Nick\0" the result will be:
  53  * N  i  c  k \0
  54  * |  |  |  |  `--->  ( (hash_weight('\0') * (HASHSHIFT**0) +
  55  * |  |  |  `------>    (hash_weight('k')  * (HASHSHIFT**1) +
  56  * |  |  `--------->    (hash_weight('c')  * (HASHSHIFT**2) +
  57  * |  `------------>    (hash_weight('i')  * (HASHSHIFT**3) +
  58  * `--------------->    (hash_weight('N')  * (HASHSHIFT**4)   ) % HASHSIZE
  59  *
  60  * It's actually a lot similar to a base transformation of the
  61  * text representation of an integer.
  62  * Looking at it this way seems slow and requiring unlimited integer
  63  * precision, but we actually do it with a *very* fast loop, using only
  64  * short integer arithmetic and by means of two memory accesses and
  65  * 3 additions per each byte processed.. and nothing else, as a side
  66  * note the distribution of real nicks over the hash table of this
  67  * function is about 3 times better than the previous one, and the
  68  * hash function itself is about 25% faster with a "normal" HASHSIZE
  69  * (it gets slower with larger ones and faster for smallest ones
  70  * because the hash table size affect the size of some maps and thus
  71  * the effectiveness of RAM caches while accesing them).
  72  * These two pages of macros are here to make the following code
  73  * _more_ understandeable... I hope ;)
  74  */
  75
  76 /* Internal stuff, think well before changing this, it's how
  77    much the weights of two lexicograhically contiguous chars
  78    differ, i.e. (hash_weight('b')-hash_weight('a')) == HASHSTEP
  79    One seems to be fine but the alghoritm doesn't depend on it */
  80 #define HASHSTEP 1
  81
  82 /* The smallest _prime_ int beeing HASHSTEP times bigger than a byte,
  83    that is the first prime bigger than the maximum hash_weight
  84    (since the maximum hash weight is gonne be the "biggest-byte * HASHSTEP")
  85  */
  86 #define HASHSHIFT 257
  87
  88 /* Are we sure that HASHSHIFT is big enough ? */
  89 #if !(HASHSHIFT > (HASHSTEP*(CHAR_MAX-CHAR_MIN)))
  90 #error "No no, I cannot, please make HASHSHIFT a bigger prime !"
  91 #endif
  92
  93 /* Now HASHSIZE doesn't need to be a prime, but we really don't want it
  94    to be an exact multiple of HASHSHIFT, that would make the distribution
  95    a LOT worse, once is not multiple of HASHSHIFT it can be anything */
  96 #if ((HASHSIZE%HASHSHIFT)==0)
  97 #error "Please set HASHSIZE to something not multiple of HASHSHIFT"
  98 #endif
  99
 100 /* What type of integer do we need in our computations ? the largest
 101    value we need to work on is (HASHSIZE+HASHSHIFT+1), for memory
 102    operations we want to keep the tables compact (the cache will work
 103    better and we will run faster) while for work variables we prefer
 104    to roundup to 'int' if it is the case: on platforms where int!=short
 105    int arithmetic is often faster than short arithmetic, we prefer signed
 106    types if they are big enough since on some architectures they are faster
 107    than unsigned, but we always keep signedness of mem and regs the same,
 108    to avoid sign conversions that sometimes require time, the following
 109    precompile stuff will set HASHMEMS to an appropriate integer type for
 110    the tables stored in memory and HASHREGS to an appropriate int type
 111    for the work registers/variables/return types. Everything of type
 112    HASH???S will remain internal to this source file so I placed this stuff
 113    here and not in the header file. */
 114
 115 #undef HASHMEMS
 116 #undef HASHREGS
 117
 118 #if ((!defined(HASHMEMS)) && (HASHSIZE < (SHRT_MAX-HASHSHIFT)))
 119 #define HASHMEMS short
 120 #define HASHREGS int
 121 #endif
 122
 123 #if ((!defined(HASHMEMS)) && (HASHSIZE < (USHRT_MAX-HASHSHIFT)))
 124 #define HASHMEMS unsigned short
 125 #define HASHREGS unsigned int
 126 #endif
 127
 128 #if ((!defined(HASHMEMS)) && (HASHSIZE < (INT_MAX-HASHSHIFT)))
 129 #define HASHMEMS int
 130 #define HASHREGS int
 131 #endif
 132
 133 #if ((!defined(HASHMEMS)) && (HASHSIZE < (UINT_MAX-HASHSHIFT)))
 134 #define HASHMEMS unsigned int
 135 #define HASHREGS unsigned int
 136 #endif
 137
 138 #if ((!defined(HASHMEMS)) && (HASHSIZE < (LONG_MAX-HASHSHIFT)))
 139 #define HASHMEMS long
 140 #define HASHREGS long
 141 #endif
 142
 143 #if ((!defined(HASHMEMS)) && (HASHSIZE < (ULONG_MAX-HASHSHIFT)))
 144 #define HASHMEMS unsigned long
 145 #define HASHREGS unsigned long
 146 #endif
 147
 148 #if (!defined(HASHMEMS))
 149 #error "Uh oh... I have a problem, do you want a 16GB hash table ? !"
 150 #endif
 151
 152 /* Now we are sure that HASHMEMS and HASHREGS can contain the following */
 153 #define HASHMAPSIZE (HASHSIZE+HASHSHIFT+1)
 154
 155 /* Static memory structures */
 156
 157 /* We need a first function that, given an integer h between 1 and
 158    HASHSIZE+HASHSHIFT, returns ( (h * HASHSHIFT) % HASHSIZE ) )
 159    We'll map this function in this table */
 160 static HASHMEMS hash_map[HASHMAPSIZE];
 161
 162 /* Then we need a second function that "maps" a char to its weitgh,
 163    changed to a table this one too, with this macro we can use a char
 164    as index and not care if it is signed or not, no.. this will not
 165    cause an addition to take place at each access, trust me, the
 166    optimizer takes it out of the actual code and passes "label+shift"
 167    to the linker, and the linker does the addition :) */
 168 static HASHMEMS hash_weight_table[CHAR_MAX - CHAR_MIN + 1];
 169 #define hash_weight(ch) hash_weight_table[ch-CHAR_MIN]
 170
 171 /* The actual hash tables, both MUST be of the same HASHSIZE, variable
 172    size tables could be supported but the rehash routine should also
 173    rebuild the transformation maps, I kept the tables of equal size
 174    so that I can use one hash function and one transformation map */
 175 static struct Client *clientTable[HASHSIZE];
 176 static struct Channel *channelTable[HASHSIZE];
 177
 178 /* This is what the hash function will consider "equal" chars, this function
 179    MUST be transitive, if HASHEQ(y,x)&&HASHEQ(y,z) then HASHEQ(y,z), and MUST
 180    be symmetric, if HASHEQ(a,b) then HASHEQ(b,a), obvious ok but... :) */
 181 #define HASHEQ(x,y) ((ToLower(x)) == (ToLower(y)))
 182
 183 /* init_hash
 184  * Initialize the maps used by hash functions and clear the tables */
 185 void init_hash(void)
 186 {
 187   int           i;
 188   int           j;
 189   unsigned long l;
 190   unsigned long m;
 191
 192   /* Clear the hash tables first */
 193   for (l = 0; l < HASHSIZE; l++)
 194   {
 195     channelTable[l] = 0;
 196     clientTable[l]  = 0;
 197   }
 198
 199   /* Here is to what we "map" a char before working on it */
 200   for (i = CHAR_MIN; i <= CHAR_MAX; i++)
 201     hash_weight(i) = (HASHMEMS) (HASHSTEP * ((unsigned char)i));
 202
 203   /* Make them equal for case-independently equal chars, it's
 204      lame to do it this way but I wanted the code flexible, it's
 205      possible to change the HASHEQ macro and not touch here.
 206      I don't actually care about the 32768 loops since it happens
 207      only once at startup */
 208   for (i = CHAR_MIN; i <= CHAR_MAX; i++) {
 209     for (j = CHAR_MIN; j < i; j++) {
 210       if (HASHEQ(i, j))
 211         hash_weight(i) = hash_weight(j);
 212     }
 213   }
 214
 215   /* And this is our hash-loop "transformation" function,
 216      basically it will be hash_map[x] == ((x*HASHSHIFT)%HASHSIZE)
 217      defined for 0<=x<=(HASHSIZE+HASHSHIFT) */
 218   for (m = 0; m < (unsigned long)HASHMAPSIZE; m++)
 219   {
 220     l = m;
 221     l *= (unsigned long)HASHSHIFT;
 222     l %= (unsigned long)HASHSIZE;
 223     hash_map[m] = (HASHMEMS) l;
 224   }
 225 }
 226
 227 /* These are the actual hash functions, since they are static
 228    and very short any decent compiler at a good optimization level
 229    WILL inline these in the following functions */
 230
 231 /* This is the string hash function,
 232    WARNING: n must be a valid pointer to a _non-null_ string
 233    this means that not only strhash(NULL) but also
 234    strhash("") _will_ coredump, it's responsibility
 235    the caller to eventually check BadPtr(nick). */
 236
 237 static HASHREGS strhash(const char *n)
 238 {
 239   HASHREGS hash = hash_weight(*n++);
 240   while (*n)
 241     hash = hash_map[hash] + hash_weight(*n++);
 242   return hash_map[hash];
 243 }
 244
 245 /* And this is the string hash function for limited lenght strings
 246    WARNING: n must be a valid pointer to a non-null string
 247    and i must be > 0 ! */
 248
 249 /* REMOVED
 250
 251    The time taken to decrement i makes the function
 252    slower than strhash for the average of channel names (tested
 253    on 16000 real channel names, 1000 loops. I left the code here
 254    as a bookmark if a strnhash is evetually needed in the future.
 255
 256    static HASHREGS strnhash(const char *n, int i) {
 257    HASHREGS hash = hash_weight(*n++);
 258    i--;
 259    while(*n && i--)
 260    hash = hash_map[hash] + hash_weight(*n++);
 261    return hash_map[hash];
 262    }
 263
 264    #define CHANHASHLEN 30
 265
 266    !REMOVED */
 267
 268 /************************** Externally visible functions ********************/
 269
 270 /* Optimization note: in these functions I supposed that the CSE optimization
 271  * (Common Subexpression Elimination) does its work decently, this means that
 272  * I avoided introducing new variables to do the work myself and I did let
 273  * the optimizer play with more free registers, actual tests proved this
 274  * solution to be faster than doing things like tmp2=tmp->hnext... and then
 275  * use tmp2 myself wich would have given less freedom to the optimizer.
 276  */
 277
 278 /*
 279  * hAddClient
 280  * Adds a client's name in the proper hash linked list, can't fail,
 281  * cptr must have a non-null name or expect a coredump, the name is
 282  * infact taken from cptr->name
 283  */
 284 int hAddClient(struct Client *cptr)
 285 {
 286   HASHREGS hashv = strhash(cli_name(cptr));
 287
 288   cli_hnext(cptr) = clientTable[hashv];
 289   clientTable[hashv] = cptr;
 290
 291   return 0;
 292 }
 293
 294 /*
 295  * hAddChannel
 296  * Adds a channel's name in the proper hash linked list, can't fail.
 297  * chptr must have a non-null name or expect a coredump.
 298  * As before the name is taken from chptr->name, we do hash its entire
 299  * lenght since this proved to be statistically faster
 300  */
 301 int hAddChannel(struct Channel *chptr)
 302 {
 303   HASHREGS hashv = strhash(chptr->chname);
 304
 305   chptr->hnext = channelTable[hashv];
 306   channelTable[hashv] = chptr;
 307
 308   return 0;
 309 }
 310
 311 /*
 312  * hRemClient
 313  * Removes a Client's name from the hash linked list
 314  */
 315 int hRemClient(struct Client *cptr)
 316 {
 317   HASHREGS hashv = strhash(cli_name(cptr));
 318   struct Client *tmp = clientTable[hashv];
 319
 320   if (tmp == cptr) {
 321     clientTable[hashv] = cli_hnext(cptr);
 322     cli_hnext(cptr) = cptr;
 323     return 0;
 324   }
 325
 326   while (tmp) {
 327     if (cli_hnext(tmp) == cptr) {
 328       cli_hnext(tmp) = cli_hnext(cli_hnext(tmp));
 329       cli_hnext(cptr) = cptr;
 330       return 0;
 331     }
 332     tmp = cli_hnext(tmp);
 333   }
 334   return -1;
 335 }
 336
 337 /*
 338  * hChangeClient
 339  * Removes the old name of a client from a linked list and adds
 340  * the new one to another linked list, there is a slight chanche
 341  * that this is useless if the two hashes are the same but it still
 342  * would need to move the name to the top of the list.
 343  * As always it's responsibility of the caller to check that
 344  * both newname and cptr->name are valid names (not "" or NULL).
 345  * Typically, to change the nick of an already hashed client:
 346  * if (!BadPtr(newname) && ClearTheNameSomeHow(newname)) {
 347  *   hChangeClient(cptr, newname);
 348  *   strcpy(cptr->name, newname);
 349  *   };
 350  * There isn't an equivalent function for channels since they
 351  * don't change name.
 352  */
 353 int hChangeClient(struct Client *cptr, const char *newname)
 354 {
 355   HASHREGS newhash = strhash(newname);
 356
 357   assert(0 != cptr);
 358   hRemClient(cptr);
 359
 360   cli_hnext(cptr) = clientTable[newhash];
 361   clientTable[newhash] = cptr;
 362   return 0;
 363 }
 364
 365 /*
 366  * hRemChannel
 367  * Removes the channel's name from the corresponding hash linked list
 368  */
 369 int hRemChannel(struct Channel *chptr)
 370 {
 371   HASHREGS hashv = strhash(chptr->chname);
 372   struct Channel *tmp = channelTable[hashv];
 373
 374   if (tmp == chptr) {
 375     channelTable[hashv] = chptr->hnext;
 376     chptr->hnext = chptr;
 377     return 0;
 378   }
 379
 380   while (tmp) {
 381     if (tmp->hnext == chptr) {
 382       tmp->hnext = tmp->hnext->hnext;
 383       chptr->hnext = chptr;
 384       return 0;
 385     }
 386     tmp = tmp->hnext;
 387   }
 388
 389   return -1;
 390 }
 391
 392 /*
 393  * hSeekClient
 394  * New semantics: finds a client whose name is 'name' and whose
 395  * status is one of those marked in TMask, if can't find one
 396  * returns NULL. If it finds one moves it to the top of the list
 397  * and returns it.
 398  */
 399 struct Client* hSeekClient(const char *name, int TMask)
 400 {
 401   HASHREGS hashv      = strhash(name);
 402   struct Client *cptr = clientTable[hashv];
 403
 404   if (cptr) {
 405     if (0 == (cli_status(cptr) & TMask) || 0 != ircd_strcmp(name, cli_name(cptr))) {
 406       struct Client* prev;
 407       while (prev = cptr, cptr = cli_hnext(cptr)) {
 408         if ((cli_status(cptr) & TMask) && (0 == ircd_strcmp(name, cli_name(cptr)))) {
 409           cli_hnext(prev) = cli_hnext(cptr);
 410           cli_hnext(cptr) = clientTable[hashv];
 411           clientTable[hashv] = cptr;
 412           break;
 413         }
 414       }
 415     }
 416   }
 417   return cptr;
 418 }
 419
 420 /*
 421  * hSeekChannel
 422  * New semantics: finds a channel whose name is 'name',
 423  * if can't find one returns NULL, if can find it moves
 424  * it to the top of the list and returns it.
 425  */
 426 struct Channel* hSeekChannel(const char *name)
 427 {
 428   HASHREGS hashv = strhash(name);
 429   struct Channel *chptr = channelTable[hashv];
 430
 431   if (chptr) {
 432     if (0 != ircd_strcmp(name, chptr->chname)) {
 433       struct Channel* prev;
 434       while (prev = chptr, chptr = chptr->hnext) {
 435         if (0 == ircd_strcmp(name, chptr->chname)) {
 436           prev->hnext = chptr->hnext;
 437           chptr->hnext = channelTable[hashv];
 438           channelTable[hashv] = chptr;
 439           break;
 440         }
 441       }
 442     }
 443   }
 444   return chptr;
 445
 446 }
 447
 448 /* I will add some useful(?) statistics here one of these days,
 449    but not for DEBUGMODE: just to let the admins play with it,
 450    coders are able to SIGCORE the server and look into what goes
 451    on themselves :-) */
 452
 453 int m_hash(struct Client *cptr, struct Client *sptr, int parc, char *parv[])
 454 {
 455   int max_chain = 0;
 456   int buckets   = 0;
 457   int count     = 0;
 458   struct Client*  cl;
 459   struct Channel* ch;
 460   int i;
 461
 462   sendcmdto_one(&me, CMD_NOTICE, sptr, "%C :Hash Table Statistics", sptr);
 463
 464   for (i = 0; i < HASHSIZE; ++i) {
 465     if ((cl = clientTable[i])) {
 466       int len = 0;
 467       ++buckets;
 468       for ( ; cl; cl = cli_hnext(cl))
 469         ++len;
 470       if (len > max_chain)
 471         max_chain = len;
 472       count += len;
 473     }
 474   }
 475
 476   sendcmdto_one(&me, CMD_NOTICE, sptr, "%C :Client: entries: %d buckets: %d "
 477                 "max chain: %d", sptr, count, buckets, max_chain);
 478
 479   buckets = 0;
 480   count   = 0;
 481   max_chain = 0;
 482
 483   for (i = 0; i < HASHSIZE; ++i) {
 484     if ((ch = channelTable[i])) {
 485       int len = 0;
 486       ++buckets;
 487       for ( ; ch; ch = ch->hnext)
 488         ++len;
 489       if (len > max_chain)
 490         max_chain = len;
 491       count += len;
 492     }
 493   }
 494
 495   sendcmdto_one(&me, CMD_NOTICE, sptr, "%C :Channel: entries: %d buckets: %d "
 496                 "max chain: %d", sptr, count, buckets, max_chain);
 497   return 0;
 498 }
 499
 500 /* Nick jupe utilities, these are in a static hash table with entry/bucket
 501    ratio of one, collision shift up and roll in a circular fashion, the
 502    lowest 12 bits of the hash value are used, deletion is not supported,
 503    only addition, test for existence and cleanup of the table are.. */
 504
 505 #define JUPEHASHBITS 12         /* 4096 entries, 64 nick jupes allowed */
 506 #define JUPEHASHSIZE (1<<JUPEHASHBITS)
 507 #define JUPEHASHMASK (JUPEHASHSIZE-1)
 508 #define JUPEMAX      (1<<(JUPEHASHBITS-6))
 509
 510 static char jupeTable[JUPEHASHSIZE][NICKLEN + 1];       /* About 40k */
 511 static int jupesCount;
 512
 513 /*
 514  * isNickJuped()
 515  * Tells if a nick is juped (nonzero returned) or not (zero)
 516  */
 517 int isNickJuped(const char *nick)
 518 {
 519   int pos;
 520
 521   if (nick && *nick) {
 522     for (pos = strhash(nick); (pos &= JUPEHASHMASK), jupeTable[pos][0]; pos++) {
 523       if (0 == ircd_strcmp(nick, jupeTable[pos]))
 524         return 1;
 525     }
 526   }
 527   return 0;                     /* A bogus pointer is NOT a juped nick, right ? :) */
 528 }
 529
 530 /*
 531  * addNickJupes()
 532  * Adds a (comma separated list of) nick jupes to the table
 533  */
 534 int addNickJupes(const char *nicks)
 535 {
 536   static char temp[BUFSIZE + 1];
 537   char* one;
 538   char* p;
 539   int   pos;
 540
 541   if (nicks && *nicks)
 542   {
 543     ircd_strncpy(temp, nicks, BUFSIZE);
 544     temp[BUFSIZE] = '\0';
 545     p = NULL;
 546     for (one = ircd_strtok(&p, temp, ","); one; one = ircd_strtok(&p, NULL, ","))
 547     {
 548       if (!*one)
 549         continue;
 550       pos = strhash(one);
 551 loop:
 552       pos &= JUPEHASHMASK;
 553       if (!jupeTable[pos][0])
 554       {
 555         if (jupesCount == JUPEMAX)
 556           return 1;             /* Error: Jupe table is full ! */
 557         jupesCount++;
 558         ircd_strncpy(jupeTable[pos], one, NICKLEN);
 559         jupeTable[pos][NICKLEN] = '\000';       /* Better safe than sorry :) */
 560         continue;
 561       }
 562       if (0 == ircd_strcmp(one, jupeTable[pos]))
 563         continue;
 564       ++pos;
 565       goto loop;
 566     }
 567   }
 568   return 0;
 569 }
 570
 571 /*
 572  * clearNickJupes()
 573  * Cleans up the juped nicks table
 574  */
 575 void clearNickJupes(void)
 576 {
 577   int i;
 578   jupesCount = 0;
 579   for (i = 0; i < JUPEHASHSIZE; i++)
 580     jupeTable[i][0] = '\000';
 581 }