ircd/hash.c

   1 /*
   2  * IRC - Internet Relay Chat, ircd/hash.c
   3  * Copyright (C) 1998 Andrea Cocito, completely rewritten version.
   4  * Previous version was Copyright (C) 1991 Darren Reed, the concept
   5  * of linked lists for each hash bucket and the move-to-head
   6  * optimization has been borrowed from there.
   7  *
   8  * This program is free software; you can redistribute it and/or modify
   9  * it under the terms of the GNU General Public License as published by
  10  * the Free Software Foundation; either version 1, or (at your option)
  11  * any later version.
  12  *
  13  * This program is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16  * GNU General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU General Public License
  19  * along with this program; if not, write to the Free Software
  20  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  21  *
  22  * $Id$
  23  */
  24 #include "config.h"
  25
  26 #include "hash.h"
  27 #include "client.h"
  28 #include "channel.h"
  29 #include "ircd_chattr.h"
  30 #include "ircd_string.h"
  31 #include "ircd.h"
  32 #include "msg.h"
  33 #include "send.h"
  34 #include "struct.h"
  35 #include "sys.h"
  36
  37 #include <assert.h>
  38 #include <limits.h>
  39 #include <stdlib.h>
  40 #include <string.h>
  41
  42
  43 /************************* Nemesi's hash alghoritm ***********************/
  44
  45 /* This hash function returns *exactly* N%HASHSIZE, where 'N'
  46  * is the string itself (included the trailing '\0') seen as
  47  * a baseHASHSHIFT number whose "digits" are the bytes of the
  48  * number mapped through a "weight" transformation that gives
  49  * the same "weight" to caseless-equal chars, example:
  50  *
  51  * Hashing the string "Nick\0" the result will be:
  52  * N  i  c  k \0
  53  * |  |  |  |  `--->  ( (hash_weight('\0') * (HASHSHIFT**0) +
  54  * |  |  |  `------>    (hash_weight('k')  * (HASHSHIFT**1) +
  55  * |  |  `--------->    (hash_weight('c')  * (HASHSHIFT**2) +
  56  * |  `------------>    (hash_weight('i')  * (HASHSHIFT**3) +
  57  * `--------------->    (hash_weight('N')  * (HASHSHIFT**4)   ) % HASHSIZE
  58  *
  59  * It's actually a lot similar to a base transformation of the
  60  * text representation of an integer.
  61  * Looking at it this way seems slow and requiring unlimited integer
  62  * precision, but we actually do it with a *very* fast loop, using only
  63  * short integer arithmetic and by means of two memory accesses and
  64  * 3 additions per each byte processed.. and nothing else, as a side
  65  * note the distribution of real nicks over the hash table of this
  66  * function is about 3 times better than the previous one, and the
  67  * hash function itself is about 25% faster with a "normal" HASHSIZE
  68  * (it gets slower with larger ones and faster for smallest ones
  69  * because the hash table size affect the size of some maps and thus
  70  * the effectiveness of RAM caches while accesing them).
  71  * These two pages of macros are here to make the following code
  72  * _more_ understandeable... I hope ;)
  73  */
  74
  75 /* Internal stuff, think well before changing this, it's how
  76    much the weights of two lexicograhically contiguous chars
  77    differ, i.e. (hash_weight('b')-hash_weight('a')) == HASHSTEP
  78    One seems to be fine but the alghoritm doesn't depend on it */
  79 #define HASHSTEP 1
  80
  81 /* The smallest _prime_ int beeing HASHSTEP times bigger than a byte,
  82    that is the first prime bigger than the maximum hash_weight
  83    (since the maximum hash weight is gonne be the "biggest-byte * HASHSTEP")
  84  */
  85 #define HASHSHIFT 257
  86
  87 /* Are we sure that HASHSHIFT is big enough ? */
  88 #if !(HASHSHIFT > (HASHSTEP*(CHAR_MAX-CHAR_MIN)))
  89 #error "No no, I cannot, please make HASHSHIFT a bigger prime !"
  90 #endif
  91
  92 /* Now HASHSIZE doesn't need to be a prime, but we really don't want it
  93    to be an exact multiple of HASHSHIFT, that would make the distribution
  94    a LOT worse, once is not multiple of HASHSHIFT it can be anything */
  95 #if ((HASHSIZE%HASHSHIFT)==0)
  96 #error "Please set HASHSIZE to something not multiple of HASHSHIFT"
  97 #endif
  98
  99 /* What type of integer do we need in our computations ? the largest
 100    value we need to work on is (HASHSIZE+HASHSHIFT+1), for memory
 101    operations we want to keep the tables compact (the cache will work
 102    better and we will run faster) while for work variables we prefer
 103    to roundup to 'int' if it is the case: on platforms where int!=short
 104    int arithmetic is often faster than short arithmetic, we prefer signed
 105    types if they are big enough since on some architectures they are faster
 106    than unsigned, but we always keep signedness of mem and regs the same,
 107    to avoid sign conversions that sometimes require time, the following
 108    precompile stuff will set HASHMEMS to an appropriate integer type for
 109    the tables stored in memory and HASHREGS to an appropriate int type
 110    for the work registers/variables/return types. Everything of type
 111    HASH???S will remain internal to this source file so I placed this stuff
 112    here and not in the header file. */
 113
 114 #undef HASHMEMS
 115 #undef HASHREGS
 116
 117 #if ((!defined(HASHMEMS)) && (HASHSIZE < (SHRT_MAX-HASHSHIFT)))
 118 #define HASHMEMS short
 119 #define HASHREGS int
 120 #endif
 121
 122 #if ((!defined(HASHMEMS)) && (HASHSIZE < (USHRT_MAX-HASHSHIFT)))
 123 #define HASHMEMS unsigned short
 124 #define HASHREGS unsigned int
 125 #endif
 126
 127 #if ((!defined(HASHMEMS)) && (HASHSIZE < (INT_MAX-HASHSHIFT)))
 128 #define HASHMEMS int
 129 #define HASHREGS int
 130 #endif
 131
 132 #if ((!defined(HASHMEMS)) && (HASHSIZE < (UINT_MAX-HASHSHIFT)))
 133 #define HASHMEMS unsigned int
 134 #define HASHREGS unsigned int
 135 #endif
 136
 137 #if ((!defined(HASHMEMS)) && (HASHSIZE < (LONG_MAX-HASHSHIFT)))
 138 #define HASHMEMS long
 139 #define HASHREGS long
 140 #endif
 141
 142 #if ((!defined(HASHMEMS)) && (HASHSIZE < (ULONG_MAX-HASHSHIFT)))
 143 #define HASHMEMS unsigned long
 144 #define HASHREGS unsigned long
 145 #endif
 146
 147 #if (!defined(HASHMEMS))
 148 #error "Uh oh... I have a problem, do you want a 16GB hash table ? !"
 149 #endif
 150
 151 /* Now we are sure that HASHMEMS and HASHREGS can contain the following */
 152 #define HASHMAPSIZE (HASHSIZE+HASHSHIFT+1)
 153
 154 /* Static memory structures */
 155
 156 /* We need a first function that, given an integer h between 1 and
 157    HASHSIZE+HASHSHIFT, returns ( (h * HASHSHIFT) % HASHSIZE ) )
 158    We'll map this function in this table */
 159 static HASHMEMS hash_map[HASHMAPSIZE];
 160
 161 /* Then we need a second function that "maps" a char to its weitgh,
 162    changed to a table this one too, with this macro we can use a char
 163    as index and not care if it is signed or not, no.. this will not
 164    cause an addition to take place at each access, trust me, the
 165    optimizer takes it out of the actual code and passes "label+shift"
 166    to the linker, and the linker does the addition :) */
 167 static HASHMEMS hash_weight_table[CHAR_MAX - CHAR_MIN + 1];
 168 #define hash_weight(ch) hash_weight_table[ch-CHAR_MIN]
 169
 170 /* The actual hash tables, both MUST be of the same HASHSIZE, variable
 171    size tables could be supported but the rehash routine should also
 172    rebuild the transformation maps, I kept the tables of equal size
 173    so that I can use one hash function and one transformation map */
 174 static struct Client *clientTable[HASHSIZE];
 175 static struct Channel *channelTable[HASHSIZE];
 176
 177 /* This is what the hash function will consider "equal" chars, this function
 178    MUST be transitive, if HASHEQ(y,x)&&HASHEQ(y,z) then HASHEQ(y,z), and MUST
 179    be symmetric, if HASHEQ(a,b) then HASHEQ(b,a), obvious ok but... :) */
 180 #define HASHEQ(x,y) ((ToLower(x)) == (ToLower(y)))
 181
 182 /* init_hash
 183  * Initialize the maps used by hash functions and clear the tables */
 184 void init_hash(void)
 185 {
 186   int           i;
 187   int           j;
 188   unsigned long l;
 189   unsigned long m;
 190
 191   /* Clear the hash tables first */
 192   for (l = 0; l < HASHSIZE; l++)
 193   {
 194     channelTable[l] = 0;
 195     clientTable[l]  = 0;
 196   }
 197
 198   /* Here is to what we "map" a char before working on it */
 199   for (i = CHAR_MIN; i <= CHAR_MAX; i++)
 200     hash_weight(i) = (HASHMEMS) (HASHSTEP * ((unsigned char)i));
 201
 202   /* Make them equal for case-independently equal chars, it's
 203      lame to do it this way but I wanted the code flexible, it's
 204      possible to change the HASHEQ macro and not touch here.
 205      I don't actually care about the 32768 loops since it happens
 206      only once at startup */
 207   for (i = CHAR_MIN; i <= CHAR_MAX; i++) {
 208     for (j = CHAR_MIN; j < i; j++) {
 209       if (HASHEQ(i, j))
 210         hash_weight(i) = hash_weight(j);
 211     }
 212   }
 213
 214   /* And this is our hash-loop "transformation" function,
 215      basically it will be hash_map[x] == ((x*HASHSHIFT)%HASHSIZE)
 216      defined for 0<=x<=(HASHSIZE+HASHSHIFT) */
 217   for (m = 0; m < (unsigned long)HASHMAPSIZE; m++)
 218   {
 219     l = m;
 220     l *= (unsigned long)HASHSHIFT;
 221     l %= (unsigned long)HASHSIZE;
 222     hash_map[m] = (HASHMEMS) l;
 223   }
 224 }
 225
 226 /* These are the actual hash functions, since they are static
 227    and very short any decent compiler at a good optimization level
 228    WILL inline these in the following functions */
 229
 230 /* This is the string hash function,
 231    WARNING: n must be a valid pointer to a _non-null_ string
 232    this means that not only strhash(NULL) but also
 233    strhash("") _will_ coredump, it's responsibility
 234    the caller to eventually check BadPtr(nick). */
 235
 236 static HASHREGS strhash(const char *n)
 237 {
 238   HASHREGS hash = hash_weight(*n++);
 239   while (*n)
 240     hash = hash_map[hash] + hash_weight(*n++);
 241   return hash_map[hash];
 242 }
 243
 244 /* And this is the string hash function for limited lenght strings
 245    WARNING: n must be a valid pointer to a non-null string
 246    and i must be > 0 ! */
 247
 248 /* REMOVED
 249
 250    The time taken to decrement i makes the function
 251    slower than strhash for the average of channel names (tested
 252    on 16000 real channel names, 1000 loops. I left the code here
 253    as a bookmark if a strnhash is evetually needed in the future.
 254
 255    static HASHREGS strnhash(const char *n, int i) {
 256    HASHREGS hash = hash_weight(*n++);
 257    i--;
 258    while(*n && i--)
 259    hash = hash_map[hash] + hash_weight(*n++);
 260    return hash_map[hash];
 261    }
 262
 263    #define CHANHASHLEN 30
 264
 265    !REMOVED */
 266
 267 /************************** Externally visible functions ********************/
 268
 269 /* Optimization note: in these functions I supposed that the CSE optimization
 270  * (Common Subexpression Elimination) does its work decently, this means that
 271  * I avoided introducing new variables to do the work myself and I did let
 272  * the optimizer play with more free registers, actual tests proved this
 273  * solution to be faster than doing things like tmp2=tmp->hnext... and then
 274  * use tmp2 myself wich would have given less freedom to the optimizer.
 275  */
 276
 277 /*
 278  * hAddClient
 279  * Adds a client's name in the proper hash linked list, can't fail,
 280  * cptr must have a non-null name or expect a coredump, the name is
 281  * infact taken from cptr->name
 282  */
 283 int hAddClient(struct Client *cptr)
 284 {
 285   HASHREGS hashv = strhash(cli_name(cptr));
 286
 287   cli_hnext(cptr) = clientTable[hashv];
 288   clientTable[hashv] = cptr;
 289
 290   return 0;
 291 }
 292
 293 /*
 294  * hAddChannel
 295  * Adds a channel's name in the proper hash linked list, can't fail.
 296  * chptr must have a non-null name or expect a coredump.
 297  * As before the name is taken from chptr->name, we do hash its entire
 298  * lenght since this proved to be statistically faster
 299  */
 300 int hAddChannel(struct Channel *chptr)
 301 {
 302   HASHREGS hashv = strhash(chptr->chname);
 303
 304   chptr->hnext = channelTable[hashv];
 305   channelTable[hashv] = chptr;
 306
 307   return 0;
 308 }
 309
 310 /*
 311  * hRemClient
 312  * Removes a Client's name from the hash linked list
 313  */
 314 int hRemClient(struct Client *cptr)
 315 {
 316   HASHREGS hashv = strhash(cli_name(cptr));
 317   struct Client *tmp = clientTable[hashv];
 318
 319   if (tmp == cptr) {
 320     clientTable[hashv] = cli_hnext(cptr);
 321     cli_hnext(cptr) = cptr;
 322     return 0;
 323   }
 324
 325   while (tmp) {
 326     if (cli_hnext(tmp) == cptr) {
 327       cli_hnext(tmp) = cli_hnext(cli_hnext(tmp));
 328       cli_hnext(cptr) = cptr;
 329       return 0;
 330     }
 331     tmp = cli_hnext(tmp);
 332   }
 333   return -1;
 334 }
 335
 336 /*
 337  * hChangeClient
 338  * Removes the old name of a client from a linked list and adds
 339  * the new one to another linked list, there is a slight chanche
 340  * that this is useless if the two hashes are the same but it still
 341  * would need to move the name to the top of the list.
 342  * As always it's responsibility of the caller to check that
 343  * both newname and cptr->name are valid names (not "" or NULL).
 344  * Typically, to change the nick of an already hashed client:
 345  * if (!BadPtr(newname) && ClearTheNameSomeHow(newname)) {
 346  *   hChangeClient(cptr, newname);
 347  *   strcpy(cptr->name, newname);
 348  *   };
 349  * There isn't an equivalent function for channels since they
 350  * don't change name.
 351  */
 352 int hChangeClient(struct Client *cptr, const char *newname)
 353 {
 354   HASHREGS newhash = strhash(newname);
 355
 356   assert(0 != cptr);
 357   hRemClient(cptr);
 358
 359   cli_hnext(cptr) = clientTable[newhash];
 360   clientTable[newhash] = cptr;
 361   return 0;
 362 }
 363
 364 /*
 365  * hRemChannel
 366  * Removes the channel's name from the corresponding hash linked list
 367  */
 368 int hRemChannel(struct Channel *chptr)
 369 {
 370   HASHREGS hashv = strhash(chptr->chname);
 371   struct Channel *tmp = channelTable[hashv];
 372
 373   if (tmp == chptr) {
 374     channelTable[hashv] = chptr->hnext;
 375     chptr->hnext = chptr;
 376     return 0;
 377   }
 378
 379   while (tmp) {
 380     if (tmp->hnext == chptr) {
 381       tmp->hnext = tmp->hnext->hnext;
 382       chptr->hnext = chptr;
 383       return 0;
 384     }
 385     tmp = tmp->hnext;
 386   }
 387
 388   return -1;
 389 }
 390
 391 /*
 392  * hSeekClient
 393  * New semantics: finds a client whose name is 'name' and whose
 394  * status is one of those marked in TMask, if can't find one
 395  * returns NULL. If it finds one moves it to the top of the list
 396  * and returns it.
 397  */
 398 struct Client* hSeekClient(const char *name, int TMask)
 399 {
 400   HASHREGS hashv      = strhash(name);
 401   struct Client *cptr = clientTable[hashv];
 402
 403   if (cptr) {
 404     if (0 == (cli_status(cptr) & TMask) || 0 != ircd_strcmp(name, cli_name(cptr))) {
 405       struct Client* prev;
 406       while (prev = cptr, cptr = cli_hnext(cptr)) {
 407         if ((cli_status(cptr) & TMask) && (0 == ircd_strcmp(name, cli_name(cptr)))) {
 408           cli_hnext(prev) = cli_hnext(cptr);
 409           cli_hnext(cptr) = clientTable[hashv];
 410           clientTable[hashv] = cptr;
 411           break;
 412         }
 413       }
 414     }
 415   }
 416   return cptr;
 417 }
 418
 419 /*
 420  * hSeekChannel
 421  * New semantics: finds a channel whose name is 'name',
 422  * if can't find one returns NULL, if can find it moves
 423  * it to the top of the list and returns it.
 424  */
 425 struct Channel* hSeekChannel(const char *name)
 426 {
 427   HASHREGS hashv = strhash(name);
 428   struct Channel *chptr = channelTable[hashv];
 429
 430   if (chptr) {
 431     if (0 != ircd_strcmp(name, chptr->chname)) {
 432       struct Channel* prev;
 433       while (prev = chptr, chptr = chptr->hnext) {
 434         if (0 == ircd_strcmp(name, chptr->chname)) {
 435           prev->hnext = chptr->hnext;
 436           chptr->hnext = channelTable[hashv];
 437           channelTable[hashv] = chptr;
 438           break;
 439         }
 440       }
 441     }
 442   }
 443   return chptr;
 444
 445 }
 446
 447 /* I will add some useful(?) statistics here one of these days,
 448    but not for DEBUGMODE: just to let the admins play with it,
 449    coders are able to SIGCORE the server and look into what goes
 450    on themselves :-) */
 451
 452 int m_hash(struct Client *cptr, struct Client *sptr, int parc, char *parv[])
 453 {
 454   int max_chain = 0;
 455   int buckets   = 0;
 456   int count     = 0;
 457   struct Client*  cl;
 458   struct Channel* ch;
 459   int i;
 460
 461   sendcmdto_one(&me, CMD_NOTICE, sptr, "%C :Hash Table Statistics", sptr);
 462
 463   for (i = 0; i < HASHSIZE; ++i) {
 464     if ((cl = clientTable[i])) {
 465       int len = 0;
 466       ++buckets;
 467       for ( ; cl; cl = cli_hnext(cl))
 468         ++len;
 469       if (len > max_chain)
 470         max_chain = len;
 471       count += len;
 472     }
 473   }
 474
 475   sendcmdto_one(&me, CMD_NOTICE, sptr, "%C :Client: entries: %d buckets: %d "
 476                 "max chain: %d", sptr, count, buckets, max_chain);
 477
 478   buckets = 0;
 479   count   = 0;
 480   max_chain = 0;
 481
 482   for (i = 0; i < HASHSIZE; ++i) {
 483     if ((ch = channelTable[i])) {
 484       int len = 0;
 485       ++buckets;
 486       for ( ; ch; ch = ch->hnext)
 487         ++len;
 488       if (len > max_chain)
 489         max_chain = len;
 490       count += len;
 491     }
 492   }
 493
 494   sendcmdto_one(&me, CMD_NOTICE, sptr, "%C :Channel: entries: %d buckets: %d "
 495                 "max chain: %d", sptr, count, buckets, max_chain);
 496   return 0;
 497 }
 498
 499 /* Nick jupe utilities, these are in a static hash table with entry/bucket
 500    ratio of one, collision shift up and roll in a circular fashion, the
 501    lowest 12 bits of the hash value are used, deletion is not supported,
 502    only addition, test for existence and cleanup of the table are.. */
 503
 504 #define JUPEHASHBITS 12         /* 4096 entries, 64 nick jupes allowed */
 505 #define JUPEHASHSIZE (1<<JUPEHASHBITS)
 506 #define JUPEHASHMASK (JUPEHASHSIZE-1)
 507 #define JUPEMAX      (1<<(JUPEHASHBITS-6))
 508
 509 static char jupeTable[JUPEHASHSIZE][NICKLEN + 1];       /* About 40k */
 510 static int jupesCount;
 511
 512 /*
 513  * isNickJuped()
 514  * Tells if a nick is juped (nonzero returned) or not (zero)
 515  */
 516 int isNickJuped(const char *nick)
 517 {
 518   int pos;
 519
 520   if (nick && *nick) {
 521     for (pos = strhash(nick); (pos &= JUPEHASHMASK), jupeTable[pos][0]; pos++) {
 522       if (0 == ircd_strcmp(nick, jupeTable[pos]))
 523         return 1;
 524     }
 525   }
 526   return 0;                     /* A bogus pointer is NOT a juped nick, right ? :) */
 527 }
 528
 529 /*
 530  * addNickJupes()
 531  * Adds a (comma separated list of) nick jupes to the table
 532  */
 533 int addNickJupes(const char *nicks)
 534 {
 535   static char temp[BUFSIZE + 1];
 536   char* one;
 537   char* p;
 538   int   pos;
 539
 540   if (nicks && *nicks)
 541   {
 542     ircd_strncpy(temp, nicks, BUFSIZE);
 543     temp[BUFSIZE] = '\0';
 544     p = NULL;
 545     for (one = ircd_strtok(&p, temp, ","); one; one = ircd_strtok(&p, NULL, ","))
 546     {
 547       if (!*one)
 548         continue;
 549       pos = strhash(one);
 550 loop:
 551       pos &= JUPEHASHMASK;
 552       if (!jupeTable[pos][0])
 553       {
 554         if (jupesCount == JUPEMAX)
 555           return 1;             /* Error: Jupe table is full ! */
 556         jupesCount++;
 557         ircd_strncpy(jupeTable[pos], one, NICKLEN);
 558         jupeTable[pos][NICKLEN] = '\000';       /* Better safe than sorry :) */
 559         continue;
 560       }
 561       if (0 == ircd_strcmp(one, jupeTable[pos]))
 562         continue;
 563       ++pos;
 564       goto loop;
 565     }
 566   }
 567   return 0;
 568 }
 569
 570 /*
 571  * clearNickJupes()
 572  * Cleans up the juped nicks table
 573  */
 574 void clearNickJupes(void)
 575 {
 576   int i;
 577   jupesCount = 0;
 578   for (i = 0; i < JUPEHASHSIZE; i++)
 579     jupeTable[i][0] = '\000';
 580 }