ircd/hash.c

   1 /*
   2  * IRC - Internet Relay Chat, ircd/hash.c
   3  * Copyright (C) 1998 Andrea Cocito, completely rewritten version.
   4  * Previous version was Copyright (C) 1991 Darren Reed, the concept
   5  * of linked lists for each hash bucket and the move-to-head
   6  * optimization has been borrowed from there.
   7  *
   8  * This program is free software; you can redistribute it and/or modify
   9  * it under the terms of the GNU General Public License as published by
  10  * the Free Software Foundation; either version 1, or (at your option)
  11  * any later version.
  12  *
  13  * This program is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16  * GNU General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU General Public License
  19  * along with this program; if not, write to the Free Software
  20  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  21  */
  22 #include "config.h"
  23
  24 #include "hash.h"
  25 #include "client.h"
  26 #include "channel.h"
  27 #include "ircd_chattr.h"
  28 #include "ircd_string.h"
  29 #include "ircd.h"
  30 #include "msg.h"
  31 #include "send.h"
  32 #include "struct.h"
  33 #include "sys.h"
  34
  35 #include <assert.h>
  36 #include <limits.h>
  37 #include <stdlib.h>
  38 #include <string.h>
  39
  40
  41 /************************* Nemesi's hash alghoritm ***********************/
  42 /** @file
  43  * @brief Hash table management.
  44  * @version $Id$
  45  *
  46  * This hash function returns *exactly* N%HASHSIZE, where 'N'
  47  * is the string itself (included the trailing '\\0') seen as
  48  * a baseHASHSHIFT number whose "digits" are the bytes of the
  49  * number mapped through a "weight" transformation that gives
  50  * the same "weight" to caseless-equal chars, example:
  51  *
  52  * Hashing the string "Nick\0" the result will be:
  53  * N  i  c  k \\0
  54  * |  |  |  |  `--->  ( (hash_weight('\\0') * (HASHSHIFT**0) +
  55  * |  |  |  `------>    (hash_weight('k')  * (HASHSHIFT**1) +
  56  * |  |  `--------->    (hash_weight('c')  * (HASHSHIFT**2) +
  57  * |  `------------>    (hash_weight('i')  * (HASHSHIFT**3) +
  58  * `--------------->    (hash_weight('N')  * (HASHSHIFT**4)   ) % HASHSIZE
  59  *
  60  * It's actually a lot similar to a base transformation of the
  61  * text representation of an integer.
  62  * Looking at it this way seems slow and requiring unlimited integer
  63  * precision, but we actually do it with a *very* fast loop, using only
  64  * short integer arithmetic and by means of two memory accesses and
  65  * 3 additions per each byte processed.. and nothing else, as a side
  66  * note the distribution of real nicks over the hash table of this
  67  * function is about 3 times better than the previous one, and the
  68  * hash function itself is about 25% faster with a "normal" HASHSIZE
  69  * (it gets slower with larger ones and faster for smallest ones
  70  * because the hash table size affect the size of some maps and thus
  71  * the effectiveness of RAM caches while accesing them).
  72  * These two pages of macros are here to make the following code
  73  * _more_ understandeable... I hope ;)
  74  *
  75  * If you ask me, this whole mess is ungodly complicated for very
  76  * little benefit. -Entrope
  77  */
  78
  79 /** Internal stuff, think well before changing this, it's how
  80    much the weights of two lexicograhically contiguous chars
  81    differ, i.e.\ (hash_weight('b')-hash_weight('a')) == HASHSTEP.
  82    One seems to be fine but the algorithm doesn't depend on it. */
  83 #define HASHSTEP 1
  84
  85 /** The smallest _prime_ int beeing HASHSTEP times bigger than a byte.
  86    That is, the first prime bigger than the maximum hash_weight
  87    (since the maximum hash weight is gonna be the "biggest-byte * HASHSTEP")
  88  */
  89 #define HASHSHIFT 257
  90
  91 /* Are we sure that HASHSHIFT is big enough ? */
  92 #if !(HASHSHIFT > (HASHSTEP*(CHAR_MAX-CHAR_MIN)))
  93 #error "No no, I cannot, please make HASHSHIFT a bigger prime !"
  94 #endif
  95
  96 /* Now HASHSIZE doesn't need to be a prime, but we really don't want it
  97    to be an exact multiple of HASHSHIFT, that would make the distribution
  98    a LOT worse, once is not multiple of HASHSHIFT it can be anything */
  99 #if ((HASHSIZE%HASHSHIFT)==0)
 100 #error "Please set HASHSIZE to something not multiple of HASHSHIFT"
 101 #endif
 102
 103 /* What type of integer do we need in our computations ? the largest
 104    value we need to work on is (HASHSIZE+HASHSHIFT+1), for memory
 105    operations we want to keep the tables compact (the cache will work
 106    better and we will run faster) while for work variables we prefer
 107    to roundup to 'int' if it is the case: on platforms where int!=short
 108    int arithmetic is often faster than short arithmetic, we prefer signed
 109    types if they are big enough since on some architectures they are faster
 110    than unsigned, but we always keep signedness of mem and regs the same,
 111    to avoid sign conversions that sometimes require time, the following
 112    precompile stuff will set HASHMEMS to an appropriate integer type for
 113    the tables stored in memory and HASHREGS to an appropriate int type
 114    for the work registers/variables/return types. Everything of type
 115    HASH???S will remain internal to this source file so I placed this stuff
 116    here and not in the header file. */
 117
 118 #undef HASHMEMS
 119 #undef HASHREGS
 120
 121 #if ((!defined(HASHMEMS)) && (HASHSIZE < (SHRT_MAX-HASHSHIFT)))
 122 #define HASHMEMS short
 123 #define HASHREGS int
 124 #endif
 125
 126 #if ((!defined(HASHMEMS)) && (HASHSIZE < (USHRT_MAX-HASHSHIFT)))
 127 #define HASHMEMS unsigned short
 128 #define HASHREGS unsigned int
 129 #endif
 130
 131 #if ((!defined(HASHMEMS)) && (HASHSIZE < (INT_MAX-HASHSHIFT)))
 132 #define HASHMEMS int
 133 #define HASHREGS int
 134 #endif
 135
 136 #if ((!defined(HASHMEMS)) && (HASHSIZE < (UINT_MAX-HASHSHIFT)))
 137 #define HASHMEMS unsigned int
 138 #define HASHREGS unsigned int
 139 #endif
 140
 141 #if ((!defined(HASHMEMS)) && (HASHSIZE < (LONG_MAX-HASHSHIFT)))
 142 #define HASHMEMS long
 143 #define HASHREGS long
 144 #endif
 145
 146 #if ((!defined(HASHMEMS)) && (HASHSIZE < (ULONG_MAX-HASHSHIFT)))
 147 #define HASHMEMS unsigned long
 148 #define HASHREGS unsigned long
 149 #endif
 150
 151 #if (!defined(HASHMEMS))
 152 #error "Uh oh... I have a problem, do you want a 16GB hash table ? !"
 153 #endif
 154
 155 /* Now we are sure that HASHMEMS and HASHREGS can contain the following */
 156 /** Size of #hash_map array. */
 157 #define HASHMAPSIZE (HASHSIZE+HASHSHIFT+1)
 158
 159 /* Static memory structures */
 160
 161 /** We need a first function that, given an integer h between 1 and
 162    HASHSIZE+HASHSHIFT, returns ( (h * HASHSHIFT) % HASHSIZE ) ).
 163    We'll map this function in this table. */
 164 static HASHMEMS hash_map[HASHMAPSIZE];
 165
 166 /** Then we need a second function that "maps" a char to its weight.
 167    Changed to a table this one too, with this macro we can use a char
 168    as index and not care if it is signed or not, no.. this will not
 169    cause an addition to take place at each access, trust me, the
 170    optimizer takes it out of the actual code and passes "label+shift"
 171    to the linker, and the linker does the addition :) */
 172 static HASHMEMS hash_weight_table[CHAR_MAX - CHAR_MIN + 1];
 173 /** Helper macro to look characters up in #hash_weight_table. */
 174 #define hash_weight(ch) hash_weight_table[ch-CHAR_MIN]
 175
 176 /* The actual hash tables, both MUST be of the same HASHSIZE, variable
 177    size tables could be supported but the rehash routine should also
 178    rebuild the transformation maps, I kept the tables of equal size
 179    so that I can use one hash function and one transformation map */
 180 /** Hash table for clients. */
 181 static struct Client *clientTable[HASHSIZE];
 182 /** Hash table for channels. */
 183 static struct Channel *channelTable[HASHSIZE];
 184
 185 /* This is what the hash function will consider "equal" chars, this function
 186    MUST be transitive, if HASHEQ(y,x)&&HASHEQ(y,z) then HASHEQ(y,z), and MUST
 187    be symmetric, if HASHEQ(a,b) then HASHEQ(b,a), obvious ok but... :) */
 188 /** Helper macro for character comparison. */
 189 #define HASHEQ(x,y) ((ToLower(x)) == (ToLower(y)))
 190
 191 /** Initialize the maps used by hash functions and clear the tables. */
 192 void init_hash(void)
 193 {
 194   int           i;
 195   int           j;
 196   unsigned long l;
 197   unsigned long m;
 198
 199   /* Clear the hash tables first */
 200   for (l = 0; l < HASHSIZE; l++)
 201   {
 202     channelTable[l] = 0;
 203     clientTable[l]  = 0;
 204   }
 205
 206   /* Here is to what we "map" a char before working on it */
 207   for (i = CHAR_MIN; i <= CHAR_MAX; i++)
 208     hash_weight(i) = (HASHMEMS) (HASHSTEP * ((unsigned char)i));
 209
 210   /* Make them equal for case-independently equal chars, it's
 211      lame to do it this way but I wanted the code flexible, it's
 212      possible to change the HASHEQ macro and not touch here.
 213      I don't actually care about the 32768 loops since it happens
 214      only once at startup */
 215   for (i = CHAR_MIN; i <= CHAR_MAX; i++) {
 216     for (j = CHAR_MIN; j < i; j++) {
 217       if (HASHEQ(i, j))
 218         hash_weight(i) = hash_weight(j);
 219     }
 220   }
 221
 222   /* And this is our hash-loop "transformation" function,
 223      basically it will be hash_map[x] == ((x*HASHSHIFT)%HASHSIZE)
 224      defined for 0<=x<=(HASHSIZE+HASHSHIFT) */
 225   for (m = 0; m < (unsigned long)HASHMAPSIZE; m++)
 226   {
 227     l = m;
 228     l *= (unsigned long)HASHSHIFT;
 229     l %= (unsigned long)HASHSIZE;
 230     hash_map[m] = (HASHMEMS) l;
 231   }
 232 }
 233
 234 /* These are the actual hash functions, since they are static
 235    and very short any decent compiler at a good optimization level
 236    WILL inline these in the following functions */
 237
 238 /* This is the string hash function,
 239    WARNING: n must be a valid pointer to a _non-null_ string
 240    this means that not only strhash(NULL) but also
 241    strhash("") _will_ coredump, it's responsibility
 242    the caller to eventually check BadPtr(nick). */
 243
 244 /** Calculate hash value for a string. */
 245 static HASHREGS strhash(const char *n)
 246 {
 247   HASHREGS hash = hash_weight(*n++);
 248   while (*n)
 249     hash = hash_map[hash] + hash_weight(*n++);
 250   return hash_map[hash];
 251 }
 252
 253 /* And this is the string hash function for limited lenght strings
 254    WARNING: n must be a valid pointer to a non-null string
 255    and i must be > 0 ! */
 256
 257 /* REMOVED
 258
 259    The time taken to decrement i makes the function
 260    slower than strhash for the average of channel names (tested
 261    on 16000 real channel names, 1000 loops. I left the code here
 262    as a bookmark if a strnhash is evetually needed in the future.
 263
 264    static HASHREGS strnhash(const char *n, int i) {
 265    HASHREGS hash = hash_weight(*n++);
 266    i--;
 267    while(*n && i--)
 268    hash = hash_map[hash] + hash_weight(*n++);
 269    return hash_map[hash];
 270    }
 271
 272    #define CHANHASHLEN 30
 273
 274    !REMOVED */
 275
 276 /************************** Externally visible functions ********************/
 277
 278 /* Optimization note: in these functions I supposed that the CSE optimization
 279  * (Common Subexpression Elimination) does its work decently, this means that
 280  * I avoided introducing new variables to do the work myself and I did let
 281  * the optimizer play with more free registers, actual tests proved this
 282  * solution to be faster than doing things like tmp2=tmp->hnext... and then
 283  * use tmp2 myself wich would have given less freedom to the optimizer.
 284  */
 285
 286 /** Prepend a client to the appropriate hash bucket.
 287  * @param[in] cptr Client to add to hash table.
 288  * @return Zero.
 289  */
 290 int hAddClient(struct Client *cptr)
 291 {
 292   HASHREGS hashv = strhash(cli_name(cptr));
 293
 294   cli_hnext(cptr) = clientTable[hashv];
 295   clientTable[hashv] = cptr;
 296
 297   return 0;
 298 }
 299
 300 /** Prepend a channel to the appropriate hash bucket.
 301  * @param[in] chptr Channel to add to hash table.
 302  * @return Zero.
 303  */
 304 int hAddChannel(struct Channel *chptr)
 305 {
 306   HASHREGS hashv = strhash(chptr->chname);
 307
 308   chptr->hnext = channelTable[hashv];
 309   channelTable[hashv] = chptr;
 310
 311   return 0;
 312 }
 313
 314 /** Remove a client from its hash bucket.
 315  * @param[in] cptr Client to remove from hash table.
 316  * @return Zero if the client is found and removed, -1 if not found.
 317  */
 318 int hRemClient(struct Client *cptr)
 319 {
 320   HASHREGS hashv = strhash(cli_name(cptr));
 321   struct Client *tmp = clientTable[hashv];
 322
 323   if (tmp == cptr) {
 324     clientTable[hashv] = cli_hnext(cptr);
 325     cli_hnext(cptr) = cptr;
 326     return 0;
 327   }
 328
 329   while (tmp) {
 330     if (cli_hnext(tmp) == cptr) {
 331       cli_hnext(tmp) = cli_hnext(cli_hnext(tmp));
 332       cli_hnext(cptr) = cptr;
 333       return 0;
 334     }
 335     tmp = cli_hnext(tmp);
 336   }
 337   return -1;
 338 }
 339
 340 /** Rename a client in the hash table.
 341  * @param[in] cptr Client whose nickname is changing.
 342  * @param[in] newname New nickname for client.
 343  * @return Zero.
 344  */
 345 int hChangeClient(struct Client *cptr, const char *newname)
 346 {
 347   HASHREGS newhash = strhash(newname);
 348
 349   assert(0 != cptr);
 350   hRemClient(cptr);
 351
 352   cli_hnext(cptr) = clientTable[newhash];
 353   clientTable[newhash] = cptr;
 354   return 0;
 355 }
 356
 357 /** Remove a channel from its hash bucket.
 358  * @param[in] chptr Channel to remove from hash table.
 359  * @return Zero if the channel is found and removed, -1 if not found.
 360  */
 361 int hRemChannel(struct Channel *chptr)
 362 {
 363   HASHREGS hashv = strhash(chptr->chname);
 364   struct Channel *tmp = channelTable[hashv];
 365
 366   if (tmp == chptr) {
 367     channelTable[hashv] = chptr->hnext;
 368     chptr->hnext = chptr;
 369     return 0;
 370   }
 371
 372   while (tmp) {
 373     if (tmp->hnext == chptr) {
 374       tmp->hnext = tmp->hnext->hnext;
 375       chptr->hnext = chptr;
 376       return 0;
 377     }
 378     tmp = tmp->hnext;
 379   }
 380
 381   return -1;
 382 }
 383
 384 /** Find a client by name, filtered by status mask.
 385  * If a client is found, it is moved to the top of its hash bucket.
 386  * @param[in] name Client name to search for.
 387  * @param[in] TMask Bitmask of status bits, any of which are needed to match.
 388  * @return Matching client, or NULL if none.
 389  */
 390 struct Client* hSeekClient(const char *name, int TMask)
 391 {
 392   HASHREGS hashv      = strhash(name);
 393   struct Client *cptr = clientTable[hashv];
 394
 395   if (cptr) {
 396     if (0 == (cli_status(cptr) & TMask) || 0 != ircd_strcmp(name, cli_name(cptr))) {
 397       struct Client* prev;
 398       while (prev = cptr, cptr = cli_hnext(cptr)) {
 399         if ((cli_status(cptr) & TMask) && (0 == ircd_strcmp(name, cli_name(cptr)))) {
 400           cli_hnext(prev) = cli_hnext(cptr);
 401           cli_hnext(cptr) = clientTable[hashv];
 402           clientTable[hashv] = cptr;
 403           break;
 404         }
 405       }
 406     }
 407   }
 408   return cptr;
 409 }
 410
 411 /** Find a channel by name.
 412  * If a channel is found, it is moved to the top of its hash bucket.
 413  * @param[in] name Channel name to search for.
 414  * @return Matching channel, or NULL if none.
 415  */
 416 struct Channel* hSeekChannel(const char *name)
 417 {
 418   HASHREGS hashv = strhash(name);
 419   struct Channel *chptr = channelTable[hashv];
 420
 421   if (chptr) {
 422     if (0 != ircd_strcmp(name, chptr->chname)) {
 423       struct Channel* prev;
 424       while (prev = chptr, chptr = chptr->hnext) {
 425         if (0 == ircd_strcmp(name, chptr->chname)) {
 426           prev->hnext = chptr->hnext;
 427           chptr->hnext = channelTable[hashv];
 428           channelTable[hashv] = chptr;
 429           break;
 430         }
 431       }
 432     }
 433   }
 434   return chptr;
 435
 436 }
 437
 438 /* I will add some useful(?) statistics here one of these days,
 439    but not for DEBUGMODE: just to let the admins play with it,
 440    coders are able to SIGCORE the server and look into what goes
 441    on themselves :-) */
 442
 443 /** Report hash table statistics to a client.
 444  * @param[in] cptr Client that sent us this message.
 445  * @param[in] sptr Client that originated the message.
 446  * @param[in] parc Number of arguments.
 447  * @param[in] parv Argument array.
 448  * @return Zero.
 449  */
 450 int m_hash(struct Client *cptr, struct Client *sptr, int parc, char *parv[])
 451 {
 452   int max_chain = 0;
 453   int buckets   = 0;
 454   int count     = 0;
 455   struct Client*  cl;
 456   struct Channel* ch;
 457   int i;
 458
 459   sendcmdto_one(&me, CMD_NOTICE, sptr, "%C :Hash Table Statistics", sptr);
 460
 461   for (i = 0; i < HASHSIZE; ++i) {
 462     if ((cl = clientTable[i])) {
 463       int len = 0;
 464       ++buckets;
 465       for ( ; cl; cl = cli_hnext(cl))
 466         ++len;
 467       if (len > max_chain)
 468         max_chain = len;
 469       count += len;
 470     }
 471   }
 472
 473   sendcmdto_one(&me, CMD_NOTICE, sptr, "%C :Client: entries: %d buckets: %d "
 474                 "max chain: %d", sptr, count, buckets, max_chain);
 475
 476   buckets = 0;
 477   count   = 0;
 478   max_chain = 0;
 479
 480   for (i = 0; i < HASHSIZE; ++i) {
 481     if ((ch = channelTable[i])) {
 482       int len = 0;
 483       ++buckets;
 484       for ( ; ch; ch = ch->hnext)
 485         ++len;
 486       if (len > max_chain)
 487         max_chain = len;
 488       count += len;
 489     }
 490   }
 491
 492   sendcmdto_one(&me, CMD_NOTICE, sptr, "%C :Channel: entries: %d buckets: %d "
 493                 "max chain: %d", sptr, count, buckets, max_chain);
 494   return 0;
 495 }
 496
 497 /* Nick jupe utilities, these are in a static hash table with entry/bucket
 498    ratio of one, collision shift up and roll in a circular fashion, the
 499    lowest 12 bits of the hash value are used, deletion is not supported,
 500    only addition, test for existence and cleanup of the table are.. */
 501
 502 /** Number of bits in jupe hash value. */
 503 #define JUPEHASHBITS 12         /* 4096 entries, 64 nick jupes allowed */
 504 /** Size of jupe hash table. */
 505 #define JUPEHASHSIZE (1<<JUPEHASHBITS)
 506 /** Bitmask to select into jupe hash table. */
 507 #define JUPEHASHMASK (JUPEHASHSIZE-1)
 508 /** Maximum number of jupes allowed. */
 509 #define JUPEMAX      (1<<(JUPEHASHBITS-6))
 510
 511 /** Hash table for jupes. */
 512 static char jupeTable[JUPEHASHSIZE][NICKLEN + 1];       /* About 40k */
 513 /** Count of jupes. */
 514 static int jupesCount;
 515
 516 /** Check whether a nickname is juped.
 517  * @param[in] nick Nickname to check.
 518  * @return Non-zero of the nickname is juped, zero if not.
 519  */
 520 int isNickJuped(const char *nick)
 521 {
 522   int pos;
 523
 524   if (nick && *nick) {
 525     for (pos = strhash(nick); (pos &= JUPEHASHMASK), jupeTable[pos][0]; pos++) {
 526       if (0 == ircd_strcmp(nick, jupeTable[pos]))
 527         return 1;
 528     }
 529   }
 530   return 0;                     /* A bogus pointer is NOT a juped nick, right ? :) */
 531 }
 532
 533 /** Add a comma-separated list of nick jupes.
 534  * @param[in] nicks List of nicks to jupe, separated by commas.
 535  * @return Zero on success, non-zero on error.
 536  */
 537 int addNickJupes(const char *nicks)
 538 {
 539   static char temp[BUFSIZE + 1];
 540   char* one;
 541   char* p;
 542   int   pos;
 543
 544   if (nicks && *nicks)
 545   {
 546     ircd_strncpy(temp, nicks, BUFSIZE);
 547     temp[BUFSIZE] = '\0';
 548     p = NULL;
 549     for (one = ircd_strtok(&p, temp, ","); one; one = ircd_strtok(&p, NULL, ","))
 550     {
 551       if (!*one)
 552         continue;
 553       pos = strhash(one);
 554 loop:
 555       pos &= JUPEHASHMASK;
 556       if (!jupeTable[pos][0])
 557       {
 558         if (jupesCount == JUPEMAX)
 559           return 1;             /* Error: Jupe table is full ! */
 560         jupesCount++;
 561         ircd_strncpy(jupeTable[pos], one, NICKLEN);
 562         jupeTable[pos][NICKLEN] = '\000';       /* Better safe than sorry :) */
 563         continue;
 564       }
 565       if (0 == ircd_strcmp(one, jupeTable[pos]))
 566         continue;
 567       ++pos;
 568       goto loop;
 569     }
 570   }
 571   return 0;
 572 }
 573
 574 /** Empty the table of juped nicknames. */
 575 void clearNickJupes(void)
 576 {
 577   int i;
 578   jupesCount = 0;
 579   for (i = 0; i < JUPEHASHSIZE; i++)
 580     jupeTable[i][0] = '\000';
 581 }