Make "/stats flag server param" work as intended again.

[ircu2.10.12-pk.git] / ircd / hash.c
diff --git a/ircd/hash.c b/ircd/hash.c

index 94ff51dd5a4b1f7470cb88040795125905477896..aa264ddb158500431bc94d4830e4d9b00e48a4bc 100644 (file)
--- a/ircd/hash.c
+++ b/ircd/hash.c
@@ -24,255 +24,84 @@
  #include "hash.h"
  #include "client.h"
  #include "channel.h"
+#include "ircd_alloc.h"
  #include "ircd_chattr.h"
+#include "ircd_log.h"
+#include "ircd_reply.h"
  #include "ircd_string.h"
  #include "ircd.h"
+#include "match.h"
  #include "msg.h"
+#include "numeric.h"
+#include "random.h"
  #include "send.h"
  #include "struct.h"
  #include "sys.h"
  
-#include <assert.h>
+/* #include <assert.h> -- Now using assert in ircd_log.h */
  #include <limits.h>
  #include <stdlib.h>
  #include <string.h>
  
  
-/************************* Nemesi's hash alghoritm ***********************/
  /** @file
   * @brief Hash table management.
   * @version $Id$
   *
- * This hash function returns *exactly* N%HASHSIZE, where 'N'
- * is the string itself (included the trailing '\\0') seen as
- * a baseHASHSHIFT number whose "digits" are the bytes of the
- * number mapped through a "weight" transformation that gives
- * the same "weight" to caseless-equal chars, example:
- *
- * Hashing the string "Nick\0" the result will be:
- * N  i  c  k \\0
- * |  |  |  |  `--->  ( (hash_weight('\\0') * (HASHSHIFT**0) +
- * |  |  |  `------>    (hash_weight('k')  * (HASHSHIFT**1) +
- * |  |  `--------->    (hash_weight('c')  * (HASHSHIFT**2) +
- * |  `------------>    (hash_weight('i')  * (HASHSHIFT**3) +
- * `--------------->    (hash_weight('N')  * (HASHSHIFT**4)   ) % HASHSIZE
- *
- * It's actually a lot similar to a base transformation of the
- * text representation of an integer.
- * Looking at it this way seems slow and requiring unlimited integer
- * precision, but we actually do it with a *very* fast loop, using only 
- * short integer arithmetic and by means of two memory accesses and 
- * 3 additions per each byte processed.. and nothing else, as a side
- * note the distribution of real nicks over the hash table of this
- * function is about 3 times better than the previous one, and the
- * hash function itself is about 25% faster with a "normal" HASHSIZE
- * (it gets slower with larger ones and faster for smallest ones
- * because the hash table size affect the size of some maps and thus
- * the effectiveness of RAM caches while accesing them).
- * These two pages of macros are here to make the following code
- * _more_ understandeable... I hope ;)
- *
- * If you ask me, this whole mess is ungodly complicated for very
- * little benefit. -Entrope
+ * This file used to use some very complicated hash function.  Now it
+ * uses CRC-32, but effectively remaps each input byte according to a
+ * table initialized at startup.
   */
  
-/** Internal stuff, think well before changing this, it's how
-   much the weights of two lexicograhically contiguous chars
-   differ, i.e.\ (hash_weight('b')-hash_weight('a')) == HASHSTEP.
-   One seems to be fine but the algorithm doesn't depend on it. */
-#define HASHSTEP 1
-
-/** The smallest _prime_ int beeing HASHSTEP times bigger than a byte.
-   That is, the first prime bigger than the maximum hash_weight
-   (since the maximum hash weight is gonna be the "biggest-byte * HASHSTEP")
- */
-#define HASHSHIFT 257
-
-/* Are we sure that HASHSHIFT is big enough ? */
-#if !(HASHSHIFT > (HASHSTEP*(CHAR_MAX-CHAR_MIN)))
-#error "No no, I cannot, please make HASHSHIFT a bigger prime !"
-#endif
-
-/* Now HASHSIZE doesn't need to be a prime, but we really don't want it
-   to be an exact multiple of HASHSHIFT, that would make the distribution
-   a LOT worse, once is not multiple of HASHSHIFT it can be anything */
-#if ((HASHSIZE%HASHSHIFT)==0)
-#error "Please set HASHSIZE to something not multiple of HASHSHIFT"
-#endif
-
-/* What type of integer do we need in our computations ? the largest
-   value we need to work on is (HASHSIZE+HASHSHIFT+1), for memory
-   operations we want to keep the tables compact (the cache will work
-   better and we will run faster) while for work variables we prefer
-   to roundup to 'int' if it is the case: on platforms where int!=short
-   int arithmetic is often faster than short arithmetic, we prefer signed
-   types if they are big enough since on some architectures they are faster
-   than unsigned, but we always keep signedness of mem and regs the same,
-   to avoid sign conversions that sometimes require time, the following 
-   precompile stuff will set HASHMEMS to an appropriate integer type for 
-   the tables stored in memory and HASHREGS to an appropriate int type
-   for the work registers/variables/return types. Everything of type
-   HASH???S will remain internal to this source file so I placed this stuff
-   here and not in the header file. */
-
-#undef HASHMEMS
-#undef HASHREGS
-
-#if ((!defined(HASHMEMS)) && (HASHSIZE < (SHRT_MAX-HASHSHIFT)))
-#define HASHMEMS short
-#define HASHREGS int
-#endif
-
-#if ((!defined(HASHMEMS)) && (HASHSIZE < (USHRT_MAX-HASHSHIFT)))
-#define HASHMEMS unsigned short
-#define HASHREGS unsigned int
-#endif
-
-#if ((!defined(HASHMEMS)) && (HASHSIZE < (INT_MAX-HASHSHIFT)))
-#define HASHMEMS int
-#define HASHREGS int
-#endif
-
-#if ((!defined(HASHMEMS)) && (HASHSIZE < (UINT_MAX-HASHSHIFT)))
-#define HASHMEMS unsigned int
-#define HASHREGS unsigned int
-#endif
-
-#if ((!defined(HASHMEMS)) && (HASHSIZE < (LONG_MAX-HASHSHIFT)))
-#define HASHMEMS long
-#define HASHREGS long
-#endif
-
-#if ((!defined(HASHMEMS)) && (HASHSIZE < (ULONG_MAX-HASHSHIFT)))
-#define HASHMEMS unsigned long
-#define HASHREGS unsigned long
-#endif
-
-#if (!defined(HASHMEMS))
-#error "Uh oh... I have a problem, do you want a 16GB hash table ? !"
-#endif
-
-/* Now we are sure that HASHMEMS and HASHREGS can contain the following */
-/** Size of #hash_map array. */
-#define HASHMAPSIZE (HASHSIZE+HASHSHIFT+1)
-
-/* Static memory structures */
-
-/** We need a first function that, given an integer h between 1 and
-   HASHSIZE+HASHSHIFT, returns ( (h * HASHSHIFT) % HASHSIZE ) ).
-   We'll map this function in this table. */
-static HASHMEMS hash_map[HASHMAPSIZE];
-
-/** Then we need a second function that "maps" a char to its weight.
-   Changed to a table this one too, with this macro we can use a char
-   as index and not care if it is signed or not, no.. this will not
-   cause an addition to take place at each access, trust me, the
-   optimizer takes it out of the actual code and passes "label+shift"
-   to the linker, and the linker does the addition :) */
-static HASHMEMS hash_weight_table[CHAR_MAX - CHAR_MIN + 1];
-/** Helper macro to look characters up in #hash_weight_table. */
-#define hash_weight(ch) hash_weight_table[ch-CHAR_MIN]
-
-/* The actual hash tables, both MUST be of the same HASHSIZE, variable
-   size tables could be supported but the rehash routine should also
-   rebuild the transformation maps, I kept the tables of equal size 
-   so that I can use one hash function and one transformation map */
  /** Hash table for clients. */
  static struct Client *clientTable[HASHSIZE];
  /** Hash table for channels. */
  static struct Channel *channelTable[HASHSIZE];
+/** CRC-32 update table. */
+static uint32_t crc32hash[256];
  
-/* This is what the hash function will consider "equal" chars, this function 
-   MUST be transitive, if HASHEQ(y,x)&&HASHEQ(y,z) then HASHEQ(y,z), and MUST
-   be symmetric, if HASHEQ(a,b) then HASHEQ(b,a), obvious ok but... :) */
-/** Helper macro for character comparison. */
-#define HASHEQ(x,y) ((ToLower(x)) == (ToLower(y)))
-
-/** Initialize the maps used by hash functions and clear the tables. */
+/** Initialize the map used by the hash function. */
  void init_hash(void)
  {
-  int           i;
-  int           j;
-  unsigned long l;
-  unsigned long m;
+  unsigned int ii, jj, rand, poly;
  
-  /* Clear the hash tables first */
-  for (l = 0; l < HASHSIZE; l++)
+  /* First calculate a normal CRC-32 table. */
+  for (ii = 0, poly = 0xedb88320; ii < 256; ii++)
    {
-    channelTable[l] = 0;
-    clientTable[l]  = 0;
+    rand = ii;
+    for (jj = 0; jj < 8; jj++)
+      rand = (rand & 1) ? poly ^ (rand >> 1) : rand >> 1;
+    crc32hash[ii] = rand;
    }
  
-  /* Here is to what we "map" a char before working on it */
-  for (i = CHAR_MIN; i <= CHAR_MAX; i++)
-    hash_weight(i) = (HASHMEMS) (HASHSTEP * ((unsigned char)i));
-
-  /* Make them equal for case-independently equal chars, it's
-     lame to do it this way but I wanted the code flexible, it's
-     possible to change the HASHEQ macro and not touch here. 
-     I don't actually care about the 32768 loops since it happens 
-     only once at startup */
-  for (i = CHAR_MIN; i <= CHAR_MAX; i++) {
-    for (j = CHAR_MIN; j < i; j++) {
-      if (HASHEQ(i, j))
-        hash_weight(i) = hash_weight(j);
-    }
-  }
-
-  /* And this is our hash-loop "transformation" function, 
-     basically it will be hash_map[x] == ((x*HASHSHIFT)%HASHSIZE)
-     defined for 0<=x<=(HASHSIZE+HASHSHIFT) */
-  for (m = 0; m < (unsigned long)HASHMAPSIZE; m++)
+  /* Now reorder the hash table. */
+  for (ii = 0, rand = 0; ii < 256; ii++)
    {
-    l = m;
-    l *= (unsigned long)HASHSHIFT;
-    l %= (unsigned long)HASHSIZE;
-    hash_map[m] = (HASHMEMS) l;
+    if (!rand)
+      rand = ircrandom();
+    poly = ii + rand % (256 - ii);
+    jj = crc32hash[ii];
+    crc32hash[ii] = crc32hash[poly];
+    crc32hash[poly] = jj;
+    rand >>= 8;
    }
  }
  
-/* These are the actual hash functions, since they are static
-   and very short any decent compiler at a good optimization level
-   WILL inline these in the following functions */
-
-/* This is the string hash function,
-   WARNING: n must be a valid pointer to a _non-null_ string
-   this means that not only strhash(NULL) but also
-   strhash("") _will_ coredump, it's responsibility
-   the caller to eventually check BadPtr(nick). */
+/** Output type of hash function. */
+typedef unsigned int HASHREGS;
  
-/** Calculate hash value for a string. */
+/** Calculate hash value for a string.
+ * @param[in] n String to hash.
+ * @return Hash value for string.
+ */
  static HASHREGS strhash(const char *n)
  {
-  HASHREGS hash = hash_weight(*n++);
+  HASHREGS hash = crc32hash[ToLower(*n++) & 255];
    while (*n)
-    hash = hash_map[hash] + hash_weight(*n++);
-  return hash_map[hash];
+    hash = (hash >> 8) ^ crc32hash[(hash ^ ToLower(*n++)) & 255];
+  return hash % HASHSIZE;
  }
  
-/* And this is the string hash function for limited lenght strings
-   WARNING: n must be a valid pointer to a non-null string
-   and i must be > 0 ! */
-
-/* REMOVED
-
-   The time taken to decrement i makes the function
-   slower than strhash for the average of channel names (tested
-   on 16000 real channel names, 1000 loops. I left the code here
-   as a bookmark if a strnhash is evetually needed in the future.
-
-   static HASHREGS strnhash(const char *n, int i) {
-   HASHREGS hash = hash_weight(*n++);
-   i--;
-   while(*n && i--)
-   hash = hash_map[hash] + hash_weight(*n++);
-   return hash_map[hash];
-   }
-
-   #define CHANHASHLEN 30
-
-   !REMOVED */
-
  /************************** Externally visible functions ********************/
  
  /* Optimization note: in these functions I supposed that the CSE optimization
@@ -280,7 +109,7 @@ static HASHREGS strhash(const char *n)
   * I avoided introducing new variables to do the work myself and I did let
   * the optimizer play with more free registers, actual tests proved this
   * solution to be faster than doing things like tmp2=tmp->hnext... and then
- * use tmp2 myself wich would have given less freedom to the optimizer.
+ * use tmp2 myself which would have given less freedom to the optimizer.
   */
  
  /** Prepend a client to the appropriate hash bucket.
@@ -579,3 +408,74 @@ void clearNickJupes(void)
    for (i = 0; i < JUPEHASHSIZE; i++)
      jupeTable[i][0] = '\000';
  }
+
+/** Report all nick jupes to a user.
+ * @param[in] to Client requesting statistics.
+ * @param[in] sd Stats descriptor for request (ignored).
+ * @param[in] param Extra parameter from user (ignored).
+ */
+void
+stats_nickjupes(struct Client* to, const struct StatDesc* sd, char* param)
+{
+  int i;
+  for (i = 0; i < JUPEHASHSIZE; i++)
+    if (jupeTable[i][0])
+      send_reply(to, RPL_STATSJLINE, jupeTable[i]);
+}
+
+/** Send more channels to a client in mid-LIST.
+ * @param[in] cptr Client to send the list to.
+ */
+void list_next_channels(struct Client *cptr)
+{
+  struct ListingArgs *args;
+  struct Channel *chptr;
+
+  /* Walk consecutive buckets until we hit the end. */
+  for (args = cli_listing(cptr); args->bucket < HASHSIZE; args->bucket++)
+  {
+    /* Send all the matching channels in the bucket. */
+    for (chptr = channelTable[args->bucket]; chptr; chptr = chptr->hnext)
+    {
+      if (chptr->users > args->min_users
+          && chptr->users < args->max_users
+          && chptr->creationtime > args->min_time
+          && chptr->creationtime < args->max_time
+          && (!args->wildcard[0] || (args->flags & LISTARG_NEGATEWILDCARD) ||
+              (!match(args->wildcard, chptr->chname)))
+          && (!(args->flags & LISTARG_NEGATEWILDCARD) ||
+              match(args->wildcard, chptr->chname))
+          && (!(args->flags & LISTARG_TOPICLIMITS)
+              || (chptr->topic[0]
+                  && chptr->topic_time > args->min_topic_time
+                  && chptr->topic_time < args->max_topic_time))
+          && ((args->flags & LISTARG_SHOWSECRET)
+              || ShowChannel(cptr, chptr)))
+      {
+        if (args->flags & LISTARG_SHOWMODES) {
+          char modebuf[MODEBUFLEN];
+          char parabuf[MODEBUFLEN];
+
+          modebuf[0] = modebuf[1] = parabuf[0] = '\0';
+          channel_modes(cptr, modebuf, parabuf, sizeof(parabuf), chptr, NULL);
+          send_reply(cptr, RPL_LIST | SND_EXPLICIT, "%s %u %s %s :%s",
+                     chptr->chname, chptr->users, modebuf, parabuf, chptr->topic);
+        } else {
+          send_reply(cptr, RPL_LIST, chptr->chname, chptr->users, chptr->topic);
+        }
+      }
+    }
+    /* If, at the end of the bucket, client sendq is more than half
+     * full, stop. */
+    if (MsgQLength(&cli_sendQ(cptr)) > cli_max_sendq(cptr) / 2)
+      break;
+  }
+
+  /* If we did all buckets, clean the client and send RPL_LISTEND. */
+  if (args->bucket >= HASHSIZE)
+  {
+    MyFree(cli_listing(cptr));
+    cli_listing(cptr) = NULL;
+    send_reply(cptr, RPL_LISTEND);
+  }
+}