#include "hash.h"
#include "client.h"
#include "channel.h"
+#include "ircd_alloc.h"
#include "ircd_chattr.h"
+#include "ircd_log.h"
+#include "ircd_reply.h"
#include "ircd_string.h"
#include "ircd.h"
+#include "match.h"
#include "msg.h"
+#include "numeric.h"
+#include "random.h"
#include "send.h"
#include "struct.h"
#include "sys.h"
-#include <assert.h>
+/* #include <assert.h> -- Now using assert in ircd_log.h */
#include <limits.h>
#include <stdlib.h>
#include <string.h>
-/************************* Nemesi's hash alghoritm ***********************/
/** @file
* @brief Hash table management.
* @version $Id$
*
- * This hash function returns *exactly* N%HASHSIZE, where 'N'
- * is the string itself (included the trailing '\\0') seen as
- * a baseHASHSHIFT number whose "digits" are the bytes of the
- * number mapped through a "weight" transformation that gives
- * the same "weight" to caseless-equal chars, example:
- *
- * Hashing the string "Nick\0" the result will be:
- * N i c k \\0
- * | | | | `---> ( (hash_weight('\\0') * (HASHSHIFT**0) +
- * | | | `------> (hash_weight('k') * (HASHSHIFT**1) +
- * | | `---------> (hash_weight('c') * (HASHSHIFT**2) +
- * | `------------> (hash_weight('i') * (HASHSHIFT**3) +
- * `---------------> (hash_weight('N') * (HASHSHIFT**4) ) % HASHSIZE
- *
- * It's actually a lot similar to a base transformation of the
- * text representation of an integer.
- * Looking at it this way seems slow and requiring unlimited integer
- * precision, but we actually do it with a *very* fast loop, using only
- * short integer arithmetic and by means of two memory accesses and
- * 3 additions per each byte processed.. and nothing else, as a side
- * note the distribution of real nicks over the hash table of this
- * function is about 3 times better than the previous one, and the
- * hash function itself is about 25% faster with a "normal" HASHSIZE
- * (it gets slower with larger ones and faster for smallest ones
- * because the hash table size affect the size of some maps and thus
- * the effectiveness of RAM caches while accesing them).
- * These two pages of macros are here to make the following code
- * _more_ understandeable... I hope ;)
- *
- * If you ask me, this whole mess is ungodly complicated for very
- * little benefit. -Entrope
+ * This file used to use some very complicated hash function. Now it
+ * uses CRC-32, but effectively remaps each input byte according to a
+ * table initialized at startup.
*/
-/** Internal stuff, think well before changing this, it's how
- much the weights of two lexicograhically contiguous chars
- differ, i.e.\ (hash_weight('b')-hash_weight('a')) == HASHSTEP.
- One seems to be fine but the algorithm doesn't depend on it. */
-#define HASHSTEP 1
-
-/** The smallest _prime_ int beeing HASHSTEP times bigger than a byte.
- That is, the first prime bigger than the maximum hash_weight
- (since the maximum hash weight is gonna be the "biggest-byte * HASHSTEP")
- */
-#define HASHSHIFT 257
-
-/* Are we sure that HASHSHIFT is big enough ? */
-#if !(HASHSHIFT > (HASHSTEP*(CHAR_MAX-CHAR_MIN)))
-#error "No no, I cannot, please make HASHSHIFT a bigger prime !"
-#endif
-
-/* Now HASHSIZE doesn't need to be a prime, but we really don't want it
- to be an exact multiple of HASHSHIFT, that would make the distribution
- a LOT worse, once is not multiple of HASHSHIFT it can be anything */
-#if ((HASHSIZE%HASHSHIFT)==0)
-#error "Please set HASHSIZE to something not multiple of HASHSHIFT"
-#endif
-
-/* What type of integer do we need in our computations ? the largest
- value we need to work on is (HASHSIZE+HASHSHIFT+1), for memory
- operations we want to keep the tables compact (the cache will work
- better and we will run faster) while for work variables we prefer
- to roundup to 'int' if it is the case: on platforms where int!=short
- int arithmetic is often faster than short arithmetic, we prefer signed
- types if they are big enough since on some architectures they are faster
- than unsigned, but we always keep signedness of mem and regs the same,
- to avoid sign conversions that sometimes require time, the following
- precompile stuff will set HASHMEMS to an appropriate integer type for
- the tables stored in memory and HASHREGS to an appropriate int type
- for the work registers/variables/return types. Everything of type
- HASH???S will remain internal to this source file so I placed this stuff
- here and not in the header file. */
-
-#undef HASHMEMS
-#undef HASHREGS
-
-#if ((!defined(HASHMEMS)) && (HASHSIZE < (SHRT_MAX-HASHSHIFT)))
-#define HASHMEMS short
-#define HASHREGS int
-#endif
-
-#if ((!defined(HASHMEMS)) && (HASHSIZE < (USHRT_MAX-HASHSHIFT)))
-#define HASHMEMS unsigned short
-#define HASHREGS unsigned int
-#endif
-
-#if ((!defined(HASHMEMS)) && (HASHSIZE < (INT_MAX-HASHSHIFT)))
-#define HASHMEMS int
-#define HASHREGS int
-#endif
-
-#if ((!defined(HASHMEMS)) && (HASHSIZE < (UINT_MAX-HASHSHIFT)))
-#define HASHMEMS unsigned int
-#define HASHREGS unsigned int
-#endif
-
-#if ((!defined(HASHMEMS)) && (HASHSIZE < (LONG_MAX-HASHSHIFT)))
-#define HASHMEMS long
-#define HASHREGS long
-#endif
-
-#if ((!defined(HASHMEMS)) && (HASHSIZE < (ULONG_MAX-HASHSHIFT)))
-#define HASHMEMS unsigned long
-#define HASHREGS unsigned long
-#endif
-
-#if (!defined(HASHMEMS))
-#error "Uh oh... I have a problem, do you want a 16GB hash table ? !"
-#endif
-
-/* Now we are sure that HASHMEMS and HASHREGS can contain the following */
-/** Size of #hash_map array. */
-#define HASHMAPSIZE (HASHSIZE+HASHSHIFT+1)
-
-/* Static memory structures */
-
-/** We need a first function that, given an integer h between 1 and
- HASHSIZE+HASHSHIFT, returns ( (h * HASHSHIFT) % HASHSIZE ) ).
- We'll map this function in this table. */
-static HASHMEMS hash_map[HASHMAPSIZE];
-
-/** Then we need a second function that "maps" a char to its weight.
- Changed to a table this one too, with this macro we can use a char
- as index and not care if it is signed or not, no.. this will not
- cause an addition to take place at each access, trust me, the
- optimizer takes it out of the actual code and passes "label+shift"
- to the linker, and the linker does the addition :) */
-static HASHMEMS hash_weight_table[CHAR_MAX - CHAR_MIN + 1];
-/** Helper macro to look characters up in #hash_weight_table. */
-#define hash_weight(ch) hash_weight_table[ch-CHAR_MIN]
-
-/* The actual hash tables, both MUST be of the same HASHSIZE, variable
- size tables could be supported but the rehash routine should also
- rebuild the transformation maps, I kept the tables of equal size
- so that I can use one hash function and one transformation map */
/** Hash table for clients. */
static struct Client *clientTable[HASHSIZE];
/** Hash table for channels. */
static struct Channel *channelTable[HASHSIZE];
+/** CRC-32 update table. */
+static uint32_t crc32hash[256];
-/* This is what the hash function will consider "equal" chars, this function
- MUST be transitive, if HASHEQ(y,x)&&HASHEQ(y,z) then HASHEQ(y,z), and MUST
- be symmetric, if HASHEQ(a,b) then HASHEQ(b,a), obvious ok but... :) */
-/** Helper macro for character comparison. */
-#define HASHEQ(x,y) ((ToLower(x)) == (ToLower(y)))
-
-/** Initialize the maps used by hash functions and clear the tables. */
+/** Initialize the map used by the hash function. */
void init_hash(void)
{
- int i;
- int j;
- unsigned long l;
- unsigned long m;
+ unsigned int ii, jj, rand, poly;
- /* Clear the hash tables first */
- for (l = 0; l < HASHSIZE; l++)
+ /* First calculate a normal CRC-32 table. */
+ for (ii = 0, poly = 0xedb88320; ii < 256; ii++)
{
- channelTable[l] = 0;
- clientTable[l] = 0;
+ rand = ii;
+ for (jj = 0; jj < 8; jj++)
+ rand = (rand & 1) ? poly ^ (rand >> 1) : rand >> 1;
+ crc32hash[ii] = rand;
}
- /* Here is to what we "map" a char before working on it */
- for (i = CHAR_MIN; i <= CHAR_MAX; i++)
- hash_weight(i) = (HASHMEMS) (HASHSTEP * ((unsigned char)i));
-
- /* Make them equal for case-independently equal chars, it's
- lame to do it this way but I wanted the code flexible, it's
- possible to change the HASHEQ macro and not touch here.
- I don't actually care about the 32768 loops since it happens
- only once at startup */
- for (i = CHAR_MIN; i <= CHAR_MAX; i++) {
- for (j = CHAR_MIN; j < i; j++) {
- if (HASHEQ(i, j))
- hash_weight(i) = hash_weight(j);
- }
- }
-
- /* And this is our hash-loop "transformation" function,
- basically it will be hash_map[x] == ((x*HASHSHIFT)%HASHSIZE)
- defined for 0<=x<=(HASHSIZE+HASHSHIFT) */
- for (m = 0; m < (unsigned long)HASHMAPSIZE; m++)
+ /* Now reorder the hash table. */
+ for (ii = 0, rand = 0; ii < 256; ii++)
{
- l = m;
- l *= (unsigned long)HASHSHIFT;
- l %= (unsigned long)HASHSIZE;
- hash_map[m] = (HASHMEMS) l;
+ if (!rand)
+ rand = ircrandom();
+ poly = ii + rand % (256 - ii);
+ jj = crc32hash[ii];
+ crc32hash[ii] = crc32hash[poly];
+ crc32hash[poly] = jj;
+ rand >>= 8;
}
}
-/* These are the actual hash functions, since they are static
- and very short any decent compiler at a good optimization level
- WILL inline these in the following functions */
-
-/* This is the string hash function,
- WARNING: n must be a valid pointer to a _non-null_ string
- this means that not only strhash(NULL) but also
- strhash("") _will_ coredump, it's responsibility
- the caller to eventually check BadPtr(nick). */
+/** Output type of hash function. */
+typedef unsigned int HASHREGS;
-/** Calculate hash value for a string. */
+/** Calculate hash value for a string.
+ * @param[in] n String to hash.
+ * @return Hash value for string.
+ */
static HASHREGS strhash(const char *n)
{
- HASHREGS hash = hash_weight(*n++);
+ HASHREGS hash = crc32hash[ToLower(*n++) & 255];
while (*n)
- hash = hash_map[hash] + hash_weight(*n++);
- return hash_map[hash];
+ hash = (hash >> 8) ^ crc32hash[(hash ^ ToLower(*n++)) & 255];
+ return hash % HASHSIZE;
}
-/* And this is the string hash function for limited lenght strings
- WARNING: n must be a valid pointer to a non-null string
- and i must be > 0 ! */
-
-/* REMOVED
-
- The time taken to decrement i makes the function
- slower than strhash for the average of channel names (tested
- on 16000 real channel names, 1000 loops. I left the code here
- as a bookmark if a strnhash is evetually needed in the future.
-
- static HASHREGS strnhash(const char *n, int i) {
- HASHREGS hash = hash_weight(*n++);
- i--;
- while(*n && i--)
- hash = hash_map[hash] + hash_weight(*n++);
- return hash_map[hash];
- }
-
- #define CHANHASHLEN 30
-
- !REMOVED */
-
/************************** Externally visible functions ********************/
/* Optimization note: in these functions I supposed that the CSE optimization
* I avoided introducing new variables to do the work myself and I did let
* the optimizer play with more free registers, actual tests proved this
* solution to be faster than doing things like tmp2=tmp->hnext... and then
- * use tmp2 myself wich would have given less freedom to the optimizer.
+ * use tmp2 myself which would have given less freedom to the optimizer.
*/
/** Prepend a client to the appropriate hash bucket.
for (i = 0; i < JUPEHASHSIZE; i++)
jupeTable[i][0] = '\000';
}
+
+/** Report all nick jupes to a user.
+ * @param[in] to Client requesting statistics.
+ * @param[in] sd Stats descriptor for request (ignored).
+ * @param[in] param Extra parameter from user (ignored).
+ */
+void
+stats_nickjupes(struct Client* to, const struct StatDesc* sd, char* param)
+{
+ int i;
+ for (i = 0; i < JUPEHASHSIZE; i++)
+ if (jupeTable[i][0])
+ send_reply(to, RPL_STATSJLINE, jupeTable[i]);
+}
+
+/** Send more channels to a client in mid-LIST.
+ * @param[in] cptr Client to send the list to.
+ */
+void list_next_channels(struct Client *cptr)
+{
+ struct ListingArgs *args;
+ struct Channel *chptr;
+
+ /* Walk consecutive buckets until we hit the end. */
+ for (args = cli_listing(cptr); args->bucket < HASHSIZE; args->bucket++)
+ {
+ /* Send all the matching channels in the bucket. */
+ for (chptr = channelTable[args->bucket]; chptr; chptr = chptr->hnext)
+ {
+ if (chptr->users > args->min_users
+ && chptr->users < args->max_users
+ && chptr->creationtime > args->min_time
+ && chptr->creationtime < args->max_time
+ && (!args->wildcard[0] || (args->flags & LISTARG_NEGATEWILDCARD) ||
+ (!match(args->wildcard, chptr->chname)))
+ && (!(args->flags & LISTARG_NEGATEWILDCARD) ||
+ match(args->wildcard, chptr->chname))
+ && (!(args->flags & LISTARG_TOPICLIMITS)
+ || (chptr->topic[0]
+ && chptr->topic_time > args->min_topic_time
+ && chptr->topic_time < args->max_topic_time))
+ && ((args->flags & LISTARG_SHOWSECRET)
+ || ShowChannel(cptr, chptr)))
+ {
+ if (args->flags & LISTARG_SHOWMODES) {
+ char modebuf[MODEBUFLEN];
+ char parabuf[MODEBUFLEN];
+
+ modebuf[0] = modebuf[1] = parabuf[0] = '\0';
+ channel_modes(cptr, modebuf, parabuf, sizeof(parabuf), chptr, NULL);
+ send_reply(cptr, RPL_LIST | SND_EXPLICIT, "%s %u %s %s :%s",
+ chptr->chname, chptr->users, modebuf, parabuf, chptr->topic);
+ } else {
+ send_reply(cptr, RPL_LIST, chptr->chname, chptr->users, chptr->topic);
+ }
+ }
+ }
+ /* If, at the end of the bucket, client sendq is more than half
+ * full, stop. */
+ if (MsgQLength(&cli_sendQ(cptr)) > cli_max_sendq(cptr) / 2)
+ break;
+ }
+
+ /* If we did all buckets, clean the client and send RPL_LISTEND. */
+ if (args->bucket >= HASHSIZE)
+ {
+ MyFree(cli_listing(cptr));
+ cli_listing(cptr) = NULL;
+ send_reply(cptr, RPL_LISTEND);
+ }
+}