X-Git-Url: http://git.pk910.de/?a=blobdiff_plain;f=ircd%2Fhash.c;h=aa264ddb158500431bc94d4830e4d9b00e48a4bc;hb=refs%2Fheads%2Fupstream;hp=94ff51dd5a4b1f7470cb88040795125905477896;hpb=caedb441b8692d644e24344492db46aedbc62f5a;p=ircu2.10.12-pk.git

diff --git a/ircd/hash.c b/ircd/hash.c
index 94ff51d..aa264dd 100644
--- a/ircd/hash.c
+++ b/ircd/hash.c
@@ -24,255 +24,84 @@
 #include "hash.h"
 #include "client.h"
 #include "channel.h"
+#include "ircd_alloc.h"
 #include "ircd_chattr.h"
+#include "ircd_log.h"
+#include "ircd_reply.h"
 #include "ircd_string.h"
 #include "ircd.h"
+#include "match.h"
 #include "msg.h"
+#include "numeric.h"
+#include "random.h"
 #include "send.h"
 #include "struct.h"
 #include "sys.h"
-#include <assert.h>
+/* #include <assert.h> -- Now using assert in ircd_log.h */
 #include <limits.h>
 #include <stdlib.h>
 #include <string.h>
 
-/************************* Nemesi's hash alghoritm ***********************/
 /** @file
  * @brief Hash table management.
  * @version $Id$
  *
- * This hash function returns *exactly* N%HASHSIZE, where 'N'
- * is the string itself (included the trailing '\\0') seen as
- * a baseHASHSHIFT number whose "digits" are the bytes of the
- * number mapped through a "weight" transformation that gives
- * the same "weight" to caseless-equal chars, example:
- *
- * Hashing the string "Nick\0" the result will be:
- *  N  i  c  k \\0
- *  |  |  |  |  `---> ( (hash_weight('\\0') * (HASHSHIFT**0) +
- *  |  |  |  `------>   (hash_weight('k')  * (HASHSHIFT**1) +
- *  |  |  `--------->   (hash_weight('c')  * (HASHSHIFT**2) +
- *  |  `------------>   (hash_weight('i')  * (HASHSHIFT**3) +
- *  `--------------->   (hash_weight('N')  * (HASHSHIFT**4)   ) % HASHSIZE
- *
- * It's actually a lot similar to a base transformation of the
- * text representation of an integer.
- * Looking at it this way seems slow and requiring unlimited integer
- * precision, but we actually do it with a *very* fast loop, using only
- * short integer arithmetic and by means of two memory accesses and
- * 3 additions per each byte processed.. and nothing else, as a side
- * note the distribution of real nicks over the hash table of this
- * function is about 3 times better than the previous one, and the
- * hash function itself is about 25% faster with a "normal" HASHSIZE
- * (it gets slower with larger ones and faster for smallest ones
- * because the hash table size affect the size of some maps and thus
- * the effectiveness of RAM caches while accesing them).
- * These two pages of macros are here to make the following code
- * _more_ understandeable... I hope ;)
- *
- * If you ask me, this whole mess is ungodly complicated for very
- * little benefit. -Entrope
+ * This file used to use some very complicated hash function.  Now it
+ * uses CRC-32, but effectively remaps each input byte according to a
+ * table initialized at startup.
  */
 
-/** Internal stuff, think well before changing this, it's how
-   much the weights of two lexicograhically contiguous chars
-   differ, i.e.\ (hash_weight('b')-hash_weight('a')) == HASHSTEP.
-   One seems to be fine but the algorithm doesn't depend on it. */
-#define HASHSTEP 1
-
-/** The smallest _prime_ int beeing HASHSTEP times bigger than a byte.
-   That is, the first prime bigger than the maximum hash_weight
-   (since the maximum hash weight is gonna be the "biggest-byte * HASHSTEP")
- */
-#define HASHSHIFT 257
-
-/* Are we sure that HASHSHIFT is big enough ? */
-#if !(HASHSHIFT > (HASHSTEP*(CHAR_MAX-CHAR_MIN)))
-#error "No no, I cannot, please make HASHSHIFT a bigger prime !"
-#endif - -/* Now HASHSIZE doesn't need to be a prime, but we really don't want it - to be an exact multiple of HASHSHIFT, that would make the distribution - a LOT worse, once is not multiple of HASHSHIFT it can be anything */ -#if ((HASHSIZE%HASHSHIFT)==0) -#error "Please set HASHSIZE to something not multiple of HASHSHIFT" -#endif - -/* What type of integer do we need in our computations ? the largest - value we need to work on is (HASHSIZE+HASHSHIFT+1), for memory - operations we want to keep the tables compact (the cache will work - better and we will run faster) while for work variables we prefer - to roundup to 'int' if it is the case: on platforms where int!=short - int arithmetic is often faster than short arithmetic, we prefer signed - types if they are big enough since on some architectures they are faster - than unsigned, but we always keep signedness of mem and regs the same, - to avoid sign conversions that sometimes require time, the following - precompile stuff will set HASHMEMS to an appropriate integer type for - the tables stored in memory and HASHREGS to an appropriate int type - for the work registers/variables/return types. Everything of type - HASH???S will remain internal to this source file so I placed this stuff - here and not in the header file. */ - -#undef HASHMEMS -#undef HASHREGS - -#if ((!defined(HASHMEMS)) && (HASHSIZE < (SHRT_MAX-HASHSHIFT))) -#define HASHMEMS short -#define HASHREGS int -#endif - -#if ((!defined(HASHMEMS)) && (HASHSIZE < (USHRT_MAX-HASHSHIFT))) -#define HASHMEMS unsigned short -#define HASHREGS unsigned int -#endif - -#if ((!defined(HASHMEMS)) && (HASHSIZE < (INT_MAX-HASHSHIFT))) -#define HASHMEMS int -#define HASHREGS int -#endif - -#if ((!defined(HASHMEMS)) && (HASHSIZE < (UINT_MAX-HASHSHIFT))) -#define HASHMEMS unsigned int -#define HASHREGS unsigned int -#endif - -#if ((!defined(HASHMEMS)) && (HASHSIZE < (LONG_MAX-HASHSHIFT))) -#define HASHMEMS long -#define HASHREGS long -#endif - -#if ((!defined(HASHMEMS)) && (HASHSIZE < (ULONG_MAX-HASHSHIFT))) -#define HASHMEMS unsigned long -#define HASHREGS unsigned long -#endif - -#if (!defined(HASHMEMS)) -#error "Uh oh... I have a problem, do you want a 16GB hash table ? !" -#endif - -/* Now we are sure that HASHMEMS and HASHREGS can contain the following */ -/** Size of #hash_map array. */ -#define HASHMAPSIZE (HASHSIZE+HASHSHIFT+1) - -/* Static memory structures */ - -/** We need a first function that, given an integer h between 1 and - HASHSIZE+HASHSHIFT, returns ( (h * HASHSHIFT) % HASHSIZE ) ). - We'll map this function in this table. */ -static HASHMEMS hash_map[HASHMAPSIZE]; - -/** Then we need a second function that "maps" a char to its weight. - Changed to a table this one too, with this macro we can use a char - as index and not care if it is signed or not, no.. this will not - cause an addition to take place at each access, trust me, the - optimizer takes it out of the actual code and passes "label+shift" - to the linker, and the linker does the addition :) */ -static HASHMEMS hash_weight_table[CHAR_MAX - CHAR_MIN + 1]; -/** Helper macro to look characters up in #hash_weight_table. */ -#define hash_weight(ch) hash_weight_table[ch-CHAR_MIN] - -/* The actual hash tables, both MUST be of the same HASHSIZE, variable - size tables could be supported but the rehash routine should also - rebuild the transformation maps, I kept the tables of equal size - so that I can use one hash function and one transformation map */ /** Hash table for clients. 
*/ static struct Client *clientTable[HASHSIZE]; /** Hash table for channels. */ static struct Channel *channelTable[HASHSIZE]; +/** CRC-32 update table. */ +static uint32_t crc32hash[256]; -/* This is what the hash function will consider "equal" chars, this function - MUST be transitive, if HASHEQ(y,x)&&HASHEQ(y,z) then HASHEQ(y,z), and MUST - be symmetric, if HASHEQ(a,b) then HASHEQ(b,a), obvious ok but... :) */ -/** Helper macro for character comparison. */ -#define HASHEQ(x,y) ((ToLower(x)) == (ToLower(y))) - -/** Initialize the maps used by hash functions and clear the tables. */ +/** Initialize the map used by the hash function. */ void init_hash(void) { - int i; - int j; - unsigned long l; - unsigned long m; + unsigned int ii, jj, rand, poly; - /* Clear the hash tables first */ - for (l = 0; l < HASHSIZE; l++) + /* First calculate a normal CRC-32 table. */ + for (ii = 0, poly = 0xedb88320; ii < 256; ii++) { - channelTable[l] = 0; - clientTable[l] = 0; + rand = ii; + for (jj = 0; jj < 8; jj++) + rand = (rand & 1) ? poly ^ (rand >> 1) : rand >> 1; + crc32hash[ii] = rand; } - /* Here is to what we "map" a char before working on it */ - for (i = CHAR_MIN; i <= CHAR_MAX; i++) - hash_weight(i) = (HASHMEMS) (HASHSTEP * ((unsigned char)i)); - - /* Make them equal for case-independently equal chars, it's - lame to do it this way but I wanted the code flexible, it's - possible to change the HASHEQ macro and not touch here. - I don't actually care about the 32768 loops since it happens - only once at startup */ - for (i = CHAR_MIN; i <= CHAR_MAX; i++) { - for (j = CHAR_MIN; j < i; j++) { - if (HASHEQ(i, j)) - hash_weight(i) = hash_weight(j); - } - } - - /* And this is our hash-loop "transformation" function, - basically it will be hash_map[x] == ((x*HASHSHIFT)%HASHSIZE) - defined for 0<=x<=(HASHSIZE+HASHSHIFT) */ - for (m = 0; m < (unsigned long)HASHMAPSIZE; m++) + /* Now reorder the hash table. */ + for (ii = 0, rand = 0; ii < 256; ii++) { - l = m; - l *= (unsigned long)HASHSHIFT; - l %= (unsigned long)HASHSIZE; - hash_map[m] = (HASHMEMS) l; + if (!rand) + rand = ircrandom(); + poly = ii + rand % (256 - ii); + jj = crc32hash[ii]; + crc32hash[ii] = crc32hash[poly]; + crc32hash[poly] = jj; + rand >>= 8; } } -/* These are the actual hash functions, since they are static - and very short any decent compiler at a good optimization level - WILL inline these in the following functions */ - -/* This is the string hash function, - WARNING: n must be a valid pointer to a _non-null_ string - this means that not only strhash(NULL) but also - strhash("") _will_ coredump, it's responsibility - the caller to eventually check BadPtr(nick). */ +/** Output type of hash function. */ +typedef unsigned int HASHREGS; -/** Calculate hash value for a string. */ +/** Calculate hash value for a string. + * @param[in] n String to hash. + * @return Hash value for string. + */ static HASHREGS strhash(const char *n) { - HASHREGS hash = hash_weight(*n++); + HASHREGS hash = crc32hash[ToLower(*n++) & 255]; while (*n) - hash = hash_map[hash] + hash_weight(*n++); - return hash_map[hash]; + hash = (hash >> 8) ^ crc32hash[(hash ^ ToLower(*n++)) & 255]; + return hash % HASHSIZE; } -/* And this is the string hash function for limited lenght strings - WARNING: n must be a valid pointer to a non-null string - and i must be > 0 ! */ - -/* REMOVED - - The time taken to decrement i makes the function - slower than strhash for the average of channel names (tested - on 16000 real channel names, 1000 loops. 
I left the code here - as a bookmark if a strnhash is evetually needed in the future. - - static HASHREGS strnhash(const char *n, int i) { - HASHREGS hash = hash_weight(*n++); - i--; - while(*n && i--) - hash = hash_map[hash] + hash_weight(*n++); - return hash_map[hash]; - } - - #define CHANHASHLEN 30 - - !REMOVED */ - /************************** Externally visible functions ********************/ /* Optimization note: in these functions I supposed that the CSE optimization @@ -280,7 +109,7 @@ static HASHREGS strhash(const char *n) * I avoided introducing new variables to do the work myself and I did let * the optimizer play with more free registers, actual tests proved this * solution to be faster than doing things like tmp2=tmp->hnext... and then - * use tmp2 myself wich would have given less freedom to the optimizer. + * use tmp2 myself which would have given less freedom to the optimizer. */ /** Prepend a client to the appropriate hash bucket. @@ -579,3 +408,74 @@ void clearNickJupes(void) for (i = 0; i < JUPEHASHSIZE; i++) jupeTable[i][0] = '\000'; } + +/** Report all nick jupes to a user. + * @param[in] to Client requesting statistics. + * @param[in] sd Stats descriptor for request (ignored). + * @param[in] param Extra parameter from user (ignored). + */ +void +stats_nickjupes(struct Client* to, const struct StatDesc* sd, char* param) +{ + int i; + for (i = 0; i < JUPEHASHSIZE; i++) + if (jupeTable[i][0]) + send_reply(to, RPL_STATSJLINE, jupeTable[i]); +} + +/** Send more channels to a client in mid-LIST. + * @param[in] cptr Client to send the list to. + */ +void list_next_channels(struct Client *cptr) +{ + struct ListingArgs *args; + struct Channel *chptr; + + /* Walk consecutive buckets until we hit the end. */ + for (args = cli_listing(cptr); args->bucket < HASHSIZE; args->bucket++) + { + /* Send all the matching channels in the bucket. */ + for (chptr = channelTable[args->bucket]; chptr; chptr = chptr->hnext) + { + if (chptr->users > args->min_users + && chptr->users < args->max_users + && chptr->creationtime > args->min_time + && chptr->creationtime < args->max_time + && (!args->wildcard[0] || (args->flags & LISTARG_NEGATEWILDCARD) || + (!match(args->wildcard, chptr->chname))) + && (!(args->flags & LISTARG_NEGATEWILDCARD) || + match(args->wildcard, chptr->chname)) + && (!(args->flags & LISTARG_TOPICLIMITS) + || (chptr->topic[0] + && chptr->topic_time > args->min_topic_time + && chptr->topic_time < args->max_topic_time)) + && ((args->flags & LISTARG_SHOWSECRET) + || ShowChannel(cptr, chptr))) + { + if (args->flags & LISTARG_SHOWMODES) { + char modebuf[MODEBUFLEN]; + char parabuf[MODEBUFLEN]; + + modebuf[0] = modebuf[1] = parabuf[0] = '\0'; + channel_modes(cptr, modebuf, parabuf, sizeof(parabuf), chptr, NULL); + send_reply(cptr, RPL_LIST | SND_EXPLICIT, "%s %u %s %s :%s", + chptr->chname, chptr->users, modebuf, parabuf, chptr->topic); + } else { + send_reply(cptr, RPL_LIST, chptr->chname, chptr->users, chptr->topic); + } + } + } + /* If, at the end of the bucket, client sendq is more than half + * full, stop. */ + if (MsgQLength(&cli_sendQ(cptr)) > cli_max_sendq(cptr) / 2) + break; + } + + /* If we did all buckets, clean the client and send RPL_LISTEND. */ + if (args->bucket >= HASHSIZE) + { + MyFree(cli_listing(cptr)); + cli_listing(cptr) = NULL; + send_reply(cptr, RPL_LISTEND); + } +}
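
The new hashing path in this diff has two parts: init_hash() builds the standard reflected CRC-32 table for the polynomial 0xedb88320 and then permutes its 256 entries with bytes drawn from ircrandom(), and strhash() feeds each ToLower()'d byte of a name through that table before reducing the 32-bit result modulo HASHSIZE. The standalone sketch below mirrors that flow so it can be compiled outside the ircd; it is illustrative only: tolower() stands in for ircu's ToLower() (which also folds the RFC 1459 characters "[]\" to "{}|"), rand() stands in for ircrandom(), and the HASHSIZE value is invented for the demo.

/* Illustrative only: tolower() approximates ircu's ToLower(), rand()
 * approximates ircrandom(), and HASHSIZE here is an arbitrary demo value. */
#include <ctype.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define HASHSIZE 32000

static uint32_t crc32hash[256];

static void init_hash_sketch(void)
{
  unsigned int ii, jj, pos;
  uint32_t r, tmp;

  /* Standard reflected CRC-32 table for the polynomial 0xedb88320. */
  for (ii = 0; ii < 256; ii++) {
    r = ii;
    for (jj = 0; jj < 8; jj++)
      r = (r & 1) ? 0xedb88320 ^ (r >> 1) : r >> 1;
    crc32hash[ii] = r;
  }

  /* Shuffle the 256 entries so the byte-to-entry mapping differs per run,
   * as the diff does with random bytes from ircrandom(). */
  for (ii = 0; ii < 256; ii++) {
    pos = ii + rand() % (256 - ii);
    tmp = crc32hash[ii];
    crc32hash[ii] = crc32hash[pos];
    crc32hash[pos] = tmp;
  }
}

/* Case-insensitive hash of a non-empty string, reduced to a bucket index. */
static unsigned int strhash_sketch(const char *n)
{
  unsigned int hash = crc32hash[tolower((unsigned char)*n++) & 255];

  while (*n)
    hash = (hash >> 8) ^ crc32hash[(hash ^ tolower((unsigned char)*n++)) & 255];
  return hash % HASHSIZE;
}

int main(void)
{
  init_hash_sketch();
  /* "Nick" and "nIcK" land in the same bucket because of the case folding. */
  printf("%u %u\n", strhash_sketch("Nick"), strhash_sketch("nIcK"));
  return 0;
}

Because the full 32-bit CRC is only reduced modulo HASHSIZE at the very end, the bucket count needs no special relationship to the hash function (prime, not a multiple of a shift value, and so on), which is what lets the diff delete the whole HASHSTEP/HASHSHIFT/HASHMEMS selection machinery above.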
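
For comparison, the scheme described in the removed comment block boils down to two lookup tables: a weight table that gives caseless-equal characters the same weight, and hash_map[x] == (x * HASHSHIFT) % HASHSIZE so the running value stays small enough for short-integer arithmetic. The compressed sketch below is only an approximation of the deleted code: tolower() replaces the weight-equalising loop, and HASHSIZE is a demo value chosen not to be a multiple of HASHSHIFT, as the removed preprocessor checks required.

/* Sketch of the removed scheme; sizes and case folding are simplified. */
#include <ctype.h>
#include <stdio.h>

#define HASHSIZE  1000                 /* demo value, not a multiple of 257 */
#define HASHSHIFT 257                  /* first prime above the largest weight */

static unsigned hash_map[HASHSIZE + HASHSHIFT + 1];

static void init_nemesi_map(void)
{
  unsigned x;
  for (x = 0; x < HASHSIZE + HASHSHIFT + 1; x++)
    hash_map[x] = (x * HASHSHIFT) % HASHSIZE;
}

/* weight(): caseless-equal characters must share a weight. */
static unsigned weight(unsigned char c) { return (unsigned)tolower(c); }

/* Evaluates sum(weight(c[i]) * HASHSHIFT^(len-i)) % HASHSIZE one character
 * at a time; n must point at a non-empty string, as in the original. */
static unsigned nemesi_hash(const char *n)
{
  unsigned hash = weight((unsigned char)*n++);
  while (*n)
    hash = hash_map[hash] + weight((unsigned char)*n++);
  return hash_map[hash];
}

int main(void)
{
  init_nemesi_map();
  printf("%u %u\n", nemesi_hash("Nick"), nemesi_hash("nick"));
  return 0;
}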
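
list_next_channels() at the end of the diff shows the pattern that keeps /LIST from flooding a client: the per-client listing state records only the next hash bucket to visit, each bucket is sent in full, and the walk pauses as soon as the send queue is more than half full, to be resumed once the queue drains. The sketch below strips that down to the resumable walk itself; struct item, struct listing_state, backlogged(), send_next_buckets() and N_BUCKETS are made-up stand-ins for the ircd's Channel, ListingArgs, the MsgQLength()/cli_max_sendq() check and HASHSIZE, and all of the /LIST filtering (user counts, wildcards, topic limits, secret channels) is omitted.

/* Simplified stand-ins for the real ircd structures and limits. */
#include <stdio.h>

#define N_BUCKETS 8                       /* plays the role of HASHSIZE */

struct item { const char *name; struct item *next; };
struct listing_state { unsigned int bucket; };   /* like ListingArgs.bucket */

static struct item *table[N_BUCKETS];

/* Pretend the send queue fills up after every bucket, to force resumption. */
static int backlogged(void) { return 1; }

/* Send buckets until the table is exhausted or the output backs up.
 * Returns 1 when everything has been listed (the ircd would then free the
 * listing state and send RPL_LISTEND), 0 when it must be called again. */
static int send_next_buckets(struct listing_state *st)
{
  for (; st->bucket < N_BUCKETS; st->bucket++) {
    const struct item *it;
    for (it = table[st->bucket]; it; it = it->next)
      printf("bucket %u: %s\n", st->bucket, it->name);  /* RPL_LIST stand-in */
    if (backlogged()) {            /* send queue over the threshold: pause */
      st->bucket++;                /* this bucket is done; resume after it */
      return 0;
    }
  }
  return 1;
}

int main(void)
{
  struct item a = { "#alpha", NULL };
  struct item b = { "#beta", NULL };
  struct listing_state st = { 0 };

  table[0] = &a;
  table[5] = &b;
  while (!send_next_buckets(&st))
    ;                              /* the ircd instead waits for the queue to drain */
  puts("end of /LIST");
  return 0;
}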