ircd/match.c

   1 /*
   2  * IRC - Internet Relay Chat, common/match.c
   3  * Copyright (C) 1990 Jarkko Oikarinen
   4  *
   5  * This program is free software; you can redistribute it and/or modify
   6  * it under the terms of the GNU General Public License as published by
   7  * the Free Software Foundation; either version 1, or (at your option)
   8  * any later version.
   9  *
  10  * This program is distributed in the hope that it will be useful,
  11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13  * GNU General Public License for more details.
  14  *
  15  * You should have received a copy of the GNU General Public License
  16  * along with this program; if not, write to the Free Software
  17  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  18  *
  19  * $Id$
  20  */
  21 #include "match.h"
  22 #include "ircd_chattr.h"
  23 /*
  24  * mmatch()
  25  *
  26  * Written by Run (carlo@runaway.xs4all.nl), 25-10-96
  27  *
  28  *
  29  * From: Carlo Wood <carlo@runaway.xs4all.nl>
  30  * Message-Id: <199609021026.MAA02393@runaway.xs4all.nl>
  31  * Subject: [C-Com] Analysis for `mmatch' (was: gline4 problem)
  32  * To: coder-com@mail.undernet.org (coder committee)
  33  * Date: Mon, 2 Sep 1996 12:26:01 +0200 (MET DST)
  34  *
  35  * We need a new function `mmatch(const char *old_mask, const char *new_mask)'
  36  * which returns `true' likewise the current `match' (start with copying it),
  37  * but which treats '*' and '?' in `new_mask' differently (not "\*" and "\?" !)
  38  * as follows:  a '*' in `new_mask' does not match a '?' in `old_mask' and
  39  * a '?' in `new_mask' does not match a '\?' in `old_mask'.
  40  * And ofcourse... a '*' in `new_mask' does not match a '\*' in `old_mask'...
  41  * And last but not least, '\?' and '\*' in `new_mask' now become one character.
  42  */
  43
  44 int mmatch(const char *old_mask, const char *new_mask)
  45 {
  46   const char *m = old_mask;
  47   const char *n = new_mask;
  48   const char *ma = m;
  49   const char *na = n;
  50   int wild = 0;
  51   int mq = 0, nq = 0;
  52
  53   while (1)
  54   {
  55     if (*m == '*')
  56     {
  57       while (*m == '*')
  58         m++;
  59       wild = 1;
  60       ma = m;
  61       na = n;
  62     }
  63
  64     if (!*m)
  65     {
  66       if (!*n)
  67         return 0;
  68       for (m--; (m > old_mask) && (*m == '?'); m--)
  69         ;
  70       if ((*m == '*') && (m > old_mask) && (m[-1] != '\\'))
  71         return 0;
  72       if (!wild)
  73         return 1;
  74       m = ma;
  75
  76       /* Added to `mmatch' : Because '\?' and '\*' now is one character: */
  77       if ((*na == '\\') && ((na[1] == '*') || (na[1] == '?')))
  78         ++na;
  79
  80       n = ++na;
  81     }
  82     else if (!*n)
  83     {
  84       while (*m == '*')
  85         m++;
  86       return (*m != 0);
  87     }
  88     if ((*m == '\\') && ((m[1] == '*') || (m[1] == '?')))
  89     {
  90       m++;
  91       mq = 1;
  92     }
  93     else
  94       mq = 0;
  95
  96     /* Added to `mmatch' : Because '\?' and '\*' now is one character: */
  97     if ((*n == '\\') && ((n[1] == '*') || (n[1] == '?')))
  98     {
  99       n++;
 100       nq = 1;
 101     }
 102     else
 103       nq = 0;
 104
 105 /*
 106  * This `if' has been changed compared to match() to do the following:
 107  * Match when:
 108  *   old (m)         new (n)         boolean expression
 109  *    *               any             (*m == '*' && !mq) ||
 110  *    ?               any except '*'  (*m == '?' && !mq && (*n != '*' || nq)) ||
 111  * any except * or ?  same as m       (!((*m == '*' || *m == '?') && !mq) &&
 112  *                                      ToLower(*m) == ToLower(*n) &&
 113  *                                        !((mq && !nq) || (!mq && nq)))
 114  *
 115  * Here `any' also includes \* and \? !
 116  *
 117  * After reworking the boolean expressions, we get:
 118  * (Optimized to use boolean shortcircuits, with most frequently occuring
 119  *  cases upfront (which took 2 hours!)).
 120  */
 121     if ((*m == '*' && !mq) ||
 122         ((!mq || nq) && ToLower(*m) == ToLower(*n)) ||
 123         (*m == '?' && !mq && (*n != '*' || nq)))
 124     {
 125       if (*m)
 126         m++;
 127       if (*n)
 128         n++;
 129     }
 130     else
 131     {
 132       if (!wild)
 133         return 1;
 134       m = ma;
 135
 136       /* Added to `mmatch' : Because '\?' and '\*' now is one character: */
 137       if ((*na == '\\') && ((na[1] == '*') || (na[1] == '?')))
 138         ++na;
 139
 140       n = ++na;
 141     }
 142   }
 143 }
 144
 145 /*
 146  * Compare if a given string (name) matches the given
 147  * mask (which can contain wild cards: '*' - match any
 148  * number of chars, '?' - match any single character.
 149  *
 150  * return  0, if match
 151  *         1, if no match
 152  */
 153
 154 /*
 155  * match
 156  *
 157  * Rewritten by Andrea Cocito (Nemesi), November 1998.
 158  *
 159  */
 160
 161 /****************** Nemesi's match() ***************/
 162
 163 int match(const char *mask, const char *string)
 164 {
 165   const char *m = mask, *s = string;
 166   char ch;
 167   const char *bm, *bs;          /* Will be reg anyway on a decent CPU/compiler */
 168
 169   /* Process the "head" of the mask, if any */
 170   while ((ch = *m++) && (ch != '*'))
 171     switch (ch)
 172     {
 173       case '\\':
 174         if (*m == '?' || *m == '*')
 175           ch = *m++;
 176       default:
 177         if (ToLower(*s) != ToLower(ch))
 178           return 1;
 179       case '?':
 180         if (!*s++)
 181           return 1;
 182     };
 183   if (!ch)
 184     return *s;
 185
 186   /* We got a star: quickly find if/where we match the next char */
 187 got_star:
 188   bm = m;                       /* Next try rollback here */
 189   while ((ch = *m++))
 190     switch (ch)
 191     {
 192       case '?':
 193         if (!*s++)
 194           return 1;
 195       case '*':
 196         bm = m;
 197         continue;               /* while */
 198       case '\\':
 199         if (*m == '?' || *m == '*')
 200           ch = *m++;
 201       default:
 202         goto break_while;       /* C is structured ? */
 203     };
 204 break_while:
 205   if (!ch)
 206     return 0;                   /* mask ends with '*', we got it */
 207   ch = ToLower(ch);
 208   while (ToLower(*s++) != ch)
 209     if (!*s)
 210       return 1;
 211   bs = s;                       /* Next try start from here */
 212
 213   /* Check the rest of the "chunk" */
 214   while ((ch = *m++))
 215   {
 216     switch (ch)
 217     {
 218       case '*':
 219         goto got_star;
 220       case '\\':
 221         if (*m == '?' || *m == '*')
 222           ch = *m++;
 223       default:
 224         if (ToLower(*s) != ToLower(ch))
 225         {
 226           m = bm;
 227           s = bs;
 228           goto got_star;
 229         };
 230       case '?':
 231         if (!*s++)
 232           return 1;
 233     };
 234   };
 235   if (*s)
 236   {
 237     m = bm;
 238     s = bs;
 239     goto got_star;
 240   };
 241   return 0;
 242 }
 243
 244 /*
 245  * collapse()
 246  * Collapse a pattern string into minimal components.
 247  * This particular version is "in place", so that it changes the pattern
 248  * which is to be reduced to a "minimal" size.
 249  *
 250  * (C) Carlo Wood - 6 Oct 1998
 251  * Speedup rewrite by Andrea Cocito, December 1998.
 252  * Note that this new optimized alghoritm can *only* work in place.
 253  */
 254
 255 char *collapse(char *mask)
 256 {
 257   int star = 0;
 258   char *m = mask;
 259   char *b;
 260
 261   if (m)
 262   {
 263     do
 264     {
 265       if ((*m == '*') && ((m[1] == '*') || (m[1] == '?')))
 266       {
 267         b = m;
 268         do
 269         {
 270           if (*m == '*')
 271             star = 1;
 272           else
 273           {
 274             if (star && (*m != '?'))
 275             {
 276               *b++ = '*';
 277               star = 0;
 278             };
 279             *b++ = *m;
 280             if ((*m == '\\') && ((m[1] == '*') || (m[1] == '?')))
 281               *b++ = *++m;
 282           };
 283         }
 284         while (*m++);
 285         break;
 286       }
 287       else
 288       {
 289         if ((*m == '\\') && ((m[1] == '*') || (m[1] == '?')))
 290           m++;
 291       };
 292     }
 293     while (*m++);
 294   };
 295   return mask;
 296 }
 297
 298 /*
 299  ***************** Nemesi's matchcomp() / matchexec() **************
 300  */
 301
 302 /* These functions allow the use of "compiled" masks, you compile a mask
 303  * by means of matchcomp() that gets the plain text mask as input and writes
 304  * its result in the memory locations addressed by the 3 parameters:
 305  * - *cmask will contain the text of the compiled mask
 306  * - *minlen will contain the lenght of the shortest string that can match
 307  *   the mask
 308  * - *charset will contain the minimal set of chars needed to match the mask
 309  * You can pass NULL as *charset and it will be simply not returned, but you
 310  * MUST pass valid pointers for *minlen and *cmask (wich must be big enough
 311  * to contain the compiled mask text that is in the worst case as long as the
 312  * text of the mask itself in plaintext format) and the return value of
 313  * matchcomp() will be the number of chars actually written there (excluded
 314  * the trailing zero). cmask can be == mask, matchcomp() can work in place.
 315  * The {cmask, minlen} couple of values make the real compiled mask and
 316  * need to be passed to the functions that use the compiled mask, if you pass
 317  * the wrong minlen or something wrong in cmask to one of these expect a
 318  * coredump. This means that when you record a compiled mask you must store
 319  * *both* these values.
 320  * Once compiled the mask can be used to match a string by means of
 321  * matchexec(), it can be printed back to human-readable format by means
 322  * of sprintmatch() or it can be compared to another compiled mask by means
 323  * of mmexec() that will tell if it completely overrides that mask (a lot like
 324  * what mmatch() does for plain text masks).
 325  * You can gain a lot of speed in many situations avoiding to matchexec() when:
 326  * - The maximum lenght of the field you are about to match() the mask to is
 327  *   shorter than minlen, in example when matching abc*def*ghil with a nick:
 328  *   It just cannot match since a nick is at most 9 chars long and the mask
 329  *   needs at least 10 chars (10 will be the value returned in minlen).
 330  * - The charset allowed for the field you are about to match to doesn't
 331  *   "contain" the charset returned by matchcomp(), in example when you
 332  *   have *.* as mask it makes no sense to try to match it against a nick
 333  *   because, again, a nick can't contain a '.', you can check this with
 334  *   a simple (charset & NTL_IRCNK) in this case.
 335  * - As a special case, since compiled masks are forced to lowercase,
 336  *   it would make no sense to use the NTL_LOWER and NTL_UPPER on a compiled
 337  *   mask, thus they are reused as follows: if the NTL_LOWER bit of charset
 338  *   is set it means that the mask contains only non-wilds chars (i.e. you can
 339  *   use strCasecmp() to match it or a direct hash lookup), if the NTL_UPPER
 340  *   bit is set it means that it contains only wild chars (and you can
 341  *   match it with strlen(field)>=minlen).
 342  * Do these optimizations ONLY when the data you are about to pass to
 343  * matchexec() are *known* to be invalid in advance, using strChattr()
 344  * or strlen() on the text would be slower than calling matchexec() directly
 345  * and let it fail.
 346  * Internally a compiled mask contain in the *cmask area the text of
 347  * the plain text form of the mask itself with applied the following hacks:
 348  * - All characters are forced to lowercase (so that uppercase letters and
 349  *   specifically the symbols 'A' and 'Z' are reserved for special use)
 350  * - All non-escaped stars '*' are replaced by the letter 'Z'
 351  * - All non-escaped question marks '?' are replaced by the letter 'A'
 352  * - All escape characters are removed, the wilds escaped by them are
 353  *   then passed by without the escape since they don't collide anymore
 354  *   with the real wilds (encoded as A/Z)
 355  * - Finally the part of the mask that follows the last asterisk is
 356  *   reversed (byte order mirroring) and moved right after the first
 357  *   asterisk.
 358  * After all this a mask like:   Head*CHUNK1*chu\*nK2*ch??k3*TaIl
 359  *               .... becomes:   headZliatZchunk1Zchu*nk2ZchAAk3
 360  * This can still be printed on a console, more or less understood by an
 361  * human and handled with the usual str*() library functions.
 362  * When you store somewhere the compiled mask you can avoid storing the
 363  * textform of it since it can be "decompiled" by means of sprintmatch(),
 364  * but at that time the following things are changed in the mask:
 365  * - All chars have been forced to lowercase.
 366  * - The mask is collapsed.
 367  * The balance point of using compiled masks in terms of CPU is when you expect
 368  * to use matchexec() instead of match() at least 20 times on the same mask
 369  * or when you expect to use mmexec() instead of mmatch() 3 times.
 370  */
 371
 372  /*
 373     * matchcomp()
 374     *
 375     * Compiles a mask into a form suitable for using in matchexec().
 376   */
 377
 378 int matchcomp(char *cmask, int *minlen, int *charset, const char *mask)
 379 {
 380   const char *m = mask;
 381   char *b = cmask;
 382   char *fs = 0;
 383   char *ls = 0;
 384   char *x1, *x2;
 385   int l1, l2, lmin, loop, sign;
 386   int star = 0;
 387   int cnt = 0;
 388   char ch;
 389   int chset = ~0;
 390   int chset2 = (NTL_LOWER | NTL_UPPER);
 391
 392   if (m)
 393     while ((ch = *m++))
 394       switch (ch)
 395       {
 396         case '*':
 397           star = 1;
 398           break;
 399         case '?':
 400           cnt++;
 401           *b++ = 'A';
 402           chset2 &= ~NTL_LOWER;
 403           break;
 404         case '\\':
 405           if ((*m == '?') || (*m == '*'))
 406             ch = *m++;
 407         default:
 408           if (star)
 409           {
 410             ls = b;
 411             fs = fs ? fs : b;
 412             *b++ = 'Z';
 413             chset2 &= ~NTL_LOWER;
 414             star = 0;
 415           };
 416           cnt++;
 417           *b = ToLower(ch);
 418           chset &= IRCD_CharAttrTab[*b++ - CHAR_MIN];
 419           chset2 &= ~NTL_UPPER;
 420       };
 421
 422   if (charset)
 423     *charset = (chset | chset2);
 424
 425   if (star)
 426   {
 427     ls = b;
 428     fs = (fs ? fs : b);
 429     *b++ = 'Z';
 430   };
 431
 432   if (ls)
 433   {
 434     for (x1 = ls + 1, x2 = (b - 1); x1 < x2; x1++, x2--)
 435     {
 436       ch = *x1;
 437       *x1 = *x2;
 438       *x2 = ch;
 439     };
 440     l1 = (ls - fs);
 441     l2 = (b - ls);
 442     x1 = fs;
 443     while ((lmin = (l1 < l2) ? l1 : l2))
 444     {
 445       x2 = x1 + l1;
 446       for (loop = 0; loop < lmin; loop++)
 447       {
 448         ch = x1[loop];
 449         x1[loop] = x2[loop];
 450         x2[loop] = ch;
 451       };
 452       x1 += lmin;
 453       sign = l1 - l2;
 454       l1 -= (sign < 0) ? 0 : lmin;
 455       l2 -= (sign > 0) ? 0 : lmin;
 456     };
 457   };
 458
 459   *b = '\0';
 460   *minlen = cnt;
 461   return (b - cmask);
 462
 463 }
 464
 465 /*
 466  * matchexec()
 467  *
 468  * Executes a match with a mask previosuly compiled with matchcomp()
 469  * Note 1: If the mask isn't correctly produced by matchcomp() I will core
 470  * Note 2: 'min' MUST be the value returned by matchcomp on that mask,
 471  *         or.... I will core even faster :-)
 472  * Note 3: This piece of code is not intended to be nice but efficient.
 473  */
 474
 475 int matchexec(const char *string, const char *cmask, int minlen)
 476 {
 477   const char *s = string - 1;
 478   const char *b = cmask - 1;
 479   int trash;
 480   const char *bb, *bs;
 481   char ch;
 482
 483 tryhead:
 484   while ((ToLower(*++s) == *++b) && *s);
 485   if (!*s)
 486     return ((*b != '\0') && ((*b++ != 'Z') || (*b != '\0')));
 487   if (*b != 'Z')
 488   {
 489     if (*b == 'A')
 490       goto tryhead;
 491     return 1;
 492   };
 493
 494   bs = s;
 495   while (*++s);
 496
 497   if ((trash = (s - string - minlen)) < 0)
 498     return 2;
 499
 500 trytail:
 501   while ((ToLower(*--s) == *++b) && *b && (ToLower(*--s) == *++b) && *b
 502       && (ToLower(*--s) == *++b) && *b && (ToLower(*--s) == *++b) && *b);
 503   if (*b != 'Z')
 504   {
 505     if (*b == 'A')
 506       goto trytail;
 507     return (*b != '\0');
 508   };
 509
 510   s = --bs;
 511   bb = b;
 512
 513   while ((ch = *++b))
 514   {
 515     while ((ToLower(*++s) != ch))
 516       if (--trash < 0)
 517         return 4;
 518     bs = s;
 519
 520 trychunk:
 521     while ((ToLower(*++s) == *++b) && *b);
 522     if (!*b)
 523       return 0;
 524     if (*b == 'Z')
 525     {
 526       bs = --s;
 527       bb = b;
 528       continue;
 529     };
 530     if (*b == 'A')
 531       goto trychunk;
 532
 533     b = bb;
 534     s = bs;
 535     if (--trash < 0)
 536       return 5;
 537   };
 538
 539   return 0;
 540 }
 541
 542 /*
 543  * matchdecomp()
 544  * Prints the human readable version of *cmask into *mask, (decompiles
 545  * cmask).
 546  * The area pointed by *mask MUST be big enough (the mask might be up to
 547  * twice the size of its compiled form if it's made all of \? or \*, and
 548  * this function can NOT work in place since it might enflate the mask)
 549  * The printed mask is not identical to the one that was compiled to cmask,
 550  * infact it is 1) forced to all lowercase, 2) collapsed, both things
 551  * are supposed to NOT change it's meaning.
 552  * It returns the number of chars actually written to *mask;
 553  */
 554
 555 int matchdecomp(char *mask, const char *cmask)
 556 {
 557   char *rtb = mask;
 558   const char *rcm = cmask;
 559   const char *begtail, *endtail;
 560
 561   if (rtb ==0)
 562     return (-1);
 563
 564   if (rcm == 0)
 565     return (-2);
 566
 567   for (; (*rcm != 'Z'); rcm++, rtb++)
 568   {
 569     if ((*rcm == '?') || (*rcm == '*'))
 570       *rtb++ = '\\';
 571     if (!((*rtb = ((*rcm == 'A') ? '?' : *rcm))))
 572       return (rtb - mask);
 573   };
 574
 575   begtail = rcm++;
 576   *rtb++ = '*';
 577
 578   while (*rcm && (*rcm != 'Z'))
 579     rcm++;
 580
 581   endtail = rcm;
 582
 583   if (*rcm)
 584   {
 585     while (*++rcm)
 586       switch (*rcm)
 587       {
 588         case 'A':
 589           *rtb++ = '?';
 590           break;
 591         case 'Z':
 592           *rtb++ = '*';
 593           break;
 594         case '*':
 595         case '?':
 596           *rtb++ = '\\';
 597         default:
 598           *rtb++ = *rcm;
 599       };
 600     *rtb++ = '*';
 601   };
 602
 603   for (rcm = endtail; (--rcm) > begtail; *rtb++ = ((*rcm == 'A') ? '?' : *rcm))
 604     if ((*rcm == '?') || (*rcm == '*'))
 605       *rtb++ = '\\';
 606
 607   *rtb = '\0';
 608   return (rtb - mask);
 609 }
 610
 611 /*
 612  * mmexec()
 613  * Checks if a wider compiled mask (wcm/wminlen) completely overrides
 614  * a more restrict one (rcm/rminlen), basically what mmatch() does for
 615  * non-compiled masks, returns 0 if the override is true (like mmatch()).
 616  * "the wider overrides the restrict" means that any string that matches
 617  * the restrict one _will_ also match the wider one, always.
 618  * In this we behave differently from mmatch() because in example we return
 619  * true for " a?*cd overrides a*bcd " for wich the override happens for how
 620  * we literally defined it, here mmatch() would have returned false.
 621  * The original concepts and the base alghoritm are copied from mmatch()
 622  * written by Run (Carlo Wood), this function is written by
 623  * Nemesi (Andrea Cocito)
 624  */
 625
 626 int mmexec(const char *wcm, int wminlen, const char *rcm, int rminlen)
 627 {
 628   const char *w, *r, *br, *bw, *rx, *rz;
 629   int eat, trash;
 630
 631   /* First of all rm must have enough non-stars to 'contain' wm */
 632   if ((trash = rminlen - wminlen) < 0)
 633     return 1;
 634   w = wcm;
 635   r = rcm;
 636   eat = 0;
 637
 638   /* Let's start the game, remember that '*' is mapped to 'Z', '?'
 639      is mapped to 'A' and that head?*??*?chunk*???*tail becomes
 640      headAAAAZliatAAAZchunk for compiled masks */
 641
 642   /* Match the head of wm with the head of rm */
 643   for (; (*r) && (*r != 'Z') && ((*w == *r) || (*w == 'A')); r++, w++);
 644   if (*r == 'Z')
 645     while (*w == 'A')           /* Eat extra '?' before '*' in wm if got '*' in rm */
 646       w++, eat++;
 647   if (*w != 'Z')                /* head1<any>.. can't match head2<any>.. */
 648     return ((*w) || (*r)) ? 1 : 0;      /* and head<nul> matches only head<nul> */
 649   if (!*++w)
 650     return 0;                   /* headZ<nul> matches head<anything>    */
 651
 652   /* Does rm have any stars in it ? let's check */
 653   for (rx = r; *r && (*r != 'Z'); r++);
 654   if (!*r)
 655   {
 656     /* rm has no stars and thus isn't a mask but it's just a flat
 657        string: special handling occurs here, note that eat must be 0 here */
 658
 659     /* match the tail */
 660     if (*w != 'Z')
 661     {
 662       for (; r--, (*w) && ((*w == *r) || (*w == 'A')); w++);
 663       if (*w != 'Z')            /* headZliat1<any> fails on head<any>2tail  */
 664         return (*w) ? 1 : 0;    /* but headZliat<nul> matches head<any>tail */
 665     }
 666
 667     /* match the chunks */
 668     while (1)
 669     {                           /* This loop can't break but only return   */
 670
 671       for (bw = w++; (*w != *rx); rx++) /* Seek the 1st char of the chunk */
 672         if (--trash < 0)        /* See if we can trash one more char of rm */
 673           return 1;             /* If not we can only fail of course       */
 674       for (r = ++rx, w++; (*w) && ((*w == *r) || (*w == 'A')); r++, w++);
 675       if (!*w)                  /* Did last loop match the rest of chunk ? */
 676         return 0;               /* ... Yes, end of wm, matched !           */
 677       if (*w != 'Z')
 678       {                         /* ... No, hitted non-star                 */
 679         w = bw;                 /* Rollback at beginning of chunk          */
 680         if (--trash < 0)        /* Trashed the char where this try started */
 681           return 1;             /* if we can't trash more chars fail       */
 682       }
 683       else
 684       {
 685         rx = r;                 /* Successfully matched a chunk, move rx   */
 686       }                 /* and go on with the next one             */
 687     }
 688   }
 689
 690   /* rm has at least one '*' and thus is a 'real' mask */
 691   rz = r++;                     /* rx = unused of head, rz = beg-tail */
 692
 693   /* Match the tail of wm (if any) against the tail of rm */
 694   if (*w != 'Z')
 695   {
 696     for (; (*w) && (*r != 'Z') && ((*w == *r) || (*w == 'A')); w++, r++);
 697     if (*r == 'Z')              /* extra '?' before tail are fluff, just flush 'em */
 698       while (*w == 'A')
 699         w++;
 700     if (*w != 'Z')              /* We aren't matching a chunk, can't rollback      */
 701       return (*w) ? 1 : 0;
 702   }
 703
 704   /* Match the chunks of wm against what remains of the head of rm */
 705   while (1)
 706   {
 707     bw = w;
 708     for (bw++; (rx < rz) && (*bw != *rx); rx++) /* Seek the first           */
 709       if (--trash < 0)          /* waste some trash reserve */
 710         return 1;
 711     if (!(rx < rz))             /* head finished            */
 712       break;
 713     for (bw++, (br = ++rx);
 714         (br < rz) && (*bw) && ((*bw == *br) || (*bw == 'A')); br++, bw++);
 715     if (!(br < rz))             /* Note that we didn't use any 'eat' char yet, if  */
 716       while (*bw == 'A')        /* there were eat-en chars the head would be over  */
 717         bw++, eat++;            /* Happens only at end of head, and eat is still 0 */
 718     if (!*bw)
 719       return 0;
 720     if (*bw != 'Z')
 721     {
 722       eat = 0;
 723       if (!(br < rz))
 724       {                         /* If we failed because we got the end of head */
 725         trash -= (br - rx);     /* it makes no sense to rollback, just trash   */
 726         if (--trash < 0)        /* all the rest of the head wich isn't long    */
 727           return 1;             /* enough for this chunk and go out of this    */
 728         break;                  /* loop, then we try with the chunks of rm     */
 729       };
 730       if (--trash < 0)
 731         return 1;
 732     }
 733     else
 734     {
 735       w = bw;
 736       rx = br;
 737     }
 738   }
 739
 740   /* Match the unused chunks of wm against the chunks of rm */
 741   rx = r;
 742   for (; *r && (*r != 'Z'); r++);
 743   rz = r;
 744   if (*r++)
 745   {
 746     while (*r)
 747     {
 748       bw = w;
 749       while (eat && *r)         /* the '?' we had eated make us skip as many chars */
 750         if (*r++ != 'Z')        /* here, but can't skip stars or trailing zero     */
 751           eat--;
 752       for (bw++; (*r) && (*bw != *r); r++)
 753         if ((*r != 'Z') && (--trash < 0))
 754           return 1;
 755       if (!*r)
 756         break;
 757       for ((br = ++r), bw++;
 758           (*br) && (*br != 'Z') && ((*bw == *br) || (*bw == 'A')); br++, bw++);
 759       if (*br == 'Z')
 760         while (*bw == 'A')
 761           bw++, eat++;
 762       if (!*bw)
 763         return 0;
 764       if (*bw != 'Z')
 765       {
 766         eat = 0;
 767         if ((!*br) || (*r == 'Z'))
 768         {                       /* If we hit the end of rm or a star in it */
 769           trash -= (br - r);    /* makes no sense to rollback within this  */
 770           if (trash < 0)        /* same chunk of br, skip it all and then  */
 771             return 1;           /* either rollback or break this loop if   */
 772           if (!*br)             /* it was the end of rm                    */
 773             break;
 774           r = br;
 775         }
 776         if (--trash < 0)
 777           return 1;
 778       }
 779       else
 780       {
 781         r = br;
 782         w = bw;
 783       }
 784     }
 785   }
 786
 787   /* match the remaining chunks of wm against what remains of the tail of rm */
 788   r = rz - eat - 1;             /* can't have <nul> or 'Z'within the tail, so just move r */
 789   while (r >= rx)
 790   {
 791     bw = w;
 792     for (bw++; (*bw != *r); r--)
 793       if (--trash < 0)
 794         return 1;
 795     if (!(r >= rx))
 796       return 1;
 797     for ((br = --r), bw++;
 798         (*bw) && (br >= rx) && ((*bw == *br) || (*bw == 'A')); br--, bw++);
 799     if (!*bw)
 800       return 0;
 801     if (!(br >= rx))
 802       return 1;
 803     if (*bw != 'Z')
 804     {
 805       if (--trash < 0)
 806         return 1;
 807     }
 808     else
 809     {
 810       r = br;
 811       w = bw;
 812     }
 813   }
 814   return 1;                     /* Auch... something left out ? Fail */
 815 }
 816
 817 /*
 818  * matchcompIP()
 819  * Compiles an IP mask into an in_mask structure
 820  * The given <mask> can either be:
 821  * - An usual irc type mask, containing * and or ?
 822  * - An ip number plus a /bitnumber part, that will only consider
 823  *   the first "bitnumber" bits of the IP (bitnumber must be in 0-31 range)
 824  * - An ip numer plus a /ip.bit.mask.values that will consider
 825  *   only the bits marked as 1 in the ip.bit.mask.values
 826  * In the last two cases both the ip number and the bitmask can specify
 827  * less than 4 bytes, the missing bytes then default to zero, note that
 828  * this is *different* from the way inet_aton() does and that this does
 829  * NOT happen for normal IPmasks (not containing '/')
 830  * If the returned value is zero the produced in_mask might match some IP,
 831  * if it's nonzero it will never match anything (and the imask struct is
 832  * set so that always fails).
 833  *
 834  * The returned structure contains 3 fields whose meaning is the following:
 835  * im.mask = The bits considered significative in the IP
 836  * im.bits = What these bits should look like to have a match
 837  * im.fall = If zero means that the above information used as
 838  *           ((IP & im.mask) == im.bits) is enough to tell if the compiled
 839  *           mask matches the given IP, nonzero means that it is needed,
 840  *           in case they did match, to call also the usual text match
 841  *           functions, because the mask wasn't "completely compiled"
 842  *
 843  * They should be used like:
 844  * matchcompIP(&im, mask);
 845  * if ( ((IP & im.mask)!=im.bits)) || (im.fall&&match(mask,inet_ntoa(IP))) )
 846  *    { handle_non_match } else { handle_match };
 847  * instead of:
 848  * if ( match(mask, inet_ntoa(IP)) )
 849  *    { handle_non_match } else { handle_match };
 850  *
 851  * Note: This function could be smarter when dealing with complex masks,
 852  *       this implementation is quite lazy and understands only very simple
 853  *       cases, whatever contains a ? anywhere or contains a '*' that isn't
 854  *       part of a trailing '.*' will fallback to text-match, this could be
 855  *       avoided for masks like 12?3.5.6 12.*.3.4 1.*.*.2 72?72?72?72 and
 856  *       so on that "could" be completely compiled to IP masks.
 857  *       If you try to improve this be aware of the fact that ? and *
 858  *       could match both dots and digits and we _must_ always reject
 859  *       what doesn't match in textform (like leading zeros and so on),
 860  *       so it's a LOT more tricky than it might seem. By now most common
 861  *       cases are optimized.
 862  */
 863
 864 int matchcompIP(struct in_mask *imask, const char *mask)
 865 {
 866   const char *m = mask;
 867   unsigned int bits = 0;
 868   unsigned int filt = 0;
 869   int unco = 0;
 870   int digits = 0;
 871   int shift = 24;
 872   int tmp = 0;
 873
 874   do
 875   {
 876     switch (*m)
 877     {
 878       case '\\':
 879         if ((m[1] == '\\') || (m[1] == '*') || (m[1] == '?')
 880             || (m[1] == '\0'))
 881           break;
 882         continue;
 883       case '0':
 884       case '1':
 885       case '2':
 886       case '3':
 887       case '4':
 888       case '5':
 889       case '6':
 890       case '7':
 891       case '8':
 892       case '9':
 893         if (digits && !tmp)     /* Leading zeros */
 894           break;
 895         digits++;
 896         tmp *= 10;
 897         tmp += (*m - '0');      /* Can't overflow, INT_MAX > 2559 */
 898         if (tmp > 255)
 899           break;
 900         continue;
 901       case '\0':
 902         filt = 0xFFFFFFFF;
 903         /* Intentional fallthrough */
 904       case '.':
 905         if ((!shift) != (!*m))
 906           break;
 907         /* Intentional fallthrough */
 908       case '/':
 909         bits |= (tmp << shift);
 910         shift -= 8;
 911         digits = 0;
 912         tmp = 0;
 913         if (*m != '/')
 914           continue;
 915         shift = 24;
 916         do
 917         {
 918           m++;
 919           if (IsDigit(*m))
 920           {
 921             if (digits && !tmp) /* Leading zeros */
 922               break;
 923             digits++;
 924             tmp *= 10;
 925             tmp += (*m - '0');  /* Can't overflow, INT_MAX > 2559 */
 926             if (tmp > 255)
 927               break;
 928           }
 929           else
 930           {
 931             switch (*m)
 932             {
 933               case '.':
 934               case '\0':
 935                 if ((!shift) && (*m))
 936                   break;
 937                 filt |= (tmp << shift);
 938                 shift -= 8;
 939                 tmp = 0;
 940                 digits = 0;
 941                 continue;
 942               default:
 943                 break;
 944             }
 945             break;
 946           }
 947         }
 948         while (*m);
 949         if (*m)
 950           break;
 951         if (filt && (!(shift < 16)) && (!(filt & 0xE0FFFFFF)))
 952           filt = 0xFFFFFFFF << (32 - ((filt >> 24)));
 953         bits &= filt;
 954         continue;
 955       case '?':
 956         unco = 1;
 957         /* Intentional fallthrough */
 958       case '*':
 959         if (digits)
 960           unco = 1;
 961         filt = (0xFFFFFFFF << (shift)) << 8;
 962         while (*++m)
 963         {
 964           if (IsDigit(*m))
 965             unco = 1;
 966           else
 967           {
 968             switch (*m)
 969             {
 970               case '.':
 971                 if (m[1] != '*')
 972                   unco = 1;
 973                 if (!shift)
 974                   break;
 975                 shift -= 8;
 976                 continue;
 977               case '?':
 978                 unco = 1;
 979               case '*':
 980                 continue;
 981               default:
 982                 break;
 983             }
 984             break;
 985           }
 986         }
 987         if (*m)
 988           break;
 989         continue;
 990       default:
 991         break;
 992     }
 993
 994     /* If we get here there is some error and this can't ever match */
 995     filt = 0;
 996     bits = ~0;
 997     unco = 0;
 998     break;                      /* This time break the loop :) */
 999
1000   }
1001   while (*m++);
1002
1003   imask->bits.s_addr = htonl(bits);
1004   imask->mask.s_addr = htonl(filt);
1005   imask->fall = unco;
1006   return ((bits & ~filt) ? -1 : 0);
1007
1008 }