ircd/match.c

   1 /*
   2  * IRC - Internet Relay Chat, common/match.c
   3  * Copyright (C) 1990 Jarkko Oikarinen
   4  *
   5  * This program is free software; you can redistribute it and/or modify
   6  * it under the terms of the GNU General Public License as published by
   7  * the Free Software Foundation; either version 1, or (at your option)
   8  * any later version.
   9  *
  10  * This program is distributed in the hope that it will be useful,
  11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13  * GNU General Public License for more details.
  14  *
  15  * You should have received a copy of the GNU General Public License
  16  * along with this program; if not, write to the Free Software
  17  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  18  *
  19  * $Id$
  20  */
  21 #include "config.h"
  22
  23 #include "match.h"
  24 #include "ircd_chattr.h"
  25 /*
  26  * mmatch()
  27  *
  28  * Written by Run (carlo@runaway.xs4all.nl), 25-10-96
  29  *
  30  *
  31  * From: Carlo Wood <carlo@runaway.xs4all.nl>
  32  * Message-Id: <199609021026.MAA02393@runaway.xs4all.nl>
  33  * Subject: [C-Com] Analysis for `mmatch' (was: gline4 problem)
  34  * To: coder-com@mail.undernet.org (coder committee)
  35  * Date: Mon, 2 Sep 1996 12:26:01 +0200 (MET DST)
  36  *
  37  * We need a new function `mmatch(const char *old_mask, const char *new_mask)'
  38  * which returns `true' likewise the current `match' (start with copying it),
  39  * but which treats '*' and '?' in `new_mask' differently (not "\*" and "\?" !)
  40  * as follows:  a '*' in `new_mask' does not match a '?' in `old_mask' and
  41  * a '?' in `new_mask' does not match a '\?' in `old_mask'.
  42  * And ofcourse... a '*' in `new_mask' does not match a '\*' in `old_mask'...
  43  * And last but not least, '\?' and '\*' in `new_mask' now become one character.
  44  */
  45
  46 int mmatch(const char *old_mask, const char *new_mask)
  47 {
  48   const char *m = old_mask;
  49   const char *n = new_mask;
  50   const char *ma = m;
  51   const char *na = n;
  52   int wild = 0;
  53   int mq = 0, nq = 0;
  54
  55   while (1)
  56   {
  57     if (*m == '*')
  58     {
  59       while (*m == '*')
  60         m++;
  61       wild = 1;
  62       ma = m;
  63       na = n;
  64     }
  65
  66     if (!*m)
  67     {
  68       if (!*n)
  69         return 0;
  70       for (m--; (m > old_mask) && (*m == '?'); m--)
  71         ;
  72       if ((*m == '*') && (m > old_mask) && (m[-1] != '\\'))
  73         return 0;
  74       if (!wild)
  75         return 1;
  76       m = ma;
  77
  78       /* Added to `mmatch' : Because '\?' and '\*' now is one character: */
  79       if ((*na == '\\') && ((na[1] == '*') || (na[1] == '?')))
  80         ++na;
  81
  82       n = ++na;
  83     }
  84     else if (!*n)
  85     {
  86       while (*m == '*')
  87         m++;
  88       return (*m != 0);
  89     }
  90     if ((*m == '\\') && ((m[1] == '*') || (m[1] == '?')))
  91     {
  92       m++;
  93       mq = 1;
  94     }
  95     else
  96       mq = 0;
  97
  98     /* Added to `mmatch' : Because '\?' and '\*' now is one character: */
  99     if ((*n == '\\') && ((n[1] == '*') || (n[1] == '?')))
 100     {
 101       n++;
 102       nq = 1;
 103     }
 104     else
 105       nq = 0;
 106
 107 /*
 108  * This `if' has been changed compared to match() to do the following:
 109  * Match when:
 110  *   old (m)         new (n)         boolean expression
 111  *    *               any             (*m == '*' && !mq) ||
 112  *    ?               any except '*'  (*m == '?' && !mq && (*n != '*' || nq)) ||
 113  * any except * or ?  same as m       (!((*m == '*' || *m == '?') && !mq) &&
 114  *                                      ToLower(*m) == ToLower(*n) &&
 115  *                                        !((mq && !nq) || (!mq && nq)))
 116  *
 117  * Here `any' also includes \* and \? !
 118  *
 119  * After reworking the boolean expressions, we get:
 120  * (Optimized to use boolean shortcircuits, with most frequently occuring
 121  *  cases upfront (which took 2 hours!)).
 122  */
 123     if ((*m == '*' && !mq) ||
 124         ((!mq || nq) && ToLower(*m) == ToLower(*n)) ||
 125         (*m == '?' && !mq && (*n != '*' || nq)))
 126     {
 127       if (*m)
 128         m++;
 129       if (*n)
 130         n++;
 131     }
 132     else
 133     {
 134       if (!wild)
 135         return 1;
 136       m = ma;
 137
 138       /* Added to `mmatch' : Because '\?' and '\*' now is one character: */
 139       if ((*na == '\\') && ((na[1] == '*') || (na[1] == '?')))
 140         ++na;
 141
 142       n = ++na;
 143     }
 144   }
 145 }
 146
 147 /*
 148  * Compare if a given string (name) matches the given
 149  * mask (which can contain wild cards: '*' - match any
 150  * number of chars, '?' - match any single character.
 151  *
 152  * return  0, if match
 153  *         1, if no match
 154  */
 155
 156 /*
 157  * match
 158  *
 159  * Rewritten by Andrea Cocito (Nemesi), November 1998.
 160  *
 161  */
 162
 163 /****************** Nemesi's match() ***************/
 164
 165 int match(const char *mask, const char *string)
 166 {
 167   const char *m = mask, *s = string;
 168   char ch;
 169   const char *bm, *bs;          /* Will be reg anyway on a decent CPU/compiler */
 170
 171   /* Process the "head" of the mask, if any */
 172   while ((ch = *m++) && (ch != '*'))
 173     switch (ch)
 174     {
 175       case '\\':
 176         if (*m == '?' || *m == '*')
 177           ch = *m++;
 178       default:
 179         if (ToLower(*s) != ToLower(ch))
 180           return 1;
 181       case '?':
 182         if (!*s++)
 183           return 1;
 184     };
 185   if (!ch)
 186     return *s;
 187
 188   /* We got a star: quickly find if/where we match the next char */
 189 got_star:
 190   bm = m;                       /* Next try rollback here */
 191   while ((ch = *m++))
 192     switch (ch)
 193     {
 194       case '?':
 195         if (!*s++)
 196           return 1;
 197       case '*':
 198         bm = m;
 199         continue;               /* while */
 200       case '\\':
 201         if (*m == '?' || *m == '*')
 202           ch = *m++;
 203       default:
 204         goto break_while;       /* C is structured ? */
 205     };
 206 break_while:
 207   if (!ch)
 208     return 0;                   /* mask ends with '*', we got it */
 209   ch = ToLower(ch);
 210   while (ToLower(*s++) != ch)
 211     if (!*s)
 212       return 1;
 213   bs = s;                       /* Next try start from here */
 214
 215   /* Check the rest of the "chunk" */
 216   while ((ch = *m++))
 217   {
 218     switch (ch)
 219     {
 220       case '*':
 221         goto got_star;
 222       case '\\':
 223         if (*m == '?' || *m == '*')
 224           ch = *m++;
 225       default:
 226         if (ToLower(*s) != ToLower(ch))
 227         {
 228           m = bm;
 229           s = bs;
 230           goto got_star;
 231         };
 232       case '?':
 233         if (!*s++)
 234           return 1;
 235     };
 236   };
 237   if (*s)
 238   {
 239     m = bm;
 240     s = bs;
 241     goto got_star;
 242   };
 243   return 0;
 244 }
 245
 246 /*
 247  * collapse()
 248  * Collapse a pattern string into minimal components.
 249  * This particular version is "in place", so that it changes the pattern
 250  * which is to be reduced to a "minimal" size.
 251  *
 252  * (C) Carlo Wood - 6 Oct 1998
 253  * Speedup rewrite by Andrea Cocito, December 1998.
 254  * Note that this new optimized alghoritm can *only* work in place.
 255  */
 256
 257 char *collapse(char *mask)
 258 {
 259   int star = 0;
 260   char *m = mask;
 261   char *b;
 262
 263   if (m)
 264   {
 265     do
 266     {
 267       if ((*m == '*') && ((m[1] == '*') || (m[1] == '?')))
 268       {
 269         b = m;
 270         do
 271         {
 272           if (*m == '*')
 273             star = 1;
 274           else
 275           {
 276             if (star && (*m != '?'))
 277             {
 278               *b++ = '*';
 279               star = 0;
 280             };
 281             *b++ = *m;
 282             if ((*m == '\\') && ((m[1] == '*') || (m[1] == '?')))
 283               *b++ = *++m;
 284           };
 285         }
 286         while (*m++);
 287         break;
 288       }
 289       else
 290       {
 291         if ((*m == '\\') && ((m[1] == '*') || (m[1] == '?')))
 292           m++;
 293       };
 294     }
 295     while (*m++);
 296   };
 297   return mask;
 298 }
 299
 300 /*
 301  ***************** Nemesi's matchcomp() / matchexec() **************
 302  */
 303
 304 /* These functions allow the use of "compiled" masks, you compile a mask
 305  * by means of matchcomp() that gets the plain text mask as input and writes
 306  * its result in the memory locations addressed by the 3 parameters:
 307  * - *cmask will contain the text of the compiled mask
 308  * - *minlen will contain the lenght of the shortest string that can match
 309  *   the mask
 310  * - *charset will contain the minimal set of chars needed to match the mask
 311  * You can pass NULL as *charset and it will be simply not returned, but you
 312  * MUST pass valid pointers for *minlen and *cmask (wich must be big enough
 313  * to contain the compiled mask text that is in the worst case as long as the
 314  * text of the mask itself in plaintext format) and the return value of
 315  * matchcomp() will be the number of chars actually written there (excluded
 316  * the trailing zero). cmask can be == mask, matchcomp() can work in place.
 317  * The {cmask, minlen} couple of values make the real compiled mask and
 318  * need to be passed to the functions that use the compiled mask, if you pass
 319  * the wrong minlen or something wrong in cmask to one of these expect a
 320  * coredump. This means that when you record a compiled mask you must store
 321  * *both* these values.
 322  * Once compiled the mask can be used to match a string by means of
 323  * matchexec(), it can be printed back to human-readable format by means
 324  * of sprintmatch() or it can be compared to another compiled mask by means
 325  * of mmexec() that will tell if it completely overrides that mask (a lot like
 326  * what mmatch() does for plain text masks).
 327  * You can gain a lot of speed in many situations avoiding to matchexec() when:
 328  * - The maximum lenght of the field you are about to match() the mask to is
 329  *   shorter than minlen, in example when matching abc*def*ghil with a nick:
 330  *   It just cannot match since a nick is at most 9 chars long and the mask
 331  *   needs at least 10 chars (10 will be the value returned in minlen).
 332  * - The charset allowed for the field you are about to match to doesn't
 333  *   "contain" the charset returned by matchcomp(), in example when you
 334  *   have *.* as mask it makes no sense to try to match it against a nick
 335  *   because, again, a nick can't contain a '.', you can check this with
 336  *   a simple (charset & NTL_IRCNK) in this case.
 337  * - As a special case, since compiled masks are forced to lowercase,
 338  *   it would make no sense to use the NTL_LOWER and NTL_UPPER on a compiled
 339  *   mask, thus they are reused as follows: if the NTL_LOWER bit of charset
 340  *   is set it means that the mask contains only non-wilds chars (i.e. you can
 341  *   use strCasecmp() to match it or a direct hash lookup), if the NTL_UPPER
 342  *   bit is set it means that it contains only wild chars (and you can
 343  *   match it with strlen(field)>=minlen).
 344  * Do these optimizations ONLY when the data you are about to pass to
 345  * matchexec() are *known* to be invalid in advance, using strChattr()
 346  * or strlen() on the text would be slower than calling matchexec() directly
 347  * and let it fail.
 348  * Internally a compiled mask contain in the *cmask area the text of
 349  * the plain text form of the mask itself with applied the following hacks:
 350  * - All characters are forced to lowercase (so that uppercase letters and
 351  *   specifically the symbols 'A' and 'Z' are reserved for special use)
 352  * - All non-escaped stars '*' are replaced by the letter 'Z'
 353  * - All non-escaped question marks '?' are replaced by the letter 'A'
 354  * - All escape characters are removed, the wilds escaped by them are
 355  *   then passed by without the escape since they don't collide anymore
 356  *   with the real wilds (encoded as A/Z)
 357  * - Finally the part of the mask that follows the last asterisk is
 358  *   reversed (byte order mirroring) and moved right after the first
 359  *   asterisk.
 360  * After all this a mask like:   Head*CHUNK1*chu\*nK2*ch??k3*TaIl
 361  *               .... becomes:   headZliatZchunk1Zchu*nk2ZchAAk3
 362  * This can still be printed on a console, more or less understood by an
 363  * human and handled with the usual str*() library functions.
 364  * When you store somewhere the compiled mask you can avoid storing the
 365  * textform of it since it can be "decompiled" by means of sprintmatch(),
 366  * but at that time the following things are changed in the mask:
 367  * - All chars have been forced to lowercase.
 368  * - The mask is collapsed.
 369  * The balance point of using compiled masks in terms of CPU is when you expect
 370  * to use matchexec() instead of match() at least 20 times on the same mask
 371  * or when you expect to use mmexec() instead of mmatch() 3 times.
 372  */
 373
 374  /*
 375     * matchcomp()
 376     *
 377     * Compiles a mask into a form suitable for using in matchexec().
 378   */
 379
 380 int matchcomp(char *cmask, int *minlen, int *charset, const char *mask)
 381 {
 382   const char *m = mask;
 383   char *b = cmask;
 384   char *fs = 0;
 385   char *ls = 0;
 386   char *x1, *x2;
 387   int l1, l2, lmin, loop, sign;
 388   int star = 0;
 389   int cnt = 0;
 390   char ch;
 391   int chset = ~0;
 392   int chset2 = (NTL_LOWER | NTL_UPPER);
 393
 394   if (m)
 395     while ((ch = *m++))
 396       switch (ch)
 397       {
 398         case '*':
 399           star = 1;
 400           break;
 401         case '?':
 402           cnt++;
 403           *b++ = 'A';
 404           chset2 &= ~NTL_LOWER;
 405           break;
 406         case '\\':
 407           if ((*m == '?') || (*m == '*'))
 408             ch = *m++;
 409         default:
 410           if (star)
 411           {
 412             ls = b;
 413             fs = fs ? fs : b;
 414             *b++ = 'Z';
 415             chset2 &= ~NTL_LOWER;
 416             star = 0;
 417           };
 418           cnt++;
 419           *b = ToLower(ch);
 420           chset &= IRCD_CharAttrTab[*b++ - CHAR_MIN];
 421           chset2 &= ~NTL_UPPER;
 422       };
 423
 424   if (charset)
 425     *charset = (chset | chset2);
 426
 427   if (star)
 428   {
 429     ls = b;
 430     fs = (fs ? fs : b);
 431     *b++ = 'Z';
 432   };
 433
 434   if (ls)
 435   {
 436     for (x1 = ls + 1, x2 = (b - 1); x1 < x2; x1++, x2--)
 437     {
 438       ch = *x1;
 439       *x1 = *x2;
 440       *x2 = ch;
 441     };
 442     l1 = (ls - fs);
 443     l2 = (b - ls);
 444     x1 = fs;
 445     while ((lmin = (l1 < l2) ? l1 : l2))
 446     {
 447       x2 = x1 + l1;
 448       for (loop = 0; loop < lmin; loop++)
 449       {
 450         ch = x1[loop];
 451         x1[loop] = x2[loop];
 452         x2[loop] = ch;
 453       };
 454       x1 += lmin;
 455       sign = l1 - l2;
 456       l1 -= (sign < 0) ? 0 : lmin;
 457       l2 -= (sign > 0) ? 0 : lmin;
 458     };
 459   };
 460
 461   *b = '\0';
 462   *minlen = cnt;
 463   return (b - cmask);
 464
 465 }
 466
 467 /*
 468  * matchexec()
 469  *
 470  * Executes a match with a mask previosuly compiled with matchcomp()
 471  * Note 1: If the mask isn't correctly produced by matchcomp() I will core
 472  * Note 2: 'min' MUST be the value returned by matchcomp on that mask,
 473  *         or.... I will core even faster :-)
 474  * Note 3: This piece of code is not intended to be nice but efficient.
 475  */
 476
 477 int matchexec(const char *string, const char *cmask, int minlen)
 478 {
 479   const char *s = string - 1;
 480   const char *b = cmask - 1;
 481   int trash;
 482   const char *bb, *bs;
 483   char ch;
 484
 485 tryhead:
 486   while ((ToLower(*++s) == *++b) && *s);
 487   if (!*s)
 488     return ((*b != '\0') && ((*b++ != 'Z') || (*b != '\0')));
 489   if (*b != 'Z')
 490   {
 491     if (*b == 'A')
 492       goto tryhead;
 493     return 1;
 494   };
 495
 496   bs = s;
 497   while (*++s);
 498
 499   if ((trash = (s - string - minlen)) < 0)
 500     return 2;
 501
 502 trytail:
 503   while ((ToLower(*--s) == *++b) && *b && (ToLower(*--s) == *++b) && *b
 504       && (ToLower(*--s) == *++b) && *b && (ToLower(*--s) == *++b) && *b);
 505   if (*b != 'Z')
 506   {
 507     if (*b == 'A')
 508       goto trytail;
 509     return (*b != '\0');
 510   };
 511
 512   s = --bs;
 513   bb = b;
 514
 515   while ((ch = *++b))
 516   {
 517     while ((ToLower(*++s) != ch))
 518       if (--trash < 0)
 519         return 4;
 520     bs = s;
 521
 522 trychunk:
 523     while ((ToLower(*++s) == *++b) && *b);
 524     if (!*b)
 525       return 0;
 526     if (*b == 'Z')
 527     {
 528       bs = --s;
 529       bb = b;
 530       continue;
 531     };
 532     if (*b == 'A')
 533       goto trychunk;
 534
 535     b = bb;
 536     s = bs;
 537     if (--trash < 0)
 538       return 5;
 539   };
 540
 541   return 0;
 542 }
 543
 544 /*
 545  * matchdecomp()
 546  * Prints the human readable version of *cmask into *mask, (decompiles
 547  * cmask).
 548  * The area pointed by *mask MUST be big enough (the mask might be up to
 549  * twice the size of its compiled form if it's made all of \? or \*, and
 550  * this function can NOT work in place since it might enflate the mask)
 551  * The printed mask is not identical to the one that was compiled to cmask,
 552  * infact it is 1) forced to all lowercase, 2) collapsed, both things
 553  * are supposed to NOT change it's meaning.
 554  * It returns the number of chars actually written to *mask;
 555  */
 556
 557 int matchdecomp(char *mask, const char *cmask)
 558 {
 559   char *rtb = mask;
 560   const char *rcm = cmask;
 561   const char *begtail, *endtail;
 562
 563   if (rtb ==0)
 564     return (-1);
 565
 566   if (rcm == 0)
 567     return (-2);
 568
 569   for (; (*rcm != 'Z'); rcm++, rtb++)
 570   {
 571     if ((*rcm == '?') || (*rcm == '*'))
 572       *rtb++ = '\\';
 573     if (!((*rtb = ((*rcm == 'A') ? '?' : *rcm))))
 574       return (rtb - mask);
 575   };
 576
 577   begtail = rcm++;
 578   *rtb++ = '*';
 579
 580   while (*rcm && (*rcm != 'Z'))
 581     rcm++;
 582
 583   endtail = rcm;
 584
 585   if (*rcm)
 586   {
 587     while (*++rcm)
 588       switch (*rcm)
 589       {
 590         case 'A':
 591           *rtb++ = '?';
 592           break;
 593         case 'Z':
 594           *rtb++ = '*';
 595           break;
 596         case '*':
 597         case '?':
 598           *rtb++ = '\\';
 599         default:
 600           *rtb++ = *rcm;
 601       };
 602     *rtb++ = '*';
 603   };
 604
 605   for (rcm = endtail; (--rcm) > begtail; *rtb++ = ((*rcm == 'A') ? '?' : *rcm))
 606     if ((*rcm == '?') || (*rcm == '*'))
 607       *rtb++ = '\\';
 608
 609   *rtb = '\0';
 610   return (rtb - mask);
 611 }
 612
 613 /*
 614  * mmexec()
 615  * Checks if a wider compiled mask (wcm/wminlen) completely overrides
 616  * a more restrict one (rcm/rminlen), basically what mmatch() does for
 617  * non-compiled masks, returns 0 if the override is true (like mmatch()).
 618  * "the wider overrides the restrict" means that any string that matches
 619  * the restrict one _will_ also match the wider one, always.
 620  * In this we behave differently from mmatch() because in example we return
 621  * true for " a?*cd overrides a*bcd " for wich the override happens for how
 622  * we literally defined it, here mmatch() would have returned false.
 623  * The original concepts and the base alghoritm are copied from mmatch()
 624  * written by Run (Carlo Wood), this function is written by
 625  * Nemesi (Andrea Cocito)
 626  */
 627
 628 int mmexec(const char *wcm, int wminlen, const char *rcm, int rminlen)
 629 {
 630   const char *w, *r, *br, *bw, *rx, *rz;
 631   int eat, trash;
 632
 633   /* First of all rm must have enough non-stars to 'contain' wm */
 634   if ((trash = rminlen - wminlen) < 0)
 635     return 1;
 636   w = wcm;
 637   r = rcm;
 638   eat = 0;
 639
 640   /* Let's start the game, remember that '*' is mapped to 'Z', '?'
 641      is mapped to 'A' and that head?*??*?chunk*???*tail becomes
 642      headAAAAZliatAAAZchunk for compiled masks */
 643
 644   /* Match the head of wm with the head of rm */
 645   for (; (*r) && (*r != 'Z') && ((*w == *r) || (*w == 'A')); r++, w++);
 646   if (*r == 'Z')
 647     while (*w == 'A')           /* Eat extra '?' before '*' in wm if got '*' in rm */
 648       w++, eat++;
 649   if (*w != 'Z')                /* head1<any>.. can't match head2<any>.. */
 650     return ((*w) || (*r)) ? 1 : 0;      /* and head<nul> matches only head<nul> */
 651   if (!*++w)
 652     return 0;                   /* headZ<nul> matches head<anything>    */
 653
 654   /* Does rm have any stars in it ? let's check */
 655   for (rx = r; *r && (*r != 'Z'); r++);
 656   if (!*r)
 657   {
 658     /* rm has no stars and thus isn't a mask but it's just a flat
 659        string: special handling occurs here, note that eat must be 0 here */
 660
 661     /* match the tail */
 662     if (*w != 'Z')
 663     {
 664       for (; r--, (*w) && ((*w == *r) || (*w == 'A')); w++);
 665       if (*w != 'Z')            /* headZliat1<any> fails on head<any>2tail  */
 666         return (*w) ? 1 : 0;    /* but headZliat<nul> matches head<any>tail */
 667     }
 668
 669     /* match the chunks */
 670     while (1)
 671     {                           /* This loop can't break but only return   */
 672
 673       for (bw = w++; (*w != *rx); rx++) /* Seek the 1st char of the chunk */
 674         if (--trash < 0)        /* See if we can trash one more char of rm */
 675           return 1;             /* If not we can only fail of course       */
 676       for (r = ++rx, w++; (*w) && ((*w == *r) || (*w == 'A')); r++, w++);
 677       if (!*w)                  /* Did last loop match the rest of chunk ? */
 678         return 0;               /* ... Yes, end of wm, matched !           */
 679       if (*w != 'Z')
 680       {                         /* ... No, hitted non-star                 */
 681         w = bw;                 /* Rollback at beginning of chunk          */
 682         if (--trash < 0)        /* Trashed the char where this try started */
 683           return 1;             /* if we can't trash more chars fail       */
 684       }
 685       else
 686       {
 687         rx = r;                 /* Successfully matched a chunk, move rx   */
 688       }                 /* and go on with the next one             */
 689     }
 690   }
 691
 692   /* rm has at least one '*' and thus is a 'real' mask */
 693   rz = r++;                     /* rx = unused of head, rz = beg-tail */
 694
 695   /* Match the tail of wm (if any) against the tail of rm */
 696   if (*w != 'Z')
 697   {
 698     for (; (*w) && (*r != 'Z') && ((*w == *r) || (*w == 'A')); w++, r++);
 699     if (*r == 'Z')              /* extra '?' before tail are fluff, just flush 'em */
 700       while (*w == 'A')
 701         w++;
 702     if (*w != 'Z')              /* We aren't matching a chunk, can't rollback      */
 703       return (*w) ? 1 : 0;
 704   }
 705
 706   /* Match the chunks of wm against what remains of the head of rm */
 707   while (1)
 708   {
 709     bw = w;
 710     for (bw++; (rx < rz) && (*bw != *rx); rx++) /* Seek the first           */
 711       if (--trash < 0)          /* waste some trash reserve */
 712         return 1;
 713     if (!(rx < rz))             /* head finished            */
 714       break;
 715     for (bw++, (br = ++rx);
 716         (br < rz) && (*bw) && ((*bw == *br) || (*bw == 'A')); br++, bw++);
 717     if (!(br < rz))             /* Note that we didn't use any 'eat' char yet, if  */
 718       while (*bw == 'A')        /* there were eat-en chars the head would be over  */
 719         bw++, eat++;            /* Happens only at end of head, and eat is still 0 */
 720     if (!*bw)
 721       return 0;
 722     if (*bw != 'Z')
 723     {
 724       eat = 0;
 725       if (!(br < rz))
 726       {                         /* If we failed because we got the end of head */
 727         trash -= (br - rx);     /* it makes no sense to rollback, just trash   */
 728         if (--trash < 0)        /* all the rest of the head wich isn't long    */
 729           return 1;             /* enough for this chunk and go out of this    */
 730         break;                  /* loop, then we try with the chunks of rm     */
 731       };
 732       if (--trash < 0)
 733         return 1;
 734     }
 735     else
 736     {
 737       w = bw;
 738       rx = br;
 739     }
 740   }
 741
 742   /* Match the unused chunks of wm against the chunks of rm */
 743   rx = r;
 744   for (; *r && (*r != 'Z'); r++);
 745   rz = r;
 746   if (*r++)
 747   {
 748     while (*r)
 749     {
 750       bw = w;
 751       while (eat && *r)         /* the '?' we had eated make us skip as many chars */
 752         if (*r++ != 'Z')        /* here, but can't skip stars or trailing zero     */
 753           eat--;
 754       for (bw++; (*r) && (*bw != *r); r++)
 755         if ((*r != 'Z') && (--trash < 0))
 756           return 1;
 757       if (!*r)
 758         break;
 759       for ((br = ++r), bw++;
 760           (*br) && (*br != 'Z') && ((*bw == *br) || (*bw == 'A')); br++, bw++);
 761       if (*br == 'Z')
 762         while (*bw == 'A')
 763           bw++, eat++;
 764       if (!*bw)
 765         return 0;
 766       if (*bw != 'Z')
 767       {
 768         eat = 0;
 769         if ((!*br) || (*r == 'Z'))
 770         {                       /* If we hit the end of rm or a star in it */
 771           trash -= (br - r);    /* makes no sense to rollback within this  */
 772           if (trash < 0)        /* same chunk of br, skip it all and then  */
 773             return 1;           /* either rollback or break this loop if   */
 774           if (!*br)             /* it was the end of rm                    */
 775             break;
 776           r = br;
 777         }
 778         if (--trash < 0)
 779           return 1;
 780       }
 781       else
 782       {
 783         r = br;
 784         w = bw;
 785       }
 786     }
 787   }
 788
 789   /* match the remaining chunks of wm against what remains of the tail of rm */
 790   r = rz - eat - 1;             /* can't have <nul> or 'Z'within the tail, so just move r */
 791   while (r >= rx)
 792   {
 793     bw = w;
 794     for (bw++; (*bw != *r); r--)
 795       if (--trash < 0)
 796         return 1;
 797     if (!(r >= rx))
 798       return 1;
 799     for ((br = --r), bw++;
 800         (*bw) && (br >= rx) && ((*bw == *br) || (*bw == 'A')); br--, bw++);
 801     if (!*bw)
 802       return 0;
 803     if (!(br >= rx))
 804       return 1;
 805     if (*bw != 'Z')
 806     {
 807       if (--trash < 0)
 808         return 1;
 809     }
 810     else
 811     {
 812       r = br;
 813       w = bw;
 814     }
 815   }
 816   return 1;                     /* Auch... something left out ? Fail */
 817 }
 818
 819 /*
 820  * matchcompIP()
 821  * Compiles an IP mask into an in_mask structure
 822  * The given <mask> can either be:
 823  * - An usual irc type mask, containing * and or ?
 824  * - An ip number plus a /bitnumber part, that will only consider
 825  *   the first "bitnumber" bits of the IP (bitnumber must be in 0-31 range)
 826  * - An ip numer plus a /ip.bit.mask.values that will consider
 827  *   only the bits marked as 1 in the ip.bit.mask.values
 828  * In the last two cases both the ip number and the bitmask can specify
 829  * less than 4 bytes, the missing bytes then default to zero, note that
 830  * this is *different* from the way inet_aton() does and that this does
 831  * NOT happen for normal IPmasks (not containing '/')
 832  * If the returned value is zero the produced in_mask might match some IP,
 833  * if it's nonzero it will never match anything (and the imask struct is
 834  * set so that always fails).
 835  *
 836  * The returned structure contains 3 fields whose meaning is the following:
 837  * im.mask = The bits considered significative in the IP
 838  * im.bits = What these bits should look like to have a match
 839  * im.fall = If zero means that the above information used as
 840  *           ((IP & im.mask) == im.bits) is enough to tell if the compiled
 841  *           mask matches the given IP, nonzero means that it is needed,
 842  *           in case they did match, to call also the usual text match
 843  *           functions, because the mask wasn't "completely compiled"
 844  *
 845  * They should be used like:
 846  * matchcompIP(&im, mask);
 847  * if ( ((IP & im.mask)!=im.bits)) || (im.fall&&match(mask,inet_ntoa(IP))) )
 848  *    { handle_non_match } else { handle_match };
 849  * instead of:
 850  * if ( match(mask, inet_ntoa(IP)) )
 851  *    { handle_non_match } else { handle_match };
 852  *
 853  * Note: This function could be smarter when dealing with complex masks,
 854  *       this implementation is quite lazy and understands only very simple
 855  *       cases, whatever contains a ? anywhere or contains a '*' that isn't
 856  *       part of a trailing '.*' will fallback to text-match, this could be
 857  *       avoided for masks like 12?3.5.6 12.*.3.4 1.*.*.2 72?72?72?72 and
 858  *       so on that "could" be completely compiled to IP masks.
 859  *       If you try to improve this be aware of the fact that ? and *
 860  *       could match both dots and digits and we _must_ always reject
 861  *       what doesn't match in textform (like leading zeros and so on),
 862  *       so it's a LOT more tricky than it might seem. By now most common
 863  *       cases are optimized.
 864  */
 865
 866 int matchcompIP(struct in_mask *imask, const char *mask)
 867 {
 868   const char *m = mask;
 869   unsigned int bits = 0;
 870   unsigned int filt = 0;
 871   int unco = 0;
 872   int digits = 0;
 873   int shift = 24;
 874   int tmp = 0;
 875
 876   do
 877   {
 878     switch (*m)
 879     {
 880       case '\\':
 881         if ((m[1] == '\\') || (m[1] == '*') || (m[1] == '?')
 882             || (m[1] == '\0'))
 883           break;
 884         continue;
 885       case '0':
 886       case '1':
 887       case '2':
 888       case '3':
 889       case '4':
 890       case '5':
 891       case '6':
 892       case '7':
 893       case '8':
 894       case '9':
 895         if (digits && !tmp)     /* Leading zeros */
 896           break;
 897         digits++;
 898         tmp *= 10;
 899         tmp += (*m - '0');      /* Can't overflow, INT_MAX > 2559 */
 900         if (tmp > 255)
 901           break;
 902         continue;
 903       case '\0':
 904         filt = 0xFFFFFFFF;
 905         /* Intentional fallthrough */
 906       case '.':
 907         if ((!shift) != (!*m))
 908           break;
 909         /* Intentional fallthrough */
 910       case '/':
 911         bits |= (tmp << shift);
 912         shift -= 8;
 913         digits = 0;
 914         tmp = 0;
 915         if (*m != '/')
 916           continue;
 917         shift = 24;
 918         do
 919         {
 920           m++;
 921           if (IsDigit(*m))
 922           {
 923             if (digits && !tmp) /* Leading zeros */
 924               break;
 925             digits++;
 926             tmp *= 10;
 927             tmp += (*m - '0');  /* Can't overflow, INT_MAX > 2559 */
 928             if (tmp > 255)
 929               break;
 930           }
 931           else
 932           {
 933             switch (*m)
 934             {
 935               case '.':
 936               case '\0':
 937                 if ((!shift) && (*m))
 938                   break;
 939                 filt |= (tmp << shift);
 940                 shift -= 8;
 941                 tmp = 0;
 942                 digits = 0;
 943                 continue;
 944               default:
 945                 break;
 946             }
 947             break;
 948           }
 949         }
 950         while (*m);
 951         if (*m)
 952           break;
 953         if (filt && (!(shift < 16)) && (!(filt & 0xE0FFFFFF)))
 954           filt = 0xFFFFFFFF << (32 - ((filt >> 24)));
 955         bits &= filt;
 956         continue;
 957       case '?':
 958         unco = 1;
 959         /* Intentional fallthrough */
 960       case '*':
 961         if (digits)
 962           unco = 1;
 963         filt = (0xFFFFFFFF << (shift)) << 8;
 964         while (*++m)
 965         {
 966           if (IsDigit(*m))
 967             unco = 1;
 968           else
 969           {
 970             switch (*m)
 971             {
 972               case '.':
 973                 if (m[1] != '*')
 974                   unco = 1;
 975                 if (!shift)
 976                   break;
 977                 shift -= 8;
 978                 continue;
 979               case '?':
 980                 unco = 1;
 981               case '*':
 982                 continue;
 983               default:
 984                 break;
 985             }
 986             break;
 987           }
 988         }
 989         if (*m)
 990           break;
 991         continue;
 992       default:
 993         break;
 994     }
 995
 996     /* If we get here there is some error and this can't ever match */
 997     filt = 0;
 998     bits = ~0;
 999     unco = 0;
1000     break;                      /* This time break the loop :) */
1001
1002   }
1003   while (*m++);
1004
1005   imask->bits.s_addr = htonl(bits);
1006   imask->mask.s_addr = htonl(filt);
1007   imask->fall = unco;
1008   return ((bits & ~filt) ? -1 : 0);
1009
1010 }