2 * IRC - Internet Relay Chat, common/match.c
3 * Copyright (C) 1990 Jarkko Oikarinen
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 1, or (at your option)
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 * @brief Functions to match strings against IRC mask strings.
26 #include "ircd_chattr.h"
27 #include "ircd_string.h"
28 #include "ircd_snprintf.h"
33 * Written by Run (carlo@runaway.xs4all.nl), 25-10-96
36 * From: Carlo Wood <carlo@runaway.xs4all.nl>
37 * Message-Id: <199609021026.MAA02393@runaway.xs4all.nl>
38 * Subject: [C-Com] Analysis for `mmatch' (was: gline4 problem)
39 * To: coder-com@mail.undernet.org (coder committee)
40 * Date: Mon, 2 Sep 1996 12:26:01 +0200 (MET DST)
42 * We need a new function `mmatch(const char *old_mask, const char *new_mask)'
43 * which returns `true' likewise the current `match' (start with copying it),
44 * but which treats '*' and '?' in `new_mask' differently (not "\*" and "\?" !)
45 * as follows: a '*' in `new_mask' does not match a '?' in `old_mask' and
46 * a '?' in `new_mask' does not match a '\?' in `old_mask'.
47 * And ofcourse... a '*' in `new_mask' does not match a '\*' in `old_mask'...
48 * And last but not least, '\?' and '\*' in `new_mask' now become one character.
51 /** Compares one mask against another.
52 * One wildcard mask may be said to be a superset of another if the
53 * set of strings matched by the first is a proper superset of the set
54 * of strings matched by the second. In practical terms, this means
55 * that the second is made redundant by the first.
57 * The logic for this test is similar to that in match(), but a
58 * backslash in old_mask only matches a backslash in new_mask (and
59 * requires the next character to match exactly), and -- after
60 * contiguous runs of wildcards are logically collapsed -- a '?' in
61 * old_mask does not match a '*' in new_mask.
63 * @param[in] old_mask One wildcard mask.
64 * @param[in] new_mask Another wildcard mask.
65 * @return Zero if \a old_mask is a superset of \a new_mask, non-zero otherwise.
67 int mmatch(const char *old_mask, const char *new_mask)
69 const char *m = old_mask;
70 const char *n = new_mask;
91 for (m--; (m > old_mask) && (*m == '?'); m--)
93 if ((*m == '*') && (m > old_mask) && (m[-1] != '\\'))
99 /* Added to `mmatch' : Because '\?' and '\*' now is one character: */
100 if ((*na == '\\') && ((na[1] == '*') || (na[1] == '?')))
111 if ((*m == '\\') && ((m[1] == '*') || (m[1] == '?')))
119 /* Added to `mmatch' : Because '\?' and '\*' now is one character: */
120 if ((*n == '\\') && ((n[1] == '*') || (n[1] == '?')))
129 * This `if' has been changed compared to match() to do the following:
131 * old (m) new (n) boolean expression
132 * * any (*m == '*' && !mq) ||
133 * ? any except '*' (*m == '?' && !mq && (*n != '*' || nq)) ||
134 * any except * or ? same as m (!((*m == '*' || *m == '?') && !mq) &&
135 * ToLower(*m) == ToLower(*n) &&
136 * !((mq && !nq) || (!mq && nq)))
138 * Here `any' also includes \* and \? !
140 * After reworking the boolean expressions, we get:
141 * (Optimized to use boolean shortcircuits, with most frequently occuring
142 * cases upfront (which took 2 hours!)).
144 if ((*m == '*' && !mq) ||
145 ((!mq || nq) && ToLower(*m) == ToLower(*n)) ||
146 (*m == '?' && !mq && (*n != '*' || nq)))
159 /* Added to `mmatch' : Because '\?' and '\*' now is one character: */
160 if ((*na == '\\') && ((na[1] == '*') || (na[1] == '?')))
169 * Compare if a given string (name) matches the given
170 * mask (which can contain wild cards: '*' - match any
171 * number of chars, '?' - match any single character.
176 * Originally by Douglas A Lewis (dalewis@acsu.buffalo.edu)
177 * Rewritten by Timothy Vogelsang (netski), net@astrolink.org
180 /** Check a string against a mask.
181 * This test checks using traditional IRC wildcards only: '*' means
182 * match zero or more characters of any type; '?' means match exactly
183 * one character of any type. A backslash escapes the next character
184 * so that a wildcard may be matched exactly.
185 * @param[in] mask Wildcard-containing mask.
186 * @param[in] name String to check against \a mask.
187 * @return Zero if \a mask matches \a name, non-zero if no match.
189 int match(const char *mask, const char *name)
191 const char *m = mask, *n = name;
192 const char *m_tmp = mask, *n_tmp = name;
198 while (*m == '*') /* clean up any additional wildcards */
205 if (*m == '\\') /* next wildcard is disregarded */
210 return 0; /* match */
212 for (m--; (m > mask) && (*m == '?'); m--);
215 if (*m == '*' && (m > mask))
216 return 0; /* match */
225 while (*m == '*') /* clean up any additional wildcards */
230 if (ToLower(*m) != ToLower(*n) && *m != '?') {
232 return 1; /* failure! */
245 return 1; /* no match! */
250 * Collapse a pattern string into minimal components.
251 * This particular version is "in place", so that it changes the pattern
252 * which is to be reduced to a "minimal" size.
254 * (C) Carlo Wood - 6 Oct 1998
255 * Speedup rewrite by Andrea Cocito, December 1998.
256 * Note that this new optimized alghoritm can *only* work in place.
259 /** Collapse a mask string to remove redundancies.
260 * Specifically, it replaces a sequence of '*' followed by additional
261 * '*' or '?' with the same number of '?'s as the input, followed by
262 * one '*'. This minimizes useless backtracking when matching later.
263 * @param[in,out] mask Mask string to collapse.
264 * @return Pointer to the start of the string.
266 char *collapse(char *mask)
276 if ((*m == '*') && ((m[1] == '*') || (m[1] == '?')))
285 if (star && (*m != '?'))
291 if ((*m == '\\') && ((m[1] == '*') || (m[1] == '?')))
300 if ((*m == '\\') && ((m[1] == '*') || (m[1] == '?')))
310 ***************** Nemesi's matchcomp() / matchexec() **************
313 /** @page compiledmasks Compiled Masks
314 * These functions allow the use of "compiled" masks, you compile a mask
315 * by means of matchcomp() that gets the plain text mask as input and writes
316 * its result in the memory locations addressed by the 3 parameters:
317 * - *cmask will contain the text of the compiled mask
318 * - *minlen will contain the lenght of the shortest string that can match
320 * - *charset will contain the minimal set of chars needed to match the mask
321 * You can pass NULL as *charset and it will be simply not returned, but you
322 * MUST pass valid pointers for *minlen and *cmask (wich must be big enough
323 * to contain the compiled mask text that is in the worst case as long as the
324 * text of the mask itself in plaintext format) and the return value of
325 * matchcomp() will be the number of chars actually written there (excluded
326 * the trailing zero). cmask can be == mask, matchcomp() can work in place.
327 * The {cmask, minlen} couple of values make the real compiled mask and
328 * need to be passed to the functions that use the compiled mask, if you pass
329 * the wrong minlen or something wrong in cmask to one of these expect a
330 * coredump. This means that when you record a compiled mask you must store
331 * *both* these values.
332 * Once compiled the mask can be used to match a string by means of
333 * matchexec(), it can be printed back to human-readable format by means
334 * of sprintmatch() or it can be compared to another compiled mask by means
335 * of mmexec() that will tell if it completely overrides that mask (a lot like
336 * what mmatch() does for plain text masks).
337 * You can gain a lot of speed in many situations avoiding to matchexec() when:
338 * - The maximum lenght of the field you are about to match() the mask to is
339 * shorter than minlen, in example when matching abc*def*ghil with a nick:
340 * It just cannot match since a nick is at most 9 chars long and the mask
341 * needs at least 10 chars (10 will be the value returned in minlen).
342 * - The charset allowed for the field you are about to match to doesn't
343 * "contain" the charset returned by matchcomp(), in example when you
344 * have *.* as mask it makes no sense to try to match it against a nick
345 * because, again, a nick can't contain a '.', you can check this with
346 * a simple (charset & NTL_IRCNK) in this case.
347 * - As a special case, since compiled masks are forced to lowercase,
348 * it would make no sense to use the NTL_LOWER and NTL_UPPER on a compiled
349 * mask, thus they are reused as follows: if the NTL_LOWER bit of charset
350 * is set it means that the mask contains only non-wilds chars (i.e. you can
351 * use strCasecmp() to match it or a direct hash lookup), if the NTL_UPPER
352 * bit is set it means that it contains only wild chars (and you can
353 * match it with strlen(field)>=minlen).
354 * Do these optimizations ONLY when the data you are about to pass to
355 * matchexec() are *known* to be invalid in advance, using strChattr()
356 * or strlen() on the text would be slower than calling matchexec() directly
358 * Internally a compiled mask contain in the *cmask area the text of
359 * the plain text form of the mask itself with applied the following hacks:
360 * - All characters are forced to lowercase (so that uppercase letters and
361 * specifically the symbols 'A' and 'Z' are reserved for special use)
362 * - All non-escaped stars '*' are replaced by the letter 'Z'
363 * - All non-escaped question marks '?' are replaced by the letter 'A'
364 * - All escape characters are removed, the wilds escaped by them are
365 * then passed by without the escape since they don't collide anymore
366 * with the real wilds (encoded as A/Z)
367 * - Finally the part of the mask that follows the last asterisk is
368 * reversed (byte order mirroring) and moved right after the first
370 * After all this a mask like: Head*CHUNK1*chu\*nK2*ch??k3*TaIl
371 * .... becomes: headZliatZchunk1Zchu*nk2ZchAAk3
372 * This can still be printed on a console, more or less understood by an
373 * human and handled with the usual str*() library functions.
374 * When you store somewhere the compiled mask you can avoid storing the
375 * textform of it since it can be "decompiled" by means of sprintmatch(),
376 * but at that time the following things are changed in the mask:
377 * - All chars have been forced to lowercase.
378 * - The mask is collapsed.
379 * The balance point of using compiled masks in terms of CPU is when you expect
380 * to use matchexec() instead of match() at least 20 times on the same mask
381 * or when you expect to use mmexec() instead of mmatch() 3 times.
384 /** Compile a mask for faster matching.
385 * See also @ref compiledmasks.
386 * @param[out] cmask Output buffer for compiled mask.
387 * @param[out] minlen Minimum length of matching strings.
388 * @param[out] charset Character attributes used in compiled mask.
389 * @param[out] mask Input mask.
390 * @return Length of compiled mask, not including NUL terminator.
392 int matchcomp(char *cmask, int *minlen, int *charset, const char *mask)
394 const char *m = mask;
399 int l1, l2, lmin, loop, sign;
404 int chset2 = (NTL_LOWER | NTL_UPPER);
416 chset2 &= ~NTL_LOWER;
419 if ((*m == '?') || (*m == '*'))
427 chset2 &= ~NTL_LOWER;
432 chset &= IRCD_CharAttrTab[*b++ - CHAR_MIN];
433 chset2 &= ~NTL_UPPER;
437 *charset = (chset | chset2);
448 for (x1 = ls + 1, x2 = (b - 1); x1 < x2; x1++, x2--)
457 while ((lmin = (l1 < l2) ? l1 : l2))
460 for (loop = 0; loop < lmin; loop++)
468 l1 -= (sign < 0) ? 0 : lmin;
469 l2 -= (sign > 0) ? 0 : lmin;
479 /** Compare a string to a compiled mask.
480 * If \a cmask is not from matchcomp(), or if \a minlen is not the value
481 * passed out of matchcomp(), this may core.
482 * See also @ref compiledmasks.
483 * @param[in] string String to test.
484 * @param[in] cmask Compiled mask string.
485 * @param[in] minlen Minimum length of strings that match \a cmask.
486 * @return Zero if the string matches, non-zero otherwise.
488 int matchexec(const char *string, const char *cmask, int minlen)
490 const char *s = string - 1;
491 const char *b = cmask - 1;
497 while ((ToLower(*++s) == *++b) && *s);
499 return ((*b != '\0') && ((*b++ != 'Z') || (*b != '\0')));
510 if ((trash = (s - string - minlen)) < 0)
514 while ((ToLower(*--s) == *++b) && *b && (ToLower(*--s) == *++b) && *b
515 && (ToLower(*--s) == *++b) && *b && (ToLower(*--s) == *++b) && *b);
528 while ((ToLower(*++s) != ch))
534 while ((ToLower(*++s) == *++b) && *b);
557 * Prints the human readable version of *cmask into *mask, (decompiles
559 * The area pointed by *mask MUST be big enough (the mask might be up to
560 * twice the size of its compiled form if it's made all of \? or \*, and
561 * this function can NOT work in place since it might enflate the mask)
562 * The printed mask is not identical to the one that was compiled to cmask,
563 * infact it is 1) forced to all lowercase, 2) collapsed, both things
564 * are supposed to NOT change it's meaning.
565 * It returns the number of chars actually written to *mask;
568 /** Decompile a compiled mask into printable form.
569 * See also @ref compiledmasks.
570 * @param[out] mask Output mask buffer.
571 * @param[in] cmask Compiled mask.
572 * @return Number of characters written to \a mask.
574 int matchdecomp(char *mask, const char *cmask)
577 const char *rcm = cmask;
578 const char *begtail, *endtail;
586 for (; (*rcm != 'Z'); rcm++, rtb++)
588 if ((*rcm == '?') || (*rcm == '*'))
590 if (!((*rtb = ((*rcm == 'A') ? '?' : *rcm))))
597 while (*rcm && (*rcm != 'Z'))
622 for (rcm = endtail; (--rcm) > begtail; *rtb++ = ((*rcm == 'A') ? '?' : *rcm))
623 if ((*rcm == '?') || (*rcm == '*'))
632 * Checks if a wider compiled mask (wcm/wminlen) completely overrides
633 * a more restrict one (rcm/rminlen), basically what mmatch() does for
634 * non-compiled masks, returns 0 if the override is true (like mmatch()).
635 * "the wider overrides the restrict" means that any string that matches
636 * the restrict one _will_ also match the wider one, always.
637 * In this we behave differently from mmatch() because in example we return
638 * true for " a?*cd overrides a*bcd " for wich the override happens for how
639 * we literally defined it, here mmatch() would have returned false.
640 * The original concepts and the base alghoritm are copied from mmatch()
641 * written by Run (Carlo Wood), this function is written by
642 * Nemesi (Andrea Cocito)
644 /** Tests for a superset relationship between compiled masks. This
645 * function does for compiled masks what mmatch() is does for normal
647 * See also @ref compiledmasks.
648 * @param[in] wcm Compiled mask believed to be wider.
649 * @param[in] wminlen Minimum match length for \a wcm.
650 * @param[in] rcm Compiled mask believed to be restricted.
651 * @param[in] rminlen Minimum match length for \a rcm.
652 * @return Zero if \a wcm is a superset of \a rcm, non-zero if not.
654 int mmexec(const char *wcm, int wminlen, const char *rcm, int rminlen)
656 const char *w, *r, *br, *bw, *rx, *rz;
659 /* First of all rm must have enough non-stars to 'contain' wm */
660 if ((trash = rminlen - wminlen) < 0)
666 /* Let's start the game, remember that '*' is mapped to 'Z', '?'
667 is mapped to 'A' and that head?*??*?chunk*???*tail becomes
668 headAAAAZliatAAAZchunk for compiled masks */
670 /* Match the head of wm with the head of rm */
671 for (; (*r) && (*r != 'Z') && ((*w == *r) || (*w == 'A')); r++, w++);
673 while (*w == 'A') /* Eat extra '?' before '*' in wm if got '*' in rm */
675 if (*w != 'Z') /* head1<any>.. can't match head2<any>.. */
676 return ((*w) || (*r)) ? 1 : 0; /* and head<nul> matches only head<nul> */
678 return 0; /* headZ<nul> matches head<anything> */
680 /* Does rm have any stars in it ? let's check */
681 for (rx = r; *r && (*r != 'Z'); r++);
684 /* rm has no stars and thus isn't a mask but it's just a flat
685 string: special handling occurs here, note that eat must be 0 here */
690 for (; r--, (*w) && ((*w == *r) || (*w == 'A')); w++);
691 if (*w != 'Z') /* headZliat1<any> fails on head<any>2tail */
692 return (*w) ? 1 : 0; /* but headZliat<nul> matches head<any>tail */
695 /* match the chunks */
697 { /* This loop can't break but only return */
699 for (bw = w++; (*w != *rx); rx++) /* Seek the 1st char of the chunk */
700 if (--trash < 0) /* See if we can trash one more char of rm */
701 return 1; /* If not we can only fail of course */
702 for (r = ++rx, w++; (*w) && ((*w == *r) || (*w == 'A')); r++, w++);
703 if (!*w) /* Did last loop match the rest of chunk ? */
704 return 0; /* ... Yes, end of wm, matched ! */
706 { /* ... No, hitted non-star */
707 w = bw; /* Rollback at beginning of chunk */
708 if (--trash < 0) /* Trashed the char where this try started */
709 return 1; /* if we can't trash more chars fail */
713 rx = r; /* Successfully matched a chunk, move rx */
714 } /* and go on with the next one */
718 /* rm has at least one '*' and thus is a 'real' mask */
719 rz = r++; /* rx = unused of head, rz = beg-tail */
721 /* Match the tail of wm (if any) against the tail of rm */
724 for (; (*w) && (*r != 'Z') && ((*w == *r) || (*w == 'A')); w++, r++);
725 if (*r == 'Z') /* extra '?' before tail are fluff, just flush 'em */
728 if (*w != 'Z') /* We aren't matching a chunk, can't rollback */
732 /* Match the chunks of wm against what remains of the head of rm */
736 for (bw++; (rx < rz) && (*bw != *rx); rx++) /* Seek the first */
737 if (--trash < 0) /* waste some trash reserve */
739 if (!(rx < rz)) /* head finished */
741 for (bw++, (br = ++rx);
742 (br < rz) && (*bw) && ((*bw == *br) || (*bw == 'A')); br++, bw++);
743 if (!(br < rz)) /* Note that we didn't use any 'eat' char yet, if */
744 while (*bw == 'A') /* there were eat-en chars the head would be over */
745 bw++, eat++; /* Happens only at end of head, and eat is still 0 */
752 { /* If we failed because we got the end of head */
753 trash -= (br - rx); /* it makes no sense to rollback, just trash */
754 if (--trash < 0) /* all the rest of the head wich isn't long */
755 return 1; /* enough for this chunk and go out of this */
756 break; /* loop, then we try with the chunks of rm */
768 /* Match the unused chunks of wm against the chunks of rm */
770 for (; *r && (*r != 'Z'); r++);
777 while (eat && *r) /* the '?' we had eated make us skip as many chars */
778 if (*r++ != 'Z') /* here, but can't skip stars or trailing zero */
780 for (bw++; (*r) && (*bw != *r); r++)
781 if ((*r != 'Z') && (--trash < 0))
785 for ((br = ++r), bw++;
786 (*br) && (*br != 'Z') && ((*bw == *br) || (*bw == 'A')); br++, bw++);
795 if ((!*br) || (*r == 'Z'))
796 { /* If we hit the end of rm or a star in it */
797 trash -= (br - r); /* makes no sense to rollback within this */
798 if (trash < 0) /* same chunk of br, skip it all and then */
799 return 1; /* either rollback or break this loop if */
800 if (!*br) /* it was the end of rm */
815 /* match the remaining chunks of wm against what remains of the tail of rm */
816 r = rz - eat - 1; /* can't have <nul> or 'Z'within the tail, so just move r */
820 for (bw++; (*bw != *r); r--)
825 for ((br = --r), bw++;
826 (*bw) && (br >= rx) && ((*bw == *br) || (*bw == 'A')); br--, bw++);
842 return 1; /* Auch... something left out ? Fail */
848 #include <sys/socket.h>
849 #include <netinet/in.h>
850 #include <arpa/inet.h>
852 /** Parse an input string as an IPv4 address.
853 * @param[in] in Text form of address.
854 * @param[out] out IPv4 address in network representation.
855 * @return Number of address bits specified by \a in.
857 static int ipmask_parse_ipv4(const char *in, struct in_addr *out)
864 class = sscanf(in, "%d.%d.%d.%d/%d", &ad[0], &ad[1], &ad[2], &ad[3], &bits);
867 ircd_snprintf(0, ipname, sizeof(ipname), "%d.%d.%d.%d", ad[0], ad[1], ad[2], ad[3]);
868 out->s_addr = inet_addr(ipname);
872 /** Test whether a string looks like it matches only IPv4 addresses.
873 * @param[in] mask Hostname matching mask.
874 * @return Non-zero if \a mask can only match IPv4 addresses, zero otherwise.
876 int check_if_ipmask(const char *mask)
881 for (p = mask; *p; ++p)
882 if (*p != '*' && *p != '?' && *p != '.' && *p != '/')
892 /** Try to parse an IPv4 or IPv6 address mask.
893 * @param[in] in Address matching mask.
894 * @param[out] mask Fixed bits of address mask.
895 * @param[out] bits_ptr If non-NULL, receives number of bits specified in address mask.
896 * @return Non-zero on successful parse; zero on error.
898 int ipmask_parse(const char *in, struct irc_in_addr *mask, unsigned char *bits_ptr)
904 if (check_if_ipmask(in)) {
905 bits = ipmask_parse_ipv4(in, &ipv4);
906 mask->in6_16[0] = mask->in6_16[1] = mask->in6_16[2] = 0;
907 mask->in6_16[3] = mask->in6_16[4] = 0;
908 mask->in6_16[5] = 0xffff;
909 memcpy(&mask->in6_16[6], &ipv4.s_addr, sizeof(ipv4.s_addr));
912 if (!(p = strchr(in, '/')))
916 if (!ircd_aton(mask, in)) {
932 /** Test whether an address matches the most significant bits of a mask.
933 * @param[in] addr Address to test.
934 * @param[in] mask Address to test against.
935 * @param[in] bits Number of bits to test.
936 * @return 0 on mismatch, 1 if bits < 128 and all bits match; -1 if
937 * bits == 128 and all bits match.
939 int ipmask_check(const struct irc_in_addr *addr, const struct irc_in_addr *mask, unsigned char bits)
943 for (k = 0; k < 8; k++) {
945 return (addr->in6_16[k] & htons(0xffff << (16-bits))) == mask->in6_16[k];
946 if (addr->in6_16[k] != mask->in6_16[k])