1 /* Copyright (C) 1995, 1996 Tom Lord
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU Library General Public License as published by
5 * the Free Software Foundation; either version 2, or (at your option)
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU Library General Public License for more details.
13 * You should have received a copy of the GNU Library General Public License
14 * along with this software; see the file COPYING. If not, write to
15 * the Free Software Foundation, 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
23 #include "rxgnucomp.h"
27 /* regcomp takes a regular expression as a string and compiles it.
29 * PATTERN is the address of the pattern string.
31 * CFLAGS is a series of bits which affect compilation.
33 * If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
34 * use POSIX basic syntax.
36 * If REG_NEWLINE is set, then . and [^...] don't match newline.
37 * Also, regexec will try a match beginning after every newline.
39 * If REG_ICASE is set, then we consider upper- and lowercase
40 * versions of letters to be equivalent when matching.
42 * If REG_NOSUB is set, then when PREG is passed to regexec, that
43 * routine will report only success or failure, and nothing about the matched substrings.
46 * It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for
47 * the return codes and their meanings.)
/* regncomp -- compile the LEN-byte pattern at PATTERN into PREG, steered
 * by the CFLAGS bits.
 *
 * NOTE(review): this excerpt omits lines (local declarations, braces, some
 * branches and the final return), so the notes below cover only the
 * statements that are visible here.
 *
 * What the visible statements do:
 *   - zero the whole of PREG with rx_bzero before any field is set;
 *   - pick RE_SYNTAX_POSIX_EXTENDED when REG_EXTENDED is set in CFLAGS,
 *     RE_SYNTAX_POSIX_BASIC otherwise;
 *   - malloc a 256-byte preg->translate table and fill it so that
 *     uppercase codes map to their lowercase counterparts (this follows
 *     the REG_ICASE test, presumably as its else-branch -- confirm);
 *   - set preg->newline_anchor to 0 or 1 and adjust the syntax bits
 *     depending on REG_NEWLINE;
 *   - record REG_NOSUB as a 0/1 flag in preg->no_sub;
 *   - call rx_parse with &preg->pattern, rx_posix_analyze_rexp with
 *     &preg->subexps, store the result of rx_fill_in_fastmap in
 *     preg->is_nullable and the result of rx_is_anchored_p in
 *     preg->is_anchored.
 *
 * The only return visible here yields REG_ESPACE; it sits right after the
 * malloc and presumably guards allocation failure (the test itself is not
 * shown).
 */
53 regncomp (regex_t * preg, const char * pattern, int len, int cflags)
56 regncomp (preg, pattern, len, cflags)
66 rx_bzero ((char *)preg, sizeof (*preg));
67 syntax = ((cflags & REG_EXTENDED)
68 ? RE_SYNTAX_POSIX_EXTENDED
69 : RE_SYNTAX_POSIX_BASIC);
71 if (!(cflags & REG_ICASE))
77 preg->translate = (unsigned char *) malloc (256);
79 return (int) REG_ESPACE;
81 /* Map uppercase characters to corresponding lowercase ones. */
/* NOTE(review): the table above is allocated with a literal 256 while this
 * loop is bounded by CHAR_SET_SIZE; confirm that the two are equal. */
82 for (i = 0; i < CHAR_SET_SIZE; i++)
83 preg->translate[i] = isupper (i) ? tolower (i) : i;
87 /* If REG_NEWLINE is set, newlines are treated differently. */
88 if (!(cflags & REG_NEWLINE))
89 preg->newline_anchor = 0;
92 /* REG_NEWLINE implies neither . nor [^...] match newline. */
93 syntax &= ~RE_DOT_NEWLINE;
94 syntax |= RE_HAT_LISTS_NOT_NEWLINE;
95 /* It also changes the matching behavior. */
96 preg->newline_anchor = 1;
/* The double negation collapses the masked bit to exactly 0 or 1. */
99 preg->no_sub = !!(cflags & REG_NOSUB);
101 ret = rx_parse (&preg->pattern,
107 /* POSIX doesn't distinguish between an unmatched open-group and an
108 * unmatched close-group: both are REG_EPAREN.
110 if (ret == REG_ERPAREN)
117 rx_posix_analyze_rexp (&preg->subexps,
121 preg->is_nullable = rx_fill_in_fastmap (256,
125 preg->is_anchored = rx_is_anchored_p (preg->pattern);
/* regcomp -- NUL-terminated-string front end to regncomp: measures
 * PATTERN with strlen and forwards PREG, PATTERN, that length and CFLAGS,
 * returning regncomp's result unchanged.
 *
 * NOTE(review): strlen yields a size_t while regncomp's LEN parameter is
 * an int, so the length is narrowed at the call.
 */
134 regcomp (regex_t * preg, const char * pattern, int cflags)
137 regcomp (preg, pattern, cflags)
139 const char * pattern;
143 /* POSIX says a null character in the pattern terminates it, so we
144 * can use strlen here in compiling the pattern.
147 return regncomp (preg, pattern, strlen (pattern), cflags);
153 /* Returns a message corresponding to an error code, ERRCODE, returned
154 from either regcomp or regexec. */
/* regerror -- copy the message for ERRCODE into ERRBUF, whose capacity is
 * ERRBUF_SIZE bytes.
 *
 * The text comes from rx_error_msg[ERRCODE]; a null entry is reported as
 * "Success".  The copy is guarded by an ERRBUF_SIZE != 0 test.  A message
 * that does not fit is truncated and still NUL-terminated.  PREG is not
 * used by any line visible here.
 *
 * NOTE(review): ERRCODE indexes rx_error_msg directly and no range check
 * appears in this excerpt; the return statement is also outside it.
 */
158 regerror (int errcode, const regex_t *preg,
159 char *errbuf, size_t errbuf_size)
162 regerror (errcode, preg, errbuf, errbuf_size)
172 msg = rx_error_msg[errcode] == 0 ? "Success" : rx_error_msg[errcode];
173 msg_size = strlen (msg) + 1; /* Includes the 0. */
174 if (errbuf_size != 0)
176 if (msg_size > errbuf_size)
/* Truncating copy: strncpy leaves the buffer unterminated when it cuts the
 * source short, so the final byte is set to 0 explicitly. */
178 strncpy (errbuf, msg, errbuf_size - 1);
179 errbuf[errbuf_size - 1] = 0;
/* Presumably the else-branch of the size test: the whole message fits,
 * terminator included. */
182 strcpy (errbuf, msg);
/* rx_regmatch -- try to match PREG against STRING with the match starting
 * exactly at START, and on success record the overall match in PMATCH[0].
 *
 * NOTE(review): this excerpt omits lines (declarations, braces, the first
 * branch condition, the step that changes END inside the loop, and every
 * return), so the notes below cover only the visible statements.
 *
 * Candidate end positions are bracketed by end_lower_bound and
 * end_upper_bound, chosen in three ways:
 *   - both equal to START (the condition selecting this case is not shown);
 *   - both equal to START + preg->pattern->len when that len is >= 0;
 *   - otherwise START through END.
 * The loop begins at end_upper_bound and runs while END is at least
 * end_lower_bound.
 *
 * For each candidate, a private copy of RULES gets its not_eol flag
 * recomputed before a solution stream is built with
 * rx_basic_make_solutions and stepped once with rx_next_solution.  The
 * solution stream is released with rx_basic_free_solutions on both the
 * success path and the other path shown.
 */
191 rx_regmatch (regmatch_t pmatch[], const regex_t *preg, struct rx_context_rules * rules, int start, int end, const char *string)
194 rx_regmatch (pmatch, preg, rules, start, end, string)
197 struct rx_context_rules * rules;
203 struct rx_solutions * solutions;
204 enum rx_answers answer;
205 struct rx_context_rules local_rules;
/* Work on a copy so the caller's rules are never modified. */
210 local_rules = *rules;
215 end_lower_bound = start;
216 end_upper_bound = start;
218 else if (preg->pattern->len >= 0)
220 end_lower_bound = start + preg->pattern->len;
221 end_upper_bound = start + preg->pattern->len;
225 end_lower_bound = start;
226 end_upper_bound = end;
228 end = end_upper_bound;
229 while (end >= end_lower_bound)
/* not_eol for this candidate END (orig_end is assigned on a line not shown;
 * presumably it holds the END the caller passed in):
 *   - caller's not_eol set: the flag is cleared only when END differs from
 *     orig_end, newline_anchor is on and string[END] is a newline;
 *   - caller's not_eol clear: the flag is cleared when END equals orig_end,
 *     or when newline_anchor is on and string[END] is a newline. */
231 local_rules.not_eol = (rules->not_eol
232 ? ( (end == orig_end)
233 || !local_rules.newline_anchor
234 || (string[end] != '\n'))
235 : ( (end != orig_end)
236 && (!local_rules.newline_anchor
237 || (string[end] != '\n'))));
238 solutions = rx_basic_make_solutions (pmatch, preg->pattern, preg->subexps,
239 start, end, &local_rules, string);
243 answer = rx_next_solution (solutions);
245 if (answer == rx_yes)
/* Success: slot 0 receives the span of the whole match plus the final tag
 * reported by the solution stream. */
249 pmatch[0].rm_so = start;
250 pmatch[0].rm_eo = end;
251 pmatch[0].final_tag = solutions->final_tag;
253 rx_basic_free_solutions (solutions);
257 rx_basic_free_solutions (solutions);
/* rx_regexec -- search STRING between START and END for a match of PREG,
 * trying each start position X in turn and delegating the match attempt at
 * X to rx_regmatch.
 *
 * NOTE(review): this excerpt omits lines (declarations, braces, parts of
 * conditions, and every return or loop exit), so the notes below cover only
 * the visible statements.
 *
 * When the searched span is longer than RX_MANY_CASES, a simplified
 * expression is built with rx_simple_rexp, turned into an NFA with rx_unfa
 * and loaded into a classical-system machine; the same length test later
 * gates a pre-check of each start position with rx_start_superstate and
 * rx_advance_to_final before rx_regmatch is called.
 */
276 rx_regexec (regmatch_t pmatch[], const regex_t *preg, struct rx_context_rules * rules, int start, int end, const char *string)
279 rx_regexec (pmatch, preg, rules, start, end, string)
282 struct rx_context_rules * rules;
291 struct rexp_node * simplified;
292 struct rx_unfa * unfa;
293 struct rx_classical_system machine;
295 anchored = preg->is_anchored;
298 if ((end - start) > RX_MANY_CASES)
300 if (0 > rx_simple_rexp (&simplified, 256, preg->pattern, preg->subexps))
302 unfa = rx_unfa (rx_basic_unfaniverse (), simplified, 256);
/* rx_free_rexp (simplified) appears twice; the lines between are not shown,
 * so presumably the first call belongs to a failure branch -- confirm. */
305 rx_free_rexp (simplified);
308 rx_init_system (&machine, unfa->nfa);
309 rx_free_rexp (simplified);
/* X runs from START through END inclusive. */
312 for (x = start; x <= end; ++x)
/* Candidate filter: consults preg->is_nullable and the fastmap entry for
 * the byte at X (one line of this condition is not shown). */
314 if (preg->is_nullable
316 && (preg->fastmap[((unsigned char *)string)[x]])))
318 if ((end - start) > RX_MANY_CASES)
321 if (rx_start_superstate (&machine) != rx_yes)
/* NOTE(review): the length handed to the machine is end - start - x, yet X
 * already begins at START; the two agree with end - x only when START is 0.
 * Confirm this is intended. */
326 amt = rx_advance_to_final (&machine, string + x, end - start - x);
327 if (!machine.final_tag && (amt < (end - start - x)))
330 stat = rx_regmatch (pmatch, preg, rules, x, end, string);
/* NOTE(review): if REG_NOMATCH is nonzero, the !stat term is already
 * implied by stat != REG_NOMATCH. */
331 if (!stat || (stat != REG_NOMATCH))
339 if (!preg->newline_anchor)
346 if (string[x] == '\n')
357 /* regexec searches for a given pattern, specified by PREG, in the string STRING.
360 * If NMATCH is zero or REG_NOSUB was set in the cflags argument to
361 * `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at
362 * least NMATCH elements, and we set them to the offsets of the
363 * corresponding matched substrings.
365 * EFLAGS specifies `execution flags' which affect matching: if
366 * REG_NOTBOL is set, then ^ does not match at the beginning of the
367 * string; if REG_NOTEOL is set, then $ does not match at the end.
369 * We return 0 if we find a match and REG_NOMATCH if not.
/* regnexec -- run the compiled pattern PREG over the LEN bytes at STRING
 * by calling rx_regexec with a start offset of 0 and an end of LEN.
 *
 * NOTE(review): this excerpt omits lines (declarations, braces, the bodies
 * of several branches, and every return), so the notes below cover only the
 * visible statements.
 *
 * Match registers: a working array REGS is either chosen by the branch
 * taken when NMATCH >= preg->re_nsub (its body is not shown) or malloc'ed
 * with preg->re_nsub entries.  NREGS entries are preset to -1 before the
 * search.  After a successful search, when register information is wanted,
 * PMATCH is non-null and REGS is not the array PMATCH points at, the first
 * NMATCH entries are copied into that array.
 *
 * NOTE(review): no check of the malloc result is visible in this excerpt.
 */
374 regnexec (const regex_t *preg, const char *string, int len, size_t nmatch, regmatch_t **pmatch, int eflags)
377 regnexec (preg, string, len, nmatch, pmatch, eflags)
387 struct rx_context_rules rules;
/* Register details are wanted only if the pattern was not compiled with
 * no_sub set and the caller asked for at least one entry. */
392 want_reg_info = (!preg->no_sub && (nmatch > 0));
394 rules.newline_anchor = preg->newline_anchor;
395 rules.not_bol = !!(eflags & REG_NOTBOL);
396 rules.not_eol = !!(eflags & REG_NOTEOL);
/* NOTE(review): REG_ICASE is tested in EFLAGS here; confirm that an
 * execution-time use of this bit is intended. */
397 rules.case_indep = !!(eflags & REG_ICASE);
399 if (nmatch >= preg->re_nsub)
406 regs = (regmatch_t *)malloc (preg->re_nsub * sizeof (*regs));
409 nregs = preg->re_nsub;
/* -1 in both offsets marks a register as unset. */
414 for (x = 0; x < nregs; ++x)
415 regs[x].rm_so = regs[x].rm_eo = -1;
419 stat = rx_regexec (regs, preg, &rules, 0, len, string);
421 if (!stat && want_reg_info && pmatch && (regs != *pmatch))
424 for (x = 0; x < nmatch; ++x)
425 (*pmatch)[x] = regs[x];
/* The bodies of the next two branches are not shown; presumably the first
 * hands REGS to the caller and the second frees a private REGS -- confirm. */
428 if (!stat && (eflags & REG_ALLOC_REGS))
430 else if (regs && (!pmatch || (regs != *pmatch)))
/* regexec -- front end that forwards to regnexec and returns its result.
 * REG_ALLOC_REGS is masked out of EFLAGS before the call.
 *
 * NOTE(review): the arguments between PREG and the flags are on lines not
 * shown in this excerpt.
 */
438 regexec (const regex_t *preg, const char *string, size_t nmatch, regmatch_t pmatch[], int eflags)
441 regexec (preg, string, nmatch, pmatch, eflags)
449 return regnexec (preg,
454 (eflags & ~REG_ALLOC_REGS));
458 /* Free dynamically allocated space used by PREG. */
/* regfree -- release what PREG owns: preg->pattern through rx_free_rexp,
 * and preg->subexps and preg->translate through free.  The translate table
 * is freed only when the pointer is non-null.
 *
 * NOTE(review): any guards around the first two releases, and any
 * resetting of the fields afterwards, are on lines not shown here.
 */
462 regfree (regex_t *preg)
471 rx_free_rexp (preg->pattern);
476 free (preg->subexps);
479 if (preg->translate != 0)
481 free (preg->translate);