1 /* Copyright (C) 1995, 1996 Tom Lord
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU Library General Public License as published by
5 * the Free Software Foundation; either version 2, or (at your option)
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU Library General Public License for more details.
13 * You should have received a copy of the GNU Library General Public License
14 * along with this software; see the file COPYING. If not, write to
15 * the Free Software Foundation, 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
23 #include "rxspencer.h"
28 static char * silly_hack_2 = 0;
30 struct rx_solutions rx_no_solutions;
34 rx_make_solutions (struct rx_registers * regs, struct rx_unfaniverse * verse, struct rexp_node * expression, struct rexp_node ** subexps, int cset_size, int start, int end, rx_vmfn vmfn, rx_contextfn contextfn, void * closure)
37 rx_make_solutions (regs, verse, expression, subexps, cset_size,
38 start, end, vmfn, contextfn, closure)
39 struct rx_registers * regs;
40 struct rx_unfaniverse * verse;
41 struct rexp_node * expression;
42 struct rexp_node ** subexps;
47 rx_contextfn contextfn;
51 struct rx_solutions * solns;
54 && (expression->len >= 0)
55 && (expression->len != (end - start)))
56 return &rx_no_solutions;
60 solns = (struct rx_solutions *)silly_hack_2;
64 solns = (struct rx_solutions *)malloc (sizeof (*solns));
67 rx_bzero ((char *)solns, sizeof (*solns));
70 solns->cset_size = cset_size;
71 solns->subexps = subexps;
72 solns->exp = expression;
73 rx_save_rexp (expression);
79 solns->contextfn = contextfn;
80 solns->closure = closure;
82 if (!solns->exp || !solns->exp->observed)
84 solns->dfa = rx_unfa (verse, expression, cset_size);
87 rx_init_system (&solns->match_engine, solns->dfa->nfa);
89 if (rx_yes != rx_start_superstate (&solns->match_engine))
94 struct rexp_node * simplified;
96 status = rx_simple_rexp (&simplified, cset_size, solns->exp, subexps);
99 solns->dfa = rx_unfa (verse, simplified, cset_size);
102 rx_free_rexp (simplified);
105 rx_init_system (&solns->match_engine, solns->dfa->nfa);
106 if (rx_yes != rx_start_superstate (&solns->match_engine))
108 rx_free_rexp (simplified);
111 if (expression && ( (expression->type == r_concat)
112 || (expression->type == r_plus)
113 || (expression->type == r_star)
114 || (expression->type == r_interval)))
116 struct rexp_node * subexp;
118 subexp = solns->exp->params.pair.left;
120 if (!subexp || !subexp->observed)
122 solns->left_dfa = rx_unfa (solns->verse, subexp, solns->cset_size);
126 struct rexp_node * simplified;
128 status = rx_simple_rexp (&simplified, solns->cset_size, subexp, solns->subexps);
131 solns->left_dfa = rx_unfa (solns->verse, simplified, solns->cset_size);
132 rx_free_rexp (simplified);
135 if (!solns->left_dfa)
137 rx_bzero ((char *)&solns->left_match_engine, sizeof (solns->left_match_engine));
138 rx_init_system (&solns->left_match_engine, solns->left_dfa->nfa);
144 rx_free_rexp (solns->exp);
153 rx_free_solutions (struct rx_solutions * solns)
156 rx_free_solutions (solns)
157 struct rx_solutions * solns;
163 if (solns == &rx_no_solutions)
168 rx_free_solutions (solns->left);
174 rx_free_solutions (solns->right);
180 rx_free_unfa (solns->dfa);
185 rx_terminate_system (&solns->left_match_engine);
186 rx_free_unfa (solns->left_dfa);
190 rx_terminate_system (&solns->match_engine);
194 rx_free_rexp (solns->exp);
199 silly_hack_2 = (char *)solns;
206 static enum rx_answers
207 rx_solution_fit_p (struct rx_solutions * solns)
209 static enum rx_answers
210 rx_solution_fit_p (solns)
211 struct rx_solutions * solns;
214 unsigned const char * burst;
218 int rel_pos_in_burst;
219 enum rx_answers vmstat;
222 current_pos = solns->start;
224 vmstat = solns->vmfn (solns->closure,
225 &burst, &burst_len, &burst_addr,
226 current_pos, solns->end,
229 if (vmstat != rx_yes)
232 rel_pos_in_burst = current_pos - burst_addr;
233 burst_end_addr = burst_addr + burst_len;
235 if (burst_end_addr >= solns->end)
237 enum rx_answers fit_status;
238 fit_status = rx_fit_p (&solns->match_engine,
239 burst + rel_pos_in_burst,
240 solns->end - current_pos);
245 enum rx_answers fit_status;
246 fit_status = rx_advance (&solns->match_engine,
247 burst + rel_pos_in_burst,
248 burst_len - rel_pos_in_burst);
249 if (fit_status != rx_yes)
255 current_pos += burst_len - rel_pos_in_burst;
263 static enum rx_answers
264 rx_solution_fit_str_p (struct rx_solutions * solns)
266 static enum rx_answers
267 rx_solution_fit_str_p (solns)
268 struct rx_solutions * solns;
272 unsigned const char * burst;
276 int rel_pos_in_burst;
277 enum rx_answers vmstat;
282 current_pos = solns->start;
283 count = solns->exp->params.cstr.len;
284 key = (unsigned char *)solns->exp->params.cstr.contents;
287 vmstat = solns->vmfn (solns->closure,
288 &burst, &burst_len, &burst_addr,
289 current_pos, solns->end,
292 if (vmstat != rx_yes)
295 rel_pos_in_burst = current_pos - burst_addr;
296 burst_end_addr = burst_addr + burst_len;
299 unsigned const char * pos;
301 pos = burst + rel_pos_in_burst;
303 if (burst_end_addr >= solns->end)
320 part_count_init = burst_len - rel_pos_in_burst;
321 part_count = part_count_init;
330 count -= part_count_init;
331 current_pos += burst_len - rel_pos_in_burst;
343 rx_best_end_guess (struct rx_solutions * solns, struct rexp_node * exp, int bound)
346 rx_best_end_guess (solns, exp, bound)
347 struct rx_solutions * solns;
348 struct rexp_node * exp;
353 unsigned const char * burst;
357 int rel_pos_in_burst;
359 enum rx_answers vmstat;
362 unparse_print_rexp (256, solns->exp);
364 unparse_print_rexp (256, exp);
365 printf ("\nbound %d \n", bound);
368 if (rx_yes != rx_start_superstate (&solns->left_match_engine))
372 best_guess = current_pos = solns->start;
376 printf (" best_guess %d\n", best_guess);
378 vmstat = solns->vmfn (solns->closure,
379 &burst, &burst_len, &burst_addr,
383 printf (" str>%s\n", burst);
385 if (vmstat != rx_yes)
390 rel_pos_in_burst = current_pos - burst_addr;
391 burst_end_addr = burst_addr + burst_len;
393 if (burst_end_addr > bound)
395 burst_end_addr = bound;
396 burst_len = bound - burst_addr;
403 printf (" rel_pos_in_burst %d burst_len %d\n", rel_pos_in_burst, burst_len);
405 while (rel_pos_in_burst < burst_len)
407 amt_advanced= rx_advance_to_final (&solns->left_match_engine,
408 burst + rel_pos_in_burst,
409 burst_len - rel_pos_in_burst);
411 printf (" amt_advanced %d", amt_advanced);
413 if (amt_advanced < 0)
417 current_pos += amt_advanced;
418 rel_pos_in_burst += amt_advanced;
419 if (solns->left_match_engine.final_tag)
420 best_guess = current_pos;
422 printf (" best_guess %d\n", best_guess);
423 printf (" current_pos %d\n", current_pos);
425 if (amt_advanced == 0)
430 if (current_pos == bound)
443 rx_next_solution (struct rx_solutions * solns)
446 rx_next_solution (solns)
447 struct rx_solutions * solns;
453 if (solns == &rx_no_solutions)
460 if (solns->step != 0)
467 solns->final_tag = 1;
468 return (solns->start == solns->end
473 else if ( (solns->exp->len >= 0)
474 && (solns->exp->len != (solns->end - solns->start)))
478 else if (!solns->exp->observed)
480 if (solns->step != 0)
484 else if (solns->exp->type == r_string)
487 ans = rx_solution_fit_str_p (solns);
488 solns->final_tag = 1;
495 ans = rx_solution_fit_p (solns);
496 solns->final_tag = solns->match_engine.final_tag;
501 else if (solns->exp->observed)
503 enum rx_answers fit_p;
507 if (solns->exp->params.intval)
509 solns->regs[solns->exp->params.intval].rm_so = solns->saved_rm_so;
510 solns->regs[solns->exp->params.intval].rm_eo = solns->saved_rm_eo;
518 fit_p = rx_solution_fit_p (solns);
519 /* Set final_tag here because this rough fit test
520 * may be all the matching that gets done.
521 * For example, consider a paren node containing
522 * a true regular expression ending with a cut
525 solns->final_tag = solns->match_engine.final_tag;
542 switch (solns->exp->type)
552 enum rx_answers paren_stat;
556 if (solns->exp->params.intval)
558 solns->saved_rm_so = solns->regs[solns->exp->params.intval].rm_so;
559 solns->saved_rm_eo = solns->regs[solns->exp->params.intval].rm_eo;
562 if ( !solns->exp->params.pair.left
563 || !solns->exp->params.pair.left->observed)
565 if (solns->exp->params.intval)
567 solns->regs[solns->exp->params.intval].rm_so = solns->start;
568 solns->regs[solns->exp->params.intval].rm_eo = solns->end;
571 /* Keep the final_tag from the fit_p test. */
576 solns->left = rx_make_solutions (solns->regs,
578 solns->exp->params.pair.left,
596 if (solns->exp->params.intval)
598 solns->regs[solns->exp->params.intval].rm_so = solns->saved_rm_so;
599 solns->regs[solns->exp->params.intval].rm_eo = solns->saved_rm_eo;
602 paren_stat = rx_next_solution (solns->left);
603 if (paren_stat == rx_yes)
605 if (solns->exp->params.intval)
607 solns->regs[solns->exp->params.intval].rm_so = solns->start;
608 solns->regs[solns->exp->params.intval].rm_eo = solns->end;
610 solns->final_tag = solns->left->final_tag;
616 rx_free_solutions (solns->left);
618 if (solns->exp->params.intval)
620 solns->regs[solns->exp->params.intval].rm_so = solns->saved_rm_so;
621 solns->regs[solns->exp->params.intval].rm_eo = solns->saved_rm_eo;
631 enum rx_answers opt_stat;
635 solns->left = rx_make_solutions (solns->regs,
637 solns->exp->params.pair.left,
654 opt_stat = rx_next_solution (solns->left);
655 if (opt_stat == rx_yes)
657 solns->final_tag = solns->left->final_tag;
663 rx_free_solutions (solns->left);
665 return ((solns->start == solns->end)
675 enum rx_answers alt_stat;
679 solns->left = rx_make_solutions (solns->regs,
681 solns->exp->params.pair.left,
698 alt_stat = rx_next_solution (solns->left);
700 if (alt_stat == rx_yes)
702 solns->final_tag = solns->left->final_tag;
708 rx_free_solutions (solns->left);
714 solns->right = rx_make_solutions (solns->regs,
716 solns->exp->params.pair.right,
733 alt_stat = rx_next_solution (solns->right);
735 if (alt_stat == rx_yes)
737 solns->final_tag = solns->right->final_tag;
743 rx_free_solutions (solns->right);
754 enum rx_answers concat_stat;
756 solns->split_guess = solns->end;
758 solns->split_guess = ((solns->end - solns->start) > RX_MANY_CASES
759 ? rx_best_end_guess (solns,
760 solns->exp->params.pair.left, solns->end)
764 concat_split_guess_loop:
765 solns->left = rx_make_solutions (solns->regs,
767 solns->exp->params.pair.left,
783 concat_try_next_left_match:
785 concat_stat = rx_next_solution (solns->left);
786 if (concat_stat != rx_yes)
788 rx_free_solutions (solns->left);
789 rx_free_solutions (solns->right);
790 solns->left = solns->right = 0;
791 solns->split_guess = solns->split_guess - 1;
793 solns->split_guess = ((solns->split_guess - solns->start) > RX_MANY_CASES
794 ? rx_best_end_guess (solns,
795 solns->exp->params.pair.left,
796 solns->split_guess - 1)
797 : solns->split_guess - 1);
799 if (solns->split_guess >= solns->start)
800 goto concat_split_guess_loop;
814 solns->right = rx_make_solutions (solns->regs,
816 solns->exp->params.pair.right,
826 rx_free_solutions (solns->left);
836 /* concat_try_next_right_match: */
838 concat_stat = rx_next_solution (solns->right);
839 if (concat_stat == rx_yes)
841 solns->final_tag = solns->right->final_tag;
844 else if (concat_stat == rx_no)
846 rx_free_solutions (solns->right);
849 goto concat_try_next_left_match;
851 else /* concat_stat == rx_bogus */
853 rx_free_solutions (solns->left);
855 rx_free_solutions (solns->right);
869 enum rx_answers star_stat;
871 solns->split_guess = solns->end;
873 solns->split_guess = ((solns->end - solns->start) > RX_MANY_CASES
874 ? rx_best_end_guess (solns,
875 solns->exp->params.pair.left, solns->end)
879 star_split_guess_loop:
880 solns->left = rx_make_solutions (solns->regs,
882 solns->exp->params.pair.left,
898 star_try_next_left_match:
900 star_stat = rx_next_solution (solns->left);
901 if (star_stat != rx_yes)
903 rx_free_solutions (solns->left);
904 rx_free_solutions (solns->right);
905 solns->left = solns->right = 0;
906 solns->split_guess = solns->split_guess - 1;
908 solns->split_guess = ((solns->split_guess - solns->start) > RX_MANY_CASES
909 ? rx_best_end_guess (solns,
910 solns->exp->params.pair.left,
911 solns->split_guess - 1)
912 : solns->split_guess - 1);
914 if (solns->split_guess >= solns->start)
915 goto star_split_guess_loop;
920 if ( (solns->exp->type == r_star)
921 && (solns->start == solns->end)
922 && (star_stat == rx_no))
924 solns->final_tag = 1;
938 if (solns->split_guess == solns->end)
940 solns->final_tag = solns->left->final_tag;
945 solns->right = rx_make_solutions (solns->regs,
957 rx_free_solutions (solns->left);
967 /* star_try_next_right_match: */
969 star_stat = rx_next_solution (solns->right);
970 if (star_stat == rx_yes)
972 solns->final_tag = solns->right->final_tag;
975 else if (star_stat == rx_no)
977 rx_free_solutions (solns->right);
980 goto star_try_next_left_match;
982 else /* star_stat == rx_bogus */
984 rx_free_solutions (solns->left);
986 rx_free_solutions (solns->right);
998 enum rx_answers interval_stat;
1001 /* If the interval permits nothing,
1002 * return immediately.
1004 if (solns->exp->params.intval2 < solns->interval_x)
1010 /* If the interval permits only 0 iterations,
1011 * return immediately. Success depends on the
1012 * emptiness of the match.
1014 if ( (solns->exp->params.intval2 == solns->interval_x)
1015 && (solns->exp->params.intval <= solns->interval_x))
1018 solns->final_tag = 1;
1019 return ((solns->start == solns->end)
1024 /* The interval permits at most 0 iterations,
1025 * but also requires more. A bug.
1027 if (solns->exp->params.intval2 == solns->interval_x)
1029 /* indicates a regexp compilation error, actually */
1034 solns->split_guess = solns->end;
1036 solns->split_guess = ((solns->end - solns->start) > RX_MANY_CASES
1037 ? rx_best_end_guess (solns,
1038 solns->exp->params.pair.left, solns->end)
1042 /* The interval permits more than 0 iterations.
1043 * If it permits 0 and the match is to be empty,
1044 * the trivial match is the most preferred answer.
1046 if (solns->exp->params.intval <= solns->interval_x)
1049 if (solns->start == solns->end)
1051 solns->final_tag = 1;
1054 /* If this isn't a trivial match, or if the trivial match
1055 * is rejected, look harder.
1060 interval_split_guess_loop:
1061 /* The match requires at least one iteration, either because
1062 * there are characters to match, or because the interval starts
1065 * Look for the first iteration:
1067 solns->left = rx_make_solutions (solns->regs,
1069 solns->exp->params.pair.left,
1085 interval_try_next_left_match:
1087 interval_stat = rx_next_solution (solns->left);
1088 if (interval_stat != rx_yes)
1090 rx_free_solutions (solns->left);
1091 rx_free_solutions (solns->right);
1092 solns->left = solns->right = 0;
1093 solns->split_guess = solns->split_guess - 1;
1095 solns->split_guess = ((solns->split_guess - solns->start) > RX_MANY_CASES
1096 ? rx_best_end_guess (solns,
1097 solns->exp->params.pair.left,
1098 solns->split_guess - 1)
1099 : solns->split_guess - 1);
1101 if (solns->split_guess >= solns->start)
1102 goto interval_split_guess_loop;
1106 return interval_stat;
1117 /* After matching one required iteration, construct a smaller
1118 * interval and try to match that against the rest.
1120 * To avoid thwarting unfa caching, instead of building a new
1121 * rexp node with different interval extents, we keep interval_x
1122 * in each solns structure to keep track of the number of
1123 * iterations matched so far.
1125 solns->right = rx_make_solutions (solns->regs,
1135 solns->right->interval_x = solns->interval_x + 1;
1139 rx_free_solutions (solns->left);
1149 /* interval_try_next_right_match: */
1151 interval_stat = rx_next_solution (solns->right);
1152 if (interval_stat == rx_yes)
1154 solns->final_tag = solns->right->final_tag;
1155 return interval_stat;
1157 else if (interval_stat == rx_no)
1159 rx_free_solutions (solns->right);
1162 goto interval_try_next_left_match;
1164 else /* interval_stat == rx_bogus */
1166 rx_free_solutions (solns->left);
1168 rx_free_solutions (solns->right);
1171 return interval_stat;
1179 solns->final_tag = 1;
1180 return solns->contextfn (solns->closure,
1182 solns->start, solns->end,