This is my entry in C.
It doesn't do much.
Inputs can only be taken from the terminal.
'.' '?' '+' and '^' '$' are implemented.
Escaping isn't supported.
The only good thing I can say about it is that it is functional as a regex matching engine. Although not everything that was asked for is implemented, it does work, but it has some bugs.
You can take a look at the code here for easier reading.
Code:
/*
fool_regx.c
challenge #27 entry :: debd
am literally laughing at me ...
hope actual regex engines are never implemented like this.
lots of ifs, 37 to be precise.
$, ^, ?, ., and + are implemented. There are some bugs.
Escaping the metacharacters isn't supported.
^ and $, if present anywhere except at the beginning and at the end respectively, are treated as literals.
Input is read from standard input (the terminal), not from command-line arguments.
Input terminates with a '\n'
Output is in the form of the entire string given where matches are highlighted in red.
*/
#include <stdio.h>
#include <stdlib.h>
/* Buffer capacities, counted in characters including the terminating '\n'. */
enum { STR_CAP = 500, REGEX_CAP = 200 };

/*
 * Read one line from standard input into buf.
 *
 * At most cap-1 characters are stored; anything beyond that, up to the end
 * of the line, is consumed and dropped so that an over-long line cannot
 * spill into the next read.  End-of-file is treated like end-of-line.
 * A '\n' is always stored as the last element.
 *
 * cap must be at least 1.
 * Returns the number of elements stored, including the final '\n'.
 */
static int read_line(int *buf, int cap)
{
    int n = 0;
    int ch;

    while ((ch = getchar()) != EOF && ch != '\n') {
        if (n < cap - 1)
            buf[n++] = ch;
    }
    buf[n++] = '\n';
    return n;
}

/*
 * Prompt for a string and a regex on standard input, then search the string.
 *
 * Supported metacharacters: '.', '?', '+', a leading '^' and a trailing '$'.
 * On the first match the whole input string is printed with the matched
 * span wrapped in red ANSI colour codes and the program returns 0;
 * otherwise "Match not found." is printed and it returns 1.
 *
 * The matching rules inside the search loop are the author's original
 * heuristics and are intentionally left as they were; only input handling,
 * out-of-range indexing and the output escape sequence were tightened.
 */
int main(void)
{
    /* One spare slot in front of the string: the search loop may step p back
       to -1 within an iteration (regex starting with '?' or '+') and still
       read str[p].  The slot holds 0, which ordinary text input does not
       contain, so that read stays in bounds and compares unequal. */
    int str_store[STR_CAP + 1] = {0};
    int *str = str_store + 1;
    /* Zero-filled so that wrd[q+1] just past a very short regex is defined. */
    int wrd[REGEX_CAP] = {0};
    int i, k, n, out;
    int q = 0, p = 0;
    int literals = 0, metafound = 0, char_dol = 0;
    int match_from = 0, match_start = 0, match_done = 0, s_before_plus = 0;
    int last_whitesp = -1;
    /* Variable descriptions::
       str[]         :: the string to search in, terminated by '\n'
       wrd[]         :: the regex, terminated by '\n'
       i             :: number of elements in str[], including the '\n'
       k             :: number of elements in wrd[], including the '\n'
       p             :: current position in the string
       q             :: current position in the regex
       literals      :: index of the last regex element to be matched
       metafound     :: set if any of '.' '+' '?' occurs in the regex
       char_dol      :: set if the regex starts with '^' or ends with '$'
       match_from    :: string index at which the search starts
       match_start   :: start of the current match, for highlighting
       match_done    :: set once a match has been found
       s_before_plus :: the character that preceded a '+'
       last_whitesp  :: index of the last space in the string, -1 if none
    */

    printf("Search in: ");
    fflush(stdout);
    i = read_line(str, STR_CAP);
    for (n = 0; n < i; n++) {
        if (str[n] == ' ')
            last_whitesp = n;
    }

    printf("Regex: ");
    fflush(stdout);
    k = read_line(wrd, REGEX_CAP);
    for (n = 0; n < k; n++) {
        if (wrd[n] == '.' || wrd[n] == '?' || wrd[n] == '+')
            metafound = 1;
    }

    literals = k - 2; /* skip the trailing '\n'; the value is an index, not a count */
    if (wrd[0] == '^') {
        q = 1;
        literals = k - 3;
        char_dol = 1;
    }
    /* k >= 2 keeps an empty regex (just '\n') from indexing wrd[-1]. */
    if (k >= 2 && wrd[k - 2] == '$') {
        literals = k - 3;
        /* With metacharacters the whole last word may take part in the match,
           so start right after the last space (index 0 when there is none).
           Without them, or when the regex ends in '.', only the tail of the
           string can match.  metafound implies k >= 3 here, so wrd[k-3] is
           in range. */
        if (metafound && wrd[k - 3] != '.')
            match_from = last_whitesp + 1;
        else
            match_from = (i - 2) - literals;
        /* A regex longer than the string would give a negative start. */
        if (match_from < 0)
            match_from = 0;
        char_dol = 1;
    }

    for (p = match_from; p < i - 1; p++) { /* start loop to find match */
        /* Mismatch on a plain character: either skip an optional ('?')
           element or restart the match one position further on. */
        if (wrd[q] != str[p] && wrd[q] != '.' && wrd[q] != '?' && wrd[q] != '+') {
            if (wrd[q + 1] == '?') {
                if (!q)
                    match_start = p; /* optional first element: match may start here */
                if (literals > q) {
                    q++; /* move on in the regex ...                */
                    p--; /* ... but retry the same string character */
                }
                continue;
            }
            q = 0;
            if (literals > 0)
                p = match_start; /* rewind to where the failed attempt began */
            match_start++;       /* next attempt starts one character later */
            continue;
        }

        /* From here on wrd[q] equals str[p] or is one of '.', '?', '+'. */
        if (!q)
            match_start = p;
        if (wrd[q] == '?')
            p--; /* '?' consumes no input; the for-increment restores p */
        if (wrd[q] == '.' && wrd[q + 1] == '+') {
            p = i - 2; /* ".+" swallows the rest of the string */
            match_done = 1;
        }
        if (wrd[q] == str[p] && wrd[q + 1] == '+')
            s_before_plus = wrd[q]; /* remember what the '+' repeats */
        if (wrd[q] == '+') {
            if (p == i - 2)
                match_done = 1; /* reached the end of the string */
            if (str[p] == s_before_plus)
                q--; /* still repeating: stay on the '+' */
            else
                p--; /* repetition over: retry this character with the next regex element */
        }
        if (q == literals)
            match_done = 1;
        if (wrd[q] != '+' && char_dol) {
            if (q == literals + 1)
                match_done = 1; /* literals is one lower when '^' / '$' is present */
        } else if (char_dol && s_before_plus != str[p]) {
            match_done = 1; /* anchored '+': stop at the first non-repeat */
        }
        if (q <= literals)
            q++;

        if (wrd[0] == '^' && p == literals + 1)
            break;

        if (match_done) {
            printf("Match found : ");
            for (out = 0; out < i - 1; out++) {
                if (out >= match_start && out <= p) /* colour from match start up to latest p */
                    printf("\033[31m%c\033[0m\017", str[out]);
                else
                    printf("%c", str[out]);
            }
            puts("");
            return 0;
        }
    }

    printf("Match not found.\n");
    return 1;
}
Here are some example outputs:
Bookmarks