mario::konrad
programming / C++ / sailing / nerd stuff
Simple Pattern Matching
© 2000 / Mario Konrad

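A very simple recursive algorithm is sufficient for basic wildcard matching, where `*` matches any (possibly empty) sequence of characters and `?` matches exactly one character: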

1
2
3
4
5
6
7
8
9
/* Recursive wildcard matcher.                                          */
/*   pattern : pattern text; '*' stands for any (possibly empty) run of */
/*             characters, '?' for exactly one character, everything    */
/*             else must match literally                                */
/*   s       : the string to test                                       */
/* Returns 1 if the complete string matches the complete pattern and 0  */
/* otherwise.                                                           */
int match_r(const char * pattern, const char * s)
{
   const char head = *pattern;

   /* pattern used up: only an equally used up string matches */
   if (head == '\0') {
      return *s == '\0';
   }

   if (head == '*') {
      /* first let the asterisk stand for nothing at all ... */
      if (match_r(pattern + 1, s)) {
         return 1;
      }
      /* ... otherwise let it swallow one character and try again */
      if (*s == '\0') {
         return 0;
      }
      return match_r(pattern, s + 1);
   }

   /* '?' and literals both need one character to consume */
   if (*s == '\0') {
      return 0;
   }
   if (head != '?' && head != *s) {
      return 0;
   }
   return match_r(pattern + 1, s + 1);
}

A slightly more complex algorithm that can also handle sets of characters (for example `[abc]`, `[a-z]` or `[!x]`):

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
/* Matches the single character at *s against a bracketed character set. */
/*                                                                       */
/* On entry *pattern must point at the first character after the opening */
/* [ of the set. Supported syntax:                                       */
/*   - a leading ! negates the set                                       */
/*   - x-y denotes a range, but only if the - is not the first member,   */
/*     is not directly followed by the closing ] and x is smaller than   */
/*     y; in all other cases the - is an ordinary member                 */
/*   - a ] as the very first member (after an optional !) is an ordinary */
/*     member and does not close the set                                 */
/*                                                                       */
/* Returns 1 if the set matches and 0 if it mismatches. In both cases    */
/* *pattern is left on the closing ]. On a match *s is advanced to the   */
/* next character, on a mismatch *s is unmodified.                       */
/*                                                                       */
/* A set without a closing ] never matches: 0 is returned, *s is         */
/* unmodified and *pattern is left on the last character in front of the */
/* terminating NUL. A caller that steps over the "closing ]" therefore   */
/* lands on the terminator instead of running past the end of pattern.   */
int set(const char ** pattern, const char **s)
{
    int fit = 0;
    int neg = 0;
    int beg = 1;

    if ('!' == **pattern) {
        neg = 1;
        (*pattern)++;
    }
    while ((']' != **pattern) || (beg)) {
        if ('\0' == **pattern) {
            /* the pattern ended before the closing ] was found: stop */
            /* here instead of reading beyond the end of the string   */
            (*pattern)--;
            return (0);
        }
        if (!fit) {
            /* the upper bound of a range must be a real character: */
            /* neither the closing ] nor the end of the pattern     */
            if ((!beg)
                    && ('-' == **pattern)
                    && ('\0' != *(*pattern + 1))
                    && (']' != *(*pattern + 1))
                    && ((*(*pattern - 1)) < (*(*pattern + 1)))) {
                if (((**s) >= (*(*pattern - 1)))
                        && ((**s) <= (*(*pattern + 1)))) {
                    fit = 1;
                    (*pattern)++; /* skip the upper bound of the range */
                }
            } else if ((**pattern) == (**s)) {
                fit = 1;
            }
        }
        /* once a member has matched the rest of the set is only */
        /* skipped up to the closing ]                           */
        (*pattern)++;
        beg = 0;
    }
    if (neg) fit = 1 - fit; /* change from zero to one and vice versa */
    if (fit) (*s)++;

    return (fit);
}

/* asterisk() and match() call each other and match() is defined further */
/* down, so it has to be declared before its first use.                  */
int match(const char * pattern, const char * s);

/* Scans an asterisk.                                                    */
/*                                                                       */
/* On entry *pattern points at the asterisk. The asterisk and every ? or */
/* * directly following it are consumed; each ? additionally consumes    */
/* one character of *s. Afterwards *s is moved forward to the first      */
/* position from which the rest of the pattern matches the rest of the   */
/* string according to match().                                          */
/*                                                                       */
/* Returns 1 if such a position exists and 0 otherwise. *pattern is left */
/* on the first pattern character behind the consumed wildcards.         */
int asterisk(const char ** pattern, const char ** s)
{
    int fit = 1;

    (*pattern)++; /* erase the leading asterisk */
    while (**s && (('?' == **pattern) || ('*' == **pattern))) {
        if ('?' == **pattern) (*s)++;
        (*pattern)++;
    }
    /* Now it could be that s is empty and pattern contains */
    /* asterisks. Then we delete them to get a proper state */
    while ('*' == (**pattern)) (*pattern)++;

    /* An exhausted string only fits if the pattern is exhausted as well */
    if ('\0' == (**s)) return ('\0' == (**pattern));

    /* s is not empty here and the first character of pattern */
    /* (if there is one) isn't in [*?]                        */
    if (0 == match(*pattern, (*s))) {
        for (;;) {
            (*s)++;
            /* skip as many characters as possible in the teststring; */
            /* stop if a character match occurs. A set cannot be      */
            /* compared this way, so no characters are skipped for it */
            while (('\0' != (**s))
                    && ('[' != (**pattern))
                    && ((**pattern) != (**s))) {
                (*s)++;
            }
            if ('\0' == (**s)) {
                /* ran out of teststring without finding a fit */
                fit = 0;
                break;
            }
            if (0 != match(*pattern, (*s))) break;
        }
    }
    /* nothing left on both sides: the asterisk swallowed the rest */
    if (('\0' == **s) && ('\0' == **pattern)) fit = 1;
    return (fit);
}

/* Tests whether the string s matches the pattern.                       */
/*   pattern : may contain ? (one character), * (handled by asterisk())  */
/*             and [...] character sets (handled by set()); every other  */
/*             character has to match literally                          */
/*   s       : the teststring                                            */
/* Returns 1 if the whole string matches the whole pattern, 0 otherwise. */
int match(const char * pattern, const char * s)
{
    int ok = 1;

    /* walk through the pattern as long as nothing has failed and */
    /* both the pattern and the teststring have characters left   */
    while (('\0' != *pattern) && ok && ('\0' != *s)) {
        const char token = *pattern;

        if ('[' == token) {
            /* step over the opening bracket; set() stops at the */
            /* closing one which is skipped at the end of the loop */
            pattern++;
            ok = set(&pattern, &s);
        } else if ('?' == token) {
            s++;
        } else if ('*' == token) {
            ok = asterisk(&pattern, &s);
            /* asterisk() already moved behind the wildcards, so the */
            /* increment at the end of the loop has to be undone     */
            pattern--;
        } else {
            ok = (token == *s) ? 1 : 0;
            s++;
        }
        pattern++;
    }

    /* asterisks left over at the end of the pattern match the empty rest */
    if (ok) {
        while ('*' == *pattern) pattern++;
    }

    if (!ok) return 0;
    if ('\0' != *s) return 0;
    return ('\0' == *pattern);
}