Pages

Friday, May 14, 2010

WildCard 通配符的粗糙实现

话说今年学校又举行 ACM 程序设计竞赛了,这次我有机会出题目吓一吓那些小孩子们,心里很得瑟呢。
本意是想要出一道模拟题判断一个五子棋棋局的胜负状况;但是很神奇的是,一念之间,我就产生了一个新的想法,考 '*' 和 '?' 通配符吧。
WildCard 说起来是匹配,实际上更像是查找:比如说用 ab*??ab*ab 匹配 abababababab ,不是从头到尾比对一遍就能完成的事情,所以让他们做这道题肯定会很有乐趣。


于是… 俺设定题目的输入格式为:
测试数据组的数目 n
含通配符的表达式 regI
本组测试的字符串个数 mI
mI 行字符串
regII
mII…

输出每一个与本组测试前的通配符相匹配的字符串;组与组之间用一个空行隔开。

然后用 Python 写了个简单的程序用来自动生成测试数据:
import random

N=100

characters = r'''0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+-= []{}^.,\|/!@#$%&();:<>~`'''
characters += '\t'

wildcardparts = [
    '*',
    '?',
    '*?'
];

def randChar():
    return characters[ random.randint(0, len(characters)-1) ]

def genRandomWildCard():
    nc = random.randint( 1, 12 )
    wc = []
    for c in range(nc):
        wc.append( randChar() )
    nc = random.randint( 0, 5 )
    for w in range(nc):
        p = random.randint(0, len(wc))
        wc.insert(p, wildcardparts[
            random.randrange(0, len(wildcardparts)-1)
        ])
    return wc

def genMatched(wildcard):
    matched = []
    for part in wildcard:
        if part=='*':
            s = ''
            l = random.randint(0, 50)
            for i in range(l):
                s += randChar()
            matched.append(s)
        elif part=='?':
            matched.append(randChar())
        else:
            matched.append(part)
    return ''.join(matched)


def genRandom():
    l = random.randint(3, 1024)
    s = ''
    for i in range(l):
        s+=randChar()
    return s

print N
for ith in range(N):
    n = random.randint( 3, 200 )
    wildcard = genRandomWildCard()
    print ''.join(wildcard)
    print n
    for i in range(n):
        if random.randint(1, 100)<33:
            print genMatched(wildcard)
        else:
            print genRandom()

测试数据的结果也是直接利用 Python 的 re 得到的,很方便;然而后来他们找我要例程,我顿时就觉得很悲哀……早知如此我就出一道 A+B 的题目多好……
只好自食苦果了,如下:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef struct WildCardNode
{
    char const * pattern; // pointer to the original pattern
    int  len;             // length for exact matching
    char curr;            // '?', '*', or other

    struct WildCardNode * pre;
    struct WildCardNode * next;
} WildCardNode;

typedef WildCardNode M;

M * newNode()
{
    M * node = (M*) malloc(sizeof(M));
    node->pattern = 0;
    node->pre = 0;
    node->next = 0;
    return node;
}

M * createWildCard(char * pattern)
{
    char ch;
    M * head = newNode();
    M * curr = head;
    head->pattern = "";
    head->len = 0;
    head->curr = 0;
    
    for(ch = *pattern; ch && ch!='\n'; ch = *++pattern) {
        curr->next = newNode();
        curr->next->pre = curr;
        curr = curr->next;

        if (ch=='*') {
            curr->curr = '*';
            curr->pattern = pattern;
            while( pattern[1] == '*' )
                ++pattern;
        } else if (ch=='?') {
            curr->curr = '?';
            curr->pattern = pattern;
            curr->len = 1;
            while( pattern[1] == '?' ) {
                ++pattern;
                ++(curr->len);
            }
        } else {
            curr->curr = ch;
            curr->pattern = pattern;
            curr->len = 1;
            while ( pattern[1] !=0 && pattern[1] != '\n' &&
                    pattern[1] != '*' && pattern[1] != '?' ) {
                ++pattern;
                ++(curr->len);
            }
        }
    }
    if (*pattern=='\n')
        *pattern=0;

    return head;
}

void destroyWildCard(M * wc)
{
    M * node = 0;
    if(wc)
        node = wc->next;
    while(wc) {
        free(wc);
        wc = node;
        if(node)
            node = node->next;
    }
}

void printWildCard(M * wc)
{
    while((wc=wc->next)!=0) {
        if(wc->curr == '*') {
            printf(" \'*\'\n");
        } else if (wc->curr == '?') {
            printf(" \'?\' of length %d\n", wc->len );
        } else {
            int i;
            printf(" string: \"");
            for( i=0; i<wc->len; ++i )
                putchar(wc->pattern[i]);
            printf("\" (of length %d) \n", wc->len );
        }
    }
}

int DFS( M * node, char const * str )
{
    if (node==0) { // reached the end
        return str[0]=='\n'; // here is '\n' because you read the str by fgets which keeps it
    }
    if ( node->curr == '*' ) {
        int pos=0;
        for (pos=strlen(str)-1; pos>=0; --pos) {
        //for(pos=0; str[pos]; ++pos) {
            if(DFS(node->next, str+pos))
                return 1;
        }
        return 0;
    } else if ( node->curr == '?' ) {
        int i;
        for(i=0; i<node->len; ++i)
            if(str[i]==0 || str[i]=='\n')
                return 0;
        return DFS(node->next, str+node->len);
    } else {
        if ( strncmp(str, node->pattern, node->len) )
            return 0;
        else
            return DFS(node->next, str+node->len);
    }
}

inline int wildCardMatch(M * pattern, char const * str)
{
    return DFS(pattern, str);
}

#if 0
int main()
{
    char p[1024], t[1024];
    while(fgets(p,1024,stdin)) {
        M* m = createWildCard(p);
        fgets(t,1024,stdin);
        printf( wildCardMatch(m, t)? "Yes!\n": "No~\n" );
        destroyWildCard(m);
    }
    return 0;
}
#else
char p[2048], t[4096];
int main()
{
    int N,n;
    fgets(p, 2048, stdin);
    for(sscanf(p, "%d", &N); N--; puts("")) {
        M* wc = 0;
        fgets(p, 2048, stdin);
        wc = createWildCard(p);
        fgets(t, 12, stdin);
        for(sscanf(t, "%d", &n); n--; ) {
            fgets(t, 4096, stdin);
            if(wildCardMatch( wc, t ))
                printf("%s", t);
        }
        destroyWildCard(wc);
    }
    return 0;
}
#endif

悲剧的是到最后,我这道题没人做出来——有个勇敢的孩子提交了12次,这让我很欣慰……
(END)

No comments:

Post a Comment