This page looks best with JavaScript enabled

递归下降语法分析器的构建

 ·  ☕ 3 min read · 👀... views

针对给定语法设计递归下降分析程序,以对任意输入的符号串进行语法分析。语法如下

1
2
3
4
5
exp -> exp addop term | term
addop -> + | -
term = term mulop factor | factor
mulop = *
factor = ( exp ) | number

因为存在重复和选择,所以使用EBNF。

1
2
exp -> term { addop term }
term -> factor { mulop factor }

写成伪代码如下

1
2
3
4
5
6
7
8
9
/* exp伪代码 */
procedure exp
begin
    term;
    while token = + do
        match(token);
        term;
    end while;
end exp;
1
2
3
4
5
6
7
8
9
/* term伪代码 */
procedure term
begin
    factor;
    while token = * do
        match(token);
        factor;
    end while;
end term;
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
/* factor伪代码 */
procedure factor
begin
    if token = '('
        match('(');
        exp;
        match(')');
    else
        getnumber;
end factor;
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
/* exp构造语法树 */
function exp:syntaxTree;
var temp,newtemp:syntaxTree;
begin
    temp := term;
    while token = + do
        newtemp := makeOpNode(token);
        match(token);
        leftChild(newtemp) := temp;
        rightChile(newtemp) := term;
        temp := newtemp;
    end while;
end exp;
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
/* term构造语法树 */
function term:syntaxTree;
var temp,newtemp:syntaxTree;
begin
    temp := factor;
    while token = * do
        newtemp := makeOpNode(token);
        match(token);
        leftChild(newtemp) := temp;
        rightChile(newtemp) := factor;
        temp := newtemp;
    end while;
    return temp;
end term;
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
/* factor构造语法树 */
function factor:syntaxTree;
var temp:syntaxTree;
begin
    if token = '('
        match('(')
        temp := makeOpNode(exp);
        leftChild(temp) := null;
        rightChile(temp) := null;
        match(')')
    else if isdigit(token)
        temp := makeOpNode(token);
        leftChild(temp) := null;
        rightChile(temp) := null;
    else
        error
    reutrn temp;
end factor;

完整代码如下,基本思路就是getchar预先获取一个char用以进行预测性解析,然后递归构造语法树并打印出来.
如果匹配到运算符就构造新的节点加到原来的树上;如果匹配到单个数字,那么就使用ungetc将数字返回输入流然后用scanf("%d")来匹配多位number.

刚开始程序以c语言实现,但是只能处理单位数字,考虑到所有节点数据一致,干脆改到c++使用string.这里遇到一点问题,刚开始c语言的时候data为char类型,树节点用malloc获取空间,改到c++后结构体内string类型不能用malloc类型,因为初始化并定长了,改用new和delete后解决问题.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
#include <iostream>
#include <string>
#include <cstdio>
#include <stdlib.h>
using namespace std;

/*树节点*/
typedef struct node {
    string data;
    struct node* left;
    struct node* right;
}Node;

void printTree(Node* n, int type, int level);
void destroy_node(Node*);
Node* makeNode(string);

char token;
Node* exp();
Node* term();
Node* factor();
void match(char expectedToken) {
    if (token == expectedToken)
        token = getchar();
    else
        printf("error in match\n");
}
int main()
{
    token = getchar();
    Node* root = exp();
    if (token == '\n' || token == '\r') {
        printf("success\n");
        printTree(root, 0, 0);
    }
    else {
        printf("error in main\n");
    }
    destroy_node(root);
    return 0;
}

Node* exp() {
    Node* temp = term();
    Node* newtemp = NULL;
    while (token == '+' || token == '-') {
        newtemp = makeNode(string(1,token));
        match(token);
        newtemp->left = temp;
        newtemp->right = term();
        temp = newtemp;
    }
    return temp;
}

Node* term() {
    Node* temp = factor();
    Node* newtemp = NULL;
    while (token == '*' || token == '/') {
        newtemp = makeNode(string(1,token));
        match(token);
        newtemp->left = temp;
        newtemp->right = factor();
        temp = newtemp;
    }
    return temp;
}

Node* factor() {
    Node* temp = NULL;
    if (token == '(') {
        match('(');
        temp = exp();
        match(')');
    }
    else if (token >= '1' && token <= '9') {
        ungetc(token, stdin);
        int number;
        scanf("%d", &number);
        temp = makeNode(to_string(number));
        token = getchar();
    }
    else {
        printf("error in factor\n");
    }
    return temp;
}

/*销毁树*/
void destroy_node(Node* node)
{
    if (node != NULL)
    {
        destroy_node(node->left);
        destroy_node(node->right);
        cout << node->data;
        delete node;
        node = NULL;
    }
}

Node* makeNode(string value) {
    Node* node = new Node;
    // Node* node = (Node*)malloc(sizeof(Node));
    node->data = value;
    node->left = NULL;
    node->right = NULL;
    return node;
}

void printTree(Node* n, int type, int level)
{
    int i;

    if (NULL == n)
        return;

    printTree(n->right, 2, level + 1);
    switch (type)
    {
    case 0:
        printf("%s\n", n->data.c_str());
        break;
    case 1:
        for (i = 0; i < level; i++)
            printf("\t");
        printf("\\ %s\n", n->data.c_str());
        break;
    case 2:
        for (i = 0; i < level; i++)
            printf("\t");
        printf("/ %s\n", n->data.c_str());
        break;
    }
    printTree(n->left, 1, level + 1);
}

运行结果如下图所示

image-20211123182443603

Share on

ruokeqx
WRITTEN BY
ruokeqx