TEST语言的词法规则

  1. 标识符:字母打头,后接任意字母或数字。
  2. 保留字:标识符的子集,包括if, else, for, while, int, write, read。
  3. 无符号整数:由数字组成,但最高位不能为0,允许一位的0。
  4. 分界符:(、)、;、{、}、,
  5. 运算符:+、-、*、/、=、<、>、>=、<=、!=、==
  6. 注释符:/*       */

TEST语言的语法规则

1)  <program> → {<declaration_list><statement_list>}

2)  <declaration_list> → <declaration_list><declaration_stat> | ε

3)  <declaration_stat> → int ID;

4)  <statement_list> → <statement_list><statement>| ε

5)  <statement> → <if_stat>|<while_stat>|<for_stat>|<read_stat>|<write_stat>|<compound_stat>|<assignment_stat>|;

6)  <if_stat> → if (<bool_expression>) <statement>

| if (<bool_expression>) <statement> else <statement>

7)  <while_stat> → while (<bool_expression>) < statement >

8)  <for_stat> → for (<assignment_expression>; <bool_expression>;

<assignment_expression>) <statement>

9)  <write_stat> → write < arithmetic_expression >;

10) <read_stat> → read ID;

11) <compound_stat> → {<statement_list>}

12) <assignment_expression> → ID=<arithmetic_expression>

13) <assignment_stat> →<assignment_expression>;

14) <bool_expression>→<arithmetic_expression> >  <arithmetic_expression>

|<arithmetic_expression> <  <arithmetic_expression>

|<arithmetic_expression> >= <arithmetic_expression>

|<arithmetic_expression> <= <arithmetic_expression>

|<arithmetic_expression> == <arithmetic_expression>

|<arithmetic_expression> != <arithmetic_expression>

15) <arithmetic_expression> → <arithmetic_expression>+<term>

|< arithmetic_expression>-<term>

|< term >

16) < term > → < term >*<factor>|< term >/<factor>|< factor >

17) < factor > → (<arithmetic_expression>)|ID|NUM

实现步骤

  • 根据词法规则写出相应的正则表达式,构造NFA,再将其确定化为DFA并进行化简(最小化)

NFA

DFA

  • 根据词法分析的结果进行语法分析:先消除文法中的左递归,再求出各非终结符的FIRST集和FOLLOW集

关键代码

  • 文件读取

由于文件读取是一个异步操作,这里用一个promise将其包裹

<input type='file' accept='text/plain' onchange='openFile(event)'/>
// Wraps the callback-based FileReader API in a promise.
// The executor installs `openFile` (declared elsewhere with `let openFile;`),
// which is the onchange handler of the file <input>. The promise is fulfilled
// with the file's text, or rejected with the reader's error.
let readPromise = new Promise(function (resolve, reject) {
    openFile = function (event) {
        let input = event.target;
        // Without a selected file readAsText(undefined) would throw.
        if (!input.files || input.files.length === 0) {
            return;
        }
        let reader = new FileReader();
        reader.onload = function () {
            // An empty file produces "", which is still a valid (empty) source
            // text; resolve with it instead of leaving the promise pending.
            resolve(typeof reader.result === "string" ? reader.result : "");
        };
        reader.onerror = function () {
            // Report read failures to the consumer instead of never settling.
            reject(reader.error);
        };
        reader.readAsText(input.files[0]);
    };
});
// Runs the whole pipeline once the source file has been read:
// lexical analysis first, then syntax analysis on the lexer's output.
readPromise.then(function (result) {
    readResultString = result;
    readResultArray = readResultString.split('');
    // The parser consumes the lexer output, so the lexer has to run first.
    wordStatistic();
    // grammarStatistic() returns a status code that is used as an index into
    // TEXTARRAY (defined outside this excerpt; presumably the message table).
    console.log(TEXTARRAY[grammarStatistic()]);
    console.log(statisticLog);
}).catch(function (error) {
    // Make failures of the read or of the analysis visible instead of leaving
    // an unhandled promise rejection.
    console.error(error);
});

逐行读取词法分析结果

// Utility functions used for reading tokens and writing lexer output.

// Returns the next token of the lexer output as { symbol, value }.
// Each entry of resultLine.Lines has the form "<symbol> <value>";
// resultLine.flag is the index of the next entry to read. Every token that is
// read is also appended to statisticLog.
// At the end of the token stream (the trailing empty entry, or any index past
// the last entry) an end marker { symbol: "", value: "undefined" } is returned
// without advancing or logging. The value is the *string* "undefined" because
// that is what statement_list compares against to detect the end of input;
// previously the empty entry produced the undefined value and the read after
// it threw a TypeError.
const readLine = () => {
    const line = resultLine.Lines[resultLine.flag];
    if (line === undefined || line === "") {
        return ({
            symbol: "",
            value: "undefined"
        });
    }
    let array = line.split(" ");
    resultLine.flag++;
    statisticLog += array[0] + " " + array[1] + "\r\n";
    return ({
        symbol: array[0],
        value: array[1]
    });
};
// Appends one line to the lexer output (wordStatisticResult).
// Keyword is written first, then a single space, then ch, then "\r\n".
const outFunction = (Keyword, ch) => {
    wordStatisticResult = `${wordStatisticResult}${Keyword} ${ch}\r\n`;
};
  • 词法分析
// Reserved words recognised by the lexer.
const keyword = ['if', 'else', 'for', 'while', 'do', 'read', 'write', 'int'];
// Single-character tokens; '/' is not listed because the lexer handles it separately.
const singleWord = ['+', '-', '*', '(', ')', '{', '}', ',', ';', ':', '.'];
// Characters that may start a two-character operator.
const doubleWord = ['>', '<', '=', '!'];

// File-input change handler; assigned inside the readPromise executor.
let openFile;

// Content of the file that was read, as a string and as an array of characters.
let readResultString = '';
let readResultArray = [];

// Output of the lexical analysis.
let wordStatisticResult = '';
// Log written during the syntax analysis.
let statisticLog = '';

// Temporary buffer the lexer uses while collecting the characters of a token.
let stack = [];
// Lexer output being read by the parser; the original note describes a line
// as "symbol" (type) plus "value".
let resultLine = {};
// Current token of the parser.
let t = '';
// Wraps the callback-based FileReader API in a promise.
// The executor installs `openFile`, the onchange handler of the file <input>.
// The promise is fulfilled with the file's text, or rejected with the
// reader's error.
let readPromise = new Promise(function (resolve, reject) {
    openFile = function (event) {
        let input = event.target;
        // Without a selected file readAsText(undefined) would throw.
        if (!input.files || input.files.length === 0) {
            return;
        }
        let reader = new FileReader();
        reader.onload = function () {
            // An empty file produces "", which is still a valid (empty) source
            // text; resolve with it instead of leaving the promise pending.
            resolve(typeof reader.result === "string" ? reader.result : "");
        };
        reader.onerror = function () {
            // Report read failures to the consumer instead of never settling.
            reject(reader.error);
        };
        reader.readAsText(input.files[0]);
    };
});
// Lexical analysis.
// Scans readResultArray character by character and records every token
// through outFunction(symbol, text):
//   - a letter followed by letters/digits: a reserved word (symbol = the word
//     itself) or an identifier (symbol "ID")
//   - a run of digits: "NUM"; a multi-digit run with a leading 0 is "ERROR"
//   - the single-character tokens listed in singleWord
//   - > < = ! optionally followed by '='; a lone '!' is "ERROR"
//   - '/' is either the division operator or the start of a /* ... */ comment
//   - anything else: "ERROR"
// The module-level `stack` is used as the character buffer of the current token.
const wordStatistic = () => {
    const isLetter = (c) => c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z';
    const isDigit = (c) => c >= '0' && c <= '9';
    // Tabs count as whitespace too, so tab-indented sources do not produce
    // ERROR tokens.
    const isBlank = (c) => c == " " || c == "\t" || c == "\r" || c == "\n";

    for (let i = 0; i < readResultArray.length;) {
        let ch = readResultArray[i];
        while (isBlank(ch)) {
            ch = readResultArray[++i];
        }
        // Identifier or reserved word.
        if (isLetter(ch)) {
            stack = [ch];
            ch = readResultArray[++i];
            while (isLetter(ch) || isDigit(ch)) {
                stack.push(ch);
                ch = readResultArray[++i];
            }
            let StrToken = stack.join("");
            outFunction(keyword.indexOf(StrToken) >= 0 ? StrToken : "ID", StrToken);
        }
        // Unsigned integer.
        else if (isDigit(ch)) {
            stack = [ch];
            ch = readResultArray[++i];
            while (isDigit(ch)) {
                stack.push(ch);
                ch = readResultArray[++i];
            }
            let StrToken = stack.join("");
            // The most significant digit must not be 0; only a single "0" is allowed.
            let hasLeadingZero = StrToken.length > 1 && StrToken[0] == '0';
            outFunction(hasLeadingZero ? "ERROR" : "NUM", StrToken);
        }
        // Single-character token.
        else if (singleWord.indexOf(ch) >= 0) {
            stack = [ch];
            ch = readResultArray[++i];
            let StrToken = stack.join("");
            outFunction(StrToken, StrToken);
        }
        // One- or two-character operator.
        else if (doubleWord.indexOf(ch) >= 0) {
            stack = [ch];
            ch = readResultArray[++i];
            if (ch == '=') {
                stack.push(ch);
                ch = readResultArray[++i];
            }
            let StrToken = stack.join("");
            // Only a lone '!' is invalid; "!=" is a regular operator.
            outFunction(StrToken == "!" ? "ERROR" : StrToken, StrToken);
        }
        // '/' needs a look-ahead: division operator or comment start.
        else if (ch == '/') {
            ch = readResultArray[++i];
            if (ch == "*") {
                // Skip everything up to and including the closing "*/"
                // (or up to the end of the input if the comment is not closed).
                let ch2 = readResultArray[++i];
                do {
                    ch = ch2;
                    ch2 = readResultArray[++i];
                } while ((ch != "*" || ch2 != "/") && i < readResultArray.length);
                ch = readResultArray[++i];
            } else {
                // Emit the division operator itself; i already points at the
                // character after '/', which the next iteration will scan.
                outFunction('/', '/');
            }
        }
        else {
            // NOTE: when the input ends with whitespace, ch is undefined here
            // and an "ERROR undefined" line is emitted. This is kept on
            // purpose: statement_list recognises the value "undefined" as the
            // end of the input.
            outFunction("ERROR", ch);
            ch = readResultArray[++i];
        }
    }
};
  • 语法分析
// Syntax analysis entry point.
// Splits the lexer output into lines, rewinds the read position to the first
// line and parses a <program>. Returns the status code produced by program().
const grammarStatistic = () => {
    const tokenLines = wordStatisticResult.split('\r\n');
    resultLine = { Lines: tokenLines, flag: 0 };
    return program();
};
// <program> → { <declaration_list> <statement_list> }
// Reads the first token itself. Returns CODE["LOSS{"] / CODE["LOSS}"] when a
// brace is missing, the first code greater than CODE.NORMAL reported by a
// sub-rule, or otherwise the code returned by statement_list().
const program = () => {
    t = readLine();
    if (t.symbol != "{") {
        return CODE["LOSS{"];
    }

    t = readLine();
    const declarations = declaration_list();
    if (declarations > CODE.NORMAL) {
        return declarations;
    }

    const statements = statement_list();
    if (statements > CODE.NORMAL) {
        return statements;
    }

    // The closing brace is checked but not consumed.
    return t.symbol != "}" ? CODE["LOSS}"] : statements;
};
// <declaration_list> → <declaration_list> <declaration_stat> | ε
// Parses declarations for as long as the current token is "int". Stops at the
// first code greater than CODE.NORMAL and returns it; otherwise returns the
// code of the last declaration (CODE.NORMAL when there was none).
const declaration_list = () => {
    let status = CODE.NORMAL;
    for (;;) {
        if (t.symbol != "int") {
            return status;
        }
        status = declaration_stat();
        if (status > CODE.NORMAL) {
            return status;
        }
    }
};
// <declaration_stat> → int ID ;
// Called with "int" as the current token. Returns CODE["LOSSID"] or
// CODE["LOSS;"] when the expected token is missing; on success the token after
// the ';' becomes the current token and CODE.NORMAL is returned.
const declaration_stat = () => {
    const expectedTokens = [["ID", "LOSSID"], [";", "LOSS;"]];
    for (const [symbol, errorKey] of expectedTokens) {
        t = readLine();
        if (t.symbol != symbol) {
            return CODE[errorKey];
        }
    }
    t = readLine();
    return CODE.NORMAL;
};
// <statement_list> → <statement_list> <statement> | ε
// Parses statements until the current token is "}" (which is not consumed).
// Returns the first code greater than CODE.NORMAL reported by statement(),
// CODE["LOSS}"] when the tokens run out before a "}" is seen, and otherwise
// the code of the last statement (CODE.NORMAL for an empty list).
const statement_list = () => {
    // End of input shows up either as a token without a value (the empty last
    // line of the lexer output) or as a non-identifier token whose text is the
    // string "undefined". Identifiers are excluded so that a variable that is
    // actually named `undefined` is not mistaken for the end of the input.
    const atEndOfInput = () =>
        t.value === undefined || (t.value == "undefined" && t.symbol != "ID");

    let code = CODE.NORMAL;
    while (t.symbol != "}") {
        // Checked before parsing, so an already exhausted input is reported
        // instead of being read past its end.
        if (atEndOfInput()) {
            return CODE["LOSS}"];
        }
        code = statement();
        if (code > CODE.NORMAL) {
            return code;
        }
    }
    return code;
};
// <statement> → <if_stat> | <while_stat> | <for_stat> | <read_stat>
//             | <write_stat> | <compound_stat> | <assignment_stat> | ;
// Dispatches on the current token and returns the code of the chosen rule.
// The empty statement ';' and every token that starts no statement are
// consumed (one token) and reported as CODE.NORMAL.
const statement = () => {
    switch (t.symbol) {
        case "if":
            return if_state();
        case "while":
            return while_stat();
        case "for":
            return for_stat();
        case "read":
            return read_stat();
        case "write":
            return write_stat();
        case "{":
            return compound_stat();
        case "ID":
            return assignment_stat();
        default:
            t = readLine();
            return CODE.NORMAL;
    }
};
// <if_stat> → if ( <bool_expression> ) <statement>
//           | if ( <bool_expression> ) <statement> else <statement>
// Called with "if" as the current token. Returns CODE["LOSS("] / CODE["LOSS)"]
// for a missing parenthesis, the first code greater than CODE.NORMAL reported
// by a sub-rule, or the code of the last statement that was parsed.
const if_state = () => {
    t = readLine();
    if (t.value != "(") {
        return CODE["LOSS("];
    }

    t = readLine();
    const condition = bool_expression();
    if (condition > CODE.NORMAL) {
        return condition;
    }
    if (t.symbol != ")") {
        return CODE["LOSS)"];
    }

    t = readLine();
    const thenBranch = statement();
    // Without an error-free then-branch followed by "else" there is nothing
    // more to parse.
    if (thenBranch > CODE.NORMAL || t.symbol != "else") {
        return thenBranch;
    }

    t = readLine();
    return statement();
};
// <while_stat> → while ( <bool_expression> ) <statement>
// Called with "while" as the current token. Returns CODE["LOSS("] /
// CODE["LOSS)"] for a missing parenthesis, a code greater than CODE.NORMAL
// reported by the condition, or the code of the loop body.
const while_stat = () => {
    t = readLine();
    if (t.value != "(") {
        return CODE["LOSS("];
    }

    t = readLine();
    const condition = bool_expression();
    if (condition > CODE.NORMAL) {
        return condition;
    }
    if (t.symbol != ")") {
        return CODE["LOSS)"];
    }

    t = readLine();
    return statement();
};
// <for_stat> → for ( <assignment_expression> ; <bool_expression> ;
//                    <assignment_expression> ) <statement>
// Called with "for" as the current token. Returns CODE["LOSS("],
// CODE["LOSS;"] or CODE["LOSS)"] for a missing delimiter, the first code
// greater than CODE.NORMAL reported by a header clause, or the code of the
// loop body.
const for_stat = () => {
    t = readLine();
    if (t.symbol != "(") {
        return CODE["LOSS("];
    }

    // The three header clauses, each with the delimiter that must follow it
    // and the error reported when that delimiter is missing.
    const headerClauses = [
        [assignment_expression, ";", "LOSS;"],
        [bool_expression, ";", "LOSS;"],
        [assignment_expression, ")", "LOSS)"],
    ];
    for (const [parseClause, delimiter, errorKey] of headerClauses) {
        t = readLine();
        const status = parseClause();
        if (status > CODE.NORMAL) {
            return status;
        }
        if (t.symbol != delimiter) {
            return CODE[errorKey];
        }
    }

    t = readLine();
    return statement();
};
// <write_stat> → write <arithmetic_expression> ;
// Called with "write" as the current token. Returns a code greater than
// CODE.NORMAL reported by the expression, CODE["LOSS;"] when the ';' is
// missing, or the expression's code after consuming the ';'.
const write_stat = () => {
    t = readLine();
    const status = arithmetic_expression();
    if (status > CODE.NORMAL) {
        return status;
    }
    if (t.symbol != ";") {
        return CODE["LOSS;"];
    }
    t = readLine();
    return status;
};
// <read_stat> → read ID ;
// Called with "read" as the current token. Returns CODE["LOSSID"] or
// CODE["LOSS;"] when the expected token is missing; on success the token after
// the ';' becomes the current token and CODE.NORMAL is returned.
const read_stat = () => {
    // Advances to the next token and yields its symbol.
    const advance = () => {
        t = readLine();
        return t.symbol;
    };

    if (advance() != "ID") {
        return CODE["LOSSID"];
    }
    if (advance() != ";") {
        return CODE["LOSS;"];
    }
    advance();
    return CODE.NORMAL;
};
// <compound_stat> → { <statement_list> }
// Called with "{" as the current token. Returns a code greater than
// CODE.NORMAL reported by the statement list, CODE["LOSS}"] when the closing
// brace is missing, or the list's code after consuming the "}".
const compound_stat = () => {
    t = readLine();
    const status = statement_list();
    if (status > CODE.NORMAL) {
        return status;
    }
    if (t.symbol == "}") {
        t = readLine();
        return status;
    }
    return CODE["LOSS}"];
};
// <assignment_expression> → ID = <arithmetic_expression>
// Expects the identifier to be the current token. Returns CODE["LOSSID"] when
// it is not, CODE["LOSS="] when the '=' is missing, and otherwise the code of
// the right-hand side expression.
const assignment_expression = () => {
    if (t.symbol != "ID") {
        return CODE["LOSSID"];
    }
    t = readLine();
    if (t.symbol != "=") {
        return CODE["LOSS="];
    }
    t = readLine();
    return arithmetic_expression();
};
// <assignment_stat> → <assignment_expression> ;
// Returns a code greater than CODE.NORMAL reported by the assignment
// expression, CODE["LOSS;"] when the terminating ';' is missing, and otherwise
// the expression's code.
const assignment_stat = () => {
    let code = CODE.NORMAL;
    code = assignment_expression();
    if (code > CODE.NORMAL) {
        return code;
    }
    if (t.symbol != ";") {
        return CODE["LOSS;"];
    }
    // Consume the ';' like read_stat and write_stat do. Leaving it as the
    // current token made if_state miss an "else" that follows an assignment
    // statement, because it saw ';' instead of "else".
    t = readLine();
    return code;
};
// <bool_expression> → <arithmetic_expression> (> | < | >= | <= | == | !=)
//                     <arithmetic_expression>
// Returns the code of the left operand when it is greater than CODE.NORMAL or
// when no relational operator follows; otherwise returns the code of the
// right operand. The operator is matched on the token text (t.value).
// NOTE(review): a lone arithmetic expression without a relational operator is
// accepted here, although the grammar rule requires an operator.
const bool_expression = () => {
    const left = arithmetic_expression();
    if (left > CODE.NORMAL) {
        return left;
    }

    const relationalOperators = [">", "<", '>=', "<=", "!=", "=="];
    const hasOperator = relationalOperators.some((operator) => t.value == operator);
    if (!hasOperator) {
        return left;
    }

    t = readLine();
    return arithmetic_expression();
};
// <arithmetic_expression> → <term> { (+ | -) <term> }
// (the left-recursive rule rewritten as a loop)
// Returns the first code greater than CODE.NORMAL reported by term(), or the
// code of the last term parsed.
const arithmetic_expression = () => {
    let status = term();
    while (!(status > CODE.NORMAL) && (t.symbol == "+" || t.symbol == "-")) {
        t = readLine();
        status = term();
    }
    return status;
};
// <term> → <factor> { (* | /) <factor> }
// (the left-recursive rule rewritten as a loop)
// Returns the first code greater than CODE.NORMAL reported by factor(), or
// the code of the last factor parsed.
const term = () => {
    let status = factor();
    for (;;) {
        if (status > CODE.NORMAL) {
            return status;
        }
        if (t.symbol != "*" && t.symbol != "/") {
            return status;
        }
        t = readLine();
        status = factor();
    }
};
// <factor> → ( <arithmetic_expression> ) | ID | NUM
// Consumes the factor that starts at the current token. Returns
// CODE["LOSSfactor"] when the token starts no factor, CODE["LOSS)"] when a
// parenthesised expression is not closed, a code greater than CODE.NORMAL
// reported by the nested expression, or the success code otherwise.
const factor = () => {
    const symbol = t.symbol;

    if (symbol == "(") {
        t = readLine();
        const inner = arithmetic_expression();
        if (inner > CODE.NORMAL) {
            return inner;
        }
        if (t.symbol != ')') {
            return CODE["LOSS)"];
        }
        t = readLine();
        return inner;
    }

    if (symbol == "ID" || symbol == "NUM") {
        t = readLine();
        return CODE.NORMAL;
    }

    return CODE["LOSSfactor"];
};

发表回复

您的邮箱地址不会被公开。 必填项已用 * 标注