AstroLang/src/parser.rs

319 lines
No EOL
11 KiB
Rust

use core::panic;
use crate::lexer::{Token, TokenType};
/// A single node of the syntax tree produced by this parser.
///
/// Every variant except `NOOP` wraps the struct that carries that node's data.
#[derive(Clone, Debug, PartialEq)]
pub enum ASTPart {
    /// String literal.
    String(AstString),
    /// Integer literal.
    Number(AstNumber),
    /// Boolean literal.
    Boolean(AstBool),
    /// Built for `gethelj <identifier> = <expression>` statements.
    Assigment(AstAssigment),
    /// Binary operation; while an expression is still being collected this is
    /// an operator placeholder whose operands are both `NOOP`.
    Operation(AstOperation),
    /// Read of a variable by name.
    VarRead(AstVarRead),
    /// Call with its argument expressions.
    Call(AstCall),
    /// Built for `<identifier> = <expression>` statements.
    VarUpdate(AstVarUpdate),
    /// Empty node: returned for statement terminators and used as the
    /// "not yet filled in" operand of operator placeholders.
    NOOP,
}
/// String literal node.
#[derive(Clone, Debug, PartialEq)]
pub struct AstString {
    /// Text of the string token this node was built from.
    pub value: String,
    /// `pos` of the originating token.
    pub pos: usize,
}
/// Integer literal node.
#[derive(Clone, Debug, PartialEq)]
pub struct AstNumber {
    /// Value parsed from the number token's text.
    pub value: i64,
    /// `pos` of the originating token.
    pub pos: usize,
}
/// Boolean literal node.
#[derive(Clone, Debug, PartialEq)]
pub struct AstBool {
    /// The literal's truth value.
    pub value: bool,
    /// `pos` of the originating token.
    pub pos: usize,
}
/// Node built for a `gethelj <identifier> = <expression>` statement.
#[derive(Clone, Debug, PartialEq)]
pub struct AstAssigment {
    /// Name taken from the identifier token.
    pub variable: String,
    /// Expression whose result is stored under `variable`.
    pub value: Box<ASTPart>,
    /// `pos` of the `gethelj` keyword token.
    pub pos: usize,
}
/// Binary operation node.
///
/// While an expression is being collected, operators are stored with both
/// operands set to `ASTPart::NOOP`; `shunt` later fills them in.
#[derive(Clone, Debug, PartialEq)]
pub struct AstOperation {
    /// Text of the operator token.
    pub operator: String,
    /// Left-hand operand.
    pub left: Box<ASTPart>,
    /// Right-hand operand.
    pub right: Box<ASTPart>,
    /// `pos` of the operator token.
    pub pos: usize,
}
/// Node that reads a variable by name.
#[derive(Clone, Debug, PartialEq)]
pub struct AstVarRead {
    /// Text of the identifier token.
    pub variable: String,
    /// `pos` of the identifier token.
    pub pos: usize,
}
/// Call node.
#[derive(Clone, Debug, PartialEq)]
pub struct AstCall {
    /// Expression being called (the parser passes a variable read here).
    pub function: Box<ASTPart>,
    /// Argument expressions in source order.
    pub args: Vec<ASTPart>,
    /// `pos` of the opening-parenthesis token of the argument list.
    pub pos: usize,
}
/// Node built for an `<identifier> = <expression>` statement.
#[derive(Clone, Debug, PartialEq)]
pub struct AstVarUpdate {
    /// Text of the identifier token on the left of `=`.
    pub variable: String,
    /// Expression on the right of `=`.
    pub value: Box<ASTPart>,
    /// `pos` of the identifier token.
    pub pos: usize,
}
/// Reports whether `input` matches any terminator listed in `end`.
///
/// A terminator matches when its type equals the input's type and its value is
/// either empty (any value of that type matches) or equal to the input's value.
fn is_end(input: &Token, end: &Vec<Token>) -> bool {
    end.iter().any(|terminator| {
        input.typ == terminator.typ
            && (terminator.value.is_empty() || input.value == terminator.value)
    })
}
/// Parses a call's argument list and wraps `variable` in an `ASTPart::Call`.
///
/// On entry `*pos` must be the index of the opening `(` token. On return `*pos`
/// is the index just past the matching `)`. Each argument is parsed with
/// `read_exp`; `,` tokens between arguments are skipped.
///
/// The resulting node's `pos` is the `pos` of the opening `(` token.
///
/// # Panics
/// Panics with "Unclosed parenthesis" when the token stream ends before the
/// closing `)`. Panics raised by `read_exp` while parsing an argument propagate.
fn read_call(variable: ASTPart, pos: &mut usize, input: &Vec<Token>) -> ASTPart {
    let start_pos = input[*pos].pos;
    *pos += 1;

    // Both separators stop `read_exp` without being consumed, so this loop is
    // the one that steps over `,` and `)`. Built once instead of per argument.
    let arg_ends: Vec<Token> = vec![
        Token { typ: TokenType::SEPARATOR, value: String::from(","), pos: 0 },
        Token { typ: TokenType::SEPARATOR, value: String::from(")"), pos: 0 },
    ];

    let mut args: Vec<ASTPart> = vec![];
    let mut closed = false;
    while *pos < input.len() {
        let token = &input[*pos];
        if token.typ == TokenType::SEPARATOR {
            if token.value == "," {
                *pos += 1;
                continue;
            }
            if token.value == ")" {
                *pos += 1;
                closed = true;
                break;
            }
        }
        args.push(read_exp(pos, input, &arg_ends, &arg_ends));
    }

    // Running out of tokens is a syntax error, not the end of the argument list.
    if !closed {
        panic!("Unclosed parenthesis at {}", start_pos);
    }
    ASTPart::Call(AstCall { function: Box::new(variable), args: args, pos: start_pos })
}
/// Returns the binding strength of a binary operator; larger binds tighter.
///
/// Operators that are not listed get `0`.
fn operator_precedence(op: &str) -> i64 {
    const LEVELS: [(&str, i64); 8] = [
        ("|", 1),
        ("&", 1),
        ("+", 2),
        ("-", 2),
        ("*", 3),
        ("/", 3),
        ("%", 3),
        ("^", 4),
    ];
    LEVELS
        .iter()
        .find(|entry| entry.0 == op)
        .map_or(0, |entry| entry.1)
}
fn shunt(input: Vec<ASTPart>) -> ASTPart {
let mut output: Vec<ASTPart> = vec![];
let mut stack: Vec<ASTPart> = vec![];
for part in input {
match &part {
ASTPart::String(_) => {
output.push(part);
},
ASTPart::Number(_) => {
output.push(part);
},
ASTPart::Call(_) => {
stack.push(part);
},
ASTPart::VarRead(_) => {
output.push(part);
},
ASTPart::Boolean(_) => {
output.push(part);
},
ASTPart::Operation(op) => {
if *op.left != ASTPart::NOOP && *op.right != ASTPart::NOOP {
output.push(part);
break;
}
while stack.len() > 0 {
let top = &stack[stack.len()-1];
match top {
ASTPart::Operation(top_op) => {
if operator_precedence(&top_op.operator) >= operator_precedence(&op.operator) {
output.push(stack.pop().unwrap());
} else {
break;
}
},
_ => break
}
}
stack.push(part);
},
_ => {}
}
}
while stack.len() > 0 {
output.push(stack.pop().unwrap());
}
let mut i = 0;
while i < output.len() {
match &output[i] {
ASTPart::Operation(op) => {
if *op.left != ASTPart::NOOP && *op.right != ASTPart::NOOP {
i += 1;
continue;
}
if i < 2 {
panic!("Unexpected operation at {}", op.pos);
}
let left = output[i-2].clone();
let right = output[i-1].clone();
output[i] = ASTPart::Operation(AstOperation {
operator: op.operator.clone(),
left: Box::new(left),
right: Box::new(right),
pos: op.pos,
});
output.remove(i-2);
output.remove(i-2);
i -= 1;
}
_ => {
i += 1;
}
}
}
return output[0].clone();
}
fn read_exp(pos: &mut usize, input: &Vec<Token>, ends: &Vec<Token>, parse_ends: &Vec<Token>) -> ASTPart {
let mut expressions: Vec<ASTPart> = vec![];
while pos < &mut input.len() {
let token = &input[*pos];
let mut next_token = &Token {
typ: TokenType::OPEND,
value: String::from("END"),
pos: 0
};
if *pos+1 < input.len() {
next_token = &input[*pos+1]
}
if is_end(token, &parse_ends) {
break;
}
*pos += 1;
if is_end(token, ends) {
break;
}
if token.typ == TokenType::STRING {
expressions.push(ASTPart::String(AstString { value: token.value.clone(), pos: token.pos }));
} else if token.typ == TokenType::NUMBER {
expressions.push(ASTPart::Number(AstNumber { value: token.value.parse().unwrap(), pos: token.pos }));
} else if token.typ == TokenType::KEYWORD && token.value == "piszv" {
expressions.push(ASTPart::Boolean(AstBool { value: true, pos: token.pos }));
} else if token.typ == TokenType::KEYWORD && token.value == "nem piszv" {
expressions.push(ASTPart::Boolean(AstBool { value: false, pos: token.pos }));
} else if token.typ == TokenType::IDENTIFIER {
if next_token.typ == TokenType::SEPARATOR && next_token.value == "(" {
let var = ASTPart::VarRead(AstVarRead { variable: token.value.clone(), pos: token.pos });
expressions.push(read_call(var, pos, input));
} else {
expressions.push(ASTPart::VarRead(AstVarRead { variable: token.value.clone(), pos: token.pos }));
}
} else if token.typ == TokenType::OPERATOR {
expressions.push(ASTPart::Operation(AstOperation { operator: token.value.clone(), left: Box::new(ASTPart::NOOP), right: Box::new(ASTPart::NOOP), pos: token.pos }));
} else if token.typ == TokenType::SEPARATOR {
//We check for () and then send the into read_exp again, so we recursively parse the expression
if token.value == "(" {
let ends: Vec<Token> = vec![
Token { typ: TokenType::SEPARATOR, value: String::from(")"), pos: 0 }
];
let exp = read_exp(pos, input, &ends, &ends);
if input[*pos].typ == TokenType::SEPARATOR && input[*pos].value == ")" {
*pos += 1;
} else {
panic!("Unclosed parenthesis at {}", token.pos);
}
expressions.push(exp);
} else {
panic!("Unexpected {:?}({}) at {}", token.typ, token.value, token.pos);
}
} else {
panic!("Unexpected {:?}({}) at {}", token.typ, token.value, token.pos);
}
}
let shunted = shunt(expressions);
return shunted;
}
/// Parses one statement starting at `*pos`.
///
/// Returns `ASTPart::NOOP` without consuming anything when the current token
/// matches `parse_ends`, and `ASTPart::NOOP` after consuming the token when it
/// matches `op_ends`. Otherwise one of these forms is parsed:
/// - `gethelj <identifier> [=] <expression>` gives `ASTPart::Assigment`
///   (the `=` is skipped when present);
/// - `<identifier> ( ... )` gives `ASTPart::Call`;
/// - `<identifier> = <expression>` gives `ASTPart::VarUpdate`.
///
/// `*pos` must be a valid index into `input` on entry.
///
/// # Panics
/// Panics on any other token sequence, and when the stream ends right after
/// `gethelj`. Panics from `read_exp` and `read_call` propagate.
fn next_operation(pos: &mut usize, input: &Vec<Token>, op_ends: &Vec<Token>, parse_ends: &Vec<Token>) -> ASTPart {
    let token = &input[*pos];
    if is_end(token, parse_ends) {
        return ASTPart::NOOP;
    }
    *pos += 1;
    if is_end(token, op_ends) {
        return ASTPart::NOOP;
    }
    // From here on `*pos` indexes the token that follows `token`.

    if token.typ == TokenType::KEYWORD && token.value == "gethelj" {
        // Checked access: the keyword may be the last token of the stream.
        let variable = match input.get(*pos) {
            Some(candidate) => candidate,
            None => panic!("Expected a variable name after {} at {}", token.value, token.pos),
        };
        *pos += 1;
        if variable.typ != TokenType::IDENTIFIER {
            panic!("Unexpected {:?} at {}", variable.typ, variable.pos)
        }
        let has_equals = match input.get(*pos) {
            Some(eq) => eq.typ == TokenType::SEPARATOR && eq.value == "=",
            None => false,
        };
        if has_equals {
            *pos += 1;
        }
        let value = read_exp(pos, input, op_ends, parse_ends);
        return ASTPart::Assigment(AstAssigment {
            variable: variable.value.clone(),
            value: Box::new(value),
            pos: token.pos,
        });
    }

    if token.typ == TokenType::IDENTIFIER {
        // Text of the separator right after the identifier; empty when the
        // next token is missing or is not a separator.
        let separator = match input.get(*pos) {
            Some(next) if next.typ == TokenType::SEPARATOR => next.value.as_str(),
            _ => "",
        };
        if separator == "(" {
            let var = ASTPart::VarRead(AstVarRead { variable: token.value.clone(), pos: token.pos });
            return read_call(var, pos, input);
        }
        if separator == "=" {
            *pos += 1;
            let value = read_exp(pos, input, op_ends, parse_ends);
            return ASTPart::VarUpdate(AstVarUpdate {
                variable: token.value.clone(),
                value: Box::new(value),
                pos: token.pos,
            });
        }
    }

    panic!("Unexpected {:?}({}) at {}", token.typ, token.value, token.pos)
}
/// Parses `input` statement by statement and returns the resulting nodes.
///
/// Statements are read with `next_operation` until the tokens run out or the
/// next token matches `parse_ends`. `ASTPart::NOOP` results are left out of the
/// returned list.
fn parse_internal(input: Vec<Token>, op_ends: Vec<Token>, parse_ends: Vec<Token>) -> Vec<ASTPart> {
    let mut out: Vec<ASTPart> = vec![];
    let mut pos = 0;
    while pos < input.len() {
        match next_operation(&mut pos, &input, &op_ends, &parse_ends) {
            ASTPart::NOOP => {}
            statement => out.push(statement),
        }
        // The statement may have consumed the last token, so only peek at the
        // next one when it exists; otherwise the loop condition ends the parse.
        match input.get(pos) {
            Some(upcoming) if is_end(upcoming, &parse_ends) => break,
            _ => {}
        }
    }
    out
}
/// Parses a whole token stream into its list of statement nodes.
///
/// Statements end at an `OPEND` token whose value is a newline, `;` or `EOF`;
/// parsing as a whole stops at the `OPEND` token whose value is `EOF`.
pub fn parse(input: Vec<Token>) -> Vec<ASTPart> {
    // Terminators are matched on type and value only, so their `pos` is a dummy 0.
    let terminator = |value: &str| Token {
        typ: TokenType::OPEND,
        value: String::from(value),
        pos: 0,
    };
    let statement_ends: Vec<Token> = vec![terminator("\n"), terminator(";"), terminator("EOF")];
    let stream_ends: Vec<Token> = vec![terminator("EOF")];
    parse_internal(input, statement_ends, stream_ends)
}