made the lexer
This commit is contained in:
commit
d294a06820
6 changed files with 259 additions and 0 deletions
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
|
@ -0,0 +1 @@
|
|||
/target
|
7
Cargo.lock
generated
Normal file
7
Cargo.lock
generated
Normal file
|
@ -0,0 +1,7 @@
|
|||
# This file is automatically @generated by Cargo.
|
||||
# It is not intended for manual editing.
|
||||
version = 3
|
||||
|
||||
[[package]]
|
||||
name = "ASL"
|
||||
version = "0.1.0"
|
6
Cargo.toml
Normal file
6
Cargo.toml
Normal file
|
@ -0,0 +1,6 @@
|
|||
[package]
|
||||
name = "ASL"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
218
src/lexer.rs
Normal file
218
src/lexer.rs
Normal file
|
@ -0,0 +1,218 @@
|
|||
/// Categories a lexed token can belong to.
///
/// All variants are unit-like, so `Eq`, `Clone` and `Copy` are derived in
/// addition to the original `Debug`/`PartialEq` — purely additive, lets the
/// type be compared and duplicated freely.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum TokenType {
    STRING,     // string literal (delimited by szaft"..."szaft in the lexer)
    NUMBER,     // run of ASCII decimal digits
    IDENTIFIER, // user-defined name; may contain spaces after merging
    OPERATOR,   // single-character arithmetic/logic operator
    KEYWORD,    // reserved (possibly multi-word) language keyword
    SEPARATOR   // bracket/comma/dot class (see is_sep)
}
|
||||
#[derive(Debug)]
|
||||
pub struct Token {
|
||||
pub typ: TokenType,
|
||||
pub value: String,
|
||||
pub pos: usize,
|
||||
}
|
||||
|
||||
/// Whitespace (and split artifacts) the lexer skips entirely.
/// The empty string is included because `input.split("")` yields `""`
/// at both ends of the character list.
fn is_ignored(ch: &str) -> bool {
    matches!(ch, "" | " " | "\n" | "\r" | "\t")
}
|
||||
/// True when `ch` is exactly one ASCII decimal digit ("0" through "9").
fn is_number(ch: &str) -> bool {
    matches!(ch, "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9")
}
|
||||
/// True when `ch` is one of the single-character operators the lexer emits.
fn is_operator(ch: &str) -> bool {
    matches!(ch, "+" | "-" | "*" | "/" | "^" | "%" | "|" | "&" | "!")
}
|
||||
/// True when `ch` is a separator character (brackets, comma, dot).
fn is_sep(ch: &str) -> bool {
    matches!(ch, "(" | ")" | "[" | "]" | "{" | "}" | "," | ".")
}
|
||||
|
||||
fn read_string(splitted: &Vec<&str>, pos: &mut usize, out: &mut Vec<Token>) {
|
||||
let mut str = String::from("");
|
||||
let start_pos = *pos-1;
|
||||
*pos += 5;
|
||||
let mut success = false;
|
||||
while pos < &mut splitted.len() {
|
||||
let nchar = splitted[*pos];
|
||||
*pos += 1;
|
||||
if splitted.len() >= *pos+5 && splitted[(*pos-1)..(*pos+5)].join("") == "\"szaft" {
|
||||
success = true;
|
||||
break;
|
||||
}
|
||||
str += nchar;
|
||||
}
|
||||
if !success {
|
||||
panic!("Unexpected end of string at {}", pos);
|
||||
}
|
||||
*pos += 5;
|
||||
out.push(Token { typ: TokenType::STRING, value: str, pos: start_pos });
|
||||
}
|
||||
/// Skips over a comment. On entry `*pos` is just past the first delimiter
/// character (`/`), so the first step below skips the second one
/// (`/` or `*`).
///
/// * `is_multiline == false`: consume until (and including) the next
///   `\n`/`\r`, or until end of input.
/// * `is_multiline == true`: consume until a closing `*/`; panics if the
///   input ends first.
///
/// Bug fixes vs. the original:
/// * A line comment no longer skips one extra character after its
///   terminating newline — on LF-only input the old `*pos += 1` at the end
///   swallowed the first character of the next line (it only looked correct
///   on CRLF input, where the extra skip happened to consume the `\n`).
/// * A `*/` sequence inside a `//` comment no longer terminates it; the
///   close-marker check now applies to block comments only.
/// The unused comment-text accumulator was also removed.
fn read_comment(splitted: &[&str], pos: &mut usize, is_multiline: bool) {
    // Skip the second delimiter character.
    *pos += 1;
    // Line comments are implicitly "terminated" by end of input.
    let mut terminated = !is_multiline;
    while *pos < splitted.len() {
        let nchar = splitted[*pos];
        *pos += 1;
        if !is_multiline {
            // Line comment: ends at the line break (which is consumed).
            if nchar == "\n" || nchar == "\r" {
                break;
            }
        } else if splitted.len() >= *pos + 1 && splitted[(*pos - 1)..(*pos + 1)].join("") == "*/" {
            // `*pos - 1` is nchar's own index: `*/` starts right here.
            terminated = true;
            // Step over the trailing `/` of the close marker.
            *pos += 1;
            break;
        }
    }
    if !terminated {
        panic!("Unexpected end of comment at {}", *pos);
    }
}
|
||||
/// Returns every space-joined prefix of `words`:
/// `["a","b","c"]` → `["a", "a b", "a b c"]`. Empty input yields an
/// empty vector.
fn generate_combinations(words: Vec<&str>) -> Vec<String> {
    let mut joined = String::new();
    words
        .into_iter()
        .map(|word| {
            if !joined.is_empty() {
                joined.push(' ');
            }
            joined.push_str(word);
            joined.clone()
        })
        .collect()
}
|
||||
// Scans an identifier/keyword run. On entry `*pos` is one past the first
// character (the caller in `lex` already consumed it), so it is rewound
// below. Characters are collected until an operator, separator or line
// break, then the run is split on spaces and re-assembled into KEYWORD and
// IDENTIFIER tokens; multi-word keywords ("ha nem geny akkor geny", …)
// are merged greedily word by word.
fn read_identifier(splitted: &Vec<&str>, pos: &mut usize, out: &mut Vec<Token>) {
    // The language's keywords; several consist of multiple words.
    let keywords = vec!["kraf","piszolj","ha nem geny akkor geny","ha nem geny","nem piszv","kopva","gethelj","ha geny","jukadban","lőcsve","nem reti","csecs","megint","reti","piszv","amíg geny"];
    // `raw_keywords` collects every individual word of every keyword PLUS
    // every space-joined prefix ("ha", "ha nem", "ha nem geny", …) so a
    // partially-seen multi-word keyword can be recognized while scanning.
    let mut raw_keywords: Vec<String> = vec![];
    for keyword in &keywords {
        let spi: Vec<&str> = keyword.split(" ").collect();
        for word in &spi {
            if !raw_keywords.contains(&String::from(*word)) {
                raw_keywords.push(String::from(*word));
            }
        }
        // All space-joined prefixes of this keyword.
        let combos = generate_combinations(spi);
        for word in combos {
            if !raw_keywords.contains(&word) {
                raw_keywords.push(word);
            }
        }
    }
    // Collect the raw character run.
    let mut identifier = String::from("");
    let start_pos = *pos-1;
    // Start index of each word inside the run (kept parallel to the
    // later `identifier.split(" ")` result).
    let mut word_pos: Vec<usize> = vec![start_pos];
    // Re-read the character the caller already consumed.
    *pos -= 1;
    while pos < &mut splitted.len() {
        let prev_char = splitted[*pos-1];
        let char = splitted[*pos];
        *pos += 1;
        // Stop at anything that cannot be part of an identifier run.
        if is_operator(char) || is_sep(char) || char == "\n" || char == "\r" {
            break;
        }
        // A character following a space starts a new word.
        if prev_char == " " {
            word_pos.push(*pos-1);
        }
        identifier += char;
    }
    // Break the run up into tokens.
    let words: Vec<&str> = identifier.split(" ").collect();
    // `state` accumulates consecutive non-keyword words into one pending
    // IDENTIFIER; `state_pos` is where it started, with 0 doubling as
    // "empty" (a real token at index 0 cannot occur since `lex` starts
    // scanning at index 1).
    let mut state: Vec<&str> = vec![];
    let mut state_pos: usize = 0;
    let mut i = 0;
    let mut pre_out: Vec<Token> = vec![];
    for word in words {
        if raw_keywords.contains(&String::from(word)) {
            // Flush any identifier collected so far.
            if state_pos > 0 {
                pre_out.push(Token { typ: TokenType::IDENTIFIER, value: state.join(" "), pos: state_pos });
            }
            if let Some(prev_token) = pre_out.last_mut() {
                if prev_token.typ == TokenType::KEYWORD {
                    // Does the previous keyword extend into a longer
                    // multi-word keyword with this word appended?
                    let next_value = format!("{} {}", prev_token.value, word);
                    if raw_keywords.contains(&next_value) {
                        prev_token.value = next_value;
                    } else {
                        pre_out.push(Token { typ: TokenType::KEYWORD, value: String::from(word), pos: word_pos[i] });
                    }
                } else {
                    pre_out.push(Token { typ: TokenType::KEYWORD, value: String::from(word), pos: word_pos[i] });
                }
            } else {
                // First token of the run is a keyword word.
                pre_out.push(Token { typ: TokenType::KEYWORD, value: String::from(word), pos: word_pos[i] });
            }
            state = vec![];
            state_pos = 0;
        } else {
            state.push(word);
            if state_pos == 0 {
                state_pos = word_pos[i];
            }
        }
        i += 1;
    }
    // Flush the trailing identifier. NOTE(review): this pushes an
    // empty-valued IDENTIFIER when the run ended in a keyword (state is
    // empty, state_pos 0) — confirm downstream consumers tolerate it.
    pre_out.push(Token { typ: TokenType::IDENTIFIER, value: state.join(" "), pos: state_pos });
    // Demote invalid keywords: a prefix of a multi-word keyword that
    // never completed (e.g. "ha nem" alone) is not in the full `keywords`
    // list and becomes an identifier again.
    for token in &mut pre_out {
        if token.typ == TokenType::KEYWORD {
            if !keywords.contains(&token.value.as_str()) {
                token.typ = TokenType::IDENTIFIER;
            }
        }
    }
    // Merge identifiers that ended up next to each other.
    let mut i = 1;
    while i < pre_out.len() {
        // split_at_mut lets us hold the previous token mutably and the
        // current one immutably at the same time.
        let (left, right) = pre_out.split_at_mut(i);
        let token = &right[0];
        let prev_token = &mut left[i-1];
        if token.typ == TokenType::IDENTIFIER && prev_token.typ == TokenType::IDENTIFIER {
            prev_token.value = format!("{} {}", prev_token.value, token.value);
            pre_out.remove(i);
            // NOTE(review): with the unconditional `i += 1` below this
            // resumes scanning at index 2, not 1 — confirm a full restart
            // was intended after a merge.
            i = 1
        }
        i += 1;
    }
    out.extend(pre_out);
}
|
||||
|
||||
// Lexes `input` into a flat token stream.
//
// The input is exploded into per-character &str slices via `split("")`,
// which yields an empty string at both ends — real characters live at
// indices 1..len-1, so `pos` starts at 1 and all token positions are
// indices into this padded list. Dispatches on the current character to
// the number scanner (inline), `read_string`, `read_comment` or
// `read_identifier`.
pub fn lex(input: String) -> Vec<Token> {
    let mut out: Vec<Token> = vec![];
    let splitted: Vec<&str> = input.split("").collect();
    let mut pos = 1;
    while pos < splitted.len() {
        // Convention shared with the helpers: read splitted[pos], then
        // advance, so `pos - 1` is always the current character's index.
        let char = splitted[pos];
        pos += 1;
        if is_ignored(char) {
            continue;
        }
        if is_number(char) {
            // NUMBER: greedily consume consecutive digit characters.
            let mut num = String::from(char);
            let start_pos = pos-1;
            while pos < splitted.len() {
                let nchar = splitted[pos];
                pos += 1;
                if !is_number(nchar) {
                    break;
                }
                num += nchar;
            }
            // Un-read the first non-digit so the main loop lexes it
            // (at end of input this re-reads the trailing "" padding,
            // which is_ignored() then skips).
            pos -= 1;
            out.push(Token { typ: TokenType::NUMBER, value: num, pos: start_pos });
        } else if splitted.len() >= pos+5 && splitted[(pos-1)..(pos+5)].join("") == "szaft\"" {
            // STRING literal opened by the 6-character `szaft"` marker.
            read_string(&splitted, &mut pos, &mut out);
        } else if splitted.len() >= pos+1 && splitted[(pos-1)..(pos+1)].join("") == "//" {
            // Line comment — skipped, no token emitted.
            read_comment(&splitted, &mut pos, false);
        } else if splitted.len() >= pos+1 && splitted[(pos-1)..(pos+1)].join("") == "/*" {
            // Block comment — skipped, no token emitted.
            read_comment(&splitted, &mut pos, true);
        } else if is_operator(char) {
            out.push(Token { typ: TokenType::OPERATOR, value: String::from(char), pos: pos-1 });
        } else {
            // Everything else — including separator characters — falls
            // through to identifier/keyword scanning.
            // NOTE(review): no branch ever emits TokenType::SEPARATOR;
            // is_sep() only terminates identifier runs inside
            // read_identifier — confirm that is intended.
            read_identifier(&splitted, &mut pos, &mut out);
        }
    }
    return out;
}
|
19
src/main.rs
Normal file
19
src/main.rs
Normal file
|
@ -0,0 +1,19 @@
|
|||
use std::fs;
|
||||
|
||||
mod lexer;
|
||||
|
||||
fn main() {
|
||||
let inp = fs::read_to_string("./test.as");
|
||||
match inp {
|
||||
Result::Ok(data) => {
|
||||
let lexed = lexer::lex(data);
|
||||
println!("Lexer output: ");
|
||||
for token in lexed {
|
||||
println!(" {}: {:?}: {}", token.pos, token.typ, token.value);
|
||||
}
|
||||
},
|
||||
Result::Err(err) => {
|
||||
panic!("Error while reading file: {}", err)
|
||||
}
|
||||
}
|
||||
}
|
8
test.as
Normal file
8
test.as
Normal file
|
@ -0,0 +1,8 @@
|
|||
szaft"Hello, World!
|
||||
ugva"szaft
|
||||
64
|
||||
//ez egy komment
|
||||
/*ez
|
||||
is*/420
|
||||
69+69
|
||||
ez egy identifier ha nem geny piszolj lőcs
|
Loading…
Add table
Add a link
Reference in a new issue