/// Category assigned to every [`Token`] produced by [`lex`].
#[derive(Debug, PartialEq)]
pub enum TokenType {
    /// Text enclosed between `szaft"` and `"szaft`.
    STRING,
    /// A run of ASCII digits.
    NUMBER,
    /// A (possibly multi-word) name that is not a keyword.
    IDENTIFIER,
    /// One of the single- or two-character operators.
    OPERATOR,
    /// One of the phrases listed in `KEYWORDS`.
    KEYWORD,
    /// Brackets, `,`, `.` and a lone `=`.
    SEPARATOR,
    /// Statement terminator: `;`, a line feed, or the final `EOF` marker.
    OPEND,
}

/// A single lexical unit.
#[derive(Debug)]
pub struct Token {
    /// What kind of token this is.
    pub typ: TokenType,
    /// The token text (for strings: the content without the delimiters).
    pub value: String,
    /// 1-based character index of the token's first character in the input.
    pub pos: usize,
}

/// Every complete keyword phrase of the language. Phrases may span several
/// space-separated words.
const KEYWORDS: &[&str] = &[
    "kraf",
    "piszolj",
    "ha nem geny akkor geny",
    "ha nem geny",
    "nem piszv",
    "kopva",
    "gethelj",
    "ha geny",
    "lőcsve",
    "csecs",
    "reti",
    "piszv",
    "amíg geny",
    "nincs hám",
    "szard le",
];

/// Opening delimiter of a string literal.
const STRING_OPEN: &str = "szaft\"";
/// Closing delimiter of a string literal.
const STRING_CLOSE: &str = "\"szaft";
/// Number of characters in either string delimiter.
const STRING_DELIMITER_LEN: usize = 6;

/// Returns `true` for characters that terminate a statement.
fn is_opend(ch: &str) -> bool {
    matches!(ch, ";" | "\n")
}

/// Returns `true` for characters the lexer skips between tokens. The empty
/// string is included because `str::split("")` yields one at each end.
fn is_ignored(ch: &str) -> bool {
    matches!(ch, "" | " " | "\n" | "\r" | "\t")
}

/// Returns `true` for a single ASCII digit.
fn is_number(ch: &str) -> bool {
    matches!(ch, "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9")
}

/// Returns `true` for a single-character operator.
fn is_operator(ch: &str) -> bool {
    matches!(
        ch,
        "+" | "-" | "*" | "/" | "^" | "%" | "|" | "&" | "!" | "<" | ">"
    )
}

/// Returns `true` when `ch` followed by `next_char` forms one of the
/// two-character operators `==`, `!=`, `<=`, `>=`.
fn is_mul_operator(ch: &str, next_char: &str) -> bool {
    matches!(ch, "=" | "!" | "<" | ">") && next_char == "="
}

/// Returns `true` for a single-character separator.
fn is_sep(ch: &str) -> bool {
    matches!(ch, "(" | ")" | "[" | "]" | "{" | "}" | "," | "." | "=")
}

/// Returns `true` when the characters starting at index `start` spell
/// `pattern`. Out-of-range windows simply do not match.
fn matches_at(chars: &[&str], start: usize, pattern: &str) -> bool {
    let width = pattern.chars().count();
    match chars.get(start..start + width) {
        Some(window) => window.concat() == pattern,
        None => false,
    }
}

/// Reads a string literal and appends a `STRING` token to `out`.
///
/// On entry `*pos` is the index just after the first character of the opening
/// delimiter; on return it is the index just after the closing delimiter.
///
/// # Panics
///
/// Panics if the input ends before the closing delimiter is found.
fn read_string(splitted: &[&str], pos: &mut usize, out: &mut Vec<Token>) {
    let start_pos = *pos - 1;
    // Jump over the whole opening delimiter.
    *pos = start_pos + STRING_DELIMITER_LEN;

    let mut value = String::new();
    while *pos < splitted.len() {
        if matches_at(splitted, *pos, STRING_CLOSE) {
            *pos += STRING_DELIMITER_LEN;
            out.push(Token {
                typ: TokenType::STRING,
                value,
                pos: start_pos,
            });
            return;
        }
        value.push_str(splitted[*pos]);
        *pos += 1;
    }
    panic!("Unexpected end of string at {}", *pos);
}

/// Skips a comment.
///
/// On entry `*pos` is the index of the second character of the opener (`//`
/// or `/*`). A line comment ends *before* the next `\n` or `\r`, so the line
/// break is still seen by the caller and produces its usual `OPEND` token; it
/// also ends silently at end of input. A block comment ends just after `*/`.
///
/// # Panics
///
/// Panics if a block comment is not closed before the end of the input.
fn read_comment(splitted: &[&str], pos: &mut usize, is_multiline: bool) {
    // Skip the second character of the opener.
    *pos += 1;
    while *pos < splitted.len() {
        if is_multiline {
            if matches_at(splitted, *pos, "*/") {
                *pos += 2;
                return;
            }
        } else if matches!(splitted[*pos], "\n" | "\r") {
            return;
        }
        *pos += 1;
    }
    if is_multiline {
        panic!("Unexpected end of comment at {}", *pos);
    }
}

/// Returns every word-boundary prefix of the phrase formed by `words`:
/// `["a", "b", "c"]` yields `["a", "a b", "a b c"]`.
fn generate_combinations(words: Vec<&str>) -> Vec<String> {
    let mut result = Vec::with_capacity(words.len());
    let mut current = String::new();
    for word in words {
        if !current.is_empty() {
            current.push(' ');
        }
        current.push_str(word);
        result.push(current.clone());
    }
    result
}

/// Builds the list of keyword fragments: every individual word of every
/// keyword plus every word-boundary prefix of every keyword.
fn keyword_fragments() -> Vec<String> {
    let mut fragments: Vec<String> = Vec::new();
    for keyword in KEYWORDS.iter() {
        let words: Vec<&str> = keyword.split(' ').collect();
        let singles: Vec<String> = words.iter().map(|word| word.to_string()).collect();
        for fragment in singles.into_iter().chain(generate_combinations(words)) {
            if !fragments.contains(&fragment) {
                fragments.push(fragment);
            }
        }
    }
    fragments
}

/// Returns `true` when `text` is one of the known keyword fragments.
fn is_fragment(fragments: &[String], text: &str) -> bool {
    fragments.iter().any(|fragment| fragment.as_str() == text)
}

/// Emits the pending identifier words in `state` (if any) as one
/// `IDENTIFIER` token positioned at `state_pos`, then clears `state`.
fn flush_identifier(state: &mut Vec<&str>, state_pos: usize, out: &mut Vec<Token>) {
    if state.is_empty() {
        return;
    }
    let value = state.join(" ");
    out.push(Token {
        typ: TokenType::IDENTIFIER,
        // Empty trailing words (from repeated spaces) leave trailing spaces.
        value: value.trim_end_matches(' ').to_string(),
        pos: state_pos,
    });
    state.clear();
}

/// Reads a run of words and splits it into `KEYWORD` and `IDENTIFIER` tokens.
///
/// On entry `*pos` is the index just after the first character of the run; on
/// return it is the index of the character that ended the run. The run ends
/// at an operator, a separator, a statement terminator (`;` or `\n`), `\r`,
/// or a digit that directly follows a space.
///
/// Consecutive words that together form a keyword phrase become one
/// `KEYWORD`; keyword fragments that never complete a phrase are demoted to
/// identifiers, and neighbouring identifiers are joined with a single space.
fn read_identifier(splitted: &[&str], pos: &mut usize, out: &mut Vec<Token>) {
    let fragments = keyword_fragments();
    let start_pos = *pos - 1;

    // Collect the raw text of the run.
    let mut identifier = String::new();
    let mut cursor = start_pos;
    while cursor < splitted.len() {
        let ch = splitted[cursor];
        let after_space = splitted[cursor - 1] == " ";
        if is_operator(ch)
            || is_sep(ch)
            || is_opend(ch)
            || ch == "\r"
            || (after_space && is_number(ch))
        {
            break;
        }
        identifier.push_str(ch);
        cursor += 1;
    }
    *pos = cursor;

    // Break the run into words, deriving each word's position from the text
    // itself so positions cannot drift out of step with the words.
    let trimmed = identifier.trim();
    let leading_bytes = identifier.len() - identifier.trim_start().len();
    let mut next_word_pos = start_pos + identifier[..leading_bytes].chars().count();

    let mut pre_out: Vec<Token> = Vec::new();
    let mut state: Vec<&str> = Vec::new();
    let mut state_pos = 0;
    for word in trimmed.split(' ') {
        let word_pos = next_word_pos;
        next_word_pos += word.chars().count() + 1;

        // Repeated spaces yield empty words; ignore them unless they sit
        // inside an identifier that is already being collected.
        if word.is_empty() && state.is_empty() {
            continue;
        }

        if is_fragment(&fragments, word) {
            flush_identifier(&mut state, state_pos, &mut pre_out);
            // Try to extend the keyword phrase that is being built.
            let extended = match pre_out.last_mut() {
                Some(prev) if prev.typ == TokenType::KEYWORD => {
                    let candidate = format!("{} {}", prev.value, word);
                    if is_fragment(&fragments, &candidate) {
                        prev.value = candidate;
                        true
                    } else {
                        false
                    }
                }
                _ => false,
            };
            if !extended {
                pre_out.push(Token {
                    typ: TokenType::KEYWORD,
                    value: String::from(word),
                    pos: word_pos,
                });
            }
        } else {
            if state.is_empty() {
                state_pos = word_pos;
            }
            state.push(word);
        }
    }
    flush_identifier(&mut state, state_pos, &mut pre_out);

    // Fragments that never grew into a complete keyword are plain names.
    for token in &mut pre_out {
        if token.typ == TokenType::KEYWORD && !KEYWORDS.contains(&token.value.as_str()) {
            token.typ = TokenType::IDENTIFIER;
        }
    }

    // Join identifiers that ended up next to each other.
    let mut merged: Vec<Token> = Vec::with_capacity(pre_out.len());
    for token in pre_out {
        let joins_previous = token.typ == TokenType::IDENTIFIER
            && merged
                .last()
                .map_or(false, |prev| prev.typ == TokenType::IDENTIFIER);
        if joins_previous {
            if let Some(prev) = merged.last_mut() {
                prev.value.push(' ');
                prev.value.push_str(&token.value);
            }
        } else {
            merged.push(token);
        }
    }
    out.extend(merged);
}

/// Splits `input` into tokens.
///
/// Positions are 1-based character indices. The returned vector always ends
/// with an `OPEND` token whose value is `"EOF"`.
///
/// # Panics
///
/// Panics when a string literal or a block comment is not terminated.
pub fn lex(input: String) -> Vec<Token> {
    // Splitting on "" yields one element per character plus an empty string
    // at each end, which makes element indices 1-based character positions.
    let splitted: Vec<&str> = input.split("").collect();
    let mut out: Vec<Token> = Vec::new();
    let mut pos = 1;

    while pos < splitted.len() {
        let start = pos;
        let ch = splitted[start];
        pos += 1;

        if is_opend(ch) {
            out.push(Token {
                typ: TokenType::OPEND,
                value: String::from(ch),
                pos: start,
            });
            continue;
        }
        if is_ignored(ch) {
            continue;
        }

        let next = splitted.get(pos).copied().unwrap_or("");
        if is_number(ch) {
            let mut num = String::from(ch);
            while pos < splitted.len() && is_number(splitted[pos]) {
                num.push_str(splitted[pos]);
                pos += 1;
            }
            out.push(Token {
                typ: TokenType::NUMBER,
                value: num,
                pos: start,
            });
        } else if matches_at(&splitted, start, STRING_OPEN) {
            read_string(&splitted, &mut pos, &mut out);
        } else if matches_at(&splitted, start, "//") {
            read_comment(&splitted, &mut pos, false);
        } else if matches_at(&splitted, start, "/*") {
            read_comment(&splitted, &mut pos, true);
        } else if is_mul_operator(ch, next) {
            out.push(Token {
                typ: TokenType::OPERATOR,
                value: format!("{}{}", ch, next),
                pos: start,
            });
            pos += 1;
        } else if is_operator(ch) {
            out.push(Token {
                typ: TokenType::OPERATOR,
                value: String::from(ch),
                pos: start,
            });
        } else if is_sep(ch) {
            out.push(Token {
                typ: TokenType::SEPARATOR,
                value: String::from(ch),
                pos: start,
            });
        } else {
            read_identifier(&splitted, &mut pos, &mut out);
        }
    }

    out.push(Token {
        typ: TokenType::OPEND,
        value: String::from("EOF"),
        pos: pos - 1,
    });
    out
}