wip

.gitignore (vendored, new file)

/target

Cargo.lock (generated, new file)

# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 4

[[package]]
name = "anyhow"
version = "1.0.98"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487"

[[package]]
name = "ts-parser"
version = "0.1.0"
dependencies = [
 "anyhow",
]

Cargo.toml (new file)

[package]
name = "ts-parser"
version = "0.1.0"
edition = "2024"

[dependencies]
anyhow = "1.0.98"

src/ast.rs (new file)

use crate::token::Token;

pub struct Module {
    pub statements: Vec<Statement>,
}

pub enum Statement {
    FunctionDeclaration {
        name: Token,
        parameters: Vec<ParameterDeclaration>,
    },
    Expression,
}

pub struct ParameterDeclaration {
    name: Token,
    typename: Token,
}

pub enum Expression {
    Identifier(Token),
    FunctionCall {},
}
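
For orientation, a declaration like `function hello(name: string)` from the test fixture would presumably lower onto these types roughly as follows. This is a sketch only, not part of the commit; it assumes it lives inside src/ast.rs (where `Token` is already imported and `ParameterDeclaration`'s private fields are reachable), and the source positions are made up.

use crate::token::{KeywordKind, TokenKind, TokenLocation};

// Illustrative source position; the values are made up.
fn loc(line: usize, column: usize) -> TokenLocation {
    TokenLocation { file: None, line, column }
}

// The AST shape `function hello(name: string)` is expected to lower to.
fn example_statement() -> Statement {
    Statement::FunctionDeclaration {
        name: Token {
            kind: TokenKind::Identifier("hello".to_string()),
            location: loc(1, 9),
        },
        parameters: vec![ParameterDeclaration {
            name: Token {
                kind: TokenKind::Identifier("name".to_string()),
                location: loc(1, 15),
            },
            typename: Token {
                kind: TokenKind::Keyword(KeywordKind::string),
                location: loc(1, 21),
            },
        }],
    }
}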

src/format.rs (new file)

use crate::token::{CommentKind, KeywordKind, LiteralKind, Number, Token, TokenKind};

pub trait Formatter {
    fn format(self, options: FormatterOptions) -> anyhow::Result<String>;
}

pub struct FormatterOptions {}

impl Formatter for &[Token] {
    fn format(self, _options: FormatterOptions) -> anyhow::Result<String> {
        let mut result = String::new();
        for t in self {
            let kind = &t.kind;
            let s = match kind {
                TokenKind::Identifier(i) => i.clone(),
                TokenKind::Literal(kind) => match kind {
                    LiteralKind::String(s) => format!("\"{s}\""),
                    LiteralKind::Number(number) => match number {
                        Number::Integer(i) => format!("{i}"),
                        Number::Float(f) => format!("{f}"),
                    },
                },
                TokenKind::Keyword(kind) => match kind {
                    // Trailing space so the keyword and the following name don't fuse.
                    KeywordKind::function => "function ",
                    KeywordKind::string => "string",
                    KeywordKind::number => "number",
                }
                .to_string(),
                TokenKind::Comment(kind, s) => match kind {
                    CommentKind::Line => format!("// {s}"),
                    CommentKind::Block => format!("/* {s} */"),
                },
                TokenKind::LeftParen => "(".to_string(),
                TokenKind::RightParen => ")".to_string(),
                TokenKind::LeftCurly => " {".to_string(),
                TokenKind::RightCurly => "}".to_string(),
                TokenKind::Comma => ", ".to_string(),
                TokenKind::Colon => ": ".to_string(),
                TokenKind::Semicolon => ";".to_string(),
                TokenKind::Period => ".".to_string(),
                TokenKind::NewLine => "\n".to_string(),
                TokenKind::EndOfFile => "".to_string(),
            };
            result += &s;
        }
        Ok(result)
    }
}
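
Combined with the lexer below, the trait can be exercised end to end. A minimal sketch, assuming the crate's lexer and format modules; `roundtrip` is a hypothetical helper, not part of the commit:

use crate::format::{Formatter, FormatterOptions};
use crate::lexer::Lexer;

// Hypothetical helper: lex a source string, then print the tokens back out.
fn roundtrip(source: &str) -> anyhow::Result<String> {
    let tokens = Lexer::new(source, None).lex();
    tokens.as_slice().format(FormatterOptions {})
}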

src/lexer.rs (new file)

use crate::token::{CommentKind, KeywordKind, LiteralKind, Token, TokenKind, TokenLocation};

pub struct Lexer {
    file: Option<String>,
    source: Vec<char>,
    tokens: Vec<Token>,
    line: usize,
    current_line_offset: usize,
    start: usize,
    current: usize,
}

impl Lexer {
    pub fn new(source: &str, file: Option<String>) -> Lexer {
        Lexer {
            source: source.chars().collect::<Vec<_>>(),
            tokens: Vec::new(),
            line: 1,
            start: 0,
            current: 0,
            file,
            current_line_offset: 0,
        }
    }

    pub fn lex(mut self) -> Vec<Token> {
        while self.current < self.source.len() {
            self.start = self.current;
            self.next_token();
        }
        self.clean_newlines();
        self.tokens
    }

    fn next_token(&mut self) {
        let c = self.consume();

        // Single-character tokens.
        let t = match c {
            '(' => Some(TokenKind::LeftParen),
            ')' => Some(TokenKind::RightParen),
            '{' => Some(TokenKind::LeftCurly),
            '}' => Some(TokenKind::RightCurly),
            ',' => Some(TokenKind::Comma),
            ':' => Some(TokenKind::Colon),
            ';' => Some(TokenKind::Semicolon),
            '.' => Some(TokenKind::Period),
            '\n' => {
                self.line += 1;
                self.current_line_offset = self.current;
                Some(TokenKind::NewLine)
            }
            _ => None,
        };

        if let Some(t) = t {
            self.push(t);
            return;
        }

        if c == '/' {
            let p = self.peek();
            let t = match p {
                '/' => {
                    // Line comment: runs to the end of the line.
                    while !self.is_eof() {
                        let c = self.consume();
                        if c == '\n' {
                            self.line += 1;
                            self.current_line_offset = self.current;
                            break;
                        }
                    }
                    let s = self.current_scan(2, 0);
                    TokenKind::Comment(CommentKind::Line, s)
                }
                '*' => {
                    // Block comment: consume until the closing `*/`,
                    // tracking newlines along the way.
                    while !self.is_eof() {
                        if self.peek_match("*/") {
                            break;
                        }
                        if self.consume() == '\n' {
                            self.line += 1;
                            self.current_line_offset = self.current;
                        }
                    }
                    if self.is_eof() {
                        todo!("Expected */ before EOF");
                    }
                    self.current += 2;
                    let s = self.current_scan(2, 2);
                    TokenKind::Comment(CommentKind::Block, s)
                }
                _ => todo!("forward slash"),
            };
            self.push(t);
            return;
        }

        if c == '"' {
            let mut closed = false;
            while !self.is_eof() {
                let c = self.consume();
                match c {
                    '"' => {
                        closed = true;
                        break;
                    }
                    '\n' => todo!("Expected closing string before new line"),
                    _ => (),
                }
            }
            if !closed {
                todo!("Expected closing string before EOF")
            }
            let s = self.current_scan(1, 1);
            self.push(TokenKind::Literal(LiteralKind::String(s)));
            return;
        }

        if c.is_ascii_alphabetic() {
            while !self.is_eof() {
                let p = self.peek();
                if p.is_alphanumeric() || p == '_' {
                    self.consume();
                } else {
                    break;
                }
            }
            // Reaching EOF here is fine: an identifier may end the input.
            let s = self.current_scan(0, 0);
            if let Ok(k) = TryInto::<KeywordKind>::try_into(s.as_str()) {
                self.push(TokenKind::Keyword(k));
                return;
            }
            self.push(TokenKind::Identifier(s));
            return;
        }

        // Anything else (whitespace, digits, unknown symbols) is skipped for now.
    }

    fn clean_newlines(&mut self) {
        // Drop leading newline tokens.
        while let Some(TokenKind::NewLine) = self.tokens.first().map(|t| &t.kind) {
            self.tokens.remove(0);
        }

        // Collapse runs of three or more consecutive newlines down to two.
        let mut i = 0;
        loop {
            let w = self
                .tokens
                .get(i..(i + 3))
                .map(|ts| ts.iter().map(|t| &t.kind).collect::<Vec<_>>());
            match w.as_deref() {
                Some([TokenKind::NewLine, TokenKind::NewLine, TokenKind::NewLine]) => {
                    self.tokens.remove(i + 2);
                }
                Some(_) => {
                    i += 1;
                }
                None => break,
            }
        }
    }

    fn current_scan(&self, start_offset: usize, end_offset: usize) -> String {
        self.source[(self.start + start_offset)..(self.current - end_offset)]
            .iter()
            .collect::<String>()
    }

    fn push(&mut self, kind: TokenKind) {
        self.tokens.push(Token {
            kind,
            location: TokenLocation {
                file: self.file.clone(),
                line: self.line,
                column: self.start.saturating_sub(self.current_line_offset),
            },
        });
    }

    fn peek(&self) -> char {
        self.source[self.current]
    }

    fn peek_n(&self, n: usize) -> Option<char> {
        self.source.get(self.current + n).copied()
    }

    fn peek_match(&self, m: &str) -> bool {
        let c = self.current;
        let s = self
            .source
            .get(c..(c + m.len()))
            .map(|s| s.iter().collect::<String>());
        if let Some(s) = s { s == m } else { false }
    }

    fn consume(&mut self) -> char {
        let c = self.source[self.current];
        self.current += 1;
        c
    }

    fn is_eof(&self) -> bool {
        self.current == self.source.len()
    }
}
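
A quick check of the block-comment path. This is a sketch, assuming a #[cfg(test)] module inside src/lexer.rs; it is not part of the commit:

#[cfg(test)]
mod comment_tests {
    use super::Lexer;
    use crate::token::{CommentKind, TokenKind};

    #[test]
    fn lexes_block_comment() {
        let tokens = Lexer::new("/* hi */", None).lex();
        // current_scan(2, 2) strips the `/*` and `*/` delimiters but keeps
        // the surrounding whitespace.
        assert!(matches!(
            tokens.first().map(|t| &t.kind),
            Some(TokenKind::Comment(CommentKind::Block, s)) if s.as_str() == " hi "
        ));
    }
}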

src/main.rs (new file)

mod ast;
mod format;
mod lexer;
mod parse;
mod token;

fn main() {
    println!("Hello, world!");
}

#[cfg(test)]
mod tests {
    use crate::{format::Formatter, lexer::Lexer};

    const BASIC: &str = r#"
function hello(name: string){
console.log("Hey, ", name);
}


console.log("Starting!");

hello();
"#;

    #[test]
    fn lex() {
        println!("Running lex");
        let lexer = Lexer::new(BASIC, Some("basic.file".to_string()));
        let tokens = lexer.lex();
        println!(
            "{}",
            tokens.format(crate::format::FormatterOptions {}).unwrap()
        );
    }
}

src/parse.rs (new file)

use crate::token::Token;

pub struct Parser {
    tokens: Vec<Token>,
}

impl Parser {
    pub fn new(tokens: Vec<Token>) -> Parser {
        Self { tokens }
    }

    fn parse(&mut self) {}
}

src/token.rs (new file)

use anyhow::anyhow;

#[derive(Debug)]
pub struct Token {
    pub kind: TokenKind,
    pub location: TokenLocation,
}

#[derive(Debug)]
pub enum TokenKind {
    Identifier(String),
    Literal(LiteralKind),
    Keyword(KeywordKind),
    Comment(CommentKind, String),

    LeftParen,
    RightParen,
    LeftCurly,
    RightCurly,
    Comma,
    Colon,
    Semicolon,
    Period,
    NewLine,
    EndOfFile,
}

#[derive(Debug)]
pub enum CommentKind {
    Line,
    Block,
}

#[allow(non_camel_case_types)]
#[derive(Debug)]
pub enum KeywordKind {
    function,
    string,
    number,
}

impl TryFrom<&str> for KeywordKind {
    type Error = anyhow::Error;
    fn try_from(value: &str) -> Result<Self, Self::Error> {
        match value {
            "function" => Ok(Self::function),
            "string" => Ok(Self::string),
            "number" => Ok(Self::number),
            _ => Err(anyhow!("unknown keyword")),
        }
    }
}

#[derive(Debug)]
pub enum LiteralKind {
    String(String),
    Number(Number),
}

#[derive(Debug)]
pub enum Number {
    Integer(usize),
    Float(f64),
}

#[derive(Debug)]
pub struct TokenLocation {
    pub file: Option<String>,
    pub line: usize,
    pub column: usize,
}
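
The TryFrom impl is what the lexer's identifier path uses to promote reserved words. A minimal sketch of that dispatch; `classify` is a hypothetical helper, not in the commit:

use crate::token::{KeywordKind, TokenKind};

// Mirrors the lexer's identifier path: reserved words become keywords,
// anything else stays an identifier.
fn classify(word: &str) -> TokenKind {
    match KeywordKind::try_from(word) {
        Ok(k) => TokenKind::Keyword(k),
        Err(_) => TokenKind::Identifier(word.to_string()),
    }
}

For example, classify("function") yields TokenKind::Keyword(KeywordKind::function), while classify("hello") stays an identifier.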