diff --git a/.gitignore b/.gitignore index 9e290bc..97e6d83 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,3 @@ /target Cargo.lock -.vscode/launch.json +.vscode/ diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..26d3352 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,3 @@ +# Default ignored files +/shelf/ +/workspace.xml diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..f5f3c9a --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/source-code-parser.iml b/.idea/source-code-parser.iml new file mode 100644 index 0000000..495c100 --- /dev/null +++ b/.idea/source-code-parser.iml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..35eb1dd --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/src/lang/go/class_def.rs b/src/lang/go/class_def.rs new file mode 100644 index 0000000..14b06d6 --- /dev/null +++ b/src/lang/go/class_def.rs @@ -0,0 +1,145 @@ +use crate::go::util::identifier::parse_identifier; +use crate::go::util::vartype::find_type; +use crate::parse::AST; +use crate::prophet::*; + +pub(crate) fn parse_type( + ast: &AST, + package: &str, + path: &str, +) -> Option { + let node = match ast.find_child_by_type(&["type_spec"]) { + Some(type_node) => type_node, + None => ast, + }; + + parse_type_internal(node, &package, path) +} + +pub(crate) fn parse_type_internal( + ast: &AST, + package: &str, + path: &str, +) -> Option { + + //determine the type of the instance + let instance_type = match ast.find_child_by_type(&["struct_type", "interface_type"]) { + Some(node) => match &*node.r#type { + "interface_type" => InstanceType::InterfaceComponent, + _ => InstanceType::ClassComponent, + }, + None => InstanceType::ClassComponent, + }; + + //find the name of the type + let instance_name = match ast.find_child_by_type(&["type_identifier"]) { + Some(identifier) => identifier.value.clone(), + None => "".into(), + }; + + //determine what type the instance is + let declaration_type = match instance_type { + InstanceType::InterfaceComponent => ContainerType::Interface, + _ => ContainerType::Class, + }; + + //get the component information using the path, package, instance name, and instance type + let component = ComponentInfo { + language: Language::Go, + path: path.into(), + package_name: package.into(), + instance_name: format!( + "{}::{}", + instance_name, + match instance_type { + InstanceType::InterfaceComponent => "InterfaceComponent", + _ => "ClassComponent", + } + ), + instance_type: instance_type, + }; + + // Find bounds + let (start, end) = match ast.span { + Some(span) => (span.0 as i32, span.2 as i32), + None => (0, 0), + }; + + // Define default values + let stereotype = ContainerStereotype::Entity; // TODO determine properly + let mut fields = vec![]; + let constructors = vec![]; + let mut methods = vec![]; + //let mut modifier = Modifier::new(); + + for child in ast.children.iter() { + match &*child.r#type { + "struct_type" => { + parse_struct_body(child, &component, &mut methods, &mut fields); + } + _ => {} + } + } + + Some(ClassOrInterfaceComponent { + component: ContainerComponent { + component: component, + accessor: AccessorType::Public, + stereotype: stereotype, + methods: methods, + container_name: instance_name, + line_count: (end - start) + 1, + }, + declaration_type: declaration_type, + annotations: vec![], + constructors: constructors, + field_components: fields, + }) +} + +fn parse_struct_body( + ast: &AST, + component: &ComponentInfo, + _methods: &mut Vec, + fields: &mut Vec, +) { + for node in ast.children.iter() { + match &*node.r#type { + "field_declaration_list" => fields.append(&mut parse_fields(node, component)), + _ => {} + } + } +} + +fn parse_fields(ast: &AST, component: &ComponentInfo) -> Vec { + let mut fields = vec![]; + for node in ast.children.iter() { + match &*node.r#type { + "field_declaration" => { + let field_identifier = parse_identifier(node); + let type_identifier = find_type(node); + + fields.push(FieldComponent { + component: ComponentInfo { + language: Language::Go, + path: component.path.clone(), + package_name: component.package_name.clone(), + instance_name: field_identifier.clone(), + instance_type: InstanceType::FieldComponent, + }, + annotations: vec![], + variables: vec![], + field_name: field_identifier, + accessor: AccessorType::Public, + is_static: false, + is_final: false, + default_value: String::new(), + r#type: type_identifier, + expression: None, + }) + } + _ => {} + } + } + fields +} diff --git a/src/lang/go/function_body/expr.rs b/src/lang/go/function_body/expr.rs new file mode 100644 index 0000000..8f06f0f --- /dev/null +++ b/src/lang/go/function_body/expr.rs @@ -0,0 +1,193 @@ +use crate::Language; +use crate::go::util::identifier::parse_identifier; + +use crate::ast::*; +use crate::ComponentInfo; +use crate::AST; + +use super::is_common_junk_tag; + +pub(crate) fn parse_expr(ast: &AST, component: &ComponentInfo) -> Option { + match &*ast.r#type { + // Variables and initialization + "identifier" | "field_identifier" => parse_ident(ast, component), + "int_literal" | + "interpreted_string_literal" | "nil" | "true" | "false" => Some(Expr::Literal(Literal::new(ast.value.clone(), Language::Go))), + "assignment_statement" => parse_assignment(ast, component), + + //language specific + "binary_expression" => parse_binary(ast, component), + "expression_list" => parse_expr_stmt(ast, component), + "inc_statement" | "dec_statement" => parse_inc_dec(ast, component), + + //function and method calls + "call_expression" => parse_function(ast, component), + "selector_expression" => Some(parse_dot_expr(ast, component)?.into()), + + _ => None, + } +} + +pub(crate) fn parse_expr_stmt(ast: &AST, component: &ComponentInfo) -> Option { + let mut expr = None; + for comp in ast.children.iter() { + expr = parse_expr(comp, component); + if expr.is_some() { + break; + } + } + expr +} + +fn parse_ident(ast: &AST, _component: &ComponentInfo) -> Option { + let ident: Expr = Ident::new(ast.value.clone(), Language::Go).into(); + Some(ident.into()) +} + +/// Parse an assignment expression. May contain a variable declaration +pub(crate) fn parse_assignment(ast: &AST, component: &ComponentInfo) -> Option { + // Define attributes + let mut lhs = None; + let mut rhs = None; + + // Find values + for node in ast.children.iter() { + + let unknown = &*node.r#type; + if unknown == "=" { + continue; + } + + let result = parse_expr(node, component); + + if result.is_some() { + if lhs.is_none() { + lhs = result; + } else if rhs.is_none() { + rhs = result; + } else { + eprintln!( + "Extra parsable tag {} encountered while parsing assignment", + unknown + ); + } + } else { + //log_unknown_tag(unknown, "parse_assignment"); + } + } + + // Assemble + if let Some(lhs) = lhs { + if let Some(rhs) = rhs { + let bin: Expr = BinaryExpr::new(Box::new(lhs.into()), "=".into(), Box::new(rhs), Language::Go).into(); + Some(bin.into()) + } else { + Some(lhs.into()) + } + } else { + eprintln!("Assignment with no lefthand side!"); + None + } +} + +//parse increment or decrement statments +fn parse_inc_dec(ast: &AST, component: &ComponentInfo) -> Option { + let name = if ast.children[0].r#type == "identifier" { + 0 + } else { + 1 + }; + let op = (name + 1) % 2; + + Some( + IncDecExpr::new( + op < name, + ast.children[op].r#type == "++", + Box::new(parse_expr(&ast.children[name], component)?), + Language::Go, + ) + .into(), + ) +} + +fn parse_binary(ast: &AST, component: &ComponentInfo) -> Option { + let mut lhs = None; + let mut op = None; + let mut rhs = None; + for child in ast.children.iter() { + if !is_common_junk_tag(&child.r#type) { + let res = Some(child); + if lhs.is_none() { + lhs = res; + } else if op.is_none() { + op = res; + } else if rhs.is_none() { + rhs = res; + break; + } + } + } + + if let Some(lhs) = lhs { + if let Some(op) = op { + if let Some(rhs) = rhs { + return Some( + BinaryExpr::new( + Box::new(parse_expr(lhs, component)?), + op.value.as_str().into(), + Box::new(parse_expr(rhs, component)?), + Language::Go + ) + .into(), + ); + } + } + } + eprintln!("Malformed binary expression detected!"); + None +} + +fn parse_function(ast: &AST, component: &ComponentInfo) -> Option { + let selector = match ast.find_child_by_type(&["selector_expression"]) { + Some(node) => node, + None => ast + }; + + let argument_list = match ast.find_child_by_type(&["argument_list"]) { + Some(node) => node, + None => ast + }; + + let args: Vec = argument_list + .children + .iter() + .map(|arg| parse_expr(arg, component)) + .flat_map(|arg| arg) + .collect(); + + + //determine the type of function call + if selector.find_child_by_type(&["."]).is_some() { + //member functions + let function_name = parse_dot_expr(selector, component)?; + + Some(CallExpr::new(Box::new(function_name.into()), args, Language::Go).into()) + } else { + //regular functions + let name = Ident::new(parse_identifier(&selector.children[0]), Language::Go); + Some(CallExpr::new(Box::new(name.into()), args, Language::Go).into()) + } +} + +fn parse_dot_expr(node: &AST, component: &ComponentInfo) -> Option{ + //get the name of what called the function + //let lhs = Ident::new(parse_identifier(&node.children[0]), Language::Go); + let lhs = parse_expr(&node.children[0], component)?; + //let rhs = Ident::new(parse_identifier(&node.children[2]), Language::Go); + let rhs = parse_expr(&node.children[2], component)?; + + Some(DotExpr::new(Box::new(lhs), Box::new(rhs), Language::Go)) +} + + + diff --git a/src/lang/go/function_body/mod.rs b/src/lang/go/function_body/mod.rs new file mode 100644 index 0000000..2cec44f --- /dev/null +++ b/src/lang/go/function_body/mod.rs @@ -0,0 +1,36 @@ +use crate::{AST, ComponentInfo, Language, ast::Block}; + +use self::node::parse_child_nodes; + +mod expr; +mod node; +mod stmt; + +/// Parse the body of a method, static block, constructor, etc. +pub(crate) fn parse_block(ast: &AST, component: &ComponentInfo) -> Block { + Block::new(parse_child_nodes(ast, component), Language::Go) + +} + +/* +/// Logs an unknown tag was encountered. You better not think too much about that. +/// It does not, however, log well-known "filler tags", to keep from cluttering output. +pub(crate) fn log_unknown_tag(tag: &str, parent: &str) { + if !is_common_junk_tag(tag) { + eprintln!("Unknown tag {} encountered while parsing {}!", tag, parent); + } +} +*/ + +/// Catch all for standard-issue junk tags from treesitter, to allow easy blanket-silencing of +/// false alarms, to focus on the tags that are actually important +pub(crate) fn is_common_junk_tag(tag: &str) -> bool { + // TECHNICALLY should just be 2 match arms. I split it up by the class of tag, so its easy to + // if a case is handled already. The compiler's gotta be smart enough to figure it out. + match tag { + "if" | "else" | "for" | "while" | "do" | "switch" | "try" | "catch" | "finally" => true, + "class" | "interface" | "enum" => true, + "(" | ")" | "{" | "}" | "->" | ";" | "," | "." | "..." => true, + _ => false, + } +} diff --git a/src/lang/go/function_body/node.rs b/src/lang/go/function_body/node.rs new file mode 100644 index 0000000..41cfea6 --- /dev/null +++ b/src/lang/go/function_body/node.rs @@ -0,0 +1,35 @@ +use crate::ast::*; +use crate::ComponentInfo; +use crate::AST; + +use crate::go::function_body::stmt::*; + +use super::parse_block; +use crate::go::function_body::expr::parse_expr; + +pub(crate) fn parse_child_nodes(ast: &AST, component: &ComponentInfo) -> Vec { + ast.children + .iter() + .map(|member| parse_node(member, component)) + .flat_map(|some| some) + .collect() +} + +pub(crate) fn parse_node(ast: &AST, component: &ComponentInfo) -> Option { + match &*ast.r#type { + "var_declaration" => Some(Node::Stmt(parse_decl(ast, component).into())), + "short_var_declaration" => Some(Node::Stmt(parse_short_decl(ast, component)?.into())), + "if_statement" => parse_if(ast, component), + "block" => Some(parse_block(ast, component).into()), + + "for_statement" => parse_for(ast, component), + + _ => { + let expr: Stmt = parse_expr(ast, component)?.into(); + Some(expr.into()) + }, + + } +} + + diff --git a/src/lang/go/function_body/stmt.rs b/src/lang/go/function_body/stmt.rs new file mode 100644 index 0000000..a151232 --- /dev/null +++ b/src/lang/go/function_body/stmt.rs @@ -0,0 +1,309 @@ +use crate::ast::{to_block, DeclStmt, Expr, ExprStmt, ForStmt, IfStmt, Node, Stmt, VarDecl, Ident}; +use crate::ComponentInfo; +use crate::AST; +use crate::Language; +use crate::go::util::identifier::parse_identifier; + +use super::{expr::parse_expr, is_common_junk_tag, node::parse_node, parse_block}; +use crate::go::function_body::node::parse_child_nodes; +use crate::go::util::vartype::find_type; + +/// Parse an AST section containing a variable declaration +pub(crate) fn parse_decl(ast: &AST, component: &ComponentInfo) -> DeclStmt { + // Extract informtion about the variable + let ast = match ast.find_child_by_type(&["var_spec"]) { + Some(var) => var, + None => ast, + }; + let r#type = find_type(ast); + + // Determine the value it was set to + let rhs = parse_child_nodes(ast, component); + + let mut decl = DeclStmt::new(vec![], vec![], Language::Go); + for var in rhs.iter() { + // Extract expression from the hierarchy + let base = match var { + Node::Stmt(Stmt::ExprStmt(ExprStmt { expr, .. })) | Node::Expr(expr) => expr, + _ => { + eprintln!("Unable to interpret as variable: {:#?}", var); + continue; + } + }; + + // Parse variable + match base { + Expr::BinaryExpr(expr) => match expr.lhs.as_ref() { + Expr::Ident(lhs) => { + decl.variables + .push(VarDecl::new(Some(r#type.clone()), lhs.clone(), Language::Go)); + decl.expressions.push(Some(expr.rhs.as_ref().clone())); + } + unknown => eprintln!("Expected Ident got {:#?}", unknown), + }, + Expr::Ident(id) => { + decl.variables + .push(VarDecl::new(Some(r#type.clone()), id.clone(), Language::Go)); + decl.expressions.push(None); + } + Expr::Literal(lit) => decl.expressions.push(Some(lit.clone().into())), + unknown => { + eprintln!("Expected BinaryExpr or Ident, got {:#?}", unknown); + } + } + } + + for var_decl in decl.variables.iter_mut() { + var_decl.is_final = None; //Go does not have final variables + var_decl.is_static = None; //Go does not have static variables + var_decl.var_type = Some(r#type.clone()); + } + decl.into() +} + +pub(crate) fn parse_short_decl(ast: &AST, component: &ComponentInfo) -> Option { + //let mut r#type = "N/A".to_string(); + //let mut i = 0; + + /* + for expr in ast.find_all_children_by_type(&["expression_list"]).get_or_insert(vec![]).iter() { + if i == 0 { + i+= 1; + } + else { + r#type = determine_var_type(expr); + } + } + */ + + let expr_lists = ast.find_all_children_by_type(&["expression_list"]).unwrap_or_default(); + let lhs = *expr_lists.first()?; + let rhs = *expr_lists.last()?; + + + //println!("{:#?}", ast); + + // Determine the value it was set to + let var_names = lhs.children.iter() + .filter(|child| child.r#type == "identifier") + .map(|node| parse_identifier(node)) + .map(|variable| VarDecl::new(None, Ident::new(variable, Language::Go), Language::Go)) + .collect::>(); + + + // let rhs_vals = parse_child_nodes(rhs, component).into_iter().map(|node| match node { + // Node::Expr(expr) => Some(expr), + // var_name => { + // println!("{:#?}", var_name); + // None + // } + // }).collect::>(); + + + let rhs_vals = rhs.children.iter() + .map(|child| parse_expr(child, component)) + .collect(); + + println!("{:#?}", rhs_vals); + + Some(DeclStmt::new(var_names, rhs_vals, Language::Go)) + + /* + ///FIX ME!!!! need base to hold a vector of Ident, right now it is only getting the first var! + for var in lhs.iter() { + // Extract expression from the hierarchy + /* + let base = match var { + Node::Stmt(Stmt::ExprStmt(ExprStmt { expr, .. })) | Node::Expr(expr) => expr, + _ => { + eprintln!("Unable to interpret as variable: {:#?}", var); + continue; + } + }; + */ + + //println!("{:#?}", base); + //println!("{:#?}", lhs); + + // Parse variable + /* + match base { + Expr::BinaryExpr(expr) => match expr.lhs.as_ref() { + Expr::Ident(lhs) => { + decl.variables + .push(VarDecl::new(Some(r#type.clone()), lhs.clone(), Language::Go)); + decl.expressions.push(Some(expr.rhs.as_ref().clone())); + } + unknown => eprintln!("Expected Ident got {:#?}", unknown), + }, + Expr::Ident(id) => { + decl.variables + .push(VarDecl::new(Some(r#type.clone()), id.clone(), Language::Go)); + decl.expressions.push(None); + } + Expr::Literal(lit) => decl.expressions.push(Some(lit.clone().into())), + + //Expr::DotExpr() + unknown => { + eprintln!("Expected BinaryExpr, Ident or Literal, got {:#?}", unknown); + } + } + */ + + + //decl.expressions.push(Some(base.clone())); + } + + for var_decl in decl.variables.iter_mut() { + var_decl.is_final = None; //Go does not have final variables + var_decl.is_static = None; //Go does not have static variables + var_decl.var_type = Some(r#type.clone()); + } + decl.into() + */ + + +} + +pub(crate) fn parse_if(ast: &AST, component: &ComponentInfo) -> Option { + let mut guard = None; + let mut if_stmt = None; + let mut else_stmt = None; + + for child in ast.children.iter().filter(|node| node.r#type != "else") { + match &*child.r#type { + + "binary_expression" => { + guard = parse_expr(child, component) + }, + + _ => { + if let Some(stmt) = parse_node(child, component) { + let stmt = to_block(stmt, Language::Go); + if if_stmt.is_none() { + if_stmt = Some(stmt); + } else { + else_stmt = Some(stmt); + break; + } + } + } + } + } + let ret_node = Some(Node::Stmt(IfStmt::new(guard?, if_stmt?, else_stmt, Language::Go).into())); + ret_node +} + + +pub(crate) fn parse_for(ast: &AST, component: &ComponentInfo) -> Option { + let mut for_clauses: Vec> = vec![vec![], vec![], vec![]]; + let mut i = 0; + + //find the node containing the for clauses of the statement + let clause_node = match ast.find_child_by_type(&["for_clause", "range_clause"]) { + Some(node) => node, + None => ast, + }; + + //Coerce an Option to an Expr, if it can be + let to_expr = |parts: &Vec| -> Vec { + parts + .into_iter() + .flat_map(|part| match part.clone() { + Node::Expr(node) => Some(node), + Node::Stmt(Stmt::ExprStmt(ExprStmt { expr, .. })) => Some(expr), + _ => None, + }) + .collect() + }; + + //get all the clauses in the for_clauses vector + for node in clause_node.children.iter() { + if !is_common_junk_tag(&*node.r#type) { + for_clauses[i].push(node); + } + else { + i = i + 1; + } + } + + //prepare clauses for parsing + let parts: Vec>> = for_clauses + .iter() + .map(|c| { + if c.len() > 0 { + Some( + c.iter() + .map(|c| parse_node(c, component)) + .flat_map(|c| c) + .collect(), + ) + } else { + None + } + }) + .collect(); + + + + //parse initialization + let init = parts[0].clone().map_or(vec![], |init_parts| { + init_parts + .into_iter() + .flat_map(|p| match p { + Node::Stmt(node) => Some(node), + Node::Expr(node) => Some(Stmt::ExprStmt(ExprStmt::new(node, Language::Go))), + _ => panic!("Not supported: block in for loop init"), + }) + .collect() + }); + + //parse guard condition + let guard = match parts.get(1) { + Some(Some(guard)) => to_expr(&guard).get(0).cloned(), + _ => None + }; + + //.clone() + //.map(|guard| to_expr(&guard).get(0).flatten()) + //.map(|guard| (*to_expr(&guard).get(0)).clone()); + + //parse postcondition + let post = parts[2].clone().map_or(vec![], |post| to_expr(&post)); + + + + //find the node containing the block of the for statement + let block_node = match ast.find_child_by_type(&["block"]) { + Some(node) => parse_block(node, component), + None => parse_block(ast, component), + }; + + //assemble into a for statement + let for_stmt = ForStmt::new( + init, + guard, + post, + block_node, + Language::Go, + ); + //return the node + Some(Stmt::ForStmt(for_stmt).into()) +} + +/* +fn determine_var_type(node: &AST) -> String { + let mut toRet = String::from("N/A"); + + match &*node.children[0].r#type { + "int_literal" => { + toRet = String::from("int"); + }, + _ => { + toRet = String::from("Unknown"); + } + } + + toRet +} +*/ diff --git a/src/lang/go/function_def.rs b/src/lang/go/function_def.rs new file mode 100644 index 0000000..81fa566 --- /dev/null +++ b/src/lang/go/function_def.rs @@ -0,0 +1,194 @@ +use crate::go::function_body::parse_block; +use crate::go::util::identifier::parse_identifier; +use crate::go::util::vartype::find_type; +use crate::go::util::vartype::find_return; +use crate::go::util::vartype::unwrap_pointer_type; +use crate::parse::AST; +use crate::prophet::*; + +pub(crate) fn parse_function(ast: &AST, module_name: &str, path: &str) -> Option { + //find the function name + let fn_identifier = parse_identifier(ast); + //get return type + //let return_type = find_type(ast); + let return_type = find_return(ast); + let component = ComponentInfo { + language: Language::Go, + path: path.to_string(), + package_name: module_name.to_string(), + instance_name: fn_identifier.clone(), + instance_type: InstanceType::MethodComponent, + }; + + let mut body = None; + let span = ast.span.expect("No span for a method! AST malformed!"); + let line_begin = span.0 as i32; + let line_end = span.2 as i32; + + //parse parameter list + let mut parameters = vec![]; + let _param_list = match ast.find_child_by_type(&["parameter_list"]) { + Some(list) => { + //iterate through the list, pushing each parameter to parameters + for node in list.children.iter() { + match &*node.r#type { + "parameter_declaration" => parameters.push(parse_parameter(node, &component)), + _ => {} + } + } + } + None => {} + }; + + for member in ast.children.iter() { + match &*member.r#type { + "block" => { + body = Some(parse_block(member, &component)); + } + _ => {} + } + } + + let func = MethodComponent { + component: ComponentInfo { + language: Language::Go, + path: path.into(), + package_name: module_name.to_string(), + instance_name: fn_identifier.clone(), + instance_type: InstanceType::MethodComponent, + }, + accessor: AccessorType::Public, + method_name: fn_identifier, + return_type: return_type, + parameters: parameters, + is_static: false, + is_abstract: false, + is_final: false, + sub_methods: vec![], + annotations: vec![], + line_count: line_end - line_begin + 1, + line_begin, + line_end, + body, + }; + Some(func) +} + +pub(crate) fn parse_method(ast: &AST, module_name: &str, path: &str) -> (String, MethodComponent) { + let method_identifier = parse_identifier(ast); + //let return_type = find_type(ast); + let return_type = find_return(ast); + let component = ComponentInfo { + language: Language::Go, + path: path.to_string(), + package_name: module_name.to_string(), + instance_name: method_identifier.clone(), + instance_type: InstanceType::MethodComponent, + }; + + //Define fields + let mut body = None; + let span = ast.span.expect("No span for a method! AST malformed!"); + let line_begin = span.0 as i32; + let line_end = span.2 as i32; + + //parse first parameter list, which ideally is the one containing the receiver + let mut parent_struct_type_name = String::new(); + match ast.find_child_by_type(&["parameter_list"]) { + Some(parameter_list) => { + match parameter_list.find_child_by_type(&["parameter_declaration"]) { + Some(parameter_node) => { + parent_struct_type_name = + parse_parameter(parameter_node, &component).r#type.clone() + } + None => {} + } + } + None => {} + }; + + let mut i = 0; + let mut parameters = vec![]; + for node in ast + .find_all_children_by_type(&["parameter_list"]) + .get_or_insert(vec![]) + .iter() + { + if i == 0 || i == 2 { + i = 1; + } else { + for param_node in node.children.iter() { + match &*param_node.r#type { + "parameter_declaration" => { + parameters.push(parse_parameter(param_node, &component)) + } + _ => {} + } + } + i = 2 + } + + } + + for member in ast.children.iter() { + match &*member.r#type { + "block" => { + body = Some(parse_block(member, &component)); + } + _ => {} + } + } + + let func = MethodComponent { + component: ComponentInfo { + language: Language::Go, + path: path.into(), + package_name: module_name.to_string(), + instance_name: method_identifier.clone(), + instance_type: InstanceType::MethodComponent, + }, + accessor: AccessorType::Public, + method_name: method_identifier, + return_type: return_type, + parameters: parameters, + is_static: false, + is_abstract: false, + is_final: false, + sub_methods: vec![], + annotations: vec![], + line_count: line_end - line_begin + 1, + line_begin, + line_end, + body, + }; + (parent_struct_type_name, func) +} + +fn parse_parameter(ast: &AST, component: &ComponentInfo) -> MethodParamComponent { + + + let my_node = unwrap_pointer_type(ast); + + let mut name = "".to_string(); + + + name += &parse_identifier(ast); + //let mut modifier = Modifier::new(); + let mut param_type = "".to_string(); + + param_type += &find_type(my_node); + //let param_type = find_type(myNode); + + MethodParamComponent { + component: ComponentInfo { + language: Language::Go, + path: component.path.clone(), + package_name: component.package_name.clone(), + instance_name: component.instance_name.clone(), + instance_type: InstanceType::MethodParamComponent, + }, + annotation: None, + r#type: param_type.into(), + parameter_name: name.into(), + } +} diff --git a/src/lang/go/mod.rs b/src/lang/go/mod.rs new file mode 100644 index 0000000..f24d186 --- /dev/null +++ b/src/lang/go/mod.rs @@ -0,0 +1,154 @@ +use std::collections::HashMap; + +use crate::parse::AST; +use crate::prophet::*; + +mod class_def; +mod function_body; +mod function_def; +mod util; + +use crate::go::util::identifier::parse_identifier; +use class_def::*; +use function_def::*; + +pub fn find_components(ast: AST, path: &str) -> Vec { + find_components_internal(ast, String::new(), path) +} + +fn find_components_internal(ast: AST, mut package: String, path: &str) -> Vec { + let mut components = vec![]; + let mut types = HashMap::new(); + + //first parse for all nodes EXCEPT for "method_declaration" + for node in ast + .find_all_children_by_type(&[ + "type_declaration", + "function_declaration", + "package_clause", + "import_declaration", + ]) + .get_or_insert(vec![]) + .iter() + { + match &*node.r#type { + "function_declaration" => match parse_function(node, &*package, path) { + Some(function) => components.push(ComponentType::MethodComponent(function.clone())), + None => {} + }, + "package_clause" => { + package = parse_package(node); + } + "type_declaration" => match parse_type(node, &*package, path) { + Some(type_decl) => { + let type_name = type_decl.component.container_name.clone(); + types.insert(type_name, type_decl); + } + None => {} + }, + "import_declaration" => println!("{}", parse_import(node)), + tag => todo!("Cannot identify provided tag {:#?}", tag), + }; + } + + //now parse "method_declaration" nodes + for node in ast + .find_all_children_by_type(&["method_declaration"]) + .get_or_insert(vec![]) + .iter() + { + let mut tuple = parse_method(node, &*package, path); + + if tuple.0.starts_with("*") { + tuple.0.remove(0); + } + match types.get(&tuple.0) { + Some(parent_struct) => { + //create a copy of the instance of the original struct and add the method to it + let mut new_methods = parent_struct.component.methods.clone(); + new_methods.push(tuple.1.clone()); + + let parent_component = parent_struct.component.clone(); + let new_parent_struct = ClassOrInterfaceComponent { + component: ContainerComponent { + component: parent_component.component.clone(), + accessor: parent_component.accessor.clone(), + stereotype: parent_component.stereotype.clone(), + methods: new_methods, + container_name: parent_component.container_name.clone(), + line_count: parent_component.line_count.clone(), + }, + declaration_type: parent_struct.declaration_type.clone(), + annotations: parent_struct.annotations.clone(), + constructors: parent_struct.constructors.clone(), + field_components: parent_struct.field_components.clone(), + }; + + types.insert(tuple.0, new_parent_struct); + } + None => {} + } + } + + //push the now updated types with their member methods onto the components vector + for (_k, v) in types { + components.push(ComponentType::ClassOrInterfaceComponent(v)); + } + + components +} + +fn parse_package(ast: &AST) -> String { + parse_identifier(ast) +} + +fn parse_import(ast: &AST) -> String { + let mut buffer = String::new(); + for node in ast.children.iter() { + match &*node.r#type { + "import_spec" => match node.find_child_by_type(&["interpreted_string_literal"]) { + Some(import) => buffer.push_str(&*trim_import((&*import.value).to_string())), + None => {} + }, + "import_spec_list" => { + for import_node in node.children.iter() { + match &*import_node.r#type { + "import_spec" => { + match import_node.find_child_by_type(&["interpreted_string_literal"]) { + Some(import) => { + buffer.push_str(&*trim_import((&*import.value).to_string())); + buffer.push_str("\n"); + } + None => {} + } + } + _ => {} + } + } + } + _ => buffer.push_str(&*parse_import(node)), + } + } + + //to remove the last newline for multiple imports + if buffer.ends_with('\n') { + buffer.pop(); + } + + buffer +} + +/// removes the quotations surrounding the values of the "interpreted_string_literal" nodes +fn trim_import(import_str: String) -> String { + let mut str = import_str.clone(); + + //additional checks to see if the string actually begins and ends with quotation marks + if str.starts_with('\"') { + str.remove(0); + } + if str.ends_with('\"') { + str.pop(); + } + + str +} diff --git a/src/lang/go/util/identifier.rs b/src/lang/go/util/identifier.rs new file mode 100644 index 0000000..b3189f2 --- /dev/null +++ b/src/lang/go/util/identifier.rs @@ -0,0 +1,25 @@ +use crate::AST; + +/// Indicates when no type is found +pub const NO_NAME: &str = "N/A"; + +pub(crate) fn parse_identifier(ast: &AST) -> String { + let name_node = match ast.find_child_by_type(&[ + "package_identifier", + "field_identifier", + "identifier"]) { + Some(node) => node, + None => ast, + }; + + let name_str = match &*name_node.r#type { + "package_identifier" + | "field_identifier" + | "identifier" => name_node.value.clone(), + + _ => NO_NAME.into(), + }; + + name_str +} + diff --git a/src/lang/go/util/mod.rs b/src/lang/go/util/mod.rs new file mode 100644 index 0000000..f63bf2b --- /dev/null +++ b/src/lang/go/util/mod.rs @@ -0,0 +1,26 @@ +//use crate::AST; + +pub(crate) mod identifier; +pub(crate) mod vartype; + +/* +/// Convert a vector into an Option. If the vector is empty, swaps it out for None; otherwise is Some(vector) +pub(crate) fn fold_vec(vector: Vec) -> Option> { + if !vector.is_empty() { + Some(vector) + } else { + None + } +} +*/ + +/* +/// Convert the children of a provided tree into a single, consecutive string +pub(crate) fn stringify_tree_children(ast: &AST) -> String { + let mut buffer = String::new(); + for member in ast.children.iter() { + buffer.push_str(&*member.value); + } + buffer +} +*/ \ No newline at end of file diff --git a/src/lang/go/util/vartype.rs b/src/lang/go/util/vartype.rs new file mode 100644 index 0000000..f74e2d1 --- /dev/null +++ b/src/lang/go/util/vartype.rs @@ -0,0 +1,110 @@ +use crate::AST; + +/// Indicates when no type is found +pub const NO_TYPE: &str = "N/A"; + + +pub(crate) fn find_type(ast: &AST) -> String { + let my_node = unwrap_pointer_type(ast); + let mut is_ptr = false; + + match my_node.find_child_by_type(&["*"]) { + Some(_node) => {is_ptr = true;}, + None => {} + } + + let mut r#type = match my_node.find_child_by_type(&["qualified_type"]) { + Some(node) => { + parse_qualified_type(node) + }, + None => { + let id_node = match my_node.find_child_by_type(&["type_identifier"]) { + Some(node) => node, + None => ast, + }; + let str = parse_type(id_node); + str + } + }; + + if is_ptr == true { + r#type = format!("*{}", r#type); + } + r#type +} + +pub(crate) fn find_return(ast: &AST) -> String { + let mut i = 0; + let mut ret = "".to_string(); + + for node in ast.children.iter() { + match &*node.r#type { + "parameter_list" => { + //if it has multiple return values + if i == 4 { + for sub_node in node.children.iter() { + match &*sub_node.r#type { + "parameter_declaration" => { + ret += &find_type(unwrap_pointer_type(sub_node)); + ret += ", " + }, + _ => {} + } + + } + + ret.pop(); + ret.pop(); + } + }, + "type_identifier" => { + //if it ends with a single return + if i == 4 { + ret = node.value.clone(); + } + }, + _ => {} + } + i += 1; + } + + + ret +} + +pub(crate) fn unwrap_pointer_type(ast: &AST) -> &AST { + let mut my_node = ast; + + for node in ast.children.iter() { + match &*node.r#type { + "pointer_type" => { + my_node = node; + }, + _ => {} + } + } + + my_node +} + +fn parse_type(ast: &AST) -> String { + match &*ast.r#type { + "type_identifier" => ast.value.clone(), + _ => NO_TYPE.into() + } +} + +fn parse_qualified_type(ast: &AST) -> String { + let mut type_str = "".to_string(); + + for node in ast.children.iter() { + match &*node.r#type { + "package_identifier" | "." | "type_identifier" => { + type_str += &node.value; + }, + _ => {} + } + } + + type_str +} \ No newline at end of file diff --git a/src/lang/mod.rs b/src/lang/mod.rs index e72ee8e..8c9064e 100644 --- a/src/lang/mod.rs +++ b/src/lang/mod.rs @@ -1,2 +1,3 @@ pub mod cpp; pub mod java; +pub mod go; diff --git a/src/parse.rs b/src/parse.rs index de40dce..7810904 100644 --- a/src/parse.rs +++ b/src/parse.rs @@ -89,7 +89,7 @@ impl AST { LANG::Cpp => (cpp::find_components(self, path, path), lang.into()), LANG::Java => (java::find_components(self, path), lang.into()), LANG::Python => (vec![], Language::Python), - LANG::Go => (vec![], Language::Go), + LANG::Go => (go::find_components(self, path), lang.into()), lang => { tracing::info!("unsupported lang: {:?}", lang); (vec![], Language::Unknown)