From c32d21f55a0d2bcb6bc7ccb873970bfc0f1cd0d6 Mon Sep 17 00:00:00 2001 From: Sushain Cherivirala Date: Mon, 15 Mar 2021 22:08:18 -0700 Subject: [PATCH] Replace cg-comp with tree-sitter (#83) * First attempt * Bump tree-sitter-apertium * Toss core tool installation * Single cc build * Tweaks * Bump tree-sitter-apertium --- .github/workflows/check.yml | 10 ------- Cargo.toml | 2 +- build.rs | 13 ++++----- src/stats/mod.rs | 48 ++++------------------------------ src/stats/rlx.rs | 32 +++++++++++++++++++++++ src/stats/tree-sitter-apertium | 2 +- src/tests/get.rs | 5 ---- 7 files changed, 46 insertions(+), 66 deletions(-) create mode 100644 src/stats/rlx.rs diff --git a/.github/workflows/check.yml b/.github/workflows/check.yml index 37a8515..3ffe73f 100644 --- a/.github/workflows/check.yml +++ b/.github/workflows/check.yml @@ -47,11 +47,6 @@ jobs: - name: Restore Rust cache uses: Swatinem/rust-cache@v1 - - name: Install apertium-all-dev - run: wget http://apertium.projectjj.com/apt/install-nightly.sh -O - | sudo bash - - name: Install other language tools - run: sudo apt-get install cg3 - - name: Install diesel_cli uses: actions-rs/install@v0.1 with: @@ -77,11 +72,6 @@ jobs: - name: Restore Rust cache uses: Swatinem/rust-cache@v1 - - name: Install apertium-all-dev - run: wget http://apertium.projectjj.com/apt/install-nightly.sh -O - | sudo bash - - name: Install other language tools - run: sudo apt-get install cg3 - - name: Install diesel_cli uses: actions-rs/install@v0.1 with: diff --git a/Cargo.toml b/Cargo.toml index dad7196..cb77604 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -38,4 +38,4 @@ diesel_cli = { version = "1.4.1", default-features = false, features = ["sqlite" httpmock = "0.5.6" [build-dependencies] -cc="*" +cc = "1.0.67" diff --git a/build.rs b/build.rs index 63ae925..4d340ac 100644 --- a/build.rs +++ b/build.rs @@ -1,11 +1,12 @@ use std::path::PathBuf; fn main() { - let ts_dir: PathBuf = ["src", "stats", "tree-sitter-apertium", "tree-sitter-lexd", "src"] - .iter() - .collect(); + let includes = vec![ + PathBuf::from(r"src/stats/tree-sitter-apertium/tree-sitter-lexd/src"), + PathBuf::from(r"src/stats/tree-sitter-apertium/tree-sitter-cg/src"), + ]; cc::Build::new() - .include(&ts_dir) - .file(ts_dir.join("parser.c")) - .compile("tree-sitter-lexd"); + .includes(&includes) + .files(vec![includes[0].join("parser.c"), includes[1].join("parser.c")]) + .compile("tree-sitter"); } diff --git a/src/stats/mod.rs b/src/stats/mod.rs index 043115b..b4abea9 100644 --- a/src/stats/mod.rs +++ b/src/stats/mod.rs @@ -1,21 +1,18 @@ mod lexc; mod lexd; +mod rlx; mod xml; use std::{ - io::{self, Write}, - num::ParseIntError, - process::Output, + io::{self}, str::Utf8Error, }; use lazy_static::lazy_static; -use regex::{Regex, RegexSet, RegexSetBuilder}; +use regex::{RegexSet, RegexSetBuilder}; use reqwest::Error as ReqwestError; use rocket_contrib::{json, json::JsonValue}; use slog::Logger; -use tempfile::NamedTempFile; -use tokio::process::Command; use crate::{ models::{FileKind, StatKind}, @@ -29,7 +26,7 @@ pub enum StatsError { Utf8(Utf8Error), Io(io::Error), Xml(String), - CgComp(String), + Rlx(String), Lexd(String), Lexc(String), } @@ -60,42 +57,7 @@ pub async fn get_file_stats( FileKind::Monodix | FileKind::MetaMonodix => self::xml::get_monodix_stats(&body, &file_path), FileKind::Bidix | FileKind::MetaBidix | FileKind::Postdix => self::xml::get_bidix_stats(&body, &file_path), FileKind::Transfer => self::xml::get_transfer_stats(&body, &file_path), - FileKind::Rlx => { - let mut rlx_file = NamedTempFile::new().map_err(StatsError::Io)?; - rlx_file.write_all(body.as_bytes()).map_err(StatsError::Io)?; - let output = Command::new("cg-comp") - .arg( - rlx_file - .path() - .to_str() - .ok_or_else(|| StatsError::CgComp("Unable to create temporary file".to_string()))?, - ) - .arg("/dev/null") - .output() - .await; - - match output { - Ok(Output { status, ref stderr, .. }) if status.success() => { - let cg_conv_output = String::from_utf8_lossy(stderr); - lazy_static! { - static ref RE: Regex = Regex::new(r"(\w+): (\d+)").unwrap(); - } - for capture in RE.captures_iter(&cg_conv_output) { - if &capture[1] == "Rules" { - let rule_count_string = &capture[2]; - let rule_count: u32 = rule_count_string - .parse() - .map_err(|e: ParseIntError| StatsError::CgComp(e.to_string()))?; - return Ok(vec![(StatKind::Rules, json!(rule_count))]); - } - } - - Err(StatsError::CgComp(format!("No stats in output: {}", &cg_conv_output))) - }, - Ok(Output { ref stderr, .. }) => Err(StatsError::CgComp(String::from_utf8_lossy(stderr).to_string())), - Err(err) => Err(StatsError::Io(err)), - } - }, + FileKind::Rlx => self::rlx::get_stats(&logger, &body), FileKind::Twol => { let rule_count = body.lines().filter(|line| line.starts_with('"')).count(); Ok(vec![(StatKind::Rules, json!(rule_count))]) diff --git a/src/stats/rlx.rs b/src/stats/rlx.rs new file mode 100644 index 0000000..fe36b2d --- /dev/null +++ b/src/stats/rlx.rs @@ -0,0 +1,32 @@ +use rocket_contrib::{json, json::JsonValue}; +use slog::Logger; +use tree_sitter::{Language, Parser, TreeCursor}; + +use crate::{models::StatKind, stats::StatsError}; + +extern "C" { + fn tree_sitter_cg() -> Language; +} + +pub fn get_stats(_logger: &Logger, body: &str) -> Result, StatsError> { + let mut parser = Parser::new(); + let language = unsafe { tree_sitter_cg() }; + parser + .set_language(language) + .map_err(|e| StatsError::Rlx(format!("Unable to load tree-sitter parser: {}", e)))?; + let tree = parser + .parse(body, None) + .ok_or_else(|| StatsError::Rlx("Unable to parse rlx file".to_string()))?; + + let mut rules: usize = 0; + + let mut walker: TreeCursor = tree.root_node().walk(); + for child in tree.root_node().children(&mut walker) { + let kind = child.kind(); + if kind == "rule" || kind.starts_with("rule_") { + rules += 1; + } + } + + Ok(vec![(StatKind::Rules, json!(rules))]) +} diff --git a/src/stats/tree-sitter-apertium b/src/stats/tree-sitter-apertium index eff7226..39eff02 160000 --- a/src/stats/tree-sitter-apertium +++ b/src/stats/tree-sitter-apertium @@ -1 +1 @@ -Subproject commit eff722622f400beeb8943197da57e6a5af98ded9 +Subproject commit 39eff02abf646d1c16c4ffa499944413d0ccb1d6 diff --git a/src/tests/get.rs b/src/tests/get.rs index 14245df..f51b91c 100644 --- a/src/tests/get.rs +++ b/src/tests/get.rs @@ -254,13 +254,8 @@ fn lexd_module_stats() { #[test] fn module_specific_stats() { - #[cfg(not(tarpaulin))] let kinds = [("monodix", 2), ("rlx", 1), ("postdix", 1)]; - // TODO: Bring back `rlx` once tarpaulin doesn't sometimes hang on `cg-comp`. - #[cfg(tarpaulin)] - let kinds = [("monodix", 2), ("postdix", 1)]; - for (kind, stat_count) in &kinds { let module = format!("apertium-{}", TEST_LT_MODULE); let endpoint = format!("/{}/{}?async=false", module, kind);