Skip to content

Commit

Permalink
Replace cg-comp with tree-sitter (#83)
Browse files Browse the repository at this point in the history
* First attempt

* Bump tree-sitter-apertium

* Toss core tool installation

* Single cc build

* Tweaks

* Bump tree-sitter-apertium
  • Loading branch information
sushain97 authored Mar 16, 2021
1 parent 7cbfb9d commit c32d21f
Show file tree
Hide file tree
Showing 7 changed files with 46 additions and 66 deletions.
10 changes: 0 additions & 10 deletions .github/workflows/check.yml
Original file line number Diff line number Diff line change
Expand Up @@ -47,11 +47,6 @@ jobs:
- name: Restore Rust cache
uses: Swatinem/rust-cache@v1

- name: Install apertium-all-dev
run: wget http://apertium.projectjj.com/apt/install-nightly.sh -O - | sudo bash
- name: Install other language tools
run: sudo apt-get install cg3

- name: Install diesel_cli
uses: actions-rs/[email protected]
with:
Expand All @@ -77,11 +72,6 @@ jobs:
- name: Restore Rust cache
uses: Swatinem/rust-cache@v1

- name: Install apertium-all-dev
run: wget http://apertium.projectjj.com/apt/install-nightly.sh -O - | sudo bash
- name: Install other language tools
run: sudo apt-get install cg3

- name: Install diesel_cli
uses: actions-rs/[email protected]
with:
Expand Down
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,4 @@ diesel_cli = { version = "1.4.1", default-features = false, features = ["sqlite"
httpmock = "0.5.6"

[build-dependencies]
cc="*"
cc = "1.0.67"
13 changes: 7 additions & 6 deletions build.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
use std::path::PathBuf;

fn main() {
let ts_dir: PathBuf = ["src", "stats", "tree-sitter-apertium", "tree-sitter-lexd", "src"]
.iter()
.collect();
let includes = vec![
PathBuf::from(r"src/stats/tree-sitter-apertium/tree-sitter-lexd/src"),
PathBuf::from(r"src/stats/tree-sitter-apertium/tree-sitter-cg/src"),
];
cc::Build::new()
.include(&ts_dir)
.file(ts_dir.join("parser.c"))
.compile("tree-sitter-lexd");
.includes(&includes)
.files(vec![includes[0].join("parser.c"), includes[1].join("parser.c")])
.compile("tree-sitter");
}
48 changes: 5 additions & 43 deletions src/stats/mod.rs
Original file line number Diff line number Diff line change
@@ -1,21 +1,18 @@
mod lexc;
mod lexd;
mod rlx;
mod xml;

use std::{
io::{self, Write},
num::ParseIntError,
process::Output,
io::{self},
str::Utf8Error,
};

use lazy_static::lazy_static;
use regex::{Regex, RegexSet, RegexSetBuilder};
use regex::{RegexSet, RegexSetBuilder};
use reqwest::Error as ReqwestError;
use rocket_contrib::{json, json::JsonValue};
use slog::Logger;
use tempfile::NamedTempFile;
use tokio::process::Command;

use crate::{
models::{FileKind, StatKind},
Expand All @@ -29,7 +26,7 @@ pub enum StatsError {
Utf8(Utf8Error),
Io(io::Error),
Xml(String),
CgComp(String),
Rlx(String),
Lexd(String),
Lexc(String),
}
Expand Down Expand Up @@ -60,42 +57,7 @@ pub async fn get_file_stats(
FileKind::Monodix | FileKind::MetaMonodix => self::xml::get_monodix_stats(&body, &file_path),
FileKind::Bidix | FileKind::MetaBidix | FileKind::Postdix => self::xml::get_bidix_stats(&body, &file_path),
FileKind::Transfer => self::xml::get_transfer_stats(&body, &file_path),
FileKind::Rlx => {
let mut rlx_file = NamedTempFile::new().map_err(StatsError::Io)?;
rlx_file.write_all(body.as_bytes()).map_err(StatsError::Io)?;
let output = Command::new("cg-comp")
.arg(
rlx_file
.path()
.to_str()
.ok_or_else(|| StatsError::CgComp("Unable to create temporary file".to_string()))?,
)
.arg("/dev/null")
.output()
.await;

match output {
Ok(Output { status, ref stderr, .. }) if status.success() => {
let cg_conv_output = String::from_utf8_lossy(stderr);
lazy_static! {
static ref RE: Regex = Regex::new(r"(\w+): (\d+)").unwrap();
}
for capture in RE.captures_iter(&cg_conv_output) {
if &capture[1] == "Rules" {
let rule_count_string = &capture[2];
let rule_count: u32 = rule_count_string
.parse()
.map_err(|e: ParseIntError| StatsError::CgComp(e.to_string()))?;
return Ok(vec![(StatKind::Rules, json!(rule_count))]);
}
}

Err(StatsError::CgComp(format!("No stats in output: {}", &cg_conv_output)))
},
Ok(Output { ref stderr, .. }) => Err(StatsError::CgComp(String::from_utf8_lossy(stderr).to_string())),
Err(err) => Err(StatsError::Io(err)),
}
},
FileKind::Rlx => self::rlx::get_stats(&logger, &body),
FileKind::Twol => {
let rule_count = body.lines().filter(|line| line.starts_with('"')).count();
Ok(vec![(StatKind::Rules, json!(rule_count))])
Expand Down
32 changes: 32 additions & 0 deletions src/stats/rlx.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
use rocket_contrib::{json, json::JsonValue};
use slog::Logger;
use tree_sitter::{Language, Parser, TreeCursor};

use crate::{models::StatKind, stats::StatsError};

// Foreign declaration of the tree-sitter grammar entry point for constraint
// grammar (CG) files. The symbol is not defined in Rust; it is presumably
// provided by the generated `tree-sitter-cg/src/parser.c` that the build
// script compiles and links — confirm against `build.rs`.
extern "C" {
/// Returns the tree-sitter [`Language`] used to parse `.rlx` sources.
fn tree_sitter_cg() -> Language;
}

/// Computes statistics for a constraint grammar (`.rlx`) file.
///
/// The file contents in `body` are parsed with the tree-sitter CG grammar and
/// every direct child of the root node whose kind is `rule`, or begins with
/// `rule_`, is counted as one rule. The logger argument is currently unused.
///
/// # Errors
///
/// Returns [`StatsError::Rlx`] when the grammar cannot be loaded into the
/// parser or when the parser yields no syntax tree for `body`.
pub fn get_stats(_logger: &Logger, body: &str) -> Result<Vec<(StatKind, JsonValue)>, StatsError> {
    // SAFETY: `tree_sitter_cg` takes no arguments and is declared to return a
    // `Language` by value; it is expected to be the grammar entry point linked
    // into this binary by the build script.
    let cg_language = unsafe { tree_sitter_cg() };

    let mut cg_parser = Parser::new();
    if let Err(e) = cg_parser.set_language(cg_language) {
        return Err(StatsError::Rlx(format!("Unable to load tree-sitter parser: {}", e)));
    }

    let syntax_tree = match cg_parser.parse(body, None) {
        Some(parsed) => parsed,
        None => return Err(StatsError::Rlx("Unable to parse rlx file".to_string())),
    };

    // Only top-level nodes are inspected; nested nodes are not visited.
    let root = syntax_tree.root_node();
    let mut cursor: TreeCursor = root.walk();
    let rule_count: usize = root
        .children(&mut cursor)
        .map(|node| node.kind())
        .filter(|kind| *kind == "rule" || kind.starts_with("rule_"))
        .count();

    Ok(vec![(StatKind::Rules, json!(rule_count))])
}
2 changes: 1 addition & 1 deletion src/stats/tree-sitter-apertium
Submodule tree-sitter-apertium updated 38 files
+26 −0 tree-sitter-cg/Cargo.toml
+1 −1 tree-sitter-cg/binding.gyp
+0 −0 tree-sitter-cg/bindings/node/binding.cc
+19 −0 tree-sitter-cg/bindings/node/index.js
+40 −0 tree-sitter-cg/bindings/rust/build.rs
+52 −0 tree-sitter-cg/bindings/rust/lib.rs
+14 −6 tree-sitter-cg/grammar.js
+0 −13 tree-sitter-cg/index.js
+5 −5 tree-sitter-cg/package.json
+62 −15 tree-sitter-cg/src/grammar.json
+31 −8 tree-sitter-cg/src/node-types.json
+9,235 −6,119 tree-sitter-cg/src/parser.c
+72 −87 tree-sitter-cg/src/tree_sitter/parser.h
+26 −1 tree-sitter-cg/test/corpus/basic.txt
+21 −0 tree-sitter-cg/test/corpus/subreading.txt
+1 −1 tree-sitter-lexd/package.json
+26 −0 tree-sitter-rtx/Cargo.toml
+1 −1 tree-sitter-rtx/binding.gyp
+0 −0 tree-sitter-rtx/bindings/node/binding.cc
+19 −0 tree-sitter-rtx/bindings/node/index.js
+40 −0 tree-sitter-rtx/bindings/rust/build.rs
+52 −0 tree-sitter-rtx/bindings/rust/lib.rs
+0 −13 tree-sitter-rtx/index.js
+5 −5 tree-sitter-rtx/package.json
+1 −0 tree-sitter-rtx/src/grammar.json
+95 −99 tree-sitter-rtx/src/parser.c
+72 −87 tree-sitter-rtx/src/tree_sitter/parser.h
+26 −0 tree-sitter-twolc/Cargo.toml
+1 −1 tree-sitter-twolc/binding.gyp
+0 −0 tree-sitter-twolc/bindings/node/binding.cc
+19 −0 tree-sitter-twolc/bindings/node/index.js
+40 −0 tree-sitter-twolc/bindings/rust/build.rs
+52 −0 tree-sitter-twolc/bindings/rust/lib.rs
+0 −13 tree-sitter-twolc/index.js
+5 −5 tree-sitter-twolc/package.json
+3 −17 tree-sitter-twolc/src/grammar.json
+1,294 −1,297 tree-sitter-twolc/src/parser.c
+72 −87 tree-sitter-twolc/src/tree_sitter/parser.h
5 changes: 0 additions & 5 deletions src/tests/get.rs
Original file line number Diff line number Diff line change
Expand Up @@ -254,13 +254,8 @@ fn lexd_module_stats() {

#[test]
fn module_specific_stats() {
#[cfg(not(tarpaulin))]
let kinds = [("monodix", 2), ("rlx", 1), ("postdix", 1)];

// TODO: Bring back `rlx` once tarpaulin doesn't sometimes hang on `cg-comp`.
#[cfg(tarpaulin)]
let kinds = [("monodix", 2), ("postdix", 1)];

for (kind, stat_count) in &kinds {
let module = format!("apertium-{}", TEST_LT_MODULE);
let endpoint = format!("/{}/{}?async=false", module, kind);
Expand Down

0 comments on commit c32d21f

Please sign in to comment.