Skip to content

Commit

Permalink
add precision feature
Browse files Browse the repository at this point in the history
  • Loading branch information
solaoi committed Mar 30, 2022
1 parent 10577c1 commit d8dd691
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 13 deletions.
6 changes: 4 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ you'll get the correlation coefficient of two columns.
# check that the column is valid
-c,--check
# set precision, default is 6
-p,--precision <number>
# show frequency table and histogram
-b,--binsize <number>
Expand Down Expand Up @@ -53,7 +56,7 @@ you can download a binary release
```sh
# Install with wget or curl
## set VERSION to the latest version listed on the releases page.
VERSION=v1.0.15
VERSION=v1.0.20
## if you use wget
wget https://github.com/solaoi/colc/releases/download/$VERSION/colc_linux_amd64.tar.gz
## if you use curl
Expand Down Expand Up @@ -87,7 +90,6 @@ colc 2 some.csv

<img width="379" alt="スクリーンショット 2022-03-30 14 50 39" src="https://user-images.githubusercontent.com/46414076/160760624-58a15682-0f64-45b0-8b99-2b3732952971.png">


Of course `-b,--binsize` works well:)

```
Expand Down
38 changes: 27 additions & 11 deletions colc.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import {
import { runner } from "./lib/common.ts";
import { parse } from "https://deno.land/[email protected]/flags/mod.ts";

const { _, binsize, b, filter, f, check, c } = parse(Deno.args);
const { _, binsize, b, filter, f, check, c, precision, p } = parse(Deno.args);
const [column, filename] = _;
const hasTwoColumn = (() => {
if (typeof column !== "string") return false;
Expand All @@ -24,6 +24,16 @@ if (
console.log("Usage:\n colc [column] [file.csv|tsv|txt]");
Deno.exit(1);
}
const awkPrecision: number = (() => {
if (
typeof precision === "number" && precision > 0 &&
Number.isInteger(precision)
) {
return precision;
}
if (typeof p === "number" && p > 0 && Number.isInteger(p)) return p;
return 6;
})();
const binSize: number | null = (() => {
if (typeof binsize === "number" && binsize > 0) return binsize;
if (typeof b === "number" && b > 0) return b;
Expand Down Expand Up @@ -60,7 +70,7 @@ if (hasTwoColumn) {
}
bash.push("| awk");
bash.push(
`'BEGIN{OFMT="%.6f"}{split($1,col,",");asum+=col[1];a[NR]=col[1];bsum+=col[2];b[NR]=col[2]}END{amean=asum/NR;bmean=bsum/NR;for(i in a){as+=(a[i]-amean)^2;bs+=(b[i]-bmean)^2;sum+=(a[i]-amean)*(b[i]-bmean)};astddev=sqrt(as/NR);bstddev=sqrt(bs/NR);print sum/NR/astddev/bstddev}'`,
`'BEGIN{OFMT="%.${awkPrecision}f"}{split($1,col,",");asum+=col[1];a[NR]=col[1];bsum+=col[2];b[NR]=col[2]}END{amean=asum/NR;bmean=bsum/NR;for(i in a){as+=(a[i]-amean)^2;bs+=(b[i]-bmean)^2;sum+=(a[i]-amean)*(b[i]-bmean)};astddev=sqrt(as/NR);bstddev=sqrt(bs/NR);print sum/NR/astddev/bstddev}'`,
);
return bash.join(" ");
})();
Expand Down Expand Up @@ -118,7 +128,7 @@ if (binSize === null) {
}
bash.push("| sort -n | awk");
bash.push(
`'BEGIN{OFMT="%.6f"}NR==1{min=$1}{if(0==$1)zeros++;if($1<0)neg++;sum+=$1;d[NR]=$1}END{avg=sum/NR;for(i in d)s+=(d[i]-avg)^2;stddev=sqrt(s/(NR-1));q1=(3*d[int((NR-1)/4)+1]+d[int((NR-1)/4)+2])/4;q3=(d[int(3*(NR-1)/4)+1]+3*d[int(3*(NR-1)/4)+2])/4;iqr=q3-q1;stur=1+log(NR)/log(2);sturi=int(stur);sturges=stur>sturi?sturi+1:sturi;max=d[NR];range=max-min;sqrtnr=sqrt(NR);threerootnr=exp(log(NR)/3);print stddev,avg,sum,NR,max,min,sqrt(s/(NR-1))/sqrtnr,s/(NR-1),(NR%2)?d[(NR+1)/2]:(d[NR/2]+d[NR/2+1])/2,avg+stddev,avg-stddev,avg+2*stddev,avg-2*stddev,avg+3*stddev,avg-3*stddev,range/sturges,(3.5*stddev)/threerootnr,q1,q3,iqr,q1-1.5*iqr,q3+1.5*iqr,range/sqrtnr,2*iqr/threerootnr,range,zeros,zeros*100/NR,neg,neg*100/NR,stddev/avg}'`,
`'BEGIN{OFMT="%.${awkPrecision}f"}NR==1{min=$1}{if(0==$1)zeros++;if($1<0)neg++;sum+=$1;d[NR]=$1}END{avg=sum/NR;for(i in d)s+=(d[i]-avg)^2;stddev=sqrt(s/(NR-1));q1=(3*d[int((NR-1)/4)+1]+d[int((NR-1)/4)+2])/4;q3=(d[int(3*(NR-1)/4)+1]+3*d[int(3*(NR-1)/4)+2])/4;iqr=q3-q1;stur=1+log(NR)/log(2);sturi=int(stur);sturges=stur>sturi?sturi+1:sturi;max=d[NR];range=max-min;sqrtnr=sqrt(NR);threerootnr=exp(log(NR)/3);print stddev,avg,sum,NR,max,min,sqrt(s/(NR-1))/sqrtnr,s/(NR-1),(NR%2)?d[(NR+1)/2]:(d[NR/2]+d[NR/2+1])/2,avg+stddev,avg-stddev,avg+2*stddev,avg-2*stddev,avg+3*stddev,avg-3*stddev,range/sturges,(3.5*stddev)/threerootnr,q1,q3,iqr,q1-1.5*iqr,q3+1.5*iqr,range/sqrtnr,2*iqr/threerootnr,range,zeros,zeros*100/NR,neg,neg*100/NR,stddev/avg}'`,
);
return bash.join(" ");
})();
Expand Down Expand Up @@ -153,7 +163,7 @@ if (binSize === null) {
zeroRate,
negatives,
negativeRate,
cv
cv,
] = await runner
.run(statsCommand).then((s) => s.split(" "));
const sturgesFormulaIsInvalid = count.split(".")[0].length <= 2 &&
Expand All @@ -165,8 +175,8 @@ if (binSize === null) {
"zeros": comma(zeros || "0"),
"zeros(%)": comma(zeroRate),
"negatives": comma(negatives || "0"),
"negatives(%)": comma(negativeRate)
}
"negatives(%)": comma(negativeRate),
};
const stats = {
"min": comma(min),
"25%(Q1)": comma(q1),
Expand All @@ -179,7 +189,7 @@ if (binSize === null) {
"IQR(Q3-Q1)": comma(iqr),
"Q1–(1.5*IQR)": comma(lf),
"Q3+(1.5*IQR)": comma(uf),
}
};
const stds = {
"stddev(σ)": comma(stddev),
"stderr": comma(stderr),
Expand All @@ -188,16 +198,22 @@ if (binSize === null) {
"mean±σ(≒68%)": `${comma(sigmaMinus1)}, ${comma(sigmaPlus1)}`,
"mean±2σ(≒95%)": `${comma(sigmaMinus2)}, ${comma(sigmaPlus2)}`,
"mean±3σ(≒99%)": `${comma(sigmaMinus3)}, ${comma(sigmaPlus3)}`,
}
};
const recommendedBinsizes = {
"binsize(Square-root)": sqrtBinsize,
...(!sturgesFormulaIsInvalid && { "binsize(Sturges')": sturgesBinsize }),
"binsize(Scott's)": scottBinsize,
"binsize(FD)": fdBinsize,
}
};
const { println, showHeader, hr } = formatter(
sturgesFormulaIsInvalid ? 20 : 21,
getMaxLength({...total,...stats,...iqrs,...stds,...recommendedBinsizes}),
getMaxLength({
...total,
...stats,
...iqrs,
...stds,
...recommendedBinsizes,
}),
);
hasHeader ? showHeader(headerName) : hr();
Object.entries(total).forEach(([key, value]) => {
Expand Down Expand Up @@ -236,7 +252,7 @@ const freqCommand = (() => {
}
bash.push("| awk");
bash.push(
`'BEGIN{OFMT="%.6f"}{b=int($1/${binSize});a[b]++;bmax=b>bmax?b:bmax;bmin=b<bmin?b:bmin}END{freq="";for(i in a)freq=freq "|" i "_" a[i];print NR, freq, bmin, bmax}'`,
`'BEGIN{OFMT="%.${awkPrecision}f"}{b=int($1/${binSize});a[b]++;bmax=b>bmax?b:bmax;bmin=b<bmin?b:bmin}END{freq="";for(i in a)freq=freq "|" i "_" a[i];print NR, freq, bmin, bmax}'`,
);
return bash.join(" ");
})();
Expand Down

0 comments on commit d8dd691

Please sign in to comment.