Skip to content

Commit

Permalink
Error reporting (#28)
Browse files Browse the repository at this point in the history
* track errors per file

* Added explanatory text
also consistently put / in front of filenames
  • Loading branch information
nleanba authored Sep 4, 2024
1 parent 14e2758 commit 4248d9b
Show file tree
Hide file tree
Showing 3 changed files with 121 additions and 29 deletions.
3 changes: 2 additions & 1 deletion config/config.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
export const config = {
title: "GG2RDF",
description: "The Plazi service to transform GoldenGate XML into Turtle RDF.",
description:
'The Plazi service to transform GoldenGate XML into Turtle RDF.<br>See <a href="/workdir/fileStatus.txt">/workdir/fileStatus.txt</a> for per-file transformation status.',
email: "[email protected]",
sourceBranch: "main",
sourceRepository: "plazi/treatments-xml",
Expand Down
74 changes: 69 additions & 5 deletions src/action_worker.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,56 @@
*/
import { existsSync, GHActWorker, GitRepository, type Job } from "./deps.ts";
import { config } from "../config/config.ts";
import { gg2rdf } from "./gg2rdf.ts";
import { gg2rdf, Status } from "./gg2rdf.ts";

const GHTOKEN = Deno.env.get("GHTOKEN");

/**
 * Reads the per-file transformation status back from the status file.
 *
 * Every data line is expected to look like `<file>: <status>`. The following
 * lines are skipped instead of being stored with a `NaN` status:
 * - lines whose file part is empty (the legend and statistics lines both
 *   start with `: `),
 * - lines without a `: ` separator (e.g. the trailing empty line),
 * - lines whose status is not one of the known codes 0–3.
 *
 * The *last* `: ` on a line is used as the separator, so a file name that
 * itself contains `: ` is still read correctly. Both `\n` and `\r\n` line
 * endings are accepted.
 *
 * @param path Location of the status file; defaults to `fileStatus.txt` in
 *   the configured working directory.
 * @returns Map from file name (exactly as spelled in the status file) to its
 *   status; empty when the status file does not exist.
 */
const parseStatusFromDisk = (
  path = `${config.workDir}/fileStatus.txt`,
): Map<string, Status> => {
  const result = new Map<string, Status>();
  if (!existsSync(path)) return result;

  for (const line of Deno.readTextFileSync(path).split(/\r?\n/)) {
    const separator = line.lastIndexOf(": ");
    // -1: no separator at all; 0: empty file name (legend / stats line).
    if (separator <= 0) continue;

    const status = parseInt(line.slice(separator + 2), 10);
    // Only the four codes listed in the file's legend (0..3) are valid; this
    // also rejects NaN, which fails both comparisons.
    if (!(status >= 0 && status <= 3)) continue;

    result.set(line.slice(0, separator), status);
  }
  return result;
};

/**
 * Writes the per-file transformation status to the status file, replacing any
 * previous content.
 *
 * Layout of the file:
 * 1. a legend line starting with `: `,
 * 2. one `<file>: <status>` line per file, every file name starting with `/`,
 * 3. a statistics line starting with `: (stats)`.
 *
 * File names are normalised to a leading `/` *before* writing. A map may
 * contain both `/a.xml` (as read back from the status file) and `a.xml` (as
 * set during the current run); these are merged into one entry, and the one
 * that comes later in the map's iteration order wins. Without this, the same
 * file would be listed and counted twice.
 *
 * The whole file is assembled in memory and written with a single call, so a
 * partially written file cannot result from a short `writeSync`.
 *
 * @param statusMap Status per file; not modified.
 * @param path Location of the status file; defaults to `fileStatus.txt` in
 *   the configured working directory.
 */
const saveStatusToDisk = (
  statusMap: Map<string, Status>,
  path = `${config.workDir}/fileStatus.txt`,
) => {
  const normalized = new Map<string, Status>();
  for (const [file, status] of statusMap) {
    normalized.set(file.startsWith("/") ? file : `/${file}`, status);
  }

  const counts = [0, 0, 0, 0];
  const lines = [
    ": 0=successful, 1=has_warnings, 2=has_errors, 3=failed (stats at end)",
  ];
  for (const [file, status] of normalized) {
    // Only tally known status codes; anything else is still written out so
    // that no entry silently disappears, but it must not corrupt the counts.
    if (Number.isInteger(status) && status >= 0 && status < counts.length) {
      counts[status]++;
    }
    lines.push(`${file}: ${status}`);
  }
  lines.push(
    `: (stats) 0=successful ${counts[0]}x, 1=has_warnings ${
      counts[1]
    }x, 2=has_errors ${counts[2]}x, 3=failed ${counts[3]}x`,
  );

  // Creates the file if needed and overwrites existing content.
  Deno.writeTextFileSync(path, lines.join("\n") + "\n");
};

const worker = new GHActWorker(self, config, async (job: Job, log) => {
log("Starting transformation\n" + JSON.stringify(job, undefined, 2));

Expand Down Expand Up @@ -56,6 +102,8 @@ const worker = new GHActWorker(self, config, async (job: Job, log) => {

log(`\nTotal files: ${modified.length + removed.length}\n`);

const statusMap = parseStatusFromDisk();

// run saxon on modified files
for (const file of modified) {
if (
Expand All @@ -69,16 +117,31 @@ const worker = new GHActWorker(self, config, async (job: Job, log) => {
},
);
try {
gg2rdf(
const status = gg2rdf(
`${worker.gitRepository.directory}/${file}`,
`${config.workDir}/tmpttl/${file.slice(0, -4)}.ttl`,
log,
);
log("gg2rdf successful");
statusMap.set(file, status);
switch (status) {
case Status.successful:
log("gg2rdf successful");
break;
case Status.has_warnings:
log("gg2rdf successful with warnings");
break;
case Status.has_errors:
log("gg2rdf successful with errors");
break;
case Status.failed:
log("gg2rdf failed gracefully");
break;
}
} catch (error) {
log("gg2rdf failed:");
log("gg2rdf failed catastrophically:");
log(error);
throw new Error("gg2rdf failed");
saveStatusToDisk(statusMap);
throw new Error("gg2rdf failed catastrophically");
}
} else {
log(
Expand Down Expand Up @@ -126,6 +189,7 @@ const worker = new GHActWorker(self, config, async (job: Job, log) => {
}
}

saveStatusToDisk(statusMap);
await targetRepo.commit(job, message, log);
await targetRepo.push(log);
log("git push successful");
Expand Down
73 changes: 50 additions & 23 deletions src/gg2rdf.ts
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,19 @@ if (import.meta.main) {
gg2rdf(flags.input, flags.output);
}

/**
 * Outcome of one gg2rdf run.
 *
 * The numeric order is significant: the status is only ever raised to a
 * higher value (via `Math.max(status, Status.…)`), never lowered, so a more
 * severe outcome must have a larger number.
 */
export const enum Status {
  /** Nothing noteworthy happened. */
  successful = 0,
  /** At least one warning was reported. */
  has_warnings = 1,
  /** At least one error was reported, but output was still produced. */
  has_errors = 2,
  /** The transformation could not be carried out. */
  failed = 3,
}

export function gg2rdf(
inputPath: string,
outputPath: string,
log: (msg: string) => void = console.log,
) {
): Status {
const document = new DOMParser().parseFromString(
Deno.readTextFileSync(inputPath).replaceAll(/(<\/?)mods:/g, "$1MODS"),
"text/xml",
Expand All @@ -67,8 +75,9 @@ export function gg2rdf(
// this is the <document> surrounding everything. doc != document
const doc = document.querySelector("document") as Element;
if (!doc) {
log(`Error: missing <document> in ${inputPath}.`);
log(`Error: missing <document> in ${inputPath}.\n Could not start gg2rdf.`);
output("# Error: Could not create RDF due to missing <document>");
return Status.failed;
}
const id = partialURI(doc.getAttribute("docId") || "") || "MISSING_ID";
log(`starting gg2rdf on document id: ${id}`);
Expand All @@ -80,6 +89,8 @@ export function gg2rdf(
const citedMaterials: Subject[] = [];
let treatmentTaxonUri = "";

let status: Status = Status.successful;

try {
checkForErrors();
makeTreatment();
Expand All @@ -96,8 +107,29 @@ export function gg2rdf(
error.stack ?? "[no stacktrace]"
}`.replaceAll(/\n/g, "\n# "),
);
return Status.failed;
}

/**
 * Result type of `getTaxonRelation`, which is called with the treatment
 * taxon and a cited taxon.
 *
 * NOTE(review): declared ahead of `return status;` — presumably so the enum
 * is already initialised when the helper functions below the return use it;
 * confirm before moving it.
 */
const enum REL {
CITES,
SAME,
// Reported in the output as "# Error: Invalid taxon relation".
NONE,
DEPRECATES,
}
/**
 * Result type of `getTaxonRankGroup`, listed from kingdom down to species.
 *
 * NOTE(review): whether any code relies on the numeric order of the members
 * is not visible here — verify before reordering.
 */
const enum RANKS {
// Reported in the output as "# Error: Invalid Rank".
INVALID,
kingdom,
phylum,
class,
order,
family,
tribe,
genus,
species,
}

return status;

// end of top-level code

/** replaces <xsl:template match="/"> (root template) */
Expand All @@ -115,6 +147,7 @@ export function gg2rdf(
output(
"# Warning: treatment taxon is missing ancestor kingdom, defaulting to 'Animalia'",
);
status = Math.max(status, Status.has_warnings);
}
if (errors.length) {
throw new Error(
Expand Down Expand Up @@ -214,6 +247,7 @@ export function gg2rdf(
epithetErrors.forEach((e) => {
t.addProperty("# Warning: Could not add treatment taxon because", e);
log(`Warning: Could not add treatment taxon because ${e}`);
status = Math.max(status, Status.has_warnings);
});
} else {
const rank: string = taxon.getAttribute("rank");
Expand Down Expand Up @@ -248,6 +282,7 @@ export function gg2rdf(
);
if (!treatmentTaxon) {
log("# Warning: Lost treatment-taxon, cannot add vernacular names");
status = Math.max(status, Status.has_warnings);
} else {
doc.querySelectorAll("vernacularName").forEach((v: Element) => {
const language = v.getAttribute("language") || undefined;
Expand Down Expand Up @@ -311,6 +346,7 @@ export function gg2rdf(
`Could not add TaxonConceptCitation\n${error}\n${error.stack ?? ""}`
.replaceAll(/\n/g, "\n# "),
);
status = Math.max(status, Status.has_errors);
}
} else {
log(`${e.tagName} found without taxonomicName`);
Expand Down Expand Up @@ -429,6 +465,7 @@ export function gg2rdf(

if (cTaxonAuthority === "INVALID") {
log(`Warning: Invalid Authority for ${tnuri}`);
status = Math.max(status, Status.has_warnings);
return { ok: false, tnuri };
}

Expand All @@ -450,9 +487,11 @@ export function gg2rdf(
) {
if (cTaxonRankGroup === RANKS.INVALID) {
s.addProperty("# Error:", "Invalid Rank");
status = Math.max(status, Status.has_errors);
}
if (taxonRelation === REL.NONE) {
s.addProperty("# Error:", "Invalid taxon relation");
status = Math.max(status, Status.has_errors);
}
s.addProperty("a", "dwcFP:TaxonConcept");
return { ok: true, uri, tnuri };
Expand Down Expand Up @@ -656,6 +695,7 @@ export function gg2rdf(
output(
"# Warning: Failed to output a material citation, could not create identifier",
);
status = Math.max(status, Status.has_warnings);
return "";
}

Expand Down Expand Up @@ -763,6 +803,7 @@ export function gg2rdf(
if ((attr + "").includes(".")) {
s.addProperty("# Warning:", `abbreviated ${n} ${STR(attr)}`);
if (!rankLimit) log(`Warning: abbreviated ${n} ${STR(attr)}`);
status = Math.max(status, Status.has_warnings);
}
nextRankLimit = n;
}
Expand All @@ -779,6 +820,7 @@ export function gg2rdf(
} else {
log(`Warning: Could not determine parent name of ${uri}`);
s.addProperty("# Warning:", "Could not determine parent name");
status = Math.max(status, Status.has_warnings);
}

s.addProperty("a", "dwcFP:TaxonName");
Expand Down Expand Up @@ -901,15 +943,18 @@ export function gg2rdf(
taxonNameURI(cTaxon)
}' due to issues with rank`,
);
} else {t.addProperty(
} else {
t.addProperty(
"# Warning:",
`Not adding 'trt:citesTaxonName ${
taxonConceptURI({
taxonName: cTaxon,
taxonAuthority: cTaxonAuthority,
})
}' due to issues with rank`,
);}
);
}
status = Math.max(status, Status.has_warnings);
return;
}
if (cTaxonAuthority === "INVALID") {
Expand Down Expand Up @@ -943,13 +988,6 @@ export function gg2rdf(
return;
}

const enum REL {
CITES,
SAME,
NONE,
DEPRECATES,
}

/** replaces <xsl:template name="taxonRelation"> */
function getTaxonRelation(
{ taxon, cTaxon }: { taxon: Element; cTaxon: Element },
Expand Down Expand Up @@ -1009,18 +1047,6 @@ export function gg2rdf(
return REL.DEPRECATES;
}

const enum RANKS {
INVALID,
kingdom,
phylum,
class,
order,
family,
tribe,
genus,
species,
}

function getTaxonRankGroup(t: Element): RANKS {
if (t.getAttribute("species")) return RANKS.species;
if (t.getAttribute("genus")) return RANKS.genus;
Expand Down Expand Up @@ -1296,6 +1322,7 @@ export function gg2rdf(
const result = s.replaceAll(/(?:\p{Z}|\p{S}|\p{P})(?<![-])/ug, "");
if (result !== s) {
log(`Warning: Normalizing "${s}" to "${result}".`);
status = Math.max(status, Status.has_warnings);
}
return result;
}
Expand Down

0 comments on commit 4248d9b

Please sign in to comment.