Skip to content

Commit

Permalink
feat(community): Add jira document loader filtering on creation date
Browse files Browse the repository at this point in the history
  • Loading branch information
mgiorgino-iobeya committed Dec 2, 2024
1 parent cc18630 commit 195a6d7
Show file tree
Hide file tree
Showing 3 changed files with 202 additions and 75 deletions.
6 changes: 3 additions & 3 deletions examples/src/document_loaders/jira.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import { JiraProjectLoader } from "@langchain/community/document_loaders/web/jira";

const host = process.env.JIRA_HOST;
const host = process.env.JIRA_HOST || 'https://jira.example.com';
const username = process.env.JIRA_USERNAME;
const accessToken = process.env.JIRA_ACCESS_TOKEN;
const projectKey = process.env.JIRA_PROJECT_KEY;
const projectKey = process.env.JIRA_PROJECT_KEY || 'PROJ';

if (username && accessToken) {
// Created within last 30 days
Expand All @@ -18,7 +18,7 @@ if (username && accessToken) {
});

const documents = await loader.load();
console.log(documents);
console.log(`Loaded ${documents.length} Jira document(s)`);
} else {
console.log(
"You must provide a username and access token to run this example."
Expand Down
230 changes: 169 additions & 61 deletions libs/langchain-community/src/document_loaders/tests/jira.int.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,82 +3,118 @@
*/
import { Document } from "@langchain/core/documents";
import { expect, test } from "@jest/globals";
import { JiraProjectLoader } from "../web/jira.js";
import {
JiraIssue,
JiraProjectLoader,
JiraProjectLoaderParams,
} from "../web/jira.js";

describe("JiraProjectLoader Integration Tests", () => {
// Connection settings come from the environment; requireEnvVar throws when
// one of them is missing or empty.
const JIRA_HOST = requireEnvVar("JIRA_HOST");
const JIRA_USERNAME = requireEnvVar("JIRA_USERNAME");
const JIRA_ACCESS_TOKEN = requireEnvVar("JIRA_ACCESS_TOKEN");
const JIRA_PROJECT_KEY = requireEnvVar("JIRA_PROJECT_KEY");
// Base loader configuration; tests spread it and override individual fields.
const jiraConf: JiraProjectLoaderParams = {
host: JIRA_HOST,
projectKey: JIRA_PROJECT_KEY,
username: JIRA_USERNAME,
accessToken: JIRA_ACCESS_TOKEN,
limitPerRequest: 20,
};

/**
 * Reads a mandatory environment variable.
 *
 * @param name - Name of the environment variable to look up.
 * @returns The variable's value when it is set and non-empty.
 * @throws Error when the variable is unset or empty.
 */
function requireEnvVar(name: string): string {
  // eslint-disable-next-line no-process-env
  const fromEnv = process.env[name];
  if (fromEnv) {
    return fromEnv;
  }
  throw new Error(`environment variable "${name}" must be set`);
}
test("should load Jira project issues as documents successfully", async () => {
const docs = await loadJiraDocsUntil((docs) => docs.length > 0);

/**
 * Builds a JiraProjectLoader from the test credentials and loads its documents.
 *
 * @param options - Optional settings; `createdAfter` is forwarded to the loader.
 * @returns The documents produced by the loader's `load()` call.
 */
async function loadJiraDocs(options: { createdAfter?: Date } = {}): Promise<Document[]> {
  const { createdAfter } = options;
  const jiraLoader = new JiraProjectLoader({
    host: JIRA_HOST,
    projectKey: JIRA_PROJECT_KEY,
    username: JIRA_USERNAME,
    accessToken: JIRA_ACCESS_TOKEN,
    limitPerRequest: 20,
    createdAfter,
  });

  const documents = await jiraLoader.load();
  return documents;
}

test("should load Jira project issues successfully", async () => {
const now = new Date();
let months = 1;
expect(docs).toBeDefined();
expect(Array.isArray(docs)).toBe(true);

let docs: Document[] = [];
while (docs.length === 0 && months < 120) {
const createdAfter = new Date(now);
createdAfter.setDate(now.getDate() - months * 30);
docs = await loadJiraDocs({createdAfter});
months *= 1.2;
if (docs.length < 1) {
// Skip test if not enough issues available
return;
}
const firstDoc = docs[0];

// Check document structure
expect(firstDoc).toHaveProperty("pageContent");
expect(firstDoc).toHaveProperty("metadata");

// Check metadata
expect(firstDoc.metadata).toHaveProperty("id");
expect(firstDoc.metadata).toHaveProperty("host", JIRA_HOST);
expect(firstDoc.metadata).toHaveProperty("projectKey", JIRA_PROJECT_KEY);

// Check pageContent contains essential Jira issue information
const content = firstDoc.pageContent;
expect(content).toContain("Issue:");
expect(content).toContain("Project:");
expect(content).toContain("Status:");
expect(content).toContain("Priority:");
expect(content).toContain("Type:");
expect(content).toContain("Creator:");
});

if (months >= 10) {
docs = await loadJiraDocs({});
test("should filter issues based on createdAfter date", async () => {
// First load at least 2 issues with different creation dates (ignoring time)
const baseIssues = await loadJiraIssuesUntil(haveTwoDifferentCreationDates);
if (baseIssues.length < 2) {
// Skip test if not enough issues available
return;
}

expect(docs).toBeDefined();
expect(Array.isArray(docs)).toBe(true);
// Create a map from date string without time to list of issues
const dateToIssueMap = new Map<string, JiraIssue[]>();
baseIssues.forEach((issue) => {
const date = asStringWithoutTime(new Date(issue.fields.created));
dateToIssueMap.set(date, (dateToIssueMap.get(date) ?? []).concat(issue));
});
// Convert map to list of {date, issues}
const issuesGroupedByDate = Array.from(
dateToIssueMap,
([date, issues]) => ({ date, issues })
);
issuesGroupedByDate.sort((a, b) => a.date.localeCompare(b.date));

// Pick middle date to split issues in two groups
const middleIndex = Math.floor(issuesGroupedByDate.length / 2);
const middleDate = new Date(issuesGroupedByDate[middleIndex].date);
const issuesAfterMiddle = issuesGroupedByDate
.slice(middleIndex)
.flatMap(({ issues }) => issues);

// Load issues created after middle date
const loader = new JiraProjectLoader({
...jiraConf,
createdAfter: middleDate,
});

if (docs.length > 0) {
const firstDoc = docs[0];

// Check document structure
expect(firstDoc).toHaveProperty("pageContent");
expect(firstDoc).toHaveProperty("metadata");

// Check metadata
expect(firstDoc.metadata).toHaveProperty("id");
expect(firstDoc.metadata).toHaveProperty("host", JIRA_HOST);
expect(firstDoc.metadata).toHaveProperty("projectKey", JIRA_PROJECT_KEY);

// Check pageContent contains essential Jira issue information
const content = firstDoc.pageContent;
expect(content).toContain("Issue:");
expect(content).toContain("Project:");
expect(content).toContain("Status:");
expect(content).toContain("Priority:");
expect(content).toContain("Type:");
expect(content).toContain("Creator:");
}
const filteredDocs = await loader.load();

// Verify we got the expected issues
expect(filteredDocs.length).toBeGreaterThan(0);
expect(filteredDocs.length).toBeLessThan(baseIssues.length);

// Verify all returned issues are created after our cutoff date
const middleDateTimestamp = middleDate.getTime();
filteredDocs.forEach((doc) => {
const issueDateString = doc.pageContent
.split("\n")
.filter((line) => /^Created: /.test(line))[0]
.replace("Created: ", "");
const issueDateTimestamp = new Date(
asStringWithoutTime(new Date(issueDateString))
).getTime();
expect(issueDateTimestamp).toBeGreaterThanOrEqual(middleDateTimestamp);
});

// Verify we got the same issues as in our original set
const filteredIds = new Set(filteredDocs.map((d) => d.metadata.id));
const expectedIds = new Set(issuesAfterMiddle.map((issue) => issue.id));
expect(filteredIds).toEqual(expectedIds);
});

test("should handle invalid credentials", async () => {
const loader = new JiraProjectLoader({
host: JIRA_HOST,
projectKey: JIRA_PROJECT_KEY,
...jiraConf,
username: "invalid_username",
accessToken: "invalid_token",
});
Expand All @@ -89,13 +125,85 @@ describe("JiraProjectLoader Integration Tests", () => {

test("should handle invalid project key", async () => {
const loader = new JiraProjectLoader({
host: JIRA_HOST,
...jiraConf,
projectKey: "INVALID_PROJECT_KEY",
username: JIRA_USERNAME,
accessToken: JIRA_ACCESS_TOKEN,
});

const docs = await loader.load();
expect(docs).toEqual([]);
});

/**
 * Returns the value of an environment variable that must be configured.
 *
 * @param name - Name of the environment variable.
 * @returns The non-empty value found in `process.env`.
 * @throws Error when the variable is unset or empty.
 */
function requireEnvVar(name: string): string {
  // eslint-disable-next-line no-process-env
  const configured = process.env[name];
  if (configured) {
    return configured;
  }
  throw new Error(`environment variable "${name}" must be set`);
}

/**
 * Formats a date as the calendar-date part of its ISO string.
 *
 * @param date - Date to format.
 * @returns Everything in `date.toISOString()` before the "T" separator.
 */
function asStringWithoutTime(date: Date): string {
  const iso = date.toISOString();
  return iso.slice(0, iso.indexOf("T"));
}

/**
 * Tells whether two dates have the same time-less string representation
 * (as produced by asStringWithoutTime).
 *
 * @param a - First date.
 * @param b - Second date.
 * @returns `true` when both dates format to the same string.
 */
function sameDate(a: Date, b: Date) {
  const dayOfA = asStringWithoutTime(a);
  const dayOfB = asStringWithoutTime(b);
  return dayOfA === dayOfB;
}

/**
 * Checks whether the issues span at least two distinct creation dates,
 * comparing each issue's creation date with the first issue's via sameDate.
 *
 * @param issues - Issues to inspect.
 * @returns `true` when there are at least two issues and one of them was not
 * created on the same date as the first issue.
 */
function haveTwoDifferentCreationDates(issues: JiraIssue[]): boolean {
  if (issues.length < 2) {
    return false;
  }

  const others = issues.slice(1);
  const allOnFirstDate = others.every((other) =>
    sameDate(
      new Date(other.fields.created),
      new Date(issues[0].fields.created)
    )
  );
  return !allOnFirstDate;
}

/**
 * Loads Jira documents through loadUntil, building a fresh JiraProjectLoader
 * from `jiraConf` for every `createdAfter` date it is asked to try.
 *
 * @param predicate - Decides whether a loaded batch of documents is acceptable.
 * @returns Whatever loadUntil resolves to for the document loader.
 */
async function loadJiraDocsUntil(predicate: (docs: Document[]) => boolean) {
  return loadUntil((createdAfter: Date) => {
    const jiraLoader = new JiraProjectLoader({ ...jiraConf, createdAfter });
    return jiraLoader.load();
  }, predicate);
}

/**
 * Loads raw Jira issues through loadUntil, building a fresh JiraProjectLoader
 * from `jiraConf` for every `createdAfter` date it is asked to try.
 *
 * @param predicate - Decides whether a loaded batch of issues is acceptable.
 * @returns Whatever loadUntil resolves to for the issue loader.
 */
async function loadJiraIssuesUntil(
  predicate: (docs: JiraIssue[]) => boolean
) {
  return loadUntil((createdAfter: Date) => {
    const jiraLoader = new JiraProjectLoader({ ...jiraConf, createdAfter });
    return jiraLoader.loadAsIssues();
  }, predicate);
}

/**
 * Repeatedly loads items over a progressively wider time window until the
 * loaded batch satisfies `predicate` or the window limit is reached.
 *
 * The window starts 30 days before now and grows by 20% per attempt, for as
 * long as it stays below 120 thirty-day "months".
 *
 * @param loadCreatedAfter - Loads the items created after the given date.
 * @param predicate - Decides whether a loaded batch is acceptable.
 * @returns The first batch accepted by `predicate`, or `[]` when none was.
 */
async function loadUntil<T>(
  loadCreatedAfter: (date: Date) => Promise<T[]>,
  predicate: (loaded: T[]) => boolean
): Promise<T[]> {
  const now = new Date();
  const maxMonths = 120;
  let months = 1;

  let loaded: T[] = [];
  let accepted = predicate(loaded);
  while (!accepted && months < maxMonths) {
    const createdAfter = new Date(now);
    createdAfter.setDate(now.getDate() - months * 30);
    loaded = await loadCreatedAfter(createdAfter);
    accepted = predicate(loaded);
    months *= 1.2;
  }

  // Decide on the predicate, not on `months`: the counter is bumped after
  // every load, so it can exceed the limit even though the last batch was
  // accepted.
  return accepted ? loaded : [];
}
});
41 changes: 30 additions & 11 deletions libs/langchain-community/src/document_loaders/web/jira.ts
Original file line number Diff line number Diff line change
Expand Up @@ -369,34 +369,53 @@ export class JiraProjectLoader extends BaseDocumentLoader {
}

/**
 * Loads every issue through `loadAsIssues()` and converts the result to
 * documents.
 *
 * Failures are logged with `console.error` and swallowed: on any error an
 * empty array is returned instead of rejecting.
 *
 * @returns The converted documents, or `[]` when loading or conversion failed.
 */
public async load(): Promise<Document[]> {
  try {
    // Delegate to loadAsIssues() rather than repeating its fetch loop here.
    const allJiraIssues = await this.loadAsIssues();
    return this.documentConverter.convertToDocuments(allJiraIssues);
  } catch (error) {
    console.error("Error:", error);
    return [];
  }
}

/**
 * Collects every batch yielded by `fetchIssues()` into a single flat array.
 * This method adds no error handling of its own.
 *
 * @returns All issues yielded by `fetchIssues()`, in the order received.
 */
public async loadAsIssues(): Promise<JiraIssue[]> {
  const collected: JiraIssue[] = [];

  for await (const page of this.fetchIssues()) {
    for (const issue of page) {
      collected.push(issue);
    }
  }

  return collected;
}

/**
 * Formats a date as year, zero-padded month and zero-padded day of month
 * joined by dashes, using the `Date` local-time getters.
 *
 * @param date - Date to format, or `undefined`.
 * @returns The formatted string, or `undefined` when no date is given.
 */
protected toJiraDateString(date: Date | undefined): string | undefined {
  if (!date) {
    return undefined;
  }
  const twoDigits = (value: number): string => String(value).padStart(2, "0");
  return [
    date.getFullYear(),
    twoDigits(date.getMonth() + 1),
    twoDigits(date.getDate()),
  ].join("-");
}

protected async *fetchIssues(): AsyncIterable<JiraIssue[]> {
const authorizationHeader = this.buildAuthorizationHeader();
const url = `${this.host}${API_ENDPOINTS.SEARCH}`;
const createdAfterAsString = this.toJiraDateString(this.createdAfter);
let startAt = 0;

while (true) {
try {
const jqlProps = [
`project=${this.projectKey}`,
`startAt=${startAt}`,
`maxResults=${this.limitPerRequest}`,
...(this.createdAfter ? [`created>${this.createdAfter.toISOString()}`] : [])
...(createdAfterAsString ? [`created>=${createdAfterAsString}`] : []),
];
const pageUrl = `${url}?jql=${jqlProps.join('&')}`;
const params = new URLSearchParams({
jql: jqlProps.join(" AND "),
startAt: `${startAt}`,
maxResults: `${this.limitPerRequest}`,
});
const pageUrl = `${url}?${params}`;

const options = {
method: "GET",
Expand Down

0 comments on commit 195a6d7

Please sign in to comment.