Skip to content

Commit

Permalink
When setting up a FeedConnection, check if the feed loads and parses …
Browse files Browse the repository at this point in the history
…rather than just checking the Content-Type (#684)

* When setting up a FeedConnection, check if the feed loads and parses rather than just checking the Content-Type

* Don't create a new Parser each time we fetch a feed

* Create 684.bugfix

---------

Co-authored-by: Tadeusz Sośnierz <[email protected]>
Co-authored-by: Will Hunt <[email protected]>
  • Loading branch information
3 people authored Mar 24, 2023
1 parent 5ccd64a commit 6c4bbc7
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 30 deletions.
1 change: 1 addition & 0 deletions changelog.d/684.bugfix
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Don't check Content-Type of RSS feeds when adding a new connection, instead just check if the feed is valid.
18 changes: 5 additions & 13 deletions src/Connections/FeedConnection.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import {Appservice, Intent, StateEvent} from "matrix-bot-sdk";
import { IConnection, IConnectionState, InstantiateConnectionOpts } from ".";
import { ApiError, ErrCode } from "../api";
import { BridgeConfigFeeds } from "../Config/Config";
import { FeedEntry, FeedError} from "../feeds/FeedReader";
import { FeedEntry, FeedError, FeedReader} from "../feeds/FeedReader";
import { Logger } from "matrix-appservice-bridge";
import { IBridgeStorageProvider } from "../Stores/StorageProvider";
import { BaseConnection } from "./BaseConnection";
Expand Down Expand Up @@ -37,6 +37,7 @@ export interface FeedConnectionSecrets {
export type FeedResponseItem = GetConnectionsResponseItem<FeedConnectionState, FeedConnectionSecrets>;

const MAX_LAST_RESULT_ITEMS = 5;
const VALIDATION_FETCH_TIMEOUT_MS = 5000;

@Connection
export class FeedConnection extends BaseConnection implements IConnection {
Expand All @@ -57,20 +58,11 @@ export class FeedConnection extends BaseConnection implements IConnection {
} catch (ex) {
throw new ApiError("Feed URL doesn't appear valid", ErrCode.BadValue);
}
let res;

try {
res = await axios.head(url).catch(() => axios.get(url));
await FeedReader.fetchFeed(url, {}, VALIDATION_FETCH_TIMEOUT_MS);
} catch (ex) {
throw new ApiError(`Could not read from URL: ${ex.message}`, ErrCode.BadValue);
}
const contentType = res.headers['content-type'];
// we're deliberately liberal here, since different things pop up in the wild
if (!contentType.match(/xml/)) {
throw new ApiError(
`Feed responded with a content type of "${contentType}", which doesn't look like an RSS/Atom feed`,
ErrCode.BadValue,
StatusCodes.UNSUPPORTED_MEDIA_TYPE
);
throw new ApiError(`Could not read feed from URL: ${ex.message}`, ErrCode.BadValue);
}
}

Expand Down
54 changes: 37 additions & 17 deletions src/feeds/FeedReader.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import { Logger } from "matrix-appservice-bridge";
import { MessageQueue } from "../MessageQueue";

import Ajv from "ajv";
import axios from "axios";
import axios, { AxiosResponse } from "axios";
import Parser from "rss-parser";
import Metrics from "../Metrics";
import UserAgent from "../UserAgent";
Expand Down Expand Up @@ -88,8 +88,7 @@ function normalizeUrl(input: string): string {
}

export class FeedReader {

private readonly parser = new Parser();
private readonly parser = FeedReader.buildParser();

private connections: FeedConnection[];
// ts should notice that we do in fact initialize it in constructor, but it doesn't (in this version)
Expand Down Expand Up @@ -185,6 +184,28 @@ export class FeedReader {
await this.matrixClient.setAccountData(FeedReader.seenEntriesEventType, accountData);
}

private static buildParser(): Parser {
return new Parser();
}

public static async fetchFeed(
url: string,
headers: any,
timeoutMs: number,
parser: Parser = FeedReader.buildParser(),
): Promise<{ response: AxiosResponse<any, any>, feed: Parser.Output<any> }> {
const response = await axios.get(url, {
headers: {
'User-Agent': UserAgent,
...headers,
},
// We don't want to wait forever for the feed.
timeout: timeoutMs,
});
const feed = await parser.parseString(response.data);
return { response, feed };
}

private async pollFeeds(): Promise<void> {
log.debug(`Checking for updates in ${this.observedFeedUrls.size} RSS/Atom feeds`);

Expand All @@ -196,28 +217,24 @@ export class FeedReader {
const fetchKey = randomUUID();
const { etag, lastModified } = this.cacheTimes.get(url) || {};
try {
const res = await axios.get(url, {
headers: {
'User-Agent': UserAgent,
const { response, feed } = await FeedReader.fetchFeed(
url,
{
...(lastModified && { 'If-Modified-Since': lastModified}),
...(etag && { 'If-None-Match': etag}),
},
// We don't want to wait forever for the feed.
timeout: this.config.pollTimeoutSeconds * 1000,
});
// Clear any HTTP failures
this.feedsFailingHttp.delete(url);
this.config.pollTimeoutSeconds * 1000,
this.parser,
);

// Store any entity tags/cache times.
if (res.headers.ETag) {
this.cacheTimes.set(url, { etag: res.headers.ETag});
} else if (res.headers['Last-Modified']) {
this.cacheTimes.set(url, { lastModified: res.headers['Last-Modified'] });
if (response.headers.ETag) {
this.cacheTimes.set(url, { etag: response.headers.ETag});
} else if (response.headers['Last-Modified']) {
this.cacheTimes.set(url, { lastModified: response.headers['Last-Modified'] });
}

const feed = await this.parser.parseString(res.data);
this.feedsFailingParsing.delete(url);

let initialSync = false;
let seenGuids = this.seenEntries.get(url);
if (!seenGuids) {
Expand Down Expand Up @@ -272,6 +289,9 @@ export class FeedReader {
this.seenEntries.set(url, newSeenItems);
}
this.queue.push<FeedSuccess>({ eventName: 'feed.success', sender: 'FeedReader', data: { url: url } });
// Clear any feed failures
this.feedsFailingHttp.delete(url);
this.feedsFailingParsing.delete(url);
} catch (err: unknown) {
if (axios.isAxiosError(err)) {
// No new feed items, skip.
Expand Down

0 comments on commit 6c4bbc7

Please sign in to comment.