Skip to content

Commit

Permalink
rss: ingest atom/rss feed contents already decoding HTML special chars (#37)
Browse files: browse the repository at this point in the history

Without this move, the main UI could show info and feed entries with
crude HTML marks, as in "&ldquo;". Translating that at the source is
the best move, since they get stored right in the DB.

Signed-off-by: Gustavo Lima Chaves <[email protected]>
  • Loading branch information
glima authored Jun 1, 2024
1 parent bd8ed51 commit 3c70379
Show file tree
Hide file tree
Showing 3 changed files with 55 additions and 9 deletions.
16 changes: 16 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ crossterm = "0.27"
diligent-date-parser = "0.1"
directories = "5"
html2text = "0.12"
html-escape = "0.2.13"
num_cpus = "1.16"
opml = "1.1"
r2d2 = "0.8"
Expand Down
47 changes: 38 additions & 9 deletions src/rss.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ use crate::modes::ReadMode;
use anyhow::{bail, Context, Result};
use atom_syndication as atom;
use chrono::prelude::{DateTime, Utc};
use html_escape::decode_html_entities_to_string;
use rss::Channel;
use rusqlite::params;
use rusqlite::types::{FromSql, ToSqlOutput};
Expand Down Expand Up @@ -155,11 +156,25 @@ struct IncomingEntry {
impl From<&atom::Entry> for IncomingEntry {
/// Builds an `IncomingEntry` from an Atom entry.
///
/// The title, the first author's name and the content value have their HTML
/// entities decoded before being stored. The publication date is converted to
/// UTC, the link is the `href` of the entry's first link, and `description`
/// is always `None` for Atom entries.
fn from(entry: &atom::Entry) -> Self {
    /// Returns `raw` with its HTML entities decoded into a fresh `String`.
    fn decode(raw: impl AsRef<str>) -> String {
        let mut decoded = String::new();
        decode_html_entities_to_string(raw, &mut decoded);
        decoded
    }

    Self {
        // Each field is initialized exactly once; listing a field twice in a
        // struct literal is a compile error.
        title: Some(decode(entry.title())),
        author: entry.authors().first().map(|author| decode(&author.name)),
        pub_date: entry.published().map(|date| date.with_timezone(&Utc)),
        description: None,
        content: entry
            .content()
            .and_then(|content| content.value())
            .map(decode),
        link: entry.links().first().map(|link| link.href().to_string()),
    }
}
Expand All @@ -168,13 +183,27 @@ impl From<&atom::Entry> for IncomingEntry {
impl From<&rss::Item> for IncomingEntry {
/// Builds an `IncomingEntry` from an RSS item.
///
/// The title, author, description and content have their HTML entities
/// decoded before being stored. The publication date string is passed through
/// `parse_datetime`, and the link is copied as-is.
fn from(entry: &rss::Item) -> Self {
    /// Returns `raw` with its HTML entities decoded into a fresh `String`.
    fn decode(raw: impl AsRef<str>) -> String {
        let mut decoded = String::new();
        decode_html_entities_to_string(raw, &mut decoded);
        decoded
    }

    Self {
        // Each field is initialized exactly once; listing a field twice in a
        // struct literal is a compile error.
        title: entry.title().map(decode),
        author: entry.author().map(decode),
        pub_date: entry.pub_date().and_then(parse_datetime),
        description: entry.description().map(decode),
        content: entry.content().map(decode),
        link: entry.link().map(|link| link.to_owned()),
    }
}
Expand Down

0 comments on commit 3c70379

Please sign in to comment.