diff options
author | Jakob Stendahl <jakob.stendahl@outlook.com> | 2022-02-14 21:19:14 +0100 |
---|---|---|
committer | Jakob Stendahl <jakob.stendahl@outlook.com> | 2022-02-14 21:19:14 +0100 |
commit | 78950762a70bffb91255f1c1ce2594a071f0fa84 (patch) | |
tree | 3fc173f383a69f5dc544caab091869d0984625d6 | |
parent | fa01748d66089b8b2c33db3973f14b678361f739 (diff) | |
download | RSS-watcher-78950762a70bffb91255f1c1ce2594a071f0fa84.tar.gz RSS-watcher-78950762a70bffb91255f1c1ce2594a071f0fa84.zip |
:sparkles: Add many more fields that can be used, improve documentation and improve file structure v0.3.0
-rw-r--r-- | Readme.md | 91 | ||||
-rw-r--r-- | src/main.rs | 154 | ||||
-rw-r--r-- | src/notify.rs | 78 | ||||
-rw-r--r-- | src/rss_utils.rs | 219 |
4 files changed, 389 insertions, 153 deletions
@@ -2,34 +2,99 @@ Simple rust app that periodically checks RSS feeds for new entries, and pushes those to Gotify. +## Requirements +- MySQL database, with a database and authentication prepared that the app can + use. +- Either you need to have rust and cargo installed, or you need docker. + ## Usage -This can be run using docker or locally, to run with docker you can +### Docker +The simplest way to run this is using the docker image on +[docker hub](https://hub.docker.com/r/jakobst1n/rss-watcher) (personally I am +running it on a kubernetes cluster). It can be run with the command below, +make sure to set the database credentials so they fit your database. ``` $ run -it --rm -e DB_HOST=<database host> -e DB_USER=<database user> \ -e DB_PASS=<database password> -e DB_BASE=<database name> \ --restart=unless-stopped jakobst1n/rss-watcher ``` -To run locally you need to set all those environment variables, and then -you can run it with + +### Locally +If you want to run it without docker: +- Make sure Rust and Cargo is installed. +- Set the environment variables (there are a lot of ways to do this, + `export VAR_NAME=VAR_VALUE`, set them before the command, make a small shell + script to start it, etc...) +- Compile and run the app: ``` $ RUST_LOG=info cargo run ``` -All feed have to be defined in the database, you should start the app and let -it create the table(s) itself. Then you can add feeds like this +### First start +When you start the app the first time, it will create a table in the database, +later it will run migrations between versions automatically. +If that ever happens. + +When the table is created, you can start to add the +feeds you want notifications for. The app starts each iteration by checking +the database. So you can insert new feeds like this in the simplest form: ```sql INSERT INTO `rss-watcher-feeds` (url, push_url, push_token) VALUES (<the url of the RSS/Atom feed>, <root url of gotify server e.g. 
https://push.example.com>, <token for gotify app>); ``` -You can also specify what fields should be used in the title and message fields -of the gotify notification by changing the `title` and `message` columns. -By default they are set to `{{title}}` and `{{summary}}` respectively. -Also, if you set the env var `FETCH_INTERVAL`, it will change how often it -will poll for new changes (in ms). +## Configuration +### Feeds +The feed config in the database is quite simple, you can however overwrite +how the feed will be sent to gotify by adjusting the `title` and `message` +fields in the database. By default `title` is set to +`{{title}}: {{entry.title}}` and `message` is set to `{{entry.summary}}`. + +The possible template fields are: +| Field | +|------------------------| +| {{id}} | +| {{title}} | +| {{updated}} | +| {{authors}} | +| {{description}} | +| {{links}} | +| {{categories}} | +| {{contributors}} | +| {{language}} | +| {{published}} | +| {{rights}} | +| {{entry.id}} | +| {{entry.title}} | +| {{entry.updated}} | +| {{entry.authors}} | +| {{entry.links}} | +| {{entry.summary}} | +| {{entry.categories}} | +| {{entry.contributors}} | +| {{entry.published}} | +| {{entry.source}} | +| {{entry.rights}} | + +The best way to find the ones you want is to test a bit, here are some resources +to see what they are: +- [https://validator.w3.org/feed/docs/rss2.html](https://validator.w3.org/feed/docs/rss2.html) +- [https://validator.w3.org/feed/docs/atom.html](https://validator.w3.org/feed/docs/atom.html) +- [https://docs.rs/feed-rs/1.0.0/feed_rs/model/struct.Feed.html](https://docs.rs/feed-rs/1.0.0/feed_rs/model/struct.Feed.html) +- [https://docs.rs/feed-rs/1.0.0/feed_rs/model/struct.Entry.html](https://docs.rs/feed-rs/1.0.0/feed_rs/model/struct.Entry.html) + +### Environment variables +| Variable | Description | +|----------------|-----------------------------------------------------------------------| +| FETCH_INTERVAL | How often the app should poll for new changes 
in ms (defaults to 2 m) | +| DB_HOST | Hostname/FQDN/IP address of the database | +| DB_BASE | The database we should use | +| DB_USER | The user that will be used to access the database | +| DB_PASS | The password that will be used to access the database | +| RUST_LOG | Log level, for docker this defaults to `info` | + -## Todo -- Extract more RSS fields. -- Deal with multiple links. +## Issues +Please make an issue if you find a bug, or if something is weird :) diff --git a/src/main.rs b/src/main.rs index 02b386f..d4f7f1e 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,126 +1,16 @@ +mod rss_utils; +mod notify; mod database; use database::FeedConf; use std::env; use std::process; -use std::error::Error; -use feed_rs::parser; -use feed_rs::model::Feed; -use feed_rs::model::Entry; -use feed_rs::model::Text; -use chrono::prelude::{Utc,DateTime,NaiveDateTime}; -use std::time::Duration; -use tokio::{time}; use log::{debug, info, warn, error}; -use html2md; -extern crate mime; - -/** - * Extract text field from Option - */ -fn extract_text(text: &Option<Text>) -> String { - if text.is_none() { return String::from("Text field not found"); } - let field = text.as_ref().unwrap(); - match (field.content_type.type_(), field.content_type.subtype()) { - (mime::TEXT, mime::HTML) => return html2md::parse_html(field.content.as_ref()), - (mime::TEXT, mime::PLAIN) => return field.content.to_owned(), - _ => return String::from(format!("Unknown field content type {:#?}", field.content_type)), - } -} - -/** - * This will extract fields from RSS entry, and replace special tags - * from the input string with those entries. 
- */ -fn replace_tags(input: String, entry: &Entry) -> String { - let mut out = input; - out = out.replace("{{id}}", entry.id.as_ref()); - out = out.replace("{{title}}", extract_text(&entry.title).as_ref()); - out = out.replace("{{summary}}", extract_text(&entry.summary).as_ref()); - return out; -} - -/** - * Method that escapes some characters that would break json spec, and also escape - * special HTML characters. - */ -fn escape(input: String) -> String { - return input.replace("\\","\\\\") - .replace("\"", "\\\"") - .replace("\n", "\\n") - .replace("<", "&lt;") - .replace(">", "&gt;") - .replace("&", "$amp;"); -} - -/** - * Push feed entry to gotify - */ -async fn gotify_push(entry: &Entry, feed_conf: &FeedConf) -> Result<(), reqwest::Error> { - let uri = format!("{}/message", &feed_conf.push_url); - - // Extract content and create title and message strings - let mut title_content = feed_conf.title.to_owned(); - let mut message_content = feed_conf.message.to_owned(); - title_content = replace_tags(title_content, entry); - message_content = replace_tags(message_content, entry); - // Build json string that will be sent as payload to gotify - let mut req = "{".to_owned(); - - req.push_str(format!("\"title\":\"{}\"", escape(title_content.to_owned())).as_str()); - req.push_str(format!(",\"message\":\"{}\"", escape(message_content.to_owned())).as_str()); - req.push_str(",\"priority\":1"); - - req.push_str(",\"extras\": {"); - req.push_str("\"client::display\": { \"contentType\": \"text/markdown\" }"); - if entry.links.len() > 0 { - req.push_str(",\"client::notification\": { \"click\": { \"url\": \""); - req.push_str(escape(entry.links[0].href.to_owned()).as_str()); - req.push_str("\"}}") - } - req.push_str("}}"); - - // Send request to gotify - let client = reqwest::Client::new(); - let res = client.post(uri) - .query(&[("token",&feed_conf.push_token)]) - .body(req.to_owned()) - .header("Content-Type", "application/json") - .send() - .await?; - if res.status().is_success() 
{ - info!("Sent notification with title \"{}\"", title_content); - } else { - error!("payload: {}", req); - error!("Could not send notification... {:#?}", res); - } - Ok(()) -} - -/** - * Function takes a FeedConf struct, and makes a get request to fetch - * the feed. It then uses feed_rs to parse that feed and returns that - * parsed feed. - */ -async fn fetch_feed(feed_conf: &FeedConf, last_fetch_time: DateTime<Utc>) -> Result<Option<Feed>, Box<dyn Error>> { - info!("Fetching feed \"{}\"", &feed_conf.url); - let client = reqwest::Client::new(); - let last_fetch_rfc2822 = last_fetch_time.to_rfc2822().replace("+0000", "GMT"); - debug!("Using header \"If-Modified-Since {:?}\"", &last_fetch_rfc2822); - let resp = client.get(&feed_conf.url) - .header("If-Modified-Since", &last_fetch_rfc2822) - .send() - .await?; - if resp.status() == 304 { - info!("No changes since last fetch at {}", &last_fetch_rfc2822); - Ok(None) - } else { - let feed = parser::parse(&resp.bytes().await?[..])?; - debug!("{:#?}", feed); - Ok(Some(feed)) - } -} +use chrono::prelude::{Utc,DateTime,NaiveDateTime}; +use tokio::{time}; +use std::time::Duration; +use feed_rs::model::Feed; /** * This calls fetch_feed, and figures out wether it succeeded or not. 
@@ -139,39 +29,24 @@ async fn get_feed(feed_conf: &FeedConf) -> bool { debug!("Using last_fetch_time {:?}", last_fetch_time.to_owned()); // Fetch the feed and parse it - let res = fetch_feed(&feed_conf, last_fetch_time).await; - let feed: Option<Feed>; + let res = rss_utils::fetch_feed(&feed_conf, last_fetch_time).await; + let feed_res: Option<Feed>; match res { Err(e) => { error!("Could not fetch feed ({:?})", e); return false; }, - Ok(x) => feed = x + Ok(x) => feed_res = x } // If feed is empty (we got status code 304), we should skip any further // processing - if let None = feed { return false; } + if let None = feed_res { return false; } + let feed = feed_res.unwrap(); // Process all entries in the feed - for entry in feed.unwrap().entries { - // Skip sending notification if the publish time is before the - // last_fetch_time - if let Some(x) = entry.published { - if last_fetch_time > x { - info!("Skipping entry that was published at {}", x); - continue; - } - } - // Attempt to send notification, give up feed for this main loop - // iteration without saving last_fetch_time - if let Err(e) = gotify_push(&entry, &feed_conf).await { - error!("Could not send push notification ({:#?})", e); - return false; - } - } - - return true; + let res_notif = notify::all(&feed, &feed_conf, last_fetch_time).await; + return res_notif; } /** @@ -199,8 +74,7 @@ async fn main_loop() { for feed in feeds { let time_now = Utc::now(); - let res = get_feed(&feed).await; - if res { + if get_feed(&feed).await { database::update_last_fetch(feed.id, time_now.timestamp(), &mut conn); } } diff --git a/src/notify.rs b/src/notify.rs new file mode 100644 index 0000000..19915db --- /dev/null +++ b/src/notify.rs @@ -0,0 +1,78 @@ +use crate::database::FeedConf; +use crate::rss_utils; + +use log::{info, error}; +use feed_rs::model::Feed; +use chrono::prelude::{Utc,DateTime}; + +/** + * Push feed entry to gotify + */ +async fn gotify(title: String, message: String, link: Option<String>, feed_conf: 
&FeedConf) -> Result<(), reqwest::Error> { + let uri = format!("{}/message", &feed_conf.push_url); + + // Build json string that will be sent as payload to gotify + let mut req = "{".to_owned(); + + req.push_str(format!("\"title\":\"{}\"", title).as_str()); + req.push_str(format!(",\"message\":\"{}\"", message).as_str()); + req.push_str(",\"priority\":1"); + + req.push_str(",\"extras\": {"); + req.push_str("\"client::display\": { \"contentType\": \"text/markdown\" }"); + if link.is_some() { + req.push_str(",\"client::notification\": { \"click\": { \"url\": \""); + req.push_str(link.unwrap().as_str()); + req.push_str("\"}}") + } + req.push_str("}}"); + + // Send request to gotify + let client = reqwest::Client::new(); + let res = client.post(uri) + .query(&[("token",&feed_conf.push_token)]) + .body(req.to_owned()) + .header("Content-Type", "application/json") + .send() + .await?; + if res.status().is_success() { + info!("Sent notification with title \"{}\"", title); + } else { + error!("payload: {}", req); + error!("Could not send notification... 
{:#?}", res); + } + Ok(()) +} + +/** + * Push all new entries in the feed as per the configuration + */ +pub async fn all(feed: &Feed, feed_conf: &FeedConf, last_fetch_time: DateTime<Utc>) -> bool { + let mut all_notifs_successfull = true; + + // Skip sending notification if the publish time is before the + // last_fetch_time + for entry in &feed.entries { + if let Some(x) = entry.published { + if last_fetch_time > x { + info!("Skipping entry that was published at {}", x); + continue; + } + } + + // Get the fields we want to send to gotify + let title = rss_utils::fill_template(&feed_conf.title, &entry, &feed); + let message = rss_utils::fill_template(&feed_conf.message, &entry, &feed); + let mut link: Option<String> = None; + if entry.links.len() > 0 { + link = Some(rss_utils::escape(entry.links[0].href.to_owned())); + } + + if let Err(e) = gotify(title, message, link, &feed_conf).await { + error!("Could not send push notification ({:#?})", e); + all_notifs_successfull = false; + } + } + + return all_notifs_successfull; +} diff --git a/src/rss_utils.rs b/src/rss_utils.rs new file mode 100644 index 0000000..bce412d --- /dev/null +++ b/src/rss_utils.rs @@ -0,0 +1,219 @@ +use crate::database::FeedConf; + +use log::{debug, info}; +use std::error::Error; +use feed_rs::parser; +use feed_rs::model; +use chrono::prelude::{Utc,DateTime}; +use html2md; +extern crate mime; + +/** + * Extract text field from Option + */ +fn extract_text(text: &Option<model::Text>, field: &str) -> String { + if text.is_none() { return String::from(format!("Field {:#?} was not in feed", field)); } + let field = text.as_ref().unwrap(); + match (field.content_type.type_(), field.content_type.subtype()) { + (mime::TEXT, mime::HTML) => return html2md::parse_html(field.content.as_ref()), + (mime::TEXT, mime::PLAIN) => return field.content.to_owned(), + _ => return String::from(format!("Unknown field content type {:#?}", field.content_type)), + } +} + +/** + * Extract string field from Option + */ 
+fn extract_string(text: &Option<String>, field: &str) -> String { + if text.is_none() { return String::from(format!("Field {:#?} was not in feed", field)); } + return text.as_ref().unwrap().to_owned(); +} + +/** + * Extract string field from Option + */ +fn extract_datetime(date: &Option<DateTime<Utc>>, field: &str) -> String { + if date.is_none() { return String::from(format!("Field {:#?} was not in feed", field)); } + return date.unwrap().to_rfc2822().replace("+0000", "UTC"); +} + +/** + * Turn a vector of feed_rs::model::Person into markdown. + */ +fn person_vec_to_md(person_vec: &Vec<model::Person>) -> String { + let mut md_str = "".to_owned(); + + for (i, person) in person_vec.iter().enumerate() { + if person.uri.is_some() && person.email.is_some() { + md_str.push_str(format!("[{}]({}) - [homepage]({})", + person.name, + person.email.as_ref().unwrap(), + person.uri.as_ref().unwrap() + ).as_str()); + } else if person.uri.is_some() { + md_str.push_str(format!("[{}]({})", + person.name, + person.uri.as_ref().unwrap(), + ).as_str()); + } else if person.email.is_some() { + md_str.push_str(format!("[{}]({})", + person.name, + person.email.as_ref().unwrap(), + ).as_str()); + } else { + md_str.push_str(&person.name); + } + if i < (person_vec.len() - 1) { md_str.push_str(", "); } + } + return md_str; +} + +/** + * Turn a vector of feed_rs::model::Link into markdown. 
+ */ +fn link_vec_to_md(link_vec: &Vec<model::Link>) -> String { + let mut md_str = "".to_owned(); + + for (i, link) in link_vec.iter().enumerate() { + if link.title.is_some() { + md_str.push_str(format!("[{}]({})", + &link.title.as_ref().unwrap(), + &link.href).as_str()); + } else if link.rel.is_some() { + md_str.push_str(format!("[{}]({})", + &link.rel.as_ref().unwrap(), + &link.href).as_str()); + } else { + md_str.push_str(format!("[{}]({})", + &link.href, + &link.href).as_str()); + } + if i < (link_vec.len() - 1) { md_str.push_str(", "); } + } + return md_str; +} + +/** + * Turn a vector of feed_rs::model::Category into markdown. + */ +fn category_vec_to_md(category_vec: &Vec<model::Category>) -> String { + let mut md_str = "".to_owned(); + + for (i, category) in category_vec.iter().enumerate() { + if category.label.is_some() { + md_str.push_str(category.label.as_ref().unwrap()); + } else { + md_str.push_str(&category.term); + } + if i < (category_vec.len() - 1) { md_str.push_str(", "); } + } + return md_str; +} + +/** + * This will replace a given field with the appropriate formatted string from + * the rss feed/entry/item. 
+ */ +fn fill_template_field(field: &str, entry: &model::Entry, feed: &model::Feed) -> String { + match field { + "id" => return feed.id.to_owned(), + "title" => return extract_text(&feed.title, field).to_owned(), + "updated" => return extract_datetime(&feed.updated, field).to_owned(), + "authors" => return person_vec_to_md(&feed.authors).to_owned(), + "description" => return extract_text(&feed.description, field).to_owned(), + "links" => return link_vec_to_md(&feed.links).to_owned(), + "categories" => return category_vec_to_md(&feed.categories).to_owned(), + "contributors" => return person_vec_to_md(&feed.contributors).to_owned(), + "language" => return extract_string(&feed.language, field).to_owned(), + "published" => return extract_datetime(&feed.published, field).to_owned(), + "rights" => return extract_text(&feed.rights, field).to_owned(), + + "entry.id" => return entry.id.to_owned(), + "entry.title" => return extract_text(&entry.title, field).to_owned(), + "entry.updated" => return extract_datetime(&entry.updated, field).to_owned(), + "entry.authors" => return person_vec_to_md(&entry.authors).to_owned(), + "entry.links" => return link_vec_to_md(&entry.links).to_owned(), + "entry.summary" => return extract_text(&entry.summary, field).to_owned(), + "entry.categories" => return category_vec_to_md(&entry.categories).to_owned(), + "entry.contributors" => return person_vec_to_md(&entry.contributors).to_owned(), + "entry.published" => return extract_datetime(&entry.published, field).to_owned(), + "entry.source" => return extract_string(&entry.source, field).to_owned(), + "entry.rights" => return extract_text(&entry.rights, field).to_owned(), + _ => return String::from(format!("Unknown field {:#?}", field)) + } +} + +/** + * Method that escapes some characters that would break json spec, and also escape + * special HTML characters. 
+ */ +pub fn escape(input: String) -> String { + return input.replace("\\","\\\\") + .replace("\"", "\\\"") + .replace("\n", "\\n") + .replace("<", "&lt;") + .replace(">", "&gt;") + .replace("&", "$amp;"); +} + +/** + * This will find fields in the template string and use fill_template_field + * to replace the tags with formatted text from the rss feed/entry/item. + * It does use the escape function on the string it returns. + */ +pub fn fill_template(template_str: &str, entry: &model::Entry, feed: &model::Feed) -> String { + let mut filled_str = "".to_owned(); + + let mut l_bracket_n = 0; + let mut r_bracket_n = 0; + let mut field = "".to_owned(); + + for c in template_str.chars() { + if l_bracket_n > 1 { + if c == '}' { + r_bracket_n += 1; + if r_bracket_n > 1 { + filled_str.push_str(fill_template_field(&field, + &entry, + &feed).as_str()); + field = "".to_owned(); + r_bracket_n = 0; + l_bracket_n = 0; + } + } else { + field.push(c); + } + } else if c == '{' { + l_bracket_n += 1; + if l_bracket_n > 1 { field = "".to_owned(); } + } else { + l_bracket_n = 0; + filled_str.push(c); + } + } + return escape(filled_str).to_owned(); +} + +/** + * Function takes a FeedConf struct, and makes a get request to fetch + * the feed. It then uses feed_rs to parse that feed and returns that + * parsed feed. 
+ */ +pub async fn fetch_feed(feed_conf: &FeedConf, last_fetch_time: DateTime<Utc>) -> Result<Option<model::Feed>, Box<dyn Error>> { + info!("Fetching feed \"{}\"", &feed_conf.url); + let client = reqwest::Client::new(); + let last_fetch_rfc2822 = last_fetch_time.to_rfc2822().replace("+0000", "GMT"); + debug!("Using header \"If-Modified-Since {:?}\"", &last_fetch_rfc2822); + let resp = client.get(&feed_conf.url) + .header("If-Modified-Since", &last_fetch_rfc2822) + .send() + .await?; + if resp.status() == 304 { + info!("No changes since last fetch at {}", &last_fetch_rfc2822); + Ok(None) + } else { + let feed = parser::parse(&resp.bytes().await?[..])?; + debug!("{:#?}", feed); + Ok(Some(feed)) + } +} |