aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJakob Stendahl <jakob.stendahl@outlook.com>2022-02-14 21:19:14 +0100
committerJakob Stendahl <jakob.stendahl@outlook.com>2022-02-14 21:19:14 +0100
commit78950762a70bffb91255f1c1ce2594a071f0fa84 (patch)
tree3fc173f383a69f5dc544caab091869d0984625d6
parentfa01748d66089b8b2c33db3973f14b678361f739 (diff)
downloadRSS-watcher-78950762a70bffb91255f1c1ce2594a071f0fa84.tar.gz
RSS-watcher-78950762a70bffb91255f1c1ce2594a071f0fa84.zip
:sparkles: Add many more fields that can be used, improve documentation and improve file structurev0.3.0
-rw-r--r--Readme.md91
-rw-r--r--src/main.rs154
-rw-r--r--src/notify.rs78
-rw-r--r--src/rss_utils.rs219
4 files changed, 389 insertions, 153 deletions
diff --git a/Readme.md b/Readme.md
index 4740f5f..92d6984 100644
--- a/Readme.md
+++ b/Readme.md
@@ -2,34 +2,99 @@
Simple rust app that periodically checks RSS feeds for new entries,
and pushes those to Gotify.
+## Requirements
+- A MySQL server, with a database and credentials prepared that the app can
+ use.
+- Either Rust and Cargo installed, or Docker.
+
## Usage
-This can be run using docker or locally, to run with docker you can
+### Docker
+The simplest way to run this is using the docker image on
+[docker hub](https://hub.docker.com/r/jakobst1n/rss-watcher) (personally I am
+running it on a kubernetes cluster). It can be run with the command below,
+make sure to set the database credentials so they fit your database.
```
$ docker run -it --rm -e DB_HOST=<database host> -e DB_USER=<database user> \
-e DB_PASS=<database password> -e DB_BASE=<database name> \
--restart=unless-stopped jakobst1n/rss-watcher
```
-To run locally you need to set all those environment variables, and then
-you can run it with
+
+### Locally
+If you want to run it without docker:
+- Make sure Rust and Cargo are installed.
+- Set the environment variables (there are a lot of ways to do this,
+ `export VAR_NAME=VAR_VALUE`, set them before the command, make a small shell
+ script to start it, etc...)
+- Compile and run the app:
```
$ RUST_LOG=info cargo run
```
-All feed have to be defined in the database, you should start the app and let
-it create the table(s) itself. Then you can add feeds like this
+### First start
+When you start the app for the first time, it will create a table in the
+database. Later versions will run migrations between versions automatically,
+if that ever becomes necessary.
+
+When the table is created, you can start to add the
+feeds you want notifications for. The app starts each iteration by checking
+the database, so in the simplest form you can insert new feeds like this:
```sql
INSERT INTO `rss-watcher-feeds` (url, push_url, push_token)
VALUES (<the url of the RSS/Atom feed>,
<root url of gotify server e.g. https://push.example.com>,
<token for gotify app>);
```
-You can also specify what fields should be used in the title and message fields
-of the gotify notification by changing the `title` and `message` columns.
-By default they are set to `{{title}}` and `{{summary}}` respectively.
-Also, if you set the env var `FETCH_INTERVAL`, it will change how often it
-will poll for new changes (in ms).
+## Configuration
+### Feeds
+The feed config in the database is quite simple. You can, however, override
+how the feed will be sent to gotify by adjusting the `title` and `message`
+fields in the database. By default `title` is set to
+`{{title}}: {{entry.title}}` and `message` is set to `{{entry.summary}}`.
+
+The possible template fields are:
+| Field |
+|------------------------|
+| {{id}} |
+| {{title}} |
+| {{updated}} |
+| {{authors}} |
+| {{description}} |
+| {{links}} |
+| {{categories}} |
+| {{contributors}} |
+| {{language}} |
+| {{published}} |
+| {{rights}} |
+| {{entry.id}} |
+| {{entry.title}} |
+| {{entry.updated}} |
+| {{entry.authors}} |
+| {{entry.links}} |
+| {{entry.summary}} |
+| {{entry.categories}} |
+| {{entry.contributors}} |
+| {{entry.published}} |
+| {{entry.source}} |
+| {{entry.rights}} |
+
+The best way to find the ones you want is to experiment a bit. Here are some
+resources that describe what the fields contain:
+- [https://validator.w3.org/feed/docs/rss2.html](https://validator.w3.org/feed/docs/rss2.html)
+- [https://validator.w3.org/feed/docs/atom.html](https://validator.w3.org/feed/docs/atom.html)
+- [https://docs.rs/feed-rs/1.0.0/feed_rs/model/struct.Feed.html](https://docs.rs/feed-rs/1.0.0/feed_rs/model/struct.Feed.html)
+- [https://docs.rs/feed-rs/1.0.0/feed_rs/model/struct.Entry.html](https://docs.rs/feed-rs/1.0.0/feed_rs/model/struct.Entry.html)
+
+### Environment variables
+| Variable | Description |
+|----------------|-----------------------------------------------------------------------|
+| FETCH_INTERVAL | How often the app should poll for new changes in ms (defaults to 2 m) |
+| DB_HOST | Hostname/FQDN/IP address of the database |
+| DB_BASE | The database we should use |
+| DB_USER | The user that will be used to access the database |
+| DB_PASS | The password that will be used to access the database |
+| RUST_LOG | Log level, for docker this defaults to `info` |
+
-## Todo
-- Extract more RSS fields.
-- Deal with multiple links.
+## Issues
+Please make an issue if you find a bug, or if something is weird :)
diff --git a/src/main.rs b/src/main.rs
index 02b386f..d4f7f1e 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,126 +1,16 @@
+mod rss_utils;
+mod notify;
mod database;
use database::FeedConf;
use std::env;
use std::process;
-use std::error::Error;
-use feed_rs::parser;
-use feed_rs::model::Feed;
-use feed_rs::model::Entry;
-use feed_rs::model::Text;
-use chrono::prelude::{Utc,DateTime,NaiveDateTime};
-use std::time::Duration;
-use tokio::{time};
use log::{debug, info, warn, error};
-use html2md;
-extern crate mime;
-
-/**
- * Extract text field from Option
- */
-fn extract_text(text: &Option<Text>) -> String {
- if text.is_none() { return String::from("Text field not found"); }
- let field = text.as_ref().unwrap();
- match (field.content_type.type_(), field.content_type.subtype()) {
- (mime::TEXT, mime::HTML) => return html2md::parse_html(field.content.as_ref()),
- (mime::TEXT, mime::PLAIN) => return field.content.to_owned(),
- _ => return String::from(format!("Unknown field content type {:#?}", field.content_type)),
- }
-}
-
-/**
- * This will extract fields from RSS entry, and replace special tags
- * from the input string with those entries.
- */
-fn replace_tags(input: String, entry: &Entry) -> String {
- let mut out = input;
- out = out.replace("{{id}}", entry.id.as_ref());
- out = out.replace("{{title}}", extract_text(&entry.title).as_ref());
- out = out.replace("{{summary}}", extract_text(&entry.summary).as_ref());
- return out;
-}
-
-/**
- * Method that escapes some characters that would break json spec, and also escape
- * special HTML characters.
- */
-fn escape(input: String) -> String {
- return input.replace("\\","\\\\")
- .replace("\"", "\\\"")
- .replace("\n", "\\n")
- .replace("<", "&lt;")
- .replace(">", "&gt;")
- .replace("&", "$amp;");
-}
-
-/**
- * Push feed entry to gotify
- */
-async fn gotify_push(entry: &Entry, feed_conf: &FeedConf) -> Result<(), reqwest::Error> {
- let uri = format!("{}/message", &feed_conf.push_url);
-
- // Extract content and create title and message strings
- let mut title_content = feed_conf.title.to_owned();
- let mut message_content = feed_conf.message.to_owned();
- title_content = replace_tags(title_content, entry);
- message_content = replace_tags(message_content, entry);
- // Build json string that will be sent as payload to gotify
- let mut req = "{".to_owned();
-
- req.push_str(format!("\"title\":\"{}\"", escape(title_content.to_owned())).as_str());
- req.push_str(format!(",\"message\":\"{}\"", escape(message_content.to_owned())).as_str());
- req.push_str(",\"priority\":1");
-
- req.push_str(",\"extras\": {");
- req.push_str("\"client::display\": { \"contentType\": \"text/markdown\" }");
- if entry.links.len() > 0 {
- req.push_str(",\"client::notification\": { \"click\": { \"url\": \"");
- req.push_str(escape(entry.links[0].href.to_owned()).as_str());
- req.push_str("\"}}")
- }
- req.push_str("}}");
-
- // Send request to gotify
- let client = reqwest::Client::new();
- let res = client.post(uri)
- .query(&[("token",&feed_conf.push_token)])
- .body(req.to_owned())
- .header("Content-Type", "application/json")
- .send()
- .await?;
- if res.status().is_success() {
- info!("Sent notification with title \"{}\"", title_content);
- } else {
- error!("payload: {}", req);
- error!("Could not send notification... {:#?}", res);
- }
- Ok(())
-}
-
-/**
- * Function takes a FeedConf struct, and makes a get request to fetch
- * the feed. It then uses feed_rs to parse that feed and returns that
- * parsed feed.
- */
-async fn fetch_feed(feed_conf: &FeedConf, last_fetch_time: DateTime<Utc>) -> Result<Option<Feed>, Box<dyn Error>> {
- info!("Fetching feed \"{}\"", &feed_conf.url);
- let client = reqwest::Client::new();
- let last_fetch_rfc2822 = last_fetch_time.to_rfc2822().replace("+0000", "GMT");
- debug!("Using header \"If-Modified-Since {:?}\"", &last_fetch_rfc2822);
- let resp = client.get(&feed_conf.url)
- .header("If-Modified-Since", &last_fetch_rfc2822)
- .send()
- .await?;
- if resp.status() == 304 {
- info!("No changes since last fetch at {}", &last_fetch_rfc2822);
- Ok(None)
- } else {
- let feed = parser::parse(&resp.bytes().await?[..])?;
- debug!("{:#?}", feed);
- Ok(Some(feed))
- }
-}
+use chrono::prelude::{Utc,DateTime,NaiveDateTime};
+use tokio::{time};
+use std::time::Duration;
+use feed_rs::model::Feed;
/**
* This calls fetch_feed, and figures out whether it succeeded or not.
@@ -139,39 +29,24 @@ async fn get_feed(feed_conf: &FeedConf) -> bool {
debug!("Using last_fetch_time {:?}", last_fetch_time.to_owned());
// Fetch the feed and parse it
- let res = fetch_feed(&feed_conf, last_fetch_time).await;
- let feed: Option<Feed>;
+ let res = rss_utils::fetch_feed(&feed_conf, last_fetch_time).await;
+ let feed_res: Option<Feed>;
match res {
Err(e) => {
error!("Could not fetch feed ({:?})", e);
return false;
},
- Ok(x) => feed = x
+ Ok(x) => feed_res = x
}
// If feed is empty (we got status code 304), we should skip any further
// processing
- if let None = feed { return false; }
+ if let None = feed_res { return false; }
+ let feed = feed_res.unwrap();
// Process all entries in the feed
- for entry in feed.unwrap().entries {
- // Skip sending notification if the publish time is before the
- // last_fetch_time
- if let Some(x) = entry.published {
- if last_fetch_time > x {
- info!("Skipping entry that was published at {}", x);
- continue;
- }
- }
- // Attempt to send notification, give up feed for this main loop
- // iteration without saving last_fetch_time
- if let Err(e) = gotify_push(&entry, &feed_conf).await {
- error!("Could not send push notification ({:#?})", e);
- return false;
- }
- }
-
- return true;
+ let res_notif = notify::all(&feed, &feed_conf, last_fetch_time).await;
+ return res_notif;
}
/**
@@ -199,8 +74,7 @@ async fn main_loop() {
for feed in feeds {
let time_now = Utc::now();
- let res = get_feed(&feed).await;
- if res {
+ if get_feed(&feed).await {
database::update_last_fetch(feed.id, time_now.timestamp(), &mut conn);
}
}
diff --git a/src/notify.rs b/src/notify.rs
new file mode 100644
index 0000000..19915db
--- /dev/null
+++ b/src/notify.rs
@@ -0,0 +1,78 @@
+use crate::database::FeedConf;
+use crate::rss_utils;
+
+use log::{info, error};
+use feed_rs::model::Feed;
+use chrono::prelude::{Utc,DateTime};
+
+/**
+ * Push one notification to gotify.
+ *
+ * The request is a POST to "<push_url>/message" with the push token passed as
+ * the "token" query parameter and a JSON body that is assembled by hand below.
+ *
+ * `title`, `message` and `link` are inserted into the JSON text verbatim, this
+ * function does no escaping of its own, so callers have to pass strings that
+ * are already safe to place inside a JSON string literal.
+ *
+ * The payload always uses priority 1 and marks the content as text/markdown.
+ * When `link` is Some, it is added as the click url of the notification.
+ *
+ * Returns Err only when sending the request itself fails. A response with a
+ * non-success status code is logged (together with the payload) but the
+ * function still returns Ok(()).
+ */
+async fn gotify(title: String, message: String, link: Option<String>, feed_conf: &FeedConf) -> Result<(), reqwest::Error> {
+ let uri = format!("{}/message", &feed_conf.push_url);
+
+ // Build json string that will be sent as payload to gotify
+ let mut req = "{".to_owned();
+
+ req.push_str(format!("\"title\":\"{}\"", title).as_str());
+ req.push_str(format!(",\"message\":\"{}\"", message).as_str());
+ req.push_str(",\"priority\":1");
+
+ req.push_str(",\"extras\": {");
+ req.push_str("\"client::display\": { \"contentType\": \"text/markdown\" }");
+ if link.is_some() {
+ req.push_str(",\"client::notification\": { \"click\": { \"url\": \"");
+ req.push_str(link.unwrap().as_str());
+ req.push_str("\"}}")
+ }
+ req.push_str("}}");
+
+ // Send request to gotify
+ let client = reqwest::Client::new();
+ let res = client.post(uri)
+ .query(&[("token",&feed_conf.push_token)])
+ .body(req.to_owned())
+ .header("Content-Type", "application/json")
+ .send()
+ .await?;
+ if res.status().is_success() {
+ info!("Sent notification with title \"{}\"", title);
+ } else {
+ error!("payload: {}", req);
+ error!("Could not send notification... {:#?}", res);
+ }
+ Ok(())
+}
+
+/**
+ * Push all new entries in the feed as per the configuration.
+ *
+ * An entry is skipped when it has a publish time that lies before
+ * `last_fetch_time`. Entries without a publish time are never skipped.
+ *
+ * For every remaining entry the `title` and `message` templates of the feed
+ * configuration are filled in, and the href of the first link of the entry
+ * (if there is one) is escaped and used as the notification link.
+ *
+ * A failed push does not stop the loop, the remaining entries are still
+ * processed. The return value is true when no push returned an error and
+ * false otherwise.
+ * NOTE(review): gotify() returns Ok for non-success HTTP status codes, so
+ * those do not turn the result false - confirm that this is intended.
+ */
+pub async fn all(feed: &Feed, feed_conf: &FeedConf, last_fetch_time: DateTime<Utc>) -> bool {
+ let mut all_notifs_successfull = true;
+
+ // Skip sending notification if the publish time is before the
+ // last_fetch_time
+ for entry in &feed.entries {
+ if let Some(x) = entry.published {
+ if last_fetch_time > x {
+ info!("Skipping entry that was published at {}", x);
+ continue;
+ }
+ }
+
+ // Get the fields we want to send to gotify
+ let title = rss_utils::fill_template(&feed_conf.title, &entry, &feed);
+ let message = rss_utils::fill_template(&feed_conf.message, &entry, &feed);
+ let mut link: Option<String> = None;
+ if entry.links.len() > 0 {
+ link = Some(rss_utils::escape(entry.links[0].href.to_owned()));
+ }
+
+ if let Err(e) = gotify(title, message, link, &feed_conf).await {
+ error!("Could not send push notification ({:#?})", e);
+ all_notifs_successfull = false;
+ }
+ }
+
+ return all_notifs_successfull;
+}
diff --git a/src/rss_utils.rs b/src/rss_utils.rs
new file mode 100644
index 0000000..bce412d
--- /dev/null
+++ b/src/rss_utils.rs
@@ -0,0 +1,219 @@
+use crate::database::FeedConf;
+
+use log::{debug, info};
+use std::error::Error;
+use feed_rs::parser;
+use feed_rs::model;
+use chrono::prelude::{Utc,DateTime};
+use html2md;
+extern crate mime;
+
+/**
+ * Extract text field from Option
+ *
+ * `text` is the optional text value and `field` is the template field name,
+ * which is only used in the placeholder message.
+ *
+ * - None: returns the placeholder `Field "<field>" was not in feed` (the name
+ * is Debug-formatted, hence the quotes).
+ * - text/html: the content is passed through html2md::parse_html.
+ * - text/plain: the content is returned unchanged.
+ * - anything else: returns a message naming the unknown content type.
+ *
+ * Note that the `field` parameter is shadowed by the unwrapped text value
+ * after the None check.
+ */
+fn extract_text(text: &Option<model::Text>, field: &str) -> String {
+ if text.is_none() { return String::from(format!("Field {:#?} was not in feed", field)); }
+ let field = text.as_ref().unwrap();
+ match (field.content_type.type_(), field.content_type.subtype()) {
+ (mime::TEXT, mime::HTML) => return html2md::parse_html(field.content.as_ref()),
+ (mime::TEXT, mime::PLAIN) => return field.content.to_owned(),
+ _ => return String::from(format!("Unknown field content type {:#?}", field.content_type)),
+ }
+}
+
+/**
+ * Extract string field from Option
+ *
+ * Returns a copy of the string when it is present. Otherwise returns the
+ * placeholder `Field "<field>" was not in feed`, where `field` is the
+ * template field name (Debug-formatted, hence the quotes).
+ */
+fn extract_string(text: &Option<String>, field: &str) -> String {
+ if text.is_none() { return String::from(format!("Field {:#?} was not in feed", field)); }
+ return text.as_ref().unwrap().to_owned();
+}
+
+/**
+ * Extract datetime field from Option
+ *
+ * When the date is present it is formatted with to_rfc2822(), with the
+ * "+0000" offset replaced by "UTC". Otherwise returns the placeholder
+ * `Field "<field>" was not in feed`, where `field` is the template field
+ * name (Debug-formatted, hence the quotes).
+ */
+fn extract_datetime(date: &Option<DateTime<Utc>>, field: &str) -> String {
+ if date.is_none() { return String::from(format!("Field {:#?} was not in feed", field)); }
+ return date.unwrap().to_rfc2822().replace("+0000", "UTC");
+}
+
+/**
+ * Turn a vector of feed_rs::model::Person into markdown.
+ *
+ * Each person is rendered depending on which optional fields are set:
+ * - uri and email: `[name](email) - [homepage](uri)`
+ * - only uri: `[name](uri)`
+ * - only email: `[name](email)`
+ * - neither: just the name
+ *
+ * The persons are separated by ", ". An empty vector gives an empty string.
+ * NOTE(review): the email is used as link target as-is, without a "mailto:"
+ * prefix - confirm that this renders as intended.
+ */
+fn person_vec_to_md(person_vec: &Vec<model::Person>) -> String {
+ let mut md_str = "".to_owned();
+
+ for (i, person) in person_vec.iter().enumerate() {
+ if person.uri.is_some() && person.email.is_some() {
+ md_str.push_str(format!("[{}]({}) - [homepage]({})",
+ person.name,
+ person.email.as_ref().unwrap(),
+ person.uri.as_ref().unwrap()
+ ).as_str());
+ } else if person.uri.is_some() {
+ md_str.push_str(format!("[{}]({})",
+ person.name,
+ person.uri.as_ref().unwrap(),
+ ).as_str());
+ } else if person.email.is_some() {
+ md_str.push_str(format!("[{}]({})",
+ person.name,
+ person.email.as_ref().unwrap(),
+ ).as_str());
+ } else {
+ md_str.push_str(&person.name);
+ }
+ if i < (person_vec.len() - 1) { md_str.push_str(", "); }
+ }
+ return md_str;
+}
+
+/**
+ * Turn a vector of feed_rs::model::Link into markdown.
+ *
+ * Every link becomes `[text](href)`, where the text is the title of the link
+ * if it has one, else its rel value if it has one, else the href itself.
+ *
+ * The links are separated by ", ". An empty vector gives an empty string.
+ */
+fn link_vec_to_md(link_vec: &Vec<model::Link>) -> String {
+ let mut md_str = "".to_owned();
+
+ for (i, link) in link_vec.iter().enumerate() {
+ if link.title.is_some() {
+ md_str.push_str(format!("[{}]({})",
+ &link.title.as_ref().unwrap(),
+ &link.href).as_str());
+ } else if link.rel.is_some() {
+ md_str.push_str(format!("[{}]({})",
+ &link.rel.as_ref().unwrap(),
+ &link.href).as_str());
+ } else {
+ md_str.push_str(format!("[{}]({})",
+ &link.href,
+ &link.href).as_str());
+ }
+ if i < (link_vec.len() - 1) { md_str.push_str(", "); }
+ }
+ return md_str;
+}
+
+/**
+ * Turn a vector of feed_rs::model::Category into markdown.
+ *
+ * Every category is rendered as its label when it has one, and as its term
+ * otherwise. The result is plain text without any markdown markup.
+ *
+ * The categories are separated by ", ". An empty vector gives an empty
+ * string.
+ */
+fn category_vec_to_md(category_vec: &Vec<model::Category>) -> String {
+ let mut md_str = "".to_owned();
+
+ for (i, category) in category_vec.iter().enumerate() {
+ if category.label.is_some() {
+ md_str.push_str(category.label.as_ref().unwrap());
+ } else {
+ md_str.push_str(&category.term);
+ }
+ if i < (category_vec.len() - 1) { md_str.push_str(", "); }
+ }
+ return md_str;
+}
+
+/**
+ * This will replace a given field with the appropriate formatted string from
+ * the rss feed/entry/item.
+ *
+ * `field` is the text found between the double braces of a template tag.
+ * Names without a prefix are read from the feed, names starting with
+ * "entry." are read from the entry. The name has to match exactly, no
+ * trimming or case folding is done here.
+ *
+ * Optional values that are missing give the placeholder produced by the
+ * extract_* helpers. A name that is not listed below gives the text
+ * `Unknown field "<field>"`.
+ */
+fn fill_template_field(field: &str, entry: &model::Entry, feed: &model::Feed) -> String {
+ match field {
+ "id" => return feed.id.to_owned(),
+ "title" => return extract_text(&feed.title, field).to_owned(),
+ "updated" => return extract_datetime(&feed.updated, field).to_owned(),
+ "authors" => return person_vec_to_md(&feed.authors).to_owned(),
+ "description" => return extract_text(&feed.description, field).to_owned(),
+ "links" => return link_vec_to_md(&feed.links).to_owned(),
+ "categories" => return category_vec_to_md(&feed.categories).to_owned(),
+ "contributors" => return person_vec_to_md(&feed.contributors).to_owned(),
+ "language" => return extract_string(&feed.language, field).to_owned(),
+ "published" => return extract_datetime(&feed.published, field).to_owned(),
+ "rights" => return extract_text(&feed.rights, field).to_owned(),
+
+ "entry.id" => return entry.id.to_owned(),
+ "entry.title" => return extract_text(&entry.title, field).to_owned(),
+ "entry.updated" => return extract_datetime(&entry.updated, field).to_owned(),
+ "entry.authors" => return person_vec_to_md(&entry.authors).to_owned(),
+ "entry.links" => return link_vec_to_md(&entry.links).to_owned(),
+ "entry.summary" => return extract_text(&entry.summary, field).to_owned(),
+ "entry.categories" => return category_vec_to_md(&entry.categories).to_owned(),
+ "entry.contributors" => return person_vec_to_md(&entry.contributors).to_owned(),
+ "entry.published" => return extract_datetime(&entry.published, field).to_owned(),
+ "entry.source" => return extract_string(&entry.source, field).to_owned(),
+ "entry.rights" => return extract_text(&entry.rights, field).to_owned(),
+ _ => return String::from(format!("Unknown field {:#?}", field))
+ }
+}
+
+/**
+ * Method that escapes some characters that would break json spec, and also escape
+ * special HTML characters.
+ *
+ * The replacements are applied in this order:
+ * backslash -> two backslashes, double quote -> backslash + double quote,
+ * newline -> the two characters backslash + n, "<" -> "&lt;", ">" -> "&gt;",
+ * "&" -> "$amp;".
+ *
+ * NOTE(review): because the "&" replacement runs last, it also rewrites the
+ * "&" of the "&lt;" and "&gt;" produced one step earlier, so "<" ends up as
+ * "$amp;lt;" and ">" as "$amp;gt;". The "$amp;" literal also looks like a
+ * typo for "&amp;". Presumably "&" was meant to be replaced first and with
+ * "&amp;" - confirm and fix.
+ * Other control characters (tab, carriage return, ...) are not escaped.
+ */
+pub fn escape(input: String) -> String {
+ return input.replace("\\","\\\\")
+ .replace("\"", "\\\"")
+ .replace("\n", "\\n")
+ .replace("<", "&lt;")
+ .replace(">", "&gt;")
+ .replace("&", "$amp;");
+}
+
+/**
+ * This will find fields in the template string and use fill_template_field
+ * to replace the tags with formatted text from the rss feed/entry/item.
+ * It does use the escape function on the string it returns.
+ *
+ * The template is scanned one character at a time:
+ * - Two consecutive "{" open a tag. Everything up to the second "}" seen
+ * after that is collected as the field name, the "}" characters themselves
+ * are not part of the name and do not have to be adjacent.
+ * - The field name is passed on exactly as written, whitespace inside the
+ * braces is not trimmed.
+ * - Text outside of tags is copied to the output, except for "{" characters,
+ * which are never copied. A single "{" is therefore dropped.
+ * - A tag that is still open at the end of the template produces no output.
+ *
+ * escape() is applied once to the complete result, so it affects both the
+ * literal template text and the substituted field values.
+ */
+pub fn fill_template(template_str: &str, entry: &model::Entry, feed: &model::Feed) -> String {
+ let mut filled_str = "".to_owned();
+
+ let mut l_bracket_n = 0;
+ let mut r_bracket_n = 0;
+ let mut field = "".to_owned();
+
+ for c in template_str.chars() {
+ if l_bracket_n > 1 {
+ if c == '}' {
+ r_bracket_n += 1;
+ if r_bracket_n > 1 {
+ filled_str.push_str(fill_template_field(&field,
+ &entry,
+ &feed).as_str());
+ field = "".to_owned();
+ r_bracket_n = 0;
+ l_bracket_n = 0;
+ }
+ } else {
+ field.push(c);
+ }
+ } else if c == '{' {
+ l_bracket_n += 1;
+ if l_bracket_n > 1 { field = "".to_owned(); }
+ } else {
+ l_bracket_n = 0;
+ filled_str.push(c);
+ }
+ }
+ return escape(filled_str).to_owned();
+}
+
+/**
+ * Function takes a FeedConf struct, and makes a get request to fetch
+ * the feed. It then uses feed_rs to parse that feed and returns that
+ * parsed feed.
+ *
+ * The request carries an "If-Modified-Since" header built from
+ * `last_fetch_time`, formatted with to_rfc2822() and with "+0000" replaced
+ * by "GMT".
+ *
+ * Returns Ok(None) when the server answers with status 304, and
+ * Ok(Some(feed)) when the response body could be parsed. Errors from sending
+ * the request, reading the body or parsing the feed are returned as Err.
+ *
+ * NOTE(review): every status other than 304 goes to the parser, including
+ * error responses - presumably those then fail with a parse error; confirm
+ * whether an explicit status check is wanted.
+ */
+pub async fn fetch_feed(feed_conf: &FeedConf, last_fetch_time: DateTime<Utc>) -> Result<Option<model::Feed>, Box<dyn Error>> {
+ info!("Fetching feed \"{}\"", &feed_conf.url);
+ let client = reqwest::Client::new();
+ let last_fetch_rfc2822 = last_fetch_time.to_rfc2822().replace("+0000", "GMT");
+ debug!("Using header \"If-Modified-Since {:?}\"", &last_fetch_rfc2822);
+ let resp = client.get(&feed_conf.url)
+ .header("If-Modified-Since", &last_fetch_rfc2822)
+ .send()
+ .await?;
+ if resp.status() == 304 {
+ info!("No changes since last fetch at {}", &last_fetch_rfc2822);
+ Ok(None)
+ } else {
+ let feed = parser::parse(&resp.bytes().await?[..])?;
+ debug!("{:#?}", feed);
+ Ok(Some(feed))
+ }
+}