diff options
author | Jakob Stendahl <jakob.stendahl@outlook.com> | 2022-02-14 21:19:14 +0100 |
---|---|---|
committer | Jakob Stendahl <jakob.stendahl@outlook.com> | 2022-02-14 21:19:14 +0100 |
commit | 78950762a70bffb91255f1c1ce2594a071f0fa84 (patch) | |
tree | 3fc173f383a69f5dc544caab091869d0984625d6 /src/rss_utils.rs | |
parent | fa01748d66089b8b2c33db3973f14b678361f739 (diff) | |
download | RSS-watcher-0.3.0.tar.gz RSS-watcher-0.3.0.zip |
:sparkles: Add many more fields that can be used, improve documentation and improve file structurev0.3.0
Diffstat (limited to 'src/rss_utils.rs')
-rw-r--r-- | src/rss_utils.rs | 219 |
1 files changed, 219 insertions, 0 deletions
diff --git a/src/rss_utils.rs b/src/rss_utils.rs new file mode 100644 index 0000000..bce412d --- /dev/null +++ b/src/rss_utils.rs @@ -0,0 +1,219 @@ +use crate::database::FeedConf; + +use log::{debug, info}; +use std::error::Error; +use feed_rs::parser; +use feed_rs::model; +use chrono::prelude::{Utc,DateTime}; +use html2md; +extern crate mime; + +/** + * Extract text field from Option + */ +fn extract_text(text: &Option<model::Text>, field: &str) -> String { + if text.is_none() { return String::from(format!("Field {:#?} was not in feed", field)); } + let field = text.as_ref().unwrap(); + match (field.content_type.type_(), field.content_type.subtype()) { + (mime::TEXT, mime::HTML) => return html2md::parse_html(field.content.as_ref()), + (mime::TEXT, mime::PLAIN) => return field.content.to_owned(), + _ => return String::from(format!("Unknown field content type {:#?}", field.content_type)), + } +} + +/** + * Extract string field from Option + */ +fn extract_string(text: &Option<String>, field: &str) -> String { + if text.is_none() { return String::from(format!("Field {:#?} was not in feed", field)); } + return text.as_ref().unwrap().to_owned(); +} + +/** + * Extract string field from Option + */ +fn extract_datetime(date: &Option<DateTime<Utc>>, field: &str) -> String { + if date.is_none() { return String::from(format!("Field {:#?} was not in feed", field)); } + return date.unwrap().to_rfc2822().replace("+0000", "UTC"); +} + +/** + * Turn a vector of feed_rs::model::Person into markdown. + */ +fn person_vec_to_md(person_vec: &Vec<model::Person>) -> String { + let mut md_str = "".to_owned(); + + for (i, person) in person_vec.iter().enumerate() { + if person.uri.is_some() && person.email.is_some() { + md_str.push_str(format!("[{}]({}) - [homepage]({})", + person.name, + person.email.as_ref().unwrap(), + person.uri.as_ref().unwrap() + ).as_str()); + } else if person.uri.is_some() { + md_str.push_str(format!("[{}]({})", + person.name, + person.uri.as_ref().unwrap(), + ).as_str()); + } else if person.email.is_some() { + md_str.push_str(format!("[{}]({})", + person.name, + person.email.as_ref().unwrap(), + ).as_str()); + } else { + md_str.push_str(&person.name); + } + if i < (person_vec.len() - 1) { md_str.push_str(", "); } + } + return md_str; +} + +/** + * Turn a vector of feed_rs::model::Link into markdown. + */ +fn link_vec_to_md(link_vec: &Vec<model::Link>) -> String { + let mut md_str = "".to_owned(); + + for (i, link) in link_vec.iter().enumerate() { + if link.title.is_some() { + md_str.push_str(format!("[{}]({})", + &link.title.as_ref().unwrap(), + &link.href).as_str()); + } else if link.rel.is_some() { + md_str.push_str(format!("[{}]({})", + &link.rel.as_ref().unwrap(), + &link.href).as_str()); + } else { + md_str.push_str(format!("[{}]({})", + &link.href, + &link.href).as_str()); + } + if i < (link_vec.len() - 1) { md_str.push_str(", "); } + } + return md_str; +} + +/** + * Turn a vector of feed_rs::model::Category into markdown. + */ +fn category_vec_to_md(category_vec: &Vec<model::Category>) -> String { + let mut md_str = "".to_owned(); + + for (i, category) in category_vec.iter().enumerate() { + if category.label.is_some() { + md_str.push_str(category.label.as_ref().unwrap()); + } else { + md_str.push_str(&category.term); + } + if i < (category_vec.len() - 1) { md_str.push_str(", "); } + } + return md_str; +} + +/** + * This will replace a given field with the appropriate formatted string from + * the rss feed/entry/item. + */ +fn fill_template_field(field: &str, entry: &model::Entry, feed: &model::Feed) -> String { + match field { + "id" => return feed.id.to_owned(), + "title" => return extract_text(&feed.title, field).to_owned(), + "updated" => return extract_datetime(&feed.updated, field).to_owned(), + "authors" => return person_vec_to_md(&feed.authors).to_owned(), + "description" => return extract_text(&feed.description, field).to_owned(), + "links" => return link_vec_to_md(&feed.links).to_owned(), + "categories" => return category_vec_to_md(&feed.categories).to_owned(), + "contributors" => return person_vec_to_md(&feed.contributors).to_owned(), + "language" => return extract_string(&feed.language, field).to_owned(), + "published" => return extract_datetime(&feed.published, field).to_owned(), + "rights" => return extract_text(&feed.rights, field).to_owned(), + + "entry.id" => return entry.id.to_owned(), + "entry.title" => return extract_text(&entry.title, field).to_owned(), + "entry.updated" => return extract_datetime(&entry.updated, field).to_owned(), + "entry.authors" => return person_vec_to_md(&entry.authors).to_owned(), + "entry.links" => return link_vec_to_md(&entry.links).to_owned(), + "entry.summary" => return extract_text(&entry.summary, field).to_owned(), + "entry.categories" => return category_vec_to_md(&entry.categories).to_owned(), + "entry.contributors" => return person_vec_to_md(&entry.contributors).to_owned(), + "entry.published" => return extract_datetime(&entry.published, field).to_owned(), + "entry.source" => return extract_string(&entry.source, field).to_owned(), + "entry.rights" => return extract_text(&entry.rights, field).to_owned(), + _ => return String::from(format!("Unknown field {:#?}", field)) + } +} + +/** + * Method that escapes some characters that would break json spec, and also escape + * special HTML characters. + */ +pub fn escape(input: String) -> String { + return input.replace("\\","\\\\") + .replace("\"", "\\\"") + .replace("\n", "\\n") + .replace("<", "<") + .replace(">", ">") + .replace("&", "$amp;"); +} + +/** + * This will find fields in the template string and use fill_template_field + * to replace the tags with formatted text from the rss feed/entry/item. + * It does use the escape function on the string it returns. + */ +pub fn fill_template(template_str: &str, entry: &model::Entry, feed: &model::Feed) -> String { + let mut filled_str = "".to_owned(); + + let mut l_bracket_n = 0; + let mut r_bracket_n = 0; + let mut field = "".to_owned(); + + for c in template_str.chars() { + if l_bracket_n > 1 { + if c == '}' { + r_bracket_n += 1; + if r_bracket_n > 1 { + filled_str.push_str(fill_template_field(&field, + &entry, + &feed).as_str()); + field = "".to_owned(); + r_bracket_n = 0; + l_bracket_n = 0; + } + } else { + field.push(c); + } + } else if c == '{' { + l_bracket_n += 1; + if l_bracket_n > 1 { field = "".to_owned(); } + } else { + l_bracket_n = 0; + filled_str.push(c); + } + } + return escape(filled_str).to_owned(); +} + +/** + * Function takes a FeedConf struct, and makes a get request to fetch + * the feed. It then uses feed_rs to parse that feed and returns that + * parsed feed. + */ +pub async fn fetch_feed(feed_conf: &FeedConf, last_fetch_time: DateTime<Utc>) -> Result<Option<model::Feed>, Box<dyn Error>> { + info!("Fetching feed \"{}\"", &feed_conf.url); + let client = reqwest::Client::new(); + let last_fetch_rfc2822 = last_fetch_time.to_rfc2822().replace("+0000", "GMT"); + debug!("Using header \"If-Modified-Since {:?}\"", &last_fetch_rfc2822); + let resp = client.get(&feed_conf.url) + .header("If-Modified-Since", &last_fetch_rfc2822) + .send() + .await?; + if resp.status() == 304 { + info!("No changes since last fetch at {}", &last_fetch_rfc2822); + Ok(None) + } else { + let feed = parser::parse(&resp.bytes().await?[..])?; + debug!("{:#?}", feed); + Ok(Some(feed)) + } +} |