Skip to content

Commit

Permalink
Take namespaces declared locally into account
Browse files Browse the repository at this point in the history
  • Loading branch information
andy128k committed Aug 12, 2023
1 parent 69336ea commit df1118a
Show file tree
Hide file tree
Showing 5 changed files with 142 additions and 83 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Expand Up @@ -2,6 +2,8 @@

## 2.x.x - Unreleased

- Take into account namespaces declared locally [`#155`](https://github.com/rust-syndication/rss/pull/155)

## 2.0.5 - 2023-07-26

- Upgrade `quick_xml` to `0.30` [`#153`](https://github.com/rust-syndication/rss/pull/153)
Expand Down
90 changes: 49 additions & 41 deletions src/channel.rs
Expand Up @@ -23,13 +23,15 @@ use crate::extension::atom;
use crate::extension::dublincore;
use crate::extension::itunes;
use crate::extension::syndication;
use crate::extension::util::{extension_name, parse_extension};
use crate::extension::util::{
extension_entry, extension_name, parse_extension_element, read_namespace_declarations,
};
use crate::extension::ExtensionMap;
use crate::image::Image;
use crate::item::Item;
use crate::textinput::TextInput;
use crate::toxml::{ToXml, WriterExt};
use crate::util::{attr_value, decode, element_text, skip};
use crate::util::{decode, element_text, skip};

/// Represents the channel of an RSS feed.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
Expand Down Expand Up @@ -1032,7 +1034,7 @@ impl Channel {
pub fn read_from<R: BufRead>(reader: R) -> Result<Channel, Error> {
let mut reader = Reader::from_reader(reader);
reader.trim_text(true).expand_empty_elements(true);
let mut namespaces = BTreeMap::new();
let namespaces;
let mut buf = Vec::new();

let mut channel: Option<Channel> = None;
Expand All @@ -1047,15 +1049,12 @@ impl Channel {
match reader.read_event_into(&mut buf)? {
Event::Start(element) => match decode(element.name().as_ref(), &reader)?.as_ref() {
"rss" | "rdf:RDF" => {
for attr in element.attributes().with_checks(false).flatten() {
let key = decode(attr.key.as_ref(), &reader)?;
if let Some(ns) = key.strip_prefix("xmlns:") {
namespaces.insert(
ns.to_string(),
attr_value(&attr, &reader)?.to_string(),
);
}
}
namespaces = read_namespace_declarations(
&mut reader,
element.attributes(),
&BTreeMap::new(),
)?
.into_owned();
break;
}
_ => {
Expand Down Expand Up @@ -1192,12 +1191,15 @@ impl Channel {
pub fn from_xml<R: BufRead>(
namespaces: &BTreeMap<String, String>,
reader: &mut Reader<R>,
_: Attributes,
atts: Attributes,
) -> Result<Self, Error> {
let mut channel = Channel::default();
let mut extensions = ExtensionMap::new();
let mut buf = Vec::new();
let mut skip_buf = Vec::new();

let namespaces = read_namespace_declarations(reader, atts, namespaces)?;

loop {
match reader.read_event_into(&mut buf)? {
Event::Start(element) => match decode(element.name().as_ref(), reader)?.as_ref() {
Expand All @@ -1218,7 +1220,8 @@ impl Channel {
channel.text_input = Some(text_input);
}
"item" => {
let item = Item::from_xml(namespaces, reader, element.attributes())?;
let item =
Item::from_xml(namespaces.as_ref(), reader, element.attributes())?;
channel.items.push(item);
}
"title" => {
Expand Down Expand Up @@ -1283,14 +1286,28 @@ impl Channel {
}
},
n => {
if let Some((ns, name)) = extension_name(n) {
parse_extension(
if let Some((prefix, name)) = extension_name(n) {
let scope_namespases = read_namespace_declarations(
reader,
element.attributes(),
ns,
name,
&mut channel.extensions,
namespaces.as_ref(),
)?;
let ext_ns = scope_namespases.get(prefix).map(|s| s.as_str());
let ext = parse_extension_element(reader, element.attributes())?;
match ext_ns {
#[cfg(feature = "atom")]
Some(ns @ atom::NAMESPACE) => {
extension_entry(&mut extensions, ns, name).push(ext);
}
Some(ns @ itunes::NAMESPACE)
| Some(ns @ dublincore::NAMESPACE)
| Some(ns @ syndication::NAMESPACE) => {
extension_entry(&mut extensions, ns, name).push(ext);
}
_ => {
extension_entry(&mut channel.extensions, prefix, name).push(ext)
}
}
} else {
skip(element.name(), reader)?;
}
Expand All @@ -1304,28 +1321,19 @@ impl Channel {
buf.clear();
}

if !channel.extensions.is_empty() {
// Process each of the namespaces we know (note that the values are not removed prior and reused to support pass-through of unknown extensions)
for (prefix, namespace) in namespaces {
match namespace.as_ref() {
#[cfg(feature = "atom")]
atom::NAMESPACE => channel
.extensions
.remove(prefix)
.map(|v| channel.atom_ext = Some(atom::AtomExtension::from_map(v))),
itunes::NAMESPACE => channel.extensions.remove(prefix).map(|v| {
channel.itunes_ext = Some(itunes::ITunesChannelExtension::from_map(v))
}),
dublincore::NAMESPACE => channel.extensions.remove(prefix).map(|v| {
channel.dublin_core_ext = Some(dublincore::DublinCoreExtension::from_map(v))
}),
syndication::NAMESPACE => channel.extensions.remove(prefix).map(|v| {
channel.syndication_ext =
Some(syndication::SyndicationExtension::from_map(v))
}),
_ => None,
};
}
// Process each of the namespaces we know
#[cfg(feature = "atom")]
if let Some(v) = extensions.remove(atom::NAMESPACE) {
channel.atom_ext = Some(atom::AtomExtension::from_map(v));
}
if let Some(v) = extensions.remove(itunes::NAMESPACE) {
channel.itunes_ext = Some(itunes::ITunesChannelExtension::from_map(v));
}
if let Some(v) = extensions.remove(dublincore::NAMESPACE) {
channel.dublin_core_ext = Some(dublincore::DublinCoreExtension::from_map(v));
}
if let Some(v) = extensions.remove(syndication::NAMESPACE) {
channel.syndication_ext = Some(syndication::SyndicationExtension::from_map(v));
}

Ok(channel)
Expand Down
43 changes: 27 additions & 16 deletions src/extension/util.rs
Expand Up @@ -5,6 +5,7 @@
// This program is free software; you can redistribute it and/or modify
// it under the terms of the MIT License and/or Apache 2.0 License.

use std::borrow::Cow;
use std::collections::BTreeMap;
use std::io::BufRead;
use std::str;
Expand All @@ -17,36 +18,46 @@ use crate::error::Error;
use crate::extension::{Extension, ExtensionMap};
use crate::util::{attr_value, decode};

pub fn extension_name(element_name: &str) -> Option<(&str, &str)> {
/// Builds the prefix -> namespace URI map that is in scope for an element.
///
/// The result starts out as a borrow of `base`. Every attribute in `atts`
/// whose name starts with `xmlns:` adds (or overrides) an entry keyed by the
/// part of the name after `xmlns:`. The map is only cloned when the first
/// such declaration is found; otherwise `base` is returned borrowed.
///
/// A bare `xmlns="..."` attribute does not match the `xmlns:` prefix and is
/// therefore not recorded.
///
/// # Errors
///
/// Returns an error if decoding an attribute name or reading the value of an
/// `xmlns:` attribute fails. Attributes for which the iterator itself yields
/// an error are skipped by `flatten()`.
pub(crate) fn read_namespace_declarations<'m, R>(
    reader: &mut Reader<R>,
    mut atts: Attributes,
    base: &'m BTreeMap<String, String>,
) -> Result<Cow<'m, BTreeMap<String, String>>, Error>
where
    R: BufRead,
{
    let mut scope = Cow::Borrowed(base);
    for attribute in atts.with_checks(false).flatten() {
        let attribute_name = decode(attribute.key.as_ref(), &reader)?;
        if let Some(prefix) = attribute_name.strip_prefix("xmlns:") {
            let uri = attr_value(&attribute, &reader)?.to_string();
            // `to_mut` clones the inherited map the first time it is called.
            scope.to_mut().insert(prefix.to_string(), uri);
        }
    }
    Ok(scope)
}

/// Splits a qualified element name such as `dc:creator` into its prefix and
/// local name at the first `:`.
///
/// Returns `None` when the name contains no `:` or when the part before the
/// first `:` is empty. The local name may be empty and may itself contain
/// further `:` characters.
pub(crate) fn extension_name(element_name: &str) -> Option<(&str, &str)> {
    match element_name.split_once(':') {
        Some((prefix, local)) if !prefix.is_empty() => Some((prefix, local)),
        _ => None,
    }
}

pub fn parse_extension<R>(
reader: &mut Reader<R>,
atts: Attributes,
pub(crate) fn extension_entry<'e>(
extensions: &'e mut ExtensionMap,
ns: &str,
name: &str,
extensions: &mut ExtensionMap,
) -> Result<(), Error>
where
R: BufRead,
{
let ext = parse_extension_element(reader, atts)?;

) -> &'e mut Vec<Extension> {
let map = extensions
.entry(ns.to_string())
.or_insert_with(BTreeMap::new);

let items = map.entry(name.to_string()).or_insert_with(Vec::new);
items.push(ext);

Ok(())
map.entry(name.to_string()).or_insert_with(Vec::new)
}

fn parse_extension_element<R: BufRead>(
pub(crate) fn parse_extension_element<R: BufRead>(
reader: &mut Reader<R>,
mut atts: Attributes,
) -> Result<Extension, Error> {
Expand Down
58 changes: 32 additions & 26 deletions src/item.rs
Expand Up @@ -21,7 +21,9 @@ use crate::error::Error;
use crate::extension::atom;
use crate::extension::dublincore;
use crate::extension::itunes;
use crate::extension::util::{extension_name, parse_extension};
use crate::extension::util::{
extension_entry, extension_name, parse_extension_element, read_namespace_declarations,
};
use crate::extension::ExtensionMap;
use crate::guid::Guid;
use crate::source::Source;
Expand Down Expand Up @@ -609,11 +611,14 @@ impl Item {
pub fn from_xml<R: BufRead>(
namespaces: &BTreeMap<String, String>,
reader: &mut Reader<R>,
_: Attributes,
atts: Attributes,
) -> Result<Self, Error> {
let mut item = Item::default();
let mut extensions = ExtensionMap::new();
let mut buf = Vec::new();

let namespaces = read_namespace_declarations(reader, atts, namespaces)?;

loop {
match reader.read_event_into(&mut buf)? {
Event::Start(element) => match decode(element.name().as_ref(), reader)?.as_ref() {
Expand Down Expand Up @@ -642,14 +647,24 @@ impl Item {
"pubDate" => item.pub_date = element_text(reader)?,
"content:encoded" => item.content = element_text(reader)?,
n => {
if let Some((ns, name)) = extension_name(n) {
parse_extension(
if let Some((prefix, name)) = extension_name(n) {
let scope_namespases = read_namespace_declarations(
reader,
element.attributes(),
ns,
name,
&mut item.extensions,
namespaces.as_ref(),
)?;
let ext_ns = scope_namespases.get(prefix).map(|s| s.as_str());
let ext = parse_extension_element(reader, element.attributes())?;
match ext_ns {
#[cfg(feature = "atom")]
Some(ns @ atom::NAMESPACE) => {
extension_entry(&mut extensions, ns, name).push(ext);
}
Some(ns @ itunes::NAMESPACE) | Some(ns @ dublincore::NAMESPACE) => {
extension_entry(&mut extensions, ns, name).push(ext);
}
_ => extension_entry(&mut item.extensions, prefix, name).push(ext),
}
} else {
skip(element.name(), reader)?;
}
Expand All @@ -662,25 +677,16 @@ impl Item {
buf.clear();
}

if !item.extensions.is_empty() {
// Process each of the namespaces we know (note that the values are not removed prior and reused to support pass-through of unknown extensions)
for (prefix, namespace) in namespaces {
match namespace.as_ref() {
#[cfg(feature = "atom")]
atom::NAMESPACE => item
.extensions
.remove(prefix)
.map(|v| item.atom_ext = Some(atom::AtomExtension::from_map(v))),
itunes::NAMESPACE => item
.extensions
.remove(prefix)
.map(|v| item.itunes_ext = Some(itunes::ITunesItemExtension::from_map(v))),
dublincore::NAMESPACE => item.extensions.remove(prefix).map(|v| {
item.dublin_core_ext = Some(dublincore::DublinCoreExtension::from_map(v))
}),
_ => None,
};
}
// Process each of the namespaces we know
#[cfg(feature = "atom")]
if let Some(v) = extensions.remove(atom::NAMESPACE) {
item.atom_ext = Some(atom::AtomExtension::from_map(v));
}
if let Some(v) = extensions.remove(itunes::NAMESPACE) {
item.itunes_ext = Some(itunes::ITunesItemExtension::from_map(v))
}
if let Some(v) = extensions.remove(dublincore::NAMESPACE) {
item.dublin_core_ext = Some(dublincore::DublinCoreExtension::from_map(v))
}

Ok(item)
Expand Down
32 changes: 32 additions & 0 deletions tests/read.rs
Expand Up @@ -967,3 +967,35 @@ fn read_multiple_links() {
assert_eq!(channel.link(), "https://www.coindesk.com");
assert_eq!(channel.items[0].link.as_ref().unwrap(), "https://www.coindesk.com/policy/2023/01/14/doj-objects-to-ftxs-choice-of-lawyers-citing-conflict-of-interest/?utm_medium=referral&utm_source=rss&utm_campaign=headlines");
}

/// A namespace prefix declared directly on the extension element (rather than
/// on the `<rss>` root) must still be recognised as Dublin Core.
#[test]
fn read_local_namespace() {
    let input = r#"
<?xml version="1.0" encoding="UTF-8"?>
<rss>
<channel>
<dc:creator xmlns:dc="http://purl.org/dc/elements/1.1/">Creator</dc:creator>
</channel>
</rss>
"#;
    let channel: Channel = input.parse().unwrap();

    let dublin_core = channel.dublin_core_ext();
    assert!(dublin_core.is_some());
    assert_eq!(dublin_core.unwrap().creators, vec!["Creator"]);
}

/// A prefix bound to iTunes on the root but re-bound to the Dublin Core URI on
/// the element itself must be resolved using the local declaration.
#[test]
fn read_local_namespace_hijack() {
    let input = r#"
<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd">
<channel>
<itunes:creator xmlns:itunes="http://purl.org/dc/elements/1.1/">Creator</itunes:creator>
</channel>
</rss>
"#;
    let channel: Channel = input.parse().unwrap();

    let dublin_core = channel.dublin_core_ext();
    assert!(dublin_core.is_some());
    assert_eq!(dublin_core.unwrap().creators, vec!["Creator"]);
}

0 comments on commit df1118a

Please sign in to comment.