Skip to content

Commit

Permalink
Combine fields when importing RSS
Browse files Browse the repository at this point in the history
  • Loading branch information
cavis committed May 9, 2024
1 parent 40e7980 commit 982760d
Show file tree
Hide file tree
Showing 7 changed files with 85 additions and 22 deletions.
50 changes: 50 additions & 0 deletions app/models/concerns/episode_ready.rb.bk
@@ -0,0 +1,50 @@
require "active_support/concern"

# Concern that declares feed-related query scopes on the including model.
#
# NOTE(review): this lives in a `.rb.bk` file and reads like a parked
# work-in-progress: several scope bodies are empty and `feed_ready` contains
# an unfinished ternary, so the file does not look loadable as-is. Confirm
# before renaming it back to `.rb`.
module EpisodeReady
extend ActiveSupport::Concern

included do
# Placeholder scope: takes `cats` but has no body yet.
scope :with_category, ->(cats) do


end

# Intended to narrow episodes to those belonging to `feed`.
# Returns `all` when no feed is given.
#
# NOTE(review): the expressions below are separate statements, not a chain,
# so only the final `order(...).limit(...)` is the lambda's return value; the
# `where` and `published_by` results are evaluated and discarded.
scope :for_feed, ->(feed) do
return all unless feed.present?

# TODO: episode has_and_belongs_to_many feeds

where(podcast_id: feed.podcast_id)

# published date
published_by(feed.episode_offset_seconds.to_i)

# include/exclude tags
# NOTE(review): `match_tags`, `ep` and `normalize_category` are not defined
# anywhere in this module; these lines look pasted from a per-episode
# predicate. Also `(ep || [])` falls back to an Array, which is then sent
# `categories`.

tags = match_tags.map { |cat| normalize_category(cat) }
cats = (ep || []).categories.map { |cat| normalize_category(cat) }
(tags & cats).length > 0

# normalize
# NOTE(review): `cat` is not in scope here (it is only a block parameter
# above); presumably this was meant to become the body of a
# `normalize_category` helper.
cat.to_s.downcase.gsub(/[^ a-z0-9_-]/, "").gsub(/\s+/, " ").strip

# limit
# newest first, capped at the feed's display_episodes_count (to_i of nil is 0)
order(published_at: :desc).limit(feed.display_episodes_count.to_i)

end

# episodes (in a feed) with their media versions already cut
# NOTE(review): the ternary on the next line has no branches; unfinished.
scope :feed_ready, ->(feed = nil) do
scope = feed ?

end

# Placeholder scope: no body yet.
scope :feed_unready, ->(feed = nil) do

end
end

# Placeholder predicate: empty body, so it currently returns nil.
def is_feed?(feed)
end

end
12 changes: 12 additions & 0 deletions app/models/concerns/text_sanitizer.rb
Expand Up @@ -22,4 +22,16 @@ def sanitize_text_only(text)
return nil if text.blank?
Loofah.fragment(text).scrub!(:prune).text(encode_special_chars: false)
end

# Sanitizes a list of keywords, removing duplicates and blank results.
#
# Each entry is coerced with +to_s+ first, so a stray nil, Symbol or Integer
# in the list no longer raises on +#length+ / +#strip+; a nil simply becomes
# an empty string and is dropped by the blank filter.
#
# The keyword's own length is passed as the maximum length, so nothing is
# truncated here.
#
# @param kws [Array, String, nil] one keyword or a list; wrapped with Array(), so nil yields []
# @param strict [Boolean] forwarded to sanitize_keyword
# @return [Array<String>] sanitized keywords in first-seen order
def sanitize_keywords(kws, strict)
  Array(kws)
    .map(&:to_s)
    .map { |kw| sanitize_keyword(kw, kw.length, strict) }
    .uniq
    .reject(&:blank?)
end

# Sanitizes a single keyword and truncates it to +max_length+ characters.
#
# The input is coerced with +to_s+ in both modes, so nil and non-String
# values behave the same whether or not +strict+ is set (previously only the
# strict branch coerced, and the lenient branch raised on nil).
#
# @param kw [#to_s] the raw keyword
# @param max_length [Integer] maximum number of characters to keep
# @param strict [Boolean] when true, lowercase the keyword, remove every
#   character other than space, a-z, 0-9, "_" and "-", and collapse runs of
#   spaces; when false, only surrounding whitespace is stripped
# @return [String] the cleaned keyword
def sanitize_keyword(kw, max_length, strict)
  text = kw.to_s
  text = text.downcase.gsub(/[^ a-z0-9_-]/, "").gsub(/\s+/, " ") if strict
  text.strip.slice(0, max_length)
end
end
7 changes: 5 additions & 2 deletions app/models/imports/episode_rss_import.rb
Expand Up @@ -81,7 +81,6 @@ def update_episode_with_entry!
episode.published_at = entry[:published]
episode.season_number = entry[:itunes_season]
episode.subtitle = clean_string(episode_short_desc(entry))
episode.categories = Array(entry[:categories]).map(&:strip).reject(&:blank?)
episode.title = clean_title(entry[:title])

if entry[:itunes_summary] && entry_description_attribute(entry) != :itunes_summary
Expand All @@ -93,11 +92,15 @@ def update_episode_with_entry!
episode.original_guid = clean_string(entry[:entry_id])
episode.is_closed_captioned = closed_captioned?(entry)
episode.is_perma_link = entry[:is_perma_link]
episode.keywords = (entry[:itunes_keywords] || "").split(",").map(&:strip)
episode.position = entry[:itunes_order]
episode.url = episode_url(entry)
episode.itunes_type = entry[:itunes_episode_type] unless entry[:itunes_episode_type].blank?

# categories setter does the work of sanitizing these
cats = Array(entry[:categories])
keys = (entry[:itunes_keywords] || "").split(",")
episode.categories = cats + keys

episode
end

Expand Down
20 changes: 6 additions & 14 deletions app/models/imports/podcast_rss_import.rb
Expand Up @@ -177,17 +177,21 @@ def build_podcast_attributes
podcast_attributes[:owner_name] = owner[:name]
podcast_attributes[:owner_email] = owner[:email]

podcast_attributes[:categories] = parse_categories(feed)
podcast_attributes[:complete] = (clean_string(feed.itunes_complete) == "yes")
podcast_attributes[:copyright] ||= clean_string(feed.media_copyright)
podcast_attributes[:keywords] = parse_keywords(feed)
podcast_attributes[:serial_order] = feed.itunes_type && !!feed.itunes_type.match(/serial/i)
podcast_attributes[:locked] = true # won't publish feed until this is set to false

podcast_attributes[:title] = clean_string(feed.title)
podcast_attributes[:subtitle] = clean_string(podcast_short_desc(feed))
podcast_attributes[:description] = feed_description(feed)

# categories setter does the work of sanitizing these
cats = Array(feed.categories)
ikeys = (feed.itunes_keywords || "").split(",")
mkeys = (feed.media_keywords || "").split(",")
podcast_attributes[:categories] = cats + ikeys + mkeys

podcast_attributes
end

Expand Down Expand Up @@ -248,18 +252,6 @@ def parse_itunes_categories(feed)
[itunes_cats.keys.map { |n| ITunesCategory.new(name: n, subcategories: itunes_cats[n]) }.first].compact
end

# Collects the feed's media categories and plain categories into one list.
#
# nil entries are removed *before* stripping (stripping first raised
# NoMethodError on nil, which made the trailing compact unreachable), and
# entries that are empty after stripping are dropped.
#
# @param feed [#media_categories, #categories] parsed feed; either reader may
#   return nil, a single value or an Array
# @return [Array<String>] stripped, non-empty, de-duplicated category names,
#   media categories first
def parse_categories(feed)
  (Array(feed.media_categories) + Array(feed.categories))
    .compact
    .map { |cat| cat.to_s.strip }
    .reject(&:empty?)
    .uniq
end

# Collects the feed's itunes and media keywords into one list.
#
# The keyword fields are comma-delimited strings (the import code in this
# file splits them on ","), so each value is split before stripping; an
# Array of keywords is accepted as well. nil entries are removed before any
# String method is called, and pieces that are empty after stripping are
# dropped.
#
# @param feed [#itunes_keywords, #media_keywords] parsed feed; either reader
#   may return nil, a comma-delimited String or an Array
# @return [Array<String>] stripped, non-empty, de-duplicated keywords,
#   itunes keywords first
def parse_keywords(feed)
  (Array(feed.itunes_keywords) + Array(feed.media_keywords))
    .compact
    .flat_map { |keys| keys.to_s.split(",") }
    .map(&:strip)
    .reject(&:empty?)
    .uniq
end

def podcast_short_desc(item)
[item.itunes_subtitle, item.description, item.title].find do |field|
!field.blank? && field.split.length < 50
Expand Down
6 changes: 6 additions & 0 deletions test/fixtures/transistor_two.xml
Expand Up @@ -45,6 +45,11 @@
<itunes:category text="Science &amp; Medicine">
<itunes:category text="Natural Sciences"/>
</itunes:category>
<category>
<![CDATA[Some Category]]>
</category>
<itunes:keywords>keyword1, keyword two</itunes:keywords>
<media:keywords>media one, keyword two</media:keywords>
<googleplay:category text="Science &amp; Medicine"/>
<googleplay:image href="http://cdn-transistor.prx.org/transistor1400.jpg"/>
<rawvoice:subscribe feed="http://transistor.prx.org/feed/podcast/" itunes="https://itunes.apple.com/us/podcast/transistor/id964305817?at=10l9zE"/>
Expand All @@ -69,6 +74,7 @@
</category>
<category>
</category>
<itunes:keywords>keyword1, architecture </itunes:keywords>
<description>For the next few episodes, we’re featuring the Smithsonian’s new series, Sidedoor, about where science, art, history, and humanity unexpectedly overlap — just like in their museums. In this episode: an astronomer has turned the night sky into a symphony; an architecture firm has radically re-thought police stations; and an audiophile builds a successful record … &lt;a href="https://transistor.prx.org/2017/01/sidedoor-from-the-smithsonian-shake-it-up/" class="more-link"&gt;Continue reading &lt;span class="screen-reader-text"&gt;Sidedoor from the Smithsonian: Shake it Up&lt;/span&gt;&lt;/a&gt;</description>
<content:encoded>
<![CDATA[<p>For the next few episodes, we’re featuring the Smithsonian’s new series, <em>Sidedoor</em>, about where science, art, history, and humanity unexpectedly overlap — just like in their museums.</p>
Expand Down
9 changes: 3 additions & 6 deletions test/models/imports/episode_rss_import_test.rb
Expand Up @@ -43,16 +43,13 @@
f = episode_import.episode
_(f.description).must_match(/For the next few episodes/)
_(f.description).wont_match(/feedburner/)
_(f.categories).must_include "Indie Features"
f.categories.each do |tag|
_(tag).wont_match(/\n/)
_(tag).wont_be :blank?
end
_(f.categories).wont_include '\t'
_(f.clean_title).must_equal "Sidedoor iTunes title"
_(f.season_number).must_equal 2
_(f.episode_number).must_equal 4

# categories and itunes:keywords are combined
_(f.categories).must_equal ["Indie Features", "science", "architecture", "keyword1"]

# It has the podcast set and the published_at date
_(f.podcast_id).must_equal podcast.id
_(f.published_at).must_equal Time.zone.parse("2017-01-20 03:04:12")
Expand Down
3 changes: 3 additions & 0 deletions test/models/imports/podcast_rss_import_test.rb
Expand Up @@ -66,6 +66,9 @@
_(importer.podcast.managing_editor_name).must_equal "PRX"
_(importer.podcast.managing_editor_email).must_equal "prxwpadmin@prx.org"

# categories, itunes:keywords and media:keywords are combined
_(importer.podcast.categories).must_equal ["Some Category", "keyword1", "keyword two", "media one"]

_(sns.messages.count).must_equal 2
_(sns.messages.map { |m| m["Job"]["Tasks"].length }).must_equal [2, 2]
_(sns.messages.map { |m| m["Job"]["Tasks"].map { |t| t["Type"] } }).must_equal [["Inspect", "Copy"], ["Inspect", "Copy"]]
Expand Down

0 comments on commit 982760d

Please sign in to comment.