From f34cd2e213f9be93ae3db4ad32e7e927fd15c618 Mon Sep 17 00:00:00 2001 From: Senan Kelly Date: Thu, 21 Dec 2023 03:20:22 +0000 Subject: [PATCH] fix(podcast): slightly more robust downloading and concurrency (#433) --- cmd/gonic/gonic.go | 11 + db/db.go | 2 +- podcast/podcast.go | 405 ++++++++++------------ server/ctrlsubsonic/handlers_by_folder.go | 3 +- 4 files changed, 201 insertions(+), 220 deletions(-) diff --git a/cmd/gonic/gonic.go b/cmd/gonic/gonic.go index ee601b2f..98b046ca 100644 --- a/cmd/gonic/gonic.go +++ b/cmd/gonic/gonic.go @@ -388,6 +388,17 @@ func main() { return nil }) + errgrp.Go(func() error { + defer logJob("podcast download")() + + ctxTick(ctx, 5*time.Second, func() { + if err := podcast.DownloadTick(); err != nil { + log.Printf("failed to download podcast: %s", err) + } + }) + return nil + }) + errgrp.Go(func() error { if *confPodcastPurgeAgeDays == 0 { return nil diff --git a/db/db.go b/db/db.go index a0d57aca..9f0ff60b 100644 --- a/db/db.go +++ b/db/db.go @@ -527,7 +527,7 @@ func (pe *PodcastEpisode) MIME() string { } func (pe *PodcastEpisode) AbsPath() string { - if pe.Podcast == nil { + if pe.Podcast == nil || pe.Podcast.RootDir == "" { return "" } return filepath.Join(pe.Podcast.RootDir, pe.Filename) diff --git a/podcast/podcast.go b/podcast/podcast.go index fa404d72..17fc73bb 100644 --- a/podcast/podcast.go +++ b/podcast/podcast.go @@ -26,8 +26,7 @@ import ( var ErrNoAudioInFeedItem = errors.New("no audio in feed item") const ( - downloadAllWaitInterval = 3 * time.Second - fetchUserAgent = `Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_5) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11` + fetchUserAgent = `Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_5) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11` ) type Podcasts struct { @@ -45,44 +44,24 @@ func New(db *db.DB, base string, tagReader tagcommon.Reader) *Podcasts { } func (p *Podcasts) GetPodcastOrAll(id int, includeEpisodes bool) ([]*db.Podcast, error) { - var err error - podcasts := []*db.Podcast{} + var podcasts []*db.Podcast + q := p.db.DB if id != 0 { - err = p.db.Where("id=?", id).Find(&podcasts).Error - } else { - err = p.db.Find(&podcasts).Error + q = q.Where("id=?", id) } - if err != nil { - return nil, fmt.Errorf("finding podcasts: %w", err) - } - if !includeEpisodes { - return podcasts, nil + if includeEpisodes { + q = q.Preload("Episodes", func(db *gorm.DB) *gorm.DB { + return db.Order("podcast_episodes.publish_date DESC") + }) } - for _, c := range podcasts { - episodes, err := p.GetPodcastEpisodes(c.ID) - if err != nil { - return nil, fmt.Errorf("finding podcast episodes: %w", err) - } - c.Episodes = episodes + if err := q.Find(&podcasts).Error; err != nil { + return nil, fmt.Errorf("find podcasts: %w", err) } return podcasts, nil } -func (p *Podcasts) GetPodcastEpisodes(podcastID int) ([]*db.PodcastEpisode, error) { - episodes := []*db.PodcastEpisode{} - err := p.db. - Where("podcast_id=?", podcastID). - Order("publish_date DESC"). - Find(&episodes). - Error - if err != nil { - return nil, fmt.Errorf("find episodes by podcast id: %w", err) - } - return episodes, nil -} - func (p *Podcasts) GetNewestPodcastEpisodes(count int) ([]*db.PodcastEpisode, error) { - episodes := []*db.PodcastEpisode{} + var episodes []*db.PodcastEpisode err := p.db. Order("publish_date DESC"). Limit(count). @@ -112,14 +91,12 @@ func (p *Podcasts) AddNewPodcast(rssURL string, feed *gofeed.Feed) (*db.Podcast, if err := p.db.Save(&podcast).Error; err != nil { return &podcast, err } - if err := p.AddNewEpisodes(&podcast, feed.Items); err != nil { - return nil, err + if err := p.RefreshPodcast(&podcast, feed.Items); err != nil { + log.Printf("error addign new episodes : %v", err) + } + if err := p.downloadPodcastCover(&podcast); err != nil { + log.Printf("error downloading podcast cover: %v", err) } - go func() { - if err := p.downloadPodcastCover(&podcast); err != nil { - log.Printf("error downloading podcast cover: %v", err) - } - }() return &podcast, nil } @@ -150,69 +127,42 @@ func getEntriesAfterDate(feed []*gofeed.Item, after time.Time) []*gofeed.Item { return items } -func (p *Podcasts) AddNewEpisodes(podcast *db.Podcast, items []*gofeed.Item) error { - podcastEpisode := db.PodcastEpisode{} +func (p *Podcasts) RefreshPodcast(podcast *db.Podcast, items []*gofeed.Item) error { + var lastPodcastEpisode db.PodcastEpisode err := p.db. Where("podcast_id=?", podcast.ID). Order("publish_date DESC"). - First(&podcastEpisode).Error - var itemFound = true - if errors.Is(err, gorm.ErrRecordNotFound) { - itemFound = false - } else if err != nil { + First(&lastPodcastEpisode). + Error + if err != nil && !errors.Is(err, gorm.ErrRecordNotFound) { return err } - if !itemFound { - var episodeErrs []error - for _, item := range items { - if _, err := p.AddEpisode(podcast.ID, item); err != nil { - episodeErrs = append(episodeErrs, err) - continue - } - } - return errors.Join(episodeErrs...) + + if lastPodcastEpisode.ID != 0 { + items = getEntriesAfterDate(items, *lastPodcastEpisode.PublishDate) } - for _, item := range getEntriesAfterDate(items, *podcastEpisode.PublishDate) { - episode, err := p.AddEpisode(podcast.ID, item) - if errors.Is(err, ErrNoAudioInFeedItem) { - log.Printf("failed to find audio in feed (%s, %s) item, skipping", podcast.Title, item.Title) - continue - } + + var episodeErrs []error + for _, item := range items { + podcastEpisode, err := p.addEpisode(podcast.ID, item) if err != nil { - return err + episodeErrs = append(episodeErrs, err) + continue } - if podcast.AutoDownload == db.PodcastAutoDownloadLatest && - (episode.Status != db.PodcastEpisodeStatusCompleted && episode.Status != db.PodcastEpisodeStatusDownloading) { - if err := p.DownloadEpisode(episode.ID); err != nil { - return err + + if lastPodcastEpisode.ID != 0 && podcast.AutoDownload == db.PodcastAutoDownloadLatest { + podcastEpisode.Status = db.PodcastEpisodeStatusDownloading + if err := p.db.Save(&podcastEpisode).Error; err != nil { + return fmt.Errorf("save podcast episode: %w", err) } } } - return nil -} -func getSecondsFromString(time string) int { - duration, err := strconv.Atoi(time) - if err == nil { - return duration - } - splitTime := strings.Split(time, ":") - if len(splitTime) == 3 { - hours, _ := strconv.Atoi(splitTime[0]) - minutes, _ := strconv.Atoi(splitTime[1]) - seconds, _ := strconv.Atoi(splitTime[2]) - return (3600 * hours) + (60 * minutes) + seconds - } - if len(splitTime) == 2 { - minutes, _ := strconv.Atoi(splitTime[0]) - seconds, _ := strconv.Atoi(splitTime[1]) - return (60 * minutes) + seconds - } - return 0 + return errors.Join(episodeErrs...) } -func (p *Podcasts) AddEpisode(podcastID int, item *gofeed.Item) (*db.PodcastEpisode, error) { - duration := 0 +func (p *Podcasts) addEpisode(podcastID int, item *gofeed.Item) (*db.PodcastEpisode, error) { + var duration int // if it has the media extension use it for _, content := range item.Extensions["media"]["content"] { durationExt := content.Attrs["duration"] @@ -249,9 +199,7 @@ func (p *Podcasts) isAudio(rawItemURL string) (bool, error) { return p.tagReader.CanRead(itemURL.Path), nil } -func itemToEpisode(podcastID, size, duration int, audio string, - item *gofeed.Item, -) *db.PodcastEpisode { +func itemToEpisode(podcastID, size, duration int, audio string, item *gofeed.Item) *db.PodcastEpisode { return &db.PodcastEpisode{ PodcastID: podcastID, Description: item.Description, @@ -290,17 +238,11 @@ func (p *Podcasts) findMediaAudio(podcastID, duration int, item *gofeed.Item) (* } func (p *Podcasts) RefreshPodcasts() error { - podcasts := []*db.Podcast{} + var podcasts []*db.Podcast if err := p.db.Find(&podcasts).Error; err != nil { return fmt.Errorf("find podcasts: %w", err) } - if err := p.refreshPodcasts(podcasts); err != nil { - return fmt.Errorf("refresh podcasts: %w", err) - } - return nil -} -func (p *Podcasts) refreshPodcasts(podcasts []*db.Podcast) error { var errs []error for _, podcast := range podcasts { fp := gofeed.NewParser() @@ -309,7 +251,7 @@ func (p *Podcasts) refreshPodcasts(podcasts []*db.Podcast) error { errs = append(errs, fmt.Errorf("refreshing podcast with url %q: %w", podcast.URL, err)) continue } - if err = p.AddNewEpisodes(podcast, feed.Items); err != nil { + if err = p.RefreshPodcast(podcast, feed.Items); err != nil { errs = append(errs, fmt.Errorf("adding episodes: %w", err)) continue } @@ -318,97 +260,57 @@ func (p *Podcasts) refreshPodcasts(podcasts []*db.Podcast) error { } func (p *Podcasts) DownloadPodcastAll(podcastID int) error { - podcastEpisodes := []db.PodcastEpisode{} err := p.db. + Model(db.PodcastEpisode{}). + Where("status=?", db.PodcastEpisodeStatusSkipped). Where("podcast_id=?", podcastID). - Find(&podcastEpisodes). + Update("status", db.PodcastEpisodeStatusDownloading). Error if err != nil { - return fmt.Errorf("get episodes by podcast id: %w", err) + return fmt.Errorf("update podcast episodes: %w", err) } - go func() { - for _, episode := range podcastEpisodes { - if episode.Status == db.PodcastEpisodeStatusDownloading || episode.Status == db.PodcastEpisodeStatusCompleted { - log.Println("skipping episode is in progress or already downloaded") - continue - } - if err := p.DownloadEpisode(episode.ID); err != nil { - log.Printf("error downloading episode: %v", err) - continue - } - log.Printf("finished downloading episode: %q", episode.Title) - time.Sleep(downloadAllWaitInterval) - } - }() return nil } func (p *Podcasts) DownloadEpisode(episodeID int) error { - podcastEpisode := db.PodcastEpisode{} - podcast := db.Podcast{} err := p.db. - Preload("Podcast"). + Model(db.PodcastEpisode{}). Where("id=?", episodeID). - First(&podcastEpisode). + Update("status", db.PodcastEpisodeStatusDownloading). Error if err != nil { - return fmt.Errorf("get podcast episode by id: %w", err) - } - err = p.db. - Where("id=?", podcastEpisode.PodcastID). - First(&podcast). - Error - if err != nil { - return fmt.Errorf("get podcast by id: %w", err) - } - if podcastEpisode.Status == db.PodcastEpisodeStatusDownloading || podcastEpisode.Status == db.PodcastEpisodeStatusCompleted { - log.Printf("already downloading podcast episode with id %d", episodeID) - return nil - } - podcastEpisode.Status = db.PodcastEpisodeStatusDownloading - p.db.Save(&podcastEpisode) - client := &http.Client{} - req, err := http.NewRequest("GET", podcastEpisode.AudioURL, nil) - if err != nil { - return fmt.Errorf("create http request: %w", err) + return fmt.Errorf("update podcast episodes: %w", err) } - req.Header.Add("User-Agent", fetchUserAgent) - // nolint: bodyclose - resp, err := client.Do(req) - if err != nil { - return fmt.Errorf("fetch podcast audio: %w", err) + return nil +} + +func getContentDispositionFilename(header http.Header) (string, bool) { + contentHeader := header.Get("content-disposition") + _, params, _ := mime.ParseMediaType(contentHeader) + filename, ok := params["filename"] + return filename, ok +} + +func getPodcastEpisodeFilename(podcast *db.Podcast, podcastEpisode *db.PodcastEpisode, header http.Header) (string, error) { + if podcastEpisode.Filename != "" { + return podcastEpisode.Filename, nil } - filename, ok := getContentDispositionFilename(resp.Header.Get("content-disposition")) + + filename, ok := getContentDispositionFilename(header) if !ok { audioURL, err := url.Parse(podcastEpisode.AudioURL) if err != nil { - return fmt.Errorf("parse podcast audio url: %w", err) + return "", fmt.Errorf("parse podcast audio url: %w", err) } filename = path.Base(audioURL.Path) } path, err := fileutil.Unique(podcast.RootDir, fileutil.Safe(filename)) if err != nil { - return fmt.Errorf("find unique path: %w", err) + return "", fmt.Errorf("find unique path: %w", err) } _, filename = filepath.Split(path) - audioFile, err := os.Create(filepath.Join(podcast.RootDir, filename)) - if err != nil { - return fmt.Errorf("create audio file: %w", err) - } - podcastEpisode.Filename = filename - p.db.Save(&podcastEpisode) - go func() { - if err := p.doPodcastDownload(&podcastEpisode, audioFile, resp.Body); err != nil { - log.Printf("error downloading podcast: %v", err) - } - }() - return nil -} -func getContentDispositionFilename(header string) (string, bool) { - _, params, _ := mime.ParseMediaType(header) - filename, ok := params["filename"] - return filename, ok + return filename, nil } func (p *Podcasts) downloadPodcastCover(podcast *db.Podcast) error { @@ -429,15 +331,16 @@ func (p *Podcasts) downloadPodcastCover(podcast *db.Podcast) error { } defer resp.Body.Close() - ext := path.Ext(imageURL.Path) + var ext = path.Ext(imageURL.Path) if ext == "" { - contentHeader := resp.Header.Get("content-disposition") - filename, _ := getContentDispositionFilename(contentHeader) + filename, _ := getContentDispositionFilename(resp.Header) ext = filepath.Ext(filename) } - cover := "cover" + ext - coverFile, err := os.Create(filepath.Join(podcast.RootDir, cover)) + if err := os.MkdirAll(podcast.RootDir, os.ModePerm); err != nil { + return fmt.Errorf("make podcast root dir: %w", err) + } + coverFile, err := os.Create(filepath.Join(podcast.RootDir, "cover"+ext)) if err != nil { return fmt.Errorf("creating podcast cover: %w", err) } @@ -453,39 +356,9 @@ func (p *Podcasts) downloadPodcastCover(podcast *db.Podcast) error { return nil } -func (p *Podcasts) doPodcastDownload(podcastEpisode *db.PodcastEpisode, file *os.File, src io.Reader) error { - if _, err := io.Copy(file, src); err != nil { - return fmt.Errorf("writing podcast episode: %w", err) - } - defer file.Close() - - podcastTags, err := p.tagReader.Read(podcastEpisode.AbsPath()) - if err != nil { - log.Printf("error parsing podcast audio: %e", err) - podcastEpisode.Status = db.PodcastEpisodeStatusError - p.db.Save(podcastEpisode) - return nil - } - - stat, _ := file.Stat() - podcastEpisode.Bitrate = podcastTags.Bitrate() - podcastEpisode.Status = db.PodcastEpisodeStatusCompleted - podcastEpisode.Length = podcastTags.Length() - podcastEpisode.Size = int(stat.Size()) - - if err := p.db.Save(podcastEpisode).Error; err != nil { - return fmt.Errorf("save podcast episode: %w", err) - } - return nil -} - func (p *Podcasts) DeletePodcast(podcastID int) error { - podcast := db.Podcast{} - err := p.db. - Where("id=?", podcastID). - First(&podcast). - Error - if err != nil { + var podcast db.Podcast + if err := p.db.Where("id=?", podcastID).First(&podcast).Error; err != nil { return err } if podcast.RootDir == "" { @@ -495,38 +368,36 @@ func (p *Podcasts) DeletePodcast(podcastID int) error { if err := os.RemoveAll(podcast.RootDir); err != nil { return fmt.Errorf("delete podcast directory: %w", err) } - err = p.db. - Where("id=?", podcastID). - Delete(db.Podcast{}). - Error - if err != nil { + + if err := p.db.Where("id=?", podcastID).Delete(db.Podcast{}).Error; err != nil { return fmt.Errorf("delete podcast row: %w", err) } return nil } func (p *Podcasts) DeletePodcastEpisode(podcastEpisodeID int) error { - episode := db.PodcastEpisode{} - err := p.db.Preload("Podcast").First(&episode, podcastEpisodeID).Error - if err != nil { + var podcastEpisode db.PodcastEpisode + if err := p.db.Preload("Podcast").First(&podcastEpisode, podcastEpisodeID).Error; err != nil { return err } - episode.Status = db.PodcastEpisodeStatusDeleted - p.db.Save(&episode) - if err := os.Remove(episode.AbsPath()); err != nil { - return err + podcastEpisode.Status = db.PodcastEpisodeStatusDeleted + if err := p.db.Save(&podcastEpisode).Error; err != nil { + return fmt.Errorf("save podcast episode: %w", err) + } + if err := os.Remove(podcastEpisode.AbsPath()); err != nil { + return fmt.Errorf("remove episode: %w", err) } - return err + return nil } func (p *Podcasts) PurgeOldPodcasts(maxAge time.Duration) error { expDate := time.Now().Add(-maxAge) var episodes []*db.PodcastEpisode err := p.db. - Where("status = ?", db.PodcastEpisodeStatusCompleted). - Where("created_at < ?", expDate). - Where("updated_at < ?", expDate). - Where("modified_at < ?", expDate). + Where("status=?", db.PodcastEpisodeStatusCompleted). + Where("created_at