Skip to content

Commit

Permalink
Remove by position
Browse files Browse the repository at this point in the history
  • Loading branch information
JMPerez committed May 8, 2024
1 parent 4910f14 commit d0493aa
Show file tree
Hide file tree
Showing 10 changed files with 72 additions and 1,747 deletions.
150 changes: 0 additions & 150 deletions __tests__/dedup/deduplicator.test.ts

This file was deleted.

144 changes: 27 additions & 117 deletions dedup/deduplicator.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,30 +18,18 @@ class BaseDeduplicator {
}

static findDuplicatedTracks(tracks: Array<SpotifyTrackType>) {

// a map of the ids that have been seen and their canonical position
const seenIds: { [key: string]: number } = {};

const seenIds: { [key: string]: boolean } = {};
const seenNameAndArtist: { [key: string]: Array<number> } = {};

let duplicates: Array<Duplicate> = [];
const result = tracks.reduce((duplicates, track, index) => {
if (track === null) return duplicates;
if (track.id === null) return duplicates;

let isDuplicate = false,
canonicalPosition: number | undefined = undefined;

let isDuplicate = false;
const seenNameAndArtistKey =
`${track.name}:${track.artists[0].name}`.toLowerCase();

if (track.id in seenIds) {
// if the two tracks have the same Spotify ID, they are duplicates
// we store the position of the item that will be considered canonical
// as we will need it to reinsert the track when all the tracks with
// the same id are removed
isDuplicate = true;
canonicalPosition = seenIds[track.id];
} else {
// if they have the same name, main artist, and roughly same duration
// we consider them duplicates too
Expand All @@ -61,10 +49,9 @@ class BaseDeduplicator {
index: index,
track: track,
reason: track.id in seenIds ? 'same-id' : 'same-name-artist',
canonicalPosition
});
} else {
seenIds[track.id] = index;
seenIds[track.id] = true;
seenNameAndArtist[seenNameAndArtistKey] =
seenNameAndArtist[seenNameAndArtistKey] || [];
seenNameAndArtist[seenNameAndArtistKey].push(track.duration_ms);
Expand All @@ -75,38 +62,6 @@ class BaseDeduplicator {
}
}

/**
 * Returns a copy of `data` with repeated values removed.
 *
 * The first occurrence of each value is kept and the relative order of the
 * surviving values is preserved. The input array is not modified.
 *
 * @param data - strings that may contain repeated values
 * @returns a new array holding each distinct value exactly once
 */
function removeDuplicatesInArray(data: string[]): string[] {
  // A Set iterates in insertion order, so first occurrences win, and the
  // per-element `indexOf` scan (quadratic overall) is no longer needed.
  return Array.from(new Set(data));
}

/**
 * Works out which tracks have to be inserted again, and at which position,
 * after every entry listed in `duplicates` has been deleted from a track list.
 *
 * Only duplicates whose reason is `'same-id'` and that carry a
 * `canonicalPosition` contribute a track to add back; the first such duplicate
 * seen for a given canonical position supplies the track. The reported
 * position is the canonical position shifted left by the number of deleted
 * entries that were located before it.
 *
 * The `duplicates` array is not modified.
 *
 * @param duplicates - the detected duplicates (index, reason, track and
 *   optional canonical position)
 * @returns the tracks to insert, in ascending position order; empty when
 *   there are no duplicates
 */
export const calculateTracksToAddBack = function (duplicates: Duplicate[]): { track: SpotifyTrackType; position: number }[] {
  // Without duplicates there is nothing to add back. This guard also avoids
  // sizing the working array from the maximum of an empty list (-Infinity),
  // which would throw a RangeError.
  if (duplicates.length === 0) {
    return [];
  }

  // Step 1: create an array with as many slots as the highest duplicate index.
  // A fold is used instead of spreading into Math.max so very long lists do
  // not hit the engine's argument-count limit.
  const highestIndex = duplicates.reduce(
    (max, duplicate) => Math.max(max, duplicate.index),
    Number.NEGATIVE_INFINITY
  );
  const positions: (SpotifyTrackType | null)[] = Array(highestIndex + 1).fill(null);

  // Step 2: mark the canonical positions (first writer wins).
  for (const duplicate of duplicates) {
    if (
      duplicate.reason === 'same-id' &&
      duplicate.canonicalPosition !== undefined &&
      !positions[duplicate.canonicalPosition]
    ) {
      positions[duplicate.canonicalPosition] = duplicate.track;
    }
  }

  // Step 3: simulate the deletion of each duplicate, highest index first so
  // that earlier removals do not shift the indices still to be removed.
  // Sorting a copy keeps the caller's array in its original order.
  const highestFirst = [...duplicates].sort((a, b) => b.index - a.index);
  for (const duplicate of highestFirst) {
    positions.splice(duplicate.index, 1);
  }

  // Step 4: whatever is still marked is a track to add back at that position.
  const tracksToAddBack: { track: SpotifyTrackType; position: number }[] = [];
  positions.forEach((track, index) => {
    if (track) {
      tracksToAddBack.push({ track, position: index });
    }
  });

  return tracksToAddBack;
};

export class PlaylistDeduplicator extends BaseDeduplicator {
static async getTracks(
api: SpotifyWebApi,
Expand Down Expand Up @@ -157,74 +112,29 @@ export class PlaylistDeduplicator extends BaseDeduplicator {
'It is not possible to delete duplicates from a collaborative playlist using this tool since this is not supported in the Spotify Web API. You will need to remove these manually.'
);
} else {




let promises: Array<() => {}> = [];

// due to the change in Spotify's API, we can no longer specify a position to be deleted.
// thus, we need to delete all the tracks with a certain id, and then add a track in the right position.

// first, let's remove the duplicates that are based on artist name alone, as we are fine deleting
// all instances of these

// todo: we should probably use the id from "linked_from" for deduplication
const tracksToRemoveSameNameArtist = removeDuplicatesInArray(playlistModel.duplicates.filter(duplicate => duplicate.reason === 'same-name-artist')
.map((d) => d.track.linked_from ? d.track.linked_from.uri : d.track.uri));


if (tracksToRemoveSameNameArtist.length) {
do {
const chunk = tracksToRemoveSameNameArtist.splice(0, 100);
(function (playlistModel, chunk, api) {
promises.push(() =>
api.removeTracksFromPlaylist(
playlistModel.playlist.id,
chunk
)
);
})(playlistModel, chunk, api);
} while (tracksToRemoveSameNameArtist.length > 0);
}

// second, let's remove the duplicates that are based on ids
const tracksToRemoveSameId = removeDuplicatesInArray(
playlistModel.duplicates.filter(duplicate => duplicate.reason === 'same-id')
.map((d) => d.track.linked_from ? d.track.linked_from.uri : d.track.uri)
);

if (tracksToRemoveSameId.length) {
do {
const chunk = tracksToRemoveSameId.splice(0, 100);
(function (playlistModel, chunk, api) {
promises.push(() => {
const result = api.removeTracksFromPlaylist(
playlistModel.playlist.id,
chunk
);
return result;
}
);
})(playlistModel, chunk, api);
} while (tracksToRemoveSameId.length > 0);
}

const tracksToAddBack = calculateTracksToAddBack(playlistModel.duplicates);
if (tracksToAddBack.length) {
do {
const chunk = tracksToAddBack.splice(0, 1);
(function (playlistModel, chunk, api) {
promises.push(() =>
api.addTracksToPlaylist(
playlistModel.playlist.id,
[chunk[0].track.uri],
chunk[0].position
)
);
})(playlistModel, chunk, api);
} while (tracksToAddBack.length > 0);
}
const tracksToRemove = playlistModel.duplicates
.map((d) => ({
uri: d.track.linked_from ? d.track.linked_from.uri : d.track.uri,
positions: [d.index],
}))
const promises: Array<() => {}> = [];

// generate the list of all the positions to be removed
const positions: Array<number> =
tracksToRemove.reduce((prev, current) => prev.concat(current.positions),
[] as number[])
.sort((a, b) => b - a); // reverse so we delete the last ones first
do {
const chunk = positions.splice(0, 100);
(function (playlistModel, chunk, api) {
promises.push(() =>
api.removeTracksFromPlaylist(
playlistModel.playlist.id,
chunk
)
);
})(playlistModel, chunk, api);
} while (positions.length > 0);

promises
.reduce(
Expand Down Expand Up @@ -300,4 +210,4 @@ export class SavedTracksDeduplicator extends BaseDeduplicator {
resolve();
});
}
}
}
1 change: 0 additions & 1 deletion dedup/process.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ export default class {
}

/**
 * Calls every callback stored under `event` in `this.listeners`, passing
 * `params` to each one in the order they are stored.
 *
 * Dispatching an event that has no entry in `this.listeners` is a no-op.
 *
 * @param event - name of the event whose callbacks should run
 * @param params - payload forwarded unchanged to each callback
 */
dispatch(event: string, params) {
  // No list is stored for an event nobody registered for; fall back to an
  // empty list instead of failing on `undefined.forEach`.
  // NOTE(review): assumes `this.listeners` maps event names to arrays of
  // callbacks — confirm against the registration code.
  const callbacks = this.listeners[event] || [];
  callbacks.forEach((callback) => callback(params));
}
Expand Down
9 changes: 5 additions & 4 deletions dedup/spotifyApi.ts
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ export default class SpotifyWebApi {

async removeTracksFromPlaylist(
playlistId: string,
uris: Array<string>
positions: Array<number>
) {
const res = await fetch(
`${apiPrefix}/playlists/${playlistId}/tracks`,
Expand All @@ -163,9 +163,10 @@ export default class SpotifyWebApi {
headers: {
Authorization: `Bearer ${this.token}`,
},
body: JSON.stringify({ tracks: uris.map(uri => ({ uri })) }),
}
);
body: JSON.stringify({
positions
})
});
return parseAPIResponse(res as Response);
}

Expand Down
1 change: 0 additions & 1 deletion dedup/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,4 @@ export type Duplicate = {
index: number;
reason: DuplicateReason;
track: SpotifyTrackType;
canonicalPosition: number | undefined;
}
2 changes: 1 addition & 1 deletion i18n.js
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ const resources = {
'faq.question-3': 'How is Dedup better than Spotify\'s built-in duplicate detection?',
'faq.answer-3': 'Spotify\'s applications only warn about duplicates when adding a song to a playlit or liked songs with the exact same song identifier. However, the same song can have multiple identifiers on Spotify that both in the same release or in several ones. Dedup detects duplicates based on title, artist, and duration similarity.',
'faq.question-4': 'When duplicates are found, which songs are removed?',
'faq.answer-4': 'Dedup will keep the first song within a group of duplicate songs, and will remove the rest. That first song will get its "added time" updated due to Spotify\'s API design.',
'faq.answer-4': 'Dedup will keep the first song within a group of duplicate songs, and will remove the rest.',
'faq.question-5': 'Is my data safe with this web application?',
'faq.answer-5': 'Yes, this web application does not store any user data on its servers. It only requests the minimum set of permissions necessary to process your library.',
'faq.question-6': 'What permissions does this web application require?',
Expand Down

0 comments on commit d0493aa

Please sign in to comment.