Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

csv import and export fixes #3135

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions bookwyrm/importers/__init__.py
@@ -1,6 +1,7 @@
""" import classes """

from .importer import Importer
from .bookwyrm_import import BookwyrmBooksImporter
from .calibre_import import CalibreImporter
from .goodreads_import import GoodreadsImporter
from .librarything_import import LibrarythingImporter
Expand Down
14 changes: 14 additions & 0 deletions bookwyrm/importers/bookwyrm_import.py
@@ -0,0 +1,14 @@
""" handle reading a csv from BookWyrm """

from typing import Any
from . import Importer


class BookwyrmBooksImporter(Importer):
    """Importer for CSV files exported by BookWyrm.

    Goodreads is the default importer, we basically just use the same
    structure, with one extra column mapping for ``shelf_name``.
    """

    service = "BookWyrm"

    def __init__(self, *args: Any, **kwargs: Any):
        """Register the ``shelf_name`` mapping, then run the parent initializer.

        All positional and keyword arguments are forwarded unchanged.
        """
        # Rebind a fresh list on the instance instead of appending in place.
        # ``row_mappings_guesses`` is a class-level list inherited from
        # ``Importer``; ``.append`` would mutate it for every importer class
        # and add a duplicate entry each time this class is instantiated.
        self.row_mappings_guesses = [
            *self.row_mappings_guesses,
            ("shelf_name", ["shelf_name"]),
        ]
        super().__init__(*args, **kwargs)
19 changes: 13 additions & 6 deletions bookwyrm/importers/importer.py
Expand Up @@ -18,14 +18,14 @@ class Importer:
row_mappings_guesses = [
("id", ["id", "book id"]),
("title", ["title"]),
("authors", ["author", "authors", "primary author"]),
("authors", ["author_text", "author", "authors", "primary author"]),
("isbn_10", ["isbn10", "isbn", "isbn/uid"]),
("isbn_13", ["isbn13", "isbn", "isbns", "isbn/uid"]),
("shelf", ["shelf", "exclusive shelf", "read status", "bookshelf"]),
("review_name", ["review name"]),
("review_body", ["my review", "review"]),
("review_name", ["review_name", "review name"]),
("review_body", ["review_content", "my review", "review"]),
("rating", ["my rating", "rating", "star rating"]),
("date_added", ["date added", "entry date", "added"]),
("date_added", ["date_added", "date added", "entry date", "added"]),
("date_started", ["date started", "started"]),
("date_finished", ["date finished", "last date read", "date read", "finished"]),
]
Expand All @@ -38,7 +38,12 @@ class Importer:

# pylint: disable=too-many-locals
def create_job(
self, user: User, csv_file: Iterable[str], include_reviews: bool, privacy: str
self,
user: User,
csv_file: Iterable[str],
include_reviews: bool,
create_shelves: bool,
privacy: str,
) -> ImportJob:
"""check over a csv and creates a database entry for the job"""
csv_reader = csv.DictReader(csv_file, delimiter=self.delimiter)
Expand All @@ -55,6 +60,7 @@ def create_job(
job = ImportJob.objects.create(
user=user,
include_reviews=include_reviews,
create_shelves=create_shelves,
privacy=privacy,
mappings=mappings,
source=self.service,
Expand Down Expand Up @@ -114,7 +120,7 @@ def get_shelf(self, normalized_row: dict[str, Optional[str]]) -> Optional[str]:
shelf = [
s for (s, gs) in self.shelf_mapping_guesses.items() if shelf_name in gs
]
return shelf[0] if shelf else None
return shelf[0] if shelf else normalized_row.get("shelf") or None

# pylint: disable=no-self-use
def normalize_row(
Expand Down Expand Up @@ -149,6 +155,7 @@ def create_retry_job(
job = ImportJob.objects.create(
user=user,
include_reviews=original_job.include_reviews,
create_shelves=original_job.create_shelves,
privacy=original_job.privacy,
source=original_job.source,
# TODO: allow users to adjust mappings
Expand Down
18 changes: 18 additions & 0 deletions bookwyrm/migrations/0189_importjob_create_shelves.py
@@ -0,0 +1,18 @@
# Generated by Django 3.2.23 on 2023-11-25 05:49

from django.db import migrations, models


class Migration(migrations.Migration):
    """Add the ``create_shelves`` boolean field to the ``importjob`` model."""

    # This migration is ordered after 0188_theme_loads of the bookwyrm app.
    dependencies = [
        ("bookwyrm", "0188_theme_loads"),
    ]

    operations = [
        # The field is declared with default=True.
        migrations.AddField(
            model_name="importjob",
            name="create_shelves",
            field=models.BooleanField(default=True),
        ),
    ]
46 changes: 39 additions & 7 deletions bookwyrm/models/import_job.py
Expand Up @@ -4,6 +4,7 @@
import re
import dateutil.parser

from django.core.exceptions import ObjectDoesNotExist
from django.db import models
from django.utils import timezone
from django.utils.translation import gettext_lazy as _
Expand Down Expand Up @@ -59,6 +60,7 @@ class ImportJob(models.Model):
created_date = models.DateTimeField(default=timezone.now)
updated_date = models.DateTimeField(default=timezone.now)
include_reviews: bool = models.BooleanField(default=True)
create_shelves: bool = models.BooleanField(default=True)
mappings = models.JSONField()
source = models.CharField(max_length=100)
privacy = models.CharField(max_length=255, default="public", choices=PrivacyLevels)
Expand Down Expand Up @@ -245,6 +247,11 @@ def shelf(self):
"""the goodreads shelf field"""
return self.normalized_data.get("shelf")

@property
def shelf_name(self):
    """the ``shelf_name`` entry of the normalized row data, if present"""
    return self.normalized_data.get("shelf_name")

@property
def review(self):
"""a user-written review, to be imported with the book data"""
Expand Down Expand Up @@ -388,11 +395,36 @@ def handle_imported_book(item):

# shelve the book if it hasn't been shelved already
if item.shelf and not existing_shelf:
    # Work out the date before the lookup so it is bound on every path,
    # including the one where the shelf has to be created first.
    shelved_date = item.date_added or timezone.now()

    try:
        desired_shelf = Shelf.objects.get(identifier=item.shelf, user=user)
    except ObjectDoesNotExist:
        desired_shelf = None
        if job.create_shelves:
            # ``shelf_name`` is None when the column is missing, so fall
            # back to the identifier with ``or`` (a getattr default would
            # never be used because the attribute always exists).
            shelf_name = getattr(item, "shelf_name", None) or item.shelf
            desired_shelf = Shelf.objects.create(
                user=user,
                identifier=item.shelf,
                name=shelf_name,
                privacy=job.privacy,
            )

    # No shelf means it did not exist and the job opted out of creating it.
    if desired_shelf is not None:
        ShelfBook(
            book=item.book,
            shelf=desired_shelf,
            user=user,
            shelved_date=shelved_date,
        ).save(priority=IMPORT_TRIGGERED)

for read in item.reads:
# check for an existing readthrough with the same dates
Expand All @@ -408,9 +440,9 @@ def handle_imported_book(item):
read.save()

if job.include_reviews and (item.rating or item.review) and not item.linked_review:
# we don't know the publication date of the review,
# but "now" is a bad guess
published_date_guess = item.date_read or item.date_added
# we don't necessarily know the publication date of the review,
# but "now" is a bad guess unless we have no choice
published_date_guess = item.date_read or item.date_added or timezone.now()
if item.review:
# pylint: disable=consider-using-f-string
review_title = "Review of {!r} on {!r}".format(
Expand Down
10 changes: 9 additions & 1 deletion bookwyrm/templates/import/import.html
Expand Up @@ -70,6 +70,9 @@ <h1 class="title">{% trans "Import Books" %}</h1>
<option value="Calibre" {% if current == 'Calibre' %}selected{% endif %}>
{% trans "Calibre (CSV)" %}
</option>
<option value="BookWyrm" {% if current == 'BookWyrm' %}selected{% endif %}>
{% trans "BookWyrm (CSV)" %}
</option>
</select>
</div>

Expand All @@ -94,9 +97,14 @@ <h1 class="title">{% trans "Import Books" %}</h1>
<input type="checkbox" name="include_reviews" checked> {% trans "Include reviews" %}
</label>
</div>
<div class="field">
<label class="label">
<input type="checkbox" name="create_shelves" checked> {% trans "Create new shelves if they do not exist" %}
</label>
</div>
<div class="field">
<label class="label" for="privacy_import">
{% trans "Privacy setting for imported reviews:" %}
{% trans "Privacy setting for imported reviews and shelves:" %}
</label>
{% include 'snippets/privacy_select.html' with no_label=True privacy_uuid="import" %}
</div>
Expand Down
11 changes: 9 additions & 2 deletions bookwyrm/views/imports/import_data.py
Expand Up @@ -15,6 +15,7 @@

from bookwyrm import forms, models
from bookwyrm.importers import (
BookwyrmBooksImporter,
CalibreImporter,
LibrarythingImporter,
GoodreadsImporter,
Expand Down Expand Up @@ -67,7 +68,7 @@ def get(self, request, invalid=False):
return TemplateResponse(request, "import/import.html", data)

def post(self, request):
"""ingest a goodreads csv"""
"""ingest a book data csv"""
site = models.SiteSettings.objects.get()
if not site.imports_enabled:
raise PermissionDenied()
Expand All @@ -77,11 +78,16 @@ def post(self, request):
return HttpResponseBadRequest()

include_reviews = request.POST.get("include_reviews") == "on"
create_shelves = request.POST.get("create_shelves") == "on"
privacy = request.POST.get("privacy")
source = request.POST.get("source")

importer = None
if source == "LibraryThing":

if source == "BookWyrm":
importer = BookwyrmBooksImporter()
print("BookwyrmBooksImporter")
elif source == "LibraryThing":
importer = LibrarythingImporter()
elif source == "Storygraph":
importer = StorygraphImporter()
Expand All @@ -98,6 +104,7 @@ def post(self, request):
request.user,
TextIOWrapper(request.FILES["csv_file"], encoding=importer.encoding),
include_reviews,
create_shelves,
privacy,
)
except (UnicodeDecodeError, ValueError, KeyError):
Expand Down
27 changes: 25 additions & 2 deletions bookwyrm/views/preferences/export.py
Expand Up @@ -48,7 +48,16 @@ def post(self, request):
fields = (
["title", "author_text"]
+ deduplication_fields
+ ["rating", "review_name", "review_cw", "review_content"]
+ [
"rating",
"review_published",
"review_name",
"review_cw",
"review_content",
"shelf",
"shelf_name",
"date_added",
]
)
writer.writerow(fields)

Expand All @@ -72,9 +81,23 @@ def post(self, request):
.first()
)
if review:
book.review_published = review.published_date
book.review_name = review.name
book.review_cw = review.content_warning
book.review_content = review.raw_content
book.review_content = (
review.raw_content
) # do imported reviews not have raw content?

shelfbook = (
models.ShelfBook.objects.filter(book=book, user=request.user)
.order_by("shelved_date")
.last()
)
if shelfbook:
book.shelf = shelfbook.shelf.identifier
book.shelf_name = shelfbook.shelf.name
book.date_added = shelfbook.shelved_date

writer.writerow([getattr(book, field, "") or "" for field in fields])

return HttpResponse(
Expand Down