Skip to content

Commit

Permalink
Add OSHB import format
Browse files Browse the repository at this point in the history
  • Loading branch information
schierlm committed Mar 12, 2024
1 parent 7622542 commit 634e4f2
Show file tree
Hide file tree
Showing 3 changed files with 100 additions and 1 deletion.
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,9 @@ In addition, the following other formats are supported, with varying accuracy:
- **[BrowserBible](https://github.com/digitalbiblesociety/browserbible-3/)**: export only
- **[Quick Bible](http://www.bibleforandroid.com/)**: export only
- **[SWORD](https://www.crosswire.org/sword) modules**: import only (see below for details)
- **[MorphGNT](https://github.com/morphgnt/sblgnt)**: import only
- **Original Languages with tagging**: import only
- [MorphGNT](https://github.com/morphgnt/sblgnt)
- [OpenScriptures Hebrew Bible (OSHB) MorphHB](https://github.com/openscriptures/morphhb)
- **[MyBible.Zone](https://mybible.zone/index-eng.php)** ([more bibles](http://www.ph4.org/b4_index.php)): import and export (in a special SQLite edition)
- **[Bible Analyzer](http://www.bibleanalyzer.com/)**: export only (text export for
bibles and dictionaries, SQLite export for bibles)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ public Collection<Module<ImportFormat>> getImportFormats() {
List<Module<ImportFormat>> result = new ArrayList<ModuleRegistry.Module<ImportFormat>>();
result.add(new Module<ImportFormat>("StrongDictionary", "Importer for creating a Strong's dictionary from public domain resources.", StrongDictionary.HELP_TEXT, StrongDictionary.class));
result.add(new Module<ImportFormat>("MorphGNT", "Importer for MorphGNT", MorphGNT.HELP_TEXT, MorphGNT.class));
result.add(new Module<ImportFormat>("OSHB", "Importer for OpenScriptures Hebrew Bible MorphBB", OSHB.HELP_TEXT, OSHB.class));
return result;
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
package biblemulticonverter.format;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;

import org.w3c.dom.*;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
import org.w3c.dom.Text;

import biblemulticonverter.data.Bible;
import biblemulticonverter.data.Book;
import biblemulticonverter.data.BookID;
import biblemulticonverter.data.Chapter;
import biblemulticonverter.data.FormattedText.ExtraAttributePriority;
import biblemulticonverter.data.FormattedText.Visitor;
import biblemulticonverter.data.Verse;

/**
 * Import format that reads per-book XML files from a directory and builds a
 * {@link Bible} named "OSHB" from their {@code <verse>} elements.
 *
 * <p>
 * For every Zefania book number from 1 to 39 (presumably the books of the
 * Hebrew Bible / Old Testament; confirm against {@code BookID}) the file
 * {@code <osisID>.xml} inside the given directory is parsed. Each
 * {@code <verse>} may contain {@code <w>} (word), {@code <seg>} and
 * {@code <note>} children; anything else is rejected with an exception that
 * names the offending verse.
 */
public class OSHB implements ImportFormat {

    public static final String[] HELP_TEXT = {
            "Importer for OpenScriptures Hebrew Bible MorphBB",
            "",
            "Usage: OSHB <directory>",
            "",
            "Download OSHB from <https://github.com/openscriptures/morphhb>."
    };

    /**
     * Imports all books found in the given directory.
     *
     * @param directory directory containing one {@code <osisID>.xml} file per book
     * @return the imported bible
     * @throws Exception if a book file cannot be read or parsed; a
     *                   {@link RuntimeException} is thrown if a verse has an
     *                   unexpected {@code osisID} or unexpected content
     */
    @Override
    public Bible doImport(File directory) throws Exception {
        Bible bible = new Bible("OSHB");
        DocumentBuilder docBuilder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
        XPath xpath = javax.xml.xpath.XPathFactory.newInstance().newXPath();
        for (int zefID = 1; zefID < 40; zefID++) {
            BookID bid = BookID.fromZefId(zefID);
            // The OSIS ID doubles as book abbreviation and as file name stem.
            Book book = new Book(bid.getOsisID(), bid, bid.getEnglishName(), bid.getEnglishName());
            bible.getBooks().add(book);
            File bookFile = new File(directory, bid.getOsisID() + ".xml");
            Document doc = docBuilder.parse(bookFile);
            NodeList verses = (NodeList) xpath.evaluate("//verse", doc, XPathConstants.NODESET);
            for (int i = 0; i < verses.getLength(); i++) {
                importVerse(book, (Element) verses.item(i), bookFile);
            }
        }
        return bible;
    }

    /**
     * Converts one {@code <verse>} element and appends it to the chapter given
     * by its {@code osisID} attribute (format {@code Book.Chapter.Verse}).
     * Missing chapters up to the referenced one are created on demand.
     */
    private static void importVerse(Book book, Element verse, File bookFile) {
        String osisID = verse.getAttribute("osisID");
        String[] parts = osisID.split("\\.");
        if (parts.length != 3 || !parts[0].equals(book.getAbbr()))
            throw new RuntimeException("Unexpected osisID \"" + osisID + "\" for book " + book.getAbbr() + " in " + bookFile);
        int cnum = Integer.parseInt(parts[1]);
        int vnum = Integer.parseInt(parts[2]);
        while (book.getChapters().size() < cnum)
            book.getChapters().add(new Chapter());
        Verse v = new Verse("" + vnum);
        book.getChapters().get(cnum - 1).getVerses().add(v);
        Visitor<RuntimeException> vv = v.getAppendVisitor();
        // No separator before the first emitted child; a single space before every later one.
        boolean spaceAllowed = false;
        for (Node child = verse.getFirstChild(); child != null; child = child.getNextSibling()) {
            if (child instanceof Text && child.getTextContent().trim().isEmpty()) {
                continue; // indentation / line breaks between elements
            }
            if (child instanceof Comment) {
                continue; // XML comments carry no verse content
            }
            if (!(child instanceof Element)) {
                // Previously an unchecked cast failed here with a bare ClassCastException.
                throw new RuntimeException("Unexpected " + child.getNodeName() + " node in verse " + osisID + " of " + bookFile);
            }
            Element w = (Element) child;
            if (spaceAllowed)
                vv.visitText(" ");
            spaceAllowed = true;
            String nodeName = w.getNodeName();
            if (nodeName.equals("seg")) {
                vv.visitText(w.getTextContent().trim());
            } else if (nodeName.equals("note")) {
                // Collapse whitespace runs so the note becomes a single line of text.
                vv.visitFootnote().visitText(w.getTextContent().replaceAll("[\r\n\t ]+", " ").trim());
            } else if (nodeName.equals("w")) {
                int[] strong = parseNumbers(w.getAttribute("lemma"));
                // The morph attribute is passed through unchanged as the only entry of the third argument.
                vv.visitGrammarInformation(null, strong.length == 0 ? null : strong, new String[] { w.getAttribute("morph") }, null).visitText(w.getTextContent());
            } else {
                throw new RuntimeException("Unexpected element <" + nodeName + "> in verse " + osisID + " of " + bookFile);
            }
        }
        v.finished();
    }

    /**
     * Extracts every run of decimal digits from the given {@code lemma}
     * attribute value, in order of appearance; all other characters act as
     * separators. Returns an empty array if the value contains no digits.
     */
    private static int[] parseNumbers(String lemma) {
        List<String> snums = new ArrayList<>(Arrays.asList(lemma.split("[^0-9]+")));
        // split() yields an empty leading token when the value starts with a separator (or is empty).
        snums.removeIf(s -> s.isEmpty());
        int[] numbers = new int[snums.size()];
        for (int j = 0; j < numbers.length; j++) {
            numbers[j] = Integer.parseInt(snums.get(j));
        }
        return numbers;
    }
}

0 comments on commit 634e4f2

Please sign in to comment.