diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml new file mode 100644 index 00000000..ffb29689 --- /dev/null +++ b/.github/FUNDING.yml @@ -0,0 +1,4 @@ +# These are supported funding model platforms + +open_collective: tabulapdf + diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 00000000..9d3ecd47 --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,21 @@ +on: + release: + types: [published] +permissions: + contents: read + packages: write +jobs: + release: + runs-on: ubuntu-latest + steps: + - uses: docker/login-action@v2 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ github.token }} + - uses: docker/build-push-action@v4 + with: + push: true + tags: | + ghcr.io/${{ github.repository }}:${{ github.ref_name }} + ghcr.io/${{ github.repository }}:latest diff --git a/.ruby-version b/.ruby-version index 3361fb04..80daa827 100644 --- a/.ruby-version +++ b/.ruby-version @@ -1 +1 @@ -jruby-1.7.24 +jruby-9.2.13.0 diff --git a/.travis.yml b/.travis.yml index 7d25a5b6..53cb27b8 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,14 +1,15 @@ sudo: false -cache: bundler language: ruby +dist: trusty before_install: - gem update --system - gem install bundler + - gem install jbundler rvm: - - jruby-1.7.22 + - jruby-9.1.9.0 jdk: - oraclejdk8 - openjdk7 -script: bundle exec rake war && bundle exec rake jardist +script: bundle exec jbundle install && bundle exec rake war && bundle exec rake jardist notifications: email: false diff --git a/AUTHORS.md b/AUTHORS.md index da7e37b6..213b1a7c 100644 --- a/AUTHORS.md +++ b/AUTHORS.md @@ -1,18 +1,21 @@ -Tabula was originally started by Manuel Aristarán in late 2012 +# Authors & Acknowledgments + +Tabula was originally started by Manuel Aristarán in late 2012. The PRIMARY AUTHORS are (and/or have been): * Manuel Aristarán - MIT Media Lab (formerly La Nación, Knight-Mozilla OpenNews) * Mike Tigas - ProPublica, Knight-Mozilla OpenNews * Jeremy B. 
Merrill - The New York Times (formerly ProPublica) -* Jason Das, designer +* Jason Das, designer * David Frackman * Travis Swicegood - Texas Tribune Special thanks to these organizations: -* Knight-Mozilla OpenNews +* Knight-Mozilla OpenNews * ProPublica * La Nación * The New York Times * Knight Lab at Northwestern University +* The John S. and James L. Knight Foundation diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index d4cee4e6..fb389230 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -7,7 +7,7 @@ Interested in helping out? We'd love to have your help! You can help by: -- [Reporting a bug](https://github.com/jazzido/tabula). +- [Reporting a bug](https://github.com/tabulapdf/tabula/issues/new). - Adding or editing documentation. - Contributing code via a Pull Request from ideas listed in the [Enhancements](https://github.com/tabulapdf/tabula/labels/enhancement) section of the issues. - Spreading the word about Tabula to people who might be able to benefit from using it. @@ -28,10 +28,8 @@ Guidelines for contributing code If you'd like to contribute code, here's some stuff you should know: You're also welcome to send us a note, if you'd like. All of our email addresses are listed on our Github pages. -Tabula comes in a bunch of parts, all located in the [TabulaPDF Github organization](github.com/tabulapdf). +Tabula comes in a bunch of parts, all located in the [TabulaPDF Github organization](https://github.com/tabulapdf). -The [tabula](https://github.com/tabulapdf/tabula) repo is the UI. We aim for it to soon be all front-end, but right now has a small web server, written in Ruby, to interface between the front-end and extractor library, called "tabula-extractor" - - the [tabula-extractor](https://github.com/tabulapdf/tabula-extractor/) Ruby gem actually extracts info from PDFs, using table locations provided by the UI (or on the command line). 
tabula-extractor will be deprecated soon too -- it'll be replaced by 'tabula-java' - [tabula-java](https://github.com/tabulapdf/tabula-java/) is a pure Java port, for speed/wider usability. - - [tabula-api](https://github.com/tabulapdf/tabula-api/) will eventually serve as the glue layer between tabula-java and the tabula UI (replacing that small web server mentioned above). -The [Enhancements](https://github.com/tabulapdf/tabula/labels/enhancement) section of the issues lists some important improvements to Tabula that you could try out. They're well-suited to contributors, since they don't depend on a deep knowledge of all of Tabula's parts and they don't depend on close coordination. \ No newline at end of file +The [Enhancements](https://github.com/tabulapdf/tabula/labels/enhancement) section of the issues lists some important improvements to Tabula that you could try out. They're well-suited to contributors, since they don't depend on a deep knowledge of all of Tabula's parts and they don't depend on close coordination. diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 00000000..6d7b2845 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,23 @@ +FROM jruby:9.2-jdk + +RUN apt-get update -qq && apt-get install -y build-essential git \ + && apt-get clean && rm -rf /var/lib/apt/lists/* +RUN echo 'gem: --no-rdoc --no-ri' >> /.gemrc + +ENV GEM_HOME /usr/local/bundle +ENV PATH $GEM_HOME/bin:$PATH +RUN gem install bundler -v '< 2' \ + && bundle config --global path "$GEM_HOME" \ + && bundle config --global bin "$GEM_HOME/bin" + +# don't create ".bundle" in all our apps +ENV BUNDLE_APP_CONFIG $GEM_HOME + +WORKDIR /app +EXPOSE 9292 +CMD ["jruby", "-G", "-r", "jbundler", "-S", "rackup", "-o", "0.0.0.0", "config.ru"] + +# these didn't work as ONBUILD, strangely. Idk why. -JBM +COPY Gemfile Gemfile.lock Jarfile Jarfile.lock ./ +RUN bundle install && jruby -S jbundle install +COPY . . 
diff --git a/Gemfile b/Gemfile index 2a921d3b..22935cb3 100644 --- a/Gemfile +++ b/Gemfile @@ -4,15 +4,17 @@ source "https://rubygems.org" platform :jruby do - gem "cuba" - gem "rack", "~> 1.6.0" - gem "tilt", "~> 1.4.0" + gem "cuba", "~> 3.9.2" + gem "rack", ">= 2.0.6" + gem "tilt", "~> 2.0.8" group :development do + gem 'jar-dependencies', '0.3.12' + gem 'jbundler', '~> 0.9.3' gem "rake" - gem "warbler", "~> 1.4.9" - gem "jruby-jars", "1.7.24" - gem "bootstrap-sass", "~> 3.2.0" + gem "warbler", "~> 2.0.5" + gem "jruby-jars", "9.2.0.0" + gem "bootstrap-sass", ">= 3.4.1" gem "compass" end end diff --git a/Gemfile.lock b/Gemfile.lock index 9dd74b83..d402c57c 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -1,9 +1,18 @@ GEM remote: https://rubygems.org/ specs: - bootstrap-sass (3.2.0.2) - sass (~> 3.2) - chunky_png (1.3.5) + autoprefixer-rails (9.8.6.3) + execjs + axiom-types (0.1.1) + descendants_tracker (~> 0.0.4) + ice_nine (~> 0.11.0) + thread_safe (~> 0.3, >= 0.3.1) + bootstrap-sass (3.4.1) + autoprefixer-rails (>= 5.2.1) + sassc (>= 2.0.0) + chunky_png (1.3.12) + coercible (1.0.0) + descendants_tracker (~> 0.0.1) compass (1.0.3) chunky_png (~> 1.2) compass-core (~> 1.0.2) @@ -16,38 +25,65 @@ GEM sass (>= 3.3.0, < 3.5) compass-import-once (1.0.5) sass (>= 3.2, < 3.5) - cuba (3.5.0) - rack (~> 1.6.0) - ffi (1.9.10-java) - jruby-jars (1.7.24) - jruby-rack (1.1.20) - multi_json (1.11.2) - rack (1.6.4) - rake (10.5.0) - rb-fsevent (0.9.7) - rb-inotify (0.9.7) - ffi (>= 0.5.0) - rubyzip (1.1.7) - sass (3.4.21) - tilt (1.4.1) - warbler (1.4.9) - jruby-jars (>= 1.5.6, < 2.0) + cuba (3.9.3) + rack (>= 1.6.0) + descendants_tracker (0.0.4) + thread_safe (~> 0.3, >= 0.3.1) + equalizer (0.0.11) + execjs (2.7.0) + ffi (1.13.1-java) + ice_nine (0.11.2) + jar-dependencies (0.3.12) + jbundler (0.9.3) + bundler (~> 1.5) + jar-dependencies (~> 0.3) + maven-tools (~> 1.1) + ruby-maven (~> 3.3, >= 3.3.8) + jruby-jars (9.2.0.0) + jruby-rack (1.1.21) + maven-tools (1.1.6) + virtus 
(~> 1.0) + multi_json (1.15.0) + rack (2.2.3) + rake (13.0.1) + rb-fsevent (0.10.4) + rb-inotify (0.10.1) + ffi (~> 1.0) + ruby-maven (3.3.12) + ruby-maven-libs (~> 3.3.9) + ruby-maven-libs (3.3.9) + rubyzip (1.3.0) + sass (3.4.25) + sassc (2.4.0) + ffi (~> 1.9) + thread_safe (0.3.6-java) + tilt (2.0.10) + virtus (1.0.5) + axiom-types (~> 0.1) + coercible (~> 1.0) + descendants_tracker (~> 0.0, >= 0.0.3) + equalizer (~> 0.0, >= 0.0.9) + warbler (2.0.5) + jruby-jars (>= 9.0.0.0) jruby-rack (>= 1.1.1, < 1.3) - rake (>= 0.9.6) - rubyzip (>= 0.9, < 1.2) + rake (>= 10.1.0) + rubyzip (~> 1.0, < 1.4) PLATFORMS java + ruby DEPENDENCIES - bootstrap-sass (~> 3.2.0) + bootstrap-sass (>= 3.4.1) compass - cuba - jruby-jars (= 1.7.24) - rack (~> 1.6.0) + cuba (~> 3.9.2) + jar-dependencies (= 0.3.12) + jbundler (~> 0.9.3) + jruby-jars (= 9.2.0.0) + rack (>= 2.0.6) rake - tilt (~> 1.4.0) - warbler (~> 1.4.9) + tilt (~> 2.0.8) + warbler (~> 2.0.5) BUNDLED WITH - 1.11.2 + 1.17.3 diff --git a/Jarfile b/Jarfile new file mode 100644 index 00000000..69a9196a --- /dev/null +++ b/Jarfile @@ -0,0 +1 @@ +jar 'technology.tabula:tabula', '1.0.4' \ No newline at end of file diff --git a/Jarfile.lock b/Jarfile.lock new file mode 100644 index 00000000..1e5e4543 --- /dev/null +++ b/Jarfile.lock @@ -0,0 +1,18 @@ +--- +:runtime: +- org.apache.pdfbox:fontbox:jar:2.0.21 +- org.apache.pdfbox:pdfbox:jar:2.0.21 +- commons-cli:commons-cli:jar:1.4 +- org.apache.pdfbox:jbig2-imageio:jar:3.0.3 +- org.slf4j:slf4j-simple:jar:1.7.30 +- org.bouncycastle:bcpkix-jdk15on:jar:1.66 +- org.bouncycastle:bcprov-jdk15on:jar:1.66 +- org.bouncycastle:bcmail-jdk15on:jar:1.66 +- com.google.code.gson:gson:jar:2.8.6 +- commons-logging:commons-logging:jar:1.2 +- com.github.jai-imageio:jai-imageio-core:jar:1.4.0 +- org.slf4j:slf4j-api:jar:1.7.30 +- technology.tabula:tabula:jar:1.0.4 +- org.apache.commons:commons-csv:jar:1.8 +- com.github.jai-imageio:jai-imageio-jpeg2000:jar:1.3.0 +- org.locationtech.jts:jts-core:jar:1.17.0 diff 
--git a/LICENSE.md b/LICENSE.md index 25e0070a..67ebf5ce 100644 --- a/LICENSE.md +++ b/LICENSE.md @@ -1,4 +1,4 @@ -Copyright (C) 2012-2015 Manuel Aristarán +Copyright (C) 2012-2020 Manuel Aristarán Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/NOTICE.txt b/NOTICE.txt index 2eafee38..0298d823 100644 --- a/NOTICE.txt +++ b/NOTICE.txt @@ -1,5 +1,5 @@ Tabula -© 2012-2013 Manuel Aristarán. Available under MIT License. See `AUTHORS.md` +© 2012-2020 Manuel Aristarán. Available under MIT License. See `AUTHORS.md` and `LICENSE.md`. This product includes software (lib/pdfbox-app-1.8.0.jar) developed at diff --git a/README.md b/README.md index cef007a6..d19cd060 100644 --- a/README.md +++ b/README.md @@ -1,40 +1,58 @@ -**Repo Note**: The `master` branch is an *in development* version of Tabula that may or may not work. For stable, working code, please see [releases](https://github.com/tabulapdf/tabula/releases) or [switch to a `stable/` branch](https://github.com/tabulapdf/tabula/branches). +**Is `tabula` an active project?** -*As of August 2015, the master branch uses [tabula-java](https://github.com/tabulapdf/tabula-java) instead of [tabula-extractor](https://github.com/tabulapdf/tabula-extractor) under the hood.* +Tabula is, and always has been, a volunteer-run project. We've occasionally had funding for specific features, but it's never been a commercial undertaking. At the moment, none of the original authors have the time to actively work on the project. The end-user application, hosted on this repo, is unlikely to see updates from us in the near future. [`tabula-java`](https://github.com/tabulapdf/tabula-java) sees updates and occasional bug-fix releases from time to time. + +-- + +**Repo Note**: The `master` branch is an *in development* version of Tabula.
This may be substantially different from the latest [releases of Tabula](https://github.com/tabulapdf/tabula/releases). --- -# Tabula -[tabula `master`](https://github.com/tabulapdf/tabula/tree/master) [![Build Status](https://travis-ci.org/tabulapdf/tabula.svg?branch=master)](https://travis-ci.org/tabulapdf/tabula) +# Tabula + +[tabula `master`](https://github.com/tabulapdf/tabula/tree/master) +[![Build Status](https://travis-ci.org/tabulapdf/tabula.svg?branch=master)](https://travis-ci.org/tabulapdf/tabula) Tabula helps you liberate data tables trapped inside PDF files. * [Download from the official site](http://tabula.technology/) -* [Read more about Tabula on OpenNews Source](http://source.mozillaopennews.org/en-US/articles/introducing-tabula/) -* See also: [tabula-extractor](https://github.com/jazzido/tabula-extractor), a command-line interface for Tabula. (Also, this is the extraction library that powers Tabula.) +* [Read more about Tabula on OpenNews Source](https://source.opennews.org/en-US/articles/introducing-tabula/) +* Interested in using Tabula on the command-line? Check out [tabula-java](https://github.com/tabulapdf/tabula-java), a Java library and command-line interface for Tabula. (This is the extraction library that powers Tabula.) -© 2012-2015 Manuel Aristarán. Available under MIT License. See +© 2012-2020 Manuel Aristarán. Available under MIT License. See [`AUTHORS.md`](AUTHORS.md) and [`LICENSE.md`](LICENSE.md). +- [Why Tabula?](#why-tabula) +- [Using Tabula](#using-tabula) +- [Known issues](#known-issues) +- [Incorporating Tabula into your own + project](#incorporating-tabula-into-your-own-project) +- [Running Tabula from source + (for developers)](#running-tabula-from-source-for-developers) + - [Building a packaged application + version](#building-a-packaged-application-version) +- [Contributing](#contributing) + - [Backers](#backers) + ## Why Tabula? 
If you’ve ever tried to do anything with data provided to you in PDFs, you know how painful this is — you can’t easily copy-and-paste rows of data out of PDF files. Tabula allows you to extract that data in CSV format, through -a simple web interface ([Check out this short screencast](https://erika.makes.org/popcorn/16ll)) +a simple web interface. **Caveat**: Tabula only works on text-based PDFs, not scanned documents. If you can click-and-drag to select text in your table in a PDF viewer (even if the output is disorganized trash), then your PDF is text-based and Tabula should work. -**Security Concerns?**: Tabula is designed with security in mind. Your PDF and the extracted data *never* touch the net -- when you use Tabula, as long as your browser's URL bar says "localhost" or "127.0.0.1", all processing takes place on your local machine. Tabula does download a list of Tabula versions from our server to alert you if Tabula has been updated (and we use hits to that list to count how often Tabula is being used); it also downloads a few badges and assets from the web. +**Security Concerns?**: Tabula is designed with security in mind. Your PDF and the extracted data *never* touch the net -- when you use Tabula on your local machine, as long as your browser's URL bar says "localhost" or "127.0.0.1", all processing takes place on your local machine. Other than to retrieve a few badges and other static assets, there are two calls that are made from your browser to external machines; one fetches the list of latest Tabula versions from GitHub to alert you if Tabula has been updated, the other makes a call to a stats counter that helps us determine how often various versions of Tabula are being used. If this is a problem, the version check can be disabled by adding `-Dtabula.disable_version_check=1` to the command line at startup, and the stats counter call can be disabled by adding `-Dtabula.disable_notifications=1`. 
Please note: If you are providing Tabula as a service using a reverse SSL proxy, users [may notice a security warning](https://github.com/tabulapdf/tabula/issues/924) due to our stats counter endpoint being hosted at a non-secure URL, so you may wish to disable the notifications in this scenario. ## Using Tabula First, make sure you have a recent copy of Java installed. You can [download Java here][jre_download]. Tabula requires -a Java Runtime Environment compatible with Java 6 or Java 7. -If you have a problem, check [Known Issues](#knownissues) first, then [report an issue](http://www.github.com/jazzido/tabula/issues). +a Java Runtime Environment compatible with Java 7 (i.e. Java 7, 8 or higher). +If you have a problem, check [Known Issues](#known-issues) first, then [report an issue](http://www.github.com/tabulapdf/tabula/issues). * ### Windows Download `tabula-win.zip` from [the download site][tabula_dl]. Unzip the whole thing @@ -45,7 +63,7 @@ If you have a problem, check [Known Issues](#knownissues) first, then [report an To close Tabula, just go back to the console window and press "Control-C" (as if to copy). -* ###Mac OS X +* ### Mac OS X Download `tabula-mac.zip` from [the download site][tabula_dl]. Unzip and open the Tabula app inside. A browser should automatically open to http://127.0.0.1:8080/ . If not, open your web browser of choice and @@ -56,27 +74,33 @@ If you have a problem, check [Known Issues](#knownissues) first, then [report an Note: If you’re running Mac OS X 10.8 or later, you might get an error like "Tabula is damaged and can't be opened." We're working on fixing this, but click [here](#gatekeeper) for a workaround. -* ###Other platforms (e.g. Linux) +* ### Other platforms (e.g. Linux) Download `tabula-jar.zip` from [the download site][tabula_dl] and unzip it to the directory of your choice. Open a terminal window, and `cd` to inside the `tabula` directory you just unzipped.
Then run: `java -Dfile.encoding=utf-8 -Xms256M -Xmx1024M -jar tabula.jar` + Then manually navigate your browser to http://127.0.0.1:8080/ (New in + Tabula 1.1. To go back to the old behavior that automatically launches + your web browser, use the `-Dtabula.openBrowser=true` option.) + + Tabula binds to port 8080 by default. You can change it with the `warbler.port` option; for example, to use port 9999: + + `java -Dfile.encoding=utf-8 -Xms256M -Xmx1024M -Dwarbler.port=9999 -jar tabula.jar` + + If the program fails to run, double-check that you have [Java installed][jre_download] and then try again. [jre_download]: https://www.java.com/download/ [tabula_dl]: http://tabula.technology -Tabula binds to port 8080 by default. You can change it with the `warbler.port` property: - -`java -Dfile.encoding=utf-8 -Xms256M -Xmx1024M -Dwarbler.port=9999 -jar tabula.jar` ## Known issues -There are some bugs that we're aware of that we haven't managed to fix yet. If there's not a solution here or you need more help, please go ahead and [report an issue](http://www.github.com/jazzido/tabula/issues). +There are some bugs that we're aware of that we haven't managed to fix yet. If there's not a solution here or you need more help, please go ahead and [report an issue](http://www.github.com/tabulapdf/tabula/issues). * **Legacy Java Environment (SE 6) Is Required:** (Mac): @@ -102,31 +126,46 @@ There are some bugs that we're aware of that we haven't managed to fix yet. If t 4. Run Tabula by typing `tabula.exe` * **A browser tab opens, but something other than Tabula loads there. Or Tabula doesn't start.** - It's possible another program is using port 8080, whichh Tabula binds to by default. You can try closing the other program, or change the port Tabula uses by running Tabula from the terminal with the `warbler.port` property: + It's possible another program is using port 8080, which Tabula binds to by default.
You can try closing the other program, or change the port Tabula uses by running Tabula from the terminal with the `warbler.port` property: `java -Dfile.encoding=utf-8 -Xms256M -Xmx1024M -Dwarbler.port=9999 -jar tabula.jar` +## Incorporating Tabula into your own project + +Tabula is open-source, so we'd love for you to incorporate pieces of Tabula into your own projects. The "guts" of Tabula -- that is, the logic and heuristics that reconstruct tables from PDFs -- is contained in the [tabula-java](https://github.com/tabulapdf/tabula-java/) repo. There's a JAR file that you can easily incorporate into JVM languages like Java, Scala or Clojure and it includes a command-line tool for you to automate your extraction tasks. Visit that repo for more information on how to use `tabula-java` on the CLI and on how Tabula exports `tabula-java` scripts. + +### Bindings: + +Tabula has bindings for R, Node.js, Python and JRuby. If you end up writing bindings for another language, let us know and we'll add a link here. + + - [tabulizer](https://github.com/leeper/tabulizer) provides [R](https://www.r-project.org/) bindings for tabula-java and is community-supported by [@leeper](https://github.com/leeper). + - [tabula-js](https://github.com/ezodude/tabula-js) provides [Node.js](https://nodejs.org/en/) bindings for tabula-java; it is community-supported by [@ezodude](https://github.com/ezodude). + - [tabula-py](https://github.com/chezou/tabula-py) provides [Python](https://python.org) bindings for tabula-java; it is community-supported by [@chezou](https://github.com/chezou). + - [tabula-extractor](https://github.com/tabulapdf/tabula-extractor/) *DEPRECATED* - Provides JRuby bindings for tabula-java + + + ## Running Tabula from source (for developers) 1. Download JRuby. You can install it from its website, or using tools like - `rvm` or `rbenv` + `rvm` or `rbenv`. Note that as of Tabula 1.1.0 (7875582becb2799b65586d5680782cafd399bb33), Tabula uses the JRuby 9000 series (i.e. JRuby 9.1.5.0). 2.
Download Tabula and install the Ruby dependencies. (Note: if using `rvm` or `rbenv`, ensure that JRuby is being used. ~~~ - git clone git://github.com/jazzido/tabula.git + git clone https://github.com/tabulapdf/tabula.git cd tabula - gem install bundler - gem install tabula-extractor + gem install bundler -v 1.17.3 bundle install + jruby -S jbundle install ~~~ **Then, start the development server:** - bundle exec rackup + jruby -G -r jbundler -S rackup (If you get encoding errors, set the `JAVA_OPTS` environment variable to `-Dfile.encoding=utf-8`) @@ -136,7 +175,7 @@ You can a couple some options when executing the server in this manner: TABULA_DATA_DIR="/tmp/tabula" \ TABULA_DEBUG=1 \ - bundle exec rackup + jruby -G -r jbundler -S rackup * `TABULA_DATA_DIR` controls where uploaded data for Tabula is stored. By default, data is stored in the OS-dependent application data directory for the current @@ -150,9 +189,16 @@ You can a couple some options when executing the server in this manner: Testing in this manner will be closer to testing the "packaged application" version of the app. - bundle exec rake war + jruby -G -S rake war java -Dfile.encoding=utf-8 -Xms256M -Xmx1024M -jar build/tabula.jar + +If you intend to develop against an unreleased version of [`tabula-java`](https://github.com/tabulapdf/tabula-java), you need to install its JAR to your local Maven repository. From the directory that contains `tabula-java` source: + + mvn install:install-file -Dfile=target/tabula-<version>-SNAPSHOT.jar -DgroupId=technology.tabula -DartifactId=tabula -Dversion=<version>-SNAPSHOT -Dpackaging=jar -DpomFile=pom.xml + +Then, adjust the `Jarfile` accordingly.
+ ### Building a packaged application version After performing the above steps ("Running Tabula from source"), you can compile @@ -167,7 +213,7 @@ on the machine you are building it on, you may remove this entire block ( To compile the app: - rake macosx + WEBSERVER_VERSION=9.4.31.v20200723 MAVEN_REPO=https://repo1.maven.org/maven2 rake macosx This will result in a portable "tabula_mac.zip" archive (inside the `build` directory) for Mac OS X users. @@ -177,7 +223,7 @@ This results in a 98MB zip file, versus the 30MB zip file for other platforms, but allows users to run Tabula without having to worry about [Java version incompatibilities](https://github.com/tabulapdf/tabula/issues/237). -[buildxml_cert]: https://github.com/jazzido/tabula/blob/master/build.xml#L44-53 +[buildxml_cert]: https://github.com/tabulapdf/tabula/blob/master/build.xml#L44-53 **Windows** @@ -193,7 +239,7 @@ is in the repository root. Then: - rake windows + WEBSERVER_VERSION=9.4.31.v20200723 MAVEN_REPO=https://repo1.maven.org/maven2 rake windows This will result in a portable "tabula_win.zip" archive (inside the `build` directory) for Mac OS X users. @@ -204,7 +250,7 @@ If you have issues, you can try building manually. (These commands are for OS X/Linux and may need to be adjusted for Windows users.) # (from the root directory of the repo) - rake war + WEBSERVER_VERSION=9.4.31.v20200723 MAVEN_REPO=https://repo1.maven.org/maven2 rake war cd launch4j ant -f ../build.xml windows @@ -228,7 +274,25 @@ Interested in helping out? We'd love to have your help! You can help by: -- [Reporting a bug](https://github.com/jazzido/tabula). +- [Reporting a bug](https://github.com/tabulapdf/tabula/issues). - Adding or editing documentation. - Contributing code via a Pull Request from ideas or bugs listed in the [Enhancements](https://github.com/tabulapdf/tabula/labels/enhancement) section of the issues. 
[see `CONTRIBUTING.md`](CONTRIBUTING.md) - Spreading the word about Tabula to people who might be able to benefit from using it. + +### Backers + +You can also support our continued work on Tabula with a one-time or monthly donation [on OpenCollective](https://opencollective.com/tabulapdf#support). Organizations who use Tabula can also [sponsor the project](https://opencollective.com/tabulapdf#support) for acknowledgement on [our official site](http://tabula.technology/) and this README. + +Tabula is made possible in part through the generosity of our users and through grants from the Knight Foundation and the Shuttleworth Foundation. Special thanks to all the users and organizations that support Tabula! + + + + + + + + +The John S. and James L. Knight Foundation +The Shuttleworth Foundation + +More acknowledgments can be found in [`AUTHORS.md`](AUTHORS.md). diff --git a/Rakefile b/Rakefile index bb55d847..5aa64c72 100644 --- a/Rakefile +++ b/Rakefile @@ -13,6 +13,7 @@ Warbler::Task.new("war", config.webxml.jruby.rack.logging = "stderr" config.dirs = ['lib', 'webapp'] config.override_gem_home = false + config.init_contents << StringIO.new("\nGem.clear_paths\nGem.path\n\n") } ) diff --git a/build.xml b/build.xml index ccfd553b..ab197767 100644 --- a/build.xml +++ b/build.xml @@ -5,7 +5,7 @@ - + @@ -17,7 +17,7 @@ - +