Skip to content
This repository has been archived by the owner on Jul 31, 2023. It is now read-only.

Commit

Permalink
Rename TFRUtil to TFRecorder.
Browse files Browse the repository at this point in the history
Change-Id: I1c3f66fc7564428790aa7db1532e7816d442a2f8
  • Loading branch information
cfezequiel committed Jul 21, 2020
1 parent f8fc727 commit f8c1a40
Show file tree
Hide file tree
Showing 37 changed files with 86 additions and 80 deletions.
2 changes: 1 addition & 1 deletion .pylintrc
Expand Up @@ -16,7 +16,7 @@ persistent=no
# usually to register additional checkers.
load-plugins=
# Use multiple processes to speed up Pylint.
jobs=4
jobs=1
# Allow loading of arbitrary C extensions. Extensions are imported into the
# active Python interpreter and may run arbitrary code.
unsafe-load-any-extension=no
Expand Down
6 changes: 3 additions & 3 deletions Makefile
Expand Up @@ -2,9 +2,9 @@ init:
pip install -r requirements.txt

test:
nosetests --with-coverage --nocapture -v --cover-package=tfrutil
nosetests --with-coverage --nocapture -v --cover-package=tfrecorder

pylint:
pylint tfrutil
pylint tfrecorder

.PHONY: init glint coverage test
.PHONY: init glint coverage test
16 changes: 9 additions & 7 deletions README.md
@@ -1,10 +1,10 @@
# TFRecord Utilities (TFRUtil)
# TFRecorder

TFRUtil makes it easy to create TFRecords from images and labels in
TFRecorder makes it easy to create TFRecords from images and labels in
Pandas DataFrames or CSV files.
Today, TFRUtil supports data stored in 'image csv format' similar to
Today, TFRecorder supports data stored in 'image csv format' similar to
GCP AutoML Vision.
In the future TFRUtil will support converting any Pandas DataFrame or CSV
In the future TFRecorder will support converting any Pandas DataFrame or CSV
file into TFRecords.

## Installation
Expand All @@ -23,7 +23,7 @@ pip install .

```python
import pandas as pd
import tfrutil
import tfrecorder
df = pd.read_csv(...)
df.tensorflow.to_tfr(output_dir="gs://my/bucket")
```
Expand All @@ -41,12 +41,14 @@ df.tensorflow.to_tfrecord(
### Command-line interface
```bash
tfrutil create-tfrecords --output_dir="gs://my/bucket" data.csv
tfrecorder create-tfrecords --output_dir="gs://my/bucket" data.csv
```
## Input format
TFRUtil currently expects data to be in the same format as [AutoML Vision](https://cloud.google.com/vision/automl/docs/prepare). This format looks like a pandas dataframe or CSV formatted as:
TFRecorder currently expects data to be in the same format as
[AutoML Vision](https://cloud.google.com/vision/automl/docs/prepare).
This format looks like a pandas dataframe or CSV formatted as:
| split | image_uri | label |
|-------|---------------------------|-------|
Expand Down
2 changes: 1 addition & 1 deletion kokoro/gcp_ubuntu/kokoro_build.sh
Expand Up @@ -65,6 +65,6 @@ echo $(python -V) # should be Python 3
# Code under repo is checked out to ${KOKORO_ARTIFACTS_DIR}/git.
# The final directory name in this path is determined by the scm name specified
# in the job configuration.
cd ${KOKORO_ARTIFACTS_DIR}/git/tfrutil
cd ${KOKORO_ARTIFACTS_DIR}/git/tfrecorder

./build.sh
6 changes: 3 additions & 3 deletions setup.py
Expand Up @@ -38,13 +38,13 @@


setup(
name='tfrutil',
name='tfrecorder',
version='0.1',
install_requires=REQUIRED_PACKAGES,
packages=find_packages(),
include_package_data=True,
description='TFRUtil creates TensorFlow Records easily.',
description='TFRecorder creates TensorFlow Records easily.',
entry_points = {
'console_scripts': ['tfrutil=tfrutil.cli:main'],
'console_scripts': ['tfrecorder=tfrecorder.cli:main'],
},
)
4 changes: 2 additions & 2 deletions tfrutil/__init__.py → tfrecorder/__init__.py
Expand Up @@ -15,5 +15,5 @@
# limitations under the License.

"""Imports."""
from tfrutil import accessor
from tfrutil import client
from tfrecorder import accessor
from tfrecorder import client
23 changes: 12 additions & 11 deletions tfrutil/accessor.py → tfrecorder/accessor.py
Expand Up @@ -14,23 +14,24 @@
# See the License for the specific language governing permissions and
# limitations under the License.

"""Creates a pandas DataFrame accessor for TFRUtil.
"""Creates a pandas DataFrame accessor for TFRecorder.
accessor.py contains TFRUtilAccessor which provides a pandas DataFrame
accessor.py contains TFRecorderAccessor which provides a pandas DataFrame
accessor. This accessor allows us to inject the to_tfr() function into
pandas DataFrames.
"""

from typing import Any, Dict, Optional, Union
import pandas as pd
from IPython.core import display

from tfrutil import client
from tfrutil import constants
from tfrecorder import client
from tfrecorder import constants


@pd.api.extensions.register_dataframe_accessor('tensorflow')
class TFRUtilAccessor:
"""DataFrame Accessor class for TFRUtil."""
class TFRecorderAccessor:
"""DataFrame Accessor class for TFRecorder."""

def __init__(self, pandas_obj):
self._df = pandas_obj
Expand All @@ -46,15 +47,15 @@ def to_tfr(
job_label: str = 'to-tfr',
compression: Optional[str] = 'gzip',
num_shards: int = 0) -> Dict[str, Any]:
"""TFRUtil Pandas Accessor.
"""TFRecorder Pandas Accessor.
TFRUtil provides an easy interface to create image-based tensorflow records
from a dataframe containing GCS locations of the images and labels.
TFRecorder provides an easy interface to create image-based tensorflow
records from a dataframe containing GCS locations of the images and labels.
Usage:
import tfrutil
import tfrecorder
df.tfrutil.to_tfr(
df.tensorflow.to_tfr(
output_dir='gcs://foo/bar/train',
runner='DirectRunner',
compression='gzip',
Expand Down
7 changes: 4 additions & 3 deletions tfrutil/accessor_test.py → tfrecorder/accessor_test.py
Expand Up @@ -14,15 +14,15 @@
# See the License for the specific language governing permissions and
# limitations under the License.

"""Tests for pandas accessor."""
"""Tests for Pandas accessor."""

import os
import unittest

# pylint: disable=unused-import
import tfrecorder

import tfrutil
from tfrutil import test_utils
from tfrecorder import test_utils


class DataFrameAccessor(unittest.TestCase):
Expand All @@ -40,5 +40,6 @@ def test_accessor(self):
runner='DirectRunner', output_dir=self.output_dir)
self.assertTrue('metrics' in r)


if __name__ == '__main__':
unittest.main()
File renamed without changes.
10 changes: 5 additions & 5 deletions tfrutil/beam_image_test.py → tfrecorder/beam_image_test.py
Expand Up @@ -26,9 +26,9 @@
from PIL import Image
import tensorflow_transform as tft

from tfrutil import beam_image
from tfrutil import constants
from tfrutil import test_utils
from tfrecorder import beam_image
from tfrecorder import constants
from tfrecorder import test_utils


class BeamImageTests(unittest.TestCase):
Expand All @@ -37,7 +37,7 @@ class BeamImageTests(unittest.TestCase):
def setUp(self):
self.pipeline = test_utils.get_test_pipeline()
self.df = test_utils.get_test_df()
self.image_file = 'tfrutil/test_data/images/cat/cat-640x853-1.jpg'
self.image_file = 'tfrecorder/test_data/images/cat/cat-640x853-1.jpg'

def test_load(self):
"""Tests the image loading function."""
Expand All @@ -47,7 +47,7 @@ def test_load(self):
def test_file_not_found_load(self):
"""Test loading an image that doesn't exist."""
with self.assertRaises(OSError):
_ = beam_image.load('tfrutil/test_data/images/cat/food.jpg')
_ = beam_image.load('tfrecorder/test_data/images/cat/food.jpg')

def test_mode_to_channel(self):
"""Tests `mode_to_channel`."""
Expand Down
14 changes: 7 additions & 7 deletions tfrutil/beam_pipeline.py → tfrecorder/beam_pipeline.py
Expand Up @@ -14,9 +14,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.

"""TFRUtil Beam Pipeline.
"""TFRecorder Beam Pipeline.
This file implements the full Beam pipeline for TFRUtil.
This file implements the full Beam pipeline for TFRecorder.
"""

from typing import Any, Dict, Generator, Union
Expand All @@ -30,9 +30,9 @@
import tensorflow_transform as tft
from tensorflow_transform import beam as tft_beam

from tfrutil import beam_image
from tfrutil import common
from tfrutil import constants
from tfrecorder import beam_image
from tfrecorder import common
from tfrecorder import constants


def _get_setup_py_filepath() -> str:
Expand All @@ -56,7 +56,7 @@ def _get_job_name(job_label: str = None) -> str:
ensure uniqueness.
"""

job_name = 'tfrutil-' + common.get_timestamp()
job_name = 'tfrecorder-' + common.get_timestamp()
if job_label:
job_label = job_label.replace('_', '-')
job_name += '-' + job_label
Expand Down Expand Up @@ -201,7 +201,7 @@ def build_pipeline(
num_shards: int,
dataflow_options: dict,
integer_label: bool) -> beam.Pipeline:
"""Runs TFRUtil Beam Pipeline.
"""Runs TFRecorder Beam Pipeline.
Args:
df: Pandas DataFrame
Expand Down
Expand Up @@ -23,7 +23,7 @@
import apache_beam as beam
import tensorflow as tf

from tfrutil import beam_pipeline
from tfrecorder import beam_pipeline


# pylint: disable=protected-access
Expand All @@ -40,7 +40,7 @@ def test_processing_fn_with_int_label(self):
result = beam_pipeline._preprocessing_fn(element, integer_label=True)
self.assertEqual(element, result)

@mock.patch('tfrutil.beam_pipeline.tft')
@mock.patch('tfrecorder.beam_pipeline.tft')
def test_processing_fn_with_string_label(self, mock_transform):
'Test preprocessing fn with string label.'
mock_transform.compute_and_apply_vocabulary.return_value = tf.constant(
Expand Down
6 changes: 3 additions & 3 deletions tfrutil/check.py → tfrecorder/check.py
Expand Up @@ -24,9 +24,9 @@
import tensorflow as tf
import tensorflow_transform as tft

from tfrutil import beam_image
from tfrutil import constants
from tfrutil import common
from tfrecorder import beam_image
from tfrecorder import constants
from tfrecorder import common

_OUT_IMAGE_TEMPLATE = 'image_{:0>3d}.png'

Expand Down
8 changes: 4 additions & 4 deletions tfrutil/check_test.py → tfrecorder/check_test.py
Expand Up @@ -26,10 +26,10 @@
from pandas import testing as pdt
import tensorflow as tf

from tfrutil import beam_image
from tfrutil import check
from tfrutil import constants
from tfrutil import test_utils
from tfrecorder import beam_image
from tfrecorder import check
from tfrecorder import constants
from tfrecorder import test_utils


# pylint: disable=protected-access
Expand Down
4 changes: 2 additions & 2 deletions tfrutil/cli.py → tfrecorder/cli.py
Expand Up @@ -18,8 +18,8 @@

import fire

from tfrutil import client
from tfrutil import check
from tfrecorder import client
from tfrecorder import check


def main():
Expand Down
16 changes: 8 additions & 8 deletions tfrutil/client.py → tfrecorder/client.py
Expand Up @@ -14,7 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

"""Provides a common interface for TFRUtil to DF Accessor and CLI.
"""Provides a common interface for TFRecorder to DF Accessor and CLI.
client.py provides create_tfrecords() to upstream clients including
the Pandas DataFrame Accessor (accessor.py) and the CLI (cli.py).
Expand All @@ -27,9 +27,9 @@
import pandas as pd
import tensorflow as tf

from tfrutil import common
from tfrutil import constants
from tfrutil import beam_pipeline
from tfrecorder import common
from tfrecorder import constants
from tfrecorder import beam_pipeline


def _validate_data(df):
Expand Down Expand Up @@ -170,13 +170,13 @@ def create_tfrecords(
num_shards: int = 0) -> Dict[str, Any]:
"""Generates TFRecord files from given input data.
TFRUtil provides an easy interface to create image-based tensorflow records
TFRecorder provides an easy interface to create image-based tensorflow records
from a dataframe containing GCS locations of the images and labels.
Usage:
import tfrutil
import tfrecorder
job_id = tfrutil.client.create_tfrecords(
job_id = tfrecorder.client.create_tfrecords(
train_df,
output_dir='gcs://foo/bar/train',
runner='DirectRunner')
Expand Down Expand Up @@ -228,7 +228,7 @@ def create_tfrecords(
result = p.run()

if runner == 'DirectRunner':
logging.info("Using DirectRunner. TFRUtil will block until job completes.")
logging.info('Using DirectRunner - blocking until job completes.')
result.wait_until_finish()

row_count_filter = beam.metrics.MetricsFilter().with_name('row_count')
Expand Down
11 changes: 6 additions & 5 deletions tfrutil/client_test.py → tfrecorder/client_test.py
Expand Up @@ -15,6 +15,7 @@
# limitations under the License.

"""Tests for client."""

import os
from typing import List

Expand All @@ -25,9 +26,9 @@
import mock
import pandas as pd

from tfrutil import client
from tfrutil import constants
from tfrutil import test_utils
from tfrecorder import client
from tfrecorder import constants
from tfrecorder import test_utils


class ClientTest(unittest.TestCase):
Expand All @@ -38,7 +39,7 @@ def setUp(self):
self.test_region = 'us-central1'
self.test_project = 'foo'

@mock.patch('tfrutil.client.beam_pipeline')
@mock.patch('tfrecorder.client.beam_pipeline')
def test_create_tfrecords_direct_runner(self, mock_beam):
"""Tests `create_tfrecords` Direct case."""
mock_beam.build_pipeline().run().wait_until_finished.return_value = {
Expand All @@ -49,7 +50,7 @@ def test_create_tfrecords_direct_runner(self, mock_beam):
output_dir='/tmp/direct_runner')
self.assertTrue('metrics' in r)

@mock.patch('tfrutil.client.beam_pipeline')
@mock.patch('tfrecorder.client.beam_pipeline')
def test_create_tfrecords_dataflow_runner(self, mock_beam):
"""Tests `create_tfrecords` Dataflow case."""
mock_beam.build_pipeline().run().job_id.return_value = 'foo_id'
Expand Down

0 comments on commit f8c1a40

Please sign in to comment.