Skip to content

Commit

Permalink
Merge pull request #184 from PAIR-code/images
Browse files: browse the repository at this point in the history
Fix feature stats generation when examples contain encoded images
  • Loading branch information
jimbojw committed May 14, 2019
2 parents 0293abc + e40ec4c commit 5a42fe3
Showing 1 changed file with 8 additions and 6 deletions.
Expand Up @@ -19,6 +19,7 @@

import numpy as np
import pandas as pd
+import sys


class BaseGenericFeatureStatisticsGenerator(object):
Expand Down Expand Up @@ -269,13 +270,14 @@ def GetDatasetsProto(self, datasets, features=None,
sorted_vals = sorted(zip(counts, vals), reverse=True)
sorted_vals = sorted_vals[:histogram_categorical_levels_count]
for val_index, val in enumerate(sorted_vals):
-if val[1].dtype.type is np.str_:
-  printable_val = val[1]
-else:
-  try:
+try:
+  if (sys.version_info.major < 3 or
+      isinstance(val[1], (bytes, bytearray))):
     printable_val = val[1].decode('UTF-8', 'strict')
-  except (UnicodeDecodeError, UnicodeEncodeError):
-    printable_val = '__BYTES_VALUE__'
+  else:
+    printable_val = val[1]
+except (UnicodeDecodeError, UnicodeEncodeError):
+  printable_val = '__BYTES_VALUE__'
bucket = featstats.rank_histogram.buckets.add(
low_rank=val_index,
high_rank=val_index,
Expand Down

0 comments on commit 5a42fe3

Please sign in to comment.