Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Convert to Python3 #5

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
8 changes: 4 additions & 4 deletions cudatree/__init__.py
@@ -1,6 +1,6 @@
from random_forest import RandomForestClassifier, convert_result
from datasource import load_data
from util import timer
from random_tree import RandomClassifierTree
from .random_forest import RandomForestClassifier, convert_result
from .datasource import load_data
from .util import timer
from .random_tree import RandomClassifierTree
__version__ = "0.6"

12 changes: 6 additions & 6 deletions cudatree/base_tree.py
@@ -1,8 +1,8 @@
import numpy as np
from util import get_best_dtype
from .util import get_best_dtype
from pycuda import gpuarray
import math
from util import start_timer, end_timer
from .util import start_timer, end_timer
from pycuda import driver

class BaseTree(object):
Expand All @@ -14,12 +14,12 @@ def print_tree(self):
def recursive_print(idx, depth):
    """Print the subtree rooted at node ``idx`` in pre-order, one line per node.

    ``self`` is the tree object captured from the enclosing ``print_tree``
    method.

    idx: index of the node in the tree's parallel arrays.
    depth: depth reported for this node; children are printed with depth + 1.
    """
    left = self.left_children[idx]
    right = self.right_children[idx]

    # A node whose two child slots are both 0 is printed as a leaf.
    if left == 0 and right == 0:
        print("[LEAF] Depth: %s, Value: %s" %
              (depth, self.values_array[idx]))
        return

    print("[NODE] Depth: %s, Feature: %s, Threshold: %f" %
          (depth, self.feature_idx_array[idx],
           self.feature_threshold_array[idx]))
    recursive_print(left, depth + 1)
    recursive_print(right, depth + 1)
recursive_print(0, 0)
Expand Down
14 changes: 7 additions & 7 deletions cudatree/datasource.py
@@ -1,4 +1,4 @@
import cPickle
import pickle
import numpy as np
from os import path
from sklearn.datasets import load_digits,load_iris,load_diabetes,fetch_covtype
Expand Down Expand Up @@ -27,33 +27,33 @@ def load_data(ds_name):
y_train = ds.target
elif ds_name == "cf10":
with open(data_dir + "data_batch_1", "r") as f:
ds = cPickle.load(f)
ds = pickle.load(f)
x_train = ds['data']
y_train = np.array(ds['labels'])
elif ds_name == "cf100":
with open(data_dir + "train", "r") as f:
ds = cPickle.load(f)
ds = pickle.load(f)
x_train = ds['data']
y_train = np.array(ds['fine_labels'])
elif ds_name == "cd10_test":
with open(data_dir + "test_batch", "r") as f:
ds = cPickle.load(f)
ds = pickle.load(f)
x_train = ds['data']
y_train = np.array(ds['labels'])
elif ds_name == "cf100_test":
with open(data_dir + "test", "r") as f:
ds = cPickle.load(f)
ds = pickle.load(f)
x_train = ds['data']
y_train = np.array(ds['fine_labels'])
elif ds_name == "inet":
if _img_data is None:
with open("/ssd/imagenet-subset.pickle", "r") as f:
_img_data = cPickle.load(f)
_img_data = pickle.load(f)
return _img_data['x'][0:10000], _img_data['Y'][0:10000]
elif ds_name == "inet_test":
if _img_data is None:
with open("/ssd/imagenet-subset.pickle", "r") as f:
_img_data = cPickle.load(f)
_img_data = pickle.load(f)
return _img_data['x'][10000:], _img_data['Y'][10000:]
elif ds_name == "kdd":
data = np.load(data_dir + "data.npy")
Expand Down
22 changes: 11 additions & 11 deletions cudatree/random_forest.py
@@ -1,9 +1,9 @@
import numpy as np
from random_tree import RandomClassifierTree
from util import timer, get_best_dtype, dtype_to_ctype, mk_kernel, mk_tex_kernel, compile_module
from .random_tree import RandomClassifierTree
from .util import timer, get_best_dtype, dtype_to_ctype, mk_kernel, mk_tex_kernel, compile_module
from pycuda import gpuarray
from pycuda import driver
from util import start_timer, end_timer, show_timings
from .util import start_timer, end_timer, show_timings
from parakeet import jit
import math

Expand Down Expand Up @@ -314,21 +314,21 @@ def fit(self, samples, target, bfs_threshold = None):
self.bfs_threshold = bfs_threshold

if self.verbose:
print "bsf_threadshold : %d; bootstrap : %r; min_samples_split : %d" % (self.bfs_threshold,
self.bootstrap, self.min_samples_split)
print "n_samples : %d; n_features : %d; n_labels : %d; max_features : %d" % (self.stride,
self.n_features, self.n_labels, self.max_features)
print("bsf_threadshold : %d; bootstrap : %r; min_samples_split : %d" % (self.bfs_threshold,
self.bootstrap, self.min_samples_split))
print("n_samples : %d; n_features : %d; n_labels : %d; max_features : %d" % (self.stride,
self.n_features, self.n_labels, self.max_features))


self._trees = [RandomClassifierTree(self) for i in xrange(self.n_estimators)]
self._trees = [RandomClassifierTree(self) for i in range(self.n_estimators)]

for i, tree in enumerate(self._trees):
si, n_samples = self._get_sorted_indices(self.sorted_indices)

if self.verbose:
with timer("Tree %s" % (i,)):
tree.fit(self.samples, self.target, si, n_samples)
print ""
print("")
else:
tree.fit(self.samples, self.target, si, n_samples)

Expand All @@ -355,7 +355,7 @@ def predict(self, x):
for i, tree in enumerate(self._trees):
res[i] = tree.gpu_predict(x, self.predict_kernel)

res = np.array([np.argmax(np.bincount(res[:,i])) for i in xrange(res.shape[1])])
res = np.array([np.argmax(np.bincount(res[:,i])) for i in range(res.shape[1])])
if hasattr(self, "compt_table"):
res = convert_result(self.compt_table, res)

Expand All @@ -369,7 +369,7 @@ def predict_proba(self, x):
for i, tree in enumerate(self._trees):
res[i] = tree.gpu_predict(x, self.predict_kernel)

for i in xrange(x.shape[0]):
for i in range(x.shape[0]):
tmp_res = np.bincount(res[:, i])
tmp_res.resize(self.n_labels)
res_proba[i] = tmp_res.astype(np.float64) / len(self._trees)
Expand Down
12 changes: 6 additions & 6 deletions cudatree/random_tree.py
Expand Up @@ -3,15 +3,15 @@
from pycuda import gpuarray
import numpy as np
import math
from util import total_times, compile_module, mk_kernel, mk_tex_kernel, timer
from util import dtype_to_ctype, get_best_dtype, start_timer, end_timer
from base_tree import BaseTree
from .util import total_times, compile_module, mk_kernel, mk_tex_kernel, timer
from .util import dtype_to_ctype, get_best_dtype, start_timer, end_timer
from .base_tree import BaseTree
from pycuda import driver
import random
from parakeet import jit
from util import start_timer, end_timer, show_timings
from .util import start_timer, end_timer, show_timings
import sys
import util
from . import util

def sync():
if False:
Expand All @@ -37,7 +37,7 @@ def restore_tree(left_children,

@jit
def _shuffle(x, r):
for i in xrange(1, len(x)):
for i in range(1, len(x)):
j = np.fmod(r[i], i)
old_xj = x[j]
x[j] = x[i]
Expand Down
12 changes: 6 additions & 6 deletions cudatree/util.py
Expand Up @@ -53,11 +53,11 @@ def __init__(self, name):
self.name = name

def __enter__(self, *args):
    """Announce the timed section and record its start time.

    Prints ``Running <name>`` and stores the current wall-clock time in
    ``self.start_t``. Returns ``self`` so that ``with timer(...) as t``
    binds the timer object; callers that ignore the value are unaffected.
    """
    print("Running %s" % self.name)
    self.start_t = time.time()
    return self

def __exit__(self, *args):
    """Print the wall-clock time elapsed since ``self.start_t`` was recorded.

    The exception details passed in ``args`` are ignored and nothing is
    returned, so an exception raised inside the ``with`` body still
    propagates to the caller.
    """
    elapsed = time.time() - self.start_t
    print("Time for %s: %s" % (self.name, elapsed))

def dtype_to_ctype(dtype):
if dtype.kind == 'f':
Expand Down Expand Up @@ -126,15 +126,15 @@ def end_timer(name):
total_times[name] = total

def show_timings(limit = 100):
    """Print the accumulated timers, largest total first.

    Reads the module-level ``total_times`` mapping (name -> total) and
    prints one ``name : total`` row per entry between a header and a
    footer line.

    limit: maximum number of rows to print. A value below 1 never matches
        the row counter, so every entry is printed.
    """
    # dict.items() is already iterable on Python 3; no iter() wrapper needed.
    ranked = sorted(total_times.items(),
                    key = operator.itemgetter(1),
                    reverse = True)
    print("---------Timings---------")
    for shown, (key, value) in enumerate(ranked, start = 1):
        print(key.ljust(15), ":", value)
        if shown == limit:
            break

    print("-------------------------")
40 changes: 20 additions & 20 deletions estimate_threshold.py
Expand Up @@ -27,20 +27,20 @@
rfs[f] = cudatree.RandomForestClassifier(n_estimators = 3, bootstrap = False, max_features = max_features)

for n_classes in reversed(all_classes):
print "n_classes", n_classes
print("n_classes", n_classes)
for n_examples in reversed(all_examples):
print "n_examples", n_examples
print("n_examples", n_examples)
y = np.random.randint(low = 0, high = n_classes, size = n_examples)
for n_features in reversed(all_features):
print "n_features", n_features
print("n_features", n_features)
max_features = int(np.sqrt(n_features))
print "sqrt(n_features) =", max_features
print("sqrt(n_features) =", max_features)
if n_features * n_examples > 10**7:
print "Skipping due excessive n_features * n_examples..."
print("Skipping due excessive n_features * n_examples...")
i += len(thresholds)
continue
if n_examples * n_classes > 10 ** 7:
print "Skipping due to excessive n_examples * n_classes"
print("Skipping due to excessive n_examples * n_classes")
i += len(thresholds)
continue

Expand All @@ -51,38 +51,38 @@
best_time = np.inf
best_threshold = None
best_threshold_prct = None
print "(n_classes = %d, n_examples = %d, max_features = %d)" % (n_classes, n_examples, max_features)
print("(n_classes = %d, n_examples = %d, max_features = %d)" % (n_classes, n_examples, max_features))
tested_thresholds = []
times = []
for bfs_threshold in thresholds:
bfs_threshold_prct = float(bfs_threshold) / n_examples
print " -- (%d / %d) threshold %d (%0.2f%%)" % (i, total_iters, bfs_threshold, bfs_threshold_prct * 100)
print(" -- (%d / %d) threshold %d (%0.2f%%)" % (i, total_iters, bfs_threshold, bfs_threshold_prct * 100))
i += 1
if bfs_threshold > n_examples:
print "Skipping threshold > n_examples"
print("Skipping threshold > n_examples")
continue
if bfs_threshold / float(n_examples) < 0.001:
print "SKipping, BFS threshold too small relative to n_examples"
print("SKipping, BFS threshold too small relative to n_examples")


start_t = time.time()
rf.fit(x, y, bfs_threshold)
t = time.time() - start_t
tested_thresholds.append(bfs_threshold)
times.append(t)
print " ---> total time", t
print(" ---> total time", t)
if t < best_time:
best_time = t
best_threshold = bfs_threshold
best_theshold_prct = bfs_threshold_prct
print "thresholds", tested_thresholds
print "times", times
print("thresholds", tested_thresholds)
print("times", times)
inputs.append([1.0, n_classes, n_examples, max_features])
best_threshold_values.append(best_threshold)
best_threshold_prcts.append(best_threshold_prct)

X = np.array(inputs)
print "input shape", X.shape
print("input shape", X.shape)



Expand All @@ -91,9 +91,9 @@
Y = best_threshold_values

lstsq_result = np.linalg.lstsq(X, Y)
print "Regression coefficients:", lstsq_result[0]
print("Regression coefficients:", lstsq_result[0])
n = len(best_threshold_values)
print "Regression residual:", lstsq_result[1], "RMSE:", np.sqrt(lstsq_result[1] / n)
print("Regression residual:", lstsq_result[1], "RMSE:", np.sqrt(lstsq_result[1] / n))

import socket
csv_filename = "threshold_results_" + socket.gethostname()
Expand All @@ -110,14 +110,14 @@

log_lstsq_result = np.linalg.lstsq(LogX, LogY)

print "Log regression coefficients:", log_lstsq_result[0]
print("Log regression coefficients:", log_lstsq_result[0])
n = len(best_threshold_values)
print "Log regression residual:", log_lstsq_result[1], "RMSE:", np.sqrt(log_lstsq_result[1] / n)
print("Log regression residual:", log_lstsq_result[1], "RMSE:", np.sqrt(log_lstsq_result[1] / n))
log_pred = np.dot(LogX, log_lstsq_result[0])
pred = np.exp(log_pred)
residual = np.sum((Y - pred)**2)
print "Actual residual", residual
print "Actual RMSE:", np.sqrt(residual / n)
print("Actual residual", residual)
print("Actual RMSE:", np.sqrt(residual / n))


"""
Expand Down
2 changes: 1 addition & 1 deletion hybridforest/__init__.py
@@ -1 +1 @@
from hybridforest import RandomForestClassifier
from .hybridforest import RandomForestClassifier
6 changes: 3 additions & 3 deletions hybridforest/hybridforest.py
Expand Up @@ -7,7 +7,7 @@
from multiprocessing import Value, Lock, cpu_count
import atexit
import pycuda
from builder import CPUBuilder, GPUBuilder
from .builder import CPUBuilder, GPUBuilder

#kill the child process if any
def cleanup(proc):
Expand Down Expand Up @@ -149,7 +149,7 @@ def fit(self, X, Y, bfs_threshold = None):
self.max_features,
bfs_threshold,
remain_trees,
lock) for i in xrange(self.n_gpus - 1)]
lock) for i in range(self.n_gpus - 1)]

pycuda.autoinit.context.pop()
for b in gpu_builders:
Expand Down Expand Up @@ -181,7 +181,7 @@ def predict(self, X):
n_cd_trees = self.n_estimators - n_sk_trees
cuda_proba = self._cuda_forest.predict_proba(X) * n_cd_trees
final_proba = (sk_proba + cuda_proba ) / self.n_estimators
res = np.array([np.argmax(final_proba[i]) for i in xrange(final_proba.shape[0])])
res = np.array([np.argmax(final_proba[i]) for i in range(final_proba.shape[0])])

if hasattr(self._cuda_forest, "compt_table"):
res = convert_result(self._cuda_forest.compt_table, res)
Expand Down
16 changes: 8 additions & 8 deletions test/helpers.py
Expand Up @@ -12,14 +12,14 @@ def compare_accuracy(x,y, n_estimators = 11, bootstrap = True, slop = 0.98, n_re
skrf = sklearn.ensemble.RandomForestClassifier(n_estimators = n_estimators, bootstrap = bootstrap)
cuda_score_total = 0
sk_score_total = 0
for i in xrange(n_repeat):
for i in range(n_repeat):
cudarf.fit(xtrain, ytrain)
skrf.fit(xtrain, ytrain)
sk_score = skrf.score(xtest, ytest)
cuda_score = cudarf.score(xtest, ytest)
print "Iteration", i
print "Sklearn score", sk_score
print "CudaTree score", cuda_score
print("Iteration", i)
print("Sklearn score", sk_score)
print("CudaTree score", cuda_score)
sk_score_total += sk_score
cuda_score_total += cuda_score

Expand All @@ -39,14 +39,14 @@ def compare_hybrid_accuracy(x,y, n_estimators = 20, bootstrap = True, slop = 0.9
skrf = sklearn.ensemble.RandomForestClassifier(n_estimators = n_estimators, bootstrap = bootstrap)
cuda_score_total = 0
sk_score_total = 0
for i in xrange(n_repeat):
for i in range(n_repeat):
hybridrf.fit(xtrain, ytrain)
skrf.fit(xtrain, ytrain)
sk_score = skrf.score(xtest, ytest)
cuda_score = hybridrf.score(xtest, ytest)
print "Iteration", i
print "Sklearn score", sk_score
print "Hybrid score", cuda_score
print("Iteration", i)
print("Sklearn score", sk_score)
print("Hybrid score", cuda_score)
sk_score_total += sk_score
cuda_score_total += cuda_score

Expand Down
4 changes: 2 additions & 2 deletions test/test_covtype.py
Expand Up @@ -12,10 +12,10 @@ def test_covtype_memorize():
forest.fit(x, y, bfs_threshold = 500000)
with timer("Predict"):
diff, total = util.test_diff(forest.predict(x), y)
print "%s(Wrong)/%s(Total). The error rate is %f." % (diff, total, diff/float(total))
print("%s(Wrong)/%s(Total). The error rate is %f." % (diff, total, diff/float(total)))
assert diff == 0, "Didn't perfectly memorize, got %d wrong" % diff

from helpers import compare_accuracy, compare_hybrid_accuracy
from .helpers import compare_accuracy, compare_hybrid_accuracy
def test_covtype_accuracy():
compare_accuracy(x,y)
compare_hybrid_accuracy(x, y)
Expand Down