Training using a custom dataset #396

domalbert · 2022-10-12T07:45:25Z

Hello together,

I'm trying to use spektral for custom image dataset. I'm aware of your tutorial on here and here. However when I do so I get the following traceback:

TypeError                                 Traceback (most recent call last)
Input In [35], in <cell line: 132>()
    132 for batch in loader_tr:
    133     step += 1
--> 134     loss, acc = train_step(*batch)
    135     results.append((loss, acc))
    136     if step == loader_tr.steps_per_epoch:

File ~\Anaconda3\lib\site-packages\tensorflow\python\util\traceback_utils.py:153, in filter_traceback.<locals>.error_handler(*args, **kwargs)
    151 except Exception as e:
    152   filtered_tb = _process_traceback_frames(e.__traceback__)
--> 153   raise e.with_traceback(filtered_tb) from None
    154 finally:
    155   del filtered_tb

File ~\AppData\Local\Temp\__autograph_generated_fileabo5js6c.py:11, in outer_factory.<locals>.inner_factory.<locals>.tf__train_step(inputs, target)
      9 retval_ = ag__.UndefinedReturnValue()
     10 with ag__.ld(tf).GradientTape() as tape:
---> 11     predictions = ag__.converted_call(ag__.ld(model), (ag__.ld(inputs),), dict(training=True), fscope)
     12     loss = ag__.converted_call(ag__.ld(loss_fn), (ag__.ld(target), ag__.ld(predictions)), None, fscope) + ag__.converted_call(ag__.ld(sum), (ag__.ld(model).losses,), None, fscope)
     13 gradients = ag__.converted_call(ag__.ld(tape).gradient, (ag__.ld(loss), ag__.ld(model).trainable_variables), None, fscope)

File ~\Anaconda3\lib\site-packages\keras\utils\traceback_utils.py:70, in filter_traceback.<locals>.error_handler(*args, **kwargs)
     67     filtered_tb = _process_traceback_frames(e.__traceback__)
     68     # To get the full stack trace, call:
     69     # `tf.debugging.disable_traceback_filtering()`
---> 70     raise e.with_traceback(filtered_tb) from None
     71 finally:
     72     del filtered_tb

File ~\AppData\Local\Temp\__autograph_generated_fileps0p3atv.py:11, in outer_factory.<locals>.inner_factory.<locals>.tf__call(self, inputs)
      9 retval_ = ag__.UndefinedReturnValue()
     10 (x, a, i) = ag__.ld(inputs)
---> 11 x = ag__.converted_call(ag__.ld(self).conv1, ([ag__.ld(x), ag__.ld(a)],), None, fscope)
     12 x = ag__.converted_call(ag__.ld(self).conv2, ([ag__.ld(x), ag__.ld(a)],), None, fscope)
     13 x = ag__.converted_call(ag__.ld(self).conv3, ([ag__.ld(x), ag__.ld(a)],), None, fscope)

File ~\AppData\Local\Temp\__autograph_generated_filehshivo11.py:14, in outer_factory.<locals>.inner_factory.<locals>.tf___inner_check_dtypes(inputs, **kwargs)
     12 try:
     13     do_return = True
---> 14     retval_ = ag__.converted_call(ag__.ld(call), (ag__.ld(inputs),), dict(**ag__.ld(kwargs)), fscope)
     15 except:
     16     do_return = False

File ~\AppData\Local\Temp\__autograph_generated_filed6l1u3d2.py:11, in outer_factory.<locals>.inner_factory.<locals>.tf__call(self, inputs, mask)
      9 retval_ = ag__.UndefinedReturnValue()
     10 (x, a) = ag__.ld(inputs)
---> 11 output = ag__.converted_call(ag__.ld(K).dot, (ag__.ld(x), ag__.ld(self).kernel_1), None, fscope)
     12 output = ag__.converted_call(ag__.ld(ops).modal_dot, (ag__.ld(a), ag__.ld(output)), None, fscope)
     13 skip = ag__.converted_call(ag__.ld(K).dot, (ag__.ld(x), ag__.ld(self).kernel_2), None, fscope)

TypeError: in user code:

    File "C:\Users\dalbertw\AppData\Local\Temp\ipykernel_16028\747893876.py", line 101, in train_step  *
        predictions = model(inputs, training=True)
    File "C:\Users\dalbertw\Anaconda3\lib\site-packages\keras\utils\traceback_utils.py", line 70, in error_handler  **
        raise e.with_traceback(filtered_tb) from None
    File "C:\Users\dalbertw\AppData\Local\Temp\__autograph_generated_fileps0p3atv.py", line 11, in tf__call
        x = ag__.converted_call(ag__.ld(self).conv1, ([ag__.ld(x), ag__.ld(a)],), None, fscope)
    File "C:\Users\dalbertw\AppData\Local\Temp\__autograph_generated_filehshivo11.py", line 14, in tf___inner_check_dtypes
        retval_ = ag__.converted_call(ag__.ld(call), (ag__.ld(inputs),), dict(**ag__.ld(kwargs)), fscope)
    File "C:\Users\dalbertw\AppData\Local\Temp\__autograph_generated_filed6l1u3d2.py", line 11, in tf__call
        output = ag__.converted_call(ag__.ld(K).dot, (ag__.ld(x), ag__.ld(self).kernel_1), None, fscope)

    TypeError: Exception encountered when calling layer "net_23" "                 f"(type Net).
    
    in user code:
    
        File "C:\Users\dalbertw\AppData\Local\Temp\ipykernel_16028\747893876.py", line 81, in call  *
            x = self.conv1([x, a])
        File "C:\Users\dalbertw\Anaconda3\lib\site-packages\keras\utils\traceback_utils.py", line 70, in error_handler  **
            raise e.with_traceback(filtered_tb) from None
        File "C:\Users\dalbertw\AppData\Local\Temp\__autograph_generated_filehshivo11.py", line 14, in tf___inner_check_dtypes
            retval_ = ag__.converted_call(ag__.ld(call), (ag__.ld(inputs),), dict(**ag__.ld(kwargs)), fscope)
        File "C:\Users\dalbertw\AppData\Local\Temp\__autograph_generated_filed6l1u3d2.py", line 11, in tf__call
            output = ag__.converted_call(ag__.ld(K).dot, (ag__.ld(x), ag__.ld(self).kernel_1), None, fscope)
    
        TypeError: Exception encountered when calling layer "gcs_conv_18" "                 f"(type GCSConv).
        
        in user code:
        
            File "C:\Users\dalbertw\Anaconda3\lib\site-packages\spektral\layers\convolutional\conv.py", line 105, in _inner_check_dtypes  *
                return call(inputs, **kwargs)
            File "C:\Users\dalbertw\Anaconda3\lib\site-packages\spektral\layers\convolutional\gcs_conv.py", line 105, in call  *
                output = K.dot(x, self.kernel_1)
            File "C:\Users\dalbertw\Anaconda3\lib\site-packages\keras\backend.py", line 2455, in dot
                out = tf.matmul(x, y)
        
            TypeError: Input 'b' of 'MatMul' Op has type float32 that does not match type int32 of argument 'a'.
        
        
        Call arguments received by layer "gcs_conv_18" "                 f"(type GCSConv):
          • inputs=['tf.Tensor(shape=(None, 6), dtype=int32)', '<tensorflow.python.framework.sparse_tensor.SparseTensor object at 0x0000021F1F8D2D00>']
          • mask=None
    
    
    Call arguments received by layer "net_23" "                 f"(type Net):
      • inputs=('tf.Tensor(shape=(None, 6), dtype=int32)', '<tensorflow.python.framework.sparse_tensor.SparseTensor object at 0x0000021F1F8D2D00>', 'tf.Tensor(shape=(None,), dtype=int64)')

I assume that my problem lies within the way I define my custom dataset and how spektral is wrapping it. Here is my full code:


from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.metrics import categorical_accuracy
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

from spektral.data import Dataset, DisjointLoader, Graph
from spektral.layers import GCSConv, GlobalAvgPool
from spektral.transforms.normalize_adj import NormalizeAdj


class MyDataset(Dataset):
    """
    A dataset of random colored graphs.
    The task is to classify each graph with the color which occurs the most in
    its nodes.
    The graphs have `n_colors` colors, of at least `n_min` and at most `n_max`
    nodes connected with probability `p`.
    """

    def __init__(self, n_samples, n_colors=3, n_min=10, n_max=100, p=0.1, **kwargs):
        self.n_samples = n_samples
        self.n_colors = n_colors
        self.n_min = n_min
        self.n_max = n_max
        self.p = p
        super().__init__(**kwargs)

    def read(self):
        def make_graph():
            #n = np.random.randint(self.n_min, self.n_max)
            #colors = np.random.randint(0, self.n_colors, size=n)

            # Node features
            x = np.random.randint(0,255, (6, 6))

            # Edges
            a = np.random.randint(0,2,(36,36))

            # Labels
            y = np.random.randint(0,10)

            return Graph(x=x, a=a, y=y)

        # We must return a list of Graph objects
        return [make_graph() for _ in range(self.n_samples)]


data = MyDataset(1000, transforms=NormalizeAdj())

# Train/valid/test split
idxs = np.random.permutation(len(data))
split_va, split_te = int(0.8 * len(data)), int(0.9 * len(data))
idx_tr, idx_va, idx_te = np.split(idxs, [split_va, split_te])
data_tr = data[idx_tr]
data_va = data[idx_va]
data_te = data[idx_te]

# Data loaders
loader_tr = DisjointLoader(data_tr, batch_size=batch_size, epochs=epochs)
loader_va = DisjointLoader(data_va, batch_size=batch_size)
loader_te = DisjointLoader(data_te, batch_size=batch_size)


################################################################################
# Build model
################################################################################
class Net(Model):
    def __init__(self):
        super().__init__()
        self.conv1 = GCSConv(32, activation="relu")
        self.conv2 = GCSConv(32, activation="relu")
        self.conv3 = GCSConv(32, activation="relu")
        self.global_pool = GlobalAvgPool()
        self.dense = Dense(data.n_labels, activation="softmax")

    def call(self, inputs):
        x, a, i = inputs
        x = self.conv1([x, a])
        x = self.conv2([x, a])
        x = self.conv3([x, a])
        output = self.global_pool([x, i])
        output = self.dense(output)

        return output

learning_rate = 1e-3
model = Net()
optimizer = Adam(lr=learning_rate)
loss_fn = CategoricalCrossentropy()


################################################################################
# Fit model
################################################################################
@tf.function(input_signature=loader_tr.tf_signature(), experimental_relax_shapes=True)
def train_step(inputs, target):
    with tf.GradientTape() as tape:
        predictions = model(inputs, training=True)
        loss = loss_fn(target, predictions) + sum(model.losses)
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    acc = tf.reduce_mean(categorical_accuracy(target, predictions))
    return loss, acc


def evaluate(loader):
    output = []
    step = 0
    while step < loader.steps_per_epoch:
        step += 1
        inputs, target = loader.__next__()
        pred = model(inputs, training=False)
        outs = (
            loss_fn(target, pred),
            tf.reduce_mean(categorical_accuracy(target, pred)),
            len(target),  # Keep track of batch size
        )
        output.append(outs)
        if step == loader.steps_per_epoch:
            output = np.array(output)
            return np.average(output[:, :-1], 0, weights=output[:, -1])


epoch = step = 0
best_val_loss = np.inf
best_weights = None
patience = 10
results = []
for batch in loader_tr:
    step += 1
    loss, acc = train_step(*batch)
    results.append((loss, acc))
    if step == loader_tr.steps_per_epoch:
        step = 0
        epoch += 1

        # Compute validation loss and accuracy
        val_loss, val_acc = evaluate(loader_va)
        print(
            "Ep. {} - Loss: {:.3f} - Acc: {:.3f} - Val loss: {:.3f} - Val acc: {:.3f}".format(
                epoch, *np.mean(results, 0), val_loss, val_acc
            )
        )

        # Check if loss improved for early stopping
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            patience = es_patience
            print("New best val_loss {:.3f}".format(val_loss))
            best_weights = model.get_weights()
        else:
            patience -= 1
            if patience == 0:
                print("Early stopping (best val_loss: {})".format(best_val_loss))
                break
        results = []

################################################################################
# Evaluate model
################################################################################
model.set_weights(best_weights)  # Load best model
test_loss, test_acc = evaluate(loader_te)
print("Done. Test loss: {:.4f}. Test acc: {:.2f}".format(test_loss, test_acc))

Can you give me guidance on how to proceed?

The text was updated successfully, but these errors were encountered:

danielegrattarola · 2023-01-22T13:53:18Z

Sorry for the late reply.
It looks like the node features are of the wrong type, could you try casting them to Float?

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Training using a custom dataset #396

Training using a custom dataset #396

domalbert commented Oct 12, 2022

danielegrattarola commented Jan 22, 2023

Training using a custom dataset #396

Training using a custom dataset #396

Comments

domalbert commented Oct 12, 2022

danielegrattarola commented Jan 22, 2023