From b73888b0a93aca077384464f40312feab5774e93 Mon Sep 17 00:00:00 2001 From: Sonal Goyal Date: Wed, 27 Mar 2024 13:54:26 +0530 Subject: [PATCH 1/2] pluggable canopy --- .../java/zingg/common/core/block/Block.java | 20 +++++++++++-------- .../java/zingg/common/core/block/Canopy.java | 14 ++++++------- 2 files changed, 19 insertions(+), 15 deletions(-) diff --git a/common/core/src/main/java/zingg/common/core/block/Block.java b/common/core/src/main/java/zingg/common/core/block/Block.java index 35bde6b54..b304c1b48 100644 --- a/common/core/src/main/java/zingg/common/core/block/Block.java +++ b/common/core/src/main/java/zingg/common/core/block/Block.java @@ -66,16 +66,13 @@ public void setDupes(ZFrame dupes) { /** * @return the types * - * public Class[] getTypes() { return types; } */ /** * @param types - * the types to set + * the types to set * - * public void setTypes(Class[] types) { this.types = types; } - * - * /** + * * @return the maxSize */ public long getMaxSize() { @@ -84,7 +81,7 @@ public long getMaxSize() { /** * @param maxSize - * the maxSize to set + * the maxSize to set */ public void setMaxSize(long maxSize) { this.maxSize = maxSize; @@ -102,10 +99,13 @@ protected void setFunctionsMap(ListMap> m) { this.functionsMap = m; } + protected Canopy getCanopy(){ + return new Canopy(); + } public CanopygetNodeFromCurrent(Canopynode, HashFunction function, FieldDefinition context) { - Canopytrial = new Canopy(); + Canopytrial = getCanopy(); trial = node.copyTo(trial); // node.training, node.dupeN, function, context); trial.function = function; @@ -113,6 +113,10 @@ protected void setFunctionsMap(ListMap> m) { return trial; } + public void estimateElimCount(Canopy c, long elimCount) { + c.estimateElimCount(); + } + public abstract T getDataTypeFromString(String t); public CanopygetBestNode(Tree> tree, Canopyparent, Canopynode, @@ -144,7 +148,7 @@ protected void setFunctionsMap(ListMap> m) { + " and function " + function + " for " + field.dataType); Canopytrial = getNodeFromCurrent(node, function, context); - trial.estimateElimCount(); + estimateElimCount(trial, least); long elimCount = trial.getElimCount(); diff --git a/common/core/src/main/java/zingg/common/core/block/Canopy.java b/common/core/src/main/java/zingg/common/core/block/Canopy.java index 25f0d4124..09451c56d 100644 --- a/common/core/src/main/java/zingg/common/core/block/Canopy.java +++ b/common/core/src/main/java/zingg/common/core/block/Canopy.java @@ -20,19 +20,19 @@ public class Canopy implements Serializable { public static final Log LOG = LogFactory.getLog(Canopy.class); // created by function edge leading from parent to this node - HashFunction function; + protected HashFunction function; // aplied on field - FieldDefinition context; + protected FieldDefinition context; // list of duplicates passed from parent - List dupeN; + protected List dupeN; // number of duplicates eliminated after function applied on fn context - long elimCount; + protected long elimCount; // hash of canopy - Object hash; + protected Object hash; // training set - List training; + protected List training; // duplicates remaining after function is applied - List dupeRemaining; + protected List dupeRemaining; public Canopy() { } From bd48ac9dabed30ff3f578829a14986c82c8cb177 Mon Sep 17 00:00:00 2001 From: Sonal Goyal Date: Wed, 27 Mar 2024 22:46:37 +0530 Subject: [PATCH 2/2] debug logs in if then else --- .../java/zingg/common/core/block/Block.java | 18 +++++++++++++----- .../zingg/common/core/hash/FirstChars.java | 2 +- .../src/test/java/zingg/block/TestBlock.java | 1 + 3 files changed, 15 insertions(+), 6 deletions(-) diff --git a/common/core/src/main/java/zingg/common/core/block/Block.java b/common/core/src/main/java/zingg/common/core/block/Block.java index b304c1b48..06c0e8c13 100644 --- a/common/core/src/main/java/zingg/common/core/block/Block.java +++ b/common/core/src/main/java/zingg/common/core/block/Block.java @@ -126,14 +126,18 @@ public void estimateElimCount(Canopy c, long elimCount) { Canopybest = null; for (FieldDefinition field : fieldsOfInterest) { - LOG.debug("Trying for " + field + " with data type " + field.getDataType() + " and real dt " - + getDataTypeFromString(field.getDataType())); + if (LOG.isDebugEnabled()){ + LOG.debug("Trying for " + field + " with data type " + field.getDataType() + " and real dt " + + getDataTypeFromString(field.getDataType())); + } //Class type = FieldClass.getFieldClassClass(field.getFieldClass()); FieldDefinition context = field; if (least ==0) break;//how much better can it get? // applicable functions List> functions = functionsMap.get(getDataTypeFromString(field.getDataType())); - LOG.debug("functions are " + functions); + if (LOG.isDebugEnabled()){ + LOG.debug("functions are " + functions); + } if (functions != null) { @@ -144,8 +148,10 @@ public void estimateElimCount(Canopy c, long elimCount) { //!childless.contains(function, field.fieldName) ) { - LOG.debug("Evaluating field " + field.fieldName + if (LOG.isDebugEnabled()){ + LOG.debug("Evaluating field " + field.fieldName + " and function " + function + " for " + field.dataType); + } Canopytrial = getNodeFromCurrent(node, function, context); estimateElimCount(trial, least); @@ -182,7 +188,9 @@ public void estimateElimCount(Canopy c, long elimCount) { }*/ } else { - LOG.debug("No child " + function); + if (LOG.isDebugEnabled()){ + LOG.debug("No child " + function); + } //childless.add(function, field.fieldName); } diff --git a/common/core/src/main/java/zingg/common/core/hash/FirstChars.java b/common/core/src/main/java/zingg/common/core/hash/FirstChars.java index 116b67cc9..78ad3042d 100644 --- a/common/core/src/main/java/zingg/common/core/hash/FirstChars.java +++ b/common/core/src/main/java/zingg/common/core/hash/FirstChars.java @@ -32,7 +32,7 @@ public String call(String field) { r = field.trim().substring(0, endIndex); } } - LOG.debug("Applying " + this.getName() + " on " + field + " and returning " + r); + //LOG.debug("Applying " + this.getName() + " on " + field + " and returning " + r); return r; } diff --git a/spark/core/src/test/java/zingg/block/TestBlock.java b/spark/core/src/test/java/zingg/block/TestBlock.java index 5d80ca66a..17cbdb93a 100644 --- a/spark/core/src/test/java/zingg/block/TestBlock.java +++ b/spark/core/src/test/java/zingg/block/TestBlock.java @@ -50,6 +50,7 @@ public void testTree() throws Throwable { // primary deciding is unique year so identityInteger should have been picked Canopy head = blockingTree.getHead(); assertEquals("identityInteger", head.getFunction().getName()); + blockingTree.toString(); }