Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
single responsibility principle for processing data
- Loading branch information
1 parent
c14132c
commit 6b53d07
Showing
8 changed files
with
238 additions
and
60 deletions.
There are no files selected for viewing
52 changes: 52 additions & 0 deletions
52
common/core/src/main/java/zingg/common/core/data/df/BlockedFrame.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
package zingg.common.core.data.df; | ||
|
||
import org.apache.commons.logging.Log; | ||
import org.apache.commons.logging.LogFactory; | ||
|
||
import zingg.common.client.IArguments; | ||
import zingg.common.client.ZFrame; | ||
import zingg.common.client.ZinggClientException; | ||
import zingg.common.client.util.ColName; | ||
import zingg.common.core.block.Canopy; | ||
import zingg.common.core.block.Tree; | ||
import zingg.common.core.context.Context; | ||
|
||
public class BlockedFrame<S, D, R, C, T> implements IZFrameProcessor<S, D, R, C, T> { | ||
|
||
protected ZFrame<D,R,C> originalDF; | ||
|
||
protected ZFrame<D,R,C> processedDF; | ||
|
||
protected IArguments args; | ||
|
||
protected Context<S,D,R,C,T> context; | ||
|
||
public static final Log LOG = LogFactory.getLog(BlockedFrame.class); | ||
|
||
public BlockedFrame(ZFrame<D, R, C> originalDF, IArguments args, Context<S,D,R,C,T> context) throws Exception, ZinggClientException { | ||
super(); | ||
this.originalDF = originalDF; | ||
this.args = args; | ||
this.context = context; | ||
this.processedDF = getBlocked(); | ||
} | ||
|
||
@Override | ||
public ZFrame<D, R, C> getOriginalDF() { | ||
return originalDF; | ||
} | ||
|
||
@Override | ||
public ZFrame<D, R, C> getProcessedDF() { | ||
return processedDF; | ||
} | ||
|
||
protected ZFrame<D, R, C> getBlocked() throws Exception, ZinggClientException { | ||
//testData = dropDuplicates(testData); | ||
Tree<Canopy<R>> tree = context.getBlockingTreeUtil().readBlockingTree(args); | ||
ZFrame<D, R, C> blocked = context.getBlockingTreeUtil().getBlockHashes(getOriginalDF(), tree); | ||
ZFrame<D, R, C> blocked1 = blocked.repartition(args.getNumPartitions(), blocked.col(ColName.HASH_COL));//.cache(); | ||
return blocked1; | ||
} | ||
|
||
} |
38 changes: 38 additions & 0 deletions
38
common/core/src/main/java/zingg/common/core/data/df/FieldDefFrame.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
package zingg.common.core.data.df; | ||
|
||
import org.apache.commons.logging.Log; | ||
import org.apache.commons.logging.LogFactory; | ||
|
||
import zingg.common.client.IArguments; | ||
import zingg.common.client.ZFrame; | ||
import zingg.common.client.cols.ZidAndFieldDefSelector; | ||
|
||
public class FieldDefFrame<S, D, R, C, T> implements IZFrameProcessor<S, D, R, C, T> { | ||
|
||
protected ZFrame<D,R,C> originalDF; | ||
|
||
protected ZFrame<D,R,C> processedDF; | ||
|
||
protected IArguments args; | ||
|
||
public static final Log LOG = LogFactory.getLog(FieldDefFrame.class); | ||
|
||
public FieldDefFrame(ZFrame<D, R, C> originalDF, IArguments args) { | ||
super(); | ||
this.originalDF = originalDF; | ||
this.args = args; | ||
this.processedDF = getOriginalDF().select(new ZidAndFieldDefSelector(args.getFieldDefinition()).getCols()); | ||
// return getDSUtil().getFieldDefColumnsDS(testDataOriginal, args, true); | ||
} | ||
|
||
@Override | ||
public ZFrame<D, R, C> getOriginalDF() { | ||
return originalDF; | ||
} | ||
|
||
@Override | ||
public ZFrame<D, R, C> getProcessedDF() { | ||
return processedDF; | ||
} | ||
|
||
} |
11 changes: 11 additions & 0 deletions
11
common/core/src/main/java/zingg/common/core/data/df/IZFrameProcessor.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
package zingg.common.core.data.df; | ||
|
||
import zingg.common.client.ZFrame; | ||
|
||
public interface IZFrameProcessor<S, D, R, C, T> { | ||
|
||
public ZFrame<D,R,C> getOriginalDF(); | ||
|
||
public ZFrame<D,R,C> getProcessedDF(); | ||
|
||
} |
45 changes: 45 additions & 0 deletions
45
common/core/src/main/java/zingg/common/core/data/df/PreprocessedFrame.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
package zingg.common.core.data.df; | ||
|
||
import org.apache.commons.logging.Log; | ||
import org.apache.commons.logging.LogFactory; | ||
|
||
import zingg.common.client.IArguments; | ||
import zingg.common.client.ZFrame; | ||
import zingg.common.client.ZinggClientException; | ||
import zingg.common.core.preprocess.StopWordsRemover; | ||
|
||
public class PreprocessedFrame<S, D, R, C, T> implements IZFrameProcessor<S, D, R, C, T> { | ||
|
||
protected ZFrame<D,R,C> originalDF; | ||
|
||
protected ZFrame<D,R,C> processedDF; | ||
|
||
protected IArguments args; | ||
|
||
protected StopWordsRemover<S, D, R, C, T> stopWordsRemover; | ||
|
||
public static final Log LOG = LogFactory.getLog(PreprocessedFrame.class); | ||
|
||
public PreprocessedFrame(ZFrame<D, R, C> originalDF, IArguments args, StopWordsRemover<S, D, R, C, T> stopWordsRemover) throws ZinggClientException { | ||
super(); | ||
this.originalDF = originalDF; | ||
this.args = args; | ||
this.stopWordsRemover = stopWordsRemover; | ||
this.processedDF = preprocess(); | ||
} | ||
|
||
protected ZFrame<D, R, C> preprocess() throws ZinggClientException { | ||
return this.stopWordsRemover.preprocessForStopWords(getOriginalDF()); | ||
} | ||
|
||
@Override | ||
public ZFrame<D, R, C> getOriginalDF() { | ||
return originalDF; | ||
} | ||
|
||
@Override | ||
public ZFrame<D, R, C> getProcessedDF() { | ||
return processedDF; | ||
} | ||
|
||
} |
37 changes: 37 additions & 0 deletions
37
common/core/src/main/java/zingg/common/core/data/df/RepartitionFrame.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
package zingg.common.core.data.df; | ||
|
||
import org.apache.commons.logging.Log; | ||
import org.apache.commons.logging.LogFactory; | ||
|
||
import zingg.common.client.IArguments; | ||
import zingg.common.client.ZFrame; | ||
import zingg.common.client.util.ColName; | ||
|
||
public class RepartitionFrame<S, D, R, C, T> implements IZFrameProcessor<S, D, R, C, T> { | ||
|
||
protected ZFrame<D,R,C> originalDF; | ||
|
||
protected ZFrame<D,R,C> processedDF; | ||
|
||
protected IArguments args; | ||
|
||
public static final Log LOG = LogFactory.getLog(RepartitionFrame.class); | ||
|
||
public RepartitionFrame(ZFrame<D, R, C> originalDF, IArguments args) { | ||
super(); | ||
this.originalDF = originalDF; | ||
this.args = args; | ||
this.processedDF = getOriginalDF().repartition(args.getNumPartitions(),getOriginalDF().col(ColName.ID_COL)); | ||
} | ||
|
||
@Override | ||
public ZFrame<D, R, C> getOriginalDF() { | ||
return originalDF; | ||
} | ||
|
||
@Override | ||
public ZFrame<D, R, C> getProcessedDF() { | ||
return processedDF; | ||
} | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.