Skip to content

Commit

Permalink
Merge pull request #827 from zinggAI/main
Browse files Browse the repository at this point in the history
merging enterprise branch into main
  • Loading branch information
vikasgupta78 committed May 1, 2024
2 parents e02e25c + 0b65ddf commit c76ef26
Show file tree
Hide file tree
Showing 211 changed files with 3,316 additions and 971 deletions.
Binary file modified .DS_Store
Binary file not shown.
35 changes: 35 additions & 0 deletions .readthedocs.yaml
@@ -0,0 +1,35 @@
# Read the Docs configuration file for Sphinx projects
# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details

# Required
version: 2

# Set the OS, Python version and other tools you might need
build:
os: ubuntu-22.04
tools:
python: "3.12"
# You can also specify other tool versions:
# nodejs: "20"
# rust: "1.70"
# golang: "1.20"

# Build documentation in the "docs/" directory with Sphinx
sphinx:
configuration: python/docs/conf.py
# You can configure Sphinx to use a different builder, for instance use the dirhtml builder for simpler URLs
# builder: "dirhtml"
# Fail on all warnings to avoid broken references
# fail_on_warning: true

# Optionally build your docs in additional formats such as PDF and ePub
# formats:
# - pdf
# - epub

# Optional but recommended, declare the Python requirements required
# to build your documentation
# See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html
# python:
# install:
# - requirements: docs/requirements.txt
2 changes: 1 addition & 1 deletion assembly/dependency-reduced-pom.xml
Expand Up @@ -45,7 +45,7 @@
</plugin>
<plugin>
<artifactId>maven-assembly-plugin</artifactId>
<version>2.4.1</version>
<version>3.6.0</version>
<executions>
<execution>
<id>make-assembly</id>
Expand Down
2 changes: 1 addition & 1 deletion assembly/pom.xml
Expand Up @@ -112,7 +112,7 @@
</plugin>
<plugin>
<artifactId>maven-assembly-plugin</artifactId>
<version>2.4.1</version>
<version>3.6.0</version>
<configuration>
<descriptors>
<descriptor>${project.basedir}/src/assembly/dist.xml</descriptor>
Expand Down
Expand Up @@ -4,6 +4,7 @@
import java.io.Serializable;
import java.io.StringWriter;
import java.util.List;
import java.util.stream.Collectors;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
Expand Down Expand Up @@ -169,7 +170,7 @@ public void setLabelDataSampleSize(float labelDataSampleSize) throws ZinggClient
public List<? extends FieldDefinition> getFieldDefinition() {
return fieldDefinition;
}

/**
* Set the field definitions consisting of match field indices, types and
* classes
Expand Down
Expand Up @@ -19,7 +19,7 @@

public class ArgumentsUtil {

protected Class<? extends Arguments> argsClass;
protected Class<? extends IArguments> argsClass;
private static final String ENV_VAR_MARKER_START = "$";
private static final String ENV_VAR_MARKER_END = "$";
private static final String ESC = "\\";
Expand All @@ -31,7 +31,7 @@ public ArgumentsUtil() {
this(Arguments.class);
}

public ArgumentsUtil( Class<? extends Arguments> argsClass) {
public ArgumentsUtil( Class<? extends IArguments> argsClass) {
this.argsClass = argsClass;
}

Expand Down
84 changes: 67 additions & 17 deletions common/client/src/main/java/zingg/common/client/Client.java
Expand Up @@ -5,9 +5,17 @@
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import zingg.common.client.license.IZinggLicense;
import zingg.common.client.event.events.IEvent;
import zingg.common.client.event.events.ZinggStartEvent;
import zingg.common.client.event.events.ZinggStopEvent;
import zingg.common.client.event.listeners.EventsListener;
import zingg.common.client.event.listeners.IEventListener;
import zingg.common.client.event.listeners.ZinggStartListener;
import zingg.common.client.event.listeners.ZinggStopListener;
import zingg.common.client.options.ZinggOptions;
import zingg.common.client.util.Email;
import zingg.common.client.util.EmailBody;
import zingg.common.client.util.PipeUtilBase;

/**
* This is the main point of interface with the Zingg matching product.
Expand All @@ -22,9 +30,11 @@ public abstract class Client<S,D,R,C,T> implements Serializable {
protected IZingg<S,D,R,C> zingg;
protected ClientOptions options;
protected S session;

protected PipeUtilBase<S,D,R,C> pipeUtil;
public static final Log LOG = LogFactory.getLog(Client.class);

protected String zFactoryClassName;


/**
* Construct a client to Zingg using provided arguments and spark master.
Expand All @@ -36,10 +46,14 @@ public abstract class Client<S,D,R,C,T> implements Serializable {
* if issue connecting to master
*/

public Client() {}
public Client(String zFactory) {
setZFactoryClassName(zFactory);
}

public Client(IArguments args, ClientOptions options) throws ZinggClientException {
setOptions(options);
public Client(IArguments args, ClientOptions options, String zFactory) throws ZinggClientException {
setZFactoryClassName(zFactory);
this.options = options;
setOptions(options);
try {
buildAndSetArguments(args, options);
printAnalyticsBanner(arguments.getCollectMetrics());
Expand All @@ -51,14 +65,28 @@ public Client(IArguments args, ClientOptions options) throws ZinggClientExceptio
}
}

public Client(IArguments args, ClientOptions options, S s) throws ZinggClientException {
this(args, options);

public String getZFactoryClassName() {
return zFactoryClassName;
}

public void setZFactoryClassName(String s) {
this.zFactoryClassName = s;
}

public Client(IArguments args, ClientOptions options, S s, String zFactory) throws ZinggClientException {
this(args, options, zFactory);
this.session = s;
LOG.debug("Session passed is " + s);
if (session != null) zingg.setSession(session);
}

public abstract IZinggFactory getZinggFactory() throws Exception;//(IZinggFactory) Class.forName("zingg.ZFactory").newInstance();

public IZinggFactory getZinggFactory() throws InstantiationException, IllegalAccessException, ClassNotFoundException{
LOG.debug("z factory is " + getZFactoryClassName());
return (IZinggFactory) Class.forName(getZFactoryClassName()).newInstance();
}




Expand All @@ -70,9 +98,10 @@ public void setZingg(IArguments args, ClientOptions options) throws Exception{
catch(Exception e) {
e.printStackTrace();
//set default
setZingg(zf.get(ZinggOptions.getByValue(ZinggOptions.PEEK_MODEL.getValue())));
setZingg(zf.get(ZinggOptions.getByValue(ZinggOptions.PEEK_MODEL.getName())));
}
}


public void setZingg(IZingg<S,D,R,C> zingg) {
this.zingg = zingg;
Expand Down Expand Up @@ -120,7 +149,7 @@ else if (args.getJobId() != -1) {
}

public void printBanner() {
String versionStr = "0.4.0";
String versionStr = "0.4.1-SNAPSHOT";
LOG.info("");
LOG.info("********************************************************");
LOG.info("* Zingg AI *");
Expand Down Expand Up @@ -161,17 +190,17 @@ public void mainMethod(String... args) {
Client<S,D,R,C,T> client = null;
ClientOptions options = null;
try {

for (String a: args) LOG.debug("args " + a);
options = new ClientOptions(args);
setOptions(options);

if (options.has(options.HELP) || options.has(options.HELP1) || options.get(ClientOptions.PHASE) == null) {
LOG.warn(options.getHelp());
System.exit(0);
}
String phase = options.get(ClientOptions.PHASE).value.trim();
ZinggOptions.verifyPhase(phase);
IArguments arguments = null;
if (options.get(ClientOptions.CONF).value.endsWith("json")) {
arguments = getArgsUtil().createArgumentsFromJSON(options.get(ClientOptions.CONF).value, phase);
}
Expand All @@ -184,6 +213,7 @@ else if (options.get(ClientOptions.CONF).value.endsWith("env")) {

client = getClient(arguments, options);
client.init();
// after setting arguments etc. as some of the listeners need it
client.execute();
client.postMetrics();
LOG.warn("Zingg processing has completed");
Expand Down Expand Up @@ -212,6 +242,7 @@ else if (options.get(ClientOptions.CONF).value.endsWith("env")) {
}
finally {
try {
EventsListener.getInstance().fireEvent(new ZinggStopEvent());
if (client != null) {
//client.postMetrics();
client.stop();
Expand All @@ -228,14 +259,12 @@ else if (options.get(ClientOptions.CONF).value.endsWith("env")) {
}

public void init() throws ZinggClientException {
zingg.setClientOptions(options);
zingg.init(getArguments(), getLicense(options.get(ClientOptions.LICENSE).value.trim()));
zingg.init(getArguments(), getSession());
if (session != null) zingg.setSession(session);

initializeListeners();
EventsListener.getInstance().fireEvent(new ZinggStartEvent());
}

protected abstract IZinggLicense getLicense(String license) throws ZinggClientException ;

/**
* Stop the Spark job running context
*/
Expand Down Expand Up @@ -305,5 +334,26 @@ protected ArgumentsUtil getArgsUtil() {
}
return argsUtil;
}

public void addListener(Class<? extends IEvent> eventClass, IEventListener listener) {
EventsListener.getInstance().addListener(eventClass, listener);
}

public void initializeListeners() {
addListener(ZinggStartEvent.class, new ZinggStartListener());
addListener(ZinggStopEvent.class, new ZinggStopListener());
}

public abstract S getSession();

public void setSession(S s) {
this.session = s;
}

public abstract PipeUtilBase<S, D, R, C> getPipeUtil();

public void setPipeUtil(PipeUtilBase<S, D, R, C> pipeUtil) {
this.pipeUtil = pipeUtil;
}

}
Expand Up @@ -12,6 +12,7 @@
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import zingg.common.client.options.ZinggOptions;
import zingg.common.client.util.Util;

public class ClientOptions {
Expand Down
30 changes: 30 additions & 0 deletions common/client/src/main/java/zingg/common/client/FieldDefUtil.java
@@ -0,0 +1,30 @@
package zingg.common.client;

import java.io.Serializable;
import java.util.List;
import java.util.stream.Collectors;

/**
*
* Util methods related to FieldDefinition objects
*
*/
public class FieldDefUtil implements Serializable{

private static final long serialVersionUID = 1L;

public List<? extends FieldDefinition> getFieldDefinitionDontUse(List<? extends FieldDefinition> fieldDefinition) {
return fieldDefinition.stream()
.filter(x->x.matchType.contains(MatchType.DONT_USE))
.collect(Collectors.toList());
}

public List<? extends FieldDefinition> getFieldDefinitionToUse(List<? extends FieldDefinition> fieldDefinition) {
return fieldDefinition.stream()
.filter(x->!x.matchType.contains(MatchType.DONT_USE))
.collect(Collectors.toList());
}



}
Expand Up @@ -10,6 +10,7 @@
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import com.fasterxml.jackson.annotation.JsonIgnore;
import com.fasterxml.jackson.core.JsonGenerator;
import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.core.JsonProcessingException;
Expand All @@ -22,6 +23,8 @@
import com.fasterxml.jackson.databind.deser.std.StdDeserializer;
import com.fasterxml.jackson.databind.ser.std.StdSerializer;

import zingg.common.client.cols.Named;


/**
* This class defines each field that we use in matching We can use this to
Expand All @@ -30,7 +33,7 @@
* @author sgoyal
*
*/
public class FieldDefinition implements
public class FieldDefinition implements Named,
Serializable {

private static final long serialVersionUID = 1L;
Expand Down Expand Up @@ -119,6 +122,21 @@ public void setFieldName(String fieldName) {
this.fieldName = fieldName;
}

@JsonIgnore
public boolean isDontUse() {
return (matchType != null && matchType.contains(MatchType.DONT_USE));
}

@Override
public String getName() {
return getFieldName();
}

@Override
public void setName(String name) {
setFieldName(name);
}

@Override
public int hashCode() {
final int prime = 31;
Expand Down
Expand Up @@ -8,7 +8,7 @@ public interface ILabelDataViewHelper<S, D, R, C> {

List<R> getClusterIds(ZFrame<D, R, C> lines);

List<C> getDisplayColumns(ZFrame<D, R, C> lines, IArguments args);
// List<C> getDisplayColumns(ZFrame<D, R, C> lines, IArguments args);

ZFrame<D, R, C> getCurrentPair(ZFrame<D, R, C> lines, int index, List<R> clusterIds, ZFrame<D, R, C> clusterLines);

Expand Down
6 changes: 2 additions & 4 deletions common/client/src/main/java/zingg/common/client/IZingg.java
@@ -1,17 +1,15 @@
package zingg.common.client;

import zingg.common.client.license.IZinggLicense;

public interface IZingg<S,D,R,C> {

public void init(IArguments args, IZinggLicense license)
public void init(IArguments args, S session)
throws ZinggClientException;

public void execute() throws ZinggClientException;

public void cleanup() throws ZinggClientException;

public ZinggOptions getZinggOptions();
//public ZinggOptions getZinggOptions();

public String getName();

Expand Down

0 comments on commit c76ef26

Please sign in to comment.