Skip to content

Commit

Permalink
[Refactor] Improve logging package wide (#1315)
Browse files: browse the repository at this point in the history
  • Loading branch information
deshraj committed Mar 14, 2024
1 parent ef69c91 commit 3616eaa
Show file tree
Hide file tree
Showing 54 changed files with 263 additions and 231 deletions.
1 change: 0 additions & 1 deletion embedchain/alembic.ini
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,6 @@ keys = console
keys = generic

[logger_root]
level = WARN
handlers = console
qualname =

Expand Down
30 changes: 15 additions & 15 deletions embedchain/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@
from embedchain.vectordb.base import BaseVectorDB
from embedchain.vectordb.chroma import ChromaDB

logger = logging.getLogger(__name__)


@register_deserializable
class App(EmbedChain):
Expand All @@ -50,10 +52,10 @@ def __init__(
embedding_model: BaseEmbedder = None,
llm: BaseLlm = None,
config_data: dict = None,
log_level=logging.WARN,
auto_deploy: bool = False,
chunker: ChunkerConfig = None,
cache_config: CacheConfig = None,
log_level: int = logging.WARN,
):
"""
Initialize a new `App` instance.
Expand All @@ -68,8 +70,6 @@ def __init__(
:type llm: BaseLlm, optional
:param config_data: Config dictionary, defaults to None
:type config_data: dict, optional
:param log_level: Log level to use, defaults to logging.WARN
:type log_level: int, optional
:param auto_deploy: Whether to deploy the pipeline automatically, defaults to False
:type auto_deploy: bool, optional
:raises Exception: If an error occurs while creating the pipeline
Expand All @@ -83,13 +83,12 @@ def __init__(
if name and config:
raise Exception("Cannot provide both name and config. Please provide only one of them.")

# logging.basicConfig(level=log_level, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
self.logger = logging.getLogger(__name__)

logger.debug("4.0")
# Initialize the metadata db for the app
setup_engine(database_uri=os.environ.get("EMBEDCHAIN_DB_URI"))
init_db()

logger.debug("4.0")
self.auto_deploy = auto_deploy
# Store the dict config as an attribute to be able to send it
self.config_data = config_data if (config_data and validate_config(config_data)) else None
Expand Down Expand Up @@ -119,13 +118,15 @@ def __init__(
self.llm = llm or OpenAILlm()
self._init_db()

logger.debug("4.1")
# Session for the metadata db
self.db_session = get_session()

# If cache_config is provided, initializing the cache ...
if self.cache_config is not None:
self._init_cache()

logger.debug("4.2")
# Send anonymous telemetry
self._telemetry_props = {"class": self.__class__.__name__}
self.telemetry = AnonymousTelemetry(enabled=self.config.collect_metrics)
Expand Down Expand Up @@ -238,7 +239,7 @@ def _upload_file_to_presigned_url(self, presigned_url, file_path):
response.raise_for_status()
return response.status_code == 200
except Exception as e:
self.logger.exception(f"Error occurred during file upload: {str(e)}")
logger.exception(f"Error occurred during file upload: {str(e)}")
print("❌ Error occurred during file upload!")
return False

Expand Down Expand Up @@ -272,7 +273,7 @@ def _process_and_upload_data(self, data_hash, data_type, data_value):
metadata = {"file_path": data_value, "s3_key": s3_key}
data_value = presigned_url
else:
self.logger.error(f"File upload failed for hash: {data_hash}")
logger.error(f"File upload failed for hash: {data_hash}")
return False
else:
if data_type == "qna_pair":
Expand Down Expand Up @@ -336,6 +337,7 @@ def from_config(
:return: An instance of the App class.
:rtype: App
"""
logger.debug("6")
# Backward compatibility for yaml_path
if yaml_path and not config_path:
config_path = yaml_path
Expand All @@ -357,15 +359,13 @@ def from_config(
elif config and isinstance(config, dict):
config_data = config
else:
logging.error(
logger.error(
"Please provide either a config file path (YAML or JSON) or a config dictionary. Falling back to defaults because no config is provided.", # noqa: E501
)
config_data = {}

try:
validate_config(config_data)
except Exception as e:
raise Exception(f"Error occurred while validating the config. Error: {str(e)}")
# Validate the config
validate_config(config_data)

app_config_data = config_data.get("app", {}).get("config", {})
vector_db_config_data = config_data.get("vectordb", {})
Expand Down Expand Up @@ -477,12 +477,12 @@ def evaluate(
EvalMetric.GROUNDEDNESS.value,
]

logging.info(f"Collecting data from {len(queries)} questions for evaluation...")
logger.info(f"Collecting data from {len(queries)} questions for evaluation...")
dataset = []
for q, a, c in zip(queries, answers, contexts):
dataset.append(EvalData(question=q, answer=a, contexts=c))

logging.info(f"Evaluating {len(dataset)} data points...")
logger.info(f"Evaluating {len(dataset)} data points...")
result = {}
with concurrent.futures.ThreadPoolExecutor(max_workers=num_workers) as executor:
future_to_metric = {executor.submit(self._eval, dataset, metric): metric for metric in metrics}
Expand Down
20 changes: 11 additions & 9 deletions embedchain/bots/discord.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
) from None


logger = logging.getLogger(__name__)

intents = discord.Intents.default()
intents.message_content = True
client = discord.Client(intents=intents)
Expand All @@ -37,15 +39,15 @@ def add_data(self, message):
self.add(data)
response = f"Added data from: {data}"
except Exception:
logging.exception(f"Failed to add data {data}.")
logger.exception(f"Failed to add data {data}.")
response = "Some error occurred while adding data."
return response

def ask_bot(self, message):
try:
response = self.query(message)
except Exception:
logging.exception(f"Failed to query {message}.")
logger.exception(f"Failed to query {message}.")
response = "An error occurred. Please try again!"
return response

Expand All @@ -60,7 +62,7 @@ def start(self):
async def query_command(interaction: discord.Interaction, question: str):
await interaction.response.defer()
member = client.guilds[0].get_member(client.user.id)
logging.info(f"User: {member}, Query: {question}")
logger.info(f"User: {member}, Query: {question}")
try:
answer = discord_bot.ask_bot(question)
if args.include_question:
Expand All @@ -70,20 +72,20 @@ async def query_command(interaction: discord.Interaction, question: str):
await interaction.followup.send(response)
except Exception as e:
await interaction.followup.send("An error occurred. Please try again!")
logging.error("Error occurred during 'query' command:", e)
logger.error("Error occurred during 'query' command:", e)


@tree.command(name="add", description="add new content to the embedchain database")
async def add_command(interaction: discord.Interaction, url_or_text: str):
await interaction.response.defer()
member = client.guilds[0].get_member(client.user.id)
logging.info(f"User: {member}, Add: {url_or_text}")
logger.info(f"User: {member}, Add: {url_or_text}")
try:
response = discord_bot.add_data(url_or_text)
await interaction.followup.send(response)
except Exception as e:
await interaction.followup.send("An error occurred. Please try again!")
logging.error("Error occurred during 'add' command:", e)
logger.error("Error occurred during 'add' command:", e)


@tree.command(name="ping", description="Simple ping pong command")
Expand All @@ -96,16 +98,16 @@ async def on_app_command_error(interaction: discord.Interaction, error: discord.
if isinstance(error, commands.CommandNotFound):
await interaction.followup.send("Invalid command. Please refer to the documentation for correct syntax.")
else:
logging.error("Error occurred during command execution:", error)
logger.error("Error occurred during command execution:", error)


@client.event
async def on_ready():
# TODO: Sync in admin command, to not hit rate limits.
# This might be overkill for most users, and it would require to set a guild or user id, where sync is allowed.
await tree.sync()
logging.debug("Command tree synced")
logging.info(f"Logged in as {client.user.name}")
logger.debug("Command tree synced")
logger.info(f"Logged in as {client.user.name}")


def start_command():
Expand Down
12 changes: 7 additions & 5 deletions embedchain/bots/slack.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
) from None


logger = logging.getLogger(__name__)

SLACK_BOT_TOKEN = os.environ.get("SLACK_BOT_TOKEN")


Expand All @@ -42,10 +44,10 @@ def handle_message(self, event_data):
try:
response = self.chat_bot.chat(question)
self.send_slack_message(message["channel"], response)
logging.info("Query answered successfully!")
logger.info("Query answered successfully!")
except Exception as e:
self.send_slack_message(message["channel"], "An error occurred. Please try again!")
logging.error("Error occurred during 'query' command:", e)
logger.error("Error occurred during 'query' command:", e)
elif text.startswith("add"):
_, data_type, url_or_text = text.split(" ", 2)
if url_or_text.startswith("<") and url_or_text.endswith(">"):
Expand All @@ -55,10 +57,10 @@ def handle_message(self, event_data):
self.send_slack_message(message["channel"], f"Added {data_type} : {url_or_text}")
except ValueError as e:
self.send_slack_message(message["channel"], f"Error: {str(e)}")
logging.error("Error occurred during 'add' command:", e)
logger.error("Error occurred during 'add' command:", e)
except Exception as e:
self.send_slack_message(message["channel"], f"Failed to add {data_type} : {url_or_text}")
logging.error("Error occurred during 'add' command:", e)
logger.error("Error occurred during 'add' command:", e)

def send_slack_message(self, channel, message):
response = self.client.chat_postMessage(channel=channel, text=message)
Expand All @@ -68,7 +70,7 @@ def start(self, host="0.0.0.0", port=5000, debug=True):
app = Flask(__name__)

def signal_handler(sig, frame):
logging.info("\nGracefully shutting down the SlackBot...")
logger.info("\nGracefully shutting down the SlackBot...")
sys.exit(0)

signal.signal(signal.SIGINT, signal_handler)
Expand Down
8 changes: 5 additions & 3 deletions embedchain/bots/whatsapp.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@

from .base import BaseBot

logger = logging.getLogger(__name__)


@register_deserializable
class WhatsAppBot(BaseBot):
Expand Down Expand Up @@ -35,23 +37,23 @@ def add_data(self, message):
self.add(data)
response = f"Added data from: {data}"
except Exception:
logging.exception(f"Failed to add data {data}.")
logger.exception(f"Failed to add data {data}.")
response = "Some error occurred while adding data."
return response

def ask_bot(self, message):
try:
response = self.query(message)
except Exception:
logging.exception(f"Failed to query {message}.")
logger.exception(f"Failed to query {message}.")
response = "An error occurred. Please try again!"
return response

def start(self, host="0.0.0.0", port=5000, debug=True):
app = self.flask.Flask(__name__)

def signal_handler(sig, frame):
logging.info("\nGracefully shutting down the WhatsAppBot...")
logger.info("\nGracefully shutting down the WhatsAppBot...")
sys.exit(0)

signal.signal(signal.SIGINT, signal_handler)
Expand Down
6 changes: 4 additions & 2 deletions embedchain/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
from gptcache.similarity_evaluation.exact_match import \
ExactMatchEvaluation # noqa: F401

logger = logging.getLogger(__name__)


def gptcache_pre_function(data: dict[str, Any], **params: dict[str, Any]):
return data["input_query"]
Expand All @@ -24,12 +26,12 @@ def gptcache_data_manager(vector_dimension):


def gptcache_data_convert(cache_data):
logging.info("[Cache] Cache hit, returning cache data...")
logger.info("[Cache] Cache hit, returning cache data...")
return cache_data


def gptcache_update_cache_callback(llm_data, update_cache_func, *args, **kwargs):
logging.info("[Cache] Cache missed, updating cache...")
logger.info("[Cache] Cache missed, updating cache...")
update_cache_func(Answer(llm_data, CacheDataType.STR))
return llm_data

Expand Down
4 changes: 3 additions & 1 deletion embedchain/chunkers/base_chunker.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
from embedchain.helpers.json_serializable import JSONSerializable
from embedchain.models.data_type import DataType

logger = logging.getLogger(__name__)


class BaseChunker(JSONSerializable):
def __init__(self, text_splitter):
Expand All @@ -27,7 +29,7 @@ def create_chunks(self, loader, src, app_id=None, config: Optional[ChunkerConfig
chunk_ids = []
id_map = {}
min_chunk_size = config.min_chunk_size if config is not None else 1
logging.info(f"Skipping chunks smaller than {min_chunk_size} characters")
logger.info(f"Skipping chunks smaller than {min_chunk_size} characters")
data_result = loader.load_data(src)
data_records = data_result["data"]
doc_id = data_result["doc_id"]
Expand Down
18 changes: 10 additions & 8 deletions embedchain/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@

from embedchain.constants import CONFIG_DIR, CONFIG_FILE

logger = logging.getLogger(__name__)


class Client:
def __init__(self, api_key=None, host="https://apiv2.embedchain.ai"):
Expand All @@ -24,7 +26,7 @@ def __init__(self, api_key=None, host="https://apiv2.embedchain.ai"):
else:
if "api_key" in self.config_data:
self.api_key = self.config_data["api_key"]
logging.info("API key loaded successfully!")
logger.info("API key loaded successfully!")
else:
raise ValueError(
"You are not logged in. Please obtain an API key from https://app.embedchain.ai/settings/keys/"
Expand Down Expand Up @@ -64,34 +66,34 @@ def save(self):
with open(CONFIG_FILE, "w") as config_file:
json.dump(self.config_data, config_file, indent=4)

logging.info("API key saved successfully!")
logger.info("API key saved successfully!")

def clear(self):
if "api_key" in self.config_data:
del self.config_data["api_key"]
with open(CONFIG_FILE, "w") as config_file:
json.dump(self.config_data, config_file, indent=4)
self.api_key = None
logging.info("API key deleted successfully!")
logger.info("API key deleted successfully!")
else:
logging.warning("API key not found in the configuration file.")
logger.warning("API key not found in the configuration file.")

def update(self, api_key):
if self.check(api_key):
self.api_key = api_key
self.save()
logging.info("API key updated successfully!")
logger.info("API key updated successfully!")
else:
logging.warning("Invalid API key provided. API key not updated.")
logger.warning("Invalid API key provided. API key not updated.")

def check(self, api_key):
validation_url = f"{self.host}/api/v1/accounts/api_keys/validate/"
response = requests.post(validation_url, headers={"Authorization": f"Token {api_key}"})
if response.status_code == 200:
return True
else:
logging.warning(f"Response from API: {response.text}")
logging.warning("Invalid API key. Unable to validate.")
logger.warning(f"Response from API: {response.text}")
logger.warning("Invalid API key. Unable to validate.")
return False

def get(self):
Expand Down

0 comments on commit 3616eaa

Please sign in to comment.