Skip to content

Commit

Permalink
🌡️ feat: JSON Logging and Health Checks (#12)
Browse files Browse the repository at this point in the history
* Add json logging and health endpoint

* Wire json logging to uvicorn FastAPI
  • Loading branch information
idachev authored Mar 29, 2024
1 parent fd39d31 commit 6906742
Show file tree
Hide file tree
Showing 10 changed files with 225 additions and 42 deletions.
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
.idea
.venv
.env
__pycache__
uploads/
myenv/
venv/
venv/
*.pyc
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,4 @@ RUN pip install --no-cache-dir -r requirements.txt

COPY . .

CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
CMD ["python", "main.py"]
2 changes: 1 addition & 1 deletion Dockerfile.lite
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,4 @@ RUN pip install --no-cache-dir -r requirements.lite.txt

COPY . .

CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
CMD ["python", "main.py"]
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ The following environment variables are required to run the application:
- `RAG_UPLOAD_DIR`: (Optional) The directory where uploaded files are stored. Default value is "./uploads/".
- `PDF_EXTRACT_IMAGES`: (Optional) A boolean value indicating whether to extract images from PDF files. Default value is "False".
- `DEBUG_RAG_API`: (Optional) Set to "True" to show more verbose logging output in the server console, and to enable PostgreSQL database routes.
- `CONSOLE_JSON`: (Optional) Set to "True" to emit logs as JSON for Cloud Logging aggregation.
- `EMBEDDINGS_PROVIDER`: (Optional) either "openai", "azure", "huggingface", "huggingfacetei" or "ollama", where "huggingface" uses sentence_transformers; defaults to "openai"
- `EMBEDDINGS_MODEL`: (Optional) Set a valid embeddings model to use from the configured provider.
- **Defaults**
Expand Down
113 changes: 97 additions & 16 deletions config.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,20 @@
# config.py

import json
import os
import logging
from datetime import datetime

from dotenv import find_dotenv, load_dotenv
from langchain_community.embeddings import HuggingFaceEmbeddings, HuggingFaceHubEmbeddings, OllamaEmbeddings
from langchain_community.embeddings import HuggingFaceEmbeddings, \
HuggingFaceHubEmbeddings, OllamaEmbeddings
from langchain_openai import AzureOpenAIEmbeddings, OpenAIEmbeddings
from starlette.middleware.base import BaseHTTPMiddleware

from store_factory import get_vector_store

load_dotenv(find_dotenv())


def get_env_variable(var_name: str, default_value: str = None) -> str:
value = os.getenv(var_name)
if value is None:
Expand All @@ -17,6 +23,7 @@ def get_env_variable(var_name: str, default_value: str = None) -> str:
return default_value
return value


RAG_UPLOAD_DIR = get_env_variable("RAG_UPLOAD_DIR", "./uploads/")
if not os.path.exists(RAG_UPLOAD_DIR):
os.makedirs(RAG_UPLOAD_DIR, exist_ok=True)
Expand All @@ -39,56 +46,130 @@ def get_env_variable(var_name: str, default_value: str = None) -> str:

## Logging

# Keys used with `extra=` to attach structured request/response payloads to
# log records; the JSON formatter emits them as top-level fields.
HTTP_RES = "http_res"
HTTP_REQ = "http_req"

# Root logger: configuring it here applies to the whole application.
logger = logging.getLogger()

# Feature flags read from the environment ("true"/"false", case-insensitive).
debug_mode = get_env_variable("DEBUG_RAG_API", "False").lower() == "true"
console_json = get_env_variable("CONSOLE_JSON", "False").lower() == "true"

# DEBUG_RAG_API controls root-logger verbosity.
if debug_mode:
    logger.setLevel(logging.DEBUG)
else:
    logger.setLevel(logging.INFO)

if console_json:
    class JsonFormatter(logging.Formatter):
        """Format each log record as a single-line JSON document.

        Intended for Cloud Logging style aggregators: the output contains
        the rendered message, optional structured request/response payloads
        (attached via ``extra=``), exception text for ERROR records, and
        standard record metadata.
        """

        def format(self, record):
            # Dicts preserve insertion order, so fields appear in a stable,
            # readable order in the serialized output.
            json_record = {"message": record.getMessage()}

            # Structured payloads attached via `extra=` (see LogMiddleware).
            if HTTP_REQ in record.__dict__:
                json_record[HTTP_REQ] = record.__dict__[HTTP_REQ]
            if HTTP_RES in record.__dict__:
                json_record[HTTP_RES] = record.__dict__[HTTP_RES]

            if record.levelno == logging.ERROR and record.exc_info:
                json_record["exception"] = self.formatException(record.exc_info)

            # NOTE(review): naive local-time timestamp, matching original
            # behavior; timezone-aware UTC would aggregate more reliably.
            json_record["timestamp"] = datetime.fromtimestamp(
                record.created).isoformat()

            json_record["level"] = record.levelname
            json_record["filename"] = record.filename
            json_record["lineno"] = record.lineno
            json_record["funcName"] = record.funcName
            json_record["module"] = record.module
            json_record["threadName"] = record.threadName

            return json.dumps(json_record)

    formatter = JsonFormatter()
else:
    # Human-readable single-line format for local development.
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s')

handler = logging.StreamHandler()  # or logging.FileHandler("app.log")
handler.setFormatter(formatter)
logger.addHandler(handler)

## Request logging

class LogMiddleware(BaseHTTPMiddleware):
    """Starlette middleware that logs one line per request/response pair.

    Health-check probes are demoted to DEBUG so they do not flood the
    INFO-level log stream; everything else is logged at INFO with
    structured request/response payloads for the JSON formatter.
    """

    async def dispatch(self, request, call_next):
        response = await call_next(request)

        # Route /health probes to DEBUG, all other traffic to INFO.
        is_health_probe = str(request.url).endswith("/health")
        emit = logger.debug if is_health_probe else logger.info

        emit(
            f"Request {request.method} {request.url} - {response.status_code}",
            extra={
                HTTP_REQ: {"method": request.method,
                           "url": str(request.url)},
                HTTP_RES: {"status_code": response.status_code},
            },
        )

        return response


logging.getLogger("uvicorn.access").disabled = True


## Credentials

OPENAI_API_KEY = get_env_variable("OPENAI_API_KEY", "")
AZURE_OPENAI_API_KEY = get_env_variable("AZURE_OPENAI_API_KEY", "")
AZURE_OPENAI_ENDPOINT = get_env_variable("AZURE_OPENAI_ENDPOINT", "")
HF_TOKEN = get_env_variable("HF_TOKEN", "")
OLLAMA_BASE_URL = get_env_variable("OLLAMA_BASE_URL", "http://ollama:11434")


## Embeddings

def init_embeddings(provider, model):
    """Instantiate the embeddings client for the configured provider.

    Args:
        provider: One of "openai", "azure", "huggingface",
            "huggingfacetei" or "ollama".
        model: Provider-specific model name (for "huggingfacetei" this is
            the TEI endpoint URL).

    Returns:
        A LangChain embeddings instance for the chosen provider.

    Raises:
        ValueError: If ``provider`` is not one of the supported values.
    """
    if provider == "openai":
        return OpenAIEmbeddings(model=model, api_key=OPENAI_API_KEY)
    elif provider == "azure":
        # AZURE_OPENAI_ENDPOINT is read from the environment by the client.
        return AzureOpenAIEmbeddings(model=model,
                                     api_key=AZURE_OPENAI_API_KEY)
    elif provider == "huggingface":
        return HuggingFaceEmbeddings(model_name=model, encode_kwargs={
            'normalize_embeddings': True})
    elif provider == "huggingfacetei":
        return HuggingFaceHubEmbeddings(model=model)
    elif provider == "ollama":
        return OllamaEmbeddings(model=model, base_url=OLLAMA_BASE_URL)
    else:
        raise ValueError(f"Unsupported embeddings provider: {provider}")



EMBEDDINGS_PROVIDER = get_env_variable("EMBEDDINGS_PROVIDER", "openai").lower()

if EMBEDDINGS_PROVIDER == "openai":
EMBEDDINGS_MODEL = get_env_variable("EMBEDDINGS_MODEL", "text-embedding-3-small")
EMBEDDINGS_MODEL = get_env_variable("EMBEDDINGS_MODEL",
"text-embedding-3-small")

elif EMBEDDINGS_PROVIDER == "azure":
EMBEDDINGS_MODEL = get_env_variable("EMBEDDINGS_MODEL", "text-embedding-3-small")
EMBEDDINGS_MODEL = get_env_variable("EMBEDDINGS_MODEL",
"text-embedding-3-small")

elif EMBEDDINGS_PROVIDER == "huggingface":
EMBEDDINGS_MODEL = get_env_variable("EMBEDDINGS_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
EMBEDDINGS_MODEL = get_env_variable("EMBEDDINGS_MODEL",
"sentence-transformers/all-MiniLM-L6-v2")

elif EMBEDDINGS_PROVIDER == "huggingfacetei":
EMBEDDINGS_MODEL = get_env_variable("EMBEDDINGS_MODEL", "http://huggingfacetei:3000")
EMBEDDINGS_MODEL = get_env_variable("EMBEDDINGS_MODEL",
"http://huggingfacetei:3000")

elif EMBEDDINGS_PROVIDER == "ollama":
EMBEDDINGS_MODEL = get_env_variable("EMBEDDINGS_MODEL", "nomic-embed-text")
Expand All @@ -102,10 +183,10 @@ def init_embeddings(provider, model):
## Vector store

# Async vector store (pgvector) shared by all request handlers; the
# retriever is the read-side handle used for similarity search.
vector_store = get_vector_store(
    connection_string=CONNECTION_STRING,
    embeddings=embeddings,
    collection_name=COLLECTION_NAME,
    mode="async",
)
retriever = vector_store.as_retriever()

Expand Down
Loading

0 comments on commit 6906742

Please sign in to comment.