-
Notifications
You must be signed in to change notification settings - Fork 111
/
test_pipeline_options.yaml
47 lines (41 loc) · 1.1 KB
/
test_pipeline_options.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
# Input file and test dataset, both given as bare relative file names.
input_source: 'sample_data.txt'
test_dataset: 'sample_questions.txt'
# Commented-out alternates that point at developer-local absolute paths
# (note the second file name contains a space):
# input_source: "/Users/aravind/KruxAI/ragbuilder/InputFiles/pdf/uber_10k.pdf"
# test_dataset: "/Users/aravind/KruxAI/ragbuilder/rag_test_data_1726600284.375674 uber10k_shortlist.csv"
# A single loader entry; loader_kwargs is an explicit empty mapping.
document_loaders:
  - type: 'unstructured'
    loader_kwargs: {}
# Three strategy names, listed as plain strings.
chunking_strategies:
  - 'RecursiveCharacterTextSplitter'
  - 'CharacterTextSplitter'
  - 'custom'
# chunk_size is a min/max/stepsize mapping (100 to 500 in steps of 100);
# chunk_overlap is a two-element list of integers.
# NOTE(review): the largest overlap (100) equals the smallest chunk_size (100)
# -- confirm the consumer accepts that combination.
chunk_size:
  min: 100
  max: 500
  stepsize: 100
chunk_overlap:
  - 50
  - 100
# Two embedding entries, each a type/model pair. The commented-out
# model_kwargs stubs are disabled and use different option names per entry.
embedding_models:
  - type: 'openai'
    model: 'text-embedding-3-small'
    # model_kwargs:
    #   show_progress_bar: true
  - type: 'huggingface'
    model: 'sentence-transformers/all-MiniLM-L6-v2'
    # model_kwargs:
    #   show_progress: true
# Two vector store entries. The nesting below was reconstructed from the key
# names -- NOTE(review): confirm against the consumer's schema.
vector_databases:
  - type: 'chroma'
    collection_name: 'test_collection'
    persist_directory: 'chroma_sample2'
    collection_metadata:
      # Key is quoted because it contains a colon.
      'hnsw:space': 'cosine'
    # client_settings: null
    # metadata: null
  - type: 'faiss'
    normalize_L2: true
# Top-level scalar options: top_k is the integer 3; sampling_rate is
# explicitly null rather than omitted.
top_k: 3
sampling_rate: null
# Optimization settings: trial count, a timeout, and a storage URL.
optimization:
  n_trials: 10
  # NOTE(review): units are not stated here -- presumably seconds; confirm.
  timeout: 600
  # SQLite URL in the three-slash form, naming a relative database file.
  storage: 'sqlite:///test_optuna_data_ingestion.db'