File size: 1,971 Bytes
f51bb92
 
 
 
 
e19e333
ce9ef3e
f51bb92
 
f2daaee
9b7a7cf
 
6158da4
b83cc65
f0018f2
f51bb92
 
d697aa5
f51bb92
 
 
 
 
e5cd1d3
 
 
6158da4
e19e333
6d056d5
e19e333
db6b619
b409192
9b7a7cf
6158da4
e029e22
f2daaee
e029e22
d1afae8
 
dd677c3
e19e333
f2daaee
 
e19e333
f2daaee
e19e333
f51bb92
6158da4
 
 
 
 
b83cc65
 
6158da4
 
 
902a706
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
# Top-level settings.
# Log directory path. Type: str.
log_dir: '../storage/logs'
# Log chunk directory path. Type: str.
log_chunk_dir: '../storage/logs/chunks'
# Device selection. Type: str. Listed options: cuda, cpu.
device: 'cpu'

# Vector store settings.
vectorstore:
  # Type: bool.
  load_from_HF: true
  # Type: bool.
  embedd_files: false
  # Type: str.
  data_path: '../storage/data'
  # Type: str.
  url_file_path: '../storage/data/urls.txt'
  # Type: bool.
  expand_urls: true
  # Type: str. Listed options: FAISS, Chroma, RAGatouille, RAPTOR.
  db_option: 'RAGatouille'
  # Type: str.
  db_path: '../vectorstores'
  # Type: str. Listed options: sentence-transformers/all-MiniLM-L6-v2,
  # text-embedding-ada-002.
  model: 'sentence-transformers/all-MiniLM-L6-v2'
  # Type: int.
  search_top_k: 3
  # Type: float.
  score_threshold: 0.2

  # Not used as of now.
  faiss_params:
    # Type: str.
    # NOTE(review): unlike db_path above, this path has no '../' prefix --
    # confirm that is intended before this section is put to use.
    index_path: 'vectorstores/faiss.index'
    # Type: str. Listed options: Flat, HNSW, IVF.
    index_type: 'Flat'
    # Type: int.
    index_dimension: 384
    # Type: int.
    index_nlist: 100
    # Type: int.
    index_nprobe: 10

  colbert_params:
    # Type: str.
    index_name: 'new_idx'

# LLM settings.
llm_params:
  # Listed options: langchain.
  llm_arch: 'langchain'
  # Type: bool.
  use_history: true
  # Type: bool.
  generate_follow_up: false
  # Type: int.
  memory_window: 3
  # Type: str. Listed options: Normal, ELI5.
  llm_style: 'Normal'
  # Type: str. Listed options: local_llm, gpt-3.5-turbo-1106, gpt-4,
  # gpt-4o-mini.
  llm_loader: 'gpt-4o-mini'
  openai_params:
    # Type: float.
    temperature: 0.7
  local_llm_params:
    # Type: float.
    temperature: 0.7
    # HuggingFace repo id.
    repo_id: 'TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF'
    # Specific name of the gguf file in the repo.
    filename: 'tinyllama-1.1b-chat-v1.0.Q5_0.gguf'
  # Type: str. Listed options: llama, pymupdf, gpt.
  pdf_reader: 'llama'
  # Type: bool.
  stream: false

# Chat logging settings.
chat_logging:
  # Type: bool.
  log_chat: true
  # String value; no list of accepted options is documented in this file.
  platform: 'literalai'
  # Type: bool.
  callbacks: false

# Splitter settings.
splitter_options:
  # Type: bool.
  use_splitter: true
  # Type: bool.
  split_by_token: true
  # Type: bool.
  remove_leftover_delimiters: true
  # Type: bool.
  remove_chunks: false
  # Type: int.
  chunk_size: 300
  # Type: int.
  chunk_overlap: 30
  # Type: list of strings. Double-quoted, so "\n" is parsed as a real
  # newline character; the last entry is the empty string.
  chunk_separators: ["\n\n", "\n", " ", ""]
  # Type: int or None (null).
  front_chunks_to_remove: null
  # Type: int or None (null).
  last_chunks_to_remove: null
  # Type: list of strings. Single-quoted, so '\t' and '\n' stay as literal
  # backslash sequences (two characters each), not control characters.
  # NOTE(review): confirm the consumer expects that form (e.g. uses them as
  # patterns). The last two entries are three spaces and two spaces.
  delimiters_to_remove: ['\t', '\n', '   ', '  ']