diff --git a/README.md b/README.md index 222cc6854c935d1402987018634a70f61be87b9f..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 100644 --- a/README.md +++ b/README.md @@ -1,12 +0,0 @@ ---- -title: LLM4SciLit -emoji: 🏃 -colorFrom: indigo -colorTo: gray -sdk: gradio -sdk_version: 3.45.2 -app_file: app.py -pinned: false ---- - -Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference diff --git a/app.py b/app.py index 3703e2db0009fea1686d779101b431c47248e5e9..cc9ac253dfde7effc0581d29b2950f7596f8d0cf 100644 --- a/app.py +++ b/app.py @@ -1,7 +1,7 @@ import gradio as gr def greet(name): - return "Hello " + name + "!!" + return "Hello " + name + "! aaaaaaaa !" iface = gr.Interface(fn=greet, inputs="text", outputs="text") iface.launch() diff --git a/config/config.yaml b/config/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ff55756b3c112f3aa00e1ad9d46fb2a2a4df2549 --- /dev/null +++ b/config/config.yaml @@ -0,0 +1,25 @@ +defaults: + - document_loader: grobid + - text_splitter: spacy + - text_embedding: huggingface + - vector_store: faiss + - document_retriever: simple_retriever + - question_answering: huggingface + - _self_ + - override hydra/hydra_logging: disabled + - override hydra/job_logging: disabled + +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store + +mode: interactive +debug: + is_debug: false + force_rebuild_storage: false + + +hydra: + verbose: false \ No newline at end of file diff --git a/config/document_loader/grobid.yaml b/config/document_loader/grobid.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e3742b51ddd84721799bb5592dd7bcb506b623f2 --- /dev/null +++ b/config/document_loader/grobid.yaml @@ -0,0 +1,5 @@ +_target_: document_loader.grobid.GrobidLoader +grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument diff --git a/config/document_retriever/simple_retriever.yaml b/config/document_retriever/simple_retriever.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4d9f58878da1f1253ab997e0ba554f617a9744c1 --- /dev/null +++ b/config/document_retriever/simple_retriever.yaml @@ -0,0 +1 @@ +_target_: document_retriever.simple_retriever.SimpleDocumentRetriever \ No newline at end of file diff --git a/config/mode/interactive.yaml b/config/mode/interactive.yaml new file mode 100644 index 0000000000000000000000000000000000000000..423fae108878d10f87a833425bc3f209a51326f3 --- /dev/null +++ b/config/mode/interactive.yaml @@ -0,0 +1 @@ +show_retrieved_documents: true \ No newline at end of file diff --git a/config/question_answering/huggingface.yaml b/config/question_answering/huggingface.yaml new file mode 100644 index 0000000000000000000000000000000000000000..acf93004c6cc2ee7f661879d7dc2c525bbcdd49d --- /dev/null +++ b/config/question_answering/huggingface.yaml @@ -0,0 +1 @@ +_target_: question_answering.huggingface.HuggingFaceQuestionAnswering \ No newline at end of file diff --git a/config/text_embedding/huggingface.yaml b/config/text_embedding/huggingface.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c26c0bc1df0ccd94586513eb0dc3907511c63602 --- /dev/null +++ b/config/text_embedding/huggingface.yaml @@ -0,0 +1 @@ +_target_: text_embedding.huggingface.HuggingFaceTextEmbedding diff --git a/config/text_splitter/spacy.yaml b/config/text_splitter/spacy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..127a30d681f20206cacb33fbad0f2deeabdbbe1a --- /dev/null +++ b/config/text_splitter/spacy.yaml @@ -0,0 +1 @@ +_target_: text_splitter.spacy.SpacySplitter \ No newline at end of file diff --git a/config/vector_store/faiss.yaml b/config/vector_store/faiss.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0ab241463d3e0b78383d50999bf5e4092a5c1cce --- /dev/null +++ b/config/vector_store/faiss.yaml @@ -0,0 +1 @@ +_target_: vector_store.faiss.FAISSVectorStore \ No newline at end of file diff --git a/install.sh b/install.sh new file mode 100644 index 0000000000000000000000000000000000000000..f6534e20ad17d72d3b99406f0e59bfa7ac95338b --- /dev/null +++ b/install.sh @@ -0,0 +1,5 @@ + + +# download spacy model for document tokenization (https://spacy.io/usage) +# the en_core_web_trf model is the best performing model for tokenization +python -m spacy download en_core_web_trf \ No newline at end of file diff --git a/notebooks/end-to-end-demo.ipynb b/notebooks/end-to-end-demo.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..8d6e3da86f21fa242b555bdbe1ae4eb8973e5f7d --- /dev/null +++ b/notebooks/end-to-end-demo.ipynb @@ -0,0 +1,425 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from typing import Iterable, Iterator\n", + "from langchain.docstore.document import Document\n", + "from langchain.embeddings import HuggingFaceEmbeddings\n", + "\n", + "model_name = \"sentence-transformers/all-mpnet-base-v2\"\n", + "model_kwargs = {'device': 'cpu'}\n", + "encode_kwargs = {'normalize_embeddings': False}\n", + "model = HuggingFaceEmbeddings(\n", + " model_name=model_name,\n", + " model_kwargs=model_kwargs,\n", + " encode_kwargs=encode_kwargs\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.vectorstores import FAISS\n", + "path = \"/data/tommaso/llm4scilit/data/vector_store\"\n", + "db = FAISS.load_local(path, model)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[Document(page_content='These serum proteins have strong potential to serve as diagnostic and prognostic biomarkers of RA and can also be evaluated to fill the gaps in the current knowledge of pathogenesis of RA.These\\n\\nfindings can be validated in larger cohorts from different populations to identify diagnostic and prognostic biomarkers of RA.', metadata={'text': 'RA is a complex disease that is influenced by an intricate interactome of various environmental, genetic and microbial factors that influence the immune homeostasis.Owing to the complex genetic architecture accompanied by a plethora of microbial and environmental triggers that an organism is exposed to this has made the identification of diagnostic and prognostic markers challenging.Our study has explored the serum proteomics of this complex autoimmune disorder in a relatively understudied Pakistani population to identify disease biomarkers that are DE among various serotypes of RA patients and healthy controls.We identified that PZP, SELENOP, C4BP beta chain, ApoM, NAMLAA, CPN catalytic chain, OIT3, CPN subunit 2, ApoC1 and ApoCIII were DE between the RA patients and healthy controls.These serum proteins have strong potential to serve as diagnostic and prognostic biomarkers of RA and can also be evaluated to fill the gaps in the current knowledge of pathogenesis of RA.These findings can be validated in larger cohorts from different populations to identify diagnostic and prognostic biomarkers of RA.', 'para': '5', 'bboxes': \"[[{'page': '15', 'x': '187.65', 'y': '173.66', 'h': '371.62', 'w': '9.58'}, {'page': '15', 'x': '166.39', 'y': '186.22', 'h': '394.62', 'w': '9.58'}], [{'page': '15', 'x': '166.39', 'y': '198.77', 'h': '392.88', 'w': '9.58'}, {'page': '15', 'x': '166.39', 'y': '211.32', 'h': '392.88', 'w': '9.58'}, {'page': '15', 'x': '166.39', 'y': '223.88', 'h': '229.10', 'w': '9.58'}], [{'page': '15', 'x': '401.31', 'y': '223.88', 'h': '157.97', 'w': '9.58'}, {'page': '15', 'x': '166.10', 'y': '236.43', 'h': '393.18', 'w': '9.58'}, {'page': '15', 'x': '166.10', 'y': '248.98', 'h': '393.57', 'w': '9.58'}, {'page': '15', 'x': '166.10', 'y': '261.54', 'h': '130.46', 'w': '9.58'}], [{'page': '15', 'x': '299.65', 'y': '261.54', 'h': '260.87', 'w': '9.58'}, {'page': '15', 'x': '166.39', 'y': '274.09', 'h': '392.88', 'w': '9.58'}, {'page': '15', 'x': '166.39', 'y': '286.64', 'h': '201.22', 'w': '9.58'}], [{'page': '15', 'x': '370.71', 'y': '286.64', 'h': '188.57', 'w': '9.58'}, {'page': '15', 'x': '166.39', 'y': '299.19', 'h': '392.88', 'w': '9.58'}, {'page': '15', 'x': '166.39', 'y': '311.75', 'h': '238.67', 'w': '9.58'}], [{'page': '15', 'x': '407.54', 'y': '311.75', 'h': '151.74', 'w': '9.58'}, {'page': '15', 'x': '166.39', 'y': '324.30', 'h': '392.88', 'w': '9.58'}, {'page': '15', 'x': '166.39', 'y': '336.85', 'h': '28.14', 'w': '9.58'}]]\", 'pages': \"('15', '15')\", 'section_title': 'Conclusions', 'section_number': '5.', 'paper_title': 'LC-MS/MS-Based Serum Protein Profiling for Identification of Candidate Biomarkers in Pakistani Rheumatoid Arthritis Patients', 'file_path': '/data/tommaso/llm4scilit/data/papers/1.pdf'}),\n", + " Document(page_content='Rheumatoid factor (RF) and anti-citrullinated peptide antibodies (ACPA) are considered as the main serological markers for RA that have been included in the 2010 American College of Rheumatology (ACR)/European League against Rheumatism (EULAR) classification criteria for RA [7][8][9].Based on 2010 ACR/EULAR classification criteria for RA, clinically diagnosed RA patients can be categorized into four serotypes: (i) positive for both RF and ACPA, (ii) positive for RF and negative for ACPA, (iii) negative for RF and positive for ACPA and (iv) negative for both RF and ACPA.However\\n\\n, the levels of RF are also perturbed in connective tissue diseases [10] and some chronic infectious diseases such as hepatitis B and hepatitis C virus infections [11].RF\\n\\nis thus not a specific diagnostic marker for', metadata={'text': 'Rheumatoid factor (RF) and anti-citrullinated peptide antibodies (ACPA) are considered as the main serological markers for RA that have been included in the 2010 American College of Rheumatology (ACR)/European League against Rheumatism (EULAR) classification criteria for RA [7][8][9].Based on 2010 ACR/EULAR classification criteria for RA, clinically diagnosed RA patients can be categorized into four serotypes: (i) positive for both RF and ACPA, (ii) positive for RF and negative for ACPA, (iii) negative for RF and positive for ACPA and (iv) negative for both RF and ACPA.However, the levels of RF are also perturbed in connective tissue diseases [10] and some chronic infectious diseases such as hepatitis B and hepatitis C virus infections [11].RF is thus not a specific diagnostic marker for RA.ACPA is comparatively a more specific biomarker and two-thirds of the individuals ultimately diagnosed with RA were tested positive for ACPAs 6-10 years before diagnosis [12,13].A total of 1-3% of the healthy population may also test positive for ACPAs suggesting the decreased specificity of this biomarker [14][15][16][17].Therefore, it is important to discover the biomarkers for the diagnosis of RA with both increased sensitivity and specificity.', 'para': '6', 'bboxes': \"[[{'page': '2', 'x': '187.65', 'y': '223.58', 'h': '373.27', 'w': '9.58'}, {'page': '2', 'x': '166.39', 'y': '236.13', 'h': '392.88', 'w': '9.58'}, {'page': '2', 'x': '166.39', 'y': '248.68', 'h': '394.53', 'w': '9.58'}, {'page': '2', 'x': '166.39', 'y': '261.24', 'h': '133.81', 'w': '9.58'}], [{'page': '2', 'x': '303.29', 'y': '261.24', 'h': '257.23', 'w': '9.58'}, {'page': '2', 'x': '166.39', 'y': '273.79', 'h': '393.08', 'w': '9.58'}, {'page': '2', 'x': '166.39', 'y': '286.34', 'h': '392.88', 'w': '9.58'}, {'page': '2', 'x': '166.10', 'y': '298.90', 'h': '272.66', 'w': '9.58'}], [{'page': '2', 'x': '441.85', 'y': '298.90', 'h': '117.43', 'w': '9.58'}, {'page': '2', 'x': '166.39', 'y': '311.45', 'h': '392.88', 'w': '9.58'}, {'page': '2', 'x': '166.39', 'y': '324.00', 'h': '240.16', 'w': '9.58'}], [{'page': '2', 'x': '409.64', 'y': '324.00', 'h': '149.63', 'w': '9.58'}, {'page': '2', 'x': '166.39', 'y': '336.55', 'h': '67.99', 'w': '9.58'}], [{'page': '2', 'x': '236.99', 'y': '336.55', 'h': '322.28', 'w': '9.58'}, {'page': '2', 'x': '166.39', 'y': '349.11', 'h': '392.88', 'w': '9.58'}, {'page': '2', 'x': '166.39', 'y': '361.66', 'h': '107.38', 'w': '9.58'}], [{'page': '2', 'x': '276.86', 'y': '361.66', 'h': '282.42', 'w': '9.58'}, {'page': '2', 'x': '166.39', 'y': '374.21', 'h': '325.69', 'w': '9.58'}], [{'page': '2', 'x': '495.20', 'y': '374.21', 'h': '64.08', 'w': '9.58'}, {'page': '2', 'x': '166.39', 'y': '386.77', 'h': '393.27', 'w': '9.58'}, {'page': '2', 'x': '166.39', 'y': '399.32', 'h': '65.18', 'w': '9.58'}]]\", 'pages': \"('2', '2')\", 'section_title': 'Introduction', 'section_number': '1.', 'paper_title': 'LC-MS/MS-Based Serum Protein Profiling for Identification of Candidate Biomarkers in Pakistani Rheumatoid Arthritis Patients', 'file_path': '/data/tommaso/llm4scilit/data/papers/1.pdf'}),\n", + " Document(page_content='is thus not a specific diagnostic marker for\\n\\nRA.ACPA is comparatively a more specific biomarker and two-thirds of the individuals ultimately diagnosed with RA were tested positive for ACPAs 6-10 years before diagnosis [12,13].A total of 1-3% of the healthy population may also test positive for ACPAs suggesting the decreased specificity of this biomarker [14][15][16][17].Therefore\\n\\n, it is important to discover the biomarkers for the diagnosis of RA with both increased sensitivity and specificity.', metadata={'text': 'Rheumatoid factor (RF) and anti-citrullinated peptide antibodies (ACPA) are considered as the main serological markers for RA that have been included in the 2010 American College of Rheumatology (ACR)/European League against Rheumatism (EULAR) classification criteria for RA [7][8][9].Based on 2010 ACR/EULAR classification criteria for RA, clinically diagnosed RA patients can be categorized into four serotypes: (i) positive for both RF and ACPA, (ii) positive for RF and negative for ACPA, (iii) negative for RF and positive for ACPA and (iv) negative for both RF and ACPA.However, the levels of RF are also perturbed in connective tissue diseases [10] and some chronic infectious diseases such as hepatitis B and hepatitis C virus infections [11].RF is thus not a specific diagnostic marker for RA.ACPA is comparatively a more specific biomarker and two-thirds of the individuals ultimately diagnosed with RA were tested positive for ACPAs 6-10 years before diagnosis [12,13].A total of 1-3% of the healthy population may also test positive for ACPAs suggesting the decreased specificity of this biomarker [14][15][16][17].Therefore, it is important to discover the biomarkers for the diagnosis of RA with both increased sensitivity and specificity.', 'para': '6', 'bboxes': \"[[{'page': '2', 'x': '187.65', 'y': '223.58', 'h': '373.27', 'w': '9.58'}, {'page': '2', 'x': '166.39', 'y': '236.13', 'h': '392.88', 'w': '9.58'}, {'page': '2', 'x': '166.39', 'y': '248.68', 'h': '394.53', 'w': '9.58'}, {'page': '2', 'x': '166.39', 'y': '261.24', 'h': '133.81', 'w': '9.58'}], [{'page': '2', 'x': '303.29', 'y': '261.24', 'h': '257.23', 'w': '9.58'}, {'page': '2', 'x': '166.39', 'y': '273.79', 'h': '393.08', 'w': '9.58'}, {'page': '2', 'x': '166.39', 'y': '286.34', 'h': '392.88', 'w': '9.58'}, {'page': '2', 'x': '166.10', 'y': '298.90', 'h': '272.66', 'w': '9.58'}], [{'page': '2', 'x': '441.85', 'y': '298.90', 'h': '117.43', 'w': '9.58'}, {'page': '2', 'x': '166.39', 'y': '311.45', 'h': '392.88', 'w': '9.58'}, {'page': '2', 'x': '166.39', 'y': '324.00', 'h': '240.16', 'w': '9.58'}], [{'page': '2', 'x': '409.64', 'y': '324.00', 'h': '149.63', 'w': '9.58'}, {'page': '2', 'x': '166.39', 'y': '336.55', 'h': '67.99', 'w': '9.58'}], [{'page': '2', 'x': '236.99', 'y': '336.55', 'h': '322.28', 'w': '9.58'}, {'page': '2', 'x': '166.39', 'y': '349.11', 'h': '392.88', 'w': '9.58'}, {'page': '2', 'x': '166.39', 'y': '361.66', 'h': '107.38', 'w': '9.58'}], [{'page': '2', 'x': '276.86', 'y': '361.66', 'h': '282.42', 'w': '9.58'}, {'page': '2', 'x': '166.39', 'y': '374.21', 'h': '325.69', 'w': '9.58'}], [{'page': '2', 'x': '495.20', 'y': '374.21', 'h': '64.08', 'w': '9.58'}, {'page': '2', 'x': '166.39', 'y': '386.77', 'h': '393.27', 'w': '9.58'}, {'page': '2', 'x': '166.39', 'y': '399.32', 'h': '65.18', 'w': '9.58'}]]\", 'pages': \"('2', '2')\", 'section_title': 'Introduction', 'section_number': '1.', 'paper_title': 'LC-MS/MS-Based Serum Protein Profiling for Identification of Candidate Biomarkers in Pakistani Rheumatoid Arthritis Patients', 'file_path': '/data/tommaso/llm4scilit/data/papers/1.pdf'}),\n", + " Document(page_content='For validation, serum samples were collected and processed from RA patients (n = 60) (mean age ± SD = 41.495 ± 12.8275) and healthy controls (n = 20) (mean age ± SD = 45.4 ± 11.31) from the same population.\\n\\nThe demographics and clinical characteristics of the experimental and validation cohort are shown in Table 1.', metadata={'text': 'For validation, serum samples were collected and processed from RA patients (n = 60) (mean age ± SD = 41.495 ± 12.8275) and healthy controls (n = 20) (mean age ± SD = 45.4 ± 11.31) from the same population.The demographics and clinical characteristics of the experimental and validation cohort are shown in Table 1.', 'para': '1', 'bboxes': \"[[{'page': '3', 'x': '187.65', 'y': '160.81', 'h': '372.02', 'w': '9.58'}, {'page': '3', 'x': '166.10', 'y': '173.05', 'h': '394.17', 'w': '9.90'}, {'page': '3', 'x': '166.07', 'y': '185.60', 'h': '256.73', 'w': '9.90'}], [{'page': '3', 'x': '425.92', 'y': '185.92', 'h': '133.36', 'w': '9.58'}, {'page': '3', 'x': '166.39', 'y': '198.47', 'h': '343.00', 'w': '9.58'}]]\", 'pages': \"('3', '3')\", 'section_title': 'Study Subjects and Serum Collection', 'section_number': '2.1.', 'paper_title': 'LC-MS/MS-Based Serum Protein Profiling for Identification of Candidate Biomarkers in Pakistani Rheumatoid Arthritis Patients', 'file_path': '/data/tommaso/llm4scilit/data/papers/1.pdf'})]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "db.as_retriever().get_relevant_documents(\"What are the main serological markers for RA?\", metadata={\"paper_title\": \"LC-MS/MS-Based Serum Protein Profiling for Identification of Candidate Biomarkers in Pakistani Rheumatoid Arthritis Patients\"})" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "60" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "db.index.ntotal" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "DATA_PATH = Path(\"/data/tommaso/llm4scilit/data\")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['/data/tommaso/llm4scilit/data/papers/3.pdf',\n", + " '/data/tommaso/llm4scilit/data/papers/2.pdf',\n", + " '/data/tommaso/llm4scilit/data/papers/7.pdf',\n", + " '/data/tommaso/llm4scilit/data/papers/1.pdf',\n", + " '/data/tommaso/llm4scilit/data/papers/6.pdf',\n", + " '/data/tommaso/llm4scilit/data/papers/10.pdf',\n", + " '/data/tommaso/llm4scilit/data/papers/5.pdf',\n", + " '/data/tommaso/llm4scilit/data/papers/4.pdf',\n", + " '/data/tommaso/llm4scilit/data/papers/9.pdf',\n", + " '/data/tommaso/llm4scilit/data/papers/8.pdf']" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import glob\n", + "glob.glob(str(DATA_PATH / \"papers/*\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[Document(page_content='We determined that 144 proteins showed significant differential abundance between the IA and control SF proteomes, of which 11 protein candidates were selected for future follow-up studies.Similar analyses applied to our peptidomic data identified 15 peptide sequences, originating from 4 protein precursors, to have significant differential abundance in IA compared to the control SF peptidome.Pathway enrichment analysis of the IA SF peptidome along with AMP prediction suggests a possible mechanistic role of microbes in eliciting an immune response which drives the development of IA.', metadata={'text': 'We determined that 144 proteins showed significant differential abundance between the IA and control SF proteomes, of which 11 protein candidates were selected for future follow-up studies.Similar analyses applied to our peptidomic data identified 15 peptide sequences, originating from 4 protein precursors, to have significant differential abundance in IA compared to the control SF peptidome.Pathway enrichment analysis of the IA SF peptidome along with AMP prediction suggests a possible mechanistic role of microbes in eliciting an immune response which drives the development of IA.', 'para': '2', 'bboxes': \"[[{'page': '1', 'x': '101.12', 'y': '422.98', 'h': '424.81', 'w': '9.24'}, {'page': '1', 'x': '63.12', 'y': '434.98', 'h': '340.13', 'w': '9.24'}], [{'page': '1', 'x': '405.45', 'y': '434.98', 'h': '120.66', 'w': '9.24'}, {'page': '1', 'x': '63.12', 'y': '446.98', 'h': '468.92', 'w': '9.24'}, {'page': '1', 'x': '63.12', 'y': '458.98', 'h': '225.40', 'w': '9.24'}], [{'page': '1', 'x': '290.71', 'y': '458.98', 'h': '234.48', 'w': '9.24'}, {'page': '1', 'x': '63.12', 'y': '470.98', 'h': '460.78', 'w': '9.24'}, {'page': '1', 'x': '63.12', 'y': '482.98', 'h': '91.59', 'w': '9.24'}]]\", 'pages': \"('1', '1')\", 'section_title': 'Results:', 'section_number': 'None', 'paper_title': 'Elucidating the endogenous synovial fluid proteome and peptidome of inflammatory arthritis using label-free mass spectrometry', 'file_path': '/data/tommaso/llm4scilit/data/papers/2.pdf'}),\n", + " Document(page_content='The discovery-phase data generated herein has provided a basis for the identification of candidates with the greatest potential to serve as novel serum biomarkers specific to inflammatory arthritides.Moreover, these findings facilitate the understanding of possible disease mechanisms specific to each subtype.', metadata={'text': 'The discovery-phase data generated herein has provided a basis for the identification of candidates with the greatest potential to serve as novel serum biomarkers specific to inflammatory arthritides.Moreover, these findings facilitate the understanding of possible disease mechanisms specific to each subtype.', 'para': '1', 'bboxes': \"[[{'page': '1', 'x': '122.15', 'y': '497.98', 'h': '394.30', 'w': '9.24'}, {'page': '1', 'x': '63.12', 'y': '509.98', 'h': '391.31', 'w': '9.24'}], [{'page': '1', 'x': '456.63', 'y': '509.98', 'h': '63.75', 'w': '9.24'}, {'page': '1', 'x': '63.12', 'y': '521.98', 'h': '374.26', 'w': '9.24'}]]\", 'pages': \"('1', '1')\", 'section_title': 'Conclusions:', 'section_number': 'None', 'paper_title': 'Elucidating the endogenous synovial fluid proteome and peptidome of inflammatory arthritis using label-free mass spectrometry', 'file_path': '/data/tommaso/llm4scilit/data/papers/2.pdf'}),\n", + " Document(page_content=\"Inflammatory arthritis (IA) is characterized by synovial hyperplasia leading to degradation of adjacent articular cartilage and bone [1].The term encompasses several forms of inflammatory joint diseases that when taken together, have an annual incidence ranging from 115 to 271 per 100,000 adults [2].IA is a multifactorial disease driven by the complex interplay of both genetics and the environment.Rheumatoid arthritis (RA), the most common and potentially destructive IA, has a well-established association with class II major histocompatibility complex (MHC) alleles while the spondyloarthritides, such as psoriatic arthritis (PsA), are more frequently associated with class I MHC alleles [3].Susceptibility to IA increases when genetic predisposition is complemented by environmental risk factors such as smoking, obesity and more recently, microbial infection and intestinal dysbiosis [4][5][6].The exact etiology of IA is still poorly understood with studies aimed at delineating the molecular pathways driving loss of immunological tolerance to the body's self-antigens.Alterations to the adaptive and innate immune system perpetuate systemic inflammation and lead to an elevated risk of developing comorbid conditions such as cardiovascular disease, metabolic syndrome, diabetes and depression [7,8].Naturally, there is a compelling need to identify markers of aberrant immune pathways relevant to IA which may advance current insights into the molecular mechanisms of the disease and serve as clinical markers for disease monitoring and treatment responses.\", metadata={'text': \"Inflammatory arthritis (IA) is characterized by synovial hyperplasia leading to degradation of adjacent articular cartilage and bone [1].The term encompasses several forms of inflammatory joint diseases that when taken together, have an annual incidence ranging from 115 to 271 per 100,000 adults [2].IA is a multifactorial disease driven by the complex interplay of both genetics and the environment.Rheumatoid arthritis (RA), the most common and potentially destructive IA, has a well-established association with class II major histocompatibility complex (MHC) alleles while the spondyloarthritides, such as psoriatic arthritis (PsA), are more frequently associated with class I MHC alleles [3].Susceptibility to IA increases when genetic predisposition is complemented by environmental risk factors such as smoking, obesity and more recently, microbial infection and intestinal dysbiosis [4][5][6].The exact etiology of IA is still poorly understood with studies aimed at delineating the molecular pathways driving loss of immunological tolerance to the body's self-antigens.Alterations to the adaptive and innate immune system perpetuate systemic inflammation and lead to an elevated risk of developing comorbid conditions such as cardiovascular disease, metabolic syndrome, diabetes and depression [7,8].Naturally, there is a compelling need to identify markers of aberrant immune pathways relevant to IA which may advance current insights into the molecular mechanisms of the disease and serve as clinical markers for disease monitoring and treatment responses.\", 'para': '7', 'bboxes': \"[[{'page': '2', 'x': '56.69', 'y': '101.85', 'h': '233.86', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '113.84', 'h': '233.87', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '125.84', 'h': '98.24', 'w': '11.68'}], [{'page': '2', 'x': '158.95', 'y': '125.84', 'h': '131.59', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '137.83', 'h': '233.86', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '149.83', 'h': '233.87', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '161.83', 'h': '124.09', 'w': '11.68'}], [{'page': '2', 'x': '183.72', 'y': '161.83', 'h': '106.83', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '173.85', 'h': '233.83', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '185.84', 'h': '94.37', 'w': '11.68'}], [{'page': '2', 'x': '155.55', 'y': '185.84', 'h': '135.01', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '197.84', 'h': '233.83', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '209.83', 'h': '233.86', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '221.85', 'h': '233.86', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '233.85', 'h': '233.84', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '245.84', 'h': '212.58', 'w': '11.68'}], [{'page': '2', 'x': '272.28', 'y': '245.84', 'h': '18.27', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '257.85', 'h': '233.83', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '269.84', 'h': '233.86', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '281.84', 'h': '233.86', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '293.83', 'h': '127.47', 'w': '11.68'}], [{'page': '2', 'x': '187.45', 'y': '293.83', 'h': '103.09', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '305.83', 'h': '233.86', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '317.85', 'h': '233.86', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '329.85', 'h': '184.18', 'w': '11.68'}], [{'page': '2', 'x': '243.59', 'y': '329.85', 'h': '46.94', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '341.84', 'h': '233.86', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '353.84', 'h': '233.84', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '365.84', 'h': '233.86', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '377.83', 'h': '233.85', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '389.83', 'h': '24.69', 'w': '11.68'}], [{'page': '2', 'x': '84.82', 'y': '389.83', 'h': '205.76', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '401.82', 'h': '233.85', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '413.82', 'h': '233.84', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '425.81', 'h': '233.86', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '437.81', 'h': '203.55', 'w': '11.68'}]]\", 'pages': \"('2', '2')\", 'section_title': 'Introduction', 'section_number': 'None', 'paper_title': 'Elucidating the endogenous synovial fluid proteome and peptidome of inflammatory arthritis using label-free mass spectrometry', 'file_path': '/data/tommaso/llm4scilit/data/papers/2.pdf'}),\n", + " Document(page_content='The rise in high-throughput technologies, such as next-generation gene sequencing and mass spectrometry (MS), facilitate the discovery of key modulators of disease.Specifically, MS-based approaches provide an essential analytical platform for the identification, quantification and characterization of candidate biomarkers.Biomarkers may come in the form of a molecular signature, a clinical feature or even as an imaging parameter.Molecular biomarkers may be further subtyped into the domains of genomics, transcriptomics, proteomics, metabolomics or peptidomics.Due to the importance of proteins in pathophysiological processes, there is increased interest in resolving the proteomic profile of biospecimens related to IA.Similarly, peptides play a seminal role in mediating physiological functions by serving as neurotransmitters, hormones, antibiotics and immune regulators [9].During IA, joint pain and inflammation are driven by aberrant proteolysis resulting in the production of inflammatory peptides and the destruction of joint cartilage and bone.Synovial fluid (SF), a proximal fluid which bathes the intrinsic joint structures, is an important reservoir of putative protein and peptide biomarkers whose abundance levels fluctuate in response to pathological changes due to disease [10].', metadata={'text': 'The rise in high-throughput technologies, such as next-generation gene sequencing and mass spectrometry (MS), facilitate the discovery of key modulators of disease.Specifically, MS-based approaches provide an essential analytical platform for the identification, quantification and characterization of candidate biomarkers.Biomarkers may come in the form of a molecular signature, a clinical feature or even as an imaging parameter.Molecular biomarkers may be further subtyped into the domains of genomics, transcriptomics, proteomics, metabolomics or peptidomics.Due to the importance of proteins in pathophysiological processes, there is increased interest in resolving the proteomic profile of biospecimens related to IA.Similarly, peptides play a seminal role in mediating physiological functions by serving as neurotransmitters, hormones, antibiotics and immune regulators [9].During IA, joint pain and inflammation are driven by aberrant proteolysis resulting in the production of inflammatory peptides and the destruction of joint cartilage and bone.Synovial fluid (SF), a proximal fluid which bathes the intrinsic joint structures, is an important reservoir of putative protein and peptide biomarkers whose abundance levels fluctuate in response to pathological changes due to disease [10].', 'para': '7', 'bboxes': \"[[{'page': '2', 'x': '64.69', 'y': '449.80', 'h': '225.84', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '461.80', 'h': '233.86', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '473.85', 'h': '233.87', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '485.84', 'h': '45.00', 'w': '11.68'}], [{'page': '2', 'x': '106.03', 'y': '485.84', 'h': '184.52', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '497.84', 'h': '233.86', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '509.84', 'h': '233.86', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '521.85', 'h': '36.67', 'w': '11.68'}], [{'page': '2', 'x': '96.18', 'y': '521.85', 'h': '194.37', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '533.85', 'h': '233.87', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '545.84', 'h': '44.89', 'w': '11.68'}], [{'page': '2', 'x': '105.44', 'y': '545.84', 'h': '185.11', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '557.85', 'h': '233.86', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '569.85', 'h': '200.97', 'w': '11.68'}], [{'page': '2', 'x': '261.20', 'y': '569.85', 'h': '29.37', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '581.84', 'h': '233.86', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '593.85', 'h': '233.86', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '605.85', 'h': '191.41', 'w': '11.68'}], [{'page': '2', 'x': '251.27', 'y': '605.85', 'h': '39.28', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '617.84', 'h': '233.86', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '629.84', 'h': '233.84', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '641.83', 'h': '177.40', 'w': '11.68'}], [{'page': '2', 'x': '240.69', 'y': '641.83', 'h': '49.86', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '653.83', 'h': '233.85', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '665.83', 'h': '233.86', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '677.85', 'h': '233.88', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '689.84', 'h': '23.12', 'w': '11.68'}], [{'page': '2', 'x': '82.70', 'y': '689.84', 'h': '207.82', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '701.84', 'h': '233.86', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '713.85', 'h': '233.86', 'w': '11.68'}, {'page': '2', 'x': '304.72', 'y': '89.32', 'h': '233.88', 'w': '11.68'}, {'page': '2', 'x': '304.72', 'y': '101.32', 'h': '116.75', 'w': '11.68'}]]\", 'pages': \"('2', '2')\", 'section_title': 'Introduction', 'section_number': 'None', 'paper_title': 'Elucidating the endogenous synovial fluid proteome and peptidome of inflammatory arthritis using label-free mass spectrometry', 'file_path': '/data/tommaso/llm4scilit/data/papers/2.pdf'}),\n", + " Document(page_content='In the current study, we performed MS-based proteomic and peptidomic analyses of SF from RA and PsA patients to identify and quantify significant proteins and peptides related to the aetiopathogenesis of IA.Differential abundance analyses highlighted the capacity for dysregulated SF proteins and peptides to reflect disease activity while pathway analysis and antimicrobial peptide (AMP) prediction alluded to a larger role of microbes in the initiation and progression of IA.These findings provide the means for discovering novel candidates to serve as possible biomarkers of IA while simultaneously, highlighting possible mechanistic networks responsible for the disease progression of RA and PsA.', metadata={'text': 'In the current study, we performed MS-based proteomic and peptidomic analyses of SF from RA and PsA patients to identify and quantify significant proteins and peptides related to the aetiopathogenesis of IA.Differential abundance analyses highlighted the capacity for dysregulated SF proteins and peptides to reflect disease activity while pathway analysis and antimicrobial peptide (AMP) prediction alluded to a larger role of microbes in the initiation and progression of IA.These findings provide the means for discovering novel candidates to serve as possible biomarkers of IA while simultaneously, highlighting possible mechanistic networks responsible for the disease progression of RA and PsA.', 'para': '2', 'bboxes': \"[[{'page': '2', 'x': '312.72', 'y': '113.32', 'h': '225.87', 'w': '11.68'}, {'page': '2', 'x': '304.72', 'y': '125.33', 'h': '233.87', 'w': '11.68'}, {'page': '2', 'x': '304.72', 'y': '137.32', 'h': '233.88', 'w': '11.68'}, {'page': '2', 'x': '304.72', 'y': '149.32', 'h': '202.76', 'w': '11.68'}], [{'page': '2', 'x': '511.58', 'y': '149.32', 'h': '27.00', 'w': '11.68'}, {'page': '2', 'x': '304.72', 'y': '161.33', 'h': '233.87', 'w': '11.68'}, {'page': '2', 'x': '304.72', 'y': '173.32', 'h': '233.85', 'w': '11.68'}, {'page': '2', 'x': '304.72', 'y': '185.32', 'h': '233.88', 'w': '11.68'}, {'page': '2', 'x': '304.72', 'y': '197.31', 'h': '233.85', 'w': '11.68'}, {'page': '2', 'x': '304.72', 'y': '209.31', 'h': '150.32', 'w': '11.68'}], [{'page': '2', 'x': '458.13', 'y': '209.31', 'h': '80.45', 'w': '11.68'}, {'page': '2', 'x': '304.72', 'y': '221.33', 'h': '233.88', 'w': '11.68'}, {'page': '2', 'x': '304.72', 'y': '233.32', 'h': '233.86', 'w': '11.68'}, {'page': '2', 'x': '304.72', 'y': '245.33', 'h': '233.87', 'w': '11.68'}, {'page': '2', 'x': '304.72', 'y': '257.32', 'h': '159.57', 'w': '11.68'}]]\", 'pages': \"('2', '2')\", 'section_title': 'Introduction', 'section_number': 'None', 'paper_title': 'Elucidating the endogenous synovial fluid proteome and peptidome of inflammatory arthritis using label-free mass spectrometry', 'file_path': '/data/tommaso/llm4scilit/data/papers/2.pdf'}),\n", + " Document(page_content='Research ethics board approval was received for the study from the University Health Network, Mount Sinai Hospital and the University of Calgary.Informed consent was obtained from all patients.', metadata={'text': 'Research ethics board approval was received for the study from the University Health Network, Mount Sinai Hospital and the University of Calgary.Informed consent was obtained from all patients.', 'para': '1', 'bboxes': \"[[{'page': '2', 'x': '304.72', 'y': '305.33', 'h': '233.86', 'w': '11.68'}, {'page': '2', 'x': '304.72', 'y': '317.32', 'h': '233.87', 'w': '11.68'}, {'page': '2', 'x': '304.72', 'y': '329.32', 'h': '158.46', 'w': '11.68'}], [{'page': '2', 'x': '465.58', 'y': '329.32', 'h': '73.00', 'w': '11.68'}, {'page': '2', 'x': '304.72', 'y': '341.31', 'h': '125.37', 'w': '11.68'}]]\", 'pages': \"('2', '2')\", 'section_title': 'Patients and SF collection', 'section_number': 'None', 'paper_title': 'Elucidating the endogenous synovial fluid proteome and peptidome of inflammatory arthritis using label-free mass spectrometry', 'file_path': '/data/tommaso/llm4scilit/data/papers/2.pdf'}),\n", + " Document(page_content='SF samples for the study were obtained, retrospectively, from 10 cases with RA, 10 cases with PsA and 10 cadaveric controls.RA patients were classified according to the 1987 American College of Rheumatology (ACR) classification criteria [11].PsA patients satisfied the Classification Criteria for Psoriatic Arthritis (CASPAR) [12].', metadata={'text': 'SF samples for the study were obtained, retrospectively, from 10 cases with RA, 10 cases with PsA and 10 cadaveric controls.RA patients were classified according to the 1987 American College of Rheumatology (ACR) classification criteria [11].PsA patients satisfied the Classification Criteria for Psoriatic Arthritis (CASPAR) [12].', 'para': '2', 'bboxes': \"[[{'page': '2', 'x': '312.72', 'y': '353.31', 'h': '225.86', 'w': '11.68'}, {'page': '2', 'x': '304.72', 'y': '365.30', 'h': '233.86', 'w': '11.68'}, {'page': '2', 'x': '304.72', 'y': '377.33', 'h': '53.52', 'w': '11.68'}], [{'page': '2', 'x': '360.58', 'y': '377.33', 'h': '178.02', 'w': '11.68'}, {'page': '2', 'x': '304.72', 'y': '389.32', 'h': '233.86', 'w': '11.68'}, {'page': '2', 'x': '304.72', 'y': '401.33', 'h': '80.06', 'w': '11.68'}], [{'page': '2', 'x': '388.11', 'y': '401.33', 'h': '150.47', 'w': '11.68'}, {'page': '2', 'x': '304.72', 'y': '413.33', 'h': '207.09', 'w': '11.68'}]]\", 'pages': \"('2', '2')\", 'section_title': 'Patients and SF collection', 'section_number': 'None', 'paper_title': 'Elucidating the endogenous synovial fluid proteome and peptidome of inflammatory arthritis using label-free mass spectrometry', 'file_path': '/data/tommaso/llm4scilit/data/papers/2.pdf'}),\n", + " Document(page_content='Cadaveric control SF were obtained from joints through the Southern Alberta Tissue Donation Program.Inclusion criteria consisted of an age of 18 years or older with no medical history of arthritis, joint injury or joint surgery (including visual inspection of cartilage surfaces during recovery), no prescription anti-inflammatory medications and availability within 4 h of death.Exclusion criteria for all disease cohorts included patients receiving therapeutic biological drugs and the presence of other causes of inflammation (e.g.infection and/or crystal disease) or co-morbidities (e.g.cancer).', metadata={'text': 'Cadaveric control SF were obtained from joints through the Southern Alberta Tissue Donation Program.Inclusion criteria consisted of an age of 18 years or older with no medical history of arthritis, joint injury or joint surgery (including visual inspection of cartilage surfaces during recovery), no prescription anti-inflammatory medications and availability within 4 h of death.Exclusion criteria for all disease cohorts included patients receiving therapeutic biological drugs and the presence of other causes of inflammation (e.g.infection and/or crystal disease) or co-morbidities (e.g.cancer).', 'para': '4', 'bboxes': \"[[{'page': '2', 'x': '312.72', 'y': '425.32', 'h': '225.87', 'w': '11.68'}, {'page': '2', 'x': '304.72', 'y': '437.32', 'h': '233.88', 'w': '11.68'}], [{'page': '2', 'x': '304.72', 'y': '449.31', 'h': '233.87', 'w': '11.68'}, {'page': '2', 'x': '304.72', 'y': '461.31', 'h': '233.89', 'w': '11.68'}, {'page': '2', 'x': '304.72', 'y': '473.30', 'h': '233.86', 'w': '11.68'}, {'page': '2', 'x': '304.72', 'y': '485.30', 'h': '233.86', 'w': '11.68'}, {'page': '2', 'x': '304.72', 'y': '497.29', 'h': '204.09', 'w': '11.68'}], [{'page': '2', 'x': '512.45', 'y': '497.29', 'h': '26.13', 'w': '11.68'}, {'page': '2', 'x': '304.72', 'y': '509.33', 'h': '233.88', 'w': '11.68'}, {'page': '2', 'x': '304.72', 'y': '521.32', 'h': '233.83', 'w': '11.68'}, {'page': '2', 'x': '304.72', 'y': '533.32', 'h': '160.24', 'w': '11.68'}], [{'page': '2', 'x': '469.66', 'y': '533.32', 'h': '68.90', 'w': '11.68'}, {'page': '2', 'x': '304.72', 'y': '545.31', 'h': '154.80', 'w': '11.68'}], [{'page': '2', 'x': '461.83', 'y': '545.31', 'h': '32.43', 'w': '11.68'}]]\", 'pages': \"('2', '2')\", 'section_title': 'Patients and SF collection', 'section_number': 'None', 'paper_title': 'Elucidating the endogenous synovial fluid proteome and peptidome of inflammatory arthritis using label-free mass spectrometry', 'file_path': '/data/tommaso/llm4scilit/data/papers/2.pdf'}),\n", + " Document(page_content='IA SF samples (both RA and PsA) were obtained through needle aspiration of knee joints and kept on ice.Samples were transferred to centrifuge tubes and spun at 160 RCF for 10 min at 4 °C.The supernatant was transferred to a sterile 1.5 mL centrifuge tube and spun at 2000 RCF for another 10 min at 4 °C.Samples were immediately stored at -80 °C until further processing.SF samples from cadavers were collected without the use of lavage.Samples were centrifuged at 3000 RCF for 15 min and stored at -80 °C until further processing.', metadata={'text': 'IA SF samples (both RA and PsA) were obtained through needle aspiration of knee joints and kept on ice.Samples were transferred to centrifuge tubes and spun at 160 RCF for 10 min at 4 °C.The supernatant was transferred to a sterile 1.5 mL centrifuge tube and spun at 2000 RCF for another 10 min at 4 °C.Samples were immediately stored at -80 °C until further processing.SF samples from cadavers were collected without the use of lavage.Samples were centrifuged at 3000 RCF for 15 min and stored at -80 °C until further processing.', 'para': '5', 'bboxes': \"[[{'page': '2', 'x': '304.72', 'y': '581.33', 'h': '233.90', 'w': '11.68'}, {'page': '2', 'x': '304.72', 'y': '593.32', 'h': '197.31', 'w': '11.68'}], [{'page': '2', 'x': '504.76', 'y': '593.32', 'h': '33.83', 'w': '11.68'}, {'page': '2', 'x': '304.72', 'y': '605.32', 'h': '233.87', 'w': '11.68'}, {'page': '2', 'x': '304.72', 'y': '617.31', 'h': '77.57', 'w': '11.68'}], [{'page': '2', 'x': '385.33', 'y': '617.31', 'h': '153.23', 'w': '11.68'}, {'page': '2', 'x': '304.72', 'y': '629.31', 'h': '233.85', 'w': '11.68'}, {'page': '2', 'x': '304.72', 'y': '641.30', 'h': '94.55', 'w': '11.68'}], [{'page': '2', 'x': '401.69', 'y': '641.30', 'h': '136.92', 'w': '11.68'}, {'page': '2', 'x': '304.72', 'y': '653.30', 'h': '7.62', 'w': '11.68'}, {'page': '2', 'x': '317.47', 'y': '652.90', 'h': '7.64', 'w': '16.21'}, {'page': '2', 'x': '326.34', 'y': '653.30', 'h': '134.05', 'w': '11.68'}], [{'page': '2', 'x': '465.52', 'y': '653.30', 'h': '73.08', 'w': '11.68'}, {'page': '2', 'x': '304.72', 'y': '665.29', 'h': '209.12', 'w': '11.68'}], [{'page': '2', 'x': '517.19', 'y': '665.29', 'h': '21.40', 'w': '11.68'}, {'page': '2', 'x': '304.72', 'y': '677.33', 'h': '233.86', 'w': '11.68'}, {'page': '2', 'x': '304.72', 'y': '689.32', 'h': '7.62', 'w': '11.68'}, {'page': '2', 'x': '314.65', 'y': '688.92', 'h': '7.64', 'w': '16.21'}, {'page': '2', 'x': '323.52', 'y': '689.32', 'h': '122.76', 'w': '11.68'}]]\", 'pages': \"('2', '2')\", 'section_title': 'SF sample preparation', 'section_number': 'None', 'paper_title': 'Elucidating the endogenous synovial fluid proteome and peptidome of inflammatory arthritis using label-free mass spectrometry', 'file_path': '/data/tommaso/llm4scilit/data/papers/2.pdf'}),\n", + " Document(page_content='At the time of analysis, samples were blinded, randomized, thawed on ice and their respective total protein concentrations were measured with a Pierce Coomassie (Bradford) total protein assay.', metadata={'text': 'At the time of analysis, samples were blinded, randomized, thawed on ice and their respective total protein concentrations were measured with a Pierce Coomassie (Bradford) total protein assay.', 'para': '0', 'bboxes': \"[[{'page': '2', 'x': '312.72', 'y': '701.32', 'h': '225.86', 'w': '11.68'}, {'page': '2', 'x': '304.72', 'y': '713.33', 'h': '233.88', 'w': '11.68'}, {'page': '3', 'x': '56.69', 'y': '89.33', 'h': '233.86', 'w': '11.68'}, {'page': '3', 'x': '56.69', 'y': '101.32', 'h': '120.95', 'w': '11.68'}]]\", 'pages': \"('2', '3')\", 'section_title': 'SF sample preparation', 'section_number': 'None', 'paper_title': 'Elucidating the endogenous synovial fluid proteome and peptidome of inflammatory arthritis using label-free mass spectrometry', 'file_path': '/data/tommaso/llm4scilit/data/papers/2.pdf'}),\n", + " Document(page_content='For proteomic investigations, SF samples were first adjusted to 300 µg total protein in 50 mM ammonium bicarbonate (ABC).Protein concentration was conducted using Amicon Ultra-0.5 centrifugal filter units (10 kDa molecular weight cut-off; MilliporeSigma) which were pre-equilibrated with 400 uL of 50 mM ABC.Samples were loaded and spun at 10,000 RPM for 35 min at 4 °C and transferred to a new tube by spinning upside at 5000 RPM for 2 min.', metadata={'text': 'For proteomic investigations, SF samples were first adjusted to 300 µg total protein in 50 mM ammonium bicarbonate (ABC).Protein concentration was conducted using Amicon Ultra-0.5 centrifugal filter units (10 kDa molecular weight cut-off; MilliporeSigma) which were pre-equilibrated with 400 uL of 50 mM ABC.Samples were loaded and spun at 10,000 RPM for 35 min at 4 °C and transferred to a new tube by spinning upside at 5000 RPM for 2 min.', 'para': '2', 'bboxes': \"[[{'page': '3', 'x': '56.69', 'y': '145.21', 'h': '233.88', 'w': '11.68'}, {'page': '3', 'x': '56.69', 'y': '157.21', 'h': '233.84', 'w': '11.68'}, {'page': '3', 'x': '56.69', 'y': '169.20', 'h': '79.32', 'w': '11.68'}], [{'page': '3', 'x': '138.23', 'y': '169.20', 'h': '152.33', 'w': '11.68'}, {'page': '3', 'x': '56.69', 'y': '181.20', 'h': '233.86', 'w': '11.68'}, {'page': '3', 'x': '56.69', 'y': '193.19', 'h': '233.84', 'w': '11.68'}, {'page': '3', 'x': '56.69', 'y': '205.19', 'h': '195.97', 'w': '11.68'}], [{'page': '3', 'x': '256.70', 'y': '205.19', 'h': '33.83', 'w': '11.68'}, {'page': '3', 'x': '56.69', 'y': '217.18', 'h': '233.86', 'w': '11.68'}, {'page': '3', 'x': '56.69', 'y': '229.18', 'h': '233.86', 'w': '11.68'}, {'page': '3', 'x': '56.69', 'y': '241.18', 'h': '62.93', 'w': '11.68'}]]\", 'pages': \"('3', '3')\", 'section_title': 'SF sample preparation for proteomic analysis', 'section_number': 'None', 'paper_title': 'Elucidating the endogenous synovial fluid proteome and peptidome of inflammatory arthritis using label-free mass spectrometry', 'file_path': '/data/tommaso/llm4scilit/data/papers/2.pdf'}),\n", + " Document(page_content='Concentrates were collected and brought to a total volume of 100 µL with 50 mM ABC.Proteins were denatured with powdered urea to a final concentration of 8 M. Dithiothreitol (DTT) (Sigma-Aldrich) was added to each concentrate sample to a final concentration of 5 mM and incubated at 60 °C for 45 min.This was followed by alkylation with 15 mM iodoacetamide (IAM) (Sigma-Aldrich) at room temperature in the dark for 45 min.Samples were diluted fivefold with 50 mM ABC to prevent inhibition of trypsin activity by high concentrations of urea.Concentrate samples were digested with trypsin (Sigma-Aldrich) in a 1:50 (trypsin to total protein) ratio for 20 h at 37 °C and then dropwise acidified to a pH of 2 with formic acid (FA) to inhibit trypsin activity.Samples were reduced to 300 µL via speed vacuum concentration and stored at -20 °C until subjected to liquid chromatography-tandem mass spectrometry (LC-MS/MS).', metadata={'text': 'Concentrates were collected and brought to a total volume of 100 µL with 50 mM ABC.Proteins were denatured with powdered urea to a final concentration of 8 M. Dithiothreitol (DTT) (Sigma-Aldrich) was added to each concentrate sample to a final concentration of 5 mM and incubated at 60 °C for 45 min.This was followed by alkylation with 15 mM iodoacetamide (IAM) (Sigma-Aldrich) at room temperature in the dark for 45 min.Samples were diluted fivefold with 50 mM ABC to prevent inhibition of trypsin activity by high concentrations of urea.Concentrate samples were digested with trypsin (Sigma-Aldrich) in a 1:50 (trypsin to total protein) ratio for 20 h at 37 °C and then dropwise acidified to a pH of 2 with formic acid (FA) to inhibit trypsin activity.Samples were reduced to 300 µL via speed vacuum concentration and stored at -20 °C until subjected to liquid chromatography-tandem mass spectrometry (LC-MS/MS).', 'para': '5', 'bboxes': \"[[{'page': '3', 'x': '64.69', 'y': '253.17', 'h': '225.86', 'w': '11.68'}, {'page': '3', 'x': '56.69', 'y': '265.21', 'h': '145.83', 'w': '11.68'}], [{'page': '3', 'x': '206.29', 'y': '265.21', 'h': '84.27', 'w': '11.68'}, {'page': '3', 'x': '56.69', 'y': '277.21', 'h': '233.84', 'w': '11.68'}, {'page': '3', 'x': '56.69', 'y': '289.21', 'h': '233.86', 'w': '11.68'}, {'page': '3', 'x': '56.69', 'y': '301.20', 'h': '233.89', 'w': '11.68'}, {'page': '3', 'x': '56.69', 'y': '313.20', 'h': '144.15', 'w': '11.68'}], [{'page': '3', 'x': '203.88', 'y': '313.20', 'h': '86.69', 'w': '11.68'}, {'page': '3', 'x': '56.69', 'y': '325.19', 'h': '233.84', 'w': '11.68'}, {'page': '3', 'x': '56.69', 'y': '337.19', 'h': '233.86', 'w': '11.68'}], [{'page': '3', 'x': '56.69', 'y': '349.18', 'h': '233.86', 'w': '11.68'}, {'page': '3', 'x': '56.69', 'y': '361.21', 'h': '233.88', 'w': '11.68'}, {'page': '3', 'x': '56.69', 'y': '373.21', 'h': '31.59', 'w': '11.68'}], [{'page': '3', 'x': '91.32', 'y': '373.21', 'h': '199.24', 'w': '11.68'}, {'page': '3', 'x': '56.69', 'y': '385.20', 'h': '233.87', 'w': '11.68'}, {'page': '3', 'x': '56.69', 'y': '397.20', 'h': '233.88', 'w': '11.68'}, {'page': '3', 'x': '56.69', 'y': '409.19', 'h': '197.11', 'w': '11.68'}], [{'page': '3', 'x': '256.73', 'y': '409.19', 'h': '33.83', 'w': '11.68'}, {'page': '3', 'x': '56.69', 'y': '421.19', 'h': '233.85', 'w': '11.68'}, {'page': '3', 'x': '56.69', 'y': '433.18', 'h': '55.66', 'w': '11.68'}, {'page': '3', 'x': '115.92', 'y': '432.78', 'h': '7.64', 'w': '16.21'}, {'page': '3', 'x': '124.80', 'y': '433.18', 'h': '165.75', 'w': '11.68'}, {'page': '3', 'x': '56.69', 'y': '445.21', 'h': '212.40', 'w': '11.68'}]]\", 'pages': \"('3', '3')\", 'section_title': 'SF sample preparation for proteomic analysis', 'section_number': 'None', 'paper_title': 'Elucidating the endogenous synovial fluid proteome and peptidome of inflammatory arthritis using label-free mass spectrometry', 'file_path': '/data/tommaso/llm4scilit/data/papers/2.pdf'}),\n", + " Document(page_content='Peptides were isolated based on a protocol described by Kamphorst et al. [13].Fifty microliters of SF were diluted in 235 µL of 50 mM ABC and 15 µL dimethyl sulfoxide (DMSO) for peptidomic analysis.Peptide concentration was conducted using Amicon Ultra-0.5 centrifugal filter units (10 kDa MWCO; MilliporeSigma) which were pre-equilibrated with 250 µL of 50 mM ABC. SF samples were spun at 10 000 RPM for 60 min at 4 °C then washed with 100 µL of 50 mM of ABC and spun for another 10 min.Filtrates were acidified with 5 µL of FA.', metadata={'text': 'Peptides were isolated based on a protocol described by Kamphorst et al. [13].Fifty microliters of SF were diluted in 235 µL of 50 mM ABC and 15 µL dimethyl sulfoxide (DMSO) for peptidomic analysis.Peptide concentration was conducted using Amicon Ultra-0.5 centrifugal filter units (10 kDa MWCO; MilliporeSigma) which were pre-equilibrated with 250 µL of 50 mM ABC. SF samples were spun at 10 000 RPM for 60 min at 4 °C then washed with 100 µL of 50 mM of ABC and spun for another 10 min.Filtrates were acidified with 5 µL of FA.', 'para': '3', 'bboxes': \"[[{'page': '3', 'x': '56.69', 'y': '489.10', 'h': '233.88', 'w': '11.68'}, {'page': '3', 'x': '56.69', 'y': '501.10', 'h': '88.97', 'w': '11.68'}], [{'page': '3', 'x': '148.21', 'y': '501.10', 'h': '142.36', 'w': '11.68'}, {'page': '3', 'x': '56.69', 'y': '513.09', 'h': '233.84', 'w': '11.68'}, {'page': '3', 'x': '56.69', 'y': '525.09', 'h': '139.02', 'w': '11.68'}], [{'page': '3', 'x': '199.26', 'y': '525.09', 'h': '91.29', 'w': '11.68'}, {'page': '3', 'x': '56.69', 'y': '537.08', 'h': '233.86', 'w': '11.68'}, {'page': '3', 'x': '56.69', 'y': '549.10', 'h': '233.85', 'w': '11.68'}, {'page': '3', 'x': '56.69', 'y': '561.10', 'h': '233.86', 'w': '11.68'}, {'page': '3', 'x': '56.69', 'y': '573.09', 'h': '233.86', 'w': '11.68'}, {'page': '3', 'x': '56.69', 'y': '585.09', 'h': '233.89', 'w': '11.68'}, {'page': '3', 'x': '56.69', 'y': '597.08', 'h': '30.69', 'w': '11.68'}], [{'page': '3', 'x': '89.69', 'y': '597.08', 'h': '159.80', 'w': '11.68'}]]\", 'pages': \"('3', '3')\", 'section_title': 'SF sample preparation for peptidomics analysis', 'section_number': 'None', 'paper_title': 'Elucidating the endogenous synovial fluid proteome and peptidome of inflammatory arthritis using label-free mass spectrometry', 'file_path': '/data/tommaso/llm4scilit/data/papers/2.pdf'}),\n", + " Document(page_content='Peptides were desalted using one hydrophilic-lipophilic-balanced reverse-phase cartridge per sample (Oasis HLB).Each cartridge [1 mL (30 mg); Waters cat# WAT094225] was first pre-equilibrated with 1 mL of 90% acetonitrile (ACN), 0.1% FA and 0.02% trifluoroacetic acid (TFA) and then washed with 3 mL of buffer A (5% ACN, 0.1% FA, 0.02% TFA).The SF sample was then passed through the cartridge and washed a second time with 3 mL of buffer A. Peptides were eluted with 700 µL of 60% ACN, 0.1% FA and 0.02% TFA and each eluate was reduced to a volume of less than 300 µL and stored at -20 °C until subjected to LC-MS/MS.', metadata={'text': 'Peptides were desalted using one hydrophilic-lipophilic-balanced reverse-phase cartridge per sample (Oasis HLB).Each cartridge [1 mL (30 mg); Waters cat# WAT094225] was first pre-equilibrated with 1 mL of 90% acetonitrile (ACN), 0.1% FA and 0.02% trifluoroacetic acid (TFA) and then washed with 3 mL of buffer A (5% ACN, 0.1% FA, 0.02% TFA).The SF sample was then passed through the cartridge and washed a second time with 3 mL of buffer A. Peptides were eluted with 700 µL of 60% ACN, 0.1% FA and 0.02% TFA and each eluate was reduced to a volume of less than 300 µL and stored at -20 °C until subjected to LC-MS/MS.', 'para': '2', 'bboxes': \"[[{'page': '3', 'x': '64.69', 'y': '609.08', 'h': '225.86', 'w': '11.68'}, {'page': '3', 'x': '56.69', 'y': '621.10', 'h': '233.84', 'w': '11.68'}, {'page': '3', 'x': '56.69', 'y': '633.10', 'h': '53.38', 'w': '11.68'}], [{'page': '3', 'x': '113.12', 'y': '633.10', 'h': '177.46', 'w': '11.68'}, {'page': '3', 'x': '56.69', 'y': '645.09', 'h': '233.85', 'w': '11.68'}, {'page': '3', 'x': '56.69', 'y': '657.09', 'h': '233.85', 'w': '11.68'}, {'page': '3', 'x': '56.69', 'y': '669.08', 'h': '233.89', 'w': '11.68'}, {'page': '3', 'x': '56.69', 'y': '681.08', 'h': '122.57', 'w': '11.68'}], [{'page': '3', 'x': '183.89', 'y': '681.08', 'h': '106.67', 'w': '11.68'}, {'page': '3', 'x': '56.69', 'y': '693.07', 'h': '233.88', 'w': '11.68'}, {'page': '3', 'x': '56.69', 'y': '705.07', 'h': '233.88', 'w': '11.68'}, {'page': '3', 'x': '56.69', 'y': '717.06', 'h': '233.89', 'w': '11.68'}, {'page': '3', 'x': '304.72', 'y': '89.29', 'h': '233.82', 'w': '11.68'}, {'page': '3', 'x': '304.72', 'y': '100.89', 'h': '7.64', 'w': '16.21'}, {'page': '3', 'x': '314.67', 'y': '101.29', 'h': '149.77', 'w': '11.68'}]]\", 'pages': \"('3', '3')\", 'section_title': 'SF sample preparation for peptidomics analysis', 'section_number': 'None', 'paper_title': 'Elucidating the endogenous synovial fluid proteome and peptidome of inflammatory arthritis using label-free mass spectrometry', 'file_path': '/data/tommaso/llm4scilit/data/papers/2.pdf'}),\n", + " Document(page_content='Processed samples were desalted using C-18 OMIX Pipette Tips (Agilent Technologies, USA) and eluted in 3 µL of MS buffer B (65% ACN, 0.1% FA in H 2 O).The eluates were then diluted with 57 µL of MS buffer A (0.1% FA in H 2 O) and 28 µL were injected onto a 2 cm C18 trap column, packed with Varian Pursuit (5 µm C18), with an 8 µm tip (New Objective).The LC setup was coupled online to a Q Exactive (Thermo Fisher Scientific, USA) mass spectrometer with a nanoelectrospray ionization source (Proxeon Biosystems).Samples for direct proteomic analysis as well as samples for direct peptidomics analysis underwent a 60-min linear gradient using MS buffer A and MS buffer B. Eluted peptides were subjected to tandem mass spectrometry in positive ion mode.Data acquisition was conducted via Thermo XCalibur v.3.0.63 (Thermo Fisher Scientific, USA).', metadata={'text': 'Processed samples were desalted using C-18 OMIX Pipette Tips (Agilent Technologies, USA) and eluted in 3 µL of MS buffer B (65% ACN, 0.1% FA in H 2 O).The eluates were then diluted with 57 µL of MS buffer A (0.1% FA in H 2 O) and 28 µL were injected onto a 2 cm C18 trap column, packed with Varian Pursuit (5 µm C18), with an 8 µm tip (New Objective).The LC setup was coupled online to a Q Exactive (Thermo Fisher Scientific, USA) mass spectrometer with a nanoelectrospray ionization source (Proxeon Biosystems).Samples for direct proteomic analysis as well as samples for direct peptidomics analysis underwent a 60-min linear gradient using MS buffer A and MS buffer B. Eluted peptides were subjected to tandem mass spectrometry in positive ion mode.Data acquisition was conducted via Thermo XCalibur v.3.0.63 (Thermo Fisher Scientific, USA).', 'para': '4', 'bboxes': \"[[{'page': '3', 'x': '304.72', 'y': '138.58', 'h': '233.88', 'w': '11.68'}, {'page': '3', 'x': '304.72', 'y': '150.58', 'h': '233.87', 'w': '11.68'}, {'page': '3', 'x': '304.72', 'y': '162.58', 'h': '198.22', 'w': '11.68'}, {'page': '3', 'x': '502.95', 'y': '166.97', 'h': '3.44', 'w': '8.18'}, {'page': '3', 'x': '506.39', 'y': '162.58', 'h': '13.12', 'w': '11.68'}], [{'page': '3', 'x': '523.19', 'y': '162.58', 'h': '15.40', 'w': '11.68'}, {'page': '3', 'x': '304.73', 'y': '174.58', 'h': '233.85', 'w': '11.68'}, {'page': '3', 'x': '304.73', 'y': '186.58', 'h': '31.91', 'w': '11.68'}, {'page': '3', 'x': '336.64', 'y': '190.97', 'h': '3.44', 'w': '8.18'}, {'page': '3', 'x': '340.08', 'y': '186.58', 'h': '198.48', 'w': '11.68'}, {'page': '3', 'x': '304.72', 'y': '198.58', 'h': '233.86', 'w': '11.68'}, {'page': '3', 'x': '304.72', 'y': '210.58', 'h': '121.39', 'w': '11.68'}], [{'page': '3', 'x': '429.06', 'y': '210.58', 'h': '109.53', 'w': '11.68'}, {'page': '3', 'x': '304.72', 'y': '222.57', 'h': '233.89', 'w': '11.68'}, {'page': '3', 'x': '304.72', 'y': '234.57', 'h': '233.85', 'w': '11.68'}, {'page': '3', 'x': '304.72', 'y': '246.56', 'h': '124.67', 'w': '11.68'}], [{'page': '3', 'x': '434.13', 'y': '246.56', 'h': '104.45', 'w': '11.68'}, {'page': '3', 'x': '304.72', 'y': '258.58', 'h': '233.86', 'w': '11.68'}, {'page': '3', 'x': '304.72', 'y': '270.58', 'h': '233.85', 'w': '11.68'}, {'page': '3', 'x': '304.72', 'y': '282.58', 'h': '233.87', 'w': '11.68'}, {'page': '3', 'x': '304.72', 'y': '294.57', 'h': '211.84', 'w': '11.68'}], [{'page': '3', 'x': '519.17', 'y': '294.57', 'h': '19.40', 'w': '11.68'}, {'page': '3', 'x': '304.72', 'y': '306.57', 'h': '233.85', 'w': '11.68'}, {'page': '3', 'x': '304.72', 'y': '318.56', 'h': '132.70', 'w': '11.68'}]]\", 'pages': \"('3', '3')\", 'section_title': 'LC-MS/MS', 'section_number': 'None', 'paper_title': 'Elucidating the endogenous synovial fluid proteome and peptidome of inflammatory arthritis using label-free mass spectrometry', 'file_path': '/data/tommaso/llm4scilit/data/papers/2.pdf'}),\n", + " Document(page_content='The resulting proteomic and peptidomic raw data files were uploaded into MaxQuant v.1.5.2.8 (www.coxdocs.org) [14] with the integrated Andromeda search engine.MS and MS/MS spectra were searched against a reverted version of the SwissProt human protein database (version July 2017) for protein identification and a randomized version of the SwissProt human protein database for peptide identification.Search parameters for proteomic analysis included carbamidomethylation of cysteines as a fixed modification and oxidized methionine and N-terminal acetylation as variable modifications.Trypsin was the chosen digestion enzyme and a maximum of two missed cleavages were allowed.Search parameters for peptidomic analysis included oxidized methionine and oxidized proline as variable modifications.An unspecific enzyme search was the chosen digestion method.Both proteomic and peptidomic data were initially searched against a smaller \"human first search\" database with a peptide tolerance of 20 ppm for mass recalibration.The main search was performed using the Swissprot human protein database (version July 2017) with a peptide tolerance of 4.5 ppm.Data was analyzed using label-free quantification (LFQ) with a minimum ratio count of 1 and the \"Match between runs\" interval set to 2 min.The peptide-spectrum match and protein false discovery rate were set to 1%.', metadata={'text': 'The resulting proteomic and peptidomic raw data files were uploaded into MaxQuant v.1.5.2.8 (www.coxdocs.org) [14] with the integrated Andromeda search engine.MS and MS/MS spectra were searched against a reverted version of the SwissProt human protein database (version July 2017) for protein identification and a randomized version of the SwissProt human protein database for peptide identification.Search parameters for proteomic analysis included carbamidomethylation of cysteines as a fixed modification and oxidized methionine and N-terminal acetylation as variable modifications.Trypsin was the chosen digestion enzyme and a maximum of two missed cleavages were allowed.Search parameters for peptidomic analysis included oxidized methionine and oxidized proline as variable modifications.An unspecific enzyme search was the chosen digestion method.Both proteomic and peptidomic data were initially searched against a smaller \"human first search\" database with a peptide tolerance of 20 ppm for mass recalibration.The main search was performed using the Swissprot human protein database (version July 2017) with a peptide tolerance of 4.5 ppm.Data was analyzed using label-free quantification (LFQ) with a minimum ratio count of 1 and the \"Match between runs\" interval set to 2 min.The peptide-spectrum match and protein false discovery rate were set to 1%.', 'para': '10', 'bboxes': \"[[{'page': '3', 'x': '304.72', 'y': '355.84', 'h': '233.86', 'w': '11.68'}, {'page': '3', 'x': '304.72', 'y': '367.84', 'h': '233.86', 'w': '11.68'}], [{'page': '3', 'x': '304.72', 'y': '379.83', 'h': '233.85', 'w': '11.68'}, {'page': '3', 'x': '304.72', 'y': '391.83', 'h': '29.49', 'w': '11.68'}], [{'page': '3', 'x': '338.30', 'y': '391.83', 'h': '200.27', 'w': '11.68'}, {'page': '3', 'x': '304.72', 'y': '403.82', 'h': '233.86', 'w': '11.68'}, {'page': '3', 'x': '304.72', 'y': '415.84', 'h': '233.83', 'w': '11.68'}, {'page': '3', 'x': '304.72', 'y': '427.84', 'h': '233.88', 'w': '11.68'}, {'page': '3', 'x': '304.72', 'y': '439.83', 'h': '150.06', 'w': '11.68'}], [{'page': '3', 'x': '459.90', 'y': '439.83', 'h': '78.69', 'w': '11.68'}, {'page': '3', 'x': '304.72', 'y': '451.83', 'h': '233.86', 'w': '11.68'}, {'page': '3', 'x': '304.72', 'y': '463.82', 'h': '233.86', 'w': '11.68'}, {'page': '3', 'x': '304.72', 'y': '475.84', 'h': '233.86', 'w': '11.68'}, {'page': '3', 'x': '304.72', 'y': '487.84', 'h': '22.75', 'w': '11.68'}], [{'page': '3', 'x': '331.63', 'y': '487.84', 'h': '206.95', 'w': '11.68'}, {'page': '3', 'x': '304.72', 'y': '499.84', 'h': '203.21', 'w': '11.68'}], [{'page': '3', 'x': '510.96', 'y': '499.84', 'h': '27.63', 'w': '11.68'}, {'page': '3', 'x': '304.72', 'y': '511.83', 'h': '233.87', 'w': '11.68'}, {'page': '3', 'x': '304.72', 'y': '523.83', 'h': '233.86', 'w': '11.68'}, {'page': '3', 'x': '304.72', 'y': '535.84', 'h': '22.75', 'w': '11.68'}], [{'page': '3', 'x': '333.47', 'y': '535.84', 'h': '205.13', 'w': '11.68'}, {'page': '3', 'x': '304.72', 'y': '547.84', 'h': '74.99', 'w': '11.68'}], [{'page': '3', 'x': '383.16', 'y': '547.84', 'h': '155.44', 'w': '11.68'}, {'page': '3', 'x': '304.72', 'y': '559.83', 'h': '233.85', 'w': '11.68'}, {'page': '3', 'x': '304.72', 'y': '571.83', 'h': '233.86', 'w': '11.68'}, {'page': '3', 'x': '304.72', 'y': '583.82', 'h': '77.07', 'w': '11.68'}], [{'page': '3', 'x': '384.01', 'y': '583.82', 'h': '154.60', 'w': '11.68'}, {'page': '3', 'x': '304.72', 'y': '595.82', 'h': '233.89', 'w': '11.68'}, {'page': '3', 'x': '304.72', 'y': '607.81', 'h': '152.96', 'w': '11.68'}], [{'page': '3', 'x': '461.10', 'y': '607.81', 'h': '77.52', 'w': '11.68'}, {'page': '3', 'x': '304.72', 'y': '619.81', 'h': '233.87', 'w': '11.68'}, {'page': '3', 'x': '304.72', 'y': '631.80', 'h': '233.89', 'w': '11.68'}, {'page': '3', 'x': '304.72', 'y': '643.80', 'h': '55.04', 'w': '11.68'}], [{'page': '3', 'x': '363.64', 'y': '643.80', 'h': '174.96', 'w': '11.68'}, {'page': '3', 'x': '304.72', 'y': '655.79', 'h': '140.36', 'w': '11.68'}]]\", 'pages': \"('3', '3')\", 'section_title': 'Protein identification and quantification', 'section_number': 'None', 'paper_title': 'Elucidating the endogenous synovial fluid proteome and peptidome of inflammatory arthritis using label-free mass spectrometry', 'file_path': '/data/tommaso/llm4scilit/data/papers/2.pdf'}),\n", + " Document(page_content='Pathway analysis of dysregulated proteins identified by LC-MS/MS was conducted using the functional-analysis tool Ingenuity Pathway Analysis (IPA; http://www.ingenuity.com)[15].To determine the specificity of identified proteins at the tissue and biological fluid level, proteomic datasets were searched against ProteomicsDB (http:// www.Prote omics DB.org), a web-based database of mass spectrometry-generated proteomics data [16].Pathway analysis of SF peptides was conducted through the Database for Annotation, Visualization and Integrated Discovery (DAVID) 6.8 with reference to the Kyoto Encyclopedia of Genes and Genomes (KEGG) [17].Annotations with q-values of less than 0.05 were considered statistically significant.Identification of known AMPs in the SF peptidome was determined by comparison with experimentally validated human AMPs taken from the Collection of Anti-Microbial Peptides (CAMP R3 ) (http:// www.camp.bicnirrh.res.in/)database [18].AMP prediction of the identified peptides was performed using the support vector machine (SVM) learning algorithm developed for the CAMP R3 database.Peptides with an SVM score of 0.8 or higher were predicted to be antimicrobial.', metadata={'text': 'Pathway analysis of dysregulated proteins identified by LC-MS/MS was conducted using the functional-analysis tool Ingenuity Pathway Analysis (IPA; http://www.ingenuity.com)[15].To determine the specificity of identified proteins at the tissue and biological fluid level, proteomic datasets were searched against ProteomicsDB (http:// www.Prote omics DB.org), a web-based database of mass spectrometry-generated proteomics data [16].Pathway analysis of SF peptides was conducted through the Database for Annotation, Visualization and Integrated Discovery (DAVID) 6.8 with reference to the Kyoto Encyclopedia of Genes and Genomes (KEGG) [17].Annotations with q-values of less than 0.05 were considered statistically significant.Identification of known AMPs in the SF peptidome was determined by comparison with experimentally validated human AMPs taken from the Collection of Anti-Microbial Peptides (CAMP R3 ) (http:// www.camp.bicnirrh.res.in/)database [18].AMP prediction of the identified peptides was performed using the support vector machine (SVM) learning algorithm developed for the CAMP R3 database.Peptides with an SVM score of 0.8 or higher were predicted to be antimicrobial.', 'para': '10', 'bboxes': \"[[{'page': '3', 'x': '304.72', 'y': '693.10', 'h': '233.87', 'w': '11.68'}, {'page': '3', 'x': '304.72', 'y': '705.10', 'h': '233.84', 'w': '11.68'}, {'page': '3', 'x': '304.72', 'y': '717.09', 'h': '233.87', 'w': '11.68'}], [{'page': '4', 'x': '56.69', 'y': '88.58', 'h': '38.36', 'w': '11.68'}], [{'page': '4', 'x': '98.35', 'y': '88.58', 'h': '18.46', 'w': '11.68'}], [{'page': '4', 'x': '120.12', 'y': '88.58', 'h': '170.41', 'w': '11.68'}, {'page': '4', 'x': '56.69', 'y': '100.58', 'h': '233.88', 'w': '11.68'}, {'page': '4', 'x': '56.69', 'y': '112.57', 'h': '233.85', 'w': '11.68'}, {'page': '4', 'x': '56.69', 'y': '124.57', 'h': '233.86', 'w': '11.68'}, {'page': '4', 'x': '56.69', 'y': '136.56', 'h': '204.08', 'w': '11.68'}], [{'page': '4', 'x': '268.46', 'y': '136.56', 'h': '22.09', 'w': '11.68'}, {'page': '4', 'x': '56.69', 'y': '148.58', 'h': '233.87', 'w': '11.68'}, {'page': '4', 'x': '56.69', 'y': '160.58', 'h': '233.85', 'w': '11.68'}, {'page': '4', 'x': '56.69', 'y': '172.57', 'h': '233.86', 'w': '11.68'}, {'page': '4', 'x': '56.69', 'y': '184.58', 'h': '195.92', 'w': '11.68'}], [{'page': '4', 'x': '256.18', 'y': '184.58', 'h': '34.37', 'w': '11.68'}, {'page': '4', 'x': '56.69', 'y': '196.58', 'h': '233.83', 'w': '11.68'}, {'page': '4', 'x': '56.69', 'y': '208.58', 'h': '93.53', 'w': '11.68'}], [{'page': '4', 'x': '153.33', 'y': '208.58', 'h': '137.23', 'w': '11.68'}, {'page': '4', 'x': '56.69', 'y': '220.57', 'h': '233.86', 'w': '11.68'}, {'page': '4', 'x': '56.69', 'y': '232.57', 'h': '233.87', 'w': '11.68'}, {'page': '4', 'x': '56.69', 'y': '244.56', 'h': '190.57', 'w': '11.68'}, {'page': '4', 'x': '247.26', 'y': '248.97', 'h': '7.70', 'w': '8.18'}, {'page': '4', 'x': '254.96', 'y': '244.58', 'h': '35.59', 'w': '11.68'}, {'page': '4', 'x': '56.69', 'y': '256.58', 'h': '67.94', 'w': '11.68'}], [{'page': '4', 'x': '124.64', 'y': '256.58', 'h': '44.50', 'w': '11.68'}], [{'page': '4', 'x': '173.18', 'y': '256.58', 'h': '58.02', 'w': '11.68'}], [{'page': '4', 'x': '235.25', 'y': '256.58', 'h': '55.30', 'w': '11.68'}, {'page': '4', 'x': '56.69', 'y': '268.58', 'h': '233.86', 'w': '11.68'}, {'page': '4', 'x': '56.69', 'y': '280.58', 'h': '233.86', 'w': '11.68'}, {'page': '4', 'x': '56.69', 'y': '292.58', 'h': '85.56', 'w': '11.68'}, {'page': '4', 'x': '142.25', 'y': '296.97', 'h': '7.70', 'w': '8.18'}, {'page': '4', 'x': '153.76', 'y': '292.58', 'h': '37.74', 'w': '11.68'}], [{'page': '4', 'x': '195.31', 'y': '292.58', 'h': '95.23', 'w': '11.68'}, {'page': '4', 'x': '56.69', 'y': '304.58', 'h': '231.28', 'w': '11.68'}]]\", 'pages': \"('3', '4')\", 'section_title': 'Bioinformatic analyses', 'section_number': 'None', 'paper_title': 'Elucidating the endogenous synovial fluid proteome and peptidome of inflammatory arthritis using label-free mass spectrometry', 'file_path': '/data/tommaso/llm4scilit/data/papers/2.pdf'}),\n", + " Document(page_content='A linear model was fitted to examine the effects of age and sex on the protein and peptide expression data using the LIMMA package in R [19].Due to the nature of data generated by LC-MS/MS, protein quantification is often skewed and imposes limits on statistical inference.To circumvent assumptions of normality, the Mann-Whitney U test coupled to the Benjamini-Hochberg correction to control for multiple hypothesis testing was performed for comparisons between two independent groups.Adjusted p values of less than 0.05 were considered statistically significant.Differential abundance of proteins and peptides were computed with the myTAI package in R, generating a ratio of log-transformed extracted ion currents in one group against the second group, considered to be the reference group [20].A volcano plot was used to visualize the results of the Mann-Whitney U test.', metadata={'text': 'A linear model was fitted to examine the effects of age and sex on the protein and peptide expression data using the LIMMA package in R [19].Due to the nature of data generated by LC-MS/MS, protein quantification is often skewed and imposes limits on statistical inference.To circumvent assumptions of normality, the Mann-Whitney U test coupled to the Benjamini-Hochberg correction to control for multiple hypothesis testing was performed for comparisons between two independent groups.Adjusted p values of less than 0.05 were considered statistically significant.Differential abundance of proteins and peptides were computed with the myTAI package in R, generating a ratio of log-transformed extracted ion currents in one group against the second group, considered to be the reference group [20].A volcano plot was used to visualize the results of the Mann-Whitney U test.', 'para': '5', 'bboxes': \"[[{'page': '4', 'x': '56.69', 'y': '370.15', 'h': '233.83', 'w': '11.68'}, {'page': '4', 'x': '56.69', 'y': '382.15', 'h': '233.84', 'w': '11.68'}, {'page': '4', 'x': '56.69', 'y': '394.14', 'h': '126.69', 'w': '11.68'}], [{'page': '4', 'x': '186.16', 'y': '394.14', 'h': '104.38', 'w': '11.68'}, {'page': '4', 'x': '56.69', 'y': '406.14', 'h': '233.88', 'w': '11.68'}, {'page': '4', 'x': '56.69', 'y': '418.13', 'h': '204.55', 'w': '11.68'}], [{'page': '4', 'x': '263.45', 'y': '418.13', 'h': '27.10', 'w': '11.68'}, {'page': '4', 'x': '56.69', 'y': '430.16', 'h': '233.85', 'w': '11.68'}, {'page': '4', 'x': '56.69', 'y': '442.15', 'h': '233.86', 'w': '11.68'}, {'page': '4', 'x': '56.69', 'y': '454.15', 'h': '233.85', 'w': '11.68'}, {'page': '4', 'x': '56.69', 'y': '466.14', 'h': '194.44', 'w': '11.68'}], [{'page': '4', 'x': '253.71', 'y': '466.14', 'h': '36.85', 'w': '11.68'}, {'page': '4', 'x': '56.69', 'y': '478.14', 'h': '233.86', 'w': '11.68'}, {'page': '4', 'x': '56.69', 'y': '490.16', 'h': '33.51', 'w': '11.68'}], [{'page': '4', 'x': '93.20', 'y': '490.16', 'h': '197.36', 'w': '11.68'}, {'page': '4', 'x': '56.69', 'y': '502.15', 'h': '233.84', 'w': '11.68'}, {'page': '4', 'x': '56.69', 'y': '514.15', 'h': '233.83', 'w': '11.68'}, {'page': '4', 'x': '56.69', 'y': '526.14', 'h': '233.86', 'w': '11.68'}, {'page': '4', 'x': '56.69', 'y': '538.16', 'h': '76.53', 'w': '11.68'}], [{'page': '4', 'x': '136.70', 'y': '538.16', 'h': '153.83', 'w': '11.68'}, {'page': '4', 'x': '56.69', 'y': '550.15', 'h': '165.53', 'w': '11.68'}]]\", 'pages': \"('4', '4')\", 'section_title': 'Statistical analyses and data visualizations were completed with R (R Foundation for Statistical Computing).', 'section_number': 'None', 'paper_title': 'Elucidating the endogenous synovial fluid proteome and peptidome of inflammatory arthritis using label-free mass spectrometry', 'file_path': '/data/tommaso/llm4scilit/data/papers/2.pdf'}),\n", + " Document(page_content='Demographics, disease characteristics and concomitant therapies of recruited patients are summarized in Table 1.', metadata={'text': 'Demographics, disease characteristics and concomitant therapies of recruited patients are summarized in Table 1.', 'para': '0', 'bboxes': \"[[{'page': '4', 'x': '56.69', 'y': '603.73', 'h': '233.85', 'w': '11.68'}, {'page': '4', 'x': '56.69', 'y': '615.72', 'h': '233.86', 'w': '11.68'}]]\", 'pages': \"('4', '4')\", 'section_title': 'Clinical characteristics of recruited patients', 'section_number': 'None', 'paper_title': 'Elucidating the endogenous synovial fluid proteome and peptidome of inflammatory arthritis using label-free mass spectrometry', 'file_path': '/data/tommaso/llm4scilit/data/papers/2.pdf'}),\n", + " Document(page_content='Collectively, 389 unique proteins were identified across all IA SF proteomic samples.When assessing each cohort individually, 377 unique proteins were identified in RA patient samples, 369 unique proteins were identified in PsA patient samples and 399 proteins were identified in control patient samples.A review of the overlap between proteomes of each cohort revealed 347 proteins to be common to all three patient groups.', metadata={'text': 'Collectively, 389 unique proteins were identified across all IA SF proteomic samples.When assessing each cohort individually, 377 unique proteins were identified in RA patient samples, 369 unique proteins were identified in PsA patient samples and 399 proteins were identified in control patient samples.A review of the overlap between proteomes of each cohort revealed 347 proteins to be common to all three patient groups.', 'para': '2', 'bboxes': \"[[{'page': '4', 'x': '56.69', 'y': '657.30', 'h': '233.85', 'w': '11.68'}, {'page': '4', 'x': '56.69', 'y': '669.30', 'h': '116.50', 'w': '11.68'}], [{'page': '4', 'x': '175.43', 'y': '669.30', 'h': '115.12', 'w': '11.68'}, {'page': '4', 'x': '56.69', 'y': '681.29', 'h': '233.83', 'w': '11.68'}, {'page': '4', 'x': '56.69', 'y': '693.29', 'h': '233.86', 'w': '11.68'}, {'page': '4', 'x': '56.69', 'y': '705.28', 'h': '233.84', 'w': '11.68'}, {'page': '4', 'x': '56.69', 'y': '717.28', 'h': '98.40', 'w': '11.68'}], [{'page': '4', 'x': '157.69', 'y': '717.28', 'h': '132.83', 'w': '11.68'}, {'page': '4', 'x': '304.72', 'y': '343.38', 'h': '233.85', 'w': '11.68'}, {'page': '4', 'x': '304.72', 'y': '355.37', 'h': '146.57', 'w': '11.68'}]]\", 'pages': \"('4', '4')\", 'section_title': 'Holistic protein and peptide mining', 'section_number': 'None', 'paper_title': 'Elucidating the endogenous synovial fluid proteome and peptidome of inflammatory arthritis using label-free mass spectrometry', 'file_path': '/data/tommaso/llm4scilit/data/papers/2.pdf'}),\n", + " Document(page_content='A total of 226 unique peptide sequences were identified across all IA SF samples originating from a total of 48 unique proteins.Inter-cohort comparisons identified 184 unique peptides in RA patient samples, 175 unique peptides in PsA patient samples and 192 unique peptides in control patient samples.Comparisons between the SF peptidomes of arthritic and control conditions revealed 95 peptides to be common to all three groups.', metadata={'text': 'A total of 226 unique peptide sequences were identified across all IA SF samples originating from a total of 48 unique proteins.Inter-cohort comparisons identified 184 unique peptides in RA patient samples, 175 unique peptides in PsA patient samples and 192 unique peptides in control patient samples.Comparisons between the SF peptidomes of arthritic and control conditions revealed 95 peptides to be common to all three groups.', 'para': '2', 'bboxes': \"[[{'page': '4', 'x': '312.72', 'y': '367.37', 'h': '225.87', 'w': '11.68'}, {'page': '4', 'x': '304.72', 'y': '379.40', 'h': '233.86', 'w': '11.68'}, {'page': '4', 'x': '304.72', 'y': '391.39', 'h': '81.49', 'w': '11.68'}], [{'page': '4', 'x': '389.78', 'y': '391.39', 'h': '148.80', 'w': '11.68'}, {'page': '4', 'x': '304.72', 'y': '403.39', 'h': '233.86', 'w': '11.68'}, {'page': '4', 'x': '304.72', 'y': '415.39', 'h': '233.86', 'w': '11.68'}, {'page': '4', 'x': '304.72', 'y': '427.38', 'h': '110.00', 'w': '11.68'}], [{'page': '4', 'x': '417.62', 'y': '427.38', 'h': '120.98', 'w': '11.68'}, {'page': '4', 'x': '304.72', 'y': '439.38', 'h': '233.84', 'w': '11.68'}, {'page': '4', 'x': '304.72', 'y': '451.37', 'h': '187.39', 'w': '11.68'}]]\", 'pages': \"('4', '4')\", 'section_title': 'Holistic protein and peptide mining', 'section_number': 'None', 'paper_title': 'Elucidating the endogenous synovial fluid proteome and peptidome of inflammatory arthritis using label-free mass spectrometry', 'file_path': '/data/tommaso/llm4scilit/data/papers/2.pdf'}),\n", + " Document(page_content='Next, we investigated the overlap between the proteins identified through our peptidomic approach and those identified through our proteomic approach by comparing the IA-associated proteins originating from both experiments.Of the 48 precursor proteins from our peptidomic study, 25 proteins were also found in the IA SF proteome (Fig. 1).Taken together, they have yielded the combined identification of 412 proteins in IA SF.A complete list of identified proteins and peptides are reported in Additional file 1: Tables S1, S2 and S3.', metadata={'text': 'Next, we investigated the overlap between the proteins identified through our peptidomic approach and those identified through our proteomic approach by comparing the IA-associated proteins originating from both experiments.Of the 48 precursor proteins from our peptidomic study, 25 proteins were also found in the IA SF proteome (Fig. 1).Taken together, they have yielded the combined identification of 412 proteins in IA SF.A complete list of identified proteins and peptides are reported in Additional file 1: Tables S1, S2 and S3.', 'para': '3', 'bboxes': \"[[{'page': '4', 'x': '312.72', 'y': '463.37', 'h': '225.89', 'w': '11.68'}, {'page': '4', 'x': '304.72', 'y': '475.36', 'h': '233.87', 'w': '11.68'}, {'page': '4', 'x': '304.72', 'y': '487.36', 'h': '233.86', 'w': '11.68'}, {'page': '4', 'x': '304.72', 'y': '499.35', 'h': '233.86', 'w': '11.68'}, {'page': '4', 'x': '304.72', 'y': '511.40', 'h': '27.69', 'w': '11.68'}], [{'page': '4', 'x': '334.67', 'y': '511.40', 'h': '203.93', 'w': '11.68'}, {'page': '4', 'x': '304.72', 'y': '523.39', 'h': '233.85', 'w': '11.68'}, {'page': '4', 'x': '304.72', 'y': '535.39', 'h': '31.50', 'w': '11.68'}], [{'page': '4', 'x': '339.36', 'y': '535.39', 'h': '199.24', 'w': '11.68'}, {'page': '4', 'x': '304.72', 'y': '547.39', 'h': '157.69', 'w': '11.68'}], [{'page': '4', 'x': '465.22', 'y': '547.39', 'h': '73.37', 'w': '11.68'}, {'page': '4', 'x': '304.72', 'y': '559.38', 'h': '233.86', 'w': '11.68'}, {'page': '4', 'x': '304.72', 'y': '571.40', 'h': '134.72', 'w': '11.68'}]]\", 'pages': \"('4', '4')\", 'section_title': 'Holistic protein and peptide mining', 'section_number': 'None', 'paper_title': 'Elucidating the endogenous synovial fluid proteome and peptidome of inflammatory arthritis using label-free mass spectrometry', 'file_path': '/data/tommaso/llm4scilit/data/papers/2.pdf'}),\n", + " Document(page_content='Differential abundance analyses were conducted to detect dysregulated proteins in the SF of: (1) IA compared to control and (2) RA compared to PsA.Using non-parametric statistical tests, 144 proteins were determined to have statistically significant differential abundance in IA SF with 54 proteins showing significant upregulation and 90 proteins showing significant downregulation (Fig. 2).When comparing RA and PsA proteomes, no proteins showed significant differences in abundance after correcting for multiple hypothesis testing.However, with respect to an unadjusted p value, 22 proteins were differentially abundant between the two groups with 13 proteins demonstrating significant upregulation in RA relative to PsA and 9 proteins showing significant upregulation in PsA relative to RA. Significantly dysregulated proteins in IA compared to control and significantly dysregulated proteins in RA compared to PsA are summarized in Additional file 1: Tables S4 andS5, respectively.Dysregulated functional pathways likely to be associated with the significantly upregulated and downregulated proteins of IA SF were detected with IPA.Core analyses determined the top 5 canonical pathways associated with upregulated proteins to be: (1) LXR/RXR activation, (2) FXR/RXR activation, (3) acute phase response signaling, (4) atherosclerosis signaling and (5) IL-12 signaling and production in macrophages, several of which Fig. 1 Venn diagram of proteins identified in the IA SF proteome and peptidome.The total number of proteins identified was 412, with 364 proteins detected in the proteome, 23 proteins detected in the peptidome and 25 proteins detected in both Fig. 2 Volcano plot of significantly differentially abundant proteins identified in the IA SF proteome relative to control SF.A total of 144 proteins, highlighted in blue and found above the y-intercept oflog 10 (0.05), were determined to have statistically significant differential abundance in IA SF have been previously associated with IA.Details regarding the top diseases and disorders as well as molecular and cellular functions associated with both groups of dysregulated proteins can be found in Additional file 1: Table S6.As the data suggests, upregulated proteins show more distinct relations to inflammatory and immunological processes while downregulated proteins demonstrate stronger relations to metabolic processes.Ultimately, to identify the strongest candidate biomarkers to be validated in IA patient serum, we focused on upregulated proteins in the SF.', metadata={'text': 'Differential abundance analyses were conducted to detect dysregulated proteins in the SF of: (1) IA compared to control and (2) RA compared to PsA.Using non-parametric statistical tests, 144 proteins were determined to have statistically significant differential abundance in IA SF with 54 proteins showing significant upregulation and 90 proteins showing significant downregulation (Fig. 2).When comparing RA and PsA proteomes, no proteins showed significant differences in abundance after correcting for multiple hypothesis testing.However, with respect to an unadjusted p value, 22 proteins were differentially abundant between the two groups with 13 proteins demonstrating significant upregulation in RA relative to PsA and 9 proteins showing significant upregulation in PsA relative to RA. Significantly dysregulated proteins in IA compared to control and significantly dysregulated proteins in RA compared to PsA are summarized in Additional file 1: Tables S4 andS5, respectively.Dysregulated functional pathways likely to be associated with the significantly upregulated and downregulated proteins of IA SF were detected with IPA.Core analyses determined the top 5 canonical pathways associated with upregulated proteins to be: (1) LXR/RXR activation, (2) FXR/RXR activation, (3) acute phase response signaling, (4) atherosclerosis signaling and (5) IL-12 signaling and production in macrophages, several of which Fig. 1 Venn diagram of proteins identified in the IA SF proteome and peptidome.The total number of proteins identified was 412, with 364 proteins detected in the proteome, 23 proteins detected in the peptidome and 25 proteins detected in both Fig. 2 Volcano plot of significantly differentially abundant proteins identified in the IA SF proteome relative to control SF.A total of 144 proteins, highlighted in blue and found above the y-intercept oflog 10 (0.05), were determined to have statistically significant differential abundance in IA SF have been previously associated with IA.Details regarding the top diseases and disorders as well as molecular and cellular functions associated with both groups of dysregulated proteins can be found in Additional file 1: Table S6.As the data suggests, upregulated proteins show more distinct relations to inflammatory and immunological processes while downregulated proteins demonstrate stronger relations to metabolic processes.Ultimately, to identify the strongest candidate biomarkers to be validated in IA patient serum, we focused on upregulated proteins in the SF.', 'para': '10', 'bboxes': \"[[{'page': '4', 'x': '304.72', 'y': '609.30', 'h': '233.87', 'w': '11.68'}, {'page': '4', 'x': '304.72', 'y': '621.30', 'h': '233.86', 'w': '11.68'}, {'page': '4', 'x': '304.72', 'y': '633.29', 'h': '164.53', 'w': '11.68'}], [{'page': '4', 'x': '473.75', 'y': '633.29', 'h': '64.83', 'w': '11.68'}, {'page': '4', 'x': '304.72', 'y': '645.30', 'h': '233.87', 'w': '11.68'}, {'page': '4', 'x': '304.72', 'y': '657.30', 'h': '233.87', 'w': '11.68'}, {'page': '4', 'x': '304.72', 'y': '669.29', 'h': '233.86', 'w': '11.68'}, {'page': '4', 'x': '304.72', 'y': '681.30', 'h': '233.86', 'w': '11.68'}, {'page': '4', 'x': '304.72', 'y': '693.30', 'h': '52.05', 'w': '11.68'}], [{'page': '4', 'x': '360.37', 'y': '693.30', 'h': '178.22', 'w': '11.68'}, {'page': '4', 'x': '304.72', 'y': '705.30', 'h': '233.85', 'w': '11.68'}, {'page': '4', 'x': '304.72', 'y': '717.29', 'h': '192.97', 'w': '11.68'}], [{'page': '4', 'x': '500.09', 'y': '717.29', 'h': '38.49', 'w': '11.68'}, {'page': '5', 'x': '304.72', 'y': '501.24', 'h': '233.88', 'w': '11.68'}, {'page': '5', 'x': '304.72', 'y': '513.23', 'h': '233.89', 'w': '11.68'}, {'page': '5', 'x': '304.72', 'y': '525.23', 'h': '233.86', 'w': '11.68'}, {'page': '5', 'x': '304.72', 'y': '537.23', 'h': '233.86', 'w': '11.68'}, {'page': '5', 'x': '304.72', 'y': '549.24', 'h': '233.86', 'w': '11.68'}, {'page': '5', 'x': '304.72', 'y': '561.23', 'h': '233.86', 'w': '11.68'}, {'page': '5', 'x': '304.72', 'y': '573.24', 'h': '233.86', 'w': '11.68'}, {'page': '5', 'x': '304.72', 'y': '585.24', 'h': '226.18', 'w': '11.68'}], [{'page': '5', 'x': '312.72', 'y': '597.23', 'h': '225.86', 'w': '11.68'}, {'page': '5', 'x': '304.72', 'y': '609.24', 'h': '233.86', 'w': '11.68'}, {'page': '5', 'x': '304.72', 'y': '621.24', 'h': '209.30', 'w': '11.68'}], [{'page': '5', 'x': '518.55', 'y': '621.24', 'h': '20.02', 'w': '11.68'}, {'page': '5', 'x': '304.72', 'y': '633.23', 'h': '233.86', 'w': '11.68'}, {'page': '5', 'x': '304.72', 'y': '645.24', 'h': '233.86', 'w': '11.68'}, {'page': '5', 'x': '304.72', 'y': '657.24', 'h': '233.86', 'w': '11.68'}, {'page': '5', 'x': '304.72', 'y': '669.23', 'h': '233.86', 'w': '11.68'}, {'page': '5', 'x': '304.72', 'y': '681.24', 'h': '233.84', 'w': '11.68'}, {'page': '5', 'x': '62.94', 'y': '451.38', 'h': '259.50', 'w': '7.39'}], [{'page': '5', 'x': '323.86', 'y': '451.38', 'h': '182.25', 'w': '7.39'}, {'page': '5', 'x': '62.94', 'y': '461.38', 'h': '344.77', 'w': '7.39'}, {'page': '5', 'x': '63.07', 'y': '637.74', 'h': '212.63', 'w': '7.39'}, {'page': '5', 'x': '63.07', 'y': '647.74', 'h': '169.06', 'w': '7.39'}], [{'page': '5', 'x': '233.88', 'y': '647.74', 'h': '29.01', 'w': '7.39'}, {'page': '5', 'x': '63.07', 'y': '657.74', 'h': '208.95', 'w': '7.39'}, {'page': '5', 'x': '63.07', 'y': '667.74', 'h': '6.35', 'w': '7.39'}, {'page': '5', 'x': '71.17', 'y': '667.53', 'h': '6.24', 'w': '13.23'}, {'page': '5', 'x': '78.41', 'y': '667.74', 'h': '187.66', 'w': '8.79'}, {'page': '5', 'x': '63.07', 'y': '677.74', 'h': '97.08', 'w': '7.39'}, {'page': '6', 'x': '56.69', 'y': '88.58', 'h': '169.21', 'w': '11.68'}], [{'page': '6', 'x': '228.93', 'y': '88.58', 'h': '61.63', 'w': '11.68'}, {'page': '6', 'x': '56.69', 'y': '100.58', 'h': '233.83', 'w': '11.68'}, {'page': '6', 'x': '56.69', 'y': '112.58', 'h': '233.85', 'w': '11.68'}, {'page': '6', 'x': '56.69', 'y': '124.57', 'h': '233.86', 'w': '11.68'}, {'page': '6', 'x': '56.69', 'y': '136.57', 'h': '36.59', 'w': '11.68'}], [{'page': '6', 'x': '95.49', 'y': '136.57', 'h': '195.04', 'w': '11.68'}, {'page': '6', 'x': '56.69', 'y': '148.56', 'h': '233.86', 'w': '11.68'}, {'page': '6', 'x': '56.69', 'y': '160.58', 'h': '233.85', 'w': '11.68'}, {'page': '6', 'x': '56.69', 'y': '172.58', 'h': '173.96', 'w': '11.68'}], [{'page': '6', 'x': '234.14', 'y': '172.58', 'h': '56.43', 'w': '11.68'}, {'page': '6', 'x': '56.69', 'y': '184.57', 'h': '233.86', 'w': '11.68'}, {'page': '6', 'x': '56.69', 'y': '196.58', 'h': '233.88', 'w': '11.68'}, {'page': '6', 'x': '56.69', 'y': '208.58', 'h': '73.15', 'w': '11.68'}]]\", 'pages': \"('4', '6')\", 'section_title': 'Dysregulated proteins in IA SF', 'section_number': 'None', 'paper_title': 'Elucidating the endogenous synovial fluid proteome and peptidome of inflammatory arthritis using label-free mass spectrometry', 'file_path': '/data/tommaso/llm4scilit/data/papers/2.pdf'}),\n", + " Document(page_content='Tissue and fluid specificity of upregulated proteins were used to narrow down the list of candidates deemed likely to be associated with IA, RA and PsA.We concentrated on proteins which displayed strong abundance in SF, bone, bone marrow or immune regulatory cells according to ProteomicsDB.Immunoglobulins were excluded from further analysis.The resulting list of upregulated proteins compared to the reference group consisted of 38 IAspecific, 8 RA-specific and 9 PsA-specific unique protein candidates.High abundance proteins in serum, as identified according to the literature [21,22], were excluded due to the likelihood that they were serum contaminants at the time of joint aspiration.Moreover, this ensured protein candidates were unlikely to be overexpressed in the serum of non-diseased patients.Following this filtering step, the final list of candidate biomarkers consisted of 5, 4 and 2 upregulated proteins which we deemed likely to be associated with IA, RA and PsA, respectively (Table 2).', metadata={'text': 'Tissue and fluid specificity of upregulated proteins were used to narrow down the list of candidates deemed likely to be associated with IA, RA and PsA.We concentrated on proteins which displayed strong abundance in SF, bone, bone marrow or immune regulatory cells according to ProteomicsDB.Immunoglobulins were excluded from further analysis.The resulting list of upregulated proteins compared to the reference group consisted of 38 IAspecific, 8 RA-specific and 9 PsA-specific unique protein candidates.High abundance proteins in serum, as identified according to the literature [21,22], were excluded due to the likelihood that they were serum contaminants at the time of joint aspiration.Moreover, this ensured protein candidates were unlikely to be overexpressed in the serum of non-diseased patients.Following this filtering step, the final list of candidate biomarkers consisted of 5, 4 and 2 upregulated proteins which we deemed likely to be associated with IA, RA and PsA, respectively (Table 2).', 'para': '6', 'bboxes': \"[[{'page': '6', 'x': '64.69', 'y': '220.57', 'h': '225.86', 'w': '11.68'}, {'page': '6', 'x': '56.69', 'y': '232.57', 'h': '233.86', 'w': '11.68'}, {'page': '6', 'x': '56.69', 'y': '244.56', 'h': '160.38', 'w': '11.68'}], [{'page': '6', 'x': '220.15', 'y': '244.56', 'h': '70.39', 'w': '11.68'}, {'page': '6', 'x': '56.69', 'y': '256.56', 'h': '233.89', 'w': '11.68'}, {'page': '6', 'x': '56.69', 'y': '268.55', 'h': '233.87', 'w': '11.68'}, {'page': '6', 'x': '56.69', 'y': '280.55', 'h': '73.12', 'w': '11.68'}], [{'page': '6', 'x': '132.73', 'y': '280.55', 'h': '157.82', 'w': '11.68'}, {'page': '6', 'x': '56.69', 'y': '292.54', 'h': '65.60', 'w': '11.68'}], [{'page': '6', 'x': '124.62', 'y': '292.54', 'h': '165.94', 'w': '11.68'}, {'page': '6', 'x': '56.69', 'y': '304.54', 'h': '233.86', 'w': '11.68'}, {'page': '6', 'x': '56.69', 'y': '316.53', 'h': '233.85', 'w': '11.68'}, {'page': '6', 'x': '56.69', 'y': '328.53', 'h': '46.03', 'w': '11.68'}], [{'page': '6', 'x': '106.09', 'y': '328.53', 'h': '184.46', 'w': '11.68'}, {'page': '6', 'x': '56.69', 'y': '340.58', 'h': '233.86', 'w': '11.68'}, {'page': '6', 'x': '56.69', 'y': '352.58', 'h': '233.83', 'w': '11.68'}, {'page': '6', 'x': '56.69', 'y': '364.57', 'h': '131.17', 'w': '11.68'}], [{'page': '6', 'x': '192.23', 'y': '364.57', 'h': '98.33', 'w': '11.68'}, {'page': '6', 'x': '56.69', 'y': '376.57', 'h': '233.85', 'w': '11.68'}, {'page': '6', 'x': '56.69', 'y': '388.56', 'h': '147.52', 'w': '11.68'}], [{'page': '6', 'x': '207.03', 'y': '388.56', 'h': '83.52', 'w': '11.68'}, {'page': '6', 'x': '56.69', 'y': '400.58', 'h': '233.84', 'w': '11.68'}, {'page': '6', 'x': '56.69', 'y': '412.58', 'h': '233.89', 'w': '11.68'}, {'page': '6', 'x': '56.69', 'y': '424.57', 'h': '233.89', 'w': '11.68'}, {'page': '6', 'x': '56.69', 'y': '436.57', 'h': '38.23', 'w': '11.68'}]]\", 'pages': \"('6', '6')\", 'section_title': 'Dysregulated proteins in IA SF', 'section_number': 'None', 'paper_title': 'Elucidating the endogenous synovial fluid proteome and peptidome of inflammatory arthritis using label-free mass spectrometry', 'file_path': '/data/tommaso/llm4scilit/data/papers/2.pdf'}),\n", + " Document(page_content='Differential abundance analyses were conducted to detect strongly dysregulated peptides in the SF of: (1) IA compared to control and (2) RA compared to PsA.For both comparisons, no peptides showed statistically significant differences in abundance after correcting for multiple hypothesis testing, with the exception of the peptide sequence DSGEGDFLAEGGGV when comparing IA to the control.Alternatively, with respect to the unadjusted p value, 11 peptides were determined to be significantly differentially abundant in IA SF with 10 peptides showing significant upregulation and 1 peptide showing significant downregulation (Table 3).A complete list of dysregulated peptides in IA compared to control and dysregulated peptides in RA compared to PsA are summarized in Additional file 1: Tables S7 andS8, respectively.', metadata={'text': 'Differential abundance analyses were conducted to detect strongly dysregulated peptides in the SF of: (1) IA compared to control and (2) RA compared to PsA.For both comparisons, no peptides showed statistically significant differences in abundance after correcting for multiple hypothesis testing, with the exception of the peptide sequence DSGEGDFLAEGGGV when comparing IA to the control.Alternatively, with respect to the unadjusted p value, 11 peptides were determined to be significantly differentially abundant in IA SF with 10 peptides showing significant upregulation and 1 peptide showing significant downregulation (Table 3).A complete list of dysregulated peptides in IA compared to control and dysregulated peptides in RA compared to PsA are summarized in Additional file 1: Tables S7 andS8, respectively.', 'para': '3', 'bboxes': \"[[{'page': '6', 'x': '56.69', 'y': '472.58', 'h': '233.87', 'w': '11.68'}, {'page': '6', 'x': '56.69', 'y': '484.58', 'h': '233.86', 'w': '11.68'}, {'page': '6', 'x': '56.69', 'y': '496.58', 'h': '194.72', 'w': '11.68'}], [{'page': '6', 'x': '254.59', 'y': '496.58', 'h': '35.98', 'w': '11.68'}, {'page': '6', 'x': '56.69', 'y': '508.58', 'h': '233.85', 'w': '11.68'}, {'page': '6', 'x': '304.72', 'y': '88.58', 'h': '233.86', 'w': '11.68'}, {'page': '6', 'x': '304.72', 'y': '100.57', 'h': '233.88', 'w': '11.68'}, {'page': '6', 'x': '304.72', 'y': '112.57', 'h': '233.87', 'w': '11.68'}, {'page': '6', 'x': '304.72', 'y': '124.56', 'h': '47.67', 'w': '11.68'}], [{'page': '6', 'x': '355.25', 'y': '124.56', 'h': '183.36', 'w': '11.68'}, {'page': '6', 'x': '304.72', 'y': '136.56', 'h': '233.86', 'w': '11.68'}, {'page': '6', 'x': '304.72', 'y': '148.56', 'h': '233.86', 'w': '11.68'}, {'page': '6', 'x': '304.72', 'y': '160.58', 'h': '233.86', 'w': '11.68'}, {'page': '6', 'x': '304.72', 'y': '172.58', 'h': '147.67', 'w': '11.68'}], [{'page': '6', 'x': '457.70', 'y': '172.58', 'h': '80.90', 'w': '11.68'}, {'page': '6', 'x': '304.72', 'y': '184.58', 'h': '233.86', 'w': '11.68'}, {'page': '6', 'x': '304.72', 'y': '196.58', 'h': '233.86', 'w': '11.68'}, {'page': '6', 'x': '304.72', 'y': '208.58', 'h': '226.18', 'w': '11.68'}]]\", 'pages': \"('6', '6')\", 'section_title': 'Dysregulated peptides in IA SF', 'section_number': 'None', 'paper_title': 'Elucidating the endogenous synovial fluid proteome and peptidome of inflammatory arthritis using label-free mass spectrometry', 'file_path': '/data/tommaso/llm4scilit/data/papers/2.pdf'}),\n", + " Document(page_content='When comparing RA and PsA peptidomes, 5 peptides showed differential abundance between the two groups with all 5 peptides demonstrating significant upregulation in PsA SF relative to RA SF (Table 4).', metadata={'text': 'When comparing RA and PsA peptidomes, 5 peptides showed differential abundance between the two groups with all 5 peptides demonstrating significant upregulation in PsA SF relative to RA SF (Table 4).', 'para': '0', 'bboxes': \"[[{'page': '6', 'x': '312.72', 'y': '220.58', 'h': '225.84', 'w': '11.68'}, {'page': '6', 'x': '304.72', 'y': '232.57', 'h': '233.86', 'w': '11.68'}, {'page': '6', 'x': '304.72', 'y': '244.57', 'h': '233.86', 'w': '11.68'}, {'page': '6', 'x': '304.72', 'y': '256.58', 'h': '169.42', 'w': '11.68'}]]\", 'pages': \"('6', '6')\", 'section_title': 'Dysregulated peptides in IA SF', 'section_number': 'None', 'paper_title': 'Elucidating the endogenous synovial fluid proteome and peptidome of inflammatory arthritis using label-free mass spectrometry', 'file_path': '/data/tommaso/llm4scilit/data/papers/2.pdf'}),\n", + " Document(page_content='KEGG analysis revealed significantly enriched pathways (fold enrichment in brackets) related to the IA SF peptidome.Figure 3 illustrates the top KEGG pathways among which complement and coagulation cascades [23], Staphylococcus aureus infection [18], protein digestion and absorption [17] and extracellular matrix (ECM)-receptor interaction [14] were significantly enriched.', metadata={'text': 'KEGG analysis revealed significantly enriched pathways (fold enrichment in brackets) related to the IA SF peptidome.Figure 3 illustrates the top KEGG pathways among which complement and coagulation cascades [23], Staphylococcus aureus infection [18], protein digestion and absorption [17] and extracellular matrix (ECM)-receptor interaction [14] were significantly enriched.', 'para': '1', 'bboxes': \"[[{'page': '6', 'x': '304.72', 'y': '292.58', 'h': '233.86', 'w': '11.68'}, {'page': '6', 'x': '304.72', 'y': '304.58', 'h': '233.86', 'w': '11.68'}, {'page': '6', 'x': '304.72', 'y': '316.58', 'h': '25.34', 'w': '11.68'}], [{'page': '6', 'x': '332.34', 'y': '316.58', 'h': '206.22', 'w': '11.68'}, {'page': '6', 'x': '304.72', 'y': '328.58', 'h': '233.86', 'w': '11.68'}, {'page': '6', 'x': '304.72', 'y': '340.58', 'h': '233.89', 'w': '11.68'}, {'page': '6', 'x': '304.72', 'y': '352.58', 'h': '233.86', 'w': '11.68'}, {'page': '6', 'x': '304.72', 'y': '364.57', 'h': '177.38', 'w': '11.68'}]]\", 'pages': \"('6', '6')\", 'section_title': 'Pathway enrichment analysis of the SF peptidome', 'section_number': 'None', 'paper_title': 'Elucidating the endogenous synovial fluid proteome and peptidome of inflammatory arthritis using label-free mass spectrometry', 'file_path': '/data/tommaso/llm4scilit/data/papers/2.pdf'}),\n", + " Document(page_content='Accumulating evidence suggests a crucial role of intestinal resident flora in chronic activation of innate and adaptive immune responses leading to inflammatory disorders.Microorganisms residing in the intestine play an important role in maintaining systemic homeostasis through the delicate balance of the immune system response.Perturbations in the composition of the intestinal microbiota have been shown to elicit inappropriate immune cell activation leading to an inflammatory cascade and eventually, clinical disease [24].Specifically, perturbations of the gut epithelial cell layer and/or increased exposure to microbial metabolites may be primary triggers of an inflammatory cascade resulting in joint pathology [25].Protective mechanisms, such as the expression of AMPs, have naturally developed to oppose microbial dysbiosis.AMPs are fundamental effectors of the innate immune response with a broad spectrum of microbicidal activity.Under inflammatory conditions, the synovial membrane has demonstrated an altered pattern of expression of AMPs relative to healthy controls and suggests a valuable role of these proteins in the differential diagnosis of inflammatory joint disease [23].', metadata={'text': 'Accumulating evidence suggests a crucial role of intestinal resident flora in chronic activation of innate and adaptive immune responses leading to inflammatory disorders.Microorganisms residing in the intestine play an important role in maintaining systemic homeostasis through the delicate balance of the immune system response.Perturbations in the composition of the intestinal microbiota have been shown to elicit inappropriate immune cell activation leading to an inflammatory cascade and eventually, clinical disease [24].Specifically, perturbations of the gut epithelial cell layer and/or increased exposure to microbial metabolites may be primary triggers of an inflammatory cascade resulting in joint pathology [25].Protective mechanisms, such as the expression of AMPs, have naturally developed to oppose microbial dysbiosis.AMPs are fundamental effectors of the innate immune response with a broad spectrum of microbicidal activity.Under inflammatory conditions, the synovial membrane has demonstrated an altered pattern of expression of AMPs relative to healthy controls and suggests a valuable role of these proteins in the differential diagnosis of inflammatory joint disease [23].', 'para': '6', 'bboxes': \"[[{'page': '6', 'x': '304.72', 'y': '400.58', 'h': '233.86', 'w': '11.68'}, {'page': '6', 'x': '304.72', 'y': '412.58', 'h': '233.86', 'w': '11.68'}, {'page': '6', 'x': '304.72', 'y': '424.58', 'h': '233.86', 'w': '11.68'}, {'page': '6', 'x': '304.72', 'y': '436.57', 'h': '40.41', 'w': '11.68'}], [{'page': '6', 'x': '348.54', 'y': '436.57', 'h': '190.06', 'w': '11.68'}, {'page': '6', 'x': '304.72', 'y': '448.57', 'h': '233.86', 'w': '11.68'}, {'page': '6', 'x': '304.72', 'y': '460.58', 'h': '233.86', 'w': '11.68'}, {'page': '6', 'x': '304.72', 'y': '472.58', 'h': '38.56', 'w': '11.68'}], [{'page': '6', 'x': '346.55', 'y': '472.58', 'h': '192.03', 'w': '11.68'}, {'page': '6', 'x': '304.72', 'y': '484.58', 'h': '233.86', 'w': '11.68'}, {'page': '6', 'x': '304.72', 'y': '496.58', 'h': '233.85', 'w': '11.68'}, {'page': '6', 'x': '304.72', 'y': '508.58', 'h': '182.90', 'w': '11.68'}], [{'page': '6', 'x': '490.78', 'y': '508.58', 'h': '47.78', 'w': '11.68'}, {'page': '7', 'x': '56.69', 'y': '464.20', 'h': '233.86', 'w': '11.68'}, {'page': '7', 'x': '56.69', 'y': '476.19', 'h': '233.86', 'w': '11.68'}, {'page': '7', 'x': '56.69', 'y': '488.20', 'h': '233.85', 'w': '11.68'}, {'page': '7', 'x': '56.69', 'y': '500.19', 'h': '83.82', 'w': '11.68'}], [{'page': '7', 'x': '143.43', 'y': '500.19', 'h': '147.10', 'w': '11.68'}, {'page': '7', 'x': '56.69', 'y': '512.19', 'h': '233.85', 'w': '11.68'}, {'page': '7', 'x': '56.69', 'y': '524.18', 'h': '82.03', 'w': '11.68'}], [{'page': '7', 'x': '142.42', 'y': '524.18', 'h': '148.15', 'w': '11.68'}, {'page': '7', 'x': '56.69', 'y': '536.18', 'h': '233.84', 'w': '11.68'}, {'page': '7', 'x': '56.69', 'y': '548.17', 'h': '88.90', 'w': '11.68'}], [{'page': '7', 'x': '151.46', 'y': '548.17', 'h': '139.11', 'w': '11.68'}, {'page': '7', 'x': '56.69', 'y': '560.17', 'h': '233.86', 'w': '11.68'}, {'page': '7', 'x': '56.69', 'y': '572.20', 'h': '233.87', 'w': '11.68'}, {'page': '7', 'x': '56.69', 'y': '584.19', 'h': '233.86', 'w': '11.68'}, {'page': '7', 'x': '56.69', 'y': '596.20', 'h': '216.26', 'w': '11.68'}]]\", 'pages': \"('6', '7')\", 'section_title': 'Antimicrobial peptides in IA SF', 'section_number': 'None', 'paper_title': 'Elucidating the endogenous synovial fluid proteome and peptidome of inflammatory arthritis using label-free mass spectrometry', 'file_path': '/data/tommaso/llm4scilit/data/papers/2.pdf'}),\n", + " Document(page_content='Putative AMPs in the SF peptidome of IA were predicted with the assistance of a SVM learning algorithm (Additional file 1: Table S9).Overall, 26 peptide sequences originating from 8 proteins (complement C4-A, fibrinogen beta chain, fibrinogen alpha chain, annexin A1, collagen type III alpha 1 chain, collagen type I alpha 1 chain, gliomedin and EMI domain-containing protein (1) were predicted to have antimicrobial activity with an SVM score of 0.8 or higher (Table 5).', metadata={'text': 'Putative AMPs in the SF peptidome of IA were predicted with the assistance of a SVM learning algorithm (Additional file 1: Table S9).Overall, 26 peptide sequences originating from 8 proteins (complement C4-A, fibrinogen beta chain, fibrinogen alpha chain, annexin A1, collagen type III alpha 1 chain, collagen type I alpha 1 chain, gliomedin and EMI domain-containing protein (1) were predicted to have antimicrobial activity with an SVM score of 0.8 or higher (Table 5).', 'para': '1', 'bboxes': \"[[{'page': '7', 'x': '64.69', 'y': '608.19', 'h': '225.86', 'w': '11.68'}, {'page': '7', 'x': '56.69', 'y': '620.20', 'h': '233.86', 'w': '11.68'}, {'page': '7', 'x': '56.69', 'y': '632.20', 'h': '148.12', 'w': '11.68'}], [{'page': '7', 'x': '209.01', 'y': '632.20', 'h': '81.54', 'w': '11.68'}, {'page': '7', 'x': '56.69', 'y': '644.19', 'h': '233.88', 'w': '11.68'}, {'page': '7', 'x': '56.69', 'y': '656.19', 'h': '233.88', 'w': '11.68'}, {'page': '7', 'x': '56.69', 'y': '668.18', 'h': '233.90', 'w': '11.68'}, {'page': '7', 'x': '56.69', 'y': '680.18', 'h': '233.86', 'w': '11.68'}, {'page': '7', 'x': '56.69', 'y': '692.17', 'h': '233.88', 'w': '11.68'}, {'page': '7', 'x': '56.69', 'y': '704.17', 'h': '182.79', 'w': '11.68'}]]\", 'pages': \"('7', '7')\", 'section_title': 'Antimicrobial peptides in IA SF', 'section_number': 'None', 'paper_title': 'Elucidating the endogenous synovial fluid proteome and peptidome of inflammatory arthritis using label-free mass spectrometry', 'file_path': '/data/tommaso/llm4scilit/data/papers/2.pdf'}),\n", + " Document(page_content='In the current study, a comparative MS-based approach coupled to statistical and bioinformatics analyses was performed on IA SF relative to control SF, and RA SF relative to PsA SF, to detect notable differences in both the proteomic and peptidomic data.Studies using an MS-based approach to evaluate the proteome of similar inflammatory diseases, including psoriasis [26], systemic lupus erythematosus [27], and ankylosing spondylitis [28], corroborate the robustness of such analytical methodologies.The investigation of a proximal joint fluid, such as SF, was preferred since its protein and peptide expression patterns are expected to be reflective of the pathophysiological state of the joint.As such, elucidating the SF proteome and peptidome during the progression of IA can provide novel insights into molecular drivers of the disease.', metadata={'text': 'In the current study, a comparative MS-based approach coupled to statistical and bioinformatics analyses was performed on IA SF relative to control SF, and RA SF relative to PsA SF, to detect notable differences in both the proteomic and peptidomic data.Studies using an MS-based approach to evaluate the proteome of similar inflammatory diseases, including psoriasis [26], systemic lupus erythematosus [27], and ankylosing spondylitis [28], corroborate the robustness of such analytical methodologies.The investigation of a proximal joint fluid, such as SF, was preferred since its protein and peptide expression patterns are expected to be reflective of the pathophysiological state of the joint.As such, elucidating the SF proteome and peptidome during the progression of IA can provide novel insights into molecular drivers of the disease.', 'para': '3', 'bboxes': \"[[{'page': '7', 'x': '304.72', 'y': '476.68', 'h': '233.86', 'w': '11.68'}, {'page': '7', 'x': '304.72', 'y': '488.68', 'h': '233.84', 'w': '11.68'}, {'page': '7', 'x': '304.72', 'y': '500.67', 'h': '233.87', 'w': '11.68'}, {'page': '7', 'x': '304.72', 'y': '512.67', 'h': '233.84', 'w': '11.68'}, {'page': '7', 'x': '304.72', 'y': '524.66', 'h': '157.29', 'w': '11.68'}], [{'page': '7', 'x': '467.01', 'y': '524.66', 'h': '71.58', 'w': '11.68'}, {'page': '7', 'x': '304.72', 'y': '536.66', 'h': '233.84', 'w': '11.68'}, {'page': '7', 'x': '304.72', 'y': '548.65', 'h': '233.87', 'w': '11.68'}, {'page': '7', 'x': '304.72', 'y': '560.65', 'h': '233.88', 'w': '11.68'}, {'page': '7', 'x': '304.72', 'y': '572.64', 'h': '233.86', 'w': '11.68'}, {'page': '7', 'x': '304.72', 'y': '584.68', 'h': '41.68', 'w': '11.68'}], [{'page': '7', 'x': '351.47', 'y': '584.68', 'h': '187.12', 'w': '11.68'}, {'page': '7', 'x': '304.72', 'y': '596.68', 'h': '233.83', 'w': '11.68'}, {'page': '7', 'x': '304.72', 'y': '608.67', 'h': '233.88', 'w': '11.68'}, {'page': '7', 'x': '304.72', 'y': '620.67', 'h': '148.87', 'w': '11.68'}], [{'page': '7', 'x': '456.20', 'y': '620.67', 'h': '82.36', 'w': '11.68'}, {'page': '7', 'x': '304.72', 'y': '632.66', 'h': '233.85', 'w': '11.68'}, {'page': '7', 'x': '304.72', 'y': '644.66', 'h': '233.89', 'w': '11.68'}, {'page': '7', 'x': '304.72', 'y': '656.65', 'h': '46.82', 'w': '11.68'}]]\", 'pages': \"('7', '7')\", 'section_title': 'Discussion', 'section_number': 'None', 'paper_title': 'Elucidating the endogenous synovial fluid proteome and peptidome of inflammatory arthritis using label-free mass spectrometry', 'file_path': '/data/tommaso/llm4scilit/data/papers/2.pdf'}),\n", + " Document(page_content='The molecular pathways involved in the pathogenesis of IA are also overrepresented in the current study based on functional network analysis of IA SF proteins and peptides.Prominent mechanisms related to the identification of upregulated proteins include: (1) acute phase response signaling, (2) antimicrobial response, (3) inflammatory response, (4) IL-12 signaling and production in macrophages and ( 5) cell-to-cell signaling and interaction.Similarly, interaction networks were established through pathway enrichment analysis of IA SF peptides.Of interest was the enrichment of Staphylococcus aureus infection.As previously highlighted, correlative studies are beginning to recognize a fundamental interplay between the microbiome and immune system response in the etiology of IA [29,30].Although the role of S. aureus in the progression of IA has yet to be clarified, the enrichment of this pathway, as reflected by the peptides identified in our study, reinforces this developing hypothesis.Overall, our analyses resulted in the identification of 144 differentially expressed proteins in the IA SF proteome.Comparison of RA SF to PsA SF identified 22 differentially expressed proteins.Since we are interested in identifying putative markers which can be further validated in patient serum, we decided to focus solely on upregulated proteins in each arthritic condition.Highpotential candidate biomarkers were selected on the basis of several molecular features including: differential abundance, fluid and tissue specificity, immunoglobulin status and abundance in the plasma proteome.Our list of dysregulated proteins in IA was reduced to a total of 5 promising protein candidates representative of intrinsic joint structures including the articular cartilage, synovial membrane and synoviocytes.The re-discovery of several upregulated proteins which have been previously implicated in IA, such as CD5 molecule-like (CD5L), matrix metalloproteinase (MMP)-3, defensin alpha 3 (DEFA3), S100 calcium-binding protein (S100) A8, and A9, provided an internal validation of our analytical proteomic approach [31,32].The application of similar, stringent filtering criteria on protein candidates of RA and PsA resulted in 4 RA-specific and 2 PsA-specific promising protein candidates.', metadata={'text': 'The molecular pathways involved in the pathogenesis of IA are also overrepresented in the current study based on functional network analysis of IA SF proteins and peptides.Prominent mechanisms related to the identification of upregulated proteins include: (1) acute phase response signaling, (2) antimicrobial response, (3) inflammatory response, (4) IL-12 signaling and production in macrophages and ( 5) cell-to-cell signaling and interaction.Similarly, interaction networks were established through pathway enrichment analysis of IA SF peptides.Of interest was the enrichment of Staphylococcus aureus infection.As previously highlighted, correlative studies are beginning to recognize a fundamental interplay between the microbiome and immune system response in the etiology of IA [29,30].Although the role of S. aureus in the progression of IA has yet to be clarified, the enrichment of this pathway, as reflected by the peptides identified in our study, reinforces this developing hypothesis.Overall, our analyses resulted in the identification of 144 differentially expressed proteins in the IA SF proteome.Comparison of RA SF to PsA SF identified 22 differentially expressed proteins.Since we are interested in identifying putative markers which can be further validated in patient serum, we decided to focus solely on upregulated proteins in each arthritic condition.Highpotential candidate biomarkers were selected on the basis of several molecular features including: differential abundance, fluid and tissue specificity, immunoglobulin status and abundance in the plasma proteome.Our list of dysregulated proteins in IA was reduced to a total of 5 promising protein candidates representative of intrinsic joint structures including the articular cartilage, synovial membrane and synoviocytes.The re-discovery of several upregulated proteins which have been previously implicated in IA, such as CD5 molecule-like (CD5L), matrix metalloproteinase (MMP)-3, defensin alpha 3 (DEFA3), S100 calcium-binding protein (S100) A8, and A9, provided an internal validation of our analytical proteomic approach [31,32].The application of similar, stringent filtering criteria on protein candidates of RA and PsA resulted in 4 RA-specific and 2 PsA-specific promising protein candidates.', 'para': '12', 'bboxes': \"[[{'page': '7', 'x': '312.72', 'y': '668.65', 'h': '225.88', 'w': '11.68'}, {'page': '7', 'x': '304.72', 'y': '680.64', 'h': '233.89', 'w': '11.68'}, {'page': '7', 'x': '304.72', 'y': '692.64', 'h': '233.86', 'w': '11.68'}, {'page': '7', 'x': '304.72', 'y': '704.68', 'h': '21.95', 'w': '11.68'}], [{'page': '7', 'x': '328.87', 'y': '704.68', 'h': '209.69', 'w': '11.68'}, {'page': '7', 'x': '304.72', 'y': '716.68', 'h': '233.84', 'w': '11.68'}, {'page': '8', 'x': '56.69', 'y': '388.74', 'h': '233.85', 'w': '11.68'}, {'page': '8', 'x': '56.69', 'y': '400.73', 'h': '233.86', 'w': '11.68'}, {'page': '8', 'x': '56.69', 'y': '412.74', 'h': '233.85', 'w': '11.68'}], [{'page': '8', 'x': '56.69', 'y': '424.73', 'h': '233.85', 'w': '11.68'}, {'page': '8', 'x': '56.69', 'y': '436.73', 'h': '195.15', 'w': '11.68'}], [{'page': '8', 'x': '254.51', 'y': '436.73', 'h': '36.04', 'w': '11.68'}, {'page': '8', 'x': '56.69', 'y': '448.74', 'h': '233.86', 'w': '11.68'}, {'page': '8', 'x': '56.69', 'y': '460.74', 'h': '18.72', 'w': '11.68'}], [{'page': '8', 'x': '80.23', 'y': '460.74', 'h': '210.33', 'w': '11.68'}, {'page': '8', 'x': '56.69', 'y': '472.73', 'h': '233.85', 'w': '11.68'}, {'page': '8', 'x': '56.69', 'y': '484.73', 'h': '233.86', 'w': '11.68'}, {'page': '8', 'x': '56.69', 'y': '496.74', 'h': '81.73', 'w': '11.68'}], [{'page': '8', 'x': '141.02', 'y': '496.74', 'h': '149.54', 'w': '11.68'}, {'page': '8', 'x': '56.69', 'y': '508.73', 'h': '233.83', 'w': '11.68'}, {'page': '8', 'x': '56.69', 'y': '520.73', 'h': '233.86', 'w': '11.68'}, {'page': '8', 'x': '56.69', 'y': '532.73', 'h': '196.19', 'w': '11.68'}], [{'page': '8', 'x': '64.69', 'y': '544.72', 'h': '225.89', 'w': '11.68'}, {'page': '8', 'x': '56.69', 'y': '556.72', 'h': '233.86', 'w': '11.68'}, {'page': '8', 'x': '56.69', 'y': '568.74', 'h': '27.84', 'w': '11.68'}], [{'page': '8', 'x': '89.00', 'y': '568.74', 'h': '201.55', 'w': '11.68'}, {'page': '8', 'x': '56.69', 'y': '580.73', 'h': '134.25', 'w': '11.68'}], [{'page': '8', 'x': '194.19', 'y': '580.73', 'h': '96.38', 'w': '11.68'}, {'page': '8', 'x': '56.69', 'y': '592.73', 'h': '233.86', 'w': '11.68'}, {'page': '8', 'x': '56.69', 'y': '604.73', 'h': '233.83', 'w': '11.68'}, {'page': '8', 'x': '56.69', 'y': '616.72', 'h': '205.76', 'w': '11.68'}], [{'page': '8', 'x': '266.65', 'y': '616.72', 'h': '23.93', 'w': '11.68'}, {'page': '8', 'x': '56.69', 'y': '628.72', 'h': '233.88', 'w': '11.68'}, {'page': '8', 'x': '56.69', 'y': '640.71', 'h': '233.84', 'w': '11.68'}, {'page': '8', 'x': '56.69', 'y': '652.71', 'h': '233.88', 'w': '11.68'}, {'page': '8', 'x': '56.69', 'y': '664.70', 'h': '197.39', 'w': '11.68'}], [{'page': '8', 'x': '257.74', 'y': '664.70', 'h': '32.83', 'w': '11.68'}, {'page': '8', 'x': '56.69', 'y': '676.70', 'h': '233.87', 'w': '11.68'}, {'page': '8', 'x': '56.69', 'y': '688.69', 'h': '233.86', 'w': '11.68'}, {'page': '8', 'x': '56.69', 'y': '700.69', 'h': '233.87', 'w': '11.68'}, {'page': '8', 'x': '56.69', 'y': '712.68', 'h': '120.05', 'w': '11.68'}], [{'page': '8', 'x': '179.60', 'y': '712.68', 'h': '110.97', 'w': '11.68'}, {'page': '8', 'x': '304.72', 'y': '388.68', 'h': '233.86', 'w': '11.68'}, {'page': '8', 'x': '304.72', 'y': '400.74', 'h': '233.88', 'w': '11.68'}, {'page': '8', 'x': '304.72', 'y': '412.73', 'h': '233.86', 'w': '11.68'}, {'page': '8', 'x': '304.72', 'y': '424.73', 'h': '233.86', 'w': '11.68'}, {'page': '8', 'x': '304.72', 'y': '436.74', 'h': '233.85', 'w': '11.68'}, {'page': '8', 'x': '304.72', 'y': '448.73', 'h': '77.16', 'w': '11.68'}], [{'page': '8', 'x': '386.15', 'y': '448.73', 'h': '152.42', 'w': '11.68'}, {'page': '8', 'x': '304.72', 'y': '460.73', 'h': '233.85', 'w': '11.68'}, {'page': '8', 'x': '304.72', 'y': '472.73', 'h': '233.86', 'w': '11.68'}, {'page': '8', 'x': '304.72', 'y': '484.72', 'h': '78.17', 'w': '11.68'}]]\", 'pages': \"('7', '8')\", 'section_title': 'Discussion', 'section_number': 'None', 'paper_title': 'Elucidating the endogenous synovial fluid proteome and peptidome of inflammatory arthritis using label-free mass spectrometry', 'file_path': '/data/tommaso/llm4scilit/data/papers/2.pdf'}),\n", + " Document(page_content='Our analytical approach also yielded the discovery of novel putative biomarkers which, to our knowledge, have yet to be described in the context of IA.This includes the identification of alpha-ketoglutarate-dependent dioxygenase (FTO), family with sequence similarity 21 member C (FAM21C; more commonly known as WASH complex subunit 2C, WASHC2C) and T-box transcription factor (TBX3).Of these candidates, only TBX3 has been previously observed in IA at the genetic level [33].A genome-wide association study (GWAS) identified the single nucleotide polymorphism (SNP), rs12579024, located nearest the TBX3 gene, to be strongly associated with RA in a Korean population (p value < 0.0001).The functional roles of TBX3 have, thus far, been primarily described in relation to the morphogenesis of limbs and organs [34] as well as oncogenic processes [35].A recent study by Willmer et al. [36] attempted to delineate the molecular mechanisms driven by TBX3 and identified cyclin-dependent kinase inhibitor p21 WAF1 (p21), a key mediator of cell cycle arrest, to be a primary repressed target of TBX3.Interestingly, p21 has also been implicated in the regulation of proinflammatory cytokines and MMP production in synovial fibroblasts, both of which greatly promote inflammation and joint destruction during the pathogenesis of RA [37].Isolated RA synovial fibroblasts have shown reduced expression of p21 relative to osteoarthritis (OA) synovial fibroblasts and adenovirus-mediated delivery of p21 suppresses the spontaneous production of IL-6 and MMP1 in RA synovial fibroblasts.In support of this, p21 -/-mice maintain an enhanced experimental IA with markedly increased numbers of macrophages and articular destruction [38].This phenotype is resolved, however, with the administration of a p21-peptide mimetic.When taken with our own findings, it is conceivable that the upregulation of TBX3 in the synovial joint may lead to reduced p21 expression in synovial fibroblasts and promotes the proinflammatory state distinctive of IA pathogenesis.These findings corroborate with our hypothesis that delineating the IA proteome may highlight underlying mechanisms related to the progression of inflammatory arthritic disease and serve as novel targets for screening and therapeutic purposes.', metadata={'text': 'Our analytical approach also yielded the discovery of novel putative biomarkers which, to our knowledge, have yet to be described in the context of IA.This includes the identification of alpha-ketoglutarate-dependent dioxygenase (FTO), family with sequence similarity 21 member C (FAM21C; more commonly known as WASH complex subunit 2C, WASHC2C) and T-box transcription factor (TBX3).Of these candidates, only TBX3 has been previously observed in IA at the genetic level [33].A genome-wide association study (GWAS) identified the single nucleotide polymorphism (SNP), rs12579024, located nearest the TBX3 gene, to be strongly associated with RA in a Korean population (p value < 0.0001).The functional roles of TBX3 have, thus far, been primarily described in relation to the morphogenesis of limbs and organs [34] as well as oncogenic processes [35].A recent study by Willmer et al. [36] attempted to delineate the molecular mechanisms driven by TBX3 and identified cyclin-dependent kinase inhibitor p21 WAF1 (p21), a key mediator of cell cycle arrest, to be a primary repressed target of TBX3.Interestingly, p21 has also been implicated in the regulation of proinflammatory cytokines and MMP production in synovial fibroblasts, both of which greatly promote inflammation and joint destruction during the pathogenesis of RA [37].Isolated RA synovial fibroblasts have shown reduced expression of p21 relative to osteoarthritis (OA) synovial fibroblasts and adenovirus-mediated delivery of p21 suppresses the spontaneous production of IL-6 and MMP1 in RA synovial fibroblasts.In support of this, p21 -/-mice maintain an enhanced experimental IA with markedly increased numbers of macrophages and articular destruction [38].This phenotype is resolved, however, with the administration of a p21-peptide mimetic.When taken with our own findings, it is conceivable that the upregulation of TBX3 in the synovial joint may lead to reduced p21 expression in synovial fibroblasts and promotes the proinflammatory state distinctive of IA pathogenesis.These findings corroborate with our hypothesis that delineating the IA proteome may highlight underlying mechanisms related to the progression of inflammatory arthritic disease and serve as novel targets for screening and therapeutic purposes.', 'para': '11', 'bboxes': \"[[{'page': '8', 'x': '312.72', 'y': '496.72', 'h': '225.88', 'w': '11.68'}, {'page': '8', 'x': '304.72', 'y': '508.71', 'h': '233.87', 'w': '11.68'}, {'page': '8', 'x': '304.72', 'y': '520.71', 'h': '162.16', 'w': '11.68'}], [{'page': '8', 'x': '469.31', 'y': '520.71', 'h': '69.28', 'w': '11.68'}, {'page': '8', 'x': '304.72', 'y': '532.70', 'h': '233.86', 'w': '11.68'}, {'page': '8', 'x': '304.72', 'y': '544.74', 'h': '233.86', 'w': '11.68'}, {'page': '8', 'x': '304.72', 'y': '556.74', 'h': '233.86', 'w': '11.68'}, {'page': '8', 'x': '304.72', 'y': '568.73', 'h': '233.86', 'w': '11.68'}, {'page': '8', 'x': '304.72', 'y': '580.74', 'h': '80.16', 'w': '11.68'}], [{'page': '8', 'x': '388.23', 'y': '580.74', 'h': '150.34', 'w': '11.68'}, {'page': '8', 'x': '304.72', 'y': '592.73', 'h': '233.89', 'w': '11.68'}], [{'page': '8', 'x': '304.72', 'y': '604.73', 'h': '233.86', 'w': '11.68'}, {'page': '8', 'x': '304.72', 'y': '616.73', 'h': '233.86', 'w': '11.68'}, {'page': '8', 'x': '304.72', 'y': '628.72', 'h': '233.87', 'w': '11.68'}, {'page': '8', 'x': '304.72', 'y': '640.72', 'h': '214.72', 'w': '11.68'}], [{'page': '8', 'x': '523.19', 'y': '640.72', 'h': '15.40', 'w': '11.68'}, {'page': '8', 'x': '304.72', 'y': '652.71', 'h': '233.83', 'w': '11.68'}, {'page': '8', 'x': '304.72', 'y': '664.71', 'h': '233.86', 'w': '11.68'}, {'page': '8', 'x': '304.72', 'y': '676.70', 'h': '195.59', 'w': '11.68'}], [{'page': '8', 'x': '503.10', 'y': '676.70', 'h': '35.46', 'w': '11.68'}, {'page': '8', 'x': '304.72', 'y': '688.70', 'h': '233.87', 'w': '11.68'}, {'page': '8', 'x': '304.72', 'y': '700.69', 'h': '233.87', 'w': '11.68'}, {'page': '8', 'x': '304.72', 'y': '712.69', 'h': '160.75', 'w': '11.68'}, {'page': '8', 'x': '465.47', 'y': '710.76', 'h': '18.48', 'w': '8.18'}, {'page': '8', 'x': '488.11', 'y': '712.74', 'h': '50.48', 'w': '11.68'}, {'page': '9', 'x': '56.69', 'y': '463.49', 'h': '233.86', 'w': '11.68'}, {'page': '9', 'x': '56.69', 'y': '475.48', 'h': '66.52', 'w': '11.68'}], [{'page': '9', 'x': '127.31', 'y': '475.48', 'h': '163.24', 'w': '11.68'}, {'page': '9', 'x': '56.69', 'y': '487.49', 'h': '233.87', 'w': '11.68'}, {'page': '9', 'x': '56.69', 'y': '499.48', 'h': '233.87', 'w': '11.68'}, {'page': '9', 'x': '56.69', 'y': '511.48', 'h': '233.86', 'w': '11.68'}, {'page': '9', 'x': '56.69', 'y': '523.49', 'h': '141.21', 'w': '11.68'}], [{'page': '9', 'x': '202.41', 'y': '523.49', 'h': '88.16', 'w': '11.68'}, {'page': '9', 'x': '56.69', 'y': '535.48', 'h': '233.89', 'w': '11.68'}, {'page': '9', 'x': '56.69', 'y': '547.48', 'h': '233.86', 'w': '11.68'}, {'page': '9', 'x': '56.69', 'y': '559.49', 'h': '233.87', 'w': '11.68'}, {'page': '9', 'x': '56.69', 'y': '571.48', 'h': '233.86', 'w': '11.68'}], [{'page': '9', 'x': '56.69', 'y': '581.23', 'h': '233.86', 'w': '13.95'}, {'page': '9', 'x': '56.70', 'y': '595.48', 'h': '233.87', 'w': '11.68'}, {'page': '9', 'x': '56.70', 'y': '607.48', 'h': '188.38', 'w': '11.68'}], [{'page': '9', 'x': '249.74', 'y': '607.48', 'h': '40.81', 'w': '11.68'}, {'page': '9', 'x': '56.69', 'y': '619.49', 'h': '233.84', 'w': '11.68'}, {'page': '9', 'x': '56.69', 'y': '631.48', 'h': '96.86', 'w': '11.68'}], [{'page': '9', 'x': '156.77', 'y': '631.48', 'h': '133.78', 'w': '11.68'}, {'page': '9', 'x': '56.69', 'y': '643.49', 'h': '233.87', 'w': '11.68'}, {'page': '9', 'x': '56.69', 'y': '655.48', 'h': '233.83', 'w': '11.68'}, {'page': '9', 'x': '56.69', 'y': '667.48', 'h': '233.86', 'w': '11.68'}, {'page': '9', 'x': '56.69', 'y': '679.49', 'h': '170.17', 'w': '11.68'}], [{'page': '9', 'x': '230.41', 'y': '679.49', 'h': '60.14', 'w': '11.68'}, {'page': '9', 'x': '56.69', 'y': '691.48', 'h': '233.86', 'w': '11.68'}, {'page': '9', 'x': '56.69', 'y': '703.48', 'h': '233.85', 'w': '11.68'}, {'page': '9', 'x': '304.72', 'y': '463.48', 'h': '233.87', 'w': '11.68'}, {'page': '9', 'x': '304.72', 'y': '475.47', 'h': '233.85', 'w': '11.68'}, {'page': '9', 'x': '304.72', 'y': '487.47', 'h': '39.73', 'w': '11.68'}]]\", 'pages': \"('8', '9')\", 'section_title': 'Discussion', 'section_number': 'None', 'paper_title': 'Elucidating the endogenous synovial fluid proteome and peptidome of inflammatory arthritis using label-free mass spectrometry', 'file_path': '/data/tommaso/llm4scilit/data/papers/2.pdf'}),\n", + " Document(page_content='Comparisons of RA and PsA revealed high-priority protein candidates specific to each disease.In RA SF, coagulation factor XII, SPARC-like protein 1, Rab GDP dissociation inhibitor beta and immunoglobulin gamma Fc region receptor III-A (FCGR3A) were notably upregulated; of which, activating FCGR3A has demonstrated important roles in sustaining the inflammatory response through the secretion of cytokines and proteases from the immune cell on which it is expressed [39].Likewise, allelic studies have demonstrated SNPs that may serve as susceptibility markers for RA [40].Taken together, the therapeutic targeting of FCGR3A may facilitate future management of RA.', metadata={'text': 'Comparisons of RA and PsA revealed high-priority protein candidates specific to each disease.In RA SF, coagulation factor XII, SPARC-like protein 1, Rab GDP dissociation inhibitor beta and immunoglobulin gamma Fc region receptor III-A (FCGR3A) were notably upregulated; of which, activating FCGR3A has demonstrated important roles in sustaining the inflammatory response through the secretion of cytokines and proteases from the immune cell on which it is expressed [39].Likewise, allelic studies have demonstrated SNPs that may serve as susceptibility markers for RA [40].Taken together, the therapeutic targeting of FCGR3A may facilitate future management of RA.', 'para': '3', 'bboxes': \"[[{'page': '9', 'x': '312.72', 'y': '499.46', 'h': '225.87', 'w': '11.68'}, {'page': '9', 'x': '304.72', 'y': '511.46', 'h': '186.19', 'w': '11.68'}], [{'page': '9', 'x': '495.60', 'y': '511.46', 'h': '42.98', 'w': '11.68'}, {'page': '9', 'x': '304.72', 'y': '523.45', 'h': '233.87', 'w': '11.68'}, {'page': '9', 'x': '304.72', 'y': '535.45', 'h': '233.84', 'w': '11.68'}, {'page': '9', 'x': '304.72', 'y': '547.44', 'h': '233.86', 'w': '11.68'}, {'page': '9', 'x': '304.72', 'y': '559.49', 'h': '233.84', 'w': '11.68'}, {'page': '9', 'x': '304.72', 'y': '571.48', 'h': '233.87', 'w': '11.68'}, {'page': '9', 'x': '304.72', 'y': '583.48', 'h': '233.88', 'w': '11.68'}, {'page': '9', 'x': '304.72', 'y': '595.48', 'h': '192.75', 'w': '11.68'}], [{'page': '9', 'x': '500.64', 'y': '595.48', 'h': '37.95', 'w': '11.68'}, {'page': '9', 'x': '304.72', 'y': '607.47', 'h': '233.87', 'w': '11.68'}, {'page': '9', 'x': '304.72', 'y': '619.47', 'h': '147.45', 'w': '11.68'}], [{'page': '9', 'x': '456.38', 'y': '619.47', 'h': '82.23', 'w': '11.68'}, {'page': '9', 'x': '304.72', 'y': '631.46', 'h': '233.85', 'w': '11.68'}, {'page': '9', 'x': '304.72', 'y': '643.46', 'h': '81.73', 'w': '11.68'}]]\", 'pages': \"('9', '9')\", 'section_title': 'Discussion', 'section_number': 'None', 'paper_title': 'Elucidating the endogenous synovial fluid proteome and peptidome of inflammatory arthritis using label-free mass spectrometry', 'file_path': '/data/tommaso/llm4scilit/data/papers/2.pdf'}),\n", + " Document(page_content=\"Of the two PsA-specific protein candidates we identified, periostin (POSTN) has been previously investigated in our studies of the PsA tissue proteome as a potential serum marker of PsA [41].Although serum validation of POSTN did not reveal statistically significant differences between PsA and control serum, its elevated levels in both PsA lesional skin as well as SF alludes to an important role of the protein in the pathobiology of PsA and may serve as part of a panel of biomarkers to differentiate between the onset of PsA and RA.Differential abundance analyses of peptide sequences identified 11 peptides to be significantly dysregulated in IA SF compared to the control group.Upregulated peptide sequences were primarily derived from FGA while single sequences originated from collagen type I alpha 1 (COL1A1) and coiled-coil serine rich protein 2 (CCSER2).All significant FGA-derived peptide fragments were representative of the 16-amino acid residue (ADSGEGDFLAEGGGVR) of fibrinopeptide A (FpA) located at the NH 2 -terminal end of FGA.The lack of detection of the full-length FpA peptide sequence in IA SF can be rationalized by the peptide's short half-life of 3-5 min in the blood plasma [42].FpA is a cleavage product of thrombin-induced conversion of fibrinogen into a fibrin clot.Fibrin deposition in the SF or on the synovial membrane is a consistent feature of IA and is believed to perpetuate inflammation and joint tissue destruction through synovial cell activation [43,44].Liu et al. demonstrated that stimulation of synovial fibroblasts with fibrin(ogen) resulted in the upregulated expression of IL-8 and intercellular adhesion molecule 1 (ICAM-1) for the recruitment and retention, respectively, of lymphocytes within the arthritic joint [43].Elevated abundance of FGA and FpA in serum has been observed in patients with inflammation-associated diseases including systemic lupus erythematosus, Crohn's disease, ischemic heart disease and gastric cancer [45][46][47][48].These findings highlight the non-specific indication of inflammation by FpA and its associated peptide fragments, and advocates for its utility as a sensitivity index of disease activity in patients with IA.Moreover, targeting FGA in the synovial joint may be a necessary therapeutic intervention to modulate the inflammatory response.Comparisons of peptide abundance between RA and PsA identified FGA and FGB-related peptide sequences to be consistently downregulated in RA relative to PsA.Although this may be indicative of a discriminatory ability for FGA and FGB peptide fragments to differentiate between the onset of RA and PsA, this outcome does not corroborate with the finding that RA patients are at a greater increased risk of venous thromboembolism relative to PsA patients [49].Targeted quantification in a second set of SF samples is necessary to verify this finding.\", metadata={'text': \"Of the two PsA-specific protein candidates we identified, periostin (POSTN) has been previously investigated in our studies of the PsA tissue proteome as a potential serum marker of PsA [41].Although serum validation of POSTN did not reveal statistically significant differences between PsA and control serum, its elevated levels in both PsA lesional skin as well as SF alludes to an important role of the protein in the pathobiology of PsA and may serve as part of a panel of biomarkers to differentiate between the onset of PsA and RA.Differential abundance analyses of peptide sequences identified 11 peptides to be significantly dysregulated in IA SF compared to the control group.Upregulated peptide sequences were primarily derived from FGA while single sequences originated from collagen type I alpha 1 (COL1A1) and coiled-coil serine rich protein 2 (CCSER2).All significant FGA-derived peptide fragments were representative of the 16-amino acid residue (ADSGEGDFLAEGGGVR) of fibrinopeptide A (FpA) located at the NH 2 -terminal end of FGA.The lack of detection of the full-length FpA peptide sequence in IA SF can be rationalized by the peptide's short half-life of 3-5 min in the blood plasma [42].FpA is a cleavage product of thrombin-induced conversion of fibrinogen into a fibrin clot.Fibrin deposition in the SF or on the synovial membrane is a consistent feature of IA and is believed to perpetuate inflammation and joint tissue destruction through synovial cell activation [43,44].Liu et al. demonstrated that stimulation of synovial fibroblasts with fibrin(ogen) resulted in the upregulated expression of IL-8 and intercellular adhesion molecule 1 (ICAM-1) for the recruitment and retention, respectively, of lymphocytes within the arthritic joint [43].Elevated abundance of FGA and FpA in serum has been observed in patients with inflammation-associated diseases including systemic lupus erythematosus, Crohn's disease, ischemic heart disease and gastric cancer [45][46][47][48].These findings highlight the non-specific indication of inflammation by FpA and its associated peptide fragments, and advocates for its utility as a sensitivity index of disease activity in patients with IA.Moreover, targeting FGA in the synovial joint may be a necessary therapeutic intervention to modulate the inflammatory response.Comparisons of peptide abundance between RA and PsA identified FGA and FGB-related peptide sequences to be consistently downregulated in RA relative to PsA.Although this may be indicative of a discriminatory ability for FGA and FGB peptide fragments to differentiate between the onset of RA and PsA, this outcome does not corroborate with the finding that RA patients are at a greater increased risk of venous thromboembolism relative to PsA patients [49].Targeted quantification in a second set of SF samples is necessary to verify this finding.\", 'para': '14', 'bboxes': \"[[{'page': '9', 'x': '312.72', 'y': '655.45', 'h': '225.86', 'w': '11.68'}, {'page': '9', 'x': '304.72', 'y': '667.49', 'h': '233.85', 'w': '11.68'}, {'page': '9', 'x': '304.72', 'y': '679.48', 'h': '233.85', 'w': '11.68'}, {'page': '9', 'x': '304.72', 'y': '691.48', 'h': '109.36', 'w': '11.68'}], [{'page': '9', 'x': '416.94', 'y': '691.48', 'h': '121.62', 'w': '11.68'}, {'page': '9', 'x': '304.72', 'y': '703.48', 'h': '233.88', 'w': '11.68'}, {'page': '10', 'x': '56.69', 'y': '88.58', 'h': '233.83', 'w': '11.68'}, {'page': '10', 'x': '56.69', 'y': '100.58', 'h': '233.86', 'w': '11.68'}, {'page': '10', 'x': '56.69', 'y': '112.58', 'h': '233.87', 'w': '11.68'}, {'page': '10', 'x': '56.69', 'y': '124.58', 'h': '233.86', 'w': '11.68'}, {'page': '10', 'x': '56.69', 'y': '136.57', 'h': '139.01', 'w': '11.68'}], [{'page': '10', 'x': '64.69', 'y': '148.57', 'h': '225.85', 'w': '11.68'}, {'page': '10', 'x': '56.69', 'y': '160.56', 'h': '233.87', 'w': '11.68'}, {'page': '10', 'x': '56.69', 'y': '172.56', 'h': '178.64', 'w': '11.68'}], [{'page': '10', 'x': '239.84', 'y': '172.56', 'h': '50.72', 'w': '11.68'}, {'page': '10', 'x': '56.69', 'y': '184.55', 'h': '233.85', 'w': '11.68'}, {'page': '10', 'x': '56.69', 'y': '196.55', 'h': '233.84', 'w': '11.68'}, {'page': '10', 'x': '56.69', 'y': '208.54', 'h': '233.87', 'w': '11.68'}, {'page': '10', 'x': '56.69', 'y': '220.54', 'h': '53.19', 'w': '11.68'}], [{'page': '10', 'x': '114.36', 'y': '220.54', 'h': '176.20', 'w': '11.68'}, {'page': '10', 'x': '56.69', 'y': '232.58', 'h': '233.88', 'w': '11.68'}, {'page': '10', 'x': '56.69', 'y': '244.58', 'h': '233.84', 'w': '11.68'}, {'page': '10', 'x': '56.69', 'y': '256.57', 'h': '79.28', 'w': '11.68'}, {'page': '10', 'x': '135.97', 'y': '260.97', 'h': '3.44', 'w': '8.18'}, {'page': '10', 'x': '139.41', 'y': '256.58', 'h': '97.59', 'w': '11.68'}], [{'page': '10', 'x': '241.58', 'y': '256.58', 'h': '48.99', 'w': '11.68'}, {'page': '10', 'x': '56.69', 'y': '268.58', 'h': '233.87', 'w': '11.68'}, {'page': '10', 'x': '56.69', 'y': '280.57', 'h': '233.85', 'w': '11.68'}, {'page': '10', 'x': '56.69', 'y': '292.57', 'h': '137.51', 'w': '11.68'}], [{'page': '10', 'x': '196.42', 'y': '292.57', 'h': '94.13', 'w': '11.68'}, {'page': '10', 'x': '56.69', 'y': '304.58', 'h': '233.85', 'w': '11.68'}, {'page': '10', 'x': '56.69', 'y': '316.58', 'h': '43.08', 'w': '11.68'}], [{'page': '10', 'x': '102.60', 'y': '316.58', 'h': '187.95', 'w': '11.68'}, {'page': '10', 'x': '56.69', 'y': '328.57', 'h': '233.87', 'w': '11.68'}, {'page': '10', 'x': '56.69', 'y': '340.57', 'h': '233.85', 'w': '11.68'}, {'page': '10', 'x': '56.69', 'y': '352.56', 'h': '168.11', 'w': '11.68'}], [{'page': '10', 'x': '228.21', 'y': '352.56', 'h': '62.34', 'w': '11.68'}, {'page': '10', 'x': '56.69', 'y': '364.58', 'h': '233.86', 'w': '11.68'}, {'page': '10', 'x': '56.69', 'y': '376.58', 'h': '233.85', 'w': '11.68'}, {'page': '10', 'x': '56.69', 'y': '388.57', 'h': '233.84', 'w': '11.68'}, {'page': '10', 'x': '56.69', 'y': '400.57', 'h': '233.86', 'w': '11.68'}, {'page': '10', 'x': '56.69', 'y': '412.58', 'h': '147.99', 'w': '11.68'}], [{'page': '10', 'x': '208.01', 'y': '412.58', 'h': '82.54', 'w': '11.68'}, {'page': '10', 'x': '56.69', 'y': '424.58', 'h': '233.88', 'w': '11.68'}, {'page': '10', 'x': '56.69', 'y': '436.57', 'h': '233.86', 'w': '11.68'}, {'page': '10', 'x': '56.69', 'y': '448.58', 'h': '233.85', 'w': '11.68'}, {'page': '10', 'x': '56.69', 'y': '460.58', 'h': '170.75', 'w': '11.68'}], [{'page': '10', 'x': '230.70', 'y': '460.58', 'h': '59.85', 'w': '11.68'}, {'page': '10', 'x': '56.69', 'y': '472.57', 'h': '233.86', 'w': '11.68'}, {'page': '10', 'x': '56.69', 'y': '484.57', 'h': '233.84', 'w': '11.68'}, {'page': '10', 'x': '56.69', 'y': '496.56', 'h': '233.86', 'w': '11.68'}, {'page': '10', 'x': '56.69', 'y': '508.56', 'h': '70.91', 'w': '11.68'}], [{'page': '10', 'x': '131.12', 'y': '508.56', 'h': '159.43', 'w': '11.68'}, {'page': '10', 'x': '56.69', 'y': '520.58', 'h': '233.86', 'w': '11.68'}, {'page': '10', 'x': '56.69', 'y': '532.58', 'h': '161.14', 'w': '11.68'}], [{'page': '10', 'x': '222.86', 'y': '532.58', 'h': '67.69', 'w': '11.68'}, {'page': '10', 'x': '56.69', 'y': '544.57', 'h': '233.88', 'w': '11.68'}, {'page': '10', 'x': '56.69', 'y': '556.57', 'h': '233.87', 'w': '11.68'}, {'page': '10', 'x': '56.69', 'y': '568.56', 'h': '154.32', 'w': '11.68'}], [{'page': '10', 'x': '213.92', 'y': '568.56', 'h': '76.65', 'w': '11.68'}, {'page': '10', 'x': '56.69', 'y': '580.56', 'h': '233.84', 'w': '11.68'}, {'page': '10', 'x': '56.69', 'y': '592.55', 'h': '233.85', 'w': '11.68'}, {'page': '10', 'x': '56.69', 'y': '604.55', 'h': '233.88', 'w': '11.68'}, {'page': '10', 'x': '56.69', 'y': '616.54', 'h': '233.86', 'w': '11.68'}, {'page': '10', 'x': '56.69', 'y': '628.54', 'h': '233.87', 'w': '11.68'}], [{'page': '10', 'x': '56.69', 'y': '640.53', 'h': '233.85', 'w': '11.68'}, {'page': '10', 'x': '56.69', 'y': '652.53', 'h': '126.90', 'w': '11.68'}]]\", 'pages': \"('9', '10')\", 'section_title': 'Discussion', 'section_number': 'None', 'paper_title': 'Elucidating the endogenous synovial fluid proteome and peptidome of inflammatory arthritis using label-free mass spectrometry', 'file_path': '/data/tommaso/llm4scilit/data/papers/2.pdf'}),\n", + " Document(page_content='The advent of high-throughput microbial DNA sequencing has marked a renewed interest in the complex interplay of the intestinal microbiome and inflammatory diseases.Studies suggest that the induction of autoimmunity is closely linked to intestinal dysbiosis and leads to distal synovitis and joint pathology [50].There exist several protective mechanisms to prevent changes in the gut microbiota including the physicochemical barrier of antimicrobial proteins and peptides.AMPs are a collective of naturally-occurring, cationic peptides released by lymphocytes of the innate immune system.Of the 26 peptides predicted to have antimicrobial activity, 13 of them originated from FGA or FGB precursor proteins.Despite the pro-inflammatory impression associated with the accumulation of FGA and FGB in the SF, their presence may be critical to the activation of microbicidal activity.Soluble fibrinogen and fibrin matrices have demonstrated antimicrobial host defense through their ability to physically entrap bacteria in addition to the recruitment and engagement of host immune cells which in turn, facilitate the removal of invading pathogens [51].Taken together, the deposition of fibrin during the progression of IA may initially serve the favourable purpose of limiting bacterial infection through the activation of antimicrobial host defense mechanisms.However, its added role in the recruitment and activation of leukocytes may exacerbate synovial joint inflammation thereby fueling joint disease.', metadata={'text': 'The advent of high-throughput microbial DNA sequencing has marked a renewed interest in the complex interplay of the intestinal microbiome and inflammatory diseases.Studies suggest that the induction of autoimmunity is closely linked to intestinal dysbiosis and leads to distal synovitis and joint pathology [50].There exist several protective mechanisms to prevent changes in the gut microbiota including the physicochemical barrier of antimicrobial proteins and peptides.AMPs are a collective of naturally-occurring, cationic peptides released by lymphocytes of the innate immune system.Of the 26 peptides predicted to have antimicrobial activity, 13 of them originated from FGA or FGB precursor proteins.Despite the pro-inflammatory impression associated with the accumulation of FGA and FGB in the SF, their presence may be critical to the activation of microbicidal activity.Soluble fibrinogen and fibrin matrices have demonstrated antimicrobial host defense through their ability to physically entrap bacteria in addition to the recruitment and engagement of host immune cells which in turn, facilitate the removal of invading pathogens [51].Taken together, the deposition of fibrin during the progression of IA may initially serve the favourable purpose of limiting bacterial infection through the activation of antimicrobial host defense mechanisms.However, its added role in the recruitment and activation of leukocytes may exacerbate synovial joint inflammation thereby fueling joint disease.', 'para': '8', 'bboxes': \"[[{'page': '10', 'x': '64.69', 'y': '664.52', 'h': '225.87', 'w': '11.68'}, {'page': '10', 'x': '56.69', 'y': '676.52', 'h': '233.86', 'w': '11.68'}, {'page': '10', 'x': '56.69', 'y': '688.58', 'h': '233.86', 'w': '11.68'}, {'page': '10', 'x': '56.69', 'y': '700.58', 'h': '69.66', 'w': '11.68'}], [{'page': '10', 'x': '130.92', 'y': '700.58', 'h': '159.66', 'w': '11.68'}, {'page': '10', 'x': '56.69', 'y': '712.58', 'h': '233.88', 'w': '11.68'}, {'page': '10', 'x': '304.72', 'y': '88.58', 'h': '206.58', 'w': '11.68'}], [{'page': '10', 'x': '515.04', 'y': '88.58', 'h': '23.54', 'w': '11.68'}, {'page': '10', 'x': '304.72', 'y': '100.58', 'h': '233.85', 'w': '11.68'}, {'page': '10', 'x': '304.72', 'y': '112.57', 'h': '233.88', 'w': '11.68'}, {'page': '10', 'x': '304.72', 'y': '124.57', 'h': '203.03', 'w': '11.68'}], [{'page': '10', 'x': '512.97', 'y': '124.57', 'h': '25.59', 'w': '11.68'}, {'page': '10', 'x': '304.72', 'y': '136.56', 'h': '233.84', 'w': '11.68'}, {'page': '10', 'x': '304.72', 'y': '148.56', 'h': '233.88', 'w': '11.68'}], [{'page': '10', 'x': '304.72', 'y': '160.55', 'h': '233.86', 'w': '11.68'}, {'page': '10', 'x': '304.72', 'y': '172.58', 'h': '233.83', 'w': '11.68'}, {'page': '10', 'x': '304.72', 'y': '184.58', 'h': '36.11', 'w': '11.68'}], [{'page': '10', 'x': '343.64', 'y': '184.58', 'h': '194.94', 'w': '11.68'}, {'page': '10', 'x': '304.72', 'y': '196.58', 'h': '233.89', 'w': '11.68'}, {'page': '10', 'x': '304.72', 'y': '208.58', 'h': '233.86', 'w': '11.68'}, {'page': '10', 'x': '304.72', 'y': '220.58', 'h': '63.80', 'w': '11.68'}], [{'page': '10', 'x': '373.48', 'y': '220.58', 'h': '165.11', 'w': '11.68'}, {'page': '10', 'x': '304.72', 'y': '232.58', 'h': '233.84', 'w': '11.68'}, {'page': '10', 'x': '304.72', 'y': '244.57', 'h': '233.88', 'w': '11.68'}, {'page': '10', 'x': '304.72', 'y': '256.57', 'h': '233.86', 'w': '11.68'}, {'page': '10', 'x': '304.72', 'y': '268.56', 'h': '233.86', 'w': '11.68'}, {'page': '10', 'x': '304.72', 'y': '280.58', 'h': '39.63', 'w': '11.68'}], [{'page': '10', 'x': '347.11', 'y': '280.58', 'h': '191.44', 'w': '11.68'}, {'page': '10', 'x': '304.72', 'y': '292.58', 'h': '233.87', 'w': '11.68'}, {'page': '10', 'x': '304.72', 'y': '304.57', 'h': '233.86', 'w': '11.68'}, {'page': '10', 'x': '304.72', 'y': '316.58', 'h': '207.20', 'w': '11.68'}], [{'page': '10', 'x': '515.29', 'y': '316.58', 'h': '23.29', 'w': '11.68'}, {'page': '10', 'x': '304.72', 'y': '328.58', 'h': '233.88', 'w': '11.68'}, {'page': '10', 'x': '304.72', 'y': '340.58', 'h': '233.84', 'w': '11.68'}, {'page': '10', 'x': '304.72', 'y': '352.57', 'h': '116.17', 'w': '11.68'}]]\", 'pages': \"('10', '10')\", 'section_title': 'Discussion', 'section_number': 'None', 'paper_title': 'Elucidating the endogenous synovial fluid proteome and peptidome of inflammatory arthritis using label-free mass spectrometry', 'file_path': '/data/tommaso/llm4scilit/data/papers/2.pdf'}),\n", + " Document(page_content='Though these findings are limited by lack of verification in a subsequent set of SF samples, the identification of IA-specific candidates using a label-free, MS-based approach has shown biological relevance and prospective utility for clinical applications.Future follow-up studies will address verification and validation efforts of selected protein and peptide candidates in a new set of SF and serum samples, respectively.We do acknowledge the limitation of sex discrepancy amongst the IA SF samples in our study which may have influenced the proteins and peptides identified.However, to compensate for this discrepancy between each subtype of IA, our control group consisted of an equal number of male and female SF samples.Moreover, we tested the influence of both sex and age on our data using a linear model and found there to be no effect by either predictor.', metadata={'text': 'Though these findings are limited by lack of verification in a subsequent set of SF samples, the identification of IA-specific candidates using a label-free, MS-based approach has shown biological relevance and prospective utility for clinical applications.Future follow-up studies will address verification and validation efforts of selected protein and peptide candidates in a new set of SF and serum samples, respectively.We do acknowledge the limitation of sex discrepancy amongst the IA SF samples in our study which may have influenced the proteins and peptides identified.However, to compensate for this discrepancy between each subtype of IA, our control group consisted of an equal number of male and female SF samples.Moreover, we tested the influence of both sex and age on our data using a linear model and found there to be no effect by either predictor.', 'para': '4', 'bboxes': \"[[{'page': '10', 'x': '312.72', 'y': '364.57', 'h': '225.86', 'w': '11.68'}, {'page': '10', 'x': '304.72', 'y': '376.58', 'h': '233.87', 'w': '11.68'}, {'page': '10', 'x': '304.72', 'y': '388.58', 'h': '233.86', 'w': '11.68'}, {'page': '10', 'x': '304.72', 'y': '400.57', 'h': '233.86', 'w': '11.68'}, {'page': '10', 'x': '304.72', 'y': '412.58', 'h': '155.45', 'w': '11.68'}], [{'page': '10', 'x': '466.10', 'y': '412.58', 'h': '72.46', 'w': '11.68'}, {'page': '10', 'x': '304.72', 'y': '424.58', 'h': '233.85', 'w': '11.68'}, {'page': '10', 'x': '304.72', 'y': '436.57', 'h': '233.84', 'w': '11.68'}, {'page': '10', 'x': '304.72', 'y': '448.57', 'h': '133.64', 'w': '11.68'}], [{'page': '10', 'x': '440.89', 'y': '448.57', 'h': '97.72', 'w': '11.68'}, {'page': '10', 'x': '304.72', 'y': '460.56', 'h': '233.86', 'w': '11.68'}, {'page': '10', 'x': '304.72', 'y': '472.56', 'h': '233.85', 'w': '11.68'}, {'page': '10', 'x': '304.72', 'y': '484.55', 'h': '78.65', 'w': '11.68'}], [{'page': '10', 'x': '386.26', 'y': '484.55', 'h': '152.32', 'w': '11.68'}, {'page': '10', 'x': '304.72', 'y': '496.58', 'h': '233.87', 'w': '11.68'}, {'page': '10', 'x': '304.72', 'y': '508.58', 'h': '233.86', 'w': '11.68'}, {'page': '10', 'x': '304.72', 'y': '520.58', 'h': '18.62', 'w': '11.68'}], [{'page': '10', 'x': '326.83', 'y': '520.58', 'h': '211.73', 'w': '11.68'}, {'page': '10', 'x': '304.72', 'y': '532.58', 'h': '233.90', 'w': '11.68'}, {'page': '10', 'x': '304.72', 'y': '544.57', 'h': '127.38', 'w': '11.68'}]]\", 'pages': \"('10', '10')\", 'section_title': 'Discussion', 'section_number': 'None', 'paper_title': 'Elucidating the endogenous synovial fluid proteome and peptidome of inflammatory arthritis using label-free mass spectrometry', 'file_path': '/data/tommaso/llm4scilit/data/papers/2.pdf'}),\n", + " Document(page_content='A technical limitation of this study includes the lack of fractionation of digested proteins and peptides which likely contributed to the low fold change ratios of our candidate biomarkers.Pre-fractionation methods are important for reducing the complexity of biological fluids and tissues.The proteomic profile of IA SF is markedly shifted compared to healthy SF with a greater concentration of pro-inflammatory cytokines, immunoglobulins, matrix-degrading enzymes and acute-phase markers.The dynamic range between proteins in diseased SF can vary by a factor of 10 10 [10] and the likelihood, therefore, of masking potentially clinically-relevant proteins within the low-abundance proteome increases and may be exacerbated by analysis of unfractionated biological samples.However, improving accessibility to low-concentration proteins comes at the cost of longer analysis times and lower reproducibility [52].Moreover, fractionation technologies have previously failed to significantly extend the sampling of the proteome relative to the unfractionated proteome [53].', metadata={'text': 'A technical limitation of this study includes the lack of fractionation of digested proteins and peptides which likely contributed to the low fold change ratios of our candidate biomarkers.Pre-fractionation methods are important for reducing the complexity of biological fluids and tissues.The proteomic profile of IA SF is markedly shifted compared to healthy SF with a greater concentration of pro-inflammatory cytokines, immunoglobulins, matrix-degrading enzymes and acute-phase markers.The dynamic range between proteins in diseased SF can vary by a factor of 10 10 [10] and the likelihood, therefore, of masking potentially clinically-relevant proteins within the low-abundance proteome increases and may be exacerbated by analysis of unfractionated biological samples.However, improving accessibility to low-concentration proteins comes at the cost of longer analysis times and lower reproducibility [52].Moreover, fractionation technologies have previously failed to significantly extend the sampling of the proteome relative to the unfractionated proteome [53].', 'para': '5', 'bboxes': \"[[{'page': '10', 'x': '312.72', 'y': '556.57', 'h': '225.88', 'w': '11.68'}, {'page': '10', 'x': '304.72', 'y': '568.56', 'h': '233.87', 'w': '11.68'}, {'page': '10', 'x': '304.72', 'y': '580.56', 'h': '233.86', 'w': '11.68'}, {'page': '10', 'x': '304.72', 'y': '592.55', 'h': '95.03', 'w': '11.68'}], [{'page': '10', 'x': '406.46', 'y': '592.55', 'h': '132.13', 'w': '11.68'}, {'page': '10', 'x': '304.72', 'y': '604.55', 'h': '233.86', 'w': '11.68'}, {'page': '10', 'x': '304.72', 'y': '616.54', 'h': '48.33', 'w': '11.68'}], [{'page': '10', 'x': '356.55', 'y': '616.54', 'h': '182.01', 'w': '11.68'}, {'page': '10', 'x': '304.72', 'y': '628.54', 'h': '233.86', 'w': '11.68'}, {'page': '10', 'x': '304.72', 'y': '640.58', 'h': '233.87', 'w': '11.68'}, {'page': '10', 'x': '304.72', 'y': '652.58', 'h': '216.25', 'w': '11.68'}], [{'page': '10', 'x': '523.20', 'y': '652.58', 'h': '15.40', 'w': '11.68'}, {'page': '10', 'x': '304.72', 'y': '664.57', 'h': '233.88', 'w': '11.68'}, {'page': '10', 'x': '304.72', 'y': '676.57', 'h': '70.36', 'w': '11.68'}, {'page': '10', 'x': '375.09', 'y': '674.60', 'h': '6.89', 'w': '8.18'}, {'page': '10', 'x': '385.54', 'y': '676.58', 'h': '153.04', 'w': '11.68'}, {'page': '10', 'x': '304.72', 'y': '688.58', 'h': '233.87', 'w': '11.68'}, {'page': '10', 'x': '304.72', 'y': '700.57', 'h': '233.86', 'w': '11.68'}, {'page': '10', 'x': '304.72', 'y': '712.58', 'h': '233.85', 'w': '11.68'}], [{'page': '11', 'x': '56.69', 'y': '88.58', 'h': '233.86', 'w': '11.68'}, {'page': '11', 'x': '56.69', 'y': '100.58', 'h': '233.88', 'w': '11.68'}, {'page': '11', 'x': '56.69', 'y': '112.57', 'h': '108.37', 'w': '11.68'}], [{'page': '11', 'x': '168.16', 'y': '112.57', 'h': '122.39', 'w': '11.68'}, {'page': '11', 'x': '56.69', 'y': '124.58', 'h': '233.86', 'w': '11.68'}, {'page': '11', 'x': '56.69', 'y': '136.58', 'h': '233.84', 'w': '11.68'}, {'page': '11', 'x': '56.69', 'y': '148.57', 'h': '60.49', 'w': '11.68'}]]\", 'pages': \"('10', '11')\", 'section_title': 'Discussion', 'section_number': 'None', 'paper_title': 'Elucidating the endogenous synovial fluid proteome and peptidome of inflammatory arthritis using label-free mass spectrometry', 'file_path': '/data/tommaso/llm4scilit/data/papers/2.pdf'}),\n", + " Document(page_content='Chronic inflammation in IA is orchestrated by a complex network of signaling pathways which are expected to be represented in the protein and peptide expression patterns of SF.Therefore, proteomic and peptidomic analysis of SF can reflect the molecular underpinnings of IA and enhance our understanding of principal drivers at the apex of this disease.Overall, through the application of high-throughput, label-free MS, this discovery-phase study has generated a comprehensive proteomic dataset representative of IA SF and its specific subtypes.We discovered 5 protein candidates and 10 peptide candidates upregulated in IA SF, of which 3 proteins have yet to be described in IA.Moreover, subtype-specific analyses identified 4 RA-specific protein candidates, 2 PsA-specific protein candidates and 5 PsA-specific peptide candidates.Several of these candidates have been associated with inflammatory pathways at the genetic level but have not been investigated at the protein level and therefore, require functional experimentation to elucidate their role in the pathogenesis of IA.The data presented herein underscores the potential for proteins and peptides to elucidate mechanistic pathways related to the onset of arthritic disease in addition to their capacity to serve as informative clinical biomarkers.', metadata={'text': 'Chronic inflammation in IA is orchestrated by a complex network of signaling pathways which are expected to be represented in the protein and peptide expression patterns of SF.Therefore, proteomic and peptidomic analysis of SF can reflect the molecular underpinnings of IA and enhance our understanding of principal drivers at the apex of this disease.Overall, through the application of high-throughput, label-free MS, this discovery-phase study has generated a comprehensive proteomic dataset representative of IA SF and its specific subtypes.We discovered 5 protein candidates and 10 peptide candidates upregulated in IA SF, of which 3 proteins have yet to be described in IA.Moreover, subtype-specific analyses identified 4 RA-specific protein candidates, 2 PsA-specific protein candidates and 5 PsA-specific peptide candidates.Several of these candidates have been associated with inflammatory pathways at the genetic level but have not been investigated at the protein level and therefore, require functional experimentation to elucidate their role in the pathogenesis of IA.The data presented herein underscores the potential for proteins and peptides to elucidate mechanistic pathways related to the onset of arthritic disease in addition to their capacity to serve as informative clinical biomarkers.', 'para': '6', 'bboxes': \"[[{'page': '11', 'x': '56.69', 'y': '188.44', 'h': '233.86', 'w': '11.68'}, {'page': '11', 'x': '56.69', 'y': '200.44', 'h': '233.85', 'w': '11.68'}, {'page': '11', 'x': '56.69', 'y': '212.43', 'h': '233.86', 'w': '11.68'}, {'page': '11', 'x': '56.69', 'y': '224.44', 'h': '46.64', 'w': '11.68'}], [{'page': '11', 'x': '106.51', 'y': '224.44', 'h': '184.05', 'w': '11.68'}, {'page': '11', 'x': '56.69', 'y': '236.44', 'h': '233.86', 'w': '11.68'}, {'page': '11', 'x': '56.69', 'y': '248.44', 'h': '233.88', 'w': '11.68'}, {'page': '11', 'x': '56.69', 'y': '260.43', 'h': '98.26', 'w': '11.68'}], [{'page': '11', 'x': '157.87', 'y': '260.43', 'h': '132.67', 'w': '11.68'}, {'page': '11', 'x': '56.69', 'y': '272.43', 'h': '233.86', 'w': '11.68'}, {'page': '11', 'x': '56.69', 'y': '284.42', 'h': '233.85', 'w': '11.68'}, {'page': '11', 'x': '56.69', 'y': '296.42', 'h': '199.34', 'w': '11.68'}], [{'page': '11', 'x': '258.69', 'y': '296.42', 'h': '31.87', 'w': '11.68'}, {'page': '11', 'x': '56.69', 'y': '308.44', 'h': '233.86', 'w': '11.68'}, {'page': '11', 'x': '56.69', 'y': '320.44', 'h': '233.84', 'w': '11.68'}, {'page': '11', 'x': '56.69', 'y': '332.43', 'h': '73.25', 'w': '11.68'}], [{'page': '11', 'x': '136.13', 'y': '332.43', 'h': '154.42', 'w': '11.68'}, {'page': '11', 'x': '56.69', 'y': '344.43', 'h': '233.86', 'w': '11.68'}, {'page': '11', 'x': '56.69', 'y': '356.44', 'h': '233.86', 'w': '11.68'}, {'page': '11', 'x': '56.69', 'y': '368.44', 'h': '31.59', 'w': '11.68'}], [{'page': '11', 'x': '91.00', 'y': '368.44', 'h': '199.54', 'w': '11.68'}, {'page': '11', 'x': '56.69', 'y': '380.44', 'h': '233.85', 'w': '11.68'}, {'page': '11', 'x': '56.69', 'y': '392.43', 'h': '233.83', 'w': '11.68'}, {'page': '11', 'x': '56.69', 'y': '404.43', 'h': '233.86', 'w': '11.68'}, {'page': '11', 'x': '56.69', 'y': '416.42', 'h': '124.16', 'w': '11.68'}], [{'page': '11', 'x': '183.48', 'y': '416.42', 'h': '107.07', 'w': '11.68'}, {'page': '11', 'x': '56.69', 'y': '428.42', 'h': '233.86', 'w': '11.68'}, {'page': '11', 'x': '56.69', 'y': '440.41', 'h': '233.85', 'w': '11.68'}, {'page': '11', 'x': '56.69', 'y': '452.41', 'h': '233.86', 'w': '11.68'}, {'page': '11', 'x': '56.69', 'y': '464.40', 'h': '129.87', 'w': '11.68'}]]\", 'pages': \"('11', '11')\", 'section_title': 'Conclusions', 'section_number': 'None', 'paper_title': 'Elucidating the endogenous synovial fluid proteome and peptidome of inflammatory arthritis using label-free mass spectrometry', 'file_path': '/data/tommaso/llm4scilit/data/papers/2.pdf'}),\n", + " Document(page_content='Additional file 1. Table S1: Complete protein group report for proteomics.Table S2: Complete peptide report for proteomics.Table S3: Complete spectra search output for peptidomics.Table S4: Complete list of significantly dysregulated human proteins identified in inflammatory arthritis synovial fluid relative to control synovial fluid.Table S5: Complete list of significantly dysregulated human proteins identified in rheumatoid synovial fluid relative to psoriatic arthritis synovial fluid.Table S6: Functional pathways and regulatory networks associated with significantly dysregulated proteins in IA SF.Table S7: Complete list of significantly dysregulated human peptides identified in inflammatory arthritis synovial fluid relative to control synovial fluid.Table S8: Complete list of significantly dysregulated human peptides identified in rheumatoid synovial fluid relative to psoriatic arthritis synovial fluid.Table S9: Complete list of all predicted antimicrobial peptides in inflammatory arthritis synovial fluid Abbreviations IA: inflammatory arthritis; RA: rheumatoid arthritis; PsA: psoriatic arthritis; MHC: major histocompatibility complex; MS: mass spectrometry; SF: synovial fluid; AMP: antimicrobial peptide; ACR : American College of Rheumatology; CASPAR: classification criteria for psoriatic arthritis; ABC: ammonium bicarbonate; DTT: dithiothreitol; IAM: iodoacetamide; FA: formic acid; LC-MS/MS: liquid chromatography-tandem mass spectrometry; DMSO: dimethyl sulfoxide; ACN: acetonitrile; TFA: trifluoroacetic acid; LFQ: label-free quantification; IPA: ingenuity pathway analysis; DAVID: database for annotation, visualization and integrated discovery; KEGG: Kyoto Encyclopedia of Genes and Genomes; CAMP R3 : collection of anti-microbial peptides; SVM: support vector machine; FGA: fibrinogen alpha chain; CPB2: carboxypeptidase B2; FGB: fibrinogen beta chain; F2: prothrombin; TLR: toll-like receptor; TNF-α: tumor necrosis factor alpha; IL: interleukin; CD5L: CD5 molecule-like; MMP: matrix metalloproteinase; S100: S100 calcium-binding protein; DEFA3: defensin alpha 3; FTO: alphaketoglutarate-dependent dioxygenase; FAM21C: family with sequence similarity 21 member C; TBX3: T-box transcription factor; GWAS: genome-wide association study; SNP: single nucleotide polymorphism; p21: cyclin-dependent kinase inhibitor p21 WAF1 ; OA: osteoarthritis; FCGR3A: immunoglobulin gamma Fc region receptor III-A; POSTN: periostin; PGK1: phosphoglycerate kinase 1; COL1A1: collagen type I alpha 1; CCSER2: coiled-coil serine rich protein 2; FpA: fibrinopeptide A; ICAM-1: intercellular adhesion molecule 1.', metadata={'text': 'Additional file 1. Table S1: Complete protein group report for proteomics.Table S2: Complete peptide report for proteomics.Table S3: Complete spectra search output for peptidomics.Table S4: Complete list of significantly dysregulated human proteins identified in inflammatory arthritis synovial fluid relative to control synovial fluid.Table S5: Complete list of significantly dysregulated human proteins identified in rheumatoid synovial fluid relative to psoriatic arthritis synovial fluid.Table S6: Functional pathways and regulatory networks associated with significantly dysregulated proteins in IA SF.Table S7: Complete list of significantly dysregulated human peptides identified in inflammatory arthritis synovial fluid relative to control synovial fluid.Table S8: Complete list of significantly dysregulated human peptides identified in rheumatoid synovial fluid relative to psoriatic arthritis synovial fluid.Table S9: Complete list of all predicted antimicrobial peptides in inflammatory arthritis synovial fluid Abbreviations IA: inflammatory arthritis; RA: rheumatoid arthritis; PsA: psoriatic arthritis; MHC: major histocompatibility complex; MS: mass spectrometry; SF: synovial fluid; AMP: antimicrobial peptide; ACR : American College of Rheumatology; CASPAR: classification criteria for psoriatic arthritis; ABC: ammonium bicarbonate; DTT: dithiothreitol; IAM: iodoacetamide; FA: formic acid; LC-MS/MS: liquid chromatography-tandem mass spectrometry; DMSO: dimethyl sulfoxide; ACN: acetonitrile; TFA: trifluoroacetic acid; LFQ: label-free quantification; IPA: ingenuity pathway analysis; DAVID: database for annotation, visualization and integrated discovery; KEGG: Kyoto Encyclopedia of Genes and Genomes; CAMP R3 : collection of anti-microbial peptides; SVM: support vector machine; FGA: fibrinogen alpha chain; CPB2: carboxypeptidase B2; FGB: fibrinogen beta chain; F2: prothrombin; TLR: toll-like receptor; TNF-α: tumor necrosis factor alpha; IL: interleukin; CD5L: CD5 molecule-like; MMP: matrix metalloproteinase; S100: S100 calcium-binding protein; DEFA3: defensin alpha 3; FTO: alphaketoglutarate-dependent dioxygenase; FAM21C: family with sequence similarity 21 member C; TBX3: T-box transcription factor; GWAS: genome-wide association study; SNP: single nucleotide polymorphism; p21: cyclin-dependent kinase inhibitor p21 WAF1 ; OA: osteoarthritis; FCGR3A: immunoglobulin gamma Fc region receptor III-A; POSTN: periostin; PGK1: phosphoglycerate kinase 1; COL1A1: collagen type I alpha 1; CCSER2: coiled-coil serine rich protein 2; FpA: fibrinopeptide A; ICAM-1: intercellular adhesion molecule 1.', 'para': '8', 'bboxes': \"[[{'page': '11', 'x': '62.69', 'y': '530.70', 'h': '204.27', 'w': '6.93'}, {'page': '11', 'x': '62.69', 'y': '539.70', 'h': '22.70', 'w': '6.93'}], [{'page': '11', 'x': '87.04', 'y': '539.70', 'h': '151.96', 'w': '6.93'}], [{'page': '11', 'x': '240.64', 'y': '539.70', 'h': '28.68', 'w': '6.93'}, {'page': '11', 'x': '62.69', 'y': '548.70', 'h': '146.86', 'w': '6.93'}], [{'page': '11', 'x': '211.19', 'y': '548.70', 'h': '69.88', 'w': '6.93'}, {'page': '11', 'x': '62.69', 'y': '557.70', 'h': '212.59', 'w': '6.93'}, {'page': '11', 'x': '62.69', 'y': '566.70', 'h': '159.22', 'w': '6.93'}], [{'page': '11', 'x': '223.55', 'y': '566.70', 'h': '59.96', 'w': '6.93'}, {'page': '11', 'x': '62.69', 'y': '575.70', 'h': '207.88', 'w': '6.93'}, {'page': '11', 'x': '62.69', 'y': '584.70', 'h': '176.78', 'w': '6.93'}], [{'page': '11', 'x': '241.12', 'y': '584.70', 'h': '28.68', 'w': '6.93'}, {'page': '11', 'x': '62.69', 'y': '593.70', 'h': '221.85', 'w': '6.93'}, {'page': '11', 'x': '62.69', 'y': '602.70', 'h': '89.17', 'w': '6.93'}], [{'page': '11', 'x': '153.50', 'y': '602.70', 'h': '114.70', 'w': '6.93'}, {'page': '11', 'x': '62.69', 'y': '611.70', 'h': '219.50', 'w': '6.93'}, {'page': '11', 'x': '62.69', 'y': '620.70', 'h': '109.25', 'w': '6.93'}], [{'page': '11', 'x': '173.58', 'y': '620.70', 'h': '98.95', 'w': '6.93'}, {'page': '11', 'x': '62.69', 'y': '629.70', 'h': '209.87', 'w': '6.93'}, {'page': '11', 'x': '62.69', 'y': '638.70', 'h': '137.56', 'w': '6.93'}], [{'page': '11', 'x': '201.90', 'y': '638.70', 'h': '77.47', 'w': '6.93'}, {'page': '11', 'x': '62.69', 'y': '647.70', 'h': '220.05', 'w': '6.93'}, {'page': '11', 'x': '56.69', 'y': '673.71', 'h': '45.65', 'w': '6.93'}, {'page': '11', 'x': '56.69', 'y': '682.71', 'h': '232.75', 'w': '6.93'}, {'page': '11', 'x': '56.69', 'y': '691.71', 'h': '226.35', 'w': '6.93'}, {'page': '11', 'x': '56.69', 'y': '700.71', 'h': '233.85', 'w': '6.93'}, {'page': '11', 'x': '56.69', 'y': '709.71', 'h': '213.67', 'w': '6.93'}, {'page': '11', 'x': '56.69', 'y': '718.71', 'h': '218.10', 'w': '6.93'}, {'page': '11', 'x': '304.72', 'y': '88.58', 'h': '216.40', 'w': '6.93'}, {'page': '11', 'x': '304.72', 'y': '97.58', 'h': '223.99', 'w': '6.93'}, {'page': '11', 'x': '304.72', 'y': '106.58', 'h': '216.69', 'w': '6.93'}, {'page': '11', 'x': '304.72', 'y': '115.58', 'h': '229.09', 'w': '6.93'}, {'page': '11', 'x': '304.72', 'y': '124.58', 'h': '226.88', 'w': '8.24'}, {'page': '11', 'x': '304.72', 'y': '133.58', 'h': '231.25', 'w': '6.93'}, {'page': '11', 'x': '304.72', 'y': '142.58', 'h': '220.08', 'w': '6.93'}, {'page': '11', 'x': '304.72', 'y': '151.58', 'h': '233.55', 'w': '6.93'}, {'page': '11', 'x': '304.72', 'y': '160.58', 'h': '216.71', 'w': '6.93'}, {'page': '11', 'x': '304.72', 'y': '169.58', 'h': '232.85', 'w': '6.93'}, {'page': '11', 'x': '304.72', 'y': '178.58', 'h': '229.01', 'w': '6.93'}, {'page': '11', 'x': '304.72', 'y': '187.58', 'h': '224.24', 'w': '6.93'}, {'page': '11', 'x': '304.72', 'y': '195.10', 'h': '231.14', 'w': '8.41'}, {'page': '11', 'x': '304.73', 'y': '205.58', 'h': '223.60', 'w': '6.93'}, {'page': '11', 'x': '304.73', 'y': '214.58', 'h': '231.67', 'w': '6.93'}, {'page': '11', 'x': '304.73', 'y': '223.58', 'h': '177.35', 'w': '6.93'}]]\", 'pages': \"('11', '11')\", 'section_title': 'Additional file', 'section_number': 'None', 'paper_title': 'Elucidating the endogenous synovial fluid proteome and peptidome of inflammatory arthritis using label-free mass spectrometry', 'file_path': '/data/tommaso/llm4scilit/data/papers/2.pdf'}),\n", + " Document(page_content='We thank Ihor Batruch for his support in mass spectrometric analysis.', metadata={'text': 'We thank Ihor Batruch for his support in mass spectrometric analysis.', 'para': '0', 'bboxes': \"[[{'page': '11', 'x': '304.73', 'y': '250.58', 'h': '204.70', 'w': '6.93'}]]\", 'pages': \"('11', '11')\", 'section_title': 'Acknowledgements', 'section_number': 'None', 'paper_title': 'Elucidating the endogenous synovial fluid proteome and peptidome of inflammatory arthritis using label-free mass spectrometry', 'file_path': '/data/tommaso/llm4scilit/data/papers/2.pdf'}),\n", + " Document(page_content='We thank Ihor Batruch for his support in mass spectrometric analysis.', metadata={'text': 'We thank Ihor Batruch for his support in mass spectrometric analysis.', 'para': '0', 'bboxes': \"[[{'page': '11', 'x': '304.73', 'y': '250.58', 'h': '204.70', 'w': '6.93'}]]\", 'pages': \"('11', '11')\", 'section_title': 'Acknowledgements', 'section_number': 'None', 'paper_title': 'Elucidating the endogenous synovial fluid proteome and peptidome of inflammatory arthritis using label-free mass spectrometry', 'file_path': '/data/tommaso/llm4scilit/data/papers/2.pdf'}),\n", + " Document(page_content='The mass spectrometry proteomics and peptidomics datasets supporting the conclusions of this article are available in the PRIDE Archive via the PRIDE partner repository with the data set identifier PXD011872; http://www.ebi.ac.uk/ pride /archi ve/ (username: reviewer92309@ebi.ac.uk and password: 3hXihB2 s).', metadata={'text': 'The mass spectrometry proteomics and peptidomics datasets supporting the conclusions of this article are available in the PRIDE Archive via the PRIDE partner repository with the data set identifier PXD011872; http://www.ebi.ac.uk/ pride /archi ve/ (username: reviewer92309@ebi.ac.uk and password: 3hXihB2 s).', 'para': '0', 'bboxes': \"[[{'page': '11', 'x': '304.73', 'y': '385.58', 'h': '231.09', 'w': '6.93'}, {'page': '11', 'x': '304.73', 'y': '394.58', 'h': '232.71', 'w': '6.93'}, {'page': '11', 'x': '304.73', 'y': '403.58', 'h': '226.34', 'w': '6.93'}, {'page': '11', 'x': '304.73', 'y': '412.58', 'h': '232.94', 'w': '6.93'}]]\", 'pages': \"('11', '11')\", 'section_title': 'Availability of data and materials', 'section_number': 'None', 'paper_title': 'Elucidating the endogenous synovial fluid proteome and peptidome of inflammatory arthritis using label-free mass spectrometry', 'file_path': '/data/tommaso/llm4scilit/data/papers/2.pdf'}),\n", + " Document(page_content='The mass spectrometry proteomics and peptidomics datasets supporting the conclusions of this article are available in the PRIDE Archive via the PRIDE partner repository with the data set identifier PXD011872; http://www.ebi.ac.uk/ pride /archi ve/ (username: reviewer92309@ebi.ac.uk and password: 3hXihB2 s).', metadata={'text': 'The mass spectrometry proteomics and peptidomics datasets supporting the conclusions of this article are available in the PRIDE Archive via the PRIDE partner repository with the data set identifier PXD011872; http://www.ebi.ac.uk/ pride /archi ve/ (username: reviewer92309@ebi.ac.uk and password: 3hXihB2 s).', 'para': '0', 'bboxes': \"[[{'page': '11', 'x': '304.73', 'y': '385.58', 'h': '231.09', 'w': '6.93'}, {'page': '11', 'x': '304.73', 'y': '394.58', 'h': '232.71', 'w': '6.93'}, {'page': '11', 'x': '304.73', 'y': '403.58', 'h': '226.34', 'w': '6.93'}, {'page': '11', 'x': '304.73', 'y': '412.58', 'h': '232.94', 'w': '6.93'}]]\", 'pages': \"('11', '11')\", 'section_title': 'Availability of data and materials', 'section_number': 'None', 'paper_title': 'Elucidating the endogenous synovial fluid proteome and peptidome of inflammatory arthritis using label-free mass spectrometry', 'file_path': '/data/tommaso/llm4scilit/data/papers/2.pdf'}),\n", + " Document(page_content='This work was supported by The Krembil Foundation.', metadata={'text': 'This work was supported by The Krembil Foundation.', 'para': '0', 'bboxes': \"[[{'page': '11', 'x': '304.73', 'y': '358.58', 'h': '158.35', 'w': '6.93'}]]\", 'pages': \"('11', '11')\", 'section_title': 'Funding', 'section_number': 'None', 'paper_title': 'Elucidating the endogenous synovial fluid proteome and peptidome of inflammatory arthritis using label-free mass spectrometry', 'file_path': '/data/tommaso/llm4scilit/data/papers/2.pdf'}),\n", + " Document(page_content='This work was supported by The Krembil Foundation.', metadata={'text': 'This work was supported by The Krembil Foundation.', 'para': '0', 'bboxes': \"[[{'page': '11', 'x': '304.73', 'y': '358.58', 'h': '158.35', 'w': '6.93'}]]\", 'pages': \"('11', '11')\", 'section_title': 'Funding', 'section_number': 'None', 'paper_title': 'Elucidating the endogenous synovial fluid proteome and peptidome of inflammatory arthritis using label-free mass spectrometry', 'file_path': '/data/tommaso/llm4scilit/data/papers/2.pdf'}),\n", + " Document(page_content=\"Authors' contributions SM, EPD, and VC participated in the conceptualization of the study and experimental design.VC provided RA and PsA SF samples.EK provided RA SF samples and RK provided control cadaveric SF samples.IB provided mass spectrometry expertise and assisted with mass spectrometry analysis.KL provided statistical expertise and assisted with the statistical analysis.SM drafted the manuscript.SM, EPD, and VC prepared the final version of the manuscript.All authors read and approved the final manuscript.\", metadata={'text': \"Authors' contributions SM, EPD, and VC participated in the conceptualization of the study and experimental design.VC provided RA and PsA SF samples.EK provided RA SF samples and RK provided control cadaveric SF samples.IB provided mass spectrometry expertise and assisted with mass spectrometry analysis.KL provided statistical expertise and assisted with the statistical analysis.SM drafted the manuscript.SM, EPD, and VC prepared the final version of the manuscript.All authors read and approved the final manuscript.\", 'para': '7', 'bboxes': \"[[{'page': '11', 'x': '304.73', 'y': '268.58', 'h': '72.39', 'w': '6.93'}, {'page': '11', 'x': '304.73', 'y': '277.58', 'h': '210.67', 'w': '6.93'}, {'page': '11', 'x': '304.73', 'y': '286.58', 'h': '62.65', 'w': '6.93'}], [{'page': '11', 'x': '368.75', 'y': '286.58', 'h': '108.10', 'w': '6.93'}], [{'page': '11', 'x': '478.50', 'y': '286.58', 'h': '45.67', 'w': '6.93'}, {'page': '11', 'x': '304.73', 'y': '295.58', 'h': '173.00', 'w': '6.93'}], [{'page': '11', 'x': '479.37', 'y': '295.58', 'h': '50.78', 'w': '6.93'}, {'page': '11', 'x': '304.73', 'y': '304.58', 'h': '206.83', 'w': '6.93'}], [{'page': '11', 'x': '513.20', 'y': '304.58', 'h': '21.33', 'w': '6.93'}, {'page': '11', 'x': '304.73', 'y': '313.58', 'h': '194.37', 'w': '6.93'}], [{'page': '11', 'x': '500.74', 'y': '313.58', 'h': '32.68', 'w': '6.93'}, {'page': '11', 'x': '304.73', 'y': '322.58', 'h': '46.91', 'w': '6.93'}], [{'page': '11', 'x': '353.27', 'y': '322.58', 'h': '182.31', 'w': '6.93'}], [{'page': '11', 'x': '304.73', 'y': '331.58', 'h': '153.28', 'w': '6.93'}]]\", 'pages': \"('11', '11')\", 'section_title': 'Ethics approval and consent to participate', 'section_number': 'None', 'paper_title': 'Elucidating the endogenous synovial fluid proteome and peptidome of inflammatory arthritis using label-free mass spectrometry', 'file_path': '/data/tommaso/llm4scilit/data/papers/2.pdf'}),\n", + " Document(page_content='Human research ethics board approval was received for the study from the University Health Network, Mount Sinai Hospital and the University of Calgary.Informed consent was obtained from all patients.', metadata={'text': 'Human research ethics board approval was received for the study from the University Health Network, Mount Sinai Hospital and the University of Calgary.Informed consent was obtained from all patients.', 'para': '1', 'bboxes': \"[[{'page': '11', 'x': '304.73', 'y': '439.58', 'h': '222.73', 'w': '6.93'}, {'page': '11', 'x': '304.73', 'y': '448.58', 'h': '231.99', 'w': '6.93'}], [{'page': '11', 'x': '304.73', 'y': '457.58', 'h': '146.68', 'w': '6.93'}]]\", 'pages': \"('11', '11')\", 'section_title': 'Ethics approval and consent to participate', 'section_number': 'None', 'paper_title': 'Elucidating the endogenous synovial fluid proteome and peptidome of inflammatory arthritis using label-free mass spectrometry', 'file_path': '/data/tommaso/llm4scilit/data/papers/2.pdf'}),\n", + " Document(page_content='Not applicable.', metadata={'text': 'Not applicable.', 'para': '0', 'bboxes': \"[[{'page': '11', 'x': '304.73', 'y': '484.58', 'h': '44.93', 'w': '6.93'}]]\", 'pages': \"('11', '11')\", 'section_title': 'Ethics approval and consent to participate', 'section_number': 'None', 'paper_title': 'Elucidating the endogenous synovial fluid proteome and peptidome of inflammatory arthritis using label-free mass spectrometry', 'file_path': '/data/tommaso/llm4scilit/data/papers/2.pdf'}),\n", + " Document(page_content='The authors declare that they have no competing financial interest.', metadata={'text': 'The authors declare that they have no competing financial interest.', 'para': '0', 'bboxes': \"[[{'page': '11', 'x': '304.73', 'y': '511.58', 'h': '200.08', 'w': '6.93'}]]\", 'pages': \"('11', '11')\", 'section_title': 'Ethics approval and consent to participate', 'section_number': 'None', 'paper_title': 'Elucidating the endogenous synovial fluid proteome and peptidome of inflammatory arthritis using label-free mass spectrometry', 'file_path': '/data/tommaso/llm4scilit/data/papers/2.pdf'}),\n", + " Document(page_content='Springer Nature remains neutral with regard to jurisdictional claims in published maps and institutional affiliations.', metadata={'text': 'Springer Nature remains neutral with regard to jurisdictional claims in published maps and institutional affiliations.', 'para': '0', 'bboxes': \"[[{'page': '13', 'x': '304.72', 'y': '193.58', 'h': '223.21', 'w': '6.93'}, {'page': '13', 'x': '304.72', 'y': '202.58', 'h': '119.32', 'w': '6.93'}]]\", 'pages': \"('13', '13')\", 'section_title': 'Ethics approval and consent to participate', 'section_number': 'None', 'paper_title': 'Elucidating the endogenous synovial fluid proteome and peptidome of inflammatory arthritis using label-free mass spectrometry', 'file_path': '/data/tommaso/llm4scilit/data/papers/2.pdf'}),\n", + " Document(page_content='Human research ethics board approval was received for the study from the University Health Network, Mount Sinai Hospital and the University of Calgary.Informed consent was obtained from all patients.', metadata={'text': 'Human research ethics board approval was received for the study from the University Health Network, Mount Sinai Hospital and the University of Calgary.Informed consent was obtained from all patients.', 'para': '1', 'bboxes': \"[[{'page': '11', 'x': '304.73', 'y': '439.58', 'h': '222.73', 'w': '6.93'}, {'page': '11', 'x': '304.73', 'y': '448.58', 'h': '231.99', 'w': '6.93'}], [{'page': '11', 'x': '304.73', 'y': '457.58', 'h': '146.68', 'w': '6.93'}]]\", 'pages': \"('11', '11')\", 'section_title': 'Ethics approval and consent to participate', 'section_number': 'None', 'paper_title': 'Elucidating the endogenous synovial fluid proteome and peptidome of inflammatory arthritis using label-free mass spectrometry', 'file_path': '/data/tommaso/llm4scilit/data/papers/2.pdf'}),\n", + " Document(page_content='Not applicable.', metadata={'text': 'Not applicable.', 'para': '0', 'bboxes': \"[[{'page': '11', 'x': '304.73', 'y': '484.58', 'h': '44.93', 'w': '6.93'}]]\", 'pages': \"('11', '11')\", 'section_title': 'Consent for publication', 'section_number': 'None', 'paper_title': 'Elucidating the endogenous synovial fluid proteome and peptidome of inflammatory arthritis using label-free mass spectrometry', 'file_path': '/data/tommaso/llm4scilit/data/papers/2.pdf'}),\n", + " Document(page_content='The authors declare that they have no competing financial interest.', metadata={'text': 'The authors declare that they have no competing financial interest.', 'para': '0', 'bboxes': \"[[{'page': '11', 'x': '304.73', 'y': '511.58', 'h': '200.08', 'w': '6.93'}]]\", 'pages': \"('11', '11')\", 'section_title': 'Competing interests', 'section_number': 'None', 'paper_title': 'Elucidating the endogenous synovial fluid proteome and peptidome of inflammatory arthritis using label-free mass spectrometry', 'file_path': '/data/tommaso/llm4scilit/data/papers/2.pdf'}),\n", + " Document(page_content='Springer Nature remains neutral with regard to jurisdictional claims in published maps and institutional affiliations.', metadata={'text': 'Springer Nature remains neutral with regard to jurisdictional claims in published maps and institutional affiliations.', 'para': '0', 'bboxes': \"[[{'page': '13', 'x': '304.72', 'y': '193.58', 'h': '223.21', 'w': '6.93'}, {'page': '13', 'x': '304.72', 'y': '202.58', 'h': '119.32', 'w': '6.93'}]]\", 'pages': \"('13', '13')\", 'section_title': \"Publisher's Note\", 'section_number': 'None', 'paper_title': 'Elucidating the endogenous synovial fluid proteome and peptidome of inflammatory arthritis using label-free mass spectrometry', 'file_path': '/data/tommaso/llm4scilit/data/papers/2.pdf'})]" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from langchain.document_loaders.parsers import GrobidParser\n", + "from langchain.document_loaders.generic import GenericLoader\n", + "\n", + "loader = GenericLoader.from_filesystem(\n", + " DATA_PATH / \"papers/\",\n", + " glob=\"2.pdf\",\n", + " suffixes=[\".pdf\"],\n", + " parser=GrobidParser(segment_sentences=False),\n", + ")\n", + "docs = loader.load()\n", + "docs" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/data/tommaso/mambaforge/envs/llm4scilit/lib/python3.10/site-packages/spacy/pipeline/lemmatizer.py:211: UserWarning: [W108] The rule-based lemmatizer did not find POS annotation for one or more tokens. Check that your pipeline includes components that assign token.pos, typically 'tagger'+'attribute_ruler' or 'morphologizer'.\n", + " warnings.warn(Warnings.W108)\n" + ] + }, + { + "data": { + "text/plain": [ + "[Document(page_content='We determined that 144 proteins showed significant differential abundance between the IA and control SF proteomes, of which 11 protein candidates were selected for future follow-up studies.\\n\\nSimilar analyses applied to our peptidomic data identified 15 peptide sequences, originating from 4 protein precursors, to have significant differential abundance in IA compared to the control SF peptidome.\\n\\nPathway enrichment analysis of the IA SF peptidome along with AMP prediction suggests a possible mechanistic role of microbes in eliciting an immune response which drives the development of IA.', metadata={'text': 'We determined that 144 proteins showed significant differential abundance between the IA and control SF proteomes, of which 11 protein candidates were selected for future follow-up studies.Similar analyses applied to our peptidomic data identified 15 peptide sequences, originating from 4 protein precursors, to have significant differential abundance in IA compared to the control SF peptidome.Pathway enrichment analysis of the IA SF peptidome along with AMP prediction suggests a possible mechanistic role of microbes in eliciting an immune response which drives the development of IA.', 'para': '2', 'bboxes': \"[[{'page': '1', 'x': '101.12', 'y': '422.98', 'h': '424.81', 'w': '9.24'}, {'page': '1', 'x': '63.12', 'y': '434.98', 'h': '340.13', 'w': '9.24'}], [{'page': '1', 'x': '405.45', 'y': '434.98', 'h': '120.66', 'w': '9.24'}, {'page': '1', 'x': '63.12', 'y': '446.98', 'h': '468.92', 'w': '9.24'}, {'page': '1', 'x': '63.12', 'y': '458.98', 'h': '225.40', 'w': '9.24'}], [{'page': '1', 'x': '290.71', 'y': '458.98', 'h': '234.48', 'w': '9.24'}, {'page': '1', 'x': '63.12', 'y': '470.98', 'h': '460.78', 'w': '9.24'}, {'page': '1', 'x': '63.12', 'y': '482.98', 'h': '91.59', 'w': '9.24'}]]\", 'pages': \"('1', '1')\", 'section_title': 'Results:', 'section_number': 'None', 'paper_title': 'Elucidating the endogenous synovial fluid proteome and peptidome of inflammatory arthritis using label-free mass spectrometry', 'file_path': '/data/tommaso/llm4scilit/data/papers/2.pdf'}),\n", + " Document(page_content='The discovery-phase data generated herein has provided a basis for the identification of candidates with the greatest potential to serve as novel serum biomarkers specific to inflammatory arthritides.\\n\\nMoreover, these findings facilitate the understanding of possible disease mechanisms specific to each subtype.', metadata={'text': 'The discovery-phase data generated herein has provided a basis for the identification of candidates with the greatest potential to serve as novel serum biomarkers specific to inflammatory arthritides.Moreover, these findings facilitate the understanding of possible disease mechanisms specific to each subtype.', 'para': '1', 'bboxes': \"[[{'page': '1', 'x': '122.15', 'y': '497.98', 'h': '394.30', 'w': '9.24'}, {'page': '1', 'x': '63.12', 'y': '509.98', 'h': '391.31', 'w': '9.24'}], [{'page': '1', 'x': '456.63', 'y': '509.98', 'h': '63.75', 'w': '9.24'}, {'page': '1', 'x': '63.12', 'y': '521.98', 'h': '374.26', 'w': '9.24'}]]\", 'pages': \"('1', '1')\", 'section_title': 'Conclusions:', 'section_number': 'None', 'paper_title': 'Elucidating the endogenous synovial fluid proteome and peptidome of inflammatory arthritis using label-free mass spectrometry', 'file_path': '/data/tommaso/llm4scilit/data/papers/2.pdf'}),\n", + " Document(page_content='Inflammatory arthritis (IA) is characterized by synovial hyperplasia leading to degradation of adjacent articular cartilage and bone [1].The\\n\\nterm encompasses several forms of inflammatory joint diseases that when taken together, have an annual incidence ranging from 115 to 271 per 100,000 adults [2].IA\\n\\nis a multifactorial disease driven by the complex interplay of both genetics and the environment.\\n\\nRheumatoid arthritis (RA), the most common and potentially destructive IA, has a well-established association with class II major histocompatibility complex (MHC) alleles while the spondyloarthritides, such as psoriatic arthritis (PsA), are more frequently associated with class I MHC alleles [', metadata={'text': \"Inflammatory arthritis (IA) is characterized by synovial hyperplasia leading to degradation of adjacent articular cartilage and bone [1].The term encompasses several forms of inflammatory joint diseases that when taken together, have an annual incidence ranging from 115 to 271 per 100,000 adults [2].IA is a multifactorial disease driven by the complex interplay of both genetics and the environment.Rheumatoid arthritis (RA), the most common and potentially destructive IA, has a well-established association with class II major histocompatibility complex (MHC) alleles while the spondyloarthritides, such as psoriatic arthritis (PsA), are more frequently associated with class I MHC alleles [3].Susceptibility to IA increases when genetic predisposition is complemented by environmental risk factors such as smoking, obesity and more recently, microbial infection and intestinal dysbiosis [4][5][6].The exact etiology of IA is still poorly understood with studies aimed at delineating the molecular pathways driving loss of immunological tolerance to the body's self-antigens.Alterations to the adaptive and innate immune system perpetuate systemic inflammation and lead to an elevated risk of developing comorbid conditions such as cardiovascular disease, metabolic syndrome, diabetes and depression [7,8].Naturally, there is a compelling need to identify markers of aberrant immune pathways relevant to IA which may advance current insights into the molecular mechanisms of the disease and serve as clinical markers for disease monitoring and treatment responses.\", 'para': '7', 'bboxes': \"[[{'page': '2', 'x': '56.69', 'y': '101.85', 'h': '233.86', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '113.84', 'h': '233.87', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '125.84', 'h': '98.24', 'w': '11.68'}], [{'page': '2', 'x': '158.95', 'y': '125.84', 'h': '131.59', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '137.83', 'h': '233.86', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '149.83', 'h': '233.87', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '161.83', 'h': '124.09', 'w': '11.68'}], [{'page': '2', 'x': '183.72', 'y': '161.83', 'h': '106.83', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '173.85', 'h': '233.83', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '185.84', 'h': '94.37', 'w': '11.68'}], [{'page': '2', 'x': '155.55', 'y': '185.84', 'h': '135.01', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '197.84', 'h': '233.83', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '209.83', 'h': '233.86', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '221.85', 'h': '233.86', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '233.85', 'h': '233.84', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '245.84', 'h': '212.58', 'w': '11.68'}], [{'page': '2', 'x': '272.28', 'y': '245.84', 'h': '18.27', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '257.85', 'h': '233.83', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '269.84', 'h': '233.86', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '281.84', 'h': '233.86', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '293.83', 'h': '127.47', 'w': '11.68'}], [{'page': '2', 'x': '187.45', 'y': '293.83', 'h': '103.09', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '305.83', 'h': '233.86', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '317.85', 'h': '233.86', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '329.85', 'h': '184.18', 'w': '11.68'}], [{'page': '2', 'x': '243.59', 'y': '329.85', 'h': '46.94', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '341.84', 'h': '233.86', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '353.84', 'h': '233.84', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '365.84', 'h': '233.86', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '377.83', 'h': '233.85', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '389.83', 'h': '24.69', 'w': '11.68'}], [{'page': '2', 'x': '84.82', 'y': '389.83', 'h': '205.76', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '401.82', 'h': '233.85', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '413.82', 'h': '233.84', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '425.81', 'h': '233.86', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '437.81', 'h': '203.55', 'w': '11.68'}]]\", 'pages': \"('2', '2')\", 'section_title': 'Introduction', 'section_number': 'None', 'paper_title': 'Elucidating the endogenous synovial fluid proteome and peptidome of inflammatory arthritis using label-free mass spectrometry', 'file_path': '/data/tommaso/llm4scilit/data/papers/2.pdf'}),\n", + " Document(page_content=\"3].Susceptibility to IA increases when genetic predisposition is complemented by environmental risk factors such as smoking, obesity and more recently, microbial infection and intestinal dysbiosis [4][5][6].The exact etiology of IA is still poorly understood with studies aimed at delineating the molecular pathways driving loss of immunological tolerance to the body's self-antigens.\\n\\nAlterations to the adaptive and innate immune system perpetuate systemic inflammation and lead to an elevated risk of developing comorbid conditions such as cardiovascular disease, metabolic syndrome, diabetes and depression [7,8].Naturally, there is a compelling need to identify markers of aberrant immune pathways relevant to IA which may advance current insights into the molecular mechanisms of the disease and serve as clinical markers for disease monitoring and treatment responses.\", metadata={'text': \"Inflammatory arthritis (IA) is characterized by synovial hyperplasia leading to degradation of adjacent articular cartilage and bone [1].The term encompasses several forms of inflammatory joint diseases that when taken together, have an annual incidence ranging from 115 to 271 per 100,000 adults [2].IA is a multifactorial disease driven by the complex interplay of both genetics and the environment.Rheumatoid arthritis (RA), the most common and potentially destructive IA, has a well-established association with class II major histocompatibility complex (MHC) alleles while the spondyloarthritides, such as psoriatic arthritis (PsA), are more frequently associated with class I MHC alleles [3].Susceptibility to IA increases when genetic predisposition is complemented by environmental risk factors such as smoking, obesity and more recently, microbial infection and intestinal dysbiosis [4][5][6].The exact etiology of IA is still poorly understood with studies aimed at delineating the molecular pathways driving loss of immunological tolerance to the body's self-antigens.Alterations to the adaptive and innate immune system perpetuate systemic inflammation and lead to an elevated risk of developing comorbid conditions such as cardiovascular disease, metabolic syndrome, diabetes and depression [7,8].Naturally, there is a compelling need to identify markers of aberrant immune pathways relevant to IA which may advance current insights into the molecular mechanisms of the disease and serve as clinical markers for disease monitoring and treatment responses.\", 'para': '7', 'bboxes': \"[[{'page': '2', 'x': '56.69', 'y': '101.85', 'h': '233.86', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '113.84', 'h': '233.87', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '125.84', 'h': '98.24', 'w': '11.68'}], [{'page': '2', 'x': '158.95', 'y': '125.84', 'h': '131.59', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '137.83', 'h': '233.86', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '149.83', 'h': '233.87', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '161.83', 'h': '124.09', 'w': '11.68'}], [{'page': '2', 'x': '183.72', 'y': '161.83', 'h': '106.83', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '173.85', 'h': '233.83', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '185.84', 'h': '94.37', 'w': '11.68'}], [{'page': '2', 'x': '155.55', 'y': '185.84', 'h': '135.01', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '197.84', 'h': '233.83', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '209.83', 'h': '233.86', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '221.85', 'h': '233.86', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '233.85', 'h': '233.84', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '245.84', 'h': '212.58', 'w': '11.68'}], [{'page': '2', 'x': '272.28', 'y': '245.84', 'h': '18.27', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '257.85', 'h': '233.83', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '269.84', 'h': '233.86', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '281.84', 'h': '233.86', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '293.83', 'h': '127.47', 'w': '11.68'}], [{'page': '2', 'x': '187.45', 'y': '293.83', 'h': '103.09', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '305.83', 'h': '233.86', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '317.85', 'h': '233.86', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '329.85', 'h': '184.18', 'w': '11.68'}], [{'page': '2', 'x': '243.59', 'y': '329.85', 'h': '46.94', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '341.84', 'h': '233.86', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '353.84', 'h': '233.84', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '365.84', 'h': '233.86', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '377.83', 'h': '233.85', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '389.83', 'h': '24.69', 'w': '11.68'}], [{'page': '2', 'x': '84.82', 'y': '389.83', 'h': '205.76', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '401.82', 'h': '233.85', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '413.82', 'h': '233.84', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '425.81', 'h': '233.86', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '437.81', 'h': '203.55', 'w': '11.68'}]]\", 'pages': \"('2', '2')\", 'section_title': 'Introduction', 'section_number': 'None', 'paper_title': 'Elucidating the endogenous synovial fluid proteome and peptidome of inflammatory arthritis using label-free mass spectrometry', 'file_path': '/data/tommaso/llm4scilit/data/papers/2.pdf'}),\n", + " Document(page_content='The rise in high-throughput technologies, such as next-generation gene sequencing and mass spectrometry (MS), facilitate the discovery of key modulators of disease.\\n\\nSpecifically, MS-based approaches provide an essential analytical platform for the identification, quantification and characterization of candidate biomarkers.\\n\\nBiomarkers may come in the form of a molecular signature, a clinical feature or even as an imaging parameter.\\n\\nMolecular biomarkers may be further subtyped into the domains of genomics, transcriptomics, proteomics, metabolomics or peptidomics.\\n\\nDue to the importance of proteins in pathophysiological processes, there is increased interest in resolving the proteomic profile of biospecimens related to IA.Similarly, peptides play a seminal role in mediating physiological functions by serving as neurotransmitters, hormones, antibiotics and immune regulators [9].During IA', metadata={'text': 'The rise in high-throughput technologies, such as next-generation gene sequencing and mass spectrometry (MS), facilitate the discovery of key modulators of disease.Specifically, MS-based approaches provide an essential analytical platform for the identification, quantification and characterization of candidate biomarkers.Biomarkers may come in the form of a molecular signature, a clinical feature or even as an imaging parameter.Molecular biomarkers may be further subtyped into the domains of genomics, transcriptomics, proteomics, metabolomics or peptidomics.Due to the importance of proteins in pathophysiological processes, there is increased interest in resolving the proteomic profile of biospecimens related to IA.Similarly, peptides play a seminal role in mediating physiological functions by serving as neurotransmitters, hormones, antibiotics and immune regulators [9].During IA, joint pain and inflammation are driven by aberrant proteolysis resulting in the production of inflammatory peptides and the destruction of joint cartilage and bone.Synovial fluid (SF), a proximal fluid which bathes the intrinsic joint structures, is an important reservoir of putative protein and peptide biomarkers whose abundance levels fluctuate in response to pathological changes due to disease [10].', 'para': '7', 'bboxes': \"[[{'page': '2', 'x': '64.69', 'y': '449.80', 'h': '225.84', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '461.80', 'h': '233.86', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '473.85', 'h': '233.87', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '485.84', 'h': '45.00', 'w': '11.68'}], [{'page': '2', 'x': '106.03', 'y': '485.84', 'h': '184.52', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '497.84', 'h': '233.86', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '509.84', 'h': '233.86', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '521.85', 'h': '36.67', 'w': '11.68'}], [{'page': '2', 'x': '96.18', 'y': '521.85', 'h': '194.37', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '533.85', 'h': '233.87', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '545.84', 'h': '44.89', 'w': '11.68'}], [{'page': '2', 'x': '105.44', 'y': '545.84', 'h': '185.11', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '557.85', 'h': '233.86', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '569.85', 'h': '200.97', 'w': '11.68'}], [{'page': '2', 'x': '261.20', 'y': '569.85', 'h': '29.37', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '581.84', 'h': '233.86', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '593.85', 'h': '233.86', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '605.85', 'h': '191.41', 'w': '11.68'}], [{'page': '2', 'x': '251.27', 'y': '605.85', 'h': '39.28', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '617.84', 'h': '233.86', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '629.84', 'h': '233.84', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '641.83', 'h': '177.40', 'w': '11.68'}], [{'page': '2', 'x': '240.69', 'y': '641.83', 'h': '49.86', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '653.83', 'h': '233.85', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '665.83', 'h': '233.86', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '677.85', 'h': '233.88', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '689.84', 'h': '23.12', 'w': '11.68'}], [{'page': '2', 'x': '82.70', 'y': '689.84', 'h': '207.82', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '701.84', 'h': '233.86', 'w': '11.68'}, {'page': '2', 'x': '56.69', 'y': '713.85', 'h': '233.86', 'w': '11.68'}, {'page': '2', 'x': '304.72', 'y': '89.32', 'h': '233.88', 'w': '11.68'}, {'page': '2', 'x': '304.72', 'y': '101.32', 'h': '116.75', 'w': '11.68'}]]\", 'pages': \"('2', '2')\", 'section_title': 'Introduction', 'section_number': 'None', 'paper_title': 'Elucidating the endogenous synovial fluid proteome and peptidome of inflammatory arthritis using label-free mass spectrometry', 'file_path': '/data/tommaso/llm4scilit/data/papers/2.pdf'})]" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import spacy\n", + "# spacy.require_gpu(gpu_id=1)\n", + "\n", + "import spacy_transformers # needed by SpacyTextSplitter when using the en_core_web_trf pipeline\n", + "from langchain.text_splitter import SpacyTextSplitter\n", + "from itertools import chain\n", + "\n", + "splitter = SpacyTextSplitter(chunk_size=1000, pipeline=\"en_core_web_trf\")\n", + "chunks = splitter.split_documents(docs)\n", + "chunks[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "db_paper_2 = FAISS.from_documents(chunks, model)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "db.merge_from(db_paper_2)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[Document(page_content='These serum proteins have strong potential to serve as diagnostic and prognostic biomarkers of RA and can also be evaluated to fill the gaps in the current knowledge of pathogenesis of RA.These\\n\\nfindings can be validated in larger cohorts from different populations to identify diagnostic and prognostic biomarkers of RA.', metadata={'text': 'RA is a complex disease that is influenced by an intricate interactome of various environmental, genetic and microbial factors that influence the immune homeostasis.Owing to the complex genetic architecture accompanied by a plethora of microbial and environmental triggers that an organism is exposed to this has made the identification of diagnostic and prognostic markers challenging.Our study has explored the serum proteomics of this complex autoimmune disorder in a relatively understudied Pakistani population to identify disease biomarkers that are DE among various serotypes of RA patients and healthy controls.We identified that PZP, SELENOP, C4BP beta chain, ApoM, NAMLAA, CPN catalytic chain, OIT3, CPN subunit 2, ApoC1 and ApoCIII were DE between the RA patients and healthy controls.These serum proteins have strong potential to serve as diagnostic and prognostic biomarkers of RA and can also be evaluated to fill the gaps in the current knowledge of pathogenesis of RA.These findings can be validated in larger cohorts from different populations to identify diagnostic and prognostic biomarkers of RA.', 'para': '5', 'bboxes': \"[[{'page': '15', 'x': '187.65', 'y': '173.66', 'h': '371.62', 'w': '9.58'}, {'page': '15', 'x': '166.39', 'y': '186.22', 'h': '394.62', 'w': '9.58'}], [{'page': '15', 'x': '166.39', 'y': '198.77', 'h': '392.88', 'w': '9.58'}, {'page': '15', 'x': '166.39', 'y': '211.32', 'h': '392.88', 'w': '9.58'}, {'page': '15', 'x': '166.39', 'y': '223.88', 'h': '229.10', 'w': '9.58'}], [{'page': '15', 'x': '401.31', 'y': '223.88', 'h': '157.97', 'w': '9.58'}, {'page': '15', 'x': '166.10', 'y': '236.43', 'h': '393.18', 'w': '9.58'}, {'page': '15', 'x': '166.10', 'y': '248.98', 'h': '393.57', 'w': '9.58'}, {'page': '15', 'x': '166.10', 'y': '261.54', 'h': '130.46', 'w': '9.58'}], [{'page': '15', 'x': '299.65', 'y': '261.54', 'h': '260.87', 'w': '9.58'}, {'page': '15', 'x': '166.39', 'y': '274.09', 'h': '392.88', 'w': '9.58'}, {'page': '15', 'x': '166.39', 'y': '286.64', 'h': '201.22', 'w': '9.58'}], [{'page': '15', 'x': '370.71', 'y': '286.64', 'h': '188.57', 'w': '9.58'}, {'page': '15', 'x': '166.39', 'y': '299.19', 'h': '392.88', 'w': '9.58'}, {'page': '15', 'x': '166.39', 'y': '311.75', 'h': '238.67', 'w': '9.58'}], [{'page': '15', 'x': '407.54', 'y': '311.75', 'h': '151.74', 'w': '9.58'}, {'page': '15', 'x': '166.39', 'y': '324.30', 'h': '392.88', 'w': '9.58'}, {'page': '15', 'x': '166.39', 'y': '336.85', 'h': '28.14', 'w': '9.58'}]]\", 'pages': \"('15', '15')\", 'section_title': 'Conclusions', 'section_number': '5.', 'paper_title': 'LC-MS/MS-Based Serum Protein Profiling for Identification of Candidate Biomarkers in Pakistani Rheumatoid Arthritis Patients', 'file_path': '/data/tommaso/llm4scilit/data/papers/1.pdf'}),\n", + " Document(page_content='Rheumatoid factor (RF) and anti-citrullinated peptide antibodies (ACPA) are considered as the main serological markers for RA that have been included in the 2010 American College of Rheumatology (ACR)/European League against Rheumatism (EULAR) classification criteria for RA [7][8][9].Based on 2010 ACR/EULAR classification criteria for RA, clinically diagnosed RA patients can be categorized into four serotypes: (i) positive for both RF and ACPA, (ii) positive for RF and negative for ACPA, (iii) negative for RF and positive for ACPA and (iv) negative for both RF and ACPA.However\\n\\n, the levels of RF are also perturbed in connective tissue diseases [10] and some chronic infectious diseases such as hepatitis B and hepatitis C virus infections [11].RF\\n\\nis thus not a specific diagnostic marker for', metadata={'text': 'Rheumatoid factor (RF) and anti-citrullinated peptide antibodies (ACPA) are considered as the main serological markers for RA that have been included in the 2010 American College of Rheumatology (ACR)/European League against Rheumatism (EULAR) classification criteria for RA [7][8][9].Based on 2010 ACR/EULAR classification criteria for RA, clinically diagnosed RA patients can be categorized into four serotypes: (i) positive for both RF and ACPA, (ii) positive for RF and negative for ACPA, (iii) negative for RF and positive for ACPA and (iv) negative for both RF and ACPA.However, the levels of RF are also perturbed in connective tissue diseases [10] and some chronic infectious diseases such as hepatitis B and hepatitis C virus infections [11].RF is thus not a specific diagnostic marker for RA.ACPA is comparatively a more specific biomarker and two-thirds of the individuals ultimately diagnosed with RA were tested positive for ACPAs 6-10 years before diagnosis [12,13].A total of 1-3% of the healthy population may also test positive for ACPAs suggesting the decreased specificity of this biomarker [14][15][16][17].Therefore, it is important to discover the biomarkers for the diagnosis of RA with both increased sensitivity and specificity.', 'para': '6', 'bboxes': \"[[{'page': '2', 'x': '187.65', 'y': '223.58', 'h': '373.27', 'w': '9.58'}, {'page': '2', 'x': '166.39', 'y': '236.13', 'h': '392.88', 'w': '9.58'}, {'page': '2', 'x': '166.39', 'y': '248.68', 'h': '394.53', 'w': '9.58'}, {'page': '2', 'x': '166.39', 'y': '261.24', 'h': '133.81', 'w': '9.58'}], [{'page': '2', 'x': '303.29', 'y': '261.24', 'h': '257.23', 'w': '9.58'}, {'page': '2', 'x': '166.39', 'y': '273.79', 'h': '393.08', 'w': '9.58'}, {'page': '2', 'x': '166.39', 'y': '286.34', 'h': '392.88', 'w': '9.58'}, {'page': '2', 'x': '166.10', 'y': '298.90', 'h': '272.66', 'w': '9.58'}], [{'page': '2', 'x': '441.85', 'y': '298.90', 'h': '117.43', 'w': '9.58'}, {'page': '2', 'x': '166.39', 'y': '311.45', 'h': '392.88', 'w': '9.58'}, {'page': '2', 'x': '166.39', 'y': '324.00', 'h': '240.16', 'w': '9.58'}], [{'page': '2', 'x': '409.64', 'y': '324.00', 'h': '149.63', 'w': '9.58'}, {'page': '2', 'x': '166.39', 'y': '336.55', 'h': '67.99', 'w': '9.58'}], [{'page': '2', 'x': '236.99', 'y': '336.55', 'h': '322.28', 'w': '9.58'}, {'page': '2', 'x': '166.39', 'y': '349.11', 'h': '392.88', 'w': '9.58'}, {'page': '2', 'x': '166.39', 'y': '361.66', 'h': '107.38', 'w': '9.58'}], [{'page': '2', 'x': '276.86', 'y': '361.66', 'h': '282.42', 'w': '9.58'}, {'page': '2', 'x': '166.39', 'y': '374.21', 'h': '325.69', 'w': '9.58'}], [{'page': '2', 'x': '495.20', 'y': '374.21', 'h': '64.08', 'w': '9.58'}, {'page': '2', 'x': '166.39', 'y': '386.77', 'h': '393.27', 'w': '9.58'}, {'page': '2', 'x': '166.39', 'y': '399.32', 'h': '65.18', 'w': '9.58'}]]\", 'pages': \"('2', '2')\", 'section_title': 'Introduction', 'section_number': '1.', 'paper_title': 'LC-MS/MS-Based Serum Protein Profiling for Identification of Candidate Biomarkers in Pakistani Rheumatoid Arthritis Patients', 'file_path': '/data/tommaso/llm4scilit/data/papers/1.pdf'}),\n", + " Document(page_content='is thus not a specific diagnostic marker for\\n\\nRA.ACPA is comparatively a more specific biomarker and two-thirds of the individuals ultimately diagnosed with RA were tested positive for ACPAs 6-10 years before diagnosis [12,13].A total of 1-3% of the healthy population may also test positive for ACPAs suggesting the decreased specificity of this biomarker [14][15][16][17].Therefore\\n\\n, it is important to discover the biomarkers for the diagnosis of RA with both increased sensitivity and specificity.', metadata={'text': 'Rheumatoid factor (RF) and anti-citrullinated peptide antibodies (ACPA) are considered as the main serological markers for RA that have been included in the 2010 American College of Rheumatology (ACR)/European League against Rheumatism (EULAR) classification criteria for RA [7][8][9].Based on 2010 ACR/EULAR classification criteria for RA, clinically diagnosed RA patients can be categorized into four serotypes: (i) positive for both RF and ACPA, (ii) positive for RF and negative for ACPA, (iii) negative for RF and positive for ACPA and (iv) negative for both RF and ACPA.However, the levels of RF are also perturbed in connective tissue diseases [10] and some chronic infectious diseases such as hepatitis B and hepatitis C virus infections [11].RF is thus not a specific diagnostic marker for RA.ACPA is comparatively a more specific biomarker and two-thirds of the individuals ultimately diagnosed with RA were tested positive for ACPAs 6-10 years before diagnosis [12,13].A total of 1-3% of the healthy population may also test positive for ACPAs suggesting the decreased specificity of this biomarker [14][15][16][17].Therefore, it is important to discover the biomarkers for the diagnosis of RA with both increased sensitivity and specificity.', 'para': '6', 'bboxes': \"[[{'page': '2', 'x': '187.65', 'y': '223.58', 'h': '373.27', 'w': '9.58'}, {'page': '2', 'x': '166.39', 'y': '236.13', 'h': '392.88', 'w': '9.58'}, {'page': '2', 'x': '166.39', 'y': '248.68', 'h': '394.53', 'w': '9.58'}, {'page': '2', 'x': '166.39', 'y': '261.24', 'h': '133.81', 'w': '9.58'}], [{'page': '2', 'x': '303.29', 'y': '261.24', 'h': '257.23', 'w': '9.58'}, {'page': '2', 'x': '166.39', 'y': '273.79', 'h': '393.08', 'w': '9.58'}, {'page': '2', 'x': '166.39', 'y': '286.34', 'h': '392.88', 'w': '9.58'}, {'page': '2', 'x': '166.10', 'y': '298.90', 'h': '272.66', 'w': '9.58'}], [{'page': '2', 'x': '441.85', 'y': '298.90', 'h': '117.43', 'w': '9.58'}, {'page': '2', 'x': '166.39', 'y': '311.45', 'h': '392.88', 'w': '9.58'}, {'page': '2', 'x': '166.39', 'y': '324.00', 'h': '240.16', 'w': '9.58'}], [{'page': '2', 'x': '409.64', 'y': '324.00', 'h': '149.63', 'w': '9.58'}, {'page': '2', 'x': '166.39', 'y': '336.55', 'h': '67.99', 'w': '9.58'}], [{'page': '2', 'x': '236.99', 'y': '336.55', 'h': '322.28', 'w': '9.58'}, {'page': '2', 'x': '166.39', 'y': '349.11', 'h': '392.88', 'w': '9.58'}, {'page': '2', 'x': '166.39', 'y': '361.66', 'h': '107.38', 'w': '9.58'}], [{'page': '2', 'x': '276.86', 'y': '361.66', 'h': '282.42', 'w': '9.58'}, {'page': '2', 'x': '166.39', 'y': '374.21', 'h': '325.69', 'w': '9.58'}], [{'page': '2', 'x': '495.20', 'y': '374.21', 'h': '64.08', 'w': '9.58'}, {'page': '2', 'x': '166.39', 'y': '386.77', 'h': '393.27', 'w': '9.58'}, {'page': '2', 'x': '166.39', 'y': '399.32', 'h': '65.18', 'w': '9.58'}]]\", 'pages': \"('2', '2')\", 'section_title': 'Introduction', 'section_number': '1.', 'paper_title': 'LC-MS/MS-Based Serum Protein Profiling for Identification of Candidate Biomarkers in Pakistani Rheumatoid Arthritis Patients', 'file_path': '/data/tommaso/llm4scilit/data/papers/1.pdf'}),\n", + " Document(page_content='For validation, serum samples were collected and processed from RA patients (n = 60) (mean age ± SD = 41.495 ± 12.8275) and healthy controls (n = 20) (mean age ± SD = 45.4 ± 11.31) from the same population.\\n\\nThe demographics and clinical characteristics of the experimental and validation cohort are shown in Table 1.', metadata={'text': 'For validation, serum samples were collected and processed from RA patients (n = 60) (mean age ± SD = 41.495 ± 12.8275) and healthy controls (n = 20) (mean age ± SD = 45.4 ± 11.31) from the same population.The demographics and clinical characteristics of the experimental and validation cohort are shown in Table 1.', 'para': '1', 'bboxes': \"[[{'page': '3', 'x': '187.65', 'y': '160.81', 'h': '372.02', 'w': '9.58'}, {'page': '3', 'x': '166.10', 'y': '173.05', 'h': '394.17', 'w': '9.90'}, {'page': '3', 'x': '166.07', 'y': '185.60', 'h': '256.73', 'w': '9.90'}], [{'page': '3', 'x': '425.92', 'y': '185.92', 'h': '133.36', 'w': '9.58'}, {'page': '3', 'x': '166.39', 'y': '198.47', 'h': '343.00', 'w': '9.58'}]]\", 'pages': \"('3', '3')\", 'section_title': 'Study Subjects and Serum Collection', 'section_number': '2.1.', 'paper_title': 'LC-MS/MS-Based Serum Protein Profiling for Identification of Candidate Biomarkers in Pakistani Rheumatoid Arthritis Patients', 'file_path': '/data/tommaso/llm4scilit/data/papers/1.pdf'})]" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "db.as_retriever().get_relevant_documents(\"What are the main serological markers for RA?\", metadata={\"paper_title\": \"LC-MS/MS-Based Serum Protein Profiling for Identification of Candidate Biomarkers in Pakistani Rheumatoid Arthritis Patients\"})" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "results = db.as_retriever().get_relevant_documents(\"What are the main serological markers for RA?\", search_kwargs={\"metadata\": {\"paper_title\": \"Elucidating the endogenous synovial fluid proteome and peptidome of inflammatory arthritis using label-free mass spectrometry\"}})" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'LC-MS/MS-Based Serum Protein Profiling for Identification of Candidate Biomarkers in Pakistani Rheumatoid Arthritis Patients'" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "results[0].metadata[\"paper_title\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "134" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "db.index.ntotal" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'text': 'We determined that 144 proteins showed significant differential abundance between the IA and control SF proteomes, of which 11 protein candidates were selected for future follow-up studies.Similar analyses applied to our peptidomic data identified 15 peptide sequences, originating from 4 protein precursors, to have significant differential abundance in IA compared to the control SF peptidome.Pathway enrichment analysis of the IA SF peptidome along with AMP prediction suggests a possible mechanistic role of microbes in eliciting an immune response which drives the development of IA.',\n", + " 'para': '2',\n", + " 'bboxes': \"[[{'page': '1', 'x': '101.12', 'y': '422.98', 'h': '424.81', 'w': '9.24'}, {'page': '1', 'x': '63.12', 'y': '434.98', 'h': '340.13', 'w': '9.24'}], [{'page': '1', 'x': '405.45', 'y': '434.98', 'h': '120.66', 'w': '9.24'}, {'page': '1', 'x': '63.12', 'y': '446.98', 'h': '468.92', 'w': '9.24'}, {'page': '1', 'x': '63.12', 'y': '458.98', 'h': '225.40', 'w': '9.24'}], [{'page': '1', 'x': '290.71', 'y': '458.98', 'h': '234.48', 'w': '9.24'}, {'page': '1', 'x': '63.12', 'y': '470.98', 'h': '460.78', 'w': '9.24'}, {'page': '1', 'x': '63.12', 'y': '482.98', 'h': '91.59', 'w': '9.24'}]]\",\n", + " 'pages': \"('1', '1')\",\n", + " 'section_title': 'Results:',\n", + " 'section_number': 'None',\n", + " 'paper_title': 'Elucidating the endogenous synovial fluid proteome and peptidome of inflammatory arthritis using label-free mass spectrometry',\n", + " 'file_path': '/data/tommaso/llm4scilit/data/papers/2.pdf'}" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "chunks[0].metadata" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['Elucidating the endogenous synovial fluid proteome and peptidome of inflammatory arthritis using label-free mass spectrometry',\n", + " 'Elucidating the endogenous synovial fluid proteome and peptidome of inflammatory arthritis using label-free mass spectrometry',\n", + " 'Elucidating the endogenous synovial fluid proteome and peptidome of inflammatory arthritis using label-free mass spectrometry']" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "[x.metadata[\"paper_title\"] for x in db.as_retriever(search_kwargs={\"filter\": {\"paper_title\": \"Elucidating the endogenous synovial fluid proteome and peptidome of inflammatory arthritis using label-free mass spectrometry\"}}).get_relevant_documents(\"What are the main serological markers for RA?\")]" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['ask_paper', 'usu sus', 'asd']" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import shlex\n", + "\n", + "shlex.split('ask_paper \"usu sus\" asd')" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "llm4scilit", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/test.ipynb b/notebooks/test.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..56c3c0820e2e6b02bdcbdedfdf12a56d8e8f14ec --- /dev/null +++ b/notebooks/test.ipynb @@ -0,0 +1,601 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "DATA_PATH = Path(\"/data/tommaso/data\")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[Document(page_content='LC-MS/MS-Based Serum Protein Profiling for Identification of Candidate Biomarkers in Pakistani Rheumatoid Arthritis Patients', metadata={'source': PosixPath('/data/tommaso/data/papers_processed/1.txt'), 'filename': '1.txt', 'file_directory': '/data/tommaso/data/papers_processed', 'filetype': 'text/plain', 'category': 'UncategorizedText'}),\n", + " Document(page_content='Abstract: Rheumatoid arthritis is an autoimmune disorder of complex disease etiology. Currently available serological diagnostic markers lack in terms of sensitivity and specificity and thus addi- tional biomarkers are warranted for early disease diagnosis and management. We aimed to screen and compare serum proteome profiles of rheumatoid arthritis serotypes with healthy controls in the Pakistani population for identification of potential disease biomarkers. Serum samples from rheumatoid arthritis patients and healthy controls were enriched for low abundance proteins using ProteoMinerTM columns. Rheumatoid arthritis patients were assigned to one of the four serotypes based on anti-citrullinated peptide antibodies and rheumatoid factor. Serum protein profiles were ana- lyzed via liquid chromatography-tandem mass spectrometry. The changes in the protein abundances were determined using label-free quantification software ProgenesisQITM followed by pathway analysis. Findings were validated in an independent cohort of patients and healthy controls using an enzyme-linked immunosorbent assay. A total of 213 proteins were identified.', metadata={'source': PosixPath('/data/tommaso/data/papers_processed/1.txt'), 'filename': '1.txt', 'file_directory': '/data/tommaso/data/papers_processed', 'filetype': 'text/plain', 'category': 'NarrativeText'}),\n", + " Document(page_content='Comparative analysis of all groups (false discovery rate < 0.05, >2-fold change, and identified with ≥2 unique peptides) identified ten proteins that were differentially expressed between rheumatoid arthritis serotypes and healthy controls including pregnancy zone protein, selenoprotein P, C4b-binding protein beta chain, apolipoprotein M, N-acetylmuramoyl-L-alanine amidase, catalytic chain, oncoprotein-induced transcript 3 protein, Carboxypeptidase N subunit 2, Apolipoprotein C-I and Apolipoprotein C-III. Pathway analysis predicted inhibition of liver X receptor/retinoid X receptor activation pathway and production of nitric oxide and reactive oxygen species pathway in macrophages in all serotypes. A catalogue of potential serum biomarkers for rheumatoid arthritis were identified. These biomark- ers can be further evaluated in larger cohorts from different populations for their diagnostic and prognostic potential.', metadata={'source': PosixPath('/data/tommaso/data/papers_processed/1.txt'), 'filename': '1.txt', 'file_directory': '/data/tommaso/data/papers_processed', 'filetype': 'text/plain', 'category': 'NarrativeText'}),\n", + " Document(page_content='Keywords: rheumatoid arthritis; serum; proteomics; biomarkers; LC-MS', metadata={'source': PosixPath('/data/tommaso/data/papers_processed/1.txt'), 'filename': '1.txt', 'file_directory': '/data/tommaso/data/papers_processed', 'filetype': 'text/plain', 'category': 'Title'})]" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from langchain.document_loaders import UnstructuredFileLoader\n", + "from unstructured.cleaners.core import clean_extra_whitespace, group_broken_paragraphs\n", + "\n", + "loader = UnstructuredFileLoader(\n", + " DATA_PATH / \"papers_processed\" / \"1.txt\",\n", + " strategy=\"hi_res\",\n", + " mode=\"elements\",\n", + " post_processors=[\n", + " clean_extra_whitespace,\n", + " group_broken_paragraphs,\n", + " ])\n", + "docs = loader.load()\n", + "docs[:4]" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[]" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from langchain.document_loaders.parsers import GrobidParser\n", + "from langchain.document_loaders.generic import GenericLoader\n", + "\n", + "loader = GenericLoader.from_filesystem(\n", + " DATA_PATH / \"papers\",\n", + " glob=\"1.pdf\",\n", + " suffixes=[\".pdf\"],\n", + " parser=GrobidParser(segment_sentences=False),\n", + ")\n", + "docs = loader.load()\n", + "docs" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[]" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import spacy\n", + "spacy.require_gpu(gpu_id=1)\n", + "\n", + "import spacy_transformers # needed by SpacyTextSplitter when using the en_core_web_trf pipeline\n", + "from langchain.text_splitter import SpacyTextSplitter\n", + "from itertools import chain\n", + "\n", + "splitter = SpacyTextSplitter(chunk_size=1000, pipeline=\"en_core_web_trf\")\n", + "chunks = splitter.split_documents(docs)\n", + "chunks[:5]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## BioBERT" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'Rheumatoid factor (RF) and anti-citrullinated peptide antibodies (ACPA) are considered as the main serological markers for RA that have been included in the 2010 American College of Rheumatology (ACR)/European League against Rheumatism (EULAR) classification criteria for RA [7][8][9].Based on 2010 ACR/EULAR classification criteria for RA, clinically diagnosed RA patients can be categorized into four serotypes: (i) positive for both RF and ACPA, (ii) positive for RF and negative for ACPA, (iii) negative for RF and positive for ACPA and (iv) negative for both RF and ACPA.However, the levels of RF are also perturbed in connective tissue diseases [10] and some chronic infectious diseases such as hepatitis B and hepatitis C virus infections [11].RF is thus not a specific diagnostic marker for RA.ACPA is comparatively a more specific biomarker and two-thirds of the individuals ultimately diagnosed with RA were tested positive for ACPAs 6-10 years before diagnosis [12,13].A total of 1-3% of the healthy population may also test positive for ACPAs suggesting the decreased specificity of this biomarker [14][15][16][17].Therefore, it is important to discover the biomarkers for the diagnosis of RA with both increased sensitivity and specificity.'" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "docs[1].page_content" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# Use a pipeline as a high-level helper\n", + "from transformers import pipeline\n", + "\n", + "pipe = pipeline(\"question-answering\", model=\"dmis-lab/biobert-large-cased-v1.1-squad\", device=1, handle_impossible_answer=True, max_seq_len=512)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "BertForQuestionAnswering(\n", + " (bert): BertModel(\n", + " (embeddings): BertEmbeddings(\n", + " (word_embeddings): Embedding(58996, 1024, padding_idx=0)\n", + " (position_embeddings): Embedding(512, 1024)\n", + " (token_type_embeddings): Embedding(2, 1024)\n", + " (LayerNorm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (encoder): BertEncoder(\n", + " (layer): ModuleList(\n", + " (0-23): 24 x BertLayer(\n", + " (attention): BertAttention(\n", + " (self): BertSelfAttention(\n", + " (query): Linear(in_features=1024, out_features=1024, bias=True)\n", + " (key): Linear(in_features=1024, out_features=1024, bias=True)\n", + " (value): Linear(in_features=1024, out_features=1024, bias=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " (output): BertSelfOutput(\n", + " (dense): Linear(in_features=1024, out_features=1024, bias=True)\n", + " (LayerNorm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " (intermediate): BertIntermediate(\n", + " (dense): Linear(in_features=1024, out_features=4096, bias=True)\n", + " (intermediate_act_fn): GELUActivation()\n", + " )\n", + " (output): BertOutput(\n", + " (dense): Linear(in_features=4096, out_features=1024, bias=True)\n", + " (LayerNorm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)\n", + " (dropout): Dropout(p=0.1, inplace=False)\n", + " )\n", + " )\n", + " )\n", + " )\n", + " )\n", + " (qa_outputs): Linear(in_features=1024, out_features=2, bias=True)\n", + ")" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pipe.model" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Question: How did the authors detect protein abundances?\n", + "Answer 1 (score: 0.121): 'Mass spectrometry (MS)-based serum proteomics'\n", + "Answer 2 (score: 0.114): 'ProgenesisQITM followed by pathway analysis'\n", + "\n", + "\n", + "Question: How can RA patients be categorized?\n", + "Answer 1 (score: 0.377): 'four serotypes'\n", + "Answer 2 (score: 0.320): 'into four serotypes'\n", + "\n" + ] + } + ], + "source": [ + "questions = [\n", + " \"How did the authors detect protein abundances?\",\n", + " \"How can RA patients be categorized?\"\n", + "]\n", + "context = \"\\n\".join([x.page_content for x in docs])\n", + "\n", + "for q in questions:\n", + " a = pipe(question=q, context=context, top_k=2)\n", + " print(f'''\n", + "Question: {q}\n", + "Answer 1 (score: {a[0][\"score\"]:.3f}): '{a[0][\"answer\"]}'\n", + "Answer 2 (score: {a[1][\"score\"]:.3f}): '{a[1][\"answer\"]}'\n", + "''')\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'score': 0.12108789384365082,\n", + " 'start': 4854,\n", + " 'end': 4899,\n", + " 'answer': 'Mass spectrometry (MS)-based serum proteomics'}" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context = \"\\n\".join([x.page_content for x in docs])\n", + "pipe(question=\"How did the authors detect protein abundances?\", context=context)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## BioGPT" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain import HuggingFaceHub, HuggingFacePipeline\n", + "\n", + "HUGGINGFACE_TOKEN = \"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\"\n", + "\n", + "# llm = HuggingFacePipeline.from_model_id(\n", + "# model_id=\"stanford-crfm/BioMedLM\",\n", + "# task=\"text-generation\",\n", + "# device=1,\n", + "# model_kwargs={\"temperature\": 0},\n", + "# )\n", + "\n", + "from langchain import PromptTemplate, LLMChain\n", + "\n", + "template = \"\"\"You are a useful and reliableQuestion: {question}\n", + "Context: {context}\"\"\"\n", + "prompt = PromptTemplate(template=template, input_variables=[\"question\", \"context\"])\n", + "llm = HuggingFaceHub(\n", + " repo_id=\"microsoft/BioGPT-Large-PubMedQA\",\n", + " model_kwargs={\"temperature\": 0.1, \"max_length\":200},\n", + " huggingfacehub_api_token=HUGGINGFACE_TOKEN\n", + ")\n", + "llm_chain = LLMChain(prompt=prompt, llm=llm)\n", + "question = \"How did the authors detect protein abundances?\"\n", + "context = \"\\n\".join([x.page_content for x in chunks])\n", + "\n", + "# print(llm_chain.run(question=question, context=context))" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[Document(page_content='Rheumatoid arthritis (RA) is an autoimmune disorder of complex disease etiology.RA leads to the inflammation of joints and surrounding synovial membrane [1].The global prevalence rate of RA is 0.24% and RA has been ranked as the 42nd highest contributor to global disability [2].Diagnosing RA is a highly individualized process and is based on a combination of both clinical manifestations and serological assays.Early disease diagnosis is the key to prevent joint damage and permanent physical disability in RA [3].RA is considered to be a continuum that begins with a disease-susceptibility stage characterized by a combination of genetic risk factors.This stage proceeds through a pre-clinical stage before the development of early RA characterized by articular inflammation.Environmental and microbial triggers continuously operate across this continuum.Immune-mediated etiology associated with stromal tissue dysregulation contributes to the chronic inflammation and ultimate articular destruction that is identified as established RA [4,5].A number of proteins and pathways have been linked to the disease pathogenesis of RA.However, there are still some gaps in current knowledge.Research aimed at the better clarification of these mechanisms can enable the development of more specific disease-modifying therapies [6].', metadata={'text': 'Rheumatoid arthritis (RA) is an autoimmune disorder of complex disease etiology.RA leads to the inflammation of joints and surrounding synovial membrane [1].The global prevalence rate of RA is 0.24% and RA has been ranked as the 42nd highest contributor to global disability [2].Diagnosing RA is a highly individualized process and is based on a combination of both clinical manifestations and serological assays.Early disease diagnosis is the key to prevent joint damage and permanent physical disability in RA [3].RA is considered to be a continuum that begins with a disease-susceptibility stage characterized by a combination of genetic risk factors.This stage proceeds through a pre-clinical stage before the development of early RA characterized by articular inflammation.Environmental and microbial triggers continuously operate across this continuum.Immune-mediated etiology associated with stromal tissue dysregulation contributes to the chronic inflammation and ultimate articular destruction that is identified as established RA [4,5].A number of proteins and pathways have been linked to the disease pathogenesis of RA.However, there are still some gaps in current knowledge.Research aimed at the better clarification of these mechanisms can enable the development of more specific disease-modifying therapies [6].', 'para': '11', 'bboxes': \"[[{'page': '1', 'x': '187.65', 'y': '696.70', 'h': '354.85', 'w': '9.58'}], [{'page': '1', 'x': '545.55', 'y': '696.70', 'h': '14.12', 'w': '9.58'}, {'page': '1', 'x': '166.39', 'y': '709.26', 'h': '341.80', 'w': '9.58'}], [{'page': '1', 'x': '511.79', 'y': '709.26', 'h': '47.49', 'w': '9.58'}, {'page': '1', 'x': '166.10', 'y': '721.81', 'h': '393.18', 'w': '9.58'}, {'page': '1', 'x': '166.39', 'y': '734.36', 'h': '88.77', 'w': '9.58'}], [{'page': '1', 'x': '258.26', 'y': '734.36', 'h': '301.02', 'w': '9.58'}, {'page': '1', 'x': '166.39', 'y': '746.91', 'h': '288.55', 'w': '9.58'}], [{'page': '1', 'x': '458.05', 'y': '746.91', 'h': '101.22', 'w': '9.58'}, {'page': '1', 'x': '166.39', 'y': '759.47', 'h': '346.80', 'w': '9.58'}], [{'page': '2', 'x': '187.65', 'y': '98.05', 'h': '371.62', 'w': '9.58'}, {'page': '2', 'x': '166.39', 'y': '110.60', 'h': '248.38', 'w': '9.58'}], [{'page': '2', 'x': '420.94', 'y': '110.60', 'h': '138.33', 'w': '9.58'}, {'page': '2', 'x': '166.10', 'y': '123.15', 'h': '394.83', 'w': '9.58'}, {'page': '2', 'x': '166.39', 'y': '135.71', 'h': '20.27', 'w': '9.58'}], [{'page': '2', 'x': '190.03', 'y': '135.71', 'h': '370.99', 'w': '9.58'}], [{'page': '2', 'x': '166.39', 'y': '148.26', 'h': '392.89', 'w': '9.58'}, {'page': '2', 'x': '166.39', 'y': '160.81', 'h': '392.88', 'w': '9.58'}, {'page': '2', 'x': '166.39', 'y': '173.37', 'h': '38.95', 'w': '9.58'}], [{'page': '2', 'x': '208.46', 'y': '173.37', 'h': '352.47', 'w': '9.58'}, {'page': '2', 'x': '166.39', 'y': '185.92', 'h': '47.87', 'w': '9.58'}], [{'page': '2', 'x': '216.91', 'y': '185.92', 'h': '256.92', 'w': '9.58'}], [{'page': '2', 'x': '477.36', 'y': '185.92', 'h': '81.91', 'w': '9.58'}, {'page': '2', 'x': '166.39', 'y': '198.47', 'h': '392.88', 'w': '9.58'}, {'page': '2', 'x': '166.39', 'y': '211.02', 'h': '141.30', 'w': '9.58'}]]\", 'pages': \"('1', '2')\", 'section_title': 'Introduction', 'section_number': '1.', 'paper_title': 'LC-MS/MS-Based Serum Protein Profiling for Identification of Candidate Biomarkers in Pakistani Rheumatoid Arthritis Patients', 'file_path': '/data/tommaso/data/papers/1.pdf'}),\n", + " Document(page_content='Rheumatoid factor (RF) and anti-citrullinated peptide antibodies (ACPA) are considered as the main serological markers for RA that have been included in the 2010 American College of Rheumatology (ACR)/European League against Rheumatism (EULAR) classification criteria for RA [7][8][9].Based on 2010 ACR/EULAR classification criteria for RA, clinically diagnosed RA patients can be categorized into four serotypes: (i) positive for both RF and ACPA, (ii) positive for RF and negative for ACPA, (iii) negative for RF and positive for ACPA and (iv) negative for both RF and ACPA.However, the levels of RF are also perturbed in connective tissue diseases [10] and some chronic infectious diseases such as hepatitis B and hepatitis C virus infections [11].RF is thus not a specific diagnostic marker for RA.ACPA is comparatively a more specific biomarker and two-thirds of the individuals ultimately diagnosed with RA were tested positive for ACPAs 6-10 years before diagnosis [12,13].A total of 1-3% of the healthy population may also test positive for ACPAs suggesting the decreased specificity of this biomarker [14][15][16][17].Therefore, it is important to discover the biomarkers for the diagnosis of RA with both increased sensitivity and specificity.', metadata={'text': 'Rheumatoid factor (RF) and anti-citrullinated peptide antibodies (ACPA) are considered as the main serological markers for RA that have been included in the 2010 American College of Rheumatology (ACR)/European League against Rheumatism (EULAR) classification criteria for RA [7][8][9].Based on 2010 ACR/EULAR classification criteria for RA, clinically diagnosed RA patients can be categorized into four serotypes: (i) positive for both RF and ACPA, (ii) positive for RF and negative for ACPA, (iii) negative for RF and positive for ACPA and (iv) negative for both RF and ACPA.However, the levels of RF are also perturbed in connective tissue diseases [10] and some chronic infectious diseases such as hepatitis B and hepatitis C virus infections [11].RF is thus not a specific diagnostic marker for RA.ACPA is comparatively a more specific biomarker and two-thirds of the individuals ultimately diagnosed with RA were tested positive for ACPAs 6-10 years before diagnosis [12,13].A total of 1-3% of the healthy population may also test positive for ACPAs suggesting the decreased specificity of this biomarker [14][15][16][17].Therefore, it is important to discover the biomarkers for the diagnosis of RA with both increased sensitivity and specificity.', 'para': '6', 'bboxes': \"[[{'page': '2', 'x': '187.65', 'y': '223.58', 'h': '373.27', 'w': '9.58'}, {'page': '2', 'x': '166.39', 'y': '236.13', 'h': '392.88', 'w': '9.58'}, {'page': '2', 'x': '166.39', 'y': '248.68', 'h': '394.53', 'w': '9.58'}, {'page': '2', 'x': '166.39', 'y': '261.24', 'h': '133.81', 'w': '9.58'}], [{'page': '2', 'x': '303.29', 'y': '261.24', 'h': '257.23', 'w': '9.58'}, {'page': '2', 'x': '166.39', 'y': '273.79', 'h': '393.08', 'w': '9.58'}, {'page': '2', 'x': '166.39', 'y': '286.34', 'h': '392.88', 'w': '9.58'}, {'page': '2', 'x': '166.10', 'y': '298.90', 'h': '272.66', 'w': '9.58'}], [{'page': '2', 'x': '441.85', 'y': '298.90', 'h': '117.43', 'w': '9.58'}, {'page': '2', 'x': '166.39', 'y': '311.45', 'h': '392.88', 'w': '9.58'}, {'page': '2', 'x': '166.39', 'y': '324.00', 'h': '240.16', 'w': '9.58'}], [{'page': '2', 'x': '409.64', 'y': '324.00', 'h': '149.63', 'w': '9.58'}, {'page': '2', 'x': '166.39', 'y': '336.55', 'h': '67.99', 'w': '9.58'}], [{'page': '2', 'x': '236.99', 'y': '336.55', 'h': '322.28', 'w': '9.58'}, {'page': '2', 'x': '166.39', 'y': '349.11', 'h': '392.88', 'w': '9.58'}, {'page': '2', 'x': '166.39', 'y': '361.66', 'h': '107.38', 'w': '9.58'}], [{'page': '2', 'x': '276.86', 'y': '361.66', 'h': '282.42', 'w': '9.58'}, {'page': '2', 'x': '166.39', 'y': '374.21', 'h': '325.69', 'w': '9.58'}], [{'page': '2', 'x': '495.20', 'y': '374.21', 'h': '64.08', 'w': '9.58'}, {'page': '2', 'x': '166.39', 'y': '386.77', 'h': '393.27', 'w': '9.58'}, {'page': '2', 'x': '166.39', 'y': '399.32', 'h': '65.18', 'w': '9.58'}]]\", 'pages': \"('2', '2')\", 'section_title': 'Introduction', 'section_number': '1.', 'paper_title': 'LC-MS/MS-Based Serum Protein Profiling for Identification of Candidate Biomarkers in Pakistani Rheumatoid Arthritis Patients', 'file_path': '/data/tommaso/data/papers/1.pdf'}),\n", + " Document(page_content='Mass spectrometry (MS)-based serum proteomics has emerged as a powerful technology in biological research targeted at the RA biomarker discovery [18,19].Several proteins and peptides have been identified that are unique to serum proteome of RA patients [18,20].A recent study compared the serum proteome profiles of seronegative patients with healthy controls [21].However, to our knowledge, no study has compared the serum proteome profiles of all the RA serotypes based on ACPAs and RF.Furthermore, the proteomic profiles of Pakistani RA patients have not been investigated in any previous study.This study aims to screen the RA serotypes, based on ACPAs and RF, and compare them with healthy controls in the Pakistani population for the identification of biomarkers that are differentially expressed (DE) between RA patients and healthy controls.', metadata={'text': 'Mass spectrometry (MS)-based serum proteomics has emerged as a powerful technology in biological research targeted at the RA biomarker discovery [18,19].Several proteins and peptides have been identified that are unique to serum proteome of RA patients [18,20].A recent study compared the serum proteome profiles of seronegative patients with healthy controls [21].However, to our knowledge, no study has compared the serum proteome profiles of all the RA serotypes based on ACPAs and RF.Furthermore, the proteomic profiles of Pakistani RA patients have not been investigated in any previous study.This study aims to screen the RA serotypes, based on ACPAs and RF, and compare them with healthy controls in the Pakistani population for the identification of biomarkers that are differentially expressed (DE) between RA patients and healthy controls.', 'para': '5', 'bboxes': \"[[{'page': '2', 'x': '187.65', 'y': '411.87', 'h': '373.27', 'w': '9.58'}, {'page': '2', 'x': '166.39', 'y': '424.42', 'h': '319.69', 'w': '9.58'}], [{'page': '2', 'x': '489.19', 'y': '424.42', 'h': '70.09', 'w': '9.58'}, {'page': '2', 'x': '166.39', 'y': '436.98', 'h': '394.62', 'w': '9.58'}], [{'page': '2', 'x': '166.01', 'y': '449.53', 'h': '393.66', 'w': '9.58'}, {'page': '2', 'x': '166.39', 'y': '462.08', 'h': '57.92', 'w': '9.58'}], [{'page': '2', 'x': '228.10', 'y': '462.08', 'h': '331.17', 'w': '9.58'}, {'page': '2', 'x': '166.10', 'y': '474.64', 'h': '262.67', 'w': '9.58'}], [{'page': '2', 'x': '432.38', 'y': '474.64', 'h': '126.90', 'w': '9.58'}, {'page': '2', 'x': '166.10', 'y': '487.19', 'h': '370.43', 'w': '9.58'}], [{'page': '2', 'x': '539.87', 'y': '487.19', 'h': '19.41', 'w': '9.58'}, {'page': '2', 'x': '166.39', 'y': '499.74', 'h': '392.88', 'w': '9.58'}, {'page': '2', 'x': '166.39', 'y': '512.30', 'h': '392.88', 'w': '9.58'}, {'page': '2', 'x': '166.39', 'y': '524.85', 'h': '315.47', 'w': '9.58'}]]\", 'pages': \"('2', '2')\", 'section_title': 'Introduction', 'section_number': '1.', 'paper_title': 'LC-MS/MS-Based Serum Protein Profiling for Identification of Candidate Biomarkers in Pakistani Rheumatoid Arthritis Patients', 'file_path': '/data/tommaso/data/papers/1.pdf'}),\n", + " Document(page_content='The study was approved by the institutional review board (IRB) of the National University of Sciences and Technology (NUST), Islamabad, Pakistan, and written informed consent was obtained from all the study subjects.Human blood sera were collected from Pakistani RA patients that were diagnosed according to 2010 ACR/EULAR criteria [7] as well as healthy controls.The venous blood was collected from each patient in a 5 mL BD Vacutainer ® tubes (BD vacutainer TM, Frankin Lakes, NJ, USA) containing spray-coated silica and a polymer gel for serum separation.Butterfly needle was used depending on the condition of the patient.The samples were allowed to clot, and the serum was carefully alliquoted and stored at -80 • C. ACPA-status was evaluated using the commercial ACPA AESKULISA ® enzyme-linked immunosorbent assay (ELISA) assay kit (AESKU.Diagnostics, Wendelsheim, Germany).RF-status was determined using a latex agglutination slide test kit for RF (Werfen, Barcelona, Spain).A total of 18 patients (mean age ± SD = 40.1 ± 12.13) selected for the study were divided into 4 cohorts.The first cohort included RA patients that were double-positive for both RF and ACPA (n = 5), the second and third cohort included RA patients that were either positive for RF or ACPA (n = 5 each) and the fourth cohort included RA patients that were negative for both of these serological markers (n = 3).', metadata={'text': 'The study was approved by the institutional review board (IRB) of the National University of Sciences and Technology (NUST), Islamabad, Pakistan, and written informed consent was obtained from all the study subjects.Human blood sera were collected from Pakistani RA patients that were diagnosed according to 2010 ACR/EULAR criteria [7] as well as healthy controls.The venous blood was collected from each patient in a 5 mL BD Vacutainer ® tubes (BD vacutainer TM, Frankin Lakes, NJ, USA) containing spray-coated silica and a polymer gel for serum separation.Butterfly needle was used depending on the condition of the patient.The samples were allowed to clot, and the serum was carefully alliquoted and stored at -80 • C. ACPA-status was evaluated using the commercial ACPA AESKULISA ® enzyme-linked immunosorbent assay (ELISA) assay kit (AESKU.Diagnostics, Wendelsheim, Germany).RF-status was determined using a latex agglutination slide test kit for RF (Werfen, Barcelona, Spain).A total of 18 patients (mean age ± SD = 40.1 ± 12.13) selected for the study were divided into 4 cohorts.The first cohort included RA patients that were double-positive for both RF and ACPA (n = 5), the second and third cohort included RA patients that were either positive for RF or ACPA (n = 5 each) and the fourth cohort included RA patients that were negative for both of these serological markers (n = 3).', 'para': '7', 'bboxes': \"[[{'page': '2', 'x': '187.65', 'y': '576.26', 'h': '371.62', 'w': '9.58'}, {'page': '2', 'x': '166.39', 'y': '588.81', 'h': '392.88', 'w': '9.58'}, {'page': '2', 'x': '166.39', 'y': '601.36', 'h': '217.09', 'w': '9.58'}], [{'page': '2', 'x': '386.61', 'y': '601.36', 'h': '172.66', 'w': '9.58'}, {'page': '2', 'x': '166.39', 'y': '613.91', 'h': '392.88', 'w': '9.58'}, {'page': '2', 'x': '165.98', 'y': '626.47', 'h': '107.48', 'w': '9.58'}], [{'page': '2', 'x': '276.54', 'y': '626.47', 'h': '282.74', 'w': '9.58'}, {'page': '2', 'x': '166.04', 'y': '639.02', 'h': '47.95', 'w': '9.58'}, {'page': '2', 'x': '213.98', 'y': '637.03', 'h': '5.66', 'w': '7.28'}, {'page': '2', 'x': '222.64', 'y': '639.02', 'h': '336.63', 'w': '9.58'}, {'page': '2', 'x': '166.39', 'y': '651.57', 'h': '198.34', 'w': '9.58'}], [{'page': '2', 'x': '367.82', 'y': '651.57', 'h': '191.46', 'w': '9.58'}, {'page': '2', 'x': '166.39', 'y': '664.13', 'h': '107.76', 'w': '9.58'}], [{'page': '2', 'x': '277.53', 'y': '664.13', 'h': '282.13', 'w': '9.58'}, {'page': '2', 'x': '166.39', 'y': '676.36', 'h': '125.23', 'w': '9.90'}, {'page': '2', 'x': '294.21', 'y': '674.45', 'h': '3.94', 'w': '6.92'}, {'page': '2', 'x': '298.74', 'y': '676.68', 'h': '260.92', 'w': '9.58'}, {'page': '2', 'x': '166.01', 'y': '689.23', 'h': '55.35', 'w': '9.58'}, {'page': '2', 'x': '221.35', 'y': '687.24', 'h': '5.66', 'w': '7.28'}, {'page': '2', 'x': '229.57', 'y': '689.23', 'h': '330.96', 'w': '9.58'}, {'page': '2', 'x': '165.90', 'y': '701.79', 'h': '112.72', 'w': '9.58'}], [{'page': '2', 'x': '281.70', 'y': '701.79', 'h': '277.57', 'w': '9.58'}, {'page': '2', 'x': '166.39', 'y': '714.34', 'h': '159.98', 'w': '9.58'}], [{'page': '2', 'x': '329.49', 'y': '714.02', 'h': '230.78', 'w': '9.90'}, {'page': '2', 'x': '166.39', 'y': '726.89', 'h': '223.73', 'w': '9.58'}], [{'page': '2', 'x': '393.21', 'y': '726.89', 'h': '166.06', 'w': '9.58'}, {'page': '2', 'x': '166.39', 'y': '739.44', 'h': '392.88', 'w': '9.58'}, {'page': '2', 'x': '166.39', 'y': '752.00', 'h': '392.89', 'w': '9.58'}, {'page': '2', 'x': '166.39', 'y': '764.55', 'h': '394.63', 'w': '9.58'}]]\", 'pages': \"('2', '2')\", 'section_title': 'Study Subjects and Serum Collection', 'section_number': '2.1.', 'paper_title': 'LC-MS/MS-Based Serum Protein Profiling for Identification of Candidate Biomarkers in Pakistani Rheumatoid Arthritis Patients', 'file_path': '/data/tommaso/data/papers/1.pdf'}),\n", + " Document(page_content='Life 2022, 12, 464 3 of 17 A total of 5 healthy controls (n = 5) (mean age ± SD = 43.4± 9.11) were also included in the study.Each cohort contained age-matched samples with a female-to-male ratio of 4:1.Blood samples from both RA cases and healthy controls were collected in vacutainers without anticoagulants.Serum was then separated from blood at 4000× g for 5 min, aliquoted into polyethylene tubes (Eppendorf AG, Hamburg, Germany) and stored at -80 • C until use.', metadata={'text': 'Life 2022, 12, 464 3 of 17 A total of 5 healthy controls (n = 5) (mean age ± SD = 43.4± 9.11) were also included in the study.Each cohort contained age-matched samples with a female-to-male ratio of 4:1.Blood samples from both RA cases and healthy controls were collected in vacutainers without anticoagulants.Serum was then separated from blood at 4000× g for 5 min, aliquoted into polyethylene tubes (Eppendorf AG, Hamburg, Germany) and stored at -80 • C until use.', 'para': '4', 'bboxes': \"[[{'page': '3', 'x': '35.49', 'y': '57.46', 'h': '57.79', 'w': '7.77'}, {'page': '3', 'x': '536.53', 'y': '57.56', 'h': '22.95', 'w': '7.67'}, {'page': '3', 'x': '166.01', 'y': '97.73', 'h': '249.40', 'w': '9.90'}], [{'page': '3', 'x': '417.90', 'y': '97.73', 'h': '141.38', 'w': '9.90'}, {'page': '3', 'x': '166.39', 'y': '110.60', 'h': '25.94', 'w': '9.58'}], [{'page': '3', 'x': '195.28', 'y': '110.60', 'h': '335.62', 'w': '9.58'}], [{'page': '3', 'x': '533.84', 'y': '110.60', 'h': '25.43', 'w': '9.58'}, {'page': '3', 'x': '166.39', 'y': '123.15', 'h': '392.88', 'w': '9.58'}, {'page': '3', 'x': '166.39', 'y': '135.71', 'h': '66.29', 'w': '9.58'}], [{'page': '3', 'x': '235.79', 'y': '135.58', 'h': '323.49', 'w': '9.71'}, {'page': '3', 'x': '166.10', 'y': '147.94', 'h': '333.23', 'w': '9.90'}, {'page': '3', 'x': '501.91', 'y': '146.03', 'h': '3.94', 'w': '6.92'}, {'page': '3', 'x': '506.44', 'y': '148.26', 'h': '50.39', 'w': '9.58'}]]\", 'pages': \"('3', '3')\", 'section_title': 'Study Subjects and Serum Collection', 'section_number': '2.1.', 'paper_title': 'LC-MS/MS-Based Serum Protein Profiling for Identification of Candidate Biomarkers in Pakistani Rheumatoid Arthritis Patients', 'file_path': '/data/tommaso/data/papers/1.pdf'}),\n", + " Document(page_content='For validation, serum samples were collected and processed from RA patients (n = 60) (mean age ± SD = 41.495 ± 12.8275) and healthy controls (n = 20) (mean age ± SD = 45.4 ± 11.31) from the same population.The demographics and clinical characteristics of the experimental and validation cohort are shown in Table 1.', metadata={'text': 'For validation, serum samples were collected and processed from RA patients (n = 60) (mean age ± SD = 41.495 ± 12.8275) and healthy controls (n = 20) (mean age ± SD = 45.4 ± 11.31) from the same population.The demographics and clinical characteristics of the experimental and validation cohort are shown in Table 1.', 'para': '1', 'bboxes': \"[[{'page': '3', 'x': '187.65', 'y': '160.81', 'h': '372.02', 'w': '9.58'}, {'page': '3', 'x': '166.10', 'y': '173.05', 'h': '394.17', 'w': '9.90'}, {'page': '3', 'x': '166.07', 'y': '185.60', 'h': '256.73', 'w': '9.90'}], [{'page': '3', 'x': '425.92', 'y': '185.92', 'h': '133.36', 'w': '9.58'}, {'page': '3', 'x': '166.39', 'y': '198.47', 'h': '343.00', 'w': '9.58'}]]\", 'pages': \"('3', '3')\", 'section_title': 'Study Subjects and Serum Collection', 'section_number': '2.1.', 'paper_title': 'LC-MS/MS-Based Serum Protein Profiling for Identification of Candidate Biomarkers in Pakistani Rheumatoid Arthritis Patients', 'file_path': '/data/tommaso/data/papers/1.pdf'}),\n", + " Document(page_content='Serum samples were thawed on ice followed by centrifugation at 14,000× g for 10 min at 4 • C. Protein concentrations for serum samples from each donor were then determined through Pierce ® 660 nm protein assay kit for protein concentration (Thermo Scientific, Waltham, MA, USA).The sample volumes containing 10 mg total protein were calculated and mixed with double-distilled water (ddH 2 O) to make the total volume up to 500 µL.', metadata={'text': 'Serum samples were thawed on ice followed by centrifugation at 14,000× g for 10 min at 4 • C. Protein concentrations for serum samples from each donor were then determined through Pierce ® 660 nm protein assay kit for protein concentration (Thermo Scientific, Waltham, MA, USA).The sample volumes containing 10 mg total protein were calculated and mixed with double-distilled water (ddH 2 O) to make the total volume up to 500 µL.', 'para': '1', 'bboxes': \"[[{'page': '3', 'x': '187.65', 'y': '635.30', 'h': '371.63', 'w': '9.71'}, {'page': '3', 'x': '166.39', 'y': '647.98', 'h': '15.71', 'w': '9.58'}, {'page': '3', 'x': '184.70', 'y': '645.75', 'h': '3.94', 'w': '6.92'}, {'page': '3', 'x': '189.24', 'y': '647.98', 'h': '370.04', 'w': '9.58'}, {'page': '3', 'x': '166.39', 'y': '660.54', 'h': '66.80', 'w': '9.58'}, {'page': '3', 'x': '233.20', 'y': '658.55', 'h': '5.66', 'w': '7.28'}, {'page': '3', 'x': '242.68', 'y': '660.54', 'h': '317.84', 'w': '9.58'}, {'page': '3', 'x': '165.90', 'y': '673.09', 'h': '93.36', 'w': '9.58'}], [{'page': '3', 'x': '261.76', 'y': '673.09', 'h': '297.51', 'w': '9.58'}, {'page': '3', 'x': '166.39', 'y': '685.53', 'h': '384.50', 'w': '10.84'}]]\", 'pages': \"('3', '3')\", 'section_title': 'Protein Assay', 'section_number': '2.2.', 'paper_title': 'LC-MS/MS-Based Serum Protein Profiling for Identification of Candidate Biomarkers in Pakistani Rheumatoid Arthritis Patients', 'file_path': '/data/tommaso/data/papers/1.pdf'}),\n", + " Document(page_content='Serum samples were analyzed using one-dimensional (1D) sodium dodecyl sulfate polyacrylamide gel electrophoresis (SDS-PAGE) for assessment of the gross quantitative as well as qualitative differences in the serum protein profiles of the study subjects.Briefly, 16 µg of serum samples were mixed with an equal volume of NativePAGE™ sample buffer (Thermo Scientific, Waltham, MA, USA) and loaded on NativePAGE™ 1.0 mm, 4-16%, bis-tris, mini protein gels (Thermo Scientific, Waltham, MA, USA).Novex Sharp Pre-Stained Protein Standard for molecular weight estimation (Thermo Scientific, Waltham, MA, USA) was also loaded in a separate well.The samples and the standard were run in NuPAGE™ MES SDS running buffer (Thermo Scientific, Waltham, MA, USA) at 120 V for 60 min and then at 150 V for 30 min.The gels were washed for 5 min in ddH 2 O.The washing was repeated thrice.Prior to visualization, the protein gels were stained for 16 hours in Coomassie Brilliant Blue R-250 dye (Bio-Rad, Hemel Hempstead, UK) and rinsed in ddH 2 O for 30 min.The whole figure can be found at Supplementary Materials (Figures S1-S3).', metadata={'text': 'Serum samples were analyzed using one-dimensional (1D) sodium dodecyl sulfate polyacrylamide gel electrophoresis (SDS-PAGE) for assessment of the gross quantitative as well as qualitative differences in the serum protein profiles of the study subjects.Briefly, 16 µg of serum samples were mixed with an equal volume of NativePAGE™ sample buffer (Thermo Scientific, Waltham, MA, USA) and loaded on NativePAGE™ 1.0 mm, 4-16%, bis-tris, mini protein gels (Thermo Scientific, Waltham, MA, USA).Novex Sharp Pre-Stained Protein Standard for molecular weight estimation (Thermo Scientific, Waltham, MA, USA) was also loaded in a separate well.The samples and the standard were run in NuPAGE™ MES SDS running buffer (Thermo Scientific, Waltham, MA, USA) at 120 V for 60 min and then at 150 V for 30 min.The gels were washed for 5 min in ddH 2 O.The washing was repeated thrice.Prior to visualization, the protein gels were stained for 16 hours in Coomassie Brilliant Blue R-250 dye (Bio-Rad, Hemel Hempstead, UK) and rinsed in ddH 2 O for 30 min.The whole figure can be found at Supplementary Materials (Figures S1-S3).', 'para': '7', 'bboxes': \"[[{'page': '3', 'x': '187.65', 'y': '723.60', 'h': '371.62', 'w': '9.58'}, {'page': '3', 'x': '166.10', 'y': '736.15', 'h': '393.18', 'w': '9.58'}, {'page': '3', 'x': '165.98', 'y': '748.71', 'h': '360.04', 'w': '9.58'}], [{'page': '3', 'x': '529.21', 'y': '748.71', 'h': '31.31', 'w': '9.58'}, {'page': '3', 'x': '165.90', 'y': '761.15', 'h': '393.58', 'w': '9.69'}, {'page': '3', 'x': '166.07', 'y': '773.81', 'h': '394.45', 'w': '9.58'}, {'page': '4', 'x': '166.39', 'y': '98.05', 'h': '282.71', 'w': '9.58'}], [{'page': '4', 'x': '451.18', 'y': '98.05', 'h': '108.09', 'w': '9.58'}, {'page': '4', 'x': '166.39', 'y': '110.60', 'h': '393.88', 'w': '9.58'}, {'page': '4', 'x': '165.98', 'y': '123.15', 'h': '152.63', 'w': '9.58'}], [{'page': '4', 'x': '321.71', 'y': '123.15', 'h': '239.52', 'w': '9.58'}, {'page': '4', 'x': '166.39', 'y': '135.71', 'h': '392.88', 'w': '9.58'}, {'page': '4', 'x': '166.39', 'y': '148.26', 'h': '132.03', 'w': '9.58'}], [{'page': '4', 'x': '302.77', 'y': '148.26', 'h': '195.38', 'w': '10.73'}], [{'page': '4', 'x': '501.06', 'y': '148.26', 'h': '58.22', 'w': '9.58'}, {'page': '4', 'x': '165.98', 'y': '160.81', 'h': '90.55', 'w': '9.58'}], [{'page': '4', 'x': '260.29', 'y': '160.81', 'h': '298.99', 'w': '9.58'}, {'page': '4', 'x': '166.39', 'y': '173.37', 'h': '392.88', 'w': '10.73'}, {'page': '4', 'x': '166.39', 'y': '185.92', 'h': '47.62', 'w': '9.58'}], [{'page': '4', 'x': '217.10', 'y': '185.92', 'h': '331.85', 'w': '9.58'}]]\", 'pages': \"('3', '4')\", 'section_title': 'SDS-PAGE and Silver Staining', 'section_number': '2.3.', 'paper_title': 'LC-MS/MS-Based Serum Protein Profiling for Identification of Candidate Biomarkers in Pakistani Rheumatoid Arthritis Patients', 'file_path': '/data/tommaso/data/papers/1.pdf'}),\n", + " Document(page_content='For qualitative assessment of the elution efficiency of ProteoMiner™ columns (Bio-Rad, Hemel Hempstead, UK), one serum sample processed through the column was also evaluated using 1D SDS-PAGE.For this purpose, the serum sample, the flow-through after each wash, and the eluted samples were run using the aforementioned protocol.Additionally, trypsin digested samples were also analyzed using 1D SDS-PAGE to confirm complete protein digestion before liquid chromatography-tandem mass spectrometry (LC-MS).', metadata={'text': 'For qualitative assessment of the elution efficiency of ProteoMiner™ columns (Bio-Rad, Hemel Hempstead, UK), one serum sample processed through the column was also evaluated using 1D SDS-PAGE.For this purpose, the serum sample, the flow-through after each wash, and the eluted samples were run using the aforementioned protocol.Additionally, trypsin digested samples were also analyzed using 1D SDS-PAGE to confirm complete protein digestion before liquid chromatography-tandem mass spectrometry (LC-MS).', 'para': '2', 'bboxes': \"[[{'page': '4', 'x': '187.65', 'y': '198.47', 'h': '373.27', 'w': '9.58'}, {'page': '4', 'x': '166.39', 'y': '211.02', 'h': '392.88', 'w': '9.58'}, {'page': '4', 'x': '166.39', 'y': '223.58', 'h': '136.31', 'w': '9.58'}], [{'page': '4', 'x': '305.20', 'y': '223.58', 'h': '254.27', 'w': '9.58'}, {'page': '4', 'x': '166.39', 'y': '236.13', 'h': '348.40', 'w': '9.58'}], [{'page': '4', 'x': '517.88', 'y': '236.13', 'h': '43.05', 'w': '9.58'}, {'page': '4', 'x': '166.39', 'y': '248.68', 'h': '392.88', 'w': '9.58'}, {'page': '4', 'x': '166.10', 'y': '261.24', 'h': '377.12', 'w': '9.58'}]]\", 'pages': \"('4', '4')\", 'section_title': 'SDS-PAGE and Silver Staining', 'section_number': '2.3.', 'paper_title': 'LC-MS/MS-Based Serum Protein Profiling for Identification of Candidate Biomarkers in Pakistani Rheumatoid Arthritis Patients', 'file_path': '/data/tommaso/data/papers/1.pdf'}),\n", + " Document(page_content='ProteoMiner™ Small Capacity bead columns for protein enrichment were loaded with 10 mg of protein from each sample separately.The bead columns were then rotated at the room temperature for 2 h followed by centrifugation at 1000× g for 60 s.Washing of the beads was performed thrice in phosphate-buffered saline (Sigma-Aldrich, Gillingham, UK) followed by rotation for 5 min and subsequent centrifugation for 60 s at 1000× g.This eluted the maximum amount of unbound protein.', metadata={'text': 'ProteoMiner™ Small Capacity bead columns for protein enrichment were loaded with 10 mg of protein from each sample separately.The bead columns were then rotated at the room temperature for 2 h followed by centrifugation at 1000× g for 60 s.Washing of the beads was performed thrice in phosphate-buffered saline (Sigma-Aldrich, Gillingham, UK) followed by rotation for 5 min and subsequent centrifugation for 60 s at 1000× g.This eluted the maximum amount of unbound protein.', 'para': '3', 'bboxes': \"[[{'page': '4', 'x': '187.65', 'y': '301.41', 'h': '371.62', 'w': '9.58'}, {'page': '4', 'x': '165.90', 'y': '313.96', 'h': '202.25', 'w': '9.58'}], [{'page': '4', 'x': '371.24', 'y': '313.96', 'h': '188.03', 'w': '9.58'}, {'page': '4', 'x': '166.39', 'y': '326.38', 'h': '322.63', 'w': '9.71'}], [{'page': '4', 'x': '492.17', 'y': '326.51', 'h': '67.11', 'w': '9.58'}, {'page': '4', 'x': '166.39', 'y': '339.06', 'h': '393.87', 'w': '9.58'}, {'page': '4', 'x': '166.39', 'y': '351.49', 'h': '368.81', 'w': '9.71'}], [{'page': '4', 'x': '539.86', 'y': '351.62', 'h': '19.41', 'w': '9.58'}, {'page': '4', 'x': '166.39', 'y': '364.17', 'h': '220.03', 'w': '9.58'}]]\", 'pages': \"('4', '4')\", 'section_title': 'ProteoMiner TM Column Processing', 'section_number': '2.4.', 'paper_title': 'LC-MS/MS-Based Serum Protein Profiling for Identification of Candidate Biomarkers in Pakistani Rheumatoid Arthritis Patients', 'file_path': '/data/tommaso/data/papers/1.pdf'}),\n", + " Document(page_content='A pre-mixed solution of 0.05% (w/v) RapiGest (Waters, Elstree, Hertfordshire, UK) and 160 µL of 25 mM ammonium bicarbonate (NH 4 HCO 3 ) (Fluka Chemicals Ltd., Gillingham, UK) was used for resuspension of the Proteominer TM beads.The resuspended beads were then heated for 10 min at 80 • C; DL-Dithiothreitol (Sigma-Aldrich, Gillingham, UK) to 3 mM final concentration was added, incubated for 10 min at 60 • C and iodoacetamide (Sigma-Aldrich, Gillingham, UK) was added to a final concentration of 9 mM, incubated in the dark for 30 min at room temperature.Protease enzyme trypsin (Sigma-Aldrich, Gillingham, UK) was used for enzymatic protein digestion.A total of 2 µg of trypsin was added to each sample and rotated at 37 • C for 16 h.The samples containing the beads were supplemented again with 2 µg trypsin and rotation for 2 h at 37 • C. The digested serum samples were then centrifuged at 1000× g for 1 min at room temperature.Supernatant was removed followed by the inhibition of the trypsin activity by acidification with 0.5% (v/v) trifluoroacetic acid (TFA, Sigma-Aldrich, Gillingham, UK) and rotation at 37 • C for 30 min.The samples were then centrifuged at 13,000× g for 15 min at 4 • C.', metadata={'text': 'A pre-mixed solution of 0.05% (w/v) RapiGest (Waters, Elstree, Hertfordshire, UK) and 160 µL of 25 mM ammonium bicarbonate (NH 4 HCO 3 ) (Fluka Chemicals Ltd., Gillingham, UK) was used for resuspension of the Proteominer TM beads.The resuspended beads were then heated for 10 min at 80 • C; DL-Dithiothreitol (Sigma-Aldrich, Gillingham, UK) to 3 mM final concentration was added, incubated for 10 min at 60 • C and iodoacetamide (Sigma-Aldrich, Gillingham, UK) was added to a final concentration of 9 mM, incubated in the dark for 30 min at room temperature.Protease enzyme trypsin (Sigma-Aldrich, Gillingham, UK) was used for enzymatic protein digestion.A total of 2 µg of trypsin was added to each sample and rotated at 37 • C for 16 h.The samples containing the beads were supplemented again with 2 µg trypsin and rotation for 2 h at 37 • C. The digested serum samples were then centrifuged at 1000× g for 1 min at room temperature.Supernatant was removed followed by the inhibition of the trypsin activity by acidification with 0.5% (v/v) trifluoroacetic acid (TFA, Sigma-Aldrich, Gillingham, UK) and rotation at 37 • C for 30 min.The samples were then centrifuged at 13,000× g for 15 min at 4 • C.', 'para': '6', 'bboxes': \"[[{'page': '4', 'x': '187.65', 'y': '402.13', 'h': '371.62', 'w': '9.58'}, {'page': '4', 'x': '165.90', 'y': '414.57', 'h': '394.62', 'w': '10.84'}, {'page': '4', 'x': '166.39', 'y': '427.23', 'h': '220.48', 'w': '9.58'}, {'page': '4', 'x': '386.88', 'y': '425.24', 'h': '11.80', 'w': '7.28'}, {'page': '4', 'x': '401.67', 'y': '427.23', 'h': '27.81', 'w': '9.58'}], [{'page': '4', 'x': '432.57', 'y': '427.23', 'h': '126.71', 'w': '9.58'}, {'page': '4', 'x': '166.39', 'y': '439.79', 'h': '128.16', 'w': '9.58'}, {'page': '4', 'x': '297.71', 'y': '437.56', 'h': '3.94', 'w': '6.92'}, {'page': '4', 'x': '302.25', 'y': '439.79', 'h': '257.02', 'w': '9.58'}, {'page': '4', 'x': '166.39', 'y': '452.34', 'h': '289.28', 'w': '9.58'}, {'page': '4', 'x': '458.58', 'y': '450.11', 'h': '3.94', 'w': '6.92'}, {'page': '4', 'x': '463.12', 'y': '452.34', 'h': '96.15', 'w': '9.58'}, {'page': '4', 'x': '166.07', 'y': '464.89', 'h': '393.21', 'w': '9.58'}, {'page': '4', 'x': '166.39', 'y': '477.45', 'h': '201.41', 'w': '9.58'}], [{'page': '4', 'x': '373.30', 'y': '477.45', 'h': '187.22', 'w': '9.58'}, {'page': '4', 'x': '166.39', 'y': '490.00', 'h': '261.11', 'w': '9.58'}], [{'page': '4', 'x': '430.61', 'y': '489.89', 'h': '128.66', 'w': '9.69'}, {'page': '4', 'x': '166.39', 'y': '502.55', 'h': '168.86', 'w': '9.58'}, {'page': '4', 'x': '337.78', 'y': '500.32', 'h': '3.94', 'w': '6.92'}, {'page': '4', 'x': '342.32', 'y': '502.55', 'h': '44.55', 'w': '9.58'}], [{'page': '4', 'x': '389.93', 'y': '502.55', 'h': '169.35', 'w': '9.58'}, {'page': '4', 'x': '166.39', 'y': '515.00', 'h': '284.75', 'w': '9.69'}, {'page': '4', 'x': '453.78', 'y': '512.87', 'h': '3.94', 'w': '6.92'}, {'page': '4', 'x': '458.32', 'y': '515.11', 'h': '100.96', 'w': '9.58'}, {'page': '4', 'x': '166.39', 'y': '527.53', 'h': '317.21', 'w': '9.71'}], [{'page': '4', 'x': '486.71', 'y': '527.66', 'h': '72.56', 'w': '9.58'}, {'page': '4', 'x': '166.39', 'y': '540.21', 'h': '393.88', 'w': '9.58'}, {'page': '4', 'x': '166.39', 'y': '552.76', 'h': '330.90', 'w': '9.58'}, {'page': '4', 'x': '499.89', 'y': '550.53', 'h': '3.94', 'w': '6.92'}, {'page': '4', 'x': '504.43', 'y': '552.76', 'h': '56.60', 'w': '9.58'}], [{'page': '4', 'x': '166.09', 'y': '565.19', 'h': '276.76', 'w': '9.71'}, {'page': '4', 'x': '445.43', 'y': '563.09', 'h': '3.94', 'w': '6.92'}, {'page': '4', 'x': '449.97', 'y': '565.32', 'h': '9.55', 'w': '9.58'}]]\", 'pages': \"('4', '4')\", 'section_title': 'Protein Digestion', 'section_number': '2.5.', 'paper_title': 'LC-MS/MS-Based Serum Protein Profiling for Identification of Candidate Biomarkers in Pakistani Rheumatoid Arthritis Patients', 'file_path': '/data/tommaso/data/papers/1.pdf'}),\n", + " Document(page_content='Each serum digest sample was analyzed using LC-MS/MS on an UltiMate 3000 Nano LC System (Dionex/Thermo Scientific, Waltham, MA, USA).The system was attached to a Q Exactive TM Quadrupole-Orbitrap instrument (Thermo Scientific, Waltham, MA, USA).Prior to loading onto the instrument, the samples were carefully randomized using Microsoft Excel.All the samples were run in one single batch.For this purpose, 150 ng of the tryptic digest from each trypsin-digested serum sample was subjected to LC-MS/MS via a 90 min gradient.For loading on trapping column (100 Å, 75 µm × 2 cm, Acclaim PepMap 100 C18, 3 µm packing material) loading buffer was used that contained 2% (v/v) acetonitrile and 0.1% (v/v) TFA in water.The sample digests mixed with loaded buffer were run at a flow rate of 12 µL min -1 for 7 min.Then, a trapping column was coupled with an analytical column (100 Å, 75 µm × 50 cm, EASY-Spray PepMap RSLC C18, 2 µm packing material) followed by elution of the peptides through a linear gradient.The linear gradient consisted of 96.2%A composed of 0.1% (v/v) formic acid: 3.8% B consisting of 0.1% (v/v) formic acid in water/acetonitrile [80/20] (v/v) to 50% A: 50% B at a flow rate of 300 nl min -1 over 90 min and washed for 5 min at 1% A: 99% B. The column was then re-equilibrated to the starting conditions and maintained at 40 • C before direct introduction of the affluent into the integrated nano-electrospray ionization source that was operating in the positive ion mode.The MS instrument was operated in the data-dependent acquisition (DDA) mode with the survey scans between the mass to charge ratio (m/z) range of 350 to 2000 that were acquired at a mass resolution of about 60,000 and the fullwidth at halfmaximum (FWHM) at m/z of about 200.The automatic gain control was set to 3e6 with a maximum injection time of 100 ms.For MS/MS, 12 of the most intense precursor ions with an isolation window of 2 m/z units and charge states ranging from 2+ to 5+ were selected.For this, the automatic gain control was set to a value of 1e5 with the maximum injection time of 100 ms.The peptide fragmentation was obtained by the higher-energy collisional dissociation utilizing a normalized collision energy of 30%.Dynamic exclusion of the m/z values was used to avoid the repeated fragmentation of the same peptide with an exclusion time of 20 s.All MS raw files for this experiment have been deposited to the ProteomeXchange Consortium through the PRIDE partner proteomics repository.The dataset identifier for this submission is PXD020235 and 10.6019/PXD020235 [22].', metadata={'text': 'Each serum digest sample was analyzed using LC-MS/MS on an UltiMate 3000 Nano LC System (Dionex/Thermo Scientific, Waltham, MA, USA).The system was attached to a Q Exactive TM Quadrupole-Orbitrap instrument (Thermo Scientific, Waltham, MA, USA).Prior to loading onto the instrument, the samples were carefully randomized using Microsoft Excel.All the samples were run in one single batch.For this purpose, 150 ng of the tryptic digest from each trypsin-digested serum sample was subjected to LC-MS/MS via a 90 min gradient.For loading on trapping column (100 Å, 75 µm × 2 cm, Acclaim PepMap 100 C18, 3 µm packing material) loading buffer was used that contained 2% (v/v) acetonitrile and 0.1% (v/v) TFA in water.The sample digests mixed with loaded buffer were run at a flow rate of 12 µL min -1 for 7 min.Then, a trapping column was coupled with an analytical column (100 Å, 75 µm × 50 cm, EASY-Spray PepMap RSLC C18, 2 µm packing material) followed by elution of the peptides through a linear gradient.The linear gradient consisted of 96.2%A composed of 0.1% (v/v) formic acid: 3.8% B consisting of 0.1% (v/v) formic acid in water/acetonitrile [80/20] (v/v) to 50% A: 50% B at a flow rate of 300 nl min -1 over 90 min and washed for 5 min at 1% A: 99% B. The column was then re-equilibrated to the starting conditions and maintained at 40 • C before direct introduction of the affluent into the integrated nano-electrospray ionization source that was operating in the positive ion mode.The MS instrument was operated in the data-dependent acquisition (DDA) mode with the survey scans between the mass to charge ratio (m/z) range of 350 to 2000 that were acquired at a mass resolution of about 60,000 and the fullwidth at halfmaximum (FWHM) at m/z of about 200.The automatic gain control was set to 3e6 with a maximum injection time of 100 ms.For MS/MS, 12 of the most intense precursor ions with an isolation window of 2 m/z units and charge states ranging from 2+ to 5+ were selected.For this, the automatic gain control was set to a value of 1e5 with the maximum injection time of 100 ms.The peptide fragmentation was obtained by the higher-energy collisional dissociation utilizing a normalized collision energy of 30%.Dynamic exclusion of the m/z values was used to avoid the repeated fragmentation of the same peptide with an exclusion time of 20 s.All MS raw files for this experiment have been deposited to the ProteomeXchange Consortium through the PRIDE partner proteomics repository.The dataset identifier for this submission is PXD020235 and 10.6019/PXD020235 [22].', 'para': '17', 'bboxes': \"[[{'page': '4', 'x': '187.65', 'y': '603.27', 'h': '371.62', 'w': '9.58'}, {'page': '4', 'x': '166.39', 'y': '615.83', 'h': '275.76', 'w': '9.58'}], [{'page': '4', 'x': '445.29', 'y': '615.83', 'h': '113.98', 'w': '9.58'}, {'page': '4', 'x': '166.39', 'y': '628.38', 'h': '69.65', 'w': '9.58'}, {'page': '4', 'x': '236.04', 'y': '626.39', 'h': '11.80', 'w': '7.28'}, {'page': '4', 'x': '251.61', 'y': '628.38', 'h': '308.91', 'w': '9.58'}, {'page': '4', 'x': '166.39', 'y': '640.93', 'h': '26.46', 'w': '9.58'}], [{'page': '4', 'x': '195.34', 'y': '640.93', 'h': '363.93', 'w': '9.58'}, {'page': '4', 'x': '166.39', 'y': '653.49', 'h': '70.68', 'w': '9.58'}], [{'page': '4', 'x': '240.17', 'y': '653.49', 'h': '198.27', 'w': '9.58'}], [{'page': '4', 'x': '441.54', 'y': '653.49', 'h': '117.74', 'w': '9.58'}, {'page': '4', 'x': '166.39', 'y': '666.04', 'h': '392.88', 'w': '9.58'}, {'page': '4', 'x': '166.12', 'y': '678.59', 'h': '99.03', 'w': '9.58'}], [{'page': '4', 'x': '269.47', 'y': '678.28', 'h': '289.81', 'w': '9.90'}, {'page': '4', 'x': '166.39', 'y': '691.04', 'h': '393.88', 'w': '9.69'}, {'page': '4', 'x': '166.39', 'y': '703.70', 'h': '184.20', 'w': '9.58'}], [{'page': '4', 'x': '354.67', 'y': '703.70', 'h': '204.81', 'w': '9.58'}, {'page': '4', 'x': '165.98', 'y': '716.14', 'h': '161.87', 'w': '9.69'}, {'page': '4', 'x': '327.94', 'y': '714.02', 'h': '10.01', 'w': '6.92'}, {'page': '4', 'x': '341.08', 'y': '716.25', 'h': '43.65', 'w': '9.58'}], [{'page': '4', 'x': '388.22', 'y': '716.25', 'h': '171.05', 'w': '9.58'}, {'page': '4', 'x': '165.98', 'y': '728.49', 'h': '393.30', 'w': '9.90'}, {'page': '4', 'x': '166.10', 'y': '741.36', 'h': '346.29', 'w': '9.58'}], [{'page': '4', 'x': '515.48', 'y': '741.36', 'h': '43.99', 'w': '9.58'}, {'page': '4', 'x': '166.39', 'y': '753.91', 'h': '123.17', 'w': '9.58'}], [{'page': '4', 'x': '292.23', 'y': '753.91', 'h': '267.05', 'w': '9.58'}, {'page': '4', 'x': '166.39', 'y': '766.46', 'h': '392.88', 'w': '9.58'}, {'page': '5', 'x': '166.39', 'y': '98.05', 'h': '57.86', 'w': '9.58'}, {'page': '5', 'x': '224.34', 'y': '95.82', 'h': '10.01', 'w': '6.92'}, {'page': '5', 'x': '237.34', 'y': '98.05', 'h': '321.93', 'w': '9.58'}, {'page': '5', 'x': '166.39', 'y': '110.60', 'h': '266.57', 'w': '9.58'}, {'page': '5', 'x': '435.34', 'y': '108.37', 'h': '3.94', 'w': '6.92'}, {'page': '5', 'x': '439.88', 'y': '110.60', 'h': '119.40', 'w': '9.58'}, {'page': '5', 'x': '166.39', 'y': '123.15', 'h': '392.88', 'w': '9.58'}, {'page': '5', 'x': '166.39', 'y': '135.71', 'h': '96.68', 'w': '9.58'}], [{'page': '5', 'x': '266.18', 'y': '135.71', 'h': '293.10', 'w': '9.58'}, {'page': '5', 'x': '166.07', 'y': '148.26', 'h': '393.21', 'w': '9.58'}, {'page': '5', 'x': '166.39', 'y': '160.81', 'h': '394.53', 'w': '9.58'}, {'page': '5', 'x': '166.39', 'y': '173.24', 'h': '181.48', 'w': '9.71'}], [{'page': '5', 'x': '351.05', 'y': '173.37', 'h': '208.23', 'w': '9.58'}, {'page': '5', 'x': '166.39', 'y': '185.92', 'h': '165.42', 'w': '9.58'}], [{'page': '5', 'x': '335.20', 'y': '185.92', 'h': '224.07', 'w': '9.58'}, {'page': '5', 'x': '165.98', 'y': '198.34', 'h': '393.30', 'w': '9.71'}, {'page': '5', 'x': '166.39', 'y': '211.02', 'h': '37.95', 'w': '9.58'}], [{'page': '5', 'x': '207.44', 'y': '211.02', 'h': '351.83', 'w': '9.58'}, {'page': '5', 'x': '166.39', 'y': '223.58', 'h': '109.24', 'w': '9.58'}], [{'page': '5', 'x': '279.32', 'y': '223.58', 'h': '280.35', 'w': '9.58'}, {'page': '5', 'x': '166.39', 'y': '236.13', 'h': '305.80', 'w': '9.58'}], [{'page': '5', 'x': '475.28', 'y': '236.13', 'h': '83.99', 'w': '9.58'}, {'page': '5', 'x': '166.39', 'y': '248.55', 'h': '392.88', 'w': '9.71'}, {'page': '5', 'x': '166.39', 'y': '261.24', 'h': '115.17', 'w': '9.58'}], [{'page': '5', 'x': '286.60', 'y': '261.24', 'h': '272.67', 'w': '9.58'}, {'page': '5', 'x': '166.39', 'y': '273.79', 'h': '373.17', 'w': '9.58'}], [{'page': '5', 'x': '542.65', 'y': '273.79', 'h': '16.63', 'w': '9.58'}, {'page': '5', 'x': '166.39', 'y': '286.34', 'h': '355.30', 'w': '9.58'}]]\", 'pages': \"('4', '5')\", 'section_title': 'LC-MS/MS', 'section_number': '2.6.', 'paper_title': 'LC-MS/MS-Based Serum Protein Profiling for Identification of Candidate Biomarkers in Pakistani Rheumatoid Arthritis Patients', 'file_path': '/data/tommaso/data/papers/1.pdf'}),\n", + " Document(page_content='For label-free quantification, all the raw files were processed using Progenesis™ QI 2.0 software (Nonlinear Dynamics, Waters).Progenesis™ QI software undertakes the spectral alignment, consistent peak picking across all runs, normalization of the total protein abundance as well as peptide/protein quantification.For each feature, the top five spectra were exported, and the peptide and protein identifications were carried out via in-house Mascot server (Version 2.6.2).Reviewed Homo sapiens database was used to perform the identifications.Search parameters included: fragment mass tolerance value of 0.01 Da; peptide mass tolerance value of 10.0 ppm; enzyme, trypsin; one allowed missed cleavage; carbamidomethylation (cysteine) as the fixed modifications and oxidation (methionine) as the variable modification; The criteria used for protein identification included a false discovery rate (FDR) of 1% and ≥2 unique peptides.', metadata={'text': 'For label-free quantification, all the raw files were processed using Progenesis™ QI 2.0 software (Nonlinear Dynamics, Waters).Progenesis™ QI software undertakes the spectral alignment, consistent peak picking across all runs, normalization of the total protein abundance as well as peptide/protein quantification.For each feature, the top five spectra were exported, and the peptide and protein identifications were carried out via in-house Mascot server (Version 2.6.2).Reviewed Homo sapiens database was used to perform the identifications.Search parameters included: fragment mass tolerance value of 0.01 Da; peptide mass tolerance value of 10.0 ppm; enzyme, trypsin; one allowed missed cleavage; carbamidomethylation (cysteine) as the fixed modifications and oxidation (methionine) as the variable modification; The criteria used for protein identification included a false discovery rate (FDR) of 1% and ≥2 unique peptides.', 'para': '4', 'bboxes': \"[[{'page': '5', 'x': '187.65', 'y': '324.30', 'h': '371.62', 'w': '9.58'}, {'page': '5', 'x': '166.39', 'y': '336.85', 'h': '174.99', 'w': '9.58'}], [{'page': '5', 'x': '344.48', 'y': '336.85', 'h': '214.80', 'w': '9.58'}, {'page': '5', 'x': '166.39', 'y': '349.41', 'h': '392.88', 'w': '9.58'}, {'page': '5', 'x': '166.39', 'y': '361.96', 'h': '231.45', 'w': '9.58'}], [{'page': '5', 'x': '400.98', 'y': '361.96', 'h': '158.30', 'w': '9.58'}, {'page': '5', 'x': '165.98', 'y': '374.51', 'h': '393.30', 'w': '9.58'}, {'page': '5', 'x': '166.39', 'y': '387.06', 'h': '131.22', 'w': '9.58'}], [{'page': '5', 'x': '300.78', 'y': '386.93', 'h': '258.50', 'w': '9.71'}, {'page': '5', 'x': '166.39', 'y': '399.62', 'h': '66.54', 'w': '9.58'}], [{'page': '5', 'x': '237.66', 'y': '399.62', 'h': '322.86', 'w': '9.58'}, {'page': '5', 'x': '166.10', 'y': '412.17', 'h': '394.42', 'w': '9.58'}, {'page': '5', 'x': '166.39', 'y': '424.72', 'h': '393.87', 'w': '9.58'}, {'page': '5', 'x': '166.39', 'y': '437.28', 'h': '392.88', 'w': '9.58'}, {'page': '5', 'x': '166.39', 'y': '449.51', 'h': '230.16', 'w': '9.90'}]]\", 'pages': \"('5', '5')\", 'section_title': 'Label-Free Quantification', 'section_number': '2.7.', 'paper_title': 'LC-MS/MS-Based Serum Protein Profiling for Identification of Candidate Biomarkers in Pakistani Rheumatoid Arthritis Patients', 'file_path': '/data/tommaso/data/papers/1.pdf'}),\n", + " Document(page_content='Canonical pathways, networks and disregulated regulators of the proteins that were identified with an FDR adjusted p-value of <0.05 and ≥2 unique peptides were performed using Ingenuity Pathway Analysis (IPA) (Qiagen, Hilden, Germany).For this, the gene names for the identified proteins were uploaded and analyzed for humans.All identified proteins were used as a background.The uncharacterized proteins were excluded from analysis.', metadata={'text': 'Canonical pathways, networks and disregulated regulators of the proteins that were identified with an FDR adjusted p-value of <0.05 and ≥2 unique peptides were performed using Ingenuity Pathway Analysis (IPA) (Qiagen, Hilden, Germany).For this, the gene names for the identified proteins were uploaded and analyzed for humans.All identified proteins were used as a background.The uncharacterized proteins were excluded from analysis.', 'para': '3', 'bboxes': \"[[{'page': '5', 'x': '187.65', 'y': '487.79', 'h': '371.62', 'w': '9.58'}, {'page': '5', 'x': '166.39', 'y': '500.02', 'h': '392.89', 'w': '9.90'}, {'page': '5', 'x': '166.39', 'y': '512.89', 'h': '310.88', 'w': '9.58'}], [{'page': '5', 'x': '481.23', 'y': '512.89', 'h': '78.04', 'w': '9.58'}, {'page': '5', 'x': '166.39', 'y': '525.45', 'h': '342.92', 'w': '9.58'}], [{'page': '5', 'x': '514.37', 'y': '525.45', 'h': '46.56', 'w': '9.58'}, {'page': '5', 'x': '166.39', 'y': '538.00', 'h': '186.85', 'w': '9.58'}], [{'page': '5', 'x': '357.87', 'y': '538.00', 'h': '201.40', 'w': '9.58'}, {'page': '5', 'x': '166.39', 'y': '550.55', 'h': '61.84', 'w': '9.58'}]]\", 'pages': \"('5', '5')\", 'section_title': 'Pathway Analysis', 'section_number': '2.8.', 'paper_title': 'LC-MS/MS-Based Serum Protein Profiling for Identification of Candidate Biomarkers in Pakistani Rheumatoid Arthritis Patients', 'file_path': '/data/tommaso/data/papers/1.pdf'}),\n", + " Document(page_content=\"A human PZP ELISA kit (CSB-EL019131HU, CUSABIO, Houston, TX, USA) was used for the quantification of PZP protein in human samples from an independent cohort of RA patients and controls according to the manufacturer's directions.All the samples were analyzed in duplicates and protein concentration was determined as an average of the duplicates.\", metadata={'text': \"A human PZP ELISA kit (CSB-EL019131HU, CUSABIO, Houston, TX, USA) was used for the quantification of PZP protein in human samples from an independent cohort of RA patients and controls according to the manufacturer's directions.All the samples were analyzed in duplicates and protein concentration was determined as an average of the duplicates.\", 'para': '1', 'bboxes': \"[[{'page': '5', 'x': '187.65', 'y': '588.51', 'h': '371.62', 'w': '9.58'}, {'page': '5', 'x': '166.39', 'y': '601.06', 'h': '392.88', 'w': '9.58'}, {'page': '5', 'x': '166.39', 'y': '613.62', 'h': '315.12', 'w': '9.58'}], [{'page': '5', 'x': '487.71', 'y': '613.62', 'h': '71.56', 'w': '9.58'}, {'page': '5', 'x': '165.98', 'y': '626.17', 'h': '393.30', 'w': '9.58'}, {'page': '5', 'x': '166.39', 'y': '638.72', 'h': '64.33', 'w': '9.58'}]]\", 'pages': \"('5', '5')\", 'section_title': 'Validation of MS Using ELISA', 'section_number': '2.9.', 'paper_title': 'LC-MS/MS-Based Serum Protein Profiling for Identification of Candidate Biomarkers in Pakistani Rheumatoid Arthritis Patients', 'file_path': '/data/tommaso/data/papers/1.pdf'}),\n", + " Document(page_content='Heat map plots were created and visualized using MetaboAnalyst 4.0.Principal component analysis (PCA) was also performed using MetaboAnalyst 4.0.Log transformation and Pareto scaling were applied for data analysis of the normalized data.For this study, the DE proteins were defined as those with a FDR adjusted p-value of <0.05, identified ≥2 unique peptides and a >2 fold expression change using ANOVA.For comparison of PZP concentration between RA patients and healthy controls, a t-test was used.A boxplot depicting the ELISA results was designed using R 4.1.1.', metadata={'text': 'Heat map plots were created and visualized using MetaboAnalyst 4.0.Principal component analysis (PCA) was also performed using MetaboAnalyst 4.0.Log transformation and Pareto scaling were applied for data analysis of the normalized data.For this study, the DE proteins were defined as those with a FDR adjusted p-value of <0.05, identified ≥2 unique peptides and a >2 fold expression change using ANOVA.For comparison of PZP concentration between RA patients and healthy controls, a t-test was used.A boxplot depicting the ELISA results was designed using R 4.1.1.', 'para': '5', 'bboxes': \"[[{'page': '5', 'x': '187.65', 'y': '676.68', 'h': '324.45', 'w': '9.58'}], [{'page': '5', 'x': '518.63', 'y': '676.68', 'h': '40.64', 'w': '9.58'}, {'page': '5', 'x': '166.39', 'y': '689.23', 'h': '331.18', 'w': '9.58'}], [{'page': '5', 'x': '501.86', 'y': '689.23', 'h': '59.07', 'w': '9.58'}, {'page': '5', 'x': '166.39', 'y': '701.79', 'h': '372.04', 'w': '9.58'}], [{'page': '5', 'x': '544.26', 'y': '701.79', 'h': '15.21', 'w': '9.58'}, {'page': '5', 'x': '166.39', 'y': '714.21', 'h': '394.12', 'w': '9.71'}, {'page': '5', 'x': '166.39', 'y': '726.58', 'h': '339.08', 'w': '9.90'}], [{'page': '5', 'x': '507.91', 'y': '726.89', 'h': '53.02', 'w': '9.58'}, {'page': '5', 'x': '166.39', 'y': '739.31', 'h': '382.39', 'w': '9.71'}], [{'page': '5', 'x': '551.89', 'y': '739.44', 'h': '7.78', 'w': '9.58'}, {'page': '5', 'x': '166.39', 'y': '752.00', 'h': '280.43', 'w': '9.58'}]]\", 'pages': \"('5', '5')\", 'section_title': 'Statistics', 'section_number': '2.10.', 'paper_title': 'LC-MS/MS-Based Serum Protein Profiling for Identification of Candidate Biomarkers in Pakistani Rheumatoid Arthritis Patients', 'file_path': '/data/tommaso/data/papers/1.pdf'}),\n", + " Document(page_content='Life 2022, 12, 464 6 of 17', metadata={'text': 'Life 2022, 12, 464 6 of 17', 'para': '0', 'bboxes': \"[[{'page': '6', 'x': '35.49', 'y': '57.46', 'h': '57.79', 'w': '7.77'}, {'page': '6', 'x': '536.53', 'y': '57.56', 'h': '22.95', 'w': '7.67'}]]\", 'pages': \"('6', '6')\", 'section_title': 'Statistics', 'section_number': '2.10.', 'paper_title': 'LC-MS/MS-Based Serum Protein Profiling for Identification of Candidate Biomarkers in Pakistani Rheumatoid Arthritis Patients', 'file_path': '/data/tommaso/data/papers/1.pdf'}),\n", + " Document(page_content='1-D SDS PAGE did not demonstrate any significant differences among groups (Figure 1).A large band of serum albumin appeared at 67 kDa in all the samples; the most abundant protein in human serum.1-D SDS-PAGE of the serum samples processed through Pro-teoMiner™ columns showed that with each wash, the albumin and other high abundance proteins gradually decreased, and all the on-bead proteins were enriched gradually as depicted by the presence of all protein bands and their respective intensities in the SDS-PAGE of eluted samples (Figure 2).', metadata={'text': '1-D SDS PAGE did not demonstrate any significant differences among groups (Figure 1).A large band of serum albumin appeared at 67 kDa in all the samples; the most abundant protein in human serum.1-D SDS-PAGE of the serum samples processed through Pro-teoMiner™ columns showed that with each wash, the albumin and other high abundance proteins gradually decreased, and all the on-bead proteins were enriched gradually as depicted by the presence of all protein bands and their respective intensities in the SDS-PAGE of eluted samples (Figure 2).', 'para': '2', 'bboxes': \"[[{'page': '6', 'x': '187.55', 'y': '127.04', 'h': '373.46', 'w': '9.58'}], [{'page': '6', 'x': '166.01', 'y': '139.59', 'h': '393.27', 'w': '9.58'}, {'page': '6', 'x': '166.10', 'y': '152.14', 'h': '113.19', 'w': '9.58'}], [{'page': '6', 'x': '283.92', 'y': '152.14', 'h': '277.01', 'w': '9.58'}, {'page': '6', 'x': '166.39', 'y': '164.70', 'h': '392.88', 'w': '9.58'}, {'page': '6', 'x': '166.10', 'y': '177.25', 'h': '394.83', 'w': '9.58'}, {'page': '6', 'x': '166.10', 'y': '189.80', 'h': '393.18', 'w': '9.58'}, {'page': '6', 'x': '166.39', 'y': '202.36', 'h': '125.01', 'w': '9.58'}]]\", 'pages': \"('6', '6')\", 'section_title': '1-D SDS-PAGE Qualitative Analysis', 'section_number': '3.1.', 'paper_title': 'LC-MS/MS-Based Serum Protein Profiling for Identification of Candidate Biomarkers in Pakistani Rheumatoid Arthritis Patients', 'file_path': '/data/tommaso/data/papers/1.pdf'}),\n", + " Document(page_content='A total of 213 proteins were identified following ProgenesisQI™ using Mascot (Table S1).One RF-negative and ACPA-positive sample returned a very low alignment score of 8.6% and was, therefore, excluded from the analysis.For the remaining samples, more than 1 unique peptide was mapped to 165 proteins out of 213 proteins.Out of 213 proteins, 124 proteins showed >a 2-fold change.A total of 37 out of these 213 proteins had q-value < 0.05.', metadata={'text': 'A total of 213 proteins were identified following ProgenesisQI™ using Mascot (Table S1).One RF-negative and ACPA-positive sample returned a very low alignment score of 8.6% and was, therefore, excluded from the analysis.For the remaining samples, more than 1 unique peptide was mapped to 165 proteins out of 213 proteins.Out of 213 proteins, 124 proteins showed >a 2-fold change.A total of 37 out of these 213 proteins had q-value < 0.05.', 'para': '4', 'bboxes': \"[[{'page': '7', 'x': '187.65', 'y': '453.98', 'h': '371.62', 'w': '9.58'}, {'page': '7', 'x': '166.39', 'y': '466.53', 'h': '16.21', 'w': '9.58'}], [{'page': '7', 'x': '185.69', 'y': '466.53', 'h': '373.58', 'w': '9.58'}, {'page': '7', 'x': '166.39', 'y': '479.08', 'h': '238.50', 'w': '9.58'}], [{'page': '7', 'x': '409.44', 'y': '479.08', 'h': '149.84', 'w': '9.58'}, {'page': '7', 'x': '166.39', 'y': '491.63', 'h': '305.54', 'w': '9.58'}], [{'page': '7', 'x': '475.02', 'y': '491.63', 'h': '85.50', 'w': '9.58'}, {'page': '7', 'x': '165.90', 'y': '504.19', 'h': '181.57', 'w': '9.58'}], [{'page': '7', 'x': '356.21', 'y': '504.19', 'h': '203.06', 'w': '9.58'}, {'page': '7', 'x': '166.12', 'y': '516.74', 'h': '64.13', 'w': '9.58'}]]\", 'pages': \"('7', '7')\", 'section_title': 'Identification of Proteins in Serum', 'section_number': '3.2.', 'paper_title': 'LC-MS/MS-Based Serum Protein Profiling for Identification of Candidate Biomarkers in Pakistani Rheumatoid Arthritis Patients', 'file_path': '/data/tommaso/data/papers/1.pdf'}),\n", + " Document(page_content='The comparative analysis of all groups (a FDR adjusted p-value of <0.05, identified ≥2 unique peptides and a >2 fold expression change) identified 25 proteins that were DE (Table 2), of which 10 proteins were DE between healthy control subjects and 1 of the serotypes including PZP, selenoprotein P (SELENOP), C4b-binding protein (C4BP) beta chain, apolipoprotein M (ApoM), N-acetylmuramoyl-L-alanine amidase (NAMLAA), carboxypeptidase N (CPN) catalytic chain, oncoprotein Induced Transcript 3 (OIT3), CPN subunit 2, apolipoprotein C-I (ApoC1) and apolipoprotein C-III (ApoCIII).', metadata={'text': 'The comparative analysis of all groups (a FDR adjusted p-value of <0.05, identified ≥2 unique peptides and a >2 fold expression change) identified 25 proteins that were DE (Table 2), of which 10 proteins were DE between healthy control subjects and 1 of the serotypes including PZP, selenoprotein P (SELENOP), C4b-binding protein (C4BP) beta chain, apolipoprotein M (ApoM), N-acetylmuramoyl-L-alanine amidase (NAMLAA), carboxypeptidase N (CPN) catalytic chain, oncoprotein Induced Transcript 3 (OIT3), CPN subunit 2, apolipoprotein C-I (ApoC1) and apolipoprotein C-III (ApoCIII).', 'para': '0', 'bboxes': \"[[{'page': '7', 'x': '187.65', 'y': '554.57', 'h': '373.28', 'w': '9.71'}, {'page': '7', 'x': '166.39', 'y': '566.93', 'h': '392.88', 'w': '9.90'}, {'page': '7', 'x': '166.39', 'y': '579.80', 'h': '392.88', 'w': '9.58'}, {'page': '7', 'x': '166.39', 'y': '592.36', 'h': '393.87', 'w': '9.58'}, {'page': '7', 'x': '166.39', 'y': '604.91', 'h': '394.12', 'w': '9.58'}, {'page': '7', 'x': '166.39', 'y': '617.46', 'h': '392.88', 'w': '9.58'}, {'page': '7', 'x': '166.39', 'y': '630.02', 'h': '326.51', 'w': '9.58'}]]\", 'pages': \"('7', '7')\", 'section_title': 'Differentially Expressed Proteins', 'section_number': '3.3.', 'paper_title': 'LC-MS/MS-Based Serum Protein Profiling for Identification of Candidate Biomarkers in Pakistani Rheumatoid Arthritis Patients', 'file_path': '/data/tommaso/data/papers/1.pdf'}),\n", + " Document(page_content='The PCA analysis (Figure 3A,B) showed that only 22.1% of the proteins (PC1) were divided between RA patients and healthy controls.The distribution only decreased to 21.3%, when only the patient groups were included in PCA (Figure 3C).The heat map of the proteins showed that the group averages of various proteins were different between patients and healthy subjects (Figure 4A).The heat map of the patient serotypes and controls however showed that although distinguishable patterns of expression existed between normalized abundances of individual proteins between patient serotypes as well as healthy subjects, only Q96PD5 (NAMLAA) showed similar trends across all the RA serogrpups as compared to healthy controls (Figure 4B).', metadata={'text': 'The PCA analysis (Figure 3A,B) showed that only 22.1% of the proteins (PC1) were divided between RA patients and healthy controls.The distribution only decreased to 21.3%, when only the patient groups were included in PCA (Figure 3C).The heat map of the proteins showed that the group averages of various proteins were different between patients and healthy subjects (Figure 4A).The heat map of the patient serotypes and controls however showed that although distinguishable patterns of expression existed between normalized abundances of individual proteins between patient serotypes as well as healthy subjects, only Q96PD5 (NAMLAA) showed similar trends across all the RA serogrpups as compared to healthy controls (Figure 4B).', 'para': '3', 'bboxes': \"[[{'page': '7', 'x': '187.65', 'y': '642.57', 'h': '371.62', 'w': '9.58'}, {'page': '7', 'x': '166.39', 'y': '655.12', 'h': '231.62', 'w': '9.58'}], [{'page': '7', 'x': '402.93', 'y': '655.12', 'h': '156.34', 'w': '9.58'}, {'page': '7', 'x': '166.39', 'y': '667.67', 'h': '318.32', 'w': '9.58'}], [{'page': '7', 'x': '487.20', 'y': '667.67', 'h': '72.08', 'w': '9.58'}, {'page': '7', 'x': '166.39', 'y': '680.23', 'h': '392.88', 'w': '9.58'}, {'page': '7', 'x': '166.10', 'y': '692.78', 'h': '192.29', 'w': '9.58'}], [{'page': '7', 'x': '362.14', 'y': '692.78', 'h': '197.14', 'w': '9.58'}, {'page': '7', 'x': '166.39', 'y': '705.33', 'h': '392.88', 'w': '9.58'}, {'page': '7', 'x': '166.39', 'y': '717.89', 'h': '392.88', 'w': '9.58'}, {'page': '7', 'x': '166.39', 'y': '730.44', 'h': '393.27', 'w': '9.58'}, {'page': '7', 'x': '166.39', 'y': '742.99', 'h': '246.41', 'w': '9.58'}]]\", 'pages': \"('7', '7')\", 'section_title': 'Differentially Expressed Proteins', 'section_number': '3.3.', 'paper_title': 'LC-MS/MS-Based Serum Protein Profiling for Identification of Candidate Biomarkers in Pakistani Rheumatoid Arthritis Patients', 'file_path': '/data/tommaso/data/papers/1.pdf'}),\n", + " Document(page_content='Canonical pathway analysis was undertaken on the DE proteins between each serotype of RA and healthy controls.The comparison of double-positive RA samples with healthy controls predicted activation of dendritic cell maturation (p = 0.009); and inhibition of liver X receptor/retinoid X receptor (LXR/RXR) pathway (p = 7.9 × 10 -28 ), acute phase response signalling (p = 3.16 × 10 -27 ) and production of NO and ROS species in themacrophages (p = 1.41 × 10 -08 ) (Figure 5A).The comparison of RF-positive RA patients with healthy controls revealed an activation of the coagulation system (p = 3.98 × 10 -11 ), the intrinsic prothrombin activation pathway (p = 8.70 × 10 -09 ) and the GP6 signaling pathway (p = 0.0009); and inhibition of the LXR/RXR pathway (p = 5.01 × 10 -21 ), production of NO and ROS in macrophages (p = 2.57 × 10 -08 ) and maturity onset diabetes of young (MODY) signaling (p = 2.29 × 10 -06 ) (Figure 5B).The comparison of ACPA-positive RA patients with healthy controls revealed activation of the coagulation system (p = 3.54 × 10 -08 ), the intrinsic prothrombin activation pathway (p = 4.89 × 10 -06 ), the extrinsic prothrombin activation pathway (p = 5.01 × 10 -10 ) and acute phase response signalling (p = 5.01 × 10 -11 ); and inhibition of the LXR/RXR pathway (p = 1.99 × 10 -14 ) and production of NO and ROS in macrophages (p = 0.001) (Figure 5C).Pathway analysis of double-negative RA patients with healthy controls revealed the activation of the coagulation system (p = 7.94 × 10 -19 ), the intrinsic prothrombin activation pathway (p = 5.01 × 10 -12 ) and the extrinsic prothrombin activation pathway (p = 1.25 × 10 -13 ); and inhibition of the LXR/RXR pathway (p = 1.58 × 10 -25 ); acute phase response signalling (p = 1 × 10 -23 ) and production of NO and ROS in macrophages (p = 2.18 × 10 -10 ) (Figure 5D).', metadata={'text': 'Canonical pathway analysis was undertaken on the DE proteins between each serotype of RA and healthy controls.The comparison of double-positive RA samples with healthy controls predicted activation of dendritic cell maturation (p = 0.009); and inhibition of liver X receptor/retinoid X receptor (LXR/RXR) pathway (p = 7.9 × 10 -28 ), acute phase response signalling (p = 3.16 × 10 -27 ) and production of NO and ROS species in themacrophages (p = 1.41 × 10 -08 ) (Figure 5A).The comparison of RF-positive RA patients with healthy controls revealed an activation of the coagulation system (p = 3.98 × 10 -11 ), the intrinsic prothrombin activation pathway (p = 8.70 × 10 -09 ) and the GP6 signaling pathway (p = 0.0009); and inhibition of the LXR/RXR pathway (p = 5.01 × 10 -21 ), production of NO and ROS in macrophages (p = 2.57 × 10 -08 ) and maturity onset diabetes of young (MODY) signaling (p = 2.29 × 10 -06 ) (Figure 5B).The comparison of ACPA-positive RA patients with healthy controls revealed activation of the coagulation system (p = 3.54 × 10 -08 ), the intrinsic prothrombin activation pathway (p = 4.89 × 10 -06 ), the extrinsic prothrombin activation pathway (p = 5.01 × 10 -10 ) and acute phase response signalling (p = 5.01 × 10 -11 ); and inhibition of the LXR/RXR pathway (p = 1.99 × 10 -14 ) and production of NO and ROS in macrophages (p = 0.001) (Figure 5C).Pathway analysis of double-negative RA patients with healthy controls revealed the activation of the coagulation system (p = 7.94 × 10 -19 ), the intrinsic prothrombin activation pathway (p = 5.01 × 10 -12 ) and the extrinsic prothrombin activation pathway (p = 1.25 × 10 -13 ); and inhibition of the LXR/RXR pathway (p = 1.58 × 10 -25 ); acute phase response signalling (p = 1 × 10 -23 ) and production of NO and ROS in macrophages (p = 2.18 × 10 -10 ) (Figure 5D).', 'para': '4', 'bboxes': \"[[{'page': '10', 'x': '187.65', 'y': '187.01', 'h': '371.62', 'w': '9.58'}, {'page': '10', 'x': '166.39', 'y': '199.57', 'h': '121.34', 'w': '9.58'}], [{'page': '10', 'x': '290.85', 'y': '199.57', 'h': '268.81', 'w': '9.58'}, {'page': '10', 'x': '166.39', 'y': '212.12', 'h': '393.08', 'w': '9.58'}, {'page': '10', 'x': '166.07', 'y': '224.36', 'h': '279.60', 'w': '9.90'}, {'page': '10', 'x': '445.76', 'y': '222.44', 'h': '14.30', 'w': '6.92'}, {'page': '10', 'x': '460.56', 'y': '224.67', 'h': '98.72', 'w': '9.58'}, {'page': '10', 'x': '166.39', 'y': '236.91', 'h': '107.64', 'w': '9.90'}, {'page': '10', 'x': '274.13', 'y': '235.00', 'h': '14.30', 'w': '6.92'}, {'page': '10', 'x': '288.92', 'y': '237.23', 'h': '270.35', 'w': '9.58'}, {'page': '10', 'x': '166.07', 'y': '249.46', 'h': '61.63', 'w': '9.90'}, {'page': '10', 'x': '227.80', 'y': '247.55', 'h': '14.30', 'w': '6.92'}, {'page': '10', 'x': '242.59', 'y': '249.78', 'h': '60.14', 'w': '9.58'}], [{'page': '10', 'x': '305.42', 'y': '249.78', 'h': '254.24', 'w': '9.58'}, {'page': '10', 'x': '166.39', 'y': '262.02', 'h': '324.85', 'w': '9.90'}, {'page': '10', 'x': '491.34', 'y': '260.10', 'h': '14.30', 'w': '6.92'}, {'page': '10', 'x': '506.13', 'y': '262.33', 'h': '54.79', 'w': '9.58'}, {'page': '10', 'x': '166.39', 'y': '274.57', 'h': '228.54', 'w': '9.90'}, {'page': '10', 'x': '395.03', 'y': '272.65', 'h': '14.30', 'w': '6.92'}, {'page': '10', 'x': '409.83', 'y': '274.88', 'h': '149.84', 'w': '9.58'}, {'page': '10', 'x': '166.07', 'y': '287.12', 'h': '292.45', 'w': '9.90'}, {'page': '10', 'x': '458.61', 'y': '285.21', 'h': '14.30', 'w': '6.92'}, {'page': '10', 'x': '473.41', 'y': '287.44', 'h': '85.87', 'w': '9.58'}, {'page': '10', 'x': '166.39', 'y': '299.67', 'h': '171.14', 'w': '9.90'}, {'page': '10', 'x': '337.63', 'y': '297.76', 'h': '14.30', 'w': '6.92'}, {'page': '10', 'x': '352.42', 'y': '299.99', 'h': '207.85', 'w': '9.58'}, {'page': '10', 'x': '166.39', 'y': '312.23', 'h': '106.03', 'w': '9.90'}, {'page': '10', 'x': '272.52', 'y': '310.31', 'h': '14.30', 'w': '6.92'}, {'page': '10', 'x': '287.32', 'y': '312.54', 'h': '58.57', 'w': '9.58'}], [{'page': '10', 'x': '348.64', 'y': '312.54', 'h': '210.64', 'w': '9.58'}, {'page': '10', 'x': '165.98', 'y': '324.78', 'h': '356.34', 'w': '9.90'}, {'page': '10', 'x': '522.41', 'y': '322.87', 'h': '14.30', 'w': '6.92'}, {'page': '10', 'x': '537.20', 'y': '325.10', 'h': '22.07', 'w': '9.58'}, {'page': '10', 'x': '166.39', 'y': '337.33', 'h': '240.24', 'w': '9.90'}, {'page': '10', 'x': '406.72', 'y': '335.42', 'h': '14.30', 'w': '6.92'}, {'page': '10', 'x': '421.52', 'y': '337.65', 'h': '139.41', 'w': '9.58'}, {'page': '10', 'x': '166.12', 'y': '349.89', 'h': '131.91', 'w': '9.90'}, {'page': '10', 'x': '298.12', 'y': '347.97', 'h': '14.30', 'w': '6.92'}, {'page': '10', 'x': '312.92', 'y': '349.89', 'h': '226.91', 'w': '9.90'}, {'page': '10', 'x': '539.92', 'y': '347.97', 'h': '14.30', 'w': '6.92'}, {'page': '10', 'x': '554.71', 'y': '350.20', 'h': '5.81', 'w': '9.58'}, {'page': '10', 'x': '166.39', 'y': '362.44', 'h': '235.96', 'w': '9.90'}, {'page': '10', 'x': '402.45', 'y': '360.53', 'h': '14.30', 'w': '6.92'}, {'page': '10', 'x': '417.24', 'y': '362.76', 'h': '142.04', 'w': '9.58'}, {'page': '10', 'x': '166.39', 'y': '375.31', 'h': '172.72', 'w': '9.58'}], [{'page': '10', 'x': '341.60', 'y': '375.31', 'h': '217.67', 'w': '9.58'}, {'page': '10', 'x': '165.98', 'y': '387.55', 'h': '373.85', 'w': '9.90'}, {'page': '10', 'x': '539.92', 'y': '385.63', 'h': '14.30', 'w': '6.92'}, {'page': '10', 'x': '554.71', 'y': '387.86', 'h': '5.81', 'w': '9.58'}, {'page': '10', 'x': '166.39', 'y': '400.10', 'h': '272.29', 'w': '9.90'}, {'page': '10', 'x': '438.77', 'y': '398.18', 'h': '14.30', 'w': '6.92'}, {'page': '10', 'x': '453.57', 'y': '400.41', 'h': '107.36', 'w': '9.58'}, {'page': '10', 'x': '166.39', 'y': '412.65', 'h': '190.95', 'w': '9.90'}, {'page': '10', 'x': '357.44', 'y': '410.74', 'h': '14.30', 'w': '6.92'}, {'page': '10', 'x': '372.24', 'y': '412.97', 'h': '187.43', 'w': '9.58'}, {'page': '10', 'x': '166.07', 'y': '425.20', 'h': '60.01', 'w': '9.90'}, {'page': '10', 'x': '226.17', 'y': '423.29', 'h': '14.30', 'w': '6.92'}, {'page': '10', 'x': '240.97', 'y': '425.20', 'h': '198.64', 'w': '9.90'}, {'page': '10', 'x': '439.70', 'y': '423.29', 'h': '14.30', 'w': '6.92'}, {'page': '10', 'x': '454.50', 'y': '425.52', 'h': '104.78', 'w': '9.58'}, {'page': '10', 'x': '166.39', 'y': '437.76', 'h': '173.96', 'w': '9.90'}, {'page': '10', 'x': '340.45', 'y': '435.84', 'h': '14.30', 'w': '6.92'}, {'page': '10', 'x': '355.24', 'y': '438.07', 'h': '58.63', 'w': '9.58'}]]\", 'pages': \"('10', '10')\", 'section_title': 'Pathway Analysis', 'section_number': '3.4.', 'paper_title': 'LC-MS/MS-Based Serum Protein Profiling for Identification of Candidate Biomarkers in Pakistani Rheumatoid Arthritis Patients', 'file_path': '/data/tommaso/data/papers/1.pdf'}),\n", + " Document(page_content='The comparison of the four serotypes of RA with healthy controls revealed an inhibition of inflammatory response, leukocyte migration, binding of professional phagocytic cells, migration of cells, adhesion of phagocytes, cell movement of phagocytes and cell movement of leukocytes in all serotypes except double-negative serotype.Accumulation of leukocytes was, however, inhibited in all serotypes.Concentration of cholesterol was inhibited in all serotypes except ACPA-positive patients that did not show activation or inhibition of this protein (Figure 6).The detailed results of pathway analysis are provided in Table S2.', metadata={'text': 'The comparison of the four serotypes of RA with healthy controls revealed an inhibition of inflammatory response, leukocyte migration, binding of professional phagocytic cells, migration of cells, adhesion of phagocytes, cell movement of phagocytes and cell movement of leukocytes in all serotypes except double-negative serotype.Accumulation of leukocytes was, however, inhibited in all serotypes.Concentration of cholesterol was inhibited in all serotypes except ACPA-positive patients that did not show activation or inhibition of this protein (Figure 6).The detailed results of pathway analysis are provided in Table S2.', 'para': '3', 'bboxes': \"[[{'page': '10', 'x': '187.65', 'y': '450.63', 'h': '373.27', 'w': '9.58'}, {'page': '10', 'x': '166.39', 'y': '463.18', 'h': '392.88', 'w': '9.58'}, {'page': '10', 'x': '166.39', 'y': '475.73', 'h': '392.88', 'w': '9.58'}, {'page': '10', 'x': '166.39', 'y': '488.29', 'h': '326.66', 'w': '9.58'}], [{'page': '10', 'x': '496.17', 'y': '488.29', 'h': '63.10', 'w': '9.58'}, {'page': '10', 'x': '166.39', 'y': '500.84', 'h': '242.32', 'w': '9.58'}], [{'page': '10', 'x': '412.04', 'y': '500.84', 'h': '147.24', 'w': '9.58'}, {'page': '10', 'x': '166.39', 'y': '513.39', 'h': '393.08', 'w': '9.58'}, {'page': '10', 'x': '166.39', 'y': '525.94', 'h': '154.46', 'w': '9.58'}], [{'page': '10', 'x': '323.94', 'y': '525.94', 'h': '235.34', 'w': '9.58'}, {'page': '10', 'x': '166.39', 'y': '538.50', 'h': '49.73', 'w': '9.58'}]]\", 'pages': \"('10', '10')\", 'section_title': 'Pathway Analysis', 'section_number': '3.4.', 'paper_title': 'LC-MS/MS-Based Serum Protein Profiling for Identification of Candidate Biomarkers in Pakistani Rheumatoid Arthritis Patients', 'file_path': '/data/tommaso/data/papers/1.pdf'}),\n", + " Document(page_content='cells, migration of cells, adhesion of phagocytes, cell movement of phagocytes and cell movement of leukocytes in all serotypes except double-negative serotype.Accumulation of leukocytes was, however, inhibited in all serotypes.Concentration of cholesterol was inhibited in all serotypes except ACPA-positive patients that did not show activation or inhibition of this protein (Figure 6).The detailed results of pathway analysis are provided in Table S2.', metadata={'text': 'cells, migration of cells, adhesion of phagocytes, cell movement of phagocytes and cell movement of leukocytes in all serotypes except double-negative serotype.Accumulation of leukocytes was, however, inhibited in all serotypes.Concentration of cholesterol was inhibited in all serotypes except ACPA-positive patients that did not show activation or inhibition of this protein (Figure 6).The detailed results of pathway analysis are provided in Table S2.', 'para': '3', 'bboxes': \"[[{'page': '11', 'x': '161.33', 'y': '2.64', 'h': '392.96', 'w': '10.17'}, {'page': '11', 'x': '161.33', 'y': '15.42', 'h': '327.31', 'w': '10.17'}], [{'page': '11', 'x': '491.28', 'y': '15.42', 'h': '63.01', 'w': '10.17'}, {'page': '11', 'x': '161.33', 'y': '28.26', 'h': '243.11', 'w': '10.17'}], [{'page': '11', 'x': '407.57', 'y': '28.26', 'h': '146.64', 'w': '10.17'}, {'page': '11', 'x': '161.33', 'y': '41.10', 'h': '392.99', 'w': '10.17'}, {'page': '11', 'x': '161.33', 'y': '53.88', 'h': '154.79', 'w': '10.17'}], [{'page': '11', 'x': '318.41', 'y': '53.88', 'h': '236.00', 'w': '10.17'}, {'page': '11', 'x': '161.33', 'y': '66.71', 'h': '50.88', 'w': '10.17'}]]\", 'pages': \"('11', '11')\", 'section_title': 'Pathway Analysis', 'section_number': '3.4.', 'paper_title': 'LC-MS/MS-Based Serum Protein Profiling for Identification of Candidate Biomarkers in Pakistani Rheumatoid Arthritis Patients', 'file_path': '/data/tommaso/data/papers/1.pdf'}),\n", + " Document(page_content='We validated the mass spectrometry results using ELISA for PZP.As Figure 7 shows, the expression of PZP was significantly higher among patients (7.54 ± 6.35 µg/mL) as compared to controls (1.03 ± 0.54 µg/mL (p-value 7.41 × 10 -11 ).The PZP concentration for each sample is represented in Table S3.The sensitivity of PZP for detecting RA is 96.7% and specificity is 95%.', metadata={'text': 'We validated the mass spectrometry results using ELISA for PZP.As Figure 7 shows, the expression of PZP was significantly higher among patients (7.54 ± 6.35 µg/mL) as compared to controls (1.03 ± 0.54 µg/mL (p-value 7.41 × 10 -11 ).The PZP concentration for each sample is represented in Table S3.The sensitivity of PZP for detecting RA is 96.7% and specificity is 95%.', 'para': '3', 'bboxes': \"[[{'page': '13', 'x': '187.65', 'y': '113.59', 'h': '286.90', 'w': '9.58'}], [{'page': '13', 'x': '477.04', 'y': '113.59', 'h': '83.48', 'w': '9.58'}, {'page': '13', 'x': '166.39', 'y': '125.83', 'h': '392.88', 'w': '9.90'}, {'page': '13', 'x': '166.39', 'y': '138.38', 'h': '267.36', 'w': '9.90'}, {'page': '13', 'x': '433.85', 'y': '136.46', 'h': '13.80', 'w': '6.92'}, {'page': '13', 'x': '448.14', 'y': '138.70', 'h': '5.91', 'w': '9.58'}], [{'page': '13', 'x': '457.13', 'y': '138.70', 'h': '102.14', 'w': '9.58'}, {'page': '13', 'x': '166.39', 'y': '151.25', 'h': '183.14', 'w': '9.58'}], [{'page': '13', 'x': '352.62', 'y': '151.25', 'h': '207.49', 'w': '9.58'}, {'page': '13', 'x': '166.39', 'y': '163.80', 'h': '96.73', 'w': '9.58'}]]\", 'pages': \"('13', '13')\", 'section_title': 'Validation of Mass Spectrometry Using ELISA', 'section_number': '3.5.', 'paper_title': 'LC-MS/MS-Based Serum Protein Profiling for Identification of Candidate Biomarkers in Pakistani Rheumatoid Arthritis Patients', 'file_path': '/data/tommaso/data/papers/1.pdf'}),\n", + " Document(page_content='Life 2022, 12, x FOR PEER REVIEW 14 of 19 of the proteins up-or downregulation with the activation of the respective function.The negative Z score on contrary represents inhibition of the function.The orange-colored squares represent upregulation during the disease state and the blue squares represent downregulation with the color intensity being directly correlated with the prediction strength.', metadata={'text': 'Life 2022, 12, x FOR PEER REVIEW 14 of 19 of the proteins up-or downregulation with the activation of the respective function.The negative Z score on contrary represents inhibition of the function.The orange-colored squares represent upregulation during the disease state and the blue squares represent downregulation with the color intensity being directly correlated with the prediction strength.', 'para': '2', 'bboxes': \"[[{'page': '13', 'x': '37.64', 'y': '1.90', 'h': '123.72', 'w': '8.10'}, {'page': '13', 'x': '529.95', 'y': '1.90', 'h': '30.99', 'w': '8.04'}, {'page': '13', 'x': '168.02', 'y': '39.19', 'h': '331.53', 'w': '9.07'}], [{'page': '13', 'x': '501.74', 'y': '39.19', 'h': '59.29', 'w': '9.07'}, {'page': '13', 'x': '168.01', 'y': '50.71', 'h': '221.30', 'w': '9.07'}], [{'page': '13', 'x': '392.29', 'y': '50.71', 'h': '168.68', 'w': '9.07'}, {'page': '13', 'x': '168.01', 'y': '62.30', 'h': '392.96', 'w': '9.07'}, {'page': '13', 'x': '168.01', 'y': '73.88', 'h': '250.47', 'w': '9.07'}]]\", 'pages': \"('13', '13')\", 'section_title': 'Validation of Mass Spectrometry Using ELISA', 'section_number': '3.5.', 'paper_title': 'LC-MS/MS-Based Serum Protein Profiling for Identification of Candidate Biomarkers in Pakistani Rheumatoid Arthritis Patients', 'file_path': '/data/tommaso/data/papers/1.pdf'}),\n", + " Document(page_content='We validated the mass spectrometry results using ELISA for PZP.As Figure 7 shows, the expression of PZP was significantly higher among patients (7.54 ± 6.35 µg/mL) as compared to controls (1.03 ± 0.54 µg/mL (p-value 7.41 × 10 -11 ).The PZP concentration for each sample is represented in Table S3.The sensitivity of PZP for detecting RA is 96.7% and specificity is 95%.', metadata={'text': 'We validated the mass spectrometry results using ELISA for PZP.As Figure 7 shows, the expression of PZP was significantly higher among patients (7.54 ± 6.35 µg/mL) as compared to controls (1.03 ± 0.54 µg/mL (p-value 7.41 × 10 -11 ).The PZP concentration for each sample is represented in Table S3.The sensitivity of PZP for detecting RA is 96.7% and specificity is 95%.', 'para': '3', 'bboxes': \"[[{'page': '13', 'x': '189.26', 'y': '113.46', 'h': '286.80', 'w': '10.10'}], [{'page': '13', 'x': '478.18', 'y': '113.46', 'h': '82.72', 'w': '10.10'}, {'page': '13', 'x': '168.01', 'y': '126.30', 'h': '393.04', 'w': '10.10'}, {'page': '13', 'x': '168.00', 'y': '139.14', 'h': '251.76', 'w': '10.11'}], [{'page': '13', 'x': '422.30', 'y': '139.14', 'h': '138.68', 'w': '10.10'}, {'page': '13', 'x': '168.02', 'y': '151.92', 'h': '153.23', 'w': '10.10'}], [{'page': '13', 'x': '324.44', 'y': '151.92', 'h': '236.56', 'w': '10.10'}, {'page': '13', 'x': '168.02', 'y': '164.75', 'h': '77.85', 'w': '10.10'}]]\", 'pages': \"('13', '13')\", 'section_title': 'Validation of Mass Spectrometry Using ELISA', 'section_number': '3.5.', 'paper_title': 'LC-MS/MS-Based Serum Protein Profiling for Identification of Candidate Biomarkers in Pakistani Rheumatoid Arthritis Patients', 'file_path': '/data/tommaso/data/papers/1.pdf'}),\n", + " Document(page_content='In this study, we identified 10 DE proteins between RA serotypes and healthy controls.Next, we undertook successfully validation of one of the DE proteins; PZP, in an independent sample cohort indicating our findings for this protein are applicable to another population.We then performed canonical pathway analysis for the DE proteins across each serotype in comparison to healthy controls to identify the key pathways and biological processes that are perturbed across these serotypes.We used ProteoMiner TM protein enrichment columns to deplete the proteins with high abundance and enrich the proteins with low abundance [23].ProteoMiner TM protein enrichment of low abundance proteins has several advantages over the immunoaffinity-based protein depletion', metadata={'text': 'In this study, we identified 10 DE proteins between RA serotypes and healthy controls.Next, we undertook successfully validation of one of the DE proteins; PZP, in an independent sample cohort indicating our findings for this protein are applicable to another population.We then performed canonical pathway analysis for the DE proteins across each serotype in comparison to healthy controls to identify the key pathways and biological processes that are perturbed across these serotypes.We used ProteoMiner TM protein enrichment columns to deplete the proteins with high abundance and enrich the proteins with low abundance [23].ProteoMiner TM protein enrichment of low abundance proteins has several advantages over the immunoaffinity-based protein depletion', 'para': '4', 'bboxes': \"[[{'page': '13', 'x': '189.27', 'y': '595.50', 'h': '371.67', 'w': '10.10'}, {'page': '13', 'x': '168.02', 'y': '608.28', 'h': '22.36', 'w': '10.10'}], [{'page': '13', 'x': '193.76', 'y': '608.28', 'h': '367.28', 'w': '10.10'}, {'page': '13', 'x': '168.03', 'y': '621.12', 'h': '392.93', 'w': '10.10'}, {'page': '13', 'x': '168.03', 'y': '633.95', 'h': '78.65', 'w': '10.10'}], [{'page': '13', 'x': '250.60', 'y': '633.95', 'h': '310.37', 'w': '10.10'}, {'page': '13', 'x': '168.03', 'y': '646.73', 'h': '392.99', 'w': '10.10'}, {'page': '13', 'x': '168.03', 'y': '659.56', 'h': '280.89', 'w': '10.10'}], [{'page': '13', 'x': '452.48', 'y': '659.56', 'h': '108.46', 'w': '10.10'}, {'page': '13', 'x': '168.02', 'y': '672.42', 'h': '392.93', 'w': '10.10'}, {'page': '13', 'x': '168.01', 'y': '685.19', 'h': '153.16', 'w': '10.10'}], [{'page': '13', 'x': '324.12', 'y': '685.19', 'h': '236.83', 'w': '10.11'}, {'page': '13', 'x': '168.02', 'y': '698.04', 'h': '393.02', 'w': '10.10'}]]\", 'pages': \"('13', '13')\", 'section_title': 'Discussion', 'section_number': '4.', 'paper_title': 'LC-MS/MS-Based Serum Protein Profiling for Identification of Candidate Biomarkers in Pakistani Rheumatoid Arthritis Patients', 'file_path': '/data/tommaso/data/papers/1.pdf'}),\n", + " Document(page_content='In this study, we identified 10 DE proteins between RA serotypes and healthy controls.Next, we undertook successfully validation of one of the DE proteins; PZP, in an independent sample cohort indicating our findings for this protein are applicable to another population.We then performed canonical pathway analysis for the DE proteins across each serotype in comparison to healthy controls to identify the key pathways and biological processes that are perturbed across these serotypes.We used ProteoMiner TM protein enrichment columns to deplete the proteins with high abundance and enrich the proteins with low abundance [23].ProteoMiner TM protein enrichment of low abundance proteins has several advantages over the immunoaffinity-based protein depletion approaches including a relatively less-complicated procedure, high material yield and reproducibility [24,25].', metadata={'text': 'In this study, we identified 10 DE proteins between RA serotypes and healthy controls.Next, we undertook successfully validation of one of the DE proteins; PZP, in an independent sample cohort indicating our findings for this protein are applicable to another population.We then performed canonical pathway analysis for the DE proteins across each serotype in comparison to healthy controls to identify the key pathways and biological processes that are perturbed across these serotypes.We used ProteoMiner TM protein enrichment columns to deplete the proteins with high abundance and enrich the proteins with low abundance [23].ProteoMiner TM protein enrichment of low abundance proteins has several advantages over the immunoaffinity-based protein depletion approaches including a relatively less-complicated procedure, high material yield and reproducibility [24,25].', 'para': '4', 'bboxes': \"[[{'page': '13', 'x': '187.65', 'y': '594.15', 'h': '373.37', 'w': '9.58'}], [{'page': '13', 'x': '166.39', 'y': '606.70', 'h': '394.53', 'w': '9.58'}, {'page': '13', 'x': '166.10', 'y': '619.26', 'h': '393.37', 'w': '9.58'}, {'page': '13', 'x': '166.10', 'y': '631.81', 'h': '50.15', 'w': '9.58'}], [{'page': '13', 'x': '219.31', 'y': '631.81', 'h': '339.97', 'w': '9.58'}, {'page': '13', 'x': '166.39', 'y': '644.36', 'h': '392.88', 'w': '9.58'}, {'page': '13', 'x': '166.10', 'y': '656.92', 'h': '220.39', 'w': '9.58'}], [{'page': '13', 'x': '389.49', 'y': '656.92', 'h': '93.06', 'w': '9.58'}, {'page': '13', 'x': '482.56', 'y': '654.92', 'h': '11.80', 'w': '7.28'}, {'page': '13', 'x': '497.10', 'y': '656.92', 'h': '63.82', 'w': '9.58'}, {'page': '13', 'x': '166.39', 'y': '669.47', 'h': '392.89', 'w': '9.58'}, {'page': '13', 'x': '166.39', 'y': '682.02', 'h': '90.84', 'w': '9.58'}], [{'page': '13', 'x': '260.75', 'y': '682.02', 'h': '56.61', 'w': '9.58'}, {'page': '13', 'x': '317.37', 'y': '680.03', 'h': '11.80', 'w': '7.28'}, {'page': '13', 'x': '332.30', 'y': '682.02', 'h': '226.97', 'w': '9.58'}, {'page': '13', 'x': '166.39', 'y': '694.57', 'h': '392.88', 'w': '9.58'}, {'page': '13', 'x': '166.39', 'y': '707.13', 'h': '382.95', 'w': '9.58'}]]\", 'pages': \"('13', '13')\", 'section_title': 'Discussion', 'section_number': '4.', 'paper_title': 'LC-MS/MS-Based Serum Protein Profiling for Identification of Candidate Biomarkers in Pakistani Rheumatoid Arthritis Patients', 'file_path': '/data/tommaso/data/papers/1.pdf'}),\n", + " Document(page_content='PZP is a high-molecular-weight immunosuppressive glycoprotein that is elevated during pregnancy.The role of this protein as an autoimmunity mediator was established by a recent LC-MS/MS-based study in inflammatory bowel disease patients [26].In this study, we also found increased expression of PZP in all the RA serotypes as compared to the controls using LC-MS/MS.The results were further validated by ELISA in a different cohort of RA patients and subjects.The high sensitivity and specificity of this protein for RA patients signify strong candidacy of PZP as a disease biomarker.', metadata={'text': 'PZP is a high-molecular-weight immunosuppressive glycoprotein that is elevated during pregnancy.The role of this protein as an autoimmunity mediator was established by a recent LC-MS/MS-based study in inflammatory bowel disease patients [26].In this study, we also found increased expression of PZP in all the RA serotypes as compared to the controls using LC-MS/MS.The results were further validated by ELISA in a different cohort of RA patients and subjects.The high sensitivity and specificity of this protein for RA patients signify strong candidacy of PZP as a disease biomarker.', 'para': '4', 'bboxes': \"[[{'page': '13', 'x': '187.65', 'y': '719.68', 'h': '371.62', 'w': '9.58'}, {'page': '13', 'x': '166.39', 'y': '732.23', 'h': '81.38', 'w': '9.58'}], [{'page': '13', 'x': '250.90', 'y': '732.23', 'h': '308.38', 'w': '9.58'}, {'page': '13', 'x': '166.39', 'y': '744.79', 'h': '361.65', 'w': '9.58'}], [{'page': '13', 'x': '531.13', 'y': '744.79', 'h': '28.14', 'w': '9.58'}, {'page': '13', 'x': '166.39', 'y': '757.34', 'h': '392.88', 'w': '9.58'}, {'page': '13', 'x': '166.39', 'y': '769.89', 'h': '136.95', 'w': '9.58'}], [{'page': '13', 'x': '305.83', 'y': '769.89', 'h': '253.45', 'w': '9.58'}, {'page': '14', 'x': '166.39', 'y': '98.05', 'h': '154.73', 'w': '9.58'}], [{'page': '14', 'x': '324.24', 'y': '98.05', 'h': '235.24', 'w': '9.58'}, {'page': '14', 'x': '166.39', 'y': '110.60', 'h': '299.06', 'w': '9.58'}]]\", 'pages': \"('13', '14')\", 'section_title': 'Discussion', 'section_number': '4.', 'paper_title': 'LC-MS/MS-Based Serum Protein Profiling for Identification of Candidate Biomarkers in Pakistani Rheumatoid Arthritis Patients', 'file_path': '/data/tommaso/data/papers/1.pdf'}),\n", + " Document(page_content='In this study, the serum expression of SELENOP was decreased in all RA serotypes in comparison to controls.SELENOP is a biomarker of selenium status that has been identified as a major preventable trigger for autoimmune diseases including RA [27].In comparison to controls, the serum selenium concentrations [28] and SELENOP concentrations [29,30] have been reported to be decreased in RA patients.The selenium status has been linked to the upregulation of a whole set of inflammation-related genes via nuclear factor kappalight-chain enhancer of activated B cells (NF-κB) mediated activation of several intracellular selenoproteins [28].The role of selenium and SELENOP, combined with previous findings suggest strong candidacy of this protein as a biomarker of autoimmunity.', metadata={'text': 'In this study, the serum expression of SELENOP was decreased in all RA serotypes in comparison to controls.SELENOP is a biomarker of selenium status that has been identified as a major preventable trigger for autoimmune diseases including RA [27].In comparison to controls, the serum selenium concentrations [28] and SELENOP concentrations [29,30] have been reported to be decreased in RA patients.The selenium status has been linked to the upregulation of a whole set of inflammation-related genes via nuclear factor kappalight-chain enhancer of activated B cells (NF-κB) mediated activation of several intracellular selenoproteins [28].The role of selenium and SELENOP, combined with previous findings suggest strong candidacy of this protein as a biomarker of autoimmunity.', 'para': '4', 'bboxes': \"[[{'page': '14', 'x': '187.65', 'y': '123.15', 'h': '371.63', 'w': '9.58'}, {'page': '14', 'x': '166.39', 'y': '135.71', 'h': '100.51', 'w': '9.58'}], [{'page': '14', 'x': '269.87', 'y': '135.71', 'h': '289.40', 'w': '9.58'}, {'page': '14', 'x': '166.39', 'y': '148.26', 'h': '326.53', 'w': '9.58'}], [{'page': '14', 'x': '496.01', 'y': '148.26', 'h': '63.26', 'w': '9.58'}, {'page': '14', 'x': '166.39', 'y': '160.81', 'h': '392.88', 'w': '9.58'}, {'page': '14', 'x': '166.39', 'y': '173.37', 'h': '227.71', 'w': '9.58'}], [{'page': '14', 'x': '397.21', 'y': '173.37', 'h': '162.06', 'w': '9.58'}, {'page': '14', 'x': '166.39', 'y': '185.92', 'h': '394.53', 'w': '9.58'}, {'page': '14', 'x': '166.39', 'y': '198.47', 'h': '393.08', 'w': '9.58'}, {'page': '14', 'x': '166.39', 'y': '211.02', 'h': '84.88', 'w': '9.58'}], [{'page': '14', 'x': '254.41', 'y': '211.02', 'h': '304.87', 'w': '9.58'}, {'page': '14', 'x': '166.39', 'y': '223.58', 'h': '322.31', 'w': '9.58'}]]\", 'pages': \"('14', '14')\", 'section_title': 'Discussion', 'section_number': '4.', 'paper_title': 'LC-MS/MS-Based Serum Protein Profiling for Identification of Candidate Biomarkers in Pakistani Rheumatoid Arthritis Patients', 'file_path': '/data/tommaso/data/papers/1.pdf'}),\n", + " Document(page_content='NAMLAA degrades bacterial cell wall component peptidoglycan [31] that has strong pro-inflammatory properties and can induce arthritis in rat models [32,33].The degradation of these pro-inflammatory components should suggestively confer an anti-inflammatory and protective role to NAMLAA against arthritis.However, Saha et al. [34] demonstrated that NAMLAA is indeed essential for the development of arthritis, a relatively unexpected finding.The study findings of Saha et al. [34] have not been supported by animal model studies for other inflammatory diseases [35].Decreased levels of this protein in human RA subjects as compared to healthy controls were observed in this study.The autoantigenic potential of NAMLAA and the presence of antibodies has been reported in a recent study [18] that can explain the lower serum levels of circulating NAMLAA.The imbalance of this homeostasis is probably responsible for the development of RA that needs to be further explored.', metadata={'text': 'NAMLAA degrades bacterial cell wall component peptidoglycan [31] that has strong pro-inflammatory properties and can induce arthritis in rat models [32,33].The degradation of these pro-inflammatory components should suggestively confer an anti-inflammatory and protective role to NAMLAA against arthritis.However, Saha et al. [34] demonstrated that NAMLAA is indeed essential for the development of arthritis, a relatively unexpected finding.The study findings of Saha et al. [34] have not been supported by animal model studies for other inflammatory diseases [35].Decreased levels of this protein in human RA subjects as compared to healthy controls were observed in this study.The autoantigenic potential of NAMLAA and the presence of antibodies has been reported in a recent study [18] that can explain the lower serum levels of circulating NAMLAA.The imbalance of this homeostasis is probably responsible for the development of RA that needs to be further explored.', 'para': '6', 'bboxes': \"[[{'page': '14', 'x': '187.65', 'y': '236.13', 'h': '371.62', 'w': '9.58'}, {'page': '14', 'x': '166.10', 'y': '248.68', 'h': '319.08', 'w': '9.58'}], [{'page': '14', 'x': '488.13', 'y': '248.68', 'h': '71.15', 'w': '9.58'}, {'page': '14', 'x': '166.39', 'y': '261.24', 'h': '393.27', 'w': '9.58'}, {'page': '14', 'x': '166.39', 'y': '273.79', 'h': '217.44', 'w': '9.58'}], [{'page': '14', 'x': '386.91', 'y': '273.79', 'h': '172.36', 'w': '9.58'}, {'page': '14', 'x': '166.39', 'y': '286.34', 'h': '392.89', 'w': '9.58'}, {'page': '14', 'x': '166.39', 'y': '298.90', 'h': '35.23', 'w': '9.58'}], [{'page': '14', 'x': '204.71', 'y': '298.90', 'h': '354.57', 'w': '9.58'}, {'page': '14', 'x': '166.39', 'y': '311.45', 'h': '200.75', 'w': '9.58'}], [{'page': '14', 'x': '371.28', 'y': '311.45', 'h': '187.99', 'w': '9.58'}, {'page': '14', 'x': '166.39', 'y': '324.00', 'h': '329.87', 'w': '9.58'}], [{'page': '14', 'x': '500.37', 'y': '324.00', 'h': '60.55', 'w': '9.58'}, {'page': '14', 'x': '166.39', 'y': '336.55', 'h': '392.88', 'w': '9.58'}, {'page': '14', 'x': '166.39', 'y': '349.11', 'h': '327.04', 'w': '9.58'}], [{'page': '14', 'x': '495.92', 'y': '349.11', 'h': '63.36', 'w': '9.58'}, {'page': '14', 'x': '166.39', 'y': '361.66', 'h': '392.88', 'w': '9.58'}, {'page': '14', 'x': '166.39', 'y': '374.21', 'h': '74.85', 'w': '9.58'}]]\", 'pages': \"('14', '14')\", 'section_title': 'Discussion', 'section_number': '4.', 'paper_title': 'LC-MS/MS-Based Serum Protein Profiling for Identification of Candidate Biomarkers in Pakistani Rheumatoid Arthritis Patients', 'file_path': '/data/tommaso/data/papers/1.pdf'}),\n", + " Document(page_content='C4BP β-chain, a complement inhibitor [36], and CPN, a zinc metalloprotease [37], were also observed to be DE in this study.However, a lack of consensus regarding the role of these proteins in autoimmunity and RA hereby suggest further exploration.', metadata={'text': 'C4BP β-chain, a complement inhibitor [36], and CPN, a zinc metalloprotease [37], were also observed to be DE in this study.However, a lack of consensus regarding the role of these proteins in autoimmunity and RA hereby suggest further exploration.', 'para': '1', 'bboxes': \"[[{'page': '14', 'x': '187.65', 'y': '386.66', 'h': '372.87', 'w': '9.69'}, {'page': '14', 'x': '165.98', 'y': '399.32', 'h': '181.73', 'w': '9.58'}], [{'page': '14', 'x': '350.80', 'y': '399.32', 'h': '208.48', 'w': '9.58'}, {'page': '14', 'x': '166.39', 'y': '411.87', 'h': '343.98', 'w': '9.58'}]]\", 'pages': \"('14', '14')\", 'section_title': 'Discussion', 'section_number': '4.', 'paper_title': 'LC-MS/MS-Based Serum Protein Profiling for Identification of Candidate Biomarkers in Pakistani Rheumatoid Arthritis Patients', 'file_path': '/data/tommaso/data/papers/1.pdf'}),\n", + " Document(page_content='We found three apolipoproteins to be DE between RA patients and healthy controls including ApoM, ApoC1 and ApoCIII.These apolipoproteins are implicated in protection against atherosclerosis owing to their role in HDL metabolism as well as anti-inflammatory properties [38].The polymorphisms in the ApoM gene have been associated with the risk of dyslipidaemia in RA patients [39,40].However, no study reports the serum levels of this chaperone in RA patients.ApoC1 has been identified as a predictor of drug response to RA [41,42].The risk of developing cardiovascular disease is elevated among RA patients than the general population [43,44].The observed decrease in the serum levels of these apolipoproteins in RA patients could suggestively explain the increased risk of developing cardiovascular disease among RA patients and highlight the link between these two illnesses.', metadata={'text': 'We found three apolipoproteins to be DE between RA patients and healthy controls including ApoM, ApoC1 and ApoCIII.These apolipoproteins are implicated in protection against atherosclerosis owing to their role in HDL metabolism as well as anti-inflammatory properties [38].The polymorphisms in the ApoM gene have been associated with the risk of dyslipidaemia in RA patients [39,40].However, no study reports the serum levels of this chaperone in RA patients.ApoC1 has been identified as a predictor of drug response to RA [41,42].The risk of developing cardiovascular disease is elevated among RA patients than the general population [43,44].The observed decrease in the serum levels of these apolipoproteins in RA patients could suggestively explain the increased risk of developing cardiovascular disease among RA patients and highlight the link between these two illnesses.', 'para': '6', 'bboxes': \"[[{'page': '14', 'x': '187.65', 'y': '424.42', 'h': '371.62', 'w': '9.58'}, {'page': '14', 'x': '166.39', 'y': '436.98', 'h': '169.44', 'w': '9.58'}], [{'page': '14', 'x': '338.31', 'y': '436.98', 'h': '220.97', 'w': '9.58'}, {'page': '14', 'x': '166.39', 'y': '449.53', 'h': '393.27', 'w': '9.58'}, {'page': '14', 'x': '166.10', 'y': '462.08', 'h': '68.53', 'w': '9.58'}], [{'page': '14', 'x': '240.26', 'y': '462.08', 'h': '319.02', 'w': '9.58'}, {'page': '14', 'x': '166.39', 'y': '474.64', 'h': '199.50', 'w': '9.58'}], [{'page': '14', 'x': '370.58', 'y': '474.64', 'h': '190.35', 'w': '9.58'}, {'page': '14', 'x': '166.39', 'y': '487.19', 'h': '164.71', 'w': '9.58'}], [{'page': '14', 'x': '336.00', 'y': '487.19', 'h': '223.28', 'w': '9.58'}, {'page': '14', 'x': '166.39', 'y': '499.74', 'h': '102.28', 'w': '9.58'}], [{'page': '14', 'x': '271.78', 'y': '499.74', 'h': '287.50', 'w': '9.58'}, {'page': '14', 'x': '166.39', 'y': '512.30', 'h': '207.79', 'w': '9.58'}], [{'page': '14', 'x': '377.30', 'y': '512.30', 'h': '181.97', 'w': '9.58'}, {'page': '14', 'x': '166.39', 'y': '524.85', 'h': '392.88', 'w': '9.58'}, {'page': '14', 'x': '166.39', 'y': '537.40', 'h': '392.88', 'w': '9.58'}, {'page': '14', 'x': '166.39', 'y': '549.95', 'h': '58.69', 'w': '9.58'}]]\", 'pages': \"('14', '14')\", 'section_title': 'Discussion', 'section_number': '4.', 'paper_title': 'LC-MS/MS-Based Serum Protein Profiling for Identification of Candidate Biomarkers in Pakistani Rheumatoid Arthritis Patients', 'file_path': '/data/tommaso/data/papers/1.pdf'}),\n", + " Document(page_content='The pathway analysis of the DE proteins showed that some pathways were differentially inhibited or activated in various serotypes suggesting that these serotypes are indeed regulated by different pathogenic mechanisms.However, some similarities were also observed including inhibition of LXR/RXR pathway and NO and ROS production in macrophages.LXR/RXR pathway was inhibited among all the RA serotypes.This pathway has been reported to inhibit atherosclerosis [45] and inflammation [46], suggesting an important and relatively unexplored link between this pathway and RA.The role of ROS in autoimmunity is complex and has been generally viewed as detrimental in the pathogenesis of autoimmune disease [47].A recent study revealed the regulatory role of these oxidative stress markers to prevent the pathogenesis of chronic inflammatory diseases [48].The inhibition of NO and ROS pathway in macrophage across all the serotypes warrants further exploration about the precise role of this pathway in the pathogenesis of RA.', metadata={'text': 'The pathway analysis of the DE proteins showed that some pathways were differentially inhibited or activated in various serotypes suggesting that these serotypes are indeed regulated by different pathogenic mechanisms.However, some similarities were also observed including inhibition of LXR/RXR pathway and NO and ROS production in macrophages.LXR/RXR pathway was inhibited among all the RA serotypes.This pathway has been reported to inhibit atherosclerosis [45] and inflammation [46], suggesting an important and relatively unexplored link between this pathway and RA.The role of ROS in autoimmunity is complex and has been generally viewed as detrimental in the pathogenesis of autoimmune disease [47].A recent study revealed the regulatory role of these oxidative stress markers to prevent the pathogenesis of chronic inflammatory diseases [48].The inhibition of NO and ROS pathway in macrophage across all the serotypes warrants further exploration about the precise role of this pathway in the pathogenesis of RA.', 'para': '6', 'bboxes': \"[[{'page': '14', 'x': '187.65', 'y': '562.51', 'h': '373.28', 'w': '9.58'}, {'page': '14', 'x': '166.39', 'y': '575.06', 'h': '392.88', 'w': '9.58'}, {'page': '14', 'x': '166.39', 'y': '587.61', 'h': '243.63', 'w': '9.58'}], [{'page': '14', 'x': '413.11', 'y': '587.61', 'h': '146.16', 'w': '9.58'}, {'page': '14', 'x': '166.39', 'y': '600.17', 'h': '392.88', 'w': '9.58'}, {'page': '14', 'x': '166.39', 'y': '612.72', 'h': '74.63', 'w': '9.58'}], [{'page': '14', 'x': '246.68', 'y': '612.72', 'h': '287.53', 'w': '9.58'}], [{'page': '14', 'x': '539.86', 'y': '612.72', 'h': '19.41', 'w': '9.58'}, {'page': '14', 'x': '166.10', 'y': '625.27', 'h': '393.18', 'w': '9.58'}, {'page': '14', 'x': '166.39', 'y': '637.83', 'h': '323.22', 'w': '9.58'}], [{'page': '14', 'x': '491.83', 'y': '637.83', 'h': '67.45', 'w': '9.58'}, {'page': '14', 'x': '166.39', 'y': '650.38', 'h': '394.53', 'w': '9.58'}, {'page': '14', 'x': '166.39', 'y': '662.93', 'h': '160.94', 'w': '9.58'}], [{'page': '14', 'x': '330.42', 'y': '662.93', 'h': '228.85', 'w': '9.58'}, {'page': '14', 'x': '166.39', 'y': '675.48', 'h': '394.63', 'w': '9.58'}], [{'page': '14', 'x': '166.09', 'y': '688.04', 'h': '393.19', 'w': '9.58'}, {'page': '14', 'x': '166.39', 'y': '700.59', 'h': '370.26', 'w': '9.58'}]]\", 'pages': \"('14', '14')\", 'section_title': 'Discussion', 'section_number': '4.', 'paper_title': 'LC-MS/MS-Based Serum Protein Profiling for Identification of Candidate Biomarkers in Pakistani Rheumatoid Arthritis Patients', 'file_path': '/data/tommaso/data/papers/1.pdf'}),\n", + " Document(page_content='RA is a complex disorder with molecular and clinical heterogeneity.We used RF and ACPA to classify our patient population and studied the DE proteins in comparison to all healthy controls.However, due to the COVID-19 pandemic, only a limited number of samples could be collected for validation of the identified proteins.The lockdown situation also limited the access to the laboratory facilities and the samples were not tested for their individual RF and ACPA status.The validation of the mass spectrometry result for PZP in an independent cohort of patients suggest that identified proteins can be tested on larger cohorts of patients from different populations in the future to validate the study findings and identify disease biomarkers for RA.', metadata={'text': 'RA is a complex disorder with molecular and clinical heterogeneity.We used RF and ACPA to classify our patient population and studied the DE proteins in comparison to all healthy controls.However, due to the COVID-19 pandemic, only a limited number of samples could be collected for validation of the identified proteins.The lockdown situation also limited the access to the laboratory facilities and the samples were not tested for their individual RF and ACPA status.The validation of the mass spectrometry result for PZP in an independent cohort of patients suggest that identified proteins can be tested on larger cohorts of patients from different populations in the future to validate the study findings and identify disease biomarkers for RA.', 'para': '4', 'bboxes': \"[[{'page': '14', 'x': '187.65', 'y': '713.14', 'h': '297.24', 'w': '9.58'}], [{'page': '14', 'x': '487.97', 'y': '713.14', 'h': '71.30', 'w': '9.58'}, {'page': '14', 'x': '166.01', 'y': '725.70', 'h': '393.27', 'w': '9.58'}, {'page': '14', 'x': '166.39', 'y': '738.25', 'h': '87.33', 'w': '9.58'}], [{'page': '14', 'x': '256.82', 'y': '738.25', 'h': '302.45', 'w': '9.58'}, {'page': '14', 'x': '166.39', 'y': '750.80', 'h': '287.62', 'w': '9.58'}], [{'page': '14', 'x': '457.08', 'y': '750.80', 'h': '102.19', 'w': '9.58'}, {'page': '14', 'x': '166.39', 'y': '763.35', 'h': '393.08', 'w': '9.58'}, {'page': '15', 'x': '166.39', 'y': '98.05', 'h': '140.07', 'w': '9.58'}], [{'page': '15', 'x': '309.55', 'y': '98.05', 'h': '249.73', 'w': '9.58'}, {'page': '15', 'x': '166.39', 'y': '110.60', 'h': '393.08', 'w': '9.58'}, {'page': '15', 'x': '166.39', 'y': '123.15', 'h': '392.88', 'w': '9.58'}, {'page': '15', 'x': '166.39', 'y': '135.71', 'h': '175.46', 'w': '9.58'}]]\", 'pages': \"('14', '15')\", 'section_title': 'Discussion', 'section_number': '4.', 'paper_title': 'LC-MS/MS-Based Serum Protein Profiling for Identification of Candidate Biomarkers in Pakistani Rheumatoid Arthritis Patients', 'file_path': '/data/tommaso/data/papers/1.pdf'}),\n", + " Document(page_content='RA is a complex disease that is influenced by an intricate interactome of various environmental, genetic and microbial factors that influence the immune homeostasis.Owing to the complex genetic architecture accompanied by a plethora of microbial and environmental triggers that an organism is exposed to this has made the identification of diagnostic and prognostic markers challenging.Our study has explored the serum proteomics of this complex autoimmune disorder in a relatively understudied Pakistani population to identify disease biomarkers that are DE among various serotypes of RA patients and healthy controls.We identified that PZP, SELENOP, C4BP beta chain, ApoM, NAMLAA, CPN catalytic chain, OIT3, CPN subunit 2, ApoC1 and ApoCIII were DE between the RA patients and healthy controls.These serum proteins have strong potential to serve as diagnostic and prognostic biomarkers of RA and can also be evaluated to fill the gaps in the current knowledge of pathogenesis of RA.These findings can be validated in larger cohorts from different populations to identify diagnostic and prognostic biomarkers of RA.', metadata={'text': 'RA is a complex disease that is influenced by an intricate interactome of various environmental, genetic and microbial factors that influence the immune homeostasis.Owing to the complex genetic architecture accompanied by a plethora of microbial and environmental triggers that an organism is exposed to this has made the identification of diagnostic and prognostic markers challenging.Our study has explored the serum proteomics of this complex autoimmune disorder in a relatively understudied Pakistani population to identify disease biomarkers that are DE among various serotypes of RA patients and healthy controls.We identified that PZP, SELENOP, C4BP beta chain, ApoM, NAMLAA, CPN catalytic chain, OIT3, CPN subunit 2, ApoC1 and ApoCIII were DE between the RA patients and healthy controls.These serum proteins have strong potential to serve as diagnostic and prognostic biomarkers of RA and can also be evaluated to fill the gaps in the current knowledge of pathogenesis of RA.These findings can be validated in larger cohorts from different populations to identify diagnostic and prognostic biomarkers of RA.', 'para': '5', 'bboxes': \"[[{'page': '15', 'x': '187.65', 'y': '173.66', 'h': '371.62', 'w': '9.58'}, {'page': '15', 'x': '166.39', 'y': '186.22', 'h': '394.62', 'w': '9.58'}], [{'page': '15', 'x': '166.39', 'y': '198.77', 'h': '392.88', 'w': '9.58'}, {'page': '15', 'x': '166.39', 'y': '211.32', 'h': '392.88', 'w': '9.58'}, {'page': '15', 'x': '166.39', 'y': '223.88', 'h': '229.10', 'w': '9.58'}], [{'page': '15', 'x': '401.31', 'y': '223.88', 'h': '157.97', 'w': '9.58'}, {'page': '15', 'x': '166.10', 'y': '236.43', 'h': '393.18', 'w': '9.58'}, {'page': '15', 'x': '166.10', 'y': '248.98', 'h': '393.57', 'w': '9.58'}, {'page': '15', 'x': '166.10', 'y': '261.54', 'h': '130.46', 'w': '9.58'}], [{'page': '15', 'x': '299.65', 'y': '261.54', 'h': '260.87', 'w': '9.58'}, {'page': '15', 'x': '166.39', 'y': '274.09', 'h': '392.88', 'w': '9.58'}, {'page': '15', 'x': '166.39', 'y': '286.64', 'h': '201.22', 'w': '9.58'}], [{'page': '15', 'x': '370.71', 'y': '286.64', 'h': '188.57', 'w': '9.58'}, {'page': '15', 'x': '166.39', 'y': '299.19', 'h': '392.88', 'w': '9.58'}, {'page': '15', 'x': '166.39', 'y': '311.75', 'h': '238.67', 'w': '9.58'}], [{'page': '15', 'x': '407.54', 'y': '311.75', 'h': '151.74', 'w': '9.58'}, {'page': '15', 'x': '166.39', 'y': '324.30', 'h': '392.88', 'w': '9.58'}, {'page': '15', 'x': '166.39', 'y': '336.85', 'h': '28.14', 'w': '9.58'}]]\", 'pages': \"('15', '15')\", 'section_title': 'Conclusions', 'section_number': '5.', 'paper_title': 'LC-MS/MS-Based Serum Protein Profiling for Identification of Candidate Biomarkers in Pakistani Rheumatoid Arthritis Patients', 'file_path': '/data/tommaso/data/papers/1.pdf'}),\n", + " Document(page_content='The following supporting information can be downloaded at: https: //www.mdpi.com/article/10.3390/life12030464/s1;Table S1: Accession, number of unique peptides and description of identified proteins in all samples, Table S2: Pathway analysis results using Ingenuity Pathway Analysis, Table S3: The PZP concentration for the validation cohort, Figure S1: Serum samples from study subjects loaded on SDS-PAGE Gel to check the presence of proteins and get a rough idea of protein integrity: Lane 1: Ladder, Lane 2-6: Double positive RA patients for RF factor and anti-CCP, Lane 7-11: Single positive RA patients for RF factor.The integrated density ratio is shown at the bottom for each band.Integrated density ratio is calculated using ImageJ, Figure S2: Serum samples from study subjects loaded on SDS-PAGE Gel to check the presence of proteins and get a rough idea of protein integrity: Lane 1: Ladder, Lane 2-6: Single positive RA patients for anti-CCP, Lane 7-9: Double negative RA patients for RF factor and anti-CCP.The in-tegrated density ratio is shown at the bottom for each band.Integrated density ratio is calculated using ImageJ, Figure S3: Serum samples from study subjects loaded on SDS-PAGE Gel to check the presence of proteins and get a rough idea of protein integrity: Lane 1: Ladder, Lane 2-8: Healthy control sam-ples.The integrated density ratio is shown at the bottom for each band.Integrated density ratio is calculated using ImageJ.', metadata={'text': 'The following supporting information can be downloaded at: https: //www.mdpi.com/article/10.3390/life12030464/s1;Table S1: Accession, number of unique peptides and description of identified proteins in all samples, Table S2: Pathway analysis results using Ingenuity Pathway Analysis, Table S3: The PZP concentration for the validation cohort, Figure S1: Serum samples from study subjects loaded on SDS-PAGE Gel to check the presence of proteins and get a rough idea of protein integrity: Lane 1: Ladder, Lane 2-6: Double positive RA patients for RF factor and anti-CCP, Lane 7-11: Single positive RA patients for RF factor.The integrated density ratio is shown at the bottom for each band.Integrated density ratio is calculated using ImageJ, Figure S2: Serum samples from study subjects loaded on SDS-PAGE Gel to check the presence of proteins and get a rough idea of protein integrity: Lane 1: Ladder, Lane 2-6: Single positive RA patients for anti-CCP, Lane 7-9: Double negative RA patients for RF factor and anti-CCP.The in-tegrated density ratio is shown at the bottom for each band.Integrated density ratio is calculated using ImageJ, Figure S3: Serum samples from study subjects loaded on SDS-PAGE Gel to check the presence of proteins and get a rough idea of protein integrity: Lane 1: Ladder, Lane 2-8: Healthy control sam-ples.The integrated density ratio is shown at the bottom for each band.Integrated density ratio is calculated using ImageJ.', 'para': '7', 'bboxes': \"[[{'page': '15', 'x': '278.51', 'y': '361.51', 'h': '281.88', 'w': '8.63'}, {'page': '15', 'x': '165.31', 'y': '373.54', 'h': '205.36', 'w': '8.63'}], [{'page': '15', 'x': '372.93', 'y': '373.54', 'h': '186.34', 'w': '8.63'}, {'page': '15', 'x': '166.39', 'y': '385.57', 'h': '392.88', 'w': '8.63'}, {'page': '15', 'x': '166.39', 'y': '397.60', 'h': '394.00', 'w': '8.63'}, {'page': '15', 'x': '166.39', 'y': '409.63', 'h': '392.88', 'w': '8.63'}, {'page': '15', 'x': '166.39', 'y': '421.66', 'h': '392.88', 'w': '8.63'}, {'page': '15', 'x': '166.39', 'y': '433.69', 'h': '282.04', 'w': '8.63'}], [{'page': '15', 'x': '451.20', 'y': '433.69', 'h': '108.07', 'w': '8.63'}, {'page': '15', 'x': '166.39', 'y': '445.72', 'h': '148.76', 'w': '8.63'}], [{'page': '15', 'x': '317.96', 'y': '445.72', 'h': '242.43', 'w': '8.63'}, {'page': '15', 'x': '166.39', 'y': '457.75', 'h': '392.88', 'w': '8.63'}, {'page': '15', 'x': '166.39', 'y': '469.78', 'h': '394.00', 'w': '8.63'}, {'page': '15', 'x': '166.39', 'y': '481.81', 'h': '267.09', 'w': '8.63'}], [{'page': '15', 'x': '435.72', 'y': '481.81', 'h': '123.56', 'w': '8.63'}, {'page': '15', 'x': '166.39', 'y': '493.84', 'h': '143.55', 'w': '8.63'}], [{'page': '15', 'x': '313.13', 'y': '493.84', 'h': '247.27', 'w': '8.63'}, {'page': '15', 'x': '166.39', 'y': '505.87', 'h': '392.88', 'w': '8.63'}, {'page': '15', 'x': '166.39', 'y': '517.90', 'h': '372.43', 'w': '8.63'}], [{'page': '15', 'x': '543.97', 'y': '517.90', 'h': '15.31', 'w': '8.63'}, {'page': '15', 'x': '166.39', 'y': '529.93', 'h': '245.00', 'w': '8.63'}], [{'page': '15', 'x': '414.17', 'y': '529.93', 'h': '145.11', 'w': '8.63'}, {'page': '15', 'x': '166.39', 'y': '541.96', 'h': '54.19', 'w': '8.63'}]]\", 'pages': \"('15', '15')\", 'section_title': 'Supplementary Materials:', 'section_number': 'None', 'paper_title': 'LC-MS/MS-Based Serum Protein Profiling for Identification of Candidate Biomarkers in Pakistani Rheumatoid Arthritis Patients', 'file_path': '/data/tommaso/data/papers/1.pdf'}),\n", + " Document(page_content='The MS raw data for this study are available at the ProteomeXchange Consortium doi PXD020235, 10.6019/PXD020235.', metadata={'text': 'The MS raw data for this study are available at the ProteomeXchange Consortium doi PXD020235, 10.6019/PXD020235.', 'para': '0', 'bboxes': \"[[{'page': '16', 'x': '285.36', 'y': '128.13', 'h': '273.91', 'w': '8.63'}, {'page': '16', 'x': '166.39', 'y': '139.85', 'h': '196.80', 'w': '8.63'}]]\", 'pages': \"('16', '16')\", 'section_title': 'Data Availability Statement:', 'section_number': 'None', 'paper_title': 'LC-MS/MS-Based Serum Protein Profiling for Identification of Candidate Biomarkers in Pakistani Rheumatoid Arthritis Patients', 'file_path': '/data/tommaso/data/papers/1.pdf'}),\n", + " Document(page_content='The MS raw data for this study are available at the ProteomeXchange Consortium doi PXD020235, 10.6019/PXD020235.', metadata={'text': 'The MS raw data for this study are available at the ProteomeXchange Consortium doi PXD020235, 10.6019/PXD020235.', 'para': '0', 'bboxes': \"[[{'page': '16', 'x': '285.36', 'y': '128.13', 'h': '273.91', 'w': '8.63'}, {'page': '16', 'x': '166.39', 'y': '139.85', 'h': '196.80', 'w': '8.63'}]]\", 'pages': \"('16', '16')\", 'section_title': 'Data Availability Statement:', 'section_number': 'None', 'paper_title': 'LC-MS/MS-Based Serum Protein Profiling for Identification of Candidate Biomarkers in Pakistani Rheumatoid Arthritis Patients', 'file_path': '/data/tommaso/data/papers/1.pdf'}),\n", + " Document(page_content='Author Contributions: Conceptualization, S.J., P.J., M.J.P. and J.M.M.; methodology, S.J., M.J.P. and J.R.A.; software, J.R.A. and M.J.P.; validation, S.J. and P.J.; formal analysis, A.B., M.M.A. and M.J.P.; investigation, S.J.; resources, P.J., A.B. and M.J.P.; data curation, S.J. and J.M.M.; writing-original draft preparation, S.J. and M.M.A.; writing-review and editing, M.J.P.; visualization, J.R.A.; supervision, P.J., M.J.P., J.M.M. and A.B.; project administration, P.J.; funding acquisition, P.J., A.B. and M.J.P.All authors have read and agreed to the published version of the manuscript.', metadata={'text': 'Author Contributions: Conceptualization, S.J., P.J., M.J.P. and J.M.M.; methodology, S.J., M.J.P. and J.R.A.; software, J.R.A. and M.J.P.; validation, S.J. and P.J.; formal analysis, A.B., M.M.A. and M.J.P.; investigation, S.J.; resources, P.J., A.B. and M.J.P.; data curation, S.J. and J.M.M.; writing-original draft preparation, S.J. and M.M.A.; writing-review and editing, M.J.P.; visualization, J.R.A.; supervision, P.J., M.J.P., J.M.M. and A.B.; project administration, P.J.; funding acquisition, P.J., A.B. and M.J.P.All authors have read and agreed to the published version of the manuscript.', 'para': '1', 'bboxes': \"[[{'page': '15', 'x': '166.04', 'y': '559.66', 'h': '393.23', 'w': '8.63'}, {'page': '15', 'x': '166.24', 'y': '571.37', 'h': '394.15', 'w': '8.63'}, {'page': '15', 'x': '166.39', 'y': '583.09', 'h': '392.88', 'w': '8.63'}, {'page': '15', 'x': '166.13', 'y': '594.81', 'h': '394.27', 'w': '8.63'}, {'page': '15', 'x': '166.39', 'y': '606.52', 'h': '378.39', 'w': '8.63'}], [{'page': '15', 'x': '547.04', 'y': '606.52', 'h': '12.24', 'w': '8.63'}, {'page': '15', 'x': '166.39', 'y': '618.24', 'h': '291.00', 'w': '8.63'}]]\", 'pages': \"('15', '15')\", 'section_title': 'Institutional Review Board Statement:', 'section_number': 'None', 'paper_title': 'LC-MS/MS-Based Serum Protein Profiling for Identification of Candidate Biomarkers in Pakistani Rheumatoid Arthritis Patients', 'file_path': '/data/tommaso/data/papers/1.pdf'}),\n", + " Document(page_content='Funding: Sidrah Jahangir, Peter John, Attya Bhatti and Muhammad Muaaz Aslam were funded by Higher Education Commission (HEC), Pakistan, (grant number 5965).Mandy Peffers was funded through a Wellcome Trust Clinical Intermediate Fellowship (grant number 107471/Z/15/Z).This work was also supported by the MRC and Versus Arthritis as part of the Medical Research Council Versus Arthritis Centre for Integrated Research into Musculoskeletal Ageing (CIMA) (MR/R502182/1).James Anderson was funded by the Horserace betting Levy Board.', metadata={'text': 'Funding: Sidrah Jahangir, Peter John, Attya Bhatti and Muhammad Muaaz Aslam were funded by Higher Education Commission (HEC), Pakistan, (grant number 5965).Mandy Peffers was funded through a Wellcome Trust Clinical Intermediate Fellowship (grant number 107471/Z/15/Z).This work was also supported by the MRC and Versus Arthritis as part of the Medical Research Council Versus Arthritis Centre for Integrated Research into Musculoskeletal Ageing (CIMA) (MR/R502182/1).James Anderson was funded by the Horserace betting Levy Board.', 'para': '3', 'bboxes': \"[[{'page': '15', 'x': '166.39', 'y': '635.93', 'h': '393.23', 'w': '8.63'}, {'page': '15', 'x': '166.39', 'y': '647.65', 'h': '281.19', 'w': '8.63'}], [{'page': '15', 'x': '450.36', 'y': '647.65', 'h': '108.91', 'w': '8.63'}, {'page': '15', 'x': '166.39', 'y': '659.36', 'h': '373.15', 'w': '8.63'}], [{'page': '15', 'x': '541.81', 'y': '659.36', 'h': '17.47', 'w': '8.63'}, {'page': '15', 'x': '166.02', 'y': '671.08', 'h': '394.75', 'w': '8.63'}, {'page': '15', 'x': '166.39', 'y': '682.80', 'h': '394.45', 'w': '8.63'}], [{'page': '15', 'x': '166.24', 'y': '694.51', 'h': '264.08', 'w': '8.63'}]]\", 'pages': \"('15', '15')\", 'section_title': 'Institutional Review Board Statement:', 'section_number': 'None', 'paper_title': 'LC-MS/MS-Based Serum Protein Profiling for Identification of Candidate Biomarkers in Pakistani Rheumatoid Arthritis Patients', 'file_path': '/data/tommaso/data/papers/1.pdf'}),\n", + " Document(page_content='The study was conducted in accordance with the Declaration of Helsinki, and approved by the Institutional Review Board of Atta-ur-Rahman School of Applied Biosciences (ASAB), National University of Sciences and Technology (NUST), 44,000 before the commencement of study.', metadata={'text': 'The study was conducted in accordance with the Declaration of Helsinki, and approved by the Institutional Review Board of Atta-ur-Rahman School of Applied Biosciences (ASAB), National University of Sciences and Technology (NUST), 44,000 before the commencement of study.', 'para': '0', 'bboxes': \"[[{'page': '15', 'x': '324.87', 'y': '712.21', 'h': '234.41', 'w': '8.63'}, {'page': '15', 'x': '166.39', 'y': '723.92', 'h': '392.88', 'w': '8.63'}, {'page': '15', 'x': '166.39', 'y': '735.64', 'h': '392.88', 'w': '8.63'}, {'page': '15', 'x': '166.39', 'y': '747.35', 'h': '99.06', 'w': '8.63'}]]\", 'pages': \"('15', '15')\", 'section_title': 'Institutional Review Board Statement:', 'section_number': 'None', 'paper_title': 'LC-MS/MS-Based Serum Protein Profiling for Identification of Candidate Biomarkers in Pakistani Rheumatoid Arthritis Patients', 'file_path': '/data/tommaso/data/papers/1.pdf'}),\n", + " Document(page_content='Informed Consent Statement: Informed consent was obtained from all subjects involved in the study.', metadata={'text': 'Informed Consent Statement: Informed consent was obtained from all subjects involved in the study.', 'para': '0', 'bboxes': \"[[{'page': '16', 'x': '166.39', 'y': '98.72', 'h': '392.88', 'w': '8.63'}, {'page': '16', 'x': '166.39', 'y': '110.44', 'h': '38.52', 'w': '8.63'}]]\", 'pages': \"('16', '16')\", 'section_title': 'Institutional Review Board Statement:', 'section_number': 'None', 'paper_title': 'LC-MS/MS-Based Serum Protein Profiling for Identification of Candidate Biomarkers in Pakistani Rheumatoid Arthritis Patients', 'file_path': '/data/tommaso/data/papers/1.pdf'}),\n", + " Document(page_content='The authors declare no conflict of interest.', metadata={'text': 'The authors declare no conflict of interest.', 'para': '0', 'bboxes': \"[[{'page': '16', 'x': '252.09', 'y': '157.54', 'h': '165.99', 'w': '8.63'}]]\", 'pages': \"('16', '16')\", 'section_title': 'Institutional Review Board Statement:', 'section_number': 'None', 'paper_title': 'LC-MS/MS-Based Serum Protein Profiling for Identification of Candidate Biomarkers in Pakistani Rheumatoid Arthritis Patients', 'file_path': '/data/tommaso/data/papers/1.pdf'}),\n", + " Document(page_content='The study was conducted in accordance with the Declaration of Helsinki, and approved by the Institutional Review Board of Atta-ur-Rahman School of Applied Biosciences (ASAB), National University of Sciences and Technology (NUST), 44,000 before the commencement of study.', metadata={'text': 'The study was conducted in accordance with the Declaration of Helsinki, and approved by the Institutional Review Board of Atta-ur-Rahman School of Applied Biosciences (ASAB), National University of Sciences and Technology (NUST), 44,000 before the commencement of study.', 'para': '0', 'bboxes': \"[[{'page': '15', 'x': '324.87', 'y': '712.21', 'h': '234.41', 'w': '8.63'}, {'page': '15', 'x': '166.39', 'y': '723.92', 'h': '392.88', 'w': '8.63'}, {'page': '15', 'x': '166.39', 'y': '735.64', 'h': '392.88', 'w': '8.63'}, {'page': '15', 'x': '166.39', 'y': '747.35', 'h': '99.06', 'w': '8.63'}]]\", 'pages': \"('15', '15')\", 'section_title': 'Institutional Review Board Statement:', 'section_number': 'None', 'paper_title': 'LC-MS/MS-Based Serum Protein Profiling for Identification of Candidate Biomarkers in Pakistani Rheumatoid Arthritis Patients', 'file_path': '/data/tommaso/data/papers/1.pdf'}),\n", + " Document(page_content='Informed Consent Statement: Informed consent was obtained from all subjects involved in the study.', metadata={'text': 'Informed Consent Statement: Informed consent was obtained from all subjects involved in the study.', 'para': '0', 'bboxes': \"[[{'page': '16', 'x': '166.39', 'y': '98.72', 'h': '392.88', 'w': '8.63'}, {'page': '16', 'x': '166.39', 'y': '110.44', 'h': '38.52', 'w': '8.63'}]]\", 'pages': \"('16', '16')\", 'section_title': 'Institutional Review Board Statement:', 'section_number': 'None', 'paper_title': 'LC-MS/MS-Based Serum Protein Profiling for Identification of Candidate Biomarkers in Pakistani Rheumatoid Arthritis Patients', 'file_path': '/data/tommaso/data/papers/1.pdf'}),\n", + " Document(page_content='The authors declare no conflict of interest.', metadata={'text': 'The authors declare no conflict of interest.', 'para': '0', 'bboxes': \"[[{'page': '16', 'x': '252.09', 'y': '157.54', 'h': '165.99', 'w': '8.63'}]]\", 'pages': \"('16', '16')\", 'section_title': 'Conflicts of Interest:', 'section_number': 'None', 'paper_title': 'LC-MS/MS-Based Serum Protein Profiling for Identification of Candidate Biomarkers in Pakistani Rheumatoid Arthritis Patients', 'file_path': '/data/tommaso/data/papers/1.pdf'})]" + ] + }, + "execution_count": 44, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "docs" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Type: stuff. \n", + "The authors detected protein abundances by using a technique called quantitative proteomics, which involves the use of mass spectrometry to measure the amount of protein in a sample. The authors then compared the protein abundances in the samples to determine which proteins were most abundant and which ones were present at lower levels.\n", + "Type: map_reduce. \n", + "The authors detected protein abundances by using a technique called quantitative proteomics, which involves the use of mass spectrometry to measure the amount of protein in a sample. The authors then compared the protein abundances in the samples to determine which proteins were most abundant and which ones were present at lower levels.\n", + "Type: refine. \n", + "The authors detected protein abundances by using a technique called quantitative proteomics, which involves the use of mass spectrometry to measure the amount of protein in a sample. The authors then compared the protein abundances in the samples to determine which proteins were most abundant and which ones were present at lower levels.\n", + "Type: map_rerank. \n", + "The authors detected protein abundances by using a technique called quantitative proteomics, which involves the use of mass spectrometry to measure the amount of protein in a sample. The authors then compared the protein abundances in the samples to determine which proteins were most abundant and which ones were present at lower levels.\n" + ] + } + ], + "source": [ + "from langchain import HuggingFaceHub\n", + "from langchain.chains.question_answering import load_qa_chain\n", + "\n", + "HUGGINGFACE_TOKEN = \"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\"\n", + "\n", + "llm = HuggingFaceHub(\n", + " repo_id=\"tiiuae/falcon-7b-instruct\",\n", + " model_kwargs={\"temperature\": 0.1, \"max_new_tokens\": 80},\n", + " huggingfacehub_api_token=HUGGINGFACE_TOKEN\n", + ")\n", + "question = \"How did the authors detect protein abundances?\"\n", + "\n", + "chain_types = [\"map_reduce\", \"refine\", \"map_rerank\"]\n", + "\n", + "chain = load_qa_chain(llm, chain_type=\"stuff\")\n", + "print(f\"\"\"Type: stuff. {chain({\"input_documents\": docs[1:3], \"question\": question}, return_only_outputs=True)[\"output_text\"]}\"\"\")\n", + "\n", + "for t in chain_types:\n", + " chain = load_qa_chain(llm, chain_type=\"stuff\")\n", + " # chain.llm_chain.prompt.template = \"\"\"question: {question}. context: {context}. answer: dummy answer.\"\"\"\n", + " print(f\"\"\"Type: {t}. {chain({\"input_documents\": docs[1:2], \"question\": question}, return_only_outputs=True)[\"output_text\"]}\"\"\")" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "ename": "ValueError", + "evalue": "Error raised by inference API: Model yhyhy3/med-orca-instruct-33b time out", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m/home/tommaso/llm4scilit/notebooks/test.ipynb Cell 15\u001b[0m line \u001b[0;36m1\n\u001b[1;32m 13\u001b[0m chain_types \u001b[39m=\u001b[39m [\u001b[39m\"\u001b[39m\u001b[39mmap_reduce\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39mrefine\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39mmap_rerank\u001b[39m\u001b[39m\"\u001b[39m]\n\u001b[1;32m 15\u001b[0m chain \u001b[39m=\u001b[39m load_qa_chain(llm, chain_type\u001b[39m=\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mstuff\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[0;32m---> 16\u001b[0m \u001b[39mprint\u001b[39m(\u001b[39mf\u001b[39m\u001b[39m\"\"\"\u001b[39m\u001b[39mType: stuff. \u001b[39m\u001b[39m{\u001b[39;00mchain({\u001b[39m\"\u001b[39;49m\u001b[39minput_documents\u001b[39;49m\u001b[39m\"\u001b[39;49m:\u001b[39m \u001b[39;49mdocs[\u001b[39m1\u001b[39;49m:\u001b[39m3\u001b[39;49m],\u001b[39m \u001b[39;49m\u001b[39m\"\u001b[39;49m\u001b[39mquestion\u001b[39;49m\u001b[39m\"\u001b[39;49m:\u001b[39m \u001b[39;49mquestion},\u001b[39m \u001b[39;49mreturn_only_outputs\u001b[39m=\u001b[39;49m\u001b[39mTrue\u001b[39;49;00m)[\u001b[39m\"\u001b[39m\u001b[39moutput_text\u001b[39m\u001b[39m\"\u001b[39m]\u001b[39m}\u001b[39;00m\u001b[39m\"\"\"\u001b[39m)\n\u001b[1;32m 18\u001b[0m \u001b[39mfor\u001b[39;00m t \u001b[39min\u001b[39;00m chain_types:\n\u001b[1;32m 19\u001b[0m chain \u001b[39m=\u001b[39m load_qa_chain(llm, chain_type\u001b[39m=\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mstuff\u001b[39m\u001b[39m\"\u001b[39m)\n", + "File \u001b[0;32m/data/tommaso/mambaforge/envs/llm4scilit/lib/python3.10/site-packages/langchain/chains/base.py:243\u001b[0m, in \u001b[0;36mChain.__call__\u001b[0;34m(self, inputs, return_only_outputs, callbacks, tags, metadata, include_run_info)\u001b[0m\n\u001b[1;32m 241\u001b[0m \u001b[39mexcept\u001b[39;00m (\u001b[39mKeyboardInterrupt\u001b[39;00m, \u001b[39mException\u001b[39;00m) \u001b[39mas\u001b[39;00m e:\n\u001b[1;32m 242\u001b[0m run_manager\u001b[39m.\u001b[39mon_chain_error(e)\n\u001b[0;32m--> 243\u001b[0m \u001b[39mraise\u001b[39;00m e\n\u001b[1;32m 244\u001b[0m run_manager\u001b[39m.\u001b[39mon_chain_end(outputs)\n\u001b[1;32m 245\u001b[0m final_outputs: Dict[\u001b[39mstr\u001b[39m, Any] \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mprep_outputs(\n\u001b[1;32m 246\u001b[0m inputs, outputs, return_only_outputs\n\u001b[1;32m 247\u001b[0m )\n", + "File \u001b[0;32m/data/tommaso/mambaforge/envs/llm4scilit/lib/python3.10/site-packages/langchain/chains/base.py:237\u001b[0m, in \u001b[0;36mChain.__call__\u001b[0;34m(self, inputs, return_only_outputs, callbacks, tags, metadata, include_run_info)\u001b[0m\n\u001b[1;32m 231\u001b[0m run_manager \u001b[39m=\u001b[39m callback_manager\u001b[39m.\u001b[39mon_chain_start(\n\u001b[1;32m 232\u001b[0m dumpd(\u001b[39mself\u001b[39m),\n\u001b[1;32m 233\u001b[0m inputs,\n\u001b[1;32m 234\u001b[0m )\n\u001b[1;32m 235\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m 236\u001b[0m outputs \u001b[39m=\u001b[39m (\n\u001b[0;32m--> 237\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_call(inputs, run_manager\u001b[39m=\u001b[39;49mrun_manager)\n\u001b[1;32m 238\u001b[0m \u001b[39mif\u001b[39;00m new_arg_supported\n\u001b[1;32m 239\u001b[0m \u001b[39melse\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_call(inputs)\n\u001b[1;32m 240\u001b[0m )\n\u001b[1;32m 241\u001b[0m \u001b[39mexcept\u001b[39;00m (\u001b[39mKeyboardInterrupt\u001b[39;00m, \u001b[39mException\u001b[39;00m) \u001b[39mas\u001b[39;00m e:\n\u001b[1;32m 242\u001b[0m run_manager\u001b[39m.\u001b[39mon_chain_error(e)\n", + "File \u001b[0;32m/data/tommaso/mambaforge/envs/llm4scilit/lib/python3.10/site-packages/langchain/chains/combine_documents/base.py:106\u001b[0m, in \u001b[0;36mBaseCombineDocumentsChain._call\u001b[0;34m(self, inputs, run_manager)\u001b[0m\n\u001b[1;32m 104\u001b[0m \u001b[39m# Other keys are assumed to be needed for LLM prediction\u001b[39;00m\n\u001b[1;32m 105\u001b[0m other_keys \u001b[39m=\u001b[39m {k: v \u001b[39mfor\u001b[39;00m k, v \u001b[39min\u001b[39;00m inputs\u001b[39m.\u001b[39mitems() \u001b[39mif\u001b[39;00m k \u001b[39m!=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39minput_key}\n\u001b[0;32m--> 106\u001b[0m output, extra_return_dict \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mcombine_docs(\n\u001b[1;32m 107\u001b[0m docs, callbacks\u001b[39m=\u001b[39;49m_run_manager\u001b[39m.\u001b[39;49mget_child(), \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mother_keys\n\u001b[1;32m 108\u001b[0m )\n\u001b[1;32m 109\u001b[0m extra_return_dict[\u001b[39mself\u001b[39m\u001b[39m.\u001b[39moutput_key] \u001b[39m=\u001b[39m output\n\u001b[1;32m 110\u001b[0m \u001b[39mreturn\u001b[39;00m extra_return_dict\n", + "File \u001b[0;32m/data/tommaso/mambaforge/envs/llm4scilit/lib/python3.10/site-packages/langchain/chains/combine_documents/stuff.py:165\u001b[0m, in \u001b[0;36mStuffDocumentsChain.combine_docs\u001b[0;34m(self, docs, callbacks, **kwargs)\u001b[0m\n\u001b[1;32m 163\u001b[0m inputs \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_get_inputs(docs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n\u001b[1;32m 164\u001b[0m \u001b[39m# Call predict on the LLM.\u001b[39;00m\n\u001b[0;32m--> 165\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mllm_chain\u001b[39m.\u001b[39;49mpredict(callbacks\u001b[39m=\u001b[39;49mcallbacks, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49minputs), {}\n", + "File \u001b[0;32m/data/tommaso/mambaforge/envs/llm4scilit/lib/python3.10/site-packages/langchain/chains/llm.py:252\u001b[0m, in \u001b[0;36mLLMChain.predict\u001b[0;34m(self, callbacks, **kwargs)\u001b[0m\n\u001b[1;32m 237\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mpredict\u001b[39m(\u001b[39mself\u001b[39m, callbacks: Callbacks \u001b[39m=\u001b[39m \u001b[39mNone\u001b[39;00m, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs: Any) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m \u001b[39mstr\u001b[39m:\n\u001b[1;32m 238\u001b[0m \u001b[39m \u001b[39m\u001b[39m\"\"\"Format prompt with kwargs and pass to LLM.\u001b[39;00m\n\u001b[1;32m 239\u001b[0m \n\u001b[1;32m 240\u001b[0m \u001b[39m Args:\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 250\u001b[0m \u001b[39m completion = llm.predict(adjective=\"funny\")\u001b[39;00m\n\u001b[1;32m 251\u001b[0m \u001b[39m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 252\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m(kwargs, callbacks\u001b[39m=\u001b[39;49mcallbacks)[\u001b[39mself\u001b[39m\u001b[39m.\u001b[39moutput_key]\n", + "File \u001b[0;32m/data/tommaso/mambaforge/envs/llm4scilit/lib/python3.10/site-packages/langchain/chains/base.py:243\u001b[0m, in \u001b[0;36mChain.__call__\u001b[0;34m(self, inputs, return_only_outputs, callbacks, tags, metadata, include_run_info)\u001b[0m\n\u001b[1;32m 241\u001b[0m \u001b[39mexcept\u001b[39;00m (\u001b[39mKeyboardInterrupt\u001b[39;00m, \u001b[39mException\u001b[39;00m) \u001b[39mas\u001b[39;00m e:\n\u001b[1;32m 242\u001b[0m run_manager\u001b[39m.\u001b[39mon_chain_error(e)\n\u001b[0;32m--> 243\u001b[0m \u001b[39mraise\u001b[39;00m e\n\u001b[1;32m 244\u001b[0m run_manager\u001b[39m.\u001b[39mon_chain_end(outputs)\n\u001b[1;32m 245\u001b[0m final_outputs: Dict[\u001b[39mstr\u001b[39m, Any] \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mprep_outputs(\n\u001b[1;32m 246\u001b[0m inputs, outputs, return_only_outputs\n\u001b[1;32m 247\u001b[0m )\n", + "File \u001b[0;32m/data/tommaso/mambaforge/envs/llm4scilit/lib/python3.10/site-packages/langchain/chains/base.py:237\u001b[0m, in \u001b[0;36mChain.__call__\u001b[0;34m(self, inputs, return_only_outputs, callbacks, tags, metadata, include_run_info)\u001b[0m\n\u001b[1;32m 231\u001b[0m run_manager \u001b[39m=\u001b[39m callback_manager\u001b[39m.\u001b[39mon_chain_start(\n\u001b[1;32m 232\u001b[0m dumpd(\u001b[39mself\u001b[39m),\n\u001b[1;32m 233\u001b[0m inputs,\n\u001b[1;32m 234\u001b[0m )\n\u001b[1;32m 235\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m 236\u001b[0m outputs \u001b[39m=\u001b[39m (\n\u001b[0;32m--> 237\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_call(inputs, run_manager\u001b[39m=\u001b[39;49mrun_manager)\n\u001b[1;32m 238\u001b[0m \u001b[39mif\u001b[39;00m new_arg_supported\n\u001b[1;32m 239\u001b[0m \u001b[39melse\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_call(inputs)\n\u001b[1;32m 240\u001b[0m )\n\u001b[1;32m 241\u001b[0m \u001b[39mexcept\u001b[39;00m (\u001b[39mKeyboardInterrupt\u001b[39;00m, \u001b[39mException\u001b[39;00m) \u001b[39mas\u001b[39;00m e:\n\u001b[1;32m 242\u001b[0m run_manager\u001b[39m.\u001b[39mon_chain_error(e)\n", + "File \u001b[0;32m/data/tommaso/mambaforge/envs/llm4scilit/lib/python3.10/site-packages/langchain/chains/llm.py:92\u001b[0m, in \u001b[0;36mLLMChain._call\u001b[0;34m(self, inputs, run_manager)\u001b[0m\n\u001b[1;32m 87\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m_call\u001b[39m(\n\u001b[1;32m 88\u001b[0m \u001b[39mself\u001b[39m,\n\u001b[1;32m 89\u001b[0m inputs: Dict[\u001b[39mstr\u001b[39m, Any],\n\u001b[1;32m 90\u001b[0m run_manager: Optional[CallbackManagerForChainRun] \u001b[39m=\u001b[39m \u001b[39mNone\u001b[39;00m,\n\u001b[1;32m 91\u001b[0m ) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m Dict[\u001b[39mstr\u001b[39m, \u001b[39mstr\u001b[39m]:\n\u001b[0;32m---> 92\u001b[0m response \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mgenerate([inputs], run_manager\u001b[39m=\u001b[39;49mrun_manager)\n\u001b[1;32m 93\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mcreate_outputs(response)[\u001b[39m0\u001b[39m]\n", + "File \u001b[0;32m/data/tommaso/mambaforge/envs/llm4scilit/lib/python3.10/site-packages/langchain/chains/llm.py:102\u001b[0m, in \u001b[0;36mLLMChain.generate\u001b[0;34m(self, input_list, run_manager)\u001b[0m\n\u001b[1;32m 100\u001b[0m \u001b[39m\u001b[39m\u001b[39m\"\"\"Generate LLM result from inputs.\"\"\"\u001b[39;00m\n\u001b[1;32m 101\u001b[0m prompts, stop \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mprep_prompts(input_list, run_manager\u001b[39m=\u001b[39mrun_manager)\n\u001b[0;32m--> 102\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mllm\u001b[39m.\u001b[39;49mgenerate_prompt(\n\u001b[1;32m 103\u001b[0m prompts,\n\u001b[1;32m 104\u001b[0m stop,\n\u001b[1;32m 105\u001b[0m callbacks\u001b[39m=\u001b[39;49mrun_manager\u001b[39m.\u001b[39;49mget_child() \u001b[39mif\u001b[39;49;00m run_manager \u001b[39melse\u001b[39;49;00m \u001b[39mNone\u001b[39;49;00m,\n\u001b[1;32m 106\u001b[0m \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mllm_kwargs,\n\u001b[1;32m 107\u001b[0m )\n", + "File \u001b[0;32m/data/tommaso/mambaforge/envs/llm4scilit/lib/python3.10/site-packages/langchain/llms/base.py:188\u001b[0m, in \u001b[0;36mBaseLLM.generate_prompt\u001b[0;34m(self, prompts, stop, callbacks, **kwargs)\u001b[0m\n\u001b[1;32m 180\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mgenerate_prompt\u001b[39m(\n\u001b[1;32m 181\u001b[0m \u001b[39mself\u001b[39m,\n\u001b[1;32m 182\u001b[0m prompts: List[PromptValue],\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 185\u001b[0m \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs: Any,\n\u001b[1;32m 186\u001b[0m ) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m LLMResult:\n\u001b[1;32m 187\u001b[0m prompt_strings \u001b[39m=\u001b[39m [p\u001b[39m.\u001b[39mto_string() \u001b[39mfor\u001b[39;00m p \u001b[39min\u001b[39;00m prompts]\n\u001b[0;32m--> 188\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mgenerate(prompt_strings, stop\u001b[39m=\u001b[39;49mstop, callbacks\u001b[39m=\u001b[39;49mcallbacks, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n", + "File \u001b[0;32m/data/tommaso/mambaforge/envs/llm4scilit/lib/python3.10/site-packages/langchain/llms/base.py:281\u001b[0m, in \u001b[0;36mBaseLLM.generate\u001b[0;34m(self, prompts, stop, callbacks, tags, metadata, **kwargs)\u001b[0m\n\u001b[1;32m 275\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\n\u001b[1;32m 276\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mAsked to cache, but no cache found at `langchain.cache`.\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 277\u001b[0m )\n\u001b[1;32m 278\u001b[0m run_managers \u001b[39m=\u001b[39m callback_manager\u001b[39m.\u001b[39mon_llm_start(\n\u001b[1;32m 279\u001b[0m dumpd(\u001b[39mself\u001b[39m), prompts, invocation_params\u001b[39m=\u001b[39mparams, options\u001b[39m=\u001b[39moptions\n\u001b[1;32m 280\u001b[0m )\n\u001b[0;32m--> 281\u001b[0m output \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_generate_helper(\n\u001b[1;32m 282\u001b[0m prompts, stop, run_managers, \u001b[39mbool\u001b[39;49m(new_arg_supported), \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs\n\u001b[1;32m 283\u001b[0m )\n\u001b[1;32m 284\u001b[0m \u001b[39mreturn\u001b[39;00m output\n\u001b[1;32m 285\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mlen\u001b[39m(missing_prompts) \u001b[39m>\u001b[39m \u001b[39m0\u001b[39m:\n", + "File \u001b[0;32m/data/tommaso/mambaforge/envs/llm4scilit/lib/python3.10/site-packages/langchain/llms/base.py:225\u001b[0m, in \u001b[0;36mBaseLLM._generate_helper\u001b[0;34m(self, prompts, stop, run_managers, new_arg_supported, **kwargs)\u001b[0m\n\u001b[1;32m 223\u001b[0m \u001b[39mfor\u001b[39;00m run_manager \u001b[39min\u001b[39;00m run_managers:\n\u001b[1;32m 224\u001b[0m run_manager\u001b[39m.\u001b[39mon_llm_error(e)\n\u001b[0;32m--> 225\u001b[0m \u001b[39mraise\u001b[39;00m e\n\u001b[1;32m 226\u001b[0m flattened_outputs \u001b[39m=\u001b[39m output\u001b[39m.\u001b[39mflatten()\n\u001b[1;32m 227\u001b[0m \u001b[39mfor\u001b[39;00m manager, flattened_output \u001b[39min\u001b[39;00m \u001b[39mzip\u001b[39m(run_managers, flattened_outputs):\n", + "File \u001b[0;32m/data/tommaso/mambaforge/envs/llm4scilit/lib/python3.10/site-packages/langchain/llms/base.py:212\u001b[0m, in \u001b[0;36mBaseLLM._generate_helper\u001b[0;34m(self, prompts, stop, run_managers, new_arg_supported, **kwargs)\u001b[0m\n\u001b[1;32m 202\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m_generate_helper\u001b[39m(\n\u001b[1;32m 203\u001b[0m \u001b[39mself\u001b[39m,\n\u001b[1;32m 204\u001b[0m prompts: List[\u001b[39mstr\u001b[39m],\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 208\u001b[0m \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs: Any,\n\u001b[1;32m 209\u001b[0m ) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m LLMResult:\n\u001b[1;32m 210\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m 211\u001b[0m output \u001b[39m=\u001b[39m (\n\u001b[0;32m--> 212\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_generate(\n\u001b[1;32m 213\u001b[0m prompts,\n\u001b[1;32m 214\u001b[0m stop\u001b[39m=\u001b[39;49mstop,\n\u001b[1;32m 215\u001b[0m \u001b[39m# TODO: support multiple run managers\u001b[39;49;00m\n\u001b[1;32m 216\u001b[0m run_manager\u001b[39m=\u001b[39;49mrun_managers[\u001b[39m0\u001b[39;49m] \u001b[39mif\u001b[39;49;00m run_managers \u001b[39melse\u001b[39;49;00m \u001b[39mNone\u001b[39;49;00m,\n\u001b[1;32m 217\u001b[0m \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs,\n\u001b[1;32m 218\u001b[0m )\n\u001b[1;32m 219\u001b[0m \u001b[39mif\u001b[39;00m new_arg_supported\n\u001b[1;32m 220\u001b[0m \u001b[39melse\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_generate(prompts, stop\u001b[39m=\u001b[39mstop)\n\u001b[1;32m 221\u001b[0m )\n\u001b[1;32m 222\u001b[0m \u001b[39mexcept\u001b[39;00m (\u001b[39mKeyboardInterrupt\u001b[39;00m, \u001b[39mException\u001b[39;00m) \u001b[39mas\u001b[39;00m e:\n\u001b[1;32m 223\u001b[0m \u001b[39mfor\u001b[39;00m run_manager \u001b[39min\u001b[39;00m run_managers:\n", + "File \u001b[0;32m/data/tommaso/mambaforge/envs/llm4scilit/lib/python3.10/site-packages/langchain/llms/base.py:604\u001b[0m, in \u001b[0;36mLLM._generate\u001b[0;34m(self, prompts, stop, run_manager, **kwargs)\u001b[0m\n\u001b[1;32m 601\u001b[0m new_arg_supported \u001b[39m=\u001b[39m inspect\u001b[39m.\u001b[39msignature(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_call)\u001b[39m.\u001b[39mparameters\u001b[39m.\u001b[39mget(\u001b[39m\"\u001b[39m\u001b[39mrun_manager\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m 602\u001b[0m \u001b[39mfor\u001b[39;00m prompt \u001b[39min\u001b[39;00m prompts:\n\u001b[1;32m 603\u001b[0m text \u001b[39m=\u001b[39m (\n\u001b[0;32m--> 604\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_call(prompt, stop\u001b[39m=\u001b[39;49mstop, run_manager\u001b[39m=\u001b[39;49mrun_manager, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n\u001b[1;32m 605\u001b[0m \u001b[39mif\u001b[39;00m new_arg_supported\n\u001b[1;32m 606\u001b[0m \u001b[39melse\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_call(prompt, stop\u001b[39m=\u001b[39mstop, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n\u001b[1;32m 607\u001b[0m )\n\u001b[1;32m 608\u001b[0m generations\u001b[39m.\u001b[39mappend([Generation(text\u001b[39m=\u001b[39mtext)])\n\u001b[1;32m 609\u001b[0m \u001b[39mreturn\u001b[39;00m LLMResult(generations\u001b[39m=\u001b[39mgenerations)\n", + "File \u001b[0;32m/data/tommaso/mambaforge/envs/llm4scilit/lib/python3.10/site-packages/langchain/llms/huggingface_hub.py:113\u001b[0m, in \u001b[0;36mHuggingFaceHub._call\u001b[0;34m(self, prompt, stop, run_manager, **kwargs)\u001b[0m\n\u001b[1;32m 111\u001b[0m response \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mclient(inputs\u001b[39m=\u001b[39mprompt, params\u001b[39m=\u001b[39mparams)\n\u001b[1;32m 112\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39m\"\u001b[39m\u001b[39merror\u001b[39m\u001b[39m\"\u001b[39m \u001b[39min\u001b[39;00m response:\n\u001b[0;32m--> 113\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mError raised by inference API: \u001b[39m\u001b[39m{\u001b[39;00mresponse[\u001b[39m'\u001b[39m\u001b[39merror\u001b[39m\u001b[39m'\u001b[39m]\u001b[39m}\u001b[39;00m\u001b[39m\"\u001b[39m)\n\u001b[1;32m 114\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mclient\u001b[39m.\u001b[39mtask \u001b[39m==\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mtext-generation\u001b[39m\u001b[39m\"\u001b[39m:\n\u001b[1;32m 115\u001b[0m \u001b[39m# Text generation return includes the starter text.\u001b[39;00m\n\u001b[1;32m 116\u001b[0m text \u001b[39m=\u001b[39m response[\u001b[39m0\u001b[39m][\u001b[39m\"\u001b[39m\u001b[39mgenerated_text\u001b[39m\u001b[39m\"\u001b[39m][\u001b[39mlen\u001b[39m(prompt) :]\n", + "\u001b[0;31mValueError\u001b[0m: Error raised by inference API: Model yhyhy3/med-orca-instruct-33b time out" + ] + }, + { + "ename": "", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[1;31mThe Kernel crashed while executing code in the the current cell or a previous cell. Please review the code in the cell(s) to identify a possible cause of the failure. Click here for more info. View Jupyter log for further details." + ] + } + ], + "source": [ + "from langchain import HuggingFaceHub\n", + "from langchain.chains.question_answering import load_qa_chain\n", + "\n", + "HUGGINGFACE_TOKEN = \"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\"\n", + "\n", + "llm = HuggingFaceHub(\n", + " repo_id=\"yhyhy3/med-orca-instruct-33b\",\n", + " model_kwargs={\"temperature\": 0.1, \"max_new_tokens\": 80},\n", + " huggingfacehub_api_token=HUGGINGFACE_TOKEN\n", + ")\n", + "question = \"How did the authors detect protein abundances?\"\n", + "\n", + "chain_types = [\"map_reduce\", \"refine\", \"map_rerank\"]\n", + "\n", + "chain = load_qa_chain(llm, chain_type=\"stuff\")\n", + "print(f\"\"\"Type: stuff. {chain({\"input_documents\": docs[1:3], \"question\": question}, return_only_outputs=True)[\"output_text\"]}\"\"\")\n", + "\n", + "for t in chain_types:\n", + " chain = load_qa_chain(llm, chain_type=\"stuff\")\n", + " # chain.llm_chain.prompt.template = \"\"\"question: {question}. context: {context}. answer: dummy answer.\"\"\"\n", + " print(f\"\"\"Type: {t}. {chain({\"input_documents\": docs[1:2], \"question\": question}, return_only_outputs=True)[\"output_text\"]}\"\"\")" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain import HuggingFaceHub\n", + "from langchain.chains.question_answering import load_qa_chain\n", + "\n", + "HUGGINGFACE_TOKEN = \"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\"\n", + "\n", + "llm = HuggingFaceHub(\n", + " # repo_id=\"tiiuae/falcon-7b-instruct\",\n", + " repo_id=\"yhyhy3/open_llama_7b_v2_med_instruct\",\n", + " model_kwargs={\"temperature\": 0.1, \"max_new_tokens\": 80},\n", + " huggingfacehub_api_token=HUGGINGFACE_TOKEN\n", + ")\n", + "question = \"How did the authors detect protein abundances?\"\n", + "\n", + "chain_types = [\"map_reduce\", \"refine\", \"map_rerank\"]\n", + "\n", + "chain = load_qa_chain(llm, chain_type=\"stuff\")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\"Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.\\n\\n{context}\\n\\nQuestion: {question}\\nHelpful Answer:\"" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "chain.llm_chain.prompt.template" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "ename": "ValueError", + "evalue": "`run` supported with either positional arguments or keyword arguments, but none were provided.", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m/home/tommaso/llm4scilit/notebooks/test.ipynb Cell 16\u001b[0m line \u001b[0;36m1\n\u001b[0;32m----> 1\u001b[0m chain\u001b[39m.\u001b[39;49mrun()\n", + "File \u001b[0;32m/data/tommaso/mambaforge/envs/llm4scilit/lib/python3.10/site-packages/langchain/chains/base.py:450\u001b[0m, in \u001b[0;36mChain.run\u001b[0;34m(self, callbacks, tags, metadata, *args, **kwargs)\u001b[0m\n\u001b[1;32m 445\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m(kwargs, callbacks\u001b[39m=\u001b[39mcallbacks, tags\u001b[39m=\u001b[39mtags, metadata\u001b[39m=\u001b[39mmetadata)[\n\u001b[1;32m 446\u001b[0m _output_key\n\u001b[1;32m 447\u001b[0m ]\n\u001b[1;32m 449\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m kwargs \u001b[39mand\u001b[39;00m \u001b[39mnot\u001b[39;00m args:\n\u001b[0;32m--> 450\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\n\u001b[1;32m 451\u001b[0m \u001b[39m\"\u001b[39m\u001b[39m`run` supported with either positional arguments or keyword arguments,\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 452\u001b[0m \u001b[39m\"\u001b[39m\u001b[39m but none were provided.\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 453\u001b[0m )\n\u001b[1;32m 454\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 455\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\n\u001b[1;32m 456\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m`run` supported with either positional arguments or keyword arguments\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 457\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m but not both. Got args: \u001b[39m\u001b[39m{\u001b[39;00margs\u001b[39m}\u001b[39;00m\u001b[39m and kwargs: \u001b[39m\u001b[39m{\u001b[39;00mkwargs\u001b[39m}\u001b[39;00m\u001b[39m.\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 458\u001b[0m )\n", + "\u001b[0;31mValueError\u001b[0m: `run` supported with either positional arguments or keyword arguments, but none were provided." + ] + } + ], + "source": [ + "chain.run()" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'{context}\\n{question} '" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from langchain import PromptTemplate\n", + "\n", + "template = \"\"\"{context}\\n{question} \"\"\"\n", + "\n", + "prompt_template = PromptTemplate(\n", + " template=template,\n", + " input_variables=[\"context\", \"question\"],\n", + ")\n", + "\n", + "load_qa_chain(llm, chain_type=\"stuff\", prompt=prompt_template).llm_chain.prompt.template" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/outputs/2023-09-04/10-52-54/.hydra/config.yaml b/outputs/2023-09-04/10-52-54/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1f7699562b2a7d67c5f56166af84409913157a2e --- /dev/null +++ b/outputs/2023-09-04/10-52-54/.hydra/config.yaml @@ -0,0 +1,3 @@ +document_loader: {} +text_splitter: {} +vector_storage: {} diff --git a/outputs/2023-09-04/10-52-54/.hydra/hydra.yaml b/outputs/2023-09-04/10-52-54/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6e6ee860861657a5f0024775ea055f5cbd098197 --- /dev/null +++ b/outputs/2023-09-04/10-52-54/.hydra/hydra.yaml @@ -0,0 +1,157 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-04/10-52-54 + choices: + vector_storage: faiss + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-04/10-52-54/.hydra/overrides.yaml b/outputs/2023-09-04/10-52-54/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-04/10-52-54/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-04/11-23-02/.hydra/config.yaml b/outputs/2023-09-04/11-23-02/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1f7699562b2a7d67c5f56166af84409913157a2e --- /dev/null +++ b/outputs/2023-09-04/11-23-02/.hydra/config.yaml @@ -0,0 +1,3 @@ +document_loader: {} +text_splitter: {} +vector_storage: {} diff --git a/outputs/2023-09-04/11-23-02/.hydra/hydra.yaml b/outputs/2023-09-04/11-23-02/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..14c290e2bc0f15373a1bfba5255d04e4aeee6b5c --- /dev/null +++ b/outputs/2023-09-04/11-23-02/.hydra/hydra.yaml @@ -0,0 +1,157 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-04/11-23-02 + choices: + vector_storage: faiss + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-04/11-23-02/.hydra/overrides.yaml b/outputs/2023-09-04/11-23-02/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-04/11-23-02/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-04/11-28-02/.hydra/config.yaml b/outputs/2023-09-04/11-28-02/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1f7699562b2a7d67c5f56166af84409913157a2e --- /dev/null +++ b/outputs/2023-09-04/11-28-02/.hydra/config.yaml @@ -0,0 +1,3 @@ +document_loader: {} +text_splitter: {} +vector_storage: {} diff --git a/outputs/2023-09-04/11-28-02/.hydra/hydra.yaml b/outputs/2023-09-04/11-28-02/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..eef373ca8f2e98aab666ef5bb1150ec9e83c288d --- /dev/null +++ b/outputs/2023-09-04/11-28-02/.hydra/hydra.yaml @@ -0,0 +1,157 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-04/11-28-02 + choices: + vector_storage: faiss + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-04/11-28-02/.hydra/overrides.yaml b/outputs/2023-09-04/11-28-02/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-04/11-28-02/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-04/11-28-08/.hydra/config.yaml b/outputs/2023-09-04/11-28-08/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1f7699562b2a7d67c5f56166af84409913157a2e --- /dev/null +++ b/outputs/2023-09-04/11-28-08/.hydra/config.yaml @@ -0,0 +1,3 @@ +document_loader: {} +text_splitter: {} +vector_storage: {} diff --git a/outputs/2023-09-04/11-28-08/.hydra/hydra.yaml b/outputs/2023-09-04/11-28-08/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7900e22498a933e3a0f52061bad3cc555e035424 --- /dev/null +++ b/outputs/2023-09-04/11-28-08/.hydra/hydra.yaml @@ -0,0 +1,157 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-04/11-28-08 + choices: + vector_storage: faiss + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-04/11-28-08/.hydra/overrides.yaml b/outputs/2023-09-04/11-28-08/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-04/11-28-08/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-04/11-28-16/.hydra/config.yaml b/outputs/2023-09-04/11-28-16/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1f7699562b2a7d67c5f56166af84409913157a2e --- /dev/null +++ b/outputs/2023-09-04/11-28-16/.hydra/config.yaml @@ -0,0 +1,3 @@ +document_loader: {} +text_splitter: {} +vector_storage: {} diff --git a/outputs/2023-09-04/11-28-16/.hydra/hydra.yaml b/outputs/2023-09-04/11-28-16/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..eaaf58cab20e944b6a5ae09e4d49f83e01df5742 --- /dev/null +++ b/outputs/2023-09-04/11-28-16/.hydra/hydra.yaml @@ -0,0 +1,157 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-04/11-28-16 + choices: + vector_storage: faiss + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-04/11-28-16/.hydra/overrides.yaml b/outputs/2023-09-04/11-28-16/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-04/11-28-16/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-04/11-28-31/.hydra/config.yaml b/outputs/2023-09-04/11-28-31/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1f7699562b2a7d67c5f56166af84409913157a2e --- /dev/null +++ b/outputs/2023-09-04/11-28-31/.hydra/config.yaml @@ -0,0 +1,3 @@ +document_loader: {} +text_splitter: {} +vector_storage: {} diff --git a/outputs/2023-09-04/11-28-31/.hydra/hydra.yaml b/outputs/2023-09-04/11-28-31/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7f902890eacc3a14097f8898b73d9ec3151010ac --- /dev/null +++ b/outputs/2023-09-04/11-28-31/.hydra/hydra.yaml @@ -0,0 +1,157 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-04/11-28-31 + choices: + vector_storage: faiss + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-04/11-28-31/.hydra/overrides.yaml b/outputs/2023-09-04/11-28-31/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-04/11-28-31/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-04/11-29-46/.hydra/config.yaml b/outputs/2023-09-04/11-29-46/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1f7699562b2a7d67c5f56166af84409913157a2e --- /dev/null +++ b/outputs/2023-09-04/11-29-46/.hydra/config.yaml @@ -0,0 +1,3 @@ +document_loader: {} +text_splitter: {} +vector_storage: {} diff --git a/outputs/2023-09-04/11-29-46/.hydra/hydra.yaml b/outputs/2023-09-04/11-29-46/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4a4556d211e2d01d8276d8dabb0b73335900b830 --- /dev/null +++ b/outputs/2023-09-04/11-29-46/.hydra/hydra.yaml @@ -0,0 +1,157 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-04/11-29-46 + choices: + vector_storage: faiss + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-04/11-29-46/.hydra/overrides.yaml b/outputs/2023-09-04/11-29-46/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-04/11-29-46/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-04/11-31-52/.hydra/config.yaml b/outputs/2023-09-04/11-31-52/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1f7699562b2a7d67c5f56166af84409913157a2e --- /dev/null +++ b/outputs/2023-09-04/11-31-52/.hydra/config.yaml @@ -0,0 +1,3 @@ +document_loader: {} +text_splitter: {} +vector_storage: {} diff --git a/outputs/2023-09-04/11-31-52/.hydra/hydra.yaml b/outputs/2023-09-04/11-31-52/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a3ca2052643982680923adc4aac4efa06526b8b4 --- /dev/null +++ b/outputs/2023-09-04/11-31-52/.hydra/hydra.yaml @@ -0,0 +1,157 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-04/11-31-52 + choices: + vector_storage: faiss + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-04/11-31-52/.hydra/overrides.yaml b/outputs/2023-09-04/11-31-52/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-04/11-31-52/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-04/11-32-46/.hydra/config.yaml b/outputs/2023-09-04/11-32-46/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1f7699562b2a7d67c5f56166af84409913157a2e --- /dev/null +++ b/outputs/2023-09-04/11-32-46/.hydra/config.yaml @@ -0,0 +1,3 @@ +document_loader: {} +text_splitter: {} +vector_storage: {} diff --git a/outputs/2023-09-04/11-32-46/.hydra/hydra.yaml b/outputs/2023-09-04/11-32-46/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1231f5576080c16a8a827d783ff8967ba4a191fa --- /dev/null +++ b/outputs/2023-09-04/11-32-46/.hydra/hydra.yaml @@ -0,0 +1,157 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-04/11-32-46 + choices: + vector_storage: faiss + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-04/11-32-46/.hydra/overrides.yaml b/outputs/2023-09-04/11-32-46/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-04/11-32-46/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-04/11-33-33/.hydra/config.yaml b/outputs/2023-09-04/11-33-33/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7e91bba3b83acba2964abd329011ebd1f6e4cffb --- /dev/null +++ b/outputs/2023-09-04/11-33-33/.hydra/config.yaml @@ -0,0 +1,5 @@ +document_loader: + loader: + - name: grobid +text_splitter: {} +vector_storage: {} diff --git a/outputs/2023-09-04/11-33-33/.hydra/hydra.yaml b/outputs/2023-09-04/11-33-33/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..aefeb02c050fe3a7fac3249f520e440cfae7a51c --- /dev/null +++ b/outputs/2023-09-04/11-33-33/.hydra/hydra.yaml @@ -0,0 +1,157 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-04/11-33-33 + choices: + vector_storage: faiss + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-04/11-33-33/.hydra/overrides.yaml b/outputs/2023-09-04/11-33-33/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-04/11-33-33/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-04/11-33-42/.hydra/config.yaml b/outputs/2023-09-04/11-33-42/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a68c5bedab37c1a86d9dec69a42ff46afc3b95dd --- /dev/null +++ b/outputs/2023-09-04/11-33-42/.hydra/config.yaml @@ -0,0 +1,4 @@ +document_loader: + name: grobid +text_splitter: {} +vector_storage: {} diff --git a/outputs/2023-09-04/11-33-42/.hydra/hydra.yaml b/outputs/2023-09-04/11-33-42/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b6f6a57bf24c8534f46c179c6d1e67ad923c2960 --- /dev/null +++ b/outputs/2023-09-04/11-33-42/.hydra/hydra.yaml @@ -0,0 +1,157 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-04/11-33-42 + choices: + vector_storage: faiss + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-04/11-33-42/.hydra/overrides.yaml b/outputs/2023-09-04/11-33-42/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-04/11-33-42/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-04/11-33-52/.hydra/config.yaml b/outputs/2023-09-04/11-33-52/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a70c8ef7033a10f0b2fe845e0624a65d168c27a4 --- /dev/null +++ b/outputs/2023-09-04/11-33-52/.hydra/config.yaml @@ -0,0 +1,5 @@ +document_loader: + name: grobid + lines: 100 +text_splitter: {} +vector_storage: {} diff --git a/outputs/2023-09-04/11-33-52/.hydra/hydra.yaml b/outputs/2023-09-04/11-33-52/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6a39fd3e495950ea780bdb776e9a6ba06411f3fa --- /dev/null +++ b/outputs/2023-09-04/11-33-52/.hydra/hydra.yaml @@ -0,0 +1,157 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-04/11-33-52 + choices: + vector_storage: faiss + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-04/11-33-52/.hydra/overrides.yaml b/outputs/2023-09-04/11-33-52/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-04/11-33-52/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-04/12-33-40/.hydra/config.yaml b/outputs/2023-09-04/12-33-40/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c7369e07080d234eab7df845c55b335fa412d4fe --- /dev/null +++ b/outputs/2023-09-04/12-33-40/.hydra/config.yaml @@ -0,0 +1,7 @@ +document_loader: + name: grobid + target: document_loader/grobid + grobid_service: http://localhost:8070 +text_splitter: {} +vector_storage: {} +debug: true diff --git a/outputs/2023-09-04/12-33-40/.hydra/hydra.yaml b/outputs/2023-09-04/12-33-40/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5e6ea109d6bc06d57713417c33ef8712f36c6af1 --- /dev/null +++ b/outputs/2023-09-04/12-33-40/.hydra/hydra.yaml @@ -0,0 +1,157 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-04/12-33-40 + choices: + vector_storage: faiss + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: ${debug} diff --git a/outputs/2023-09-04/12-33-40/.hydra/overrides.yaml b/outputs/2023-09-04/12-33-40/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-04/12-33-40/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-04/12-55-57/.hydra/config.yaml b/outputs/2023-09-04/12-55-57/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a831c658b947fb2e4552a635182fa09f907ab7e5 --- /dev/null +++ b/outputs/2023-09-04/12-55-57/.hydra/config.yaml @@ -0,0 +1,12 @@ +document_loader: + grobid: + _target_: document_loader.grobid.Grobid + grobid_parser: + _target_: langchain.grobid_parser.GrobidParser + segment_sentences: false + grobid_base_url: http://localhost:8070 + grobid_service: processFulltextDocument + grobid_server: ${grobid_parser.grobid_base_url}/api/${grobid_parser.grobid_service} +text_splitter: {} +vector_storage: {} +debug: true diff --git a/outputs/2023-09-04/12-55-57/.hydra/hydra.yaml b/outputs/2023-09-04/12-55-57/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..65ee75cabd001a5393462ac234a0a74e4e042a7d --- /dev/null +++ b/outputs/2023-09-04/12-55-57/.hydra/hydra.yaml @@ -0,0 +1,157 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-04/12-55-57 + choices: + vector_storage: faiss + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: ${debug} diff --git a/outputs/2023-09-04/12-55-57/.hydra/overrides.yaml b/outputs/2023-09-04/12-55-57/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-04/12-55-57/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-04/12-57-40/.hydra/config.yaml b/outputs/2023-09-04/12-57-40/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2f7201367d3689aa46c1efa4a291bf322c43e299 --- /dev/null +++ b/outputs/2023-09-04/12-57-40/.hydra/config.yaml @@ -0,0 +1,12 @@ +document_loader: + grobid: + _target_: document_loader.grobid.Grobid + grobid_parser: + _target_: langchain.grobid_parser.GrobidParser + segment_sentences: false + grobid_base_url: http://localhost:8070 + grobid_service: processFulltextDocument + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: {} +vector_storage: {} +debug: true diff --git a/outputs/2023-09-04/12-57-40/.hydra/hydra.yaml b/outputs/2023-09-04/12-57-40/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5fa532ab006d7c5cdbeea586049be1da66e4fe71 --- /dev/null +++ b/outputs/2023-09-04/12-57-40/.hydra/hydra.yaml @@ -0,0 +1,157 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-04/12-57-40 + choices: + vector_storage: faiss + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: ${debug} diff --git a/outputs/2023-09-04/12-57-40/.hydra/overrides.yaml b/outputs/2023-09-04/12-57-40/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-04/12-57-40/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-04/12-58-08/.hydra/config.yaml b/outputs/2023-09-04/12-58-08/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2f7201367d3689aa46c1efa4a291bf322c43e299 --- /dev/null +++ b/outputs/2023-09-04/12-58-08/.hydra/config.yaml @@ -0,0 +1,12 @@ +document_loader: + grobid: + _target_: document_loader.grobid.Grobid + grobid_parser: + _target_: langchain.grobid_parser.GrobidParser + segment_sentences: false + grobid_base_url: http://localhost:8070 + grobid_service: processFulltextDocument + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: {} +vector_storage: {} +debug: true diff --git a/outputs/2023-09-04/12-58-08/.hydra/hydra.yaml b/outputs/2023-09-04/12-58-08/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f7229908cee804a0963f534f7d695ecfbe8bcc6c --- /dev/null +++ b/outputs/2023-09-04/12-58-08/.hydra/hydra.yaml @@ -0,0 +1,157 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-04/12-58-08 + choices: + vector_storage: faiss + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: ${debug} diff --git a/outputs/2023-09-04/12-58-08/.hydra/overrides.yaml b/outputs/2023-09-04/12-58-08/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-04/12-58-08/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-04/12-58-45/.hydra/config.yaml b/outputs/2023-09-04/12-58-45/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9c61f7f9ea5e11a40bd5b11d8c313e678a0f54be --- /dev/null +++ b/outputs/2023-09-04/12-58-45/.hydra/config.yaml @@ -0,0 +1,11 @@ +document_loader: + _target_: document_loader.grobid.Grobid + grobid_parser: + _target_: langchain.grobid_parser.GrobidParser + segment_sentences: false + grobid_base_url: http://localhost:8070 + grobid_service: processFulltextDocument + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: {} +vector_storage: {} +debug: true diff --git a/outputs/2023-09-04/12-58-45/.hydra/hydra.yaml b/outputs/2023-09-04/12-58-45/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..583db484ad88e19cf7c2b6391fab6d2f0cc20bc1 --- /dev/null +++ b/outputs/2023-09-04/12-58-45/.hydra/hydra.yaml @@ -0,0 +1,157 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-04/12-58-45 + choices: + vector_storage: faiss + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: ${debug} diff --git a/outputs/2023-09-04/12-58-45/.hydra/overrides.yaml b/outputs/2023-09-04/12-58-45/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-04/12-58-45/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-04/13-00-06/.hydra/config.yaml b/outputs/2023-09-04/13-00-06/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9c61f7f9ea5e11a40bd5b11d8c313e678a0f54be --- /dev/null +++ b/outputs/2023-09-04/13-00-06/.hydra/config.yaml @@ -0,0 +1,11 @@ +document_loader: + _target_: document_loader.grobid.Grobid + grobid_parser: + _target_: langchain.grobid_parser.GrobidParser + segment_sentences: false + grobid_base_url: http://localhost:8070 + grobid_service: processFulltextDocument + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: {} +vector_storage: {} +debug: true diff --git a/outputs/2023-09-04/13-00-06/.hydra/hydra.yaml b/outputs/2023-09-04/13-00-06/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..911f4d50b1dde4c0bcd4f8318eb5ac26e58d0471 --- /dev/null +++ b/outputs/2023-09-04/13-00-06/.hydra/hydra.yaml @@ -0,0 +1,159 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: + HYDRA_FULL_ERROR: 1 + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - ++hydra.env.HYDRA_FULL_ERROR=1 + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-04/13-00-06 + choices: + vector_storage: faiss + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: ${debug} diff --git a/outputs/2023-09-04/13-00-06/.hydra/overrides.yaml b/outputs/2023-09-04/13-00-06/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-04/13-00-06/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-04/13-00-51/.hydra/config.yaml b/outputs/2023-09-04/13-00-51/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9c61f7f9ea5e11a40bd5b11d8c313e678a0f54be --- /dev/null +++ b/outputs/2023-09-04/13-00-51/.hydra/config.yaml @@ -0,0 +1,11 @@ +document_loader: + _target_: document_loader.grobid.Grobid + grobid_parser: + _target_: langchain.grobid_parser.GrobidParser + segment_sentences: false + grobid_base_url: http://localhost:8070 + grobid_service: processFulltextDocument + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: {} +vector_storage: {} +debug: true diff --git a/outputs/2023-09-04/13-00-51/.hydra/hydra.yaml b/outputs/2023-09-04/13-00-51/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6fffd863b3f3b1b33f8d0a1550221ac70870e4bb --- /dev/null +++ b/outputs/2023-09-04/13-00-51/.hydra/hydra.yaml @@ -0,0 +1,157 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-04/13-00-51 + choices: + vector_storage: faiss + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: ${debug} diff --git a/outputs/2023-09-04/13-00-51/.hydra/overrides.yaml b/outputs/2023-09-04/13-00-51/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-04/13-00-51/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-04/13-01-23/.hydra/config.yaml b/outputs/2023-09-04/13-01-23/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fcfbe8930c1f5fdf8ea770cedf04358c1ad46d7c --- /dev/null +++ b/outputs/2023-09-04/13-01-23/.hydra/config.yaml @@ -0,0 +1,11 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.grobid_parser.GrobidParser + segment_sentences: false + grobid_base_url: http://localhost:8070 + grobid_service: processFulltextDocument + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: {} +vector_storage: {} +debug: true diff --git a/outputs/2023-09-04/13-01-23/.hydra/hydra.yaml b/outputs/2023-09-04/13-01-23/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..dae6fc1ef81d33743eb78c458b7f3a416d353cb3 --- /dev/null +++ b/outputs/2023-09-04/13-01-23/.hydra/hydra.yaml @@ -0,0 +1,157 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-04/13-01-23 + choices: + vector_storage: faiss + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: ${debug} diff --git a/outputs/2023-09-04/13-01-23/.hydra/overrides.yaml b/outputs/2023-09-04/13-01-23/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-04/13-01-23/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-04/13-01-54/.hydra/config.yaml b/outputs/2023-09-04/13-01-54/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1997aa8ab62ab6a945ef904e8c3862e16d4b1b53 --- /dev/null +++ b/outputs/2023-09-04/13-01-54/.hydra/config.yaml @@ -0,0 +1,11 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_base_url: http://localhost:8070 + grobid_service: processFulltextDocument + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: {} +vector_storage: {} +debug: true diff --git a/outputs/2023-09-04/13-01-54/.hydra/hydra.yaml b/outputs/2023-09-04/13-01-54/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..65bb3a27e5b5909900ebee35e9c47c32e836310d --- /dev/null +++ b/outputs/2023-09-04/13-01-54/.hydra/hydra.yaml @@ -0,0 +1,157 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-04/13-01-54 + choices: + vector_storage: faiss + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: ${debug} diff --git a/outputs/2023-09-04/13-01-54/.hydra/overrides.yaml b/outputs/2023-09-04/13-01-54/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-04/13-01-54/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-04/13-02-19/.hydra/config.yaml b/outputs/2023-09-04/13-02-19/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cc24f1ad52d670ff6f9bcc41aa5ea9433e391685 --- /dev/null +++ b/outputs/2023-09-04/13-02-19/.hydra/config.yaml @@ -0,0 +1,9 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: {} +vector_storage: {} +debug: true diff --git a/outputs/2023-09-04/13-02-19/.hydra/hydra.yaml b/outputs/2023-09-04/13-02-19/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8f00f3e8993866ff1acf38db4952549309479d00 --- /dev/null +++ b/outputs/2023-09-04/13-02-19/.hydra/hydra.yaml @@ -0,0 +1,157 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-04/13-02-19 + choices: + vector_storage: faiss + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: ${debug} diff --git a/outputs/2023-09-04/13-02-19/.hydra/overrides.yaml b/outputs/2023-09-04/13-02-19/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-04/13-02-19/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-04/15-20-53/.hydra/config.yaml b/outputs/2023-09-04/15-20-53/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cc24f1ad52d670ff6f9bcc41aa5ea9433e391685 --- /dev/null +++ b/outputs/2023-09-04/15-20-53/.hydra/config.yaml @@ -0,0 +1,9 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: {} +vector_storage: {} +debug: true diff --git a/outputs/2023-09-04/15-20-53/.hydra/hydra.yaml b/outputs/2023-09-04/15-20-53/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..639ac3faa0d4f53ea561a53ebac325262af7816f --- /dev/null +++ b/outputs/2023-09-04/15-20-53/.hydra/hydra.yaml @@ -0,0 +1,157 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-04/15-20-53 + choices: + vector_storage: faiss + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: ${debug} diff --git a/outputs/2023-09-04/15-20-53/.hydra/overrides.yaml b/outputs/2023-09-04/15-20-53/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-04/15-20-53/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-04/15-22-26/.hydra/config.yaml b/outputs/2023-09-04/15-22-26/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cc24f1ad52d670ff6f9bcc41aa5ea9433e391685 --- /dev/null +++ b/outputs/2023-09-04/15-22-26/.hydra/config.yaml @@ -0,0 +1,9 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: {} +vector_storage: {} +debug: true diff --git a/outputs/2023-09-04/15-22-26/.hydra/hydra.yaml b/outputs/2023-09-04/15-22-26/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..454e1fa435ae7a346cc40afe7cf581e91dab3aac --- /dev/null +++ b/outputs/2023-09-04/15-22-26/.hydra/hydra.yaml @@ -0,0 +1,157 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-04/15-22-26 + choices: + vector_storage: faiss + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: ${debug} diff --git a/outputs/2023-09-04/15-22-26/.hydra/overrides.yaml b/outputs/2023-09-04/15-22-26/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-04/15-22-26/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-04/15-33-54/.hydra/config.yaml b/outputs/2023-09-04/15-33-54/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1d54ad54ed04041c54223c56a3836688e0d27c22 --- /dev/null +++ b/outputs/2023-09-04/15-33-54/.hydra/config.yaml @@ -0,0 +1,12 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: {} +vector_storage: {} +documents_path: /data/tommaso/data/papers +mode: + type: interactive +debug: true diff --git a/outputs/2023-09-04/15-33-54/.hydra/hydra.yaml b/outputs/2023-09-04/15-33-54/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..83aec827f1a83d69112b7432c8d32a0275786a93 --- /dev/null +++ b/outputs/2023-09-04/15-33-54/.hydra/hydra.yaml @@ -0,0 +1,157 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-04/15-33-54 + choices: + vector_storage: faiss + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: ${debug} diff --git a/outputs/2023-09-04/15-33-54/.hydra/overrides.yaml b/outputs/2023-09-04/15-33-54/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-04/15-33-54/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-04/15-34-27/.hydra/config.yaml b/outputs/2023-09-04/15-34-27/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1d54ad54ed04041c54223c56a3836688e0d27c22 --- /dev/null +++ b/outputs/2023-09-04/15-34-27/.hydra/config.yaml @@ -0,0 +1,12 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: {} +vector_storage: {} +documents_path: /data/tommaso/data/papers +mode: + type: interactive +debug: true diff --git a/outputs/2023-09-04/15-34-27/.hydra/hydra.yaml b/outputs/2023-09-04/15-34-27/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ed527f05b78f9406a1ebcb53ff6b502a2ca74335 --- /dev/null +++ b/outputs/2023-09-04/15-34-27/.hydra/hydra.yaml @@ -0,0 +1,157 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-04/15-34-27 + choices: + vector_storage: faiss + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: ${debug} diff --git a/outputs/2023-09-04/15-34-27/.hydra/overrides.yaml b/outputs/2023-09-04/15-34-27/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-04/15-34-27/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-04/15-35-01/.hydra/config.yaml b/outputs/2023-09-04/15-35-01/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1d54ad54ed04041c54223c56a3836688e0d27c22 --- /dev/null +++ b/outputs/2023-09-04/15-35-01/.hydra/config.yaml @@ -0,0 +1,12 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: {} +vector_storage: {} +documents_path: /data/tommaso/data/papers +mode: + type: interactive +debug: true diff --git a/outputs/2023-09-04/15-35-01/.hydra/hydra.yaml b/outputs/2023-09-04/15-35-01/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..dbe94dbd3d6df1c9d5ceb92fd93f52fb7e12fee1 --- /dev/null +++ b/outputs/2023-09-04/15-35-01/.hydra/hydra.yaml @@ -0,0 +1,157 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-04/15-35-01 + choices: + vector_storage: faiss + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: ${debug} diff --git a/outputs/2023-09-04/15-35-01/.hydra/overrides.yaml b/outputs/2023-09-04/15-35-01/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-04/15-35-01/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-04/15-50-22/.hydra/config.yaml b/outputs/2023-09-04/15-50-22/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1d54ad54ed04041c54223c56a3836688e0d27c22 --- /dev/null +++ b/outputs/2023-09-04/15-50-22/.hydra/config.yaml @@ -0,0 +1,12 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: {} +vector_storage: {} +documents_path: /data/tommaso/data/papers +mode: + type: interactive +debug: true diff --git a/outputs/2023-09-04/15-50-22/.hydra/hydra.yaml b/outputs/2023-09-04/15-50-22/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1a935c78e090ca5292adbf571bf681c25d8a7f25 --- /dev/null +++ b/outputs/2023-09-04/15-50-22/.hydra/hydra.yaml @@ -0,0 +1,157 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-04/15-50-22 + choices: + vector_storage: faiss + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: ${debug} diff --git a/outputs/2023-09-04/15-50-22/.hydra/overrides.yaml b/outputs/2023-09-04/15-50-22/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-04/15-50-22/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-04/15-51-04/.hydra/config.yaml b/outputs/2023-09-04/15-51-04/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1d54ad54ed04041c54223c56a3836688e0d27c22 --- /dev/null +++ b/outputs/2023-09-04/15-51-04/.hydra/config.yaml @@ -0,0 +1,12 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: {} +vector_storage: {} +documents_path: /data/tommaso/data/papers +mode: + type: interactive +debug: true diff --git a/outputs/2023-09-04/15-51-04/.hydra/hydra.yaml b/outputs/2023-09-04/15-51-04/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3ddf0a7f5df1ca946e2fc3d0f7df893b5d505c90 --- /dev/null +++ b/outputs/2023-09-04/15-51-04/.hydra/hydra.yaml @@ -0,0 +1,157 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-04/15-51-04 + choices: + vector_storage: faiss + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: ${debug} diff --git a/outputs/2023-09-04/15-51-04/.hydra/overrides.yaml b/outputs/2023-09-04/15-51-04/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-04/15-51-04/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-04/15-51-12/.hydra/config.yaml b/outputs/2023-09-04/15-51-12/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1d54ad54ed04041c54223c56a3836688e0d27c22 --- /dev/null +++ b/outputs/2023-09-04/15-51-12/.hydra/config.yaml @@ -0,0 +1,12 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: {} +vector_storage: {} +documents_path: /data/tommaso/data/papers +mode: + type: interactive +debug: true diff --git a/outputs/2023-09-04/15-51-12/.hydra/hydra.yaml b/outputs/2023-09-04/15-51-12/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..68a2b71759da516a702ef35e9603e6bc6a120224 --- /dev/null +++ b/outputs/2023-09-04/15-51-12/.hydra/hydra.yaml @@ -0,0 +1,157 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-04/15-51-12 + choices: + vector_storage: faiss + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: ${debug} diff --git a/outputs/2023-09-04/15-51-12/.hydra/overrides.yaml b/outputs/2023-09-04/15-51-12/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-04/15-51-12/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-04/15-51-28/.hydra/config.yaml b/outputs/2023-09-04/15-51-28/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1d54ad54ed04041c54223c56a3836688e0d27c22 --- /dev/null +++ b/outputs/2023-09-04/15-51-28/.hydra/config.yaml @@ -0,0 +1,12 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: {} +vector_storage: {} +documents_path: /data/tommaso/data/papers +mode: + type: interactive +debug: true diff --git a/outputs/2023-09-04/15-51-28/.hydra/hydra.yaml b/outputs/2023-09-04/15-51-28/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9abad0eda801487ebdd43201479c07914594d73c --- /dev/null +++ b/outputs/2023-09-04/15-51-28/.hydra/hydra.yaml @@ -0,0 +1,157 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-04/15-51-28 + choices: + vector_storage: faiss + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: ${debug} diff --git a/outputs/2023-09-04/15-51-28/.hydra/overrides.yaml b/outputs/2023-09-04/15-51-28/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-04/15-51-28/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-04/15-52-17/.hydra/config.yaml b/outputs/2023-09-04/15-52-17/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1d54ad54ed04041c54223c56a3836688e0d27c22 --- /dev/null +++ b/outputs/2023-09-04/15-52-17/.hydra/config.yaml @@ -0,0 +1,12 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: {} +vector_storage: {} +documents_path: /data/tommaso/data/papers +mode: + type: interactive +debug: true diff --git a/outputs/2023-09-04/15-52-17/.hydra/hydra.yaml b/outputs/2023-09-04/15-52-17/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..25aeb6a2d944de498a5a6183e958ad0e4801c8aa --- /dev/null +++ b/outputs/2023-09-04/15-52-17/.hydra/hydra.yaml @@ -0,0 +1,157 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-04/15-52-17 + choices: + vector_storage: faiss + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-04/15-52-17/.hydra/overrides.yaml b/outputs/2023-09-04/15-52-17/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-04/15-52-17/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-04/16-14-35/.hydra/config.yaml b/outputs/2023-09-04/16-14-35/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1d54ad54ed04041c54223c56a3836688e0d27c22 --- /dev/null +++ b/outputs/2023-09-04/16-14-35/.hydra/config.yaml @@ -0,0 +1,12 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: {} +vector_storage: {} +documents_path: /data/tommaso/data/papers +mode: + type: interactive +debug: true diff --git a/outputs/2023-09-04/16-14-35/.hydra/hydra.yaml b/outputs/2023-09-04/16-14-35/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b395e6ab0f0501c5e4dbe0c8f5fac3f085b2bcbf --- /dev/null +++ b/outputs/2023-09-04/16-14-35/.hydra/hydra.yaml @@ -0,0 +1,157 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-04/16-14-35 + choices: + vector_storage: faiss + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-04/16-14-35/.hydra/overrides.yaml b/outputs/2023-09-04/16-14-35/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-04/16-14-35/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-04/16-22-58/.hydra/config.yaml b/outputs/2023-09-04/16-22-58/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1d54ad54ed04041c54223c56a3836688e0d27c22 --- /dev/null +++ b/outputs/2023-09-04/16-22-58/.hydra/config.yaml @@ -0,0 +1,12 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: {} +vector_storage: {} +documents_path: /data/tommaso/data/papers +mode: + type: interactive +debug: true diff --git a/outputs/2023-09-04/16-22-58/.hydra/hydra.yaml b/outputs/2023-09-04/16-22-58/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..205324a9a83f8fbb0754f60b6287bc302d390cbe --- /dev/null +++ b/outputs/2023-09-04/16-22-58/.hydra/hydra.yaml @@ -0,0 +1,157 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-04/16-22-58 + choices: + vector_storage: faiss + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-04/16-22-58/.hydra/overrides.yaml b/outputs/2023-09-04/16-22-58/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-04/16-22-58/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-04/16-24-17/.hydra/config.yaml b/outputs/2023-09-04/16-24-17/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ed90322e1d2d7b749128dd7a0e071fc8ac9e6746 --- /dev/null +++ b/outputs/2023-09-04/16-24-17/.hydra/config.yaml @@ -0,0 +1,11 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: {} +vector_storage: {} +documents_path: /data/tommaso/data/papers +mode: interactive +debug: true diff --git a/outputs/2023-09-04/16-24-17/.hydra/hydra.yaml b/outputs/2023-09-04/16-24-17/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..162890176b809c847ec72f190d2d38d4235dcdc7 --- /dev/null +++ b/outputs/2023-09-04/16-24-17/.hydra/hydra.yaml @@ -0,0 +1,157 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-04/16-24-17 + choices: + vector_storage: faiss + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-04/16-24-17/.hydra/overrides.yaml b/outputs/2023-09-04/16-24-17/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-04/16-24-17/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-04/16-28-26/.hydra/config.yaml b/outputs/2023-09-04/16-28-26/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ed90322e1d2d7b749128dd7a0e071fc8ac9e6746 --- /dev/null +++ b/outputs/2023-09-04/16-28-26/.hydra/config.yaml @@ -0,0 +1,11 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: {} +vector_storage: {} +documents_path: /data/tommaso/data/papers +mode: interactive +debug: true diff --git a/outputs/2023-09-04/16-28-26/.hydra/hydra.yaml b/outputs/2023-09-04/16-28-26/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..206d7e5d798e381bcfcb5ad3017f6c6bdbbf7a34 --- /dev/null +++ b/outputs/2023-09-04/16-28-26/.hydra/hydra.yaml @@ -0,0 +1,157 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-04/16-28-26 + choices: + vector_storage: faiss + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-04/16-28-26/.hydra/overrides.yaml b/outputs/2023-09-04/16-28-26/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-04/16-28-26/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-04/16-29-26/.hydra/config.yaml b/outputs/2023-09-04/16-29-26/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3ae883ab6e1a246363cc2787563f97e6e76af15d --- /dev/null +++ b/outputs/2023-09-04/16-29-26/.hydra/config.yaml @@ -0,0 +1,12 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: langchain.text_splitter.SpacyTextSplitter +vector_storage: {} +documents_path: /data/tommaso/data/papers +mode: interactive +debug: true diff --git a/outputs/2023-09-04/16-29-26/.hydra/hydra.yaml b/outputs/2023-09-04/16-29-26/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..04e0d8645a74bbcee9b31eddbdf23524ee589b22 --- /dev/null +++ b/outputs/2023-09-04/16-29-26/.hydra/hydra.yaml @@ -0,0 +1,157 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-04/16-29-26 + choices: + vector_storage: faiss + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-04/16-29-26/.hydra/overrides.yaml b/outputs/2023-09-04/16-29-26/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-04/16-29-26/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-04/16-30-44/.hydra/config.yaml b/outputs/2023-09-04/16-30-44/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3ae883ab6e1a246363cc2787563f97e6e76af15d --- /dev/null +++ b/outputs/2023-09-04/16-30-44/.hydra/config.yaml @@ -0,0 +1,12 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: langchain.text_splitter.SpacyTextSplitter +vector_storage: {} +documents_path: /data/tommaso/data/papers +mode: interactive +debug: true diff --git a/outputs/2023-09-04/16-30-44/.hydra/hydra.yaml b/outputs/2023-09-04/16-30-44/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2c64ca66d9b83c0223eb43ca836a4caec4d83c90 --- /dev/null +++ b/outputs/2023-09-04/16-30-44/.hydra/hydra.yaml @@ -0,0 +1,157 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-04/16-30-44 + choices: + vector_storage: faiss + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-04/16-30-44/.hydra/overrides.yaml b/outputs/2023-09-04/16-30-44/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-04/16-30-44/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-04/16-32-56/.hydra/config.yaml b/outputs/2023-09-04/16-32-56/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3ae883ab6e1a246363cc2787563f97e6e76af15d --- /dev/null +++ b/outputs/2023-09-04/16-32-56/.hydra/config.yaml @@ -0,0 +1,12 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: langchain.text_splitter.SpacyTextSplitter +vector_storage: {} +documents_path: /data/tommaso/data/papers +mode: interactive +debug: true diff --git a/outputs/2023-09-04/16-32-56/.hydra/hydra.yaml b/outputs/2023-09-04/16-32-56/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..207f39d5cec8597c7695ee84e1747229f381f276 --- /dev/null +++ b/outputs/2023-09-04/16-32-56/.hydra/hydra.yaml @@ -0,0 +1,157 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-04/16-32-56 + choices: + vector_storage: faiss + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-04/16-32-56/.hydra/overrides.yaml b/outputs/2023-09-04/16-32-56/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-04/16-32-56/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-04/16-33-54/.hydra/config.yaml b/outputs/2023-09-04/16-33-54/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4994d7a159faef8ea86e6c81b1132a26f08cca2c --- /dev/null +++ b/outputs/2023-09-04/16-33-54/.hydra/config.yaml @@ -0,0 +1,12 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +vector_storage: {} +documents_path: /data/tommaso/data/papers +mode: interactive +debug: true diff --git a/outputs/2023-09-04/16-33-54/.hydra/hydra.yaml b/outputs/2023-09-04/16-33-54/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f9e43f5687f81c2db042e2e3685ae90343c87fb0 --- /dev/null +++ b/outputs/2023-09-04/16-33-54/.hydra/hydra.yaml @@ -0,0 +1,157 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-04/16-33-54 + choices: + vector_storage: faiss + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-04/16-33-54/.hydra/overrides.yaml b/outputs/2023-09-04/16-33-54/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-04/16-33-54/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-04/16-35-58/.hydra/config.yaml b/outputs/2023-09-04/16-35-58/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4994d7a159faef8ea86e6c81b1132a26f08cca2c --- /dev/null +++ b/outputs/2023-09-04/16-35-58/.hydra/config.yaml @@ -0,0 +1,12 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +vector_storage: {} +documents_path: /data/tommaso/data/papers +mode: interactive +debug: true diff --git a/outputs/2023-09-04/16-35-58/.hydra/hydra.yaml b/outputs/2023-09-04/16-35-58/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..228f4ab7ba15ff3f76a938074cd82e51a9461caf --- /dev/null +++ b/outputs/2023-09-04/16-35-58/.hydra/hydra.yaml @@ -0,0 +1,157 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-04/16-35-58 + choices: + vector_storage: faiss + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-04/16-35-58/.hydra/overrides.yaml b/outputs/2023-09-04/16-35-58/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-04/16-35-58/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-04/16-38-12/.hydra/config.yaml b/outputs/2023-09-04/16-38-12/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4994d7a159faef8ea86e6c81b1132a26f08cca2c --- /dev/null +++ b/outputs/2023-09-04/16-38-12/.hydra/config.yaml @@ -0,0 +1,12 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +vector_storage: {} +documents_path: /data/tommaso/data/papers +mode: interactive +debug: true diff --git a/outputs/2023-09-04/16-38-12/.hydra/hydra.yaml b/outputs/2023-09-04/16-38-12/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7c090c61b95bc42c201b3b617f8a95c3c7c04df8 --- /dev/null +++ b/outputs/2023-09-04/16-38-12/.hydra/hydra.yaml @@ -0,0 +1,157 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-04/16-38-12 + choices: + vector_storage: faiss + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-04/16-38-12/.hydra/overrides.yaml b/outputs/2023-09-04/16-38-12/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-04/16-38-12/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-05/16-52-42/.hydra/config.yaml b/outputs/2023-09-05/16-52-42/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4994d7a159faef8ea86e6c81b1132a26f08cca2c --- /dev/null +++ b/outputs/2023-09-05/16-52-42/.hydra/config.yaml @@ -0,0 +1,12 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +vector_storage: {} +documents_path: /data/tommaso/data/papers +mode: interactive +debug: true diff --git a/outputs/2023-09-05/16-52-42/.hydra/hydra.yaml b/outputs/2023-09-05/16-52-42/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f763ca7832a037ec1cfe6ec9ed8ca3a8f1484b90 --- /dev/null +++ b/outputs/2023-09-05/16-52-42/.hydra/hydra.yaml @@ -0,0 +1,157 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-05/16-52-42 + choices: + vector_storage: faiss + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-05/16-52-42/.hydra/overrides.yaml b/outputs/2023-09-05/16-52-42/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-05/16-52-42/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-05/16-57-16/.hydra/config.yaml b/outputs/2023-09-05/16-57-16/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4994d7a159faef8ea86e6c81b1132a26f08cca2c --- /dev/null +++ b/outputs/2023-09-05/16-57-16/.hydra/config.yaml @@ -0,0 +1,12 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +vector_storage: {} +documents_path: /data/tommaso/data/papers +mode: interactive +debug: true diff --git a/outputs/2023-09-05/16-57-16/.hydra/hydra.yaml b/outputs/2023-09-05/16-57-16/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b03eeaab394a7e0503d649fafa4555a51ec12b39 --- /dev/null +++ b/outputs/2023-09-05/16-57-16/.hydra/hydra.yaml @@ -0,0 +1,157 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-05/16-57-16 + choices: + vector_storage: faiss + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-05/16-57-16/.hydra/overrides.yaml b/outputs/2023-09-05/16-57-16/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-05/16-57-16/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-07/17-15-45/.hydra/config.yaml b/outputs/2023-09-07/17-15-45/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f6baa0e126bded47e97971db89b39da006a204db --- /dev/null +++ b/outputs/2023-09-07/17-15-45/.hydra/config.yaml @@ -0,0 +1,14 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: {} +vector_storage: {} +document_retriever: {} +documents_path: /data/tommaso/data/papers +mode: interactive +debug: true diff --git a/outputs/2023-09-07/17-15-45/.hydra/hydra.yaml b/outputs/2023-09-07/17-15-45/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fd87cb4daf7bf3c543673400d23492ad7e4e04fd --- /dev/null +++ b/outputs/2023-09-07/17-15-45/.hydra/hydra.yaml @@ -0,0 +1,159 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-07/17-15-45 + choices: + document_retriever: simple_retriever + vector_storage: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-07/17-15-45/.hydra/overrides.yaml b/outputs/2023-09-07/17-15-45/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-07/17-15-45/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-07/17-38-12/.hydra/config.yaml b/outputs/2023-09-07/17-38-12/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cee4482c48b9f318f56fb2be5e5a769f578dd77f --- /dev/null +++ b/outputs/2023-09-07/17-38-12/.hydra/config.yaml @@ -0,0 +1,17 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_storage: + _target_: vector_storage.faiss.FAISSVectorStorage +document_retriever: {} +question_answering: {} +documents_path: /data/tommaso/data/papers +mode: interactive +debug: true diff --git a/outputs/2023-09-07/17-38-12/.hydra/hydra.yaml b/outputs/2023-09-07/17-38-12/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..be8289c795ed68ef990bcdf80971f91c6b52ec3e --- /dev/null +++ b/outputs/2023-09-07/17-38-12/.hydra/hydra.yaml @@ -0,0 +1,160 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-07/17-38-12 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_storage: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-07/17-38-12/.hydra/overrides.yaml b/outputs/2023-09-07/17-38-12/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-07/17-38-12/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-07/17-39-32/.hydra/config.yaml b/outputs/2023-09-07/17-39-32/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e4871df5d873c239e70b558ee1f49bd351116bee --- /dev/null +++ b/outputs/2023-09-07/17-39-32/.hydra/config.yaml @@ -0,0 +1,17 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: {} +question_answering: {} +documents_path: /data/tommaso/data/papers +mode: interactive +debug: true diff --git a/outputs/2023-09-07/17-39-32/.hydra/hydra.yaml b/outputs/2023-09-07/17-39-32/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6a3c4b2a28eb5475a4001c669fae0d50a940ff16 --- /dev/null +++ b/outputs/2023-09-07/17-39-32/.hydra/hydra.yaml @@ -0,0 +1,160 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-07/17-39-32 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-07/17-39-32/.hydra/overrides.yaml b/outputs/2023-09-07/17-39-32/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-07/17-39-32/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-07/17-39-57/.hydra/config.yaml b/outputs/2023-09-07/17-39-57/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e4871df5d873c239e70b558ee1f49bd351116bee --- /dev/null +++ b/outputs/2023-09-07/17-39-57/.hydra/config.yaml @@ -0,0 +1,17 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: {} +question_answering: {} +documents_path: /data/tommaso/data/papers +mode: interactive +debug: true diff --git a/outputs/2023-09-07/17-39-57/.hydra/hydra.yaml b/outputs/2023-09-07/17-39-57/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..af03487eb3a13bfeb806c36362af7ceadebe9747 --- /dev/null +++ b/outputs/2023-09-07/17-39-57/.hydra/hydra.yaml @@ -0,0 +1,160 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-07/17-39-57 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-07/17-39-57/.hydra/overrides.yaml b/outputs/2023-09-07/17-39-57/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-07/17-39-57/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-07/17-41-23/.hydra/config.yaml b/outputs/2023-09-07/17-41-23/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e4871df5d873c239e70b558ee1f49bd351116bee --- /dev/null +++ b/outputs/2023-09-07/17-41-23/.hydra/config.yaml @@ -0,0 +1,17 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: {} +question_answering: {} +documents_path: /data/tommaso/data/papers +mode: interactive +debug: true diff --git a/outputs/2023-09-07/17-41-23/.hydra/hydra.yaml b/outputs/2023-09-07/17-41-23/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ae74ac764977e3f89f4c1af69fab72ca73fa34f5 --- /dev/null +++ b/outputs/2023-09-07/17-41-23/.hydra/hydra.yaml @@ -0,0 +1,160 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-07/17-41-23 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-07/17-41-23/.hydra/overrides.yaml b/outputs/2023-09-07/17-41-23/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-07/17-41-23/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-07/17-53-01/.hydra/config.yaml b/outputs/2023-09-07/17-53-01/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e4871df5d873c239e70b558ee1f49bd351116bee --- /dev/null +++ b/outputs/2023-09-07/17-53-01/.hydra/config.yaml @@ -0,0 +1,17 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: {} +question_answering: {} +documents_path: /data/tommaso/data/papers +mode: interactive +debug: true diff --git a/outputs/2023-09-07/17-53-01/.hydra/hydra.yaml b/outputs/2023-09-07/17-53-01/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4ffe44d5423f7ae3e5b16e1eb5be523f051bd92b --- /dev/null +++ b/outputs/2023-09-07/17-53-01/.hydra/hydra.yaml @@ -0,0 +1,160 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-07/17-53-01 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-07/17-53-01/.hydra/overrides.yaml b/outputs/2023-09-07/17-53-01/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-07/17-53-01/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-07/17-58-09/.hydra/config.yaml b/outputs/2023-09-07/17-58-09/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..471712d427fcdf4c8592503f9738d86882691efa --- /dev/null +++ b/outputs/2023-09-07/17-58-09/.hydra/config.yaml @@ -0,0 +1,18 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: {} +documents_path: /data/tommaso/data/papers +mode: interactive +debug: true diff --git a/outputs/2023-09-07/17-58-09/.hydra/hydra.yaml b/outputs/2023-09-07/17-58-09/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0e362acb714a2edc4a6e9d5d8ef5b8d5ae5fc60c --- /dev/null +++ b/outputs/2023-09-07/17-58-09/.hydra/hydra.yaml @@ -0,0 +1,160 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-07/17-58-09 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-07/17-58-09/.hydra/overrides.yaml b/outputs/2023-09-07/17-58-09/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-07/17-58-09/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-07/17-59-43/.hydra/config.yaml b/outputs/2023-09-07/17-59-43/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..471712d427fcdf4c8592503f9738d86882691efa --- /dev/null +++ b/outputs/2023-09-07/17-59-43/.hydra/config.yaml @@ -0,0 +1,18 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: {} +documents_path: /data/tommaso/data/papers +mode: interactive +debug: true diff --git a/outputs/2023-09-07/17-59-43/.hydra/hydra.yaml b/outputs/2023-09-07/17-59-43/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d2581b2e80cc6e834dee76d8835a9f1f863818e4 --- /dev/null +++ b/outputs/2023-09-07/17-59-43/.hydra/hydra.yaml @@ -0,0 +1,160 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-07/17-59-43 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-07/17-59-43/.hydra/overrides.yaml b/outputs/2023-09-07/17-59-43/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-07/17-59-43/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-07/18-03-19/.hydra/config.yaml b/outputs/2023-09-07/18-03-19/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..471712d427fcdf4c8592503f9738d86882691efa --- /dev/null +++ b/outputs/2023-09-07/18-03-19/.hydra/config.yaml @@ -0,0 +1,18 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: {} +documents_path: /data/tommaso/data/papers +mode: interactive +debug: true diff --git a/outputs/2023-09-07/18-03-19/.hydra/hydra.yaml b/outputs/2023-09-07/18-03-19/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ca2ec09e175d9eefb905f515fe13f1b9493997df --- /dev/null +++ b/outputs/2023-09-07/18-03-19/.hydra/hydra.yaml @@ -0,0 +1,160 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-07/18-03-19 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-07/18-03-19/.hydra/overrides.yaml b/outputs/2023-09-07/18-03-19/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-07/18-03-19/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-07/18-03-52/.hydra/config.yaml b/outputs/2023-09-07/18-03-52/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..471712d427fcdf4c8592503f9738d86882691efa --- /dev/null +++ b/outputs/2023-09-07/18-03-52/.hydra/config.yaml @@ -0,0 +1,18 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: {} +documents_path: /data/tommaso/data/papers +mode: interactive +debug: true diff --git a/outputs/2023-09-07/18-03-52/.hydra/hydra.yaml b/outputs/2023-09-07/18-03-52/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f52f07a7cb67d2c49080a65518c611d3e9981bdd --- /dev/null +++ b/outputs/2023-09-07/18-03-52/.hydra/hydra.yaml @@ -0,0 +1,160 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-07/18-03-52 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-07/18-03-52/.hydra/overrides.yaml b/outputs/2023-09-07/18-03-52/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-07/18-03-52/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-07/18-04-11/.hydra/config.yaml b/outputs/2023-09-07/18-04-11/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..471712d427fcdf4c8592503f9738d86882691efa --- /dev/null +++ b/outputs/2023-09-07/18-04-11/.hydra/config.yaml @@ -0,0 +1,18 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: {} +documents_path: /data/tommaso/data/papers +mode: interactive +debug: true diff --git a/outputs/2023-09-07/18-04-11/.hydra/hydra.yaml b/outputs/2023-09-07/18-04-11/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..06e94c7f9feece5df719a4cf5a225fb68130cf1f --- /dev/null +++ b/outputs/2023-09-07/18-04-11/.hydra/hydra.yaml @@ -0,0 +1,160 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-07/18-04-11 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-07/18-04-11/.hydra/overrides.yaml b/outputs/2023-09-07/18-04-11/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-07/18-04-11/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-07/18-13-08/.hydra/config.yaml b/outputs/2023-09-07/18-13-08/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..471712d427fcdf4c8592503f9738d86882691efa --- /dev/null +++ b/outputs/2023-09-07/18-13-08/.hydra/config.yaml @@ -0,0 +1,18 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: {} +documents_path: /data/tommaso/data/papers +mode: interactive +debug: true diff --git a/outputs/2023-09-07/18-13-08/.hydra/hydra.yaml b/outputs/2023-09-07/18-13-08/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..893b0aaec46ef3f5a016c3792c752cf2f8d21407 --- /dev/null +++ b/outputs/2023-09-07/18-13-08/.hydra/hydra.yaml @@ -0,0 +1,160 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-07/18-13-08 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-07/18-13-08/.hydra/overrides.yaml b/outputs/2023-09-07/18-13-08/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-07/18-13-08/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-07/18-15-28/.hydra/config.yaml b/outputs/2023-09-07/18-15-28/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..de76c13b3c705cf155377be703d6cb37df20d9dd --- /dev/null +++ b/outputs/2023-09-07/18-15-28/.hydra/config.yaml @@ -0,0 +1,19 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +documents_path: /data/tommaso/data/papers +mode: interactive +debug: true diff --git a/outputs/2023-09-07/18-15-28/.hydra/hydra.yaml b/outputs/2023-09-07/18-15-28/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..68f856e8e3d143a29e7276a85d6505df6d175218 --- /dev/null +++ b/outputs/2023-09-07/18-15-28/.hydra/hydra.yaml @@ -0,0 +1,160 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-07/18-15-28 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-07/18-15-28/.hydra/overrides.yaml b/outputs/2023-09-07/18-15-28/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-07/18-15-28/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-07/18-18-21/.hydra/config.yaml b/outputs/2023-09-07/18-18-21/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..de76c13b3c705cf155377be703d6cb37df20d9dd --- /dev/null +++ b/outputs/2023-09-07/18-18-21/.hydra/config.yaml @@ -0,0 +1,19 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +documents_path: /data/tommaso/data/papers +mode: interactive +debug: true diff --git a/outputs/2023-09-07/18-18-21/.hydra/hydra.yaml b/outputs/2023-09-07/18-18-21/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ce591b5246620159f5988d006d8f31d41f1e2a67 --- /dev/null +++ b/outputs/2023-09-07/18-18-21/.hydra/hydra.yaml @@ -0,0 +1,160 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-07/18-18-21 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-07/18-18-21/.hydra/overrides.yaml b/outputs/2023-09-07/18-18-21/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-07/18-18-21/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-07/18-21-09/.hydra/config.yaml b/outputs/2023-09-07/18-21-09/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..de76c13b3c705cf155377be703d6cb37df20d9dd --- /dev/null +++ b/outputs/2023-09-07/18-21-09/.hydra/config.yaml @@ -0,0 +1,19 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +documents_path: /data/tommaso/data/papers +mode: interactive +debug: true diff --git a/outputs/2023-09-07/18-21-09/.hydra/hydra.yaml b/outputs/2023-09-07/18-21-09/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0684b8185c27e160b4ce47ea4440c10d300a67e1 --- /dev/null +++ b/outputs/2023-09-07/18-21-09/.hydra/hydra.yaml @@ -0,0 +1,160 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-07/18-21-09 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-07/18-21-09/.hydra/overrides.yaml b/outputs/2023-09-07/18-21-09/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-07/18-21-09/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-07/18-23-13/.hydra/config.yaml b/outputs/2023-09-07/18-23-13/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..de76c13b3c705cf155377be703d6cb37df20d9dd --- /dev/null +++ b/outputs/2023-09-07/18-23-13/.hydra/config.yaml @@ -0,0 +1,19 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +documents_path: /data/tommaso/data/papers +mode: interactive +debug: true diff --git a/outputs/2023-09-07/18-23-13/.hydra/hydra.yaml b/outputs/2023-09-07/18-23-13/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..15d95ce91f5d9e7b96910dca65dd3b6ae38b1f5e --- /dev/null +++ b/outputs/2023-09-07/18-23-13/.hydra/hydra.yaml @@ -0,0 +1,160 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-07/18-23-13 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-07/18-23-13/.hydra/overrides.yaml b/outputs/2023-09-07/18-23-13/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-07/18-23-13/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-07/18-27-09/.hydra/config.yaml b/outputs/2023-09-07/18-27-09/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..de76c13b3c705cf155377be703d6cb37df20d9dd --- /dev/null +++ b/outputs/2023-09-07/18-27-09/.hydra/config.yaml @@ -0,0 +1,19 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +documents_path: /data/tommaso/data/papers +mode: interactive +debug: true diff --git a/outputs/2023-09-07/18-27-09/.hydra/hydra.yaml b/outputs/2023-09-07/18-27-09/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c7481462fc5727b7710d437b46798148dafe30b4 --- /dev/null +++ b/outputs/2023-09-07/18-27-09/.hydra/hydra.yaml @@ -0,0 +1,160 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-07/18-27-09 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-07/18-27-09/.hydra/overrides.yaml b/outputs/2023-09-07/18-27-09/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-07/18-27-09/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-07/18-27-34/.hydra/config.yaml b/outputs/2023-09-07/18-27-34/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..de76c13b3c705cf155377be703d6cb37df20d9dd --- /dev/null +++ b/outputs/2023-09-07/18-27-34/.hydra/config.yaml @@ -0,0 +1,19 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +documents_path: /data/tommaso/data/papers +mode: interactive +debug: true diff --git a/outputs/2023-09-07/18-27-34/.hydra/hydra.yaml b/outputs/2023-09-07/18-27-34/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..75c9a044aa31ff3158842ef127c92ec2dd82daaa --- /dev/null +++ b/outputs/2023-09-07/18-27-34/.hydra/hydra.yaml @@ -0,0 +1,160 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-07/18-27-34 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-07/18-27-34/.hydra/overrides.yaml b/outputs/2023-09-07/18-27-34/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-07/18-27-34/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-07/18-27-55/.hydra/config.yaml b/outputs/2023-09-07/18-27-55/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..de76c13b3c705cf155377be703d6cb37df20d9dd --- /dev/null +++ b/outputs/2023-09-07/18-27-55/.hydra/config.yaml @@ -0,0 +1,19 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +documents_path: /data/tommaso/data/papers +mode: interactive +debug: true diff --git a/outputs/2023-09-07/18-27-55/.hydra/hydra.yaml b/outputs/2023-09-07/18-27-55/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e4000d1a2940d96baa2f57f009963bd7d9f055a4 --- /dev/null +++ b/outputs/2023-09-07/18-27-55/.hydra/hydra.yaml @@ -0,0 +1,160 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-07/18-27-55 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-07/18-27-55/.hydra/overrides.yaml b/outputs/2023-09-07/18-27-55/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-07/18-27-55/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-07/18-28-22/.hydra/config.yaml b/outputs/2023-09-07/18-28-22/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..de76c13b3c705cf155377be703d6cb37df20d9dd --- /dev/null +++ b/outputs/2023-09-07/18-28-22/.hydra/config.yaml @@ -0,0 +1,19 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +documents_path: /data/tommaso/data/papers +mode: interactive +debug: true diff --git a/outputs/2023-09-07/18-28-22/.hydra/hydra.yaml b/outputs/2023-09-07/18-28-22/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cd53dd90b752f7d2881fde9c515b654ff0ea4868 --- /dev/null +++ b/outputs/2023-09-07/18-28-22/.hydra/hydra.yaml @@ -0,0 +1,160 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-07/18-28-22 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-07/18-28-22/.hydra/overrides.yaml b/outputs/2023-09-07/18-28-22/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-07/18-28-22/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-07/18-34-52/.hydra/config.yaml b/outputs/2023-09-07/18-34-52/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..de76c13b3c705cf155377be703d6cb37df20d9dd --- /dev/null +++ b/outputs/2023-09-07/18-34-52/.hydra/config.yaml @@ -0,0 +1,19 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +documents_path: /data/tommaso/data/papers +mode: interactive +debug: true diff --git a/outputs/2023-09-07/18-34-52/.hydra/hydra.yaml b/outputs/2023-09-07/18-34-52/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..677db4d3422e658f7e62705ed527581bdc2186a8 --- /dev/null +++ b/outputs/2023-09-07/18-34-52/.hydra/hydra.yaml @@ -0,0 +1,160 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-07/18-34-52 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-07/18-34-52/.hydra/overrides.yaml b/outputs/2023-09-07/18-34-52/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-07/18-34-52/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-07/18-43-20/.hydra/config.yaml b/outputs/2023-09-07/18-43-20/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..de76c13b3c705cf155377be703d6cb37df20d9dd --- /dev/null +++ b/outputs/2023-09-07/18-43-20/.hydra/config.yaml @@ -0,0 +1,19 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +documents_path: /data/tommaso/data/papers +mode: interactive +debug: true diff --git a/outputs/2023-09-07/18-43-20/.hydra/hydra.yaml b/outputs/2023-09-07/18-43-20/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a5dd4c4119a336ef21fd6c9103bb2b574a43f967 --- /dev/null +++ b/outputs/2023-09-07/18-43-20/.hydra/hydra.yaml @@ -0,0 +1,160 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-07/18-43-20 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-07/18-43-20/.hydra/overrides.yaml b/outputs/2023-09-07/18-43-20/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-07/18-43-20/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-07/19-00-46/.hydra/config.yaml b/outputs/2023-09-07/19-00-46/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..de76c13b3c705cf155377be703d6cb37df20d9dd --- /dev/null +++ b/outputs/2023-09-07/19-00-46/.hydra/config.yaml @@ -0,0 +1,19 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +documents_path: /data/tommaso/data/papers +mode: interactive +debug: true diff --git a/outputs/2023-09-07/19-00-46/.hydra/hydra.yaml b/outputs/2023-09-07/19-00-46/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4bba16c90bb3f06c168f28003f60cfa04fec331c --- /dev/null +++ b/outputs/2023-09-07/19-00-46/.hydra/hydra.yaml @@ -0,0 +1,160 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-07/19-00-46 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-07/19-00-46/.hydra/overrides.yaml b/outputs/2023-09-07/19-00-46/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-07/19-00-46/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-07/19-09-47/.hydra/config.yaml b/outputs/2023-09-07/19-09-47/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..de76c13b3c705cf155377be703d6cb37df20d9dd --- /dev/null +++ b/outputs/2023-09-07/19-09-47/.hydra/config.yaml @@ -0,0 +1,19 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +documents_path: /data/tommaso/data/papers +mode: interactive +debug: true diff --git a/outputs/2023-09-07/19-09-47/.hydra/hydra.yaml b/outputs/2023-09-07/19-09-47/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..610afd8d5ec2a2ade67ae3b45e1295da3a1aa937 --- /dev/null +++ b/outputs/2023-09-07/19-09-47/.hydra/hydra.yaml @@ -0,0 +1,160 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-07/19-09-47 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-07/19-09-47/.hydra/overrides.yaml b/outputs/2023-09-07/19-09-47/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-07/19-09-47/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-07/19-10-12/.hydra/config.yaml b/outputs/2023-09-07/19-10-12/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..de76c13b3c705cf155377be703d6cb37df20d9dd --- /dev/null +++ b/outputs/2023-09-07/19-10-12/.hydra/config.yaml @@ -0,0 +1,19 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +documents_path: /data/tommaso/data/papers +mode: interactive +debug: true diff --git a/outputs/2023-09-07/19-10-12/.hydra/hydra.yaml b/outputs/2023-09-07/19-10-12/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..96a6b0a46a701ba0058d9f4c938e64c25fcf7148 --- /dev/null +++ b/outputs/2023-09-07/19-10-12/.hydra/hydra.yaml @@ -0,0 +1,160 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-07/19-10-12 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-07/19-10-12/.hydra/overrides.yaml b/outputs/2023-09-07/19-10-12/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-07/19-10-12/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-07/19-11-30/.hydra/config.yaml b/outputs/2023-09-07/19-11-30/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..de76c13b3c705cf155377be703d6cb37df20d9dd --- /dev/null +++ b/outputs/2023-09-07/19-11-30/.hydra/config.yaml @@ -0,0 +1,19 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +documents_path: /data/tommaso/data/papers +mode: interactive +debug: true diff --git a/outputs/2023-09-07/19-11-30/.hydra/hydra.yaml b/outputs/2023-09-07/19-11-30/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..43d6c394d4d1028aeae548570e8cc56f067b387f --- /dev/null +++ b/outputs/2023-09-07/19-11-30/.hydra/hydra.yaml @@ -0,0 +1,160 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-07/19-11-30 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-07/19-11-30/.hydra/overrides.yaml b/outputs/2023-09-07/19-11-30/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-07/19-11-30/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-07/19-16-40/.hydra/config.yaml b/outputs/2023-09-07/19-16-40/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..de76c13b3c705cf155377be703d6cb37df20d9dd --- /dev/null +++ b/outputs/2023-09-07/19-16-40/.hydra/config.yaml @@ -0,0 +1,19 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +documents_path: /data/tommaso/data/papers +mode: interactive +debug: true diff --git a/outputs/2023-09-07/19-16-40/.hydra/hydra.yaml b/outputs/2023-09-07/19-16-40/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..193f74221567bc1dbd1977250886e764f67a473e --- /dev/null +++ b/outputs/2023-09-07/19-16-40/.hydra/hydra.yaml @@ -0,0 +1,160 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-07/19-16-40 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-07/19-16-40/.hydra/overrides.yaml b/outputs/2023-09-07/19-16-40/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-07/19-16-40/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-07/19-17-21/.hydra/config.yaml b/outputs/2023-09-07/19-17-21/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..de76c13b3c705cf155377be703d6cb37df20d9dd --- /dev/null +++ b/outputs/2023-09-07/19-17-21/.hydra/config.yaml @@ -0,0 +1,19 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +documents_path: /data/tommaso/data/papers +mode: interactive +debug: true diff --git a/outputs/2023-09-07/19-17-21/.hydra/hydra.yaml b/outputs/2023-09-07/19-17-21/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e69596a47229186ee614bba0a82c5840a7324b84 --- /dev/null +++ b/outputs/2023-09-07/19-17-21/.hydra/hydra.yaml @@ -0,0 +1,160 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-07/19-17-21 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-07/19-17-21/.hydra/overrides.yaml b/outputs/2023-09-07/19-17-21/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-07/19-17-21/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-07/19-19-29/.hydra/config.yaml b/outputs/2023-09-07/19-19-29/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..de76c13b3c705cf155377be703d6cb37df20d9dd --- /dev/null +++ b/outputs/2023-09-07/19-19-29/.hydra/config.yaml @@ -0,0 +1,19 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +documents_path: /data/tommaso/data/papers +mode: interactive +debug: true diff --git a/outputs/2023-09-07/19-19-29/.hydra/hydra.yaml b/outputs/2023-09-07/19-19-29/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..55f772ffabb0a2cb1844a82a52f057717b16b94c --- /dev/null +++ b/outputs/2023-09-07/19-19-29/.hydra/hydra.yaml @@ -0,0 +1,160 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-07/19-19-29 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-07/19-19-29/.hydra/overrides.yaml b/outputs/2023-09-07/19-19-29/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-07/19-19-29/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-07/19-20-39/.hydra/config.yaml b/outputs/2023-09-07/19-20-39/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..de76c13b3c705cf155377be703d6cb37df20d9dd --- /dev/null +++ b/outputs/2023-09-07/19-20-39/.hydra/config.yaml @@ -0,0 +1,19 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +documents_path: /data/tommaso/data/papers +mode: interactive +debug: true diff --git a/outputs/2023-09-07/19-20-39/.hydra/hydra.yaml b/outputs/2023-09-07/19-20-39/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ba34b98a353262d0546a46fe40cbab6c09589534 --- /dev/null +++ b/outputs/2023-09-07/19-20-39/.hydra/hydra.yaml @@ -0,0 +1,160 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-07/19-20-39 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-07/19-20-39/.hydra/overrides.yaml b/outputs/2023-09-07/19-20-39/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-07/19-20-39/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-07/19-20-48/.hydra/config.yaml b/outputs/2023-09-07/19-20-48/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..de76c13b3c705cf155377be703d6cb37df20d9dd --- /dev/null +++ b/outputs/2023-09-07/19-20-48/.hydra/config.yaml @@ -0,0 +1,19 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +documents_path: /data/tommaso/data/papers +mode: interactive +debug: true diff --git a/outputs/2023-09-07/19-20-48/.hydra/hydra.yaml b/outputs/2023-09-07/19-20-48/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..81ff10c249add2cbeb981f3f6a3fa1aa6f3a1821 --- /dev/null +++ b/outputs/2023-09-07/19-20-48/.hydra/hydra.yaml @@ -0,0 +1,160 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-07/19-20-48 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-07/19-20-48/.hydra/overrides.yaml b/outputs/2023-09-07/19-20-48/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-07/19-20-48/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-07/19-21-03/.hydra/config.yaml b/outputs/2023-09-07/19-21-03/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..de76c13b3c705cf155377be703d6cb37df20d9dd --- /dev/null +++ b/outputs/2023-09-07/19-21-03/.hydra/config.yaml @@ -0,0 +1,19 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +documents_path: /data/tommaso/data/papers +mode: interactive +debug: true diff --git a/outputs/2023-09-07/19-21-03/.hydra/hydra.yaml b/outputs/2023-09-07/19-21-03/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..25201a8f523a764539a783c8d3bddd294e139caf --- /dev/null +++ b/outputs/2023-09-07/19-21-03/.hydra/hydra.yaml @@ -0,0 +1,160 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-07/19-21-03 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-07/19-21-03/.hydra/overrides.yaml b/outputs/2023-09-07/19-21-03/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-07/19-21-03/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-07/19-21-11/.hydra/config.yaml b/outputs/2023-09-07/19-21-11/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..de76c13b3c705cf155377be703d6cb37df20d9dd --- /dev/null +++ b/outputs/2023-09-07/19-21-11/.hydra/config.yaml @@ -0,0 +1,19 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +documents_path: /data/tommaso/data/papers +mode: interactive +debug: true diff --git a/outputs/2023-09-07/19-21-11/.hydra/hydra.yaml b/outputs/2023-09-07/19-21-11/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..34292f20da192ab48da18d23f6eb39dcb1ea78e8 --- /dev/null +++ b/outputs/2023-09-07/19-21-11/.hydra/hydra.yaml @@ -0,0 +1,160 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-07/19-21-11 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-07/19-21-11/.hydra/overrides.yaml b/outputs/2023-09-07/19-21-11/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-07/19-21-11/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-07/19-25-22/.hydra/config.yaml b/outputs/2023-09-07/19-25-22/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..de76c13b3c705cf155377be703d6cb37df20d9dd --- /dev/null +++ b/outputs/2023-09-07/19-25-22/.hydra/config.yaml @@ -0,0 +1,19 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +documents_path: /data/tommaso/data/papers +mode: interactive +debug: true diff --git a/outputs/2023-09-07/19-25-22/.hydra/hydra.yaml b/outputs/2023-09-07/19-25-22/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..52c4381dafbaf81988558bbe4d2c357c03032f55 --- /dev/null +++ b/outputs/2023-09-07/19-25-22/.hydra/hydra.yaml @@ -0,0 +1,160 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-07/19-25-22 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-07/19-25-22/.hydra/overrides.yaml b/outputs/2023-09-07/19-25-22/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-07/19-25-22/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-07/19-26-58/.hydra/config.yaml b/outputs/2023-09-07/19-26-58/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..de76c13b3c705cf155377be703d6cb37df20d9dd --- /dev/null +++ b/outputs/2023-09-07/19-26-58/.hydra/config.yaml @@ -0,0 +1,19 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +documents_path: /data/tommaso/data/papers +mode: interactive +debug: true diff --git a/outputs/2023-09-07/19-26-58/.hydra/hydra.yaml b/outputs/2023-09-07/19-26-58/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d52022b860e848d73e6df037445ebf19ab349dd2 --- /dev/null +++ b/outputs/2023-09-07/19-26-58/.hydra/hydra.yaml @@ -0,0 +1,160 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-07/19-26-58 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-07/19-26-58/.hydra/overrides.yaml b/outputs/2023-09-07/19-26-58/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-07/19-26-58/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-07/19-27-18/.hydra/config.yaml b/outputs/2023-09-07/19-27-18/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..de76c13b3c705cf155377be703d6cb37df20d9dd --- /dev/null +++ b/outputs/2023-09-07/19-27-18/.hydra/config.yaml @@ -0,0 +1,19 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +documents_path: /data/tommaso/data/papers +mode: interactive +debug: true diff --git a/outputs/2023-09-07/19-27-18/.hydra/hydra.yaml b/outputs/2023-09-07/19-27-18/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a94ae441a799fc318e08874846bca20a85520d9d --- /dev/null +++ b/outputs/2023-09-07/19-27-18/.hydra/hydra.yaml @@ -0,0 +1,160 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-07/19-27-18 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-07/19-27-18/.hydra/overrides.yaml b/outputs/2023-09-07/19-27-18/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-07/19-27-18/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-07/19-29-34/.hydra/config.yaml b/outputs/2023-09-07/19-29-34/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..de76c13b3c705cf155377be703d6cb37df20d9dd --- /dev/null +++ b/outputs/2023-09-07/19-29-34/.hydra/config.yaml @@ -0,0 +1,19 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +documents_path: /data/tommaso/data/papers +mode: interactive +debug: true diff --git a/outputs/2023-09-07/19-29-34/.hydra/hydra.yaml b/outputs/2023-09-07/19-29-34/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..37d82be5a55adcfee1c112d2a14705b9d93078a1 --- /dev/null +++ b/outputs/2023-09-07/19-29-34/.hydra/hydra.yaml @@ -0,0 +1,160 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-07/19-29-34 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-07/19-29-34/.hydra/overrides.yaml b/outputs/2023-09-07/19-29-34/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-07/19-29-34/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-08/19-58-14/.hydra/config.yaml b/outputs/2023-09-08/19-58-14/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..de76c13b3c705cf155377be703d6cb37df20d9dd --- /dev/null +++ b/outputs/2023-09-08/19-58-14/.hydra/config.yaml @@ -0,0 +1,19 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +documents_path: /data/tommaso/data/papers +mode: interactive +debug: true diff --git a/outputs/2023-09-08/19-58-14/.hydra/hydra.yaml b/outputs/2023-09-08/19-58-14/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..54c6f39f36d4d5073554698f461960dc0554e9fb --- /dev/null +++ b/outputs/2023-09-08/19-58-14/.hydra/hydra.yaml @@ -0,0 +1,160 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-08/19-58-14 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-08/19-58-14/.hydra/overrides.yaml b/outputs/2023-09-08/19-58-14/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-08/19-58-14/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-08/20-18-10/.hydra/config.yaml b/outputs/2023-09-08/20-18-10/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..74564c359100bc2ace86413d8012a6221c70c95f --- /dev/null +++ b/outputs/2023-09-08/20-18-10/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${path.base}/papers + documents_processed: ${path.base}_processed + vector_store: ${path.base}/vector_store +mode: interactive +debug: + is_debug: true + force_rebuild_storage: false diff --git a/outputs/2023-09-08/20-18-10/.hydra/hydra.yaml b/outputs/2023-09-08/20-18-10/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fadf83e6b0690a932e2d9a4718e5a3fb91ee9e7b --- /dev/null +++ b/outputs/2023-09-08/20-18-10/.hydra/hydra.yaml @@ -0,0 +1,160 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-08/20-18-10 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-08/20-18-10/.hydra/overrides.yaml b/outputs/2023-09-08/20-18-10/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-08/20-18-10/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-08/20-32-04/.hydra/config.yaml b/outputs/2023-09-08/20-32-04/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5f4768f58578a8551242fc52e1b7359aa960d5ca --- /dev/null +++ b/outputs/2023-09-08/20-32-04/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: true + force_rebuild_storage: false diff --git a/outputs/2023-09-08/20-32-04/.hydra/hydra.yaml b/outputs/2023-09-08/20-32-04/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ea9f9883af33acc581953db3135f32806c0bb2e6 --- /dev/null +++ b/outputs/2023-09-08/20-32-04/.hydra/hydra.yaml @@ -0,0 +1,160 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-08/20-32-04 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-08/20-32-04/.hydra/overrides.yaml b/outputs/2023-09-08/20-32-04/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-08/20-32-04/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-08/20-33-44/.hydra/config.yaml b/outputs/2023-09-08/20-33-44/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5f4768f58578a8551242fc52e1b7359aa960d5ca --- /dev/null +++ b/outputs/2023-09-08/20-33-44/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: true + force_rebuild_storage: false diff --git a/outputs/2023-09-08/20-33-44/.hydra/hydra.yaml b/outputs/2023-09-08/20-33-44/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b0d5a716383696b592b04c4d0300e40beb40df56 --- /dev/null +++ b/outputs/2023-09-08/20-33-44/.hydra/hydra.yaml @@ -0,0 +1,160 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-08/20-33-44 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-08/20-33-44/.hydra/overrides.yaml b/outputs/2023-09-08/20-33-44/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-08/20-33-44/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-08/20-34-48/.hydra/config.yaml b/outputs/2023-09-08/20-34-48/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5f4768f58578a8551242fc52e1b7359aa960d5ca --- /dev/null +++ b/outputs/2023-09-08/20-34-48/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: true + force_rebuild_storage: false diff --git a/outputs/2023-09-08/20-34-48/.hydra/hydra.yaml b/outputs/2023-09-08/20-34-48/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f471b1b6d7e9b9079d885a393812d36ab3e46774 --- /dev/null +++ b/outputs/2023-09-08/20-34-48/.hydra/hydra.yaml @@ -0,0 +1,160 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-08/20-34-48 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-08/20-34-48/.hydra/overrides.yaml b/outputs/2023-09-08/20-34-48/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-08/20-34-48/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-08/20-39-03/.hydra/config.yaml b/outputs/2023-09-08/20-39-03/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5f4768f58578a8551242fc52e1b7359aa960d5ca --- /dev/null +++ b/outputs/2023-09-08/20-39-03/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: true + force_rebuild_storage: false diff --git a/outputs/2023-09-08/20-39-03/.hydra/hydra.yaml b/outputs/2023-09-08/20-39-03/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b5cbab2ba49d15f6f189c728932c80660e0a69a2 --- /dev/null +++ b/outputs/2023-09-08/20-39-03/.hydra/hydra.yaml @@ -0,0 +1,160 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-08/20-39-03 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-08/20-39-03/.hydra/overrides.yaml b/outputs/2023-09-08/20-39-03/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-08/20-39-03/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-08/20-43-02/.hydra/config.yaml b/outputs/2023-09-08/20-43-02/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5f4768f58578a8551242fc52e1b7359aa960d5ca --- /dev/null +++ b/outputs/2023-09-08/20-43-02/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: true + force_rebuild_storage: false diff --git a/outputs/2023-09-08/20-43-02/.hydra/hydra.yaml b/outputs/2023-09-08/20-43-02/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..92f72a9930a74c416b6689d4a2da641fc25e9567 --- /dev/null +++ b/outputs/2023-09-08/20-43-02/.hydra/hydra.yaml @@ -0,0 +1,160 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-08/20-43-02 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-08/20-43-02/.hydra/overrides.yaml b/outputs/2023-09-08/20-43-02/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-08/20-43-02/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-08/20-43-52/.hydra/config.yaml b/outputs/2023-09-08/20-43-52/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5f4768f58578a8551242fc52e1b7359aa960d5ca --- /dev/null +++ b/outputs/2023-09-08/20-43-52/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: true + force_rebuild_storage: false diff --git a/outputs/2023-09-08/20-43-52/.hydra/hydra.yaml b/outputs/2023-09-08/20-43-52/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cf968e99b27a0e5c75a91cb1458649f4c17e58e3 --- /dev/null +++ b/outputs/2023-09-08/20-43-52/.hydra/hydra.yaml @@ -0,0 +1,160 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-08/20-43-52 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-08/20-43-52/.hydra/overrides.yaml b/outputs/2023-09-08/20-43-52/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-08/20-43-52/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-08/20-45-31/.hydra/config.yaml b/outputs/2023-09-08/20-45-31/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5f4768f58578a8551242fc52e1b7359aa960d5ca --- /dev/null +++ b/outputs/2023-09-08/20-45-31/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: true + force_rebuild_storage: false diff --git a/outputs/2023-09-08/20-45-31/.hydra/hydra.yaml b/outputs/2023-09-08/20-45-31/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1bc309164683ea833aae15fccd38d93bc7e2fc0e --- /dev/null +++ b/outputs/2023-09-08/20-45-31/.hydra/hydra.yaml @@ -0,0 +1,160 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-08/20-45-31 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-08/20-45-31/.hydra/overrides.yaml b/outputs/2023-09-08/20-45-31/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-08/20-45-31/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-08/20-46-27/.hydra/config.yaml b/outputs/2023-09-08/20-46-27/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5f4768f58578a8551242fc52e1b7359aa960d5ca --- /dev/null +++ b/outputs/2023-09-08/20-46-27/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: true + force_rebuild_storage: false diff --git a/outputs/2023-09-08/20-46-27/.hydra/hydra.yaml b/outputs/2023-09-08/20-46-27/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5a751ed96accaf87025e93bf99c71471588f5746 --- /dev/null +++ b/outputs/2023-09-08/20-46-27/.hydra/hydra.yaml @@ -0,0 +1,160 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-08/20-46-27 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-08/20-46-27/.hydra/overrides.yaml b/outputs/2023-09-08/20-46-27/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-08/20-46-27/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-08/20-47-22/.hydra/config.yaml b/outputs/2023-09-08/20-47-22/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5f4768f58578a8551242fc52e1b7359aa960d5ca --- /dev/null +++ b/outputs/2023-09-08/20-47-22/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: true + force_rebuild_storage: false diff --git a/outputs/2023-09-08/20-47-22/.hydra/hydra.yaml b/outputs/2023-09-08/20-47-22/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5e90744215e45c9fee1a24f49e32cb2cd62fcdb4 --- /dev/null +++ b/outputs/2023-09-08/20-47-22/.hydra/hydra.yaml @@ -0,0 +1,160 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-08/20-47-22 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-08/20-47-22/.hydra/overrides.yaml b/outputs/2023-09-08/20-47-22/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-08/20-47-22/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-08/20-56-19/.hydra/config.yaml b/outputs/2023-09-08/20-56-19/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5f4768f58578a8551242fc52e1b7359aa960d5ca --- /dev/null +++ b/outputs/2023-09-08/20-56-19/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: true + force_rebuild_storage: false diff --git a/outputs/2023-09-08/20-56-19/.hydra/hydra.yaml b/outputs/2023-09-08/20-56-19/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e8df982244542ab11c7cb5075bd1d480628dee08 --- /dev/null +++ b/outputs/2023-09-08/20-56-19/.hydra/hydra.yaml @@ -0,0 +1,160 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-08/20-56-19 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-08/20-56-19/.hydra/overrides.yaml b/outputs/2023-09-08/20-56-19/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-08/20-56-19/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-08/20-59-31/.hydra/config.yaml b/outputs/2023-09-08/20-59-31/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5f4768f58578a8551242fc52e1b7359aa960d5ca --- /dev/null +++ b/outputs/2023-09-08/20-59-31/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: true + force_rebuild_storage: false diff --git a/outputs/2023-09-08/20-59-31/.hydra/hydra.yaml b/outputs/2023-09-08/20-59-31/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..30c54cbbf2ca0b5395d2a393f7ce112ea0ccfee5 --- /dev/null +++ b/outputs/2023-09-08/20-59-31/.hydra/hydra.yaml @@ -0,0 +1,160 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-08/20-59-31 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-08/20-59-31/.hydra/overrides.yaml b/outputs/2023-09-08/20-59-31/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-08/20-59-31/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-08/21-13-05/.hydra/config.yaml b/outputs/2023-09-08/21-13-05/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..425025bf3a4f5566108d808416ab4e4efcba8bd8 --- /dev/null +++ b/outputs/2023-09-08/21-13-05/.hydra/config.yaml @@ -0,0 +1,27 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: true + force_rebuild_storage: false +hydra/hydra_logging: disabled +hydra/job_logging: disabled diff --git a/outputs/2023-09-08/21-13-05/.hydra/hydra.yaml b/outputs/2023-09-08/21-13-05/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2bb12eedf0ea11387495248040b4fb1ce2ce6b19 --- /dev/null +++ b/outputs/2023-09-08/21-13-05/.hydra/hydra.yaml @@ -0,0 +1,160 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-08/21-13-05 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-08/21-13-05/.hydra/overrides.yaml b/outputs/2023-09-08/21-13-05/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-08/21-13-05/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-08/21-13-38/.hydra/config.yaml b/outputs/2023-09-08/21-13-38/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5f4768f58578a8551242fc52e1b7359aa960d5ca --- /dev/null +++ b/outputs/2023-09-08/21-13-38/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: true + force_rebuild_storage: false diff --git a/outputs/2023-09-08/21-13-38/.hydra/hydra.yaml b/outputs/2023-09-08/21-13-38/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ffcfe4c05a5e88e5a978cd299c46a070f592cdcf --- /dev/null +++ b/outputs/2023-09-08/21-13-38/.hydra/hydra.yaml @@ -0,0 +1,132 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + job_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-08/21-13-38 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: disabled + hydra/hydra_logging: disabled + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-08/21-13-38/.hydra/overrides.yaml b/outputs/2023-09-08/21-13-38/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-08/21-13-38/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-08/21-14-16/.hydra/config.yaml b/outputs/2023-09-08/21-14-16/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5f4768f58578a8551242fc52e1b7359aa960d5ca --- /dev/null +++ b/outputs/2023-09-08/21-14-16/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: true + force_rebuild_storage: false diff --git a/outputs/2023-09-08/21-14-16/.hydra/hydra.yaml b/outputs/2023-09-08/21-14-16/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..07f9b7ef7da4342cb13ae203bc479b14beb2f7f1 --- /dev/null +++ b/outputs/2023-09-08/21-14-16/.hydra/hydra.yaml @@ -0,0 +1,160 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-08/21-14-16 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-08/21-14-16/.hydra/overrides.yaml b/outputs/2023-09-08/21-14-16/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-08/21-14-16/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-08/21-17-40/.hydra/config.yaml b/outputs/2023-09-08/21-17-40/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5f4768f58578a8551242fc52e1b7359aa960d5ca --- /dev/null +++ b/outputs/2023-09-08/21-17-40/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: true + force_rebuild_storage: false diff --git a/outputs/2023-09-08/21-17-40/.hydra/hydra.yaml b/outputs/2023-09-08/21-17-40/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..079a81b8f36b4f37674304f3b95254327a8fa323 --- /dev/null +++ b/outputs/2023-09-08/21-17-40/.hydra/hydra.yaml @@ -0,0 +1,160 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-08/21-17-40 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-08/21-17-40/.hydra/overrides.yaml b/outputs/2023-09-08/21-17-40/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-08/21-17-40/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-08/21-19-27/.hydra/config.yaml b/outputs/2023-09-08/21-19-27/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5f4768f58578a8551242fc52e1b7359aa960d5ca --- /dev/null +++ b/outputs/2023-09-08/21-19-27/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: true + force_rebuild_storage: false diff --git a/outputs/2023-09-08/21-19-27/.hydra/hydra.yaml b/outputs/2023-09-08/21-19-27/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9ab0bdd84f785820e3de1b9ac5ce491c9e102cb4 --- /dev/null +++ b/outputs/2023-09-08/21-19-27/.hydra/hydra.yaml @@ -0,0 +1,160 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-08/21-19-27 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-08/21-19-27/.hydra/overrides.yaml b/outputs/2023-09-08/21-19-27/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-08/21-19-27/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-08/21-21-20/.hydra/config.yaml b/outputs/2023-09-08/21-21-20/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5f4768f58578a8551242fc52e1b7359aa960d5ca --- /dev/null +++ b/outputs/2023-09-08/21-21-20/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: true + force_rebuild_storage: false diff --git a/outputs/2023-09-08/21-21-20/.hydra/hydra.yaml b/outputs/2023-09-08/21-21-20/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..af6844ee2193d5c720e4ef85af6e58e8fc321676 --- /dev/null +++ b/outputs/2023-09-08/21-21-20/.hydra/hydra.yaml @@ -0,0 +1,160 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-08/21-21-20 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-08/21-21-20/.hydra/overrides.yaml b/outputs/2023-09-08/21-21-20/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-08/21-21-20/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-08/21-23-16/.hydra/config.yaml b/outputs/2023-09-08/21-23-16/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5f4768f58578a8551242fc52e1b7359aa960d5ca --- /dev/null +++ b/outputs/2023-09-08/21-23-16/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: true + force_rebuild_storage: false diff --git a/outputs/2023-09-08/21-23-16/.hydra/hydra.yaml b/outputs/2023-09-08/21-23-16/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b5a739de0c72289ed330c6da4216b0749e7a740b --- /dev/null +++ b/outputs/2023-09-08/21-23-16/.hydra/hydra.yaml @@ -0,0 +1,160 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-08/21-23-16 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-08/21-23-16/.hydra/overrides.yaml b/outputs/2023-09-08/21-23-16/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-08/21-23-16/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-08/21-23-53/.hydra/config.yaml b/outputs/2023-09-08/21-23-53/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5f4768f58578a8551242fc52e1b7359aa960d5ca --- /dev/null +++ b/outputs/2023-09-08/21-23-53/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: true + force_rebuild_storage: false diff --git a/outputs/2023-09-08/21-23-53/.hydra/hydra.yaml b/outputs/2023-09-08/21-23-53/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..47094fca09ac96c5faf56015c7436ec1ea9b3cc4 --- /dev/null +++ b/outputs/2023-09-08/21-23-53/.hydra/hydra.yaml @@ -0,0 +1,160 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-08/21-23-53 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-08/21-23-53/.hydra/overrides.yaml b/outputs/2023-09-08/21-23-53/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-08/21-23-53/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-08/21-26-36/.hydra/config.yaml b/outputs/2023-09-08/21-26-36/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5f4768f58578a8551242fc52e1b7359aa960d5ca --- /dev/null +++ b/outputs/2023-09-08/21-26-36/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: true + force_rebuild_storage: false diff --git a/outputs/2023-09-08/21-26-36/.hydra/hydra.yaml b/outputs/2023-09-08/21-26-36/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6a3f1fabd502aa21e2fe704a163b79b7d4bdb813 --- /dev/null +++ b/outputs/2023-09-08/21-26-36/.hydra/hydra.yaml @@ -0,0 +1,160 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-08/21-26-36 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-08/21-26-36/.hydra/overrides.yaml b/outputs/2023-09-08/21-26-36/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-08/21-26-36/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-08/21-29-36/.hydra/config.yaml b/outputs/2023-09-08/21-29-36/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5f4768f58578a8551242fc52e1b7359aa960d5ca --- /dev/null +++ b/outputs/2023-09-08/21-29-36/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: true + force_rebuild_storage: false diff --git a/outputs/2023-09-08/21-29-36/.hydra/hydra.yaml b/outputs/2023-09-08/21-29-36/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..789d59c845c23d38d57b1ab1b216011bf707c193 --- /dev/null +++ b/outputs/2023-09-08/21-29-36/.hydra/hydra.yaml @@ -0,0 +1,160 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-08/21-29-36 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-08/21-29-36/.hydra/overrides.yaml b/outputs/2023-09-08/21-29-36/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-08/21-29-36/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-08/22-08-31/.hydra/config.yaml b/outputs/2023-09-08/22-08-31/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5f4768f58578a8551242fc52e1b7359aa960d5ca --- /dev/null +++ b/outputs/2023-09-08/22-08-31/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: true + force_rebuild_storage: false diff --git a/outputs/2023-09-08/22-08-31/.hydra/hydra.yaml b/outputs/2023-09-08/22-08-31/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f19cfbc043d796328050b4f52cf7f566fb1943f3 --- /dev/null +++ b/outputs/2023-09-08/22-08-31/.hydra/hydra.yaml @@ -0,0 +1,160 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-08/22-08-31 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-08/22-08-31/.hydra/overrides.yaml b/outputs/2023-09-08/22-08-31/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-08/22-08-31/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-08/22-13-49/.hydra/config.yaml b/outputs/2023-09-08/22-13-49/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5f4768f58578a8551242fc52e1b7359aa960d5ca --- /dev/null +++ b/outputs/2023-09-08/22-13-49/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: true + force_rebuild_storage: false diff --git a/outputs/2023-09-08/22-13-49/.hydra/hydra.yaml b/outputs/2023-09-08/22-13-49/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f46349337830147ca6a91abfee3ca6f126c03d6a --- /dev/null +++ b/outputs/2023-09-08/22-13-49/.hydra/hydra.yaml @@ -0,0 +1,160 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-08/22-13-49 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-08/22-13-49/.hydra/overrides.yaml b/outputs/2023-09-08/22-13-49/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-08/22-13-49/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-08/22-18-39/.hydra/config.yaml b/outputs/2023-09-08/22-18-39/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5f4768f58578a8551242fc52e1b7359aa960d5ca --- /dev/null +++ b/outputs/2023-09-08/22-18-39/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: true + force_rebuild_storage: false diff --git a/outputs/2023-09-08/22-18-39/.hydra/hydra.yaml b/outputs/2023-09-08/22-18-39/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ebe9c215368e6e189712e026842b00410f842e65 --- /dev/null +++ b/outputs/2023-09-08/22-18-39/.hydra/hydra.yaml @@ -0,0 +1,160 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-08/22-18-39 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-08/22-18-39/.hydra/overrides.yaml b/outputs/2023-09-08/22-18-39/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-08/22-18-39/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-08/22-20-08/.hydra/config.yaml b/outputs/2023-09-08/22-20-08/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5f4768f58578a8551242fc52e1b7359aa960d5ca --- /dev/null +++ b/outputs/2023-09-08/22-20-08/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: true + force_rebuild_storage: false diff --git a/outputs/2023-09-08/22-20-08/.hydra/hydra.yaml b/outputs/2023-09-08/22-20-08/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1b4f3a0ee03f0fc5832cf0588135cb7f8d8ef9d0 --- /dev/null +++ b/outputs/2023-09-08/22-20-08/.hydra/hydra.yaml @@ -0,0 +1,160 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-08/22-20-08 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-08/22-20-08/.hydra/overrides.yaml b/outputs/2023-09-08/22-20-08/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-08/22-20-08/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-08/22-22-09/.hydra/config.yaml b/outputs/2023-09-08/22-22-09/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5f4768f58578a8551242fc52e1b7359aa960d5ca --- /dev/null +++ b/outputs/2023-09-08/22-22-09/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: true + force_rebuild_storage: false diff --git a/outputs/2023-09-08/22-22-09/.hydra/hydra.yaml b/outputs/2023-09-08/22-22-09/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..01c2435673eee2ab756778af6b8fc191996511d3 --- /dev/null +++ b/outputs/2023-09-08/22-22-09/.hydra/hydra.yaml @@ -0,0 +1,160 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-08/22-22-09 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-08/22-22-09/.hydra/overrides.yaml b/outputs/2023-09-08/22-22-09/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-08/22-22-09/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-08/22-26-33/.hydra/config.yaml b/outputs/2023-09-08/22-26-33/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5f4768f58578a8551242fc52e1b7359aa960d5ca --- /dev/null +++ b/outputs/2023-09-08/22-26-33/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: true + force_rebuild_storage: false diff --git a/outputs/2023-09-08/22-26-33/.hydra/hydra.yaml b/outputs/2023-09-08/22-26-33/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..afa1530b05e8aa94634de9ff80e73a00c8b4b9a9 --- /dev/null +++ b/outputs/2023-09-08/22-26-33/.hydra/hydra.yaml @@ -0,0 +1,160 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-08/22-26-33 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-08/22-26-33/.hydra/overrides.yaml b/outputs/2023-09-08/22-26-33/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-08/22-26-33/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-08/22-28-43/.hydra/config.yaml b/outputs/2023-09-08/22-28-43/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5f4768f58578a8551242fc52e1b7359aa960d5ca --- /dev/null +++ b/outputs/2023-09-08/22-28-43/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: true + force_rebuild_storage: false diff --git a/outputs/2023-09-08/22-28-43/.hydra/hydra.yaml b/outputs/2023-09-08/22-28-43/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f84a23b124d02d158622d971adf214ed52954480 --- /dev/null +++ b/outputs/2023-09-08/22-28-43/.hydra/hydra.yaml @@ -0,0 +1,160 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-08/22-28-43 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-08/22-28-43/.hydra/overrides.yaml b/outputs/2023-09-08/22-28-43/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-08/22-28-43/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-08/22-38-32/.hydra/config.yaml b/outputs/2023-09-08/22-38-32/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5f4768f58578a8551242fc52e1b7359aa960d5ca --- /dev/null +++ b/outputs/2023-09-08/22-38-32/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: true + force_rebuild_storage: false diff --git a/outputs/2023-09-08/22-38-32/.hydra/hydra.yaml b/outputs/2023-09-08/22-38-32/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..154e176f46c230183c3069f3a9e27daab7bb36ca --- /dev/null +++ b/outputs/2023-09-08/22-38-32/.hydra/hydra.yaml @@ -0,0 +1,160 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-08/22-38-32 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-08/22-38-32/.hydra/overrides.yaml b/outputs/2023-09-08/22-38-32/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-08/22-38-32/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-08/22-55-06/.hydra/config.yaml b/outputs/2023-09-08/22-55-06/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5f4768f58578a8551242fc52e1b7359aa960d5ca --- /dev/null +++ b/outputs/2023-09-08/22-55-06/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: true + force_rebuild_storage: false diff --git a/outputs/2023-09-08/22-55-06/.hydra/hydra.yaml b/outputs/2023-09-08/22-55-06/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fff51dd4925ebdf267cc4baffe867b1b2fb01e2c --- /dev/null +++ b/outputs/2023-09-08/22-55-06/.hydra/hydra.yaml @@ -0,0 +1,160 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-08/22-55-06 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-08/22-55-06/.hydra/overrides.yaml b/outputs/2023-09-08/22-55-06/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-08/22-55-06/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-08/22-58-05/.hydra/config.yaml b/outputs/2023-09-08/22-58-05/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5f4768f58578a8551242fc52e1b7359aa960d5ca --- /dev/null +++ b/outputs/2023-09-08/22-58-05/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: true + force_rebuild_storage: false diff --git a/outputs/2023-09-08/22-58-05/.hydra/hydra.yaml b/outputs/2023-09-08/22-58-05/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..96a4cf488d1fbc110747b79d52ba47beaf677ae2 --- /dev/null +++ b/outputs/2023-09-08/22-58-05/.hydra/hydra.yaml @@ -0,0 +1,160 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-08/22-58-05 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-08/22-58-05/.hydra/overrides.yaml b/outputs/2023-09-08/22-58-05/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-08/22-58-05/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-08/23-01-07/.hydra/config.yaml b/outputs/2023-09-08/23-01-07/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5f4768f58578a8551242fc52e1b7359aa960d5ca --- /dev/null +++ b/outputs/2023-09-08/23-01-07/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: true + force_rebuild_storage: false diff --git a/outputs/2023-09-08/23-01-07/.hydra/hydra.yaml b/outputs/2023-09-08/23-01-07/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b78fecb0fe982b112c1b766802eb2068a7bb7616 --- /dev/null +++ b/outputs/2023-09-08/23-01-07/.hydra/hydra.yaml @@ -0,0 +1,160 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-08/23-01-07 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-08/23-01-07/.hydra/overrides.yaml b/outputs/2023-09-08/23-01-07/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-08/23-01-07/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-08/23-13-21/.hydra/config.yaml b/outputs/2023-09-08/23-13-21/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5f4768f58578a8551242fc52e1b7359aa960d5ca --- /dev/null +++ b/outputs/2023-09-08/23-13-21/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: true + force_rebuild_storage: false diff --git a/outputs/2023-09-08/23-13-21/.hydra/hydra.yaml b/outputs/2023-09-08/23-13-21/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8aef4f14f6e2c2ff5d377d83bf89aafb87f35cd1 --- /dev/null +++ b/outputs/2023-09-08/23-13-21/.hydra/hydra.yaml @@ -0,0 +1,160 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-08/23-13-21 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-08/23-13-21/.hydra/overrides.yaml b/outputs/2023-09-08/23-13-21/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-08/23-13-21/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-08/23-15-13/.hydra/config.yaml b/outputs/2023-09-08/23-15-13/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5f4768f58578a8551242fc52e1b7359aa960d5ca --- /dev/null +++ b/outputs/2023-09-08/23-15-13/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: true + force_rebuild_storage: false diff --git a/outputs/2023-09-08/23-15-13/.hydra/hydra.yaml b/outputs/2023-09-08/23-15-13/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d8cba4ccdea827ac643bf0d2573d3687298b1236 --- /dev/null +++ b/outputs/2023-09-08/23-15-13/.hydra/hydra.yaml @@ -0,0 +1,160 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-08/23-15-13 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-08/23-15-13/.hydra/overrides.yaml b/outputs/2023-09-08/23-15-13/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-08/23-15-13/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-08/23-15-27/.hydra/config.yaml b/outputs/2023-09-08/23-15-27/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5f4768f58578a8551242fc52e1b7359aa960d5ca --- /dev/null +++ b/outputs/2023-09-08/23-15-27/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: true + force_rebuild_storage: false diff --git a/outputs/2023-09-08/23-15-27/.hydra/hydra.yaml b/outputs/2023-09-08/23-15-27/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a76b5264a6612362a5d5bd2f106ea0daf62e3c39 --- /dev/null +++ b/outputs/2023-09-08/23-15-27/.hydra/hydra.yaml @@ -0,0 +1,160 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-08/23-15-27 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-08/23-15-27/.hydra/overrides.yaml b/outputs/2023-09-08/23-15-27/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-08/23-15-27/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-08/23-16-08/.hydra/config.yaml b/outputs/2023-09-08/23-16-08/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5f4768f58578a8551242fc52e1b7359aa960d5ca --- /dev/null +++ b/outputs/2023-09-08/23-16-08/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: true + force_rebuild_storage: false diff --git a/outputs/2023-09-08/23-16-08/.hydra/hydra.yaml b/outputs/2023-09-08/23-16-08/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..84e875bf90301d40c60e0e93355e3a4434a099b4 --- /dev/null +++ b/outputs/2023-09-08/23-16-08/.hydra/hydra.yaml @@ -0,0 +1,160 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-08/23-16-08 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-08/23-16-08/.hydra/overrides.yaml b/outputs/2023-09-08/23-16-08/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-08/23-16-08/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-08/23-20-24/.hydra/config.yaml b/outputs/2023-09-08/23-20-24/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5f4768f58578a8551242fc52e1b7359aa960d5ca --- /dev/null +++ b/outputs/2023-09-08/23-20-24/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: true + force_rebuild_storage: false diff --git a/outputs/2023-09-08/23-20-24/.hydra/hydra.yaml b/outputs/2023-09-08/23-20-24/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..da770b041e568099c5e135812f1530eab02910cc --- /dev/null +++ b/outputs/2023-09-08/23-20-24/.hydra/hydra.yaml @@ -0,0 +1,160 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-08/23-20-24 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-08/23-20-24/.hydra/overrides.yaml b/outputs/2023-09-08/23-20-24/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-08/23-20-24/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-08/23-23-58/.hydra/config.yaml b/outputs/2023-09-08/23-23-58/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5f4768f58578a8551242fc52e1b7359aa960d5ca --- /dev/null +++ b/outputs/2023-09-08/23-23-58/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: true + force_rebuild_storage: false diff --git a/outputs/2023-09-08/23-23-58/.hydra/hydra.yaml b/outputs/2023-09-08/23-23-58/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ea426532b3f5bf858dca9f911e7957c77395df05 --- /dev/null +++ b/outputs/2023-09-08/23-23-58/.hydra/hydra.yaml @@ -0,0 +1,160 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-08/23-23-58 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-08/23-23-58/.hydra/overrides.yaml b/outputs/2023-09-08/23-23-58/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-08/23-23-58/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-08/23-26-22/.hydra/config.yaml b/outputs/2023-09-08/23-26-22/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5f4768f58578a8551242fc52e1b7359aa960d5ca --- /dev/null +++ b/outputs/2023-09-08/23-26-22/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: true + force_rebuild_storage: false diff --git a/outputs/2023-09-08/23-26-22/.hydra/hydra.yaml b/outputs/2023-09-08/23-26-22/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b13e6f9980299a5d28c29b7183cb89c6700d6207 --- /dev/null +++ b/outputs/2023-09-08/23-26-22/.hydra/hydra.yaml @@ -0,0 +1,160 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-08/23-26-22 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-08/23-26-22/.hydra/overrides.yaml b/outputs/2023-09-08/23-26-22/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-08/23-26-22/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-08/23-27-53/.hydra/config.yaml b/outputs/2023-09-08/23-27-53/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5f4768f58578a8551242fc52e1b7359aa960d5ca --- /dev/null +++ b/outputs/2023-09-08/23-27-53/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: true + force_rebuild_storage: false diff --git a/outputs/2023-09-08/23-27-53/.hydra/hydra.yaml b/outputs/2023-09-08/23-27-53/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..659494897476f99f5262095cd2a870f3168c0a10 --- /dev/null +++ b/outputs/2023-09-08/23-27-53/.hydra/hydra.yaml @@ -0,0 +1,160 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-08/23-27-53 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-08/23-27-53/.hydra/overrides.yaml b/outputs/2023-09-08/23-27-53/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-08/23-27-53/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-08/23-29-29/.hydra/config.yaml b/outputs/2023-09-08/23-29-29/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe16713cb97d457583e1b65718ca1ef4da2a8d5f --- /dev/null +++ b/outputs/2023-09-08/23-29-29/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: false + force_rebuild_storage: false diff --git a/outputs/2023-09-08/23-29-29/.hydra/hydra.yaml b/outputs/2023-09-08/23-29-29/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e79fb44210f6c8038c2a31e6ef78c899ab89f5d9 --- /dev/null +++ b/outputs/2023-09-08/23-29-29/.hydra/hydra.yaml @@ -0,0 +1,132 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + job_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-08/23-29-29 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: disabled + hydra/hydra_logging: disabled + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-08/23-29-29/.hydra/overrides.yaml b/outputs/2023-09-08/23-29-29/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-08/23-29-29/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-08/23-35-14/.hydra/config.yaml b/outputs/2023-09-08/23-35-14/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe16713cb97d457583e1b65718ca1ef4da2a8d5f --- /dev/null +++ b/outputs/2023-09-08/23-35-14/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: false + force_rebuild_storage: false diff --git a/outputs/2023-09-08/23-35-14/.hydra/hydra.yaml b/outputs/2023-09-08/23-35-14/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..55ae93f17c4daf63e26185d9836341e181ef5580 --- /dev/null +++ b/outputs/2023-09-08/23-35-14/.hydra/hydra.yaml @@ -0,0 +1,132 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + job_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-08/23-35-14 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: disabled + hydra/hydra_logging: disabled + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-08/23-35-14/.hydra/overrides.yaml b/outputs/2023-09-08/23-35-14/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-08/23-35-14/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-08/23-41-11/.hydra/config.yaml b/outputs/2023-09-08/23-41-11/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe16713cb97d457583e1b65718ca1ef4da2a8d5f --- /dev/null +++ b/outputs/2023-09-08/23-41-11/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: false + force_rebuild_storage: false diff --git a/outputs/2023-09-08/23-41-11/.hydra/hydra.yaml b/outputs/2023-09-08/23-41-11/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..83f57ceb3251720c04f1a0c129834cf9ed3f3114 --- /dev/null +++ b/outputs/2023-09-08/23-41-11/.hydra/hydra.yaml @@ -0,0 +1,132 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + job_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-08/23-41-11 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: disabled + hydra/hydra_logging: disabled + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-08/23-41-11/.hydra/overrides.yaml b/outputs/2023-09-08/23-41-11/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-08/23-41-11/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-08/23-43-31/.hydra/config.yaml b/outputs/2023-09-08/23-43-31/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe16713cb97d457583e1b65718ca1ef4da2a8d5f --- /dev/null +++ b/outputs/2023-09-08/23-43-31/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: false + force_rebuild_storage: false diff --git a/outputs/2023-09-08/23-43-31/.hydra/hydra.yaml b/outputs/2023-09-08/23-43-31/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bd2eeec173554ee027e0f5e75e94d30f3308b2ea --- /dev/null +++ b/outputs/2023-09-08/23-43-31/.hydra/hydra.yaml @@ -0,0 +1,132 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + job_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-08/23-43-31 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: disabled + hydra/hydra_logging: disabled + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-08/23-43-31/.hydra/overrides.yaml b/outputs/2023-09-08/23-43-31/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-08/23-43-31/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-08/23-44-38/.hydra/config.yaml b/outputs/2023-09-08/23-44-38/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe16713cb97d457583e1b65718ca1ef4da2a8d5f --- /dev/null +++ b/outputs/2023-09-08/23-44-38/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: false + force_rebuild_storage: false diff --git a/outputs/2023-09-08/23-44-38/.hydra/hydra.yaml b/outputs/2023-09-08/23-44-38/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c2adea9170b61502d68f7e08d751ef8afaf62fa4 --- /dev/null +++ b/outputs/2023-09-08/23-44-38/.hydra/hydra.yaml @@ -0,0 +1,132 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + job_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-08/23-44-38 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: disabled + hydra/hydra_logging: disabled + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-08/23-44-38/.hydra/overrides.yaml b/outputs/2023-09-08/23-44-38/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-08/23-44-38/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-08/23-45-36/.hydra/config.yaml b/outputs/2023-09-08/23-45-36/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe16713cb97d457583e1b65718ca1ef4da2a8d5f --- /dev/null +++ b/outputs/2023-09-08/23-45-36/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: false + force_rebuild_storage: false diff --git a/outputs/2023-09-08/23-45-36/.hydra/hydra.yaml b/outputs/2023-09-08/23-45-36/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6600b115fbef11e48c25ed1e2e209038dbe10d32 --- /dev/null +++ b/outputs/2023-09-08/23-45-36/.hydra/hydra.yaml @@ -0,0 +1,132 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + job_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-08/23-45-36 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: disabled + hydra/hydra_logging: disabled + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-08/23-45-36/.hydra/overrides.yaml b/outputs/2023-09-08/23-45-36/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-08/23-45-36/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-08/23-45-44/.hydra/config.yaml b/outputs/2023-09-08/23-45-44/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe16713cb97d457583e1b65718ca1ef4da2a8d5f --- /dev/null +++ b/outputs/2023-09-08/23-45-44/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: false + force_rebuild_storage: false diff --git a/outputs/2023-09-08/23-45-44/.hydra/hydra.yaml b/outputs/2023-09-08/23-45-44/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3eacc0a7dfbf4b9fb4d891c635956730dd313aa7 --- /dev/null +++ b/outputs/2023-09-08/23-45-44/.hydra/hydra.yaml @@ -0,0 +1,132 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + job_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-08/23-45-44 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: disabled + hydra/hydra_logging: disabled + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-08/23-45-44/.hydra/overrides.yaml b/outputs/2023-09-08/23-45-44/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-08/23-45-44/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-08/23-47-16/.hydra/config.yaml b/outputs/2023-09-08/23-47-16/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe16713cb97d457583e1b65718ca1ef4da2a8d5f --- /dev/null +++ b/outputs/2023-09-08/23-47-16/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: false + force_rebuild_storage: false diff --git a/outputs/2023-09-08/23-47-16/.hydra/hydra.yaml b/outputs/2023-09-08/23-47-16/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..86696b36b2d5d00b910e08da113f39b030b66d46 --- /dev/null +++ b/outputs/2023-09-08/23-47-16/.hydra/hydra.yaml @@ -0,0 +1,132 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + job_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-08/23-47-16 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: disabled + hydra/hydra_logging: disabled + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-08/23-47-16/.hydra/overrides.yaml b/outputs/2023-09-08/23-47-16/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-08/23-47-16/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-08/23-49-44/.hydra/config.yaml b/outputs/2023-09-08/23-49-44/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe16713cb97d457583e1b65718ca1ef4da2a8d5f --- /dev/null +++ b/outputs/2023-09-08/23-49-44/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: false + force_rebuild_storage: false diff --git a/outputs/2023-09-08/23-49-44/.hydra/hydra.yaml b/outputs/2023-09-08/23-49-44/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b3bc9c3f9d1c633f869e6c843131e33ae57e670e --- /dev/null +++ b/outputs/2023-09-08/23-49-44/.hydra/hydra.yaml @@ -0,0 +1,132 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + job_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-08/23-49-44 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: disabled + hydra/hydra_logging: disabled + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-08/23-49-44/.hydra/overrides.yaml b/outputs/2023-09-08/23-49-44/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-08/23-49-44/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-08/23-52-03/.hydra/config.yaml b/outputs/2023-09-08/23-52-03/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe16713cb97d457583e1b65718ca1ef4da2a8d5f --- /dev/null +++ b/outputs/2023-09-08/23-52-03/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: false + force_rebuild_storage: false diff --git a/outputs/2023-09-08/23-52-03/.hydra/hydra.yaml b/outputs/2023-09-08/23-52-03/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c9408c3a93f3bd1b53eb47b70968ed71e2a5b445 --- /dev/null +++ b/outputs/2023-09-08/23-52-03/.hydra/hydra.yaml @@ -0,0 +1,132 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + job_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-08/23-52-03 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: disabled + hydra/hydra_logging: disabled + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-08/23-52-03/.hydra/overrides.yaml b/outputs/2023-09-08/23-52-03/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-08/23-52-03/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-08/23-53-15/.hydra/config.yaml b/outputs/2023-09-08/23-53-15/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe16713cb97d457583e1b65718ca1ef4da2a8d5f --- /dev/null +++ b/outputs/2023-09-08/23-53-15/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: false + force_rebuild_storage: false diff --git a/outputs/2023-09-08/23-53-15/.hydra/hydra.yaml b/outputs/2023-09-08/23-53-15/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6d48cbb234269b42f71649da80c5df59e4ab7596 --- /dev/null +++ b/outputs/2023-09-08/23-53-15/.hydra/hydra.yaml @@ -0,0 +1,132 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + job_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-08/23-53-15 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: disabled + hydra/hydra_logging: disabled + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-08/23-53-15/.hydra/overrides.yaml b/outputs/2023-09-08/23-53-15/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-08/23-53-15/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-08/23-55-33/.hydra/config.yaml b/outputs/2023-09-08/23-55-33/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe16713cb97d457583e1b65718ca1ef4da2a8d5f --- /dev/null +++ b/outputs/2023-09-08/23-55-33/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: false + force_rebuild_storage: false diff --git a/outputs/2023-09-08/23-55-33/.hydra/hydra.yaml b/outputs/2023-09-08/23-55-33/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9cca2cd77aee8be89a244b244fcc244055647726 --- /dev/null +++ b/outputs/2023-09-08/23-55-33/.hydra/hydra.yaml @@ -0,0 +1,132 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + job_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-08/23-55-33 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: disabled + hydra/hydra_logging: disabled + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-08/23-55-33/.hydra/overrides.yaml b/outputs/2023-09-08/23-55-33/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-08/23-55-33/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-08/23-58-48/.hydra/config.yaml b/outputs/2023-09-08/23-58-48/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe16713cb97d457583e1b65718ca1ef4da2a8d5f --- /dev/null +++ b/outputs/2023-09-08/23-58-48/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: false + force_rebuild_storage: false diff --git a/outputs/2023-09-08/23-58-48/.hydra/hydra.yaml b/outputs/2023-09-08/23-58-48/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..aa08db0debc831f6ef91dc57454674c21733afbc --- /dev/null +++ b/outputs/2023-09-08/23-58-48/.hydra/hydra.yaml @@ -0,0 +1,132 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + job_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-08/23-58-48 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: disabled + hydra/hydra_logging: disabled + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-08/23-58-48/.hydra/overrides.yaml b/outputs/2023-09-08/23-58-48/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-08/23-58-48/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-09/00-00-45/.hydra/config.yaml b/outputs/2023-09-09/00-00-45/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe16713cb97d457583e1b65718ca1ef4da2a8d5f --- /dev/null +++ b/outputs/2023-09-09/00-00-45/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: false + force_rebuild_storage: false diff --git a/outputs/2023-09-09/00-00-45/.hydra/hydra.yaml b/outputs/2023-09-09/00-00-45/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5c4761ad074f424513008406748b16651d02c1e3 --- /dev/null +++ b/outputs/2023-09-09/00-00-45/.hydra/hydra.yaml @@ -0,0 +1,132 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + job_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-09/00-00-45 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: disabled + hydra/hydra_logging: disabled + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-09/00-00-45/.hydra/overrides.yaml b/outputs/2023-09-09/00-00-45/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-09/00-00-45/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-09/10-44-36/.hydra/config.yaml b/outputs/2023-09-09/10-44-36/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe16713cb97d457583e1b65718ca1ef4da2a8d5f --- /dev/null +++ b/outputs/2023-09-09/10-44-36/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: false + force_rebuild_storage: false diff --git a/outputs/2023-09-09/10-44-36/.hydra/hydra.yaml b/outputs/2023-09-09/10-44-36/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5cb9d06784c2a78edc8154b7d6b800a313cfb1db --- /dev/null +++ b/outputs/2023-09-09/10-44-36/.hydra/hydra.yaml @@ -0,0 +1,132 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + job_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-09/10-44-36 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: disabled + hydra/hydra_logging: disabled + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-09/10-44-36/.hydra/overrides.yaml b/outputs/2023-09-09/10-44-36/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-09/10-44-36/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-09/10-45-05/.hydra/config.yaml b/outputs/2023-09-09/10-45-05/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe16713cb97d457583e1b65718ca1ef4da2a8d5f --- /dev/null +++ b/outputs/2023-09-09/10-45-05/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: false + force_rebuild_storage: false diff --git a/outputs/2023-09-09/10-45-05/.hydra/hydra.yaml b/outputs/2023-09-09/10-45-05/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1919f603e9a38ab6885a3cd0fc803322181d0f62 --- /dev/null +++ b/outputs/2023-09-09/10-45-05/.hydra/hydra.yaml @@ -0,0 +1,132 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + job_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-09/10-45-05 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: disabled + hydra/hydra_logging: disabled + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-09/10-45-05/.hydra/overrides.yaml b/outputs/2023-09-09/10-45-05/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-09/10-45-05/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-09/10-52-55/.hydra/config.yaml b/outputs/2023-09-09/10-52-55/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe16713cb97d457583e1b65718ca1ef4da2a8d5f --- /dev/null +++ b/outputs/2023-09-09/10-52-55/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: false + force_rebuild_storage: false diff --git a/outputs/2023-09-09/10-52-55/.hydra/hydra.yaml b/outputs/2023-09-09/10-52-55/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..eaf0b46552e1f65391a1131ea8f172ef9f7011ed --- /dev/null +++ b/outputs/2023-09-09/10-52-55/.hydra/hydra.yaml @@ -0,0 +1,132 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + job_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-09/10-52-55 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: disabled + hydra/hydra_logging: disabled + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-09/10-52-55/.hydra/overrides.yaml b/outputs/2023-09-09/10-52-55/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-09/10-52-55/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-09/10-53-55/.hydra/config.yaml b/outputs/2023-09-09/10-53-55/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe16713cb97d457583e1b65718ca1ef4da2a8d5f --- /dev/null +++ b/outputs/2023-09-09/10-53-55/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: false + force_rebuild_storage: false diff --git a/outputs/2023-09-09/10-53-55/.hydra/hydra.yaml b/outputs/2023-09-09/10-53-55/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c478be4bf0aa2f3e17f50593acd33d173f526b1e --- /dev/null +++ b/outputs/2023-09-09/10-53-55/.hydra/hydra.yaml @@ -0,0 +1,132 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + job_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-09/10-53-55 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: disabled + hydra/hydra_logging: disabled + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-09/10-53-55/.hydra/overrides.yaml b/outputs/2023-09-09/10-53-55/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-09/10-53-55/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-09/10-59-30/.hydra/config.yaml b/outputs/2023-09-09/10-59-30/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe16713cb97d457583e1b65718ca1ef4da2a8d5f --- /dev/null +++ b/outputs/2023-09-09/10-59-30/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: false + force_rebuild_storage: false diff --git a/outputs/2023-09-09/10-59-30/.hydra/hydra.yaml b/outputs/2023-09-09/10-59-30/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1d2caa7ef20adcbab50fb8bd6980a9e45e6be315 --- /dev/null +++ b/outputs/2023-09-09/10-59-30/.hydra/hydra.yaml @@ -0,0 +1,132 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + job_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-09/10-59-30 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: disabled + hydra/hydra_logging: disabled + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-09/10-59-30/.hydra/overrides.yaml b/outputs/2023-09-09/10-59-30/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-09/10-59-30/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-09/10-59-36/.hydra/config.yaml b/outputs/2023-09-09/10-59-36/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe16713cb97d457583e1b65718ca1ef4da2a8d5f --- /dev/null +++ b/outputs/2023-09-09/10-59-36/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: false + force_rebuild_storage: false diff --git a/outputs/2023-09-09/10-59-36/.hydra/hydra.yaml b/outputs/2023-09-09/10-59-36/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6a20d7d12c9a4242c06e2a8ddbdb8b23926be989 --- /dev/null +++ b/outputs/2023-09-09/10-59-36/.hydra/hydra.yaml @@ -0,0 +1,132 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + job_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-09/10-59-36 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: disabled + hydra/hydra_logging: disabled + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-09/10-59-36/.hydra/overrides.yaml b/outputs/2023-09-09/10-59-36/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-09/10-59-36/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-09/11-24-02/.hydra/config.yaml b/outputs/2023-09-09/11-24-02/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe16713cb97d457583e1b65718ca1ef4da2a8d5f --- /dev/null +++ b/outputs/2023-09-09/11-24-02/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: false + force_rebuild_storage: false diff --git a/outputs/2023-09-09/11-24-02/.hydra/hydra.yaml b/outputs/2023-09-09/11-24-02/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1e5c4690cd1d7dcdfbabee43d053c3300d194356 --- /dev/null +++ b/outputs/2023-09-09/11-24-02/.hydra/hydra.yaml @@ -0,0 +1,132 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + job_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-09/11-24-02 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: disabled + hydra/hydra_logging: disabled + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-09/11-24-02/.hydra/overrides.yaml b/outputs/2023-09-09/11-24-02/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-09/11-24-02/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-09/11-25-16/.hydra/config.yaml b/outputs/2023-09-09/11-25-16/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe16713cb97d457583e1b65718ca1ef4da2a8d5f --- /dev/null +++ b/outputs/2023-09-09/11-25-16/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: false + force_rebuild_storage: false diff --git a/outputs/2023-09-09/11-25-16/.hydra/hydra.yaml b/outputs/2023-09-09/11-25-16/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bdc310a43beef7cc0bdbf0ac8416b524a06edc80 --- /dev/null +++ b/outputs/2023-09-09/11-25-16/.hydra/hydra.yaml @@ -0,0 +1,132 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + job_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-09/11-25-16 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: disabled + hydra/hydra_logging: disabled + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-09/11-25-16/.hydra/overrides.yaml b/outputs/2023-09-09/11-25-16/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-09/11-25-16/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-09/11-25-23/.hydra/config.yaml b/outputs/2023-09-09/11-25-23/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe16713cb97d457583e1b65718ca1ef4da2a8d5f --- /dev/null +++ b/outputs/2023-09-09/11-25-23/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: false + force_rebuild_storage: false diff --git a/outputs/2023-09-09/11-25-23/.hydra/hydra.yaml b/outputs/2023-09-09/11-25-23/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5e8a0a8bdeeeb89661bfa37fa775a36a9d5b2ca0 --- /dev/null +++ b/outputs/2023-09-09/11-25-23/.hydra/hydra.yaml @@ -0,0 +1,132 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + job_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-09/11-25-23 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: disabled + hydra/hydra_logging: disabled + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-09/11-25-23/.hydra/overrides.yaml b/outputs/2023-09-09/11-25-23/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-09/11-25-23/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-09/11-25-30/.hydra/config.yaml b/outputs/2023-09-09/11-25-30/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe16713cb97d457583e1b65718ca1ef4da2a8d5f --- /dev/null +++ b/outputs/2023-09-09/11-25-30/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: false + force_rebuild_storage: false diff --git a/outputs/2023-09-09/11-25-30/.hydra/hydra.yaml b/outputs/2023-09-09/11-25-30/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d526897d3d97b343b043d9c06c87fc669c15db52 --- /dev/null +++ b/outputs/2023-09-09/11-25-30/.hydra/hydra.yaml @@ -0,0 +1,132 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + job_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-09/11-25-30 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: disabled + hydra/hydra_logging: disabled + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-09/11-25-30/.hydra/overrides.yaml b/outputs/2023-09-09/11-25-30/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-09/11-25-30/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-09/11-26-20/.hydra/config.yaml b/outputs/2023-09-09/11-26-20/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe16713cb97d457583e1b65718ca1ef4da2a8d5f --- /dev/null +++ b/outputs/2023-09-09/11-26-20/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: false + force_rebuild_storage: false diff --git a/outputs/2023-09-09/11-26-20/.hydra/hydra.yaml b/outputs/2023-09-09/11-26-20/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..129dac35fed12cccfb3858938972228f5cc8fc7d --- /dev/null +++ b/outputs/2023-09-09/11-26-20/.hydra/hydra.yaml @@ -0,0 +1,132 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + job_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-09/11-26-20 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: disabled + hydra/hydra_logging: disabled + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-09/11-26-20/.hydra/overrides.yaml b/outputs/2023-09-09/11-26-20/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-09/11-26-20/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-09/11-26-58/.hydra/config.yaml b/outputs/2023-09-09/11-26-58/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe16713cb97d457583e1b65718ca1ef4da2a8d5f --- /dev/null +++ b/outputs/2023-09-09/11-26-58/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: false + force_rebuild_storage: false diff --git a/outputs/2023-09-09/11-26-58/.hydra/hydra.yaml b/outputs/2023-09-09/11-26-58/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..006dd9a44cf120e7e74f80b77d93424697ed6ee0 --- /dev/null +++ b/outputs/2023-09-09/11-26-58/.hydra/hydra.yaml @@ -0,0 +1,132 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + job_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-09/11-26-58 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: disabled + hydra/hydra_logging: disabled + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-09/11-26-58/.hydra/overrides.yaml b/outputs/2023-09-09/11-26-58/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-09/11-26-58/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-09/11-28-20/.hydra/config.yaml b/outputs/2023-09-09/11-28-20/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe16713cb97d457583e1b65718ca1ef4da2a8d5f --- /dev/null +++ b/outputs/2023-09-09/11-28-20/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: false + force_rebuild_storage: false diff --git a/outputs/2023-09-09/11-28-20/.hydra/hydra.yaml b/outputs/2023-09-09/11-28-20/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0f9488224fbb504af92c9d90dbb7520e78afc3dd --- /dev/null +++ b/outputs/2023-09-09/11-28-20/.hydra/hydra.yaml @@ -0,0 +1,132 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + job_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-09/11-28-20 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: disabled + hydra/hydra_logging: disabled + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-09/11-28-20/.hydra/overrides.yaml b/outputs/2023-09-09/11-28-20/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-09/11-28-20/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-09/11-30-45/.hydra/config.yaml b/outputs/2023-09-09/11-30-45/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe16713cb97d457583e1b65718ca1ef4da2a8d5f --- /dev/null +++ b/outputs/2023-09-09/11-30-45/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: false + force_rebuild_storage: false diff --git a/outputs/2023-09-09/11-30-45/.hydra/hydra.yaml b/outputs/2023-09-09/11-30-45/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..342ad339a80cae3a75fff9fad0abbcd56fd88c0a --- /dev/null +++ b/outputs/2023-09-09/11-30-45/.hydra/hydra.yaml @@ -0,0 +1,132 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + job_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-09/11-30-45 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: disabled + hydra/hydra_logging: disabled + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-09/11-30-45/.hydra/overrides.yaml b/outputs/2023-09-09/11-30-45/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-09/11-30-45/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-09/11-30-58/.hydra/config.yaml b/outputs/2023-09-09/11-30-58/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe16713cb97d457583e1b65718ca1ef4da2a8d5f --- /dev/null +++ b/outputs/2023-09-09/11-30-58/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: false + force_rebuild_storage: false diff --git a/outputs/2023-09-09/11-30-58/.hydra/hydra.yaml b/outputs/2023-09-09/11-30-58/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1990a04bf9887f294b1f696631a2140f928bb915 --- /dev/null +++ b/outputs/2023-09-09/11-30-58/.hydra/hydra.yaml @@ -0,0 +1,132 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + job_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-09/11-30-58 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: disabled + hydra/hydra_logging: disabled + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-09/11-30-58/.hydra/overrides.yaml b/outputs/2023-09-09/11-30-58/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-09/11-30-58/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-10/16-05-29/.hydra/config.yaml b/outputs/2023-09-10/16-05-29/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe16713cb97d457583e1b65718ca1ef4da2a8d5f --- /dev/null +++ b/outputs/2023-09-10/16-05-29/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: false + force_rebuild_storage: false diff --git a/outputs/2023-09-10/16-05-29/.hydra/hydra.yaml b/outputs/2023-09-10/16-05-29/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..08092001eb99fe89a8857e22dc1d651eb0672ebe --- /dev/null +++ b/outputs/2023-09-10/16-05-29/.hydra/hydra.yaml @@ -0,0 +1,132 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + job_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-10/16-05-29 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: disabled + hydra/hydra_logging: disabled + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-10/16-05-29/.hydra/overrides.yaml b/outputs/2023-09-10/16-05-29/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-10/16-05-29/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-10/16-06-17/.hydra/config.yaml b/outputs/2023-09-10/16-06-17/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe16713cb97d457583e1b65718ca1ef4da2a8d5f --- /dev/null +++ b/outputs/2023-09-10/16-06-17/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: false + force_rebuild_storage: false diff --git a/outputs/2023-09-10/16-06-17/.hydra/hydra.yaml b/outputs/2023-09-10/16-06-17/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fa4da63e150dc59fef7185d094e21b5bb873db15 --- /dev/null +++ b/outputs/2023-09-10/16-06-17/.hydra/hydra.yaml @@ -0,0 +1,132 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + job_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-10/16-06-17 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: disabled + hydra/hydra_logging: disabled + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-10/16-06-17/.hydra/overrides.yaml b/outputs/2023-09-10/16-06-17/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-10/16-06-17/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-10/16-07-33/.hydra/config.yaml b/outputs/2023-09-10/16-07-33/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe16713cb97d457583e1b65718ca1ef4da2a8d5f --- /dev/null +++ b/outputs/2023-09-10/16-07-33/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: false + force_rebuild_storage: false diff --git a/outputs/2023-09-10/16-07-33/.hydra/hydra.yaml b/outputs/2023-09-10/16-07-33/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..84f3bbd67de6fdfcda66bcfc0591b7f7248a6bb4 --- /dev/null +++ b/outputs/2023-09-10/16-07-33/.hydra/hydra.yaml @@ -0,0 +1,132 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + job_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-10/16-07-33 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: disabled + hydra/hydra_logging: disabled + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-10/16-07-33/.hydra/overrides.yaml b/outputs/2023-09-10/16-07-33/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-10/16-07-33/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-10/16-11-09/.hydra/config.yaml b/outputs/2023-09-10/16-11-09/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe16713cb97d457583e1b65718ca1ef4da2a8d5f --- /dev/null +++ b/outputs/2023-09-10/16-11-09/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: false + force_rebuild_storage: false diff --git a/outputs/2023-09-10/16-11-09/.hydra/hydra.yaml b/outputs/2023-09-10/16-11-09/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..eb055922f626c8860e3e70b0243dc73919928bbe --- /dev/null +++ b/outputs/2023-09-10/16-11-09/.hydra/hydra.yaml @@ -0,0 +1,132 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + job_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-10/16-11-09 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: disabled + hydra/hydra_logging: disabled + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-10/16-11-09/.hydra/overrides.yaml b/outputs/2023-09-10/16-11-09/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-10/16-11-09/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-10/16-14-09/.hydra/config.yaml b/outputs/2023-09-10/16-14-09/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe16713cb97d457583e1b65718ca1ef4da2a8d5f --- /dev/null +++ b/outputs/2023-09-10/16-14-09/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: false + force_rebuild_storage: false diff --git a/outputs/2023-09-10/16-14-09/.hydra/hydra.yaml b/outputs/2023-09-10/16-14-09/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c17f5f00e1505b40a04ea5046f462e7b5b1c2934 --- /dev/null +++ b/outputs/2023-09-10/16-14-09/.hydra/hydra.yaml @@ -0,0 +1,132 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + job_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-10/16-14-09 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: disabled + hydra/hydra_logging: disabled + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-10/16-14-09/.hydra/overrides.yaml b/outputs/2023-09-10/16-14-09/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-10/16-14-09/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-10/16-15-24/.hydra/config.yaml b/outputs/2023-09-10/16-15-24/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe16713cb97d457583e1b65718ca1ef4da2a8d5f --- /dev/null +++ b/outputs/2023-09-10/16-15-24/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: false + force_rebuild_storage: false diff --git a/outputs/2023-09-10/16-15-24/.hydra/hydra.yaml b/outputs/2023-09-10/16-15-24/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..97c34f6188ecbf9a8165fe414dddf9411a069137 --- /dev/null +++ b/outputs/2023-09-10/16-15-24/.hydra/hydra.yaml @@ -0,0 +1,132 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + job_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-10/16-15-24 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: disabled + hydra/hydra_logging: disabled + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-10/16-15-24/.hydra/overrides.yaml b/outputs/2023-09-10/16-15-24/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-10/16-15-24/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-10/16-16-53/.hydra/config.yaml b/outputs/2023-09-10/16-16-53/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe16713cb97d457583e1b65718ca1ef4da2a8d5f --- /dev/null +++ b/outputs/2023-09-10/16-16-53/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: false + force_rebuild_storage: false diff --git a/outputs/2023-09-10/16-16-53/.hydra/hydra.yaml b/outputs/2023-09-10/16-16-53/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3daa188283f223301f51271706c3f7622c766f7f --- /dev/null +++ b/outputs/2023-09-10/16-16-53/.hydra/hydra.yaml @@ -0,0 +1,132 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + job_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-10/16-16-53 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: disabled + hydra/hydra_logging: disabled + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-10/16-16-53/.hydra/overrides.yaml b/outputs/2023-09-10/16-16-53/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-10/16-16-53/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-10/16-19-47/.hydra/config.yaml b/outputs/2023-09-10/16-19-47/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe16713cb97d457583e1b65718ca1ef4da2a8d5f --- /dev/null +++ b/outputs/2023-09-10/16-19-47/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: false + force_rebuild_storage: false diff --git a/outputs/2023-09-10/16-19-47/.hydra/hydra.yaml b/outputs/2023-09-10/16-19-47/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cdb7995aa442ba15aea825ffb8b58b17e6d26703 --- /dev/null +++ b/outputs/2023-09-10/16-19-47/.hydra/hydra.yaml @@ -0,0 +1,132 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + job_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-10/16-19-47 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: disabled + hydra/hydra_logging: disabled + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-10/16-19-47/.hydra/overrides.yaml b/outputs/2023-09-10/16-19-47/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-10/16-19-47/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-10/16-23-08/.hydra/config.yaml b/outputs/2023-09-10/16-23-08/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe16713cb97d457583e1b65718ca1ef4da2a8d5f --- /dev/null +++ b/outputs/2023-09-10/16-23-08/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: false + force_rebuild_storage: false diff --git a/outputs/2023-09-10/16-23-08/.hydra/hydra.yaml b/outputs/2023-09-10/16-23-08/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3ae4effb04339088cb87ac17ce8e17e97095e0f3 --- /dev/null +++ b/outputs/2023-09-10/16-23-08/.hydra/hydra.yaml @@ -0,0 +1,132 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + job_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-10/16-23-08 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: disabled + hydra/hydra_logging: disabled + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-10/16-23-08/.hydra/overrides.yaml b/outputs/2023-09-10/16-23-08/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-10/16-23-08/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-10/16-28-08/.hydra/config.yaml b/outputs/2023-09-10/16-28-08/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe16713cb97d457583e1b65718ca1ef4da2a8d5f --- /dev/null +++ b/outputs/2023-09-10/16-28-08/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: false + force_rebuild_storage: false diff --git a/outputs/2023-09-10/16-28-08/.hydra/hydra.yaml b/outputs/2023-09-10/16-28-08/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7a14bca742e63ee92beda0444d3a2bd3ce82623e --- /dev/null +++ b/outputs/2023-09-10/16-28-08/.hydra/hydra.yaml @@ -0,0 +1,132 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + job_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-10/16-28-08 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: disabled + hydra/hydra_logging: disabled + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-10/16-28-08/.hydra/overrides.yaml b/outputs/2023-09-10/16-28-08/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-10/16-28-08/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-10/16-28-56/.hydra/config.yaml b/outputs/2023-09-10/16-28-56/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe16713cb97d457583e1b65718ca1ef4da2a8d5f --- /dev/null +++ b/outputs/2023-09-10/16-28-56/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: false + force_rebuild_storage: false diff --git a/outputs/2023-09-10/16-28-56/.hydra/hydra.yaml b/outputs/2023-09-10/16-28-56/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3984406cfb1ddc1a47c40158e4f0c4202fd19044 --- /dev/null +++ b/outputs/2023-09-10/16-28-56/.hydra/hydra.yaml @@ -0,0 +1,132 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + job_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-10/16-28-56 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: disabled + hydra/hydra_logging: disabled + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-10/16-28-56/.hydra/overrides.yaml b/outputs/2023-09-10/16-28-56/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-10/16-28-56/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-10/16-29-48/.hydra/config.yaml b/outputs/2023-09-10/16-29-48/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe16713cb97d457583e1b65718ca1ef4da2a8d5f --- /dev/null +++ b/outputs/2023-09-10/16-29-48/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: false + force_rebuild_storage: false diff --git a/outputs/2023-09-10/16-29-48/.hydra/hydra.yaml b/outputs/2023-09-10/16-29-48/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..67cab43889f70991cfb7a75103138c069587b70b --- /dev/null +++ b/outputs/2023-09-10/16-29-48/.hydra/hydra.yaml @@ -0,0 +1,132 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + job_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-10/16-29-48 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: disabled + hydra/hydra_logging: disabled + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-10/16-29-48/.hydra/overrides.yaml b/outputs/2023-09-10/16-29-48/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-10/16-29-48/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-10/16-31-32/.hydra/config.yaml b/outputs/2023-09-10/16-31-32/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe16713cb97d457583e1b65718ca1ef4da2a8d5f --- /dev/null +++ b/outputs/2023-09-10/16-31-32/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: false + force_rebuild_storage: false diff --git a/outputs/2023-09-10/16-31-32/.hydra/hydra.yaml b/outputs/2023-09-10/16-31-32/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5ad5dd225409a9339cc2b07b71a3a55acc081c4c --- /dev/null +++ b/outputs/2023-09-10/16-31-32/.hydra/hydra.yaml @@ -0,0 +1,132 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + job_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-10/16-31-32 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: disabled + hydra/hydra_logging: disabled + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-10/16-31-32/.hydra/overrides.yaml b/outputs/2023-09-10/16-31-32/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-10/16-31-32/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-10/16-32-34/.hydra/config.yaml b/outputs/2023-09-10/16-32-34/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe16713cb97d457583e1b65718ca1ef4da2a8d5f --- /dev/null +++ b/outputs/2023-09-10/16-32-34/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: false + force_rebuild_storage: false diff --git a/outputs/2023-09-10/16-32-34/.hydra/hydra.yaml b/outputs/2023-09-10/16-32-34/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9e12341b5fa84dc340e6b2615ec7f9a0d4352027 --- /dev/null +++ b/outputs/2023-09-10/16-32-34/.hydra/hydra.yaml @@ -0,0 +1,132 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + job_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-10/16-32-34 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: disabled + hydra/hydra_logging: disabled + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-10/16-32-34/.hydra/overrides.yaml b/outputs/2023-09-10/16-32-34/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-10/16-32-34/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-10/16-36-14/.hydra/config.yaml b/outputs/2023-09-10/16-36-14/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe16713cb97d457583e1b65718ca1ef4da2a8d5f --- /dev/null +++ b/outputs/2023-09-10/16-36-14/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: false + force_rebuild_storage: false diff --git a/outputs/2023-09-10/16-36-14/.hydra/hydra.yaml b/outputs/2023-09-10/16-36-14/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fb0ca94366eeb9c4ba15da1d5a56a5245ecdb112 --- /dev/null +++ b/outputs/2023-09-10/16-36-14/.hydra/hydra.yaml @@ -0,0 +1,132 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + job_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-10/16-36-14 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: disabled + hydra/hydra_logging: disabled + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-10/16-36-14/.hydra/overrides.yaml b/outputs/2023-09-10/16-36-14/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-10/16-36-14/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-10/16-38-25/.hydra/config.yaml b/outputs/2023-09-10/16-38-25/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe16713cb97d457583e1b65718ca1ef4da2a8d5f --- /dev/null +++ b/outputs/2023-09-10/16-38-25/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: false + force_rebuild_storage: false diff --git a/outputs/2023-09-10/16-38-25/.hydra/hydra.yaml b/outputs/2023-09-10/16-38-25/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..80a3542cec4925d8399bafdce0a9194d55e56a9d --- /dev/null +++ b/outputs/2023-09-10/16-38-25/.hydra/hydra.yaml @@ -0,0 +1,132 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + job_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-10/16-38-25 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: disabled + hydra/hydra_logging: disabled + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-10/16-38-25/.hydra/overrides.yaml b/outputs/2023-09-10/16-38-25/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-10/16-38-25/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-10/16-40-30/.hydra/config.yaml b/outputs/2023-09-10/16-40-30/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe16713cb97d457583e1b65718ca1ef4da2a8d5f --- /dev/null +++ b/outputs/2023-09-10/16-40-30/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: false + force_rebuild_storage: false diff --git a/outputs/2023-09-10/16-40-30/.hydra/hydra.yaml b/outputs/2023-09-10/16-40-30/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3e6a3d8e50af2183561914a3ea82baef8b45f64b --- /dev/null +++ b/outputs/2023-09-10/16-40-30/.hydra/hydra.yaml @@ -0,0 +1,132 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + job_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-10/16-40-30 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: disabled + hydra/hydra_logging: disabled + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-10/16-40-30/.hydra/overrides.yaml b/outputs/2023-09-10/16-40-30/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-10/16-40-30/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-10/16-41-40/.hydra/config.yaml b/outputs/2023-09-10/16-41-40/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe16713cb97d457583e1b65718ca1ef4da2a8d5f --- /dev/null +++ b/outputs/2023-09-10/16-41-40/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: false + force_rebuild_storage: false diff --git a/outputs/2023-09-10/16-41-40/.hydra/hydra.yaml b/outputs/2023-09-10/16-41-40/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ea3d7f9d8998c7dbdda6344fdf754348f6daef81 --- /dev/null +++ b/outputs/2023-09-10/16-41-40/.hydra/hydra.yaml @@ -0,0 +1,132 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + job_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-10/16-41-40 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: disabled + hydra/hydra_logging: disabled + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-10/16-41-40/.hydra/overrides.yaml b/outputs/2023-09-10/16-41-40/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-10/16-41-40/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-10/16-43-52/.hydra/config.yaml b/outputs/2023-09-10/16-43-52/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe16713cb97d457583e1b65718ca1ef4da2a8d5f --- /dev/null +++ b/outputs/2023-09-10/16-43-52/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: false + force_rebuild_storage: false diff --git a/outputs/2023-09-10/16-43-52/.hydra/hydra.yaml b/outputs/2023-09-10/16-43-52/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b6b4acb362bd1df3b5806f2e970e0844ad74d2ec --- /dev/null +++ b/outputs/2023-09-10/16-43-52/.hydra/hydra.yaml @@ -0,0 +1,132 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + job_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-10/16-43-52 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: disabled + hydra/hydra_logging: disabled + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-10/16-43-52/.hydra/overrides.yaml b/outputs/2023-09-10/16-43-52/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-10/16-43-52/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-10/16-45-01/.hydra/config.yaml b/outputs/2023-09-10/16-45-01/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5f4768f58578a8551242fc52e1b7359aa960d5ca --- /dev/null +++ b/outputs/2023-09-10/16-45-01/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: true + force_rebuild_storage: false diff --git a/outputs/2023-09-10/16-45-01/.hydra/hydra.yaml b/outputs/2023-09-10/16-45-01/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c1b9e70b6efc9d9a70357f8547bef5220aae3d82 --- /dev/null +++ b/outputs/2023-09-10/16-45-01/.hydra/hydra.yaml @@ -0,0 +1,160 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-10/16-45-01 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-10/16-45-01/.hydra/overrides.yaml b/outputs/2023-09-10/16-45-01/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-10/16-45-01/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-10/16-55-35/.hydra/config.yaml b/outputs/2023-09-10/16-55-35/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5f4768f58578a8551242fc52e1b7359aa960d5ca --- /dev/null +++ b/outputs/2023-09-10/16-55-35/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: true + force_rebuild_storage: false diff --git a/outputs/2023-09-10/16-55-35/.hydra/hydra.yaml b/outputs/2023-09-10/16-55-35/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..24091a1120bfc3b19f74c7172a19c8c1d433dde4 --- /dev/null +++ b/outputs/2023-09-10/16-55-35/.hydra/hydra.yaml @@ -0,0 +1,160 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-10/16-55-35 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-10/16-55-35/.hydra/overrides.yaml b/outputs/2023-09-10/16-55-35/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-10/16-55-35/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-10/17-07-10/.hydra/config.yaml b/outputs/2023-09-10/17-07-10/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5f4768f58578a8551242fc52e1b7359aa960d5ca --- /dev/null +++ b/outputs/2023-09-10/17-07-10/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: true + force_rebuild_storage: false diff --git a/outputs/2023-09-10/17-07-10/.hydra/hydra.yaml b/outputs/2023-09-10/17-07-10/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..dafadce72f29560719d43a85fe5de20392d733a3 --- /dev/null +++ b/outputs/2023-09-10/17-07-10/.hydra/hydra.yaml @@ -0,0 +1,160 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-10/17-07-10 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-10/17-07-10/.hydra/overrides.yaml b/outputs/2023-09-10/17-07-10/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-10/17-07-10/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-10/17-11-07/.hydra/config.yaml b/outputs/2023-09-10/17-11-07/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5f4768f58578a8551242fc52e1b7359aa960d5ca --- /dev/null +++ b/outputs/2023-09-10/17-11-07/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: true + force_rebuild_storage: false diff --git a/outputs/2023-09-10/17-11-07/.hydra/hydra.yaml b/outputs/2023-09-10/17-11-07/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ee0297eebcd3319157bfae9ae5cb6e0ed49ced74 --- /dev/null +++ b/outputs/2023-09-10/17-11-07/.hydra/hydra.yaml @@ -0,0 +1,160 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-10/17-11-07 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-10/17-11-07/.hydra/overrides.yaml b/outputs/2023-09-10/17-11-07/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-10/17-11-07/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-10/17-12-51/.hydra/config.yaml b/outputs/2023-09-10/17-12-51/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe16713cb97d457583e1b65718ca1ef4da2a8d5f --- /dev/null +++ b/outputs/2023-09-10/17-12-51/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: false + force_rebuild_storage: false diff --git a/outputs/2023-09-10/17-12-51/.hydra/hydra.yaml b/outputs/2023-09-10/17-12-51/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..666a728a63212077beef0f016fd49746f7f1dd2c --- /dev/null +++ b/outputs/2023-09-10/17-12-51/.hydra/hydra.yaml @@ -0,0 +1,132 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + job_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-10/17-12-51 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: disabled + hydra/hydra_logging: disabled + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-10/17-12-51/.hydra/overrides.yaml b/outputs/2023-09-10/17-12-51/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-10/17-12-51/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-10/17-15-01/.hydra/config.yaml b/outputs/2023-09-10/17-15-01/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe16713cb97d457583e1b65718ca1ef4da2a8d5f --- /dev/null +++ b/outputs/2023-09-10/17-15-01/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: false + force_rebuild_storage: false diff --git a/outputs/2023-09-10/17-15-01/.hydra/hydra.yaml b/outputs/2023-09-10/17-15-01/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..149c83d0d747729f0aea99ff93c1767e7bf8ab81 --- /dev/null +++ b/outputs/2023-09-10/17-15-01/.hydra/hydra.yaml @@ -0,0 +1,132 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + job_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-10/17-15-01 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: disabled + hydra/hydra_logging: disabled + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-10/17-15-01/.hydra/overrides.yaml b/outputs/2023-09-10/17-15-01/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-10/17-15-01/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-10/17-15-13/.hydra/config.yaml b/outputs/2023-09-10/17-15-13/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe16713cb97d457583e1b65718ca1ef4da2a8d5f --- /dev/null +++ b/outputs/2023-09-10/17-15-13/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: false + force_rebuild_storage: false diff --git a/outputs/2023-09-10/17-15-13/.hydra/hydra.yaml b/outputs/2023-09-10/17-15-13/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6037968575bd8f421681ca86e3d79122ffe96ea0 --- /dev/null +++ b/outputs/2023-09-10/17-15-13/.hydra/hydra.yaml @@ -0,0 +1,132 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + job_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-10/17-15-13 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: disabled + hydra/hydra_logging: disabled + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-10/17-15-13/.hydra/overrides.yaml b/outputs/2023-09-10/17-15-13/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-10/17-15-13/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-10/17-15-32/.hydra/config.yaml b/outputs/2023-09-10/17-15-32/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe16713cb97d457583e1b65718ca1ef4da2a8d5f --- /dev/null +++ b/outputs/2023-09-10/17-15-32/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: false + force_rebuild_storage: false diff --git a/outputs/2023-09-10/17-15-32/.hydra/hydra.yaml b/outputs/2023-09-10/17-15-32/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..03cc654396130d5e5797dbf8c8445d66b07a56cb --- /dev/null +++ b/outputs/2023-09-10/17-15-32/.hydra/hydra.yaml @@ -0,0 +1,132 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + job_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-10/17-15-32 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: disabled + hydra/hydra_logging: disabled + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-10/17-15-32/.hydra/overrides.yaml b/outputs/2023-09-10/17-15-32/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-10/17-15-32/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-10/17-16-40/.hydra/config.yaml b/outputs/2023-09-10/17-16-40/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe16713cb97d457583e1b65718ca1ef4da2a8d5f --- /dev/null +++ b/outputs/2023-09-10/17-16-40/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: false + force_rebuild_storage: false diff --git a/outputs/2023-09-10/17-16-40/.hydra/hydra.yaml b/outputs/2023-09-10/17-16-40/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f82d2cb94e47373ad72a5a6a51ed27cc9afd2b14 --- /dev/null +++ b/outputs/2023-09-10/17-16-40/.hydra/hydra.yaml @@ -0,0 +1,132 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + job_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-10/17-16-40 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: disabled + hydra/hydra_logging: disabled + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-10/17-16-40/.hydra/overrides.yaml b/outputs/2023-09-10/17-16-40/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-10/17-16-40/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-10/17-18-09/.hydra/config.yaml b/outputs/2023-09-10/17-18-09/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe16713cb97d457583e1b65718ca1ef4da2a8d5f --- /dev/null +++ b/outputs/2023-09-10/17-18-09/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: false + force_rebuild_storage: false diff --git a/outputs/2023-09-10/17-18-09/.hydra/hydra.yaml b/outputs/2023-09-10/17-18-09/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e9f6d06ed2277f191daa3b3222cc63368c713c87 --- /dev/null +++ b/outputs/2023-09-10/17-18-09/.hydra/hydra.yaml @@ -0,0 +1,132 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + job_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-10/17-18-09 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: disabled + hydra/hydra_logging: disabled + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-10/17-18-09/.hydra/overrides.yaml b/outputs/2023-09-10/17-18-09/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-10/17-18-09/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-10/17-20-26/.hydra/config.yaml b/outputs/2023-09-10/17-20-26/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe16713cb97d457583e1b65718ca1ef4da2a8d5f --- /dev/null +++ b/outputs/2023-09-10/17-20-26/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: false + force_rebuild_storage: false diff --git a/outputs/2023-09-10/17-20-26/.hydra/hydra.yaml b/outputs/2023-09-10/17-20-26/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d1e8a4fffdbc257f96a4b72202116c32bae931c0 --- /dev/null +++ b/outputs/2023-09-10/17-20-26/.hydra/hydra.yaml @@ -0,0 +1,132 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + job_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-10/17-20-26 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: disabled + hydra/hydra_logging: disabled + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-10/17-20-26/.hydra/overrides.yaml b/outputs/2023-09-10/17-20-26/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-10/17-20-26/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-10/17-21-33/.hydra/config.yaml b/outputs/2023-09-10/17-21-33/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe16713cb97d457583e1b65718ca1ef4da2a8d5f --- /dev/null +++ b/outputs/2023-09-10/17-21-33/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: false + force_rebuild_storage: false diff --git a/outputs/2023-09-10/17-21-33/.hydra/hydra.yaml b/outputs/2023-09-10/17-21-33/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e9c55dd20fecd432a353a9e3a8bdf2dca51b88e9 --- /dev/null +++ b/outputs/2023-09-10/17-21-33/.hydra/hydra.yaml @@ -0,0 +1,132 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + job_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-10/17-21-33 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: disabled + hydra/hydra_logging: disabled + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-10/17-21-33/.hydra/overrides.yaml b/outputs/2023-09-10/17-21-33/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-10/17-21-33/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-10/17-23-06/.hydra/config.yaml b/outputs/2023-09-10/17-23-06/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe16713cb97d457583e1b65718ca1ef4da2a8d5f --- /dev/null +++ b/outputs/2023-09-10/17-23-06/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: false + force_rebuild_storage: false diff --git a/outputs/2023-09-10/17-23-06/.hydra/hydra.yaml b/outputs/2023-09-10/17-23-06/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..78b4d33cc1d90ac5c164d06666d0c17884ea525d --- /dev/null +++ b/outputs/2023-09-10/17-23-06/.hydra/hydra.yaml @@ -0,0 +1,132 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + job_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-10/17-23-06 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: disabled + hydra/hydra_logging: disabled + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-10/17-23-06/.hydra/overrides.yaml b/outputs/2023-09-10/17-23-06/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-10/17-23-06/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-10/22-48-40/.hydra/config.yaml b/outputs/2023-09-10/22-48-40/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe16713cb97d457583e1b65718ca1ef4da2a8d5f --- /dev/null +++ b/outputs/2023-09-10/22-48-40/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: false + force_rebuild_storage: false diff --git a/outputs/2023-09-10/22-48-40/.hydra/hydra.yaml b/outputs/2023-09-10/22-48-40/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8600b05e983d09a29a19849b755c0e6619351bf7 --- /dev/null +++ b/outputs/2023-09-10/22-48-40/.hydra/hydra.yaml @@ -0,0 +1,132 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + job_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-10/22-48-40 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: disabled + hydra/hydra_logging: disabled + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-10/22-48-40/.hydra/overrides.yaml b/outputs/2023-09-10/22-48-40/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-10/22-48-40/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-11/15-04-04/.hydra/config.yaml b/outputs/2023-09-11/15-04-04/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe16713cb97d457583e1b65718ca1ef4da2a8d5f --- /dev/null +++ b/outputs/2023-09-11/15-04-04/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: false + force_rebuild_storage: false diff --git a/outputs/2023-09-11/15-04-04/.hydra/hydra.yaml b/outputs/2023-09-11/15-04-04/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..188b65d59099051c17e40a38973e739b39c7b1c1 --- /dev/null +++ b/outputs/2023-09-11/15-04-04/.hydra/hydra.yaml @@ -0,0 +1,132 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + job_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-11/15-04-04 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: disabled + hydra/hydra_logging: disabled + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-11/15-04-04/.hydra/overrides.yaml b/outputs/2023-09-11/15-04-04/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-11/15-04-04/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-11/15-04-25/.hydra/config.yaml b/outputs/2023-09-11/15-04-25/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe16713cb97d457583e1b65718ca1ef4da2a8d5f --- /dev/null +++ b/outputs/2023-09-11/15-04-25/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: false + force_rebuild_storage: false diff --git a/outputs/2023-09-11/15-04-25/.hydra/hydra.yaml b/outputs/2023-09-11/15-04-25/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d7d639513ad50ad998c8bed60c295449b46d68e6 --- /dev/null +++ b/outputs/2023-09-11/15-04-25/.hydra/hydra.yaml @@ -0,0 +1,132 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + job_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-11/15-04-25 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: disabled + hydra/hydra_logging: disabled + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-11/15-04-25/.hydra/overrides.yaml b/outputs/2023-09-11/15-04-25/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-11/15-04-25/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/outputs/2023-09-12/11-04-16/.hydra/config.yaml b/outputs/2023-09-12/11-04-16/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe16713cb97d457583e1b65718ca1ef4da2a8d5f --- /dev/null +++ b/outputs/2023-09-12/11-04-16/.hydra/config.yaml @@ -0,0 +1,25 @@ +document_loader: + _target_: document_loader.grobid.GrobidLoader + grobid_parser: + _target_: langchain.document_loaders.parsers.GrobidParser + segment_sentences: false + grobid_server: http://localhost:8070/api/processFulltextDocument +text_splitter: + _target_: text_splitter.spacy.SpacySplitter +text_embedding: + _target_: text_embedding.huggingface.HuggingFaceTextEmbedding +vector_store: + _target_: vector_store.faiss.FAISSVectorStore +document_retriever: + _target_: document_retriever.simple_retriever.SimpleDocumentRetriever +question_answering: + _target_: question_answering.huggingface.HuggingFaceQuestionAnswering +storage_path: + base: /data/tommaso/llm4scilit/data + documents: ${storage_path.base}/papers + documents_processed: ${storage_path.documents}_processed + vector_store: ${storage_path.base}/vector_store +mode: interactive +debug: + is_debug: false + force_rebuild_storage: false diff --git a/outputs/2023-09-12/11-04-16/.hydra/hydra.yaml b/outputs/2023-09-12/11-04-16/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c4073194fdae22e8f5691b468675105ecb677f9b --- /dev/null +++ b/outputs/2023-09-12/11-04-16/.hydra/hydra.yaml @@ -0,0 +1,132 @@ +hydra: + run: + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + job_logging: + version: 1 + root: + level: ERROR + disable_existing_loggers: true + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: [] + job: + name: demo + chdir: null + override_dirname: '' + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/tommaso/llm4scilit + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/tommaso/llm4scilit/config + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /home/tommaso/llm4scilit/outputs/2023-09-12/11-04-16 + choices: + question_answering: huggingface + document_retriever: simple_retriever + vector_store: faiss + text_embedding: huggingface + text_splitter: spacy + document_loader: grobid + hydra/env: default + hydra/callbacks: null + hydra/job_logging: disabled + hydra/hydra_logging: disabled + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/outputs/2023-09-12/11-04-16/.hydra/overrides.yaml b/outputs/2023-09-12/11-04-16/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe51488c7066f6687ef680d6bfaa4f7768ef205c --- /dev/null +++ b/outputs/2023-09-12/11-04-16/.hydra/overrides.yaml @@ -0,0 +1 @@ +[] diff --git a/src/demo.py b/src/demo.py new file mode 100644 index 0000000000000000000000000000000000000000..6ad229f38ee9761ff1bbb1971ccb83908baf34ab --- /dev/null +++ b/src/demo.py @@ -0,0 +1,124 @@ +import logging +from pathlib import Path +import cmd +import shlex +import hydra +from omegaconf import DictConfig, OmegaConf +from art import tprint +import utils + +log = logging.getLogger(__name__) + +class CLIApp(cmd.Cmd): + + class CleanExit: + def __init__(self, cliapp): + self.cliapp = cliapp + def __enter__(self): + return self + def __exit__(self, exc_type, exc_value, exc_tb): + if exc_type is KeyboardInterrupt: + print("\n", end="") + self.cliapp.do_exit(None) + return True + return exc_type is None + + prompt = '> ' + intro = """Running in interactive mode: +Welcome to the LLM4SciLit shell. Type help or ? to list commands.\n""" + + def __init__(self, app, cfg : DictConfig) -> None: + super().__init__() + self.app = app + self.cfg = cfg + + def do_exit(self, _): + """Exit the shell.""" + # self.app.vector_store.save(self.cfg.storage_path.vector_store) + print("\nLLM4SciLit: Bye!\n") + self.app.exit() + return True + do_EOF = do_exit + + def do_ask_paper(self, line): + """Ask a question about a paper.""" + paper, line = shlex.split(line) + filter_dict = {"paper_title": paper} + print(f"\nLLM4SciLit: {self.app.qa_model.answer_question(line, filter_dict)['result']}\n") + + def default(self, line): + # print(f"\nLLM4SciLit: a bunch of nonsense\n") + print(f"\nLLM4SciLit: {self.app.qa_model.answer_question(line, {})['result']}\n") + + +class App: + def __init__(self, cfg : DictConfig) -> None: + self.cfg = cfg + + log.info("Loading: Document Loader") + self.loader = hydra.utils.instantiate(cfg.document_loader) + log.info("Loading: Text Splitter") + self.splitter = hydra.utils.instantiate(cfg.text_splitter) + log.info("Loading: Text Embedding Model") + self.text_embedding_model = hydra.utils.instantiate(cfg.text_embedding) + log.info("Loading: Vector Store") + self.vector_store = hydra.utils.instantiate(cfg.vector_store, self.text_embedding_model) + log.info("Loading: Document Retriever") + self.retriever = hydra.utils.instantiate(cfg.document_retriever, self.vector_store) + log.info("Loading: Question Answering Model") + self.qa_model = hydra.utils.instantiate(cfg.question_answering, self.retriever) + + def _bootstrap(self) -> None: + # if vector store does not exist, create it + # if vector store exists, load it + + if not Path(self.cfg.storage_path.vector_store).exists() or self.cfg.debug.force_rebuild_storage: + message = ( + "Vector store not found at %s. Building storage from scratch" + if not self.cfg.debug.force_rebuild_storage + else "Forced to rebuild storage. Building storage from scratch" + ) + log.info(message, self.cfg.storage_path.vector_store) + + docs = self.loader.load_documents(self.cfg.storage_path.documents) + docs = self.splitter.split_documents(docs) + utils.save_docs_to_jsonl(docs, self.cfg.storage_path.documents_processed) + + self.vector_store.initialize_from_documents(docs) + self.vector_store.save(self.cfg.storage_path.vector_store) + else: + log.info("Vector store found at %s. Loading existing storage", self.cfg.storage_path.vector_store) + self.vector_store.initialize_from_file(self.cfg.storage_path.vector_store) + + self.retriever.initialize() + self.qa_model.initialize() + print("Ready to answer your questions 🔥🔥\n") + + def run_interactive(self) -> None: + self._bootstrap() + cli = CLIApp(self, self.cfg) + with CLIApp.CleanExit(cli): + cli.cmdloop() + + def exit(self): + """ + Do any cleanup here + """ + +@hydra.main(version_base=None, config_path="../config", config_name="config") +def main(cfg : DictConfig) -> None: + tprint("LLM4SciLit") + + if cfg.debug.is_debug: + print("Running with config:") + print(OmegaConf.to_yaml(cfg)) + + app = App(cfg) + match cfg.mode: + case "interactive": + app.run_interactive() + case _: + raise ValueError(f"Unknown mode: {cfg.mode}") + +if __name__ == "__main__": + main() # pylint: disable=E1120:no-value-for-parameter diff --git a/src/document_loader/grobid.py b/src/document_loader/grobid.py new file mode 100644 index 0000000000000000000000000000000000000000..b278ffa239a05898d79396a92fd08be0405b24d0 --- /dev/null +++ b/src/document_loader/grobid.py @@ -0,0 +1,19 @@ +from typing import Iterator +from langchain.docstore.document import Document +from langchain.document_loaders.parsers import GrobidParser +from langchain.document_loaders.generic import GenericLoader + +class GrobidLoader: + def __init__(self, grobid_parser: GrobidParser): + self.parser = grobid_parser + + def load_documents(self, root: str) -> Iterator[Document]: + loader = GenericLoader.from_filesystem( + root, + glob="**/*.pdf", + show_progress=True, + suffixes=[".pdf"], + parser=self.parser, + ) + + return loader.lazy_load() \ No newline at end of file diff --git a/src/document_retriever/simple_retriever.py b/src/document_retriever/simple_retriever.py new file mode 100644 index 0000000000000000000000000000000000000000..b1116fd84cbda6508632384bfe2229d183779e56 --- /dev/null +++ b/src/document_retriever/simple_retriever.py @@ -0,0 +1,10 @@ +class SimpleDocumentRetriever: + def __init__(self, vector_store): + self.vector_store = vector_store + self.retriever = None + + def initialize(self): + self.retriever = self.vector_store.db.as_retriever(search_kwargs={"k": 10, "fetch_k": 40}) + + def retrieve(self, query: str, k: int = 4): + pass \ No newline at end of file diff --git a/src/question_answering/huggingface.py b/src/question_answering/huggingface.py new file mode 100644 index 0000000000000000000000000000000000000000..725771fa72020c3ebef15d2b2d90911bd4be7f93 --- /dev/null +++ b/src/question_answering/huggingface.py @@ -0,0 +1,40 @@ +from langchain import PromptTemplate +from langchain.chains import RetrievalQA +from langchain.llms import HuggingFacePipeline + +class HuggingFaceQuestionAnswering: + def __init__(self, retriever) -> None: + self.retriever = retriever + self.llm = HuggingFacePipeline.from_model_id( + # model_id="bigscience/bloom-1b7", + model_id="bigscience/bloomz-1b1", + task="text-generation", + device=1, + # model_kwargs={"do_sample": True, "temperature": 0.7, "num_beams": 4, "top_p": 0.95, "repetition_penalty": 1.25, "length_penalty": 1.2}, + model_kwargs={"do_sample": True, "temperature": 0.7, "num_beams": 2}, + # pipeline_kwargs={"max_new_tokens": 256, "min_new_tokens": 30}, + pipeline_kwargs={"max_new_tokens": 256, "min_new_tokens": 30}, + ) + self.chain = None + + def initialize(self): + template = """Use the information contained in the following text: {context}. Complete the phrase: {question} """ + prompt_template = PromptTemplate( + template=template, + input_variables=["context", "question"], + ) + # self.chain = RetrievalQA.from_chain_type(self.llm, retriever=self.retriever.retriever, chain_type_kwargs={"prompt": prompt_template}) + + def answer_question(self, question: str, filter_dict): + retriever = self.retriever.vector_store.db.as_retriever(search_kwargs={"filter": filter_dict, "fetch_k": 150}) + + try: + self.chain = RetrievalQA.from_chain_type(self.llm, retriever=retriever, return_source_documents=True) + result = self.chain({"query": question}) + docs = '\n'.join([x.metadata["paper_title"][:40] + " - " + x.page_content[:40].replace("\n", " ") + "..." for x in result["source_documents"]]) + print(f""" +Retrieved Documents: +{docs if docs != "" else "No documents found."}""") + return result + except: + return {"result": "Error generating answer."} diff --git a/src/start.sh b/src/start.sh new file mode 100644 index 0000000000000000000000000000000000000000..d2513733db25960d81e576d5821debe1e458759f --- /dev/null +++ b/src/start.sh @@ -0,0 +1 @@ +docker run --rm --gpus all --init --ulimit core=0 -p 8070:8070 grobid/grobid:0.8.0-SNAPSHOT \ No newline at end of file diff --git a/src/text_embedding/huggingface.py b/src/text_embedding/huggingface.py new file mode 100644 index 0000000000000000000000000000000000000000..bd69428a1756487f90dd414da6dab4d3b105dd9e --- /dev/null +++ b/src/text_embedding/huggingface.py @@ -0,0 +1,30 @@ +from typing import Iterable, Iterator +from langchain.docstore.document import Document +from langchain.embeddings import HuggingFaceEmbeddings, HuggingFaceHubEmbeddings # TODO check HuggingFaceInstructEmbeddings + +class HuggingFaceTextEmbedding: + def __init__(self) -> None: + model_name = "sentence-transformers/all-mpnet-base-v2" + model_kwargs = {'device': 'cpu'} + encode_kwargs = {'normalize_embeddings': False} + self.model = HuggingFaceEmbeddings( + model_name=model_name, + model_kwargs=model_kwargs, + encode_kwargs=encode_kwargs + ) + + def embed_documents(self, docs: Iterable[Document]) -> Iterator[Document]: + embeddings = self.model.embed_documents(docs) + return embeddings + +# class HuggingFaceInferenceAPITextEmbedding: +# def __init__(self) -> None: +# pass + +# def embed_documents(self, docs: Iterable[Document]) -> Iterator[Document]: +# embeddings = HuggingFaceInferenceAPIEmbeddings( +# api_key=inference_api_key, +# model_name="sentence-transformers/all-MiniLM-l6-v2" +# ) +# chunks = embeddings.embed_documents(docs) +# return chunks \ No newline at end of file diff --git a/src/text_splitter/spacy.py b/src/text_splitter/spacy.py new file mode 100644 index 0000000000000000000000000000000000000000..27975378b71c81089bbb84e449e9bed34651eb1d --- /dev/null +++ b/src/text_splitter/spacy.py @@ -0,0 +1,15 @@ +import spacy_transformers # needed by SpacyTextSplitter when using the en_core_web_trf pipeline +import spacy +from typing import Iterable, Iterator +from langchain.docstore.document import Document +from langchain.text_splitter import SpacyTextSplitter + + +class SpacySplitter: + def __init__(self): + self.splitter = SpacyTextSplitter(chunk_size=1000, pipeline="en_core_web_trf") + + def split_documents(self, docs: Iterable[Document]) -> Iterator[Document]: + spacy.prefer_gpu(gpu_id=1) + chunks = self.splitter.split_documents(docs) + return chunks diff --git a/src/utils.py b/src/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..5a9f505b083c2cfb75ba45f668ec5018bce06d80 --- /dev/null +++ b/src/utils.py @@ -0,0 +1,38 @@ +from pathlib import Path +from langchain.schema import Document +import json +from typing import Iterable + +def save_docs_to_jsonl(array:Iterable[Document], path: str)->None: + """ + Save an array of Document objects to a JSONL file. + + Args: + array: An iterable of Document objects. + path: The path to the folder where the output file should be. + + Returns: + None + """ + Path(path).mkdir(parents=True, exist_ok=True) + with open(f"{path}/data.jsonl", 'w', encoding='utf-8') as jsonl_file: + for doc in array: + jsonl_file.write(doc.json() + '\n') + +def load_docs_from_jsonl(path: str) -> Iterable[Document]: + """ + Load an array of Document objects from a JSONL file. + + Args: + path: The path to the folder where the input file is. + + Returns: + An iterable of Document objects. + """ + array = [] + with open(f"{path}/data.jsonl", 'r', encoding='utf-8') as jsonl_file: + for line in jsonl_file: + data = json.loads(line) + obj = Document(**data) + array.append(obj) + return array diff --git a/src/vector_store/faiss.py b/src/vector_store/faiss.py new file mode 100644 index 0000000000000000000000000000000000000000..e9f81f28e8989ffcd83b7e309b4f936294e83750 --- /dev/null +++ b/src/vector_store/faiss.py @@ -0,0 +1,22 @@ +from langchain.vectorstores import FAISS + +class FAISSVectorStore: + def __init__(self, embedding_model): + self.embedding_model = embedding_model + self.db = None + + def initialize_from_documents(self, docs): + self.db = FAISS.from_documents(docs, self.embedding_model.model) + + def initialize_from_file(self, path): + self.db = FAISS.load_local(path, self.embedding_model.model) + + def save(self, path): + self.db.save_local(path) + + def add_documents(self, documents): + return self.db.add_documents(documents) + + def query(self, query: str, k: int = 4): + # TODO adjust fetch_k parameter. It is now set to match the defaults k=4, fetch_k=20 in the original code. + return self.db.similarity_search_with_score(query, k=k, fetch_k=5*k)