Spaces: Running on T4
Update app.py
Browse files
app.py
CHANGED
@@ -5,9 +5,9 @@ from huggingface_hub import hf_hub_download
|
|
5 |
from pynvml import *
|
6 |
nvmlInit()
|
7 |
gpu_h = nvmlDeviceGetHandleByIndex(0)
|
8 |
-
|
9 |
title = "RWKV-4 14B fp16"
|
10 |
-
desc = '''DEMO limited to ctxlen
|
11 |
<a href='https://github.com/BlinkDL/ChatRWKV' target="_blank" style="margin:0 0.5em">ChatRWKV</a>
|
12 |
<a href='https://github.com/BlinkDL/RWKV-LM' target="_blank" style="margin:0 0.5em">RWKV-LM</a>
|
13 |
<a href="https://pypi.org/project/rwkv/" target="_blank" style="margin:0 0.5em">RWKV pip package</a>
|
@@ -18,7 +18,7 @@ os.environ["RWKV_CUDA_ON"] = '1' # if '1' then use CUDA kernel for seq mode (muc
|
|
18 |
|
19 |
from rwkv.model import RWKV
|
20 |
model_path = hf_hub_download(repo_id="BlinkDL/rwkv-4-pile-14b", filename="RWKV-4-Pile-14B-20230213-8019.pth")
|
21 |
-
model = RWKV(model=model_path, strategy='cuda fp16 *
|
22 |
# model_path = hf_hub_download(repo_id="BlinkDL/rwkv-4-pile-169m", filename="RWKV-4-Pile-169M-20220807-8023.pth")
|
23 |
# model = RWKV(model=model_path, strategy='cuda fp16')
|
24 |
from rwkv.utils import PIPELINE, PIPELINE_ARGS
|
@@ -53,7 +53,7 @@ def infer(
|
|
53 |
occurrence = {}
|
54 |
state = None
|
55 |
for i in range(int(token_count)):
|
56 |
-
out, state = model.forward(pipeline.encode(ctx)[:
|
57 |
for n in args.token_ban:
|
58 |
out[n] = -float('inf')
|
59 |
for n in occurrence:
|
|
|
5 |
from pynvml import *
|
6 |
nvmlInit()
|
7 |
gpu_h = nvmlDeviceGetHandleByIndex(0)
|
8 |
+
ctx_limit = 512
|
9 |
title = "RWKV-4 14B fp16"
|
10 |
+
desc = f'''DEMO limited to ctxlen {ctx_limit}, and slow because A10g does not have enough VRAM (some layers are computed on CPU instead). Links:
|
11 |
<a href='https://github.com/BlinkDL/ChatRWKV' target="_blank" style="margin:0 0.5em">ChatRWKV</a>
|
12 |
<a href='https://github.com/BlinkDL/RWKV-LM' target="_blank" style="margin:0 0.5em">RWKV-LM</a>
|
13 |
<a href="https://pypi.org/project/rwkv/" target="_blank" style="margin:0 0.5em">RWKV pip package</a>
|
|
|
18 |
|
19 |
from rwkv.model import RWKV
|
20 |
model_path = hf_hub_download(repo_id="BlinkDL/rwkv-4-pile-14b", filename="RWKV-4-Pile-14B-20230213-8019.pth")
|
21 |
+
model = RWKV(model=model_path, strategy='cuda fp16 *33 -> cpu fp32')
|
22 |
# model_path = hf_hub_download(repo_id="BlinkDL/rwkv-4-pile-169m", filename="RWKV-4-Pile-169M-20220807-8023.pth")
|
23 |
# model = RWKV(model=model_path, strategy='cuda fp16')
|
24 |
from rwkv.utils import PIPELINE, PIPELINE_ARGS
|
|
|
53 |
occurrence = {}
|
54 |
state = None
|
55 |
for i in range(int(token_count)):
|
56 |
+
out, state = model.forward(pipeline.encode(ctx)[:ctx_limit] if i == 0 else [token], state)
|
57 |
for n in args.token_ban:
|
58 |
out[n] = -float('inf')
|
59 |
for n in occurrence:
|