BlinkDL committed on
Commit
a839ef3
1 Parent(s): 519b62b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -6
app.py CHANGED
@@ -5,9 +5,9 @@ from huggingface_hub import hf_hub_download
5
  from pynvml import *
6
  nvmlInit()
7
  gpu_h = nvmlDeviceGetHandleByIndex(0)
8
- ctx_limit = 512
9
- title = "RWKV-4 14B fp16"
10
- desc = f'''DEMO limited to ctxlen {ctx_limit}, and slow because A10g does not have enough VRAM for 14B fp16 (some layers are computed on CPU instead). Links:
11
  <a href='https://github.com/BlinkDL/ChatRWKV' target="_blank" style="margin:0 0.5em">ChatRWKV</a>
12
  <a href='https://github.com/BlinkDL/RWKV-LM' target="_blank" style="margin:0 0.5em">RWKV-LM</a>
13
  <a href="https://pypi.org/project/rwkv/" target="_blank" style="margin:0 0.5em">RWKV pip package</a>
@@ -18,9 +18,7 @@ os.environ["RWKV_CUDA_ON"] = '1' # if '1' then use CUDA kernel for seq mode (muc
18
 
19
  from rwkv.model import RWKV
20
  model_path = hf_hub_download(repo_id="BlinkDL/rwkv-4-pile-14b", filename="RWKV-4-Pile-14B-20230228-ctx4096-test663.pth")
21
- model = RWKV(model=model_path, strategy='cuda fp16 *32 -> cpu fp32')
22
- # model_path = hf_hub_download(repo_id="BlinkDL/rwkv-4-pile-169m", filename="RWKV-4-Pile-169M-20220807-8023.pth")
23
- # model = RWKV(model=model_path, strategy='cuda fp16')
24
  from rwkv.utils import PIPELINE, PIPELINE_ARGS
25
  pipeline = PIPELINE(model, "20B_tokenizer.json")
26
 
 
5
  from pynvml import *
6
  nvmlInit()
7
  gpu_h = nvmlDeviceGetHandleByIndex(0)
8
+ ctx_limit = 1024
9
+ title = "RWKV-4-Pile-14B-20230228-ctx4096-test663"
10
+ desc = f'''DEMO limited to ctxlen {ctx_limit}. Links:
11
  <a href='https://github.com/BlinkDL/ChatRWKV' target="_blank" style="margin:0 0.5em">ChatRWKV</a>
12
  <a href='https://github.com/BlinkDL/RWKV-LM' target="_blank" style="margin:0 0.5em">RWKV-LM</a>
13
  <a href="https://pypi.org/project/rwkv/" target="_blank" style="margin:0 0.5em">RWKV pip package</a>
 
18
 
19
  from rwkv.model import RWKV
20
  model_path = hf_hub_download(repo_id="BlinkDL/rwkv-4-pile-14b", filename="RWKV-4-Pile-14B-20230228-ctx4096-test663.pth")
21
+ model = RWKV(model=model_path, strategy='cuda fp16i8 *10 -> cuda fp16')
 
 
22
  from rwkv.utils import PIPELINE, PIPELINE_ARGS
23
  pipeline = PIPELINE(model, "20B_tokenizer.json")
24