yujiepan committed on
Commit 67d75a5
1 Parent(s): d1dc452

Upload folder using huggingface_hub

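The commit message points at huggingface_hub's upload step. The README snippet in the diff below imports `create_repo` and `upload_folder`, but the shown hunks end before the upload call itself. A minimal sketch of how that final step typically looks, assuming the `repo_id` and `save_path` variables defined in the script; the concrete values here are hypothetical placeholders, since the diff truncates before them:

```python
from huggingface_hub import create_repo, upload_folder

# Hypothetical values; the real repo_id / save_path come from the script
# shown in the diff below (the actual names are not visible in the hunks).
repo_id = 'yujiepan/mamba-tiny-random'  # placeholder name
save_path = '/tmp/mamba-tiny-random'    # placeholder local folder

create_repo(repo_id, exist_ok=True)  # no-op if the repo already exists
upload_folder(repo_id=repo_id, folder_path=save_path)
```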
Files changed (1)
  1. README.md +24 -14
README.md CHANGED
@@ -13,9 +13,11 @@ Note the model is in float16.
 
 Codes:
 ```python
-import transformers
-import torch
 import os
+
+import torch
+
+import transformers
 from huggingface_hub import create_repo, upload_folder
 
 source_model_id = 'state-spaces/mamba-2.8b-hf'
@@ -25,32 +27,40 @@ repo_id = f'yujiepan/{tiny_random_name}'
 
 config = transformers.AutoConfig.from_pretrained(
     source_model_id, trust_remote_code=True)
-config.hidden_size = 4
-config.intermediate_size = 6
-# config.num_attention_heads = 4
-# config.num_key_value_heads = 2
+config.hidden_size = 8
+config.expand = 4
+config.intermediate_size = 32
+config.state_size = 8
 config.num_hidden_layers = 2
 config.n_layer = 2
-
-config.torch_dtype = torch.float16
+config.torch_dtype = torch.bfloat16
 
 model = transformers.AutoModelForCausalLM.from_config(
-    config, trust_remote_code=True, torch_dtype=torch.float16)
-model = model.half()
+    config, torch_dtype=torch.bfloat16,
+    trust_remote_code=True,
+)
+model.generation_config = transformers.GenerationConfig.from_pretrained(
+    source_model_id,
+    trust_remote_code=True,
+)
 
+transformers.set_seed(42)
+with torch.no_grad():
+    for name, p in sorted(model.named_parameters()):
+        print(name, p.shape)
+        torch.nn.init.uniform_(p, -0.5, 0.5)
+
+model.save_pretrained(save_path)
 tokenizer = transformers.AutoTokenizer.from_pretrained(
     source_model_id, trust_remote_code=True)
 
 result = transformers.pipelines.pipeline(
     'text-generation',
     model=model, tokenizer=tokenizer,
-    device=0,
+    device='cuda',
     max_new_tokens=16,
 )('Hello')
 print(result)
-# model = model.cuda()
-# response, history = model.chat(tokenizer, "Hi", history=[], max_length=32)
-# print(response)
 
 model.save_pretrained(save_path)
 tokenizer.save_pretrained(save_path)
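One note on the new hyperparameters: in transformers' `MambaConfig`, `intermediate_size` is (to my understanding) derived as `expand * hidden_size`, so setting it explicitly to 32 agrees with the other new values (4 * 8 = 32). A quick sanity check, assuming a transformers version with Mamba support:

```python
import transformers

# If MambaConfig derives intermediate_size = expand * hidden_size,
# this matches the value set explicitly in the diff above.
config = transformers.MambaConfig(
    hidden_size=8,
    expand=4,
    state_size=8,
    num_hidden_layers=2,
)
print(config.intermediate_size)  # expected: 32
```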