masanorihirano commited on
Commit
f732d7c
·
1 Parent(s): f011117
Files changed (7) hide show
  1. .gitignore +163 -0
  2. Dockerfile +36 -0
  3. Makefile +35 -0
  4. README.md +1 -1
  5. app.py +138 -0
  6. model_pull.py +18 -0
  7. pyproject.toml +60 -0
.gitignore ADDED
@@ -0,0 +1,163 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ .idea
2
+ .env
3
+ poetry.lock
4
+ # Byte-compiled / optimized / DLL files
5
+ __pycache__/
6
+ *.py[cod]
7
+ *$py.class
8
+
9
+ # C extensions
10
+ *.so
11
+
12
+ # Distribution / packaging
13
+ .Python
14
+ build/
15
+ develop-eggs/
16
+ dist/
17
+ downloads/
18
+ eggs/
19
+ .eggs/
20
+ lib/
21
+ lib64/
22
+ parts/
23
+ sdist/
24
+ var/
25
+ wheels/
26
+ share/python-wheels/
27
+ *.egg-info/
28
+ .installed.cfg
29
+ *.egg
30
+ MANIFEST
31
+
32
+ # PyInstaller
33
+ # Usually these files are written by a python script from a template
34
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
35
+ *.manifest
36
+ *.spec
37
+
38
+ # Installer logs
39
+ pip-log.txt
40
+ pip-delete-this-directory.txt
41
+
42
+ # Unit test / coverage reports
43
+ htmlcov/
44
+ .tox/
45
+ .nox/
46
+ .coverage
47
+ .coverage.*
48
+ .cache
49
+ nosetests.xml
50
+ coverage.xml
51
+ *.cover
52
+ *.py,cover
53
+ .hypothesis/
54
+ .pytest_cache/
55
+ cover/
56
+
57
+ # Translations
58
+ *.mo
59
+ *.pot
60
+
61
+ # Django stuff:
62
+ *.log
63
+ local_settings.py
64
+ db.sqlite3
65
+ db.sqlite3-journal
66
+
67
+ # Flask stuff:
68
+ instance/
69
+ .webassets-cache
70
+
71
+ # Scrapy stuff:
72
+ .scrapy
73
+
74
+ # Sphinx documentation
75
+ docs/_build/
76
+
77
+ # PyBuilder
78
+ .pybuilder/
79
+ target/
80
+
81
+ # Jupyter Notebook
82
+ .ipynb_checkpoints
83
+
84
+ # IPython
85
+ profile_default/
86
+ ipython_config.py
87
+
88
+ # pyenv
89
+ # For a library or package, you might want to ignore these files since the code is
90
+ # intended to run in multiple environments; otherwise, check them in:
91
+ # .python-version
92
+
93
+ # pipenv
94
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
95
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
96
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
97
+ # install all needed dependencies.
98
+ #Pipfile.lock
99
+
100
+ # poetry
101
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
102
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
103
+ # commonly ignored for libraries.
104
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
105
+ #poetry.lock
106
+
107
+ # pdm
108
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
109
+ #pdm.lock
110
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
111
+ # in version control.
112
+ # https://pdm.fming.dev/#use-with-ide
113
+ .pdm.toml
114
+
115
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
116
+ __pypackages__/
117
+
118
+ # Celery stuff
119
+ celerybeat-schedule
120
+ celerybeat.pid
121
+
122
+ # SageMath parsed files
123
+ *.sage.py
124
+
125
+ # Environments
126
+ .env
127
+ .venv
128
+ env/
129
+ venv/
130
+ ENV/
131
+ env.bak/
132
+ venv.bak/
133
+
134
+ # Spyder project settings
135
+ .spyderproject
136
+ .spyproject
137
+
138
+ # Rope project settings
139
+ .ropeproject
140
+
141
+ # mkdocs documentation
142
+ /site
143
+
144
+ # mypy
145
+ .mypy_cache/
146
+ .dmypy.json
147
+ dmypy.json
148
+
149
+ # Pyre type checker
150
+ .pyre/
151
+
152
+ # pytype static type analyzer
153
+ .pytype/
154
+
155
+ # Cython debug symbols
156
+ cython_debug/
157
+
158
+ # PyCharm
159
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
160
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
161
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
162
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
163
+ #.idea/
Dockerfile ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM docker.io/nvidia/cuda:11.7.1-cudnn8-devel-ubuntu20.04
2
+ COPY --link --chown=1000 ./ /home/user/app
3
+ RUN useradd -m -u 1000 user
4
+ ENV TZ=Asia/Tokyo
5
+ RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
6
+ RUN sed -i 's http://deb.debian.org http://cdn-aws.deb.debian.org g' /etc/apt/sources.list && \
7
+ sed -i 's http://archive.ubuntu.com http://us-east-1.ec2.archive.ubuntu.com g' /etc/apt/sources.list && \
8
+ sed -i '/security/d' /etc/apt/sources.list && apt-get update && \
9
+ apt-get install -y \
10
+ git \
11
+ make build-essential libssl-dev zlib1g-dev \
12
+ libbz2-dev libreadline-dev libsqlite3-dev wget curl llvm \
13
+ libncursesw5-dev xz-utils tk-dev libxml2-dev libxmlsec1-dev libffi-dev liblzma-dev git-lfs \
14
+ ffmpeg libsm6 libxext6 cmake libgl1-mesa-glx && \
15
+ rm -rf /var/lib/apt/lists/* && \
16
+ git lfs install
17
+ RUN curl https://pyenv.run | bash
18
+ ENV PYENV_ROOT /root/.pyenv
19
+ ENV PATH ${PYENV_ROOT}/shims:${PYENV_ROOT}/bin:${PATH}
20
+ RUN eval "$(pyenv init -)" && \
21
+ eval "$(pyenv virtualenv-init -)" && \
22
+ pyenv install 3.9.7 && \
23
+ pyenv global 3.9.7 && \
24
+ pyenv rehash && \
25
+ pip install --no-cache-dir --upgrade pip==22.3.1 setuptools wheel && \
26
+ pip install --no-cache-dir datasets "huggingface-hub>=0.12.1" "protobuf<4" "click<8.1" && \
27
+ curl -sSL https://install.python-poetry.org | python -
28
+ ENV PATH /root/.local/bin:${PATH}
29
+ WORKDIR /home/user/app
30
+
31
+ RUN poetry install
32
+ RUN --mount=type=secret,id=HF_TOKEN,mode=0444,required=true \
33
+ git config --global credential.helper store && \
34
+ huggingface-cli login --token $(cat /run/secrets/HF_TOKEN) --add-to-git-credential
35
+ RUN poetry run python model_pull.py
36
+ CMD ["env", "poetry", "run", "python", "app.py", "--host", "0.0.0.0", "--port", "7860"]
Makefile ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ RUN := poetry run
3
+
4
+ .PHONY: check
5
+ check: lint mypy
6
+
7
+ .PHONY: lint
8
+ lint: lint-black lint-isort lint-flake8
9
+
10
+ .PHONY: lint-black
11
+ lint-black:
12
+ $(RUN) black --check --diff --quiet .
13
+
14
+ .PHONY: lint-isort
15
+ lint-isort:
16
+ $(RUN) isort --check --quiet .
17
+
18
+ .PHONY: lint-flake8
19
+ lint-flake8:
20
+ $(RUN) pflake8 .
21
+
22
+ .PHONY: mypy
23
+ mypy:
24
+ $(RUN) mypy .
25
+
26
+ .PHONY: format
27
+ format: format-black format-isort
28
+
29
+ .PHONY: format-black
30
+ format-black:
31
+ $(RUN) black --quiet .
32
+
33
+ .PHONY: format-isort
34
+ format-isort:
35
+ $(RUN) isort --quiet .
README.md CHANGED
@@ -1,5 +1,5 @@
1
  ---
2
- title: Llama 13b Japanese Lora V0 1ep
3
  emoji: 🐨
4
  colorFrom: gray
5
  colorTo: gray
 
1
  ---
2
+ title: LLaMA 13B Japanese LoRA v0 1 epoch
3
  emoji: 🐨
4
  colorFrom: gray
5
  colorTo: gray
app.py ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Optional
2
+
3
+ import gradio as gr
4
+ import torch
5
+ from peft import PeftModel
6
+ from transformers import AutoModelForCausalLM
7
+ from transformers import AutoTokenizer
8
+ from transformers import GenerationConfig
9
+
10
+ print("starting server ...")
11
+ BASE_MODEL = "decapoda-research/llama-13b-hf"
12
+ LORA_WEIGHTS = "izumi-lab/llama-13b-japanese-lora-v0-1ep"
13
+
14
+ tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
15
+
16
+ if torch.cuda.is_available():
17
+ device = "cuda"
18
+ else:
19
+ device = "cpu"
20
+
21
+ try:
22
+ if torch.backends.mps.is_available():
23
+ device = "mps"
24
+ except Exception:
25
+ pass
26
+
27
+ if device == "cuda":
28
+ model = AutoModelForCausalLM.from_pretrained(
29
+ BASE_MODEL,
30
+ load_in_8bit=False,
31
+ torch_dtype=torch.float16,
32
+ device_map="auto",
33
+ )
34
+ model = PeftModel.from_pretrained(
35
+ model, LORA_WEIGHTS, torch_dtype=torch.float16
36
+ )
37
+ elif device == "mps":
38
+ model = AutoModelForCausalLM.from_pretrained(
39
+ BASE_MODEL,
40
+ device_map={"": device},
41
+ torch_dtype=torch.float16,
42
+ )
43
+ model = PeftModel.from_pretrained(
44
+ model,
45
+ LORA_WEIGHTS,
46
+ device_map={"": device},
47
+ torch_dtype=torch.float16,
48
+ )
49
+ else:
50
+ model = AutoModelForCausalLM.from_pretrained(
51
+ BASE_MODEL, device_map={"": device}, low_cpu_mem_usage=True
52
+ )
53
+ model = PeftModel.from_pretrained(
54
+ model,
55
+ LORA_WEIGHTS,
56
+ device_map={"": device},
57
+ )
58
+
59
+
60
+ def generate_prompt(instruction: str, input: Optional[str] = None):
61
+ if input:
62
+ return f"""Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
63
+ ### Instruction:
64
+ {instruction}
65
+ ### Input:
66
+ {input}
67
+ ### Response:"""
68
+ else:
69
+ return f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.
70
+ ### Instruction:
71
+ {instruction}
72
+ ### Response:"""
73
+
74
+
75
+ if device != "cpu":
76
+ model.half()
77
+ model.eval()
78
+ if torch.__version__ >= "2":
79
+ model = torch.compile(model)
80
+
81
+
82
+ def evaluate(
83
+ instruction: str,
84
+ input: Optional[str] = None,
85
+ temperature: float = 0.7,
86
+ top_p: float = 1.0,
87
+ top_k: int = 40,
88
+ num_beams: int = 4,
89
+ max_new_tokens: int = 256,
90
+ **kwargs,
91
+ ):
92
+ prompt = generate_prompt(instruction, input)
93
+ inputs = tokenizer(prompt, return_tensors="pt")
94
+ input_ids = inputs["input_ids"].to(device)
95
+ generation_config = GenerationConfig(
96
+ temperature=temperature,
97
+ top_p=top_p,
98
+ top_k=top_k,
99
+ num_beams=num_beams,
100
+ **kwargs,
101
+ )
102
+ with torch.no_grad():
103
+ generation_output = model.generate(
104
+ input_ids=input_ids,
105
+ generation_config=generation_config,
106
+ return_dict_in_generate=True,
107
+ output_scores=True,
108
+ max_new_tokens=max_new_tokens,
109
+ )
110
+ s = generation_output.sequences[0]
111
+ output = tokenizer.decode(s)
112
+ return output.split("### Response:")[1].strip()
113
+
114
+
115
+ g = gr.Interface(
116
+ fn=evaluate,
117
+ inputs=[
118
+ gr.components.Textbox(lines=2, label="Instruction", placeholder="東京から大阪に行くには?"),
119
+ gr.components.Textbox(lines=2, label="Input", placeholder="none"),
120
+ gr.components.Slider(minimum=0, maximum=1, value=0.7, label="Temperature"),
121
+ gr.components.Slider(minimum=0, maximum=1, value=1.0, label="Top p"),
122
+ gr.components.Slider(minimum=0, maximum=100, step=1, value=40, label="Top k"),
123
+ gr.components.Slider(minimum=1, maximum=4, step=1, value=4, label="Beams"),
124
+ gr.components.Slider(
125
+ minimum=1, maximum=512, step=1, value=128, label="Max tokens"
126
+ ),
127
+ ],
128
+ outputs=[
129
+ gr.inputs.Textbox(
130
+ lines=5,
131
+ label="Output",
132
+ )
133
+ ],
134
+ title="izumi-lab/calm-7b-lora-v0-1ep",
135
+ description="izumi-lab/calm-7b-lora-v0-1ep is a 7B-parameter Calm model finetuned to follow instructions. It is trained on the [izumi-lab/llm-japanese-dataset](https://huggingface.co/datasets/izumi-lab/llm-japanese-dataset) dataset and makes use of the Huggingface Calm-7b implementation. For more information, please visit [the project's website](https://llm.msuzuki.me).",
136
+ )
137
+ g.queue(concurrency_count=1)
138
+ g.launch()
model_pull.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from peft import PeftModel
3
+ from transformers import AutoModelForCausalLM
4
+ from transformers import AutoTokenizer
5
+
6
+ BASE_MODEL = "decapoda-research/llama-13b-hf"
7
+ LORA_WEIGHTS = "izumi-lab/llama-13b-japanese-lora-v0-1ep"
8
+
9
+ tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
10
+ model = AutoModelForCausalLM.from_pretrained(
11
+ BASE_MODEL,
12
+ load_in_8bit=False,
13
+ torch_dtype=torch.float16,
14
+ device_map="auto",
15
+ )
16
+ model = PeftModel.from_pretrained(
17
+ model, LORA_WEIGHTS, torch_dtype=torch.float16, use_auth_token=True
18
+ )
pyproject.toml ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [tool.poetry]
2
+ name = "space-llama-13b-japanese-lora-v0-1ep"
3
+ version = "0.1.0"
4
+ description = ""
5
+ authors = ["Masanori HIRANO <masa.hirano.1996@gmail.com>"]
6
+ license = "other"
7
+ readme = "README.md"
8
+
9
+ [tool.poetry.dependencies]
10
+ python = "^3.9"
11
+ peft = "^0.3.0"
12
+ transformers = {git = "https://github.com/huggingface/transformers.git", branch = "main"}
13
+ gradio = "^3.32.0"
14
+ torch = "^2.0.1"
15
+ huggingface-hub = "^0.14.1"
16
+
17
+
18
+ [tool.poetry.group.dev.dependencies]
19
+ black = "^23.3.0"
20
+ isort = "^5.12.0"
21
+ mypy = "^1.3.0"
22
+ flake8 = "^6.0.0"
23
+ pyproject-flake8 = "^6.0.0.post1"
24
+
25
+ [build-system]
26
+ requires = ["poetry-core"]
27
+ build-backend = "poetry.core.masonry.api"
28
+
29
+ [tool.isort]
30
+ profile = 'black'
31
+ force_single_line = true
32
+ skip = [
33
+ ".git",
34
+ "__pycache__",
35
+ "docs",
36
+ "build",
37
+ "dist",
38
+ "examples",
39
+ ".venv",
40
+ "tests/examples"
41
+ ]
42
+
43
+ [tool.mypy]
44
+ disallow_untyped_defs = true
45
+ ignore_missing_imports = true
46
+
47
+ [tool.flake8]
48
+ ignore = "E203,E231,E501,W503"
49
+ max-line-length = 88
50
+ exclude = [
51
+ ".git",
52
+ "__pycache__",
53
+ "docs",
54
+ "build",
55
+ "dist",
56
+ "examples",
57
+ ".venv",
58
+ "__init__.py"
59
+ ]
60
+ select = "B,B950,C,E,F,W"