File size: 7,661 Bytes
a1d409e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
# Copyright 2020 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import subprocess
import sys

from transformers import BertConfig, BertModel, BertTokenizer, pipeline
from transformers.testing_utils import TestCasePlus, require_torch


class OfflineTests(TestCasePlus):
    """
    Checks that models can be loaded from the local cache when the network is unavailable.

    Every scenario runs in a child interpreter: `TRANSFORMERS_OFFLINE` can only be changed before `transformers`
    is loaded, which is too late from inside pytest. The child program is assembled from up to three source
    segments joined with newlines:

    - `load`: imports from `transformers`; this must run before `socket.socket` is monkey-patched
    - `mock`: replaces `socket.socket` with a function that always raises
    - `run`: loads the model(s) under test and prints "success"

    For the patched socket to have any effect on the loads, `mock` has to be placed before `run`.
    """

    def _run_python(self, *segments, **env_overrides):
        """
        Run source segments as a single `python -c` program in a child interpreter.

        Args:
            *segments (`str`):
                Source fragments, joined with newlines in the order given.
            **env_overrides (`str`):
                Environment variables set on top of the mapping returned by `self.get_env()`.

        Returns:
            `subprocess.CompletedProcess`: The finished process with `stdout` and `stderr` captured as bytes. A
            non-zero exit code does not raise (`check=False`); callers assert on `returncode` themselves.
        """
        env = self.get_env()
        env.update(env_overrides)
        cmd = [sys.executable, "-c", "\n".join(segments)]
        return subprocess.run(cmd, env=env, check=False, capture_output=True)

    def _assert_success(self, result):
        """Assert that the child process exited with code 0 and printed "success" on stdout."""
        # stderr is decoded so that the child's traceback is readable in the failure message
        self.assertEqual(result.returncode, 0, result.stderr.decode(errors="replace"))
        self.assertIn("success", result.stdout.decode())

    def _prefetch_tiny_bert(self):
        """Load the tiny BERT checkpoint in this process to force fetching its files, so the cache can be used."""
        mname = "hf-internal-testing/tiny-random-bert"
        BertConfig.from_pretrained(mname)
        BertModel.from_pretrained(mname)
        BertTokenizer.from_pretrained(mname)
        pipeline(task="fill-mask", model=mname)

    @require_torch
    def test_offline_mode(self):
        # this test is a bit tricky since TRANSFORMERS_OFFLINE can only be changed before
        # `transformers` is loaded, and it's too late for inside pytest - so we are changing it
        # while running an external program

        # python one-liner segments

        # this must be loaded before socket.socket is monkey-patched
        load = """
from transformers import BertConfig, BertModel, BertTokenizer, pipeline
        """

        run = """
mname = "hf-internal-testing/tiny-random-bert"
BertConfig.from_pretrained(mname)
BertModel.from_pretrained(mname)
BertTokenizer.from_pretrained(mname)
pipe = pipeline(task="fill-mask", model=mname)
print("success")
        """

        mock = """
import socket
def offline_socket(*args, **kwargs): raise RuntimeError("Offline mode is enabled, we shouldn't access internet")
socket.socket = offline_socket
        """

        # Force fetching the files so that we can use the cache
        self._prefetch_tiny_bert()

        # `mock` goes before `run` so that any attempt to open a socket while loading makes the child fail.
        # Should succeed as TRANSFORMERS_OFFLINE=1 tells it to use local files
        result = self._run_python(load, mock, run, TRANSFORMERS_OFFLINE="1")
        self._assert_success(result)

    @require_torch
    def test_offline_mode_no_internet(self):
        # python one-liner segments
        # this must be loaded before socket.socket is monkey-patched
        load = """
from transformers import BertConfig, BertModel, BertTokenizer, pipeline
        """

        run = """
mname = "hf-internal-testing/tiny-random-bert"
BertConfig.from_pretrained(mname)
BertModel.from_pretrained(mname)
BertTokenizer.from_pretrained(mname)
pipe = pipeline(task="fill-mask", model=mname)
print("success")
        """

        mock = """
import socket
def offline_socket(*args, **kwargs): raise socket.error("Faking flaky internet")
socket.socket = offline_socket
        """

        # Force fetching the files so that we can use the cache
        self._prefetch_tiny_bert()

        # TRANSFORMERS_OFFLINE is deliberately left untouched here. `mock` goes before `run` so that the
        # loads really see the failing socket; they should still succeed thanks to the files fetched above.
        result = self._run_python(load, mock, run)
        self._assert_success(result)

    @require_torch
    def test_offline_mode_sharded_checkpoint(self):
        # this test is a bit tricky since TRANSFORMERS_OFFLINE can only be changed before
        # `transformers` is loaded, and it's too late for inside pytest - so we are changing it
        # while running an external program

        # python one-liner segments

        # this must be loaded before socket.socket is monkey-patched
        load = """
from transformers import BertConfig, BertModel, BertTokenizer
        """

        run = """
mname = "hf-internal-testing/tiny-random-bert-sharded"
BertConfig.from_pretrained(mname)
BertModel.from_pretrained(mname)
print("success")
        """

        mock = """
import socket
def offline_socket(*args, **kwargs): raise ValueError("Offline mode is enabled")
socket.socket = offline_socket
        """

        # baseline - just load from_pretrained with normal network, should succeed
        result = self._run_python(load, run)
        self._assert_success(result)

        # next emulate no network. The same program without TRANSFORMERS_OFFLINE=1 is not asserted to fail:
        # it doesn't fail anymore since the model is in the cache due to other tests.

        # should succeed as TRANSFORMERS_OFFLINE=1 tells it to use local files
        result = self._run_python(load, mock, run, TRANSFORMERS_OFFLINE="1")
        self._assert_success(result)

    @require_torch
    def test_offline_mode_pipeline_exception(self):
        load = """
from transformers import pipeline
        """
        run = """
mname = "hf-internal-testing/tiny-random-bert"
pipe = pipeline(model=mname)
        """

        mock = """
import socket
def offline_socket(*args, **kwargs): raise socket.error("Offline mode is enabled")
socket.socket = offline_socket
        """

        # no `task` is passed to `pipeline`, so the child is expected to exit with an error in offline mode
        result = self._run_python(load, mock, run, TRANSFORMERS_OFFLINE="1")
        stderr = result.stderr.decode()
        self.assertEqual(result.returncode, 1, stderr)
        self.assertIn("You cannot infer task automatically within `pipeline` when using offline mode", stderr)

    @require_torch
    def test_offline_model_dynamic_model(self):
        load = """
from transformers import AutoModel
        """
        run = """
mname = "hf-internal-testing/test_dynamic_model"
AutoModel.from_pretrained(mname, trust_remote_code=True)
print("success")
        """

        # baseline - just load from_pretrained with normal network, should succeed
        result = self._run_python(load, run)
        self._assert_success(result)

        # should succeed as TRANSFORMERS_OFFLINE=1 tells it to use local files
        result = self._run_python(load, run, TRANSFORMERS_OFFLINE="1")
        self._assert_success(result)