use-inference-endpoint

Pull request #2
Opened by lysandre (HF staff)
Files changed (2):
  1. front/js-src/Api.ts +45 -39
  2. nginx.conf +1 -27
front/js-src/Api.ts CHANGED
@@ -15,39 +15,40 @@ export class Api {
15
  // `http://coconut-proxy.huggingface.test`
16
  // `http://coconuthf.eastus.cloudapp.azure.com:6006`
17
  // "http://localhost:6006"
18
- `https://transformer.huggingface.co`
 
19
  ;
20
  static shared = new Api();
21
 
22
- private path(p: string): string {
23
- return `${Api.ENDPOINT}/${p}`;
24
- }
25
-
26
  private async postAutocomplete(
27
  params: {
28
- context: string;
29
- model_size?: string; /// 'small' | 'medium',
30
- top_p?: number; /// float between 0 and 1
31
- temperature?: number; /// float between 0 and 100
32
- step_size?: number;
33
- kl_scale?: number;
34
- gm_scale?: number;
35
- num_iterations?: number;
36
- gen_length?: number;
37
- max_time?: number; /// <- if we want to limit the response time. (in sec)
38
- bow_or_discrim?: string;
39
- use_sampling?: boolean;
 
 
 
 
40
  }
41
- ): Promise<AutocompleteOutput> {
42
-
43
- const path = this.path(`autocomplete/${params.model_size || ""}`);
44
-
45
- const response = await fetch(path, {
46
  method: 'POST',
47
  headers: { 'Content-Type': 'application/json' },
48
  body: JSON.stringify(params),
49
- });
50
- return await response.json() as AutocompleteOutput;
 
 
 
51
  }
52
 
53
  /**
@@ -86,21 +87,26 @@ export class Api {
86
  const use_sampling = (
87
  document.querySelector<HTMLInputElement>('.decoder-settings input[name=use_sampling]') || {}
88
  ).checked;
89
-
90
- return this.postAutocomplete({
91
- ...params,
92
- model_size,
93
- top_p,
94
- temperature,
95
- step_size,
96
- kl_scale,
97
- gm_scale,
98
- num_iterations,
99
- gen_length,
100
- max_time,
101
- bow_or_discrim,
102
- use_sampling,
103
- });
 
 
 
 
 
104
  }
105
 
106
  /**
 
15
  // `http://coconut-proxy.huggingface.test`
16
  // `http://coconuthf.eastus.cloudapp.azure.com:6006`
17
  // "http://localhost:6006"
18
+ // `https://transformer.huggingface.co`
19
+ `https://wvnbdkpga3d4aev9.us-east-1.aws.endpoints.huggingface.cloud`
20
  ;
21
  static shared = new Api();
22
 
 
 
 
 
23
  private async postAutocomplete(
24
  params: {
25
+ inputs: {
26
+ context: string;
27
+ kwargs: {
28
+ model_size?: string; /// 'small' | 'medium',
29
+ top_p?: number; /// float between 0 and 1
30
+ temperature?: number; /// float between 0 and 100
31
+ step_size?: number;
32
+ kl_scale?: number;
33
+ gm_scale?: number;
34
+ num_iterations?: number;
35
+ gen_length?: number;
36
+ max_time?: number; /// <- if we want to limit the response time. (in sec)
37
+ bow_or_discrim?: string;
38
+ use_sampling?: boolean;
39
+ }
40
+ }
41
  }
42
+ ): Promise<any> {
43
+ const init = {
 
 
 
44
  method: 'POST',
45
  headers: { 'Content-Type': 'application/json' },
46
  body: JSON.stringify(params),
47
+ }
48
+
49
+ const response = await fetch(Api.ENDPOINT, init)
50
+ const result = JSON.parse(await response.json())
51
+ return result
52
  }
53
 
54
  /**
 
87
  const use_sampling = (
88
  document.querySelector<HTMLInputElement>('.decoder-settings input[name=use_sampling]') || {}
89
  ).checked;
90
+
91
+ const sent_params = {
92
+ inputs: {
93
+ ...params,
94
+ kwargs: {
95
+ model_size,
96
+ top_p,
97
+ temperature,
98
+ step_size,
99
+ kl_scale,
100
+ gm_scale,
101
+ num_iterations,
102
+ gen_length,
103
+ max_time,
104
+ bow_or_discrim,
105
+ use_sampling,
106
+ }
107
+ }
108
+ }
109
+ return this.postAutocomplete(sent_params);
110
  }
111
 
112
  /**
nginx.conf CHANGED
@@ -36,7 +36,7 @@ http {
36
  }
37
 
38
  location /autocomplete {
39
- proxy_pass $NGINX_NEURALGENV2_URL;
40
  proxy_set_header Host $host;
41
  proxy_set_header X-Real-IP $remote_addr;
42
  proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
@@ -47,31 +47,5 @@ http {
47
  add_header Access-Control-Allow-Headers Content-Type always; # <- circumvent cors for Firefox
48
  # mirror /mirror_autocomplete;
49
  }
50
-
51
- location ~ ^/autocomplete/(gpt2\/xl) {
52
- # was turned off.
53
- proxy_pass $NGINX_AUTOCOMPLETE_URL;
54
- proxy_set_header Host $host;
55
- proxy_set_header X-Real-IP $remote_addr;
56
- proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
57
- proxy_set_header X-Forwarded-Proto $scheme;
58
- proxy_buffering off;
59
- proxy_http_version 1.1;
60
- add_header X-Jacqueline True always;
61
- add_header Access-Control-Allow-Headers Content-Type always; # <- circumvent cors for Firefox
62
- }
63
-
64
- location ~ ^/autocomplete/(ctrl|pplm) {
65
- # was turned off.
66
- proxy_pass $NGINX_CTRL_PPLM_URL;
67
- proxy_set_header Host $host;
68
- proxy_set_header X-Real-IP $remote_addr;
69
- proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
70
- proxy_set_header X-Forwarded-Proto $scheme;
71
- proxy_buffering off;
72
- proxy_http_version 1.1;
73
- add_header X-Jacinthe True always;
74
- add_header Access-Control-Allow-Headers Content-Type always; # <- circumvent cors for Firefox
75
- }
76
  }
77
  }
 
36
  }
37
 
38
  location /autocomplete {
39
+ proxy_pass https://yvkpzfc4acovo7gs.us-east-1.aws.endpoints.huggingface.cloud;
40
  proxy_set_header Host $host;
41
  proxy_set_header X-Real-IP $remote_addr;
42
  proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
 
47
  add_header Access-Control-Allow-Headers Content-Type always; # <- circumvent cors for Firefox
48
  # mirror /mirror_autocomplete;
49
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  }
51
  }