tgi.py
# generated by datamodel-codegen
from __future__ import annotations

from enum import Enum
from typing import List, Optional

from pydantic import (
    BaseModel,
    ConfigDict,
    Field,
    PositiveFloat,
    PositiveInt,
    confloat,
    conint,
)


class ErrorResponse(BaseModel):
    """Error payload returned by the server."""

    error: str
    error_type: str


class FinishReason(Enum):
    """Why generation stopped."""

    length = 'length'
    eos_token = 'eos_token'
    stop_sequence = 'stop_sequence'


class GenerateParameters(BaseModel):
    """Sampling and decoding options for a generation request.

    All fields are optional; under pydantic v2, Optional annotations no
    longer imply a default, so each field needs an explicit `= None`.
    """

    best_of: Optional[conint(gt=0)] = None
    decoder_input_details: Optional[bool] = None
    details: Optional[bool] = None
    do_sample: Optional[bool] = None
    max_new_tokens: Optional[conint(gt=0, lt=512)] = 20
    repetition_penalty: Optional[PositiveFloat] = None
    return_full_text: Optional[bool] = None
    seed: Optional[conint(ge=0)] = None
    stop: Optional[List[str]] = Field(None, max_length=4)
    temperature: Optional[PositiveFloat] = Field(None, examples=[0.6])
    top_k: Optional[PositiveInt] = None
    top_p: Optional[confloat(gt=0.0, le=1.0)] = None
    truncate: Optional[conint(ge=0)] = None
    typical_p: Optional[confloat(gt=0.0, le=1.0)] = None
    watermark: Optional[bool] = None
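
# A quick usage note (assumed, not part of the generated spec): constructing
# GenerateParameters validates the declared bounds at runtime, e.g.
#
#     GenerateParameters(max_new_tokens=64, temperature=0.6, top_p=0.9)
#
# is accepted, while out-of-range values such as max_new_tokens=0 or
# top_p=1.5 raise pydantic.ValidationError.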


class GenerateRequest(BaseModel):
    """Body of a generation request."""

    inputs: str = Field(..., examples=['My name is Olivier and I'])
    parameters: Optional[GenerateParameters] = None


class Info(BaseModel):
    """Server and model metadata."""

    # Allow field names starting with "model_", which pydantic v2
    # otherwise reserves as a protected namespace.
    model_config = ConfigDict(protected_namespaces=())

    docker_label: Optional[str] = None
    max_batch_total_tokens: conint(ge=0) = Field(..., examples=[32000])
    max_best_of: conint(ge=0) = Field(..., examples=[2])
    max_concurrent_requests: conint(ge=0) = Field(
        ..., description='Router Parameters', examples=[128]
    )
    max_input_length: conint(ge=0) = Field(..., examples=[1024])
    max_stop_sequences: conint(ge=0) = Field(..., examples=[4])
    max_total_tokens: conint(ge=0) = Field(..., examples=[2048])
    max_waiting_tokens: conint(ge=0) = Field(..., examples=[20])
    model_device_type: str = Field(..., examples=['cuda'])
    model_dtype: str = Field(..., examples=['torch.float16'])
    model_id: str = Field(
        ..., description='Model info', examples=['bigscience/bloom-560m']
    )
    model_pipeline_tag: Optional[str] = Field(None, examples=['text-generation'])
    model_sha: Optional[str] = Field(
        None, examples=['e985a63cdc139290c5f700ff1929f0b5942cced2']
    )
    sha: Optional[str] = None
    validation_workers: conint(ge=0) = Field(..., examples=[2])
    version: str = Field(..., description='Router Info', examples=['0.5.0'])
    waiting_served_ratio: float = Field(..., examples=[1.2])
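
# A hedged sketch: these metadata fields are populated from the server's
# GET /info route; the host and port below are assumptions for a local
# deployment.
#
#     import requests
#     info = Info.model_validate(requests.get('http://localhost:8080/info').json())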


class PrefillToken(BaseModel):
    """A prompt token with its log-probability."""

    id: conint(ge=0) = Field(..., examples=[0])
    logprob: float = Field(..., examples=[-0.34])
    text: str = Field(..., examples=['test'])


class StreamDetails(BaseModel):
    """Final generation details emitted at the end of a stream."""

    finish_reason: FinishReason
    generated_tokens: conint(ge=0) = Field(..., examples=[1])
    seed: Optional[conint(ge=0)] = Field(None, examples=[42])


class Token(BaseModel):
    """A generated token with its log-probability."""

    id: conint(ge=0) = Field(..., examples=[0])
    logprob: float = Field(..., examples=[-0.34])
    special: bool = Field(..., examples=[False])
    text: str = Field(..., examples=['test'])


class BestOfSequence(BaseModel):
    """One candidate sequence, returned when best_of > 1."""

    finish_reason: FinishReason
    generated_text: str = Field(..., examples=['test'])
    generated_tokens: conint(ge=0) = Field(..., examples=[1])
    prefill: List[PrefillToken]
    seed: Optional[conint(ge=0)] = Field(None, examples=[42])
    tokens: List[Token]


class CompatGenerateRequest(BaseModel):
    """Generation request with an explicit streaming flag."""

    inputs: str = Field(..., examples=['My name is Olivier and I'])
    parameters: Optional[GenerateParameters] = None
    stream: Optional[bool] = False


class Details(BaseModel):
    """Full generation details, returned when details=True."""

    best_of_sequences: Optional[List[BestOfSequence]] = None
    finish_reason: FinishReason
    generated_tokens: conint(ge=0) = Field(..., examples=[1])
    prefill: List[PrefillToken]
    seed: Optional[conint(ge=0)] = Field(None, examples=[42])
    tokens: List[Token]


class GenerateResponse(BaseModel):
    """Response body of a non-streaming generation request."""

    details: Optional[Details] = None
    generated_text: str = Field(..., examples=['test'])


class StreamResponse(BaseModel):
    """One event in a streaming response; generated_text is only
    populated on the final event."""

    details: Optional[StreamDetails] = None
    generated_text: Optional[str] = Field(None, examples=['test'])
    token: Token
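

# --- Usage sketch (not part of the generated spec) ---
# A minimal, hedged example of a round trip against a TGI server using the
# models above. The base URL and the availability of the `requests` package
# are assumptions for a local deployment; TGI's non-streaming route is
# POST /generate.
if __name__ == '__main__':
    import requests

    request = GenerateRequest(
        inputs='My name is Olivier and I',
        parameters=GenerateParameters(max_new_tokens=20, temperature=0.6),
    )
    # exclude_unset keeps the payload minimal, so the server applies its own
    # defaults for everything we did not set explicitly.
    payload = request.model_dump(mode='json', exclude_unset=True)
    resp = requests.post('http://localhost:8080/generate', json=payload)  # assumed host/port
    if resp.ok:
        result = GenerateResponse.model_validate(resp.json())
        print(result.generated_text)
    else:
        err = ErrorResponse.model_validate(resp.json())
        print(f'{err.error_type}: {err.error}')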