from pydantic import BaseModel, Field


class GgufParser(BaseModel):
    """Top-level report: file metadata, model architecture, tokenizer
    details, and the resource-usage estimate."""

    metadata: "Metadata"
    architecture: "Architecture"
    tokenizer: "Tokenizer"
    estimate: "Estimate"


class Metadata(BaseModel):
    type_: str = Field(alias="type")
    architecture: str
    quantization_version: int = Field(alias="quantizationVersion")
    alignment: int
    name: str
    file_type: int = Field(alias="fileType")
    little_endian: bool = Field(alias="littleEndian")
    file_size: int = Field(alias="fileSize")
    size: int
    parameters: int
    bits_per_weight: float = Field(alias="bitsPerWeight")


class Architecture(BaseModel):
    type_: str = Field(alias="type")
    architecture: str
    maximum_context_length: int = Field(alias="maximumContextLength")
    embedding_length: int = Field(alias="embeddingLength")
    block_count: int = Field(alias="blockCount")
    feed_forward_length: int = Field(alias="feedForwardLength")
    attention_head_count: int = Field(alias="attentionHeadCount")
    attention_head_count_kv: int = Field(alias="attentionHeadCountKV")
    attention_layer_norm_rms_epsilon: float = Field(
        alias="attentionLayerNormRMSEpsilon"
    )
    attention_key_length: int = Field(alias="attentionKeyLength")
    attention_value_length: int = Field(alias="attentionValueLength")
    attention_causal: bool = Field(alias="attentionCausal")
    rope_dimension_count: int = Field(alias="ropeDimensionCount")
    rope_frequency_base: int = Field(alias="ropeFrequencyBase")
    vocabulary_length: int = Field(alias="vocabularyLength")
    embedding_gqa: int = Field(alias="embeddingGQA")
    embedding_key_gqa: int = Field(alias="embeddingKeyGQA")
    embedding_value_gqa: int = Field(alias="embeddingValueGQA")


class Tokenizer(BaseModel):
    model: str
    tokens_length: int = Field(alias="tokensLength")
    merges_length: int = Field(alias="mergesLength")
    added_token_length: int = Field(alias="addedTokenLength")
    bos_token_id: int = Field(alias="bosTokenID")
    eos_token_id: int = Field(alias="eosTokenID")
    eot_token_id: int = Field(alias="eotTokenID")
    eom_token_id: int = Field(alias="eomTokenID")
    unknown_token_id: int = Field(alias="unknownTokenID")
    separator_token_id: int = Field(alias="separatorTokenID")
    padding_token_id: int = Field(alias="paddingTokenID")
    tokens_size: int = Field(alias="tokensSize")
    merges_size: int = Field(alias="mergesSize")


class Ram(BaseModel):
    handle_layers: int = Field(alias="handleLayers")
    handle_last_layer: int = Field(alias="handleLastLayer")
    handle_output_layer: bool = Field(alias="handleOutputLayer")
    remote: bool
    position: int
    uma: int  # estimated usage on unified-memory (UMA) systems
    nonuma: int  # estimated usage on discrete-memory (non-UMA) systems


class Item(BaseModel):
    offload_layers: int = Field(alias="offloadLayers")
    full_offloaded: bool = Field(alias="fullOffloaded")
    # Not present in every report, so the type must be optional
    # rather than a bare float with a None default.
    maximum_tokens_per_second: float | None = Field(
        default=None, alias="maximumTokensPerSecond"
    )
    ram: "Ram"
    vrams: list["Ram"]


class Estimate(BaseModel):
    items: list["Item"]
    type_: str = Field(alias="type")
    architecture: str
    context_size: int = Field(alias="contextSize")
    flash_attention: bool = Field(alias="flashAttention")
    no_mmap: bool = Field(alias="noMMap")
    embedding_only: bool = Field(alias="embeddingOnly")
    reranking: bool
    distributable: bool
    logical_batch_size: int = Field(alias="logicalBatchSize")
    physical_batch_size: int = Field(alias="physicalBatchSize")
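

# --- Usage sketch (not part of the original listing) ---
# GgufParser references classes defined after it, so resolve the forward
# references explicitly; recent Pydantic v2 releases also attempt this
# automatically on first use.
GgufParser.model_rebuild()

if __name__ == "__main__":
    # Hypothetical example: "report.json" is a placeholder for any JSON
    # document shaped like the models above. Validation matches the
    # camelCase aliases, so raw parser output can be loaded directly.
    from pathlib import Path

    report = GgufParser.model_validate_json(Path("report.json").read_text())
    print(report.metadata.name, report.metadata.parameters)
    for item in report.estimate.items:
        print(item.offload_layers, item.ram.uma, item.ram.nonuma)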