Instructions to use saracandu/stldec_onlyhard with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use saracandu/stldec_onlyhard with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="saracandu/stldec_onlyhard", trust_remote_code=True)

# Load model directly
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained("saracandu/stldec_onlyhard", trust_remote_code=True, dtype="auto")
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- vLLM
How to use saracandu/stldec_onlyhard with vLLM:
Install from pip and serve model
```bash
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "saracandu/stldec_onlyhard"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "saracandu/stldec_onlyhard",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
```
Use Docker
docker model run hf.co/saracandu/stldec_onlyhard
- SGLang
How to use saracandu/stldec_onlyhard with SGLang:
Install from pip and serve model
```bash
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "saracandu/stldec_onlyhard" \
  --host 0.0.0.0 \
  --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "saracandu/stldec_onlyhard",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
```
Use Docker images
```bash
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
    --model-path "saracandu/stldec_onlyhard" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "saracandu/stldec_onlyhard",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
```
- Docker Model Runner
How to use saracandu/stldec_onlyhard with Docker Model Runner:
docker model run hf.co/saracandu/stldec_onlyhard
| import re | |
| import os | |
| from typing import Any, Dict, List, Optional, Tuple, Union | |
| from transformers import PreTrainedTokenizer | |
| from transformers.utils import logging | |
| from pathlib import Path | |
| import json | |
| logger = logging.get_logger(__name__) | |
| from huggingface_hub import hf_hub_download | |
| import json | |
| import os | |
def load_json(path, repo_id=None):
    """
    Read a JSON file and return its parsed content.

    Args:
        path: Location of the JSON file. When `repo_id` is given, this is the
            file name passed to `hf_hub_download` for that repository instead.
        repo_id: Optional Hugging Face Hub repository id. If truthy, the file is
            first resolved through `hf_hub_download(repo_id, path)` and the
            returned local path is the one that gets opened.

    Returns:
        The object produced by parsing the file as JSON.
    """
    # Only go through the hub when a repository was actually requested.
    local_path = hf_hub_download(repo_id, path) if repo_id else path
    with open(local_path, "r", encoding="utf-8") as handle:
        return json.loads(handle.read())
def load_json_old(path: str) -> Union[Dict, List]:
    """
    Parse a JSON file located relative to this module's directory.

    Args:
        path (str): Path of the JSON file, joined onto the directory that contains this source file.

    Returns:
        Union[Dict, List]: The parsed JSON content (a dictionary or a list).
    """
    module_dir = Path(__file__).parent
    target = module_dir / path
    with open(target, "r", encoding="utf-8") as handle:
        return json.loads(handle.read())
class STLTokenizer(PreTrainedTokenizer):
    """
    Greedy longest-match tokenizer over a fixed JSON vocabulary, built on `PreTrainedTokenizer`.

    Before matching, every input is wrapped in the BOS/EOS tokens and its spaces are replaced by a
    placeholder character ('@' by default). Characters that start no vocabulary entry are emitted
    as the unknown token.
    """

    def __init__(self, vocab_path: str = "vocab.json", unk_token: str = "unk", pad_token: str = "pad",
                 bos_token: str = "/s", eos_token: str = "s", model_max_length = 512, **kwargs):
        """
        Initializes the STLTokenizer with its vocabulary and special tokens.

        Args:
            vocab_path (str): Accepted for interface compatibility but currently NOT used: the
                vocabulary is always loaded as "vocab.json" from the hub repository
                "saracandu/stldec_random_32".
            unk_token (str, optional): The token used for unknown words. Defaults to "unk".
            pad_token (str, optional): The token used for padding. Defaults to "pad".
            bos_token (str, optional): The token used for the beginning of a sequence. Defaults to "/s".
            eos_token (str, optional): The token used for the end of a sequence. Defaults to "s".
            model_max_length (int, optional): Maximum sequence length, used by `postpad_sequence`. Defaults to 512.
            **kwargs: Forwarded unchanged to `PreTrainedTokenizer.__init__`.
        """
        # NOTE(review): the default special-token strings look like bracketed tokens whose angle
        # brackets were lost somewhere — confirm against the entries of vocab.json.
        # NOTE(review): `vocab_path` is ignored and the repository below is hard-coded — confirm
        # this is the vocabulary intended for every checkpoint that ships this tokenizer.
        self.vocab = load_json("vocab.json", repo_id="saracandu/stldec_random_32")
        # The vocabulary and token attributes are assigned before the base-class initializer runs;
        # this statement order is kept exactly as it was.
        self.unk_token = unk_token
        self.pad_token = pad_token
        self.bos_token = bos_token
        self.eos_token = eos_token
        self.model_max_length = model_max_length
        self.id_to_token = {v: k for k, v in self.vocab.items()}  # Reverse mapping: id -> token
        super().__init__(
            unk_token=unk_token,
            pad_token=pad_token,
            bos_token=bos_token,
            eos_token=eos_token,
            model_max_length=model_max_length,
            **kwargs
        )

    def vocab_size(self) -> int:
        """
        Returns the size of the vocabulary.

        Defined as a regular method (no `@property`), so it is called as `vocab_size()`.

        Returns:
            int: The number of tokens in the vocabulary.
        """
        return len(self.vocab)

    def prepad_sequence(self, sequence, space_token = ' ', new_space_token = '@', undo = False):
        """
        Replaces spaces in the input sequence with a placeholder token, or reverses that replacement.

        Args:
            sequence (str): The input sequence.
            space_token (str): The substring treated as a space. Defaults to ' '.
            new_space_token (str): The placeholder that stands in for a space. Defaults to '@'.
            undo (bool): If True, replace the placeholder with spaces. Defaults to False, which
                replaces spaces with the placeholder.

        Returns:
            str: The sequence after the replacement.
        """
        if undo:
            return sequence.replace(new_space_token, space_token)
        return sequence.replace(space_token, new_space_token)

    def add_bos_eos(self, sequence: str) -> str:
        """
        Adds the BOS token at the start and the EOS token at the end of the sequence.

        Each token is separated from the sequence by a single space.

        Args:
            sequence (str): The input sequence.

        Returns:
            str: The sequence wrapped in the BOS and EOS tokens.
        """
        return f'{self.bos_token} {sequence} {self.eos_token}'

    def tokenize(self, text: str) -> List[str]:
        """
        Tokenizes the input text into a list of tokens.

        The text is first wrapped in BOS/EOS tokens and its spaces are replaced by the placeholder
        token. Then, at each position, the longest substring found in the vocabulary is taken; if no
        substring starting there is in the vocabulary, the unknown token is emitted and one
        character is skipped.

        Args:
            text (str): The input text to be tokenized.

        Returns:
            List[str]: A list of tokens representing the tokenized text.
        """
        text = self.prepad_sequence(self.add_bos_eos(text))
        # No candidate longer than the longest vocabulary entry can match, so bounding the window
        # yields the same tokens as scanning to the end of the text, with far fewer lookups.
        max_token_len = max(map(len, self.vocab), default=0)
        text_len = len(text)
        tokens = []
        i = 0
        while i < text_len:
            best_match = None
            # Try candidates of decreasing length so the first hit is the longest match.
            for j in range(min(text_len, i + max_token_len), i, -1):
                candidate = text[i:j]
                if candidate in self.vocab:
                    best_match = candidate
                    break
            if best_match is None:
                tokens.append(self.unk_token)
                i += 1
            else:
                tokens.append(best_match)
                i += len(best_match)
        return tokens

    def convert_tokens_to_ids(self, tokens: Union[str, List[str]]) -> Union[int, List[int], None]:
        """
        Converts a token, or a list of tokens, into the corresponding ID(s).

        Tokens missing from the vocabulary map to the ID of the unknown token (which is None if the
        unknown token itself is not in the vocabulary).

        Args:
            tokens (Union[str, List[str]]): A single token or a list of tokens.

        Returns:
            Union[int, List[int], None]: One ID for a single token, a list of IDs for a list of
            tokens, or None when `tokens` is None.
        """
        if tokens is None:
            return None
        unk_token_id = self.vocab.get(str(self.unk_token))
        # A bare string is ONE token; iterating it would wrongly look up each character.
        if isinstance(tokens, str):
            return self.vocab.get(tokens, unk_token_id)
        return [self.vocab.get(token, unk_token_id) for token in tokens]

    def _special_tokens(self) -> set:
        """Returns the set of the four special token strings (unk, pad, bos, eos)."""
        return {str(self.unk_token), str(self.pad_token), str(self.bos_token), str(self.eos_token)}

    def convert_ids_to_tokens(self, ids: Union[int, List[int]],
                              skip_special_tokens: bool = False) -> Union[str, List[str]]:
        """
        Converts a token ID, or a list of token IDs, into the corresponding token(s).

        IDs that are not in the vocabulary map to the unknown token.

        Args:
            ids (Union[int, List[int]]): A single token ID or a list of token IDs.
            skip_special_tokens (bool, optional): If True and `ids` is a list, tokens equal to the
                unk/pad/bos/eos token are dropped from the result (this includes unknown IDs, since
                they map to the unknown token). Ignored for a single ID. Defaults to False.

        Returns:
            Union[str, List[str]]: One token for a single ID, otherwise a list of tokens.
        """
        if isinstance(ids, int):
            return self.id_to_token.get(ids, self.unk_token)
        tokens = [self.id_to_token.get(i, self.unk_token) for i in ids]
        if skip_special_tokens:
            special = self._special_tokens()
            tokens = [token for token in tokens if token not in special]
        return tokens

    def encode(self, sequence: str) -> List[int]:
        """
        Encodes a string sequence into a list of token IDs.

        The sequence is tokenized with `tokenize` (which also adds BOS/EOS and replaces spaces) and
        the resulting tokens are mapped to IDs with `convert_tokens_to_ids`.

        Args:
            sequence (str): The input sequence (text) to be encoded.

        Returns:
            List[int]: A list of token IDs corresponding to the input sequence.
        """
        return self.convert_tokens_to_ids(self.tokenize(sequence))

    def postpad_sequence(self, sequence, pad_token_id):
        """
        Pads the sequence with `pad_token_id` and returns it.

        The list is extended IN PLACE (the same object is returned) until its length is
        `model_max_length - 1`; sequences already that long or longer are returned unchanged.

        Args:
            sequence (list): The list of token IDs to pad.
            pad_token_id: The value appended as padding.

        Returns:
            list: The same list object, possibly extended with padding.
        """
        # NOTE(review): the target length is model_max_length - 1, not model_max_length —
        # confirm the missing slot is intentional.
        num_extra_elements = self.model_max_length - len(sequence) - 1
        if num_extra_elements > 0:
            sequence.extend([pad_token_id] * num_extra_elements)
        return sequence

    def decode(self, token_ids: List[int], skip_special_tokens: bool = False, **kwargs) -> str:
        """
        Decodes a list of token IDs into a string of text.

        The IDs are converted to tokens, the tokens are concatenated without a separator, and the
        space placeholder is turned back into spaces.

        Args:
            token_ids (List[int]): A list of token IDs to be decoded.
            skip_special_tokens (bool, optional): Whether to drop the unk/pad/bos/eos tokens before
                joining. Defaults to False.
            **kwargs: Extra keyword arguments are accepted so that callers using the base-class
                `decode` signature do not fail; they are ignored.

        Returns:
            str: The decoded string.
        """
        tokens = self.convert_ids_to_tokens(token_ids, skip_special_tokens=skip_special_tokens)
        decoded = "".join(tokens)
        return self.prepad_sequence(decoded, undo=True)

    def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]:
        """
        Saves the tokenizer's vocabulary to a JSON file in the given directory.

        The file is named "vocab.json", preceded by "<filename_prefix>-" when a prefix is given.

        Args:
            save_directory (str): The directory where the vocabulary file will be saved.
            filename_prefix (Optional[str]): An optional prefix for the filename.

        Returns:
            Tuple[str]: A one-element tuple containing the path to the saved vocabulary file.
        """
        vocab_file = f"{save_directory}/{filename_prefix + '-' if filename_prefix else ''}vocab.json"
        with open(vocab_file, "w", encoding="utf-8") as f:
            json.dump(self.vocab, f, indent=2, ensure_ascii=False)
        return (vocab_file,)

    def get_vocab(self) -> dict:
        """
        Retrieves the vocabulary used by the tokenizer.

        Returns:
            dict: The token -> ID mapping. This is the tokenizer's own dictionary, not a copy.
        """
        return self.vocab