/**
 * Describes a model to pull.
 *
 * @property model_name Model identifier: an "org/repo" name or an alias.
 * @property precision Precision (quantization), e.g. "Q4_0" or "Q4_K_M".
 * @property hub Source hub; [HubSource.AUTO] routes by [model_name].
 * @property local_path Local path; only relevant when [hub] is `HubSource.LOCALFS`.
 * @property hf_token Access token; falls back to the `GENIEX_HFTOKEN` environment variable.
 * @property chipset Chipset identifier (e.g. "SM8750"); required for Qualcomm AI Hub on Android.
 * @property display_name Optional display name. NOTE(review): its consumer is not visible here.
 */
data class ModelPullInput(
    val model_name: String,
    val precision: String? = null,
    val hub: HubSource = HubSource.AUTO,
    val local_path: String? = null,
    val hf_token: String? = null,
    val chipset: String? = null,
    val display_name: String? = null,
)
/**
 * Resolved on-disk locations and runtime selection for a model.
 *
 * @property model_path Path to the model file.
 * @property model_dir Directory associated with the model.
 * @property model_name Name of the model.
 * @property runtime_id Runtime identifier; authoritative — prefer it over any UI selection.
 * @property mmproj_path VLM projection weights, if any.
 * @property tokenizer_path Tokenizer path, if any.
 * @property compute_unit Compute unit, if any. NOTE(review): accepted values are not visible here.
 */
data class ModelPaths(
    val model_path: String,
    val model_dir: String,
    val model_name: String,
    val runtime_id: String,
    val mmproj_path: String? = null,
    val tokenizer_path: String? = null,
    val compute_unit: String? = null,
)
在 Android 上从 Qualcomm AI Hub 拉取模型时,必须显式传入 chipset;Rust 侧仅在骁龙 Windows 设备上才会自动识别芯片型号。骁龙 8 至尊版对应 "SM8750",骁龙 8 至尊版 Gen 5 对应 "SM8850"。
/**
 * Input bundle for creating an LLM (per the type name).
 *
 * @property model_name Name of the model.
 * @property model_path Path to the model.
 * @property tokenizer_path Optional tokenizer path.
 * @property config Model configuration to apply.
 * @property runtime_id Optional runtime identifier.
 * @property compute_unit Optional compute unit.
 */
data class LlmCreateInput(
    val model_name: String,
    val model_path: String,
    val tokenizer_path: String? = null,
    val config: ModelConfig,
    val runtime_id: String? = null,
    val compute_unit: String? = null,
)
/**
 * Input bundle for creating a VLM (per the type name).
 *
 * @property model_name Name of the model.
 * @property model_path Path to the model.
 * @property mmproj_path Vision projection weights (GGUF VLMs), if any.
 * @property config Model configuration to apply.
 * @property runtime_id Optional runtime identifier.
 * @property compute_unit Optional compute unit.
 */
data class VlmCreateInput(
    val model_name: String,
    val model_path: String,
    val mmproj_path: String? = null,
    val config: ModelConfig,
    val runtime_id: String? = null,
    val compute_unit: String? = null,
)
/**
 * Model configuration values.
 *
 * Only [nCtx] carries a documented meaning in this file; the remaining fields are described by
 * name, and their exact semantics are defined by whatever consumes this config (not visible here).
 *
 * @property nCtx Context size; 0 means "use the model default".
 * @property nThreads Thread count.
 * @property nThreadsBatch Thread count for batch work.
 * @property nBatch Batch size.
 * @property nUBatch Micro-batch size (presumably — confirm against the consumer).
 * @property nSeqMax Maximum number of sequences.
 * @property nGpuLayers Number of GPU layers.
 * @property chat_template_path Chat template path; empty by default.
 * @property chat_template_content Chat template content; empty by default.
 * @property max_tokens Maximum token count.
 * @property enable_thinking Toggles "thinking"; off by default.
 * @property verbose Toggles verbose mode; off by default.
 */
data class ModelConfig(
    var nCtx: Int = 2048,
    var nThreads: Int = 8,
    var nThreadsBatch: Int = 8,
    var nBatch: Int = 2048,
    var nUBatch: Int = 512,
    var nSeqMax: Int = 1,
    var nGpuLayers: Int = 0,
    val chat_template_path: String = "",
    val chat_template_content: String = "",
    val max_tokens: Int = 2048,
    val enable_thinking: Boolean = false,
    val verbose: Boolean = false,
)
/**
 * One chat message exchanged with a VLM.
 *
 * @property role Speaker role: "system", "user" or "assistant".
 * @property contents Ordered content parts making up the message.
 */
data class VlmChatMessage(
    val role: String?,
    val contents: List<VlmContent>,
)

/**
 * A single content part of a [VlmChatMessage].
 *
 * @property type Content kind: "text" or "image".
 * @property text The text content for a text part, or an absolute file path for an image part.
 */
data class VlmContent(
    val type: String?,
    val text: String?,
)
/**
 * Mutable options for a generation request.
 *
 * The compiler-generated `equals`/`hashCode` of a data class compare array properties by
 * reference, so two configs holding equal-content arrays would not be equal. Both members are
 * overridden here to compare [stopWords], [imagePaths] and [audioPaths] by content.
 *
 * NOTE(review): [stopCount], [imageCount] and [audioCount] presumably mirror the lengths of the
 * corresponding arrays — confirm against the code that consumes this class.
 *
 * @property maxTokens Maximum number of tokens.
 * @property stopWords Optional stop words.
 * @property stopCount Count associated with [stopWords].
 * @property nPast Past-token count (semantics defined by the consumer; not visible here).
 * @property samplerConfig Optional sampler configuration.
 * @property imagePaths Optional image paths.
 * @property imageCount Count associated with [imagePaths].
 * @property audioPaths Optional audio paths.
 * @property audioCount Count associated with [audioPaths].
 */
data class GenerationConfig(
    var maxTokens: Int = 32,
    var stopWords: Array<String>? = null,
    var stopCount: Int = 0,
    var nPast: Int = 0,
    var samplerConfig: SamplerConfig? = null,
    var imagePaths: Array<String>? = null,
    var imageCount: Int = 0,
    var audioPaths: Array<String>? = null,
    var audioCount: Int = 0,
) {
    /** Structural equality that compares the array properties element by element. */
    override fun equals(other: Any?): Boolean {
        if (this === other) return true
        if (other !is GenerationConfig) return false
        return maxTokens == other.maxTokens &&
            stopWords.contentEquals(other.stopWords) &&
            stopCount == other.stopCount &&
            nPast == other.nPast &&
            samplerConfig == other.samplerConfig &&
            imagePaths.contentEquals(other.imagePaths) &&
            imageCount == other.imageCount &&
            audioPaths.contentEquals(other.audioPaths) &&
            audioCount == other.audioCount
    }

    /** Hash code consistent with [equals]: arrays contribute their content hash. */
    override fun hashCode(): Int {
        var result = maxTokens
        result = 31 * result + stopWords.contentHashCode()
        result = 31 * result + stopCount
        result = 31 * result + nPast
        result = 31 * result + (samplerConfig?.hashCode() ?: 0)
        result = 31 * result + imagePaths.contentHashCode()
        result = 31 * result + imageCount
        result = 31 * result + audioPaths.contentHashCode()
        result = 31 * result + audioCount
        return result
    }
}
/**
 * Closed set of results delivered while streaming LLM output.
 *
 * Being sealed, a `when` over this type can be exhaustive without an `else` branch.
 */
sealed class LlmStreamResult {
    /** Carries a piece of generated [text]. */
    data class Token(val text: String) : LlmStreamResult()

    /** Signals completion and carries the associated [profile] data. */
    data class Completed(val profile: ProfilingData) : LlmStreamResult()

    /** Signals a failure and carries the causing [throwable]. */
    data class Error(val throwable: Throwable) : LlmStreamResult()
}