107 lines
3.2 KiB
Java
107 lines
3.2 KiB
Java
package org.tzd.lm;
|
||
|
||
import com.axis.innovators.box.tools.FolderCreator;
|
||
import com.axis.innovators.box.tools.LibraryLoad;
|
||
|
||
/**
|
||
* LM推理类
|
||
* @author tzdwindows 7
|
||
*/
|
||
public class LM {
|
||
public static boolean CUDA = false;
|
||
public final static String DEEP_SEEK = FolderCreator.getModelFolder() + "//DeepSeek-R1-Distill-Qwen-1.5B-Q8_0.gguf";
|
||
|
||
static {
|
||
if (!CUDA) {
|
||
LibraryLoad.loadLibrary("cpu/ggml-base");
|
||
LibraryLoad.loadLibrary("cpu/ggml-cpu");
|
||
LibraryLoad.loadLibrary("cpu/ggml");
|
||
LibraryLoad.loadLibrary("cpu/llama");
|
||
} else {
|
||
LibraryLoad.loadLibrary("cuda/ggml-base");
|
||
LibraryLoad.loadLibrary("cuda/ggml-cpu");
|
||
LibraryLoad.loadLibrary("cuda/ggml-rpc");
|
||
// cuda版本 cuda-cu12.4-x64(确保你有)
|
||
LibraryLoad.loadLibrary("cuda/ggml-cuda");
|
||
LibraryLoad.loadLibrary("cuda/ggml");
|
||
LibraryLoad.loadLibrary("cuda/llama");
|
||
}
|
||
LibraryLoad.loadLibrary("LM");
|
||
}
|
||
/**
|
||
* 加载模型
|
||
* @param pathModel 模型路径
|
||
* @return 模型句柄
|
||
*/
|
||
public static native long llamaLoadModelFromFile(String pathModel);
|
||
|
||
/**
|
||
* 释放模型资源
|
||
* @param modelHandle 模型句柄
|
||
*/
|
||
public static native void llamaFreeModel(long modelHandle);
|
||
|
||
/**
|
||
* 上下文创建
|
||
* @param modelHandle 上下文句柄
|
||
* @return 上下文句柄
|
||
*/
|
||
public static native long createContext(long modelHandle);
|
||
|
||
/**
|
||
* 释放上下文资源
|
||
* @param ctxHandle 上下文句柄
|
||
*/
|
||
public static native void llamaFreeContext(long ctxHandle);
|
||
|
||
/**
|
||
* 推理模型
|
||
* @param modelHandle 模型句柄
|
||
* @param ctxHandle 模型上下文句柄
|
||
* @param temperature 温度
|
||
* @param prompt 问题
|
||
* @param messageCallback 回调接口
|
||
* @return 最终内容
|
||
*/
|
||
public static native String inference(long modelHandle ,
|
||
long ctxHandle,
|
||
float temperature,
|
||
String prompt,
|
||
MessageCallback messageCallback);
|
||
|
||
/**
|
||
* 回调接口
|
||
*/
|
||
public interface MessageCallback {
|
||
/**
|
||
* 接口回调
|
||
* @param message 消息
|
||
*/
|
||
void onMessage(String message);
|
||
}
|
||
|
||
public static void main(String[] args) {
|
||
// 加载模型
|
||
long modelHandle = llamaLoadModelFromFile(DEEP_SEEK);
|
||
// 创建新的上下文
|
||
long ctxHandle = createContext(modelHandle);
|
||
inference(modelHandle, ctxHandle, 0.2f, "写一个ai", new MessageCallback() {
|
||
@Override
|
||
public void onMessage(String message) {
|
||
// 回调输出
|
||
System.out.print(message);
|
||
}
|
||
});
|
||
// 推理模型
|
||
inference(modelHandle, ctxHandle, 0.2f, "谢谢你", new MessageCallback() {
|
||
@Override
|
||
public void onMessage(String message) {
|
||
// 回调输出
|
||
System.out.print(message);
|
||
}
|
||
});
|
||
// 清理上下文
|
||
llamaFreeContext(ctxHandle);
|
||
}
|
||
}
|