在Kubernetes(k8s)中使用NVIDIA GPU

1、编辑/etc/containerd/config.toml配置文件,将默认运行时设置为nvidia(前提:已按下文“在容器中使用NVIDIA GPU”一节安装好显卡驱动、containerd和NVIDIA Container Toolkit)

# containerd configuration using the version 2 schema.
version = 2
[plugins]
  [plugins."io.containerd.grpc.v1.cri"]
    [plugins."io.containerd.grpc.v1.cri".containerd]
      # Make the "nvidia" runtime declared below the default runtime of the CRI plugin.
      default_runtime_name = "nvidia"

      [plugins."io.containerd.grpc.v1.cri".containerd.runtimes]
        # Definition of the runtime referenced by default_runtime_name above.
        [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia]
          privileged_without_host_devices = false
          runtime_engine = ""
          runtime_root = ""
          # Uses the runc v2 shim; the binary it launches is overridden in the options table.
          runtime_type = "io.containerd.runc.v2"
          [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia.options]
            # Launch nvidia-container-runtime instead of the stock runc binary.
            BinaryName = "/usr/bin/nvidia-container-runtime"

2、重启containerd服务

$ sudo systemctl restart containerd

3、部署NVIDIA设备插件(k8s-device-plugin)以启用GPU支持

$ sudo kubectl create -f https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/master/nvidia-device-plugin.yml

4、测试验证

$ cat <<EOF | sudo kubectl apply -f -
apiVersion: v1
kind: Pod
metadata:
  name: gpu-pod
spec:
  restartPolicy: Never
  containers:
    - name: cuda-container
      image: nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda10.2
      resources:
        limits:
          nvidia.com/gpu: 1 # requesting 1 GPU
  tolerations:
  - key: nvidia.com/gpu
    operator: Exists
    effect: NoSchedule
EOF
预期结果:
$ sudo kubectl logs gpu-pod
[Vector addition of 50000 elements]
Copy input data from the host memory to the CUDA device
CUDA kernel launch with 196 blocks of 256 threads
Copy output data from the CUDA device to the host memory
Test PASSED
Done

在容器中使用NVIDIA GPU

1、下载安装显卡驱动

  • 下载地址
NVIDIA Driver Downloads(https://www.nvidia.com/Download/index.aspx)
  • 安装(例)
$ sudo ./NVIDIA-Linux-x86_64-515.57.run

2、安装前的准备工作

$ sudo modprobe overlay
$ sudo modprobe br_netfilter

$ cat <<EOF | sudo tee /etc/modules-load.d/containerd.conf
overlay
br_netfilter
EOF

3、安装containerd(例)

$ sudo apt-get update
$ sudo apt-get install ca-certificates curl gnupg lsb-release
$ curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg
$ echo \
  "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/ubuntu \
  $(lsb_release -cs) stable" | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null

$ sudo apt-get update && sudo apt-get install -y containerd.io

4、安装NVIDIA Container Toolkit

$ distribution=$(. /etc/os-release;echo $ID$VERSION_ID) \
    && curl -s -L https://nvidia.github.io/libnvidia-container/gpgkey | sudo apt-key add - \
    && curl -s -L https://nvidia.github.io/libnvidia-container/$distribution/libnvidia-container.list | sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list

$ sudo apt-get update && sudo apt-get install -y nvidia-container-toolkit

5、测试

$ sudo ctr image pull docker.io/nvidia/cuda:11.0.3-base-ubuntu20.04

$ sudo ctr run --rm -t \
    --runc-binary=/usr/bin/nvidia-container-runtime \
    --env NVIDIA_VISIBLE_DEVICES=all \
    docker.io/nvidia/cuda:11.0.3-base-ubuntu20.04 \
    cuda-11.0.3-base-ubuntu20.04 nvidia-smi
预期结果:
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 515.57       Driver Version: 515.57       CUDA Version: 11.7     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|===============================+======================+======================|
|   0  NVIDIA GeForce ...  Off  | 00000000:65:00.0 Off |                  N/A |
| 30%   41C    P8    20W / 350W |      0MiB / 12288MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+

+-----------------------------------------------------------------------------+
| Processes:                                                                  |
|  GPU   GI   CI        PID   Type   Process name                  GPU Memory |
|        ID   ID                                                   Usage      |
|=============================================================================|
|  No running processes found                                                 |
+-----------------------------------------------------------------------------+

HAProxy配置SNI实现https多域名代理

配置参考:
# TCP-mode frontend that picks a backend from the SNI server name in the TLS
# ClientHello. The bind line has no "ssl" option, so TLS is passed through
# rather than terminated here.
frontend https_proxy
    mode tcp
    bind :443
    # Allow up to 5s for the ClientHello to arrive so its content can be inspected.
    tcp-request inspect-delay 5s
    # Accept the connection once a TLS ClientHello (hello type 1) has been seen.
    tcp-request content accept if { req_ssl_hello_type 1 }

    # Case-insensitive (-i) match on the server name requested via SNI.
    acl domain_app1 req_ssl_sni -i app1.domain
    acl domain_app2 req_ssl_sni -i app2.domain

    # app1_service, app2_service and default_service are backend sections that
    # are not part of this snippet and must be defined elsewhere in the config.
    use_backend app1_service if domain_app1
    use_backend app2_service if domain_app2
    default_backend default_service

使用 CMake 设置 Windows 平台下 Qt 工程的链接子系统

参考如下例,可分别设置不同编译配置下的链接子系统为控制台或窗口。

# Select the Windows link subsystem per build configuration (MSVC toolchain only):
# Debug and RelWithDebInfo link as console applications, Release and MinSizeRel
# link as windowed (GUI) applications.
if(WIN32 AND MSVC)
  set_target_properties(MyApp PROPERTIES
    LINK_FLAGS_DEBUG          "/SUBSYSTEM:CONSOLE"
    LINK_FLAGS_RELWITHDEBINFO "/SUBSYSTEM:CONSOLE"
    LINK_FLAGS_RELEASE        "/SUBSYSTEM:WINDOWS"
    LINK_FLAGS_MINSIZEREL     "/SUBSYSTEM:WINDOWS")
endif()

数据库迁移工具 Flyway

Flyway 是一款开源的数据库迁移工具。 支持命令行、程序API、与多种构建工具集成等运行模式,轻松实现多种数据库的版本管理。
■使用例(MySQL) ※参数【useUnicode=true&characterEncoding=utf-8】可避免乱码

$ ./flyway -url="jdbc:mysql://mysqlhost:3306/mysqldb?useUnicode=true&characterEncoding=utf-8" -user=mysqluser -password=mysqlpassword -locations=filesystem:sql/MySQLMigrations migrate
■使用例(SQLite)

$ ./flyway -url=jdbc:sqlite:/mypath/mysqlite.db -locations=filesystem:sql/SQLiteMigrations migrate

JNI 中 C++ 字符串到 jstring 类型的转换

/// Builds a java.lang.String from a NUL-terminated native byte string by
/// invoking the Java constructor `String(byte[] bytes, String charsetName)`.
///
/// @param pEnv       JNI environment of the calling thread.
/// @param pChars     NUL-terminated bytes encoded in @p pEncoding.
/// @param pEncoding  Java charset name used to decode @p pChars
///                   (defaults to "GB2312").
/// @return A local reference to the new string, or nullptr when an argument is
///         null or a JNI call fails (a Java exception may then be pending and
///         should be handled by the caller).
jstring CharsToString(JNIEnv *pEnv, const char *pChars, const char *pEncoding = "GB2312")
{
    if (pEnv == nullptr || pChars == nullptr || pEncoding == nullptr) {
        return nullptr;
    }

    jclass clazz = pEnv->FindClass("java/lang/String");
    if (clazz == nullptr) {
        return nullptr;  // FindClass failed; nothing to release yet.
    }

    jstring result = nullptr;
    jmethodID methodID = pEnv->GetMethodID(clazz, "<init>", "([BLjava/lang/String;)V");
    if (methodID != nullptr) {
        // Measure once and make the size_t -> jsize conversion explicit.
        const jsize length = static_cast<jsize>(strlen(pChars));
        jbyteArray bytes = pEnv->NewByteArray(length);
        if (bytes != nullptr) {
            pEnv->SetByteArrayRegion(bytes, 0, length, reinterpret_cast<const jbyte *>(pChars));
            jstring strEncoding = pEnv->NewStringUTF(pEncoding);
            if (strEncoding != nullptr) {
                result = static_cast<jstring>(pEnv->NewObject(clazz, methodID, bytes, strEncoding));
                pEnv->DeleteLocalRef(strEncoding);
            }
            pEnv->DeleteLocalRef(bytes);
        }
    }

    // Release the class reference as well, so repeated calls from a
    // long-running native frame do not exhaust the local reference table.
    pEnv->DeleteLocalRef(clazz);
    return result;
}

RAID1 阵列扩容

◆磁盘分区
# fdisk /dev/sdc

◆查看RAID状态
# cat /proc/mdstat

◆查看指定磁盘的阵列信息
# mdadm -E /dev/sdc1

◆查看指定阵列的详细信息
# mdadm -D /dev/md0

◆添加新的阵列用磁盘
# mdadm /dev/md0 --add /dev/sdc1

◆将热备磁盘增加到阵列工作磁盘中
# mdadm -G /dev/md0 -n 3

◆模拟磁盘损坏
# mdadm /dev/md0 -f /dev/sda1

◆移除损坏的磁盘
# mdadm /dev/md0 -r /dev/sda1

◆更新阵列大小
# mdadm -G /dev/md0 -z max

◆更新阵列上文件系统大小
# resize2fs /dev/md0