我想要一天分享一點「LLM 從底層堆疊的技術」,並且將每篇文章的閱讀時間控制在三分鐘以內,讓大家不會壓力太大,但是又能夠每天成長一點。
延續 AI說書 - 從0開始 - 79,ChatGPT 除了產生程式周邊的文字描述,事實上它還會回覆程式語法的指令,當中一樣經歷了 Transformer 模型從數學層面往 Token 發展,再往句子生成發展:
!pip install scikit-learn
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from matplotlib.colors import ListedColormap
# Train a 5-nearest-neighbour classifier on synthetic blob data and plot its
# decision regions together with the training points.

# Generate 1000 random data points with 5 classes (fixed seed for
# reproducibility).
X, y = make_blobs(n_samples=1000, centers=5, random_state=42)

# Split data into training (70%) and test (30%) sets. The test split is
# produced here but is not used by the plot below.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Create a k-NN classifier with k = 5
knn = KNeighborsClassifier(n_neighbors=5)

# Train the classifier with the training data
knn.fit(X_train, y_train)

# Create a mesh to plot the decision boundaries. The grid covers the full
# data range of the first two feature columns, padded by 1 on every side.
h = .02  # step size in the mesh
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))

# Predict the mesh points' class labels: flatten both grids, pair them up
# column-wise as (x, y) samples, and classify every grid point.
Z = knn.predict(np.c_[xx.ravel(), yy.ravel()])

# Put the result into a color plot; reshape the flat predictions back to the
# grid shape so they line up with xx / yy.
Z = Z.reshape(xx.shape)

# One light colour per class for the background regions, and a matching bold
# colour per class for the scattered training points.
cmap_light = ListedColormap(
    ['#FFAAAA', '#AAFFAA', '#AAAAFF', '#AFAFAF', '#FFD700']
)
cmap_bold = ListedColormap(
    ['#FF0000', '#00FF00', '#0000FF', '#808080', '#FFA500']
)

plt.figure()
plt.pcolormesh(xx, yy, Z, cmap=cmap_light)

# Overlay the training points, coloured by their true label, with a black
# ('k') outline so they stay visible against the background.
plt.scatter(
    X_train[:, 0], X_train[:, 1],
    c=y_train, cmap=cmap_bold, edgecolor='k', s=20,
)
plt.xlim(xx.min(), xx.max())
plt.ylim(yy.min(), yy.max())
plt.title("5-Class classification (k = 5, n_samples = 1000)")
plt.show()