您好,登錄後才能下訂單哦!
在C++中,聚類算法可以用於數據壓縮,因為它們可以將相似的數據點分組在一起,從而減少數據集中的冗余。以下是一個簡單的例子,展示了如何使用K-means聚類算法進行數據壓縮和解壓:
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <iostream>
#include <limits>
#include <numeric>
#include <random>
#include <stdexcept>
#include <vector>
/// @brief Clusters `data` with Lloyd's K-means algorithm and returns the centroids.
///
/// Centroids are seeded with distinct, randomly chosen input points, so the
/// order of the returned centroids (and, for ambiguous data, their values) may
/// differ between calls.
///
/// @param data          Points to cluster; every point must have the same dimension.
/// @param k             Requested number of clusters. Clamped to `data.size()`.
/// @param maxIterations Upper bound on assign/update rounds. The loop stops
///                      earlier once no point changes cluster.
/// @return One centroid per cluster (`min(k, data.size())` entries). Empty when
///         `data` is empty or `k <= 0`.
/// @throws std::invalid_argument if the points do not all share one dimension.
std::vector<std::vector<double>> kMeans(const std::vector<std::vector<double>>& data, int k, int maxIterations = 100) {
    if (data.empty() || k <= 0) {
        return {};
    }

    const std::size_t n = data.size();
    const std::size_t dims = data.front().size();
    for (const auto& point : data) {
        if (point.size() != dims) {
            throw std::invalid_argument("kMeans: all points must have the same dimension");
        }
    }

    // There cannot be more non-empty clusters than there are points.
    const std::size_t clusterCount = std::min(static_cast<std::size_t>(k), n);

    // Seed with distinct random points. The input is const, so shuffle a list
    // of indices instead of the data itself.
    std::vector<std::size_t> order(n);
    std::iota(order.begin(), order.end(), std::size_t{0});
    std::random_device rd;
    std::mt19937 gen(rd());
    std::shuffle(order.begin(), order.end(), gen);

    std::vector<std::vector<double>> centroids;
    centroids.reserve(clusterCount);
    for (std::size_t c = 0; c < clusterCount; ++c) {
        centroids.push_back(data[order[c]]);
    }

    // Sentinel that never equals a real cluster index, so the first round
    // always registers as a change.
    const std::size_t unassigned = std::numeric_limits<std::size_t>::max();
    std::vector<std::size_t> labels(n, unassigned);

    // Per-cluster, per-dimension running sums; reused across iterations.
    std::vector<std::vector<double>> sums(clusterCount, std::vector<double>(dims, 0.0));
    std::vector<std::size_t> counts(clusterCount, 0);

    for (int iteration = 0; iteration < maxIterations; ++iteration) {
        for (auto& row : sums) {
            std::fill(row.begin(), row.end(), 0.0);
        }
        std::fill(counts.begin(), counts.end(), std::size_t{0});

        // Assignment step: attach every point to its nearest centroid.
        bool changed = false;
        for (std::size_t j = 0; j < n; ++j) {
            std::size_t nearest = 0;
            double nearestDist = std::numeric_limits<double>::max();
            for (std::size_t c = 0; c < clusterCount; ++c) {
                double dist = 0.0;  // squared Euclidean distance
                for (std::size_t m = 0; m < dims; ++m) {
                    const double diff = data[j][m] - centroids[c][m];
                    dist += diff * diff;
                }
                if (dist < nearestDist) {
                    nearestDist = dist;
                    nearest = c;
                }
            }

            if (labels[j] != nearest) {
                labels[j] = nearest;
                changed = true;
            }
            for (std::size_t m = 0; m < dims; ++m) {
                sums[nearest][m] += data[j][m];
            }
            ++counts[nearest];
        }

        // Update step: move each centroid to the mean of its members.
        for (std::size_t c = 0; c < clusterCount; ++c) {
            if (counts[c] == 0) {
                continue;  // empty cluster: keep the old centroid, avoid 0/0
            }
            for (std::size_t m = 0; m < dims; ++m) {
                centroids[c][m] = sums[c][m] / static_cast<double>(counts[c]);
            }
        }

        if (!changed) {
            break;  // assignments are stable, so the centroids are too
        }
    }

    return centroids;
}
/// @brief Quantizes `data` by replacing each point with its nearest K-means centroid.
///
/// Centroids are obtained by calling `kMeans(data, k)`. The result has one
/// entry per input point, in input order, each being a copy of the centroid
/// closest to that point by squared Euclidean distance.
///
/// @param data Points to quantize.
/// @param k    Number of clusters requested from `kMeans`.
/// @return The centroid chosen for every point, or an empty vector when
///         `kMeans` produced no centroids.
std::vector<std::vector<double>> compressData(const std::vector<std::vector<double>>& data, int k) {
    const std::vector<std::vector<double>> centroids = kMeans(data, k);

    std::vector<std::vector<double>> compressedData;
    if (centroids.empty()) {
        // Nothing to map onto; avoids indexing centroids with an invalid index.
        return compressedData;
    }

    compressedData.reserve(data.size());
    for (const auto& point : data) {
        std::size_t nearest = 0;
        double nearestDist = std::numeric_limits<double>::max();
        for (std::size_t i = 0; i < centroids.size(); ++i) {
            const std::vector<double>& centroid = centroids[i];
            // Compare only the coordinates both vectors have, so a dimension
            // mismatch can never read out of bounds.
            const std::size_t dims = std::min(point.size(), centroid.size());
            double dist = 0.0;
            for (std::size_t m = 0; m < dims; ++m) {
                const double diff = point[m] - centroid[m];
                dist += diff * diff;
            }
            if (dist < nearestDist) {
                nearestDist = dist;
                nearest = i;
            }
        }
        compressedData.push_back(centroids[nearest]);
    }
    return compressedData;
}
/// @brief Reconstructs an approximation of the data from its quantized form.
///
/// Centroids are re-fitted with `kMeans(originalData, k)`, and each compressed
/// point is mapped to the nearest of them by squared Euclidean distance.
/// K-means quantization is lossy: the output is one centroid per compressed
/// point, not the original points themselves.
///
/// NOTE(review): `kMeans` seeds itself randomly, so the centroids fitted here
/// are not guaranteed to match the ones used when `compressedData` was
/// produced.
///
/// @param compressedData Quantized points, as produced by `compressData`.
/// @param originalData   Data set used to re-fit the centroids.
/// @param k              Number of clusters requested from `kMeans`.
/// @return One centroid per entry of `compressedData`, or an empty vector when
///         `kMeans` produced no centroids.
std::vector<std::vector<double>> decompressData(const std::vector<std::vector<double>>& compressedData, const std::vector<std::vector<double>>& originalData, int k) {
    const std::vector<std::vector<double>> centroids = kMeans(originalData, k);

    std::vector<std::vector<double>> decompressedData;
    if (centroids.empty()) {
        // Nothing to map onto; avoids indexing with an invalid index.
        return decompressedData;
    }

    decompressedData.reserve(compressedData.size());
    for (const auto& point : compressedData) {
        std::size_t nearest = 0;
        double nearestDist = std::numeric_limits<double>::max();
        for (std::size_t i = 0; i < centroids.size(); ++i) {
            const std::vector<double>& centroid = centroids[i];
            // Compare only the coordinates both vectors have, so a dimension
            // mismatch can never read out of bounds.
            const std::size_t dims = std::min(point.size(), centroid.size());
            double dist = 0.0;
            for (std::size_t m = 0; m < dims; ++m) {
                const double diff = point[m] - centroid[m];
                dist += diff * diff;
            }
            if (dist < nearestDist) {
                nearestDist = dist;
                nearest = i;
            }
        }
        // `nearest` is a centroid index, so it must index `centroids`; using
        // it on `originalData` would return an unrelated input point.
        decompressedData.push_back(centroids[nearest]);
    }
    return decompressedData;
}
int main() {
std::vector<std::vector<double>> data = {{1, 2}, {2, 3}, {3, 4}, {4, 5}, {5, 6}};
int k = 2;
// 數據壓縮
std::vector<std::vector<double>> compressedData = compressData(data, k);
std::cout << "Compressed data:" << std::endl;
for (const auto& point : compressedData) {
std::cout << "[" << point[0] << ", " << point[1] << "]" << std::endl;
}
// 數據解壓
std::vector<std::vector<double>> decompressedData = decompressData(compressedData, data, k);
std::cout << "Decompressed data:" << std::endl;
for (const auto& point : decompressedData) {
std::cout << "[" << point[0] << ", " << point[1] << "]" << std::endl;
}
return 0;
}
這個例子中,我們首先使用K-means聚類算法對數據進行壓縮,將相似的數據點分組在一起,並以各組的質心代表組內的數據點。然後,我們可以使用相同的算法對壓縮後的數據進行解壓,得到原始數據的近似值(K-means壓縮是有損的,無法完全恢復原始數據)。請注意,這個例子僅用於演示目的,實際應用中可能需要根據具體需求進行調整。
免責聲明:本站發布的內容(圖片、視頻和文字)以原創、轉載和分享為主,文章觀點不代表本網站立場,如果涉及侵權請聯繫站長郵箱:is@yisu.com進行舉報,並提供相關證據,一經查實,將立刻刪除涉嫌侵權內容。