|
|
|
@ -13,16 +13,26 @@
@@ -13,16 +13,26 @@
|
|
|
|
|
/* TODO: Use typedefs for solution and vector instead of using int (*s)[230]
|
|
|
|
|
* and double (*v)[42]. */ |
|
|
|
|
|
|
|
|
|
/* Types */ |
|
|
|
|
typedef int *solution; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* Functions */ |
|
|
|
|
double distance(const double (*u)[42], const double (*v)[42]); |
|
|
|
|
double distance(const vector *u, const vector *v); |
|
|
|
|
bool trouver_solution_initiale(void); |
|
|
|
|
bool solution_valide(int (*s)[230]); |
|
|
|
|
void print_solution(int (*s)[230], const char *name); |
|
|
|
|
void centre_gravite(int cluster, int (*s)[230], double (*cg)[42]); |
|
|
|
|
int solution_voisine(int (*dest)[230], int (*src)[230]); |
|
|
|
|
bool solution_valide(const solution s); |
|
|
|
|
void print_solution(const solution s, const char *name); |
|
|
|
|
vector *centres_gravite(const solution s); |
|
|
|
|
int solution_voisine(solution *dest, const solution src); |
|
|
|
|
#if 0 |
|
|
|
|
double fonction_objective(const solution s); |
|
|
|
|
|
|
|
|
|
static double calculer_inter(const solution s, vector *cgs); |
|
|
|
|
static double calculer_intra(const solution s, vector *cgs); |
|
|
|
|
#endif /* 0 */ |
|
|
|
|
|
|
|
|
|
int solution[230] = {0}; |
|
|
|
|
int new_solution[230] = {0}; |
|
|
|
|
solution solution_opt; |
|
|
|
|
solution new_solution; |
|
|
|
|
|
|
|
|
|
int n_clusters = 5; |
|
|
|
|
int n_iterations = 1000; |
|
|
|
@ -37,7 +47,7 @@ int main(int argc, char *argv[])
@@ -37,7 +47,7 @@ int main(int argc, char *argv[])
|
|
|
|
|
switch (c) { |
|
|
|
|
case 'c': |
|
|
|
|
k = atoi(optarg); |
|
|
|
|
if (k > 0) |
|
|
|
|
if (k > 1) |
|
|
|
|
n_clusters = k; |
|
|
|
|
break; |
|
|
|
|
case 'i': |
|
|
|
@ -58,36 +68,37 @@ int main(int argc, char *argv[])
@@ -58,36 +68,37 @@ int main(int argc, char *argv[])
|
|
|
|
|
|
|
|
|
|
srand(time(NULL)); |
|
|
|
|
|
|
|
|
|
if (!(solution_opt = calloc(N_VECTORS, sizeof(solution_opt[0])))) |
|
|
|
|
abort(); |
|
|
|
|
|
|
|
|
|
/* Solution initiale */ |
|
|
|
|
while (!trouver_solution_initiale()) |
|
|
|
|
continue; /* do it again */ |
|
|
|
|
|
|
|
|
|
print_solution(&solution, "initiale"); |
|
|
|
|
print_solution(solution_opt, "initiale"); |
|
|
|
|
|
|
|
|
|
for (i = 0; i < n_iterations; i++) { |
|
|
|
|
|
|
|
|
|
while (!solution_voisine(&new_solution, &solution)) |
|
|
|
|
while (!solution_voisine(&new_solution, solution_opt)) |
|
|
|
|
continue; /* do it again */ |
|
|
|
|
|
|
|
|
|
/* TODO: Calculer les centres de gravité, les distances moyennes inter-/intra-cluster,
|
|
|
|
|
* et faire le reste de l'algorithme */ |
|
|
|
|
print_solution(new_solution, "new solution"); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
exit(EXIT_SUCCESS); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
double distance(const double (*u)[42], const double (*v)[42]) |
|
|
|
|
double distance(const vector *u, const vector *v) |
|
|
|
|
{ |
|
|
|
|
size_t n = sizeof(*v) / sizeof((*v)[0]); |
|
|
|
|
size_t i = 0; |
|
|
|
|
double sum = 0.0; |
|
|
|
|
|
|
|
|
|
printf("distance(): n = %zu\n", n); |
|
|
|
|
|
|
|
|
|
/* d(u, v) = sqrt( (u1 - v1)^2 + (u2 - v2)^2 + ... + (un - vn)^2 ) */ |
|
|
|
|
|
|
|
|
|
for (; i < n; i++) |
|
|
|
|
sum += pow((*u)[i] - (*v)[i], 2); |
|
|
|
|
for (; i < VECTOR_SIZE; i++) |
|
|
|
|
sum += pow(u[i] - v[i], 2); |
|
|
|
|
|
|
|
|
|
return sqrt(sum); |
|
|
|
|
} |
|
|
|
@ -96,13 +107,13 @@ bool trouver_solution_initiale(void)
@@ -96,13 +107,13 @@ bool trouver_solution_initiale(void)
|
|
|
|
|
{ |
|
|
|
|
size_t i; |
|
|
|
|
|
|
|
|
|
for (i = 0; i < sizeof(solution) / sizeof(solution[0]); i++) |
|
|
|
|
solution[i] = rand() % n_clusters; |
|
|
|
|
for (i = 0; i < N_VECTORS; i++) |
|
|
|
|
solution_opt[i] = rand() % n_clusters; |
|
|
|
|
|
|
|
|
|
return solution_valide(&solution); |
|
|
|
|
return solution_valide(solution_opt); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
bool solution_valide(int (*s)[230]) |
|
|
|
|
bool solution_valide(const solution s) |
|
|
|
|
{ |
|
|
|
|
/* This function returns true if the solution that it found is "valid",
|
|
|
|
|
* i.e., it has at least one element in each cluster. */ |
|
|
|
@ -114,8 +125,8 @@ bool solution_valide(int (*s)[230])
@@ -114,8 +125,8 @@ bool solution_valide(int (*s)[230])
|
|
|
|
|
for (i = 0; i < n_clusters; i++) |
|
|
|
|
cluster_empty[i] = true; |
|
|
|
|
|
|
|
|
|
for (i = 0; (size_t) i < sizeof(*s) / sizeof(*s[0]); i++) |
|
|
|
|
cluster_empty[(*s)[i]] = false; |
|
|
|
|
for (i = 0; (size_t) i < N_VECTORS; i++) |
|
|
|
|
cluster_empty[s[i]] = false; |
|
|
|
|
|
|
|
|
|
for (i = 0; valid && i < n_clusters; i++) |
|
|
|
|
valid = (valid && !cluster_empty[i]); |
|
|
|
@ -125,79 +136,161 @@ bool solution_valide(int (*s)[230])
@@ -125,79 +136,161 @@ bool solution_valide(int (*s)[230])
|
|
|
|
|
return valid; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
void print_solution(int (*s)[230], const char *name) |
|
|
|
|
void print_solution(const solution s, const char *name) |
|
|
|
|
{ |
|
|
|
|
size_t i = 0, n = sizeof(*s) / sizeof((*s)[0]); |
|
|
|
|
size_t i = 0; |
|
|
|
|
|
|
|
|
|
printf("Solution%s%s:\n[", name ? " " : "", name ? name : ""); |
|
|
|
|
printf("Solution%s%s: [\n", name ? " " : "", name ? name : ""); |
|
|
|
|
|
|
|
|
|
while (i < n) { |
|
|
|
|
while (i < N_VECTORS) { |
|
|
|
|
const char *end; |
|
|
|
|
|
|
|
|
|
if (i + 1 == n) |
|
|
|
|
if (i + 1 == N_VECTORS) |
|
|
|
|
end = "]\n"; |
|
|
|
|
else if (i && i % 20 == 0) |
|
|
|
|
else if ((i + 1) % 20 == 0) |
|
|
|
|
end = ",\n"; |
|
|
|
|
else |
|
|
|
|
end = ", "; |
|
|
|
|
|
|
|
|
|
printf("%d%s", (*s)[i++], end); |
|
|
|
|
printf("%d%s", s[i++], end); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
void centre_gravite(int cluster, int (*s)[230], double (*cg)[42]) |
|
|
|
|
vector *centres_gravite(const solution s) |
|
|
|
|
{ |
|
|
|
|
/* Calculer le centre de gravité pour les vecteurs du cluster cluster selon la solution s */ |
|
|
|
|
|
|
|
|
|
const size_t n = sizeof(*cg) / sizeof((*cg)[0]); /* nombre d'attributs dans chaque vecteur. n = 42 */ |
|
|
|
|
const size_t nv = sizeof(benchmark_data) / sizeof(benchmark_data[0]); /* nombre de vecteurs. nv = 230 */ |
|
|
|
|
int cluster; |
|
|
|
|
size_t i = 0; |
|
|
|
|
vector *cgs = calloc(n_clusters, sizeof(*cgs)); |
|
|
|
|
int *vectors_in_cluster = calloc(n_clusters, sizeof(*vectors_in_cluster)); |
|
|
|
|
|
|
|
|
|
assert(n == 42); |
|
|
|
|
assert(nv == 230); |
|
|
|
|
if (!cgs || !vectors_in_cluster) |
|
|
|
|
abort(); |
|
|
|
|
|
|
|
|
|
/* Initialiser à 0 */ |
|
|
|
|
while (i < n) |
|
|
|
|
(*cg)[i++] = 0; |
|
|
|
|
for (cluster = 0; cluster < n_clusters; cluster++) { |
|
|
|
|
if (!(cgs[cluster] = calloc(VECTOR_SIZE, sizeof(*cgs[cluster])))) |
|
|
|
|
abort(); |
|
|
|
|
|
|
|
|
|
/* Loop over all vectors */ |
|
|
|
|
for (i = 0; i < nv; i++) { |
|
|
|
|
size_t j = 0; |
|
|
|
|
/* Initialiser à 0 */ |
|
|
|
|
while (i < VECTOR_SIZE) |
|
|
|
|
cgs[cluster][i++] = 0; |
|
|
|
|
vectors_in_cluster[cluster] = 0; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
if ((*s)[i] != cluster) |
|
|
|
|
/* Vector i is not part of the cluster, skip it */ |
|
|
|
|
continue; |
|
|
|
|
for (i = 0; i < N_VECTORS; i++) { |
|
|
|
|
size_t j; |
|
|
|
|
|
|
|
|
|
while (j < n) |
|
|
|
|
(*cg)[j] += benchmark_data[i][j]; |
|
|
|
|
} |
|
|
|
|
cluster = s[i]; |
|
|
|
|
++vectors_in_cluster[cluster]; |
|
|
|
|
|
|
|
|
|
for (i = 0; i < n; i++) { |
|
|
|
|
(*cg)[i] /= n; |
|
|
|
|
for (j = 0; j < VECTOR_SIZE; j++) |
|
|
|
|
cgs[cluster][j] += benchmark_data[i][j]; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
for (cluster = 0; cluster < n_clusters; cluster++) |
|
|
|
|
for (i = 0; i < VECTOR_SIZE; i++) |
|
|
|
|
cgs[cluster][i] /= (double) vectors_in_cluster[cluster]; |
|
|
|
|
|
|
|
|
|
free(vectors_in_cluster); |
|
|
|
|
|
|
|
|
|
return cgs; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
int solution_voisine(int (*dest)[230], int (*src)[230]) |
|
|
|
|
int solution_voisine(solution *dest, const solution src) |
|
|
|
|
{ |
|
|
|
|
/* This function changes about 10% of the solution randomly */ |
|
|
|
|
|
|
|
|
|
memcpy(*dest, *src, sizeof(*src)); |
|
|
|
|
|
|
|
|
|
const size_t n = sizeof(*src) / sizeof((*src)[0]); |
|
|
|
|
size_t i; |
|
|
|
|
const int a = 10000; /* b = 10% of a */ |
|
|
|
|
const int b = 1000; |
|
|
|
|
bool valid; |
|
|
|
|
|
|
|
|
|
if (!(*dest = calloc(N_VECTORS, sizeof(**dest)))) |
|
|
|
|
abort(); |
|
|
|
|
|
|
|
|
|
assert(n == 230); |
|
|
|
|
memcpy(*dest, src, N_VECTORS * sizeof(*src)); |
|
|
|
|
|
|
|
|
|
for (i = 0; i < n; i++) { |
|
|
|
|
assert(n_clusters > 1); |
|
|
|
|
|
|
|
|
|
for (i = 0; i < N_VECTORS; i++) { |
|
|
|
|
if (rand() % a < b) { |
|
|
|
|
int new_cluster = rand() % (n_clusters - 1); |
|
|
|
|
/* To avoid the case where the cluster isn't actually changed. */ |
|
|
|
|
new_cluster += (new_cluster >= (*src)[i]); |
|
|
|
|
new_cluster += (new_cluster >= src[i]); |
|
|
|
|
(*dest)[i] = new_cluster; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
return solution_valide(dest); |
|
|
|
|
if (!(valid = solution_valide(*dest))) { |
|
|
|
|
free(*dest); |
|
|
|
|
*dest = NULL; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
return valid; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
#if 0 |
|
|
|
|
double fonction_objective(const solution s) |
|
|
|
|
{ |
|
|
|
|
/* NOTE: this is very inefficient on processor time, because
|
|
|
|
|
* each of these functions re-calculates the same centre_gravite |
|
|
|
|
* twice. */ |
|
|
|
|
|
|
|
|
|
double **cgs = calloc(n_clusters, sizeof *cgs); |
|
|
|
|
double inter; |
|
|
|
|
double intra; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if (!cgs) |
|
|
|
|
abort(); |
|
|
|
|
|
|
|
|
|
inter = calculer_inter(s, cgs); |
|
|
|
|
inter = calculer_inter(s, cgs); |
|
|
|
|
|
|
|
|
|
return inter - intra; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
static double calculer_inter(const solution s, vector *cgs) |
|
|
|
|
{ |
|
|
|
|
const size_t n = sizeof(*s) / sizeof((*s)[0]); |
|
|
|
|
double inter = 0.0; |
|
|
|
|
int cluster; |
|
|
|
|
|
|
|
|
|
for (cluster = 0; cluster < n_clusters; cluster++) { |
|
|
|
|
double intra_for_this_cluster = 0.0; |
|
|
|
|
int vectors_in_this_cluster = 0; |
|
|
|
|
size_t i; |
|
|
|
|
|
|
|
|
|
centre_gravite(cluster, s, &cg); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
static double calculer_intra(const solution s, vector *cgs) |
|
|
|
|
{ |
|
|
|
|
const size_t n = sizeof(*s) / sizeof((*s)[0]); |
|
|
|
|
double intra = 0.0; |
|
|
|
|
int cluster; |
|
|
|
|
|
|
|
|
|
for (cluster = 0; cluster < n_clusters; cluster++) { |
|
|
|
|
double intra_for_this_cluster = 0.0; |
|
|
|
|
int vectors_in_this_cluster = 0; |
|
|
|
|
size_t i; |
|
|
|
|
|
|
|
|
|
centre_gravite(cluster, s, cgs[cluster], 42); |
|
|
|
|
|
|
|
|
|
for (i = 0; i < n; i++) { |
|
|
|
|
if ((*s)[i] != cluster) |
|
|
|
|
continue; |
|
|
|
|
|
|
|
|
|
++vectors_in_this_cluster; |
|
|
|
|
intra_for_this_cluster += distance(&cg, &benchmark_data[i]); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
intra += intra_for_this_cluster / (double) vectors_in_this_cluster; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
return intra / (double) n_clusters; |
|
|
|
|
} |
|
|
|
|
#endif /* 0 */ |
|
|
|
|