Browse Source

Minor rewrite using dynamic arrays.

Signed-off-by: wmb <wmb@teknik.io>
master
wmb 1 year ago
parent
commit
a46b8a2e67
  1. 9909
      benchmark-data.c
  2. 8
      benchmark-data.h
  3. 205
      main.c
  4. 27
      preprocess.py

9909
benchmark-data.c

File diff suppressed because it is too large Load Diff

8
benchmark-data.h

@ -1,6 +1,12 @@ @@ -1,6 +1,12 @@
#ifndef BENCHMARK_DATA_H
#define BENCHMARK_DATA_H

#include <stddef.h>

/* One data point: a heap-allocated array of VECTOR_SIZE doubles. */
typedef double *vector;

/*
 * The benchmark data set, defined in the generated benchmark-data.c.
 * benchmark_data holds N_VECTORS vectors of VECTOR_SIZE attributes each.
 * The table is allocated and filled by init_benchmark_data(), so it must
 * not be read before that function has been called.
 */
extern vector *benchmark_data;
extern const size_t N_VECTORS;
extern const size_t VECTOR_SIZE;

/* Allocates benchmark_data and fills it with the generated values. */
void init_benchmark_data(void);

#endif /* BENCHMARK_DATA_H */

205
main.c

@ -13,16 +13,26 @@ @@ -13,16 +13,26 @@
/* Types */

/*
 * A clustering of the benchmark data: an array of N_VECTORS cluster
 * indices, where element i is the cluster assigned to benchmark_data[i].
 */
typedef int *solution;

/* Functions */
double distance(const vector *u, const vector *v);
bool trouver_solution_initiale(void);
bool solution_valide(const solution s);
void print_solution(const solution s, const char *name);
vector *centres_gravite(const solution s);
int solution_voisine(solution *dest, const solution src);

/* Objective function: not finished yet, so it is compiled out. */
#if 0
double fonction_objective(const solution s);
static double calculer_inter(const solution s, vector *cgs);
static double calculer_intra(const solution s, vector *cgs);
#endif /* 0 */

/* Current solution and the candidate neighbour generated from it. */
solution solution_opt;
solution new_solution;

/* Number of clusters; default value, overridable with the -c option. */
int n_clusters = 5;
/* Number of neighbour solutions generated by the main loop. */
int n_iterations = 1000;
@ -37,7 +47,7 @@ int main(int argc, char *argv[]) @@ -37,7 +47,7 @@ int main(int argc, char *argv[])
switch (c) {
case 'c':
k = atoi(optarg);
if (k > 0)
if (k > 1)
n_clusters = k;
break;
case 'i':
@ -58,36 +68,37 @@ int main(int argc, char *argv[]) @@ -58,36 +68,37 @@ int main(int argc, char *argv[])
srand(time(NULL));
if (!(solution_opt = calloc(N_VECTORS, sizeof(solution_opt[0]))))
abort();
/* Solution initiale */
while (!trouver_solution_initiale())
continue; /* do it again */
print_solution(&solution, "initiale");
print_solution(solution_opt, "initiale");
for (i = 0; i < n_iterations; i++) {
while (!solution_voisine(&new_solution, &solution))
while (!solution_voisine(&new_solution, solution_opt))
continue; /* do it again */
/* TODO: Calculer les centres de gravités, les distances moyennes inter-/intra-cluster,
* et faire le reste de l'algorithme */
print_solution(new_solution, "new solution");
}
exit(EXIT_SUCCESS);
}
double distance(const double (*u)[42], const double (*v)[42])
double distance(const vector *u, const vector *v)
{
size_t n = sizeof(*v) / sizeof((*v)[0]);
size_t i = 0;
double sum = 0.0;
printf("distance(): n = %zu\n", n);
/* d(u, v) = sqrt( (u1 - v1)^2 + (u2 - v2)^2 + ... + (un - vn)^2 ) */
for (; i < n; i++)
sum += pow((*u)[i] - (*v)[i], 2);
for (; i < VECTOR_SIZE; i++)
sum += pow(u[i] - v[i], 2);
return sqrt(sum);
}
@ -96,13 +107,13 @@ bool trouver_solution_initiale(void) @@ -96,13 +107,13 @@ bool trouver_solution_initiale(void)
{
size_t i;
for (i = 0; i < sizeof(solution) / sizeof(solution[0]); i++)
solution[i] = rand() % n_clusters;
for (i = 0; i < N_VECTORS; i++)
solution_opt[i] = rand() % n_clusters;
return solution_valide(&solution);
return solution_valide(solution_opt);
}
bool solution_valide(int (*s)[230])
bool solution_valide(const solution s)
{
/* This function returns true if the solution that it found is "valid",
* i.e., it has at least one element in each cluster. */
@ -114,8 +125,8 @@ bool solution_valide(int (*s)[230]) @@ -114,8 +125,8 @@ bool solution_valide(int (*s)[230])
for (i = 0; i < n_clusters; i++)
cluster_empty[i] = true;
for (i = 0; (size_t) i < sizeof(*s) / sizeof(*s[0]); i++)
cluster_empty[(*s)[i]] = false;
for (i = 0; (size_t) i < N_VECTORS; i++)
cluster_empty[s[i]] = false;
for (i = 0; valid && i < n_clusters; i++)
valid = (valid && !cluster_empty[i]);
@ -125,79 +136,161 @@ bool solution_valide(int (*s)[230]) @@ -125,79 +136,161 @@ bool solution_valide(int (*s)[230])
return valid;
}
void print_solution(int (*s)[230], const char *name)
void print_solution(const solution s, const char *name)
{
size_t i = 0, n = sizeof(*s) / sizeof((*s)[0]);
size_t i = 0;
printf("Solution%s%s:\n[", name ? " " : "", name ? name : "");
printf("Solution%s%s: [\n", name ? " " : "", name ? name : "");
while (i < n) {
while (i < N_VECTORS) {
const char *end;
if (i + 1 == n)
if (i + 1 == N_VECTORS)
end = "]\n";
else if (i && i % 20 == 0)
else if ((i + 1) % 20 == 0)
end = ",\n";
else
end = ", ";
printf("%d%s", (*s)[i++], end);
printf("%d%s", s[i++], end);
}
}
void centre_gravite(int cluster, int (*s)[230], double (*cg)[42])
vector *centres_gravite(const solution s)
{
/* Calculer le centre de gravité pour les vecteurs du cluster cluster selon la solution s */
const size_t n = sizeof(*cg) / sizeof((*cg)[0]); /* nombre d'attributs dans chaque vecteur. n = 42 */
const size_t nv = sizeof(benchmark_data) / sizeof(benchmark_data[0]); /* nombre de vecteurs. nv = 230 */
int cluster;
size_t i = 0;
vector *cgs = calloc(n_clusters, sizeof(*cgs));
int *vectors_in_cluster = calloc(n_clusters, sizeof(*vectors_in_cluster));
assert(n == 42);
assert(nv == 230);
if (!cgs || !vectors_in_cluster)
abort();
/* Initialiser à 0 */
while (i < n)
(*cg)[i++] = 0;
for (cluster = 0; cluster < n_clusters; cluster++) {
if (!(cgs[cluster] = calloc(VECTOR_SIZE, sizeof(*cgs[cluster]))))
abort();
/* Loop over all vectors */
for (i = 0; i < nv; i++) {
size_t j = 0;
/* Initialiser à 0 */
while (i < VECTOR_SIZE)
cgs[cluster][i++] = 0;
vectors_in_cluster[cluster] = 0;
}
if ((*s)[i] != cluster)
/* Vector i is not part of the cluster, skip it */
continue;
for (i = 0; i < N_VECTORS; i++) {
size_t j;
while (j < n)
(*cg)[j] += benchmark_data[i][j];
}
cluster = s[i];
++vectors_in_cluster[cluster];
for (i = 0; i < n; i++) {
(*cg)[i] /= n;
for (j = 0; j < VECTOR_SIZE; j++)
cgs[cluster][j] += benchmark_data[i][j];
}
for (cluster = 0; cluster < n_clusters; cluster++)
for (i = 0; i < VECTOR_SIZE; i++)
cgs[cluster][i] /= (double) vectors_in_cluster[cluster];
free(vectors_in_cluster);
return cgs;
}
/*
 * Build a neighbour of solution src by randomly reassigning about 10% of
 * its vectors to a different cluster.
 *
 * A fresh array of N_VECTORS entries is allocated and stored in *dest; any
 * pointer previously held in *dest is overwritten, not freed.  If the
 * neighbour is valid according to solution_valide(), non-zero is returned
 * and the caller owns *dest and must free() it.  Otherwise the array is
 * released, *dest is set to NULL and 0 is returned.
 *
 * Requires n_clusters > 1.  Aborts if memory cannot be allocated.
 */
int solution_voisine(solution *dest, const solution src)
{
	const int a = 10000;
	const int b = 1000;	/* b = 10% of a */
	size_t i;
	bool valid;

	/* With a single cluster there is no other cluster to move to. */
	assert(n_clusters > 1);

	if (!(*dest = calloc(N_VECTORS, sizeof(**dest))))
		abort();

	memcpy(*dest, src, N_VECTORS * sizeof(*src));

	for (i = 0; i < N_VECTORS; i++) {
		if (rand() % a < b) {
			int new_cluster = rand() % (n_clusters - 1);

			/* To avoid the case where the cluster isn't actually
			 * changed: skip over the current cluster index. */
			new_cluster += (new_cluster >= src[i]);
			(*dest)[i] = new_cluster;
		}
	}

	if (!(valid = solution_valide(*dest))) {
		free(*dest);
		*dest = NULL;
	}

	return valid;
}
#if 0
double fonction_objective(const solution s)
{
/* NOTE: this is very ineffecient on processor time, because
* each of these functions re-calculates the same centre_gravite
* twice. */
double **cgs = calloc(n_clusters, sizeof *cgs);
double inter;
double intra;
if (!cgs)
abort();
inter = calculer_inter(s, cgs);
inter = calculer_inter(s, cgs);
return inter - intra;
}
/*
 * Unfinished stub, compiled out by the enclosing #if 0: it loops over the
 * clusters but accumulates nothing into `inter` and has no return
 * statement.
 * NOTE(review): presumably meant to compute the inter-cluster measure used
 * by fonction_objective() -- the exact definition is not visible here and
 * must be confirmed before this is enabled.
 */
static double calculer_inter(const solution s, vector *cgs)
{
/* NOTE(review): `solution` is a plain `int *`, so `(*s)[0]` is not a valid
 * expression; the element count is N_VECTORS. */
const size_t n = sizeof(*s) / sizeof((*s)[0]);
double inter = 0.0;
int cluster;
for (cluster = 0; cluster < n_clusters; cluster++) {
/* The three locals below are declared but never used in this stub. */
double intra_for_this_cluster = 0.0;
int vectors_in_this_cluster = 0;
size_t i;
/* NOTE(review): no `cg` is declared in this function, and this call uses
 * the three-argument centre_gravite() form rather than the `cgs`
 * parameter -- needs rewriting. */
centre_gravite(cluster, s, &cg);
}
}
/*
 * Mean intra-cluster distance of solution s.
 *
 * For every cluster, the distances between its members and its centre of
 * gravity are averaged; the result is the mean of those per-cluster
 * averages over all n_clusters clusters.
 *
 * cgs must hold one precomputed centre of gravity per cluster (for example
 * the table returned by centres_gravite()); it is only read here.  A
 * cluster without members contributes 0 instead of dividing by zero.
 */
static double calculer_intra(const solution s, vector *cgs)
{
	double intra = 0.0;
	int cluster;

	for (cluster = 0; cluster < n_clusters; cluster++) {
		double intra_for_this_cluster = 0.0;
		size_t vectors_in_this_cluster = 0;
		size_t i;

		for (i = 0; i < N_VECTORS; i++) {
			if (s[i] != cluster)
				continue;

			++vectors_in_this_cluster;
			intra_for_this_cluster += distance(&cgs[cluster], &benchmark_data[i]);
		}

		if (vectors_in_this_cluster > 0)
			intra += intra_for_this_cluster / (double) vectors_in_this_cluster;
	}

	return intra / (double) n_clusters;
}
#endif /* 0 */

27
preprocess.py

@ -141,13 +141,26 @@ def main(): @@ -141,13 +141,26 @@ def main():
[attr["name"] for attr in attributes])), indent=4))
with open(OUTPUT_FILENAME, 'w', encoding="utf-8") as out:
print("const double benchmark_data[{}][{}] = {}"
.format(len(data), len(attributes), '{'),
file=out)
#print('\n'.join(map(lambda l: ','.join(map(str, l)), data)), file=out)
for line in data:
print("\t{" + ", ".join([str(field) for field in line]) + "},", file=out)
print("};", file=out)
print('#include "benchmark-data.h"\n#include <stdlib.h>\n', file=out)
print("vector *benchmark_data;", file=out)
print("const size_t N_VECTORS = {};".format(len(data)), file=out)
print("const size_t VECTOR_SIZE = {};\n".format(len(attributes)), file=out)
print("void init_benchmark_data(void)\n{", file=out)
print("\tsize_t i;\n", file=out)
print("\tbenchmark_data = calloc(N_VECTORS, sizeof(*benchmark_data));\n", file=out)
print("\tfor (i = 0; i < N_VECTORS; i++)", file=out)
print("\t\tbenchmark_data[i] = calloc(VECTOR_SIZE, sizeof(benchmark_data[i][0]));\n", file=out)
for nv, vector in enumerate(data):
for na, attr in enumerate(vector):
print("\tbenchmark_data[{}][{}] = {};".format(nv, na, vector[na]),
file=out)
print("}", file=out)
# Script entry point: call main() only when this file is run directly,
# not when it is imported as a module.
if __name__ == "__main__":
main()

Loading…
Cancel
Save