Browse Source

Minor rewrite using dynamic arrays.

Signed-off-by: wmb <wmb@teknik.io>
tags/v0.1.0
wmb 1 month ago
parent
commit
a46b8a2e67
4 changed files with 9853 additions and 296 deletions
  1. 9677
    232
      benchmark-data.c
  2. 7
    1
      benchmark-data.h
  3. 149
    56
      main.c
  4. 20
    7
      preprocess.py

+ 9677
- 232
benchmark-data.c
File diff suppressed because it is too large
View File


+ 7
- 1
benchmark-data.h View File

@@ -1,6 +1,12 @@
#ifndef BENCHMARK_DATA_H
#define BENCHMARK_DATA_H

extern const double benchmark_data[230][42];
#include <stddef.h>

typedef double *vector;

extern vector *benchmark_data;
extern const size_t N_VECTORS;
extern const size_t VECTOR_SIZE;

#endif /* BENCHMARK_DATA_H */

+ 149
- 56
main.c View File

@@ -13,16 +13,26 @@
/* TODO: Use typedefs for solution and vector instead of using int (*s)[230]
* and double (*v)[42]. */

/* Types */
typedef int *solution;


/* Functions */
double distance(const double (*u)[42], const double (*v)[42]);
double distance(const vector *u, const vector *v);
bool trouver_solution_initiale(void);
bool solution_valide(int (*s)[230]);
void print_solution(int (*s)[230], const char *name);
void centre_gravite(int cluster, int (*s)[230], double (*cg)[42]);
int solution_voisine(int (*dest)[230], int (*src)[230]);
bool solution_valide(const solution s);
void print_solution(const solution s, const char *name);
vector *centres_gravite(const solution s);
int solution_voisine(solution *dest, const solution src);
#if 0
double fonction_objective(const solution s);

static double calculer_inter(const solution s, vector *cgs);
static double calculer_intra(const solution s, vector *cgs);
#endif /* 0 */

int solution[230] = {0};
int new_solution[230] = {0};
solution solution_opt;
solution new_solution;

int n_clusters = 5;
int n_iterations = 1000;
@@ -37,7 +47,7 @@ int main(int argc, char *argv[])
switch (c) {
case 'c':
k = atoi(optarg);
if (k > 0)
if (k > 1)
n_clusters = k;
break;
case 'i':
@@ -58,36 +68,37 @@ int main(int argc, char *argv[])

srand(time(NULL));

if (!(solution_opt = calloc(N_VECTORS, sizeof(solution_opt[0]))))
abort();

/* Solution initiale */
while (!trouver_solution_initiale())
continue; /* do it again */

print_solution(&solution, "initiale");
print_solution(solution_opt, "initiale");

for (i = 0; i < n_iterations; i++) {

while (!solution_voisine(&new_solution, &solution))
while (!solution_voisine(&new_solution, solution_opt))
continue; /* do it again */

/* TODO: Calculer les centres de gravité, les distances moyennes inter-/intra-cluster,
* et faire le reste de l'algorithme */
print_solution(new_solution, "new solution");
}

exit(EXIT_SUCCESS);
}

double distance(const double (*u)[42], const double (*v)[42])
double distance(const vector *u, const vector *v)
{
size_t n = sizeof(*v) / sizeof((*v)[0]);
size_t i = 0;
double sum = 0.0;

printf("distance(): n = %zu\n", n);

/* d(u, v) = sqrt( (u1 - v1)^2 + (u2 - v2)^2 + ... + (un - vn)^2 ) */

for (; i < n; i++)
sum += pow((*u)[i] - (*v)[i], 2);
for (; i < VECTOR_SIZE; i++)
sum += pow(u[i] - v[i], 2);

return sqrt(sum);
}
@@ -96,13 +107,13 @@ bool trouver_solution_initiale(void)
{
size_t i;

for (i = 0; i < sizeof(solution) / sizeof(solution[0]); i++)
solution[i] = rand() % n_clusters;
for (i = 0; i < N_VECTORS; i++)
solution_opt[i] = rand() % n_clusters;

return solution_valide(&solution);
return solution_valide(solution_opt);
}

bool solution_valide(int (*s)[230])
bool solution_valide(const solution s)
{
/* This function returns true if the solution that it found is "valid",
* i.e., it has at least one element in each cluster. */
@@ -114,8 +125,8 @@ bool solution_valide(int (*s)[230])
for (i = 0; i < n_clusters; i++)
cluster_empty[i] = true;

for (i = 0; (size_t) i < sizeof(*s) / sizeof(*s[0]); i++)
cluster_empty[(*s)[i]] = false;
for (i = 0; (size_t) i < N_VECTORS; i++)
cluster_empty[s[i]] = false;

for (i = 0; valid && i < n_clusters; i++)
valid = (valid && !cluster_empty[i]);
@@ -125,79 +136,161 @@ bool solution_valide(int (*s)[230])
return valid;
}

void print_solution(int (*s)[230], const char *name)
void print_solution(const solution s, const char *name)
{
size_t i = 0, n = sizeof(*s) / sizeof((*s)[0]);
size_t i = 0;

printf("Solution%s%s:\n[", name ? " " : "", name ? name : "");
printf("Solution%s%s: [\n", name ? " " : "", name ? name : "");

while (i < n) {
while (i < N_VECTORS) {
const char *end;

if (i + 1 == n)
if (i + 1 == N_VECTORS)
end = "]\n";
else if (i && i % 20 == 0)
else if ((i + 1) % 20 == 0)
end = ",\n";
else
end = ", ";

printf("%d%s", (*s)[i++], end);
printf("%d%s", s[i++], end);
}
}

void centre_gravite(int cluster, int (*s)[230], double (*cg)[42])
vector *centres_gravite(const solution s)
{
/* Calculer le centre de gravité pour les vecteurs du cluster cluster selon la solution s */

const size_t n = sizeof(*cg) / sizeof((*cg)[0]); /* nombre d'attributs dans chaque vecteur. n = 42 */
const size_t nv = sizeof(benchmark_data) / sizeof(benchmark_data[0]); /* nombre de vecteurs. nv = 230 */
int cluster;
size_t i = 0;
vector *cgs = calloc(n_clusters, sizeof(*cgs));
int *vectors_in_cluster = calloc(n_clusters, sizeof(*vectors_in_cluster));

assert(n == 42);
assert(nv == 230);
if (!cgs || !vectors_in_cluster)
abort();

/* Initialiser à 0 */
while (i < n)
(*cg)[i++] = 0;
for (cluster = 0; cluster < n_clusters; cluster++) {
if (!(cgs[cluster] = calloc(VECTOR_SIZE, sizeof(*cgs[cluster]))))
abort();

/* Loop over all vectors */
for (i = 0; i < nv; i++) {
size_t j = 0;
/* Initialiser à 0 */
while (i < VECTOR_SIZE)
cgs[cluster][i++] = 0;
vectors_in_cluster[cluster] = 0;
}

if ((*s)[i] != cluster)
/* Vector i is not part of the cluster, skip it */
continue;
for (i = 0; i < N_VECTORS; i++) {
size_t j;

while (j < n)
(*cg)[j] += benchmark_data[i][j];
}
cluster = s[i];
++vectors_in_cluster[cluster];

for (i = 0; i < n; i++) {
(*cg)[i] /= n;
for (j = 0; j < VECTOR_SIZE; j++)
cgs[cluster][j] += benchmark_data[i][j];
}

for (cluster = 0; cluster < n_clusters; cluster++)
for (i = 0; i < VECTOR_SIZE; i++)
cgs[cluster][i] /= (double) vectors_in_cluster[cluster];

free(vectors_in_cluster);

return cgs;
}

int solution_voisine(int (*dest)[230], int (*src)[230])
int solution_voisine(solution *dest, const solution src)
{
/* This function changes about 10% of the solution randomly */

memcpy(*dest, *src, sizeof(*src));

const size_t n = sizeof(*src) / sizeof((*src)[0]);
size_t i;
const int a = 10000; /* b = 10% of a */
const int b = 1000;
bool valid;

if (!(*dest = calloc(N_VECTORS, sizeof(**dest))))
abort();

assert(n == 230);
memcpy(*dest, src, N_VECTORS * sizeof(*src));

for (i = 0; i < n; i++) {
assert(n_clusters > 1);

for (i = 0; i < N_VECTORS; i++) {
if (rand() % a < b) {
int new_cluster = rand() % (n_clusters - 1);
/* To avoid the case where the cluster isn't actually changed. */
new_cluster += (new_cluster >= (*src)[i]);
new_cluster += (new_cluster >= src[i]);
(*dest)[i] = new_cluster;
}
}

return solution_valide(dest);
if (!(valid = solution_valide(*dest))) {
free(*dest);
*dest = NULL;
}

return valid;
}

#if 0
double fonction_objective(const solution s)
{
/* NOTE: this is very inefficient on processor time, because
* each of these functions re-calculates the same centre_gravite
* twice. */

double **cgs = calloc(n_clusters, sizeof *cgs);
double inter;
double intra;


if (!cgs)
abort();

inter = calculer_inter(s, cgs);
inter = calculer_inter(s, cgs);

return inter - intra;
}

static double calculer_inter(const solution s, vector *cgs)
{
const size_t n = sizeof(*s) / sizeof((*s)[0]);
double inter = 0.0;
int cluster;

for (cluster = 0; cluster < n_clusters; cluster++) {
double intra_for_this_cluster = 0.0;
int vectors_in_this_cluster = 0;
size_t i;

centre_gravite(cluster, s, &cg);
}

}

static double calculer_intra(const solution s, vector *cgs)
{
const size_t n = sizeof(*s) / sizeof((*s)[0]);
double intra = 0.0;
int cluster;

for (cluster = 0; cluster < n_clusters; cluster++) {
double intra_for_this_cluster = 0.0;
int vectors_in_this_cluster = 0;
size_t i;

centre_gravite(cluster, s, cgs[cluster], 42);

for (i = 0; i < n; i++) {
if ((*s)[i] != cluster)
continue;

++vectors_in_this_cluster;
intra_for_this_cluster += distance(&cg, &benchmark_data[i]);
}

intra += intra_for_this_cluster / (double) vectors_in_this_cluster;
}

return intra / (double) n_clusters;
}
#endif /* 0 */

+ 20
- 7
preprocess.py View File

@@ -141,13 +141,26 @@ def main():
[attr["name"] for attr in attributes])), indent=4))

with open(OUTPUT_FILENAME, 'w', encoding="utf-8") as out:
print("const double benchmark_data[{}][{}] = {}"
.format(len(data), len(attributes), '{'),
file=out)
#print('\n'.join(map(lambda l: ','.join(map(str, l)), data)), file=out)
for line in data:
print("\t{" + ", ".join([str(field) for field in line]) + "},", file=out)
print("};", file=out)
print('#include "benchmark-data.h"\n#include <stdlib.h>\n', file=out)

print("vector *benchmark_data;", file=out)
print("const size_t N_VECTORS = {};".format(len(data)), file=out)
print("const size_t VECTOR_SIZE = {};\n".format(len(attributes)), file=out)

print("void init_benchmark_data(void)\n{", file=out)
print("\tsize_t i;\n", file=out)

print("\tbenchmark_data = calloc(N_VECTORS, sizeof(*benchmark_data));\n", file=out)

print("\tfor (i = 0; i < N_VECTORS; i++)", file=out)
print("\t\tbenchmark_data[i] = calloc(VECTOR_SIZE, sizeof(benchmark_data[i][0]));\n", file=out)

for nv, vector in enumerate(data):
for na, attr in enumerate(vector):
print("\tbenchmark_data[{}][{}] = {};".format(nv, na, vector[na]),
file=out)

print("}", file=out)

if __name__ == "__main__":
main()

Loading…
Cancel
Save