00001 #include <stdio.h>
00002 #include <ctype.h>
00003 #include <stdlib.h>
00004 #include <assert.h>
00005
/* Number of letters in the alphabet under consideration ('a' to 'z'). */
#define A_SIZE 26
00007
/*
 * Pairs a letter with its frequency of occurrence.
 * Throughout this file the frequency is expressed as a percentage.
 */
typedef struct {
    char   c;     /* the letter */
    double freq;  /* frequency of that letter */
} letter_freq_t;
00012
00013 letter_freq_t TAB_REF_FR[A_SIZE] = {
00014 {'a', 8.11}, {'b', 0.81}, {'c', 3.38}, {'d', 4.28}, {'e', 17.69}, {'f', 1.13},
00015 {'g', 1.19}, {'h', 0.74}, {'i', 7.24}, {'j', 0.18}, {'k', 0.02}, {'l', 5.99},
00016 {'m', 2.29}, {'n', 7.68}, {'o', 5.20}, {'p', 2.92}, {'q', 0.83}, {'r', 6.43},
00017 {'s', 8.87}, {'t', 7.44}, {'u', 5.23}, {'v', 1.28}, {'w', 0.06}, {'x', 0.53},
00018 {'y', 0.26}, {'z', 0.12 }
00019 };
00020
00021 size_t count_letters_in(char * file, int tab[A_SIZE]) {
00022 assert(A_SIZE >= 26);
00023 FILE * fi = fopen(file, "r");
00024 if ( fi == NULL ) {
00025 perror("[Erreur] fopen");
00026 return 1;
00027 }
00028 int c;
00029 size_t res = 0;
00030 while (1) {
00031 c = getc(fi);
00032 if ( feof(fi) ) break;
00033 c = tolower(c);
00034 if ( (c >= 'a') && (c <= 'z') ) {
00035 tab[c - 'a']++;
00036 res++;
00037 }
00038 }
00039 fclose(fi);
00040 return res;
00041 }
00042
00043 void analyse_freq(char * file, letter_freq_t tab[A_SIZE]) {
00044 int * tmp_tab = (int *) calloc(A_SIZE, sizeof(int));
00045 size_t i, nb_char = count_letters_in(file, tmp_tab);
00046 for (i=0; i<A_SIZE; i++) {
00047 tab[i].c = i + 'a';
00048 tab[i].freq = ((double) tmp_tab[i]*100)/nb_char;
00049 }
00050 free(tmp_tab);
00051 }
00052
00053 int compare(const void * a, const void * b) {
00054 double freq_a = ((letter_freq_t *) a)->freq,
00055 freq_b = ((letter_freq_t *) b)->freq;
00056 return (freq_a < freq_b) ? 1 : (freq_a > freq_b) ? -1 : 0;
00057 }
00058
00059 void sort_by_letter_freq(letter_freq_t tab[A_SIZE]) {
00060 qsort(tab, A_SIZE, sizeof(letter_freq_t), compare);
00061 }
00062
00063
00064 void show_freq_comparison(letter_freq_t tab[A_SIZE], int nb_line_to_show) {
00065 letter_freq_t * tab_ref = TAB_REF_FR;
00066 int i, limit = (nb_line_to_show < A_SIZE)?nb_line_to_show:A_SIZE;
00067 sort_by_letter_freq(tab_ref);
00068 printf("---------------+----------------\n");
00069 printf(" Texte analysé | Référence (fr)\n");
00070 printf("---------------+----------------\n");
00071 for (i=0; i<limit; i++) {
00072 printf(" %c : %5.2f%% |", tab[i].c, tab[i].freq);
00073 printf(" %c : %2.2f%%\n", tab_ref[i].c, tab_ref[i].freq);
00074 }
00075 if (limit < A_SIZE) printf(" [...]\n");
00076 }
00077
00078 int main(int argc, char * argv[])
00079 {
00080 if (argc != 2) {
00081 printf("usage: %s filename\n",argv[0]);
00082 return EXIT_FAILURE;
00083 }
00084 letter_freq_t tab_freq[A_SIZE];
00085 analyse_freq(argv[1], tab_freq);
00086 sort_by_letter_freq(tab_freq);
00087 show_freq_comparison(tab_freq, 10);
00088 return 0;
00089 }