9p / who / tweedy / 9C / 1


Exercise 1.13: Histogram of Word Lengths

Code

#include <u.h>
#include <libc.h>
#include <bio.h>

#define STDIN   0
#define BINS    5   /* number of histogram bins */
#define MAXLEN  100 /* we need to assume a maximum word length */
#define IN  1   /* inside a word */
#define OUT 0   /* outside a word */

/* 
 * histogram of word lengths 
 *
 * This implementation computes bin sizes and sorts words into them, rather than
 * assuming a fixed set (e.g. 1-10) and sorting everything else (e.g. words of 
 * length > 10) into an overflow bin.
 */

/*
 * Read text from standard input and print a histogram of word lengths.
 *
 * A word is a maximal run of characters other than blank, tab and newline.
 * The range of observed lengths [shortest, longest] is divided into BINS
 * contiguous integer sub-ranges of near-equal width; every observed length
 * falls into exactly one bin.  One '*' is printed per word in each bin,
 * followed by a legend giving the length range each bin covers.
 *
 * Words of MAXLEN characters or more are tallied as length MAXLEN-1 so
 * the words[] table can never be indexed out of bounds.
 *
 * If fewer than BINS distinct lengths span the observed range (including
 * the case of no words at all), a message is printed instead.
 */
void
main()
{
    Biobuf *bstdin;
    int i, j, c, len, span;
    int shortest, longest;
    int words[MAXLEN], hist[BINS];
    int binlo[BINS], binhi[BINS];

    for(i = 0; i < MAXLEN; ++i)
        words[i] = 0;
    for(i = 0; i < BINS; ++i)
        hist[i] = 0;
    len = 0;
    longest = 0;
    shortest = MAXLEN;

    bstdin = Bfdopen(STDIN, OREAD);
    if(bstdin == 0)
        exits("Bfdopen");

    /*
     * count word lengths, keep track of shortest and longest.
     * c must be an int: Bgetc returns a negative value at end of input,
     * which a char cannot reliably distinguish from a data byte.
     * End of input is treated as one more separator so that a final
     * word without trailing whitespace is still tallied.
     */
    do{
        c = Bgetc(bstdin);
        if(c < 0 || c == ' ' || c == '\t' || c == '\n'){
            if(len > 0){
                ++words[len];
                if(len > longest)
                    longest = len;
                if(len < shortest)
                    shortest = len;
            }
            len = 0;
        }
        else if(len < MAXLEN-1)
            ++len;  /* clamp over-long words to the last slot */
    }while(c >= 0);

    /* number of distinct lengths in the observed range (<= 0 if no words) */
    span = longest - shortest + 1;
    if(span < BINS){
        print("Not enough variance to generate histogram.\n");
        exits(0);
    }

    /*
     * compute bin ranges and fill histogram.
     * Bin i starts at offset ceil(i*span/BINS) from shortest and ends just
     * before the start of bin i+1, so the bins tile [shortest, longest]
     * with no gaps or overlaps, and the last bin ends exactly at longest.
     */
    for(i = 0; i < BINS; ++i){
        binlo[i] = shortest + (i*span + BINS-1)/BINS;
        binhi[i] = shortest + ((i+1)*span + BINS-1)/BINS - 1;
        for(j = binlo[i]; j <= binhi[i]; ++j)
            hist[i] += words[j];
    }

    /* display histogram */
    for(i = 0; i < BINS; ++i){
        print(" %d| ", i+1);
        for(j = 0; j < hist[i]; ++j)
            print("*");
        print("\n");
    }
    print("---\n");
    /* legend: the range of word lengths covered by each bin */
    for(i = 0; i < BINS; ++i)
        print("[%d] %.1f-%.1f\t", i+1, (double)binlo[i], (double)binhi[i]);
    print("\n");

    exits(0);
}

Output

$ 9c lhist.c; 9l lhist.o -o lhist
$ ./lhist < TEXT
 1| ****************************
 2| *******************
 3| ****************
 4| ********
 5| * 
---
[1] 1.0-3.0 [2] 4.0-6.0 [3] 7.0-9.0 [4] 10.0-12.0   [5] 13.0-15.0   

TEXT

In C, a function is equivalent to a subroutine or function in Fortran, or a procedure or function in Pascal. A function provides a convenient way to encapsulate some computation, which can then be used without worrying about its implementation. With properly designed functions, it is possible to ignore how a job is done; knowing what is done is sufficient. C makes the use of functions easy, convenient and efficient; you will often see a short function defined and called only once, just because it clarifies some piece of code.




tweedy