/* tree.c
 *
 * This is the complete example program of K&R on self-referential data
 * structures as presented in 'The C programming Language'.  The only basic
 * change has been the calls to calloc and malloc instead of their handy-dandy
 * alloc function.
 * Also the function type() here, recognizes
 * apostrophied words.  For clarity, I have added the explicit returns
 * of called functions, even when they only return an integer.
 *
 * Modified for compilation as middle memory model which requires more
 * careful type bookkeeping. 12/11/87 WCH
 *
 * Compilation for SCO XENIX V 2.X with intel 80286 processor.
 *
 *		cc -o tree -Mm2e -DM_I286 -O -W 2 -s tree.c
 *
 * Compilation for SCO XENIX V 3.X with intel 80386 processor.
 *
 *		cc -o tree -O -W 2 -s tree.c
 *
 * Added: -c option which uses word recognition more like C language
 *	  word.
 *
 * Usage: tree [-c] < infile > outfile
 */
#include <stdio.h>
#include <string.h>

	/* A local include file used for M & L mem models */
#include  <stdfun.h> 


#define ALLOCSIZE 1
#define MAXWORD 50
#define BUFSIZE 512

	/* these are merely used as tokens */
#define LETTER -13
#define DIGIT -9 

		/* the basic node */
struct tnode {

	char *word;	        /* points to the word of the node */
	int count;	        /* number of occurrences */
	struct tnode *left;     /* pointer to left child node */
	struct tnode *right;    /* pointer to right child node */

	};

/* For use in getch and ungetch and must be available to both functions */
char buf[BUFSIZE];

int bufp = 0;	   /* next free position in buffer */
int wflag = '\0';


static char _sccsid[] = { " %M% %I% %H% " };

int main( argc, argv )
int argc;
char *argv[];
{
	void treeprint();
	struct tnode *root, *tree();
	char word[MAXWORD];
	int t, getword();

	if( argc > 1 && *argv[1] == '-' )
		switch( *(argv[1] + 1) )    {
			case 'c':
				wflag = 'c';
				break;
			default :
				fprintf( stderr, "Usage: %s -[c] < infile > outfile\n", argv[0] );
				exit( 1 );
		}

	root = NULL;
	while( ( t = getword( word, MAXWORD ) ) != EOF )
		if( t == LETTER )
			root = tree( root, word );
	treeprint( root );

	exit( 0 );

	/* NOTREACHED */
	return( 0 );
}

/* NB although a called function may not "directly" alter the value of a
 * variable local to the calling function, if as in this case, the called
 * function is passed the ADDRESS of the variable in question, the called
 * function can then write directly to the address and "indirectly" modify the
 * contents at that address.  This is exactly what getword() does.
 */

int getword( w, lim )
char *w;
int lim;
{
	void ungetch();
	int c, t, getch(), type();

	/* This handles EOF, non-word terminating white space or punctuation
	 * and any 'word' that begins with a digit.
	 */
	if( type( *w++ = (char)(c = getch()) ) != LETTER )    {
		*w = '\0';
		return( c );
	}
	/* We have the beginning of a word */
	while( --lim > 0 )    {
		t = type( *w++ = (char)(c = getch()) );
		if( t != LETTER && t != DIGIT )    {
			/* Put back the terminating punctuation or white space,
			 * or EOF
			 */
			ungetch( c );
			break;
		}
	}
	/* w has already been written and incremented. we overwrite */
	*(w - 1) = '\0';

	/* If EOF has been put back, the loop in main will get it and terminate
	 */
	return( LETTER );

}

/* Unlike K&R we account for apostophied and hyphenated
 * words with this modified type function.
 */
int type( c )
int c;
{

	if( c >= 'a' &&  c <= 'z' || c >= 'A' && c <= 'Z' )
		return( LETTER );
	else if( c >= '0' && c <= '9' )    {
		switch( wflag )    {
			case '\0':
				break;
			case 'c':
				return( LETTER );
		};
		return( DIGIT );
	}
	else    {
		switch( wflag )    {
			case '\0':
				if( c == '-' || c == '\'' )
					return( LETTER );
				break;
			case 'c':
				if( c == '_' )
					return( LETTER );
				break;
		}
		return( c );
	}
}


/* Watch this as a function name: it is the name of a function in the curses
 * library.
 */
int getch()
{
	return( ( bufp > 0 ) ? (int)buf[--bufp] : getchar() );
}

void ungetch( c )
int c;
{
	if( bufp > BUFSIZE )
		fprintf( stderr, "ungetch: toomany characters, buffer overflow.\n" );
	else
		buf[bufp++] = (char)c;
	return;
}

struct tnode *tree( p, w )
struct tnode *p;
char *w;
{
	/* NB in the recursive call to tree, its return value is not declared
	 * here.
	 */
	struct tnode *talloc();
	char *strsave();
	int cond;


	/* New word has arrived */

	/* p gets to be null from the init of the right and left pointers
	 * at the creation of a new node.  So if p is null we have
	 * arrived at the bottom of the tree and a new node must be
	 * installed.
	 */
	if( p == NULL )    {
						/* Make a new node */
		p = talloc( );
					/* The necessity for saving the string
					 * seems to arise from the reinitializa-
					 * tion of variables in the recursive
					 * calls of tree to itself.
					 */
		if( ( p->word = strsave( w ) ) == NULL )    {
			fprintf( stderr, "tree: out of memory\n" );
			exit( 1 );
		}
		p->count = 1;
		p->left = p->right = NULL;
	}
	else if( ( cond = strcmp( w, p->word ) ) == 0 )
		p->count++;	     /* Repeated word */
	else if( cond < 0 )
		p->left = tree( p->left, w );   /* Node goes into left subtree
						 */
	else

		p->right = tree( p->right, w );

	return( p );

} /* End tree() */

void treeprint( p )	  /* Print tree recursively to stdout */
struct tnode *p;
{
	if( p != NULL )    {
		treeprint( p->left );
		printf( "%4d %s\n", p->count, p->word );
		treeprint( p->right );
	}
}

/* talloc()
 * is a C-classic method for insuring proper alignment of returned pointer.
 * Calloc always returns a pointer to char, and the coercion by cast insures
 * that the storage is allocated consistantly with the type of stucture
 * being stored.
 */

struct tnode *talloc()
{
	char *tn, *calloc();

	if( ( tn = calloc( ALLOCSIZE, sizeof( struct tnode ) ) ) == NULL )    {
		fprintf( stderr, "tree: out of memory\n" );
		exit( 1 );
		
	}
	return( (struct tnode *) tn );

}

/* Returns null pointer if out of memory */
char *strsave( s )
char *s;
{
	char *p, *malloc();

	if( ( p = malloc( (unsigned)(strlen( s ) + 1) ) ) != NULL )
		strcpy( p, s );
	return( p );

}


Return to Home Page
Return to Metayoga Page
Return to C Language Page


The URL for this document is:
http://graham.main.nc.us/~bhammel/graham/CPROGS/tree.html
Created: 1997
Last Updated: May 28, 2000 Email me, Bill Hammel at
bhammel@graham.main.nc.us
READ WARNING BEFORE SENDING E-MAIL