Huffman Tree C/C++ Implementation

Falls es wen interessiert wie das so aussieht in C++

Disclaimer: Dieser Thread wurde aus dem alten Forum importiert. Daher werden eventuell nicht alle Formatierungen richtig angezeigt. Der ursprüngliche Thread beginnt im zweiten Post dieses Threads.

Huffman Tree C/C++ Implementation
Ist zwar noch nicht fertig weil die CHuffmanTree class den Input Buffer noch codieren soll. Geschrieben hab ichs das nur weil ich es in einem Netzwerk Programm einsetzen will um die Bandbreitennutzung effizienter zu gestalten.

Der code wie er unten steht funzt soweit, habs manuell nachgerechnet kommt für alle HEX Dezimalen die codiert werden dieselbe Bitzahl heraus

Evtl wäre es ja mal für Programmierneuling interessant sich dies anzuschauen, nächstes Semester werdet ihr nämlich mit C werkeln dürfen :slight_smile:freu

/**
* Created by:	Eduard Aumüller
* Date:			2004
*
* huffman_lib.h
* This library consist of a basic huffman tree implementation,
* plus some derivations for special purposes.
* CHuffmanTree:	-contains the basic routines (see underneath)
* CHexDecimalHuffmanTree:	- devides bytes into hex decimals and 
*							builds a huffman tree on the top of it
*/

#include <stdio.h>

typedef unsigned char BYTE;
typedef unsigned int  BOOL;

#define NULL 0
#define FALSE 0
#define TRUE 1

#define HEX_LEFT	0x000000F0
#define HEX_RIGHT	0x0000000F

class CCodeWord
{
public:
	//constructor
	CCodeWord()
	{
		m_iFrequency = 0;
		m_iCodeWord = 0;
		m_iSize = 0;
		m_pLeft = NULL;
		m_pRight = NULL;
		m_pParent = NULL;
	};

	//destructor
	~CCodeWord()
	{
		//set the parent pointers to NULL
		if(m_pLeft)
			m_pLeft->m_pParent = NULL;
		if(m_pRight)
			m_pRight->m_pParent = NULL;

		//delete knots only not the ends of the list
		if(m_pLeft && m_pLeft->m_pLeft)
			delete m_pLeft;

		if(m_pRight && m_pRight->m_pRight)
			delete m_pRight;
	};

	//operator overloading
	CCodeWord& operator=(CCodeWord& word)
	{
		m_iFrequency = word.m_iFrequency;
		m_iCodeWord = word.m_iCodeWord;

		return *this;
	};

	//print to console for debugging purposes only
	void Print()
	{
		printf("(%X,%d)\n",m_iCodeWord,m_iFrequency);
		if(m_pParent)
			m_pParent->Print();
		else
			printf("end\n");
	};

	//unsigned int variables
	unsigned int m_iFrequency;
	unsigned int m_iCodeWord;
	unsigned int m_iSize;

	//pointers
	CCodeWord *m_pLeft;
	CCodeWord *m_pRight;
	CCodeWord *m_pParent;
};

class CHuffmanTree
{
public:
	//constructor
	CHuffmanTree()
	{
		m_iCodeWords = 0;
		m_iCodeWordLength = 0;
		m_iCodeWordsInUse = 0;
		m_iCodeWordsLeft = 0;

		m_iInputSize;
		m_iOutputSize;

		m_pCodeWords = NULL;
		m_pHead = NULL;

		m_pInputBuffer = NULL;
		m_pOutputBuffer = NULL;
	};

	//destructor
	~CHuffmanTree()
	{
		//delete the left buffers
		if(m_pHead)
			delete  m_pHead;

		if(m_pCodeWords)
			delete[] m_pCodeWords;

		if(m_pOutputBuffer)
			delete[] m_pOutputBuffer;
	};

	//set the input buffer and its size
	void SetInput(int iSize, BYTE *pInput)
	{
		if(!iSize && pInput == NULL)
			return;

		m_iInputSize = iSize;
		m_pInputBuffer = pInput;
	};

	//returns a pointer to the output buffer
	BYTE* GetOutput()
	{
		return m_pOutputBuffer;
	};
	
	//returns the size of the output buffer
	int GetOutputSize()
	{
		return m_iOutputSize;
	};

	//get the frequencies (overwrite in subclasses)
	void GetFrequencies()	{};

	//print the codewords' frequencies to the console
	void OutFrequencies()
	{
		printf("HEX | Frequency\n");
		for(int i=0;i<m_iCodeWordsInUse;i++)
		{
			printf("%X | %d\n",m_pCodeWords[i].m_iCodeWord,m_pCodeWords[i].m_iFrequency);
		}
	};

	//sort the codewords by value with zeros at the back
	void SortCodeWords()
	{
		int i=0,a=0;

		CCodeWord swap;

		for(i=0;i<m_iCodeWords;i++)
		{
			for(a=i+1;a<m_iCodeWords;a++)
			{
				if(m_pCodeWords[a].m_iFrequency == 0)
					continue;
				if( ( m_pCodeWords[a].m_iFrequency < m_pCodeWords[i].m_iFrequency) || m_pCodeWords[i].m_iFrequency == 0 )
				{
					swap = m_pCodeWords[i];
					m_pCodeWords[i] = m_pCodeWords[a];
					m_pCodeWords[a] = swap;
				}
			}
			if( m_pCodeWords[i].m_iFrequency == 0 )
				--m_iCodeWordsInUse;
		}
	};

	//build a huffman tree out of the code words
	void BuildTree()
	{
		int i;

		if(m_iCodeWordsInUse < 3)
			return;

		//creates a dynamic pointer array of the size m_iCodeWordsInUse
		m_ppWords = new CCodeWord*[m_iCodeWordsInUse];

		//copy the code word addresses into the pointer array
		for(i=0;i<m_iCodeWordsInUse;i++)
			m_ppWords[i] = &m_pCodeWords[i];

		m_iCodeWordsLeft = m_iCodeWordsInUse;
		
		//proceed until there s only one code word left in m_ppWords, the head of the tree
		while(m_iCodeWordsLeft > 1)
		{
			LinkCodeWords();
		}

		m_pHead = m_ppWords[0];
		m_ppWords[0] = NULL;

		//delete the dynamic pointer array
		delete m_ppWords;

	};

	//links two codewords and rebuilds the m_ppWords list
	void LinkCodeWords()
	{	
		int i;
		CCodeWord *w = NULL;
		CCodeWord *p = new CCodeWord();

		//assign the pointers
		p->m_pLeft = m_ppWords[0];
		p->m_pRight = m_ppWords[1];

		m_ppWords[0]->m_pParent = p;
		m_ppWords[1]->m_pParent = p;

		//sum up the frequencies for the new link
		p->m_iFrequency = m_ppWords[0]->m_iFrequency + m_ppWords[1]->m_iFrequency;

		//replacing some pointers
		m_ppWords[0] = p;
		m_ppWords[1] = NULL;

		//move the elemts one slot to the left n->0
		for(i=1;i<(m_iCodeWordsLeft-1);i++)
		{
			m_ppWords[i] = m_ppWords[i+1];
		}

		m_ppWords[m_iCodeWordsLeft-1] = NULL;

		--m_iCodeWordsLeft;

		if(m_iCodeWordsLeft < 2)
			return;
		
		//sort the left code words by frequency
		for(i=0;i<(m_iCodeWordsLeft-1);i++)
		{
			if(m_ppWords[i]->m_iFrequency > m_ppWords[i+1]->m_iFrequency)
			{
				//swap m_ppWords[i] with m_ppWords[i+1]
				w = m_ppWords[i];
				m_ppWords[i] = m_ppWords[i+1];
				m_ppWords[i+1] = w;
			}
		};

		
	};
	//print the tree structure as far as possible with plain text
	void PrintTree()
	{
		for(int i=0;i<m_iCodeWordsInUse;i++)
			m_pCodeWords[i].Print();
	};


	//integer member variables
	int	m_iCodeWords;
	int m_iCodeWordsInUse;
	int m_iCodeWordsLeft;
	int	m_iCodeWordLength;

	int m_iInputSize;
	int m_iOutputSize;


	//pointers
	CCodeWord *m_pCodeWords;
	CCodeWord *m_pHead;
	CCodeWord **m_ppWords;

	BYTE *m_pInputBuffer;
	BYTE *m_pOutputBuffer;
};

class CHexDecimalHuffmanTree : public CHuffmanTree
{
public:
	//constructor for hex decimal huffman tree
	CHexDecimalHuffmanTree()
	{
		CHuffmanTree::CHuffmanTree();

		m_iCodeWordsInUse = m_iCodeWords = 16;

		m_pCodeWords = new CCodeWord[m_iCodeWords]();

		int i=0;

		for(i=0;i<m_iCodeWords;i++)
		{
			m_pCodeWords[i].m_iCodeWord = i;
		}
	};

	//get the frequencies of  hexa decimals in the input buffer
	void GetFrequencies()
	{
		if(!m_iInputSize && m_pInputBuffer == NULL)
			return;
		int i=0;

		//make the left 4 bits zero to get the right hex decimal 
		//and use the shift operator to get the left hex decimal
		for(i=0;i<m_iInputSize;i++)
		{
			++m_pCodeWords[(m_pInputBuffer[i] | HEX_LEFT ) & HEX_RIGHT].m_iFrequency;
			++m_pCodeWords[ m_pInputBuffer[i] >> 4].m_iFrequency;
		}
	};

};

Attachment:
huffman_lib.h: https://fsi.cs.fau.de/unb-attachments/post_39242/huffman_lib.h


und wer macht sowas freiwillig? :vogel:


leute wie der raptor :slight_smile:

wenn man spass dran hat…

gibt halt tage da steht man auf und hat lust sowas zu machen


hm ich? bin grad dabei das Teil zu schreiben dass den input buffer mit den huffman codes codiert und ausgibt trickreich sag ich da nur

aber zum glück gibts function inlining in c++ dann kann ich das problemlos in einzelne funktionen zerlegen ohne dass es sich performance mäßig auswirkt