C#实现的基于TCP传输的数据组包程序

目的：
很多时候我们会使用TCP协议来发送数据，并在发送时使用自拟的协议头来区分不同类型的数据。不同于UDP，TCP是面向字节流的协议，并不保留应用层的消息边界：我们自拟的一个数据包可能被拆成两个甚至更多个TCP包，多个数据包也可能在一次接收中被合并到一起。在接收端如何将收到的数据重新组合成完整的数据包呢？本文将解决这个问题。

关键词：
TCP，数据组包，自定义协议

一. C#代码

识别头可以是给定的任意长度的字节数组，也可以是给定的任意长度的字符串（按ASCII编码转换为字节），默认为五字节的十六进制序列 55 55 7e 7e 02。

/// <summary>
/// Reassembles application-level packets from a TCP byte stream.
/// </summary>
/// <remarks>
/// Packet layout: header + 4-byte content length + content, where the content length
/// equals total packet length - header length - 4. The length field is decoded with
/// <c>BitConverter.ToInt32</c>, so it uses the byte order of the running platform.
/// Bytes that do not yet form a complete packet are kept inside the instance and are
/// prepended to the next input, so an instance carries state between calls.
/// An empty header matches at every position.
/// </remarks>
public class GetTcpPatchHandler
{
    // Unconsumed tail of the previous input (a possibly incomplete packet), or null.
    private byte[] temp = null;

    // Byte sequence that marks the start of a packet.
    public byte[] header = new byte[5]{ 0x55, 0x55, 0x7e, 0x7e, 0x02 };

    // Length of the packet prefix: header bytes plus the 4-byte length field.
    public int lengthWithLenbit = 9;

    // Not read by this implementation (no scratch buffer is allocated any more);
    // kept so that existing callers which set it still compile.
    public int BufferSize = 16000;

    /// <summary>Creates a handler that uses the given bytes as the packet header.</summary>
    /// <param name="Header">Header bytes; the array is copied.</param>
    /// <exception cref="ArgumentNullException"><paramref name="Header"/> is null.</exception>
    public GetTcpPatchHandler(byte[] Header)
    {
        if (Header == null)
        {
            throw new ArgumentNullException("Header");
        }

        header = (byte[])Header.Clone();
        lengthWithLenbit = header.Length + 4;
    }

    /// <summary>Creates a handler whose header is the ASCII encoding of the given string.</summary>
    /// <param name="HeaderStr">Header text, converted with <c>Encoding.ASCII</c>.</param>
    /// <exception cref="ArgumentNullException"><paramref name="HeaderStr"/> is null.</exception>
    public GetTcpPatchHandler(string HeaderStr)
    {
        if (HeaderStr == null)
        {
            throw new ArgumentNullException("HeaderStr");
        }

        // GetBytes already returns a fresh array, so no extra copy is needed.
        header = Encoding.ASCII.GetBytes(HeaderStr);
        lengthWithLenbit = header.Length + 4;
    }

    /// <summary>Creates a handler that uses the default header 55 55 7e 7e 02.</summary>
    public GetTcpPatchHandler() { }

    /// <summary>
    /// Feeds newly received bytes to the handler and returns the content of every packet
    /// that is now complete.
    /// </summary>
    /// <param name="input">Bytes received since the previous call. The array is not retained.</param>
    /// <returns>
    /// The contents (without header and length field) of all complete packets, in stream
    /// order; an empty list when no packet is complete yet.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
    /// <exception cref="InvalidOperationException">
    /// <c>lengthWithLenbit</c> is smaller than <c>header.Length + 4</c>.
    /// </exception>
    public List<byte[]> GetActualBuffer(byte[] input)
    {
        if (input == null)
        {
            throw new ArgumentNullException("input");
        }

        int prefixLength = lengthWithLenbit;
        if (prefixLength < header.Length + 4)
        {
            // The header and the length field would overlap; refuse to parse garbage.
            throw new InvalidOperationException("lengthWithLenbit must be at least header.Length + 4.");
        }

        List<byte[]> outputList = new List<byte[]>();

        // Prepend whatever was left over from the previous call.
        byte[] data = input;
        if (temp != null && temp.Length > 0)
        {
            data = new byte[temp.Length + input.Length];
            Array.Copy(temp, 0, data, 0, temp.Length);
            Array.Copy(input, 0, data, temp.Length, input.Length);
        }
        temp = null;

        // Index of the first byte that has not been consumed or discarded yet.
        int offset = 0;
        while (data.Length - offset >= prefixLength)
        {
            int headerStart = IndexOfHeader(data, offset, prefixLength);
            if (headerStart < 0)
            {
                // No header at any position with a full prefix behind it. Only the last
                // prefixLength - 1 bytes can still be the beginning of a packet, so the
                // rest is dropped instead of being buffered forever.
                offset = data.Length - prefixLength + 1;
                break;
            }

            int payloadStart = headerStart + prefixLength;
            // The last four bytes of the prefix hold the content length.
            int payloadLength = BitConverter.ToInt32(data, payloadStart - 4);
            if (payloadLength < 0)
            {
                // A negative length cannot be valid: treat this as a false header match
                // and resume the search one byte further on.
                offset = headerStart + 1;
                continue;
            }

            if (data.Length - payloadStart < payloadLength)
            {
                // Content is still incomplete: keep everything from the header onwards
                // and wait for more input.
                offset = headerStart;
                break;
            }

            byte[] package = new byte[payloadLength];
            Array.Copy(data, payloadStart, package, 0, payloadLength);
            outputList.Add(package);
            offset = payloadStart + payloadLength;
        }

        int leftover = data.Length - offset;
        if (leftover > 0)
        {
            // Always copy, so that the caller may reuse its receive buffer safely.
            temp = new byte[leftover];
            Array.Copy(data, offset, temp, 0, leftover);
        }

        return outputList;
    }

    // Returns the first index >= from at which the header matches and a complete
    // prefix (prefixLength bytes) is available, or -1 when there is none.
    private int IndexOfHeader(byte[] data, int from, int prefixLength)
    {
        int lastStart = data.Length - prefixLength;
        for (int start = from; start <= lastStart; start++)
        {
            bool isMatch = true;
            for (int j = 0; j < header.Length; j++)
            {
                if (data[start + j] != header[j])
                {
                    isMatch = false;
                    break;
                }
            }

            if (isMatch)
            {
                return start;
            }
        }

        return -1;
    }
}

这个类所能处理的数据包结构为：
包结构：数据包的识别头+内容长度+内容
其中，内容长度代表内容的字节数，等于 整个数据包的长度-识别头长度-4；这里的4字节是存放内容长度的int型变量所占的存储空间。内容长度由BitConverter.ToInt32解析，字节序与运行平台一致（常见平台上为小端），发送端需按相同的字节序写入。
稍作修改就能适用于其他的包结构。

程序对于输入的字节数组input反复进行拆分，直到分割出其中所有满足协议的数据包，并将最后一个不完整的包（如果存在的话）保留，与下一次的输入拼接。由于尚未处理完的数据保存在对象内部，每个TCP连接应各自使用一个GetTcpPatchHandler实例。