天天看點

Edit distance problem 動态規劃和遞歸解法之比較

問題:

給定字元串firstStr,secondStr,長度分别為m,n。通過delete, insert, replace 操作把firstStr轉化為secondStr,而這三種操作的每次執行成本分别為delCost, insCost, repCost。求完成這種轉換的最小成本。

解決方案:

1. 遞歸(recursive)

base condition:用于終止遞歸, 當m == 0 || n == 0;

Mincost( firstStr, secondStr, m, n, delCost, insCost, repCost ) = 

      min{ Mincost( firstStr, secondStr, m - 1, n, delCost, insCost, repCost ) + delCost,                                                                                                                                                          Mincost( firstStr, secondStr, m, n-1, delCost, insCost, repCost )+ insCost,

                 Mincost( firstStr, secondStr, m - 1, n-1, delCost, insCost, repCost)+ repCost };

2.動态規劃(dynamic programming)

引入成本數組cost[m][n]:代表把長度m的字元串轉成長度為n的字元串的成本;

建立遞推關系:cost[m][n] = min{ cost[m-1][n] + delCost, cost[m][n-1] + insCost, cost[m-1][n-1] + repCost };

兩種方法的比較的差別很明顯:

1.遞歸計算整個搜尋過程最終會形成一棵樹,這顆樹不同樹枝之間會有很多重複的計算,最終導緻了性能很差;

  2.動态規劃會把許多計算結果緩存到cost數組裡,雖然有個空間的開銷,但是算過的不用再計算,直接查表即可,

       是以動态規劃算法有很好的性能表現

代碼如下:

#ifndef _EDIT_DISTANCE_H_
#define _EDIT_DISTANCE_H_

#include <string.h>
#include <stdlib.h>
#include <stdio.h>

/*
*helper function
*
*/
int minVal( int a, int b )
{
	return ( a < b ) ? a : b;
}

/*
*helper function
*
*/
inline int minnum( int a, int b, int c )
{
	return minVal( minVal(a, b), c );
}


/*
*implementation of recursive for edit distance problem
*
*/
int EditDistanceRecur( const char* firstStr, const char* secondStr, int firstPos, int secondPos, int delCost, int insCost, int rapCost )
{
	if( 0 == firstPos && 0 == secondPos )
		return 0;

	if( 0 == firstPos )
		return secondPos;

	if( 0 == secondPos )
		return firstPos;

	int del = EditDistanceRecur( firstStr, secondStr, firstPos - 1, secondPos, delCost, insCost, rapCost ) + delCost;
	int insert = EditDistanceRecur( firstStr, secondStr, firstPos, secondPos - 1, delCost, insCost, rapCost  ) + insCost;
	int replace = EditDistanceRecur( firstStr, secondStr, firstPos - 1, secondPos - 1, delCost, insCost, rapCost  );
	if( firstStr[firstPos - 1] != secondStr[secondPos - 1] )
		replace += rapCost;

	return minnum( del, insert, replace );
}


/*
* interface function 
*
*/
int FindEditDistance( const char* strfirst, const char* strsecond, int delCost, int insCost, int rapCost )
{
	return EditDistanceRecur( strfirst, strsecond, strlen( strfirst ), strlen( strsecond ), delCost, insCost, rapCost );
}


/*
* implementation of dynamic programming for edit distance problem
*
*/
int EditDistanceDP( const char* firstStr, const char* secondStr, int delCost, int insCost, int rapCost )
{
	size_t firstLen = strlen( firstStr );
	size_t secondLen = strlen( secondStr );

	int** table = new int*[ firstLen + 1];
	for( int i = 0; i <= firstLen; i++ )
	{
		table[i] = new int[ secondLen + 1];
		memset( table[i], 0x00, sizeof(int)*( secondLen + 1 ) );
	}

	//base condition
	for( int i = 0; i <= secondLen; i++ )
	{
		table[0][i] = i * insCost;
	}

	for( int i = 0; i <= firstLen; i++ )
	{
		table[i][0] = i * delCost;
	}

	for( int i = 1; i <= firstLen; i++ )
	{
		for( int j = 1; j <= secondLen; j++ )
		{
			int deleteCost = table[i][j-1];
			deleteCost += delCost;

			int insertCost = table[i-1][j];
			insertCost += insCost;

			int replaceCost = table[i-1][j-1];
			if( firstStr[i] != secondStr[j] )
				replaceCost += rapCost;


			table[i][j] = minnum( deleteCost, insertCost, replaceCost );
		}
	}

	int res = table[firstLen][secondLen];

	for( int i = 0; i <= firstLen; i++ )
	{
		delete [] table[i];
	}

	if( table )
	{
		delete [] table;
	}

	return res;
}

/*
* test method
*
*/
void TestEditDistance()
{
	const char* first = "adddfooiedfafa";
	const char* second = "adafsadoiddf";

	
	int delCost = 1;
	int insCost = 2;
	int rapCost = 3;

	printf("Minimum cost  %d by dynamic programming \n",
		    EditDistanceDP( first, second, delCost, insCost, rapCost ) );
	printf("Minimum cost  %d by recursive \n",
			first, second, FindEditDistance( first, second, delCost, insCost, rapCost ) );

	getchar();


}


#endif            

繼續閱讀