////////////////////////////////////////////////// Levenshtein Distance //////////////////////////////////////////////////
// Compile this cpp to dll in Matlab:
// Preparation:
// Type "mex -setup" (without the quotes) in the Matlab command window.
// You will be asked a question; please input 'y'.
// Next, select a compiler installed on your computer.
// Finally, verify the information and input 'y'.
//
// Compile:
// Put this cpp in your working directory. Then
// type "mex LevenDistance.cpp" (without the quotes);
// a LevenDistance.dll file will be created
// in the same folder.
//
// Then this function can be used.
///////// Algorithm designed by: Michael Gilleland//
// C++ code for Matlab coded by: Longfei Ma 
// malf1988@gmail.com
// 2009.10.31 
//
// Modified by Andreas Nienkötter
///////////////////////////////////////////////
#include <limits.h> // INT_MAX
#include <malloc.h>
#include <math.h> // round
#include <stdlib.h> // malloc, free
#include <string.h>

#include <algorithm> // std::max

#include <mex.h> 

// mex.h (included above) is required: it declares the MATLAB MEX API used below
// Builds the distance matrix for the two strings and returns a newly
// allocated string lying on an edit path between them; defined further below.
char* LD (char const *, char const *, const double);


// Kinds of edit operation understood by cost(). Declared as a scoped enum so
// the short enumerator names (ins, del, sub) stay out of the global namespace
// and cannot be converted to int by accident.
enum class type {ins, del, sub};
// Cost of applying the given operation to the given pair of characters.
int cost(const type, const char, const char);

//Matlab main function
//
// Expects exactly three right-hand-side arguments:
//   prhs[0], prhs[1]  the two strings, read with mxGetString
//   prhs[2]           scalar handed to LD() as `percentage`
// and produces one output:
//   plhs[0]           MATLAB string built from the buffer LD() returns
void mexFunction(int nlhs,mxArray *plhs[],int nrhs, const mxArray *prhs[]){ 
    
    // Unconditional notice, printed on every call.
    mexPrintf("Warning: dpe_string_weighted_mean_func_mex might cause \"double free\" memory error\n");
    
  if(nrhs!=3)
    {
      mexErrMsgTxt("Wrong number of Arguments"); 
      return; // not reached: mexErrMsgTxt does not return
    }
  if(nlhs>1)
    {
      mexErrMsgTxt("Too many output arguments");
      return; // not reached
    }

  // Buffer sizes: one char per array element plus the terminator
  const size_t len0 = (mxGetM(prhs[0]) * mxGetN(prhs[0])) + 1;
  const size_t len1 = (mxGetM(prhs[1]) * mxGetN(prhs[1])) + 1;

  // Allocate zero-filled memory for the input strings
  char *s0=(char *)mxCalloc(len0,sizeof(char));
  char *s1=(char *)mxCalloc(len1,sizeof(char)); 
  
  // Read the input strings into s0 and s1. mxGetString reports failure with a
  // non-zero return value. Empty arrays are still accepted as empty strings;
  // a non-empty array that cannot be read is rejected instead of being
  // silently treated as "".
  if(mxGetString(prhs[0],s0,len0)!=0 && len0>1)
    {
      mexErrMsgTxt("First argument could not be read as a string (must be a char array)");
      return; // not reached
    }
  if(mxGetString(prhs[1],s1,len1)!=0 && len1>1)
    {
      mexErrMsgTxt("Second argument could not be read as a string (must be a char array)");
      return; // not reached
    }

  // Get Percentage value; an empty array has no scalar to read
  if(mxGetM(prhs[2]) * mxGetN(prhs[2]) == 0)
    {
      mexErrMsgTxt("Third argument must be a non-empty scalar");
      return; // not reached
    }
  const double percentage = mxGetScalar(prhs[2]);
  
  // Compute the weighted mean string
  char *weighted_mean = LD(s0,s1,percentage);

  // create output argument
  plhs[0] = mxCreateString(weighted_mean);
  
  // s0, s1 and weighted_mean all come from mxCalloc and are not released
  // explicitly here; MATLAB's memory manager is documented to reclaim such
  // allocations when the MEX function returns.
}




// Cost of one edit operation.
// Insertions and deletions always cost 1, whatever the characters are.
// Any other operation (substitution) costs 0 when both characters are equal
// and 1 when they differ.
int cost(const type op, const char c1, const char c2) {
  switch (op) {
  case type::ins:
  case type::del:
    return 1;
  default:
    if (c1 == c2) {
      return 0;
    }
    return 1;
  }
}





//****************************
// Return the smallest of three integers
//****************************
int Minimum (int a, int b, int c)
{
  const int smaller_ab = (b < a) ? b : a;
  return (c < smaller_ab) ? c : smaller_ab;
}
//**************************************************
// Address of the matrix cell at (col, row).
// The matrix is stored row after row, and each row
// holds nCols + 1 cells (columns 0 .. nCols).
//**************************************************
int * GetCellPointer (int *pOrigin, int col, int row, int nCols)
{
  const int cellsPerRow = nCols + 1;
  return pOrigin + col + (row * cellsPerRow);
}
//*****************************************************
// Read the value stored in the matrix cell (col, row)
//*****************************************************
int GetAt (int *pOrigin, int col, int row, int nCols)
{
  return *GetCellPointer (pOrigin, col, row, nCols);
}
//*******************************************************
// Store the value x in the matrix cell (col, row)
//*******************************************************
void PutAt (int *pOrigin, int col, int row, int nCols, int x)
{
  *GetCellPointer (pOrigin, col, row, nCols) = x;
}

//*****************************
// Compute a weighted mean string from the Levenshtein distance matrix
//*****************************
// Fills the (n+1) x (m+1) edit-distance matrix between s and t using cost(),
// then starts from a copy of t and undoes edit operations one at a time,
// walking the matrix back towards (0,0), until the distance between s and the
// intermediate string is no larger than round(percentage * distance(s, t)).
//
// s, t        NUL-terminated input strings.
// percentage  fraction of the full distance that the result may keep from s.
//             When the target is at least the full distance the result is a
//             copy of t; when the target is zero or negative the walk runs
//             all the way back and the result equals s.
//
// Returns a NUL-terminated string allocated with mxCalloc.
char* LD (char const *s, char const *t, const double percentage)
{
  const int n = static_cast<int>(strlen (s)); // length of s
  const int m = static_cast<int>(strlen (t)); // length of t

  // Output buffer. At matrix position (cur_n, cur_m) the intermediate string
  // is t[0..cur_m) followed by s[cur_n..n), so it can be up to n + m
  // characters long; reserve that plus the terminator. mxCalloc zero-fills.
  char* weighted_mean = (char*)mxCalloc(static_cast<size_t>(n) + static_cast<size_t>(m) + 1,sizeof(char));
  strcpy(weighted_mean,t);

  // Step 1: allocate the distance matrix
  const size_t cells = (static_cast<size_t>(n) + 1) * (static_cast<size_t>(m) + 1);
  int *d = static_cast<int *>(malloc (cells * sizeof (int)));
  if (d == nullptr)
    {
      mexErrMsgTxt("dpe_string_weighted_mean_func_mex: Out of memory");
      return weighted_mean; // not reached: mexErrMsgTxt does not return
    }

  // Step 2: distances from/to the empty prefix
  for (int i = 0; i <= n; i++)
    {
      PutAt (d, i, 0, n, i);
    }
  for (int j = 0; j <= m; j++)
    {
      PutAt (d, 0, j, n, j);
    }

  // Steps 3-6: fill the matrix; cell (i, j) is the distance between
  // s[0..i) and t[0..j)
  for (int i = 1; i <= n; i++)
    {
      const char s_i = s[i-1]; // ith character of s
      for (int j = 1; j <= m; j++)
        {
          const char t_j = t[j-1]; // jth character of t

          const int via_del = GetAt (d, i-1, j, n) + cost(type::del,s_i,t_j);
          const int via_ins = GetAt (d, i, j-1, n) + cost(type::ins,s_i,t_j);
          const int via_sub = GetAt (d, i-1, j-1, n) + cost(type::sub,s_i,t_j);

          PutAt (d, i, j, n, Minimum (via_del, via_ins, via_sub));
        }
    }

  // Step 7: full distance between s and t
  const int result = GetAt (d, n, m, n);

  // Calculate weighted mean
  const double target_result = round(percentage * (double)result);
  int cur_n = n;
  int cur_m = m;
  int cur_len = m; // current length of weighted_mean, kept in sync below

  // Go back from the second string towards the first. The walk also stops at
  // cell (0,0): nothing is left to undo there, and stepping further would
  // index before the start of the strings and of the matrix.
  while ((cur_n > 0 || cur_m > 0) && GetAt(d,cur_n,cur_m,n) > target_result) {

    // Neighbouring cells, with INT_MAX standing in for cells outside the matrix
    const int before_ins = (cur_m > 0) ? GetAt(d,cur_n,cur_m-1,n) : INT_MAX;
    const int before_del = (cur_n > 0) ? GetAt(d,cur_n-1,cur_m,n) : INT_MAX;
    const int before_sub = (cur_n > 0 && cur_m > 0) ? GetAt(d,cur_n-1,cur_m-1,n) : INT_MAX;

    // Substitution is listed and tested first so that it wins ties
    const int next = Minimum(before_sub,before_ins,before_del);

    if (next == before_sub){
      // The letter was substituted -> substitute back
      cur_n -= 1;
      cur_m -= 1;
      weighted_mean[cur_m] = s[cur_n];
    }
    else if (next == before_ins){
      // The letter was inserted -> remove again.
      // Shift the tail, including the terminator, one position to the left.
      cur_m -= 1;
      memmove(&weighted_mean[cur_m], &weighted_mean[cur_m+1],
              static_cast<size_t>(cur_len - cur_m));
      cur_len -= 1;
    }
    else {
      // The letter was removed -> insert again.
      // Shift the tail, including the terminator, one position to the right;
      // Minimum() returns one of its arguments, so this is the
      // next == before_del case.
      cur_n -= 1;
      memmove(&weighted_mean[cur_m+1], &weighted_mean[cur_m],
              static_cast<size_t>(cur_len - cur_m + 1));
      weighted_mean[cur_m] = s[cur_n];
      cur_len += 1;
    }

  }
  free(d);
  return weighted_mean;
}




