My version of this algorithm rearranges the words, but:
- does not include those words that cannot form the chain of words (the chain of words is started by the word that finds one coincidence at least)
- does not restart another chain with the rest of words that didn't coincide with the first chain of words. That is to say, there is only 1 chain of words
That is to say that:
"ysamz let mat xaml gallery tax"
would give:
let tax xaml
this happens because it does not find any coincidence for ysamz, gallery and mat.
http://sandbox.onlinephpfunctions.com/code/7f911e410510980331627832889816997abe24db
function right($string,$chars)
{
$vright = substr($string, strlen($string)-$chars,$chars);
return $vright;
}
function left($string,$chars){
$vleft=substr($string, 0,$chars);
return $vleft;
}
function findone($str,&$strArray,$currentword)
{
$chosen="";
foreach ($strArray as $i => $strinarray)
{
if(left($strinarray,1)==$str)
{
$chosen=$strinarray;
unset($strArray[$i]);
break;
}
}
return $chosen;
}
function findonepattern($str,&$strArray,$currentword)
{
$chosen="";
$coincidences=preg_grep('/^'.$str.'.*$/', $strArray);
if(sizeof($coincidences)>0)
{
$chosen=current($coincidences);
$delWord=array_keys($strArray, $chosen);
unset($strArray[$delWord[0]]);
$strArray=array_values($strArray);
}
return $chosen;
}
function rearrange($lastvalue,&$strArray)
{
global $finalArray,$globalcount;
$last=right($lastvalue, 1);
// $chosen=findonepattern($last,$strArray,$lastvalue);
$chosen=findone($last,$strArray,$lastvalue);
if(!empty($chosen))
{
//this line is commented because if there is more than one similar word
//it would not consider the second one
// if(!in_array($chosen, $finalArray)) $finalArray[]=$chosen;
$finalArray[]=$chosen;
rearrange($chosen, $strArray);
}
if(sizeof($finalArray)==1 && $globalcount<sizeof($strArray))
{
$globalcount++;
$finalArray[0]=$strArray[$globalcount];
rearrange($finalArray[0], $strArray);
}
return $finalArray;
}
$globalcount=0;
$thestring="sam let mat xaml tax";
$strArray=explode(" ",$thestring);
$strArray=array_filter($strArray);
$finalArray=array();
$finalArray[]=$strArray[0];
unset($strArray[0]);
$finalArray=rearrange($finalArray[0],$strArray);
foreach($finalArray as $finalstr)
{
echo $finalstr." ";
}
PREG PATTERN
Instead of using the findone function that makes use of "left", you could use a pattern but there is a worse performance:
function findonepattern($str,&$strArray,$currentword)
{
$chosen="";
$coincidences=preg_grep('/^'.$str.'.*$/', $strArray);
if(sizeof($coincidences)>0)
{
$chosen=current($coincidences);
$delWord=array_keys($strArray, $chosen);
unset($strArray[$delWord[0]]);
$strArray=array_values($strArray);
}
return $chosen;
}
You can see the performance with 3000 words here:
http://sandbox.onlinephpfunctions.com/code/5ae84a2dadc8140eeb1e776d74059a204877bf0a
First solution: aprox. 0.3 seconds
Second solution (preg): aprox 1 second
explode(' ', $string)"foo bar baz"for example?