<?php
	function word_count($text) # Number of words in the text, never reported as 0
	{
		$words = str_word_count($text);

		# divisor() turns a zero count into 1 so callers can divide by the result
		return divisor($words);
	}
	function avg_sentence_length($t) # Mean number of words per sentence, rounded
	{
		$words     = word_count($t);
		$sentences = count_sentences($t);

		return rounding($words / $sentences);
	}
	function count_colon_quotes($t) # Counts colons directly followed by a line break or by two spaces
	{
		# Accept every common line ending (\r\n, lone \r, lone \n) rather than only
		# the Windows one, so texts with Unix line endings are counted as well.
		# The two-space alternative is kept exactly as before.
		$hits = preg_match_all('/:(?:\r\n|\r|\n|  )/', $t, $ignored);

		# preg_match_all() yields false on a regex error; treat that as "no hits"
		return rounding((int) $hits);
	}
	function count_commas($t) # Ratio between the number of commas and the number of words
	{
		$commas = substr_count($t, ",");
		$words  = word_count($t);

		return rounding($commas / $words);
	}	
	function semicolon_frequency($t) # Semicolons per sentence, rounded
	{
		$semicolons = substr_count($t, ";");
		$sentences  = count_sentences($t);

		return rounding($semicolons / $sentences);
	}
	function word_per_line($t) # Average number of words per line of text
	{
		$words = word_count($t);

		# Split where one line ends and the next begins to get the individual lines
		$lines = preg_split('/$\R?^/m', $t);

		return rounding($words / count($lines));
	}
	function count_sentences($t) # Number of sentences, never reported as 0
	{
		# A sentence end is '.', '!' or '?' that follows a non-whitespace character
		# and is not immediately followed by a word character.
		$found = preg_match_all('/[^\s](\.|\!|\?)(?!\w)/', $t, $ignored);

		# divisor() turns a zero (or failed) count into 1 so callers can divide by it
		return divisor($found);
	}
	function is_upper_first ($word) # True only when the word starts with an uppercase letter
	{
		$word = (string) $word;

		# An empty token (e.g. produced by consecutive spaces) has no first letter
		if($word === '')
			return false;

		# Unicode-aware check so accented capitals count too. Comparing against
		# ucfirst() would also accept digits, quotes and other non-letters.
		$unicode = preg_match('/^\p{Lu}/u', $word);
		if($unicode !== false)
			return $unicode === 1;

		# The word is not valid UTF-8: fall back to a plain ASCII check
		return preg_match('/^[A-Z]/', $word) === 1;
	}
	function count_newlines($t) # (Double-space occurrences + number of lines) divided by number of sentences
	{
		$double_spaces = substr_count($t, "  ");
		$lines         = count(preg_split('/$\R?^/m', $t));
		$sentences     = count_sentences($t);

		return rounding(($double_spaces + $lines) / $sentences);
	}
	function avg_word_length($t) # The average word length (kept characters divided by number of words)
	{
		# Turn HTML non-breaking spaces into plain spaces BEFORE filtering. The filter
		# below removes ';', so stripping '&nbsp;' afterwards could never match and
		# the leftover "&nbsp" was counted as five word characters.
		$clean = str_replace('&nbsp;', ' ', $t);

		# Keep letters, digits, spaces and the handful of symbols listed in the class
		$kept = preg_replace('/[^a-zA-Z0-9_ %\[\]\(\)%&-]/s', '', $clean);

		# Spaces separate words and must not count towards their length
		$characters = strlen(str_replace(' ', '', $kept));

		# Count words on the cleaned text as well, so "nbsp" is not taken for a word
		return rounding($characters / word_count($clean));
	}
	function break_out_words($sentence) # Splits the text on single spaces and returns the pieces
	{
		$pieces = explode(' ', $sentence);

		# divisor() hands a non-empty array back unchanged
		return divisor($pieces);
	}
	function word_freq($t,$sample) # How many of the 20 most frequent words of $t also occur in $sample
	{
		# Lower-case stop words that are dropped from $t (not from $sample) before ranking
		$stop_words = array_flip(array('the','of','to','a','in','and','with'));

		$counts_text   = array_count_values(str_word_count($t, 1));
		$counts_sample = array_count_values(str_word_count($sample, 1));
		$counts_text   = array_diff_key($counts_text, $stop_words);

		# Most frequent words first
		arsort($counts_sample);
		arsort($counts_text);

		$top_twenty = array_slice($counts_text, 0, 20);

		return compare_arrays($top_twenty, $counts_sample);
	}
	function compare_arrays($frequency, $frequency_sample, $nr_matches = 0) # Number of keys of $frequency that are also keys of $frequency_sample, added to $nr_matches
	{
		# Only the keys (the words) are compared; the values (the counts) are ignored.
		# array_intersect_key() does an exact, hashed key lookup. This replaces a
		# nested loop that compared keys with a loose ==, which treated keys such
		# as '1e3' and 1000 as the same word.
		$shared = array_intersect_key($frequency, $frequency_sample);

		return $nr_matches + count($shared);
	}
	function count_names($t) # Capitalised words per sentence, not counting one capital per sentence; never below 0
	{
		$sentences   = count_sentences($t);
		$capitalised = 0;

		foreach(break_out_words($t) as $word)
		{
			if(is_upper_first($word))
				$capitalised++;
		}

		# Subtract one capitalised word per sentence before taking the per-sentence average
		$per_sentence = rounding(($capitalised - $sentences) / $sentences);

		if($per_sentence > 0)
			return $per_sentence;

		return 0;
	}		
	function quote_lengths($t) # Average number of words per "double-quoted" passage
	{
		preg_match_all('/"([^"]+)"/', $t, $found);
		$quotes = $found[0];

		$total_words = 0;
		foreach($quotes as $quote)
		{
			# Drop the quote marks and all punctuation before counting the words
			$plain        = preg_replace("/[^a-zA-Z 0-9]+/", "", $quote);
			$total_words += word_count($plain);
		}

		# divisor() keeps the division safe when the text contains no quotes
		return rounding($total_words / divisor(count($quotes)));
	}
	function calculate_point($t1,$t2,$sample,&$probability) # Algorithm 1. One point to the text whose value lies closest to the sample's
	{
		$value1 = abs($t1);
		$value2 = abs($t2);
		$target = abs($sample);

		# Distance between each text's magnitude and the sample's magnitude
		$gap1 = max($value1, $target) - min($value1, $target);
		$gap2 = max($value2, $target) - min($value2, $target);

		# Index 0 scores for text 1, index 1 for text 2; a tie goes to text 2
		if($gap1 < $gap2)
			$probability[0]++;
		else
			$probability[1]++;
	}	
	function calculate_point2($t1,$t2,$sample,&$probability) # Algorithm 2. Adds the ratio between the two distances to the score at index 0 or 1
	{
		# Distance between each text's magnitude and the sample's magnitude
		$diff1 = max(abs($t1),abs($sample)) - min(abs($t1),abs($sample));
		$diff2 = max(abs($t2),abs($sample)) - min(abs($t2),abs($sample));

		if($diff1 > $diff2)
		{
			# Text 1 is further off: index 1 gains the ratio.
			# A zero distance is replaced by 1 to avoid dividing by 0.
			$probability[1] += $diff1 / ($diff2 ? $diff2 : 1);
		}
		elseif($diff2)
		{
			# Text 1 is at least as close: index 0 gains the ratio. The zero guard
			# mirrors the branch above, so an exact match by text 1 is rewarded
			# just like an exact match by text 2 (it used to score nothing).
			$probability[0] += $diff2 / ($diff1 ? $diff1 : 1);
		}
		# Both distances are 0: neither entry is changed
	}
	function rounding($num) # General rounding, four decimals
	{
		$decimals = 4;

		return round($num, $decimals);
	}
	function divisor($num) # Avoids division by 0: any falsy value becomes 1, anything else is returned as is
	{
		if($num)
			return $num;

		return 1;
	}
?>