joomla_test/administrator/components/com_finder/helpers/indexer/query.php
2020-01-02 22:20:31 +07:00

1367 lines
35 KiB
PHP

<?php
/**
* @package Joomla.Administrator
* @subpackage com_finder
*
* @copyright Copyright (C) 2005 - 2013 Open Source Matters, Inc. All rights reserved.
* @license GNU General Public License version 2 or later; see LICENSE
*/
// Abort immediately unless the _JEXEC constant has been defined by the including code.
defined('_JEXEC') or die;
// Register the class name => file path pairs for the helper classes referenced by this file.
// NOTE(review): presumably JLoader loads these files lazily on first use - confirm.
JLoader::register('FinderIndexerHelper', __DIR__ . '/helper.php');
JLoader::register('FinderIndexerTaxonomy', __DIR__ . '/taxonomy.php');
JLoader::register('FinderHelperRoute', JPATH_SITE . '/components/com_finder/helpers/route.php');
JLoader::register('FinderHelperLanguage', JPATH_ADMINISTRATOR . '/components/com_finder/helpers/language.php');
/**
* Query class for the Finder indexer package.
*
* @package Joomla.Administrator
* @subpackage com_finder
* @since 2.5
*/
class FinderIndexerQuery
{
/**
* Flag to show whether the query can return results.
*
* @var boolean
* @since 2.5
*/
public $search;
/**
* The query input string.
*
* @var string
* @since 2.5
*/
public $input;
/**
* The language of the query.
*
* @var string
* @since 2.5
*/
public $language;
/**
* The query string matching mode.
*
* @var string
* @since 2.5
*/
public $mode;
/**
* The included tokens.
*
* @var array
* @since 2.5
*/
public $included = array();
/**
* The excluded tokens.
*
* @var array
* @since 2.5
*/
public $excluded = array();
/**
* The tokens to ignore because no matches exist.
*
* @var array
* @since 2.5
*/
public $ignored = array();
/**
* The operators used in the query input string.
*
* @var array
* @since 2.5
*/
public $operators = array();
/**
* The terms to highlight as matches.
*
* @var array
* @since 2.5
*/
public $highlight = array();
/**
* The number of matching terms for the query input.
*
* @var integer
* @since 2.5
*/
public $terms;
/**
* The static filter id.
*
* @var string
* @since 2.5
*/
public $filter;
/**
* The taxonomy filters. This is a multi-dimensional array of taxonomy
* branches as the first level and then the taxonomy nodes as the values.
*
* For example:
* $filters = array(
* 'Type' => array(10, 32, 29, 11, ...),
* 'Label' => array(20, 314, 349, 91, 82, ...),
* ...
* );
*
* @var array
* @since 2.5
*/
public $filters = array();
/**
* The start date filter.
*
* @var string
* @since 2.5
*/
public $date1;
/**
* The end date filter.
*
* @var string
* @since 2.5
*/
public $date2;
/**
* The start date filter modifier.
*
* @var string
* @since 2.5
*/
public $when1;
/**
* The end date filter modifier.
*
* @var string
* @since 2.5
*/
public $when2;
/**
 * Method to instantiate the query object.
 *
 * Reads the search options, resolves the static and dynamic taxonomy
 * filters and the date filters, parses the input string into tokens and
 * finally decides whether the query is able to return a result set.
 *
 * @param   array  $options  An array of query options. The keys read here are:
 *                           input, empty, language, date1, date2, when1, when2,
 *                           filter and filters.
 *
 * @since   2.5
 * @throws  Exception on database error.
 */
public function __construct($options)
{
	// Get the input string.
	$this->input = isset($options['input']) ? $options['input'] : null;

	// Get the empty query setting (whether a search without a query string is allowed).
	$this->empty = isset($options['empty']) ? (bool) $options['empty'] : false;

	// Get the input language and reduce it to its primary language.
	$this->language = !empty($options['language']) ? $options['language'] : FinderIndexerHelper::getDefaultLanguage();
	$this->language = FinderIndexerHelper::getPrimaryLanguage($this->language);

	// Get the matching mode.
	$this->mode = 'AND';

	// Initialize the temporary date storage. It is shared with
	// processStaticTaxonomy() and removed again further down.
	$this->dates = new JRegistry;

	// Populate the temporary date storage. Every option is checked against
	// its OWN key; previously date2/when1/when2 were all gated on date1.
	foreach (array('date1', 'date2', 'when1', 'when2') as $key)
	{
		if (!empty($options[$key]))
		{
			$this->dates->set($key, $options[$key]);
		}
	}

	// Process the static taxonomy filters.
	if (!empty($options['filter']))
	{
		$this->processStaticTaxonomy($options['filter']);
	}

	// Process the dynamic taxonomy filters.
	if (!empty($options['filters']))
	{
		$this->processDynamicTaxonomy($options['filters']);
	}

	// Get the date filters.
	$d1 = $this->dates->get('date1');
	$d2 = $this->dates->get('date2');
	$w1 = $this->dates->get('when1');
	$w2 = $this->dates->get('when2');

	// Process the date filters.
	if (!empty($d1) || !empty($d2))
	{
		$this->processDates($d1, $d2, $w1, $w2);
	}

	// Process the input string.
	$this->processString($this->input, $this->language, $this->mode);

	// Get the number of matching terms.
	foreach ($this->included as $token)
	{
		$this->terms += count($token->matches);
	}

	// Remove the temporary date storage.
	unset($this->dates);

	/*
	 * Lastly, determine whether this query can return a result set.
	 */
	if (!empty($this->input))
	{
		// We have a query string.
		$this->search = true;
	}
	elseif ($this->empty && (!empty($this->filter) || !empty($this->filters) || !empty($this->date1) || !empty($this->date2)))
	{
		// We can search without a query string because at least one filter is set.
		$this->search = true;
	}
	else
	{
		// We do not have a valid search query.
		$this->search = false;
	}
}
/**
 * Method to convert the query object into a URI string.
 *
 * The static filter, the dynamic filters that are also present in the
 * current request, the input string and the date filters are added as
 * query variables. A menu item id is looked up when the base URI does not
 * already carry one.
 *
 * @param   string  $base  The base URI. [optional]
 *
 * @return  string  The complete query URI (path and query parts only).
 *
 * @since   2.5
 */
public function toURI($base = null)
{
	// Set the base if not specified.
	if (empty($base))
	{
		$base = 'index.php?option=com_finder&view=search';
	}

	// Get the base URI. JUri::getInstance() hands back a shared, cached
	// instance per URI string, so we work on a copy; otherwise the variables
	// set below would leak into every later call that uses the same base.
	$uri = clone JUri::getInstance($base);

	// Add the static taxonomy filter if present.
	if (!empty($this->filter))
	{
		$uri->setVar('f', $this->filter);
	}

	// Get the filters in the request.
	$input = JFactory::getApplication()->input;
	$t = $input->request->get('t', array(), 'array');

	// Add the dynamic taxonomy filters if present.
	if (!empty($this->filters))
	{
		foreach ($this->filters as $nodes)
		{
			foreach ($nodes as $node)
			{
				// Only keep nodes that were explicitly part of the request.
				if (!in_array($node, $t))
				{
					continue;
				}

				// NOTE(review): every node is stored under the same 't[]' key, so
				// presumably only the last one survives in the URI - confirm.
				$uri->setVar('t[]', $node);
			}
		}
	}

	// Add the input string if present.
	if (!empty($this->input))
	{
		$uri->setVar('q', $this->input);
	}

	// Add the start date if present.
	if (!empty($this->date1))
	{
		$uri->setVar('d1', $this->date1);
	}

	// Add the end date if present.
	if (!empty($this->date2))
	{
		$uri->setVar('d2', $this->date2);
	}

	// Add the start date modifier if present.
	if (!empty($this->when1))
	{
		$uri->setVar('w1', $this->when1);
	}

	// Add the end date modifier if present.
	if (!empty($this->when2))
	{
		$uri->setVar('w2', $this->when2);
	}

	// Add a menu item id if one is not present.
	if (!$uri->getVar('Itemid'))
	{
		// Get the menu item id.
		$query = array(
			'view' => $uri->getVar('view'),
			'f' => $uri->getVar('f'),
			'q' => $uri->getVar('q')
		);

		$item = FinderHelperRoute::getItemid($query);

		// Add the menu item id if present.
		if ($item !== null)
		{
			$uri->setVar('Itemid', $item);
		}
	}

	return $uri->toString(array('path', 'query'));
}
/**
 * Method to get a list of excluded search term ids.
 *
 * The matches of all excluded tokens are merged into one array, duplicate
 * values are dropped and the remaining values are cast to integers.
 *
 * @return  array  An array of excluded term ids.
 *
 * @since   2.5
 */
public function getExcludedTermIds()
{
	$ids = array();

	// Collect the matching terms of every excluded token.
	foreach ($this->excluded as $token)
	{
		$ids = array_merge($ids, $token->matches);
	}

	// Drop duplicates, then force integer values.
	$ids = array_unique($ids);
	JArrayHelper::toInteger($ids);

	return $ids;
}
/**
 * Method to get a list of included search term ids.
 *
 * Tokens without any matches are skipped. The result is keyed by the token
 * term; each entry holds the unique, integer-cast matches for that term.
 *
 * @return  array  An array of included term ids.
 *
 * @since   2.5
 */
public function getIncludedTermIds()
{
	$grouped = array();

	// Group the matches of the included tokens by term.
	foreach ($this->included as $token)
	{
		// Nothing to collect for tokens that did not match anything.
		if (empty($token->matches))
		{
			continue;
		}

		$term = $token->term;
		$known = array_key_exists($term, $grouped) ? $grouped[$term] : array();

		// Add the matches to the stack for this term.
		$grouped[$term] = array_merge($known, $token->matches);
	}

	// Sanitize the terms: unique values, cast to integers.
	foreach (array_keys($grouped) as $term)
	{
		$grouped[$term] = array_unique($grouped[$term]);
		JArrayHelper::toInteger($grouped[$term]);
	}

	return $grouped;
}
/**
 * Method to get a list of required search term ids.
 *
 * Only included tokens flagged as required are considered. The result is
 * keyed by the token term; each entry holds the unique, integer-cast
 * matches for that term (which may be an empty array).
 *
 * @return  array  An array of required term ids.
 *
 * @since   2.5
 */
public function getRequiredTermIds()
{
	$grouped = array();

	// Group the matches of the required tokens by term.
	foreach ($this->included as $token)
	{
		// Optional tokens are not part of the required set.
		if (!$token->required)
		{
			continue;
		}

		$term = $token->term;
		$known = array_key_exists($term, $grouped) ? $grouped[$term] : array();

		// Add the matches to the stack for this term.
		$grouped[$term] = array_merge($known, $token->matches);
	}

	// Sanitize the terms: unique values, cast to integers.
	foreach (array_keys($grouped) as $term)
	{
		$grouped[$term] = array_unique($grouped[$term]);
		JArrayHelper::toInteger($grouped[$term]);
	}

	return $grouped;
}
/**
 * Method to process the static taxonomy input. The static taxonomy input
 * comes in the form of a pre-defined search filter that is assigned to the
 * search form.
 *
 * The filter row is loaded from #__finder_filters, its node ids are
 * sanitized and then validated against #__finder_taxonomy (published and
 * visible to the current user). The surviving nodes are stored in
 * $this->filters, grouped by branch title.
 *
 * @param   integer  $filterId  The id of static filter.
 *
 * @return  boolean  True on success, false if the filter could not be loaded.
 *
 * @since   2.5
 * @throws  Exception on database error.
 */
protected function processStaticTaxonomy($filterId)
{
	// Get the database object.
	$db = JFactory::getDbo();

	// Initialize user variables
	$user = JFactory::getUser();
	$groups = implode(',', $user->getAuthorisedViewLevels());

	// Load the predefined filter.
	$query = $db->getQuery(true)
		->select('f.data, f.params')
		->from($db->quoteName('#__finder_filters') . ' AS f')
		->where('f.filter_id = ' . (int) $filterId);

	$db->setQuery($query);
	$return = $db->loadObject();

	// Check the returned filter.
	if (empty($return))
	{
		return false;
	}

	// Set the filter.
	$this->filter = (int) $filterId;

	// Get a parameter object for the filter date options.
	$registry = new JRegistry;
	$registry->loadString($return->params);
	$params = $registry;

	// Set the dates if not already set.
	// NOTE(review): these defaults are stored under d1/d2/w1/w2 while the
	// constructor reads date1/date2/when1/when2 from the same registry, so
	// they appear never to be consumed - confirm the intended key names.
	$this->dates->def('d1', $params->get('d1'));
	$this->dates->def('d2', $params->get('d2'));
	$this->dates->def('w1', $params->get('w1'));
	$this->dates->def('w2', $params->get('w2'));

	// Sanitize first and de-duplicate afterwards, so that values which only
	// become equal once cast to integer (e.g. "1" and "01") collapse too.
	$filters = explode(',', $return->data);
	JArrayHelper::toInteger($filters);
	$filters = array_unique($filters);

	// Remove every value of zero (all values are integers at this point).
	$filters = array_filter($filters);

	// Check if we have any real input.
	if (empty($filters))
	{
		return true;
	}

	/*
	 * Create the query to get filters from the database. We do this for
	 * two reasons: one, it allows us to ensure that the filters being used
	 * are real; two, we need to sort the filters by taxonomy branch.
	 */
	$query->clear()
		->select('t1.id, t1.title, t2.title AS branch')
		->from($db->quoteName('#__finder_taxonomy') . ' AS t1')
		->join('INNER', $db->quoteName('#__finder_taxonomy') . ' AS t2 ON t2.id = t1.parent_id')
		->where('t1.state = 1')
		->where('t1.access IN (' . $groups . ')')
		->where('t1.id IN (' . implode(',', $filters) . ')')
		->where('t2.state = 1')
		->where('t2.access IN (' . $groups . ')');

	// Load the filters.
	$db->setQuery($query);
	$results = $db->loadObjectList();

	// Sort the filter ids by branch.
	foreach ($results as $result)
	{
		$this->filters[$result->branch][$result->title] = (int) $result->id;
	}

	return true;
}
/**
 * Method to process the dynamic taxonomy input. The dynamic taxonomy input
 * comes in the form of select fields that the user chooses from. The
 * dynamic taxonomy input is processed AFTER the static taxonomy input
 * because the dynamic options can be used to further narrow a static
 * taxonomy filter.
 *
 * The node ids are sanitized and validated against #__finder_taxonomy
 * (published and visible to the current user). Every branch touched by a
 * valid node is reset once and then filled with the dynamic nodes.
 *
 * @param   array  $filters  An array of taxonomy node ids.
 *
 * @return  boolean  True on success.
 *
 * @since   2.5
 * @throws  Exception on database error.
 */
protected function processDynamicTaxonomy($filters)
{
	// Initialize user variables
	$user = JFactory::getUser();
	$groups = implode(',', $user->getAuthorisedViewLevels());

	// Sanitize first and de-duplicate afterwards, so that values which only
	// become equal once cast to integer (e.g. "1" and "01") collapse too.
	JArrayHelper::toInteger($filters);
	$filters = array_unique($filters);

	// Remove every value of zero (all values are integers at this point).
	$filters = array_filter($filters);

	// Check if we have any real input.
	if (empty($filters))
	{
		return true;
	}

	// Get the database object.
	$db = JFactory::getDbo();
	$query = $db->getQuery(true);

	/*
	 * Create the query to get filters from the database. We do this for
	 * two reasons: one, it allows us to ensure that the filters being used
	 * are real; two, we need to sort the filters by taxonomy branch.
	 */
	$query->select('t1.id, t1.title, t2.title AS branch')
		->from($db->quoteName('#__finder_taxonomy') . ' AS t1')
		->join('INNER', $db->quoteName('#__finder_taxonomy') . ' AS t2 ON t2.id = t1.parent_id')
		->where('t1.state = 1')
		->where('t1.access IN (' . $groups . ')')
		->where('t1.id IN (' . implode(',', $filters) . ')')
		->where('t2.state = 1')
		->where('t2.access IN (' . $groups . ')');

	// Load the filters.
	$db->setQuery($query);
	$results = $db->loadObjectList();

	// Cleared filter branches.
	$cleared = array();

	/*
	 * Sort the filter ids by branch. Because these filters are designed to
	 * override and further narrow the items selected in the static filter,
	 * we will clear the values from the static filter on a branch by
	 * branch basis before adding the dynamic filters. So, if the static
	 * filter defines a type filter of "articles" and three "category"
	 * filters but the user only limits the category further, the category
	 * filters will be flushed but the type filters will not.
	 */
	foreach ($results as $result)
	{
		// Check if the branch has been cleared.
		if (!in_array($result->branch, $cleared))
		{
			// Clear the branch.
			$this->filters[$result->branch] = array();

			// Add the branch to the cleared list.
			$cleared[] = $result->branch;
		}

		// Add the filter to the list.
		$this->filters[$result->branch][$result->title] = (int) $result->id;
	}

	return true;
}
/**
 * Method to process the query date filters to determine start and end
 * date limitations.
 *
 * Each date is handled independently: an empty or unparseable value leaves
 * the corresponding date/when properties untouched. A modifier that is not
 * one of 'before', 'after' or 'exact' falls back to 'before'.
 *
 * @param   string  $date1  The first date filter.
 * @param   string  $date2  The second date filter.
 * @param   string  $when1  The first date modifier.
 * @param   string  $when2  The second date modifier.
 *
 * @return  boolean  True on success.
 *
 * @since   2.5
 */
protected function processDates($date1, $date2, $when1, $when2)
{
	// Clean up the inputs.
	$date1 = JString::trim(JString::strtolower($date1));
	$date2 = JString::trim(JString::strtolower($date2));
	$when1 = JString::trim(JString::strtolower($when1));
	$when2 = JString::trim(JString::strtolower($when2));

	// Get the time offset.
	$offset = JFactory::getApplication()->getCfg('offset');

	// Array of allowed when values.
	$whens = array('before', 'after', 'exact');

	// Resolve and store the start date filter.
	$sql = $this->parseFilterDate($date1, $offset);

	if ($sql !== null)
	{
		$this->date1 = $sql;
		$this->when1 = in_array($when1, $whens) ? $when1 : 'before';
	}

	// Resolve and store the end date filter.
	$sql = $this->parseFilterDate($date2, $offset);

	if ($sql !== null)
	{
		$this->date2 = $sql;
		$this->when2 = in_array($when2, $whens) ? $when2 : 'before';
	}

	return true;
}

/**
 * Helper to turn a cleaned-up date filter value into an SQL date string.
 *
 * @param   string  $value   The lower-cased, trimmed date filter value.
 * @param   mixed   $offset  The time offset passed on to JFactory::getDate().
 *
 * @return  string|null  The SQL date string, or null when the value is empty
 *                       or cannot be parsed.
 *
 * @since   2.5
 */
private function parseFilterDate($value, $offset)
{
	// No value means no filter. Without this guard the empty string would be
	// handed to the date parser (which presumably resolves it to "now").
	if ($value === '' || $value === null)
	{
		return null;
	}

	try
	{
		// The value of 'today' is a special case that we need to handle.
		if ($value === JString::strtolower(JText::_('COM_FINDER_QUERY_FILTER_TODAY')))
		{
			// date()-style format characters are required here, not strftime() ones.
			$value = JFactory::getDate('now', $offset)->format('Y-m-d');
		}

		// Try to parse the date string.
		$date = JFactory::getDate($value, $offset);
	}
	catch (Exception $e)
	{
		// The date string could not be parsed; ignore this filter.
		return null;
	}

	// Check if the date was parsed successfully.
	if ($date->toUnix() === null)
	{
		return null;
	}

	return $date->toSQL();
}
/**
 * Method to process the query input string and extract required, optional,
 * and excluded tokens; taxonomy filters; and date filters.
 *
 * Processing order:
 *  1. "modifier:value" filters (before/after dates, taxonomy branches) are
 *     applied and removed from the input.
 *  2. Double-quoted phrases are extracted (long phrases are split into
 *     overlapping chunks of at most three words).
 *  3. Terms joined by the AND / OR / NOT operators are tokenized and sorted
 *     into the included / excluded / ignored stacks.
 *  4. Remaining phrases and then remaining plain terms are tokenized.
 *
 * @param   string  $input  The query input string.
 * @param   string  $lang   The query input language.
 * @param   string  $mode   The query matching mode.
 *
 * @return  boolean  True on success.
 *
 * @since   2.5
 * @throws  Exception on database error.
 */
protected function processString($input, $lang, $mode)
{
	// Clean up the input string.
	$input = html_entity_decode($input, ENT_QUOTES, 'UTF-8');
	$input = JString::strtolower($input);
	$input = preg_replace('#\s+#mi', ' ', $input);
	$input = JString::trim($input);
	$debug = JFactory::getConfig()->get('debug_lang');

	/*
	 * First, we need to handle string based modifiers. String based
	 * modifiers could potentially include things like "category:blah" or
	 * "before:2009-10-21" or "type:article", etc.
	 */
	$patterns = array(
		'before' => JText::_('COM_FINDER_FILTER_WHEN_BEFORE'),
		'after' => JText::_('COM_FINDER_FILTER_WHEN_AFTER')
	);

	// Add the taxonomy branch titles to the possible patterns.
	foreach (FinderIndexerTaxonomy::getBranchTitles() as $branch)
	{
		// Add the pattern.
		$patterns[$branch] = JString::strtolower(JText::_(FinderHelperLanguage::branchSingular($branch)));
	}

	// Container for search terms and phrases.
	$terms = array();
	$phrases = array();

	// Cleared filter branches.
	$cleared = array();

	/*
	 * Compile the suffix pattern. This is used to match the values of the
	 * filter input string. Single words can be input directly, multi-word
	 * values have to be wrapped in double quotes.
	 */
	$quotes = html_entity_decode('&#8216;&#8217;&#39;', ENT_QUOTES, 'UTF-8');
	$suffix = '(([\w\d' . $quotes . '-]+)|\"([\w\d\s' . $quotes . '-]+)\")';

	/*
	 * Iterate through the possible filter patterns and search for matches.
	 * We need to match the key, colon, and a value pattern for the match
	 * to be valid.
	 */
	foreach ($patterns as $modifier => $pattern)
	{
		$matches = array();

		// Strip the two leading and trailing language debug marker characters.
		if ($debug)
		{
			$pattern = substr($pattern, 2, -2);
		}

		// Check if the filter pattern is in the input string. The pattern is a
		// translated label, so it is quoted to be matched literally.
		if (preg_match('#' . preg_quote($pattern, '#') . '\s*:\s*' . $suffix . '#mi', $input, $matches))
		{
			// Get the value given to the modifier (quoted value first, bare word otherwise).
			$value = isset($matches[3]) ? $matches[3] : $matches[1];

			// Now we have to handle the filter string.
			switch ($modifier)
			{
				// Handle a before and after date filters.
				case 'before':
				case 'after':
					// Get the time offset.
					$offset = JFactory::getApplication()->getCfg('offset');

					// Array of allowed when values.
					$whens = array('before', 'after', 'exact');

					try
					{
						// The value of 'today' is a special case that we need to handle.
						if ($value === JString::strtolower(JText::_('COM_FINDER_QUERY_FILTER_TODAY')))
						{
							// date()-style format characters are required here, not strftime() ones.
							$today = JFactory::getDate('now', $offset);
							$value = $today->format('Y-m-d');
						}

						// Try to parse the date string.
						$date = JFactory::getDate($value, $offset);
					}
					catch (Exception $e)
					{
						// The user supplied something that is not a date; ignore the filter.
						$date = null;
					}

					// Check if the date was parsed successfully.
					if ($date !== null && $date->toUnix() !== null)
					{
						// Set the date filter.
						$this->date1 = $date->toSQL();
						$this->when1 = in_array($modifier, $whens) ? $modifier : 'before';
					}

					break;

				// Handle a taxonomy branch filter.
				default:
					// Try to find the node id.
					$return = FinderIndexerTaxonomy::getNodeByTitle($modifier, $value);

					// Check if the node id was found.
					if ($return)
					{
						// Check if the branch has been cleared.
						if (!in_array($modifier, $cleared))
						{
							// Clear the branch.
							$this->filters[$modifier] = array();

							// Add the branch to the cleared list.
							$cleared[] = $modifier;
						}

						// Add the filter to the list.
						$this->filters[$modifier][$return->title] = (int) $return->id;
					}

					break;
			}

			// Clean up the input string again.
			$input = str_replace($matches[0], '', $input);
			$input = preg_replace('#\s+#mi', ' ', $input);
			$input = JString::trim($input);
		}
	}

	/*
	 * Extract the tokens enclosed in double quotes so that we can handle
	 * them as phrases.
	 */
	if (JString::strpos($input, '"') !== false)
	{
		$matches = array();

		// Extract the tokens enclosed in double quotes.
		if (preg_match_all('#\"([^"]+)\"#mi', $input, $matches))
		{
			/*
			 * One or more phrases were found so we need to iterate through
			 * them, tokenize them as phrases, and remove them from the raw
			 * input string before we move on to the next processing step.
			 */
			foreach ($matches[1] as $key => $match)
			{
				// Find the complete phrase in the input string.
				$pos = JString::strpos($input, $matches[0][$key]);
				$len = JString::strlen($matches[0][$key]);

				// Add any terms that are before this phrase to the stack.
				if (JString::trim(JString::substr($input, 0, $pos)))
				{
					$terms = array_merge($terms, explode(' ', JString::trim(JString::substr($input, 0, $pos))));
				}

				// Strip out everything up to and including the phrase.
				$input = JString::substr($input, $pos + $len);

				// Clean up the input string again.
				$input = preg_replace('#\s+#mi', ' ', $input);
				$input = JString::trim($input);

				// Get the number of words in the phrase.
				$parts = explode(' ', $match);

				// Check if the phrase is longer than three words.
				if (count($parts) > 3)
				{
					/*
					 * If the phrase is longer than three words, we need to
					 * break it down into smaller chunks of phrases that
					 * are less than or equal to three words. We overlap
					 * the chunks so that we can ensure that a match is
					 * found for the complete phrase and not just portions
					 * of it.
					 */
					for ($i = 0, $c = count($parts); $i < $c; $i += 2)
					{
						// Set up the chunk.
						$chunk = array();

						// The chunk has to be assembled based on how many
						// pieces are available to use.
						switch ($c - $i)
						{
							/*
							 * If only one word is left, we can break from
							 * the switch and loop because the last word
							 * was already used at the end of the last
							 * chunk.
							 */
							case 1:
								break 2;

							// If two words are left, we use them both as
							// the last chunk of the phrase and we're done.
							case 2:
								$chunk[] = $parts[$i];
								$chunk[] = $parts[$i + 1];
								break;

							// If there are three or more words left, we
							// build a three word chunk and continue on.
							default:
								$chunk[] = $parts[$i];
								$chunk[] = $parts[$i + 1];
								$chunk[] = $parts[$i + 2];
								break;
						}

						// If the chunk is not empty, add it as a phrase.
						if (count($chunk))
						{
							$phrases[] = implode(' ', $chunk);
							$terms[] = implode(' ', $chunk);
						}
					}
				}
				else
				{
					// The phrase is <= 3 words so we can use it as is.
					$phrases[] = $match;
					$terms[] = $match;
				}
			}
		}
	}

	// Add the remaining terms if present.
	if (!empty($input))
	{
		$terms = array_merge($terms, explode(' ', $input));
	}

	// An array of our boolean operators. $operator => $translation
	$operators = array(
		'AND' => JString::strtolower(JText::_('COM_FINDER_QUERY_OPERATOR_AND')),
		'OR' => JString::strtolower(JText::_('COM_FINDER_QUERY_OPERATOR_OR')),
		'NOT' => JString::strtolower(JText::_('COM_FINDER_QUERY_OPERATOR_NOT'))
	);

	// If language debugging is enabled you need to ignore the debug strings in
	// matching. The language debug setting is honoured as well as JDEBUG so
	// that this agrees with the modifier pattern handling above.
	if (JDEBUG || $debug)
	{
		$debugStrings = array('**', '??');
		$operators = str_replace($debugStrings, '', $operators);
	}

	/*
	 * Iterate through the terms and perform any sorting that needs to be
	 * done based on boolean search operators. Terms that are before an
	 * and/or/not modifier have to be handled in relation to their operator.
	 */
	for ($i = 0, $c = count($terms); $i < $c; $i++)
	{
		// Check if the term is followed by an operator that we understand.
		if (isset($terms[$i + 1]) && in_array($terms[$i + 1], $operators))
		{
			// Get the operator mode.
			$op = array_search($terms[$i + 1], $operators);

			// Handle the AND operator.
			if ($op === 'AND' && isset($terms[$i + 2]))
			{
				// Tokenize the current term.
				$token = FinderIndexerHelper::tokenize($terms[$i], $lang, true);
				$token = $this->getTokenData($token);

				// Set the required flag.
				$token->required = true;

				// Add the current token to the stack.
				$this->included[] = $token;
				$this->highlight = array_merge($this->highlight, array_keys($token->matches));

				// Skip the next token (the mode operator).
				$this->operators[] = $terms[$i + 1];

				// Tokenize the term after the next term (current plus two).
				$other = FinderIndexerHelper::tokenize($terms[$i + 2], $lang, true);
				$other = $this->getTokenData($other);

				// Set the required flag.
				$other->required = true;

				// Add the token after the next token to the stack.
				$this->included[] = $other;
				$this->highlight = array_merge($this->highlight, array_keys($other->matches));

				// Remove the processed phrases if possible.
				if (($pk = array_search($terms[$i], $phrases)) !== false)
				{
					unset($phrases[$pk]);
				}

				if (($pk = array_search($terms[$i + 2], $phrases)) !== false)
				{
					unset($phrases[$pk]);
				}

				// Remove the processed terms.
				unset($terms[$i]);
				unset($terms[$i + 1]);
				unset($terms[$i + 2]);

				// Adjust the loop.
				$i += 2;
				continue;
			}
			// Handle the OR operator.
			elseif ($op === 'OR' && isset($terms[$i + 2]))
			{
				// Tokenize the current term.
				$token = FinderIndexerHelper::tokenize($terms[$i], $lang, true);
				$token = $this->getTokenData($token);

				// Set the required flag.
				$token->required = false;

				// Add the current token to the stack.
				if (count($token->matches))
				{
					$this->included[] = $token;
					$this->highlight = array_merge($this->highlight, array_keys($token->matches));
				}
				else
				{
					$this->ignored[] = $token;
				}

				// Skip the next token (the mode operator).
				$this->operators[] = $terms[$i + 1];

				// Tokenize the term after the next term (current plus two).
				$other = FinderIndexerHelper::tokenize($terms[$i + 2], $lang, true);
				$other = $this->getTokenData($other);

				// Set the required flag.
				$other->required = false;

				// Add the token after the next token to the stack.
				if (count($other->matches))
				{
					$this->included[] = $other;
					$this->highlight = array_merge($this->highlight, array_keys($other->matches));
				}
				else
				{
					$this->ignored[] = $other;
				}

				// Remove the processed phrases if possible.
				if (($pk = array_search($terms[$i], $phrases)) !== false)
				{
					unset($phrases[$pk]);
				}

				if (($pk = array_search($terms[$i + 2], $phrases)) !== false)
				{
					unset($phrases[$pk]);
				}

				// Remove the processed terms.
				unset($terms[$i]);
				unset($terms[$i + 1]);
				unset($terms[$i + 2]);

				// Adjust the loop.
				$i += 2;
				continue;
			}
		}
		// Handle an orphaned OR operator.
		elseif (isset($terms[$i + 1]) && array_search($terms[$i], $operators) === 'OR')
		{
			// Skip the next token (the mode operator).
			$this->operators[] = $terms[$i];

			// Tokenize the next term (current plus one).
			$other = FinderIndexerHelper::tokenize($terms[$i + 1], $lang, true);
			$other = $this->getTokenData($other);

			// Set the required flag.
			$other->required = false;

			// Add the token after the next token to the stack.
			if (count($other->matches))
			{
				$this->included[] = $other;
				$this->highlight = array_merge($this->highlight, array_keys($other->matches));
			}
			else
			{
				$this->ignored[] = $other;
			}

			// Remove the processed phrase if possible.
			if (($pk = array_search($terms[$i + 1], $phrases)) !== false)
			{
				unset($phrases[$pk]);
			}

			// Remove the processed terms.
			unset($terms[$i]);
			unset($terms[$i + 1]);

			// Adjust the loop.
			$i += 1;
			continue;
		}
		// Handle the NOT operator.
		elseif (isset($terms[$i + 1]) && array_search($terms[$i], $operators) === 'NOT')
		{
			// Skip the next token (the mode operator).
			$this->operators[] = $terms[$i];

			// Tokenize the next term (current plus one).
			$other = FinderIndexerHelper::tokenize($terms[$i + 1], $lang, true);
			$other = $this->getTokenData($other);

			// Set the required flag.
			$other->required = false;

			// Add the next token to the stack.
			if (count($other->matches))
			{
				$this->excluded[] = $other;
			}
			else
			{
				$this->ignored[] = $other;
			}

			// Remove the processed phrase if possible.
			if (($pk = array_search($terms[$i + 1], $phrases)) !== false)
			{
				unset($phrases[$pk]);
			}

			// Remove the processed terms.
			unset($terms[$i]);
			unset($terms[$i + 1]);

			// Adjust the loop.
			$i += 1;
			continue;
		}
	}

	/*
	 * Iterate through any search phrases and tokenize them. We handle
	 * phrases as autonomous units and do not break them down into two and
	 * three word combinations.
	 *
	 * The operator handling above may have unset() entries of $phrases, so
	 * the array can have gaps in its keys; iterate over the entries that
	 * actually remain instead of counting from zero.
	 */
	foreach ($phrases as $phrase)
	{
		// Tokenize the phrase.
		$token = FinderIndexerHelper::tokenize($phrase, $lang, true);
		$token = $this->getTokenData($token);

		// Set the required flag.
		$token->required = true;

		// Add the current token to the stack.
		$this->included[] = $token;
		$this->highlight = array_merge($this->highlight, array_keys($token->matches));

		// Remove the processed term if possible.
		if (($pk = array_search($phrase, $terms)) !== false)
		{
			unset($terms[$pk]);
		}
	}

	// All phrases have been processed.
	$phrases = array();

	/*
	 * Handle any remaining tokens using the standard processing mechanism.
	 */
	if (!empty($terms))
	{
		// Tokenize the terms.
		$terms = implode(' ', $terms);
		$tokens = FinderIndexerHelper::tokenize($terms, $lang, false);

		// Make sure we are working with an array.
		$tokens = is_array($tokens) ? $tokens : array($tokens);

		// Get the token data and required state for all the tokens.
		foreach ($tokens as $token)
		{
			// Get the token data.
			$token = $this->getTokenData($token);

			// Set the required flag for the token: in AND mode every
			// non-phrase token is required, otherwise nothing is.
			$token->required = $mode === 'AND' ? ($token->phrase ? false : true) : false;

			// Add the token to the appropriate stack.
			if (count($token->matches) || $token->required)
			{
				$this->included[] = $token;
				$this->highlight = array_merge($this->highlight, array_keys($token->matches));
			}
			else
			{
				$this->ignored[] = $token;
			}
		}
	}

	return true;
}
/**
 * Method to get the base and similar term ids and, if necessary, suggested
 * term data from the database. The terms ids are identified based on a
 * 'like' match in MySQL and/or a common stem. If no term ids could be
 * found, then we know that we will not be able to return any results for
 * that term and we should try to find a similar term to use that we can
 * match so that we can suggest the alternative search query to the user.
 *
 * On return $token->matches maps each matching term to its integer term
 * id. $token->suggestion is only set when no match was found and a
 * sufficiently similar term exists.
 *
 * @param   FinderIndexerToken  $token  A FinderIndexerToken object.
 *
 * @return  FinderIndexerToken  A FinderIndexerToken object.
 *
 * @since   2.5
 * @throws  Exception on database error.
 */
protected function getTokenData($token)
{
	// Get the database object.
	$db = JFactory::getDbo();

	// Create a database query to match the token.
	$query = $db->getQuery(true)
		->select('t.term, t.term_id')
		->from('#__finder_terms AS t');

	/*
	 * If the token is a phrase, the lookup process is fairly simple. If
	 * the token is a word, it is a little more complicated. We have to
	 * create two queries to lookup the term and the stem respectively,
	 * then union the result sets together. This is MUCH faster than using
	 * an or condition in the database query.
	 */
	if ($token->phrase)
	{
		// Add the phrase to the query.
		$query->where('t.term = ' . $db->quote($token->term))
			->where('t.phrase = 1');
	}
	else
	{
		// Add the term to the query.
		$query->where('t.term = ' . $db->quote($token->term))
			->where('t.phrase = 0');

		// Clone the query, replace the WHERE clause.
		$sub = clone($query);
		$sub->clear('where');
		$sub->where('t.stem = ' . $db->quote($token->stem));
		$sub->where('t.phrase = 0');

		// Union the two queries.
		$query->union($sub);
	}

	// Get the terms.
	$db->setQuery($query);
	$matches = $db->loadObjectList();

	// Setup the container.
	$token->matches = array();

	// Add the matches to the token, keyed by term.
	if (!empty($matches))
	{
		foreach ($matches as $match)
		{
			$token->matches[$match->term] = (int) $match->term_id;
		}
	}

	// Matches were found, so there is no need to look for a suggestion.
	if (!empty($token->matches))
	{
		return $token;
	}

	// If no matches were found, try to find a similar but better token.
	// TODO: PostgreSQL doesn't support SOUNDEX out of the box
	$query->clear()
		->select('DISTINCT t.term_id AS id, t.term AS term')
		->from('#__finder_terms AS t')
		->where('t.soundex = SOUNDEX(' . $db->quote($token->term) . ')')
		->where('t.phrase = ' . (int) $token->phrase);

	// Get the terms.
	$db->setQuery($query);
	$results = $db->loadObjectList();

	// Check if any similar terms were found.
	if (empty($results))
	{
		return $token;
	}

	// Stack for sorting the similar terms.
	$suggestions = array();

	// Get the levenshtein distance for all suggested terms.
	foreach ($results as $sk => $st)
	{
		// Get the levenshtein distance between terms.
		$distance = levenshtein($st->term, $token->term);

		// Make sure the levenshtein distance isn't over 50. A negative value
		// is what older PHP versions return for over-long strings and must
		// not be mistaken for a perfect match.
		if ($distance >= 0 && $distance < 50)
		{
			$suggestions[$sk] = $distance;
		}
	}

	// None of the similar sounding terms was close enough.
	if (empty($suggestions))
	{
		return $token;
	}

	// Sort the suggestions, closest first.
	asort($suggestions, SORT_NUMERIC);

	// Get the closest match.
	$keys = array_keys($suggestions);
	$key = $keys[0];

	// Add the suggested term.
	$token->suggestion = $results[$key]->term;

	return $token;
}
}