first commit

This commit is contained in:
alazhar
2020-01-02 22:20:31 +07:00
commit 10eb3340ad
5753 changed files with 631345 additions and 0 deletions

View File

@ -0,0 +1,77 @@
<?php
/**
* @package Joomla.Administrator
* @subpackage com_finder
*
* @copyright Copyright (C) 2005 - 2013 Open Source Matters, Inc. All rights reserved.
* @license GNU General Public License version 2 or later; see LICENSE
*/
defined('_JEXEC') or die;
/**
* Helper class for Finder.
*
* @package Joomla.Administrator
* @subpackage com_finder
* @since 2.5
*/
class FinderHelper
{
/**
* @var string The extension name.
* @since 2.5
*/
public static $extension = 'com_finder';
/**
* Configure the Linkbar.
*
* @param string $vName The name of the active view.
*
* @return void
*
* @since 2.5
*/
public static function addSubmenu($vName)
{
JHtmlSidebar::addEntry(
JText::_('COM_FINDER_SUBMENU_INDEX'),
'index.php?option=com_finder&view=index',
$vName == 'index'
);
JHtmlSidebar::addEntry(
JText::_('COM_FINDER_SUBMENU_MAPS'),
'index.php?option=com_finder&view=maps',
$vName == 'maps'
);
JHtmlSidebar::addEntry(
JText::_('COM_FINDER_SUBMENU_FILTERS'),
'index.php?option=com_finder&view=filters',
$vName == 'filters'
);
}
/**
* Gets a list of the actions that can be performed.
*
* @return JObject A JObject containing the allowed actions.
*
* @since 2.5
*/
public static function getActions()
{
$user = JFactory::getUser();
$result = new JObject;
$assetName = 'com_finder';
$actions = JAccess::getActions($assetName, 'component');
foreach ($actions as $action)
{
$result->set($action->name, $user->authorise($action->name, $assetName));
}
return $result;
}
}

View File

@ -0,0 +1,125 @@
<?php
/**
* @package Joomla.Administrator
* @subpackage com_finder
*
* @copyright Copyright (C) 2005 - 2013 Open Source Matters, Inc. All rights reserved.
* @license GNU General Public License version 2 or later; see LICENSE
*/
defined('_JEXEC') or die;
JLoader::register('FinderHelperLanguage', JPATH_ADMINISTRATOR . '/components/com_finder/helpers/language.php');
/**
* HTML behavior class for Finder.
*
* @package Joomla.Administrator
* @subpackage com_finder
* @since 2.5
*/
abstract class JHtmlFinder
{
/**
* Creates a list of types to filter on.
*
* @return array An array containing the types that can be selected.
*
* @since 2.5
*/
public static function typeslist()
{
$lang = JFactory::getLanguage();
// Load the finder types.
$db = JFactory::getDbo();
$query = $db->getQuery(true)
->select('DISTINCT t.title AS text, t.id AS value')
->from($db->quoteName('#__finder_types') . ' AS t')
->join('LEFT', $db->quoteName('#__finder_links') . ' AS l ON l.type_id = t.id')
->order('t.title ASC');
$db->setQuery($query);
try
{
$rows = $db->loadObjectList();
}
catch (RuntimeException $e)
{
return;
}
// Compile the options.
$options = array();
foreach ($rows as $row)
{
$key = $lang->hasKey(FinderHelperLanguage::branchPlural($row->text))
? FinderHelperLanguage::branchPlural($row->text) : $row->text;
$string = JText::sprintf('COM_FINDER_ITEM_X_ONLY', JText::_($key));
$options[] = JHtml::_('select.option', $row->value, $string);
}
return $options;
}
/**
* Creates a list of maps.
*
* @return array An array containing the maps that can be selected.
*
* @since 2.5
*/
public static function mapslist()
{
$lang = JFactory::getLanguage();
// Load the finder types.
$db = JFactory::getDbo();
$query = $db->getQuery(true)
->select('title AS text, id AS value')
->from($db->quoteName('#__finder_taxonomy'))
->where($db->quoteName('parent_id') . ' = 1')
->order('ordering, title ASC');
$db->setQuery($query);
try
{
$rows = $db->loadObjectList();
}
catch (RuntimeException $e)
{
return;
}
// Compile the options.
$options = array();
$options[] = JHtml::_('select.option', '1', JText::_('COM_FINDER_MAPS_BRANCHES'));
foreach ($rows as $row)
{
$key = $lang->hasKey(FinderHelperLanguage::branchPlural($row->text))
? FinderHelperLanguage::branchPlural($row->text) : $row->text;
$string = JText::sprintf('COM_FINDER_ITEM_X_ONLY', JText::_($key));
$options[] = JHtml::_('select.option', $row->value, $string);
}
return $options;
}
/**
* Creates a list of published states.
*
* @return array An array containing the states that can be selected.
*
* @since 2.5
*/
public static function statelist()
{
$options = array();
$options[] = JHtml::_('select.option', '1', JText::sprintf('COM_FINDER_ITEM_X_ONLY', JText::_('JPUBLISHED')));
$options[] = JHtml::_('select.option', '0', JText::sprintf('COM_FINDER_ITEM_X_ONLY', JText::_('JUNPUBLISHED')));
return $options;
}
}

View File

@ -0,0 +1 @@
<!DOCTYPE html><title></title>

View File

@ -0,0 +1 @@
<!DOCTYPE html><title></title>

View File

@ -0,0 +1,930 @@
<?php
/**
* @package Joomla.Administrator
* @subpackage com_finder
*
* @copyright Copyright (C) 2005 - 2013 Open Source Matters, Inc. All rights reserved.
* @license GNU General Public License version 2 or later; see LICENSE
*/
defined('_JEXEC') or die;
JLoader::register('FinderIndexer', __DIR__ . '/indexer.php');
JLoader::register('FinderIndexerHelper', __DIR__ . '/helper.php');
JLoader::register('FinderIndexerResult', __DIR__ . '/result.php');
JLoader::register('FinderIndexerTaxonomy', __DIR__ . '/taxonomy.php');
/**
* Prototype adapter class for the Finder indexer package.
*
* @package Joomla.Administrator
* @subpackage com_finder
* @since 2.5
*/
abstract class FinderIndexerAdapter extends JPlugin
{
/**
* The context is somewhat arbitrary but it must be unique or there will be
* conflicts when managing plugin/indexer state. A good best practice is to
* use the plugin name suffix as the context. For example, if the plugin is
* named 'plgFinderContent', the context could be 'Content'.
*
* @var string
* @since 2.5
*/
protected $context;
/**
* The extension name.
*
* @var string
* @since 2.5
*/
protected $extension;
/**
* The sublayout to use when rendering the results.
*
* @var string
* @since 2.5
*/
protected $layout;
/**
* The mime type of the content the adapter indexes.
*
* @var string
* @since 2.5
*/
protected $mime;
/**
* The access level of an item before save.
*
* @var integer
* @since 2.5
*/
protected $old_access;
/**
* The access level of a category before save.
*
* @var integer
* @since 2.5
*/
protected $old_cataccess;
/**
* The type of content the adapter indexes.
*
* @var string
* @since 2.5
*/
protected $type_title;
/**
* The type id of the content.
*
* @var integer
* @since 2.5
*/
protected $type_id;
/**
* The database object.
*
* @var object
* @since 2.5
*/
protected $db;
/**
* The table name.
*
* @var string
* @since 2.5
*/
protected $table;
/**
* The indexer object.
*
* @var FinderIndexer
* @since 3.0
*/
protected $indexer;
/**
* The field the published state is stored in.
*
* @var string
* @since 2.5
*/
protected $state_field = 'state';
/**
* Method to instantiate the indexer adapter.
*
* @param object &$subject The object to observe.
* @param array $config An array that holds the plugin configuration.
*
* @since 2.5
*/
public function __construct(&$subject, $config)
{
// Get the database object.
$this->db = JFactory::getDbo();
// Call the parent constructor.
parent::__construct($subject, $config);
// Get the type id.
$this->type_id = $this->getTypeId();
// Add the content type if it doesn't exist and is set.
if (empty($this->type_id) && !empty($this->type_title))
{
$this->type_id = FinderIndexerHelper::addContentType($this->type_title, $this->mime);
}
// Check for a layout override.
if ($this->params->get('layout'))
{
$this->layout = $this->params->get('layout');
}
// Get the indexer object
$this->indexer = FinderIndexer::getInstance();
}
/**
* Method to get the adapter state and push it into the indexer.
*
* @return boolean True on success.
*
* @since 2.5
* @throws Exception on error.
*/
public function onStartIndex()
{
// Get the indexer state.
$iState = FinderIndexer::getState();
// Get the number of content items.
$total = (int) $this->getContentCount();
// Add the content count to the total number of items.
$iState->totalItems += $total;
// Populate the indexer state information for the adapter.
$iState->pluginState[$this->context]['total'] = $total;
$iState->pluginState[$this->context]['offset'] = 0;
// Set the indexer state.
FinderIndexer::setState($iState);
}
/**
* Method to prepare for the indexer to be run. This method will often
* be used to include dependencies and things of that nature.
*
* @return boolean True on success.
*
* @since 2.5
* @throws Exception on error.
*/
public function onBeforeIndex()
{
// Get the indexer and adapter state.
$iState = FinderIndexer::getState();
$aState = $iState->pluginState[$this->context];
// Check the progress of the indexer and the adapter.
if ($iState->batchOffset == $iState->batchSize || $aState['offset'] == $aState['total'])
{
return true;
}
// Run the setup method.
return $this->setup();
}
/**
* Method to index a batch of content items. This method can be called by
* the indexer many times throughout the indexing process depending on how
* much content is available for indexing. It is important to track the
* progress correctly so we can display it to the user.
*
* @return boolean True on success.
*
* @since 2.5
* @throws Exception on error.
*/
public function onBuildIndex()
{
// Get the indexer and adapter state.
$iState = FinderIndexer::getState();
$aState = $iState->pluginState[$this->context];
// Check the progress of the indexer and the adapter.
if ($iState->batchOffset == $iState->batchSize || $aState['offset'] == $aState['total'])
{
return true;
}
// Get the batch offset and size.
$offset = (int) $aState['offset'];
$limit = (int) ($iState->batchSize - $iState->batchOffset);
// Get the content items to index.
$items = $this->getItems($offset, $limit);
// Iterate through the items and index them.
for ($i = 0, $n = count($items); $i < $n; $i++)
{
// Index the item.
$this->index($items[$i]);
// Adjust the offsets.
$offset++;
$iState->batchOffset++;
$iState->totalItems--;
}
// Update the indexer state.
$aState['offset'] = $offset;
$iState->pluginState[$this->context] = $aState;
FinderIndexer::setState($iState);
return true;
}
/**
* Method to change the value of a content item's property in the links
* table. This is used to synchronize published and access states that
* are changed when not editing an item directly.
*
* @param string $id The ID of the item to change.
* @param string $property The property that is being changed.
* @param integer $value The new value of that property.
*
* @return boolean True on success.
*
* @since 2.5
* @throws Exception on database error.
*/
protected function change($id, $property, $value)
{
// Check for a property we know how to handle.
if ($property !== 'state' && $property !== 'access')
{
return true;
}
// Get the url for the content id.
$item = $this->db->quote($this->getUrl($id, $this->extension, $this->layout));
// Update the content items.
$query = $this->db->getQuery(true)
->update($this->db->quoteName('#__finder_links'))
->set($this->db->quoteName($property) . ' = ' . (int) $value)
->where($this->db->quoteName('url') . ' = ' . $item);
$this->db->setQuery($query);
$this->db->execute();
return true;
}
/**
* Method to index an item.
*
* @param FinderIndexerResult $item The item to index as a FinderIndexerResult object.
*
* @return boolean True on success.
*
* @since 2.5
* @throws Exception on database error.
*/
abstract protected function index(FinderIndexerResult $item);
/**
* Method to reindex an item.
*
* @param integer $id The ID of the item to reindex.
*
* @return boolean True on success.
*
* @since 2.5
* @throws Exception on database error.
*/
protected function reindex($id)
{
// Run the setup method.
$this->setup();
// Get the item.
$item = $this->getItem($id);
// Index the item.
$this->index($item);
}
/**
* Method to remove an item from the index.
*
* @param string $id The ID of the item to remove.
*
* @return boolean True on success.
*
* @since 2.5
* @throws Exception on database error.
*/
protected function remove($id)
{
// Get the item's URL
$url = $this->db->quote($this->getUrl($id, $this->extension, $this->layout));
// Get the link ids for the content items.
$query = $this->db->getQuery(true)
->select($this->db->quoteName('link_id'))
->from($this->db->quoteName('#__finder_links'))
->where($this->db->quoteName('url') . ' = ' . $url);
$this->db->setQuery($query);
$items = $this->db->loadColumn();
// Check the items.
if (empty($items))
{
return true;
}
// Remove the items.
foreach ($items as $item)
{
$this->indexer->remove($item);
}
return true;
}
/**
* Method to setup the adapter before indexing.
*
* @return boolean True on success, false on failure.
*
* @since 2.5
* @throws Exception on database error.
*/
abstract protected function setup();
/**
* Method to update index data on category access level changes
*
* @param JTable $row A JTable object
*
* @return void
*
* @since 2.5
*/
protected function categoryAccessChange($row)
{
$query = clone($this->getStateQuery());
$query->where('c.id = ' . (int) $row->id);
// Get the access level.
$this->db->setQuery($query);
$items = $this->db->loadObjectList();
// Adjust the access level for each item within the category.
foreach ($items as $item)
{
// Set the access level.
$temp = max($item->access, $row->access);
// Update the item.
$this->change((int) $item->id, 'access', $temp);
// Reindex the item
$this->reindex($row->id);
}
}
/**
* Method to update index data on category access level changes
*
* @param array $pks A list of primary key ids of the content that has changed state.
* @param integer $value The value of the state that the content has been changed to.
*
* @return void
*
* @since 2.5
*/
protected function categoryStateChange($pks, $value)
{
/*
* The item's published state is tied to the category
* published state so we need to look up all published states
* before we change anything.
*/
foreach ($pks as $pk)
{
$query = clone($this->getStateQuery());
$query->where('c.id = ' . (int) $pk);
// Get the published states.
$this->db->setQuery($query);
$items = $this->db->loadObjectList();
// Adjust the state for each item within the category.
foreach ($items as $item)
{
// Translate the state.
$temp = $this->translateState($item->state, $value);
// Update the item.
$this->change($item->id, 'state', $temp);
// Reindex the item
$this->reindex($item->id);
}
}
}
/**
* Method to check the existing access level for categories
*
* @param JTable $row A JTable object
*
* @return void
*
* @since 2.5
*/
protected function checkCategoryAccess($row)
{
$query = $this->db->getQuery(true)
->select($this->db->quoteName('access'))
->from($this->db->quoteName('#__categories'))
->where($this->db->quoteName('id') . ' = ' . (int) $row->id);
$this->db->setQuery($query);
// Store the access level to determine if it changes
$this->old_cataccess = $this->db->loadResult();
}
/**
* Method to check the existing access level for items
*
* @param JTable $row A JTable object
*
* @return void
*
* @since 2.5
*/
protected function checkItemAccess($row)
{
$query = $this->db->getQuery(true)
->select($this->db->quoteName('access'))
->from($this->db->quoteName($this->table))
->where($this->db->quoteName('id') . ' = ' . (int) $row->id);
$this->db->setQuery($query);
// Store the access level to determine if it changes
$this->old_access = $this->db->loadResult();
}
/**
* Method to get the number of content items available to index.
*
* @return integer The number of content items available to index.
*
* @since 2.5
* @throws Exception on database error.
*/
protected function getContentCount()
{
$return = 0;
// Get the list query.
$query = $this->getListQuery();
// Check if the query is valid.
if (empty($query))
{
return $return;
}
// Tweak the SQL query to make the total lookup faster.
if ($query instanceof JDatabaseQuery)
{
$query = clone($query);
$query->clear('select')
->select('COUNT(*)')
->clear('order');
}
// Get the total number of content items to index.
$this->db->setQuery($query);
$return = (int) $this->db->loadResult();
return $return;
}
/**
* Method to get a content item to index.
*
* @param integer $id The id of the content item.
*
* @return FinderIndexerResult A FinderIndexerResult object.
*
* @since 2.5
* @throws Exception on database error.
*/
protected function getItem($id)
{
// Get the list query and add the extra WHERE clause.
$query = $this->getListQuery();
$query->where('a.id = ' . (int) $id);
// Get the item to index.
$this->db->setQuery($query);
$row = $this->db->loadAssoc();
// Convert the item to a result object.
$item = JArrayHelper::toObject($row, 'FinderIndexerResult');
// Set the item type.
$item->type_id = $this->type_id;
// Set the item layout.
$item->layout = $this->layout;
return $item;
}
/**
* Method to get a list of content items to index.
*
* @param integer $offset The list offset.
* @param integer $limit The list limit.
* @param JDatabaseQuery $query A JDatabaseQuery object. [optional]
*
* @return array An array of FinderIndexerResult objects.
*
* @since 2.5
* @throws Exception on database error.
*/
protected function getItems($offset, $limit, $query = null)
{
$items = array();
// Get the content items to index.
$this->db->setQuery($this->getListQuery($query), $offset, $limit);
$rows = $this->db->loadAssocList();
// Convert the items to result objects.
foreach ($rows as $row)
{
// Convert the item to a result object.
$item = JArrayHelper::toObject($row, 'FinderIndexerResult');
// Set the item type.
$item->type_id = $this->type_id;
// Set the mime type.
$item->mime = $this->mime;
// Set the item layout.
$item->layout = $this->layout;
// Set the extension if present
if (isset($row->extension))
{
$item->extension = $row->extension;
}
// Add the item to the stack.
$items[] = $item;
}
return $items;
}
/**
* Method to get the SQL query used to retrieve the list of content items.
*
* @param mixed $query A JDatabaseQuery object. [optional]
*
* @return JDatabaseQuery A database object.
*
* @since 2.5
*/
protected function getListQuery($query = null)
{
// Check if we can use the supplied SQL query.
$query = $query instanceof JDatabaseQuery ? $query : $this->db->getQuery(true);
return $query;
}
/**
* Method to get the plugin type
*
* @param integer $id The plugin ID
*
* @return string The plugin type
*
* @since 2.5
*/
protected function getPluginType($id)
{
// Prepare the query
$query = $this->db->getQuery(true)
->select($this->db->quoteName('element'))
->from($this->db->quoteName('#__extensions'))
->where($this->db->quoteName('extension_id') . ' = ' . (int) $id);
$this->db->setQuery($query);
$type = $this->db->loadResult();
return $type;
}
/**
* Method to get a SQL query to load the published and access states for
* an article and category.
*
* @return JDatabaseQuery A database object.
*
* @since 2.5
*/
protected function getStateQuery()
{
$query = $this->db->getQuery(true);
// Item ID
$query->select('a.id');
// Item and category published state
$query->select('a.' . $this->state_field . ' AS state, c.published AS cat_state');
// Item and category access levels
$query->select('a.access, c.access AS cat_access')
->from($this->table . ' AS a')
->join('LEFT', '#__categories AS c ON c.id = a.catid');
return $query;
}
/**
* Method to get the query clause for getting items to update by time.
*
* @param string $time The modified timestamp.
*
* @return JDatabaseQuery A database object.
*
* @since 2.5
*/
protected function getUpdateQueryByTime($time)
{
// Build an SQL query based on the modified time.
$query = $this->db->getQuery(true)
->where('a.modified >= ' . $this->db->quote($time));
return $query;
}
/**
* Method to get the query clause for getting items to update by id.
*
* @param array $ids The ids to load.
*
* @return JDatabaseQuery A database object.
*
* @since 2.5
*/
protected function getUpdateQueryByIds($ids)
{
// Build an SQL query based on the item ids.
$query = $this->db->getQuery(true)
->where('a.id IN(' . implode(',', $ids) . ')');
return $query;
}
/**
* Method to get the type id for the adapter content.
*
* @return integer The numeric type id for the content.
*
* @since 2.5
* @throws Exception on database error.
*/
protected function getTypeId()
{
// Get the type id from the database.
$query = $this->db->getQuery(true)
->select($this->db->quoteName('id'))
->from($this->db->quoteName('#__finder_types'))
->where($this->db->quoteName('title') . ' = ' . $this->db->quote($this->type_title));
$this->db->setQuery($query);
$result = (int) $this->db->loadResult();
return $result;
}
/**
* Method to get the URL for the item. The URL is how we look up the link
* in the Finder index.
*
* @param integer $id The id of the item.
* @param string $extension The extension the category is in.
* @param string $view The view for the URL.
*
* @return string The URL of the item.
*
* @since 2.5
*/
protected function getURL($id, $extension, $view)
{
return 'index.php?option=' . $extension . '&view=' . $view . '&id=' . $id;
}
/**
* Method to get the page title of any menu item that is linked to the
* content item, if it exists and is set.
*
* @param string $url The url of the item.
*
* @return mixed The title on success, null if not found.
*
* @since 2.5
* @throws Exception on database error.
*/
protected function getItemMenuTitle($url)
{
$return = null;
// Set variables
$user = JFactory::getUser();
$groups = implode(',', $user->getAuthorisedViewLevels());
// Build a query to get the menu params.
$query = $this->db->getQuery(true)
->select($this->db->quoteName('params'))
->from($this->db->quoteName('#__menu'))
->where($this->db->quoteName('link') . ' = ' . $this->db->quote($url))
->where($this->db->quoteName('published') . ' = 1')
->where($this->db->quoteName('access') . ' IN (' . $groups . ')');
// Get the menu params from the database.
$this->db->setQuery($query);
$params = $this->db->loadResult();
// Check the results.
if (empty($params))
{
return $return;
}
// Instantiate the params.
$params = json_decode($params);
// Get the page title if it is set.
if ($params->page_title)
{
$return = $params->page_title;
}
return $return;
}
/**
* Method to update index data on access level changes
*
* @param JTable $row A JTable object
*
* @return void
*
* @since 2.5
*/
protected function itemAccessChange($row)
{
$query = clone($this->getStateQuery());
$query->where('a.id = ' . (int) $row->id);
// Get the access level.
$this->db->setQuery($query);
$item = $this->db->loadObject();
// Set the access level.
$temp = max($row->access, $item->cat_access);
// Update the item.
$this->change((int) $row->id, 'access', $temp);
}
/**
* Method to update index data on published state changes
*
* @param array $pks A list of primary key ids of the content that has changed state.
* @param integer $value The value of the state that the content has been changed to.
*
* @return void
*
* @since 2.5
*/
protected function itemStateChange($pks, $value)
{
/*
* The item's published state is tied to the category
* published state so we need to look up all published states
* before we change anything.
*/
foreach ($pks as $pk)
{
$query = clone($this->getStateQuery());
$query->where('a.id = ' . (int) $pk);
// Get the published states.
$this->db->setQuery($query);
$item = $this->db->loadObject();
// Translate the state.
$temp = $this->translateState($value, $item->cat_state);
// Update the item.
$this->change($pk, 'state', $temp);
// Reindex the item
$this->reindex($pk);
}
}
/**
* Method to update index data when a plugin is disabled
*
* @param array $pks A list of primary key ids of the content that has changed state.
*
* @return void
*
* @since 2.5
*/
protected function pluginDisable($pks)
{
// Since multiple plugins may be disabled at a time, we need to check first
// that we're handling the appropriate one for the context
foreach ($pks as $pk)
{
if ($this->getPluginType($pk) == strtolower($this->context))
{
// Get all of the items to unindex them
$query = clone($this->getStateQuery());
$this->db->setQuery($query);
$items = $this->db->loadColumn();
// Remove each item
foreach ($items as $item)
{
$this->remove($item);
}
}
}
}
/**
* Method to translate the native content states into states that the
* indexer can use.
*
* @param integer $item The item state.
* @param integer $category The category state. [optional]
*
* @return integer The translated indexer state.
*
* @since 2.5
*/
protected function translateState($item, $category = null)
{
// If category is present, factor in its states as well
if ($category !== null)
{
if ($category == 0)
{
$item = 0;
}
}
// Translate the state
switch ($item)
{
// Published and archived items only should return a published state
case 1;
case 2:
return 1;
// All other states should return a unpublished state
default:
case 0:
return 0;
}
}
}

View File

@ -0,0 +1 @@
<!DOCTYPE html><title></title>

View File

@ -0,0 +1,663 @@
<?php
/**
* @package Joomla.Administrator
* @subpackage com_finder
*
* @copyright Copyright (C) 2005 - 2013 Open Source Matters, Inc. All rights reserved.
* @license GNU General Public License version 2 or later; see LICENSE
*/
defined('_JEXEC') or die;
jimport('joomla.filesystem.file');
/**
* Indexer class supporting MySQL(i) for the Finder indexer package.
*
* The indexer class provides the core functionality of the Finder
* search engine. It is responsible for adding and updating the
* content links table; extracting and scoring tokens; and maintaining
* all referential information for the content.
*
* Note: All exceptions thrown from within this class should be caught
* by the controller.
*
* @package Joomla.Administrator
* @subpackage com_finder
* @since 3.0
*/
class FinderIndexerDriverMysql extends FinderIndexer
{
/**
* Method to index a content item.
*
* @param FinderIndexerResult $item The content item to index.
* @param string $format The format of the content. [optional]
*
* @return integer The ID of the record in the links table.
*
* @since 3.0
* @throws Exception on database error.
*/
public function index($item, $format = 'html')
{
// Mark beforeIndexing in the profiler.
static::$profiler ? static::$profiler->mark('beforeIndexing') : null;
$db = JFactory::getDbo();
$nd = $db->getNullDate();
// Check if the item is in the database.
$query = $db->getQuery(true)
->select($db->quoteName('link_id') . ', ' . $db->quoteName('md5sum'))
->from($db->quoteName('#__finder_links'))
->where($db->quoteName('url') . ' = ' . $db->quote($item->url));
// Load the item from the database.
$db->setQuery($query);
$link = $db->loadObject();
// Get the indexer state.
$state = static::getState();
// Get the signatures of the item.
$curSig = static::getSignature($item);
$oldSig = isset($link->md5sum) ? $link->md5sum : null;
// Get the other item information.
$linkId = empty($link->link_id) ? null : $link->link_id;
$isNew = empty($link->link_id) ? true : false;
// Check the signatures. If they match, the item is up to date.
if (!$isNew && $curSig == $oldSig)
{
return $linkId;
}
/*
* If the link already exists, flush all the term maps for the item.
* Maps are stored in 16 tables so we need to iterate through and flush
* each table one at a time.
*/
if (!$isNew)
{
for ($i = 0; $i <= 15; $i++)
{
// Flush the maps for the link.
$query->clear()
->delete($db->quoteName('#__finder_links_terms' . dechex($i)))
->where($db->quoteName('link_id') . ' = ' . (int) $linkId);
$db->setQuery($query);
$db->execute();
}
// Remove the taxonomy maps.
FinderIndexerTaxonomy::removeMaps($linkId);
}
// Mark afterUnmapping in the profiler.
static::$profiler ? static::$profiler->mark('afterUnmapping') : null;
// Perform cleanup on the item data.
$item->publish_start_date = (int) $item->publish_start_date != 0 ? $item->publish_start_date : $nd;
$item->publish_end_date = (int) $item->publish_end_date != 0 ? $item->publish_end_date : $nd;
$item->start_date = (int) $item->start_date != 0 ? $item->start_date : $nd;
$item->end_date = (int) $item->end_date != 0 ? $item->end_date : $nd;
// Prepare the item description.
$item->description = FinderIndexerHelper::parse($item->summary);
/*
* Now, we need to enter the item into the links table. If the item
* already exists in the database, we need to use an UPDATE query.
* Otherwise, we need to use an INSERT to get the link id back.
*/
if ($isNew)
{
$columnsArray = array(
$db->quoteName('url'), $db->quoteName('route'), $db->quoteName('title'), $db->quoteName('description'),
$db->quoteName('indexdate'), $db->quoteName('published'), $db->quoteName('state'), $db->quoteName('access'),
$db->quoteName('language'), $db->quoteName('type_id'), $db->quoteName('object'), $db->quoteName('publish_start_date'),
$db->quoteName('publish_end_date'), $db->quoteName('start_date'), $db->quoteName('end_date'), $db->quoteName('list_price'),
$db->quoteName('sale_price')
);
// Insert the link.
$query->clear()
->insert($db->quoteName('#__finder_links'))
->columns($columnsArray)
->values(
$db->quote($item->url) . ', '
. $db->quote($item->route) . ', '
. $db->quote($item->title) . ', '
. $db->quote($item->description) . ', '
. $query->currentTimestamp() . ', '
. '1, '
. (int) $item->state . ', '
. (int) $item->access . ', '
. $db->quote($item->language) . ', '
. (int) $item->type_id . ', '
. $db->quote(serialize($item)) . ', '
. $db->quote($item->publish_start_date) . ', '
. $db->quote($item->publish_end_date) . ', '
. $db->quote($item->start_date) . ', '
. $db->quote($item->end_date) . ', '
. (double) ($item->list_price ? $item->list_price : 0) . ', '
. (double) ($item->sale_price ? $item->sale_price : 0)
);
$db->setQuery($query);
$db->execute();
// Get the link id.
$linkId = (int) $db->insertid();
}
else
{
// Update the link.
$query->clear()
->update($db->quoteName('#__finder_links'))
->set($db->quoteName('route') . ' = ' . $db->quote($item->route))
->set($db->quoteName('title') . ' = ' . $db->quote($item->title))
->set($db->quoteName('description') . ' = ' . $db->quote($item->description))
->set($db->quoteName('indexdate') . ' = ' . $query->currentTimestamp())
->set($db->quoteName('state') . ' = ' . (int) $item->state)
->set($db->quoteName('access') . ' = ' . (int) $item->access)
->set($db->quoteName('language') . ' = ' . $db->quote($item->language))
->set($db->quoteName('type_id') . ' = ' . (int) $item->type_id)
->set($db->quoteName('object') . ' = ' . $db->quote(serialize($item)))
->set($db->quoteName('publish_start_date') . ' = ' . $db->quote($item->publish_start_date))
->set($db->quoteName('publish_end_date') . ' = ' . $db->quote($item->publish_end_date))
->set($db->quoteName('start_date') . ' = ' . $db->quote($item->start_date))
->set($db->quoteName('end_date') . ' = ' . $db->quote($item->end_date))
->set($db->quoteName('list_price') . ' = ' . (double) ($item->list_price ? $item->list_price : 0))
->set($db->quoteName('sale_price') . ' = ' . (double) ($item->sale_price ? $item->sale_price : 0))
->where('link_id = ' . (int) $linkId);
$db->setQuery($query);
$db->execute();
}
// Set up the variables we will need during processing.
$count = 0;
// Mark afterLinking in the profiler.
static::$profiler ? static::$profiler->mark('afterLinking') : null;
// Truncate the tokens tables.
$db->truncateTable('#__finder_tokens');
// Truncate the tokens aggregate table.
$db->truncateTable('#__finder_tokens_aggregate');
/*
* Process the item's content. The items can customize their
* processing instructions to define extra properties to process
* or rearrange how properties are weighted.
*/
foreach ($item->getInstructions() as $group => $properties)
{
// Iterate through the properties of the group.
foreach ($properties as $property)
{
// Check if the property exists in the item.
if (empty($item->$property))
{
continue;
}
// Tokenize the property.
if (is_array($item->$property))
{
// Tokenize an array of content and add it to the database.
foreach ($item->$property as $ip)
{
/*
* If the group is path, we need to a few extra processing
* steps to strip the extension and convert slashes and dashes
* to spaces.
*/
if ($group === static::PATH_CONTEXT)
{
$ip = JFile::stripExt($ip);
$ip = str_replace('/', ' ', $ip);
$ip = str_replace('-', ' ', $ip);
}
// Tokenize a string of content and add it to the database.
$count += $this->tokenizeToDB($ip, $group, $item->language, $format);
// Check if we're approaching the memory limit of the token table.
if ($count > static::$state->options->get('memory_table_limit', 30000))
{
$this->toggleTables(false);
}
}
}
else
{
/*
* If the group is path, we need to a few extra processing
* steps to strip the extension and convert slashes and dashes
* to spaces.
*/
if ($group === static::PATH_CONTEXT)
{
$item->$property = JFile::stripExt($item->$property);
$item->$property = str_replace('/', ' ', $item->$property);
$item->$property = str_replace('-', ' ', $item->$property);
}
// Tokenize a string of content and add it to the database.
$count += $this->tokenizeToDB($item->$property, $group, $item->language, $format);
// Check if we're approaching the memory limit of the token table.
if ($count > static::$state->options->get('memory_table_limit', 30000))
{
$this->toggleTables(false);
}
}
}
}
/*
* Process the item's taxonomy. The items can customize their
* taxonomy mappings to define extra properties to map.
*/
foreach ($item->getTaxonomy() as $branch => $nodes)
{
// Iterate through the nodes and map them to the branch.
foreach ($nodes as $node)
{
// Add the node to the tree.
$nodeId = FinderIndexerTaxonomy::addNode($branch, $node->title, $node->state, $node->access);
// Add the link => node map.
FinderIndexerTaxonomy::addMap($linkId, $nodeId);
// Tokenize the node title and add them to the database.
$count += $this->tokenizeToDB($node->title, static::META_CONTEXT, $item->language, $format);
}
}
// Mark afterProcessing in the profiler.
static::$profiler ? static::$profiler->mark('afterProcessing') : null;
/*
* At this point, all of the item's content has been parsed, tokenized
* and inserted into the #__finder_tokens table. Now, we need to
* aggregate all the data into that table into a more usable form. The
* aggregated data will be inserted into #__finder_tokens_aggregate
* table.
*/
$query = 'INSERT INTO ' . $db->quoteName('#__finder_tokens_aggregate') .
' (' . $db->quoteName('term_id') .
', ' . $db->quoteName('term') .
', ' . $db->quoteName('stem') .
', ' . $db->quoteName('common') .
', ' . $db->quoteName('phrase') .
', ' . $db->quoteName('term_weight') .
', ' . $db->quoteName('context') .
', ' . $db->quoteName('context_weight') .
', ' . $db->quoteName('language') . ')' .
' SELECT' .
' t.term_id, t1.term, t1.stem, t1.common, t1.phrase, t1.weight, t1.context, ' .
' ROUND( t1.weight * COUNT( t2.term ) * %F, 8 ) AS context_weight, t1.language' .
' FROM (' .
' SELECT DISTINCT t1.term, t1.stem, t1.common, t1.phrase, t1.weight, t1.context, t1.language' .
' FROM ' . $db->quoteName('#__finder_tokens') . ' AS t1' .
' WHERE t1.context = %d' .
' ) AS t1' .
' JOIN ' . $db->quoteName('#__finder_tokens') . ' AS t2 ON t2.term = t1.term' .
' LEFT JOIN ' . $db->quoteName('#__finder_terms') . ' AS t ON t.term = t1.term' .
' WHERE t2.context = %d' .
' GROUP BY t1.term' .
' ORDER BY t1.term DESC';
// Iterate through the contexts and aggregate the tokens per context.
foreach ($state->weights as $context => $multiplier)
{
// Run the query to aggregate the tokens for this context..
$db->setQuery(sprintf($query, $multiplier, $context, $context));
$db->execute();
}
// Mark afterAggregating in the profiler.
static::$profiler ? static::$profiler->mark('afterAggregating') : null;
/*
* When we pulled down all of the aggregate data, we did a LEFT JOIN
* over the terms table to try to find all the term ids that
* already exist for our tokens. If any of the rows in the aggregate
* table have a term of 0, then no term record exists for that
* term so we need to add it to the terms table.
*/
$db->setQuery(
'INSERT IGNORE INTO ' . $db->quoteName('#__finder_terms') .
' (' . $db->quoteName('term') .
', ' . $db->quoteName('stem') .
', ' . $db->quoteName('common') .
', ' . $db->quoteName('phrase') .
', ' . $db->quoteName('weight') .
', ' . $db->quoteName('soundex') .
', ' . $db->quoteName('language') . ')' .
' SELECT ta.term, ta.stem, ta.common, ta.phrase, ta.term_weight, SOUNDEX(ta.term), ta.language' .
' FROM ' . $db->quoteName('#__finder_tokens_aggregate') . ' AS ta' .
' WHERE ta.term_id = 0' .
' GROUP BY ta.term'
);
$db->execute();
/*
* Now, we just inserted a bunch of new records into the terms table
* so we need to go back and update the aggregate table with all the
* new term ids.
*/
$query = $db->getQuery(true)
->update($db->quoteName('#__finder_tokens_aggregate') . ' AS ta')
->join('INNER', $db->quoteName('#__finder_terms') . ' AS t ON t.term = ta.term')
->set('ta.term_id = t.term_id')
->where('ta.term_id = 0');
$db->setQuery($query);
$db->execute();
// Mark afterTerms in the profiler.
static::$profiler ? static::$profiler->mark('afterTerms') : null;
/*
* After we've made sure that all of the terms are in the terms table
* and the aggregate table has the correct term ids, we need to update
* the links counter for each term by one.
*/
$query->clear()
->update($db->quoteName('#__finder_terms') . ' AS t')
->join('INNER', $db->quoteName('#__finder_tokens_aggregate') . ' AS ta ON ta.term_id = t.term_id')
->set('t.' . $db->quoteName('links') . ' = t.links + 1');
$db->setQuery($query);
$db->execute();
// Mark afterTerms in the profiler.
static::$profiler ? static::$profiler->mark('afterTerms') : null;
/*
* Before we can insert all of the mapping rows, we have to figure out
* which mapping table the rows need to be inserted into. The mapping
* table for each term is based on the first character of the md5 of
* the first character of the term. In php, it would be expressed as
* substr(md5(substr($token, 0, 1)), 0, 1)
*/
$query->clear()
->update($db->quoteName('#__finder_tokens_aggregate'))
->set($db->quoteName('map_suffix') . ' = SUBSTR(MD5(SUBSTR(' . $db->quoteName('term') . ', 1, 1)), 1, 1)');
$db->setQuery($query);
$db->execute();
/*
* At this point, the aggregate table contains a record for each
* term in each context. So, we're going to pull down all of that
* data while grouping the records by term and add all of the
* sub-totals together to arrive at the final total for each token for
* this link. Then, we insert all of that data into the appropriate
* mapping table.
*/
for ($i = 0; $i <= 15; $i++)
{
// Get the mapping table suffix.
$suffix = dechex($i);
/*
* We have to run this query 16 times, one for each link => term
* mapping table.
*/
$db->setQuery(
'INSERT INTO ' . $db->quoteName('#__finder_links_terms' . $suffix) .
' (' . $db->quoteName('link_id') .
', ' . $db->quoteName('term_id') .
', ' . $db->quoteName('weight') . ')' .
' SELECT ' . (int) $linkId . ', ' . $db->quoteName('term_id') . ',' .
' ROUND(SUM(' . $db->quoteName('context_weight') . '), 8)' .
' FROM ' . $db->quoteName('#__finder_tokens_aggregate') .
' WHERE ' . $db->quoteName('map_suffix') . ' = ' . $db->quote($suffix) .
' GROUP BY ' . $db->quoteName('term') .
' ORDER BY ' . $db->quoteName('term') . ' DESC'
);
$db->execute();
}
// Mark afterMapping in the profiler.
static::$profiler ? static::$profiler->mark('afterMapping') : null;
// Update the signature.
$query->clear()
->update($db->quoteName('#__finder_links'))
->set($db->quoteName('md5sum') . ' = ' . $db->quote($curSig))
->where($db->quoteName('link_id') . ' = ' . $db->quote($linkId));
$db->setQuery($query);
$db->execute();
// Mark afterSigning in the profiler.
static::$profiler ? static::$profiler->mark('afterSigning') : null;
// Truncate the tokens tables.
$db->truncateTable('#__finder_tokens');
// Truncate the tokens aggregate table.
$db->truncateTable('#__finder_tokens_aggregate');
// Toggle the token tables back to memory tables.
$this->toggleTables(true);
// Mark afterTruncating in the profiler.
static::$profiler ? static::$profiler->mark('afterTruncating') : null;
return $linkId;
}
/**
* Method to remove a link from the index.
*
* @param integer $linkId The id of the link.
*
* @return boolean True on success.
*
* @since 2.5
* @throws Exception on database error.
*/
public function remove($linkId)
{
$db = JFactory::getDbo();
$query = $db->getQuery(true);
// Update the link counts and remove the mapping records.
for ($i = 0; $i <= 15; $i++)
{
// Update the link counts for the terms.
$query->update($db->quoteName('#__finder_terms') . ' AS t')
->join('INNER', $db->quoteName('#__finder_links_terms' . dechex($i)) . ' AS m ON m.term_id = t.term_id')
->set('t.links = t.links - 1')
->where('m.link_id = ' . $db->quote((int) $linkId));
$db->setQuery($query);
$db->execute();
// Remove all records from the mapping tables.
$query->clear()
->delete($db->quoteName('#__finder_links_terms' . dechex($i)))
->where($db->quoteName('link_id') . ' = ' . (int) $linkId);
$db->setQuery($query);
$db->execute();
}
// Delete all orphaned terms.
$query->clear()
->delete($db->quoteName('#__finder_terms'))
->where($db->quoteName('links') . ' <= 0');
$db->setQuery($query);
$db->execute();
// Delete the link from the index.
$query->clear()
->delete($db->quoteName('#__finder_links'))
->where($db->quoteName('link_id') . ' = ' . $db->quote((int) $linkId));
$db->setQuery($query);
$db->execute();
// Remove the taxonomy maps.
FinderIndexerTaxonomy::removeMaps($linkId);
// Remove the orphaned taxonomy nodes.
FinderIndexerTaxonomy::removeOrphanNodes();
return true;
}
/**
* Method to optimize the index. We use this method to remove unused terms
* and any other optimizations that might be necessary.
*
* @return boolean True on success.
*
* @since 3.0
* @throws Exception on database error.
*/
public function optimize()
{
// Get the database object.
$db = JFactory::getDbo();
$query = $db->getQuery(true);
// Delete all orphaned terms.
$query->delete($db->quoteName('#__finder_terms'))
->where($db->quoteName('links') . ' <= 0');
$db->setQuery($query);
$db->execute();
// Optimize the links table.
$db->setQuery('OPTIMIZE TABLE ' . $db->quoteName('#__finder_links'));
$db->execute();
for ($i = 0; $i <= 15; $i++)
{
// Optimize the terms mapping table.
$db->setQuery('OPTIMIZE TABLE ' . $db->quoteName('#__finder_links_terms' . dechex($i)));
$db->execute();
}
// Optimize the terms mapping table.
$db->setQuery('OPTIMIZE TABLE ' . $db->quoteName('#__finder_links_terms'));
$db->execute();
// Remove the orphaned taxonomy nodes.
FinderIndexerTaxonomy::removeOrphanNodes();
// Optimize the taxonomy mapping table.
$db->setQuery('OPTIMIZE TABLE ' . $db->quoteName('#__finder_taxonomy_map'));
$db->execute();
return true;
}
/**
* Method to add a set of tokens to the database.
*
* @param mixed $tokens An array or single FinderIndexerToken object.
* @param mixed $context The context of the tokens. See context constants. [optional]
*
* @return integer The number of tokens inserted into the database.
*
* @since 3.0
* @throws Exception on database error.
*/
protected function addTokensToDB($tokens, $context = '')
{
// Get the database object.
$db = JFactory::getDbo();
$query = $db->getQuery(true);
// Force tokens to an array.
$tokens = is_array($tokens) ? $tokens : array($tokens);
// Count the number of token values.
$values = 0;
// Insert the tokens into the database.
$query->insert($db->quoteName('#__finder_tokens'))
->columns(
array(
$db->quoteName('term'),
$db->quoteName('stem'),
$db->quoteName('common'),
$db->quoteName('phrase'),
$db->quoteName('weight'),
$db->quoteName('context'),
$db->quoteName('language')
)
);
// Iterate through the tokens to create SQL value sets.
foreach ($tokens as $token)
{
$query->values(
$db->quote($token->term) . ', '
. $db->quote($token->stem) . ', '
. (int) $token->common . ', '
. (int) $token->phrase . ', '
. (float) $token->weight . ', '
. (int) $context . ', '
. $db->quote($token->language)
);
$values++;
}
$db->setQuery($query);
$db->execute();
return $values;
}
/**
* Method to switch the token tables from Memory tables to MyISAM tables
* when they are close to running out of memory.
*
* @param boolean $memory Flag to control how they should be toggled.
*
* @return boolean True on success.
*
* @since 3.0
* @throws Exception on database error.
*/
protected function toggleTables($memory)
{
static $state;
// Get the database adapter.
$db = JFactory::getDbo();
// Check if we are setting the tables to the Memory engine.
if ($memory === true && $state !== true)
{
// Set the tokens table to Memory.
$db->setQuery('ALTER TABLE ' . $db->quoteName('#__finder_tokens') . ' ENGINE = MEMORY');
$db->execute();
// Set the tokens aggregate table to Memory.
$db->setQuery('ALTER TABLE ' . $db->quoteName('#__finder_tokens_aggregate') . ' ENGINE = MEMORY');
$db->execute();
// Set the internal state.
$state = $memory;
}
// We must be setting the tables to the MyISAM engine.
elseif ($memory === false && $state !== false)
{
// Set the tokens table to MyISAM.
$db->setQuery('ALTER TABLE ' . $db->quoteName('#__finder_tokens') . ' ENGINE = MYISAM');
$db->execute();
// Set the tokens aggregate table to MyISAM.
$db->setQuery('ALTER TABLE ' . $db->quoteName('#__finder_tokens_aggregate') . ' ENGINE = MYISAM');
$db->execute();
// Set the internal state.
$state = $memory;
}
return true;
}
}

View File

@ -0,0 +1,634 @@
<?php
/**
* @package Joomla.Administrator
* @subpackage com_finder
*
* @copyright Copyright (C) 2005 - 2013 Open Source Matters, Inc. All rights reserved.
* @license GNU General Public License version 2 or later; see LICENSE
*/
defined('_JEXEC') or die;
jimport('joomla.filesystem.file');
/**
* Indexer class supporting PostgreSQL for the Finder indexer package.
*
* @package Joomla.Administrator
* @subpackage com_finder
* @since 3.0
*/
class FinderIndexerDriverPostgresql extends FinderIndexer
{
/**
* Method to index a content item.
*
* @param FinderIndexerResult $item The content item to index.
* @param string $format The format of the content. [optional]
*
* @return integer The ID of the record in the links table.
*
* @since 3.0
* @throws Exception on database error.
*/
public function index($item, $format = 'html')
{
// Mark beforeIndexing in the profiler.
static::$profiler ? static::$profiler->mark('beforeIndexing') : null;
$db = JFactory::getDbo();
$nd = $db->getNullDate();
// Check if the item is in the database.
$query = $db->getQuery(true)
->select($db->quoteName('link_id') . ', ' . $db->quoteName('md5sum'))
->from($db->quoteName('#__finder_links'))
->where($db->quoteName('url') . ' = ' . $db->quote($item->url));
// Load the item from the database.
$db->setQuery($query);
$link = $db->loadObject();
// Get the indexer state.
$state = static::getState();
// Get the signatures of the item.
$curSig = static::getSignature($item);
$oldSig = isset($link->md5sum) ? $link->md5sum : null;
// Get the other item information.
$linkId = empty($link->link_id) ? null : $link->link_id;
$isNew = empty($link->link_id) ? true : false;
// Check the signatures. If they match, the item is up to date.
if (!$isNew && $curSig == $oldSig)
{
return $linkId;
}
/*
* If the link already exists, flush all the term maps for the item.
* Maps are stored in 16 tables so we need to iterate through and flush
* each table one at a time.
*/
if (!$isNew)
{
for ($i = 0; $i <= 15; $i++)
{
// Flush the maps for the link.
$query->clear()
->delete($db->quoteName('#__finder_links_terms' . dechex($i)))
->where($db->quoteName('link_id') . ' = ' . (int) $linkId);
$db->setQuery($query);
$db->execute();
}
// Remove the taxonomy maps.
FinderIndexerTaxonomy::removeMaps($linkId);
}
// Mark afterUnmapping in the profiler.
static::$profiler ? static::$profiler->mark('afterUnmapping') : null;
// Perform cleanup on the item data.
$item->publish_start_date = (int) $item->publish_start_date != 0 ? $item->publish_start_date : $nd;
$item->publish_end_date = (int) $item->publish_end_date != 0 ? $item->publish_end_date : $nd;
$item->start_date = (int) $item->start_date != 0 ? $item->start_date : $nd;
$item->end_date = (int) $item->end_date != 0 ? $item->end_date : $nd;
// Prepare the item description.
$item->description = FinderIndexerHelper::parse($item->summary);
/*
* Now, we need to enter the item into the links table. If the item
* already exists in the database, we need to use an UPDATE query.
* Otherwise, we need to use an INSERT to get the link id back.
*/
if ($isNew)
{
$columnsArray = array(
$db->quoteName('url'), $db->quoteName('route'), $db->quoteName('title'), $db->quoteName('description'),
$db->quoteName('indexdate'), $db->quoteName('published'), $db->quoteName('state'), $db->quoteName('access'),
$db->quoteName('language'), $db->quoteName('type_id'), $db->quoteName('object'), $db->quoteName('publish_start_date'),
$db->quoteName('publish_end_date'), $db->quoteName('start_date'), $db->quoteName('end_date'), $db->quoteName('list_price'),
$db->quoteName('sale_price')
);
// Insert the link.
$query->clear()
->insert($db->quoteName('#__finder_links'))
->columns($columnsArray)
->values(
$db->quote($item->url) . ', '
. $db->quote($item->route) . ', '
. $db->quote($item->title) . ', '
. $db->quote($item->description) . ', '
. $query->currentTimestamp() . ', '
. '1, '
. (int) $item->state . ', '
. (int) $item->access . ', '
. $db->quote($item->language) . ', '
. (int) $item->type_id . ', '
. $db->quote(serialize($item)) . ', '
. $db->quote($item->publish_start_date) . ', '
. $db->quote($item->publish_end_date) . ', '
. $db->quote($item->start_date) . ', '
. $db->quote($item->end_date) . ', '
. (double) ($item->list_price ? $item->list_price : 0) . ', '
. (double) ($item->sale_price ? $item->sale_price : 0)
);
$db->setQuery($query);
$db->execute();
// Get the link id.
$linkId = (int) $db->insertid();
}
else
{
// Update the link.
$query->clear()
->update($db->quoteName('#__finder_links'))
->set($db->quoteName('route') . ' = ' . $db->quote($item->route))
->set($db->quoteName('title') . ' = ' . $db->quote($item->title))
->set($db->quoteName('description') . ' = ' . $db->quote($item->description))
->set($db->quoteName('indexdate') . ' = ' . $query->currentTimestamp())
->set($db->quoteName('state') . ' = ' . (int) $item->state)
->set($db->quoteName('access') . ' = ' . (int) $item->access)
->set($db->quoteName('language') . ' = ' . $db->quote($item->language))
->set($db->quoteName('type_id') . ' = ' . (int) $item->type_id)
->set($db->quoteName('object') . ' = ' . $db->quote(serialize($item)))
->set($db->quoteName('publish_start_date') . ' = ' . $db->quote($item->publish_start_date))
->set($db->quoteName('publish_end_date') . ' = ' . $db->quote($item->publish_end_date))
->set($db->quoteName('start_date') . ' = ' . $db->quote($item->start_date))
->set($db->quoteName('end_date') . ' = ' . $db->quote($item->end_date))
->set($db->quoteName('list_price') . ' = ' . (double) ($item->list_price ? $item->list_price : 0))
->set($db->quoteName('sale_price') . ' = ' . (double) ($item->sale_price ? $item->sale_price : 0))
->where('link_id = ' . (int) $linkId);
$db->setQuery($query);
$db->execute();
}
// Set up the variables we will need during processing.
$count = 0;
// Mark afterLinking in the profiler.
static::$profiler ? static::$profiler->mark('afterLinking') : null;
// Truncate the tokens tables.
$db->truncateTable('#__finder_tokens');
// Truncate the tokens aggregate table.
$db->truncateTable('#__finder_tokens_aggregate');
/*
* Process the item's content. The items can customize their
* processing instructions to define extra properties to process
* or rearrange how properties are weighted.
*/
foreach ($item->getInstructions() as $group => $properties)
{
// Iterate through the properties of the group.
foreach ($properties as $property)
{
// Check if the property exists in the item.
if (empty($item->$property))
{
continue;
}
// Tokenize the property.
if (is_array($item->$property))
{
// Tokenize an array of content and add it to the database.
foreach ($item->$property as $ip)
{
/*
* If the group is path, we need to a few extra processing
* steps to strip the extension and convert slashes and dashes
* to spaces.
*/
if ($group === static::PATH_CONTEXT)
{
$ip = JFile::stripExt($ip);
$ip = str_replace('/', ' ', $ip);
$ip = str_replace('-', ' ', $ip);
}
// Tokenize a string of content and add it to the database.
$count += $this->tokenizeToDB($ip, $group, $item->language, $format);
// Check if we're approaching the memory limit of the token table.
if ($count > static::$state->options->get('memory_table_limit', 30000))
{
$this->toggleTables(false);
}
}
}
else
{
/*
* If the group is path, we need to a few extra processing
* steps to strip the extension and convert slashes and dashes
* to spaces.
*/
if ($group === static::PATH_CONTEXT)
{
$item->$property = JFile::stripExt($item->$property);
$item->$property = str_replace('/', ' ', $item->$property);
$item->$property = str_replace('-', ' ', $item->$property);
}
// Tokenize a string of content and add it to the database.
$count += $this->tokenizeToDB($item->$property, $group, $item->language, $format);
// Check if we're approaching the memory limit of the token table.
if ($count > static::$state->options->get('memory_table_limit', 30000))
{
$this->toggleTables(false);
}
}
}
}
/*
* Process the item's taxonomy. The items can customize their
* taxonomy mappings to define extra properties to map.
*/
foreach ($item->getTaxonomy() as $branch => $nodes)
{
// Iterate through the nodes and map them to the branch.
foreach ($nodes as $node)
{
// Add the node to the tree.
$nodeId = FinderIndexerTaxonomy::addNode($branch, $node->title, $node->state, $node->access);
// Add the link => node map.
FinderIndexerTaxonomy::addMap($linkId, $nodeId);
// Tokenize the node title and add them to the database.
$count += $this->tokenizeToDB($node->title, static::META_CONTEXT, $item->language, $format);
}
}
// Mark afterProcessing in the profiler.
static::$profiler ? static::$profiler->mark('afterProcessing') : null;
/*
* At this point, all of the item's content has been parsed, tokenized
* and inserted into the #__finder_tokens table. Now, we need to
* aggregate all the data into that table into a more usable form. The
* aggregated data will be inserted into #__finder_tokens_aggregate
* table.
*/
$query = 'INSERT INTO ' . $db->quoteName('#__finder_tokens_aggregate') .
' (' . $db->quoteName('term_id') .
', ' . $db->quoteName('term') .
', ' . $db->quoteName('stem') .
', ' . $db->quoteName('common') .
', ' . $db->quoteName('phrase') .
', ' . $db->quoteName('term_weight') .
', ' . $db->quoteName('context') .
', ' . $db->quoteName('context_weight') .
', ' . $db->quoteName('language') . ')' .
' SELECT' .
' t.term_id, t1.term, t1.stem, t1.common, t1.phrase, t1.weight, t1.context,' .
' ROUND( t1.weight * COUNT( t2.term ) * %F, 8 ) AS context_weight, t1.language' .
' FROM (' .
' SELECT DISTINCT t1.term, t1.stem, t1.common, t1.phrase, t1.weight, t1.context, t1.language' .
' FROM ' . $db->quoteName('#__finder_tokens') . ' AS t1' .
' WHERE t1.context = %d' .
' ) AS t1' .
' JOIN ' . $db->quoteName('#__finder_tokens') . ' AS t2 ON t2.term = t1.term' .
' LEFT JOIN ' . $db->quoteName('#__finder_terms') . ' AS t ON t.term = t1.term' .
' WHERE t2.context = %d' .
' GROUP BY t1.term, t.term_id, t1.term, t1.stem, t1.common, t1.phrase, t1.weight, t1.context, t1.language' .
' ORDER BY t1.term DESC';
// Iterate through the contexts and aggregate the tokens per context.
foreach ($state->weights as $context => $multiplier)
{
// Run the query to aggregate the tokens for this context..
$db->setQuery(sprintf($query, $multiplier, $context, $context));
$db->execute();
}
// Mark afterAggregating in the profiler.
static::$profiler ? static::$profiler->mark('afterAggregating') : null;
/*
* When we pulled down all of the aggregate data, we did a LEFT JOIN
* over the terms table to try to find all the term ids that
* already exist for our tokens. If any of the rows in the aggregate
* table have a term of 0, then no term record exists for that
* term so we need to add it to the terms table.
*/
/* Emulation of IGNORE INTO behaviour */
$db->setQuery(
' SELECT ta.term' .
' FROM ' . $db->quoteName('#__finder_tokens_aggregate') . ' AS ta' .
' WHERE ta.term_id = 0'
);
if ($db->loadRow() == null)
{
$db->setQuery(
'INSERT INTO ' . $db->quoteName('#__finder_terms') .
' (' . $db->quoteName('term') .
', ' . $db->quoteName('stem') .
', ' . $db->quoteName('common') .
', ' . $db->quoteName('phrase') .
', ' . $db->quoteName('weight') .
', ' . $db->quoteName('soundex') .
', ' . $db->quoteName('language') . ')' .
' SELECT ta.term, ta.stem, ta.common, ta.phrase, ta.term_weight, SOUNDEX(ta.term), ta.language' .
' FROM ' . $db->quoteName('#__finder_tokens_aggregate') . ' AS ta' .
' WHERE ta.term_id = 0' .
' GROUP BY ta.term, ta.stem, ta.common, ta.phrase, ta.term_weight, SOUNDEX(ta.term), ta.language'
);
$db->execute();
}
/*
* Now, we just inserted a bunch of new records into the terms table
* so we need to go back and update the aggregate table with all the
* new term ids.
*/
$query = $db->getQuery(true)
->update($db->quoteName('#__finder_tokens_aggregate') . ' AS ta')
->join('INNER', $db->quoteName('#__finder_terms') . ' AS t ON t.term = ta.term')
->set('ta.term_id = t.term_id')
->where('ta.term_id = 0');
$db->setQuery($query);
$db->execute();
// Mark afterTerms in the profiler.
static::$profiler ? static::$profiler->mark('afterTerms') : null;
/*
* After we've made sure that all of the terms are in the terms table
* and the aggregate table has the correct term ids, we need to update
* the links counter for each term by one.
*/
$query->clear()
->update($db->quoteName('#__finder_terms') . ' AS t')
->join('INNER', $db->quoteName('#__finder_tokens_aggregate') . ' AS ta ON ta.term_id = t.term_id')
->set('t.' . $db->quoteName('links') . ' = t.links + 1');
$db->setQuery($query);
$db->execute();
// Mark afterTerms in the profiler.
static::$profiler ? static::$profiler->mark('afterTerms') : null;
/*
* Before we can insert all of the mapping rows, we have to figure out
* which mapping table the rows need to be inserted into. The mapping
* table for each term is based on the first character of the md5 of
* the first character of the term. In php, it would be expressed as
* substr(md5(substr($token, 0, 1)), 0, 1)
*/
$query->clear()
->update($db->quoteName('#__finder_tokens_aggregate'))
->set($db->quoteName('map_suffix') . ' = SUBSTR(MD5(SUBSTR(' . $db->quoteName('term') . ', 1, 1)), 1, 1)');
$db->setQuery($query);
$db->execute();
/*
* At this point, the aggregate table contains a record for each
* term in each context. So, we're going to pull down all of that
* data while grouping the records by term and add all of the
* sub-totals together to arrive at the final total for each token for
* this link. Then, we insert all of that data into the appropriate
* mapping table.
*/
for ($i = 0; $i <= 15; $i++)
{
// Get the mapping table suffix.
$suffix = dechex($i);
/*
* We have to run this query 16 times, one for each link => term
* mapping table.
*/
$db->setQuery(
'INSERT INTO ' . $db->quoteName('#__finder_links_terms' . $suffix) .
' (' . $db->quoteName('link_id') .
', ' . $db->quoteName('term_id') .
', ' . $db->quoteName('weight') . ')' .
' SELECT ' . (int) $linkId . ', ' . $db->quoteName('term_id') . ',' .
' ROUND(SUM(' . $db->quoteName('context_weight') . '), 8)' .
' FROM ' . $db->quoteName('#__finder_tokens_aggregate') .
' WHERE ' . $db->quoteName('map_suffix') . ' = ' . $db->quote($suffix) .
' GROUP BY ' . $db->quoteName('term') .
' ORDER BY ' . $db->quoteName('term') . ' DESC'
);
$db->execute();
}
// Mark afterMapping in the profiler.
static::$profiler ? static::$profiler->mark('afterMapping') : null;
// Update the signature.
$query->clear()
->update($db->quoteName('#__finder_links'))
->set($db->quoteName('md5sum') . ' = ' . $db->quote($curSig))
->where($db->quoteName('link_id') . ' = ' . $db->quote($linkId));
$db->setQuery($query);
$db->execute();
// Mark afterSigning in the profiler.
static::$profiler ? static::$profiler->mark('afterSigning') : null;
// Truncate the tokens tables.
$db->truncateTable('#__finder_tokens');
// Truncate the tokens aggregate table.
$db->truncateTable('#__finder_tokens_aggregate');
// Toggle the token tables back to memory tables.
$this->toggleTables(true);
// Mark afterTruncating in the profiler.
static::$profiler ? static::$profiler->mark('afterTruncating') : null;
return $linkId;
}
/**
* Method to remove a link from the index.
*
* @param integer $linkId The id of the link.
*
* @return boolean True on success.
*
* @since 2.5
* @throws Exception on database error.
*/
public function remove($linkId)
{
$db = JFactory::getDbo();
$query = $db->getQuery(true);
// Update the link counts and remove the mapping records.
for ($i = 0; $i <= 15; $i++)
{
// Update the link counts for the terms.
$query->update($db->quoteName('#__finder_terms') . ' AS t')
->join('INNER', $db->quoteName('#__finder_links_terms' . dechex($i)) . ' AS m ON m.term_id = t.term_id')
->set('t.links = t.links - 1')
->where('m.link_id = ' . $db->quote((int) $linkId));
$db->setQuery($query);
$db->execute();
// Remove all records from the mapping tables.
$query->clear()
->delete($db->quoteName('#__finder_links_terms' . dechex($i)))
->where($db->quoteName('link_id') . ' = ' . (int) $linkId);
$db->setQuery($query);
$db->execute();
}
// Delete all orphaned terms.
$query->clear()
->delete($db->quoteName('#__finder_terms'))
->where($db->quoteName('links') . ' <= 0');
$db->setQuery($query);
$db->execute();
// Delete the link from the index.
$query->clear()
->delete($db->quoteName('#__finder_links'))
->where($db->quoteName('link_id') . ' = ' . $db->quote((int) $linkId));
$db->setQuery($query);
$db->execute();
// Remove the taxonomy maps.
FinderIndexerTaxonomy::removeMaps($linkId);
// Remove the orphaned taxonomy nodes.
FinderIndexerTaxonomy::removeOrphanNodes();
return true;
}
/**
* Method to optimize the index. We use this method to remove unused terms
* and any other optimizations that might be necessary.
*
* @return boolean True on success.
*
* @since 2.5
* @throws Exception on database error.
*/
public function optimize()
{
// Get the database object.
$db = JFactory::getDbo();
$query = $db->getQuery(true);
// Delete all orphaned terms.
$query->delete($db->quoteName('#__finder_terms'))
->where($db->quoteName('links') . ' <= 0');
$db->setQuery($query);
$db->execute();
// Optimize the links table.
$db->setQuery('VACUUM ' . $db->quoteName('#__finder_links'));
$db->execute();
$db->setQuery('REINDEX TABLE ' . $db->quoteName('#__finder_links'));
$db->execute();
for ($i = 0; $i <= 15; $i++)
{
// Optimize the terms mapping table.
$db->setQuery('VACUUM ' . $db->quoteName('#__finder_links_terms' . dechex($i)));
$db->execute();
$db->setQuery('REINDEX TABLE ' . $db->quoteName('#__finder_links_terms' . dechex($i)));
$db->execute();
}
// Optimize the terms mapping table.
$db->setQuery('REINDEX TABLE ' . $db->quoteName('#__finder_links_terms'));
$db->execute();
// Remove the orphaned taxonomy nodes.
FinderIndexerTaxonomy::removeOrphanNodes();
// Optimize the taxonomy mapping table.
$db->setQuery('REINDEX TABLE ' . $db->quoteName('#__finder_taxonomy_map'));
$db->execute();
return true;
}
/**
* Method to add a set of tokens to the database.
*
* @param mixed $tokens An array or single FinderIndexerToken object.
* @param mixed $context The context of the tokens. See context constants. [optional]
*
* @return integer The number of tokens inserted into the database.
*
* @since 2.5
* @throws Exception on database error.
*/
protected function addTokensToDB($tokens, $context = '')
{
// Get the database object.
$db = JFactory::getDbo();
$query = $db->getQuery(true);
// Force tokens to an array.
$tokens = is_array($tokens) ? $tokens : array($tokens);
// Count the number of token values.
$values = 0;
// Insert the tokens into the database.
$query->insert($db->quoteName('#__finder_tokens'))
->columns(
array(
$db->quoteName('term'),
$db->quoteName('stem'),
$db->quoteName('common'),
$db->quoteName('phrase'),
$db->quoteName('weight'),
$db->quoteName('context'),
$db->quoteName('language')
)
);
// Iterate through the tokens to create SQL value sets.
foreach ($tokens as $token)
{
$query->values(
$db->quote($token->term) . ', '
. $db->quote($token->stem) . ', '
. (int) $token->common . ', '
. (int) $token->phrase . ', '
. (float) $token->weight . ', '
. (int) $context . ', '
. $db->quote($token->language)
);
$values++;
}
$db->setQuery($query);
$db->execute();
return $values;
}
/**
* Method to switch the token tables from Memory tables to MyISAM tables
* when they are close to running out of memory.
*
* @param boolean $memory Flag to control how they should be toggled.
*
* @return boolean True on success.
*
* @since 2.5
* @throws Exception on database error.
*/
protected function toggleTables($memory)
{
return true;
}
}

View File

@ -0,0 +1,630 @@
<?php
/**
* @package Joomla.Administrator
* @subpackage com_finder
*
* @copyright Copyright (C) 2005 - 2013 Open Source Matters, Inc. All rights reserved.
* @license GNU General Public License version 2 or later; see LICENSE
*/
defined('_JEXEC') or die;
jimport('joomla.filesystem.file');
/**
* Indexer class supporting SQL Server for the Finder indexer package.
*
* The indexer class provides the core functionality of the Finder
* search engine. It is responsible for adding and updating the
* content links table; extracting and scoring tokens; and maintaining
* all referential information for the content.
*
* Note: All exceptions thrown from within this class should be caught
* by the controller.
*
* @package Joomla.Administrator
* @subpackage com_finder
* @since 3.1
*/
class FinderIndexerDriverSqlsrv extends FinderIndexer
{
/**
* Method to index a content item.
*
* @param FinderIndexerResult $item The content item to index.
* @param string $format The format of the content. [optional]
*
* @return integer The ID of the record in the links table.
*
* @since 3.1
* @throws Exception on database error.
*/
public function index($item, $format = 'html')
{
// Mark beforeIndexing in the profiler.
static::$profiler ? static::$profiler->mark('beforeIndexing') : null;
$db = JFactory::getDbo();
$nd = $db->getNullDate();
// Check if the item is in the database.
$query = $db->getQuery(true)
->select($db->quoteName('link_id') . ', ' . $db->quoteName('md5sum'))
->from($db->quoteName('#__finder_links'))
->where($db->quoteName('url') . ' = ' . $db->quote($item->url));
// Load the item from the database.
$db->setQuery($query);
$link = $db->loadObject();
// Get the indexer state.
$state = static::getState();
// Get the signatures of the item.
$curSig = static::getSignature($item);
$oldSig = isset($link->md5sum) ? $link->md5sum : null;
// Get the other item information.
$linkId = empty($link->link_id) ? null : $link->link_id;
$isNew = empty($link->link_id) ? true : false;
// Check the signatures. If they match, the item is up to date.
if (!$isNew && $curSig == $oldSig)
{
return $linkId;
}
/*
* If the link already exists, flush all the term maps for the item.
* Maps are stored in 16 tables so we need to iterate through and flush
* each table one at a time.
*/
if (!$isNew)
{
for ($i = 0; $i <= 15; $i++)
{
// Flush the maps for the link.
$query->clear()
->delete($db->quoteName('#__finder_links_terms' . dechex($i)))
->where($db->quoteName('link_id') . ' = ' . (int) $linkId);
$db->setQuery($query);
$db->execute();
}
// Remove the taxonomy maps.
FinderIndexerTaxonomy::removeMaps($linkId);
}
// Mark afterUnmapping in the profiler.
static::$profiler ? static::$profiler->mark('afterUnmapping') : null;
// Perform cleanup on the item data.
$item->publish_start_date = (int) $item->publish_start_date != 0 ? $item->publish_start_date : $nd;
$item->publish_end_date = (int) $item->publish_end_date != 0 ? $item->publish_end_date : $nd;
$item->start_date = (int) $item->start_date != 0 ? $item->start_date : $nd;
$item->end_date = (int) $item->end_date != 0 ? $item->end_date : $nd;
// Prepare the item description.
$item->description = FinderIndexerHelper::parse($item->summary);
/*
* Now, we need to enter the item into the links table. If the item
* already exists in the database, we need to use an UPDATE query.
* Otherwise, we need to use an INSERT to get the link id back.
*/
if ($isNew)
{
$columnsArray = array(
$db->quoteName('url'), $db->quoteName('route'), $db->quoteName('title'), $db->quoteName('description'),
$db->quoteName('indexdate'), $db->quoteName('published'), $db->quoteName('state'), $db->quoteName('access'),
$db->quoteName('language'), $db->quoteName('type_id'), $db->quoteName('object'), $db->quoteName('publish_start_date'),
$db->quoteName('publish_end_date'), $db->quoteName('start_date'), $db->quoteName('end_date'), $db->quoteName('list_price'),
$db->quoteName('sale_price')
);
// Insert the link.
$query->clear()
->insert($db->quoteName('#__finder_links'))
->columns($columnsArray)
->values(
$db->quote($item->url) . ', '
. $db->quote($item->route) . ', '
. $db->quote($item->title) . ', '
. $db->quote($item->description) . ', '
. $query->currentTimestamp() . ', '
. '1, '
. (int) $item->state . ', '
. (int) $item->access . ', '
. $db->quote($item->language) . ', '
. (int) $item->type_id . ', '
. $db->quote(serialize($item)) . ', '
. $db->quote($item->publish_start_date) . ', '
. $db->quote($item->publish_end_date) . ', '
. $db->quote($item->start_date) . ', '
. $db->quote($item->end_date) . ', '
. (double) ($item->list_price ? $item->list_price : 0) . ', '
. (double) ($item->sale_price ? $item->sale_price : 0)
);
$db->setQuery($query);
$db->execute();
// Get the link id.
$linkId = (int) $db->insertid();
}
else
{
// Update the link.
$query->clear()
->update($db->quoteName('#__finder_links'))
->set($db->quoteName('route') . ' = ' . $db->quote($item->route))
->set($db->quoteName('title') . ' = ' . $db->quote($item->title))
->set($db->quoteName('description') . ' = ' . $db->quote($item->description))
->set($db->quoteName('indexdate') . ' = ' . $query->currentTimestamp())
->set($db->quoteName('state') . ' = ' . (int) $item->state)
->set($db->quoteName('access') . ' = ' . (int) $item->access)
->set($db->quoteName('language') . ' = ' . $db->quote($item->language))
->set($db->quoteName('type_id') . ' = ' . (int) $item->type_id)
->set($db->quoteName('object') . ' = ' . $db->quote(serialize($item)))
->set($db->quoteName('publish_start_date') . ' = ' . $db->quote($item->publish_start_date))
->set($db->quoteName('publish_end_date') . ' = ' . $db->quote($item->publish_end_date))
->set($db->quoteName('start_date') . ' = ' . $db->quote($item->start_date))
->set($db->quoteName('end_date') . ' = ' . $db->quote($item->end_date))
->set($db->quoteName('list_price') . ' = ' . (double) ($item->list_price ? $item->list_price : 0))
->set($db->quoteName('sale_price') . ' = ' . (double) ($item->sale_price ? $item->sale_price : 0))
->where('link_id = ' . (int) $linkId);
$db->setQuery($query);
$db->execute();
}
// Set up the variables we will need during processing.
$count = 0;
// Mark afterLinking in the profiler.
static::$profiler ? static::$profiler->mark('afterLinking') : null;
// Truncate the tokens tables.
$db->truncateTable('#__finder_tokens');
// Truncate the tokens aggregate table.
$db->truncateTable('#__finder_tokens_aggregate');
/*
* Process the item's content. The items can customize their
* processing instructions to define extra properties to process
* or rearrange how properties are weighted.
*/
foreach ($item->getInstructions() as $group => $properties)
{
// Iterate through the properties of the group.
foreach ($properties as $property)
{
// Check if the property exists in the item.
if (empty($item->$property))
{
continue;
}
// Tokenize the property.
if (is_array($item->$property))
{
// Tokenize an array of content and add it to the database.
foreach ($item->$property as $ip)
{
/*
* If the group is path, we need to a few extra processing
* steps to strip the extension and convert slashes and dashes
* to spaces.
*/
if ($group === static::PATH_CONTEXT)
{
$ip = JFile::stripExt($ip);
$ip = str_replace('/', ' ', $ip);
$ip = str_replace('-', ' ', $ip);
}
// Tokenize a string of content and add it to the database.
$count += $this->tokenizeToDB($ip, $group, $item->language, $format);
// Check if we're approaching the memory limit of the token table.
if ($count > static::$state->options->get('memory_table_limit', 30000))
{
$this->toggleTables(false);
}
}
}
else
{
/*
* If the group is path, we need to a few extra processing
* steps to strip the extension and convert slashes and dashes
* to spaces.
*/
if ($group === static::PATH_CONTEXT)
{
$item->$property = JFile::stripExt($item->$property);
$item->$property = str_replace('/', ' ', $item->$property);
$item->$property = str_replace('-', ' ', $item->$property);
}
// Tokenize a string of content and add it to the database.
$count += $this->tokenizeToDB($item->$property, $group, $item->language, $format);
// Check if we're approaching the memory limit of the token table.
if ($count > static::$state->options->get('memory_table_limit', 30000))
{
$this->toggleTables(false);
}
}
}
}
/*
* Process the item's taxonomy. The items can customize their
* taxonomy mappings to define extra properties to map.
*/
foreach ($item->getTaxonomy() as $branch => $nodes)
{
// Iterate through the nodes and map them to the branch.
foreach ($nodes as $node)
{
// Add the node to the tree.
$nodeId = FinderIndexerTaxonomy::addNode($branch, $node->title, $node->state, $node->access);
// Add the link => node map.
FinderIndexerTaxonomy::addMap($linkId, $nodeId);
// Tokenize the node title and add them to the database.
$count += $this->tokenizeToDB($node->title, static::META_CONTEXT, $item->language, $format);
}
}
// Mark afterProcessing in the profiler.
static::$profiler ? static::$profiler->mark('afterProcessing') : null;
/*
* At this point, all of the item's content has been parsed, tokenized
* and inserted into the #__finder_tokens table. Now, we need to
* aggregate all the data into that table into a more usable form. The
* aggregated data will be inserted into #__finder_tokens_aggregate
* table.
*/
$query = 'INSERT INTO ' . $db->quoteName('#__finder_tokens_aggregate') .
' (' . $db->quoteName('term_id') .
', ' . $db->quoteName('term') .
', ' . $db->quoteName('stem') .
', ' . $db->quoteName('common') .
', ' . $db->quoteName('phrase') .
', ' . $db->quoteName('term_weight') .
', ' . $db->quoteName('context') .
', ' . $db->quoteName('context_weight') .
', ' . $db->quoteName('language') . ')' .
' SELECT' .
' t.term_id, t1.term, t1.stem, t1.common, t1.phrase, t1.weight, t1.context,' .
' ROUND( t1.weight * COUNT( t2.term ) * %F, 8 ) AS context_weight, t1.language' .
' FROM (' .
' SELECT DISTINCT t1.term, t1.stem, t1.common, t1.phrase, t1.weight, t1.context, t1.language' .
' FROM ' . $db->quoteName('#__finder_tokens') . ' AS t1' .
' WHERE t1.context = %d' .
' ) AS t1' .
' JOIN ' . $db->quoteName('#__finder_tokens') . ' AS t2 ON t2.term = t1.term' .
' LEFT JOIN ' . $db->quoteName('#__finder_terms') . ' AS t ON t.term = t1.term' .
' WHERE t2.context = %d' .
' GROUP BY t1.term, t.term_id, t1.term, t1.stem, t1.common, t1.phrase, t1.weight, t1.context, t1.language' .
' ORDER BY t1.term DESC';
// Iterate through the contexts and aggregate the tokens per context.
foreach ($state->weights as $context => $multiplier)
{
// Run the query to aggregate the tokens for this context..
$db->setQuery(sprintf($query, $multiplier, $context, $context));
$db->execute();
}
// Mark afterAggregating in the profiler.
static::$profiler ? static::$profiler->mark('afterAggregating') : null;
/*
* When we pulled down all of the aggregate data, we did a LEFT JOIN
* over the terms table to try to find all the term ids that
* already exist for our tokens. If any of the rows in the aggregate
* table have a term of 0, then no term record exists for that
* term so we need to add it to the terms table.
*/
$db->setQuery(
'INSERT INTO ' . $db->quoteName('#__finder_terms') .
' (' . $db->quoteName('term') .
', ' . $db->quoteName('stem') .
', ' . $db->quoteName('common') .
', ' . $db->quoteName('phrase') .
', ' . $db->quoteName('weight') .
', ' . $db->quoteName('soundex') . ')' .
' SELECT ta.term, ta.stem, ta.common, ta.phrase, ta.term_weight, SOUNDEX(ta.term)' .
' FROM ' . $db->quoteName('#__finder_tokens_aggregate') . ' AS ta' .
' WHERE ta.term_id IS NULL' .
' GROUP BY ta.term, ta.stem, ta.common, ta.phrase, ta.term_weight'
);
$db->execute();
/*
* Now, we just inserted a bunch of new records into the terms table
* so we need to go back and update the aggregate table with all the
* new term ids.
*/
$query = $db->getQuery(true)
->update('ta')
->set('ta.term_id = t.term_id from #__finder_tokens_aggregate AS ta INNER JOIN #__finder_terms AS t ON t.term = ta.term')
->where('ta.term_id IS NULL');
$db->setQuery($query);
$db->execute();
// Mark afterTerms in the profiler.
static::$profiler ? static::$profiler->mark('afterTerms') : null;
/*
* After we've made sure that all of the terms are in the terms table
* and the aggregate table has the correct term ids, we need to update
* the links counter for each term by one.
*/
$query->clear()
->update('t')
->set('t.links = t.links + 1 FROM #__finder_terms AS t INNER JOIN #__finder_tokens_aggregate AS ta ON ta.term_id = t.term_id');
$db->setQuery($query);
$db->execute();
// Mark afterTerms in the profiler.
static::$profiler ? static::$profiler->mark('afterTerms') : null;
/*
* Before we can insert all of the mapping rows, we have to figure out
* which mapping table the rows need to be inserted into. The mapping
* table for each term is based on the first character of the md5 of
* the first character of the term. In php, it would be expressed as
* substr(md5(substr($token, 0, 1)), 0, 1)
*/
$query->clear()
->update($db->quoteName('#__finder_tokens_aggregate'))
->set($db->quoteName('map_suffix') . " = SUBSTRING(HASHBYTES('MD5', SUBSTRING(" . $db->quoteName('term') . ', 1, 1)), 1, 1)');
$db->setQuery($query);
$db->execute();
/*
* At this point, the aggregate table contains a record for each
* term in each context. So, we're going to pull down all of that
* data while grouping the records by term and add all of the
* sub-totals together to arrive at the final total for each token for
* this link. Then, we insert all of that data into the appropriate
* mapping table.
*/
for ($i = 0; $i <= 15; $i++)
{
// Get the mapping table suffix.
$suffix = dechex($i);
/*
* We have to run this query 16 times, one for each link => term
* mapping table.
*/
$db->setQuery(
'INSERT INTO ' . $db->quoteName('#__finder_links_terms' . $suffix) .
' (' . $db->quoteName('link_id') .
', ' . $db->quoteName('term_id') .
', ' . $db->quoteName('weight') . ')' .
' SELECT ' . (int) $linkId . ', ' . $db->quoteName('term_id') . ',' .
' ROUND(SUM(' . $db->quoteName('context_weight') . '), 8)' .
' FROM ' . $db->quoteName('#__finder_tokens_aggregate') .
' WHERE ' . $db->quoteName('map_suffix') . ' = ' . $db->quote($suffix) .
' GROUP BY term, term_id' .
' ORDER BY ' . $db->quoteName('term') . ' DESC'
);
$db->execute();
}
// Mark afterMapping in the profiler.
static::$profiler ? static::$profiler->mark('afterMapping') : null;
// Update the signature.
$query->clear()
->update($db->quoteName('#__finder_links'))
->set($db->quoteName('md5sum') . ' = ' . $db->quote($curSig))
->where($db->quoteName('link_id') . ' = ' . $db->quote($linkId));
$db->setQuery($query);
$db->execute();
// Mark afterSigning in the profiler.
static::$profiler ? static::$profiler->mark('afterSigning') : null;
// Truncate the tokens tables.
$db->truncateTable('#__finder_tokens');
// Truncate the tokens aggregate table.
$db->truncateTable('#__finder_tokens_aggregate');
// Toggle the token tables back to memory tables.
$this->toggleTables(true);
// Mark afterTruncating in the profiler.
static::$profiler ? static::$profiler->mark('afterTruncating') : null;
return $linkId;
}
/**
* Method to remove a link from the index.
*
* @param integer $linkId The id of the link.
*
* @return boolean True on success.
*
* @since 3.1
* @throws Exception on database error.
*/
public function remove($linkId)
{
$db = JFactory::getDbo();
$query = $db->getQuery(true);
// Update the link counts and remove the mapping records.
for ($i = 0; $i <= 15; $i++)
{
// Update the link counts for the terms.
$query->update('t')
->set('t.links = t.links - 1 from #__finder_terms AS t INNER JOIN #__finder_links_terms' . dechex($i) . ' AS AS m ON m.term_id = t.term_id')
->where('m.link_id = ' . $db->quote((int) $linkId));
$db->setQuery($query);
$db->execute();
// Remove all records from the mapping tables.
$query->clear()
->delete($db->quoteName('#__finder_links_terms' . dechex($i)))
->where($db->quoteName('link_id') . ' = ' . (int) $linkId);
$db->setQuery($query);
$db->execute();
}
// Delete all orphaned terms.
$query->clear()
->delete($db->quoteName('#__finder_terms'))
->where($db->quoteName('links') . ' <= 0');
$db->setQuery($query);
$db->execute();
// Delete the link from the index.
$query->clear()
->delete($db->quoteName('#__finder_links'))
->where($db->quoteName('link_id') . ' = ' . $db->quote((int) $linkId));
$db->setQuery($query);
$db->execute();
// Remove the taxonomy maps.
FinderIndexerTaxonomy::removeMaps($linkId);
// Remove the orphaned taxonomy nodes.
FinderIndexerTaxonomy::removeOrphanNodes();
return true;
}
/**
* Method to optimize the index. We use this method to remove unused terms
* and any other optimizations that might be necessary.
*
* @return boolean True on success.
*
* @since 3.1
* @throws Exception on database error.
*/
public function optimize()
{
// Get the database object.
$db = JFactory::getDbo();
$query = $db->getQuery(true);
// Delete all orphaned terms.
$query->delete($db->quoteName('#__finder_terms'))
->where($db->quoteName('links') . ' <= 0');
$db->setQuery($query);
$db->execute();
// Remove the orphaned taxonomy nodes.
FinderIndexerTaxonomy::removeOrphanNodes();
return true;
}
/**
* Method to add a set of tokens to the database.
*
* @param mixed $tokens An array or single FinderIndexerToken object.
* @param mixed $context The context of the tokens. See context constants. [optional]
*
* @return integer The number of tokens inserted into the database.
*
* @since 3.1
* @throws Exception on database error.
*/
protected function addTokensToDB($tokens, $context = '')
{
// Get the database object.
$db = JFactory::getDbo();
$query = $db->getQuery(true);
// Force tokens to an array.
$tokens = is_array($tokens) ? $tokens : array($tokens);
// Count the number of token values.
$values = 0;
// Set some variables to count the iterations
$totalTokens = count($tokens);
$remaining = $totalTokens;
$iterations = 0;
$loop = true;
do
{
// Shift the token off the array
$token = array_shift($tokens);
$query->values(
$db->quote($token->term) . ', '
. $db->quote($token->stem) . ', '
. (int) $token->common . ', '
. (int) $token->phrase . ', '
. (float) $token->weight . ', '
. (int) $context . ', '
. $db->quote($token->language)
);
$values++;
$iterations++;
$remaining--;
// Run the query if we've reached 1000 iterations or there are no tokens remaining
if ($iterations == 1000 || $remaining == 0)
{
// Insert the tokens into the database.
$query->insert($db->quoteName('#__finder_tokens'))
->columns(
array(
$db->quoteName('term'),
$db->quoteName('stem'),
$db->quoteName('common'),
$db->quoteName('phrase'),
$db->quoteName('weight'),
$db->quoteName('context'),
$db->quoteName('language')
)
);
$db->setQuery($query);
$db->execute();
// Reset the query
$query->clear();
}
// If there's nothing remaining, we're done looping
if ($remaining == 0)
{
$loop = false;
}
}
while ($loop == true);
return $values;
}
/**
* Method to switch the token tables from Memory tables to MyISAM tables
* when they are close to running out of memory.
*
* @param boolean $memory Flag to control how they should be toggled.
*
* @return boolean True on success.
*
* @since 3.1
* @throws Exception on database error.
*/
protected function toggleTables($memory)
{
return true;
}
}

View File

@ -0,0 +1,506 @@
<?php
/**
* @package Joomla.Administrator
* @subpackage com_finder
*
* @copyright Copyright (C) 2005 - 2013 Open Source Matters, Inc. All rights reserved.
* @license GNU General Public License version 2 or later; see LICENSE
*/
defined('_JEXEC') or die;
JLoader::register('FinderIndexerParser', __DIR__ . '/parser.php');
JLoader::register('FinderIndexerStemmer', __DIR__ . '/stemmer.php');
JLoader::register('FinderIndexerToken', __DIR__ . '/token.php');
/**
* Helper class for the Finder indexer package.
*
* @package Joomla.Administrator
* @subpackage com_finder
* @since 2.5
*/
class FinderIndexerHelper
{
/**
* The token stemmer object. The stemmer is set by whatever class
* wishes to use it but it must be an instance of FinderIndexerStemmer.
*
* @var FinderIndexerStemmer
* @since 2.5
*/
public static $stemmer;
/**
* Method to parse input into plain text.
*
* @param string $input The raw input.
* @param string $format The format of the input. [optional]
*
* @return string The parsed input.
*
* @since 2.5
* @throws Exception on invalid parser.
*/
public static function parse($input, $format = 'html')
{
// Get a parser for the specified format and parse the input.
return FinderIndexerParser::getInstance($format)->parse($input);
}
/**
* Method to tokenize a text string.
*
* @param string $input The input to tokenize.
* @param string $lang The language of the input.
* @param boolean $phrase Flag to indicate whether input could be a phrase. [optional]
*
* @return array An array of FinderIndexerToken objects.
*
* @since 2.5
*/
public static function tokenize($input, $lang, $phrase = false)
{
static $cache;
$store = JString::strlen($input) < 128 ? md5($input . '::' . $lang . '::' . $phrase) : null;
// Check if the string has been tokenized already.
if ($store && isset($cache[$store]))
{
return $cache[$store];
}
$tokens = array();
$quotes = html_entity_decode('&#8216;&#8217;&#39;', ENT_QUOTES, 'UTF-8');
// Get the simple language key.
$lang = self::getPrimaryLanguage($lang);
/*
* Parsing the string input into terms is a multi-step process.
*
* Regexes:
* 1. Remove everything except letters, numbers, quotes, apostrophe, plus, dash, period, and comma.
* 2. Remove plus, dash, period, and comma characters located before letter characters.
* 3. Remove plus, dash, period, and comma characters located after other characters.
* 4. Remove plus, period, and comma characters enclosed in alphabetical characters. Ungreedy.
* 5. Remove orphaned apostrophe, plus, dash, period, and comma characters.
* 6. Remove orphaned quote characters.
* 7. Replace the assorted single quotation marks with the ASCII standard single quotation.
* 8. Remove multiple space characters and replaces with a single space.
*/
$input = JString::strtolower($input);
$input = preg_replace('#[^\pL\pM\pN\p{Pi}\p{Pf}\'+-.,]+#mui', ' ', $input);
$input = preg_replace('#(^|\s)[+-.,]+([\pL\pM]+)#mui', ' $1', $input);
$input = preg_replace('#([\pL\pM\pN]+)[+-.,]+(\s|$)#mui', '$1 ', $input);
$input = preg_replace('#([\pL\pM]+)[+.,]+([\pL\pM]+)#muiU', '$1 $2', $input);
$input = preg_replace('#(^|\s)[\'+-.,]+(\s|$)#mui', ' ', $input);
$input = preg_replace('#(^|\s)[\p{Pi}\p{Pf}]+(\s|$)#mui', ' ', $input);
$input = preg_replace('#[' . $quotes . ']+#mui', '\'', $input);
$input = preg_replace('#\s+#mui', ' ', $input);
$input = JString::trim($input);
// Explode the normalized string to get the terms.
$terms = explode(' ', $input);
/*
* If we have Unicode support and are dealing with Chinese text, Chinese
* has to be handled specially because there are not necessarily any spaces
* between the "words". So, we have to test if the words belong to the Chinese
* character set and if so, explode them into single glyphs or "words".
*/
if ($lang === 'zh')
{
// Iterate through the terms and test if they contain Chinese.
for ($i = 0, $n = count($terms); $i < $n; $i++)
{
$charMatches = array();
$charCount = preg_match_all('#[\p{Han}]#mui', $terms[$i], $charMatches);
// Split apart any groups of Chinese characters.
for ($j = 0; $j < $charCount; $j++)
{
$tSplit = JString::str_ireplace($charMatches[0][$j], '', $terms[$i], false);
if (!empty($tSplit))
{
$terms[$i] = $tSplit;
}
else
{
unset($terms[$i]);
}
$terms[] = $charMatches[0][$j];
}
}
// Reset array keys.
$terms = array_values($terms);
}
/*
* If we have to handle the input as a phrase, that means we don't
* tokenize the individual terms and we do not create the two and three
* term combinations. The phrase must contain more than one word!
*/
if ($phrase === true && count($terms) > 1)
{
// Create tokens from the phrase.
$tokens[] = new FinderIndexerToken($terms, $lang);
}
else
{
// Create tokens from the terms.
for ($i = 0, $n = count($terms); $i < $n; $i++)
{
$tokens[] = new FinderIndexerToken($terms[$i], $lang);
}
// Create two and three word phrase tokens from the individual words.
for ($i = 0, $n = count($tokens); $i < $n; $i++)
{
// Setup the phrase positions.
$i2 = $i + 1;
$i3 = $i + 2;
// Create the two word phrase.
if ($i2 < $n && isset($tokens[$i2]))
{
// Tokenize the two word phrase.
$token = new FinderIndexerToken(array($tokens[$i]->term, $tokens[$i2]->term), $lang, $lang === 'zh' ? '' : ' ');
$token->derived = true;
// Add the token to the stack.
$tokens[] = $token;
}
// Create the three word phrase.
if ($i3 < $n && isset($tokens[$i3]))
{
// Tokenize the three word phrase.
$token = new FinderIndexerToken(array($tokens[$i]->term, $tokens[$i2]->term, $tokens[$i3]->term), $lang, $lang === 'zh' ? '' : ' ');
$token->derived = true;
// Add the token to the stack.
$tokens[] = $token;
}
}
}
if ($store)
{
$cache[$store] = count($tokens) > 1 ? $tokens : array_shift($tokens);
return $cache[$store];
}
else
{
return count($tokens) > 1 ? $tokens : array_shift($tokens);
}
}
/**
* Method to get the base word of a token. This method uses the public
* {@link FinderIndexerHelper::$stemmer} object if it is set. If no stemmer is set,
* the original token is returned.
*
* @param string $token The token to stem.
* @param string $lang The language of the token.
*
* @return string The root token.
*
* @since 2.5
*/
public static function stem($token, $lang)
{
// Trim apostrophes at either end of the token.
$token = JString::trim($token, '\'');
// Trim everything after any apostrophe in the token.
if (($pos = JString::strpos($token, '\'')) !== false)
{
$token = JString::substr($token, 0, $pos);
}
// Stem the token if we have a valid stemmer to use.
if (self::$stemmer instanceof FinderIndexerStemmer)
{
return self::$stemmer->stem($token, $lang);
}
else
{
return $token;
}
}
/**
* Method to add a content type to the database.
*
* @param string $title The type of content. For example: PDF
* @param string $mime The mime type of the content. For example: PDF [optional]
*
* @return integer The id of the content type.
*
* @since 2.5
* @throws Exception on database error.
*/
public static function addContentType($title, $mime = null)
{
static $types;
$db = JFactory::getDbo();
$query = $db->getQuery(true);
// Check if the types are loaded.
if (empty($types))
{
// Build the query to get the types.
$query->select('*')
->from($db->quoteName('#__finder_types'));
// Get the types.
$db->setQuery($query);
$types = $db->loadObjectList('title');
}
// Check if the type already exists.
if (isset($types[$title]))
{
return (int) $types[$title]->id;
}
// Add the type.
$query->clear()
->insert($db->quoteName('#__finder_types'))
->columns(array($db->quoteName('title'), $db->quoteName('mime')))
->values($db->quote($title) . ', ' . $db->quote($mime));
$db->setQuery($query);
$db->execute();
// Return the new id.
return (int) $db->insertid();
}
/**
* Method to check if a token is common in a language.
*
* @param string $token The token to test.
* @param string $lang The language to reference.
*
* @return boolean True if common, false otherwise.
*
* @since 2.5
*/
public static function isCommon($token, $lang)
{
static $data;
// Load the common tokens for the language if necessary.
if (!isset($data[$lang]))
{
$data[$lang] = self::getCommonWords($lang);
}
// Check if the token is in the common array.
if (in_array($token, $data[$lang]))
{
return true;
}
else
{
return false;
}
}
/**
* Method to get an array of common terms for a language.
*
* @param string $lang The language to use.
*
* @return array Array of common terms.
*
* @since 2.5
* @throws Exception on database error.
*/
public static function getCommonWords($lang)
{
$db = JFactory::getDbo();
// Create the query to load all the common terms for the language.
$query = $db->getQuery(true)
->select($db->quoteName('term'))
->from($db->quoteName('#__finder_terms_common'))
->where($db->quoteName('language') . ' = ' . $db->quote($lang));
// Load all of the common terms for the language.
$db->setQuery($query);
$results = $db->loadColumn();
return $results;
}
/**
* Method to get the default language for the site.
*
* @return string The default language string.
*
* @since 2.5
*/
public static function getDefaultLanguage()
{
static $lang;
// We need to go to com_languages to get the site default language, it's the best we can guess.
if (empty($lang))
{
$lang = JComponentHelper::getParams('com_languages')->get('site', 'en-GB');
}
return $lang;
}
/**
* Method to parse a language/locale key and return a simple language string.
*
* @param string $lang The language/locale key. For example: en-GB
*
* @return string The simple language string. For example: en
*
* @since 2.5
*/
public static function getPrimaryLanguage($lang)
{
static $data;
// Only parse the identifier if necessary.
if (!isset($data[$lang]))
{
if (is_callable(array('Locale', 'getPrimaryLanguage')))
{
// Get the language key using the Locale package.
$data[$lang] = Locale::getPrimaryLanguage($lang);
}
else
{
// Get the language key using string position.
$data[$lang] = JString::substr($lang, 0, JString::strpos($lang, '-'));
}
}
return $data[$lang];
}
/**
* Method to get the path (SEF route) for a content item.
*
* @param string $url The non-SEF route to the content item.
*
* @return string The path for the content item.
*
* @since 2.5
*/
public static function getContentPath($url)
{
static $router;
// Only get the router once.
if (!($router instanceof JRouter))
{
jimport('joomla.application.router');
include_once JPATH_SITE . '/includes/application.php';
// Get and configure the site router.
$config = JFactory::getConfig();
$router = JRouter::getInstance('site');
$router->setMode($config->get('sef', 1));
}
// Build the relative route.
$uri = $router->build($url);
$route = $uri->toString(array('path', 'query', 'fragment'));
$route = str_replace(JUri::base(true) . '/', '', $route);
return $route;
}
/**
* Method to get extra data for a content before being indexed. This is how
* we add Comments, Tags, Labels, etc. that should be available to Finder.
*
* @param FinderIndexerResult &$item The item to index as an FinderIndexerResult object.
*
* @return boolean True on success, false on failure.
*
* @since 2.5
* @throws Exception on database error.
*/
public static function getContentExtras(FinderIndexerResult &$item)
{
// Get the event dispatcher.
$dispatcher = JEventDispatcher::getInstance();
// Load the finder plugin group.
JPluginHelper::importPlugin('finder');
try
{
// Trigger the event.
$results = $dispatcher->trigger('onPrepareFinderContent', array(&$item));
// Check the returned results. This is for plugins that don't throw
// exceptions when they encounter serious errors.
if (in_array(false, $results))
{
throw new Exception($dispatcher->getError(), 500);
}
}
catch (Exception $e)
{
// Handle a caught exception.
throw $e;
}
return true;
}
/**
* Method to process content text using the onContentPrepare event trigger.
*
* @param string $text The content to process.
* @param JRegistry $params The parameters object. [optional]
*
* @return string The processed content.
*
* @since 2.5
*/
public static function prepareContent($text, $params = null)
{
static $loaded;
// Get the dispatcher.
$dispatcher = JEventDispatcher::getInstance();
// Load the content plugins if necessary.
if (empty($loaded))
{
JPluginHelper::importPlugin('content');
$loaded = true;
}
// Instantiate the parameter object if necessary.
if (!($params instanceof JRegistry))
{
$registry = new JRegistry;
$registry->loadString($params);
$params = $registry;
}
// Create a mock content object.
$content = JTable::getInstance('Content');
$content->text = $text;
// Fire the onContentPrepare event.
$dispatcher->trigger('onContentPrepare', array('com_finder.indexer', &$content, &$params, 0));
return $content->text;
}
}

View File

@ -0,0 +1 @@
<!DOCTYPE html><title></title>

View File

@ -0,0 +1,484 @@
<?php
/**
* @package Joomla.Administrator
* @subpackage com_finder
*
* @copyright Copyright (C) 2005 - 2013 Open Source Matters, Inc. All rights reserved.
* @license GNU General Public License version 2 or later; see LICENSE
*/
defined('_JEXEC') or die;
JLoader::register('FinderIndexerHelper', __DIR__ . '/helper.php');
JLoader::register('FinderIndexerParser', __DIR__ . '/parser.php');
JLoader::register('FinderIndexerStemmer', __DIR__ . '/stemmer.php');
JLoader::register('FinderIndexerTaxonomy', __DIR__ . '/taxonomy.php');
JLoader::register('FinderIndexerToken', __DIR__ . '/token.php');
jimport('joomla.filesystem.file');
/**
* Main indexer class for the Finder indexer package.
*
* The indexer class provides the core functionality of the Finder
* search engine. It is responsible for adding and updating the
* content links table; extracting and scoring tokens; and maintaining
* all referential information for the content.
*
* Note: All exceptions thrown from within this class should be caught
* by the controller.
*
* @package Joomla.Administrator
* @subpackage com_finder
* @since 2.5
*/
abstract class FinderIndexer
{
/**
* The title context identifier.
*
* @var integer
* @since 2.5
*/
const TITLE_CONTEXT = 1;
/**
* The text context identifier.
*
* @var integer
* @since 2.5
*/
const TEXT_CONTEXT = 2;
/**
* The meta context identifier.
*
* @var integer
* @since 2.5
*/
const META_CONTEXT = 3;
/**
* The path context identifier.
*
* @var integer
* @since 2.5
*/
const PATH_CONTEXT = 4;
/**
* The misc context identifier.
*
* @var integer
* @since 2.5
*/
const MISC_CONTEXT = 5;
/**
* The indexer state object.
*
* @var object
* @since 2.5
*/
public static $state;
/**
* The indexer profiler object.
*
* @var object
* @since 2.5
*/
public static $profiler;
/**
* Returns a reference to the FinderIndexer object.
*
* @return FinderIndexer instance based on the database driver
*
* @since 3.0
* @throws RuntimeException if driver class for indexer not present.
*/
public static function getInstance()
{
// Setup the adapter for the indexer.
$format = JFactory::getDbo()->name;
if ($format == 'mysqli')
{
$format = 'mysql';
}
elseif ($format == 'sqlazure')
{
$format = 'sqlsrv';
}
$path = __DIR__ . '/driver/' . $format . '.php';
$class = 'FinderIndexerDriver' . ucfirst($format);
// Check if a parser exists for the format.
if (file_exists($path))
{
// Instantiate the parser.
include_once $path;
return new $class;
}
else
{
// Throw invalid format exception.
throw new RuntimeException(JText::sprintf('COM_FINDER_INDEXER_INVALID_DRIVER', $format));
}
}
/**
* Method to get the indexer state.
*
* @return object The indexer state object.
*
* @since 2.5
*/
public static function getState()
{
// First, try to load from the internal state.
if (!empty(self::$state))
{
return self::$state;
}
// If we couldn't load from the internal state, try the session.
$session = JFactory::getSession();
$data = $session->get('_finder.state', null);
// If the state is empty, load the values for the first time.
if (empty($data))
{
$data = new JObject;
// Load the default configuration options.
$data->options = JComponentHelper::getParams('com_finder');
// Setup the weight lookup information.
$data->weights = array(
self::TITLE_CONTEXT => round($data->options->get('title_multiplier', 1.7), 2),
self::TEXT_CONTEXT => round($data->options->get('text_multiplier', 0.7), 2),
self::META_CONTEXT => round($data->options->get('meta_multiplier', 1.2), 2),
self::PATH_CONTEXT => round($data->options->get('path_multiplier', 2.0), 2),
self::MISC_CONTEXT => round($data->options->get('misc_multiplier', 0.3), 2)
);
// Set the current time as the start time.
$data->startTime = JFactory::getDate()->toSQL();
// Set the remaining default values.
$data->batchSize = (int) $data->options->get('batch_size', 50);
$data->batchOffset = 0;
$data->totalItems = 0;
$data->pluginState = array();
}
// Setup the profiler if debugging is enabled.
if (JFactory::getApplication()->getCfg('debug'))
{
self::$profiler = JProfiler::getInstance('FinderIndexer');
}
// Setup the stemmer.
if ($data->options->get('stem', 1) && $data->options->get('stemmer', 'porter_en'))
{
FinderIndexerHelper::$stemmer = FinderIndexerStemmer::getInstance($data->options->get('stemmer', 'porter_en'));
}
// Set the state.
self::$state = $data;
return self::$state;
}
/**
* Method to set the indexer state.
*
* @param object $data A new indexer state object.
*
* @return boolean True on success, false on failure.
*
* @since 2.5
*/
public static function setState($data)
{
// Check the state object.
if (empty($data) || !$data instanceof JObject)
{
return false;
}
// Set the new internal state.
self::$state = $data;
// Set the new session state.
$session = JFactory::getSession();
$session->set('_finder.state', $data);
return true;
}
/**
* Method to reset the indexer state.
*
* @return void
*
* @since 2.5
*/
public static function resetState()
{
// Reset the internal state to null.
self::$state = null;
// Reset the session state to null.
$session = JFactory::getSession();
$session->set('_finder.state', null);
}
/**
* Method to index a content item.
*
* @param FinderIndexerResult $item The content item to index.
* @param string $format The format of the content. [optional]
*
* @return integer The ID of the record in the links table.
*
* @since 2.5
* @throws Exception on database error.
*/
abstract public function index($item, $format = 'html');
/**
* Method to remove a link from the index.
*
* @param integer $linkId The id of the link.
*
* @return boolean True on success.
*
* @since 2.5
* @throws Exception on database error.
*/
abstract public function remove($linkId);
/**
* Method to optimize the index. We use this method to remove unused terms
* and any other optimizations that might be necessary.
*
* @return boolean True on success.
*
* @since 2.5
* @throws Exception on database error.
*/
abstract public function optimize();
/**
* Method to get a content item's signature.
*
* @param object $item The content item to index.
*
* @return string The content item's signature.
*
* @since 2.5
*/
protected static function getSignature($item)
{
// Get the indexer state.
$state = self::getState();
// Get the relevant configuration variables.
$config = array();
$config[] = $state->weights;
$config[] = $state->options->get('stem', 1);
$config[] = $state->options->get('stemmer', 'porter_en');
return md5(serialize(array($item, $config)));
}
/**
* Method to parse input, tokenize it, and then add it to the database.
*
* @param mixed $input String or resource to use as input. A resource
* input will automatically be chunked to conserve
* memory. Strings will be chunked if longer than
* 2K in size.
* @param integer $context The context of the input. See context constants.
* @param string $lang The language of the input.
* @param string $format The format of the input.
*
* @return integer The number of tokens extracted from the input.
*
* @since 2.5
*/
protected function tokenizeToDB($input, $context, $lang, $format)
{
$count = 0;
$buffer = null;
if (!empty($input))
{
// If the input is a resource, batch the process out.
if (is_resource($input))
{
// Batch the process out to avoid memory limits.
while (!feof($input))
{
// Read into the buffer.
$buffer .= fread($input, 2048);
/*
* If we haven't reached the end of the file, seek to the last
* space character and drop whatever is after that to make sure
* we didn't truncate a term while reading the input.
*/
if (!feof($input))
{
// Find the last space character.
$ls = strrpos($buffer, ' ');
// Adjust string based on the last space character.
if ($ls)
{
// Truncate the string to the last space character.
$string = substr($buffer, 0, $ls);
// Adjust the buffer based on the last space for the next iteration and trim.
$buffer = JString::trim(substr($buffer, $ls));
}
// No space character was found.
else
{
$string = $buffer;
}
}
// We've reached the end of the file, so parse whatever remains.
else
{
$string = $buffer;
}
// Parse the input.
$string = FinderIndexerHelper::parse($string, $format);
// Check the input.
if (empty($string))
{
continue;
}
// Tokenize the input.
$tokens = FinderIndexerHelper::tokenize($string, $lang);
// Add the tokens to the database.
$count += $this->addTokensToDB($tokens, $context);
// Check if we're approaching the memory limit of the token table.
if ($count > self::$state->options->get('memory_table_limit', 30000))
{
self::toggleTables(false);
}
unset($string);
unset($tokens);
}
}
// If the input is greater than 2K in size, it is more efficient to
// batch out the operation into smaller chunks of work.
elseif (strlen($input) > 2048)
{
$start = 0;
$end = strlen($input);
$chunk = 2048;
/*
* As it turns out, the complex regular expressions we use for
* sanitizing input are not very efficient when given large
* strings. It is much faster to process lots of short strings.
*/
while ($start < $end)
{
// Setup the string.
$string = substr($input, $start, $chunk);
// Find the last space character if we aren't at the end.
$ls = (($start + $chunk) < $end ? strrpos($string, ' ') : false);
// Truncate to the last space character.
if ($ls !== false)
{
$string = substr($string, 0, $ls);
}
// Adjust the start position for the next iteration.
$start += ($ls !== false ? ($ls + 1 - $chunk) + $chunk : $chunk);
// Parse the input.
$string = FinderIndexerHelper::parse($string, $format);
// Check the input.
if (empty($string))
{
continue;
}
// Tokenize the input.
$tokens = FinderIndexerHelper::tokenize($string, $lang);
// Add the tokens to the database.
$count += $this->addTokensToDB($tokens, $context);
// Check if we're approaching the memory limit of the token table.
if ($count > self::$state->options->get('memory_table_limit', 30000))
{
self::toggleTables(false);
}
}
}
else
{
// Parse the input.
$input = FinderIndexerHelper::parse($input, $format);
// Check the input.
if (empty($input))
{
return $count;
}
// Tokenize the input.
$tokens = FinderIndexerHelper::tokenize($input, $lang);
// Add the tokens to the database.
$count = $this->addTokensToDB($tokens, $context);
}
}
return $count;
}
/**
* Method to add a set of tokens to the database.
*
* @param mixed $tokens An array or single FinderIndexerToken object.
* @param mixed $context The context of the tokens. See context constants. [optional]
*
* @return integer The number of tokens inserted into the database.
*
* @since 2.5
* @throws Exception on database error.
*/
abstract protected function addTokensToDB($tokens, $context = '');
/**
* Method to switch the token tables from Memory tables to MyISAM tables
* when they are close to running out of memory.
*
* @param boolean $memory Flag to control how they should be toggled.
*
* @return boolean True on success.
*
* @since 2.5
* @throws Exception on database error.
*/
abstract protected function toggleTables($memory);
}

View File

@ -0,0 +1,132 @@
<?php
/**
* @package Joomla.Administrator
* @subpackage com_finder
*
* @copyright Copyright (C) 2005 - 2013 Open Source Matters, Inc. All rights reserved.
* @license GNU General Public License version 2 or later; see LICENSE
*/
defined('_JEXEC') or die;
/**
* Parser base class for the Finder indexer package.
*
* @package Joomla.Administrator
* @subpackage com_finder
* @since 2.5
*/
abstract class FinderIndexerParser
{
/**
* Method to get a parser, creating it if necessary.
*
* @param string $format The type of parser to load.
*
* @return FinderIndexerParser A FinderIndexerParser instance.
*
* @since 2.5
* @throws Exception on invalid parser.
*/
public static function getInstance($format)
{
static $instances;
// Only create one parser for each format.
if (isset($instances[$format]))
{
return $instances[$format];
}
// Create an array of instances if necessary.
if (!is_array($instances))
{
$instances = array();
}
// Setup the adapter for the parser.
$format = JFilterInput::getInstance()->clean($format, 'cmd');
$path = __DIR__ . '/parser/' . $format . '.php';
$class = 'FinderIndexerParser' . ucfirst($format);
// Check if a parser exists for the format.
if (file_exists($path))
{
// Instantiate the parser.
include_once $path;
$instances[$format] = new $class;
}
else
{
// Throw invalid format exception.
throw new Exception(JText::sprintf('COM_FINDER_INDEXER_INVALID_PARSER', $format));
}
return $instances[$format];
}
/**
* Method to parse input and extract the plain text. Because this method is
* called from both inside and outside the indexer, it needs to be able to
* batch out its parsing functionality to deal with the inefficiencies of
* regular expressions. We will parse recursively in 2KB chunks.
*
* @param string $input The input to parse.
*
* @return string The plain text input.
*
* @since 2.5
*/
public function parse($input)
{
$return = null;
// Parse the input in batches if bigger than 2KB.
if (strlen($input) > 2048)
{
$start = 0;
$end = strlen($input);
$chunk = 2048;
while ($start < $end)
{
// Setup the string.
$string = substr($input, $start, $chunk);
// Find the last space character if we aren't at the end.
$ls = (($start + $chunk) < $end ? strrpos($string, ' ') : false);
// Truncate to the last space character.
if ($ls !== false)
{
$string = substr($string, 0, $ls);
}
// Adjust the start position for the next iteration.
$start += ($ls !== false ? ($ls + 1 - $chunk) + $chunk : $chunk);
// Parse the chunk.
$return .= $this->process($string);
}
}
// The input is less than 2KB so we can parse it efficiently.
else
{
// Parse the chunk.
$return .= $this->process($input);
}
return $return;
}
/**
* Method to process input and extract the plain text.
*
* @param string $input The input to process.
*
* @return string The plain text input.
*
* @since 2.5
*/
abstract protected function process($input);
}

View File

@ -0,0 +1,52 @@
<?php
/**
* @package Joomla.Administrator
* @subpackage com_finder
*
* @copyright Copyright (C) 2005 - 2013 Open Source Matters, Inc. All rights reserved.
* @license GNU General Public License version 2 or later; see LICENSE
*/
defined('_JEXEC') or die;
JLoader::register('FinderIndexerParser', dirname(__DIR__) . '/parser.php');
/**
* HTML Parser class for the Finder indexer package.
*
* @package Joomla.Administrator
* @subpackage com_finder
* @since 2.5
*/
class FinderIndexerParserHtml extends FinderIndexerParser
{
/**
* Method to process HTML input and extract the plain text.
*
* @param string $input The input to process.
*
* @return string The plain text input.
*
* @since 2.5
*/
protected function process($input)
{
// Strip invalid UTF-8 characters.
$input = iconv("utf-8", "utf-8//IGNORE", $input);
// Strip all script tags.
$input = preg_replace('#<script[^>]*>.*?</script>#si', ' ', $input);
// Deal with spacing issues in the input.
$input = str_replace('>', '> ', $input);
$input = str_replace(array('&nbsp;', '&#160;'), ' ', $input);
$input = trim(preg_replace('#\s+#u', ' ', $input));
// Strip the tags from the input and decode entities.
$input = strip_tags($input);
$input = html_entity_decode($input, ENT_QUOTES, 'UTF-8');
$input = trim(preg_replace('#\s+#u', ' ', $input));
return $input;
}
}

View File

@ -0,0 +1 @@
<!DOCTYPE html><title></title>

View File

@ -0,0 +1,44 @@
<?php
/**
* @package Joomla.Administrator
* @subpackage com_finder
*
* @copyright Copyright (C) 2005 - 2013 Open Source Matters, Inc. All rights reserved.
* @license GNU General Public License version 2 or later; see LICENSE
*/
defined('_JEXEC') or die;
JLoader::register('FinderIndexerParser', dirname(__DIR__) . '/parser.php');
/**
* RTF Parser class for the Finder indexer package.
*
* @package Joomla.Administrator
* @subpackage com_finder
* @since 2.5
*/
class FinderIndexerParserRtf extends FinderIndexerParser
{
/**
* Method to process RTF input and extract the plain text.
*
* @param string $input The input to process.
*
* @return string The plain text input.
*
* @since 2.5
*/
protected function process($input)
{
// Remove embedded pictures.
$input = preg_replace('#{\\\pict[^}]*}#mis', '', $input);
// Remove control characters.
$input = str_replace(array('{', '}', "\\\n"), array(' ', ' ', "\n"), $input);
$input = preg_replace('#\\\([^;]+?);#mis', ' ', $input);
$input = preg_replace('#\\\[\'a-zA-Z0-9]+#mis', ' ', $input);
return $input;
}
}

View File

@ -0,0 +1,36 @@
<?php
/**
* @package Joomla.Administrator
* @subpackage com_finder
*
* @copyright Copyright (C) 2005 - 2013 Open Source Matters, Inc. All rights reserved.
* @license GNU General Public License version 2 or later; see LICENSE
*/
defined('_JEXEC') or die;
JLoader::register('FinderIndexerParser', dirname(__DIR__) . '/parser.php');
/**
* Text Parser class for the Finder indexer package.
*
* @package Joomla.Administrator
* @subpackage com_finder
* @since 2.5
*/
class FinderIndexerParserTxt extends FinderIndexerParser
{
/**
* Method to process Text input and extract the plain text.
*
* @param string $input The input to process.
*
* @return string The plain text input.
*
* @since 2.5
*/
protected function process($input)
{
return $input;
}
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,431 @@
<?php
/**
* @package Joomla.Administrator
* @subpackage com_finder
*
* @copyright Copyright (C) 2005 - 2013 Open Source Matters, Inc. All rights reserved.
* @license GNU General Public License version 2 or later; see LICENSE
*/
defined('_JEXEC') or die;
JLoader::register('FinderIndexer', __DIR__ . '/indexer.php');
/**
* Result class for the Finder indexer package.
*
* This class uses magic __get() and __set() methods to prevent properties
* being added that might confuse the system. All properties not explicitly
* declared will be pushed into the elements array and can be accessed
* explicitly using the getElement() method.
*
* @package Joomla.Administrator
* @subpackage com_finder
* @since 2.5
*/
class FinderIndexerResult
{
/**
* An array of extra result properties.
*
* @var array
* @since 2.5
*/
protected $elements = array();
/**
* This array tells the indexer which properties should be indexed and what
* weights to use for those properties.
*
* @var array
* @since 2.5
*/
protected $instructions = array(
FinderIndexer::TITLE_CONTEXT => array('title', 'subtitle', 'id'),
FinderIndexer::TEXT_CONTEXT => array('summary', 'body'),
FinderIndexer::META_CONTEXT => array('meta', 'list_price', 'sale_price'),
FinderIndexer::PATH_CONTEXT => array('path', 'alias'),
FinderIndexer::MISC_CONTEXT => array('comments')
);
/**
* The indexer will use this data to create taxonomy mapping entries for
* the item so that it can be filtered by type, label, category,
* or whatever.
*
* @var array
* @since 2.5
*/
protected $taxonomy = array();
/**
* The content URL.
*
* @var string
* @since 2.5
*/
public $url;
/**
* The content route.
*
* @var string
* @since 2.5
*/
public $route;
/**
* The content title.
*
* @var string
* @since 2.5
*/
public $title;
/**
* The content description.
*
* @var string
* @since 2.5
*/
public $description;
/**
* The published state of the result.
*
* @var integer
* @since 2.5
*/
public $published;
/**
* The content published state.
*
* @var integer
* @since 2.5
*/
public $state;
/**
* The content access level.
*
* @var integer
* @since 2.5
*/
public $access;
/**
* The content language.
*
* @var string
* @since 2.5
*/
public $language = '*';
/**
* The publishing start date.
*
* @var string
* @since 2.5
*/
public $publish_start_date;
/**
* The publishing end date.
*
* @var string
* @since 2.5
*/
public $publish_end_date;
/**
* The generic start date.
*
* @var string
* @since 2.5
*/
public $start_date;
/**
* The generic end date.
*
* @var string
* @since 2.5
*/
public $end_date;
/**
* The item list price.
*
* @var mixed
* @since 2.5
*/
public $list_price;
/**
* The item sale price.
*
* @var mixed
* @since 2.5
*/
public $sale_price;
/**
* The content type id. This is set by the adapter.
*
* @var integer
* @since 2.5
*/
public $type_id;
/**
* The default language for content.
*
* @var string
* @since 3.0.2
*/
public $defaultLanguage;
/**
* Constructor
*
* @since 3.0.3
*/
public function __construct()
{
$this->defaultLanguage = JComponentHelper::getParams('com_languages')->get('site', 'en-GB');
}
/**
* The magic set method is used to push additional values into the elements
* array in order to preserve the cleanliness of the object.
*
* @param string $name The name of the element.
* @param mixed $value The value of the element.
*
* @return void
*
* @since 2.5
*/
public function __set($name, $value)
{
$this->elements[$name] = $value;
}
/**
* The magic get method is used to retrieve additional element values
* from the elements array.
*
* @param string $name The name of the element.
*
* @return mixed The value of the element if set, null otherwise.
*
* @since 2.5
*/
public function __get($name)
{
// Get the element value if set.
if (array_key_exists($name, $this->elements))
{
return $this->elements[$name];
}
else
{
return null;
}
}
/**
* The magic isset method is used to check the state of additional element
* values in the elements array.
*
* @param string $name The name of the element.
*
* @return boolean True if set, false otherwise.
*
* @since 2.5
*/
public function __isset($name)
{
return isset($this->elements[$name]);
}
/**
* The magic unset method is used to unset additional element values in the
* elements array.
*
* @param string $name The name of the element.
*
* @return void
*
* @since 2.5
*/
public function __unset($name)
{
unset($this->elements[$name]);
}
/**
* Method to retrieve additional element values from the elements array.
*
* @param string $name The name of the element.
*
* @return mixed The value of the element if set, null otherwise.
*
* @since 2.5
*/
public function getElement($name)
{
// Get the element value if set.
if (array_key_exists($name, $this->elements))
{
return $this->elements[$name];
}
else
{
return null;
}
}
/**
* Method to set additional element values in the elements array.
*
* @param string $name The name of the element.
* @param mixed $value The value of the element.
*
* @return void
*
* @since 2.5
*/
public function setElement($name, $value)
{
$this->elements[$name] = $value;
}
/**
* Method to get all processing instructions.
*
* @return array An array of processing instructions.
*
* @since 2.5
*/
public function getInstructions()
{
return $this->instructions;
}
/**
* Method to add a processing instruction for an item property.
*
* @param string $group The group to associate the property with.
* @param string $property The property to process.
*
* @return void
*
* @since 2.5
*/
public function addInstruction($group, $property)
{
// Check if the group exists. We can't add instructions for unknown groups.
if (array_key_exists($group, $this->instructions))
{
// Check if the property exists in the group.
if (!in_array($property, $this->instructions[$group]))
{
// Add the property to the group.
$this->instructions[$group][] = $property;
}
}
}
/**
* Method to remove a processing instruction for an item property.
*
* @param string $group The group to associate the property with.
* @param string $property The property to process.
*
* @return void
*
* @since 2.5
*/
public function removeInstruction($group, $property)
{
// Check if the group exists. We can't remove instructions for unknown groups.
if (array_key_exists($group, $this->instructions))
{
// Search for the property in the group.
$key = array_search($property, $this->instructions[$group]);
// If the property was found, remove it.
if ($key !== false)
{
unset($this->instructions[$group][$key]);
}
}
}
/**
* Method to get the taxonomy maps for an item.
*
* @param string $branch The taxonomy branch to get. [optional]
*
* @return array An array of taxonomy maps.
*
* @since 2.5
*/
public function getTaxonomy($branch = null)
{
// Get the taxonomy branch if available.
if ($branch !== null && isset($this->taxonomy[$branch]))
{
// Filter the input.
$branch = preg_replace('#[^\pL\pM\pN\p{Pi}\p{Pf}\'+-.,]+#mui', ' ', $branch);
return $this->taxonomy[$branch];
}
return $this->taxonomy;
}
/**
* Method to add a taxonomy map for an item.
*
* @param string $branch The title of the taxonomy branch to add the node to.
* @param string $title The title of the taxonomy node.
* @param integer $state The published state of the taxonomy node. [optional]
* @param integer $access The access level of the taxonomy node. [optional]
*
* @return void
*
* @since 2.5
*/
public function addTaxonomy($branch, $title, $state = 1, $access = 1)
{
// Filter the input.
$branch = preg_replace('#[^\pL\pM\pN\p{Pi}\p{Pf}\'+-.,]+#mui', ' ', $branch);
// Create the taxonomy node.
$node = new JObject;
$node->title = $title;
$node->state = (int) $state;
$node->access = (int) $access;
// Add the node to the taxonomy branch.
$this->taxonomy[$branch][$node->title] = $node;
}
/**
* Method to set the item language
*
* @return void
*
* @since 3.0
*/
public function setLanguage()
{
if ($this->language == '*' || $this->language == '')
{
$this->language = $this->defaultLanguage;
}
}
}

View File

@ -0,0 +1,87 @@
<?php
/**
* @package Joomla.Administrator
* @subpackage com_finder
*
* @copyright Copyright (C) 2005 - 2013 Open Source Matters, Inc. All rights reserved.
* @license GNU General Public License version 2 or later; see LICENSE
*/
defined('_JEXEC') or die;
/**
* Stemmer base class for the Finder indexer package.
*
* @package Joomla.Administrator
* @subpackage com_finder
* @since 2.5
*/
abstract class FinderIndexerStemmer
{
/**
* An internal cache of stemmed tokens.
*
* @var array
* @since 2.5
*/
public $cache = array();
/**
* Method to get a stemmer, creating it if necessary.
*
* @param string $adapter The type of stemmer to load.
*
* @return FinderIndexerStemmer A FinderIndexerStemmer instance.
*
* @since 2.5
* @throws Exception on invalid stemmer.
*/
public static function getInstance($adapter)
{
static $instances;
// Only create one stemmer for each adapter.
if (isset($instances[$adapter]))
{
return $instances[$adapter];
}
// Create an array of instances if necessary.
if (!is_array($instances))
{
$instances = array();
}
// Setup the adapter for the stemmer.
$adapter = JFilterInput::getInstance()->clean($adapter, 'cmd');
$path = __DIR__ . '/stemmer/' . $adapter . '.php';
$class = 'FinderIndexerStemmer' . ucfirst($adapter);
// Check if a stemmer exists for the adapter.
if (file_exists($path))
{
// Instantiate the stemmer.
include_once $path;
$instances[$adapter] = new $class;
}
else
{
// Throw invalid adapter exception.
throw new Exception(JText::sprintf('COM_FINDER_INDEXER_INVALID_STEMMER', $adapter));
}
return $instances[$adapter];
}
/**
* Method to stem a token and return the root.
*
* @param string $token The token to stem.
* @param string $lang The language of the token.
*
* @return string The root token.
*
* @since 2.5
*/
abstract public function stem($token, $lang);
}

View File

@ -0,0 +1,264 @@
<?php
/**
* @package Joomla.Administrator
* @subpackage com_finder
*
* @copyright Copyright (C) 2005 - 2013 Open Source Matters, Inc. All rights reserved.
* @license GNU General Public License version 2 or later; see LICENSE
*/
defined('_JEXEC') or die;
JLoader::register('FinderIndexerStemmer', dirname(__DIR__) . '/stemmer.php');
/**
* French stemmer class for Smart Search indexer.
*
* First contributed by Eric Sanou (bobotche@hotmail.fr)
* This class is inspired in Alexis Ulrich's French stemmer code (http://alx2002.free.fr)
*
* @package Joomla.Administrator
* @subpackage com_finder
* @since 3.0
*/
class FinderIndexerStemmerFr extends FinderIndexerStemmer
{
/**
* Stemming rules.
*
* @var array
* @since 3.0
*/
private static $_stemRules = null;
/**
* Method to stem a token and return the root.
*
* @param string $token The token to stem.
* @param string $lang The language of the token.
*
* @return string The root token.
*
* @since 3.0
*/
public function stem($token, $lang)
{
// Check if the token is long enough to merit stemming.
if (strlen($token) <= 2)
{
return $token;
}
// Check if the language is French or All.
if ($lang !== 'fr' && $lang != '*')
{
return $token;
}
// Stem the token if it is not in the cache.
if (!isset($this->cache[$lang][$token]))
{
// Stem the token.
$result = static::_getStem($token);
// Add the token to the cache.
$this->cache[$lang][$token] = $result;
}
return $this->cache[$lang][$token];
}
/**
* French stemmer rules variables.
*
* @return array The rules
*
* @since 3.0
*/
protected static function getStemRules()
{
if (static::$_stemRules)
{
return static::$_stemRules;
}
$vars = array();
// French accented letters in ISO-8859-1 encoding
$vars['accents'] = chr(224) . chr(226) . chr(232) . chr(233) . chr(234) . chr(235) . chr(238) . chr(239) . chr(244) . chr(251) . chr(249) . chr(231);
// The rule patterns include all accented words for french language
$vars['rule_pattern'] = "/^([a-z" . $vars['accents'] . "]*)(\*){0,1}(\d)([a-z" . $vars['accents'] . "]*)([.|>])/";
// French vowels (including y) in ISO-8859-1 encoding
$vars['vowels'] = chr(97) . chr(224) . chr(226) . chr(101) . chr(232) . chr(233) . chr(234) . chr(235) . chr(105) . chr(238) . chr(239) . chr(111) . chr(244) . chr(117) . chr(251) . chr(249) . chr(121);
// The French rules in ISO-8859-1 encoding
$vars['rules'] = array(
'esre1>', 'esio1>', 'siol1.', 'siof0.', 'sioe0.', 'sio3>', 'st1>', 'sf1>', 'sle1>', 'slo1>', 's' . chr(233) . '1>', chr(233) . 'tuae5.',
chr(233) . 'tuae2.', 'tnia0.', 'tniv1.', 'tni3>', 'suor1.', 'suo0.', 'sdrail5.', 'sdrai4.', 'er' . chr(232) . 'i1>', 'sesue3x>',
'esuey5i.', 'esue2x>', 'se1>', 'er' . chr(232) . 'g3.', 'eca1>', 'esiah0.', 'esi1>', 'siss2.', 'sir2>', 'sit2>', 'egan' . chr(233) . '1.',
'egalli6>', 'egass1.', 'egas0.', 'egat3.', 'ega3>', 'ette4>', 'ett2>', 'etio1.', 'tio' . chr(231) . '4c.', 'tio0.', 'et1>', 'eb1>',
'snia1>', 'eniatnau8>', 'eniatn4.', 'enia1>', 'niatnio3.', 'niatg3.', 'e' . chr(233) . '1>', chr(233) . 'hcat1.', chr(233) . 'hca4.',
chr(233) . 'tila5>', chr(233) . 'tici5.', chr(233) . 'tir1.', chr(233) . 'ti3>', chr(233) . 'gan1.', chr(233) . 'ga3>',
chr(233) . 'tehc1.', chr(233) . 'te3>', chr(233) . 'it0.', chr(233) . '1>', 'eire4.', 'eirue5.', 'eio1.', 'eia1.', 'ei1>', 'eng1.',
'xuaessi7.', 'xuae1>', 'uaes0.', 'uae3.', 'xuave2l.', 'xuav2li>', 'xua3la>', 'ela1>', 'lart2.', 'lani2>', 'la' . chr(233) . '2>',
'siay4i.', 'siassia7.', 'siarv1*.', 'sia1>', 'tneiayo6i.', 'tneiay6i.', 'tneiassia9.', 'tneiareio7.', 'tneia5>', 'tneia4>', 'tiario4.',
'tiarim3.', 'tiaria3.', 'tiaris3.', 'tiari5.', 'tiarve6>', 'tiare5>', 'iare4>', 'are3>', 'tiay4i.', 'tia3>', 'tnay4i.',
'em' . chr(232) . 'iu5>', 'em' . chr(232) . 'i4>', 'tnaun3.', 'tnauqo3.', 'tnau4>', 'tnaf0.', 'tnat' . chr(233) . '2>', 'tna3>', 'tno3>',
'zeiy4i.', 'zey3i.', 'zeire5>', 'zeird4.', 'zeirio4.', 'ze2>', 'ssiab0.', 'ssia4.', 'ssi3.', 'tnemma6>', 'tnemesuey9i.', 'tnemesue8>',
'tnemevi7.', 'tnemessia5.', 'tnemessi8.', 'tneme5>', 'tnemia4.', 'tnem' . chr(233) . '5>', 'el2l>', 'lle3le>', 'let' . chr(244) . '0.',
'lepp0.', 'le2>', 'srei1>', 'reit3.', 'reila2.', 'rei3>', 'ert' . chr(226) . 'e5.', 'ert' . chr(226) . chr(233) . '1.',
'ert' . chr(226) . '4.', 'drai4.', 'erdro0.', 'erute5.', 'ruta0.', 'eruta1.', 'erutiov1.', 'erub3.', 'eruh3.', 'erul3.', 'er2r>', 'nn1>',
'r' . chr(232) . 'i3.', 'srev0.', 'sr1>', 'rid2>', 're2>', 'xuei4.', 'esuei5.', 'lbati3.', 'lba3>', 'rueis0.', 'ruehcn4.', 'ecirta6.',
'ruetai6.', 'rueta5.', 'rueir0.', 'rue3>', 'esseti6.', 'essere6>', 'esserd1.', 'esse4>', 'essiab1.', 'essia5.', 'essio1.', 'essi4.',
'essal4.', 'essa1>', 'ssab1.', 'essurp1.', 'essu4.', 'essi1.', 'ssor1.', 'essor2.', 'esso1>', 'ess2>', 'tio3.', 'r' . chr(232) . 's2re.',
'r' . chr(232) . '0e.', 'esn1.', 'eu1>', 'sua0.', 'su1>', 'utt1>', 'tu' . chr(231) . '3c.', 'u' . chr(231) . '2c.', 'ur1.', 'ehcn2>',
'ehcu1>', 'snorr3.', 'snoru3.', 'snorua3.', 'snorv3.', 'snorio4.', 'snori5.', 'snore5>', 'snortt4>', 'snort' . chr(238) . 'a7.', 'snort3.',
'snor4.', 'snossi6.', 'snoire6.', 'snoird5.', 'snoitai7.', 'snoita6.', 'snoits1>', 'noits0.', 'snoi4>', 'noitaci7>', 'noitai6.', 'noita5.',
'noitu4.', 'noi3>', 'snoya0.', 'snoy4i.', 'sno' . chr(231) . 'a1.', 'sno' . chr(231) . 'r1.', 'snoe4.', 'snosiar1>', 'snola1.', 'sno3>',
'sno1>', 'noll2.', 'tnennei4.', 'ennei2>', 'snei1>', 'sne' . chr(233) . '1>', 'enne' . chr(233) . '5e.', 'ne' . chr(233) . '3e.', 'neic0.',
'neiv0.', 'nei3.', 'sc1.', 'sd1.', 'sg1.', 'sni1.', 'tiu0.', 'ti2.', 'sp1>', 'sna1>', 'sue1.', 'enn2>', 'nong2.', 'noss2.', 'rioe4.',
'riot0.', 'riorc1.', 'riovec5.', 'rio3.', 'ric2.', 'ril2.', 'tnerim3.', 'tneris3>', 'tneri5.', 't' . chr(238) . 'a3.', 'riss2.',
't' . chr(238) . '2.', 't' . chr(226) . '2>', 'ario2.', 'arim1.', 'ara1.', 'aris1.', 'ari3.', 'art1>', 'ardn2.', 'arr1.', 'arua1.',
'aro1.', 'arv1.', 'aru1.', 'ar2.', 'rd1.', 'ud1.', 'ul1.', 'ini1.', 'rin2.', 'tnessiab3.', 'tnessia7.', 'tnessi6.', 'tnessni4.', 'sini2.',
'sl1.', 'iard3.', 'iario3.', 'ia2>', 'io0.', 'iule2.', 'i1>', 'sid2.', 'sic2.', 'esoi4.', 'ed1.', 'ai2>', 'a1>', 'adr1.',
'tner' . chr(232) . '5>', 'evir1.', 'evio4>', 'evi3.', 'fita4.', 'fi2>', 'enie1.', 'sare4>', 'sari4>', 'sard3.', 'sart2>', 'sa2.',
'tnessa6>', 'tnessu6>', 'tnegna3.', 'tnegi3.', 'tneg0.', 'tneru5>', 'tnemg0.', 'tnerni4.', 'tneiv1.', 'tne3>', 'une1.', 'en1>', 'nitn2.',
'ecnay5i.', 'ecnal1.', 'ecna4.', 'ec1>', 'nn1.', 'rit2>', 'rut2>', 'rud2.', 'ugn1>', 'eg1>', 'tuo0.', 'tul2>', 't' . chr(251) . '2>',
'ev1>', 'v' . chr(232) . '2ve>', 'rtt1>', 'emissi6.', 'em1.', 'ehc1.', 'c' . chr(233) . 'i2c' . chr(232) . '.', 'libi2l.', 'llie1.',
'liei4i.', 'xuev1.', 'xuey4i.', 'xueni5>', 'xuell4.', 'xuere5.', 'xue3>', 'rb' . chr(233) . '3rb' . chr(232) . '.', 'tur2.',
'rir' . chr(233) . '4re.', 'rir2.', 'c' . chr(226) . '2ca.', 'snu1.', 'rt' . chr(238) . 'a4.', 'long2.', 'vec2.', chr(231) . '1c>',
'ssilp3.', 'silp2.', 't' . chr(232) . 'hc2te.', 'n' . chr(232) . 'm2ne.', 'llepp1.', 'tan2.', 'rv' . chr(232) . '3rve.',
'rv' . chr(233) . '3rve.', 'r' . chr(232) . '2re.', 'r' . chr(233) . '2re.', 't' . chr(232) . '2te.', 't' . chr(233) . '2te.', 'epp1.',
'eya2i.', 'ya1i.', 'yo1i.', 'esu1.', 'ugi1.', 'tt1.', 'end0.'
);
static::$_stemRules = $vars;
return static::$_stemRules;
}
/**
* Returns the number of the first rule from the rule number
* that can be applied to the given reversed input.
* returns -1 if no rule can be applied, ie the stem has been found
*
* @param string $reversed_input The input to check in reversed order
* @param integer $rule_number The rule number to check
*
* @return integer Number of the first rule
*
* @since 3.0
*/
private static function _getFirstRule($reversed_input, $rule_number)
{
$vars = static::getStemRules();
$nb_rules = count($vars['rules']);
for ($i = $rule_number; $i < $nb_rules; $i++)
{
// Gets the letters from the current rule
$rule = $vars['rules'][$i];
$rule = preg_replace($vars['rule_pattern'], "\\1", $rule);
if (strncasecmp(utf8_decode($rule), $reversed_input, strlen(utf8_decode($rule))) == 0)
{
return $i;
}
}
return -1;
}
/**
* Check the acceptability of a stem for French language
*
* @param string $reversed_stem The stem to check in reverse form
*
* @return boolean True if stem is acceptable
*
* @since 3.0
*/
private static function _check($reversed_stem)
{
$vars = static::getStemRules();
if (preg_match('/[' . $vars['vowels'] . ']$/', utf8_encode($reversed_stem)))
{
// If the form starts with a vowel then at least two letters must remain after stemming (e.g.: "etaient" --> "et")
return (strlen($reversed_stem) > 2);
}
else
{
// If the reversed stem starts with a consonant then at least two letters must remain after stemming
if (strlen($reversed_stem) <= 2)
{
return false;
}
// And at least one of these must be a vowel or "y"
return (preg_match('/[' . $vars['vowels'] . ']/', utf8_encode($reversed_stem)));
}
}
/**
* Paice/Husk stemmer which returns a stem for the given $input
*
* @param string $input The word for which we want the stem in UTF-8
*
* @return string The stem
*
* @since 3.0
*/
private static function _getStem($input)
{
$vars = static::getStemRules();
$intact = true;
$reversed_input = strrev(utf8_decode($input));
$rule_number = 0;
// This loop goes through the rules' array until it finds an ending one (ending by '.') or the last one ('end0.')
while (true)
{
$rule_number = static::_getFirstRule($reversed_input, $rule_number);
if ($rule_number == -1)
{
// No other rule can be applied => the stem has been found
break;
}
$rule = $vars['rules'][$rule_number];
preg_match($vars['rule_pattern'], $rule, $matches);
if (($matches[2] != '*') || ($intact))
{
$reversed_stem = utf8_decode($matches[4]) . substr($reversed_input, $matches[3], strlen($reversed_input) - $matches[3]);
if (self::_check($reversed_stem))
{
$reversed_input = $reversed_stem;
if ($matches[5] == '.')
{
break;
}
}
else
{
// Go to another rule
$rule_number++;
}
}
else
{
// Go to another rule
$rule_number++;
}
}
return utf8_encode(strrev($reversed_input));
}
}

View File

@ -0,0 +1 @@
<!DOCTYPE html><title></title>

View File

@ -0,0 +1,448 @@
<?php
/**
* @package Joomla.Administrator
* @subpackage com_finder
*
* @copyright Copyright (C) 2005 - 2013 Open Source Matters, Inc. All rights reserved.
* @license GNU General Public License version 2 or later; see LICENSE
*/
defined('_JEXEC') or die;
JLoader::register('FinderIndexerStemmer', dirname(__DIR__) . '/stemmer.php');
/**
* Porter English stemmer class for the Finder indexer package.
*
* This class was adapted from one written by Richard Heyes.
* See copyright and link information above.
*
* @package Joomla.Administrator
* @subpackage com_finder
* @since 2.5
*/
class FinderIndexerStemmerPorter_En extends FinderIndexerStemmer
{
/**
* Regex for matching a consonant.
*
* @var string
* @since 2.5
*/
private static $_regex_consonant = '(?:[bcdfghjklmnpqrstvwxz]|(?<=[aeiou])y|^y)';
/**
* Regex for matching a vowel
*
* @var string
* @since 2.5
*/
private static $_regex_vowel = '(?:[aeiou]|(?<![aeiou])y)';
/**
* Method to stem a token and return the root.
*
* @param string $token The token to stem.
* @param string $lang The language of the token.
*
* @return string The root token.
*
* @since 2.5
*/
public function stem($token, $lang)
{
// Check if the token is long enough to merit stemming.
if (strlen($token) <= 2)
{
return $token;
}
// Check if the language is English or All.
if ($lang !== 'en' && $lang != '*')
{
return $token;
}
// Stem the token if it is not in the cache.
if (!isset($this->cache[$lang][$token]))
{
// Stem the token.
$result = $token;
$result = self::_step1ab($result);
$result = self::_step1c($result);
$result = self::_step2($result);
$result = self::_step3($result);
$result = self::_step4($result);
$result = self::_step5($result);
// Add the token to the cache.
$this->cache[$lang][$token] = $result;
}
return $this->cache[$lang][$token];
}
/**
* Step 1
*
* @param string $word The token to stem.
*
* @return string
*
* @since 2.5
*/
private static function _step1ab($word)
{
// Part a
if (substr($word, -1) == 's')
{
self::_replace($word, 'sses', 'ss')
or self::_replace($word, 'ies', 'i')
or self::_replace($word, 'ss', 'ss')
or self::_replace($word, 's', '');
}
// Part b
if (substr($word, -2, 1) != 'e' or !self::_replace($word, 'eed', 'ee', 0))
{
// First rule
$v = self::$_regex_vowel;
// Words ending with ing and ed
// Note use of && and OR, for precedence reasons
if (preg_match("#$v+#", substr($word, 0, -3)) && self::_replace($word, 'ing', '')
or preg_match("#$v+#", substr($word, 0, -2)) && self::_replace($word, 'ed', ''))
{
// If one of above two test successful
if (!self::_replace($word, 'at', 'ate') and !self::_replace($word, 'bl', 'ble') and !self::_replace($word, 'iz', 'ize'))
{
// Double consonant ending
if (self::_doubleConsonant($word) and substr($word, -2) != 'll' and substr($word, -2) != 'ss' and substr($word, -2) != 'zz')
{
$word = substr($word, 0, -1);
}
elseif (self::_m($word) == 1 and self::_cvc($word))
{
$word .= 'e';
}
}
}
}
return $word;
}
/**
* Step 1c
*
* @param string $word The token to stem.
*
* @return string
*
* @since 2.5
*/
private static function _step1c($word)
{
$v = self::$_regex_vowel;
if (substr($word, -1) == 'y' && preg_match("#$v+#", substr($word, 0, -1)))
{
self::_replace($word, 'y', 'i');
}
return $word;
}
/**
* Step 2
*
* @param string $word The token to stem.
*
* @return string
*
* @since 2.5
*/
private static function _step2($word)
{
switch (substr($word, -2, 1))
{
case 'a':
self::_replace($word, 'ational', 'ate', 0)
or self::_replace($word, 'tional', 'tion', 0);
break;
case 'c':
self::_replace($word, 'enci', 'ence', 0)
or self::_replace($word, 'anci', 'ance', 0);
break;
case 'e':
self::_replace($word, 'izer', 'ize', 0);
break;
case 'g':
self::_replace($word, 'logi', 'log', 0);
break;
case 'l':
self::_replace($word, 'entli', 'ent', 0)
or self::_replace($word, 'ousli', 'ous', 0)
or self::_replace($word, 'alli', 'al', 0)
or self::_replace($word, 'bli', 'ble', 0)
or self::_replace($word, 'eli', 'e', 0);
break;
case 'o':
self::_replace($word, 'ization', 'ize', 0)
or self::_replace($word, 'ation', 'ate', 0)
or self::_replace($word, 'ator', 'ate', 0);
break;
case 's':
self::_replace($word, 'iveness', 'ive', 0)
or self::_replace($word, 'fulness', 'ful', 0)
or self::_replace($word, 'ousness', 'ous', 0)
or self::_replace($word, 'alism', 'al', 0);
break;
case 't':
self::_replace($word, 'biliti', 'ble', 0)
or self::_replace($word, 'aliti', 'al', 0)
or self::_replace($word, 'iviti', 'ive', 0);
break;
}
return $word;
}
/**
* Step 3
*
* @param string $word The token to stem.
*
* @return string
*
* @since 2.5
*/
private static function _step3($word)
{
switch (substr($word, -2, 1))
{
case 'a':
self::_replace($word, 'ical', 'ic', 0);
break;
case 's':
self::_replace($word, 'ness', '', 0);
break;
case 't':
self::_replace($word, 'icate', 'ic', 0)
or self::_replace($word, 'iciti', 'ic', 0);
break;
case 'u':
self::_replace($word, 'ful', '', 0);
break;
case 'v':
self::_replace($word, 'ative', '', 0);
break;
case 'z':
self::_replace($word, 'alize', 'al', 0);
break;
}
return $word;
}
/**
* Step 4
*
* @param string $word The token to stem.
*
* @return string
*
* @since 2.5
*/
private static function _step4($word)
{
switch (substr($word, -2, 1))
{
case 'a':
self::_replace($word, 'al', '', 1);
break;
case 'c':
self::_replace($word, 'ance', '', 1)
or self::_replace($word, 'ence', '', 1);
break;
case 'e':
self::_replace($word, 'er', '', 1);
break;
case 'i':
self::_replace($word, 'ic', '', 1);
break;
case 'l':
self::_replace($word, 'able', '', 1)
or self::_replace($word, 'ible', '', 1);
break;
case 'n':
self::_replace($word, 'ant', '', 1)
or self::_replace($word, 'ement', '', 1)
or self::_replace($word, 'ment', '', 1)
or self::_replace($word, 'ent', '', 1);
break;
case 'o':
if (substr($word, -4) == 'tion' or substr($word, -4) == 'sion')
{
self::_replace($word, 'ion', '', 1);
}
else
{
self::_replace($word, 'ou', '', 1);
}
break;
case 's':
self::_replace($word, 'ism', '', 1);
break;
case 't':
self::_replace($word, 'ate', '', 1)
or self::_replace($word, 'iti', '', 1);
break;
case 'u':
self::_replace($word, 'ous', '', 1);
break;
case 'v':
self::_replace($word, 'ive', '', 1);
break;
case 'z':
self::_replace($word, 'ize', '', 1);
break;
}
return $word;
}
/**
* Step 5
*
* @param string $word The token to stem.
*
* @return string
*
* @since 2.5
*/
private static function _step5($word)
{
// Part a
if (substr($word, -1) == 'e')
{
if (self::_m(substr($word, 0, -1)) > 1)
{
self::_replace($word, 'e', '');
}
elseif (self::_m(substr($word, 0, -1)) == 1)
{
if (!self::_cvc(substr($word, 0, -1)))
{
self::_replace($word, 'e', '');
}
}
}
// Part b
if (self::_m($word) > 1 and self::_doubleConsonant($word) and substr($word, -1) == 'l')
{
$word = substr($word, 0, -1);
}
return $word;
}
/**
* Replaces the first string with the second, at the end of the string. If third
* arg is given, then the preceding string must match that m count at least.
*
* @param string &$str String to check
* @param string $check Ending to check for
* @param string $repl Replacement string
* @param integer $m Optional minimum number of m() to meet
*
* @return boolean Whether the $check string was at the end
* of the $str string. True does not necessarily mean
* that it was replaced.
*
* @since 2.5
*/
private static function _replace(&$str, $check, $repl, $m = null)
{
$len = 0 - strlen($check);
if (substr($str, $len) == $check)
{
$substr = substr($str, 0, $len);
if (is_null($m) or self::_m($substr) > $m)
{
$str = $substr . $repl;
}
return true;
}
return false;
}
/**
* m() measures the number of consonant sequences in $str. if c is
* a consonant sequence and v a vowel sequence, and <..> indicates arbitrary
* presence,
*
* <c><v> gives 0
* <c>vc<v> gives 1
* <c>vcvc<v> gives 2
* <c>vcvcvc<v> gives 3
*
* @param string $str The string to return the m count for
*
* @return integer The m count
*
* @since 2.5
*/
private static function _m($str)
{
$c = self::$_regex_consonant;
$v = self::$_regex_vowel;
$str = preg_replace("#^$c+#", '', $str);
$str = preg_replace("#$v+$#", '', $str);
preg_match_all("#($v+$c+)#", $str, $matches);
return count($matches[1]);
}
/**
* Returns true/false as to whether the given string contains two
* of the same consonant next to each other at the end of the string.
*
* @param string $str String to check
*
* @return boolean Result
*
* @since 2.5
*/
private static function _doubleConsonant($str)
{
$c = self::$_regex_consonant;
return preg_match("#$c{2}$#", $str, $matches) and $matches[0]{0} == $matches[0]{1};
}
/**
* Checks for ending CVC sequence where second C is not W, X or Y
*
* @param string $str String to check
*
* @return boolean Result
*
* @since 2.5
*/
private static function _cvc($str)
{
$c = self::$_regex_consonant;
$v = self::$_regex_vowel;
return preg_match("#($c$v$c)$#", $str, $matches) and strlen($matches[1]) == 3 and $matches[1]{2} != 'w' and $matches[1]{2} != 'x'
and $matches[1]{2} != 'y';
}
}

View File

@ -0,0 +1,135 @@
<?php
/**
* @package Joomla.Administrator
* @subpackage com_finder
*
* @copyright Copyright (C) 2005 - 2013 Open Source Matters, Inc. All rights reserved.
* @license GNU General Public License version 2 or later; see LICENSE
*/
defined('_JEXEC') or die;
JLoader::register('FinderIndexerStemmer', dirname(__DIR__) . '/stemmer.php');
/**
* Snowball stemmer class for the Finder indexer package.
*
* @package Joomla.Administrator
* @subpackage com_finder
* @since 2.5
*/
class FinderIndexerStemmerSnowball extends FinderIndexerStemmer
{
/**
* Method to stem a token and return the root.
*
* @param string $token The token to stem.
* @param string $lang The language of the token.
*
* @return string The root token.
*
* @since 2.5
*/
public function stem($token, $lang)
{
// Language to use if All is specified.
static $defaultLang = '';
// If language is All then try to get site default language.
if ($lang == '*' && $defaultLang == '')
{
$languages = JLanguageHelper::getLanguages();
$defaultLang = isset($languages[0]->sef) ? $languages[0]->sef : '*';
$lang = $defaultLang;
}
// Stem the token if it is not in the cache.
if (!isset($this->cache[$lang][$token]))
{
// Get the stem function from the language string.
switch ($lang)
{
// Danish stemmer.
case 'da':
$function = 'stem_danish';
break;
// German stemmer.
case 'de':
$function = 'stem_german';
break;
// English stemmer.
default:
case 'en':
$function = 'stem_english';
break;
// Spanish stemmer.
case 'es':
$function = 'stem_spanish';
break;
// Finnish stemmer.
case 'fi':
$function = 'stem_finnish';
break;
// French stemmer.
case 'fr':
$function = 'stem_french';
break;
// Hungarian stemmer.
case 'hu':
$function = 'stem_hungarian';
break;
// Italian stemmer.
case 'it':
$function = 'stem_italian';
break;
// Norwegian stemmer.
case 'nb':
$function = 'stem_norwegian';
break;
// Dutch stemmer.
case 'nl':
$function = 'stem_dutch';
break;
// Portuguese stemmer.
case 'pt':
$function = 'stem_portuguese';
break;
// Romanian stemmer.
case 'ro':
$function = 'stem_romanian';
break;
// Russian stemmer.
case 'ru':
$function = 'stem_russian_unicode';
break;
// Swedish stemmer.
case 'sv':
$function = 'stem_swedish';
break;
// Turkish stemmer.
case 'tr':
$function = 'stem_turkish_unicode';
break;
}
// Stem the word if the stemmer method exists.
$this->cache[$lang][$token] = function_exists($function) ? $function($token) : $token;
}
return $this->cache[$lang][$token];
}
}

View File

@ -0,0 +1,375 @@
<?php
/**
* @package Joomla.Administrator
* @subpackage com_finder
*
* @copyright Copyright (C) 2005 - 2013 Open Source Matters, Inc. All rights reserved.
* @license GNU General Public License version 2 or later; see LICENSE
*/
defined('_JEXEC') or die;
/**
* Stemmer base class for the Finder indexer package.
*
* @package Joomla.Administrator
* @subpackage com_finder
* @since 2.5
*/
class FinderIndexerTaxonomy
{
/**
* An internal cache of taxonomy branch data.
*
* @var array
* @since 2.5
*/
public static $branches = array();
/**
* An internal cache of taxonomy node data.
*
* @var array
* @since 2.5
*/
public static $nodes = array();
/**
* Method to add a branch to the taxonomy tree.
*
* @param string $title The title of the branch.
* @param integer $state The published state of the branch. [optional]
* @param integer $access The access state of the branch. [optional]
*
* @return integer The id of the branch.
*
* @since 2.5
* @throws Exception on database error.
*/
public static function addBranch($title, $state = 1, $access = 1)
{
// Check to see if the branch is in the cache.
if (isset(self::$branches[$title]))
{
return self::$branches[$title]->id;
}
// Check to see if the branch is in the table.
$db = JFactory::getDbo();
$query = $db->getQuery(true)
->select('*')
->from($db->quoteName('#__finder_taxonomy'))
->where($db->quoteName('parent_id') . ' = 1')
->where($db->quoteName('title') . ' = ' . $db->quote($title));
$db->setQuery($query);
// Get the result.
$result = $db->loadObject();
// Check if the database matches the input data.
if (!empty($result) && $result->state == $state && $result->access == $access)
{
// The data matches, add the item to the cache.
self::$branches[$title] = $result;
return self::$branches[$title]->id;
}
/*
* The database did not match the input. This could be because the
* state has changed or because the branch does not exist. Let's figure
* out which case is true and deal with it.
*/
$branch = new JObject;
if (empty($result))
{
// Prepare the branch object.
$branch->parent_id = 1;
$branch->title = $title;
$branch->state = (int) $state;
$branch->access = (int) $access;
}
else
{
// Prepare the branch object.
$branch->id = (int) $result->id;
$branch->parent_id = (int) $result->parent_id;
$branch->title = $result->title;
$branch->state = (int) $result->title;
$branch->access = (int) $result->access;
$branch->ordering = (int) $result->ordering;
}
// Store the branch.
self::storeNode($branch);
// Add the branch to the cache.
self::$branches[$title] = $branch;
return self::$branches[$title]->id;
}
/**
* Method to add a node to the taxonomy tree.
*
* @param string $branch The title of the branch to store the node in.
* @param string $title The title of the node.
* @param integer $state The published state of the node. [optional]
* @param integer $access The access state of the node. [optional]
*
* @return integer The id of the node.
*
* @since 2.5
* @throws Exception on database error.
*/
public static function addNode($branch, $title, $state = 1, $access = 1)
{
// Check to see if the node is in the cache.
if (isset(self::$nodes[$branch][$title]))
{
return self::$nodes[$branch][$title]->id;
}
// Get the branch id, insert it if it does not exist.
$branchId = self::addBranch($branch);
// Check to see if the node is in the table.
$db = JFactory::getDbo();
$query = $db->getQuery(true)
->select('*')
->from($db->quoteName('#__finder_taxonomy'))
->where($db->quoteName('parent_id') . ' = ' . $db->quote($branchId))
->where($db->quoteName('title') . ' = ' . $db->quote($title));
$db->setQuery($query);
// Get the result.
$result = $db->loadObject();
// Check if the database matches the input data.
if (!empty($result) && $result->state == $state && $result->access == $access)
{
// The data matches, add the item to the cache.
self::$nodes[$branch][$title] = $result;
return self::$nodes[$branch][$title]->id;
}
/*
* The database did not match the input. This could be because the
* state has changed or because the node does not exist. Let's figure
* out which case is true and deal with it.
*/
$node = new JObject;
if (empty($result))
{
// Prepare the node object.
$node->parent_id = (int) $branchId;
$node->title = $title;
$node->state = (int) $state;
$node->access = (int) $access;
}
else
{
// Prepare the node object.
$node->id = (int) $result->id;
$node->parent_id = (int) $result->parent_id;
$node->title = $result->title;
$node->state = (int) $result->title;
$node->access = (int) $result->access;
$node->ordering = (int) $result->ordering;
}
// Store the node.
self::storeNode($node);
// Add the node to the cache.
self::$nodes[$branch][$title] = $node;
return self::$nodes[$branch][$title]->id;
}
/**
* Method to add a map entry between a link and a taxonomy node.
*
* @param integer $linkId The link to map to.
* @param integer $nodeId The node to map to.
*
* @return boolean True on success.
*
* @since 2.5
* @throws Exception on database error.
*/
public static function addMap($linkId, $nodeId)
{
// Insert the map.
$db = JFactory::getDbo();
$query = $db->getQuery(true)
->select($db->quoteName('link_id'))
->from($db->quoteName('#__finder_taxonomy_map'))
->where($db->quoteName('link_id') . ' = ' . (int) $linkId)
->where($db->quoteName('node_id') . ' = ' . (int) $nodeId);
$db->setQuery($query);
$db->execute();
$id = (int) $db->loadResult();
$map = new JObject;
$map->link_id = (int) $linkId;
$map->node_id = (int) $nodeId;
if ($id)
{
$db->updateObject('#__finder_taxonomy_map', $map);
}
else
{
$db->insertObject('#__finder_taxonomy_map', $map);
}
return true;
}
/**
* Method to get the title of all taxonomy branches.
*
* @return array An array of branch titles.
*
* @since 2.5
* @throws Exception on database error.
*/
public static function getBranchTitles()
{
$db = JFactory::getDbo();
// Set user variables
$user = JFactory::getUser();
$groups = implode(',', $user->getAuthorisedViewLevels());
// Create a query to get the taxonomy branch titles.
$query = $db->getQuery(true)
->select($db->quoteName('title'))
->from($db->quoteName('#__finder_taxonomy'))
->where($db->quoteName('parent_id') . ' = 1')
->where($db->quoteName('state') . ' = 1')
->where($db->quoteName('access') . ' IN (' . $groups . ')');
// Get the branch titles.
$db->setQuery($query);
$results = $db->loadColumn();
return $results;
}
/**
* Method to find a taxonomy node in a branch.
*
* @param string $branch The branch to search.
* @param string $title The title of the node.
*
* @return mixed Integer id on success, null on no match.
*
* @since 2.5
* @throws Exception on database error.
*/
public static function getNodeByTitle($branch, $title)
{
$db = JFactory::getDbo();
// Set user variables
$user = JFactory::getUser();
$groups = implode(',', $user->getAuthorisedViewLevels());
// Create a query to get the node.
$query = $db->getQuery(true)
->select('t1.*')
->from($db->quoteName('#__finder_taxonomy') . ' AS t1')
->join('INNER', $db->quoteName('#__finder_taxonomy') . ' AS t2 ON t2.id = t1.parent_id')
->where('t1.access IN (' . $groups . ')')
->where('t1.state = 1')
->where('t1.title LIKE ' . $db->quote($db->escape($title) . '%'))
->where('t2.access IN (' . $groups . ')')
->where('t2.state = 1')
->where('t2.title = ' . $db->quote($branch));
// Get the node.
$db->setQuery($query, 0, 1);
$result = $db->loadObject();
return $result;
}
/**
* Method to remove map entries for a link.
*
* @param integer $linkId The link to remove.
*
* @return boolean True on success.
*
* @since 2.5
* @throws Exception on database error.
*/
public static function removeMaps($linkId)
{
// Delete the maps.
$db = JFactory::getDbo();
$query = $db->getQuery(true)
->delete($db->quoteName('#__finder_taxonomy_map'))
->where($db->quoteName('link_id') . ' = ' . (int) $linkId);
$db->setQuery($query);
$db->execute();
return true;
}
/**
* Method to remove orphaned taxonomy nodes and branches.
*
* @return integer The number of deleted rows.
*
* @since 2.5
* @throws Exception on database error.
*/
public static function removeOrphanNodes()
{
// Delete all orphaned nodes.
$db = JFactory::getDbo();
$query = 'DELETE t' .
' FROM ' . $db->quoteName('#__finder_taxonomy') . ' AS t' .
' LEFT JOIN ' . $db->quoteName('#__finder_taxonomy_map') . ' AS m ON m.node_id = t.id' .
' WHERE t.parent_id > 1' .
' AND m.link_id IS NULL';
$db->setQuery($query);
$db->execute();
return $db->getAffectedRows();
}
/**
* Method to store a node to the database. This method will accept either a branch or a node.
*
* @param object $item The item to store.
*
* @return boolean True on success.
*
* @since 2.5
* @throws Exception on database error.
*/
protected static function storeNode($item)
{
$db = JFactory::getDbo();
// Check if we are updating or inserting the item.
if (empty($item->id))
{
// Insert the item.
$db->insertObject('#__finder_taxonomy', $item, 'id');
}
else
{
// Update the item.
$db->updateObject('#__finder_taxonomy', $item, 'id');
}
return true;
}
}

View File

@ -0,0 +1,147 @@
<?php
/**
* @package Joomla.Administrator
* @subpackage com_finder
*
* @copyright Copyright (C) 2005 - 2013 Open Source Matters, Inc. All rights reserved.
* @license GNU General Public License version 2 or later; see LICENSE
*/
defined('_JEXEC') or die;
/**
* Token class for the Finder indexer package.
*
* @package Joomla.Administrator
* @subpackage com_finder
* @since 2.5
*/
class FinderIndexerToken
{
/**
* This is the term that will be referenced in the terms table and the
* mapping tables.
*
* @var string
* @since 2.5
*/
public $term;
/**
* The stem is used to match the root term and produce more potential
* matches when searching the index.
*
* @var string
* @since 2.5
*/
public $stem;
/**
* If the token is numeric, it is likely to be short and uncommon so the
* weight is adjusted to compensate for that situation.
*
* @var boolean
* @since 2.5
*/
public $numeric;
/**
* If the token is a common term, the weight is adjusted to compensate for
* the higher frequency of the term in relation to other terms.
*
* @var boolean
* @since 2.5
*/
public $common;
/**
* Flag for phrase tokens.
*
* @var boolean
* @since 2.5
*/
public $phrase;
/**
* The length is used to calculate the weight of the token.
*
* @var integer
* @since 2.5
*/
public $length;
/**
* The weight is calculated based on token size and whether the token is
* considered a common term.
*
* @var integer
* @since 2.5
*/
public $weight;
/**
* The simple language identifier for the token.
*
* @var string
* @since 2.5
*/
public $language;
/**
* Method to construct the token object.
*
* @param mixed $term The term as a string for words or an array for phrases.
* @param string $lang The simple language identifier.
* @param string $spacer The space separator for phrases. [optional]
*
* @since 2.5
*/
public function __construct($term, $lang, $spacer = ' ')
{
$this->language = $lang;
// Tokens can be a single word or an array of words representing a phrase.
if (is_array($term))
{
// Populate the token instance.
$this->term = implode($spacer, $term);
$this->stem = implode($spacer, array_map(array('FinderIndexerHelper', 'stem'), $term, array($lang)));
$this->numeric = false;
$this->common = false;
$this->phrase = true;
$this->length = JString::strlen($this->term);
/*
* Calculate the weight of the token.
*
* 1. Length of the token up to 30 and divide by 30, add 1.
* 2. Round weight to 4 decimal points.
*/
$this->weight = (($this->length >= 30 ? 30 : $this->length) / 30) + 1;
$this->weight = round($this->weight, 4);
}
else
{
// Populate the token instance.
$this->term = $term;
$this->stem = FinderIndexerHelper::stem($this->term, $lang);
$this->numeric = (is_numeric($this->term) || (bool) preg_match('#^[0-9,.\-\+]+$#', $this->term));
$this->common = $this->numeric ? false : FinderIndexerHelper::isCommon($this->term, $lang);
$this->phrase = false;
$this->length = JString::strlen($this->term);
/*
* Calculate the weight of the token.
*
* 1. Length of the token up to 15 and divide by 15.
* 2. If common term, divide weight by 8.
* 3. If numeric, multiply weight by 1.5.
* 4. Round weight to 4 decimal points.
*/
$this->weight = (($this->length >= 15 ? 15 : $this->length) / 15);
$this->weight = ($this->common == true ? $this->weight / 8 : $this->weight);
$this->weight = ($this->numeric == true ? $this->weight * 1.5 : $this->weight);
$this->weight = round($this->weight, 4);
}
}
}

View File

@ -0,0 +1,110 @@
<?php
/**
* @package Joomla.Administrator
* @subpackage com_finder
*
* @copyright Copyright (C) 2005 - 2013 Open Source Matters, Inc. All rights reserved.
* @license GNU General Public License version 2 or later; see LICENSE
*/
defined('_JEXEC') or die;
/**
* Finder language helper class.
*
* @package Joomla.Administrator
* @subpackage com_finder
* @since 2.5
*/
class FinderHelperLanguage
{
/**
* Method to return a plural language code for a taxonomy branch.
*
* @param string $branchName Branch title.
*
* @return string Language key code.
*
* @since 2.5
*/
public static function branchPlural($branchName)
{
$return = preg_replace('/[^a-zA-Z0-9]+/', '_', strtoupper($branchName));
return 'PLG_FINDER_QUERY_FILTER_BRANCH_P_' . $return;
}
/**
* Method to return a singular language code for a taxonomy branch.
*
* @param string $branchName Branch name.
*
* @return string Language key code.
*
* @since 2.5
*/
public static function branchSingular($branchName)
{
$return = preg_replace('/[^a-zA-Z0-9]+/', '_', strtoupper($branchName));
return 'PLG_FINDER_QUERY_FILTER_BRANCH_S_' . $return;
}
/**
* Method to load Smart Search component language file.
*
* @return void
*
* @since 2.5
*/
public static function loadComponentLanguage()
{
$lang = JFactory::getLanguage();
$lang->load('com_finder', JPATH_SITE);
}
/**
* Method to load Smart Search plug-in language files.
*
* @return void
*
* @since 2.5
*/
public static function loadPluginLanguage()
{
static $loaded = false;
// If already loaded, don't load again.
if ($loaded)
{
return;
}
$loaded = true;
// Get array of all the enabled Smart Search plug-in names.
$db = JFactory::getDbo();
$query = $db->getQuery(true)
->select('name')
->from($db->quoteName('#__extensions'))
->where($db->quoteName('type') . ' = ' . $db->quote('plugin'))
->where($db->quoteName('folder') . ' = ' . $db->quote('finder'))
->where($db->quoteName('enabled') . ' = 1');
$db->setQuery($query);
$plugins = $db->loadObjectList();
if (empty($plugins))
{
return;
}
// Load generic language strings.
$lang = JFactory::getLanguage();
$lang->load('plg_content_finder', JPATH_ADMINISTRATOR);
// Load language file for each plug-in.
foreach ($plugins as $plugin)
{
$lang->load($plugin->name, JPATH_ADMINISTRATOR);
}
}
}