first commit

This commit is contained in:
alazhar
2020-01-02 22:20:31 +07:00
commit 10eb3340ad
5753 changed files with 631345 additions and 0 deletions

View File

@ -0,0 +1,52 @@
<?php
/**
* @package Joomla.Administrator
* @subpackage com_finder
*
* @copyright Copyright (C) 2005 - 2013 Open Source Matters, Inc. All rights reserved.
* @license GNU General Public License version 2 or later; see LICENSE
*/
// Abort immediately unless the _JEXEC constant has been defined, so this
// file cannot be executed on its own.
defined('_JEXEC') or die;
// Register the base parser class against parser.php, located one directory
// above this file's directory (presumably so JLoader can load it on demand
// when the class below is declared — confirm against JLoader).
JLoader::register('FinderIndexerParser', dirname(__DIR__) . '/parser.php');
/**
 * HTML Parser class for the Finder indexer package.
 *
 * Reduces HTML markup to whitespace-normalised plain text: script and style
 * blocks are dropped together with their contents, remaining tags are
 * stripped and HTML entities are decoded.
 *
 * @package     Joomla.Administrator
 * @subpackage  com_finder
 * @since       2.5
 */
class FinderIndexerParserHtml extends FinderIndexerParser
{
	/**
	 * Method to process HTML input and extract the plain text.
	 *
	 * Failures of the underlying iconv/PCRE calls no longer wipe the text:
	 * when a step reports an error its input is carried forward unchanged.
	 *
	 * @param   string  $input  The input to process.
	 *
	 * @return  string  The plain text input.
	 *
	 * @since   2.5
	 */
	protected function process($input)
	{
		// Strip invalid UTF-8 characters. iconv() returns false when the
		// conversion fails; keep the raw input in that case instead of
		// letting false degrade into an empty string further down.
		$converted = iconv("utf-8", "utf-8//IGNORE", $input);

		if ($converted !== false)
		{
			$input = $converted;
		}

		// Strip script and style blocks including their contents.
		// strip_tags() below only removes the tags themselves and would
		// otherwise leave JavaScript/CSS source in the extracted text.
		$withoutBlocks = preg_replace(
			array('#<script[^>]*>.*?</script>#si', '#<style[^>]*>.*?</style>#si'),
			' ',
			$input
		);

		// preg_replace() returns null on a PCRE error (for example when the
		// backtrack limit is hit on a very large document).
		if ($withoutBlocks !== null)
		{
			$input = $withoutBlocks;
		}

		// Deal with spacing issues in the input: make sure text on either
		// side of a tag stays separated once the tag is removed, and turn
		// non-breaking space entities into ordinary spaces.
		$input = str_replace('>', '> ', $input);
		$input = str_replace(array('&nbsp;', '&#160;'), ' ', $input);
		$input = $this->normaliseWhitespace($input);

		// Strip the tags from the input and decode entities.
		$input = strip_tags($input);
		$input = html_entity_decode($input, ENT_QUOTES, 'UTF-8');

		return $this->normaliseWhitespace($input);
	}

	/**
	 * Collapse every run of whitespace into a single space and trim the ends.
	 *
	 * @param   string  $text  The text to normalise.
	 *
	 * @return  string  The normalised text.
	 */
	private function normaliseWhitespace($text)
	{
		$collapsed = preg_replace('#\s+#u', ' ', $text);

		if ($collapsed === null)
		{
			// The /u modifier rejects malformed UTF-8 subjects; retry in
			// byte mode so the text is not discarded.
			$collapsed = preg_replace('#\s+#', ' ', $text);
		}

		return trim((string) $collapsed);
	}
}

View File

@ -0,0 +1 @@
<!DOCTYPE html><title></title>

View File

@ -0,0 +1,44 @@
<?php
/**
* @package Joomla.Administrator
* @subpackage com_finder
*
* @copyright Copyright (C) 2005 - 2013 Open Source Matters, Inc. All rights reserved.
* @license GNU General Public License version 2 or later; see LICENSE
*/
// Abort immediately unless the _JEXEC constant has been defined, so this
// file cannot be executed on its own.
defined('_JEXEC') or die;
// Register the base parser class against parser.php, located one directory
// above this file's directory (presumably so JLoader can load it on demand
// when the class below is declared — confirm against JLoader).
JLoader::register('FinderIndexerParser', dirname(__DIR__) . '/parser.php');
/**
 * RTF Parser class for the Finder indexer package.
 *
 * Performs a pattern-based clean-up of RTF markup; it does not interpret
 * the document structure.
 *
 * @package     Joomla.Administrator
 * @subpackage  com_finder
 * @since       2.5
 */
class FinderIndexerParserRtf extends FinderIndexerParser
{
	/**
	 * Method to process RTF input and extract the plain text.
	 *
	 * @param   string  $input  The input to process.
	 *
	 * @return  string  The plain text input.
	 *
	 * @since   2.5
	 */
	protected function process($input)
	{
		// Drop embedded pictures: everything from "{\pict" up to the first
		// closing brace.
		$text = preg_replace('#{\\\pict[^}]*}#mis', '', $input);

		// Group delimiters become spaces ...
		$text = str_replace(array('{', '}'), ' ', $text);

		// ... and a backslash followed by a line break becomes a bare line break.
		$text = str_replace("\\\n", "\n", $text);

		// Blank out control sequences, in this order: first anything running
		// from a backslash to the next semicolon, then any remaining backslash
		// followed by letters, digits or apostrophes.
		$controlPatterns = array(
			'#\\\([^;]+?);#mis',
			'#\\\[\'a-zA-Z0-9]+#mis',
		);

		foreach ($controlPatterns as $pattern)
		{
			$text = preg_replace($pattern, ' ', $text);
		}

		return $text;
	}
}

View File

@ -0,0 +1,36 @@
<?php
/**
* @package Joomla.Administrator
* @subpackage com_finder
*
* @copyright Copyright (C) 2005 - 2013 Open Source Matters, Inc. All rights reserved.
* @license GNU General Public License version 2 or later; see LICENSE
*/
// Abort immediately unless the _JEXEC constant has been defined, so this
// file cannot be executed on its own.
defined('_JEXEC') or die;
// Register the base parser class against parser.php, located one directory
// above this file's directory (presumably so JLoader can load it on demand
// when the class below is declared — confirm against JLoader).
JLoader::register('FinderIndexerParser', dirname(__DIR__) . '/parser.php');
/**
 * Text Parser class for the Finder indexer package.
 *
 * Plain text needs no conversion, so this parser is a pass-through.
 *
 * @package     Joomla.Administrator
 * @subpackage  com_finder
 * @since       2.5
 */
class FinderIndexerParserTxt extends FinderIndexerParser
{
	/**
	 * Method to process Text input and extract the plain text.
	 *
	 * @param   string  $input  The input to process.
	 *
	 * @return  string  The plain text input, exactly as received.
	 *
	 * @since   2.5
	 */
	protected function process($input)
	{
		// Nothing to strip or decode: hand the text back untouched.
		return $input;
	}
}