Revision as of 23:52, 9 December 2009 view sourceClueBot (talk | contribs)1,596,818 edits Automated source upload.← Previous edit | Latest revision as of 13:01, 27 November 2010 view source ClueBot (talk | contribs)1,596,818 edits Automated source upload. | ||
(42 intermediate revisions by 10 users not shown) | |||
Line 25: | Line 25: | ||
**/ | **/ | ||
function __construct () { | function __construct () { | ||
global $proxyhost, $proxyport; | global $proxyhost, $proxyport, $proxytype; | ||
$this->ch = curl_init(); | $this->ch = curl_init(); | ||
$this->uid = dechex(rand(0,99999999)); | $this->uid = dechex(rand(0,99999999)); | ||
Line 32: | Line 32: | ||
curl_setopt($this->ch,CURLOPT_MAXCONNECTS,100); | curl_setopt($this->ch,CURLOPT_MAXCONNECTS,100); | ||
curl_setopt($this->ch,CURLOPT_CLOSEPOLICY,CURLCLOSEPOLICY_LEAST_RECENTLY_USED); | curl_setopt($this->ch,CURLOPT_CLOSEPOLICY,CURLCLOSEPOLICY_LEAST_RECENTLY_USED); | ||
curl_setopt($this->ch,CURLOPT_USERAGENT,'ClueBot/1.1'); | |||
if (isset($proxyhost) and isset($proxyport) and ($proxyport != null) and ($proxyhost != null)) { | if (isset($proxyhost) and isset($proxyport) and ($proxyport != null) and ($proxyhost != null)) { | ||
curl_setopt($this->ch,CURLOPT_PROXYTYPE,CURLPROXY_HTTP); | curl_setopt($this->ch,CURLOPT_PROXYTYPE,isset( $proxytype ) ? $proxytype : CURLPROXY_HTTP); | ||
curl_setopt($this->ch,CURLOPT_PROXY,$proxyhost); | curl_setopt($this->ch,CURLOPT_PROXY,$proxyhost); | ||
curl_setopt($this->ch,CURLOPT_PROXYPORT,$proxyport); | curl_setopt($this->ch,CURLOPT_PROXYPORT,$proxyport); | ||
Line 165: | Line 166: | ||
private $edittoken; | private $edittoken; | ||
private $tokencache; | private $tokencache; | ||
private $user, $pass; | |||
public $apiurl = 'http://en.wikipedia.org/w/api.php'; | public $apiurl = 'http://en.wikipedia.org/w/api.php'; | ||
Line 186: | Line 188: | ||
**/ | **/ | ||
function login ($user,$pass) { | function login ($user,$pass) { | ||
$this->user = $user; | |||
⚫ | |
||
$this->pass = $pass; | |||
⚫ | $x = unserialize($this->http->post($this->apiurl.'?action=login&format=php',array('lgname' => $user, 'lgpassword' => $pass))); | ||
if($x == 'Success') | |||
return true; | |||
if($x == 'NeedToken') { | |||
$x = unserialize($this->http->post($this->apiurl.'?action=login&format=php',array('lgname' => $user, 'lgpassword' => $pass, 'lgtoken' => $x))); | |||
if($x == 'Success') | |||
return true; | |||
} | |||
return false; | |||
} | } | ||
Line 518: | Line 530: | ||
'action' => 'edit', | 'action' => 'edit', | ||
'format' => 'php', | 'format' => 'php', | ||
'assert' => 'bot', | |||
'title' => $page, | 'title' => $page, | ||
'text' => $data, | 'text' => $data, | ||
Line 533: | Line 546: | ||
var_export($x); | var_export($x); | ||
if ($x == 'Success') return true; | if ($x == 'Success') return true; | ||
if ($x == 'badtoken') { | |||
if($this->login($this->user,$this->pass)) { | |||
$this->gettokens('Main Page',true); | |||
return $this->edit($page,$data,$summary,$minor,$bot,$wpStarttime,$wpEdittime,$checkrun); | |||
} else | |||
return false; | |||
} | |||
else return false; | else return false; | ||
} | } | ||
Line 593: | Line 613: | ||
$x = unserialize($x); | $x = unserialize($x); | ||
var_export($x); | var_export($x); | ||
return (isset($x) |
return ( isset($x) and isset( $x ) and $x ) | ||
? true | |||
: false; | |||
} | } | ||
} | } | ||
Line 1,234: | Line 1,256: | ||
unset($heuristic); | unset($heuristic); | ||
$ |
$wpapi->edit('User:'.$user.'/Source', | ||
'The following is automatically generated by ].\n\n\n\n==Classes (wikibot.classes.php)==\n\n<pre>" . | 'The following is automatically generated by ].\n\n\n\n==Classes (wikibot.classes.php)==\n\n<pre>" . | ||
htmlentities(file_get_contents('../wikibot.classes.php'))."</pre>\n\n\n\n==Diff function (diff.function.php)==\n\n<pre>" . | htmlentities(file_get_contents('../wikibot.classes.php'))."</pre>\n\n\n\n==Diff function (diff.function.php)==\n\n<pre>" . | ||
Line 1,245: | Line 1,267: | ||
unset($heuristics); | unset($heuristics); | ||
$ |
$wpapi->edit('User:'.$user, | ||
"{{User:ClueBot Commons/Userpage}}\n", | "{{User:ClueBot Commons/Userpage}}\n", | ||
'Automated bot userpage set.'); /* Our page, we force post this because this is *our* page. */ | 'Automated bot userpage set.'); /* Our page, we force post this because this is *our* page. */ | ||
Line 1,679: | Line 1,701: | ||
} | } | ||
foreach (explode(',',$ircvandalismchannel) as $y) { | foreach (explode(',',$ircvandalismchannel) as $y) { | ||
fwrite($irc,'PRIVMSG '.$y.' : |
fwrite($irc,'PRIVMSG '.$y.' :rcbot bl add '.$change.' x='.(24*$warning).' r=Vandalism to .']] (#'.$warning.").\n"); | ||
} | } | ||
$wpapi->edit( | $wpapi->edit( | ||
Line 1,697: | Line 1,719: | ||
} elseif ($warning < 5) { /* Warn them if they haven't been warned 4 times. */ | } elseif ($warning < 5) { /* Warn them if they haven't been warned 4 times. */ | ||
foreach (explode(',',$ircvandalismchannel) as $y) { | foreach (explode(',',$ircvandalismchannel) as $y) { | ||
fwrite($irc,'PRIVMSG '.$y.' : |
fwrite($irc,'PRIVMSG '.$y.' :rcbot bl add '.$change.' x='.(24*$warning).' r=Vandalism to .']] (#'.$warning.').'."\n"); | ||
} | } | ||
$wpapi->edit( | $wpapi->edit( | ||
Line 1,787: | Line 1,809: | ||
$heuristics = 'smallchange'; | $heuristics = 'smallchange'; | ||
$heuristics = 'claimjumperpete'; | $heuristics = 'claimjumperpete'; | ||
$heuristics = 'sneaky'; | |||
$heuristics = 'redirect'; | $heuristics = 'redirect'; | ||
?> | ?> | ||
Line 1,970: | Line 1,993: | ||
===heuristics/cluebot.smallchange.heuristic.php=== | ===heuristics/cluebot.smallchange.heuristic.php=== | ||
<pre><?PHP | |||
unset($log,$log2); | |||
if ( /* Small changes with obscenities. */ | |||
(($change >= -200) and ($change <= 200)) | |||
and (($d = $wpi->diff($change,$change,$change)) or true) | |||
and ((($change == 'User:ClueBot/Sandbox') and print_r($rv)) or true) | |||
and (($s = score($obscenelist,$d,$log)) or true) | |||
and (($s -= score($obscenelist,$d,$log2)) or true) | |||
and ( | |||
( | |||
($s < -5) /* There are times when small changes are ok. */ | |||
and (($rv = $wpapi->revisions($change,2,'older',true,$change)) or true) | |||
and (!fnmatch('*#REDIRECT*',strtoupper(substr($rv,0,9)))) | |||
and (!fnmatch('*SEX*',strtoupper($rv))) | |||
and (!fnmatch('*BDSM*',strtoupper($rv))) | |||
and (score($obscenelist,$change) >= 0) | |||
and (score($obscenelist,$rv) >= 0) | |||
and (!preg_match('/(^|\s)({1,2}(\*+|\-{3,}){0,2}|\*{4}|\-{4}|(\<|\()?censored(\>|\))?)(ing?|ed)?(\s|$)/iS',$rv)) | |||
and ($heuristic .= '/obscenities') | |||
and ($reason = 'making a minor change with obscenities') | |||
) | |||
or ( | |||
($s > 5) | |||
and (($rv = $wpapi->revisions($change,2,'older',true,$change)) or true) | |||
and (!fnmatch('*#REDIRECT*',strtoupper(substr($rv,0,9)))) | |||
and (!preg_match('/(^|\s)({1,2}(\*+|\-{3,}){0,2}|\*{4}|\-{4}|(\<|\()?censored(\>|\))?)(ing?|ed)?(\s|$)/iS',$rv)) | |||
and (preg_match('/(^|\s)({1,2}(\*+|\-{3,}){0,2}|\*{4}|\-{4}|(\<|\()?censored(\>|\))?)(ing?|ed)?(\s|$)/iS',$rv)) | |||
and ($heuristic .= '/censor') | |||
and ($reason = 'making a minor change censoring content (])') | |||
) | |||
or ( | |||
(preg_match('/\!\!\!/S',$d)) | |||
and (($rv = $wpapi->revisions($change,2,'older',true,$change)) or true) | |||
and (!preg_match('/\!\!\!/S',$rv)) | |||
and (!fnmatch('*#REDIRECT*',strtoupper(substr($rv,0,9)))) | |||
and ($heuristic .= '/exclamation') | |||
and ($reason = 'making a minor change adding "!!!"') | |||
) | |||
) | |||
) { $heuristicret = true; if (isset($log2) and is_array($log2)) foreach ($log2 as $k => $v) $log -= $v; if (isset($log) and is_array($log)) foreach ($log as $k => $v) if ($v == 0) unset($log); unset($log2); /* fwrite($irc,'PRIVMSG #wikipedia-BAG/ClueBot :Would revert http://en.wikipedia.org/search/?title='.urlencode($change.$change).'&diff=prev'.'&oldid='.urlencode($change)." .\n"); */ } | |||
?> | |||
</pre> | |||
===heuristics/cluebot.sneaky.heuristic.php=== | |||
<pre><?PHP | <pre><?PHP | ||
Line 2,155: | Line 2,223: | ||
] (]) |
] (]) 13:01, 27 November 2010 (UTC) |
Latest revision as of 13:01, 27 November 2010
The following is automatically generated by ClueBot.
Classes (wikibot.classes.php)
<?PHP
    /**
     * @author Cobi Carter
     **/

    /**
     * This class is designed to provide a simplified interface to cURL which maintains cookies.
     * @author Cobi
     **/
    class http {
        private $ch;
        private $uid;
        public $postfollowredirs;
        public $getfollowredirs;

        /**
         * Our constructor function.  This just does basic cURL initialization:
         * a per-instance cookie file under /tmp, connection limits, the user agent and,
         * when the globals $proxyhost and $proxyport are both set and non-null, the proxy.
         * The proxy type comes from the global $proxytype, falling back to CURLPROXY_HTTP.
         * @return void
         **/
        function __construct () {
            global $proxyhost, $proxyport, $proxytype;
            $this->ch = curl_init();
            $this->uid = dechex(rand(0,99999999));
            /* The same path is rebuilt from $this->uid in __destruct(), so the naming scheme must not change. */
            $cookiefile = '/tmp/cluewikibot.cookies.'.$this->uid.'.dat';
            curl_setopt($this->ch,CURLOPT_COOKIEJAR,$cookiefile);
            curl_setopt($this->ch,CURLOPT_COOKIEFILE,$cookiefile);
            curl_setopt($this->ch,CURLOPT_MAXCONNECTS,100);
            /* CURLOPT_CLOSEPOLICY and the CURLCLOSEPOLICY_* values are absent from newer PHP cURL builds;
               referencing them there is a fatal "undefined constant" error, so only use them when defined. */
            if (defined('CURLOPT_CLOSEPOLICY') and defined('CURLCLOSEPOLICY_LEAST_RECENTLY_USED')) {
                curl_setopt($this->ch,CURLOPT_CLOSEPOLICY,CURLCLOSEPOLICY_LEAST_RECENTLY_USED);
            }
            curl_setopt($this->ch,CURLOPT_USERAGENT,'ClueBot/1.1');
            if (isset($proxyhost) and isset($proxyport) and ($proxyport != null) and ($proxyhost != null)) {
                curl_setopt($this->ch,CURLOPT_PROXYTYPE,isset( $proxytype ) ? $proxytype : CURLPROXY_HTTP);
                curl_setopt($this->ch,CURLOPT_PROXY,$proxyhost);
                curl_setopt($this->ch,CURLOPT_PROXYPORT,$proxyport);
            }
            $this->postfollowredirs = 0;
            $this->getfollowredirs = 1;
        }

        /**
         * Post to a URL.
         * @param $url The URL to post to.
         * @param $data The post-data to post, should be an array of key => value pairs.
         * @return Data retrieved from the POST request.
**/
        function post ($url,$data) {
            global $logfd;
            $started = microtime(1);

            /* Request target and redirect policy. */
            curl_setopt($this->ch,CURLOPT_URL,$url);
            curl_setopt($this->ch,CURLOPT_FOLLOWLOCATION,$this->postfollowredirs);
            curl_setopt($this->ch,CURLOPT_MAXREDIRS,10);
            /* Return the body only (no headers) as a string. */
            curl_setopt($this->ch,CURLOPT_HEADER,0);
            curl_setopt($this->ch,CURLOPT_RETURNTRANSFER,1);
            curl_setopt($this->ch,CURLOPT_TIMEOUT,30);
            curl_setopt($this->ch,CURLOPT_CONNECTTIMEOUT,10);
            /* POST payload; the empty Expect header is sent with every POST. */
            curl_setopt($this->ch,CURLOPT_POST,1);
            curl_setopt($this->ch,CURLOPT_POSTFIELDS, $data);
            curl_setopt($this->ch,CURLOPT_HTTPHEADER, array('Expect:'));

            $response = curl_exec($this->ch);

            /* Log timing and size to $logfd, opening stderr if no log resource exists yet. */
            if (!is_resource($logfd)) {
                $logfd = fopen('php://stderr','w');
            }
            $elapsed = microtime(1) - $started;
            fwrite($logfd,'POST: '.$url.' ('.$elapsed.' s) ('.strlen($response)." b)\n");

            return $response;
        }

        /**
         * Get a URL.
         * @param $url The URL to get.
         * @return Data retrieved from the GET request.
         **/
        function get ($url) {
            global $logfd;
            $started = microtime(1);

            curl_setopt($this->ch,CURLOPT_URL,$url);
            curl_setopt($this->ch,CURLOPT_FOLLOWLOCATION,$this->getfollowredirs);
            curl_setopt($this->ch,CURLOPT_MAXREDIRS,10);
            curl_setopt($this->ch,CURLOPT_HEADER,0);
            curl_setopt($this->ch,CURLOPT_RETURNTRANSFER,1);
            curl_setopt($this->ch,CURLOPT_TIMEOUT,30);
            curl_setopt($this->ch,CURLOPT_CONNECTTIMEOUT,10);
            curl_setopt($this->ch,CURLOPT_HTTPGET,1);

            $response = curl_exec($this->ch);

            if (!is_resource($logfd)) {
                $logfd = fopen('php://stderr','w');
            }
            $elapsed = microtime(1) - $started;
            fwrite($logfd,'GET: '.$url.' ('.$elapsed.' s) ('.strlen($response)." b)\n");

            return $response;
        }

        /**
         * Our destructor.  Closes the cURL handle and removes this instance's cookie file.
         **/
        function __destruct () {
            curl_close($this->ch);
            $cookiefile = '/tmp/cluewikibot.cookies.'.$this->uid.'.dat';
            @unlink($cookiefile);
        }
    }

    /**
     * This class is a deprecated wrapper class which allows legacy code written for Wikipedia's query.php API to still work with wikipediaapi::.
     **/
    class wikipediaquery {
        private $http;
        private $api;
        public $queryurl = 'http://en.wikipedia.org/w/query.php'; //Obsolete, but kept for compatibility purposes.
/**
         * This is our constructor.  Shares the global http object and creates the wikipediaapi instance the wrappers delegate to.
         * @return void
         **/
        function __construct () {
            global $__wp__http;
            if (!isset($__wp__http)) {
                $__wp__http = new http;
            }
            $this->http = &$__wp__http;
            $this->api = new wikipediaapi;
        }

        /**
         * Reinitializes the queryurl: derives the api.php URL from $queryurl and pushes it to the API object.
         * @private
         * @return void
         **/
        private function checkurl() {
            $this->api->apiurl = str_replace('query.php','api.php',$this->queryurl);
        }

        /**
         * Gets the content of a page.
         * @param $page The wikipedia page to fetch.
         * @return The wikitext for the page, or null when no revision content came back.
         **/
        function getpage ($page) {
            $this->checkurl();
            $ret = $this->api->revisions($page,1,'older',true,null,true,false,false,false);
            /* NOTE(review): assumes revisions() yields a list of revision records with the wikitext under '*'
               (MediaWiki format=php layout) -- confirm against a live response. */
            return (is_array($ret) and isset($ret[0]['*'])) ? $ret[0]['*'] : null;
        }

        /**
         * Gets the page id for a page.
         * @param $page The wikipedia page to get the id for.
         * @return The page id of the page, or null when it could not be determined.
         **/
        function getpageid ($page) {
            $this->checkurl();
            $ret = $this->api->revisions($page,1,'older',false,null,true,false,false,false);
            /* NOTE(review): assumes revisions() stores the page id under 'pageid' -- confirm. */
            return (is_array($ret) and isset($ret['pageid'])) ? $ret['pageid'] : null;
        }

        /**
         * Gets the number of contributions a user has.
         * @param $user The username for which to get the edit count.
         * @return The number of contributions the user has, or false when the user was not found.
         **/
        function contribcount ($user) {
            $this->checkurl();
            $ret = $this->api->users($user,1,null,true);
            /* NOTE(review): assumes users() yields a list whose first record carries 'editcount' -- confirm. */
            if (($ret !== false) and isset($ret[0]['editcount'])) return $ret[0]['editcount'];
            return false;
        }
    }

    /**
     * This class is for interacting with Wikipedia's api.php API.
     **/
    class wikipediaapi {
        private $http;
        private $edittoken;
        private $tokencache;
        private $user, $pass;
        public $apiurl = 'http://en.wikipedia.org/w/api.php';

        /**
         * This is our constructor.  Shares the global http object (and with it the cookie jar) between API classes.
         * @return void
         **/
        function __construct () {
            global $__wp__http;
            if (!isset($__wp__http)) {
                $__wp__http = new http;
            }
            $this->http = &$__wp__http;
        }

        /**
         * This function takes a username and password and logs you into wikipedia.
         * @param $user Username to login as.
         * @param $pass Password that corresponds to the username.
* @return boolean True when the API reports a successful login, false otherwise.
         **/
        function login ($user,$pass) {
            /* Remember the credentials so edit() can log in again after a bad-token error. */
            $this->user = $user;
            $this->pass = $pass;
            $url = $this->apiurl.'?action=login&format=php';
            /* NOTE(review): the 'login' => 'result'/'token' key paths assume the MediaWiki action=login
               response layout -- confirm against a live response. */
            $x = unserialize($this->http->post($url,array('lgname' => $user, 'lgpassword' => $pass)));
            $result = isset($x['login']['result']) ? $x['login']['result'] : null;
            if ($result == 'Success')
                return true;
            if (($result == 'NeedToken') and isset($x['login']['token'])) {
                /* Two-step login: repeat the request, echoing back the token the server handed out. */
                $x = unserialize($this->http->post($url,array('lgname' => $user, 'lgpassword' => $pass, 'lgtoken' => $x['login']['token'])));
                if (isset($x['login']['result']) and ($x['login']['result'] == 'Success'))
                    return true;
            }
            return false;
        }

        /**
         * This function returns the edit token.
         * @return Edit token.
         **/
        function getedittoken () {
            $tokens = $this->gettokens('Main Page');
            /* An empty cached token is useless; bypass the cache and ask again. */
            if (!isset($tokens['edittoken']) or ($tokens['edittoken'] == '')) $tokens = $this->gettokens('Main Page',true);
            $this->edittoken = isset($tokens['edittoken']) ? $tokens['edittoken'] : null;
            return $this->edittoken;
        }

        /**
         * This function returns the various tokens for a certain page.
         * @param $title Page to get the tokens for.
         * @param $flush Optional - internal use only.  Flushes the token cache.
         * @return An associative array of tokens for the page (null when the response lists no page).
         **/
        function gettokens ($title,$flush = false) {
            if (!is_array($this->tokencache)) $this->tokencache = array();
            /* Expire individual cache entries older than six hours; the rest of the cache stays. */
            foreach ($this->tokencache as $t => $data)
                if (time() - $data['timestamp'] > 6*60*60) unset($this->tokencache[$t]);
            if (isset($this->tokencache[$title]) && (!$flush)) {
                return $this->tokencache[$title]['tokens'];
            } else {
                $tokens = array();
                $x = $this->http->get($this->apiurl.'?action=query&format=php&prop=info&intoken=edit|delete|protect|move|block|unblock|email&titles='.urlencode($title));
                $x = unserialize($x);
                /* NOTE(review): assumes pages are listed under query => pages and each token under '<type>token'
                   (MediaWiki prop=info&intoken layout) -- confirm. */
                if (!isset($x['query']['pages']) or !is_array($x['query']['pages'])) return null;
                foreach ($x['query']['pages'] as $y) {
                    foreach (array('edit','delete','protect','move','block','unblock','email') as $type)
                        $tokens[$type.'token'] = isset($y[$type.'token']) ? $y[$type.'token'] : null;
                    $this->tokencache[$title] = array(
                        'timestamp' => time(),
                        'tokens' => $tokens
                    );
                    /* Only one title was requested, so the first page entry is the answer. */
                    return $tokens;
                }
            }
        }

        /**
         * This function returns the recent changes for the wiki.
         * @param $count The number of items to return. (Default 10)
         * @param $namespace The namespace ID to filter items on. Null for no filtering. (Default null)
         * @param $dir The direction to pull items.  "older" or "newer".  (Default 'older')
         * @param $ts The timestamp to start at.
Null for the beginning/end (depending on direction).  (Default null)
         * @return Associative array of recent changes metadata (null when the response carries none).
         **/
        function recentchanges ($count = 10,$namespace = null,$dir = 'older',$ts = null) {
            $append = '';
            if ($ts !== null) { $append .= '&rcstart='.urlencode($ts); }
            $append .= '&rcdir='.urlencode($dir);
            if ($namespace !== null) { $append .= '&rcnamespace='.urlencode($namespace); }
            $x = $this->http->get($this->apiurl.'?action=query&list=recentchanges&rcprop=user|comment|flags|timestamp|title|ids|sizes&format=php&rclimit='.urlencode($count).$append);
            $x = unserialize($x);
            /* NOTE(review): assumes the list lives under query => recentchanges (MediaWiki layout) -- confirm. */
            return isset($x['query']['recentchanges']) ? $x['query']['recentchanges'] : null;
        }

        /**
         * This function returns search results from Wikipedia's internal search engine.
         * @param $search The query string to search for.
         * @param $limit The number of results to return. (Default 10)
         * @param $offset The number to start at. (Default 0)
         * @param $namespace The namespace ID to filter by.  Null means no filtering.  (Default 0)
         * @param $what What to search, 'text' or 'title'.  (Default 'text')
         * @param $redirs Whether or not to list redirects.  (Default false)
         * @return Associative array of search result metadata (null when the response carries none).
         **/
        function search ($search,$limit = 10,$offset = 0,$namespace = 0,$what = 'text',$redirs = false) {
            $append = '';
            /* Loose != null comparisons: 0 and '' are skipped as well, so a zero offset or namespace sends no parameter. */
            if ($limit != null) $append .= '&srlimit='.urlencode($limit);
            if ($offset != null) $append .= '&sroffset='.urlencode($offset);
            if ($namespace != null) $append .= '&srnamespace='.urlencode($namespace);
            if ($what != null) $append .= '&srwhat='.urlencode($what);
            if ($redirs == true) $append .= '&srredirects=1';
            else $append .= '&srredirects=0';
            $x = $this->http->get($this->apiurl.'?action=query&list=search&format=php&srsearch='.urlencode($search).$append);
            $x = unserialize($x);
            /* NOTE(review): assumes the list lives under query => search (MediaWiki layout) -- confirm. */
            return isset($x['query']['search']) ? $x['query']['search'] : null;
        }

        /**
         * Retrieve entries from the WikiLog.
         * @param $user Username who caused the entry.  Null means anyone.  (Default null)
         * @param $title Object to which the entry refers.  Null means anything.  (Default null)
         * @param $limit Number of entries to return.
(Default 50)
         * @param $type Type of logs.  Null means any type.  (Default null)
         * @param $start Date to start enumerating logs.  Null means beginning/end depending on $dir.  (Default null)
         * @param $end Where to stop enumerating logs.  Null means whenever limit is satisfied or there are no more logs.  (Default null)
         * @param $dir Direction to enumerate logs.  "older" or "newer".  (Default 'older')
         * @return Associative array of logs metadata (null when the response carries none).
         **/
        function logs ($user = null,$title = null,$limit = 50,$type = null,$start = null,$end = null,$dir = 'older') {
            /* Map each optional argument onto its le* query parameter; loosely-null values are left out. */
            $filters = array(
                'leuser' => $user,
                'letitle' => $title,
                'lelimit' => $limit,
                'letype' => $type,
                'lestart' => $start,
                'leend' => $end,
                'ledir' => $dir
            );
            $append = '';
            foreach ($filters as $name => $value)
                if ($value != null) $append .= '&'.$name.'='.urlencode($value);
            $x = $this->http->get($this->apiurl.'?action=query&format=php&list=logevents&leprop=ids|title|type|user|timestamp|comment|details'.$append);
            $x = unserialize($x);
            /* NOTE(review): assumes the list lives under query => logevents (MediaWiki layout) -- confirm. */
            return isset($x['query']['logevents']) ? $x['query']['logevents'] : null;
        }

        /**
         * Retrieves metadata about a user's contributions.
         * @param $user Username whose contributions we want to retrieve.
         * @param $count Number of entries to return.  (Default 50)
         * @param $continue Where to continue enumerating if part of a larger, split request.  This is filled with the next logical continuation value.  (Default null)
         * @param $dir Which direction to enumerate from, "older" or "newer".  (Default 'older')
         * @return Associative array of contributions metadata.
**/
        function usercontribs ($user,$count = 50,&$continue = null,$dir = 'older') {
            if ($continue != null) {
                $append = '&ucstart='.urlencode($continue);
            } else {
                $append = '';
            }
            $x = $this->http->get($this->apiurl.'?action=query&format=php&list=usercontribs&ucuser='.urlencode($user).'&uclimit='.urlencode($count).'&ucdir='.urlencode($dir).$append);
            $x = unserialize($x);
            /* NOTE(review): key paths assume the MediaWiki layout (query-continue => usercontribs => ucstart,
               query => usercontribs) -- confirm against a live response. */
            /* Hand the continuation value back through the by-reference argument; null means no more results. */
            $continue = isset($x['query-continue']['usercontribs']['ucstart']) ? $x['query-continue']['usercontribs']['ucstart'] : null;
            return isset($x['query']['usercontribs']) ? $x['query']['usercontribs'] : null;
        }

        /**
         * Returns revision data (meta and/or actual).
         * @param $page Page for which to return revision data for.
         * @param $count Number of revisions to return. (Default 1)
         * @param $dir Direction to start enumerating multiple revisions from, "older" or "newer". (Default 'older')
         * @param $content Whether to return actual revision content, true or false.  (Default false)
         * @param $revid Revision ID to start at.  (Default null)
         * @param $wait Whether or not to wait a few seconds for the specific revision to become available.  (Default true)
         * @param $getrbtok Whether or not to retrieve a rollback token for the revision.  (Default false)
         * @param $dieonerror Whether or not to kill the process with an error if an error occurs.  (Default false)
         * @param $redirects Whether or not to follow redirects.  (Default false)
         * @return Associative array of revision data.
* Note: the signature default for $dieonerror is true, and a missing revision is retried once
         * after a one second sleep when $wait is true.
         **/
        function revisions ($page,$count = 1,$dir = 'older',$content = false,$revid = null,$wait = true,$getrbtok = false,$dieonerror = true,$redirects = false) {
            $x = $this->http->get($this->apiurl.'?action=query&prop=revisions&titles='.urlencode($page).'&rvlimit='.urlencode($count).'&rvprop=timestamp|ids|user|comment'.(($content)?'|content':'').'&format=php&meta=userinfo&rvdir='.urlencode($dir).(($revid !== null)?'&rvstartid='.urlencode($revid):'').(($getrbtok == true)?'&rvtoken=rollback':'').(($redirects == true)?'&redirects':''));
            $x = unserialize($x);
            /* NOTE(review): every key path below (query => pages => <id> => revisions, userinfo => name,
               query-continue => revisions => rvstartid) assumes the MediaWiki format=php layout -- confirm. */

            if ($revid !== null) {
                /* A specific revision was requested: make sure the first page actually lists it. */
                $found = false;
                if (!isset($x['query']['pages']) or !is_array($x['query']['pages'])) {
                    if ($dieonerror == true) die('No such page.'."\n");
                    else return false;
                }
                foreach ($x['query']['pages'] as $data) {
                    if (!isset($data['revisions']) or !is_array($data['revisions'])) {
                        if ($dieonerror == true) die('No such page.'."\n");
                        else return false;
                    }
                    foreach ($data['revisions'] as $data2) if ($data2['revid'] == $revid) $found = true;
                    unset($data,$data2);
                    break;
                }

                if ($found == false) {
                    if ($wait == true) {
                        /* Retry exactly once ($wait = false on the second attempt), keeping $redirects intact. */
                        sleep(1);
                        return $this->revisions($page,$count,$dir,$content,$revid,false,$getrbtok,$dieonerror,$redirects);
                    } else {
                        if ($dieonerror == true) die('Revision error.'."\n");
                        /* With $dieonerror off, fall through and return whatever revisions were delivered. */
                    }
                }
            }

            if (!isset($x['query']['pages']) or !is_array($x['query']['pages'])) return false;

            foreach ($x['query']['pages'] as $key => $data) {
                /* Page-level metadata is stored next to the numerically indexed revision records. */
                $data['revisions']['ns'] = isset($data['ns']) ? $data['ns'] : null;
                $data['revisions']['title'] = isset($data['title']) ? $data['title'] : null;
                $data['revisions']['currentuser'] = isset($x['query']['userinfo']['name']) ? $x['query']['userinfo']['name'] : null;
//              $data['revisions']['currentuser'] = $x['query']['userinfo']['currentuser']['name'];
                $data['revisions']['continue'] = isset($x['query-continue']['revisions']['rvstartid']) ? $x['query-continue']['revisions']['rvstartid'] : null;
                $data['revisions']['pageid'] = $key;
                /* Only the first page of the response is returned. */
                return $data['revisions'];
            }
        }

        /**
         * Enumerates user metadata.
         * @param $start The username to start enumerating from.  Null means from the beginning.  (Default null)
         * @param $limit The number of users to enumerate.  (Default 1)
         * @param $group The usergroup to filter by.  Null means no filtering.  (Default null)
         * @param $requirestart Whether or not to require that $start be a valid username.  (Default false)
         * @param $continue This is filled with the name to continue from next query.  (Default null)
         * @return Associative array of user metadata.
**/
        function users ($start = null,$limit = 1,$group = null,$requirestart = false,&$continue = null) {
            $append = '';
            if ($start != null) $append .= '&aufrom='.urlencode($start);
            if ($group != null) $append .= '&augroup='.urlencode($group);
            $x = $this->http->get($this->apiurl.'?action=query&list=allusers&format=php&auprop=blockinfo|editcount|registration|groups&aulimit='.urlencode($limit).$append);
            $x = unserialize($x);
            /* NOTE(review): key paths assume the MediaWiki layout (query-continue => allusers => aufrom,
               query => allusers => [n] => name) -- confirm against a live response. */
            $continue = isset($x['query-continue']['allusers']['aufrom']) ? $x['query-continue']['allusers']['aufrom'] : null;
            $list = isset($x['query']['allusers']) ? $x['query']['allusers'] : null;
            /* With $requirestart the first user returned must be $start itself, otherwise the name does not exist. */
            if (($requirestart == true) and (!isset($list[0]['name']) or ($list[0]['name'] != $start))) return false;
            return $list;
        }

        /**
         * Get members of a category.
         * @param $category Category to enumerate from.
         * @param $count Number of members to enumerate.  (Default 500)
         * @param $continue Where to continue enumerating from.  This is automatically filled in when run.  (Default null)
         * @return Associative array of category member metadata (null when the response carries none).
         **/
        function categorymembers ($category,$count = 500,&$continue = null) {
            if ($continue != null) {
                $append = '&cmcontinue='.urlencode($continue);
            } else {
                $append = '';
            }
            /* Accept the name with or without a "Category:" prefix (any case) and normalise it. */
            $category = 'Category:'.str_ireplace('category:','',$category);
            $x = $this->http->get($this->apiurl.'?action=query&list=categorymembers&cmtitle='.urlencode($category).'&format=php&cmlimit='.urlencode($count).$append);
            $x = unserialize($x);
            /* NOTE(review): key paths assume the MediaWiki layout (query-continue => categorymembers => cmcontinue,
               query => categorymembers) -- confirm. */
            $continue = isset($x['query-continue']['categorymembers']['cmcontinue']) ? $x['query-continue']['categorymembers']['cmcontinue'] : null;
            return isset($x['query']['categorymembers']) ? $x['query']['categorymembers'] : null;
        }

        /**
         * Enumerate all categories.
         * @param $start Where to start enumerating.  This is updated automatically with the value to continue from.  (Default null)
         * @param $limit Number of categories to enumerate.  (Default 50)
         * @param $dir Direction to enumerate in.  'ascending' or 'descending'.  (Default 'ascending')
         * @param $prefix Only enumerate categories with this prefix.  (Default null)
         * @return Associative array of category list metadata.
**/
        function listcategories (&$start = null,$limit = 50,$dir = 'ascending',$prefix = null) {
            $append = '';
            if ($start != null) $append .= '&acfrom='.urlencode($start);
            if ($limit != null) $append .= '&aclimit='.urlencode($limit);
            if ($dir != null) $append .= '&acdir='.urlencode($dir);
            if ($prefix != null) $append .= '&acprefix='.urlencode($prefix);

            $x = $this->http->get($this->apiurl.'?action=query&list=allcategories&acprop=size&format=php'.$append);
            $x = unserialize($x);

            /* NOTE(review): key paths assume the MediaWiki layout (query-continue => allcategories => acfrom,
               query => allcategories) -- confirm against a live response. */
            /* $start doubles as the continuation output; null means the enumeration is complete. */
            $start = isset($x['query-continue']['allcategories']['acfrom']) ? $x['query-continue']['allcategories']['acfrom'] : null;

            return isset($x['query']['allcategories']) ? $x['query']['allcategories'] : null;
        }

        /**
         * Enumerate all backlinks to a page.
         * @param $page Page to search for backlinks to.
         * @param $count Number of backlinks to list.  (Default 500)
         * @param $continue Where to start enumerating from.  This is automatically filled in.  (Default null)
         * @param $filter Whether or not to include redirects.  Acceptable values are 'all', 'redirects', and 'nonredirects'.  (Default null)
         * @return Associative array of backlink metadata (null when the response carries none).
         **/
        function backlinks ($page,$count = 500,&$continue = null,$filter = null) {
            if ($continue != null) {
                $append = '&blcontinue='.urlencode($continue);
            } else {
                $append = '';
            }
            if ($filter != null) {
                $append .= '&blfilterredir='.urlencode($filter);
            }

            $x = $this->http->get($this->apiurl.'?action=query&list=backlinks&bltitle='.urlencode($page).'&format=php&bllimit='.urlencode($count).$append);
            $x = unserialize($x);
            /* NOTE(review): key paths assume the MediaWiki layout (query-continue => backlinks => blcontinue,
               query => backlinks) -- confirm. */
            $continue = isset($x['query-continue']['backlinks']['blcontinue']) ? $x['query-continue']['backlinks']['blcontinue'] : null;
            return isset($x['query']['backlinks']) ? $x['query']['backlinks'] : null;
        }

        /**
         * Gets a list of transcludes embedded in a page.
         * @param $page Page to look for transcludes in.
         * @param $count Number of transcludes to list.  (Default 500)
         * @param $continue Where to start enumerating from.  This is automatically filled in.  (Default null)
         * @return Associative array of transclude metadata.
**/
        function embeddedin ($page,$count = 500,&$continue = null) {
            if ($continue != null) {
                $append = '&eicontinue='.urlencode($continue);
            } else {
                $append = '';
            }
            $x = $this->http->get($this->apiurl.'?action=query&list=embeddedin&eititle='.urlencode($page).'&format=php&eilimit='.urlencode($count).$append);
            $x = unserialize($x);
            /* NOTE(review): key paths assume the MediaWiki layout (query-continue => embeddedin => eicontinue,
               query => embeddedin) -- confirm against a live response. */
            $continue = isset($x['query-continue']['embeddedin']['eicontinue']) ? $x['query-continue']['embeddedin']['eicontinue'] : null;
            return isset($x['query']['embeddedin']) ? $x['query']['embeddedin'] : null;
        }

        /**
         * Gets a list of pages with a common prefix.
         * @param $prefix Common prefix to search for.
         * @param $namespace Numeric namespace to filter on.  (Default 0)
         * @param $count Number of pages to list.  (Default 500)
         * @param $continue Where to start enumerating from.  This is automatically filled in.  (Default null)
         * @return Associative array of page metadata (null when the response carries none).
         **/
        function listprefix ($prefix,$namespace = 0,$count = 500,&$continue = null) {
            /* The namespace is always sent, so namespace 0 is an explicit filter here. */
            $append = '&apnamespace='.urlencode($namespace);
            if ($continue != null) {
                $append .= '&apfrom='.urlencode($continue);
            }
            $x = $this->http->get($this->apiurl.'?action=query&list=allpages&apprefix='.urlencode($prefix).'&format=php&aplimit='.urlencode($count).$append);
            $x = unserialize($x);
            /* NOTE(review): key paths assume the MediaWiki layout (query-continue => allpages => apfrom,
               query => allpages) -- confirm. */
            $continue = isset($x['query-continue']['allpages']['apfrom']) ? $x['query-continue']['allpages']['apfrom'] : null;
            return isset($x['query']['allpages']) ? $x['query']['allpages'] : null;
        }

        /**
         * Edits a page.
         * @param $page Page name to edit.
         * @param $data Data to post to page.
         * @param $summary Edit summary to use.
         * @param $minor Whether or not to mark edit as minor.  (Default false)
         * @param $bot Whether or not to mark edit as a bot edit.  (Default true)
         * @param $wpStarttime Time in MW TS format of beginning of edit.  (Default now)
         * @param $wpEdittime Time in MW TS format of last edit to that page.  (Default correct)
         * @return boolean True on success, false on failure.
* @param $checkrun Whether to consult the run switch ($run global or the User:<bot>/Run page) first.  (Default true)
         * @param $retry Whether one re-login and retry is allowed after a bad-token error.  (Default true)
         **/
        function edit ($page,$data,$summary = '',$minor = false,$bot = true,$wpStarttime = null,$wpEdittime = null,$checkrun = true,$retry = true) {
            global $run, $user;

            $wpq = new wikipediaquery; $wpq->queryurl = str_replace('api.php','query.php',$this->apiurl);

            /* Kill switch: refuse to edit unless the run flag contains yes/enable/true. */
            if ($checkrun == true)
                if (!preg_match('/(yes|enable|true)/iS',((isset($run))?$run:$wpq->getpage('User:'.$user.'/Run'))))
                    return false; /* Check /Run page */

            $params = Array(
                'action' => 'edit',
                'format' => 'php',
                'assert' => 'bot',
                'title' => $page,
                'text' => $data,
                'token' => $this->getedittoken(),
                'summary' => $summary,
                ($minor?'minor':'notminor') => '1',
                ($bot?'bot':'notbot') => '1'
            );

            /* Add the timestamps as extra parameters instead of replacing the whole parameter array.
               NOTE(review): 'starttimestamp'/'basetimestamp' are the MediaWiki action=edit names -- confirm. */
            if ($wpStarttime !== null) $params['starttimestamp'] = $wpStarttime;
            if ($wpEdittime !== null) $params['basetimestamp'] = $wpEdittime;

            $x = $this->http->post($this->apiurl,$params);
            $x = unserialize($x);
            var_export($x);
            /* NOTE(review): edit => result and error => code assume the MediaWiki response layout -- confirm. */
            if (isset($x['edit']['result']) and ($x['edit']['result'] == 'Success')) return true;
            if (isset($x['error']['code']) and ($x['error']['code'] == 'badtoken')) {
                /* The session probably expired: log in again, refresh the token cache and retry once.
                   $retry is cleared on the second attempt so a persistently bad token cannot recurse forever. */
                if ($retry and $this->login($this->user,$this->pass)) {
                    $this->gettokens('Main Page',true);
                    return $this->edit($page,$data,$summary,$minor,$bot,$wpStarttime,$wpEdittime,$checkrun,false);
                } else
                    return false;
            }
            else return false;
        }

        /**
         * Moves a page.
         * @param $old Name of page to move.
         * @param $new New page title.
         * @param $reason Move summary to use.
         * @return void
         **/
        function move ($old,$new,$reason) {
            $tokens = $this->gettokens($old);
            $params = array(
                'action' => 'move',
                'format' => 'php',
                'from' => $old,
                'to' => $new,
                /* gettokens() returns the whole token set; the API wants just the move token. */
                'token' => isset($tokens['movetoken']) ? $tokens['movetoken'] : null,
                'reason' => $reason
            );

            $x = $this->http->post($this->apiurl,$params);
            $x = unserialize($x);
            var_export($x);
        }

        /**
         * Rollback an edit.
         * @param $title Title of page to rollback.
         * @param $user Username of last edit to the page to rollback.
         * @param $reason Edit summary to use for rollback.
         * @param $token Rollback token.  If not given, it will be fetched.
(Default null)
         * @return boolean True when the API response reports a rollback revision, false otherwise.
         **/
        function rollback ($title,$user,$reason,$token = null) {
            if (($token == null) or ($token == '')) {
                /* No token supplied: fetch the latest revision together with a rollback token. */
                $rev = $this->revisions($title,1,'older',false,null,true,true);
                print_r($rev);
                /* NOTE(review): [0] => user / rollbacktoken assume the MediaWiki revision record layout -- confirm. */
                /* Only roll back when $user really is the author of the top revision. */
                if (isset($rev[0]['user']) and ($rev[0]['user'] == $user)) {
                    $token = isset($rev[0]['rollbacktoken']) ? $rev[0]['rollbacktoken'] : null;
                } else {
                    return false;
                }
            }
            $params = array(
                'action' => 'rollback',
                'format' => 'php',
                'title' => $title,
                'user' => $user,
                'summary' => $reason,
                'token' => $token,
                'markbot' => 0
            );

            echo 'Posting to API: ';
            var_export($params);

            $x = $this->http->post($this->apiurl,$params);
            $x = unserialize($x);
            var_export($x);
            /* NOTE(review): rollback => summary / revid assume the MediaWiki action=rollback layout -- confirm. */
            return ( isset($x['rollback']['summary']) and isset( $x['rollback']['revid'] ) and $x['rollback']['revid'] )
                ? true
                : false;
        }
    }

    /**
     * This class is for interacting with Wikipedia's browser interface, index.php.
     * Many of these functions are deprecated.
     **/
    class wikipediaindex {
        private $http;
        /* Must contain 'index.php': post() derives the query.php and api.php URLs from it with str_replace(). */
        public $indexurl = 'http://en.wikipedia.org/w/index.php';
        private $postinterval = 0;
        private $lastpost;
        private $edittoken;

        /**
         * This is our constructor.  Shares the global http object with the other wiki classes.
         * @return void
         **/
        function __construct () {
            global $__wp__http;
            if (!isset($__wp__http)) {
                $__wp__http = new http;
            }
            $this->http = &$__wp__http;
        }

        /**
         * Post data to a page, nicely.
         * @param $page Page title.
         * @param $data Data to post to page.
         * @param $summery Edit summary.  (Default '')
         * @param $minor Whether to mark edit as minor.  (Default false)
         * @param $rv Revision data.  If not given, it will be fetched.  (Default null)
         * @param $bot Whether to mark edit as bot.  (Default true)
         * @return HTML data from the page.
* @deprecated * @see wikipediaapi::edit **/ function post ($page,$data,$summery = '',$minor = false,$rv = null,$bot = true) { global $user; global $maxlag; global $irc; global $irctechchannel; global $run; global $maxlagkeepgoing; $wpq = new wikipediaquery; $wpq->queryurl = str_replace('index.php','query.php',$this->indexurl); $wpapi = new wikipediaapi; $wpapi->apiurl = str_replace('index.php','api.php',$this->indexurl); if ((!$this->edittoken) or ($this->edittoken == '')) $this->edittoken = $wpapi->getedittoken(); if ($rv == null) $rv = $wpapi->revisions($page,1,'older',true); if (!$rv) $rv = $wpq->getpage($page); //Fake the edit form. $now = gmdate('YmdHis', time()); $token = htmlspecialchars($this->edittoken); $tmp = date_parse($rv); $edittime = gmdate('YmdHis', gmmktime($tmp,$tmp,$tmp,$tmp,$tmp,$tmp)); $html = "<input type='hidden' value=\"{$now}\" name=\"wpStarttime\" />\n"; $html.= "<input type='hidden' value=\"{$edittime}\" name=\"wpEdittime\" />\n"; $html.= "<input type='hidden' value=\"{$token}\" name=\"wpEditToken\" />\n"; $html.= '<input name="wpAutoSummary" type="hidden" value="'.md5('').'" />'."\n"; if (preg_match('/'.preg_quote('{{nobots}}','/').'/iS',$rv)) { return false; } /* Honor the bots flags */ if (preg_match('/'.preg_quote('{{bots|allow=none}}','/').'/iS',$rv)) { return false; } if (preg_match('/'.preg_quote('{{bots|deny=all}}','/').'/iS',$rv)) { return false; } if (preg_match('/'.preg_quote('{{bots|deny=','/').'(.*)'.preg_quote('}}','/').'/iS',$rv,$m)) { if (in_array(explode(',',$m),$user)) { return false; } } /* /Honor the bots flags */ if (!preg_match('/'.preg_quote($user,'/').'/iS',$rv)) { return false; } /* We need to be logged in */ // if (preg_match('/'.preg_quote('You have new messages','/').'/iS',$rv)) { return false; } /* Check talk page */ if (!preg_match('/(yes|enable|true)/iS',((isset($run))?$run:$wpq->getpage('User:'.$user.'/Run')))) { return false; } /* Check /Run page */ $x = 
$this->forcepost($page,$data,$summery,$minor,$html,$maxlag,$maxlagkeepgoing,$bot); /* Go ahead and post. */ $this->lastpost = time(); return $x; } /** * Post data to a page. * @param $page Page title. * @param $data Data to post to page. * @param $summery Edit summary. (Default '') * @param $minor Whether to mark edit as minor. (Default false) * @param $edithtml HTML from the edit form. If not given, it will be fetched. (Default null) * @param $maxlag Maxlag for posting. (Default null) * @param $mlkg Whether to keep going after encountering a maxlag error and sleeping or not. (Default null) * @param $bot Whether to mark edit as bot. (Default true) * @return HTML data from the page. * @deprecated * @see wikipediaapi::edit **/ function forcepost ($page,$data,$summery = '',$minor = false,$edithtml = null,$maxlag = null,$mlkg = null,$bot = true) { $post = ''; $post = ''; if ($minor == true) { $post = 1; } $post = $data; $post = $summery; if ($edithtml == null) { $html = $this->http->get($this->indexurl.'?title='.urlencode($page).'&action=edit'); } else { $html = $edithtml; } preg_match('|\<input type\=\\\'hidden\\\' value\=\"(.*)\" name\=\"wpStarttime\" /\>|U',$html,$m); $post = $m; preg_match('|\<input type\=\\\'hidden\\\' value\=\"(.*)\" name\=\"wpEdittime\" /\>|U',$html,$m); $post = $m; preg_match('|\<input type\=\\\'hidden\\\' value\=\"(.*)\" name\=\"wpEditToken\" /\>|U',$html,$m); $post = $m; preg_match('|\<input name\=\"wpAutoSummary\" type\=\"hidden\" value\=\"(.*)\" /\>|U',$html,$m); $post = $m; if ($maxlag != null) { $x = $this->http->post($this->indexurl.'?title='.urlencode($page).'&action=submit&maxlag='.urlencode($maxlag).'&bot='.(($bot == true)?'1':'0'),$post); if (preg_match('/Waiting for (*): (+) seconds lagged/S',$x,$lagged)) { global $irc; if (is_resource($irc)) { global $irctechchannel; foreach(explode(',',$irctechchannel) as $y) { fwrite($irc,'PRIVMSG '.$y.' :'.$lagged.' is lagged out by '.$lagged.' seconds. 
('.$lagged.')'."\n"); } } sleep(10); if ($mlkg != true) { return false; } else { $x = $this->http->post($this->indexurl.'?title='.urlencode($page).'&action=submit&bot='.(($bot == true)?'1':'0'),$post); } } return $x; } else { return $this->http->post($this->indexurl.'?title='.urlencode($page).'&action=submit&bot='.(($bot == true)?'1':'0'),$post); } } /** * Get a diff. * @param $title Page title to get the diff of. * @param $oldid Old revision ID. * @param $id New revision ID. * @param $wait Whether or not to wait for the diff to become available. (Default true) * @return Array of added data, removed data, and a rollback token if one was fetchable. **/ function diff ($title,$oldid,$id,$wait = true) { $deleted = ''; $added = ''; $html = $this->http->get($this->indexurl.'?title='.urlencode($title).'&action=render&diff='.urlencode($id).'&oldid='.urlencode($oldid).'&diffonly=1'); if (preg_match_all('/\&\;(oldid\=)(\d*)\\\'\>(Revision as of|Current revision as of)/USs', $html, $m, PREG_SET_ORDER)) { //print_r($m); if ((($oldid != $m) and (is_numeric($oldid))) or (($id != $m) and (is_numeric($id)))) { if ($wait == true) { sleep(1); return $this->diff($title,$oldid,$id,false); } else { echo 'OLDID as detected: '.$m.' Wanted: '.$oldid."\n"; echo 'NEWID as detected: '.$m.' Wanted: '.$id."\n"; echo $html; die('Revision error.'."\n"); } } } if (preg_match_all('/\<td class\=(\"|\\\')diff-addedline\1\>\<div\>(.*)\<\/div\>\<\/td\>/USs', $html, $m, PREG_SET_ORDER)) { //print_r($m); foreach ($m as $x) { $added .= htmlspecialchars_decode(strip_tags($x))."\n"; } } if (preg_match_all('/\<td class\=(\"|\\\')diff-deletedline\1\>\<div\>(.*)\<\/div\>\<\/td\>/USs', $html, $m, PREG_SET_ORDER)) { //print_r($m); foreach ($m as $x) { $deleted .= htmlspecialchars_decode(strip_tags($x))."\n"; } } //echo $added."\n".$deleted."\n"; if (preg_match('/action\=rollback\&\;from\=.*\&\;token\=(.*)\"/US', $html, $m)) { $rbtoken = $m; $rbtoken = urldecode($rbtoken); // echo 'rbtoken: '.$rbtoken.' 
-- '; print_r($m); echo "\n\n"; return array($added,$deleted,$rbtoken); } return array($added,$deleted); } /** * Rollback an edit. * @param $title Page title to rollback. * @param $user Username of last edit to the page to rollback. * @param $reason Reason to rollback. If null, default is generated. (Default null) * @param $token Rollback token to use. If null, it is fetched. (Default null) * @param $bot Whether or not to mark as bot. (Default true) * @return HTML or false if failure. * @deprecated * @see wikipediaapi::rollback **/ function rollback ($title,$user,$reason = null,$token = null,$bot = true) { if (($token == null) or (!$token)) { $wpapi = new wikipediaapi; $wpapi->apiurl = str_replace('index.php','api.php',$this->indexurl); $token = $wpapi->revisions($title,1,'older',false,null,true,true); if ($token == $user) { // echo 'Token: '; print_r($token); echo "\n\n"; $token = $token; } else { return false; } } $x = $this->http->get($this->indexurl.'?title='.urlencode($title).'&action=rollback&from='.urlencode($user).'&token='.urlencode($token).(($reason != null)?'&summary='.urlencode($reason):'').'&bot='.(($bot == true)?'1':'0')); global $logfd; if (!is_resource($logfd)) $logfd = fopen('php://stderr','w'); fwrite($logfd,'Rollback return: '.$x."\n"); if (!preg_match('/action complete/iS',$x)) return false; return $x; } /** * Move a page. * @param $old Page title to move. * @param $new New title to move to. * @param $reason Move page summary. * @return HTML page. 
* @deprecated * @see wikipediaapi::move **/ function move ($old,$new,$reason) { $wpapi = new wikipediaapi; $wpapi->apiurl = str_replace('index.php','api.php',$this->indexurl); if ((!$this->edittoken) or ($this->edittoken == '')) $this->edittoken = $wpapi->getedittoken(); $token = htmlspecialchars($this->edittoken); $post = array ( 'wpOldTitle' => $old, 'wpNewTitle' => $new, 'wpReason' => $reason, 'wpWatch' => '0', 'wpEditToken' => $token, 'wpMove' => 'Move page' ); return $this->http->post($this->indexurl.'?title=Special:Movepage&action=submit',$post); } /** * Uploads a file. * @param $page Name of page on the wiki to upload as. * @param $file Name of local file to upload. * @param $desc Content of the file description page. * @return HTML content. **/ function upload ($page,$file,$desc) { $post = array ( 'wpUploadFile' => '@'.$file, 'wpSourceType' => 'file', 'wpDestFile' => $page, 'wpUploadDescription' => $desc, 'wpLicense' => '', 'wpWatchthis' => '0', 'wpIgnoreWarning' => '1', 'wpUpload' => 'Upload file' ); return $this->http->post($this->indexurl.'?title=Special:Upload&action=submit',$post); } /** * Check if a user has email enabled. * @param $user Username to check whether or not the user has email enabled. * @return True or false depending on whether or not the user has email enabled. **/ function hasemail ($user) { $tmp = $this->http->get($this->indexurl.'?title=Special:EmailUser&target='.urlencode($user)); if (stripos($tmp,"No e-mail address") !== false) return false; return true; } /** * Sends an email to a user. * @param $user Username to send email to. * @param $subject Subject of email to send. * @param $body Body of email to send. * @return HTML content. 
**/ function email ($user,$subject,$body) { $wpapi = new wikipediaapi; $wpapi->apiurl = str_replace('index.php','api.php',$this->indexurl); if ((!$this->edittoken) or ($this->edittoken == '')) $this->edittoken = $wpapi->getedittoken(); $post = array ( 'wpSubject' => $subject, 'wpText' => $body, 'wpCCMe' => 0, 'wpSend' => 'Send', 'wpEditToken' => $this->edittoken ); return $this->http->post($this->indexurl.'?title=Special:EmailUser&target='.urlencode($user).'&action=submit',$post); } } ?>
Diff function (diff.function.php)
<?PHP
	/**
	 * Return the lines (or, in inline mode, the words) that differ between two texts.
	 * Both texts are written to temporary files and compared with the external
	 * `diff` program (or `wdiff` when $inline is true).
	 * @param $old Old text.
	 * @param $new New text.
	 * @param $nret If true return what was added in $new, otherwise what was removed from $old. (Default true)
	 * @param $inline Compare word by word with wdiff instead of line by line with diff. (Default false)
	 * @return The selected differences joined by newlines ('' if there are none or the temp files could not be created).
	 **/
	function diff ($old,$new,$nret = true,$inline = false) {
		$file1 = tempnam('/tmp','diff_');
		$file2 = tempnam('/tmp','diff_');
		if (($file1 === false) or ($file2 === false)) {
			/* Could not create scratch files: clean up whichever one exists and report no differences. */
			if ($file1 !== false) unlink($file1);
			if ($file2 !== false) unlink($file2);
			return '';
		}
		file_put_contents($file1,$old);
		file_put_contents($file2,$new);

		$out = array();
		if ($inline) {
			/* -3 drops common text; -1 additionally drops deletions, -2 drops insertions. */
			@exec('wdiff -3'.(($nret)?'1':'2').' '.escapeshellarg($file1).' '.escapeshellarg($file2),$out);
			foreach ($out as $key => $line) {
				/* Drop wdiff's separator rows and tag the rest like diff output so one filter serves both modes. */
				if ($line == '======================================================================') unset($out[$key]);
				elseif ($nret) $out[$key] = '> '.$line;
				else $out[$key] = '< '.$line;
			}
		} else {
			@exec('diff -d --suppress-common-lines '.escapeshellarg($file1).' '.escapeshellarg($file2),$out);
		}

		/* Keep only the side that was asked for and strip the two-character marker. */
		$marker = ($nret) ? '> ' : '< ';
		$wanted = array();
		foreach ($out as $line) {
			if (substr($line,0,2) === $marker) {
				$wanted[] = substr($line,2);
			}
		}

		unlink($file1);
		unlink($file2);

		return implode("\n",$wanted);
	}
?>
Source to ClueBot
<?PHP
	declare(ticks = 1);

	/**
	 * Signal handler: reap every child process that has already exited.
	 * @param $signo Number of the signal received.
	 * @return void
	 **/
	function sig_handler($signo) {
		switch ($signo) {
			case SIGCHLD:
				/* WNOHANG: never block; returns 0 when no further child has exited, -1 when there are none. */
				while (($x = pcntl_waitpid(0, $status, WNOHANG)) != -1) {
					if ($x == 0) break;
					$status = pcntl_wexitstatus($status);
				}
				break;
		}
	}

	pcntl_signal(SIGCHLD, "sig_handler");

	/**
	 * Score a text against a list of weighted regular expressions.
	 * @param $list Array mapping regular expression (with delimiters) => points per match.
	 * @param $data Text to score.
	 * @param $matches If given, receives regex => number of matches for every regex that matched;
	 *                 entries are added to whatever the variable already holds. (Default null)
	 * @return Sum of points * number of matches over all regexes.
	 **/
	function score ($list,$data,&$matches = null) {
		$ret = 0;
		foreach ($list as $preg => $pts) {
			/* The appended 'S' modifier asks PCRE to study the pattern. */
			if ($x = preg_match_all($preg.'S',$data,$m)) {
				/* Record the count per regex; assigning to $matches itself would lose all but the last hit. */
				$matches[$preg] = $x;
				$ret += $pts * $x;
			}
		}
		return $ret;
	}

	/**
	 * fnmatch() that also works on long strings.
	 * Strings of 4000 bytes or more are matched with an equivalent regular expression instead.
	 * @param $pattern Shell wildcard pattern.
	 * @param $string String to test.
	 * @return True if $string matches $pattern, false otherwise.
	 **/
	function myfnmatch ($pattern,$string) {
		if (strlen($string) < 4000) {
			return fnmatch($pattern,$string);
		} else {
			/* Quote everything, then turn the quoted wildcards back into their regex equivalents. */
			$pattern = strtr(preg_quote($pattern, '#'), array('\*' => '.*', '\?' => '.', '\[' => '[', '\]' => ']'));
			if (preg_match('#^'.$pattern.'$#',$string)) return true;
			return false;
		}
	}

	include '../diff.function.php'; /* The diff function. */
	include '../wikibot.classes.php'; /* The wikipedia classes. */
	include 'cluebot.config.php'; /* This file is very simple, but it contains sensitive information, we just define $user, $ircserver, $ircport, $ircchannel, $pass, $owner, and $status. */
	include 'cluebot.scorelist.php'; /* This file is uploaded as well as the main file. */

	$wpapi	= new wikipediaapi;
	$wpq	= new wikipediaquery;
	$wpi	= new wikipediaindex;
	var_export($wpapi->login($user,$pass));

	$mysql = mysql_pconnect($mysqlhost.':'.$mysqlport,$mysqluser,$mysqlpass);
	if (!$mysql) { die('Could not connect: ' . mysql_error()); }
	if (!mysql_select_db($mysqldb, $mysql)) { die ('Can\'t use database : ' .
mysql_error()); } $ircconfig = explode("\n",$wpq->getpage('User:'.$owner.'/CBChannels.js')); $tmp = array(); foreach($ircconfig as $tmpline) { if (substr($tmpline,0,1) != '#') { $tmpline = explode('=',$tmpline,2); $tmp)] = trim($tmpline); } } $ircchannel = $tmp; $ircdebugchannel = $tmp; $ircreportchannel = $tmp; $ircvandalismchannel = $tmp; $ircaivchannel = $tmp; $irctechchannel = $tmp; $ircproxychannels = $tmp; $ircunrevertedchannels = $tmp; $ircbagtrialchannels = $tmp; $ircotherchannels = $tmp; unset($tmp,$tmpline); $stalkbots = array(); $trialbots = explode("\n",$wpq->getpage('Misplaced Pages:Bots/Requests for approval')); foreach ($trialbots as $trialbot) if (preg_match('/\{\{BRFA\|(.*)\|.*\|Trial\}\}/',str_replace(array("\n","\r"),'',$trialbot),$m)) $stalkbots] = 1; $irc = fsockopen($ircserver,$ircport,$ircerrno,$ircerrstr,15); $ircpid = pcntl_fork(); if ($ircpid == 0) { fwrite($irc,'PASS '.$ircpass."\n"); fwrite($irc,'USER '.$user.' "1" "1" :ClueBot Misplaced Pages Bot.'."\n"); fwrite($irc,'NICK '.$user."\n"); while (!feof($irc)) { $data = str_replace(array("\n","\r"),'',fgets($irc,1024)); // echo 'IRC: '.$data."\n"; $d = explode(' ',$data); if (strtolower($d) == 'ping') { fwrite($irc,'PONG '.$d."\n"); } elseif (($d == '376') or ($d == '422')) { // fwrite($irc,'PRIVMSG NickServ :identify '.$pass."\n"); // sleep(2); fwrite($irc,'JOIN '.$ircchannel.','.$ircdebugchannel.','.$ircreportchannel.','.$ircvandalismchannel.','.$ircaivchannel.','.$irctechchannel.','.$ircproxychannels.','.$ircunrevertedchannels.','.$ircbagtrialchannels.','.$ircotherchannels."\n"); foreach (explode(',',$ircchannel) as $y) { fwrite($irc,'PRIVMSG '.$y.' 
:IRC logging enabled.'."\n"); } } elseif (strtolower($d) == 'privmsg') { if (substr($d,0,2) == ':!') { if (strtolower($d) == '#wikipedia-en') { $tmp = explode('!',substr($d,1)); $cmd = 'NOTICE '.$tmp; } elseif (strtolower($d) == strtolower($user)) { $tmp = explode('!',substr($d,1)); $cmd = 'NOTICE '.$tmp; } else { $cmd = 'PRIVMSG '.$d; } switch (substr(strtolower($d),2)) { case 'edit': if (preg_match("/\\]/",$data,$m)) { $rv = $wpapi->revisions($m,1,'older'); fwrite($irc,$cmd.' :.']] http://en.wikipedia.org/search/?title='.urlencode($m).'&diff=prev' . '&oldid='.urlencode($rv).' * '.$rv.' * '.$rv."\n"); } else { fwrite($irc,$cmd.' :Couldn\'t find link.'."\n"); } break; case 'stalk': if (preg_match("/\\]/",$data,$m)) { $uc = $wpapi->usercontribs($m,1); fwrite($irc,$cmd.' :.']] http://en.wikipedia.org/search/?title='.urlencode($uc).'&diff=prev' . '&oldid='.urlencode($uc).' * '.$m.' * '.$uc."\n"); } else { fwrite($irc,$cmd.' :Couldn\'t find link.'."\n"); } break; case 'beaten': if (preg_match("/\\]/",$data,$m)) { if (!mysql_ping($mysql)) { $mysql = mysql_pconnect($mysqlhost.':'.$mysqlport,$mysqluser,$mysqlpass); mysql_select_db($mysqldb, $mysql); } $x = mysql_fetch_assoc(mysql_query('SELECT COUNT(`id`) AS `count` FROM `beaten` WHERE `user` = \''.mysql_real_escape_string($m).'\' GROUP BY `user`')); $y = mysql_fetch_assoc(mysql_query('SELECT SQL_CALC_FOUND_ROWS COUNT(`id`) AS `count2` FROM `beaten` GROUP BY `user` HAVING `count2` > \''.mysql_real_escape_string($x).'\' LIMIT 1')); $z = mysql_fetch_assoc(mysql_query('SELECT FOUND_ROWS() as `ahead`')); fwrite($irc,$cmd.' :.']] has beaten me '.(($x != '')?$x:'0').' times. There are '.$z.' users who have beaten me more times.'."\n"); unset($x,$y); } else { fwrite($irc,$cmd.' :Couldn\'t find link.'."\n"); } break; case 'vandalcount': if (preg_match("/\\]/",$data,$m)) { $tmp = unserialize(file_get_contents('oftenvandalized.txt')); if (isset($tmp])) { fwrite($irc,$cmd.' :.']] has been vandalized '.count($tmp]).' 
time(s) in the last 48 hours.'."\n"); } else { fwrite($irc,$cmd.' :.']] has not been vandalized in the last 48 hours.'."\n"); } } else { fwrite($irc,$cmd.' :Couldn\'t find link.'."\n"); } break; case 'heuristics': include 'cluebot.heuristics.config.php'; $stats = unserialize(file_get_contents('cluebot.heuristics.stats.txt')); fwrite($irc,$cmd.' :I have the following heuristics enabled: '.implode(', ',$heuristics).".\n"); foreach ($stats as $heuristic => $count) { fwrite($irc,$cmd.' :The '.$heuristic.' heuristic has been matched '.$count.' times.'."\n"); } unset($count,$heuristic,$stats,$heuristics); break; case 'status': $ov = unserialize(file_get_contents('oftenvandalized.txt')); foreach ($ov as $title => $array) { if (count($array) == 0) unset($ov); } file_put_contents('oftenvandalized.txt',serialize($ov)); $count = count($ov); $titles = unserialize(file_get_contents('titles.txt')); foreach ($titles as $title => $time) { if ((time() - $time) > (24*60*60)) { unset($titles); } } file_put_contents('titles.txt',serialize($titles)); $tcount = count($titles); foreach ($ov as $x => $y) { $ocount = count($y); } arsort($ocount); foreach ($ocount as $x => $y) { $mova = $x; $movacount = $y; break; } preg_match('/\(\'\'\'\*)\|more...\]\]\'\'\'\)/iU',$wpq->getpage('Misplaced Pages:Today\'s featured article/'.date('F j, Y')),$tfa); $tfa = $tfa; if (!preg_match('/(yes|enable|true)/i',$wpq->getpage('User:'.$user.'/Run'))) { $run = false; } else { $run = true; } $top5beat = array(); if (!mysql_ping($mysql)) { $mysql = mysql_pconnect($mysqlhost.':'.$mysqlport,$mysqluser,$mysqlpass); mysql_select_db($mysqldb, $mysql); } $q = mysql_query('SELECT `user`,COUNT(`id`) AS `count` FROM `cluebot_enwiki`.`beaten` WHERE `user` != \'\' GROUP BY `user` HAVING `count` > 1 ORDER BY `count` DESC LIMIT 5'); while ($x = mysql_fetch_assoc($q)) { $top5beat = $x.' ('.$x.')'; } unset($x,$q); $top5beat = implode(' - ',$top5beat); fwrite($irc,$cmd.' :I am '.$user.'. 
I am currently '.($run?'enabled':'disabled').'. I currently have '.$wpq->contribcount($user).' contributions.'."\n"); fwrite($irc,$cmd.' :I have attempted to revert '.$tcount.' unique article/user combinations in the last 24 hours. ' . 'I know of '.$count.' different articles that have been vandalized in the last 48 hours.'."\n" ); fwrite($irc,$cmd.' :] is the most vandalized page with a total of '.$movacount.' vandalisms in the last 48 hours. ' . 'Today\'s featured article is: ].'."\n" ); fwrite($irc,$cmd.' :The following users have beat me to the revert the most: '.$top5beat."\n"); fwrite($irc,$cmd.' :I log all information to '.$ircchannel.'. This channel is '.$d.'.'."\n"); unset($x,$y,$count,$ov,$tcount,$ocount,$mova,$movacount,$tfa,$run,$title,$titles,$time,$top5beat); break; case 'warninglevel': if (preg_match("/\\]/",$data,$n)) { $warning = 0; if (preg_match_all('/<!-- Template:(uw-*(\d)(im)?|Blatantvandal \(serious warning\)) -->.*(\d{2}):(\d{2}), (\d+) (+) (\d{4}) \(UTC\)/iU', $wpq->getpage('User talk:'.$n), $match,PREG_SET_ORDER) ) { foreach ($match as $m) { $month = array('January' => 1, 'February' => 2, 'March' => 3, 'April' => 4, 'May' => 5, 'June' => 6, 'July' => 7, 'August' => 8, 'September' => 9, 'October' => 10, 'November' => 11, 'December' => 12 ); if ($m == 'Blatantvandal (serious warning)') $m = 4; if ((time() - gmmktime($m,$m,0,$month],$m,$m)) <= (2*24*60*60)) { if ($m > $warning) { $warning = $m; } } } } fwrite($irc,$cmd.' :.']] is at warning level '.$warning.".\n"); } else { fwrite($irc,$cmd.' :Couldn\'t find link.'."\n"); } break; case 'count': if (preg_match("/\\]/",$data,$n)) { fwrite($irc,$cmd.' :.']] has '.$wpq->contribcount($n)." contributions.\n"); } else { fwrite($irc,$cmd.' :Couldn\'t find link.'."\n"); } break; case 'help': fwrite($irc,$cmd.' :Please see ].'."\n"); break; case 'eval': $tmp = explode(' ',$data,6); $tmp1 = explode('!',substr($d,1)); if ($d == md5($thesecret.$tmp1.$tmp)) { eval($tmp); } else { fwrite($irc,$cmd.' 
:Code incorrect.'."\n"); } break; case 'cbproxy': $tmp = explode(' ',$data,6); $tmp1 = explode('!',substr($d,1)); if ($tmp1 == 'ClueBot-Bopm') { foreach (explode(',',$ircproxychannels) as $y) { fwrite($irc,'PRIVMSG '.$y.' :!admin '.$tmp."\n"); } $data = $wpq->getpage('Misplaced Pages:WikiProject on open proxies'); if (strpos($data,$tmp) === false) { $header = explode('{{proxyip2|127.0.0.1}} || Example',$data,2); $header .= '{{proxyip2|127.0.0.1}} || Example '; $footer = $header; $header = $header; $data = "\n".'{{proxyip2|'.$tmp.'}} || '.$tmp.' ~~~~'."\n"; $data = $header.$data.$footer; unset($header,$footer); $wpapi->edit('Misplaced Pages:WikiProject on open proxies',$data,'Adding '.$tmp.'.'); unset($data); } } break; } } } } die(); } $heuristics = "==Heuristics==\n\n===Config (cluebot.heuristics.config.php)===\n\n<pre>".htmlentities(file_get_contents('cluebot.heuristics.config.php'))."</pre>\n\n"; foreach (glob('heuristics/cluebot.*.heuristic.php') as $heuristic) $heuristics .= '==='.$heuristic."===\n\n<pre>".htmlentities(file_get_contents($heuristic))."</pre>\n\n"; unset($heuristic); $wpapi->edit('User:'.$user.'/Source', 'The following is automatically generated by ].\n\n\n\n==Classes (wikibot.classes.php)==\n\n<pre>" . htmlentities(file_get_contents('../wikibot.classes.php'))."</pre>\n\n\n\n==Diff function (diff.function.php)==\n\n<pre>" . htmlentities(file_get_contents('../diff.function.php'))."</pre>\n\n\n\n==Source to ".$user . "==\n\n".'<pre>'.htmlentities(file_get_contents(__FILE__))."</pre>\n\n\n\n" . $heuristics . "==Score list==\n\n".'<pre>'.htmlentities(file_get_contents('cluebot.scorelist.php'))."</pre>\n\n\n\n~~~~", 'Automated source upload.'); /* Our source code, we force post this because this is *our* page, and it triggers the nobots. */ unset($heuristics); $wpapi->edit('User:'.$user, "{{User:ClueBot Commons/Userpage}}\n", 'Automated bot userpage set.'); /* Our page, we force post this because this is *our* page. 
*/ $tfas = 0; $pipe = fopen('thepipe','w'); $stdin = fopen('php://stdin','r'); $run = $wpq->getpage('User:'.$user.'/Run'); $wl = $wpq->getpage('User:'.$user.'/Whitelist'); $optin = $wpq->getpage('User:'.$user.'/Optin'); $aoptin = $wpq->getpage('User:'.$user.'/AngryOptin'); unset($tmp,$tmp2,$tmp3); $tmp = explode("\n",$wpq->getpage('User:'.$owner.'/CBAutostalk.js')); foreach ($tmp as $tmp2) { if (substr($tmp2,0,1) != '#') { $tmp3 = explode('|',$tmp2,2); $stalk] = trim($tmp3); } } $tmp = explode("\n",$wpq->getpage('User:'.$owner.'/CBAutoedit.js')); foreach ($tmp as $tmp2) { if (substr($tmp2,0,1) != '#') { $tmp3 = explode('|',$tmp2,2); $edit] = trim($tmp3); } } unset($tmp,$tmp2,$tmp3); print_r($stalk); print_r($edit); while (1) { $feed = fsockopen($feedhost,$feedport,$feederrno,$feederrstr,30); if (!$feed) { sleep(10); $feed = fsockopen($feedhost,$feedport,$feederrno,$feederrstr,30); if (!$feed) die($feederrstr.' ('.$feederrno.')'); } fwrite($feed,'USER '.$user.' "1" "1" :ClueBot Misplaced Pages Bot.'."\n"); fwrite($feed,'NICK '.$user."\n"); while (!feof($feed)) { $rawline = fgets($feed,1024); $line = str_replace(array("\n","\r","\002"),'',$rawline); $line = preg_replace('/\003(\d\d?(,\d\d?)?)?/','',$line); // echo 'FEED: '.$line."\n"; if (!$line) { fclose($feed); break; } $linea= explode(' ',$line,4); if (strtolower($linea) == 'ping') { fwrite($feed,'PONG '.$linea."\n"); } elseif (($linea == '376') or ($linea == '422')) { fwrite($feed,'JOIN '.$feedchannel."\n"); } elseif ((strtolower($linea) == 'privmsg') and (strtolower($linea) == strtolower($feedchannel))) { $message = substr($linea,1); if (preg_match('/^\*)\]\] (\S*) (http:\/\/en\.wikipedia\.org\/w\/index\.php\?diff=(\d*)&oldid=(\d*).*|http:\/\/en\.wikipedia\.org\/wiki\/\S+)? \* (*) \* (\((*)\))? 
(.*)$/S',$message,$m)) { $messagereceived = microtime(1); $change = $m; $change = $m; $change = $m; $change = $m; $change = $m; $change = $m; $change = $m; $change = $m; $change = $m; // include 'cluebot.stalk.config.php'; $stalkchannel = array(); foreach ($stalk as $key => $value) if (myfnmatch(str_replace('_',' ',$key),str_replace('_',' ',$change))) $stalkchannel = array_merge($stalkchannel,explode(',',$value)); foreach ($stalkbots as $key => $value) if (myfnmatch(str_replace('_',' ',$key),str_replace('_',' ',$change))) $stalkchannel = array_merge($stalkchannel,explode(',',$ircbagtrialchannels)); foreach ($edit as $key => $value) if (myfnmatch(str_replace('_',' ',$key),str_replace('_',' ',$change.$change))) $stalkchannel = array_merge($stalkchannel,explode(',',$value)); // if ($change == $owner) $stalkchannel = $ircchannel; $stalkchannel = array_unique($stalkchannel); foreach ($stalkchannel as $y) { fwrite($irc,'PRIVMSG '.$y.' :New edit: .$change.']] http://en.wikipedia.org/search/?title=' . urlencode($change.$change).'&diff=prev'.'&oldid='.urlencode($change).' * '.$change . 
' * '.$change."\n"); } if (($change == 'User:') or ($change == 'User talk:')) { if (strtolower($change) == strtolower($user.'/Run')) { $run = $wpq->getpage('User:'.$user.'/Run'); } if (strtolower($change) == strtolower($user.'/Whitelist')) { $wl = $wpq->getpage('User:'.$user.'/Whitelist'); } if (strtolower($change) == strtolower($user.'/Optin')) { $optin = $wpq->getpage('User:'.$user.'/Optin'); } if (strtolower($change) == strtolower($user.'/AngryOptin')) { $aoptin = $wpq->getpage('User:'.$user.'/AngryOptin'); } if (strtolower($change) == strtolower($owner.'/CBAutostalk.js')) { unset($stalk); $tmp = explode("\n",$wpq->getpage('User:'.$owner.'/CBAutostalk.js')); foreach ($tmp as $tmp2) { if (substr($tmp2,0,1) != '#') { $tmp3 = explode('|',$tmp2,2); $stalk] = trim($tmp3); } } unset($tmp,$tmp2,$tmp3); print_r($stalk); } if (strtolower($change) == strtolower($owner.'/CBAutoedit.js')) { unset($edit); $tmp = explode("\n",$wpq->getpage('User:'.$owner.'/CBAutoedit.js')); foreach ($tmp as $tmp2) { if (substr($tmp2,0,1) != '#') { $tmp3 = explode('|',$tmp2,2); $edit] = trim($tmp3); } } unset($tmp,$tmp2,$tmp3); print_r($edit); } if (strtolower($change) == strtolower($owner.'/CBChannels.js')) { $ircconfig = explode("\n",$wpq->getpage('User:'.$owner.'/CBChannels.js')); $tmp = array(); foreach($ircconfig as $tmpline) { if (substr($tmpline,0,1) != '#') { $tmpline = explode('=',$tmpline,2); $tmp)] = trim($tmpline); } } print_r($tmp); $tmpold = array(); $tmpnew = array(); foreach ($tmp as $tmp2) foreach (explode(',',$tmp2) as $tmp3) $tmpnew = 1; foreach (explode(',',$ircchannel.','.$ircdebugchannel.','.$ircreportchannel.','.$ircvandalismchannel.','.$ircaivchannel.','.$irctechchannel.','.$ircproxychannels.','.$ircunrevertedchannels.','.$ircbagtrialchannels.','.$ircotherchannels) as $tmp3) $tmpold = 1; foreach ($tmpold as $tmp2 => $tmp3) if (isset($tmpnew)) unset($tmpold,$tmpnew); foreach ($tmpnew as $tmp2 => $tmp3) $tmpnew1 = $tmp2; foreach ($tmpold as $tmp2 => $tmp3) $tmpold1 = 
$tmp2; $tmpold = $tmpold1; $tmpnew = $tmpnew1; unset($tmpold1,$tmpnew1); fwrite($irc,'JOIN '.implode(',',$tmpnew)."\n"); fwrite($irc,'PART '.implode(',',$tmpold)."\n"); $ircchannel = $tmp; $ircdebugchannel = $tmp; $ircreportchannel = $tmp; $ircvandalismchannel = $tmp; $ircaivchannel = $tmp; $irctechchannel = $tmp; $ircproxychannels = $tmp; $ircunrevertedchannels = $tmp; $ircbagtrialchannels = $tmp; $ircotherchannels = $tmp; unset($tmp,$tmpline,$tmpold,$tmpnew,$tmp2,$tmp3); } } if ($change.$change == 'Misplaced Pages:Bots/Requests for approval') { $stalkbots = array(); $trialbots = explode("\n",$wpq->getpage('Misplaced Pages:Bots/Requests for approval')); foreach ($trialbots as $trialbot) if (preg_match('/\{\{BRFA\|(.*)\|.*\|Trial\}\}/',str_replace(array("\n","\r"),'',$trialbot),$m)) $stalkbots] = 1; } if (($change != '') and ((!preg_match('/\* \.$change,'/').')\]\] \- .*/i',$optin))) and ($change != 'move') and ($change != 'Template:')) continue; $change = $change; $change = $change.$change; if ($change == 'move') { if (preg_match('/moved \\] to \\]( over redirect)?: (.*)$/',$change,$m)) { $change = $m; $change = $m; $change = $change; $change = $m; echo "\n\n\n".'Move!'."\n\n\n"; print_r($change); echo "\n\n\n".'Move!'."\n\n\n"; } } if ( ((time() - $tfas) >= 1800) and (preg_match('/\(\'\'\'\*)\|more...\]\]\'\'\'\)/iU',$wpq->getpage('Misplaced Pages:Today\'s featured article/'.date('F j, Y')),$tfam)) ) { $tfas = time(); $tfa = $tfam; //echo "TFA: ".$tfa."\n"; } $s = null; $pid = @pcntl_fork(); if ($pid != 0) continue; $hutime = microtime(1); include 'cluebot.heuristics.config.php'; foreach ($heuristics as $heuristic) { $heuristicret = false; include 'heuristics/cluebot.'.$heuristic.'.heuristic.php'; if ($heuristicret == true) { $stats = unserialize(file_get_contents('cluebot.heuristics.stats.txt')); $stats++; print_r($log); file_put_contents('cluebot.heuristics.stats.txt',serialize($stats)); unset($stats); break; } } if ($heuristicret == true) { echo 'Heuristics 
time: '.(microtime(1) - $hutime)."\n"; /*file_put_contents('trainingdata.txt',$change."\0".$change."\0".'1'."\n",FILE_APPEND);*/ } else { $tmp = explode(' ',$rawline,4); $tmp = $tmp; $udp = fsockopen('udp://localhost',3333); fwrite($udp,substr(str_replace(array("\n","\r"),'',$tmp),1)."\n"); fclose($udp); unset($tmp,$udp); $d = $wpi->diff($change,$change,$change); $s = score($obscenelist,$d,$log); $s -= score($obscenelist,$d,$log); // if ($s > 15) file_put_contents('trainingdata.txt',$change."\0".$change."\0".'0'."\n",FILE_APPEND); } unset($hutime); if ( ($heuristicret == true) ) { if ( ( ( /* IP users with 250 contributions are fine .. */ (long2ip(ip2long($change)) == $change) /* and ($uc = $wpapi->usercontribs($change,250)) and (!isset($uc)) */ ) or ( /* Users with 50 contributions are fine .. */ (long2ip(ip2long($change)) != $change) and ($wpq->contribcount($change) < 50) ) ) and ( /* Whitelisted users are ok. */ /* ($wl = $wpq->getpage('User:'.$user.'/Whitelist')) and */ (!preg_match('/^\* \,'/').')|\1\]\] \- .*/',$wl)) ) ) { // $vandalpage = $wpq->getpage('User:'.$user.'/PossibleVandalism'); // $x = explode("\n\n",$vandalpage); // foreach ($x as $k => $y) { // if (preg_match('/(\d+)\-(\d+)\-(\d+)T(\d+):(\d+):(\d+)/',$y,$m)) { // if ((time() - gmmktime($m,$m,$m,$m,$m,$m)) > (5*60*60)) { // unset($x); // } // } // } // $vandalpage = implode("\n\n",$x); $diff = 'http://en.wikipedia.org/search/' . '?title='.urlencode($change) . '&diff='.urlencode($change) . '&oldid='.urlencode($change); $report = ').']] was ' . (($change != 'move')?' by ':'moved to ).']] by ') . '.'|'.$change.']] ' . '.'|(u)]] ' . '.'|(t)]] ' . $reason.' on '.gmdate('c'); // $datatopost = $vandalpage."\n\n".'Possible ]: '.$report." 
~~~~\n"; if ($s == null) { // $rv = $wpapi->revisions($change,2,'older',true,$change); // $s = score($scorelist,diff($rv,$rv)); // $s += (score($scorelist,diff($rv,$rv,false))) * -1; $s = 'N/A'; } $tmp = unserialize(file_get_contents('oftenvandalized.txt')); if (rand(1,50) == 2) { foreach ($tmp as $key1 => $tmp2) { foreach ($tmp2 as $key2 => $time) { if ((time() - $time) > (2*24*60*60)) { unset($tmp); } } } } $tmp] = time(); if (count($tmp]) >= 30) { foreach (explode(',',$ircreportchannel) as $y) { fwrite($irc,'PRIVMSG '.$y.' :!admin .']] has been vandalized '.(count($tmp])).' times in the last 2 days.'."\n"); } } file_put_contents('oftenvandalized.txt',serialize($tmp)); if ( ( ($rv1 = $wpapi->revisions($change,1,'older')) and ($rv1 == $change) ) or ($change == 'move') ) { /* No need to continue further if it has been reverted */ echo 'Possible vandalism: '.$change.' changed by '.$change.' '.$reason.' on '.$rv.'('.$s.").\n"; foreach (explode(',',$ircdebugchannel) as $y) { fwrite($irc,'PRIVMSG '.$y.' :Possible vandalism: '.$change.' changed by '.$change.' '.$reason.' on '.$rv.'('.$s.").\n"); fwrite($irc,'PRIVMSG '.$y.' :( http://en.wikipedia.org/search/?title='.urlencode($change).'&action=history | '.$change.' )'."\n"); } fwrite($pipe,'http://en.wikipedia.org/search/?title='.urlencode($change).'&action=history'."\n"); /* Tell owner */ $mqtime = microtime(1); if (is_array($log)) { $logt = ''; foreach ($log as $k => $v) { $logt .= '* '.$v.' * "'.$k.'"'."\n"; } } $query = 'INSERT INTO `vandalism` ' . '(`id`,`user`,`article`,`heuristic`'.((is_array($log))?',`regex`':'').',`reason`,`diff`,`old_id`,`new_id`,`reverted`) ' . 'VALUES ' . '(NULL,\''.mysql_real_escape_string($change).'\',' . '\''.mysql_real_escape_string($change).'\',' . '\''.mysql_real_escape_string($heuristic).'\',' . ((is_array($log))?'\''.mysql_real_escape_string($logt).'\',':'') . '\''.mysql_real_escape_string($reason).'\',' . '\''.mysql_real_escape_string($change).'\',' . 
'\''.mysql_real_escape_string($change).'\',' . '\''.mysql_real_escape_string($change).'\',0)'; //echo 'Mysql query: '.$query."\n"; if (!mysql_ping($mysql)) { $mysql = mysql_pconnect($mysqlhost.':'.$mysqlport,$mysqluser,$mysqlpass); if (!$mysql) { die('Could not connect: ' . mysql_error()); } if (!mysql_select_db($mysqldb, $mysql)) { die ('Can\'t use database : ' . mysql_error()); } } mysql_query($query); //echo 'Mysql error: '.mysql_error()."\n"; $mysqlid = mysql_insert_id(); echo 'MySQL time: '.(microtime(1) - $mqtime).' MySQL id: '.$mysqlid."\n"; unset($mqtime); if ( ( ( (preg_match('/(assisted|manual)/iS',$status)) and (print('Revert ? ')) and (strtolower(substr(fgets($stdin,3),0,1)) == 'y') ) or ( (preg_match('/(read-write|rw|go|approved|trial)/iS',$status)) ) ) and ( /*ANGRY MODE*/ false or ( ( ((time() - $tfas) < 1800) or ( (preg_match('/\(\'\'\'\*)\|more...\]\]\'\'\'\)/iU',$wpq->getpage('Misplaced Pages:Today\'s featured article/'.date('F j, Y')),$tfam)) and ($tfas = time()) and ($tfa = $tfam) and ((print("TFA: ".$tfa."\n")) or (true)) ) ) and ($tfa == $change) ) or ( (preg_match('/\* \,'/').')\]\] \- .*/i',$aoptin)) and ((fwrite($irc,'PRIVMSG '.$ircdebugchannel.' :Angry-reverting .']].'."\n")) or (true)) ) or ( (($tmp = unserialize(file_get_contents('titles.txt'))) or true) and ((!isset($tmp.$change])) or ((time() - $tmp.$change]) > (24*60*60))) and ($tmp.$change] = time()) and ((file_put_contents('titles.txt',serialize($tmp))) or true) ) ) ) { echo 'Reverting ...'."\n"; if ($change != 'move') { $rev = $wpapi->revisions($change,5,'older',false,null,true,true); $revid = 0; $rbtok = $rev; foreach ($rev as $revdata) { if ($revdata != $change) { $revid = $revdata; break; } } if (($revdata == $user) or (in_array($revdata,explode(',',$botfriends)))) { die(); /* Do not revert to us. */ } } // if ($revid == 0) { die(); } foreach (explode(',',$ircdebugchannel) as $y) { fwrite($irc,'PRIVMSG '.$y.' 
:Reverting ...'."\n"); } // $revisiondata = $wpapi->revisions($change,1,'older',true,$revid); // if (!$revisiondata) die(); // if (!$rv1) $rv1 = $rv; // $wpi->post( // $change, // $revisiondata, // 'Reverting possible vandalism by .'|'.$change.']] ' . // 'to version by '.$revisiondata.'. ' . // 'False positive? ]. '. // 'Thanks, ]. ('.$mysqlid.') (Bot)', // false, // $rv1 // ); /* Revert the page */ if ($change != 'move') { if (!$rbtok) { $d = $wpi->diff($change,$change,$change); $rbtok = $d; } $rbret = $wpapi->rollback( $change, $change, 'Reverting possible vandalism by .'|'.$change.']] ' . 'to '.(($revid == 0)?'older version':'version by '.$revdata).'. ' . 'False positive? ]. '. 'Thanks, ]. ('.$mysqlid.') (Bot)', $rbtok ); } else { $rbret = $wpapi->move( $change, $change, 'Reverting possible vandalism by .'|'.$change.']] ' . 'to '.(($revid == 0)?'older version':'version by '.$revdata).'. ' . 'False positive? ]. '. 'Thanks, ]. ('.$mysqlid.') (Bot)' ); } // // $rv2 = $wpapi->revisions($change,1); // if ($rv2 == $user) { if ($rbret !== false) { foreach (explode(',',$ircdebugchannel) as $y) { fwrite($irc,'PRIVMSG '.$y.' :Reverted. ('.(microtime(1) - $messagereceived).' s)'."\n"); } $warning = 0; $tpcontent = $wpq->getpage('User talk:'.$change); if (preg_match_all('/<!-- Template:(uw-*(\d)(im)?|Blatantvandal \(serious warning\)) -->.*(\d{2}):(\d{2}), (\d+) (+) (\d{4}) \(UTC\)/iU', $tpcontent, $match,PREG_SET_ORDER) ) { foreach ($match as $m) { $month = array('January' => 1, 'February' => 2, 'March' => 3, 'April' => 4, 'May' => 5, 'June' => 6, 'July' => 7, 'August' => 8, 'September' => 9, 'October' => 10, 'November' => 11, 'December' => 12 ); if ($m == 'Blatantvandal (serious warning)') $m = 4; if ((time() - gmmktime($m,$m,0,$month],$m,$m)) <= (2*24*60*60)) { if ($m > $warning) { $warning = $m; } } } } $warning++; if ($warning == 5) { /* Report them if they have been warned 4 times. 
*/ $aivdata = $wpq->getpage('Misplaced Pages:Administrator_intervention_against_vandalism/TB2'); if (!preg_match('/'.preg_quote($change,'/').'/i',$aivdata)) { foreach(explode(',',$ircaivchannel) as $y) { fwrite($irc,'PRIVMSG '.$y.' :!admin Reporting .']] to ]. Contributions: .']] Block: .']]'."\n"); } foreach (explode(',',$ircvandalismchannel) as $y) { fwrite($irc,'PRIVMSG '.$y.' :rcbot bl add '.$change.' x='.(24*$warning).' r=Vandalism to .']] (#'.$warning.").\n"); } $wpapi->edit( 'Misplaced Pages:Administrator_intervention_against_vandalism/TB2', $aivdata . "\n\n* {{".((long2ip(ip2long($change)) == $change)?'IPvandal':'Vandal').'|'.$change.'}}' . ' - '.$report." (Automated) ~~~~\n", 'Automatically reporting .']]. (bot)', false, false ); } else { foreach (explode(',',$ircreportchannel) as $y) { fwrite($irc,'PRIVMSG '.$y.' :!admin .']] has vandalized at least one time while being listed on ]. Contributions: .']] Block: .']]'."\n"); } } } elseif ($warning < 5) { /* Warn them if they haven't been warned 4 times. */ foreach (explode(',',$ircvandalismchannel) as $y) { fwrite($irc,'PRIVMSG '.$y.' :rcbot bl add '.$change.' x='.(24*$warning).' r=Vandalism to .']] (#'.$warning.').'."\n"); } $wpapi->edit( 'User talk:'.$change, $tpcontent."\n\n" . '{{subst:User:'.$user.'/Warnings/Warning|1='.$warning.'|2='.str_replace('File:',':File:',$change).'|3='.$report.' <!{{subst:ns:0}}-- MySQL ID: '.$mysqlid.' --{{subst:ns:0}}>}} ~~~~'."\n", 'Warning .'|'.$change.']] - #'.$warning, false, false ); /* Warn the user */ } else { /* They have already been reported ... do nothing */ } if (!mysql_ping($mysql)) { $mysql = mysql_pconnect($mysqlhost.':'.$mysqlport,$mysqluser,$mysqlpass); if (!$mysql) { die('Could not connect: ' . mysql_error()); } if (!mysql_select_db($mysqldb, $mysql)) { die ('Can\'t use database : ' . 
mysql_error()); } } mysql_query('UPDATE `vandalism` SET `reverted` = 1 WHERE `id` = \''.mysql_real_escape_string($mysqlid).'\''); } else { $rv2 = $wpapi->revisions($change,1); if ($change != $rv2) { echo 'Grr! Beaten by '.$rv2.".\n"; foreach(explode(',',$ircdebugchannel) as $y) { fwrite($irc,'PRIVMSG '.$y.' :Grr! Beaten by '.$rv2.".\n"); } if (!mysql_ping($mysql)) { $mysql = mysql_pconnect($mysqlhost.':'.$mysqlport,$mysqluser,$mysqlpass); mysql_select_db($mysqldb, $mysql); } mysql_query('INSERT INTO `beaten` (`id`,`article`,`diff`,`user`) VALUES (NULL,\''.mysql_real_escape_string($change).'\',\''.mysql_real_escape_string($change).'\',\''.mysql_real_escape_string($rv2).'\')'); } } } else { foreach (explode(',',$ircunrevertedchannels) as $y) { fwrite($irc,'PRIVMSG '.$y.' :'."\002\00304Possible ignored vandalism: \002\003."\003]]\00304 changed by \003."\003]] \00303".$reason."\00304 on \00307".$rv."\003(\002\00313".$s."\003).\n"); fwrite($irc,'PRIVMSG '.$y.' :'."\002(\002\00312 http://en.wikipedia.org/search/?title=".urlencode($change)."&action=history \003\002|\002\00312 ".$change." \003\002)\002"."\n"); } $vandalpage = $wpq->getpage('User:'.$user.'/PossibleVandalism'); $x = explode("\n\n",$vandalpage); foreach ($x as $k => $y) { if (preg_match('(\d+)-(\d+)-(\d+)T(\d+):(\d+):(\d+)Z',$y,$m)) { if ((gmtime() - gmmktime($m,$m,$m,$m,$m,$m)) > (5*60*60)) { unset($x); } } } $vandalpage = implode("\n\n",$vandalpage); $wpapi->edit('User:'.$user.'/PossibleVandalism',$vandalpage."\n\n".'Possible ignored vandalism: .']] changed by .']] '.$reason.' on '.$rv.' ('.$s.')'.' ).'&action=history hist] .' diff] ~~~~','Adding possible vandalism',true,true); } } else { $rev = $wpapi->revisions($change,1); $rev = $rev; echo 'Possible corrected vandalism: '.$change.' changed by '.$change.' '.$reason.'('.$s.")\n\tReverted by ".$rev." before I saw it.\n"; foreach (explode(',',$ircdebugchannel) as $y) { fwrite($irc,'PRIVMSG '.$y.' :Possible corrected vandalism: '.$change.' 
changed by '.$change.' '.$reason.'('.$s.")\n"); fwrite($irc,'PRIVMSG '.$y.' :Reverted by '.$rev." before I saw it.\n"); } if (($rev != $user) and ($rev != $change)) { if (!mysql_ping($mysql)) { $mysql = mysql_pconnect($mysqlhost.':'.$mysqlport,$mysqluser,$mysqlpass); mysql_select_db($mysqldb, $mysql); } mysql_query('INSERT INTO `beaten` (`id`,`article`,`diff`,`user`) VALUES (NULL,\''.mysql_real_escape_string($change).'\',\''.mysql_real_escape_string($change).'\',\''.mysql_real_escape_string($rev).'\')'); } } } } die(); } } } } ?>
Heuristics
Config (cluebot.heuristics.config.php)
<?PHP
	/*
	 * Registry of heuristic names.
	 *
	 * Each name matches a file shown under the headings
	 * "heuristics/cluebot.<name>.heuristic.php". The entries are appended in
	 * the order they were originally listed.
	 *
	 * Fix: the previous text assigned every name to the same scalar variable,
	 * so each assignment overwrote the one before and only 'redirect' was left.
	 * The names are now appended to an array.
	 */
	$heuristics[] = 'grawp';
	$heuristics[] = 'evolution';
	$heuristics[] = 'avrillavigne';
	$heuristics[] = 'editsummary';
	$heuristics[] = 'pagereplace';
	$heuristics[] = 'pageblank';
	$heuristics[] = 'massdelete';
	$heuristics[] = 'massadd';
	$heuristics[] = 'tables';
	$heuristics[] = 'smallchange';
	$heuristics[] = 'claimjumperpete';
	$heuristics[] = 'sneaky';
	$heuristics[] = 'redirect';
?>
heuristics/cluebot.avrillavigne.heuristic.php
<?PHP
	/*
	 * Avril Lavigne vandal heuristic.
	 *
	 * Fires when $change matches either of two case-insensitive slogans.
	 * On a hit it sets $reason and $heuristicret and sends an "!admin" notice
	 * to every channel in the comma-separated $ircreportchannel list.
	 *
	 * Fixes relative to the previous single-line text:
	 *  - The disabled "//" alternative is back on its own lines; on one line it
	 *    commented out the remainder of the condition and the body.
	 *  - The IRC message had an unterminated string literal. The link target is
	 *    restored to match the Special:Contributions report that is visible
	 *    (disabled) in the redirect heuristic.
	 *  - "k**" is not a valid PCRE quantifier sequence.
	 *
	 * NOTE(review): the character class "[zs]" in the first pattern is a best
	 * guess at text that was stripped from this copy -- confirm against the
	 * canonical source.
	 * NOTE(review): $change is used bare everywhere in this copy; it presumably
	 * carried array subscripts (edit summary, user name) that were also
	 * stripped -- confirm before deploying.
	 */
	if ( /* The Avril Lavigne vandal */
		(
			(preg_match('/avril\s*lavigne\s*roc*k*[zs]*\s*my\s*soc*k*[zs]*/i',$change))
			or (preg_match('/she\s*(is\s*)*(so\s*)*.*hot\!.*using.*TW/i',$change))
//			or (
//				($change == 'Template:')
//				and (preg_match('/\{\{user((\s|_)talk)?:/i',$wpq->getpage($change)))
//			)
		)
		and ($reason = 'Avril Lavigne vandal?')
	) {
		$heuristicret = true;
		foreach (explode(',',$ircreportchannel) as $y) {
			fwrite($irc,'PRIVMSG '.$y.' :!admin Avril Lavigne vandal? [[Special:Contributions/'.$change."]] .\n");
		}
	}
?>
heuristics/cluebot.claimjumperpete.heuristic.php
<?PHP
	/*
	 * ClaimJumperPete heuristic.
	 *
	 * Fires when $change lies between 100 and 400 inclusive and the text in $d,
	 * lower-cased and trimmed, starts with an HTML comment containing one of
	 * three greeting phrases. When $change is above 200 the diff is fetched
	 * here through $wpi->diff(); otherwise whatever $d already holds is used.
	 * On a hit it sets $reason and $heuristicret and sends an "!admin" notice
	 * to every channel in the comma-separated $ircreportchannel list.
	 *
	 * NOTE(review): $change is compared with numbers and also passed as the
	 * title and revision arguments, so it presumably carried array subscripts
	 * that are missing from this copy -- confirm against the canonical source.
	 */
	if (($change >= 100) && ($change <= 400)) {
		/* The diff is only requested when the size test alone has not passed. */
		if (($change <= 200) || ($d = $wpi->diff($change,$change,$change))) {
			if (
				fnmatch("<!--*howdy y'all*-->*",trim(strtolower($d)))
				|| fnmatch("<!--*hello der*-->*",trim(strtolower($d)))
				|| fnmatch("<!--*happy editin' y'all*-->*",trim(strtolower($d)))
			) {
				$reason = 'ClaimJumperPete?';
				$heuristicret = true;
				foreach (explode(',',$ircreportchannel) as $y) {
					fwrite($irc,'PRIVMSG '.$y.' :!admin ClaimJumperPete vandal? http://en.wikipedia.org/search/?title='.urlencode($change).'&diff=prev'.'&oldid='.urlencode($change)." .\n");
				}
			}
		}
	}
?>
heuristics/cluebot.editsummary.heuristic.php
<?PHP
	/*
	 * Edit-summary heuristic.
	 *
	 * Fires when the lower-cased $change contains "nimp" followed later by
	 * "org"; sets $reason and $heuristicret.
	 *
	 * NOTE(review): the reason text refers to the edit summary, so $change
	 * presumably carried a subscript selecting that field which is missing
	 * from this copy -- confirm against the canonical source.
	 */
	if (fnmatch('*nimp*org*',strtolower($change))) {
		$reason = 'obscenities in edit summary';
		$heuristicret = true;
	}
?>
heuristics/cluebot.evolution.heuristic.php
<?PHP
	/*
	 * Evolution vandal heuristic.
	 *
	 * When $change equals 'Evolution', the page text is fetched into $pagedata
	 * and checked for "Genesis 1" followed later by "The beginning". On a hit
	 * it sets $reason and $heuristicret and sends an "!admin" notice with a
	 * diff URL to every channel in the comma-separated $ircreportchannel list.
	 *
	 * NOTE(review): $change is used as the title and as the oldid here; it
	 * presumably carried array subscripts that are missing from this copy --
	 * confirm against the canonical source.
	 */
	if ($change == 'Evolution') {
		/* Fetched unconditionally; an empty page simply fails the match below. */
		$pagedata = $wpq->getpage($change);
		if (fnmatch('*Genesis 1*The beginning*',$pagedata)) {
			$reason = 'replacing article with the Bible';
			$heuristicret = true;
			foreach (explode(',',$ircreportchannel) as $y) {
				fwrite($irc,'PRIVMSG '.$y.' :!admin Evolution vandal? http://en.wikipedia.org/search/?title='.urlencode($change).'&diff=prev'.'&oldid='.urlencode($change)." .\n");
			}
		}
	}
?>
heuristics/cluebot.grawp.heuristic.php
<?PHP
	/*
	 * Grawp vandal heuristic.
	 *
	 * Fires when $change matches any of the glob patterns below. Some are
	 * tested against the lower-cased value and some case-sensitively. On a hit
	 * it sets $reason and $heuristicret and sends an "!admin" notice to every
	 * channel in the comma-separated $ircreportchannel list.
	 *
	 * Fix: the IRC message had an unterminated string literal, so the file did
	 * not parse. The link target is restored to match the
	 * Special:Contributions report for the same vandal that is visible
	 * (disabled) in the redirect heuristic.
	 *
	 * NOTE(review): $change is used bare everywhere in this copy; it presumably
	 * carried array subscripts (edit summary, user name) that were stripped --
	 * confirm against the canonical source before deploying.
	 */
	if ( /* The Grawp vandal */
		(
			(fnmatch('*epic*lulz*on*nimp*org*',strtolower($change)))
			or (fnmatch('*on*nimp*org*epic*lulz*',strtolower($change)))
			or (fnmatch('*punishing*wikipedia*',strtolower($change)))
			or (fnmatch('*anti*avril*hate*campaign*',strtolower($change)))
			or (fnmatch('*HAGGER*',$change))
			or (fnmatch('*H?A?G?G?E?R*',$change))
			or (fnmatch('*h??a??g??g??e??r*',strtolower($change)))
			or (fnmatch('*grawp*cock*',strtolower($change)))
			or (fnmatch('*massive*cock*',strtolower($change)))
			or (fnmatch('*grawp*dick*',strtolower($change)))
			or (fnmatch('*massive*dick*',strtolower($change)))
			or (fnmatch('*H?A?G?E?R*',$change))
			or (fnmatch('*hgger*',strtolower($change)))
		)
		and ($reason = 'Grawp?')
	) {
		$heuristicret = true;
		foreach (explode(',',$ircreportchannel) as $y) {
			fwrite($irc,'PRIVMSG '.$y.' :!admin Grawp vandal? [[Special:Contributions/'.$change."]] .\n");
		}
	}
?>
heuristics/cluebot.massadd.heuristic.php
<?PHP
	/*
	 * Massive-addition heuristic.
	 *
	 * For a change of at least 7500, fetches two revisions ('older' direction)
	 * into $rv and the page text into $pagedata, then computes $s as one
	 * score() of $rv minus another. Fires when the result is non-zero and
	 * below -1000; sets $reason and $heuristicret.
	 *
	 * NOTE(review): as written both score() calls receive the same $rv, so the
	 * difference is always zero. The two calls presumably indexed different
	 * revisions via subscripts that are missing from this copy -- confirm
	 * against the canonical source.
	 */
	if ($change >= 7500) {
		$rv = $wpapi->revisions($change,2,'older',true,$change);
		if ($rv) {
			$pagedata = $wpq->getpage($change);
			if ($pagedata) {
				$s = score($scorelist,$rv);
				if ($s) {
					$s += (score($scorelist,$rv)) * -1;
					if ($s && ($s < -1000)) {
						$reason = 'score equals '.$s;
						$heuristicret = true;
					}
				}
			}
		}
	}
?>
heuristics/cluebot.massdelete.heuristic.php
<?PHP
	/*
	 * Massive-deletion heuristic.
	 *
	 * For a change of -7500 or less, fetches the page text into $pagedata and
	 * skips pages whose first nine characters (upper-cased) contain
	 * "#REDIRECT". It then fetches two revisions into $rv, computes $s as one
	 * score() of $rv minus another, and fires when the result is non-zero and
	 * below -50. Sets $reason and $heuristicret.
	 *
	 * NOTE(review): both score() calls receive the same $rv as written; they
	 * presumably indexed different revisions via subscripts that are missing
	 * from this copy -- confirm against the canonical source.
	 */
	if ($change <= -7500) {
		$pagedata = $wpq->getpage($change);
		if ($pagedata && !fnmatch('*#REDIRECT*',strtoupper(substr($pagedata,0,9)))) {
			$rv = $wpapi->revisions($change,2,'older',true,$change);
			if ($rv) {
				$s = score($scorelist,$rv);
				if ($s) {
					$s += (score($scorelist,$rv)) * -1;
					/* There are times when massive deletes are ok. */
					if ($s && ($s < -50)) {
						$reason = 'deleting '.($change * -1).' characters';
						$heuristicret = true;
					}
				}
			}
		}
	}
?>
heuristics/cluebot.pageblank.heuristic.php
<?PHP
/*
 * Page-blank heuristic.
 *
 * Intended flow, as far as the surviving text shows: a preg_match() with
 * captures in $m, a page fetch into $pagedata, a one-revision lookup in the
 * 'newer' direction into $fc, and a check that $fc differs from $change (the
 * page creator is allowed to blank). On a hit $reason and $heuristicret are set.
 *
 * NOTE(review): the preg_match() pattern and subject are truncated in this
 * copy (the pattern string is never closed), so the file does not parse as
 * shown. The lost text cannot be recovered from here -- restore it from the
 * canonical source.
 */
if ( /* Page blanks */ (preg_match('/\,$m)) and (($pagedata = $wpq->getpage($change)) or true) and ($fc = $wpapi->revisions($change,1,'newer')) and ($fc != $change) /* The creator is allowed to blank the page. */ and ($reason = 'blanking the page') ) $heuristicret = true; ?>
heuristics/cluebot.pagereplace.heuristic.php
<?PHP
	/*
	 * Page-replace heuristic.
	 *
	 * Fires when $change matches the "Replaced page with ..." pattern, the
	 * page text is non-empty, and the single revision fetched in the 'newer'
	 * direction ($fc) is non-empty and differs from $change. The page creator
	 * is allowed to replace the page. Sets $reason and $heuristicret.
	 *
	 * NOTE(review): the pattern begins with a stray "\]" and $change is used
	 * bare, so bracketed text was presumably lost from this copy -- confirm
	 * against the canonical source.
	 */
	if (preg_match('/\\]Replaced page with (.*)$/',$change,$m)) {
		$pagedata = $wpq->getpage($change);
		if ($pagedata) {
			$fc = $wpapi->revisions($change,1,'newer');
			if ($fc && ($fc != $change)) {
				$reason = 'replacing entire content with something else';
				$heuristicret = true;
			}
		}
	}
?>
heuristics/cluebot.redirect.heuristic.php
<?PHP
	/*
	 * Redirect vandal heuristic.
	 *
	 * Fires in either of two cases:
	 *  1. $change equals $tfa and the lower-cased page text contains
	 *     "#redirect ".
	 *  2. The trimmed, lower-cased page text starts with "#redirect ", the
	 *     pattern below matches it, and fetching the page named by $m returns
	 *     nothing.
	 * On a hit it sets $reason and $heuristicret.
	 *
	 * Fix: the disabled "//" report line is back on its own line. With the
	 * whole file on one line it commented out the closing brace and the file
	 * did not parse.
	 *
	 * NOTE(review): the pattern '/\\]/' and the bare $m / $change look
	 * truncated (bracketed text stripped from this copy) -- confirm against
	 * the canonical source.
	 */
	if ( /* The Redirect vandals */
		(
			($tfa == $change)
			and (fnmatch('*#redirect *',strtolower($wpq->getpage($change))))
			and ($reason = 'redirecting featured article to new title')
		)
		or (
			($pagedata = $wpq->getpage($change))
			and (substr(trim(strtolower($pagedata)),0,10) == '#redirect ')
			and (preg_match('/\\]/',$pagedata,$m))
			and (!$wpq->getpage($m))
			and ($reason = 'redirecting article to non-existant page')
		)
	) {
		$heuristicret = true;
//		fwrite($irc,'PRIVMSG #cvn-wp-en :!admin Grawp vandal? http://en.wikipedia.org/Special:Contributions/'.$change." .\n");
	}
?>
heuristics/cluebot.smallchange.heuristic.php
<?PHP
/*
 * Small-change heuristic.
 *
 * Clears $log and $log2, then, for a change between -200 and 200 inclusive:
 *  - fetches the diff into $d (result is not required to be truthy),
 *  - dumps $rv with print_r() when $change equals 'User:ClueBot/Sandbox',
 *  - sets $s to score($obscenelist,$d,$log) minus score($obscenelist,$d,$log2).
 * It fires on one of three alternatives, each of which first fetches two
 * revisions into $rv and appends a suffix to $heuristic:
 *  - $s < -5, $rv not a redirect, no "SEX"/"BDSM" in $rv, non-negative scores
 *    for $change and $rv, and no censor-style match: '/obscenities'.
 *  - $s > 5 with the censor-style pattern checks: '/censor'.
 *  - "!!!" present in $d but not in $rv, and $rv not a redirect: '/exclamation'.
 * On a hit it sets $heuristicret, subtracts the $log2 values from $log, drops
 * zero entries, and unsets $log2.
 *
 * NOTE(review): this copy appears to have lost bracketed text. Both score()
 * calls take the same $d, the '/censor' branch needs the same preg_match() on
 * $rv to both fail and succeed, the censor pattern starts a group with a bare
 * "{1,2}", and "$log -= $v" / "unset($log)" act on the whole array. Restore
 * the subscripts and character classes from the canonical source.
 */
unset($log,$log2); if ( /* Small changes with obscenities. */ (($change >= -200) and ($change <= 200)) and (($d = $wpi->diff($change,$change,$change)) or true) and ((($change == 'User:ClueBot/Sandbox') and print_r($rv)) or true) and (($s = score($obscenelist,$d,$log)) or true) and (($s -= score($obscenelist,$d,$log2)) or true) and ( ( ($s < -5) /* There are times when small changes are ok. */ and (($rv = $wpapi->revisions($change,2,'older',true,$change)) or true) and (!fnmatch('*#REDIRECT*',strtoupper(substr($rv,0,9)))) and (!fnmatch('*SEX*',strtoupper($rv))) and (!fnmatch('*BDSM*',strtoupper($rv))) and (score($obscenelist,$change) >= 0) and (score($obscenelist,$rv) >= 0) and (!preg_match('/(^|\s)({1,2}(\*+|\-{3,}){0,2}|\*{4}|\-{4}|(\<|\()?censored(\>|\))?)(ing?|ed)?(\s|$)/iS',$rv)) and ($heuristic .= '/obscenities') and ($reason = 'making a minor change with obscenities') ) or ( ($s > 5) and (($rv = $wpapi->revisions($change,2,'older',true,$change)) or true) and (!fnmatch('*#REDIRECT*',strtoupper(substr($rv,0,9)))) and (!preg_match('/(^|\s)({1,2}(\*+|\-{3,}){0,2}|\*{4}|\-{4}|(\<|\()?censored(\>|\))?)(ing?|ed)?(\s|$)/iS',$rv)) and (preg_match('/(^|\s)({1,2}(\*+|\-{3,}){0,2}|\*{4}|\-{4}|(\<|\()?censored(\>|\))?)(ing?|ed)?(\s|$)/iS',$rv)) and ($heuristic .= '/censor') and ($reason = 'making a minor change censoring content (])') ) or ( (preg_match('/\!\!\!/S',$d)) and (($rv = $wpapi->revisions($change,2,'older',true,$change)) or true) and (!preg_match('/\!\!\!/S',$rv)) and (!fnmatch('*#REDIRECT*',strtoupper(substr($rv,0,9)))) and ($heuristic .= '/exclamation') and ($reason = 'making a minor change adding "!!!"') ) ) ) { $heuristicret = true; if (isset($log2) and is_array($log2)) foreach ($log2 as $k => $v) $log -= $v; if (isset($log) and is_array($log)) foreach ($log as $k => $v) if ($v == 0) unset($log); unset($log2); /* fwrite($irc,'PRIVMSG #wikipedia-BAG/ClueBot :Would revert 
http://en.wikipedia.org/search/?title='.urlencode($change.$change).'&diff=prev'.'&oldid='.urlencode($change)." .\n"); */ } ?>
heuristics/cluebot.sneaky.heuristic.php
<?PHP
/*
 * "Sneaky" heuristic.
 *
 * Clears $log and $log2, then, for a change between -200 and 200 inclusive,
 * fetches the diff into $d and sets $s to the difference of two
 * score($obscenelist, ...) calls. It fires on one of three alternatives,
 * appending '/obscenities' ($s < -5), '/censor' ($s > 5) or '/exclamation'
 * ("!!!" in $d but not in $rv) to $heuristic and setting $reason. On a hit it
 * sets $heuristicret, subtracts the $log2 values from $log, drops zero
 * entries, and unsets $log2.
 *
 * NOTE(review): the text of this file is identical to the smallchange
 * heuristic, including its "Small changes with obscenities" comment --
 * confirm whether that is intentional.
 * NOTE(review): bracketed text (array subscripts, regex character classes)
 * appears to have been stripped from this copy, e.g. the bare "{1,2}" at the
 * start of a regex group and "$log -= $v". Restore it from the canonical source.
 */
unset($log,$log2); if ( /* Small changes with obscenities. */ (($change >= -200) and ($change <= 200)) and (($d = $wpi->diff($change,$change,$change)) or true) and ((($change == 'User:ClueBot/Sandbox') and print_r($rv)) or true) and (($s = score($obscenelist,$d,$log)) or true) and (($s -= score($obscenelist,$d,$log2)) or true) and ( ( ($s < -5) /* There are times when small changes are ok. */ and (($rv = $wpapi->revisions($change,2,'older',true,$change)) or true) and (!fnmatch('*#REDIRECT*',strtoupper(substr($rv,0,9)))) and (!fnmatch('*SEX*',strtoupper($rv))) and (!fnmatch('*BDSM*',strtoupper($rv))) and (score($obscenelist,$change) >= 0) and (score($obscenelist,$rv) >= 0) and (!preg_match('/(^|\s)({1,2}(\*+|\-{3,}){0,2}|\*{4}|\-{4}|(\<|\()?censored(\>|\))?)(ing?|ed)?(\s|$)/iS',$rv)) and ($heuristic .= '/obscenities') and ($reason = 'making a minor change with obscenities') ) or ( ($s > 5) and (($rv = $wpapi->revisions($change,2,'older',true,$change)) or true) and (!fnmatch('*#REDIRECT*',strtoupper(substr($rv,0,9)))) and (!preg_match('/(^|\s)({1,2}(\*+|\-{3,}){0,2}|\*{4}|\-{4}|(\<|\()?censored(\>|\))?)(ing?|ed)?(\s|$)/iS',$rv)) and (preg_match('/(^|\s)({1,2}(\*+|\-{3,}){0,2}|\*{4}|\-{4}|(\<|\()?censored(\>|\))?)(ing?|ed)?(\s|$)/iS',$rv)) and ($heuristic .= '/censor') and ($reason = 'making a minor change censoring content (])') ) or ( (preg_match('/\!\!\!/S',$d)) and (($rv = $wpapi->revisions($change,2,'older',true,$change)) or true) and (!preg_match('/\!\!\!/S',$rv)) and (!fnmatch('*#REDIRECT*',strtoupper(substr($rv,0,9)))) and ($heuristic .= '/exclamation') and ($reason = 'making a minor change adding "!!!"') ) ) ) { $heuristicret = true; if (isset($log2) and is_array($log2)) foreach ($log2 as $k => $v) $log -= $v; if (isset($log) and is_array($log)) foreach ($log as $k => $v) if ($v == 0) unset($log); unset($log2); /* fwrite($irc,'PRIVMSG #wikipedia-BAG/ClueBot :Would revert 
http://en.wikipedia.org/search/?title='.urlencode($change.$change).'&diff=prev'.'&oldid='.urlencode($change)." .\n"); */ } ?>
heuristics/cluebot.tables.heuristic.php
<?PHP
	/*
	 * Massive-table heuristic.
	 *
	 * For a change of at least 7500, fetches two revisions ('older' direction)
	 * into $rv and fires when the lower-cased text contains more than 300
	 * occurrences of "<td". Sets $reason and $heuristicret.
	 *
	 * NOTE(review): $rv is passed to strtolower() directly and $change is used
	 * bare, so array subscripts were presumably lost from this copy -- confirm
	 * against the canonical source.
	 */
	if ($change >= 7500) {
		$rv = $wpapi->revisions($change,2,'older',true,$change);
		if ($rv && (substr_count(strtolower($rv),'<td') > 300)) {
			$reason = 'adding huge, browser-crashing tables';
			$heuristicret = true;
		}
	}
?>
Score list
<?PHP
/*
 * Score tables used by the heuristics.
 *
 * $obscenelist and $grammarlist each map a PCRE pattern to a point value;
 * negative values mark text that looks like vandalism, positive values offset
 * known false positives or reward wiki structure. $scorelist is the
 * array_merge() of the two.
 *
 * NOTE(review): several patterns look truncated in this copy, as if bracketed
 * text (character classes, "[[...]]" link syntax) had been stripped:
 * '/\\]/', '/\\]/U', '/\\]/iU', '/\b.*\b/U', '/\b{30,}\b/U', '/\b{1500,}\b/U'
 * and '/\{\{nfobox .*\}\}/U'. Their comments describe capital-letter runs,
 * wiki links, categories and infoboxes. Restore the patterns from the
 * canonical source rather than from this copy.
 * NOTE(review): '/Penisula/' (+20) presumably offsets the -20 that '/penis/i'
 * gives to that misspelling; the spelling looks intentional -- confirm.
 */
/* * This page contains bad words out of necessity. * Here is 50 lines of whitespace before the actual list: * (scroll down to see the list) * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * Here is the list: */ $obscenelist = Array ( /* 'preg' => points, */ '/suck/i' => -5, /* Usually bad words */ '/stupid/i' => -3, '/haha/i' => -5, '/\bomg/i' => -3, '/\bpimp\b/i' => -7, '/1337/i' => -5, '/leet/i' => -5, '/dumb/i' => -5, '/\bputa\b/i' => -7, '/\bhomo\b/i' => -7, '/\bGAY\b/' => -10, '/\bslut/i' => -5, '/damn/i' => -5, '/\bass\b/i' => -10, '/\brape\b/i' => -7, '/\bpoop\b/i' => -10, '/\bcock\b/i' => -10, '/\blol\b/i' => -7, '/\bcrap\b/i' => -5, '/\bsex\b/i' => -5, '/noob/i' => -5, '/\bnazi\b/i' => -3, '/\bneo-nazi\b/i' => +3, /* False-positive */ '/fuck/i' => -20, /* Stronger bad words */ '/\\]/' => +20, /* This one is a false positive */ '/bitch/i' => -20, '/\bpussy\b/i' => -20, '/penis/i' => -20, '/Penisula/' => +20, /* False Positive */ '/vagina/i' => -20, '/whore/i' => -15, '/\bshit\b/i' => -20, '/nigger/i' => -20, '/\bnigga\b/i' => -20, '/cocksucker/i' => -20, '/assrape/i' => -15, '/motherfucker/i' => -20, '/wanker/i' => -20, '/\bcunt\b/i' => -20, '/faggot/i' => -20, '/fags/i' => -20, '/asshole/i' => -15, '/fuck ((yo)?u|h(er|im)|them|it)/i' => -100, /* This looks like a personal attack */ '/((yo)?u|s?he|we|they|it) sucks?/i' => -100, /* This looks like a personal attack */ '/666+\b/i' => -50 /* Though this has uses, it is commonly used by vandals */ ); $grammarlist = Array ( '/(.{1,4})\1{30}/' => -10, /* Ugg .. the same letter(s) several times in a row. */ '/\b.*\b/U' => +2, /* This looks to be a correct sentence */ '/\b{30,}\b/U' => -10, /* All capitals? Looks like vandal activity */ '/\b{1500,}\b/U' => -10, /* No capitals? Looks like vandal activity */ '/!{5,}/i' => -10, /* No wikipedia article needs '!!!!!' 
in it */ '/!!+1+(one)*/i' => -30, /* No wikipedia article needs '!!!11one' in it */ '/\\]/U' => +1, /* Wiki links are good. */ '/\{\{.*\}\}/U' => +5, /* Wiki transcludes are good. */ '/\{\{nfobox .*\}\}/U' => +20, /* Wiki infoboxes are good. */ '/\\]/iU' => +3 /* Wiki categories are good. */ ); $scorelist = array_merge($obscenelist,$grammarlist); ?>