<?php

    
require_once('curl.php');

    class 
wiki
    
{
        public 
$curl;
        public 
$username;
        private 
$password;
        public 
$lang;
        public 
$logged_in;
        public 
$last_edit;
        public 
$last_edit_pagename;
        public 
$last_diff;

        function 
__construct($username$password$lang)
        {
            
$this->curl = new curl();

            
$this->username $username;
            
$this->password $password;
            
$this->lang $lang;

            
$this->category_names = array();
            
$this->category_names['en'] = 'Category';
            
$this->category_names['sv'] = 'Kategori';
        }

        function 
login()
        {
            
$url_1 "http://{$this->lang}.wikipedia.org/w/index.php?title=Special:Inloggning";
            
$url_2 "http://{$this->lang}.wikipedia.org/w/index.php?title=Special:Inloggning&action=submitlogin&type=login";
            
$url_1 "http://{$this->lang}.wikipedia.org/w/index.php?title=Special:UserLogin";
            
$url_2 "http://{$this->lang}.wikipedia.org/w/index.php?title=Special:UserLogin&action=submitlogin&type=login";
            
$post_data = array();
            
$post_data['wpName'] = $this->username;
            
$post_data['wpPassword'] = $this->password;
            
$post_data['wpCentralLogin'] = 1;
            
$post_data['wpLoginAttempt'] = 'Logga in';

            
$login_page $this->curl->get($url_1);

            
/* check alredy logged in? */
            
$regexp "#<li  id=\"pt-userpage\"><a href=\"/wiki/[^:\"]+:([^\"]+)\"#";
            if(
preg_match($regexp$login_page$match))
            {

                if(
$this->username == $match[1])
                {
                    
$this->logged_in TRUE;
                    return 
TRUE;
                }
                else
                {
                    
trigger_error("Alredy logged in as {$match[1]}");
                    return 
FALSE;
                }
            }

            
/* check for wpLoginToken */
            
$regexp "#name=\"wpLoginToken\" value=\"([0-9a-fA-F]+)\"#";
            if(!
preg_match($regexp$login_page$match))
            {
                
trigger_error("Bad loginpage, no wpLoginToken");
                return 
FALSE;
            }

            
$post_data['wpLoginToken'] = $match[1];

            
$result $this->curl->post($url_2$post_data);

            if(
strpos('<div id="mw-loginsuccess">'$result))
            {
                
$this->logged_in TRUE;
                return 
TRUE;
            }
            else
            {
                
trigger_error("Login failed");
                return 
FALSE;
            }
        }

        function 
edit_get($page_name)
        {
            if(!
$this->logged_in)
            {
                
trigger_error("Login first");
                return 
FALSE;
            }
            if(!
$this->lang)
            {
                
trigger_error("No lang selected");
                return 
FALSE;
            }

            
$url "http://{$this->lang}.wikipedia.org/w/index.php?action=edit&title=";
            
$url .= urlencode($page_name);
            
$page $this->curl->get($url);

            
$start_pos strpos($page'name="wpTextbox1">');
            if(!
$start_pos)
            {
                
trigger_error("editbox (start) not found for {$page_name}");
                return 
FALSE;
            }
            
$start_pos += strlen('name="wpTextbox1">');
            
$end_pos strpos($page'</textarea>'$start_pos);
            if(!
$end_pos)
            {
                
trigger_error("editbox (start) not found for {$page_name}");
                return 
FALSE;
            }
            
$editbox substr($page$start_pos$end_pos-$start_pos);
            
$editbox str_replace(array('&lt;''&amp;'), array('<''&'), $editbox);
            
$this->last_edit_content $editbox;

            
$this->last_edit $this->curl->get_hidden_inputs();
            
$this->last_edit_pagename $page_name;

            
/* remove hiddens from the search-form */
            
unset($this->last_edit['title']);

            if(
substr_count($editbox"{{nobots"))
            {
                return 
FALSE;
            }
            if(
substr_count($editbox"{{inuse"))
            {
                return 
FALSE;
            }
            if(
substr_count($editbox"{{bots"))
            {
                
preg_match("#{{bots(\|(allow|deny)=([^}]+)}}#"$editbox$match);
                
trigger_error("page '{$page_name}' got bot restrictions: " print_r($matchTRUE));
                return 
FALSE;
            }

            return 
$editbox;
        }

        function 
edit_diff($new_contents)
        {
            
$url "http://{$this->lang}.wikipedia.org/w/index.php?action=submit&title=";
            
$url .= urlencode($this->last_edit_pagename);
            
$post_data $this->last_edit;
            
$post_data['wpTextbox1'] = $new_contents;
            
$post_data['wpSummary'] = "Testing Diff";
            
$post_data['wpMinoredit'] = 1;
            
$post_data['wpWatchthis'] = 0;
            
$post_data['wpDiff'] = "Visa Ändringar";

            
$page $this->curl->post_multipart($url$post_data);

            
$start_pos strpos($page"<table class='diff'>");
            if(!
$start_pos)
            {
                
trigger_error("diff-table (start) not found for {$this->last_edit_pagename}");
                
file_put_contents('last_page.html'$page);
                return 
FALSE;
            }
            
$start_pos += strlen('<table class="diff">');
            
$end_pos strpos($page'</table>'$start_pos);
            if(!
$end_pos)
            {
                
trigger_error("diff-table (stop) not found for {$this->last_edit_pagename}");
                
file_put_contents('last_page.html'$page);
                return 
FALSE;
            }

            
$diff_table substr($page$start_pos$end_pos-$start_pos);
            
$this->last_diff = array();
            
$this->last_diff['table'] = $diff_table;
            
$this->last_diff['removed lines'] = substr_count($diff_table'class="diff-deletedline"');
            
$this->last_diff['added lines'] = substr_count($diff_table'class="diff-addedline"');

            return 
$this->last_diff;
        }

        function 
edit_save($new_contents$summary)
        {
            
$url "http://{$this->lang}.wikipedia.org/w/index.php?action=submit&title=";
            
$url .= urlencode($this->last_edit_pagename);
            
$post_data $this->last_edit;
            
$post_data['wpTextbox1'] = $new_contents;
            
$post_data['wpSummary'] = $summary;
            
$post_data['wpMinoredit'] = 1;
            
$post_data['wpWatchthis'] = 0;
            
$post_data['wpSave'] = "Spara";

            
$page $this->curl->post_multipart($url$post_dataNULL, array(302));

            if(
$this->curl->last_info['http_code'] == 302)
            {
                return 
TRUE;
            }
            else
            {
                return 
FALSE;
            }
        }

        
/* fix 57: Rubrik avslutad med kolon */
        
function fix_57($page_name)
        {
            
$regexp "#(^|\n)(=+ *)([^:=]+):( *=+)(\n|$)#";

            
$page $this->edit_get($page_name);
            if(!
$page)
            {
                
trigger_error("Failed to get page '{$page_name}´");
                return 
FALSE;
            }

            if(!
preg_match($regexp$page$match))
            {
                
trigger_error("Fix 57 failed on page '{$page_name}´, no match");
                return 
FALSE;
            }

            
$page_parts explode($match[0], $page);
            
$pre_page strtolower($page_parts[0]);
            unset(
$page_parts);
            
$open_code substr_count($pre_page'<code') - substr_count($pre_page'</code');
            if(
$open_code 0)
            {
                
trigger_error("Fix 57 failed on page '{$page_name}´, inside a code-block");
                return 
FALSE;
            }
            
$open_nowiki substr_count($pre_page'<nowiki') - substr_count($pre_page'</nowiki');
            if(
$open_nowiki 0)
            {
                
trigger_error("Fix 57 failed on page '{$page_name}´, inside a nowiki-block");
                return 
FALSE;
            }

            
$summary "Replaceing '{$match[0]}' with '{$match[2]}{$match[3]}{$match[4]}'";
            
$page preg_replace($regexp"\\1\\2\\3\\4\\5"$page1);
            
$diff $this->edit_diff($page);

            if(
$diff['removed lines'] != OR $diff['added lines'] != 1)
            {
                
trigger_error("Fix 57 failed on page '{$page_name}´, more then one line affected");
                
file_put_contents('last_diff.txt'print_r($diffTRUE));
                return 
FALSE;
            }
            return 
$this->edit_save($page$summary);
        }

        function 
run_57($time_pre 5$time_post 30)
        {
            
$list $this->list_problems(57);
            if(!
$list)
            {
                echo 
"Problem 57: Rubrik avslutad med kolon, Alla problem redan lösta\n";
                return 
TRUE;
            }
            echo 
"Problem 57: Rubrik avslutad med kolon, hittade " count($list) . " st\n";
            if(!
$this->logged_in)
            {
                
$this->login();
            }
            if(!
$this->logged_in)
            {
                return 
FALSE;
            }

            foreach(
$list as $page_id => $page_name)
            {
                echo 
"Fixing 57 on '{$page_name}' in {$time_pre} sec\t";
                
sleep($time_pre);
                if(
$this->fix_57($page_name))
                {
                    echo 
"OK\n";
                    
$this->mark_done($page_id57);
                }
                else
                {
                    echo 
"Fail\n";
                    
//return FALSE;
                
}
                
sleep($time_post);
            }
        }

        
/* Dubbel kategori */
        
function fix_17($page_name)
        {
            if(!
$this->category_names[$this->lang])
            {
                
trigger_error("Translation of kategory for {$this->lang} is missing in fix_17()");
                return 
FALSE;
            }

            
$category_name $this->category_names[$this->lang];
            
$regexp "#\[\[({$category_name}:)([^\]\|]+)(\|[^\]]+)?\]\]#";

            
$page $this->edit_get($page_name);
            if(!
$page)
            {
                
trigger_error("Failed to get page '{$page_name}´");
                return 
FALSE;
            }

            if(!
preg_match_all($regexp$page$matchPREG_SET_ORDER))
            {
                
trigger_error("Fix 17 failed on page '{$page_name}´, no match");
                
file_put_contents('last_page.html'$page);
                return 
FALSE;
            }

            
$categories = array();
            
$remove_category "";
            foreach(
$match as $current_match)
            {
                
$current_category $current_match[2];
                if(!
$current_category)
                {
                    continue;
                }
                if(!isset(
$categories[$current_category]))
                {
                    
$categories[$current_category] = $current_match;
                    continue;
                }
                else
                {
                    if(
strlen($current_match[0]) <= strlen($categories[$current_category][0]))
                    {
                        
$remove_category $current_match[0];
                    }
                    else
                    {
                        
$remove_category $categories[$current_category][0];
                    }
                }
            }

            if(!
$remove_category)
            {
                
trigger_error('all ' count($categories) . ' categories was unique');
                return 
FALSE;
            }
            
//die("tar bort '{$remove_category}' från '{$page_name}'\n");

             
$summary "Removing dublicate '" trim($remove_category"[]") . "'";
             
$new_page implode("\n"explode("\n{$remove_category}\n"$page2));
            if(
strlen($new_page) == strlen($page))
            {
                
$new_page implode(""explode("{$remove_category}"$page2));
            }

            if(
strlen($new_page) >= strlen($page))
            {
                
trigger_error("failed {$summary} on {$page_name}");
                return 
FALSE;
            }
             
$diff $this->edit_diff($new_page);

             if(
$diff['removed lines'] != OR $diff['added lines'] > 1)
             {
                 
trigger_error("Fix 17 failed on page '{$page_name}´, more then one line affected");
                
file_put_contents('last_diff.txt'print_r($diffTRUE));
                 return 
FALSE;
             }

             return 
$this->edit_save($new_page$summary);
        }

        function 
run_17($time_pre 5$time_post 30)
        {
            
$list $this->list_problems(17);
            if(!
$list)
            {
                echo 
"Problem 17: Dubbel kategori, Alla problem redan lösta\n";
                return 
TRUE;
            }
            echo 
"Problem 17: Dubbel kategori, hittade " count($list) . " st\n";
            if(!
$this->logged_in)
            {
                
$this->login();
            }
            if(!
$this->logged_in)
            {
                return 
FALSE;
            }

            foreach(
$list as $page_id => $page_name)
            {
                echo 
"Fixing 17 on '{$page_name}' in {$time_pre} sec\t";
                
sleep($time_pre);
                if(
$this->fix_17($page_name))
                {
                    echo 
"OK\n";
                    
$this->mark_done($page_id17);
                }
                else
                {
                    echo 
"Fail\n";
                    
//return FALSE;
                
}
                
sleep($time_post);
            }
        }

        function 
fix_53($page_name)
        {
            
$regexp "#\n\[\[([a-z][a-z][a-z]?|simple):([^\]\|]+)\]\]#";

            
$page $this->edit_get($page_name);
            if(!
$page)
            {
                
trigger_error("Failed to get page '{$page_name}´");
                return 
FALSE;
            }

            if(!
preg_match_all($regexp$page$match))
            {
                
trigger_error("Fix 53 failed on page '{$page_name}´, no match");
                
file_put_contents('last_page.html'$page);
                return 
FALSE;
            }

            
$interwiki_links array_unique($match[0]);

            
$new_page str_replace($interwiki_links""$page);

            
$new_page trim($new_page) . implode(""$interwiki_links);

             
$diff $this->edit_diff($new_page);

//             $diff_size = levenshtein($page, $new_page);
//             if($diff_size < 10)
//              {
//                  trigger_error("Fix 53 failed on page '{$page_name}´, not enogh changed");
//                 file_put_contents('last_diff.txt', print_r($diff, TRUE));
//                  return FALSE;
//              }

            
if($diff['removed lines'] + $diff['added lines'] < 2)
             {
                 
trigger_error("Fix 53 failed on page '{$page_name}´, to few lines affected");
                
file_put_contents('last_diff.txt'print_r($diffTRUE));
                 return 
FALSE;
             }

            if(
$diff['removed lines'] + $diff['added lines'] < 2)
             {
                 
trigger_error("Fix 53 failed on page '{$page_name}´, to few lines affected");
                
file_put_contents('last_diff.txt'print_r($diffTRUE));
                 return 
FALSE;
             }

             if(
$diff['removed lines'] > count($interwiki_links) OR $diff['added lines'] > count($interwiki_links) + 1)
             {
                 
trigger_error("Fix 53 failed on page '{$page_name}´, to many lines affected");
                
file_put_contents('last_diff.txt'print_r($diffTRUE));
                 return 
FALSE;
             }

            
$summary "Moving all interwiki links to the bottom of the page";

             return 
$this->edit_save($new_page$summary);
        }

        function 
run_53($time_pre 5$time_post 30)
        {
            
$list $this->list_problems(53);
            if(!
$list)
            {
                echo 
"Problem 53: Interwiki före sista kategori, Alla problem redan lösta\n";
                return 
TRUE;
            }
            echo 
"Problem 53: Interwiki före sista kategori, hittade " count($list) . " st\n";
            if(!
$this->logged_in)
            {
                
$this->login();
            }
            if(!
$this->logged_in)
            {
                return 
FALSE;
            }

            foreach(
$list as $page_id => $page_name)
            {
                echo 
"Fixing 53 on '{$page_name}' in {$time_pre} sec\t";
                
sleep($time_pre);
                if(
$this->fix_53($page_name))
                {
                    echo 
"OK\n";
                    
$this->mark_done($page_id53);
                }
                else
                {
                    echo 
"Fail\n";
                    
//return FALSE;
                
}
                
sleep($time_post);
            }
        }

        function 
fix_80($page_name)
        {
            
$regexp "#\[http(s?)://[^\]\[]+\n[^\]\[]*\]#";

            
$page $this->edit_get($page_name);
            if(!
$page)
            {
                
trigger_error("Failed to get page '{$page_name}´");
                return 
FALSE;
            }

            if(!
preg_match($regexp$page$match))
            {
                
trigger_error("Fix 53 failed on page '{$page_name}´, no match");
                
file_put_contents('last_page.html'$page);
                return 
FALSE;
            }

            
$org_link $match[0];
            if(
substr_count($org_link'</ref>'))
            {
                
$link_parts explode('</ref>'$org_link2);
                if(
substr_count(trim($link_parts[0]), ' '))
                {
                    
$new_link $link_parts[0] . "]</ref>" $link_parts[1];
                    
$summary "Added ] to link in reference {$link_parts[0]}";
                }
                else
                {
                    
$new_link substr($org_link1);
                    
$summary "Removed [ whitout end in reference {$link_parts[0]}";
                }
            }
            else if(
substr_count($org_link'}}'))
            {
                
$link_parts explode('}}'$org_link2);
                if(
substr_count(trim($link_parts[0]), ' '))
                {
                    
$after_trim substr($link_parts[0], strlen(trim($link_parts[0])));
                    
$new_link trim($link_parts[0]) . "]{$after_trim}}}" $link_parts[1];
                    
$summary "Added ] to link in template {$link_parts[0]}";
                }
                else
                {
                    
$new_link substr($org_link1);
                    
$summary "Removed [ whitout end in template {$link_parts[0]}";
                }
            }
            else
            {
                
$new_link str_replace("\n"" "$org_link);
                
$summary "Removed linebreak in {$new_link}";
            }
            
$new_page str_replace($org_link$new_link$page);

             
$diff $this->edit_diff($new_page);

             if(
$diff['removed lines'] < OR $diff['removed lines'] > OR $diff['added lines'] =! 1)
             {
                 
trigger_error("Fix 80 failed on page '{$page_name}´, more then one line affected (-{$diff['removed lines']}, +{$diff['added lines']})");
                
file_put_contents('last_diff.txt'print_r($diffTRUE));
                 return 
FALSE;
             }

             return 
$this->edit_save($new_page$summary);
        }

        function 
run_80($time_pre 5$time_post 30)
        {
            
$list $this->list_problems(80);
            if(!
$list)
            {
                echo 
"Problem 80: Extern länk med radbrytning, Alla problem redan lösta\n";
                return 
TRUE;
            }
            echo 
"Problem 80: Extern länk med radbrytning, hittade " count($list) . " st\n";
            if(!
$this->logged_in)
            {
                
$this->login();
            }
            if(!
$this->logged_in)
            {
                return 
FALSE;
            }

            foreach(
$list as $page_id => $page_name)
            {
                echo 
"Fixing 80 on '{$page_name}' in {$time_pre} sec\t";
                
sleep($time_pre);
                if(
$this->fix_80($page_name))
                {
                    echo 
"OK\n";
                    
$this->mark_done($page_id80);
                }
                else
                {
                    echo 
"Fail\n";
                    
//return FALSE;
                
}
                
sleep($time_post);
            }
        }

        
/* Samma referens flera gånger */
        
function analyse_81($page)
        {
            
$regexp "#<ref([^>]*)(>([^<]*)</ref|/)>#";
            if(!
preg_match_all($regexp$page$match))
            {
                return 
FALSE;
            }
            
print_r($match);
            die();
        }

        function 
list_problems($id$lang NULL)
        {
            if(!
$lang)
            {
                
$lang $this->lang;
            }

            
$list = array();
            
$url "http://toolserver.org/~sk/cgi-bin/checkwiki/checkwiki.cgi?project={$lang}wiki&view=only&id={$id}&offset=0&limit=500";
            
$page $this->curl->get($url);
            
$table_rows explode("<tr>"$page);
            unset(
$table_rows[0]);
            
shuffle($table_rows);

            
$translate = array();
            
$translate['&#039;'] = "'";
            
$translate['&amp;'] = "&";

            foreach(
$table_rows as $current_table_row)
            {
                if(!
preg_match("#pageid=([0-9]+)&#"$current_table_row$match))
                {
                    continue;
                }
                
$page_id $match[1];
                if(!
preg_match("#<a href=\"http://{$lang}.wikipedia.org/wiki/([^\"]+)\">([^<]+)</a>#"$current_table_row$match))
                {
                    continue;
                }
                
$title $match[2];
                
$title str_replace(array_keys($translate), $translate$title);
                
$list[$page_id] = trim($title);
            }
            return 
$list;
        }

        function 
list_problems_1($id$lang 'sv')
        {
            
$url "http://toolserver.org/~sk/cgi-bin/checkwiki/checkwiki.cgi?project={$lang}wiki&view=bots&id={$id}&offset=0&limit=500";
            
$page $this->curl->get($url);

            
$start_pos strpos($page"<pre>");
            if(!
$start_pos)
            {
                
trigger_error("problem-list (start) not found for {$id}");
                
file_put_contents('last_page.html'$page);
                return 
FALSE;
            }
            
$start_pos += strlen('<pre>');
            
$end_pos strpos($page'</pre>'$start_pos);
            if(!
$end_pos)
            {
                
trigger_error("problem-list (stop) not found for {$id}");
                
file_put_contents('last_page.html'$page);
                return 
FALSE;
            }
            
$pre substr($page$start_pos$end_pos $start_pos);
            
$translate = array();
            
$translate['&#039;'] = "'";
            
$translate['&amp;'] = "&";
            
$pre str_replace(array_keys($translate), $translate$pre);
            
$pre trim($pre);
            return 
explode("\n"$pre);
        }

        function 
mark_done($page_id$id$lang NULL)
        {
            if(!
$lang)
            {
                
$lang $this->lang;
            }
            
$url "http://toolserver.org/~sk/cgi-bin/checkwiki/checkwiki.cgi?project={$lang}wiki&view=only&id={$id}&pageid={$page_id}&offset=0&limit=0&orderby=found&sort=desc";
            
$referer $this->curl->referer;
            
$result $this->curl->get($url);
            
$this->curl->referer $referer;
            return (
$result TRUE FALSE);
        }
    }

    
// Bot instance for the Swedish Wikipedia. PASSWORD is a constant that must be
// defined before this line runs; it is not defined in this file.
$wiki = new wiki("Puggansbot", PASSWORD, "sv");

?>