// User:ValterVBot/Source/Wikipedia.cs
// From Wikidata
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text.RegularExpressions;
using System.Text;
using System.Threading.Tasks;
using System.Windows.Forms;
using Newtonsoft.Json;


namespace VBot
{
    /// <summary>
    /// Bot routines that work on it.wikipedia.org articles: scanning an XML dump for
    /// pages that look unsourced, prepending the {{F}} template to such pages through
    /// the API, and re-saving pages unchanged ("null edit").
    /// </summary>
    class Wikipedia
    {
        /// <summary>Maximum number of pages reported by a single <see cref="WriteF"/> run.</summary>
        private const int MaxTaggedPagesPerRun = 200;

        /// <summary>
        /// Patterns whose presence means the page must not be proposed for / tagged with {{F}}.
        /// They are kept as prebuilt <see cref="Regex"/> instances because the static
        /// Regex.Match helpers only cache a small number of patterns, far fewer than this list.
        /// </summary>
        private static readonly Regex[] ExclusionPatterns = CompilePatterns(new string[]
        {
            // Maintenance / navigation / citation templates already present on the page.
            @"{{\s*F\s*}}",
            @"{{\s*F[\r\n]?[\r\n]?\|",
            @"{{\s*S\s*}}",
            @"{{\s*S[\r\n]?[\r\n]?\|",
            @"{{\s*A\s*}}",
            @"{{\s*A[\r\n]?[\r\n]?\|",
            @"{{\s*NN\s*}}",
            @"{{\s*NN[\r\n]?[\r\n]?\|",
            @"{{\s*Disambigua\s*}}",
            @"{{\s*Disambigua[\r\n]?[\r\n]?\|",
            @"{{\s*Controllo di autorità\s*}}",
            @"{{\s*Torna a[\r\n]?[\r\n]?\|",
            @"{{\s*Cita\s*}}",
            @"{{\s*Cita[\r\n]?[\r\n]?\|",
            @"{{\s*Cita.+[\r\n]?[\r\n]?\|",

            // Exclusion of date pages (decades, years, centuries, millennia, months).
            @"{{\s*Decennio\s*[|\r\n]*",
            @"{{\s*Anno\s*[|\r\n]*",
            @"{{\s*Secolo\s*[|\r\n]*",
            @"{{\s*Millennio\s*[|\r\n]*",
            @"{{\s*Gennaio\s*[|\r\n]*",
            @"{{\s*Febbraio\s*[|\r\n]*",
            @"{{\s*Marzo\s*[|\r\n]*",
            @"{{\s*Aprile\s*[|\r\n]*",
            @"{{\s*Maggio\s*[|\r\n]*",
            @"{{\s*Giugno\s*[|\r\n]*",
            @"{{\s*Luglio\s*[|\r\n]*",
            @"{{\s*Agosto\s*[|\r\n]*",
            @"{{\s*Settembre\s*[|\r\n]*",
            @"{{\s*Ottobre\s*[|\r\n]*",
            @"{{\s*Novembre\s*[|\r\n]*",
            @"{{\s*Dicembre\s*[|\r\n]*",

            // Exclusion of number pages.
            @"{{\s*Numero intero\s*[|\r\n]*",

            // Section headings that indicate the page already has notes / bibliography / external links.
            @"===?=?\s*Note\s*===?=?",
            @"===?=?\s*Bibliografia\s*===?=?",
            @"===?=?\s*Collegamenti esterni\s*===?=?"
        });

        /// <summary>Plain substrings (matched case-insensitively) that indicate the page cites something.</summary>
        private static readonly string[] SourceMarkers = { "<ref", "http://", "https://" };

        /// <summary>Patterns for a disambiguation note, with and without parameters.</summary>
        private static readonly Regex[] DisambiguationNotePatterns = CompilePatterns(new string[]
        {
            @"{{\s*Nota disambigua\s*}}",
            @"{{\s*Nota disambigua[\r\n]?[\r\n]?\|"
        });

        /// <summary>Captures the parameter list of a {{portale|...}} call (group 2).</summary>
        private static readonly Regex PortalTemplate = new Regex(
            "({{portale)(\\|.*)+(}})",
            RegexOptions.IgnoreCase | RegexOptions.CultureInvariant | RegexOptions.Compiled);

        /// <summary>Opening tag of a dump text element, with any attributes, at the start of a line.</summary>
        private static readonly Regex TextOpenTag = new Regex(@"^\s*<text\b[^>]*>", RegexOptions.Compiled);

        /// <summary>
        /// Template name -> {{F}} topic. Order matters: the first template found on the page wins.
        /// </summary>
        private static readonly TopicRule[] TemplateTopics = BuildTemplateRules(new string[,]
        {
            { "Album", "album discografici" },
            { "Azienda", "aziende" },
            { "Bio", "biografie" },
            { "Brano musicale", "brani musicali" },
            { "College", "università" },
            { "Università", "università" },
            { "Composto chimico", "sostanze chimiche" },
            { "Corpo celeste", "astronomia" },
            { "Asteroide", "astronomia" },
            { "Discografia", "discografie" },
            { "Divisione amministrativa", "geografia" },
            { "Dramma", "teatro" },
            { "Opera", "teatro" },
            { "Spettacolo teatrale", "teatro" },
            { "Teatro", "teatro" },
            { "Edificio civile", "architettura" },
            { "Edificio religioso", "architettura" },
            { "Festival musicale", "festival musicali" },
            { "Fiction TV", "fiction televisive" },
            { "Film", "film" },
            { "Formazione geologica", "geologia" },
            { "Roccia", "geologia" },
            { "Terremoto", "geologia" },
            { "Fumetto e animazione", "anime e manga" },
            { "Episodio Anime", "anime e manga" },
            { "Stagione anime", "anime e manga" },
            { "Videogioco", "videogiochi" },
            { "Infobox aeromobile", "aviazione" },
            { "Infobox aeroporto", "aviazione" },
            { "Auto", "automobili" },
            { "Auto1", "automobili" },
            { "Infobox linea ferroviaria", "ferrovie" },
            { "Infobox stazione ferroviaria", "ferrovie" },
            { "Infobox linea metropolitana", "metropolitane" },
            { "Infobox stazione della metropolitana", "metropolitane" },
            { "Infobox metropolitana", "metropolitane" },
            { "Partito politico", "partiti politici" },
            { "Infobox ponte", "ponti" },
            { "Libro", "opere letterarie" },
            { "Minerale", "mineralogia" },
            { "Montagna", "montagna" },
            { "Catena montuosa", "montagna" },
            { "Valico", "montagna" },
            { "Rifugio", "montagna" },
            { "Museo", "musei" },
            { "Opera d'arte", "arte" },
            { "Prenome", "antroponimi" },
            { "Sito archeologico", "siti archeologici" },
            { "Software", "software" },
            { "Tassobox", "biologia" },
            { "Isola", "geografia" },
            { "Infobox isola", "geografia" }
        });

        /// <summary>
        /// Category name -> {{F}} topic. Only the plain form [[Categoria:Name]] (no sort key) is matched.
        /// </summary>
        private static readonly TopicRule[] CategoryTopics = BuildCategoryRules(new string[,]
        {
            { "Araldica", "araldica" },
            { "Cucina", "cucina" },
            { "Giappone", "Giappone" },
            { "Mitologia", "mitologia" },
            { "Scacchi", "scacchi" },
            { "Vessillologia", "vessillologia" },
            { "Prenome", "antroponimi" },
            { "Personaggi cinematografici", "personaggi cinematografici" }
        });

        /// <summary>Portal name (searched inside the {{portale}} parameters) -> {{F}} topic.</summary>
        private static readonly string[,] PortalTopics =
        {
            { "Astronomia", "astronomia" },
            { "Vessillologia", "vessillologia" },
            { "Religioni", "religione" },
            { "Aviazione", "aviazione" },
            { "Chimica", "chimica" },
            { "Geografia", "geografia" },
            { "Tennis", "tennis" },
            { "Matematica", "matematica" },
            { "Astronautica", "astronautica" },
            { "Mitologia", "mitologia" },
            { "Letteratura", "letteratura" },
            { "Oggetti del profondo cielo", "astronomia" },
            { "Sistema solare", "astronomia" },
            { "Psicologia", "psicologia" }
        };

        /// <summary>Pairs a compiled wikitext pattern with the {{F}} topic it implies.</summary>
        private sealed class TopicRule
        {
            public readonly Regex Pattern;
            public readonly string Topic;

            public TopicRule(Regex pattern, string topic)
            {
                Pattern = pattern;
                Topic = topic;
            }
        }

        /// <summary>
        /// Scans a MediaWiki XML dump line by line and writes, to
        /// <c>OutFile + "Candidate to F.txt"</c>, the title of every main-namespace,
        /// non-redirect page whose text matches none of the exclusion rules.
        /// </summary>
        /// <param name="Mess">Text box that receives the final count ("risultato: N").</param>
        /// <param name="DumpFile">Path of the UTF-8 XML dump to read.</param>
        /// <param name="OutFile">Prefix of the output file path.</param>
        public static void CheckForF_Dump(TextBox Mess, string DumpFile, string OutFile)
        {
            string title = "";
            string ns = "";
            string redirect_to = "";
            int conta = 0;

            // The reader is opened first so that a missing dump does not create/truncate the output file;
            // both streams are disposed even when parsing throws.
            using (System.IO.StreamReader file = new System.IO.StreamReader(DumpFile, Encoding.UTF8))
            using (System.IO.StreamWriter log = new System.IO.StreamWriter(OutFile + "Candidate to F" + ".txt", false, Encoding.UTF8))
            {
                string line;
                while ((line = file.ReadLine()) != null)
                {
                    if (line.Contains("<title>")) // title of the page: starts a new page record
                    {
                        // Titles are XML-escaped in the dump (e.g. &amp;), so decode them before logging.
                        title = System.Net.WebUtility.HtmlDecode(
                            line.Replace("<title>", "").Replace("</title>", "").Trim());
                        redirect_to = "";
                    }
                    else if (line.Contains("<ns>") && line.Contains("</ns>")) // namespace of the page
                    {
                        ns = line.Replace("<ns>", "").Replace("</ns>", "").Trim();
                    }
                    else if (line.Contains("<redirect title=")) // the page is a redirect
                    {
                        redirect_to = line.Replace("<redirect title=\"", "").Replace("\" />", "").Trim();
                    }
                    else if (ns == "0" && redirect_to == "") // only articles are inspected
                    {
                        Match open = TextOpenTag.Match(line);
                        if (!open.Success || open.Value.EndsWith("/>", StringComparison.Ordinal))
                        {
                            // Not a text element, or a self-closing (empty) one: nothing to evaluate.
                            continue;
                        }

                        string text = ReadPageText(file, line.Substring(open.Length));
                        text = System.Net.WebUtility.HtmlDecode(text); // wiki text

                        if (!IsExcludedFromF(text))
                        {
                            log.WriteLine(title);
                            conta += 1;
                        }
                    }
                }
            }

            Mess.AppendText("risultato: " + conta.ToString());
        }

        /// <summary>
        /// Loads the listed pages from it.wikipedia.org and, for each page that matches none of
        /// the exclusion rules and for which a topic can be derived (from an infobox template,
        /// a category or a portal), prepends <c>{{F|topic|month year}}</c> and saves the page.
        /// Pages carrying a disambiguation note are reported but not saved. Pages for which no
        /// topic is found are skipped entirely.
        /// </summary>
        /// <param name="Mess">Text box that receives the start time and, at the end, the report.</param>
        /// <param name="strList">Page titles, one per line (separated by <see cref="Environment.NewLine"/>).</param>
        /// <param name="DumpFile">Not used by this method.</param>
        /// <param name="OutFile">Not used by this method.</param>
        /// <param name="user">Account name passed to the API wrapper.</param>
        /// <param name="password">Password passed to the API wrapper.</param>
        public static void WriteF(TextBox Mess, string strList, string DumpFile, string OutFile, string user, string password)
        {
            MessageBox.Show("Aggiungere la modifica per le Note Disambigue");
            Mess.AppendText("Iniziato alle " + DateTime.Now.ToString() + Environment.NewLine);

            List<string> chunks = BuildTitleChunks(strList);
            if (chunks == null || chunks.Count == 0)
            {
                // Nothing to process: finish with an empty report instead of failing on the empty list.
                Mess.Text = "";
                return;
            }

            // The template date must be written in Italian regardless of the machine's UI culture.
            System.Globalization.CultureInfo italian = System.Globalization.CultureInfo.GetCultureInfo("it-IT");
            WikimediaAPI WP = new WikimediaAPI("https://it.wikipedia.org", user, password);

            StringBuilder report = new StringBuilder();
            int tagged = 0;
            bool limitReached = false;

            foreach (string chunk in chunks)
            {
                Pages pages = JsonConvert.DeserializeObject<Pages>(WP.LoadWP(chunk));
                if (pages == null || pages.query == null || pages.query.pages == null)
                {
                    continue;
                }

                foreach (Page p in pages.query.pages.Values)
                {
                    if (p.revisions == null)
                    {
                        // Presumably a missing/invalid title: there is no text to evaluate.
                        continue;
                    }

                    string rawText = p.revisions[0].text;
                    string text = System.Net.WebUtility.HtmlDecode(rawText) ?? ""; // wiki text
                    if (IsExcludedFromF(text))
                    {
                        continue;
                    }

                    string topic = FindTopic(text);
                    if (topic == null)
                    {
                        // NOTE(review): pages without a recognisable topic were never saved by the
                        // original code either; confirm that this is the intended policy.
                        continue;
                    }

                    DateTime now = DateTime.Now;
                    string F = "{{F|" + topic + "|" + now.ToString("MMMM", italian) + " " + now.Year + "}}";

                    // A disambiguation note has to stay at the top of the page, so such pages
                    // are only listed in the report and left for manual editing.
                    if (!MatchesAny(DisambiguationNotePatterns, text))
                    {
                        WP.SavePage(p.title, F + Environment.NewLine + rawText, "BOT: Add template F");
                    }

                    report.Append("*[[" + p.title + "]] \t <nowiki>" + F + "</nowiki>" + Environment.NewLine);
                    tagged++;
                    if (tagged >= MaxTaggedPagesPerRun)
                    {
                        limitReached = true;
                        break;
                    }
                }

                // The cap applies to the whole run, not just to the chunk in which it was reached.
                if (limitReached)
                {
                    break;
                }
            }

            Mess.Text = report.ToString();
        }

        /// <summary>
        /// Re-saves every listed page of it.wikipedia.org with its current text and an empty
        /// edit summary.
        /// </summary>
        /// <param name="strList">Page titles, one per line (separated by <see cref="Environment.NewLine"/>).</param>
        /// <param name="user">Account name passed to the API wrapper.</param>
        /// <param name="password">Password passed to the API wrapper.</param>
        public static void NullEdit(string strList, string user, string password)
        {
            List<string> chunks = BuildTitleChunks(strList);
            if (chunks == null || chunks.Count == 0)
            {
                return;
            }

            WikimediaAPI WP = new WikimediaAPI("https://it.wikipedia.org", user, password);
            foreach (string chunk in chunks)
            {
                Pages pages = JsonConvert.DeserializeObject<Pages>(WP.LoadWP(chunk));
                if (pages == null || pages.query == null || pages.query.pages == null)
                {
                    continue;
                }

                foreach (Page p in pages.query.pages.Values)
                {
                    if (p.revisions == null)
                    {
                        // Presumably a missing/invalid title: nothing to re-save.
                        continue;
                    }

                    string testo = p.revisions[0].text;
                    WP.SavePage(p.title, testo, "");
                }
            }
        }

        /// <summary>Builds case-insensitive, compiled regexes from the given patterns, keeping their order.</summary>
        private static Regex[] CompilePatterns(string[] patterns)
        {
            Regex[] compiled = new Regex[patterns.Length];
            for (int i = 0; i < patterns.Length; i++)
            {
                compiled[i] = new Regex(patterns[i], RegexOptions.IgnoreCase | RegexOptions.Compiled);
            }
            return compiled;
        }

        /// <summary>Builds rules matching <c>{{Name|</c> for each (name, topic) row of the table.</summary>
        private static TopicRule[] BuildTemplateRules(string[,] table)
        {
            TopicRule[] rules = new TopicRule[table.GetLength(0)];
            for (int i = 0; i < rules.Length; i++)
            {
                // The name is escaped so that it is always matched literally.
                string pattern = @"{{\s*" + Regex.Escape(table[i, 0]) + @"[\r\n]?[\r\n]?\|";
                rules[i] = new TopicRule(new Regex(pattern, RegexOptions.IgnoreCase), table[i, 1]);
            }
            return rules;
        }

        /// <summary>Builds rules matching <c>[[Categoria:Name]]</c> for each (name, topic) row of the table.</summary>
        private static TopicRule[] BuildCategoryRules(string[,] table)
        {
            TopicRule[] rules = new TopicRule[table.GetLength(0)];
            for (int i = 0; i < rules.Length; i++)
            {
                string pattern = @"\[\[" + Regex.Escape("Categoria:" + table[i, 0]) + @"\]\]";
                rules[i] = new TopicRule(new Regex(pattern, RegexOptions.IgnoreCase), table[i, 1]);
            }
            return rules;
        }

        /// <summary>Returns true when at least one of the patterns matches the text.</summary>
        private static bool MatchesAny(Regex[] patterns, string text)
        {
            foreach (Regex pattern in patterns)
            {
                if (pattern.IsMatch(text))
                {
                    return true;
                }
            }
            return false;
        }

        /// <summary>
        /// Returns true when the wikitext must not receive {{F}}: it already has a relevant
        /// template, is a date/number page, has a notes/bibliography/external-links section,
        /// or contains a reference tag or a URL.
        /// </summary>
        private static bool IsExcludedFromF(string text)
        {
            if (MatchesAny(ExclusionPatterns, text))
            {
                return true;
            }

            foreach (string marker in SourceMarkers)
            {
                if (text.IndexOf(marker, StringComparison.OrdinalIgnoreCase) != -1)
                {
                    return true;
                }
            }
            return false;
        }

        /// <summary>
        /// Derives the {{F}} topic from the wikitext, trying infobox templates first, then
        /// categories, then the {{portale}} parameters. Returns null when nothing matches.
        /// </summary>
        private static string FindTopic(string text)
        {
            foreach (TopicRule rule in TemplateTopics)
            {
                if (rule.Pattern.IsMatch(text))
                {
                    return rule.Topic;
                }
            }

            foreach (TopicRule rule in CategoryTopics)
            {
                if (rule.Pattern.IsMatch(text))
                {
                    return rule.Topic;
                }
            }

            // Empty when the page has no {{portale|...}} call, in which case no portal name is found.
            string portalArgs = PortalTemplate.Match(text).Groups[2].Value;
            for (int i = 0; i < PortalTopics.GetLength(0); i++)
            {
                if (portalArgs.IndexOf(PortalTopics[i, 0], StringComparison.OrdinalIgnoreCase) != -1)
                {
                    return PortalTopics[i, 1];
                }
            }

            return null;
        }

        /// <summary>
        /// Collects the body of a dump text element. <paramref name="firstLine"/> is what follows
        /// the opening tag on its line; further lines are read until one contains the closing tag
        /// or the input ends.
        /// </summary>
        private static string ReadPageText(System.IO.TextReader reader, string firstLine)
        {
            const string CloseTag = "</text>";

            // Short pages open and close the element on the same line: do not read any further,
            // otherwise the following page(s) would be swallowed up to the next closing tag.
            int closeAt = firstLine.IndexOf(CloseTag, StringComparison.Ordinal);
            if (closeAt != -1)
            {
                return firstLine.Substring(0, closeAt);
            }

            StringBuilder body = new StringBuilder(firstLine);
            string line;
            while ((line = reader.ReadLine()) != null) // null: truncated dump, keep what was read
            {
                if (line.IndexOf(CloseTag, StringComparison.Ordinal) != -1)
                {
                    string tail = line.Replace(CloseTag, "").Trim();
                    if (tail != "")
                    {
                        body.Append(Environment.NewLine).Append(tail);
                    }
                    break;
                }
                body.Append(Environment.NewLine).Append(line);
            }
            return body.ToString();
        }

        /// <summary>
        /// Turns a newline-separated list of titles into the pipe-joined batches expected by the
        /// API wrapper. Blank entries are dropped; an empty list yields no batches.
        /// </summary>
        private static List<string> BuildTitleChunks(string strList)
        {
            if (string.IsNullOrEmpty(strList))
            {
                return new List<string>();
            }

            string[] titles = strList
                .Split(new string[] { Environment.NewLine }, StringSplitOptions.RemoveEmptyEntries)
                .Select(t => t.Trim())
                .Where(t => t.Length > 0)
                .ToArray();
            if (titles.Length == 0)
            {
                return new List<string>();
            }

            // Presumably SplitInChunk groups the pipe-joined titles into batches of at most 500 - TODO confirm.
            return Utility.SplitInChunk(string.Join("|", titles), 500);
        }
    }
}