User:ValterVBot/Source/Wikipedia.cs
< User:ValterVBot | Source
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text.RegularExpressions;
using System.Text;
using System.Threading.Tasks;
using System.Windows.Forms;
using Newtonsoft.Json;
namespace VBot
{
/// <summary>
/// Bot routines for it.wikipedia.org: scanning an XML page dump for articles that
/// show no sign of sources, tagging such articles with the {{F}} template, and
/// performing null edits on a list of pages.
/// </summary>
class Wikipedia
{
    /// <summary>Upper bound on the number of pages a single WriteF run lists.</summary>
    private const int MaxTaggedPages = 200;

    /// <summary>
    /// Patterns whose presence means a page must NOT be proposed for {{F}}: it already
    /// carries a maintenance/disambiguation/citation template, it is a date or number
    /// page, or it has a notes/bibliography/external-links section.
    /// Shared by the dump scan and by WriteF so the two checks cannot drift apart.
    /// </summary>
    private static readonly Regex[] ExclusionRegexes = new[]
    {
        @"{{\s*F\s*}}",                              // {{F}}
        @"{{\s*F[\r\n]?[\r\n]?\|",                   // {{F|
        @"{{\s*S\s*}}",                              // {{S}}
        @"{{\s*S[\r\n]?[\r\n]?\|",                   // {{S|
        @"{{\s*A\s*}}",                              // {{A}}
        @"{{\s*A[\r\n]?[\r\n]?\|",                   // {{A|
        @"{{\s*NN\s*}}",                             // {{NN}}
        @"{{\s*NN[\r\n]?[\r\n]?\|",                  // {{NN|
        @"{{\s*Disambigua\s*}}",                     // {{Disambigua}}
        @"{{\s*Disambigua[\r\n]?[\r\n]?\|",          // {{Disambigua|
        @"{{\s*Controllo di autorità\s*}}",          // {{Controllo di autorità}}
        @"{{\s*Torna a[\r\n]?[\r\n]?\|",             // {{Torna a|
        @"{{\s*Cita\s*}}",                           // {{Cita}}
        @"{{\s*Cita[\r\n]?[\r\n]?\|",                // {{Cita|
        @"{{\s*Cita.+[\r\n]?[\r\n]?\|",              // {{Cita ...|
        // Date pages are excluded.
        @"{{\s*Decennio\s*[|\r\n]*",
        @"{{\s*Anno\s*[|\r\n]*",
        @"{{\s*Secolo\s*[|\r\n]*",
        @"{{\s*Millennio\s*[|\r\n]*",
        @"{{\s*Gennaio\s*[|\r\n]*",
        @"{{\s*Febbraio\s*[|\r\n]*",
        @"{{\s*Marzo\s*[|\r\n]*",
        @"{{\s*Aprile\s*[|\r\n]*",
        @"{{\s*Maggio\s*[|\r\n]*",
        @"{{\s*Giugno\s*[|\r\n]*",
        @"{{\s*Luglio\s*[|\r\n]*",
        @"{{\s*Agosto\s*[|\r\n]*",
        @"{{\s*Settembre\s*[|\r\n]*",
        @"{{\s*Ottobre\s*[|\r\n]*",
        @"{{\s*Novembre\s*[|\r\n]*",
        @"{{\s*Dicembre\s*[|\r\n]*",
        // Number pages are excluded.
        @"{{\s*Numero intero\s*[|\r\n]*",
        // Section headings that indicate the page already lists sources.
        @"===?=?\s*Note\s*===?=?",
        @"===?=?\s*Bibliografia\s*===?=?",
        @"===?=?\s*Collegamenti esterni\s*===?=?",
    }
    .Select(pattern => new Regex(pattern, RegexOptions.IgnoreCase | RegexOptions.CultureInvariant))
    .ToArray();

    /// <summary>Plain substrings that also count as evidence of sources.</summary>
    private static readonly string[] SourceMarkers = { "<ref", "http://", "https://" };

    /// <summary>Captures the argument list of a {{portale|...}} call (group 2).</summary>
    private static readonly Regex PortalRegex = new Regex(
        "({{portale)(\\|.*)+(}})",
        RegexOptions.IgnoreCase | RegexOptions.CultureInvariant | RegexOptions.Compiled);

    /// <summary>Template used on the page -> subject argument for {{F}}.</summary>
    private static readonly Dictionary<string, string> TemArg =
        new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase)
        {
            { "[[Template:Album]]", "album discografici" },
            { "[[Template:Azienda]]", "aziende" },
            { "[[Template:Bio]]", "biografie" },
            { "[[Template:Brano musicale]]", "brani musicali" },
            { "[[Template:College]]", "università" },
            { "[[Template:Università]]", "università" },
            { "[[Template:Composto chimico]]", "sostanze chimiche" },
            { "[[Template:Corpo celeste]]", "astronomia" },
            { "[[Template:Asteroide]]", "astronomia" },
            { "[[Template:Discografia]]", "discografie" },
            { "[[Template:Divisione amministrativa]]", "geografia" },
            { "[[Template:Dramma]]", "teatro" },
            { "[[Template:Opera]]", "teatro" },
            { "[[Template:Spettacolo teatrale]]", "teatro" },
            { "[[Template:Teatro]]", "teatro" },
            { "[[Template:Edificio civile]]", "architettura" },
            { "[[Template:Edificio religioso]]", "architettura" },
            { "[[Template:Festival musicale]]", "festival musicali" },
            { "[[Template:Fiction TV]]", "fiction televisive" },
            { "[[Template:Film]]", "film" },
            { "[[Template:Formazione geologica]]", "geologia" },
            { "[[Template:Roccia]]", "geologia" },
            { "[[Template:Terremoto]]", "geologia" },
            { "[[Template:Fumetto e animazione]]", "anime e manga" },
            { "[[Template:Episodio Anime]]", "anime e manga" },
            { "[[Template:Stagione anime]]", "anime e manga" },
            { "[[Template:Videogioco]]", "videogiochi" },
            { "[[Template:Infobox aeromobile]]", "aviazione" },
            { "[[Template:Infobox aeroporto]]", "aviazione" },
            { "[[Template:Auto]]", "automobili" },
            { "[[Template:Auto1]]", "automobili" },
            { "[[Template:Infobox linea ferroviaria]]", "ferrovie" },
            { "[[Template:Infobox stazione ferroviaria]]", "ferrovie" },
            { "[[Template:Infobox linea metropolitana]]", "metropolitane" },
            { "[[Template:Infobox stazione della metropolitana]]", "metropolitane" },
            { "[[Template:Infobox metropolitana]]", "metropolitane" },
            { "[[Template:Partito politico]]", "partiti politici" },
            { "[[Template:Infobox ponte]]", "ponti" },
            { "[[Template:Libro]]", "opere letterarie" },
            { "[[Template:Minerale]]", "mineralogia" },
            { "[[Template:Montagna]]", "montagna" },
            { "[[Template:Catena montuosa]]", "montagna" },
            { "[[Template:Valico]]", "montagna" },
            { "[[Template:Rifugio]]", "montagna" },
            { "[[Template:Museo]]", "musei" },
            { "[[Template:Opera d'arte]]", "arte" },
            { "[[Template:Prenome]]", "antroponimi" },
            { "[[Template:Sito archeologico]]", "siti archeologici" },
            { "[[Template:Software]]", "software" },
            { "[[Template:Tassobox]]", "biologia" },
            { "[[Template:Isola]]", "geografia" },
            { "[[Template:Infobox isola]]", "geografia" },
        };

    /// <summary>Category link on the page -> subject argument for {{F}}.</summary>
    private static readonly Dictionary<string, string> CatArg =
        new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase)
        {
            { "[[Categoria:Araldica]]", "araldica" },
            { "[[Categoria:Cucina]]", "cucina" },
            { "[[Categoria:Giappone]]", "Giappone" },
            { "[[Categoria:Mitologia]]", "mitologia" },
            { "[[Categoria:Scacchi]]", "scacchi" },
            { "[[Categoria:Vessillologia]]", "vessillologia" },
            { "[[Categoria:Prenome]]", "antroponimi" },
            { "[[Categoria:Personaggi cinematografici]]", "personaggi cinematografici" },
        };

    /// <summary>Portal named in {{portale|...}} -> subject argument for {{F}}.</summary>
    private static readonly Dictionary<string, string> PorArg =
        new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase)
        {
            { "Astronomia", "astronomia" },
            { "Vessillologia", "vessillologia" },
            { "Religioni", "religione" },
            { "Aviazione", "aviazione" },
            { "Chimica", "chimica" },
            { "Geografia", "geografia" },
            { "Tennis", "tennis" },
            { "Matematica", "matematica" },
            { "Astronautica", "astronautica" },
            { "Mitologia", "mitologia" },
            { "Letteratura", "letteratura" },
            { "Oggetti del profondo cielo", "astronomia" },
            { "Sistema solare", "astronomia" },
            { "Psicologia", "psicologia" },
        };

    /// <summary>
    /// Scans an XML page dump line by line and writes the title of every
    /// namespace-0, non-redirect page that shows none of the exclusion markers
    /// to the file <c>OutFile + "Candidate to F.txt"</c> (UTF-8, overwritten).
    /// </summary>
    /// <param name="Mess">Text box that receives the final count of candidates.</param>
    /// <param name="DumpFile">Path of the dump file to read (UTF-8).</param>
    /// <param name="OutFile">Prefix of the output file path.</param>
    public static void CheckForF_Dump(TextBox Mess, string DumpFile, string OutFile)
    {
        const string TextOpenTag = "<text xml:space=\"preserve\">";

        string title = "";
        string ns = "";
        string redirect_to = "";
        int conta = 0;

        // 'using' guarantees both files are closed even when parsing throws.
        using (System.IO.StreamWriter log = new System.IO.StreamWriter(OutFile + "Candidate to F" + ".txt", false, Encoding.UTF8))
        using (System.IO.StreamReader file = new System.IO.StreamReader(DumpFile, Encoding.UTF8))
        {
            string line;
            while ((line = file.ReadLine()) != null)
            {
                if (line.Contains("<title>")) // title of the page: a new page starts here
                {
                    // Trim instead of a fixed Substring(4) so the indentation width does not
                    // matter; decode entities so "&amp;" in a title becomes "&".
                    title = System.Net.WebUtility.HtmlDecode(
                        line.Replace("<title>", "").Replace("</title>", "").Trim());
                    redirect_to = "";
                    continue;
                }

                if (line.Contains("<ns>") && line.Contains("</ns>")) // namespace of the page
                {
                    ns = line.Replace("<ns>", "").Replace("</ns>", "").Trim();
                    continue;
                }

                if (line.Contains("<redirect title=")) // the page is a redirect
                {
                    redirect_to = line.Replace("<redirect title=\"", "").Replace("\" />", "").Trim();
                    continue;
                }

                int textAt = line.IndexOf(TextOpenTag, StringComparison.Ordinal);
                if (textAt == -1 || ns != "0" || redirect_to != "")
                {
                    continue;
                }

                // Text of the page: everything after the opening tag, up to </text>.
                string rawText = ReadPageText(file, line.Substring(textAt + TextOpenTag.Length));
                string text = System.Net.WebUtility.HtmlDecode(rawText); // wiki text

                if (!HasSourceOrExclusionMarker(text))
                {
                    log.WriteLine(title);
                    conta += 1;
                }
            }
        }

        Mess.AppendText("risultato: " + conta.ToString());
    }

    /// <summary>
    /// For each listed page that shows no exclusion marker and for which a subject can
    /// be derived (from a known template, category or portal), prepends an {{F}} tag
    /// and saves the page, unless the page carries a {{Nota disambigua}} hatnote, in
    /// which case it is only listed. A wikitext report of the listed pages is placed
    /// in <paramref name="Mess"/>. At most <c>MaxTaggedPages</c> pages are listed per run.
    /// </summary>
    /// <param name="Mess">Text box that receives the start time and then the report.</param>
    /// <param name="strList">Page titles, one per line.</param>
    /// <param name="DumpFile">Not used by this method.</param>
    /// <param name="OutFile">Not used by this method.</param>
    /// <param name="user">User name passed to WikimediaAPI.</param>
    /// <param name="password">Password passed to WikimediaAPI.</param>
    public static void WriteF(TextBox Mess, string strList, string DumpFile, string OutFile, string user, string password)
    {
        MessageBox.Show("Aggiungere la modifica per le Note Disambigue");
        Mess.AppendText("Iniziato alle " + DateTime.Now.ToString() + Environment.NewLine);

        List<string> chunks = BuildTitleChunks(strList);
        if (chunks.Count == 0)
        {
            // Nothing to do; the original threw ArgumentOutOfRangeException here.
            Mess.Text = "";
            return;
        }

        WikimediaAPI WP = new WikimediaAPI("https://it.wikipedia.org", user, password);
        StringBuilder res = new StringBuilder();
        int cont = 0;

        foreach (string chunk in chunks)
        {
            // The cap applies to the whole run, not just to the chunk in which it is reached.
            if (cont >= MaxTaggedPages)
            {
                break;
            }

            Pages pages = JsonConvert.DeserializeObject<Pages>(WP.LoadWP(chunk));
            foreach (Page p in pages.query.pages.Values)
            {
                if (cont >= MaxTaggedPages)
                {
                    break;
                }

                // NOTE(review): presumably the API returns entries without revisions
                // for titles that do not exist; skip them instead of crashing - confirm.
                if (p.revisions == null)
                {
                    continue;
                }

                string originalText = p.revisions[0].text;
                string text = System.Net.WebUtility.HtmlDecode(originalText); // wiki text
                if (HasSourceOrExclusionMarker(text))
                {
                    continue;
                }

                string argument = FindArgument(text);
                if (argument == null)
                {
                    // NOTE(review): the original built a tag with an empty argument here
                    // but never saved or reported it; that behaviour (skip) is preserved.
                    // Confirm whether such pages should be tagged as well.
                    continue;
                }

                string F = BuildFTag(argument);

                // Pages opening with a {{Nota disambigua}} hatnote are not edited
                // (the tag would end up above the hatnote); they are still reported.
                if (!HasDisambiguationNote(text))
                {
                    WP.SavePage(p.title, F + Environment.NewLine + originalText, "BOT: Add template F");
                }

                res.Append("*[[" + p.title + "]] \t <nowiki>" + F + "</nowiki>" + Environment.NewLine);
                cont++;
            }
        }

        Mess.Text = res.ToString();
    }

    /// <summary>
    /// Re-saves every listed page with its current text and an empty edit summary.
    /// </summary>
    /// <param name="strList">Page titles, one per line.</param>
    /// <param name="user">User name passed to WikimediaAPI.</param>
    /// <param name="password">Password passed to WikimediaAPI.</param>
    public static void NullEdit(string strList, string user, string password)
    {
        List<string> chunks = BuildTitleChunks(strList);
        if (chunks.Count == 0)
        {
            // Nothing to do; the original threw ArgumentOutOfRangeException here.
            return;
        }

        WikimediaAPI WP = new WikimediaAPI("https://it.wikipedia.org", user, password);
        foreach (string chunk in chunks)
        {
            Pages pages = JsonConvert.DeserializeObject<Pages>(WP.LoadWP(chunk));
            foreach (Page p in pages.query.pages.Values)
            {
                // NOTE(review): presumably entries without revisions are missing pages - confirm.
                if (p.revisions == null)
                {
                    continue;
                }

                WP.SavePage(p.title, p.revisions[0].text, "");
            }
        }
    }

    /// <summary>
    /// Splits a newline-separated title list, trims each title, drops blank entries,
    /// joins the titles with "|" and hands the result to Utility.SplitInChunk with
    /// the value 500. Returns an empty list when there are no titles.
    /// </summary>
    private static List<string> BuildTitleChunks(string strList)
    {
        string[] titles = (strList ?? "")
            .Split(new[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries)
            .Select(t => t.Trim())
            .Where(t => t.Length > 0)
            .ToArray();

        if (titles.Length == 0)
        {
            return new List<string>();
        }

        return Utility.SplitInChunk(string.Join("|", titles), 500);
    }

    /// <summary>
    /// Collects the text of one page. <paramref name="firstLine"/> is what followed
    /// the opening text tag; further lines are read from <paramref name="reader"/>
    /// until one contains "&lt;/text&gt;" or the input ends.
    /// </summary>
    private static string ReadPageText(System.IO.TextReader reader, string firstLine)
    {
        const string TextCloseTag = "</text>";

        // Short pages open and close on the same line; reading on would swallow
        // the following pages up to their closing tag.
        int closeAt = firstLine.IndexOf(TextCloseTag, StringComparison.Ordinal);
        if (closeAt != -1)
        {
            return firstLine.Substring(0, closeAt);
        }

        StringBuilder text = new StringBuilder(firstLine);
        string line;
        while ((line = reader.ReadLine()) != null) // null: truncated input, keep what we have
        {
            if (line.IndexOf(TextCloseTag, StringComparison.Ordinal) != -1)
            {
                string tail = line.Replace(TextCloseTag, "").Trim();
                if (tail != "")
                {
                    text.Append(Environment.NewLine).Append(tail);
                }
                break;
            }

            text.Append(Environment.NewLine).Append(line);
        }

        return text.ToString();
    }

    /// <summary>
    /// True when the wiki text matches any exclusion pattern or contains a
    /// reference tag or an http/https link.
    /// </summary>
    private static bool HasSourceOrExclusionMarker(string text)
    {
        foreach (Regex exclusion in ExclusionRegexes)
        {
            if (exclusion.IsMatch(text))
            {
                return true;
            }
        }

        foreach (string marker in SourceMarkers)
        {
            if (text.IndexOf(marker, StringComparison.CurrentCultureIgnoreCase) != -1)
            {
                return true;
            }
        }

        return false;
    }

    /// <summary>
    /// Derives the subject argument for {{F}}: first from a known template used on
    /// the page, then from a known category, then from the {{portale}} arguments.
    /// Returns null when nothing matches.
    /// </summary>
    private static string FindArgument(string text)
    {
        // Check the templates.
        foreach (KeyValuePair<string, string> templ in TemArg)
        {
            string name = templ.Key.Replace("[[Template:", "").Replace("]]", "");
            if (Regex.IsMatch(text, @"{{\s*" + Regex.Escape(name) + @"[\r\n]?[\r\n]?\|", RegexOptions.IgnoreCase))
            {
                return templ.Value;
            }
        }

        // Check the categories.
        foreach (KeyValuePair<string, string> cat in CatArg)
        {
            string name = cat.Key.Replace("[", "").Replace("]", "");
            if (Regex.IsMatch(text, @"\[\[" + Regex.Escape(name) + @"\]\]", RegexOptions.IgnoreCase))
            {
                return cat.Value;
            }
        }

        // Check the portals; an unmatched regex yields an empty group, so nothing is found.
        string portalArgs = PortalRegex.Match(text).Groups[2].Value;
        foreach (KeyValuePair<string, string> por in PorArg)
        {
            if (portalArgs.IndexOf(por.Key, StringComparison.CurrentCultureIgnoreCase) != -1)
            {
                return por.Value;
            }
        }

        return null;
    }

    /// <summary>
    /// Builds "{{F|argument|month year}}" for the current date. The page is written to
    /// it.wikipedia.org, so the month name is formatted with the Italian culture
    /// instead of whatever culture the machine happens to run under.
    /// </summary>
    private static string BuildFTag(string argument)
    {
        DateTime now = DateTime.Now;
        System.Globalization.CultureInfo italian = System.Globalization.CultureInfo.GetCultureInfo("it-IT");
        return "{{F|" + argument + "|" + now.ToString("MMMM", italian) + " " + now.Year + "}}";
    }

    /// <summary>
    /// True when the text contains {{Nota disambigua}} or {{Nota disambigua|...}}.
    /// </summary>
    private static bool HasDisambiguationNote(string text)
    {
        // The original second test looked for "{{Disambigua|", which the exclusion
        // list has already ruled out by this point, so it could never match.
        return Regex.IsMatch(text, @"{{\s*Nota disambigua\s*}}", RegexOptions.IgnoreCase)
            || Regex.IsMatch(text, @"{{\s*Nota disambigua[\r\n]?[\r\n]?\|", RegexOptions.IgnoreCase);
    }
}
}