User:Magnus Manske/authority control.js

From Wikidata
Jump to navigation Jump to search

Note: After publishing, you may have to bypass your browser's cache to see the changes.

  • Firefox / Safari: Hold Shift while clicking Reload, or press either Ctrl-F5 or Ctrl-R (⌘-R on a Mac)
  • Google Chrome: Press Ctrl-Shift-R (⌘-Shift-R on a Mac)
  • Internet Explorer / Edge: Hold Ctrl while clicking Refresh, or press Ctrl-F5
  • Opera: Press Ctrl-F5.
// <nowiki>
/*
To add this gadget, put the following line on your common.js User subpage:

 importScript( 'User:Magnus_Manske/authority_control.js' );

*/

let authority_control = function () {

mw.loader.using(['vue', '@wikimedia/codex', 'jquery.ui', 'mediawiki.api'], function ( require ) {
	
	
	const CdxButton = require( '@wikimedia/codex' ).CdxButton;
	const CdxRadio = require( '@wikimedia/codex' ).CdxRadio;
	const CdxTextInput = require( '@wikimedia/codex' ).CdxTextInput;
	const CdxToggleSwitch = require( '@wikimedia/codex' ).CdxToggleSwitch;

	const AuthorityControlApp = {
		template: `
<div id='authority_control_container' style='padding: 0.2rem;'>


<div v-if='hasNoResults()'>
	<i>no results</i>
</div>

<div v-else-if='running>0'>
	<i>{{running}} running</i>
</div>

<div v-else>

<table class="table">
<tbody>
<tr v-for='result in results' :style='(result.status=="NO DATA" || result.status=="NO PAGE" || result.data.length==0?"display: none":"")'>
<th nowrap style='vertical-align: top;'>
	<span v-if='typeof result.page_url=="undefined"'>{{result.site}}</span>
	<a v-else target='_blank' class='external' :href='result.page_url'>{{result.site}}</a>
</th>
<td v-if='result.status=="OK"' style='width: 100%;'>
	<div v-for='data in result.data' style='display: flex; border-top: 1px solid #DDD;'>
		<div style='min-width: 2rem; text-align: center; border-right: 1px solid #DDD'>
			<div v-if='canAddAccData(data)'>
				<a href='#' @click.prevent='addAllIDs(data)' title='Add these AC data to the item'>⊕</a>
			</div>
			<div v-if='typeof data.url!="undefined"'>
				<a :href='data.url' class='external' target='_blank'>⇒</a>
			</div>
		</div>
		<div style='min-width: 15rem;'>
			<table style='border-spacing: 0px;'>
				<tr v-for='id in data.ids'>
					<td style='text-align: center;'>
						<span v-if='idExists(id)'>✓</span>
						<span v-else-if='id.code=="WKP"'></span>
						<a v-else href='#' @click.prevent='addID(id)' title='Add this AC value to the item'>⊕</a>
					</td>
					<td :style='getIdColor(id)+";width: 4rem;"'>{{id.code}}</td>
					<td :style='getIdColor(id)'>
						<span v-if='id.code=="WKP" && id.value!=q'>
						<a :href='"/wiki/"+id.value'>{{id.value}}</a>
						</span>
						<span v-else>
							{{id.value}}
						</span>
					</td>
				</tr>
			</table>
		</div>
		<div style='font-size: 8pt;'>
			<div v-for='comment in data.comments' v-html='renderComment(comment)'></div>
		</div>
	</div>
</td>
<td v-else style='color:#DDD'>{{result.status}}</td>
</tr>
</tbody>
</table>
</div>

</div>`,
		// Initial reactive state of the component.
		data() { return { 
			// Action API endpoint of the wiki this script runs on (wgServer + /w/api.php)
			api: mw.config.get('wgServer')+'/w/api.php' ,
			// Sites handed one by one to searchOnWP() by runQueries()
			languages_to_search: ['en','de','fr','ja','es','commons'],
			// One entry per searched site; entries are pushed by searchOnWP() and searchVIAF()
			results: [],
			// Number of lookups still in flight; the template shows a progress note while it is > 0
			running: 0,
			// Title of the current page (set from wgTitle in mounted())
			q: '',
			// Year strings used to refine the VIAF search and to highlight comments; '' when unknown
			birth_year: '', death_year:'',
			// Lookup of "<key>:<value>" strings for identifiers already present on the item
			existing_ids : {} ,
			// Maps a source code (template parameter name or VIAF source prefix) to its canonical
			// key and to the numeric part of the Wikidata property (p). p:0 means "no property":
			// addID() skips such codes. Several codes may share one property (aliases); the order
			// matters because convertProperty() keeps the last entry matching a property number.
			allowed_codes : {
				WKP:{key:'WKP',p:0} ,
				NSZL:{key:'NSZL',p:951} ,
				BNE:{key:'BNE',p:950} ,
				BNC:{key:'BNC',p:9984} , /* BNC = CANTIC */
				BIBSYS:{key:'BIBSYS',p:1015} , /* "Norway (BIBSYS)" ; [[viaf:106122570]] */
				BAV:{key:'BAV',p:8034} , /* "Vatican Library" ; WMF: 'la' */
				EGAXA:{key:'EGAXA',p:1309} , /* "Bibliotheca Alexandrina (Egypt)" ; [[viaf:102319859]] */
				xA:{key:'xA',p:0} ,
				VIAF:{key:'VIAF',p:214} ,
				DNB:{key:'GND',p:227} ,
				GND:{key:'GND',p:227} ,
				PND:{key:'GND',p:227} ,
				LC:{key:'LCCN',p:244} ,
				LCCN:{key:'LCCN',p:244} ,
				SUDOC:{key:'SUDOC',p:269} ,
				NDL:{key:'NDL',p:349} ,
				NUKAT:{key:'NUKAT',p:1207} , /* "NUKAT Center (Poland)" ; Ludwik Lejzer Zamenhof ([[Q11758]]) [[viaf:73885295]] */
				CINII:{key:'CINII',p:271} ,
				ISNI:{key:'ISNI',p:213} ,
				JPG:{key:'ULAN',p:245} ,
				ULAN:{key:'ULAN',p:245} ,
				SELIBR:{key:'LIBRIS',p:906} ,
				LIBRIS:{key:'LIBRIS',p:906} ,
				NLA:{key:'NLA',p:409} ,
				NKC:{key:'NKC',p:691} ,
				ICCU:{key:'ICCU',p:396} ,
				BNF:{key:'BNF',p:268} ,
				BPN:{key:'BPN',p:651} ,
				NLI:{key:'NLI',p:949} , /* National Library of Israel */
				NLR:{key:'NLR',p:7029} , /* National Library of Russia */
				PTBNP:{key:'PTBNP',p:1005} , /* "Biblioteca Nacional de Portugal" */
				NTA:{key:'NTA',p:1006} , /* "National Library of the Netherlands" */
				N6I:{key:'N6I',p:10227} , /* National Library of Ireland */
				LAC:{key:'LAC',p:1670} , /* Library of Canada */
				ORCID:{key:'ORCID',p:496}
			} ,
		} },
		mounted : function () {
			this.q = mw.config.get("wgTitle") ;
			this.runQueries();
		} ,
		methods: {
			runQueries : function () {
				let self = this ;
				self.results = [] ;
				self.detectDatesFromItem();
				self.loadExistingIDs();
				$.each ( self.languages_to_search , function ( dummy , language ) { self.searchOnWP ( language ) } ) ;
				self.searchVIAF() ;
			} ,
			loadExistingIDs : function () {
				let self = this ;
				self.existing_ids = {};
				let q = self.q;
				(new mw.Api())
				.get( {
					action: 'wbgetentities',
					ids: q
				} )
				.then( function ( data ) {
					let item = data.entities[q] ;
					$.each ( (item.claims||{}) , function ( property , claims ) {
						let internal_prop_name = self.convertProperty ( property ) ;
						if ( internal_prop_name == '' ) return ;
						$.each ( claims , function ( dummy , claim ) {
							if ( !claim.type=='statement' ) return ;
							if ( typeof claim.mainsnak=='undefined' ) return ;
							if ( claim.mainsnak.datatype!='external-id' ) return ;
							if ( typeof claim.mainsnak.datavalue=='undefined' ) return ;
							if ( typeof claim.mainsnak.datavalue.value=='undefined' ) return ;
							let key = internal_prop_name+":"+claim.mainsnak.datavalue.value ;
							self.existing_ids[key] = 1 ;
						} ) ;
					} ) ;
					//console.log(JSON.parse(JSON.stringify(self.existing_ids)));
				} )
				.catch( function(e){self.onError(e)} );

			} ,
			convertProperty : function ( property ) {
				let self = this ;
				let ret = '' ;
				let prop_num = property.replace(/\D/g,'')*1 ;
				$.each ( self.allowed_codes , function ( ac ) {
					if ( self.allowed_codes[ac].p == prop_num ) ret = self.allowed_codes[ac].key ;
				} ) ;
				return ret ;
			} ,
			getArticleTitle : function ( language , project ) {
				let article = '' ;
				if ( language == 'commons' ) {
					article = $($('#P1472 .wikibase-snakview-variation-valuesnak').get(0)).text();
				} else {
					let lookfor = 'https://' + language + '.'+project+'.org/wiki/' ;
					$('div.wikibase-sitelinkgrouplistview a').each ( function () {
						let href = $(this).attr('href') ;
						if ( undefined === href || href.substr(0,lookfor.length) != lookfor ) return ;
						article = $(this).text() ;
						return false ;
					} ) ;
				}
				return article ;
			} ,
			searchOnWP : function ( language ) {
				let self = this ;
				self.running++ ;
				let project = language=='commons'?'wikimedia':'wikipedia' ;
				let result = {
					id: language,
					site: (language=='commons'?language:language+'.'+project),
					status: 'RUNNING',
					data: []
				} ;
				let result_id = self.results.length ;
				self.results.push ( result ) ;

				let article = self.getArticleTitle ( language , project ) ;
				if ( article == '' ) {
					result.status = 'NO PAGE' ;
					self.running-- ;
					return ;
				}
				let server = "https://"+language+"."+project+".org" ;
				//result.page_name = article ;
				result.page_url = server+'/wiki/'+encodeURIComponent(article);
				$.get(
					self.api,
					{
						format: 'json',
						action: 'query',
						prop: 'revisions',
						rvprop: 'content',
						rvlimit: 1,
						titles: article
					},
					'json'
				)
				.then(function ( data ) {
					let page, wikitext;
					if ( typeof data['-1'] !== 'undefined' ) {
						try {
							console.log(data.query.pages);
							for ( page in data.query.pages ) {
								console.log(page);
								let wikitext = data.query.pages[page].revisions[0]['*'];
								self.processWikitext ( language , wikitext , self.results[result_id] ) ;
								if ( self.results[result_id].data.length == 0 ) self.results[result_id].status = 'NO DATA' ;
								else self.results[result_id].status = 'OK' ;
							}
						} catch ( e ) {
							console.log(e);
							//self.results[result_id].status = 'PAGE PARSING FAILED' ;
						}
					}
					self.running-- ;
				})
				.catch( function(){
					self.running-- ;
					//self.results[result_id].status = 'API REQUEST FAILED' ;
					self.onError('Error for getting '+article+' on '+self.api)
				});
			} ,
			detectDatesFromItem : function () {
				let self = this ;
				self.birth_year = $($('#P569 .wikibase-snakview-variation-valuesnak').get(0)).text().replace(/^.*?(\d{3,4}).*$/,'$1') ;
				self.death_year = $($('#P570 .wikibase-snakview-variation-valuesnak').get(0)).text().replace(/^.*?(\d{3,4}).*$/,'$1') ;
			} ,
			setDatesFromPatterns : function ( birth_pattern , death_pattern ) {
				let m ;
				m=wikitext.match(birth_pattern); if(m!=null)self.birth_year=m[1] ;
				m=wikitext.match(death_pattern); if(m!=null)self.death_year=m[1] ;
			} ,
			detectDatesFromWikitext : function ( wikitext ) {
				let self = this ;
				if ( self.birth_year+self.death_year != '' ) return ;
				self.setDatesFromPatterns(/:(\d+)[_ ]births\s*\]\]/,/:(\d+)[_ ]deaths\s*\]\]/) ;
				self.setDatesFromPatterns(/:[Gg]eboren[_ ](\d+)\s*\]\]/,/:[Gg]estorben[_ ](\d+)\s*\]\]/);
				self.setDatesFromPatterns(/:[Nn]aissance[_ ]en[_ ](\d+)\s*\]\]/,/:[Dd]écès[_ ]en[_ ](\d+)\s*\]\]/);
				self.setDatesFromPatterns(/:[Nn]acidos[_ ]en[_ ](\d+)\s*\]\]/,/:[Ff]allecidos[_ ]en[_ ](\d+)\s*\]\]/);
				self.setDatesFromPatterns(/\{\{NF\|(\d+)\|\d+/,/\{\{NF\|\d+\|(\d+)/);
			} ,
			processWikitext : function ( language , wikitext , result ) {
				let self = this ;
				self.detectDatesFromWikitext ( wikitext ) ;
				self.getAuthorityControlDataFromWikitext ( language , wikitext , result ) ;
			} ,
			getAuthorityControlDataFromWikitext : function ( language , wikitext , result ) {
				let self = this ;
				let st = null ;
				if ( language == 'en' || language == 'commons' ) st = wikitext.match ( /{{Authority[ _]control(.+?)}}/i ) ;
				else if ( language == 'de' ) st = wikitext.match ( /{{Normdaten(.+?)}}/i ) ;
				else if ( language == 'ja' ) st = wikitext.match ( /{{Normdaten(.+?)}}/i ) ;
				else if ( language == 'fr' ) st = wikitext.match ( /{{Autorité\s*\|(.+?)}}/i ) ;
				if ( st == null ) return ;
				let kv = {} ;
				let data = [] ;
				$.each ( st[1].split ( '|' ) , function ( dummy , kvp ) {
					let m = kvp.match ( /^\s*([a-z]+)\s*=\s*(.+?)\s*$/i ) ;
					if ( m == null ) return ;
					let code = $.trim(m[1]).toUpperCase() ;
					if ( undefined === self.allowed_codes[code] ) return ;
					let value = self.normaliseCodeValue ( code , m[2] ) ;
					data.push ( { code:code , value:value } ) ;
				} ) ;
				if ( data.length>0 ) {
					result.data.push ( { ids:data , comments:["From authority control template"] } ) ;
				}
			} ,
			normaliseCodeValue : function ( code , value ) {
				value = $.trim ( value ) ;
				value = value.replace(/\s/g,'');
				
				if ( code == 'LCCN' ) {
					let m = value.match ( /^(.+)\/(\d+)\/(\d+)/ ) ;
					if ( m != null ) {
						while ( m[3].length < 6 ) m[3] = '0' + m[3] ;
						value = m[1] + '/' + m[2] + '/' + m[3] ;
					}
					value = value.replace(/\//g,'') ;
				}
				if ( code == 'NLR' ) {
					value = value.replace ( /^RUNLRAUTH/i , '' ) ;
				}
				if ( code == 'BAV' ) {
					value = value.replace ( /_/ , '/' ) ;
				}
				if ( code == 'BNF' ) {
					value = 'cb' + value.replace ( /\D/g , '' ) ; // Digits only
					let bnf_xdigits = '0123456789bcdfghjkmnpqrstvwxz'; // A few lines from https://en.wikisource.org/wiki/User:Inductiveload/BnF_ARK_format
					let bnf_check_digit = 0;
					
					for (let i=0; i < value.length; i++){
						bnf_check_digit += bnf_xdigits.indexOf(value[i]) * (i+1);
					}
					value = value.substr(2) + bnf_xdigits[bnf_check_digit % bnf_xdigits.length]; //29 is the radix
				}
				if ( code == 'EGAXA' ) {
					value = value.replace ( /^vtls/i , '' ) ;
				}
				if ( code == 'NLA' ) {
					value = value.replace ( /^0+/i , '' ) ;
				}
				return value ;
			} ,
			getYearsString : function () {
				let self = this ;
				if ( self.birth_year+self.death_year=='' ) return '' ;
				if ( self.birth_year=='' ) return self.death_year ;
				if ( self.death_year=='' ) return self.birth_year ;
				return self.birth_year+" "+self.death_year ;
			} ,

			searchVIAF : function ( use_dates = true , result_id = null ) {
				let self = this ;
				if ( result_id===null) self.running++ ;
				let name = $('h1 span.wikibase-title-label').text().replace(/\([PQ]\d+\)$/,'') ;

				if ( result_id == null ) {
					let result = {
						id: 'viaf',
						site: 'VIAF',
						status: 'RUNNING',
						data: []
					} ;
					result_id = self.results.length ;
					self.results.push ( result ) ;
				}


				let search_key = name ;
				let dates = self.getYearsString() ;
				if ( use_dates && dates!='' ) search_key += ' ' + dates ;
				self.results[result_id].page_url = 'https://viaf.org/viaf/search?query=local.names%20all%20%22'+encodeURIComponent(search_key)+'%22&sortKeys=holdingscount&recordSchema=BriefVIAF' ;

				$.ajax ( {
					dataType: "json",
					url : '//magnustools.toolforge.org/authority_control.php?callback=?' ,
					data : { query : 'viaf' , key : search_key , r:Math.random() } ,
					timeout : 15000 ,
					error : function () {
						self.results[result_id] = "VIAF/TOOLFORGE ERROR" ;
						self.running-- ;
					} ,
					success : function ( d ) {
						let xml = $.parseXML ( d.result ) ;
						let vd = $(xml) ;
						let result = self.results[result_id] ;
						let viaf_record_id = 1 ;
						vd.find('record').each ( function ( dummy_id , record ) {
							viaf_record_id++ ;
							let record_result = self.parseVIAFrecord ( viaf_record_id , record , result_id ) ;
							if ( record_result.ids.length == 0 ) return ;
							self.results[result_id].data.push(record_result);
						} ) ;
						if ( self.results[result_id].data.length == 0 && use_dates && dates!='' ) {
							return self.searchVIAF ( false , result_id ) ; // Try again, without dates
						}
						self.results[result_id].status = 'OK' ;
						self.running-- ;

					}
				} ) ;
			} ,
			parseVIAFrecord : function ( viaf_record_id , record , result_id ) {
				let self = this ;
				self.current_viaf_record_id = viaf_record_id ;
				let r = $(record) ;
				let data = { ids : [] , comments : [] } ;
				
				data.birth_date = $(r.find(self.getNodeName('birthDate'))).text() ;
				data.death_date = $(r.find(self.getNodeName('deathDate'))).text() ;

				let viaf = r.find(self.getNodeName('viafID')).text() ;
				data.ids.push ( { code:'VIAF' , value:viaf } ) ;
				data.url = "http://viaf.org/viaf/" + viaf + "/" ;

				r.find(self.getNodeName('source')).each ( function ( k , v ) {
					if ( ($(v).attr('differentiated')||'').toLowerCase() == 'false' ) return ;
					let n = $(v).text().split('|') ;
					if ( n.length != 2 ) return ;
					if ( undefined === self.allowed_codes[n[0].toUpperCase()] ) return ;
					let thecode = self.allowed_codes[n[0].toUpperCase()].key ;
					data.ids.push ( { code:thecode , value:self.normaliseCodeValue(thecode,n[1]) } ) ;
				} ) ;

				r.find(self.getNodeName('mainHeadings')).each ( function ( k , v ) {
					$($(v).find(self.getNodeName('text'))).each ( function ( k2 , v2 ) {
						data.comments.unshift ( $(v2).text() ) ;
					} ) ;
				} ) ;
				r.find(self.getNodeName('titles')).each ( function ( k , v ) {
					$($(v).find(self.getNodeName('text'))).each ( function ( k2 , v2 ) {
						data.comments.push ( $(v2).text() ) ;
					} ) ;
				} ) ;
				
				return data ;
			} ,
			getNodeName : function ( name ) { // Node name; there is a bug with Chrome, but recent jQuery update removed $.browser
				return "ns" + this.current_viaf_record_id + "\\:"+name ;
			} ,
			addAllIDs : function ( data , id_num = 0 ) {
				let self = this ;
				//console.log(JSON.parse(JSON.stringify(data)));
				if ( data.ids.length == 0 ) return ;
				if ( data.ids.length < id_num ) return ;
				self.addID ( data.ids[id_num] , function(){self.addAllIDs(data,id_num+1)} );
				//$.each ( data.ids , function ( dummy , id ) { self.addID ( id ) } ) ;
			} ,
			addID : function ( id , callback = function(){} ) {
				let self = this ;
				if ( typeof id == 'undefined' ) return callback();
				let property = 'P'+(self.allowed_codes[id.code].p||0) ;
				if ( property == 'P0' ) return callback();

				let value = ''+id.value ;
				let property_name = self.convertProperty(property) ;
				let key = property_name+':'+value ;
				if ( typeof self.existing_ids[key] != 'undefined' ) return callback();
				
				let summary = 'using [[User:Magnus Manske/authority control.js|authority control.js]]' ;
				wdutil.addExternalIdStatement(property,value,summary,function(success){
					if ( !success ) return self.onError('Error during statement creation') ;
					self.existing_ids[key] = 1 ;
					callback()
				});
			} ,

			getIdColor : function ( id ) {
				if ( id.code=="WKP" ) {
					let ret = 'font-weight: bold; color: ' ;
					if ( this.q == id.value ) return ret+'green;' ;
					else return ret+'red;' ;
				} else return '' ;
			} ,
			idExists : function ( id ) {
				return typeof this.existing_ids[id.code+":"+id.value]!="undefined" ;
			} ,
			hasNoResults : function () {
				let self = this ;
				if ( self.running>0 ) return false ;
				let result_count = 0 ;
				$.each ( self.results , function ( dummy , result ) {
					result_count += (result.data||[]).length ;
				} ) ;
				return result_count==0 ;
			} ,
			canAddAccData : function ( data ) {
				let self = this ;
				return data.ids.filter(function(id){
					return !self.idExists(id) && id.code!='WKP'
				}).length;
			} ,
			renderComment : function (comment) {
				let self = this ;
				comment = $("<div>").text(comment).html() ;
				let re_born = new RegExp('\\b('+self.birth_year+')\\b');
				let re_died = new RegExp('\\b('+self.death_year+')\\b');
				if ( self.birth_year!='') comment = comment.replace(re_born,'<b>$1</b>');
				if ( self.death_year!='') comment = comment.replace(re_died,'<b>$1</b>');
				return comment ;
			} ,
			onError: function ( message ) {
				console.log(message);
				alert(message);
			} ,
			onClose: function () {
				//this.hideContainer();
			}
		},
		components: {
			CdxButton,
			CdxRadio,
			CdxTextInput,
			CdxToggleSwitch,
		},
	};

	// Code to run on page load
	if ( mw.config.get('wgNamespaceNumber') != 0 ) return ;
	if ( mw.config.get('wgAction') != 'view' ) return ;

	let portletLink = mw.util.addPortletLink( 'p-tb', '#', 'Authority Control','wikitext-wd_ac');
		$(portletLink).click ( function () {
			wdutil_app.addTab({
				name: 'authority_control',
				label: 'Authority control',
			},function(id){
				const authority_control_app = Vue.createMwApp(AuthorityControlApp);
				authority_control_app.mount(id);
			});
			return false ;
		} ) ;

})
};

// Bootstrap: load the shared wdutil helper script and register authority_control to run with it.
// wdutil_app is only declared here; the click handler above calls wdutil_app.addTab(), so it is
// presumably assigned by wdutil.js - confirm against that script.
var wdutil_app ;
//mw.loader.load('https://wikidata-todo.toolforge.org/wdutils.js');
mw.loader.load('https://www.wikidata.org/w/index.php?title=User:Magnus_Manske/wdutil.js&action=raw&ctype=text/javascript');
// If wdutil is already defined (e.g. loaded earlier by another script), register through its API.
if ( typeof wdutil!='undefined' ) wdutil.loadCallback(authority_control);
else {
	// Otherwise queue the callback in a shared list. `var` is deliberate: redeclaring keeps a
	// list that another script may already have created. Presumably wdutil.js runs the queued
	// callbacks once it has loaded - confirm against that script.
	var wdutil_loaded_callbacks ;
	if ( typeof wdutil_loaded_callbacks=='undefined' ) wdutil_loaded_callbacks = [] ;
	wdutil_loaded_callbacks.push(authority_control);
}
// </nowiki>