User:1Veertje/identifierInput.js

From Wikidata
Jump to navigation Jump to search

Note: After publishing, you may have to bypass your browser's cache to see the changes.

  • Firefox / Safari: Hold Shift while clicking Reload, or press either Ctrl-F5 or Ctrl-R (⌘-R on a Mac)
  • Google Chrome: Press Ctrl-Shift-R (⌘-Shift-R on a Mac)
  • Internet Explorer / Edge: Hold Ctrl while clicking Refresh, or press Ctrl-F5
  • Opera: Press Ctrl-F5.
// XRegExp: JavaScript library for newer RegEx features, used by inputHandler.
importScript('User:1Veertje/xregexp.js');

// Properties for which inputHandler must not touch the input at all.
var ignorelist = [
	'P4174', 'P6305', // #FIXME these make the browser crash. Big problem 2021-02-11
	'P212', 'P957', // #FIXME trips out on ISBN-numbers
	'P356' // DOI: "invalid group error"
];

// Sentinel object thrown to leave a forEach loop early.
var BreakException = {};
// Once the DOM is ready, watch the entity view for newly inserted nodes and
// make sure every value input has inputHandler attached exactly once.
$( document ).ready(function() {
	const $entityView = $('.wikibase-entityview-main');
	if (!$entityView.length || typeof MutationObserver === 'undefined') {
		return;
	}

	// The individual mutations are irrelevant: any change may have added new
	// inputs, so the handler is simply re-attached everywhere. off() before on()
	// keeps an input from ending up with the handler bound twice.
	const rebindInputs = function() {
		$('.valueview-input').off( "input", inputHandler).on( "input", inputHandler);
	};

	new MutationObserver(rebindInputs).observe($entityView[0], {
		childList: true,
		subtree: true,
		attributes: false // only new child nodes matter, attribute changes do not
	});
});
/**
 * "input" event handler for Wikibase value inputs: reduces pasted text
 * (typically a full URL) to the bare value the property expects.
 *
 * `this` is the input element the event fired on. A handful of properties get a
 * hard-coded rewrite through changeInput(); every other property is looked up
 * on the Wikidata query service (formatter URL and, if present, format
 * constraint) and the identifier is extracted with patterns built from those.
 * The lookup is issued on every input event that reaches the generic branch.
 *
 * NOTE(review): the file-name, P2002 and P2333 rewrites are plain `if`s, so
 * after them the handler still continues into the if/else chain below and, for
 * P2002/P2333, into the generic lookup. This mirrors the previous behaviour;
 * confirm whether it is intended.
 *
 * @param {Event} event - the input event; event.target is used to locate the
 *   enclosing valueview and snakview widgets.
 */
function inputHandler(event) {
	//code soup to get to the pValue
	//found in User:TMg/currentDate.js
	const $valueview = $( event.target ).closest('.valueview');
	const valueview = $valueview.data( 'valueview' );
	const snakview = $valueview.closest( '.wikibase-snakview' ).data( 'snakview' );
	if (!valueview || !snakview) {
		// widgets not (yet) attached to this input: nothing to normalise
		return;
	}
	const expert = valueview.expert();
	const snak = snakview.value();
	const pValue = snak ? snak.property : undefined;
	//end code soup
	if (!pValue || ignorelist.includes(pValue)) {
		return;
	}

	if (/.+\.(djvu|pdf|png|jpg|jpeg|tif|tiff|gif|webm)$/i.test(this.value)) {
		// looks like a file name: drop a leading "File:" prefix
		changeInput(event, this, /^File:(.+)/gi, '$1');
	}
	if (pValue === 'P2002') { // Twitter username (P2002) plaintext input
		changeInput(event, this, /.+(@([0-9A-Za-z_]{1,15})).+/gi, '$2');
	}
	if (pValue === 'P2333') {
		//Norwegian organisation number
		changeInput(event, this, /(?:.*?)(\d{3})\s?(\d{3})\s?(\d{3})(?:.*)/gi, '$1$2$3');
	}

	if (pValue === 'P968') {
		// make sure the value carries exactly one "mailto:" prefix
		changeInput(event, this, /^(mailto:)?\s*(.+)/gi, 'mailto:$2');
	}
	else if (pValue === 'P214') {
		//VIAF ID quick fix
		changeInput(event, this, /.*?(\d+).*/gi, '$1');
	}
	else if (pValue === 'P213') { //ISNI custom fix
		changeInput(event, this, /.+?0000\s*000([0-4])\s*([0-9]{4})\s*([0-9]{3}[0-9X]).*/, '0000000$1$2$3');
	}
	else if (pValue === 'P373') { // Commons category
		changeInput(event, this, /^Category:(.+)/gi, '$1');
	}
	else if (pValue === 'P2013' && (this.value.includes('facebook.com/pages/') || this.value.includes('profile.php?id='))) {
		//get the FB account number
		changeInput(event, this, /^.+?(\d+)$/gi, '$1');
	}
	//Mastodon urls need to be flipped
	else if (pValue === 'P4033') {
		if (this.value.match(/^[0-9A-Za-z_]+@[0-9a-z\.\-]+[0-9a-z]+$/)) {
			//already correct format
			return;
		}
		else if (this.value.match(/^https?:\/\/[0-9a-z\.\-]+[0-9a-z]+\/@[0-9A-Za-z_]+(?:\/.*)?/)) {
			//URL needs to be flipped
			changeInput(event, this, /(?:https?:\/\/)([0-9a-z\.\-]+[0-9a-z]+)\/@([0-9A-Za-z_]+)(?:\/.*)?/, '$2@$1');
		}
		else {
			// probably removing the @ at the start.
			changeInput(event, this, /(?:.*?)([0-9A-Za-z_]+@[0-9a-z\.\-]+[0-9a-z]+)(?:.*?)/, '$1');
		}
	}
	else {
		//select identifier based on format constraints
		const element = this;
		const query = `SELECT ?formatterUrl ?formatConstraints WHERE {  { BIND(wd:${pValue} AS ?property) } {?property wdt:P1630 ?formatterUrl.} UNION  { ?property wdt:P7250 ?formatterUrl.  }  OPTIONAL { ?property p:P2302 ?constraints . ?constraints ps:P2302 wd:Q21502404. ?constraints pq:P1793 ?formatConstraints . ?constraints a wikibase:BestRank.} }`;
		$.ajax({
			url: 'https://query.wikidata.org/sparql',
			data: { query: query },
			headers: { Accept: 'application/sparql-results+json' },
			success: function (results) {
				const bindings = results && results.results ? results.results.bindings : undefined;
				if (!bindings || bindings.length === 0
					|| !bindings[0].formatterUrl
					|| typeof bindings[0].formatterUrl.value === 'undefined') {
					// property has no formatter URL: leave the input alone
					return;
				}
				if (typeof XRegExp === 'undefined') {
					// regex library has not been loaded (yet)
					return;
				}
				// The input is read only now, i.e. with whatever it contains when the response arrives.
				const extracted = extractIdentifier(element.value, bindings);
				if (extracted !== null) {
					element.value = extracted;
				}
				// FIXME: This accesses a protected property.
				expert._viewNotifier.notify( 'change' );
				element.focus();
			}
		});
	} //end else
}

/**
 * Makes a format constraint embeddable inside a larger pattern: removes an
 * empty alternative such as "(|x)" / "(x|)", a leading or trailing pipe, and
 * the "^" / "$" anchors.
 *
 * @param {string} constraint - format constraint (regular expression source)
 * @returns {string} the constraint without anchors and empty alternatives
 */
function stripConstraintAnchors(constraint) {
	return constraint
		.replace('(|', '(').replace('|)', ')')
		.replace(/(^\||\|$)/, '')
		.replace(/^\^/, '')
		.replace(/\$$/, '');
}

/**
 * Converts a formatter URL (containing the "$1" placeholder) into regular
 * expression source whose first capturing group is the identifier.
 *
 * @param {string} formatUrl - formatter URL without protocol and "www."
 * @param {string} idGroup - parenthesised pattern that matches the identifier
 * @returns {string} regular expression source
 */
function formatterUrlToPattern(formatUrl, idGroup) {
	//escape dots, slashes and other special characters in the formatter URL
	let pattern = formatUrl.replace(/[.*+?^${}()|[\]\\/]/g, '\\$&');
	//if there is content after the parameter, make it optional
	pattern = pattern.replace(/(\\\$1)(.+)$/, '$1($2)?');
	//replace $1 with the identifier group; a replacer function keeps "$" sequences
	//inside the constraint from being read as replacement patterns
	pattern = pattern.replace('\\$1', function () { return idGroup; });
	//if the parameter is leading (https://example.tumblr.com) only allow trailing junk
	if (/^\$1/.test(formatUrl)) {
		return pattern + '(?:.*)';
	}
	return '(?:.*)' + pattern + '(?:.*)';
}

/**
 * Extracts the identifier from raw input text using the formatter URLs and the
 * optional format constraint returned by the query service. Requires XRegExp.
 *
 * @param {string} rawInput - current content of the input element
 * @param {Array<Object>} bindings - SPARQL result bindings; every entry has
 *   formatterUrl.value, the first one may also have formatConstraints.value
 * @returns {?string} the extracted identifier, or null when nothing matched
 */
function extractIdentifier(rawInput, bindings) {
	//remove "https?" and "www." and a trailing slash
	let input = rawInput.trim().replace(/https?:\/\/(www\.)?/, '').replace(/\/$/, '');
	//LinkedIn urls decode things like  Ø and å
	try {
		input = decodeURIComponent(input);
	} catch (e) {
		// a stray "%" is not valid percent-encoding: keep the text undecoded
		if (!(e instanceof URIError)) throw e;
	}

	// longest formatter URL first, so the most specific one wins
	const formatUrls = bindings
		.map(function (binding) {
			return binding.formatterUrl.value.replace(/https?:\/\/(www\.)?/, '');
		})
		.sort(function (a, b) { return b.length - a.length; });

	// formatConstraints is OPTIONAL in the query and therefore may be missing
	const constraint = bindings[0].formatConstraints;
	const hasConstraint = Boolean(constraint) && typeof constraint.value !== 'undefined';
	const idRegex = hasConstraint ? stripConstraintAnchors(constraint.value) : undefined;
	const idGroup = hasConstraint ? `(${idRegex})` : '(.{2,})';

	//first try: match based both on the format url and format constraints
	for (const formatUrl of formatUrls) {
		const compiled = XRegExp(formatterUrlToPattern(formatUrl, idGroup));
		if (!compiled.test(input)) {
			continue;
		}
		let out = XRegExp.replace(input, compiled, '$1').trim();
		if (!hasConstraint) {
			out = out.replace(/\/$/, '').replace(/^\//, '');
		}
		if (out.length > 0) {
			return out;
		}
	}

	if (!hasConstraint) {
		return null;
	}

	//if that fails, try finding a match with only the formatConstraints:
	//remove the formatter URLs from the input ...
	formatUrls.forEach(function (formatUrl) {
		const prefix = formatUrl.replace('$1', '').replace(/\/$/, '');
		input = input.replace(prefix, ' ');
	});
	//... and return the longest match to the format constraints
	const matches = input.match(XRegExp(`(${idRegex})`, 'g'));
	if (!matches || matches.length === 0) {
		return null;
	}
	matches.sort(function (a, b) { return b.length - a.length; });
	return matches[0].trim().length > 0 ? matches[0] : null;
}

/**
 * Rewrites the content of an input through a regex replacement and tells the
 * Wikibase value editor that the value has changed.
 *
 * @param {Event} event - the input event; event.target locates the valueview widget
 * @param {Object} element - the input element whose value is rewritten and which is focused afterwards
 * @param {RegExp} regex - pattern applied to the current value
 * @param {string} group - replacement string, e.g. '$1'
 */
function changeInput(event, element, regex, group){
	const rewritten = element.value.replace(regex, group);
	element.value = rewritten;

	const widget = $( event.target ).closest('.valueview').data( 'valueview' );
	// FIXME: This accesses a protected property.
	widget.expert()._viewNotifier.notify( 'change' );
	element.focus();
}