human gene (E37)

From Wikidata
Jump to navigation Jump to search
language code | label | description | aliases | edit
en | human gene | schema of a human gene according to Gene Wiki | | edit
da | menneske-gen | | | edit
de | Menschliches Gen | | | edit
eo | homa geno | | | edit
it | gene umano | schema per descrivere un gene umano | | edit
nl | menselijk gen | basis schema voor een menselijk gen in Wikidata volgens gene wiki | | edit
pt | gene humano | | | edit
# Shape Expression for Human genes in Wikidata
PREFIX wd: <http://www.wikidata.org/entity/>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
PREFIX p: <http://www.wikidata.org/prop/>
PREFIX prov: <http://www.w3.org/ns/prov#>
PREFIX pq: <http://www.wikidata.org/prop/qualifier/>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX prv: <http://www.wikidata.org/prop/reference/value/>
PREFIX pr:  <http://www.wikidata.org/prop/reference/>
PREFIX ps: <http://www.wikidata.org/prop/statement/>

BASE <http://www.wikidata.org/entity/>
IMPORT <https://www.wikidata.org/wiki/Special:EntitySchemaText/E75> # gene

start = @<#wikidata-human-gene>

# Query with results
# SELECT * WHERE {?item wdt:P31 wd:Q7187 ; wdt:P703 wd:Q15978631 .} LIMIT 10

# The `start` declaration above indicates which shape is used as the entry point for validation if none is provided.

# wikidata-human-gene is the main shape for a human gene data model in Wikidata. Each line between the brackets
# represents the structure that can be enforced to validate human gene annotations in Wikidata.
#
# We distinguish between value statements, identifier statements, and erroneous statements.
# Value statements contain either actual values, or pointers to other Wikidata items. Identifier statements capture
# external identifiers; erroneous statements are those that are errors.
#

<#wikidata-human-gene> EXTRA p:P31 {

    # Inclusion of the <E75#wikidata-gene> triple expression is currently commented out:
    #&<E75#wikidata-gene> ;

    # Genomic start and genomic end are optional, but they travel as a pair:
    # when one is present, the other is required. The * after the group gives
    # the pair as a whole a cardinality of zero or more.
    (
        p:P644 @<#P644_genomic_start> ;   # genomic start location
        p:P645 @<#P645_genomic_end> ;     # genomic end location
    )* ;

    # VALUE STATEMENTS
    # Zero or more known orthologs.
    p:P684 @<#P684_ortholog>* ;
    # Zero or more known gene products.
    p:P688 @<#P688_encodes>* ;
    # Exactly one "found in taxon" statement, which must point to human.
    p:P703 @<#P703_found_in_taxon_human> ;
    # Zero or more chromosomes the gene is located on.
    p:P1057 @<#P1057_chromosome>* ;
    # One or more external IRIs of nodes on the semantic web that describe
    # the same concept as linked data.
    p:P2888 @<#P2888_exact_match>+ ;
    # Exactly one strand orientation statement.
    p:P2548 @<#P2548_strand_orientation> ;

    # IDENTIFIER STATEMENTS
    p:P351 @<#P351_ncbi_gene_id> ;             # exactly one NCBI gene identifier
    p:P353 @<#P353_hgnc_gene_symbol> ;         # exactly one HGNC gene symbol
    p:P354 @<#P354_hgnc_gene_id> ;             # exactly one HGNC gene identifier
    p:P594 @<#P594_ensembl_gene_id>* ;         # zero or more Ensembl gene identifiers
    p:P639 @<#P639_refseq_rna_id>* ;           # zero or more RefSeq RNA identifiers
    p:P704 @<#P704_ensembl_transcript_id>* ;   # zero or more Ensembl transcript identifiers
    p:P593 @<#P593_homologene_id> ;            # exactly one HomoloGene identifier

    # NEGATIVE SHAPES
    # A gene must not carry a UniProt identifier, hence the {0} cardinality.
    p:P352 @<#P352_uniprot_id_wor>{0} ;
}

# Detailed ShExs for Wikidata statements
# Wikidata captures a statement in the following graph pattern:
####################
# wd:Pxx wikibase:directClaim wdt:Pxx ;
#        wikibase:claim p:Pxx .
# <item> wdt:Pxx "value" or <other_item> .
# <item> p:Pxx <node> .
# <node> ps:Pxx "value" or <other_item> ;
#        pq:Pxx "qualifier value" or <qualifier_item> ;
#        pr:Pxx "reference value" or <reference_item> .
####################

<#P31_instance_of_gene> {
    # Instance of [P31]: the value must be one of the items in <#gene_types>.
    ps:P31 @<#gene_types> ;
    # Exactly one reference, matching either the NCBI gene or the Ensembl gene reference shape.
    prov:wasDerivedFrom @<#ncbi-gene-reference> OR @<#ensembl-gene-reference> ;
}

<#P279_subclass_of_gene> {
    # Subclass of [P279]: the value must be one of the items in <#gene_types>.
    ps:P279 @<#gene_types> ;
    # Exactly one reference, matching either the NCBI gene or the Ensembl gene reference shape.
    prov:wasDerivedFrom @<#ncbi-gene-reference> OR @<#ensembl-gene-reference> ;
}

<#P644_genomic_start> {
    # Genomic start [P644] value; any literal is accepted.
    ps:P644 LITERAL ;
    # One or more chromosome [P1057] qualifiers, each drawn from the
    # <#human-chromosomes> value set.
    pq:P1057 @<#human-chromosomes>+ ;
    # One or more qualifiers [P659] naming the applicable genomic assembly versions.
    pq:P659 @<#genomic-assembly>+ ;
    # Exactly one reference matching the Ensembl gene reference shape.
    prov:wasDerivedFrom @<#ensembl-gene-reference> ;
}

<#P645_genomic_end> {
    # Genomic end [P645] value; any literal is accepted.
    ps:P645 LITERAL ;
    # One or more chromosome [P1057] qualifiers, each drawn from the
    # <#human-chromosomes> value set.
    pq:P1057 @<#human-chromosomes>+ ;
    # One or more qualifiers [P659] naming the applicable genomic assembly versions.
    pq:P659 @<#genomic-assembly>+ ;
    # Exactly one reference matching the Ensembl gene reference shape.
    prov:wasDerivedFrom @<#ensembl-gene-reference> ;
}
	
<#P684_ortholog> {
    # Known ortholog [P684]; the value must be an IRI.
    ps:P684 IRI ;
    # Qualifier [P703] giving the taxon in which the ortholog is found; must be an IRI.
    pq:P703 IRI ;
    # Exactly one reference matching the HomoloGene reference shape.
    prov:wasDerivedFrom @<#homologene-reference> ;
}

<#P688_encodes> {
    # Encodes [P688]: IRI of the gene product this gene encodes.
    ps:P688 IRI ;
    # Exactly one reference matching the UniProt reference shape.
    prov:wasDerivedFrom @<#uniprot-reference> ;
}

<#P703_found_in_taxon_human> {
    # Found in taxon [P703]: the only permitted value is human [Q15978631].
    ps:P703 [ wd:Q15978631 ] ;
    # Exactly one reference, matching either the NCBI gene or the Ensembl gene reference shape.
    prov:wasDerivedFrom @<#ncbi-gene-reference> OR @<#ensembl-gene-reference> ;
}

<#P1057_chromosome> {
    # Chromosome [P1057] the gene is found on; must be in <#human-chromosomes>.
    ps:P1057 @<#human-chromosomes> ;
    # One or more qualifiers [P659] naming the applicable genomic assemblies.
    pq:P659 @<#genomic-assembly>+ ;
    # Exactly one reference matching the Ensembl gene reference shape.
    prov:wasDerivedFrom @<#ensembl-gene-reference> ;
}
	
<#P2888_exact_match> {
    # Exact match [P2888]: external IRI describing the exact same concept.
    ps:P2888 IRI ;
    # Exactly one reference, matching either the MIRIAM or the NCBI gene reference shape.
    prov:wasDerivedFrom @<#miriam_reference> OR @<#ncbi-gene-reference> ;
}

<#P2548_strand_orientation> {
    # Strand orientation [P2548] of the gene; must be in <#strand-orientation>.
    ps:P2548 @<#strand-orientation> ;
    # One or more qualifiers [P659] naming the applicable genomic assemblies.
    pq:P659 @<#genomic-assembly>+ ;
    # Exactly one reference matching the Ensembl gene reference shape.
    prov:wasDerivedFrom @<#ensembl-gene-reference> ;
}

# Value set holding the Wikidata items for the two strand orientations.
<#strand-orientation> [
    wd:Q22809711   # reverse strand
    wd:Q22809680   # forward strand
]
	
## IDENTIFIERS
<#P351_ncbi_gene_id> {
    # NCBI gene identifier [P351] of the gene item; any literal is accepted.
    ps:P351 LITERAL ;
    # Exactly one reference matching the NCBI gene reference shape.
    prov:wasDerivedFrom @<#ncbi-gene-reference> ;
}

# UniProt identifier [P352] statement. A gene item is not supposed to have one,
# so this shape only requires a literal value and carries no reference constraint.
<#P352_uniprot_id_wor> {
    ps:P352 LITERAL ;
}
	
<#P353_hgnc_gene_symbol> {
    # HGNC gene symbol [P353] of the gene item; any literal is accepted.
    ps:P353 LITERAL ;
    # Exactly one reference matching the NCBI gene reference shape.
    prov:wasDerivedFrom @<#ncbi-gene-reference> ;
}

<#P354_hgnc_gene_id> {
    # HGNC gene identifier [P354] of the gene item; any literal is accepted.
    ps:P354 LITERAL ;
    # Exactly one reference matching the NCBI gene reference shape.
    prov:wasDerivedFrom @<#ncbi-gene-reference> ;
}

<#P593_homologene_id> {
    # HomoloGene identifier [P593] of the gene item; any literal is accepted.
    ps:P593 LITERAL ;
    # Exactly one reference matching the NCBI gene reference shape.
    prov:wasDerivedFrom @<#ncbi-gene-reference> ;
}
	
<#P594_ensembl_gene_id> {
    # Ensembl gene identifier [P594] of the gene item; any literal is accepted.
    ps:P594 LITERAL ;
    # Exactly one reference matching the Ensembl gene reference shape.
    prov:wasDerivedFrom @<#ensembl-gene-reference> ;
}

<#P639_refseq_rna_id> {
    # RefSeq RNA identifier [P639] of the gene item; any literal is accepted.
    ps:P639 LITERAL ;
    # Exactly one reference matching the NCBI gene reference shape.
    prov:wasDerivedFrom @<#ncbi-gene-reference> ;
}

<#P704_ensembl_transcript_id> {
    # Ensembl transcript identifier [P704] of the gene item; any literal is accepted.
    ps:P704 LITERAL ;
    # Exactly one reference, matching either the Ensembl gene or the NCBI gene
    # reference shape. Both alternatives need the '@' sigil to be shape
    # references; the second one was previously written as a bare IRI.
    prov:wasDerivedFrom @<#ensembl-gene-reference> OR @<#ncbi-gene-reference> ;
}

## REFERENCES
# Reference pointing to an NCBI gene record.
<#ncbi-gene-reference> {
    # Stated in [P248]: must be NCBI gene [Q20641742].
    pr:P248 [ wd:Q20641742 ] ;
    # NCBI gene identifier [P351]; any literal is accepted.
    pr:P351 LITERAL ;
    # Date of retrieval [P813], typed as xsd:dateTime.
    pr:P813 xsd:dateTime ;
}

# Reference pointing to an Ensembl gene record.
<#ensembl-gene-reference> {
    # Stated in [P248]: must be one of the Ensembl releases listed here.
    pr:P248 [
        wd:Q30227110   # Ensembl Release 89 [Q30227110]
        wd:Q46401024   # Ensembl Release 91 [Q46401024]
        wd:Q57339524   # Ensembl Release 94 [Q57339524]
        wd:Q63170780   # Ensembl Release 96 [Q63170780]
    ] ;
    # Ensembl gene ID [P594]; any literal is accepted.
    pr:P594 LITERAL ;
}

# Reference pointing to a HomoloGene record.
<#homologene-reference> {
    # Stated in [P248]: must be homologene build68 [Q20976936].
    pr:P248 [ wd:Q20976936 ] ;
    # HomoloGene identifier [P593]; any literal is accepted.
    pr:P593 LITERAL ;
}

# Reference pointing to the MIRIAM registry.
<#miriam_reference> {
    # Stated in [P248]: must be the MIRIAM registry item [Q16335166].
    pr:P248 [ wd:Q16335166 ] ;
    # Reference URL [P854]; must be an IRI.
    pr:P854 IRI ;
}

# Reference used by <#P688_encodes>; it carries a P352 identifier.
<#uniprot-reference> {
    # Stated in [P248]: must be the item Q905695.
    pr:P248 [ wd:Q905695 ] ;
    # Identifier given with property P352; any literal is accepted.
    pr:P352 LITERAL ;
    # P813 value typed as xsd:dateTime (date of retrieval in the NCBI reference shape).
    pr:P813 xsd:dateTime ;
}

## Lists with Wikidata items
# Value set of the Wikidata items accepted as the type of a gene.
<#gene_types> [
    wd:Q7187       # gene
    wd:Q20747295   # protein-coding gene
    wd:Q427087     # ncRNA
    wd:Q284578     # snRNA
    wd:Q284416     # snoRNA
    wd:Q215980     # rRNA
    wd:Q201448     # tRNA
    wd:Q277338     # pseudo
    wd:Q11053      # miscRNA
    wd:Q25323710   # scRNA
]

# Value set of the Wikidata items for the human chromosomes (1-22, X, Y and MT).
<#human-chromosomes> [
    wd:Q430258   # human chromosome 1
    wd:Q638893   # human chromosome 2
    wd:Q668633   # human chromosome 3
    wd:Q836605   # human chromosome 4
    wd:Q840741   # human chromosome 5
    wd:Q540857   # human chromosome 6
    wd:Q657319   # human chromosome 7
    wd:Q572848   # human chromosome 8
    wd:Q840604   # human chromosome 9
    wd:Q840737   # human chromosome 10
    wd:Q847096   # human chromosome 11
    wd:Q847102   # human chromosome 12
    wd:Q840734   # human chromosome 13
    wd:Q138955   # human chromosome 14
    wd:Q765245   # human chromosome 15
    wd:Q742870   # human chromosome 16
    wd:Q220677   # human chromosome 17
    wd:Q780468   # human chromosome 18
    wd:Q510786   # human chromosome 19
    wd:Q666752   # human chromosome 20
    wd:Q753218   # human chromosome 21
    wd:Q753805   # human chromosome 22
    wd:Q61333    # human chromosome X
    wd:Q202771   # human chromosome Y
    wd:Q27075    # human chromosome MT
]
						
# Value set of the accepted genome assembly items.
<#genomic-assembly> [
    wd:Q20966585   # Genome assembly GRCh38
    wd:Q21067546   # Genome assembly GRCh37
]