# Labels and descriptions of this schema (language code | label | description | aliases)
# en | human gene | schema of a human gene according to Gene Wiki |
# de | menschliches Gen | Schema für ein menschliches Gen |
# eo | homa geno | |
# es | gen humano | esquema de gen humano de acuerdo al proyecto Gene Wiki |
# fr | gène humain | schema d'un gène humain selon Gene Wiki |
# it | gene umano | schema per descrivere un gene umano |
# ja | ヒトの遺伝子 | Gene Wikiにおいてヒトの遺伝子を記述するためのスキーマ |
# nl | menselijk gen | basis schema voor een menselijk gen in Wikidata volgens gene wiki |
# pl | ludzki gen | |
# pt | gene humano | |
# E108: genome_assembly
# NOTE(review): IRIs below are reconstructed from the Wikidata EntitySchema URL
# convention (Special:EntitySchemaText/<id>); confirm against the published schema.
# The IMPORT makes the shapes referenced as @E108:... available to this schema.
PREFIX E108: <https://www.wikidata.org/wiki/Special:EntitySchemaText/E108#>
IMPORT <https://www.wikidata.org/wiki/Special:EntitySchemaText/E108>

# E109: human chromosome
# NOTE(review): same reconstruction as for E108 — confirm.
PREFIX E109: <https://www.wikidata.org/wiki/Special:EntitySchemaText/E109#>
IMPORT <https://www.wikidata.org/wiki/Special:EntitySchemaText/E109>

# Shape Expression for Human genes in Wikidata
#
# Namespace declarations. Each prefix must map to its own namespace IRI;
# with empty IRIs every prefixed name would resolve against the same base
# and, for example, p:P31, ps:P31 and wdt:P31 would be indistinguishable.
PREFIX wd: <http://www.wikidata.org/entity/>                  # items (Q-ids)
PREFIX wdt: <http://www.wikidata.org/prop/direct/>            # direct ("truthy") claims
PREFIX p: <http://www.wikidata.org/prop/>                     # item -> statement node
PREFIX prov: <http://www.w3.org/ns/prov#>                     # statement node -> reference node
PREFIX pq: <http://www.wikidata.org/prop/qualifier/>          # qualifier values
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>               # XML Schema datatypes
PREFIX prv: <http://www.wikidata.org/prop/reference/value/>   # full reference values
PREFIX pr:  <http://www.wikidata.org/prop/reference/>         # simple reference values
PREFIX ps: <http://www.wikidata.org/prop/statement/>          # statement values


# Indicates which shape to use to start iterating over the graph if none is provided.
start = @<#wikidata-human-gene>

# Example query returning items to validate (instance of [P31] gene [Q7187],
# found in taxon [P703] human [Q15978631]):
# SELECT * WHERE {?item wdt:P31 wd:Q7187 ; wdt:P703 wd:Q15978631 .} LIMIT 10

# wikidata-human-gene is the main shape for a human gene data model in Wikidata. Each line between the brackets
# represents the structure that can be enforced to validate human gene annotations in Wikidata.
# We distinguish between value statements, identifier statements, and erroneous statements.
# Value statements contain either actual values, or pointers to other Wikidata items. Identifier statements capture
# external identifiers, erroneous statements are those that are errors.

# Main shape for a human gene item. Every constraint points from the item to a
# statement node (p:Pxx) that must conform to the referenced statement shape.
# EXTRA p:P31 tolerates additional p:P31 statements that do not match
# <#P31_instance_of_gene>.
<#wikidata-human-gene> EXTRA p:P31 {
	p:P31 @<#P31_instance_of_gene> ; # Instance of one of the accepted gene types

	# The group below expresses that the genomic start and genomic end properties are not mandatory;
	# however, if one exists, the other one is required. The * behind the closing bracket gives the
	# group a cardinality of zero or more, which is what makes the pair optional as a whole.
	(
		p:P644 @<#P644_genomic_start> ; # Its genomic start location
		p:P645 @<#P645_genomic_end> ; # Its genomic end location
	)* ; # Zero or more start and end locations.

	p:P684 @<#P684_ortholog>* ; # Zero or more known orthologs.
	p:P688 @<#P688_encodes>* ; # Zero or more known gene products.
	p:P703 @<#P703_found_in_taxon_human> ; # In which taxonomy and where in that taxonomy this gene is found
	p:P1057 @<#P1057_chromosome>* ; # Zero or more known chromosomes the gene is located on.
	p:P2888 . ; # Exactly one exact-match statement with any value. The stricter constraint
	            # "@<#P2888_exact_match>+" (one or more external IRIs of a node on the semantic web
	            # describing the same concept as linked data) is currently disabled.
	p:P2548 @<#P2548_strand_orientation> ; # Its strand orientation
	p:P351 @<#P351_ncbi_gene_id> ; # Exactly one NCBI gene identifier
	p:P353 @<#P353_hgnc_gene_symbol> ; # Exactly one HGNC gene symbol
	p:P354 @<#P354_hgnc_gene_id> ; # Exactly one HGNC gene identifier
	p:P594 @<#P594_ensembl_gene_id>* ; # Zero or more Ensembl gene identifiers
	p:P639 @<#P639_refseq_rna_id>* ; # Zero or more RefSeq RNA identifiers
	p:P704 @<#P704_ensembl_transcript_id>* ; # Zero or more Ensembl transcript identifiers
	p:P593 @<#P593_homologene_id> ; # Exactly one homologene identifier

	# Negative shapes
	p:P352 @<#P352_uniprot_id_wor>{0} ; # A gene can't have a uniprot identifier.
}

# Detailed ShExs for Wikidata statements
# Wikidata captures a statement in the following graph pattern:
# wd:Pxx wikibase:directClaim wdt:Pxx ;
#        wikibase:claim p:Pxx .
# <item> wdt:Pxx "value" or <other_item> .
# <item> p:Pxx <node> .
# <node> ps:Pxx "value" or <other_item> ;
#        pq:Pxx "qualifier value" or <qualifier_item> ;
#        pr:Pxx "reference value" or <reference_item> .

# Statement node for instance of [P31]: the value must be one of the gene
# types and the statement must be referenced to NCBI Gene or Ensembl.
<#P31_instance_of_gene> {
   ps:P31 @<#gene_types> ; # Instance of [P31] gene types
   prov:wasDerivedFrom @<#ncbi-gene-reference> OR @<#ensembl-gene-reference> ;
}

# Statement node for subclass of [P279]: the value must be one of the gene
# types and the statement must be referenced to NCBI Gene or Ensembl.
<#P279_subclass_of_gene> {
   ps:P279 @<#gene_types> ; # Subclass of [P279] gene types <gene_types>
   prov:wasDerivedFrom @<#ncbi-gene-reference> OR @<#ensembl-gene-reference> ;
}

# Statement node for genomic start [P644], qualified by chromosome and
# genome assembly and referenced to Ensembl.
<#P644_genomic_start> {
   ps:P644 LITERAL ; # genomic start [P644] value
   pq:P1057 @E109:humanChromosome+ ; # Qualifier: one or more applicable chromosomes [P1057], each
                                     # conforming to the humanChromosome shape of schema E109.
   pq:P659 @E108:sequence_assembly+ ; # Qualifier: one or more applicable genomic assembly versions.
   prov:wasDerivedFrom @<#ensembl-gene-reference> ;
}

# Statement node for genomic end [P645], qualified by chromosome and
# genome assembly and referenced to Ensembl.
<#P645_genomic_end> {
   ps:P645 LITERAL ; # genomic end [P645] value
   pq:P1057 @E109:humanChromosome+ ; # Qualifier: one or more applicable chromosomes [P1057], each
                                     # conforming to the humanChromosome shape of schema E109.
   pq:P659 @E108:sequence_assembly+ ; # Qualifier: one or more applicable genomic assembly versions.
   prov:wasDerivedFrom @<#ensembl-gene-reference> ;
}
# Statement node for ortholog [P684], qualified by the taxon of the
# ortholog and referenced to homologene.
<#P684_ortholog> {
   ps:P684 IRI ; # Known ortholog [P684].
   pq:P703 IRI ; # Qualifier indicating in which taxon the ortholog is found [P703].
   prov:wasDerivedFrom @<#homologene-reference> ;
}

# Statement node for encodes [P688], referenced to UniProt.
<#P688_encodes> {
   ps:P688 IRI ; # The gene encodes [P688] a gene product.
   prov:wasDerivedFrom @<#uniprot-reference> ;
}

# Statement node for found in taxon [P703]; the only accepted value is
# human, and the statement must be referenced to NCBI Gene or Ensembl.
<#P703_found_in_taxon_human> {
   ps:P703 [wd:Q15978631] ; # the gene is found in taxon [P703] human [Q15978631]
   prov:wasDerivedFrom @<#ncbi-gene-reference> OR @<#ensembl-gene-reference> ;
}

# Statement node for chromosome [P1057], qualified by genome assembly and
# referenced to Ensembl.
<#P1057_chromosome> {
   ps:P1057 @E109:humanChromosome ; # gene is found on chromosome [P1057]
   pq:P659 @E108:sequence_assembly+ ; # Qualifier indicating the one or more applicable genomic assemblies
   prov:wasDerivedFrom @<#ensembl-gene-reference> ;
}
# Statement node for exact match [P2888], referenced to the MIRIAM
# registry or to NCBI Gene.
<#P2888_exact_match> {
   ps:P2888 IRI ; # External IRI which describes the exact same concept [P2888]
   prov:wasDerivedFrom @<#miriam_reference> OR @<#ncbi-gene-reference> ;
}

# Statement node for strand orientation [P2548], qualified by genome
# assembly and referenced to Ensembl.
<#P2548_strand_orientation> {
   ps:P2548 @<#strand-orientation> ; # Strand orientation [P2548] of the gene
   pq:P659 @E108:sequence_assembly+ ; # Qualifier indicating the one or more applicable genomic assemblies
   prov:wasDerivedFrom @<#ensembl-gene-reference> ;
}

# Value set with the Wikidata items for both the reverse and the forward strand.
<#strand-orientation> [
    wd:Q22809711 # reverse strand
    wd:Q22809680 # forward strand
]
# Statement node for the NCBI gene identifier [P351], referenced to NCBI Gene.
<#P351_ncbi_gene_id> {
   ps:P351 LITERAL ; # The NCBI gene identifier [P351] for the applicable gene item.
   prov:wasDerivedFrom @<#ncbi-gene-reference> ;
}

# Statement node for a uniprot identifier [P352]. The main gene shape uses it
# with cardinality {0}, i.e. such a statement must not exist on a gene item.
<#P352_uniprot_id_wor> {
   ps:P352 LITERAL ; # The (non-existent) uniprot identifier [P352] for the applicable gene item.
}
# Statement node for the HGNC gene symbol [P353], referenced to NCBI Gene.
<#P353_hgnc_gene_symbol> {
   ps:P353 LITERAL ; # The gene symbol [P353] for the applicable gene item.
   prov:wasDerivedFrom @<#ncbi-gene-reference> ;
}

# Statement node for the HGNC gene identifier [P354], referenced to NCBI Gene.
<#P354_hgnc_gene_id> {
   ps:P354 LITERAL ; # The HGNC gene identifier [P354] for the applicable gene item.
   prov:wasDerivedFrom @<#ncbi-gene-reference> ;
}

# Statement node for the homologene identifier [P593], referenced to NCBI Gene.
<#P593_homologene_id> {
   ps:P593 LITERAL ; # The homologene identifier [P593] for the applicable gene item.
   prov:wasDerivedFrom @<#ncbi-gene-reference> ;
}
# Statement node for the Ensembl gene identifier [P594], referenced to Ensembl.
<#P594_ensembl_gene_id> {
   ps:P594 LITERAL ; # The Ensembl gene identifier [P594] for the applicable gene item.
   prov:wasDerivedFrom @<#ensembl-gene-reference> ;
}

# Statement node for the RefSeq RNA identifier [P639], referenced to NCBI Gene.
<#P639_refseq_rna_id> {
   ps:P639 LITERAL ; # The RefSeq RNA identifier [P639] for the applicable gene item.
   prov:wasDerivedFrom @<#ncbi-gene-reference> ;
}

# Statement node for the Ensembl transcript identifier [P704], referenced to
# Ensembl or to NCBI Gene.
<#P704_ensembl_transcript_id> {
   ps:P704 LITERAL ; # The Ensembl transcript identifier [P704] for the applicable gene item
   # Both alternatives are shape references and therefore need the leading "@";
   # a bare IRI in this position would be read as a datatype constraint instead.
   prov:wasDerivedFrom @<#ensembl-gene-reference> OR @<#ncbi-gene-reference> ;
}

# Reference node pointing to an NCBI gene record.
<#ncbi-gene-reference> {
  pr:P248 [ wd:Q20641742 ] ; # stated in [P248] NCBI gene [Q20641742]
  pr:P351 LITERAL ; # NCBI gene identifier [P351]
  pr:P813 xsd:dateTime ; # Date of retrieval [P813]
}

# Reference node pointing to an Ensembl gene record. The "stated in" value
# must be one of the listed Ensembl release items.
<#ensembl-gene-reference> {
  pr:P248 [
         wd:Q30227110 # stated in [P248] Ensembl Release 89 [Q30227110]
         wd:Q46401024 # stated in [P248] Ensembl Release 91 [Q46401024]
         wd:Q57339524 # stated in [P248] Ensembl Release 94 [Q57339524]
         wd:Q63170780 # stated in [P248] Ensembl Release 96 [Q63170780]
         wd:Q67600000 # stated in [P248] Ensembl Release 97 [Q67600000]
         wd:Q71033229 # stated in [P248] Ensembl Release 98 [Q71033229]
         wd:Q83867711 # stated in [P248] Ensembl Release 99 [Q83867711]
         wd:Q110249889 # stated in [P248] Ensembl Release 105 [Q110249889]
         ] ;
  pr:P594 LITERAL ; # Ensembl Gene ID [P594]
}

# Reference node pointing to a homologene record.
<#homologene-reference> {
  pr:P248 [wd:Q20976936] ; # Stated in [P248] homologene build68 [Q20976936]
  pr:P593 LITERAL ; # Homologene identifier [P593]
}

# Reference node pointing to the MIRIAM registry.
<#miriam_reference> {
  pr:P248 [wd:Q16335166] ; # Stated in [P248] MIRIAM registry [Q16335166]
  pr:P854 IRI ; # Reference URL [P854] to an IRI
}

# Reference node used by statements derived from UniProt.
<#uniprot-reference> {
  pr:P248 [wd:Q905695] ; # Stated in [P248] item Q905695 (presumably UniProt — confirm)
  pr:P352 LITERAL ; # uniprot identifier [P352]
  pr:P813 xsd:dateTime ; # Date of retrieval [P813]
}

## Lists with Wikidata items
<#gene_types> [
  wd:Q7187  # gene
  wd:Q20747295 # protein-coding gene
  wd:Q284578 # snRNA
  wd:Q284416 # snoRNA
  wd:Q215980 # rRNA
  wd:Q201448 # tRNA
  wd:Q277338 # pseudo
  wd:Q11053 # miscRNA
  wd:Q25323710 # scRNA