From 4d85ad13e4c4ce26cb6eb7b4c5ff5a218162ca08 Mon Sep 17 00:00:00 2001 From: Laura Luebbert <56094636+lauraluebbert@users.noreply.github.com> Date: Wed, 12 Jul 2023 13:55:39 -0700 Subject: [PATCH] Delete test_elm.json --- tests/fixtures/test_elm.json | 1887 ---------------------------------- 1 file changed, 1887 deletions(-) delete mode 100644 tests/fixtures/test_elm.json diff --git a/tests/fixtures/test_elm.json b/tests/fixtures/test_elm.json deleted file mode 100644 index 3b5b190c..00000000 --- a/tests/fixtures/test_elm.json +++ /dev/null @@ -1,1887 +0,0 @@ -{ - "test1": { - "type": "assert_equal", - "args": { - "sequence": "KVFGRCELAA" - }, - "expected_result": [ - [ - "DEG_Nend_UBRbox_1", - "ELME000351", - "N-degron", - "The N-end rule pathway regulates protein stability by targeting proteins for ubiquitin-dependent proteasomal degradation. Polyubiquitylation of N-end rule substrates depends on their recognition by N-recognins, specific E3 ubiquitin ligases that use their conserved UBR-box and N-box domains to bind specific N-terminal protein motifs, called N-degrons, in their target proteins. N-degrons are defined by a destabilizing N-terminal residue. Type I destabilizing residues can either occur as primary destabilizing residues, which are positively charged amino acids directly recognized by N-recognins, or as secondary and tertiary destabilizing amino acids, which can be conjugated to a primary destabilizing residue. N-degrons containing type I destabilizing residues are specifically bound by the UBR-box of N-recognins. In contrast, type II destabilizing residues, which comprise bulky hydrophobic amino acids, initiate protein degradation by binding to the N-box of N-recognins.", - "This class of N-degrons is defined by a positively charged type I primary destabilizing Arg or Lys residue in the N-terminal position, which is recognized by the UBR-box of N-recognins. Functional N-degrons of this class are generated from pre-N-degrons either by excision of the N-terminal Met on nascent proteins or by internal cleavage of a protein (Varshavsky,2011; Tasaki,2007). However, known Met-aminopeptidases catalyzing the N-terminal Met excision were so far found to cleave N-terminal of small amino acids (Varshavsky,2011), and the Arg- or Lys-containing N-degrons identified to date are generated by internal cleavage. It is important to note that the ELM prediction tool will only return internal N-degrons if the sequence of the cleavage product is entered for analysis.Once the active N-degron is generated, the N-terminal Arg or Lys residue binds to the UBR-box of N-recognins. The UBR-box is a highly conserved region whose tertiary structure is stabilized by two zinc fingers, which form a negatively charged binding pocket that rigidly binds the positively charged N-terminal amino acid. It was shown that Arg is favored over Lys as N-terminal residue. Binding also involves a shallow hydrophobic binding pocket that interacts with the side chain of the second residue of the motif. Despite a preference for hydrophobic residues in this second position, any amino acid except Pro is allowed. In addition, the UBR-box forms hydrogen bonds with the free alpha amino group and the backbone of the first three residues (Choi,2010; Tasaki,2012). Hence, the first two residues are the main determinants for specific binding to the UBR-box (3NIH; 3NII; 3NIJ; 3NIM). Recent studies showed binding of N-terminal His-containing N-degrons to the UBR-box, indicating a possible role for His as a primary type I destabilizing residue. However, compared to other primary type I interactions, this binding is very weak and naturally occurring N-degrons containing His have not yet been identified (Choi,2010).", - "DEG_Nend_Nbox_1 DEG_Nend_UBRbox_1 DEG_Nend_UBRbox_2 DEG_Nend_UBRbox_3 DEG_Nend_UBRbox_4", - "^M{0,1}[RK][^P].", - "0.0002064", - "Eukaryota", - "zf-UBR (PF02207) Putative zinc finger in N-recognin (UBR box) (Stochiometry: 1 : 1)", - "KVF", - "1", - "3", - "False", - "False", - "False", - "False", - "False", - "False", - "False" - ], - [ - "LIG_PDZ_Class_2", - "ELME000091", - "PDZ domain ligands", - "The best characterised PDZ ligands (PBMs, PDZ-Binding Motifs) are short C-terminal peptides that bind in a surface groove of PDZ domains of proteins as a part of a variety of biological processes including cell signalling and synapse. Although there is a considerable literature on internal sequence peptide interactions, we are not currently representing internal PDZ-binding peptides in ELM.", - "PDZ domains recognize short sequences at the carboxy terminus of target proteins. The terminal residue is apparently always hydrophobic with the -2 position being a strong determinant of specificity. The class 2 motif has a pattern such as (VYF)X(VIL)*. We have made the conserved positions more relaxed based on experimental binding data. However, probably not all PDZ domain instances can accept either A or F at the terminal position. Several less conserved positions in the motif may modulate affinity and specificity of the ligand domain interaction.", - "LIG_PDZ_Class_1 LIG_PDZ_Class_2 LIG_PDZ_Class_3 LIG_PDZ_Wminus1_1", - "...[VLIFY].[ACVILF]$", - "0.0000789", - "Eukaryota Homo sapiens Metazoa Mus musculus", - "PDZ (PF00595) PDZ domain (Also known as DHR or GLGF) (Stochiometry: 1 : 1) PDB Structure: 1N7F", - "RCELAA", - "5", - "10", - "False", - "False", - "False", - "False", - "False", - "False", - "False" - ] - ] - }, - "test2": { - "type": "assert_equal", - "args": { - "sequence": "DIEFRVLH", - "json": true - }, - "expected_result": [ - [ - "LIG_LIR_Nem_3", - "ELME000370", - "Atg8 protein family ligands", - "The autophagy-related protein Atg8 and its homologues LC3 and GABARAP play an important role in selective autophagy. During autophagy, Atg8 proteins get directly conjugated to phosphatidylethanolamine (PE) lipids to mediate membrane fusion events involved in autophagosome biogenesis such as phagophore formation and elongation. In addition, different Atg8 protein family members can recruit specific adaptors bound to ubiquitylated proteins, organelles or pathogens for degradation. Many of these adaptor proteins contain an LC3-interacting region (LIR) that mediates binding to Atg8 and Atg8-related proteins. These LIR:Atg8/LC3/GABARAP interactions are essential for cellular cell homeostasis as well as the control of intra- and extracellular stress conditions.", - "Based on multiple sequence alignments, the LIR motif appears to be less specific in Nematoda: in the last position phenylalanine and tyrosine are allowed in addition to the aliphatic hydrophobic amino acids.", - "LIG_LIR_Apic_2 LIG_LIR_Gen_1 LIG_LIR_LC3C_4 LIG_LIR_Nem_3", - "[EDST].{0,2}[WFY]..[ILVFY]", - "0.0063621", - "Nematoda", - "Atg8 (PF02991) Autophagy protein Atg8 ubiquitin like (Stochiometry: 1 : 1)", - "DIEFRVL", - "1", - "7", - "False", - "False", - "False", - "False", - "False", - "False", - "False" - ], - [ - "LIG_LIR_Nem_3", - "ELME000370", - "Atg8 protein family ligands", - "The autophagy-related protein Atg8 and its homologues LC3 and GABARAP play an important role in selective autophagy. During autophagy, Atg8 proteins get directly conjugated to phosphatidylethanolamine (PE) lipids to mediate membrane fusion events involved in autophagosome biogenesis such as phagophore formation and elongation. In addition, different Atg8 protein family members can recruit specific adaptors bound to ubiquitylated proteins, organelles or pathogens for degradation. Many of these adaptor proteins contain an LC3-interacting region (LIR) that mediates binding to Atg8 and Atg8-related proteins. These LIR:Atg8/LC3/GABARAP interactions are essential for cellular cell homeostasis as well as the control of intra- and extracellular stress conditions.", - "Based on multiple sequence alignments, the LIR motif appears to be less specific in Nematoda: in the last position phenylalanine and tyrosine are allowed in addition to the aliphatic hydrophobic amino acids.", - "LIG_LIR_Apic_2 LIG_LIR_Gen_1 LIG_LIR_LC3C_4 LIG_LIR_Nem_3", - "[EDST].{0,2}[WFY]..[ILVFY]", - "0.0063621", - "Nematoda", - "Atg8 (PF02991) Autophagy protein Atg8 ubiquitin like (Stochiometry: 1 : 1)", - "DIEFRVL", - "3", - "7", - "False", - "False", - "False", - "False", - "False", - "False", - "False" - ], - [ - "DEG_Nend_UBRbox_2", - "ELME000352", - "N-degron", - "The N-end rule pathway regulates protein stability by targeting proteins for ubiquitin-dependent proteasomal degradation. Polyubiquitylation of N-end rule substrates depends on their recognition by N-recognins, specific E3 ubiquitin ligases that use their conserved UBR-box and N-box domains to bind specific N-terminal protein motifs, called N-degrons, in their target proteins. N-degrons are defined by a destabilizing N-terminal residue. Type I destabilizing residues can either occur as primary destabilizing residues, which are positively charged amino acids directly recognized by N-recognins, or as secondary and tertiary destabilizing amino acids, which can be conjugated to a primary destabilizing residue. N-degrons containing type I destabilizing residues are specifically bound by the UBR-box of N-recognins. In contrast, type II destabilizing residues, which comprise bulky hydrophobic amino acids, initiate protein degradation by binding to the N-box of N-recognins.", - "This class of N-degrons is defined by a negatively charged type I secondary destabilizing Asp or Glu residue in the N-terminal position that is required to be arginylated for recognition by the UBR-box of N-recognins (Tasaki,2012). Asp- or Glu-containing pre-N-degrons can be generated by internal cleavage of a protein. Generation by Met excision has not been investigated yet as the known N-terminal Met-aminopeptidases that catalyze Met excision show no activity towards larger amino acids like Glu or Asp (Varshavsky,2011). It is important to note that the ELM prediction tool will only return internal N-degrons if the sequence of the cleavage product is entered for analysis.Once the secondary destabilizing Asp or Glu residue is exposed at the N-terminus of the protein, it is targeted by ATE1-encoded arginyl transferases (R-transferases) that transfer Arg from an Arg-t-RNA to the N-terminal amino group of the pre-N-degron. This N-terminal arginylation of the protein results in generation of a functional N-degron (Balzi,1990; Sriram,2011). In Mammals, six isoforms of R-transferase have been detected, differing in cellular location, tissue distribution and activity (Tasaki,2007). Their specificity depends on the N-terminal acidic residue in the substrate and is not affected by adjacent amino acids (Rai,2005; Hu,2005). The active N-degron generated after arginylation specifically binds to the UBR-box of N-recognins, initiating the degradation of the protein. The UBR-box is a highly conserved region whose tertiary structure is stabilized by two zinc fingers, which form a negatively charged binding pocket that rigidly binds the positively charged N-terminal amino acid. In addition, the UBR-box forms electrostatic interactions and hydrogen bonds with the free alpha amino group, the side chain of the acidic residue in the second position and the backbone of the first three residues (Choi,2010; Tasaki,2012) (3NIK; 3NIL).", - "DEG_Nend_Nbox_1 DEG_Nend_UBRbox_1 DEG_Nend_UBRbox_2 DEG_Nend_UBRbox_3 DEG_Nend_UBRbox_4", - "^M{0,1}([ED]).", - "0.0002537", - "Eukaryota", - "zf-UBR (PF02207) Putative zinc finger in N-recognin (UBR box) (Stochiometry: 1 : 1)", - "DI", - "1", - "2", - "False", - "False", - "False", - "False", - "False", - "False", - "False" - ] - ] - }, - "test3": { - "type": "assert_equal", - "args": { - "sequence": "A0PK11", - "uniprot": "True" - }, - "expected_result": [ - [ - "LIG_WD40_WDR5_VDV_2", - "ELME000365", - "WDR5 WD40 repeat (blade 5,6)-binding ligand", - "In the nuclei of eukaryotic cells, DNA is complexed with histones into nucleosomes. Post-translational modification of histones regulates their interactions with DNA and other nuclear proteins, and is important for the control of cellular processes such as gene transcription, cell cycle progression and DNA repair. Important modifications include methylation of H3 histones at lysine 4 by Set1/MLL protein family members and acetylation of H4 histones at lysine 16 by MYST protein family members. Activity of these enzymes depends on their assembly in multi-protein histone modification complexes. The WD40 repeat domain protein WDR5 plays a key role in H3K4 methylation and H4K16 acetylation by acting as a scaffold protein for the assembly of the respective core histone methylation and acetylation complex, which are conserved through evolution. The recruitment of different complex subunits by WDR5 depends on distinct motifs in WDR5-binding partners, including the catalytic subunits and the accessory proteins.", - "Homologues of RbBP5 from fungal organisms contain a motif that is similar to the WDR5-binding motif found in Vertebrata and Viridiplantae species. However, among Fungi some variances can be observed, which are not captured in the canonical motif definition (LIG_WD40_WDR5_VDV_1). Hence, a separate motif specific for fungal species has been defined. While Saccharomyces cerevisiae show some sequence similarities with an Ile-Asp-Leu core and multiple preceding acidic residues, Schizosaccharomyces species display less sequence conservation and have a lysine residue between the two conserved hydrophobic positions instead of an aspartate. Similarly, several Ustilaginomycetes species contain an arginine in this position. Candida species have an alanine and Penicillium species have a proline instead of the first valine, deeming this may be a suitable substitution. In addition, several Fungi, including Trichosporon and Ustilago species, contain a serine, threonine or tyrosine residue instead of an acidic residue in the N-terminal flanking region, suggesting possible regulation by phosphorylation. However, no experimental data was available that validate these motifs and the definition is only based on multiple sequence alignments.", - "LIG_WD40_WDR5_VDV_1 LIG_WD40_WDR5_VDV_2", - "[EDSTY].{0,4}[VIPLA][TSDEKR][ILVA]", - "0.0467786", - "Candida Fungi Penicillium Schizosaccharomyces Trichosporon Ustilaginomycetes Ustilago", - "IPR017986 (IPR017986) WD40-repeat-containing domain (Stochiometry: 1 : 1)", - "42", - "46", - "False", - "False", - "True", - "False", - "False", - "True", - "False" - ], - [ - "LIG_WD40_WDR5_VDV_2", - "ELME000365", - "WDR5 WD40 repeat (blade 5,6)-binding ligand", - "In the nuclei of eukaryotic cells, DNA is complexed with histones into nucleosomes. Post-translational modification of histones regulates their interactions with DNA and other nuclear proteins, and is important for the control of cellular processes such as gene transcription, cell cycle progression and DNA repair. Important modifications include methylation of H3 histones at lysine 4 by Set1/MLL protein family members and acetylation of H4 histones at lysine 16 by MYST protein family members. Activity of these enzymes depends on their assembly in multi-protein histone modification complexes. The WD40 repeat domain protein WDR5 plays a key role in H3K4 methylation and H4K16 acetylation by acting as a scaffold protein for the assembly of the respective core histone methylation and acetylation complex, which are conserved through evolution. The recruitment of different complex subunits by WDR5 depends on distinct motifs in WDR5-binding partners, including the catalytic subunits and the accessory proteins.", - "Homologues of RbBP5 from fungal organisms contain a motif that is similar to the WDR5-binding motif found in Vertebrata and Viridiplantae species. However, among Fungi some variances can be observed, which are not captured in the canonical motif definition (LIG_WD40_WDR5_VDV_1). Hence, a separate motif specific for fungal species has been defined. While Saccharomyces cerevisiae show some sequence similarities with an Ile-Asp-Leu core and multiple preceding acidic residues, Schizosaccharomyces species display less sequence conservation and have a lysine residue between the two conserved hydrophobic positions instead of an aspartate. Similarly, several Ustilaginomycetes species contain an arginine in this position. Candida species have an alanine and Penicillium species have a proline instead of the first valine, deeming this may be a suitable substitution. In addition, several Fungi, including Trichosporon and Ustilago species, contain a serine, threonine or tyrosine residue instead of an acidic residue in the N-terminal flanking region, suggesting possible regulation by phosphorylation. However, no experimental data was available that validate these motifs and the definition is only based on multiple sequence alignments.", - "LIG_WD40_WDR5_VDV_1 LIG_WD40_WDR5_VDV_2", - "[EDSTY].{0,4}[VIPLA][TSDEKR][ILVA]", - "0.0467786", - "Candida Fungi Penicillium Schizosaccharomyces Trichosporon Ustilaginomycetes Ustilago", - "IPR017986 (IPR017986) WD40-repeat-containing domain (Stochiometry: 1 : 1)", - "10", - "15", - "False", - "False", - "True", - "False", - "True", - "True", - "False" - ], - [ - "LIG_WD40_WDR5_VDV_2", - "ELME000365", - "WDR5 WD40 repeat (blade 5,6)-binding ligand", - "In the nuclei of eukaryotic cells, DNA is complexed with histones into nucleosomes. Post-translational modification of histones regulates their interactions with DNA and other nuclear proteins, and is important for the control of cellular processes such as gene transcription, cell cycle progression and DNA repair. Important modifications include methylation of H3 histones at lysine 4 by Set1/MLL protein family members and acetylation of H4 histones at lysine 16 by MYST protein family members. Activity of these enzymes depends on their assembly in multi-protein histone modification complexes. The WD40 repeat domain protein WDR5 plays a key role in H3K4 methylation and H4K16 acetylation by acting as a scaffold protein for the assembly of the respective core histone methylation and acetylation complex, which are conserved through evolution. The recruitment of different complex subunits by WDR5 depends on distinct motifs in WDR5-binding partners, including the catalytic subunits and the accessory proteins.", - "Homologues of RbBP5 from fungal organisms contain a motif that is similar to the WDR5-binding motif found in Vertebrata and Viridiplantae species. However, among Fungi some variances can be observed, which are not captured in the canonical motif definition (LIG_WD40_WDR5_VDV_1). Hence, a separate motif specific for fungal species has been defined. While Saccharomyces cerevisiae show some sequence similarities with an Ile-Asp-Leu core and multiple preceding acidic residues, Schizosaccharomyces species display less sequence conservation and have a lysine residue between the two conserved hydrophobic positions instead of an aspartate. Similarly, several Ustilaginomycetes species contain an arginine in this position. Candida species have an alanine and Penicillium species have a proline instead of the first valine, deeming this may be a suitable substitution. In addition, several Fungi, including Trichosporon and Ustilago species, contain a serine, threonine or tyrosine residue instead of an acidic residue in the N-terminal flanking region, suggesting possible regulation by phosphorylation. However, no experimental data was available that validate these motifs and the definition is only based on multiple sequence alignments.", - "LIG_WD40_WDR5_VDV_1 LIG_WD40_WDR5_VDV_2", - "[EDSTY].{0,4}[VIPLA][TSDEKR][ILVA]", - "0.0467786", - "Candida Fungi Penicillium Schizosaccharomyces Trichosporon Ustilaginomycetes Ustilago", - "IPR017986 (IPR017986) WD40-repeat-containing domain (Stochiometry: 1 : 1)", - "195", - "198", - "False", - "False", - "True", - "False", - "True", - "True", - "False" - ], - [ - "LIG_WD40_WDR5_VDV_2", - "ELME000365", - "WDR5 WD40 repeat (blade 5,6)-binding ligand", - "In the nuclei of eukaryotic cells, DNA is complexed with histones into nucleosomes. Post-translational modification of histones regulates their interactions with DNA and other nuclear proteins, and is important for the control of cellular processes such as gene transcription, cell cycle progression and DNA repair. Important modifications include methylation of H3 histones at lysine 4 by Set1/MLL protein family members and acetylation of H4 histones at lysine 16 by MYST protein family members. Activity of these enzymes depends on their assembly in multi-protein histone modification complexes. The WD40 repeat domain protein WDR5 plays a key role in H3K4 methylation and H4K16 acetylation by acting as a scaffold protein for the assembly of the respective core histone methylation and acetylation complex, which are conserved through evolution. The recruitment of different complex subunits by WDR5 depends on distinct motifs in WDR5-binding partners, including the catalytic subunits and the accessory proteins.", - "Homologues of RbBP5 from fungal organisms contain a motif that is similar to the WDR5-binding motif found in Vertebrata and Viridiplantae species. However, among Fungi some variances can be observed, which are not captured in the canonical motif definition (LIG_WD40_WDR5_VDV_1). Hence, a separate motif specific for fungal species has been defined. While Saccharomyces cerevisiae show some sequence similarities with an Ile-Asp-Leu core and multiple preceding acidic residues, Schizosaccharomyces species display less sequence conservation and have a lysine residue between the two conserved hydrophobic positions instead of an aspartate. Similarly, several Ustilaginomycetes species contain an arginine in this position. Candida species have an alanine and Penicillium species have a proline instead of the first valine, deeming this may be a suitable substitution. In addition, several Fungi, including Trichosporon and Ustilago species, contain a serine, threonine or tyrosine residue instead of an acidic residue in the N-terminal flanking region, suggesting possible regulation by phosphorylation. However, no experimental data was available that validate these motifs and the definition is only based on multiple sequence alignments.", - "LIG_WD40_WDR5_VDV_1 LIG_WD40_WDR5_VDV_2", - "[EDSTY].{0,4}[VIPLA][TSDEKR][ILVA]", - "0.0467786", - "Candida Fungi Penicillium Schizosaccharomyces Trichosporon Ustilaginomycetes Ustilago", - "IPR017986 (IPR017986) WD40-repeat-containing domain (Stochiometry: 1 : 1)", - "209", - "216", - "False", - "False", - "True", - "False", - "False", - "True", - "False" - ], - [ - "LIG_WD40_WDR5_VDV_2", - "ELME000365", - "WDR5 WD40 repeat (blade 5,6)-binding ligand", - "In the nuclei of eukaryotic cells, DNA is complexed with histones into nucleosomes. Post-translational modification of histones regulates their interactions with DNA and other nuclear proteins, and is important for the control of cellular processes such as gene transcription, cell cycle progression and DNA repair. Important modifications include methylation of H3 histones at lysine 4 by Set1/MLL protein family members and acetylation of H4 histones at lysine 16 by MYST protein family members. Activity of these enzymes depends on their assembly in multi-protein histone modification complexes. The WD40 repeat domain protein WDR5 plays a key role in H3K4 methylation and H4K16 acetylation by acting as a scaffold protein for the assembly of the respective core histone methylation and acetylation complex, which are conserved through evolution. The recruitment of different complex subunits by WDR5 depends on distinct motifs in WDR5-binding partners, including the catalytic subunits and the accessory proteins.", - "Homologues of RbBP5 from fungal organisms contain a motif that is similar to the WDR5-binding motif found in Vertebrata and Viridiplantae species. However, among Fungi some variances can be observed, which are not captured in the canonical motif definition (LIG_WD40_WDR5_VDV_1). Hence, a separate motif specific for fungal species has been defined. While Saccharomyces cerevisiae show some sequence similarities with an Ile-Asp-Leu core and multiple preceding acidic residues, Schizosaccharomyces species display less sequence conservation and have a lysine residue between the two conserved hydrophobic positions instead of an aspartate. Similarly, several Ustilaginomycetes species contain an arginine in this position. Candida species have an alanine and Penicillium species have a proline instead of the first valine, deeming this may be a suitable substitution. In addition, several Fungi, including Trichosporon and Ustilago species, contain a serine, threonine or tyrosine residue instead of an acidic residue in the N-terminal flanking region, suggesting possible regulation by phosphorylation. However, no experimental data was available that validate these motifs and the definition is only based on multiple sequence alignments.", - "LIG_WD40_WDR5_VDV_1 LIG_WD40_WDR5_VDV_2", - "[EDSTY].{0,4}[VIPLA][TSDEKR][ILVA]", - "0.0467786", - "Candida Fungi Penicillium Schizosaccharomyces Trichosporon Ustilaginomycetes Ustilago", - "IPR017986 (IPR017986) WD40-repeat-containing domain (Stochiometry: 1 : 1)", - "218", - "225", - "False", - "False", - "True", - "False", - "False", - "True", - "False" - ], - [ - "LIG_WD40_WDR5_VDV_2", - "ELME000365", - "WDR5 WD40 repeat (blade 5,6)-binding ligand", - "In the nuclei of eukaryotic cells, DNA is complexed with histones into nucleosomes. Post-translational modification of histones regulates their interactions with DNA and other nuclear proteins, and is important for the control of cellular processes such as gene transcription, cell cycle progression and DNA repair. Important modifications include methylation of H3 histones at lysine 4 by Set1/MLL protein family members and acetylation of H4 histones at lysine 16 by MYST protein family members. Activity of these enzymes depends on their assembly in multi-protein histone modification complexes. The WD40 repeat domain protein WDR5 plays a key role in H3K4 methylation and H4K16 acetylation by acting as a scaffold protein for the assembly of the respective core histone methylation and acetylation complex, which are conserved through evolution. The recruitment of different complex subunits by WDR5 depends on distinct motifs in WDR5-binding partners, including the catalytic subunits and the accessory proteins.", - "Homologues of RbBP5 from fungal organisms contain a motif that is similar to the WDR5-binding motif found in Vertebrata and Viridiplantae species. However, among Fungi some variances can be observed, which are not captured in the canonical motif definition (LIG_WD40_WDR5_VDV_1). Hence, a separate motif specific for fungal species has been defined. While Saccharomyces cerevisiae show some sequence similarities with an Ile-Asp-Leu core and multiple preceding acidic residues, Schizosaccharomyces species display less sequence conservation and have a lysine residue between the two conserved hydrophobic positions instead of an aspartate. Similarly, several Ustilaginomycetes species contain an arginine in this position. Candida species have an alanine and Penicillium species have a proline instead of the first valine, deeming this may be a suitable substitution. In addition, several Fungi, including Trichosporon and Ustilago species, contain a serine, threonine or tyrosine residue instead of an acidic residue in the N-terminal flanking region, suggesting possible regulation by phosphorylation. However, no experimental data was available that validate these motifs and the definition is only based on multiple sequence alignments.", - "LIG_WD40_WDR5_VDV_1 LIG_WD40_WDR5_VDV_2", - "[EDSTY].{0,4}[VIPLA][TSDEKR][ILVA]", - "0.0467786", - "Candida Fungi Penicillium Schizosaccharomyces Trichosporon Ustilaginomycetes Ustilago", - "IPR017986 (IPR017986) WD40-repeat-containing domain (Stochiometry: 1 : 1)", - "221", - "227", - "False", - "False", - "True", - "False", - "False", - "True", - "False" - ], - [ - "LIG_WD40_WDR5_VDV_2", - "ELME000365", - "WDR5 WD40 repeat (blade 5,6)-binding ligand", - "In the nuclei of eukaryotic cells, DNA is complexed with histones into nucleosomes. Post-translational modification of histones regulates their interactions with DNA and other nuclear proteins, and is important for the control of cellular processes such as gene transcription, cell cycle progression and DNA repair. Important modifications include methylation of H3 histones at lysine 4 by Set1/MLL protein family members and acetylation of H4 histones at lysine 16 by MYST protein family members. Activity of these enzymes depends on their assembly in multi-protein histone modification complexes. The WD40 repeat domain protein WDR5 plays a key role in H3K4 methylation and H4K16 acetylation by acting as a scaffold protein for the assembly of the respective core histone methylation and acetylation complex, which are conserved through evolution. The recruitment of different complex subunits by WDR5 depends on distinct motifs in WDR5-binding partners, including the catalytic subunits and the accessory proteins.", - "Homologues of RbBP5 from fungal organisms contain a motif that is similar to the WDR5-binding motif found in Vertebrata and Viridiplantae species. However, among Fungi some variances can be observed, which are not captured in the canonical motif definition (LIG_WD40_WDR5_VDV_1). Hence, a separate motif specific for fungal species has been defined. While Saccharomyces cerevisiae show some sequence similarities with an Ile-Asp-Leu core and multiple preceding acidic residues, Schizosaccharomyces species display less sequence conservation and have a lysine residue between the two conserved hydrophobic positions instead of an aspartate. Similarly, several Ustilaginomycetes species contain an arginine in this position. Candida species have an alanine and Penicillium species have a proline instead of the first valine, deeming this may be a suitable substitution. In addition, several Fungi, including Trichosporon and Ustilago species, contain a serine, threonine or tyrosine residue instead of an acidic residue in the N-terminal flanking region, suggesting possible regulation by phosphorylation. However, no experimental data was available that validate these motifs and the definition is only based on multiple sequence alignments.", - "LIG_WD40_WDR5_VDV_1 LIG_WD40_WDR5_VDV_2", - "[EDSTY].{0,4}[VIPLA][TSDEKR][ILVA]", - "0.0467786", - "Candida Fungi Penicillium Schizosaccharomyces Trichosporon Ustilaginomycetes Ustilago", - "IPR017986 (IPR017986) WD40-repeat-containing domain (Stochiometry: 1 : 1)", - "222", - "227", - "False", - "False", - "True", - "False", - "False", - "True", - "False" - ], - [ - "LIG_WD40_WDR5_VDV_2", - "ELME000365", - "WDR5 WD40 repeat (blade 5,6)-binding ligand", - "In the nuclei of eukaryotic cells, DNA is complexed with histones into nucleosomes. Post-translational modification of histones regulates their interactions with DNA and other nuclear proteins, and is important for the control of cellular processes such as gene transcription, cell cycle progression and DNA repair. Important modifications include methylation of H3 histones at lysine 4 by Set1/MLL protein family members and acetylation of H4 histones at lysine 16 by MYST protein family members. Activity of these enzymes depends on their assembly in multi-protein histone modification complexes. The WD40 repeat domain protein WDR5 plays a key role in H3K4 methylation and H4K16 acetylation by acting as a scaffold protein for the assembly of the respective core histone methylation and acetylation complex, which are conserved through evolution. The recruitment of different complex subunits by WDR5 depends on distinct motifs in WDR5-binding partners, including the catalytic subunits and the accessory proteins.", - "Homologues of RbBP5 from fungal organisms contain a motif that is similar to the WDR5-binding motif found in Vertebrata and Viridiplantae species. However, among Fungi some variances can be observed, which are not captured in the canonical motif definition (LIG_WD40_WDR5_VDV_1). Hence, a separate motif specific for fungal species has been defined. While Saccharomyces cerevisiae show some sequence similarities with an Ile-Asp-Leu core and multiple preceding acidic residues, Schizosaccharomyces species display less sequence conservation and have a lysine residue between the two conserved hydrophobic positions instead of an aspartate. Similarly, several Ustilaginomycetes species contain an arginine in this position. Candida species have an alanine and Penicillium species have a proline instead of the first valine, deeming this may be a suitable substitution. In addition, several Fungi, including Trichosporon and Ustilago species, contain a serine, threonine or tyrosine residue instead of an acidic residue in the N-terminal flanking region, suggesting possible regulation by phosphorylation. However, no experimental data was available that validate these motifs and the definition is only based on multiple sequence alignments.", - "LIG_WD40_WDR5_VDV_1 LIG_WD40_WDR5_VDV_2", - "[EDSTY].{0,4}[VIPLA][TSDEKR][ILVA]", - "0.0467786", - "Candida Fungi Penicillium Schizosaccharomyces Trichosporon Ustilaginomycetes Ustilago", - "IPR017986 (IPR017986) WD40-repeat-containing domain (Stochiometry: 1 : 1)", - "224", - "227", - "False", - "False", - "True", - "False", - "False", - "True", - "False" - ], - [ - "MOD_GlcNHglycan", - "ELME000085", - "Glycosaminoglycan attachment site", - "Proteoglycans are extracellular proteins with glycosaminoglycan chains attached at a serine residue.", - "The glycosaminoglycan attachment site is an exposed serine which accepts transfer of xylose from UDP-xylose to the hydroxyl group by protein xylosyl transferase (EC 2.4.2.26).", - "nan", - "[ED]{0,3}.(S)[GA].", - "0.0179191", - "Metazoa", - "Branch (PF02485) Core-2/I-Branching enzyme (Stochiometry: 1 : 1) PNP_UDP_1 (PF01048) Phosphorylase superfamily (Stochiometry: 1 : 1)", - "194", - "197", - "False", - "False", - "True", - "False", - "True", - "False", - "False" - ], - [ - "MOD_GlcNHglycan", - "ELME000085", - "Glycosaminoglycan attachment site", - "Proteoglycans are extracellular proteins with glycosaminoglycan chains attached at a serine residue.", - "The glycosaminoglycan attachment site is an exposed serine which accepts transfer of xylose from UDP-xylose to the hydroxyl group by protein xylosyl transferase (EC 2.4.2.26).", - "nan", - "[ED]{0,3}.(S)[GA].", - "0.0179191", - "Metazoa", - "Branch (PF02485) Core-2/I-Branching enzyme (Stochiometry: 1 : 1) PNP_UDP_1 (PF01048) Phosphorylase superfamily (Stochiometry: 1 : 1)", - "34", - "37", - "False", - "False", - "False", - "False", - "False", - "False", - "False" - ], - [ - "MOD_GlcNHglycan", - "ELME000085", - "Glycosaminoglycan attachment site", - "Proteoglycans are extracellular proteins with glycosaminoglycan chains attached at a serine residue.", - "The glycosaminoglycan attachment site is an exposed serine which accepts transfer of xylose from UDP-xylose to the hydroxyl group by protein xylosyl transferase (EC 2.4.2.26).", - "nan", - "[ED]{0,3}.(S)[GA].", - "0.0179191", - "Metazoa", - "Branch (PF02485) Core-2/I-Branching enzyme (Stochiometry: 1 : 1) PNP_UDP_1 (PF01048) Phosphorylase superfamily (Stochiometry: 1 : 1)", - "196", - "199", - "False", - "False", - "True", - "False", - "True", - "False", - "False" - ], - [ - "MOD_GlcNHglycan", - "ELME000085", - "Glycosaminoglycan attachment site", - "Proteoglycans are extracellular proteins with glycosaminoglycan chains attached at a serine residue.", - "The glycosaminoglycan attachment site is an exposed serine which accepts transfer of xylose from UDP-xylose to the hydroxyl group by protein xylosyl transferase (EC 2.4.2.26).", - "nan", - "[ED]{0,3}.(S)[GA].", - "0.0179191", - "Metazoa", - "Branch (PF02485) Core-2/I-Branching enzyme (Stochiometry: 1 : 1) PNP_UDP_1 (PF01048) Phosphorylase superfamily (Stochiometry: 1 : 1)", - "131", - "134", - "False", - "False", - "False", - "False", - "False", - "False", - "False" - ], - [ - "MOD_CK1_1", - "ELME000063", - "Casein kinase 1 (CK1) Phosphorylation site", - "Casein kinase 1 (CK1) comprises a family of serine/threonine kinases that are ubiquitous in eukaryotic system. It regulates diverse cellular processes including circadian rhythms (Shanware,2011), Wnt signaling pathway (Davidson,2005), hedgehog signaling and cell cycle (Price,2006, Knippschild,2005), membrane trafficking, cytoskeleton maintenance (Schittek,2014), DNA replication, DNA damage response, RNA metabolism, and parasitic infections (Rachidi,2014).", - "CK1 phosphorylation site", - "nan", - "S..([ST])...", - "0.0170407", - "Eukaryota", - "Pkinase (PF00069) Protein kinase domain (Stochiometry: 1 : 1)", - "81", - "87", - "False", - "False", - "False", - "False", - "False", - "False", - "False" - ], - [ - "MOD_CK1_1", - "ELME000063", - "Casein kinase 1 (CK1) Phosphorylation site", - "Casein kinase 1 (CK1) comprises a family of serine/threonine kinases that are ubiquitous in eukaryotic system. It regulates diverse cellular processes including circadian rhythms (Shanware,2011), Wnt signaling pathway (Davidson,2005), hedgehog signaling and cell cycle (Price,2006, Knippschild,2005), membrane trafficking, cytoskeleton maintenance (Schittek,2014), DNA replication, DNA damage response, RNA metabolism, and parasitic infections (Rachidi,2014).", - "CK1 phosphorylation site", - "nan", - "S..([ST])...", - "0.0170407", - "Eukaryota", - "Pkinase (PF00069) Protein kinase domain (Stochiometry: 1 : 1)", - "17", - "23", - "False", - "False", - "True", - "False", - "True", - "False", - "False" - ], - [ - "MOD_CK1_1", - "ELME000063", - "Casein kinase 1 (CK1) Phosphorylation site", - "Casein kinase 1 (CK1) comprises a family of serine/threonine kinases that are ubiquitous in eukaryotic system. It regulates diverse cellular processes including circadian rhythms (Shanware,2011), Wnt signaling pathway (Davidson,2005), hedgehog signaling and cell cycle (Price,2006, Knippschild,2005), membrane trafficking, cytoskeleton maintenance (Schittek,2014), DNA replication, DNA damage response, RNA metabolism, and parasitic infections (Rachidi,2014).", - "CK1 phosphorylation site", - "nan", - "S..([ST])...", - "0.0170407", - "Eukaryota", - "Pkinase (PF00069) Protein kinase domain (Stochiometry: 1 : 1)", - "14", - "20", - "False", - "False", - "True", - "False", - "True", - "False", - "False" - ], - [ - "MOD_CK2_1", - "ELME000064", - "Casein kinase 2 (CK2) Phosphorylation site", - "Casein kinase 2 (CK2) is a highly conserved ubiquitously expressed serine and threonine kinase present in all eukaryotes. In most of the organisms, it exists as a heterotetramer composed of two catalytic α- or α′-subunits and two non-catalytic β-subunits. Substrates can be phosphorylated by the catalytic subunits alone or by the holoenzyme. CK2 is known to phosphorylate a plethora of substrates found in various cellular compartments that shows its importance in many cellular processes including cell cycle progression, apoptosis, transcriptional regulation, protein transport, metabolism, virus infection, cell morphology, and signal transduction. Deregulation of CK2 function is associated with human pathologies such as various types of cancer, viral infection and neurodegenerative diseases. CK2 prefers acidic amino acids surrounding the phosphoacceptor residue, especially at the +3 position.", - "The main determinant of Casein kinase 2 (CK2) phosphorylation specificity is a negative charge 3 positions after the modification residue.", - "nan", - "...([ST])..E", - "0.0145681", - "Drosophila melanogaster Eukaryota Saccharomyces cerevisiae Vertebrata Zea mays", - "Pkinase (PF00069) Protein kinase domain (Stochiometry: 1 : 1)", - "47", - "53", - "False", - "False", - "False", - "False", - "False", - "False", - "False" - ], - [ - "MOD_CK2_1", - "ELME000064", - "Casein kinase 2 (CK2) Phosphorylation site", - "Casein kinase 2 (CK2) is a highly conserved ubiquitously expressed serine and threonine kinase present in all eukaryotes. In most of the organisms, it exists as a heterotetramer composed of two catalytic α- or α′-subunits and two non-catalytic β-subunits. Substrates can be phosphorylated by the catalytic subunits alone or by the holoenzyme. CK2 is known to phosphorylate a plethora of substrates found in various cellular compartments that shows its importance in many cellular processes including cell cycle progression, apoptosis, transcriptional regulation, protein transport, metabolism, virus infection, cell morphology, and signal transduction. Deregulation of CK2 function is associated with human pathologies such as various types of cancer, viral infection and neurodegenerative diseases. CK2 prefers acidic amino acids surrounding the phosphoacceptor residue, especially at the +3 position.", - "The main determinant of Casein kinase 2 (CK2) phosphorylation specificity is a negative charge 3 positions after the modification residue.", - "nan", - "...([ST])..E", - "0.0145681", - "Drosophila melanogaster Eukaryota Saccharomyces cerevisiae Vertebrata Zea mays", - "Pkinase (PF00069) Protein kinase domain (Stochiometry: 1 : 1)", - "215", - "221", - "False", - "False", - "False", - "False", - "False", - "False", - "False" - ], - [ - "LIG_SH3_3", - "ELME000155", - "SH3 ligand", - "This motif is involved in protein-protein interaction mediated by SH3 domains.", - "This is the motif recognized by those SH3 domains with a non-canonical class I recognition specificity", - "LIG_SH3_1 LIG_SH3_2 LIG_SH3_3 LIG_SH3_4 LIG_SH3_5", - "...[PV]..P", - "0.0131729", - "Bos taurus Metazoa", - "SH3_1 (PF00018) SH3 domain (Stochiometry: 1 : 1) PDB Structure: 2GBQ", - "128", - "134", - "False", - "False", - "False", - "False", - "False", - "False", - "False" - ], - [ - "MOD_SUMO_rev_2", - "ELME000393", - "Sumoylation site", - "Sumoylation is a common PTM of nuclear proteins that affects their functional status. SUMO belongs to the large multiprotein family of Ubiquitin-like proteins. The sumoylation modification is achieved by a typical E1-, E2- and E3-ligase based system. Many transcription factors, chromatin proteins and proteins involved in other nuclear functions as well as the nuclear pores are sumoylated. Sumoylation is known to cause dramatic rearrangements of the subnuclear location of modified proteins.", - "The inverted version (D/ExKphi) of the canonical MOD_SUMO_1 motif is used less commonly than the canonical motif. In comparison to the regular version (PhiKxE), the hydrophobic residue, while preferred, might not be essential. Also it might tolerate some positional flexibility. The acidic residue position is more tolerant than for the canonical motif so that, besides glutamic acid, aspartic acid is also allowed. The core of the motif is preceded by a run of residues that prefer negative charges, however no position is strongly conserved. The reversed motif was first mentioned by Matic et al. (Matic,2010) and also found by other high throughput studies (Tammsalu,2014, Impens,2014 and Hendriks,2014). Yung-Kang Lee et al. (Lee,2007) found the SUMOylation site DT(K)FS at position 804 in TRIM28 by a combination of proteomic screening and site- directed mutagenesis. These findings have been confirmed by another study (Ivanov,2007). It is possible, that the canonical motif (PhiKxE) and the inverted motif (D/ExK) can sometimes be overlapping. For example, the SUMOylation site EV(K)AE at position 486 of PARK1 (Messner,2009) fits both motifs. Without a crystal structure, it would not be possible to determine the binding orientation of these instances. There is no crystal structure of a complex involving SUMO and a reverse motif available yet. The ELM pattern may not be optimal and is likely to be improved when a more precise experimental description becomes available.", - "LIG_KEPE_1 LIG_KEPE_2 LIG_KEPE_3 MOD_SUMO_for_1 MOD_SUMO_rev_2", - "[SDE].{0,5}[DE].(K).{0,1}[AIFLMPSTV]", - "0.0128026", - "Eukaryota", - "UQ_con (PF00179) Ubiquitin-conjugating enzyme (Stochiometry: 1 : 1)", - "209", - "218", - "False", - "False", - "False", - "False", - "False", - "False", - "False" - ], - [ - "DOC_USP7_MATH_1", - "ELME000239", - "USP7 binding motif", - "USP7, also known as HAUSP, is a deubiquitinating enzyme that cleaves ubiquitin moieties from its substrates. The USP7-mediated deubiquitination of p53, MDM2 and USP7 inhibition by the herpes viral proteins EBNA1 and ICP0 shows its importance in the regulation of cell survival pathways and in controlling key cellular processes important for viral infection. The N-terminal MATH domain of USP7 is responsible for substrate recognition and nuclear localization while the catalytic core domain is required for the deubiquitinating activity. The C-terminal Ubl domain is responsible for several USP7 substrate interactions, including ICP0, GMPS, DNMT1 and UHRF1 leading to substrate stabilisation, USP7 translocation or activation", - "Targeting motif found in USP7 substrates, docking to the MATH domain. The USP7 MATH domain is a TRAF-like domain but with different sequence specificity to the classical TRAF domain. The motif identified in p53 and MDM2 recognises the same surface groove in USP7. But MDM2s make more extensive contacts than p53, leading to stronger affinity. The motif identified in these proteins can therefore be categorised as either a lower affinity motif or high affinity motif, depending on the extent of contact. The general pattern of the motif is a simple P..S and will have frequent matches in cellular proteins but it is not clear how numerous are the USP7 substrates.", - "DOC_USP7_MATH_1 DOC_USP7_MATH_2 DOC_USP7_UBL2_3", - "[PA][^P][^FYWIL]S[^P]", - "0.0123885", - "Eukaryota", - "MATH (PF00917) MATH domain (Stochiometry: 1 : 1) PDB Structure: 3MQR", - "112", - "116", - "False", - "False", - "True", - "False", - "True", - "False", - "False" - ], - [ - "DOC_USP7_MATH_1", - "ELME000239", - "USP7 binding motif", - "USP7, also known as HAUSP, is a deubiquitinating enzyme that cleaves ubiquitin moieties from its substrates. The USP7-mediated deubiquitination of p53, MDM2 and USP7 inhibition by the herpes viral proteins EBNA1 and ICP0 shows its importance in the regulation of cell survival pathways and in controlling key cellular processes important for viral infection. The N-terminal MATH domain of USP7 is responsible for substrate recognition and nuclear localization while the catalytic core domain is required for the deubiquitinating activity. The C-terminal Ubl domain is responsible for several USP7 substrate interactions, including ICP0, GMPS, DNMT1 and UHRF1 leading to substrate stabilisation, USP7 translocation or activation", - "Targeting motif found in USP7 substrates, docking to the MATH domain. The USP7 MATH domain is a TRAF-like domain but with different sequence specificity to the classical TRAF domain. The motif identified in p53 and MDM2 recognises the same surface groove in USP7. But MDM2s make more extensive contacts than p53, leading to stronger affinity. The motif identified in these proteins can therefore be categorised as either a lower affinity motif or high affinity motif, depending on the extent of contact. The general pattern of the motif is a simple P..S and will have frequent matches in cellular proteins but it is not clear how numerous are the USP7 substrates.", - "DOC_USP7_MATH_1 DOC_USP7_MATH_2 DOC_USP7_UBL2_3", - "[PA][^P][^FYWIL]S[^P]", - "0.0123885", - "Eukaryota", - "MATH (PF00917) MATH domain (Stochiometry: 1 : 1) PDB Structure: 3MQR", - "194", - "198", - "False", - "False", - "True", - "False", - "True", - "False", - "False" - ], - [ - "DOC_USP7_MATH_1", - "ELME000239", - "USP7 binding motif", - "USP7, also known as HAUSP, is a deubiquitinating enzyme that cleaves ubiquitin moieties from its substrates. The USP7-mediated deubiquitination of p53, MDM2 and USP7 inhibition by the herpes viral proteins EBNA1 and ICP0 shows its importance in the regulation of cell survival pathways and in controlling key cellular processes important for viral infection. The N-terminal MATH domain of USP7 is responsible for substrate recognition and nuclear localization while the catalytic core domain is required for the deubiquitinating activity. The C-terminal Ubl domain is responsible for several USP7 substrate interactions, including ICP0, GMPS, DNMT1 and UHRF1 leading to substrate stabilisation, USP7 translocation or activation", - "Targeting motif found in USP7 substrates, docking to the MATH domain. The USP7 MATH domain is a TRAF-like domain but with different sequence specificity to the classical TRAF domain. The motif identified in p53 and MDM2 recognises the same surface groove in USP7. But MDM2s make more extensive contacts than p53, leading to stronger affinity. The motif identified in these proteins can therefore be categorised as either a lower affinity motif or high affinity motif, depending on the extent of contact. The general pattern of the motif is a simple P..S and will have frequent matches in cellular proteins but it is not clear how numerous are the USP7 substrates.", - "DOC_USP7_MATH_1 DOC_USP7_MATH_2 DOC_USP7_UBL2_3", - "[PA][^P][^FYWIL]S[^P]", - "0.0123885", - "Eukaryota", - "MATH (PF00917) MATH domain (Stochiometry: 1 : 1) PDB Structure: 3MQR", - "151", - "155", - "False", - "False", - "True", - "False", - "True", - "False", - "False" - ], - [ - "MOD_PKA_2", - "ELME000062", - "PKA Phosphorylation site", - "Motifs phosphorylated by a subset of AGC group kinases including PKA that all have similar sequence specificity.", - "Secondary preference for PKA-type AGC kinase phosphorylation with a single Arg at p-2. This motif is probably more often targeted by other basophilic kinases of the AGC group, including PAK1 and PKC isoforms: These kinases actually show a stronger preference at p-2 than PKA, which has the strongest basophilic preference at p-3. AGC group kinases do not tolerate Pro at position +1. It is likely that some specificity determinants distinguishing among these kinases may be present at the less conserved non-basic sites.", - "MOD_PKA_1 MOD_PKA_2", - ".R.([ST])[^P]..", - "0.0094575", - "Eukaryota", - "Pkinase (PF00069) Protein kinase domain (Stochiometry: 1 : 1)", - "78", - "84", - "False", - "False", - "False", - "False", - "False", - "False", - "False" - ], - [ - "MOD_PIKK_1", - "ELME000202", - "PIKK phosphorylation site", - "The phosphoinositide-3-OH-kinase related kinases (PIKKs) are atypical protein kinases exclusive to eukaryotes. The PIKK members are large proteins with Ser/Thr kinase activity serving important roles in DNA repair and DNA damage checkpoints. The three PIKK proteins with repair and checkpoint functions in mammalian cells are: DNA-PK (DNA-dependent protein kinase), ATM (ataxia telangiectasia mutated), and ATR (ATM and Rad3 related).", - "The PIKK family member proteins specifically phosphorylate the (ST)Q motif in their substrates. The glutamine adjacent to the target serine-threonine is critical for the substrate recognition.", - "nan", - "...([ST])Q..", - "0.0092301", - "Eukaryota", - "PI3_PI4_kinase (PF00454) Phosphatidylinositol 3- and 4-kinase (Stochiometry: 1 : 1)", - "206", - "212", - "False", - "False", - "False", - "False", - "False", - "False", - "False" - ], - [ - "MOD_PIKK_1", - "ELME000202", - "PIKK phosphorylation site", - "The phosphoinositide-3-OH-kinase related kinases (PIKKs) are atypical protein kinases exclusive to eukaryotes. The PIKK members are large proteins with Ser/Thr kinase activity serving important roles in DNA repair and DNA damage checkpoints. The three PIKK proteins with repair and checkpoint functions in mammalian cells are: DNA-PK (DNA-dependent protein kinase), ATM (ataxia telangiectasia mutated), and ATR (ATM and Rad3 related).", - "The PIKK family member proteins specifically phosphorylate the (ST)Q motif in their substrates. The glutamine adjacent to the target serine-threonine is critical for the substrate recognition.", - "nan", - "...([ST])Q..", - "0.0092301", - "Eukaryota", - "PI3_PI4_kinase (PF00454) Phosphatidylinositol 3- and 4-kinase (Stochiometry: 1 : 1)", - "78", - "84", - "False", - "False", - "False", - "False", - "False", - "False", - "False" - ], - [ - "LIG_FHA_1", - "ELME000052", - "FHA phosphopeptide ligands", - "The FHA domain is a signal transduction module which recognizes phosphothreonine containing peptides on the ligand proteins. FHA domains partake in many signalling processes but are especially prevalent in nuclear proteins that are involved in cell cycle checkpoint, DNA repair and transcriptional regulation.", - "LIG_FHA_1 motifs are short phosphothreonine modules binding FHA domains with large aliphatic amino acids at the pT+3 position. The motif has the consensus sequence of T..[IVL]. Proteins with FHA domains having this preference include the checkpoint kinase chk2 (Li,2002) and DNA repair protein rad9 (Byeon,2001).", - "LIG_FHA_1 LIG_FHA_2", - "..(T)..[ILV].", - "0.0086622", - "Eukaryota", - "FHA (PF00498) FHA domain (Stochiometry: 1 : 1) PDB Structure: 1K2N", - "163", - "169", - "False", - "False", - "False", - "False", - "False", - "False", - "False" - ], - [ - "LIG_FHA_2", - "ELME000220", - "FHA phosphopeptide ligands", - "The FHA domain is a signal transduction module which recognizes phosphothreonine containing peptides on the ligand proteins. FHA domains partake in many signalling processes but are especially prevalent in nuclear proteins that are involved in cell cycle checkpoint, DNA repair and transcriptional regulation.", - "LIG_FHA_2 motifs are short phosphothreonine peptide modules contains acidic amino acids at the pT+3 position. The motif has the consensus sequence of T..[ED]. FHA domains with this preference are found in checkpoint/repair proteins MRC1 and Rad9 of Fungi and Metazoa Xrcc1 (Luo,2004) and Xrcc4 (Koch,2004).", - "LIG_FHA_1 LIG_FHA_2", - "..(T)..[DE].", - "0.0082864", - "Eukaryota", - "FHA (PF00498) FHA domain (Stochiometry: 1 : 1) PDB Structure: 1K3N", - "48", - "54", - "False", - "False", - "False", - "False", - "False", - "False", - "False" - ], - [ - "LIG_FHA_2", - "ELME000220", - "FHA phosphopeptide ligands", - "The FHA domain is a signal transduction module which recognizes phosphothreonine containing peptides on the ligand proteins. FHA domains partake in many signalling processes but are especially prevalent in nuclear proteins that are involved in cell cycle checkpoint, DNA repair and transcriptional regulation.", - "LIG_FHA_2 motifs are short phosphothreonine peptide modules contains acidic amino acids at the pT+3 position. The motif has the consensus sequence of T..[ED]. FHA domains with this preference are found in checkpoint/repair proteins MRC1 and Rad9 of Fungi and Metazoa Xrcc1 (Luo,2004) and Xrcc4 (Koch,2004).", - "LIG_FHA_1 LIG_FHA_2", - "..(T)..[DE].", - "0.0082864", - "Eukaryota", - "FHA (PF00498) FHA domain (Stochiometry: 1 : 1) PDB Structure: 1K3N", - "40", - "46", - "False", - "False", - "False", - "False", - "False", - "False", - "False" - ], - [ - "LIG_FHA_2", - "ELME000220", - "FHA phosphopeptide ligands", - "The FHA domain is a signal transduction module which recognizes phosphothreonine containing peptides on the ligand proteins. FHA domains partake in many signalling processes but are especially prevalent in nuclear proteins that are involved in cell cycle checkpoint, DNA repair and transcriptional regulation.", - "LIG_FHA_2 motifs are short phosphothreonine peptide modules contains acidic amino acids at the pT+3 position. The motif has the consensus sequence of T..[ED]. FHA domains with this preference are found in checkpoint/repair proteins MRC1 and Rad9 of Fungi and Metazoa Xrcc1 (Luo,2004) and Xrcc4 (Koch,2004).", - "LIG_FHA_1 LIG_FHA_2", - "..(T)..[DE].", - "0.0082864", - "Eukaryota", - "FHA (PF00498) FHA domain (Stochiometry: 1 : 1) PDB Structure: 1K3N", - "216", - "222", - "False", - "False", - "False", - "False", - "False", - "False", - "False" - ], - [ - "LIG_FHA_2", - "ELME000220", - "FHA phosphopeptide ligands", - "The FHA domain is a signal transduction module which recognizes phosphothreonine containing peptides on the ligand proteins. FHA domains partake in many signalling processes but are especially prevalent in nuclear proteins that are involved in cell cycle checkpoint, DNA repair and transcriptional regulation.", - "LIG_FHA_2 motifs are short phosphothreonine peptide modules contains acidic amino acids at the pT+3 position. The motif has the consensus sequence of T..[ED]. FHA domains with this preference are found in checkpoint/repair proteins MRC1 and Rad9 of Fungi and Metazoa Xrcc1 (Luo,2004) and Xrcc4 (Koch,2004).", - "LIG_FHA_1 LIG_FHA_2", - "..(T)..[DE].", - "0.0082864", - "Eukaryota", - "FHA (PF00498) FHA domain (Stochiometry: 1 : 1) PDB Structure: 1K3N", - "224", - "230", - "False", - "False", - "False", - "False", - "False", - "False", - "False" - ], - [ - "MOD_Plk_1", - "ELME000442", - "Polo-like kinase phosphosites", - "The members of the Polo subfamily of the Ser/Thr protein kinases, Plks, play key roles during multiple stages of mitosis including prophase, metaphase, anaphase, and cytokinesis. They are spatially restricted to structures such as the centrosome, central spindle and kinetochores. Plk C-terminal regions have conserved polo box domains (PBD) that are critical for localization and function. The PBD recognizes the pS/pT docking motif on a substrate which has already been phosphorylated either by a self-priming mechanism e.g. by Plk1 itself, or by non-self priming by Pro-directed kinases such as CDKs. The C-terminal polo box domain (PBD) of the Plks acts as the pS/pT-binding module. The phosphorylation-dependent binding of the PBD to its ligands also activates the kinase domain by relieving an intramolecular inhibitory interaction, together with activating phosphorylations at one or more sites. Based on the sequence specificity around the pS or pT, three different motif variants have been categorized for the Plks.", - "Plks (polo-like kinases) are regarded as acidophilic kinases. The sites of phosphorylation are characterised by negatively charged and hydrophobic residues at specific positions around the phosphoacceptor S/T residue. Plk1 can phosphorylate the substrate protein directly bound to its PBD or an alternative substrate that is scaffolded by the PBD-bound protein. Plk1 phosphorylates Ser/Thr residues with a strong preference for Asp, Glu, or Asn in the -2 position and typically a Phe, or other aromatic or bulky hydrophobic amino acid in the +1 position (Alexander,2011; Kettenbach,2012; Franchin,2014). Pro (which is also hydrophobic) is strictly forbidden at +1 because it blocks the required backbone H-bond. The +2 position also has a preference for hydrophobic residues but this does not seem to be absolutely required. However, some sites appear to use +2 as the main hydrophobic position so long as disfavoured residues (DEKNP) are avoided at the +1 position. There is no strict amino acid preference observed in the -1 position although the peptides containing either Gly or Pro in this position were poorly phosphorylated, so they are excluded from the motif pattern in ELM.", - "MOD_Plk_1 MOD_Plk_2-3 MOD_Plk_4", - ".[DNE][^PG][ST](([FYILMVW]..)|([^PEDGKN][FWYLIVM]).)", - "0.0076743", - "Eukaryota", - "Pkinase (PF00069) Protein kinase domain (Stochiometry: 1 : 1)", - "185", - "191", - "False", - "False", - "False", - "False", - "False", - "False", - "False" - ], - [ - "MOD_Plk_1", - "ELME000442", - "Polo-like kinase phosphosites", - "The members of the Polo subfamily of the Ser/Thr protein kinases, Plks, play key roles during multiple stages of mitosis including prophase, metaphase, anaphase, and cytokinesis. They are spatially restricted to structures such as the centrosome, central spindle and kinetochores. Plk C-terminal regions have conserved polo box domains (PBD) that are critical for localization and function. The PBD recognizes the pS/pT docking motif on a substrate which has already been phosphorylated either by a self-priming mechanism e.g. by Plk1 itself, or by non-self priming by Pro-directed kinases such as CDKs. The C-terminal polo box domain (PBD) of the Plks acts as the pS/pT-binding module. The phosphorylation-dependent binding of the PBD to its ligands also activates the kinase domain by relieving an intramolecular inhibitory interaction, together with activating phosphorylations at one or more sites. Based on the sequence specificity around the pS or pT, three different motif variants have been categorized for the Plks.", - "Plks (polo-like kinases) are regarded as acidophilic kinases. The sites of phosphorylation are characterised by negatively charged and hydrophobic residues at specific positions around the phosphoacceptor S/T residue. Plk1 can phosphorylate the substrate protein directly bound to its PBD or an alternative substrate that is scaffolded by the PBD-bound protein. Plk1 phosphorylates Ser/Thr residues with a strong preference for Asp, Glu, or Asn in the -2 position and typically a Phe, or other aromatic or bulky hydrophobic amino acid in the +1 position (Alexander,2011; Kettenbach,2012; Franchin,2014). Pro (which is also hydrophobic) is strictly forbidden at +1 because it blocks the required backbone H-bond. The +2 position also has a preference for hydrophobic residues but this does not seem to be absolutely required. However, some sites appear to use +2 as the main hydrophobic position so long as disfavoured residues (DEKNP) are avoided at the +1 position. There is no strict amino acid preference observed in the -1 position although the peptides containing either Gly or Pro in this position were poorly phosphorylated, so they are excluded from the motif pattern in ELM.", - "MOD_Plk_1 MOD_Plk_2-3 MOD_Plk_4", - ".[DNE][^PG][ST](([FYILMVW]..)|([^PEDGKN][FWYLIVM]).)", - "0.0076743", - "Eukaryota", - "Pkinase (PF00069) Protein kinase domain (Stochiometry: 1 : 1)", - "221", - "227", - "False", - "False", - "False", - "False", - "False", - "False", - "False" - ], - [ - "LIG_LIR_Nem_3", - "ELME000370", - "Atg8 protein family ligands", - "The autophagy-related protein Atg8 and its homologues LC3 and GABARAP play an important role in selective autophagy. During autophagy, Atg8 proteins get directly conjugated to phosphatidylethanolamine (PE) lipids to mediate membrane fusion events involved in autophagosome biogenesis such as phagophore formation and elongation. In addition, different Atg8 protein family members can recruit specific adaptors bound to ubiquitylated proteins, organelles or pathogens for degradation. Many of these adaptor proteins contain an LC3-interacting region (LIR) that mediates binding to Atg8 and Atg8-related proteins. These LIR:Atg8/LC3/GABARAP interactions are essential for cellular cell homeostasis as well as the control of intra- and extracellular stress conditions.", - "Based on multiple sequence alignments, the LIR motif appears to be less specific in Nematoda: in the last position phenylalanine and tyrosine are allowed in addition to the aliphatic hydrophobic amino acids.", - "LIG_LIR_Apic_2 LIG_LIR_Gen_1 LIG_LIR_LC3C_4 LIG_LIR_Nem_3", - "[EDST].{0,2}[WFY]..[ILVFY]", - "0.0063621", - "Nematoda", - "Atg8 (PF02991) Autophagy protein Atg8 ubiquitin like (Stochiometry: 1 : 1)", - "84", - "89", - "False", - "False", - "True", - "False", - "False", - "True", - "False" - ], - [ - "LIG_LIR_Nem_3", - "ELME000370", - "Atg8 protein family ligands", - "The autophagy-related protein Atg8 and its homologues LC3 and GABARAP play an important role in selective autophagy. During autophagy, Atg8 proteins get directly conjugated to phosphatidylethanolamine (PE) lipids to mediate membrane fusion events involved in autophagosome biogenesis such as phagophore formation and elongation. In addition, different Atg8 protein family members can recruit specific adaptors bound to ubiquitylated proteins, organelles or pathogens for degradation. Many of these adaptor proteins contain an LC3-interacting region (LIR) that mediates binding to Atg8 and Atg8-related proteins. These LIR:Atg8/LC3/GABARAP interactions are essential for cellular cell homeostasis as well as the control of intra- and extracellular stress conditions.", - "Based on multiple sequence alignments, the LIR motif appears to be less specific in Nematoda: in the last position phenylalanine and tyrosine are allowed in addition to the aliphatic hydrophobic amino acids.", - "LIG_LIR_Apic_2 LIG_LIR_Gen_1 LIG_LIR_LC3C_4 LIG_LIR_Nem_3", - "[EDST].{0,2}[WFY]..[ILVFY]", - "0.0063621", - "Nematoda", - "Atg8 (PF02991) Autophagy protein Atg8 ubiquitin like (Stochiometry: 1 : 1)", - "20", - "24", - "False", - "False", - "True", - "False", - "True", - "True", - "False" - ], - [ - "LIG_LIR_Nem_3", - "ELME000370", - "Atg8 protein family ligands", - "The autophagy-related protein Atg8 and its homologues LC3 and GABARAP play an important role in selective autophagy. During autophagy, Atg8 proteins get directly conjugated to phosphatidylethanolamine (PE) lipids to mediate membrane fusion events involved in autophagosome biogenesis such as phagophore formation and elongation. In addition, different Atg8 protein family members can recruit specific adaptors bound to ubiquitylated proteins, organelles or pathogens for degradation. Many of these adaptor proteins contain an LC3-interacting region (LIR) that mediates binding to Atg8 and Atg8-related proteins. These LIR:Atg8/LC3/GABARAP interactions are essential for cellular cell homeostasis as well as the control of intra- and extracellular stress conditions.", - "Based on multiple sequence alignments, the LIR motif appears to be less specific in Nematoda: in the last position phenylalanine and tyrosine are allowed in addition to the aliphatic hydrophobic amino acids.", - "LIG_LIR_Apic_2 LIG_LIR_Gen_1 LIG_LIR_LC3C_4 LIG_LIR_Nem_3", - "[EDST].{0,2}[WFY]..[ILVFY]", - "0.0063621", - "Nematoda", - "Atg8 (PF02991) Autophagy protein Atg8 ubiquitin like (Stochiometry: 1 : 1)", - "115", - "121", - "False", - "False", - "True", - "False", - "True", - "True", - "False" - ], - [ - "LIG_LIR_Nem_3", - "ELME000370", - "Atg8 protein family ligands", - "The autophagy-related protein Atg8 and its homologues LC3 and GABARAP play an important role in selective autophagy. During autophagy, Atg8 proteins get directly conjugated to phosphatidylethanolamine (PE) lipids to mediate membrane fusion events involved in autophagosome biogenesis such as phagophore formation and elongation. In addition, different Atg8 protein family members can recruit specific adaptors bound to ubiquitylated proteins, organelles or pathogens for degradation. Many of these adaptor proteins contain an LC3-interacting region (LIR) that mediates binding to Atg8 and Atg8-related proteins. These LIR:Atg8/LC3/GABARAP interactions are essential for cellular cell homeostasis as well as the control of intra- and extracellular stress conditions.", - "Based on multiple sequence alignments, the LIR motif appears to be less specific in Nematoda: in the last position phenylalanine and tyrosine are allowed in addition to the aliphatic hydrophobic amino acids.", - "LIG_LIR_Apic_2 LIG_LIR_Gen_1 LIG_LIR_LC3C_4 LIG_LIR_Nem_3", - "[EDST].{0,2}[WFY]..[ILVFY]", - "0.0063621", - "Nematoda", - "Atg8 (PF02991) Autophagy protein Atg8 ubiquitin like (Stochiometry: 1 : 1)", - "81", - "86", - "False", - "False", - "True", - "False", - "False", - "True", - "False" - ], - [ - "LIG_LIR_Nem_3", - "ELME000370", - "Atg8 protein family ligands", - "The autophagy-related protein Atg8 and its homologues LC3 and GABARAP play an important role in selective autophagy. During autophagy, Atg8 proteins get directly conjugated to phosphatidylethanolamine (PE) lipids to mediate membrane fusion events involved in autophagosome biogenesis such as phagophore formation and elongation. In addition, different Atg8 protein family members can recruit specific adaptors bound to ubiquitylated proteins, organelles or pathogens for degradation. Many of these adaptor proteins contain an LC3-interacting region (LIR) that mediates binding to Atg8 and Atg8-related proteins. These LIR:Atg8/LC3/GABARAP interactions are essential for cellular cell homeostasis as well as the control of intra- and extracellular stress conditions.", - "Based on multiple sequence alignments, the LIR motif appears to be less specific in Nematoda: in the last position phenylalanine and tyrosine are allowed in addition to the aliphatic hydrophobic amino acids.", - "LIG_LIR_Apic_2 LIG_LIR_Gen_1 LIG_LIR_LC3C_4 LIG_LIR_Nem_3", - "[EDST].{0,2}[WFY]..[ILVFY]", - "0.0063621", - "Nematoda", - "Atg8 (PF02991) Autophagy protein Atg8 ubiquitin like (Stochiometry: 1 : 1)", - "60", - "66", - "False", - "False", - "True", - "False", - "False", - "True", - "False" - ], - [ - "LIG_LIR_Nem_3", - "ELME000370", - "Atg8 protein family ligands", - "The autophagy-related protein Atg8 and its homologues LC3 and GABARAP play an important role in selective autophagy. During autophagy, Atg8 proteins get directly conjugated to phosphatidylethanolamine (PE) lipids to mediate membrane fusion events involved in autophagosome biogenesis such as phagophore formation and elongation. In addition, different Atg8 protein family members can recruit specific adaptors bound to ubiquitylated proteins, organelles or pathogens for degradation. Many of these adaptor proteins contain an LC3-interacting region (LIR) that mediates binding to Atg8 and Atg8-related proteins. These LIR:Atg8/LC3/GABARAP interactions are essential for cellular cell homeostasis as well as the control of intra- and extracellular stress conditions.", - "Based on multiple sequence alignments, the LIR motif appears to be less specific in Nematoda: in the last position phenylalanine and tyrosine are allowed in addition to the aliphatic hydrophobic amino acids.", - "LIG_LIR_Apic_2 LIG_LIR_Gen_1 LIG_LIR_LC3C_4 LIG_LIR_Nem_3", - "[EDST].{0,2}[WFY]..[ILVFY]", - "0.0063621", - "Nematoda", - "Atg8 (PF02991) Autophagy protein Atg8 ubiquitin like (Stochiometry: 1 : 1)", - "19", - "24", - "False", - "False", - "True", - "False", - "True", - "True", - "False" - ], - [ - "LIG_LIR_Nem_3", - "ELME000370", - "Atg8 protein family ligands", - "The autophagy-related protein Atg8 and its homologues LC3 and GABARAP play an important role in selective autophagy. During autophagy, Atg8 proteins get directly conjugated to phosphatidylethanolamine (PE) lipids to mediate membrane fusion events involved in autophagosome biogenesis such as phagophore formation and elongation. In addition, different Atg8 protein family members can recruit specific adaptors bound to ubiquitylated proteins, organelles or pathogens for degradation. Many of these adaptor proteins contain an LC3-interacting region (LIR) that mediates binding to Atg8 and Atg8-related proteins. These LIR:Atg8/LC3/GABARAP interactions are essential for cellular cell homeostasis as well as the control of intra- and extracellular stress conditions.", - "Based on multiple sequence alignments, the LIR motif appears to be less specific in Nematoda: in the last position phenylalanine and tyrosine are allowed in addition to the aliphatic hydrophobic amino acids.", - "LIG_LIR_Apic_2 LIG_LIR_Gen_1 LIG_LIR_LC3C_4 LIG_LIR_Nem_3", - "[EDST].{0,2}[WFY]..[ILVFY]", - "0.0063621", - "Nematoda", - "Atg8 (PF02991) Autophagy protein Atg8 ubiquitin like (Stochiometry: 1 : 1)", - "188", - "193", - "False", - "False", - "True", - "False", - "True", - "True", - "False" - ], - [ - "LIG_LIR_Nem_3", - "ELME000370", - "Atg8 protein family ligands", - "The autophagy-related protein Atg8 and its homologues LC3 and GABARAP play an important role in selective autophagy. During autophagy, Atg8 proteins get directly conjugated to phosphatidylethanolamine (PE) lipids to mediate membrane fusion events involved in autophagosome biogenesis such as phagophore formation and elongation. In addition, different Atg8 protein family members can recruit specific adaptors bound to ubiquitylated proteins, organelles or pathogens for degradation. Many of these adaptor proteins contain an LC3-interacting region (LIR) that mediates binding to Atg8 and Atg8-related proteins. These LIR:Atg8/LC3/GABARAP interactions are essential for cellular cell homeostasis as well as the control of intra- and extracellular stress conditions.", - "Based on multiple sequence alignments, the LIR motif appears to be less specific in Nematoda: in the last position phenylalanine and tyrosine are allowed in addition to the aliphatic hydrophobic amino acids.", - "LIG_LIR_Apic_2 LIG_LIR_Gen_1 LIG_LIR_LC3C_4 LIG_LIR_Nem_3", - "[EDST].{0,2}[WFY]..[ILVFY]", - "0.0063621", - "Nematoda", - "Atg8 (PF02991) Autophagy protein Atg8 ubiquitin like (Stochiometry: 1 : 1)", - "187", - "193", - "False", - "False", - "True", - "False", - "False", - "True", - "False" - ], - [ - "LIG_LIR_Nem_3", - "ELME000370", - "Atg8 protein family ligands", - "The autophagy-related protein Atg8 and its homologues LC3 and GABARAP play an important role in selective autophagy. During autophagy, Atg8 proteins get directly conjugated to phosphatidylethanolamine (PE) lipids to mediate membrane fusion events involved in autophagosome biogenesis such as phagophore formation and elongation. In addition, different Atg8 protein family members can recruit specific adaptors bound to ubiquitylated proteins, organelles or pathogens for degradation. Many of these adaptor proteins contain an LC3-interacting region (LIR) that mediates binding to Atg8 and Atg8-related proteins. These LIR:Atg8/LC3/GABARAP interactions are essential for cellular cell homeostasis as well as the control of intra- and extracellular stress conditions.", - "Based on multiple sequence alignments, the LIR motif appears to be less specific in Nematoda: in the last position phenylalanine and tyrosine are allowed in addition to the aliphatic hydrophobic amino acids.", - "LIG_LIR_Apic_2 LIG_LIR_Gen_1 LIG_LIR_LC3C_4 LIG_LIR_Nem_3", - "[EDST].{0,2}[WFY]..[ILVFY]", - "0.0063621", - "Nematoda", - "Atg8 (PF02991) Autophagy protein Atg8 ubiquitin like (Stochiometry: 1 : 1)", - "173", - "179", - "False", - "False", - "True", - "False", - "False", - "True", - "False" - ], - [ - "LIG_LIR_Nem_3", - "ELME000370", - "Atg8 protein family ligands", - "The autophagy-related protein Atg8 and its homologues LC3 and GABARAP play an important role in selective autophagy. During autophagy, Atg8 proteins get directly conjugated to phosphatidylethanolamine (PE) lipids to mediate membrane fusion events involved in autophagosome biogenesis such as phagophore formation and elongation. In addition, different Atg8 protein family members can recruit specific adaptors bound to ubiquitylated proteins, organelles or pathogens for degradation. Many of these adaptor proteins contain an LC3-interacting region (LIR) that mediates binding to Atg8 and Atg8-related proteins. These LIR:Atg8/LC3/GABARAP interactions are essential for cellular cell homeostasis as well as the control of intra- and extracellular stress conditions.", - "Based on multiple sequence alignments, the LIR motif appears to be less specific in Nematoda: in the last position phenylalanine and tyrosine are allowed in addition to the aliphatic hydrophobic amino acids.", - "LIG_LIR_Apic_2 LIG_LIR_Gen_1 LIG_LIR_LC3C_4 LIG_LIR_Nem_3", - "[EDST].{0,2}[WFY]..[ILVFY]", - "0.0063621", - "Nematoda", - "Atg8 (PF02991) Autophagy protein Atg8 ubiquitin like (Stochiometry: 1 : 1)", - "17", - "21", - "False", - "False", - "True", - "False", - "True", - "True", - "False" - ], - [ - "MOD_Plk_4", - "ELME000444", - "Polo-like kinase phosphosites", - "The members of the Polo subfamily of the Ser/Thr protein kinases, Plks, play key roles during multiple stages of mitosis including prophase, metaphase, anaphase, and cytokinesis. They are spatially restricted to structures such as the centrosome, central spindle and kinetochores. Plk C-terminal regions have conserved polo box domains (PBD) that are critical for localization and function. The PBD recognizes the pS/pT docking motif on a substrate which has already been phosphorylated either by a self-priming mechanism e.g. by Plk1 itself, or by non-self priming by Pro-directed kinases such as CDKs. The C-terminal polo box domain (PBD) of the Plks acts as the pS/pT-binding module. The phosphorylation-dependent binding of the PBD to its ligands also activates the kinase domain by relieving an intramolecular inhibitory interaction, together with activating phosphorylations at one or more sites. Based on the sequence specificity around the pS or pT, three different motif variants have been categorized for the Plks.", - "Plks (polo-like kinases) are regarded as acidophilic kinases. The sites of phosphorylation are characterised by negatively charged and hydrophobic residues at specific positions around the phosphoacceptor S/T residue. Plk4 has the weakest acidophilic preference among Plks (Leung,2006; Kettenbach,2012). The weaker acidic preference is compensated by an increased hydrophobic preference. Unlike the other Plks, Plk4 has no strict requirement in the -1, -2, -3 positions from the phosphosite, though it has a mild preference for negative charge at -2. Certain bulky and charged residues are strongly disfavoured at the -1 position. The +1 and +2 positions both require a Phe, or other aromatic or bulky hydrophobic amino acid. Pro (which is also hydrophobic) is strictly forbidden at +1 because it blocks the required backbone H-bond. The +3 position also has a preference for hydrophobic residues but this does not seem to be absolutely required.", - "MOD_Plk_1 MOD_Plk_2-3 MOD_Plk_4", - "..[^IRFW]([ST])[ILMVFWY][ILMVFWY].", - "0.0060193", - "Choanoflagellida Chytridiomycota Cryptomycota Metazoa", - "Pkinase (PF00069) Protein kinase domain (Stochiometry: 1 : 1)", - "17", - "23", - "False", - "False", - "True", - "False", - "True", - "False", - "False" - ], - [ - "MOD_Plk_4", - "ELME000444", - "Polo-like kinase phosphosites", - "The members of the Polo subfamily of the Ser/Thr protein kinases, Plks, play key roles during multiple stages of mitosis including prophase, metaphase, anaphase, and cytokinesis. They are spatially restricted to structures such as the centrosome, central spindle and kinetochores. Plk C-terminal regions have conserved polo box domains (PBD) that are critical for localization and function. The PBD recognizes the pS/pT docking motif on a substrate which has already been phosphorylated either by a self-priming mechanism e.g. by Plk1 itself, or by non-self priming by Pro-directed kinases such as CDKs. The C-terminal polo box domain (PBD) of the Plks acts as the pS/pT-binding module. The phosphorylation-dependent binding of the PBD to its ligands also activates the kinase domain by relieving an intramolecular inhibitory interaction, together with activating phosphorylations at one or more sites. Based on the sequence specificity around the pS or pT, three different motif variants have been categorized for the Plks.", - "Plks (polo-like kinases) are regarded as acidophilic kinases. The sites of phosphorylation are characterised by negatively charged and hydrophobic residues at specific positions around the phosphoacceptor S/T residue. Plk4 has the weakest acidophilic preference among Plks (Leung,2006; Kettenbach,2012). The weaker acidic preference is compensated by an increased hydrophobic preference. Unlike the other Plks, Plk4 has no strict requirement in the -1, -2, -3 positions from the phosphosite, though it has a mild preference for negative charge at -2. Certain bulky and charged residues are strongly disfavoured at the -1 position. The +1 and +2 positions both require a Phe, or other aromatic or bulky hydrophobic amino acid. Pro (which is also hydrophobic) is strictly forbidden at +1 because it blocks the required backbone H-bond. The +3 position also has a preference for hydrophobic residues but this does not seem to be absolutely required.", - "MOD_Plk_1 MOD_Plk_2-3 MOD_Plk_4", - "..[^IRFW]([ST])[ILMVFWY][ILMVFWY].", - "0.0060193", - "Choanoflagellida Chytridiomycota Cryptomycota Metazoa", - "Pkinase (PF00069) Protein kinase domain (Stochiometry: 1 : 1)", - "11", - "17", - "False", - "False", - "True", - "False", - "True", - "False", - "False" - ], - [ - "MOD_Plk_4", - "ELME000444", - "Polo-like kinase phosphosites", - "The members of the Polo subfamily of the Ser/Thr protein kinases, Plks, play key roles during multiple stages of mitosis including prophase, metaphase, anaphase, and cytokinesis. They are spatially restricted to structures such as the centrosome, central spindle and kinetochores. Plk C-terminal regions have conserved polo box domains (PBD) that are critical for localization and function. The PBD recognizes the pS/pT docking motif on a substrate which has already been phosphorylated either by a self-priming mechanism e.g. by Plk1 itself, or by non-self priming by Pro-directed kinases such as CDKs. The C-terminal polo box domain (PBD) of the Plks acts as the pS/pT-binding module. The phosphorylation-dependent binding of the PBD to its ligands also activates the kinase domain by relieving an intramolecular inhibitory interaction, together with activating phosphorylations at one or more sites. Based on the sequence specificity around the pS or pT, three different motif variants have been categorized for the Plks.", - "Plks (polo-like kinases) are regarded as acidophilic kinases. The sites of phosphorylation are characterised by negatively charged and hydrophobic residues at specific positions around the phosphoacceptor S/T residue. Plk4 has the weakest acidophilic preference among Plks (Leung,2006; Kettenbach,2012). The weaker acidic preference is compensated by an increased hydrophobic preference. Unlike the other Plks, Plk4 has no strict requirement in the -1, -2, -3 positions from the phosphosite, though it has a mild preference for negative charge at -2. Certain bulky and charged residues are strongly disfavoured at the -1 position. The +1 and +2 positions both require a Phe, or other aromatic or bulky hydrophobic amino acid. Pro (which is also hydrophobic) is strictly forbidden at +1 because it blocks the required backbone H-bond. The +3 position also has a preference for hydrophobic residues but this does not seem to be absolutely required.", - "MOD_Plk_1 MOD_Plk_2-3 MOD_Plk_4", - "..[^IRFW]([ST])[ILMVFWY][ILMVFWY].", - "0.0060193", - "Choanoflagellida Chytridiomycota Cryptomycota Metazoa", - "Pkinase (PF00069) Protein kinase domain (Stochiometry: 1 : 1)", - "185", - "191", - "False", - "False", - "False", - "False", - "False", - "False", - "False" - ], - [ - "MOD_Plk_4", - "ELME000444", - "Polo-like kinase phosphosites", - "The members of the Polo subfamily of the Ser/Thr protein kinases, Plks, play key roles during multiple stages of mitosis including prophase, metaphase, anaphase, and cytokinesis. They are spatially restricted to structures such as the centrosome, central spindle and kinetochores. Plk C-terminal regions have conserved polo box domains (PBD) that are critical for localization and function. The PBD recognizes the pS/pT docking motif on a substrate which has already been phosphorylated either by a self-priming mechanism e.g. by Plk1 itself, or by non-self priming by Pro-directed kinases such as CDKs. The C-terminal polo box domain (PBD) of the Plks acts as the pS/pT-binding module. The phosphorylation-dependent binding of the PBD to its ligands also activates the kinase domain by relieving an intramolecular inhibitory interaction, together with activating phosphorylations at one or more sites. Based on the sequence specificity around the pS or pT, three different motif variants have been categorized for the Plks.", - "Plks (polo-like kinases) are regarded as acidophilic kinases. The sites of phosphorylation are characterised by negatively charged and hydrophobic residues at specific positions around the phosphoacceptor S/T residue. Plk4 has the weakest acidophilic preference among Plks (Leung,2006; Kettenbach,2012). The weaker acidic preference is compensated by an increased hydrophobic preference. Unlike the other Plks, Plk4 has no strict requirement in the -1, -2, -3 positions from the phosphosite, though it has a mild preference for negative charge at -2. Certain bulky and charged residues are strongly disfavoured at the -1 position. The +1 and +2 positions both require a Phe, or other aromatic or bulky hydrophobic amino acid. Pro (which is also hydrophobic) is strictly forbidden at +1 because it blocks the required backbone H-bond. The +3 position also has a preference for hydrophobic residues but this does not seem to be absolutely required.", - "MOD_Plk_1 MOD_Plk_2-3 MOD_Plk_4", - "..[^IRFW]([ST])[ILMVFWY][ILMVFWY].", - "0.0060193", - "Choanoflagellida Chytridiomycota Cryptomycota Metazoa", - "Pkinase (PF00069) Protein kinase domain (Stochiometry: 1 : 1)", - "151", - "157", - "False", - "False", - "True", - "False", - "True", - "False", - "False" - ], - [ - "MOD_N-GLC_1", - "ELME000070", - "N-glycosylation site", - "N-linked glycosylation is a co-translational process involving the transfer of a oligosaccharide chain to asparagine residue in the protein.", - "Generic motif for N-glycosylation. It was shown that Trp, Asp, and Glu are uncommon before the Ser/Thr position (Shakin-Eshleman,1996). Efficient glycosylation usually occurs when ~60 residues or more separate the glycosylation acceptor site from the C-terminus.", - "MOD_N-GLC_1 MOD_N-GLC_2", - ".(N)[^P][ST]..", - "0.0050178", - "Eukaryota", - "STT3 (PF02516) Oligosaccharyl transferase STT3 subunit (Stochiometry: 1 : 1) PDB Structure: 2HG0", - "47", - "52", - "False", - "False", - "False", - "False", - "False", - "False", - "False" - ], - [ - "LIG_SUMO_SIM_par_1", - "ELME000333", - "SUMO interaction site", - "Non-covalent binding to SUMO proteins is mediated via the SUMO-interacting motif (SIM). SUMO-interacting proteins predominantly function in the nucleus. The SIM is essential for a variety of cellular processes including transcriptional regulation, sub-nuclear localization, nuclear body assembly, and anti-viral response. Viral proteins are also known to utilize such processes via their SIMs upon host cell invasion.", - "This SUMO interacting motif variant is for SIMs bound as a beta-augmented strand in the parallel orientation. The SIM peptide inserts into a groove on the SUMO surface so that the motif has a hydrophobic core of four residues (preference V, I or L), the 3rd position being more variable. At the variable 3rd position, in addition to hydrophobic residues, acidic residues (D or E) and the phosphorylatable residue serine are allowed. A stretch of 1 to 5 acidic or phosphorylatable residues is considered necessary C-terminally from the hydrophobic core. Another negative stretch N-terminal to the core appears more optional, though both are usually present. These acidic stretches complement positively-charged residues on the SUMO surface. The length of the acidic stretch may be involved in determining the orientation of binding. When the longer acidic stretch is C-terminal, the beta strand seems usually to be parallel. The two crystal structures of PIAS2 (2ASQ, Song,2005, O75928) and Daxx (2KQS, Chang,2011, O75928) support this theory: They both bind in parallel orientation and have a C-terminal acidic stretch. The crystal structure of RanBP2 (1Z5S, Reverter,2005, P49792) can be contrasted: It binds as an anti-parallel beta strand and has an N-terminal acidic patch. Because of the high similarity of the motif patterns for the parallel and antiparallel orientations, many SIMs will be detected by both of the motifs in ELM. Quite possibly, some SIM peptides may be able to bind to SUMO in both orientations.", - "LIG_SUMO_SIM_anti_2 LIG_SUMO_SIM_par_1", - "[DEST]{0,5}.[VILPTM][VIL][DESTVILMA][VIL].{0,1}[DEST]{1,10}", - "0.0045452", - "Eukaryota", - "Rad60-SLD (PF11976) Ubiquitin-2 like Rad60 SUMO-like (Stochiometry: 1 : 1) PDB Structure: 2KQS", - "204", - "209", - "False", - "False", - "False", - "False", - "False", - "False", - "False" - ], - [ - "LIG_14-3-3_CanoR_1", - "ELME000417", - "14-3-3 binding phosphopeptide motif", - "The 14-3-3 proteins are a family of conserved regulatory molecules that are involved in diverse cellular processes through the interaction with hundreds of different proteins. In mammals, seven isoforms are present. 14-3-3 proteins form either homo- or heterodimers that target certain phosphoserine/threonine-containing motifs with a low micromolar affinity. Binding to a small set of unmodified proteins has also been reported. Phosphorylation-dependent and independent binding occurs via the same deep ligand-binding groove. There are canonical arginine-containing motifs and a non-canonical motif group that are difficult to classify but utilize additional hydrophobic interactions. The canonical Arg-containing 14-3-3 binding peptides are phosphorylated by members of basophilic kinases. Sites phosphorylated by Proline-directed kinases cannot be bound by 14-3-3 proteins, hence there is no overlap with the basophilic kinase signalling pathways.", - "The canonical motif has at least one Arg residue located at the -4,-3 or -2 position relative to the phospho-residue which aids in the placement of the peptide in the binding groove. Arg stabilizes the phospho-peptide conformation by forming hydrogen bonds as well as an intramolecular salt bridge with the phosphate group. Asp/Glu are not tolerated for at least 3 positions preceding the phosphorylated residue. Pro and Gly at position -1 are also not tolerated since they weaken the interaction between the phospho-residue and the 14-3-3 binding pocket. Proline is also disallowed at position +1 (Panni,2011). At this position, the -NH group of the peptide backbone forms a hydrogen bond with the 14-3-3 protein as part of local geometry crucial to orientate the adjacent phosphorylated sidechain (Molzan,2012). Therefore, there is no physical space for a Pro. The canonical binding motif requires at least one favourable hydrophobic interaction following the P-site residue as this part of the 14-3-3 groove is markedly hydrophobic. This residue can be located at three different distances following the phosphosite. These alternatives arrangements are non-exclusive. In the first option, the +1 residue is a bulky hydrophobic residue with an extensive packing face (5D3E, 4DAU). If there is no hydrophobic residue at +1, other residues are tolerated with the exception of the disallowed Arg, Ile, Gly, Lys and Asn (Panni,2011). In the second option, there is a Pro at +2. Pro is strongly favoured in the +2 position as it introduces an exit kink into the peptide chain and contacts hydrophobic walls of the groove (3O8I, 3MHR, 2C74, 2BR9, 2BTP, 3UAL, 4IEA, 4IHL). If option 1 or 3 are not present, option 3 is a requirement for a bulky hydrophobic residue in the +4 to +6 range which loops back down into the hydrophobic wall of the groove (5D2D, 4WRQ, 2C1J). Variant 2 and 3 have been reported more often than variant 1.", - "LIG_14-3-3_CanoR_1 LIG_14-3-3_ChREBP_3 LIG_14-3-3_CterR_2", - "R[^DE]{0,2}[^DEPG]([ST])(([FWYLMV].)|([^PRIKGN]P)|([^PRIKGN].{2,4}[VILMFWYP]))", - "0.0044767", - "Eukaryota", - "14-3-3 (PF00244) 14-3-3 protein (Stochiometry: 1 : 1) PDB Structure: 1QJB", - "79", - "87", - "False", - "False", - "False", - "False", - "False", - "False", - "False" - ], - [ - "LIG_Arc_Nlobe_1", - "ELME000534", - "Arc N-lobe binding ligand", - "The activity-regulated cytoskeleton-associated protein (Arc) has a retrotransposon origin and has been domesticated for synaptic functions in higher vertebrates. The Arc C-terminal domain consists of repeated 4-helix bundle domains that are homologous to the capsid domains. In the monomeric form, the first helical bundle, termed N-lobe (Arc-NL) is capable of binding peptides with variations of a PxY motif found in proteins with distinct roles in the postsynaptic densities.", - "The motif peptide binds Arc in an unusual conformation as the middle strand of a 3-stranded β-sheet. The key structural determinants in the N-lobe are the hydrophobic pocket and a groove formed by a β-strand. The peptides bind to the Arc N-lobe by β-augmentation. A long β-strand conformation of the peptide is required as a minimal peptide of PSY fails to bind (Zhang,2015). Mutational analysis identified two key resides: Pro in P2 and Tyr in P4 (Hallin,2021). Phe can also be accepted in the P4 position (4X3I; 6TNQ). It is likely that the Pro residue in P2 is essential to keep the peptide in an extended conformation, moreover, it stabilizes binding by interacting with the aromatic ring of Tyr227 deep in the hydrophobic groove. Other stabilizing amino acids are the above Tyr/Phe which also enters a hydrophobic pocket and makes additional C–H…π interactions with Phe220 and Phe271 in Arc (6TNQ; 6TNO). P2 with Arg instead of Pro was reportedly crystallised with Arc (4X3I) although a much lower binding affinity and is unlikely to be valid for the motif. In pull-down experiments, it was shown that Pro at P3 prevents binding (Zhang,2015) as expected due to disruption of the β-strand H bonding. A similar result was obtained by the phosphomimetic mutation of Ser in P3, which seems to either radically reduce the affinity or completely abolish the interaction. However, in other experiments, the same mutation on Ser did not affect the affinity of the binding (Hallin,2021).One of the Arc-interacting proteins (DLGP1) has a second binding motif with a Gly added in position after the Pro/Arg (6TQ0). The additional Gly residue might provide backbone flexibility that allows the alternate motif to bind with a β-bulge conformation. The requirement for backbone H bonds excludes Pro at P1, P3, and P5.Conflicting results have been obtained for His in the P+4 Tyr/Phe position (Nielsen,2019; Zhang,2015): currently, His is included in the motif but it appears to be weaker than P+4 Tyr/Phe.", - "nan", - "[^P][P]G{0,1}[^P][YFH][^P]", - "0.0043852", - "Mammalia Tetrapoda", - "Activity-regulated cytoskeleton-associated protein, C-terminal domain (IPR040814) This entry represents the C-terminal capsid-like domain of the Activity-regulated cytoskeleton-associated protein (Arc) (Stochiometry: 1 : 1)", - "1", - "6", - "False", - "False", - "False", - "False", - "False", - "False", - "False" - ], - [ - "LIG_LIR_Gen_1", - "ELME000368", - "Atg8 protein family ligands", - "The autophagy-related protein Atg8 and its homologues LC3 and GABARAP play an important role in selective autophagy. During autophagy, Atg8 proteins get directly conjugated to phosphatidylethanolamine (PE) lipids to mediate membrane fusion events involved in autophagosome biogenesis such as phagophore formation and elongation. In addition, different Atg8 protein family members can recruit specific adaptors bound to ubiquitylated proteins, organelles or pathogens for degradation. Many of these adaptor proteins contain an LC3-interacting region (LIR) that mediates binding to Atg8 and Atg8-related proteins. These LIR:Atg8/LC3/GABARAP interactions are essential for cellular cell homeostasis as well as the control of intra- and extracellular stress conditions.", - "The core of the LIR motif is defined by four amino acids and adopts a β-strand conformation that binds by β-augmentation, forming an intermolecular parallel β-sheet with the second β-strand of Atg8 protein family members (Rogov,2014). There is an absolute requirement for an aromatic residue at the N-terminal side of the LIR core and a large, hydrophobic residue at the C-terminal side. Structural studies have revealed that the side chain of the aromatic residue of the LIR motif binds deeply in HP1 whereas the hydrophobic residue docks to HP2 (2ZJD; Ichimura,2008). Position +2 in the core is solvent accessible and aromatic residues are not favoured. The presence of positive charges in the binding domain also restricts the +2 position to not-positively charged residues. The fixed distance from HP1 to HP2 makes it inadequate for a tiny residue or a Pro at +2 and +3 positions. The core motif is generally preceded by a varying number of acidic residues or by Ser or Thr residues that can be phosphorylated to incorporate a negative charge. These residues commonly occur within three positions N-terminal to the core motif. The negative charge of these acidic or phosphorylated residues has been shown to strengthen the LIR:Atg8/LC3/GABARAP interaction (Rogov,2013). Additional acidic residues or Ser/Thr phosphorylation sites that strengthen the interaction are sometimes observed in the positions between the aromatic (+1) and hydrophobic (+4) residue. A Trp residue is energetically favoured for this interaction over a Tyr or Phe residue, but the lower binding affinity can be compensated by electrostatic interactions between acidic residues or Ser/Thr phosphorylation sites of the LIR motif and basic residues in the N-terminal arm of the Atg8 homologues (Wild,2013). Analysis of current known structures indicates that the motif can be located at the C-terminus of the protein or be followed by a particular range of acidic or not residues immediately, or up to four positions, after the core.", - "LIG_LIR_Apic_2 LIG_LIR_Gen_1 LIG_LIR_LC3C_4 LIG_LIR_Nem_3", - "[EDST].{0,2}[WFY][^RKPGWFY][^PG][ILVFM]((.{0,4}[PLAFIVMY])|($)|(.{0,3}[ED]))", - "0.0036312", - "Eukaryota", - "Atg8 (PF02991) Autophagy protein Atg8 ubiquitin like (Stochiometry: 1 : 1) PDB Structure: 2LUE", - "17", - "26", - "False", - "False", - "True", - "False", - "True", - "False", - "False" - ], - [ - "LIG_LIR_Gen_1", - "ELME000368", - "Atg8 protein family ligands", - "The autophagy-related protein Atg8 and its homologues LC3 and GABARAP play an important role in selective autophagy. During autophagy, Atg8 proteins get directly conjugated to phosphatidylethanolamine (PE) lipids to mediate membrane fusion events involved in autophagosome biogenesis such as phagophore formation and elongation. In addition, different Atg8 protein family members can recruit specific adaptors bound to ubiquitylated proteins, organelles or pathogens for degradation. Many of these adaptor proteins contain an LC3-interacting region (LIR) that mediates binding to Atg8 and Atg8-related proteins. These LIR:Atg8/LC3/GABARAP interactions are essential for cellular cell homeostasis as well as the control of intra- and extracellular stress conditions.", - "The core of the LIR motif is defined by four amino acids and adopts a β-strand conformation that binds by β-augmentation, forming an intermolecular parallel β-sheet with the second β-strand of Atg8 protein family members (Rogov,2014). There is an absolute requirement for an aromatic residue at the N-terminal side of the LIR core and a large, hydrophobic residue at the C-terminal side. Structural studies have revealed that the side chain of the aromatic residue of the LIR motif binds deeply in HP1 whereas the hydrophobic residue docks to HP2 (2ZJD; Ichimura,2008). Position +2 in the core is solvent accessible and aromatic residues are not favoured. The presence of positive charges in the binding domain also restricts the +2 position to not-positively charged residues. The fixed distance from HP1 to HP2 makes it inadequate for a tiny residue or a Pro at +2 and +3 positions. The core motif is generally preceded by a varying number of acidic residues or by Ser or Thr residues that can be phosphorylated to incorporate a negative charge. These residues commonly occur within three positions N-terminal to the core motif. The negative charge of these acidic or phosphorylated residues has been shown to strengthen the LIR:Atg8/LC3/GABARAP interaction (Rogov,2013). Additional acidic residues or Ser/Thr phosphorylation sites that strengthen the interaction are sometimes observed in the positions between the aromatic (+1) and hydrophobic (+4) residue. A Trp residue is energetically favoured for this interaction over a Tyr or Phe residue, but the lower binding affinity can be compensated by electrostatic interactions between acidic residues or Ser/Thr phosphorylation sites of the LIR motif and basic residues in the N-terminal arm of the Atg8 homologues (Wild,2013). Analysis of current known structures indicates that the motif can be located at the C-terminus of the protein or be followed by a particular range of acidic or not residues immediately, or up to four positions, after the core.", - "LIG_LIR_Apic_2 LIG_LIR_Gen_1 LIG_LIR_LC3C_4 LIG_LIR_Nem_3", - "[EDST].{0,2}[WFY][^RKPGWFY][^PG][ILVFM]((.{0,4}[PLAFIVMY])|($)|(.{0,3}[ED]))", - "0.0036312", - "Eukaryota", - "Atg8 (PF02991) Autophagy protein Atg8 ubiquitin like (Stochiometry: 1 : 1) PDB Structure: 2LUE", - "115", - "126", - "False", - "False", - "False", - "False", - "False", - "False", - "False" - ], - [ - "LIG_LIR_Gen_1", - "ELME000368", - "Atg8 protein family ligands", - "The autophagy-related protein Atg8 and its homologues LC3 and GABARAP play an important role in selective autophagy. During autophagy, Atg8 proteins get directly conjugated to phosphatidylethanolamine (PE) lipids to mediate membrane fusion events involved in autophagosome biogenesis such as phagophore formation and elongation. In addition, different Atg8 protein family members can recruit specific adaptors bound to ubiquitylated proteins, organelles or pathogens for degradation. Many of these adaptor proteins contain an LC3-interacting region (LIR) that mediates binding to Atg8 and Atg8-related proteins. These LIR:Atg8/LC3/GABARAP interactions are essential for cellular cell homeostasis as well as the control of intra- and extracellular stress conditions.", - "The core of the LIR motif is defined by four amino acids and adopts a β-strand conformation that binds by β-augmentation, forming an intermolecular parallel β-sheet with the second β-strand of Atg8 protein family members (Rogov,2014). There is an absolute requirement for an aromatic residue at the N-terminal side of the LIR core and a large, hydrophobic residue at the C-terminal side. Structural studies have revealed that the side chain of the aromatic residue of the LIR motif binds deeply in HP1 whereas the hydrophobic residue docks to HP2 (2ZJD; Ichimura,2008). Position +2 in the core is solvent accessible and aromatic residues are not favoured. The presence of positive charges in the binding domain also restricts the +2 position to not-positively charged residues. The fixed distance from HP1 to HP2 makes it inadequate for a tiny residue or a Pro at +2 and +3 positions. The core motif is generally preceded by a varying number of acidic residues or by Ser or Thr residues that can be phosphorylated to incorporate a negative charge. These residues commonly occur within three positions N-terminal to the core motif. The negative charge of these acidic or phosphorylated residues has been shown to strengthen the LIR:Atg8/LC3/GABARAP interaction (Rogov,2013). Additional acidic residues or Ser/Thr phosphorylation sites that strengthen the interaction are sometimes observed in the positions between the aromatic (+1) and hydrophobic (+4) residue. A Trp residue is energetically favoured for this interaction over a Tyr or Phe residue, but the lower binding affinity can be compensated by electrostatic interactions between acidic residues or Ser/Thr phosphorylation sites of the LIR motif and basic residues in the N-terminal arm of the Atg8 homologues (Wild,2013). Analysis of current known structures indicates that the motif can be located at the C-terminus of the protein or be followed by a particular range of acidic or not residues immediately, or up to four positions, after the core.", - "LIG_LIR_Apic_2 LIG_LIR_Gen_1 LIG_LIR_LC3C_4 LIG_LIR_Nem_3", - "[EDST].{0,2}[WFY][^RKPGWFY][^PG][ILVFM]((.{0,4}[PLAFIVMY])|($)|(.{0,3}[ED]))", - "0.0036312", - "Eukaryota", - "Atg8 (PF02991) Autophagy protein Atg8 ubiquitin like (Stochiometry: 1 : 1) PDB Structure: 2LUE", - "81", - "90", - "False", - "False", - "False", - "False", - "False", - "False", - "False" - ], - [ - "LIG_LIR_Gen_1", - "ELME000368", - "Atg8 protein family ligands", - "The autophagy-related protein Atg8 and its homologues LC3 and GABARAP play an important role in selective autophagy. During autophagy, Atg8 proteins get directly conjugated to phosphatidylethanolamine (PE) lipids to mediate membrane fusion events involved in autophagosome biogenesis such as phagophore formation and elongation. In addition, different Atg8 protein family members can recruit specific adaptors bound to ubiquitylated proteins, organelles or pathogens for degradation. Many of these adaptor proteins contain an LC3-interacting region (LIR) that mediates binding to Atg8 and Atg8-related proteins. These LIR:Atg8/LC3/GABARAP interactions are essential for cellular cell homeostasis as well as the control of intra- and extracellular stress conditions.", - "The core of the LIR motif is defined by four amino acids and adopts a β-strand conformation that binds by β-augmentation, forming an intermolecular parallel β-sheet with the second β-strand of Atg8 protein family members (Rogov,2014). There is an absolute requirement for an aromatic residue at the N-terminal side of the LIR core and a large, hydrophobic residue at the C-terminal side. Structural studies have revealed that the side chain of the aromatic residue of the LIR motif binds deeply in HP1 whereas the hydrophobic residue docks to HP2 (2ZJD; Ichimura,2008). Position +2 in the core is solvent accessible and aromatic residues are not favoured. The presence of positive charges in the binding domain also restricts the +2 position to not-positively charged residues. The fixed distance from HP1 to HP2 makes it inadequate for a tiny residue or a Pro at +2 and +3 positions. The core motif is generally preceded by a varying number of acidic residues or by Ser or Thr residues that can be phosphorylated to incorporate a negative charge. These residues commonly occur within three positions N-terminal to the core motif. The negative charge of these acidic or phosphorylated residues has been shown to strengthen the LIR:Atg8/LC3/GABARAP interaction (Rogov,2013). Additional acidic residues or Ser/Thr phosphorylation sites that strengthen the interaction are sometimes observed in the positions between the aromatic (+1) and hydrophobic (+4) residue. A Trp residue is energetically favoured for this interaction over a Tyr or Phe residue, but the lower binding affinity can be compensated by electrostatic interactions between acidic residues or Ser/Thr phosphorylation sites of the LIR motif and basic residues in the N-terminal arm of the Atg8 homologues (Wild,2013). Analysis of current known structures indicates that the motif can be located at the C-terminus of the protein or be followed by a particular range of acidic or not residues immediately, or up to four positions, after the core.", - "LIG_LIR_Apic_2 LIG_LIR_Gen_1 LIG_LIR_LC3C_4 LIG_LIR_Nem_3", - "[EDST].{0,2}[WFY][^RKPGWFY][^PG][ILVFM]((.{0,4}[PLAFIVMY])|($)|(.{0,3}[ED]))", - "0.0036312", - "Eukaryota", - "Atg8 (PF02991) Autophagy protein Atg8 ubiquitin like (Stochiometry: 1 : 1) PDB Structure: 2LUE", - "173", - "181", - "False", - "False", - "False", - "False", - "False", - "False", - "False" - ], - [ - "LIG_LIR_Gen_1", - "ELME000368", - "Atg8 protein family ligands", - "The autophagy-related protein Atg8 and its homologues LC3 and GABARAP play an important role in selective autophagy. During autophagy, Atg8 proteins get directly conjugated to phosphatidylethanolamine (PE) lipids to mediate membrane fusion events involved in autophagosome biogenesis such as phagophore formation and elongation. In addition, different Atg8 protein family members can recruit specific adaptors bound to ubiquitylated proteins, organelles or pathogens for degradation. Many of these adaptor proteins contain an LC3-interacting region (LIR) that mediates binding to Atg8 and Atg8-related proteins. These LIR:Atg8/LC3/GABARAP interactions are essential for cellular cell homeostasis as well as the control of intra- and extracellular stress conditions.", - "The core of the LIR motif is defined by four amino acids and adopts a β-strand conformation that binds by β-augmentation, forming an intermolecular parallel β-sheet with the second β-strand of Atg8 protein family members (Rogov,2014). There is an absolute requirement for an aromatic residue at the N-terminal side of the LIR core and a large, hydrophobic residue at the C-terminal side. Structural studies have revealed that the side chain of the aromatic residue of the LIR motif binds deeply in HP1 whereas the hydrophobic residue docks to HP2 (2ZJD; Ichimura,2008). Position +2 in the core is solvent accessible and aromatic residues are not favoured. The presence of positive charges in the binding domain also restricts the +2 position to not-positively charged residues. The fixed distance from HP1 to HP2 makes it inadequate for a tiny residue or a Pro at +2 and +3 positions. The core motif is generally preceded by a varying number of acidic residues or by Ser or Thr residues that can be phosphorylated to incorporate a negative charge. These residues commonly occur within three positions N-terminal to the core motif. The negative charge of these acidic or phosphorylated residues has been shown to strengthen the LIR:Atg8/LC3/GABARAP interaction (Rogov,2013). Additional acidic residues or Ser/Thr phosphorylation sites that strengthen the interaction are sometimes observed in the positions between the aromatic (+1) and hydrophobic (+4) residue. A Trp residue is energetically favoured for this interaction over a Tyr or Phe residue, but the lower binding affinity can be compensated by electrostatic interactions between acidic residues or Ser/Thr phosphorylation sites of the LIR motif and basic residues in the N-terminal arm of the Atg8 homologues (Wild,2013). Analysis of current known structures indicates that the motif can be located at the C-terminus of the protein or be followed by a particular range of acidic or not residues immediately, or up to four positions, after the core.", - "LIG_LIR_Apic_2 LIG_LIR_Gen_1 LIG_LIR_LC3C_4 LIG_LIR_Nem_3", - "[EDST].{0,2}[WFY][^RKPGWFY][^PG][ILVFM]((.{0,4}[PLAFIVMY])|($)|(.{0,3}[ED]))", - "0.0036312", - "Eukaryota", - "Atg8 (PF02991) Autophagy protein Atg8 ubiquitin like (Stochiometry: 1 : 1) PDB Structure: 2LUE", - "187", - "198", - "False", - "False", - "False", - "False", - "False", - "False", - "False" - ], - [ - "LIG_LIR_Gen_1", - "ELME000368", - "Atg8 protein family ligands", - "The autophagy-related protein Atg8 and its homologues LC3 and GABARAP play an important role in selective autophagy. During autophagy, Atg8 proteins get directly conjugated to phosphatidylethanolamine (PE) lipids to mediate membrane fusion events involved in autophagosome biogenesis such as phagophore formation and elongation. In addition, different Atg8 protein family members can recruit specific adaptors bound to ubiquitylated proteins, organelles or pathogens for degradation. Many of these adaptor proteins contain an LC3-interacting region (LIR) that mediates binding to Atg8 and Atg8-related proteins. These LIR:Atg8/LC3/GABARAP interactions are essential for cellular cell homeostasis as well as the control of intra- and extracellular stress conditions.", - "The core of the LIR motif is defined by four amino acids and adopts a β-strand conformation that binds by β-augmentation, forming an intermolecular parallel β-sheet with the second β-strand of Atg8 protein family members (Rogov,2014). There is an absolute requirement for an aromatic residue at the N-terminal side of the LIR core and a large, hydrophobic residue at the C-terminal side. Structural studies have revealed that the side chain of the aromatic residue of the LIR motif binds deeply in HP1 whereas the hydrophobic residue docks to HP2 (2ZJD; Ichimura,2008). Position +2 in the core is solvent accessible and aromatic residues are not favoured. The presence of positive charges in the binding domain also restricts the +2 position to not-positively charged residues. The fixed distance from HP1 to HP2 makes it inadequate for a tiny residue or a Pro at +2 and +3 positions. The core motif is generally preceded by a varying number of acidic residues or by Ser or Thr residues that can be phosphorylated to incorporate a negative charge. These residues commonly occur within three positions N-terminal to the core motif. The negative charge of these acidic or phosphorylated residues has been shown to strengthen the LIR:Atg8/LC3/GABARAP interaction (Rogov,2013). Additional acidic residues or Ser/Thr phosphorylation sites that strengthen the interaction are sometimes observed in the positions between the aromatic (+1) and hydrophobic (+4) residue. A Trp residue is energetically favoured for this interaction over a Tyr or Phe residue, but the lower binding affinity can be compensated by electrostatic interactions between acidic residues or Ser/Thr phosphorylation sites of the LIR motif and basic residues in the N-terminal arm of the Atg8 homologues (Wild,2013). Analysis of current known structures indicates that the motif can be located at the C-terminus of the protein or be followed by a particular range of acidic or not residues immediately, or up to four positions, after the core.", - "LIG_LIR_Apic_2 LIG_LIR_Gen_1 LIG_LIR_LC3C_4 LIG_LIR_Nem_3", - "[EDST].{0,2}[WFY][^RKPGWFY][^PG][ILVFM]((.{0,4}[PLAFIVMY])|($)|(.{0,3}[ED]))", - "0.0036312", - "Eukaryota", - "Atg8 (PF02991) Autophagy protein Atg8 ubiquitin like (Stochiometry: 1 : 1) PDB Structure: 2LUE", - "188", - "198", - "False", - "False", - "True", - "False", - "True", - "False", - "False" - ], - [ - "LIG_LIR_Gen_1", - "ELME000368", - "Atg8 protein family ligands", - "The autophagy-related protein Atg8 and its homologues LC3 and GABARAP play an important role in selective autophagy. During autophagy, Atg8 proteins get directly conjugated to phosphatidylethanolamine (PE) lipids to mediate membrane fusion events involved in autophagosome biogenesis such as phagophore formation and elongation. In addition, different Atg8 protein family members can recruit specific adaptors bound to ubiquitylated proteins, organelles or pathogens for degradation. Many of these adaptor proteins contain an LC3-interacting region (LIR) that mediates binding to Atg8 and Atg8-related proteins. These LIR:Atg8/LC3/GABARAP interactions are essential for cellular cell homeostasis as well as the control of intra- and extracellular stress conditions.", - "The core of the LIR motif is defined by four amino acids and adopts a β-strand conformation that binds by β-augmentation, forming an intermolecular parallel β-sheet with the second β-strand of Atg8 protein family members (Rogov,2014). There is an absolute requirement for an aromatic residue at the N-terminal side of the LIR core and a large, hydrophobic residue at the C-terminal side. Structural studies have revealed that the side chain of the aromatic residue of the LIR motif binds deeply in HP1 whereas the hydrophobic residue docks to HP2 (2ZJD; Ichimura,2008). Position +2 in the core is solvent accessible and aromatic residues are not favoured. The presence of positive charges in the binding domain also restricts the +2 position to not-positively charged residues. The fixed distance from HP1 to HP2 makes it inadequate for a tiny residue or a Pro at +2 and +3 positions. The core motif is generally preceded by a varying number of acidic residues or by Ser or Thr residues that can be phosphorylated to incorporate a negative charge. These residues commonly occur within three positions N-terminal to the core motif. The negative charge of these acidic or phosphorylated residues has been shown to strengthen the LIR:Atg8/LC3/GABARAP interaction (Rogov,2013). Additional acidic residues or Ser/Thr phosphorylation sites that strengthen the interaction are sometimes observed in the positions between the aromatic (+1) and hydrophobic (+4) residue. A Trp residue is energetically favoured for this interaction over a Tyr or Phe residue, but the lower binding affinity can be compensated by electrostatic interactions between acidic residues or Ser/Thr phosphorylation sites of the LIR motif and basic residues in the N-terminal arm of the Atg8 homologues (Wild,2013). Analysis of current known structures indicates that the motif can be located at the C-terminus of the protein or be followed by a particular range of acidic or not residues immediately, or up to four positions, after the core.", - "LIG_LIR_Apic_2 LIG_LIR_Gen_1 LIG_LIR_LC3C_4 LIG_LIR_Nem_3", - "[EDST].{0,2}[WFY][^RKPGWFY][^PG][ILVFM]((.{0,4}[PLAFIVMY])|($)|(.{0,3}[ED]))", - "0.0036312", - "Eukaryota", - "Atg8 (PF02991) Autophagy protein Atg8 ubiquitin like (Stochiometry: 1 : 1) PDB Structure: 2LUE", - "19", - "29", - "False", - "False", - "True", - "False", - "True", - "False", - "False" - ], - [ - "LIG_LIR_Gen_1", - "ELME000368", - "Atg8 protein family ligands", - "The autophagy-related protein Atg8 and its homologues LC3 and GABARAP play an important role in selective autophagy. During autophagy, Atg8 proteins get directly conjugated to phosphatidylethanolamine (PE) lipids to mediate membrane fusion events involved in autophagosome biogenesis such as phagophore formation and elongation. In addition, different Atg8 protein family members can recruit specific adaptors bound to ubiquitylated proteins, organelles or pathogens for degradation. Many of these adaptor proteins contain an LC3-interacting region (LIR) that mediates binding to Atg8 and Atg8-related proteins. These LIR:Atg8/LC3/GABARAP interactions are essential for cellular cell homeostasis as well as the control of intra- and extracellular stress conditions.", - "The core of the LIR motif is defined by four amino acids and adopts a β-strand conformation that binds by β-augmentation, forming an intermolecular parallel β-sheet with the second β-strand of Atg8 protein family members (Rogov,2014). There is an absolute requirement for an aromatic residue at the N-terminal side of the LIR core and a large, hydrophobic residue at the C-terminal side. Structural studies have revealed that the side chain of the aromatic residue of the LIR motif binds deeply in HP1 whereas the hydrophobic residue docks to HP2 (2ZJD; Ichimura,2008). Position +2 in the core is solvent accessible and aromatic residues are not favoured. The presence of positive charges in the binding domain also restricts the +2 position to not-positively charged residues. The fixed distance from HP1 to HP2 makes it inadequate for a tiny residue or a Pro at +2 and +3 positions. The core motif is generally preceded by a varying number of acidic residues or by Ser or Thr residues that can be phosphorylated to incorporate a negative charge. These residues commonly occur within three positions N-terminal to the core motif. The negative charge of these acidic or phosphorylated residues has been shown to strengthen the LIR:Atg8/LC3/GABARAP interaction (Rogov,2013). Additional acidic residues or Ser/Thr phosphorylation sites that strengthen the interaction are sometimes observed in the positions between the aromatic (+1) and hydrophobic (+4) residue. A Trp residue is energetically favoured for this interaction over a Tyr or Phe residue, but the lower binding affinity can be compensated by electrostatic interactions between acidic residues or Ser/Thr phosphorylation sites of the LIR motif and basic residues in the N-terminal arm of the Atg8 homologues (Wild,2013). Analysis of current known structures indicates that the motif can be located at the C-terminus of the protein or be followed by a particular range of acidic or not residues immediately, or up to four positions, after the core.", - "LIG_LIR_Apic_2 LIG_LIR_Gen_1 LIG_LIR_LC3C_4 LIG_LIR_Nem_3", - "[EDST].{0,2}[WFY][^RKPGWFY][^PG][ILVFM]((.{0,4}[PLAFIVMY])|($)|(.{0,3}[ED]))", - "0.0036312", - "Eukaryota", - "Atg8 (PF02991) Autophagy protein Atg8 ubiquitin like (Stochiometry: 1 : 1) PDB Structure: 2LUE", - "20", - "29", - "False", - "False", - "True", - "False", - "True", - "False", - "False" - ], - [ - "TRG_ENDOCYTIC_2", - "ELME000120", - "Y-based sorting signal", - "Yxxphi sorting signal responsible for the interaction with mu subunit of AP (Adaptor Protein) complex", - "Tyrosine-based sorting signal responsible for the interaction with mu subunit of AP (Adaptor Protein) complex", - "nan", - "Y..[LMVIF]", - "0.0025875", - "Eukaryota Homo sapiens Mus musculus Rattus norvegicus Saccharomyces cerevisiae", - "Adap_comp_sub (PF00928) Adaptor complexes medium subunit family (Stochiometry: 1 : 1) PDB Structure: 1BXX", - "128", - "131", - "False", - "False", - "False", - "False", - "False", - "False", - "False" - ], - [ - "TRG_ENDOCYTIC_2", - "ELME000120", - "Y-based sorting signal", - "Yxxphi sorting signal responsible for the interaction with mu subunit of AP (Adaptor Protein) complex", - "Tyrosine-based sorting signal responsible for the interaction with mu subunit of AP (Adaptor Protein) complex", - "nan", - "Y..[LMVIF]", - "0.0025875", - "Eukaryota Homo sapiens Mus musculus Rattus norvegicus Saccharomyces cerevisiae", - "Adap_comp_sub (PF00928) Adaptor complexes medium subunit family (Stochiometry: 1 : 1) PDB Structure: 1BXX", - "62", - "65", - "False", - "False", - "False", - "False", - "False", - "False", - "False" - ], - [ - "TRG_ENDOCYTIC_2", - "ELME000120", - "Y-based sorting signal", - "Yxxphi sorting signal responsible for the interaction with mu subunit of AP (Adaptor Protein) complex", - "Tyrosine-based sorting signal responsible for the interaction with mu subunit of AP (Adaptor Protein) complex", - "nan", - "Y..[LMVIF]", - "0.0025875", - "Eukaryota Homo sapiens Mus musculus Rattus norvegicus Saccharomyces cerevisiae", - "Adap_comp_sub (PF00928) Adaptor complexes medium subunit family (Stochiometry: 1 : 1) PDB Structure: 1BXX", - "63", - "66", - "False", - "False", - "False", - "False", - "False", - "False", - "False" - ], - [ - "LIG_SUMO_SIM_anti_2", - "ELME000335", - "SUMO interaction site", - "Non-covalent binding to SUMO proteins is mediated via the SUMO-interacting motif (SIM). SUMO-interacting proteins predominantly function in the nucleus. The SIM is essential for a variety of cellular processes including transcriptional regulation, sub-nuclear localization, nuclear body assembly, and anti-viral response. Viral proteins are also known to utilize such processes via their SIMs upon host cell invasion.", - "This SUMO interacting motif variant is for SIMs bound as a beta-augmented strand in the antiparallel orientation. The SIM peptide inserts into a groove on the SUMO surface so that the motif has a hydrophobic core of four residues (preference V, I or L), the 2nd position being more variable. At the variable 2nd position, in addition to hydrophobic residues, acidic residues (D or E) and the phosphorylatable residue serine are allowed. A short stretch of 1 to 5 acidic or phosphorylatable residues is considered necessary C-terminally from the hydrophobic core. Another negative stretch N-terminal to the core appears more optional, though both are usually present. These acidic stretches complement positively-charged residues on the SUMO surface. The length of the acidic stretch may be involved in determining the orientation of binding. When the longer acidic stretch is C-terminal, the beta strand seems usually to be parallel. The two crystal structures of PIAS2 (2ASQ, Song,2005, O75928) and Daxx (2KQS, Chang,2011, O75928) support this theory: They both bind in parallel orientation and have a C-terminal acidic stretch. The crystal structure of RanBP2 (1Z5S, Reverter,2005, P49792) can be contrasted: It binds as an anti-parallel beta strand and has an N-terminal acidic patch. Because of the high similarity of the motif patterns for the parallel and antiparallel orientations, many SIMs will be detected by both of the motifs in ELM. Quite possibly, some SIM peptides may be able to bind to SUMO in both orientations.", - "LIG_SUMO_SIM_anti_2 LIG_SUMO_SIM_par_1", - "[DEST]{1,10}.{0,1}[VIL][DESTVILMA][VIL][VILM].[DEST]{0,5}", - "0.0023495", - "Eukaryota", - "Rad60-SLD (PF11976) Ubiquitin-2 like Rad60 SUMO-like (Stochiometry: 1 : 1) PDB Structure: 2KQS", - "19", - "26", - "False", - "False", - "True", - "False", - "True", - "False", - "False" - ], - [ - "LIG_SUMO_SIM_anti_2", - "ELME000335", - "SUMO interaction site", - "Non-covalent binding to SUMO proteins is mediated via the SUMO-interacting motif (SIM). SUMO-interacting proteins predominantly function in the nucleus. The SIM is essential for a variety of cellular processes including transcriptional regulation, sub-nuclear localization, nuclear body assembly, and anti-viral response. Viral proteins are also known to utilize such processes via their SIMs upon host cell invasion.", - "This SUMO interacting motif variant is for SIMs bound as a beta-augmented strand in the antiparallel orientation. The SIM peptide inserts into a groove on the SUMO surface so that the motif has a hydrophobic core of four residues (preference V, I or L), the 2nd position being more variable. At the variable 2nd position, in addition to hydrophobic residues, acidic residues (D or E) and the phosphorylatable residue serine are allowed. A short stretch of 1 to 5 acidic or phosphorylatable residues is considered necessary C-terminally from the hydrophobic core. Another negative stretch N-terminal to the core appears more optional, though both are usually present. These acidic stretches complement positively-charged residues on the SUMO surface. The length of the acidic stretch may be involved in determining the orientation of binding. When the longer acidic stretch is C-terminal, the beta strand seems usually to be parallel. The two crystal structures of PIAS2 (2ASQ, Song,2005, O75928) and Daxx (2KQS, Chang,2011, O75928) support this theory: They both bind in parallel orientation and have a C-terminal acidic stretch. The crystal structure of RanBP2 (1Z5S, Reverter,2005, P49792) can be contrasted: It binds as an anti-parallel beta strand and has an N-terminal acidic patch. Because of the high similarity of the motif patterns for the parallel and antiparallel orientations, many SIMs will be detected by both of the motifs in ELM. Quite possibly, some SIM peptides may be able to bind to SUMO in both orientations.", - "LIG_SUMO_SIM_anti_2 LIG_SUMO_SIM_par_1", - "[DEST]{1,10}.{0,1}[VIL][DESTVILMA][VIL][VILM].[DEST]{0,5}", - "0.0023495", - "Eukaryota", - "Rad60-SLD (PF11976) Ubiquitin-2 like Rad60 SUMO-like (Stochiometry: 1 : 1) PDB Structure: 2KQS", - "42", - "48", - "False", - "False", - "False", - "False", - "False", - "False", - "False" - ], - [ - "LIG_SUMO_SIM_anti_2", - "ELME000335", - "SUMO interaction site", - "Non-covalent binding to SUMO proteins is mediated via the SUMO-interacting motif (SIM). SUMO-interacting proteins predominantly function in the nucleus. The SIM is essential for a variety of cellular processes including transcriptional regulation, sub-nuclear localization, nuclear body assembly, and anti-viral response. Viral proteins are also known to utilize such processes via their SIMs upon host cell invasion.", - "This SUMO interacting motif variant is for SIMs bound as a beta-augmented strand in the antiparallel orientation. The SIM peptide inserts into a groove on the SUMO surface so that the motif has a hydrophobic core of four residues (preference V, I or L), the 2nd position being more variable. At the variable 2nd position, in addition to hydrophobic residues, acidic residues (D or E) and the phosphorylatable residue serine are allowed. A short stretch of 1 to 5 acidic or phosphorylatable residues is considered necessary C-terminally from the hydrophobic core. Another negative stretch N-terminal to the core appears more optional, though both are usually present. These acidic stretches complement positively-charged residues on the SUMO surface. The length of the acidic stretch may be involved in determining the orientation of binding. When the longer acidic stretch is C-terminal, the beta strand seems usually to be parallel. The two crystal structures of PIAS2 (2ASQ, Song,2005, O75928) and Daxx (2KQS, Chang,2011, O75928) support this theory: They both bind in parallel orientation and have a C-terminal acidic stretch. The crystal structure of RanBP2 (1Z5S, Reverter,2005, P49792) can be contrasted: It binds as an anti-parallel beta strand and has an N-terminal acidic patch. Because of the high similarity of the motif patterns for the parallel and antiparallel orientations, many SIMs will be detected by both of the motifs in ELM. Quite possibly, some SIM peptides may be able to bind to SUMO in both orientations.", - "LIG_SUMO_SIM_anti_2 LIG_SUMO_SIM_par_1", - "[DEST]{1,10}.{0,1}[VIL][DESTVILMA][VIL][VILM].[DEST]{0,5}", - "0.0023495", - "Eukaryota", - "Rad60-SLD (PF11976) Ubiquitin-2 like Rad60 SUMO-like (Stochiometry: 1 : 1) PDB Structure: 2KQS", - "20", - "26", - "False", - "False", - "True", - "False", - "True", - "False", - "False" - ], - [ - "DOC_PP2B_LxvP_1", - "ELME000367", - "Calcineurin (PP2B)-docking motif LxvP", - "Calcineurin (PP2B) is a Ca2+- and calmodulin-regulated serine/threonine protein phosphatase known to affect cell biological function mainly in yeast and mammalian systems but is found in all Eukaryotes. It regulates a number of different pathways, including activation of the NFAT family of transcription factors, regulation of axonal guidance, the Ca2+-dependent migration of neutrophils, synaptic plasticity, and apoptosis. The effectiveness of dephosphorylation depends not only on the proximal position of calcineurin-binding site to the dephosphorylation site but also on the flexibility of the calcineurin active site. Calcineurin has been shown to interact with two distinct linear motifs: one docking motif (DOC_PP2B_PxIxI_1) that directly interacts with the catalytic subunit A (CNA) of calcineurin, and a second docking motif (DOC_PP2B_LxvP_1) that binds to both CNA and calcineurin B (CNB), the regulatory Ca2+-binding subunit of calcineurin.", - "This PP2B-docking motif is defined by four amino acids. There is an absolute requirement for leucine in the first position of the motif and for proline in the last position. However, the viral protein A238L (O36972), which has been shown to prevent recognition of LxvP-containing substrates by calcineurin, contains a lysine residue instead of a proline in the last position (4F0Z) (Grigoriu,2013). The second position can accommodate a variety of amino acids. The third position generally contains a valine, but other hydrophobic residues have also been observed. Some motif instances are immediately preceded by an aromatic residue, which can further strengthen the interaction. Upon binding of Ca2+, the CNA subunit undergoes a conformational change, exposing the hydrophobic motif-binding pocket that is located at the interface of the CNA and CNB subunits. Therefore, LxvP sites can only interact with activated calcineurin. The hydrophobic pocket includes two CNA residues (W352, F356) and three CNB residues (L115, M118, V119), which mediate binding to substrates containing an LxvP motif. Immunosuppressants have been shown to bind to the hydrophobic pocket in a similar way. Upon binding to the hydrophobic pocket, the motif adopts a conformation in which it is almost parallel to the α-helix of CNA binding to CNB. The proline in the motif is predicted to interact with the aromatic residues in CNA.", - "nan", - "L.[LIVAPM]P", - "0.0022964", - "Eukaryota", - "Metallophos (PF00149) Calcineurin-like phosphoesterase (Stochiometry: 1 : 1) EF_hand_7 (PF13499) EF-hand domain pair (Stochiometry: 1 : 1)", - "28", - "31", - "False", - "False", - "False", - "False", - "False", - "False", - "False" - ], - [ - "LIG_BRCT_BRCA1_1", - "ELME000197", - "BRCT phosphopeptide ligands", - "BRCT domains are protein modules mainly found in Eukaryota. BRCT domains are present in proteins that are associated with DNA damage response. They recognize and bind specific phosphorylated serine (pS) sequences. This phospho-protein mediated interaction of the BRCT domain has a central role in cell-cycle check point and DNA repair functions.", - "The LIG_BRCT_BRCA1_1 motif binds with low affinity to the BRCT domain of BRCA1. The motif has the consensus sequence S..F and these residues are specially recognized by the binding pocket in the BRCT domains. The high affinity motif has an additional bound lysine residue (S..F.K).", - "LIG_BRCT_BRCA1_1 LIG_BRCT_BRCA1_2 LIG_BRCT_MDC1_1", - ".(S)..F", - "0.0019115", - "Eukaryota", - "BRCT (PF00533) BRCA1 C Terminus (BRCT) domain (Stochiometry: 1 : 1) PDB Structure: 1T15", - "114", - "118", - "False", - "False", - "True", - "False", - "True", - "False", - "False" - ], - [ - "LIG_SH2_CRK", - "ELME000458", - "Phosphotyrosine ligands bound by SH2 domains", - "Src Homology 2 (SH2) domains recognize small motifs containing a phosphorylated Tyrosine residue. Additional specificity determinants are mainly found up to four positions after the pTyr. They are primarily found in metazoa and close unicellular relatives which have Receptor Tyrosine Kinases (RTKs) as well as soluble TKs such as Src and Abl. SH2 is the main binding domain for TK phosphorylation signalling events.", - "The SH2 domain of the CRK family (CRK-1, CRK-II splice isoforms and CRK-like) binds a phosphotyrosine motif with optimal specificity for Pro or Leu at the pY+3 position (Liu,2010; Huang,2008; 1JU5). The motif appears to be present in pre-metazoans as well as in metazoans (Shigeno-Nakazawa,2016). The SH2 domain topology consists of a central β sheet with three anti-parallel β strands and two α helices, one each side of the sheet (Donaldson,2002). The pY residue of the ligand interacts with two highly conserved basic residues (R15 and R33) in the phosphotyrosine binding pocket situated at one side of the anti-parallel β sheet. Pro at the pY+3 position is buried in a hydrophobic pocket that lies on the other side of the three anti-parallel beta sheets. This hydrophobic pocket is lined by residues Y60, I61, I89, and L109 from the EF and BG loops that make important non-polar contacts with the pY+3 residue. The intermediate residue positions are more tolerant, indicating they are less critical for the interaction. However; peptide library SPOT arrays reveal residue preferences at specific positions which significantly diminish or even prohibit binding (Liu,2010; Huang,2008). Arrays also show that Val or Ile are allowed at +3, though they are clearly weaker than Pro or Leu and may require optimal residues in the adjacent positions to reach a good affinity. Acidic residues are disfavored at multiple positions. Hydrophobic residues, particularly large ones, are disfavoured at several positions. Unusually for SH2, basic residues are tolerated in several positions, with His being less preferred. Pro is prohibited at the +1 position. As seen in some of the physiological ligands, an Arg residue is most favoured at the pY+4 position as it is positioned to contact Asp-91 in the CRK SH2 domain (1JU5) whereas acidic residues are non-permissive (Liu,2010; Huang,2008). The current pattern is predominantly derived from the SPOT arrays for CRK and CRKL in (Huang,2008)", - "LIG_SH2_CRK LIG_SH2_GRB2like LIG_SH2_NCK_1 LIG_SH2_PTP2 LIG_SH2_SRC LIG_SH2_STAP1 LIG_SH2_STAT3 LIG_SH2_STAT5 LIG_SH2_STAT6", - "(Y)[^EPILVFYW][^HDEW][PLIV][^DEW]", - "0.0015153", - "Choanoflagellida Metazoa", - "SH2 (PF00017) SH2 domain (Stochiometry: 1 : 1)", - "128", - "132", - "False", - "False", - "False", - "False", - "False", - "False", - "False" - ], - [ - "LIG_UBA3_1", - "ELME000395", - "Binding motif for UBA3 adenylation domain", - "NAE1-UBA3 is the only known E1 to be involved in the NEDD8 cascade, which regulates cullin neddylation. Cullins are part of multi-subunit cullin-based E3s (CRLs), playing an important role in substrate ubiquitination and consequently regulated protein degradation. CRLs are activated by covalent attachment of NEDD8 to a conserved C-terminal lysine. First NAE1-UBA3 is recruited to one of the two NEDD8 E2s, UBE2M and UBE2F, to promote thiolester formation between E2 and NEDD8. The E2~NEDD8 intermediate is then bound by a DCNL via its PONY domain which mediates the recruitment of a E3 cullin subunit through binding to its WHB subdomain. The CRL RING domain (RBX1 or RBX2) acts as ligase by binding the E2~NEDD8 intermediate and catalysing NEDD8 transfer. The NAE1-UBA3 is required to be able to recognise both E2s which is in partly mediated by a distinct binding of the E2s core domain to the E1 ubiquitin fold domain and partly by a E2 N-terminal binding motif which binds to a groove of UBA3 in the E1 complex.", - "Motif found in the N-terminal regions of NEDD8 E2s UBE2M and UBE2F, which mediates binding to NAE1-UBA3 at its UBA3 adenylation domain. Additionally a distinct binding of the E2s core domain to the E1 ubiquitin fold domain (ufd) takes place, involving two main hydrophobic clusters, with UBE2F forming one ionic interaction and UBE2M forming numerous. Both the E2 N-terminal binding peptide and the E2 core domain must bind UBA3 simultaneously for ideal transfer of NEDD8 from E1 to E2. The interaction between NAE1-UBA3 and the E2 N-terminal peptides are probably unique to the NEDD8 pathway, because many E2s from other UBLs lack N-terminal extensions.The UBA3 docking groove binds the N-terminal E2 sequences showing a common motif characterised by a ϕ-ϕ-x-ϕ pattern. All three hydrophobic residues are conserved among the NEDD8 E2s family members (Leu4, Phe5 and Leu7 on UBE2M, Met1, Leu2 and Leu4 on UBE2F) from different species, however mutations of the second and forth positions of the motif have the greatest impact on binding. Ser6 on UBE2M forms a hydrogen bond with Ser313 on UBA3. The equivalent residue on UBE2F (Thr3) probably also makes this interaction. A lysine residue, which is spaced 0-4 amino acids from the polar Ser or Thr, forms hydrogen bonds with the carbonyls from UBA3's Arg136 and Phe138. UBE2M's Met1 tucks into a hydrophobic cavity on UBA3. Thus UBE2M has a longer interacting peptide chain than UBE2F. Furthermore there is a minimum length requirement between NEDD8 E2s docking peptide and its core domain, to ensure optimum binding. Because the motif is overlapping with the DCNL binding motif on NEDD8 E2s, and is in fact identical in terms of residues and position in the case of UBE2F, this peptide segment must be part of a switching mechanism during neddylation.", - "nan", - "[ILM][ILMF].{1,2}[ILM].{0,4}K", - "0.0011962", - "Eukaryota", - "ThiF (PF00899) ThiF family (Stochiometry: 1 : 1)", - "85", - "91", - "False", - "False", - "False", - "False", - "False", - "False", - "False" - ], - [ - "LIG_SH2_STAP1", - "ELME000465", - "Phosphotyrosine ligands bound by SH2 domains", - "Src Homology 2 (SH2) domains recognize small motifs containing a phosphorylated Tyrosine residue. Additional specificity determinants are mainly found up to four positions after the pTyr. They are primarily found in metazoa and close unicellular relatives which have Receptor Tyrosine Kinases (RTKs) as well as soluble TKs such as Src and Abl. SH2 is the main binding domain for TK phosphorylation signalling events.", - "STAP1, Signal Adaptor Protein 1 (also known as STAP-1, BRDG1) is an adaptor protein with an SH2 and a PH class of lipid-binding domain. It is associated with autosomal dominant hypercholesterolaemia (Fouchier,2014). STAP1 is expressed in lymphoid cells and is phosphorylated by the Tec TK, which participates in B cell antigen receptor signaling (Ohya,1999).The STAP1 SH2 domain has been classified as a Class IIC SH2 domain together with the BKS, CBL, CBLB and CBLC domains which are unusual in having pY+4 as the strongest specificity determinant: SPOT arrays revealed that the strongest preference at +4 is for Leu or Ile (though Val and Phe are accepted) (Huang,2008).The structure of STAP1 bound to NTAL (3MAZ) shows that residues N-terminal to the pTyr are not strongly involved in binding (Kaneko,2010). The pTyr sits in the conserved pTyr pocket, establishing a network of hydrogen bond and van der Waals interactions with pocket-forming residues Arg184, Arg203, Ser212 and Lys228. The peptide is further stabilized by backbone hydrogen bonds from the +1 position to SH2 pocket residues His226, Glu239, and Lys228, similar to what is found for other SH2 domains. The +1 position has a preference for acidic or polar residues (Huang,2008) and the side chains of residues +1, +2 and +3 point towards solvent. +4 hydrophobic residues enter a deep hydrophobic pentagonally shaped pocket suited to accommodate these large side chains, with polar Tyr or smaller Ala residues being less favored. The +3 pocket in STAP1 is plugged by the EF loop residue Leu240, and the only requirement for +3 is the exclusion of Gly, Pro, and Trp. The BKS SH2 domain also has a “plugged” +3 pocket and shares a similar specificity to STAP1 (Kaneko,2010, Tinti,2013). Other Class IIC SH2 domains such as CBL have a shallower hydrophobic groove that preferentially accommodates a Pro residue at +4 (Kaneko,2010), and are not included in the current motif definition.", - "LIG_SH2_CRK LIG_SH2_GRB2like LIG_SH2_NCK_1 LIG_SH2_PTP2 LIG_SH2_SRC LIG_SH2_STAP1 LIG_SH2_STAT3 LIG_SH2_STAT5 LIG_SH2_STAT6", - "(Y)[DESTA][^GP][^GP][ILVFMWYA]", - "0.0010263", - "Metazoa", - "SH2 (PF00017) SH2 domain (Stochiometry: 1 : 1)", - "185", - "189", - "False", - "False", - "False", - "False", - "False", - "False", - "False" - ], - [ - "LIG_SH2_SRC", - "ELME000081", - "Phosphotyrosine ligands bound by SH2 domains", - "Src Homology 2 (SH2) domains recognize small motifs containing a phosphorylated Tyrosine residue. Additional specificity determinants are mainly found up to four positions after the pTyr. They are primarily found in metazoa and close unicellular relatives which have Receptor Tyrosine Kinases (RTKs) as well as soluble TKs such as Src and Abl. SH2 is the main binding domain for TK phosphorylation signalling events.", - "Src-family Src Homology 2 (SH2) domains binding motif.", - "LIG_SH2_CRK LIG_SH2_GRB2like LIG_SH2_NCK_1 LIG_SH2_PTP2 LIG_SH2_SRC LIG_SH2_STAP1 LIG_SH2_STAT3 LIG_SH2_STAT5 LIG_SH2_STAT6", - "(Y)[QDEVAIL][DENPYHI][IPVGAHS]", - "0.0008729", - "Bos taurus Cavia porcellus Gallus gallus Homo sapiens Mesocricetus auratus Metazoa Mus musculus Sus scrofa", - "SH2 (PF00017) SH2 domain (Stochiometry: 1 : 1)", - "185", - "188", - "False", - "False", - "False", - "False", - "False", - "False", - "False" - ], - [ - "LIG_WRC_WIRS_1", - "ELME000507", - "WAVE regulatory complex (WRC) binding site motif", - "The WASP family verprolin homologous protein (WAVE) regulatory complex (WRC) regulates actin cytoskeletal dynamics at distinct membrane-proximal sites. Protocadherins, ROBOs, netrin receptors, neuroligins, GPCRs, and channels employ the WIRS motif to recruit the WRC to specific locations where it stimulates actin nucleation through modulating the Arp2/3 complex. The WIRS motif binds to a highly conserved composite interaction surface of the WRC formed by the Sra and Abi subunits, therefore the interaction requires the entire WRC complex to be assembled.", - "Mutational screening of the protocadherin 10 cytoplasmic tail indicated a consensus of ϕx[TS]Fxx for the WRC-binding motif where the last two positions show some restrictions on allowed amino acids, though no consensus could be made (Chen,2014). In the structure of the WIRS binding surface (4N78) a selenomethionine replaces the original Phe in the +1 position and packs against E1084 and L1090 of Sra1 and Phe at +4 of the WIRS. The +1 residue is always a bulky hydrophobic and enters a deep hydrophobic pocket. The side chain in the +2 position faces the solvent, so it does not affect binding. The side chain of Thr at +3 forms an extended hydrogen bonding network with several residues in Sra1 and Abi2, and with the peptide bond amide group of the Thr itself: This H bond arrangement restricts the position to threonine and serine residues. The side chain of Phe in the +4 position inserts into another hydrophobic pocket, in this case lined by four residues of Sra1 and another four of Abi2 stacking against Y923 of Sra1 and making cation-pi interactions with R106 of Abi2. Phe appears to be fully required at this position. Many amino acids are allowed at +5 and +6 since these positions do not interact directly with the WRC surface. However, Gly at +5 adopts a left-handed α helical conformation to position Lys (+6) such that it can hydrogen bond with the backbone of the selenomethionine at position +1 in the WIRS motif. Other amino acids at position +5 would certainly adopt different conformations, and therefore hydrogen bonding between positions +1 and +6 may not be generally observed. Also, backbone H-bonding suggests that proline would likely be excluded in position +5, which is confirmed by the evolutionary conservation profiles of the known WIRS instances (Chen,2014).", - "nan", - "[FYILMV].[TS]F(G|[^P]).", - "0.0005802", - "Metazoa", - "Abi_HHR (PF07815) Abl-interactor HHR (Stochiometry: 1 : 1) FragX_IP (PF05994) Cytoplasmic Fragile-X interacting family (Stochiometry: 1 : 1)", - "152", - "157", - "False", - "False", - "True", - "False", - "True", - "False", - "False" - ], - [ - "LIG_WRC_WIRS_1", - "ELME000507", - "WAVE regulatory complex (WRC) binding site motif", - "The WASP family verprolin homologous protein (WAVE) regulatory complex (WRC) regulates actin cytoskeletal dynamics at distinct membrane-proximal sites. Protocadherins, ROBOs, netrin receptors, neuroligins, GPCRs, and channels employ the WIRS motif to recruit the WRC to specific locations where it stimulates actin nucleation through modulating the Arp2/3 complex. The WIRS motif binds to a highly conserved composite interaction surface of the WRC formed by the Sra and Abi subunits, therefore the interaction requires the entire WRC complex to be assembled.", - "Mutational screening of the protocadherin 10 cytoplasmic tail indicated a consensus of ϕx[TS]Fxx for the WRC-binding motif where the last two positions show some restrictions on allowed amino acids, though no consensus could be made (Chen,2014). In the structure of the WIRS binding surface (4N78) a selenomethionine replaces the original Phe in the +1 position and packs against E1084 and L1090 of Sra1 and Phe at +4 of the WIRS. The +1 residue is always a bulky hydrophobic and enters a deep hydrophobic pocket. The side chain in the +2 position faces the solvent, so it does not affect binding. The side chain of Thr at +3 forms an extended hydrogen bonding network with several residues in Sra1 and Abi2, and with the peptide bond amide group of the Thr itself: This H bond arrangement restricts the position to threonine and serine residues. The side chain of Phe in the +4 position inserts into another hydrophobic pocket, in this case lined by four residues of Sra1 and another four of Abi2 stacking against Y923 of Sra1 and making cation-pi interactions with R106 of Abi2. Phe appears to be fully required at this position. Many amino acids are allowed at +5 and +6 since these positions do not interact directly with the WRC surface. However, Gly at +5 adopts a left-handed α helical conformation to position Lys (+6) such that it can hydrogen bond with the backbone of the selenomethionine at position +1 in the WIRS motif. Other amino acids at position +5 would certainly adopt different conformations, and therefore hydrogen bonding between positions +1 and +6 may not be generally observed. Also, backbone H-bonding suggests that proline would likely be excluded in position +5, which is confirmed by the evolutionary conservation profiles of the known WIRS instances (Chen,2014).", - "nan", - "[FYILMV].[TS]F(G|[^P]).", - "0.0005802", - "Metazoa", - "Abi_HHR (PF07815) Abl-interactor HHR (Stochiometry: 1 : 1) FragX_IP (PF05994) Cytoplasmic Fragile-X interacting family (Stochiometry: 1 : 1)", - "15", - "20", - "False", - "False", - "True", - "False", - "True", - "False", - "False" - ], - [ - "LIG_WRC_WIRS_1", - "ELME000507", - "WAVE regulatory complex (WRC) binding site motif", - "The WASP family verprolin homologous protein (WAVE) regulatory complex (WRC) regulates actin cytoskeletal dynamics at distinct membrane-proximal sites. Protocadherins, ROBOs, netrin receptors, neuroligins, GPCRs, and channels employ the WIRS motif to recruit the WRC to specific locations where it stimulates actin nucleation through modulating the Arp2/3 complex. The WIRS motif binds to a highly conserved composite interaction surface of the WRC formed by the Sra and Abi subunits, therefore the interaction requires the entire WRC complex to be assembled.", - "Mutational screening of the protocadherin 10 cytoplasmic tail indicated a consensus of ϕx[TS]Fxx for the WRC-binding motif where the last two positions show some restrictions on allowed amino acids, though no consensus could be made (Chen,2014). In the structure of the WIRS binding surface (4N78) a selenomethionine replaces the original Phe in the +1 position and packs against E1084 and L1090 of Sra1 and Phe at +4 of the WIRS. The +1 residue is always a bulky hydrophobic and enters a deep hydrophobic pocket. The side chain in the +2 position faces the solvent, so it does not affect binding. The side chain of Thr at +3 forms an extended hydrogen bonding network with several residues in Sra1 and Abi2, and with the peptide bond amide group of the Thr itself: This H bond arrangement restricts the position to threonine and serine residues. The side chain of Phe in the +4 position inserts into another hydrophobic pocket, in this case lined by four residues of Sra1 and another four of Abi2 stacking against Y923 of Sra1 and making cation-pi interactions with R106 of Abi2. Phe appears to be fully required at this position. Many amino acids are allowed at +5 and +6 since these positions do not interact directly with the WRC surface. However, Gly at +5 adopts a left-handed α helical conformation to position Lys (+6) such that it can hydrogen bond with the backbone of the selenomethionine at position +1 in the WIRS motif. Other amino acids at position +5 would certainly adopt different conformations, and therefore hydrogen bonding between positions +1 and +6 may not be generally observed. Also, backbone H-bonding suggests that proline would likely be excluded in position +5, which is confirmed by the evolutionary conservation profiles of the known WIRS instances (Chen,2014).", - "nan", - "[FYILMV].[TS]F(G|[^P]).", - "0.0005802", - "Metazoa", - "Abi_HHR (PF07815) Abl-interactor HHR (Stochiometry: 1 : 1) FragX_IP (PF05994) Cytoplasmic Fragile-X interacting family (Stochiometry: 1 : 1)", - "18", - "23", - "False", - "False", - "True", - "False", - "True", - "False", - "False" - ], - [ - "LIG_PCNA_yPIPBox_3", - "ELME000482", - "PCNA binding PIP Box", - "The PCNA binding motifs include the PIP Box, PIP degron, the APIM and the TLS motif. These motifs are found in proteins involved in DNA replication, repair, methylation and cell cycle control.", - "Yeast Pip Box variant. Many proteins involved in DNA replication, repair and recombination are tethered to DNA through interaction with the PCNA sliding clamp. The functional relevance of these interactions is reflected by the structural conservation of the yeast (yPCNA) and human PCNA homologs and their target protein interactions. The yeast DNA ligase I seals nicks in dsDNA during DNA replication and repair. The latter function is mediated by interaction of the DNL1 PIP Box motif with yPCNA (2OD8). As in the metazoan PIP Box (LIG_PCNA_PIPBox_1) two major binding sites are formed by the smaller ‘Q pocket’ and a large hydrophobic groove on yPCNA [Vijayakumar,2007].The Q pocket accommodates a Gln residue, through van der Waals contacts and backbone hydrogen bonds to yPCNA Ala251 and Ala209. Compared to the metazoan motif no fungal instances bind Met, but the Q pocket accepts polar residues such as Asn in Replication factor C subunit RFC1 (1SXJ). The clamp loader (RFC 1-5)-yPCNA complex provides the only example of a full protein bound through a PIP Box motif to PCNA [Bowman,2004].The conserved ‘hydrophobic plug’ (⏀xx⏀⏀) formed by Leu46, Phe49 and Phe51 in DNL1 adopts the canonical 310 helix that binds to the hydrophobic groove on yPCNA via van der Waals contacts and proline packing. In fungal sequences, a higher variability is observed in the second and third hydrophobic positions. Positively charged residues flanking the core motif are highly conserved in fungal instances and are included in the motif definition.As in other PIP Boxes additional interactions such as beta augmentation from DNL1 to the C-terminus of PCNA contribute to binding [Vijayakumar,2007]. Non-canonical variants include the yeast Pol η motif, which overlaps with a Rev-1 binding site (LIG_REV1ctd_RIR_1) and shows low conservation at the Q Pocket [Haracska,2001]. The SRS2 protein motif forms an ɑ-helix (3V62) instead of the canonical 310 helix and replaces the second hydrophobic position with Gln [Armstrong,2012].", - "LIG_PCNA_APIM_2 LIG_PCNA_PIPBox_1 LIG_PCNA_TLS_4 LIG_PCNA_yPIPBox_3", - "([KR].{0,6}[QN].[^FHWY][LIVM][^P][^PFWYMLIV][FYLMWV][FYLMWVI])|([QN].[^FHWY][LIVM][^P][^PFWYMLIV][FYLMWV][FYLMWVI].{0,6}[KR])", - "0.0005575", - "Fungi", - "PCNA_C (PF02747) Proliferating cell nuclear antigen, C-terminal domain (Stochiometry: 1 : 1) PDB Structure: 2OD8", - "167", - "179", - "False", - "False", - "True", - "False", - "False", - "True", - "False" - ], - [ - "LIG_Pex14_2", - "ELME000328", - "Pex14 ligand motif", - "Several linear motifs bind to a hydrophobic groove on Pex14, a key protein in peroxisomal import. Wxxx[FY] and Fxxx[FW] motifs are present in peroxisomal import receptors Pex5 (P50542) and Pex19 (P40855), respectively. The LVXEF[LM] motif is only present in the Pex5 receptor. These three motifs bind to the same hydrophobic binding site in Pex14 (O75381) which is the minimal translocon that is essential for TRG_PTS1 cargo translocation into peroxisomal matrix. The N-terminal domain of Pex14 interacts with Wxxx[FY] and LVXEF[LM] motifs in Pex5 (Fungi: LIG_Pex14_4) to target PTS1-containing peroxisomal matrix enzymes entry into the peroxisomal matrix followed by interaction between cargo-free Pex5 with SH3 domain of Pex13 (Q92968) via Wxxx[FY] motif for recycling of Pex5 into the cytosol. Pex19 contains an FxxxF motif that mediates Pex19-Pex14 interactions. Pex19 is considered to be the cytosolic import receptor for peroxisomal membrane proteins that contain an mPTS motif.", - "FxxxF motif in N-terminus of Pex19 and Wxxx[FY] motif in N-terminal half of Pex5 bind to the same binding site in N-terminus of Pex14 (PF04695). Binding site in human Pex14 consists of helices alpha1 and alpha2 as well as connecting linker which form two hydrophobic pockets that are separated by two aromatic residues, PHE35 and PHE52. N-terminus of Pex14 contains three alpha helices. Helices alpha1 and alpha2 are anti-parallel oriented whereas helix alpha3 is diagonal to helices alpha1 and alpha2. Interaction between Pex5 and Pex14 is stronger than between Pex19 and Pex14. Salt bridge stabilises Pex5-Pex14 complex (2W84) and it involves conserved LYS56 residue in Pex14. Corresponding salt bridge is not observed in the structure of Pex19-Pex14 complex (2W85). Lack of this salt bridge in Pex19 motif may contribute to the reduced binding affinity of Pex19 compared to Pex5 (Neufeld,2009). Pex5 adopts amphipathic alpha helical conformation when it binds diagonally across helices alpha1 and alpha2 of N-terminus Pex14. Pex19 also forms amphipathic alpha helix upon binding to the identical site in Pex14. However Pex19 binds to Pex14 in opposite orientation to Pex5 ligand. Upon binding of FxxxF motif of Pex19 or Wxxx[FY] motif of Pex5 to N-terminus of Pex14, positively charged helices alpha1 and alpha2 are flanked by negatively charged helix alpha3 on one side and by negatively charged Pex5 or Pex19 ligands on the other side. Charge complimentarity is one of the possible determinants of Pex5-Pex14 and Pex19-Pex14 interactions (Neufeld,2009). S. cerevisiae Pex5 contains FxxxW motif that binds to the N-terminus of Pex14. Neither of the two WxxxF motifs in S. cerevisiae Pex5 are required for Pex14 binding. Tryptophan in FxxxW motif is essential for Pex5-Pex14 interaction but not for Pex13 interaction (Kerssen,2006).", - "LIG_Pex14_1 LIG_Pex14_2 LIG_Pex14_3 LIG_Pex14_4", - "F...[WF]", - "0.0004628", - "Bos taurus Candida albicans Candida glabrata Cricetulus griseus Danio rerio Debaryomyces hansenii Equus caballus Eremothecium gossypii Homo sapiens Kluyveromyces lactis Komagataella Mus musculus Ogataea Pongo abelii Rattus norvegicus Saccharomyces cerevisiae Yarrowia lipolytica", - "Pex14_N (PF04695) Peroxisomal membrane anchor protein (Pex14p) conserved region (Stochiometry: 1 : 1)", - "5", - "9", - "False", - "False", - "False", - "False", - "False", - "False", - "False" - ], - [ - "LIG_TYR_ITIM", - "ELME000020", - "Immunoreceptor tyrosine-based motif", - "In the cytoplasmatic region of immunoreceptors there are motifs that are critical for the activation and termination of signal transduction pathways.", - "ITIM (immunoreceptor tyrosine-based inhibitory motif). Phosphorylation of the ITIM motif, found in the cytoplasmic tail of some inhibitory receptors (KIRs) that bind MHC Class I, leads to the recruitment and activation of a protein tyrosine phosphatase.", - "LIG_TYR_ITAM LIG_TYR_ITIM LIG_TYR_ITSM", - "[ILV].(Y)..[ILV]", - "0.0002992", - "Vertebrata", - "SH2 (PF00017) SH2 domain (Stochiometry: 1 : 1)", - "126", - "131", - "False", - "False", - "False", - "False", - "False", - "False", - "False" - ], - [ - "LIG_NRBOX", - "ELME000045", - "Nuclear receptor box", - "The NR box confers binding to nuclear receptors (NRs), and are found primarily in co-activators of NRs.", - "The nuclear receptor box motif (LXXLL) confers binding to nuclear receptors. It must therefore be exposed and reside in natively disordered polypeptide. However, as this motif is hydrophobic, it is particularly prone to false positives buried in globular domains.", - "nan", - "[^P]L[^P][^P]LL[^P]", - "0.0002628", - "Metazoa", - "Hormone_recep (PF00104) Ligand-binding domain of nuclear hormone receptor (Stochiometry: 1 : 1) PDB Structure: 2GPO", - "11", - "17", - "False", - "False", - "True", - "False", - "True", - "False", - "False" - ], - [ - "TRG_DiLeu_BaEn_2", - "ELME000524", - "Adaptin binding Endosome-Lysosome-Basolateral sorting signals", - "Endocytosis and/or vesicular sorting signals for membrane proteins. Depending on organism, cell type as well as the nature of the adaptin complex bound, they can target either to cell surface or to specific, internal membrane-bound organelles (endosomes, lysosomes, melanosomes, synaptic vesicles, etc.)All these motifs are believed to bind to the sigma subunit of activated adaptin complexes (AP-1, AP-2 and AP-3). These clathrin-associated complexes are ancient and found in most eukaryotes. Dileucine motifs are variable (especially at their negatively charged positions and at the hydrophobic residues) and the various motif subtypes tend to have slightly different functions (Mattera,2011).One should avoid confusing the adaptin sigma-binding classical dileucine motifs discussed here, and the GGA-binding lysosomal targeting motifs (sometimes also called dileucine motifs).", - "This relatively uncommon acidic dileucine-type motif features a phenylalanine at its first hydrophobic position (+5) instead of a leucine (Iverson,2005). Numbering is from the required Glu as +1. Motifs of this subtype are typically involved in basolateral sorting of membrane proteins in polarized epithelial cells (believed to be primarily driven by AP-1 association).Since Phe-containing motifs do not appear to be suitable to act as a lysosomal signal, these variant motifs can also carry a proline before the two hydrophobic residues without the protein becoming lysosomal/late endosomal. Adding an additional negative charge (particularly glutamate) before its canonical glutamate (+1) would however strengthen them to the point where they would act mostly as a vesicular targeting signal (now considered to be a separate motif subtype in ELM) and therefore is disallowed for this functional variant.Unfortunately, no detailed structural information is available on these motifs yet (as of 2021). Phe +5-containing dileucine motifs have not yet been described outside multicellular animals and it is questionable if they exist in fungi or plants.", - "TRG_DiLeu_BaEn_1 TRG_DiLeu_BaEn_2 TRG_DiLeu_BaEn_3 TRG_DiLeu_BaEn_4 TRG_DiLeu_BaLyEn_6 TRG_DiLeu_LyEn_5", - "[^E]E...F[LIVM]", - "0.0002451", - "Metazoa", - "Clat_adaptor_s (PF01217) Clathrin adaptor complex small chain (Stochiometry: 1 : 1)", - "52", - "58", - "False", - "False", - "False", - "False", - "False", - "False", - "False" - ], - [ - "LIG_eIF4E_1", - "ELME000317", - "eIF4E binding motif", - "Variant YxxxxL motifs that mediate binding to the dorsal surface of eIF4E through interactions with the Tryptophan at position 73 (in human and mouse). eIF4E is a key regulator of eukaryotic cap-dependent translation and these motifs play a role in both translation initiation, via interactions with eIF4G, and repression, via binding of eIF4E inhibitory proteins.', 'A conserved YxxxxLϕ motif mediates binding of key regulatory proteins to the dorsal surface of eIF4E through interactions with the Tryptophan at position 73 (in human and mouse). This interaction plays a crucial role in eukaryotic cap-dependent translation initiation, recruiting the scaffold eIF4G to the cap-binding eIF4E protein as part of the eIF4F complex. eIF4G, recruits the ribosome to the bound mRNA through additional interactions with the ribosome-associated factor eIF3. The same region is also targeted by a group of 'eIF4E inhibitory proteins', which competitively bind and sequester the available eIF4E and preventing eIF4F complex formation. These inhibitory proteins play a key role in important cellular processes, including cell cycle progression and metabolism. Although the motif is defined in the literature as YxxxxLϕ, it should be noted that there are variants that do not have a Y at position 1 and/or have a positively charged residue in the final position.", - "LIG_eIF4E_1 LIG_eIF4E_2", - "Y....L[VILMF]", - "0.0001891", - "Eukaryota", - "IF4E (PF01652) Eukaryotic initiation factor 4E (Stochiometry: 1 : 1)", - "10", - "16", - "False", - "False", - "True", - "False", - "True", - "False", - "False" - ], - [ - "LIG_GBD_Chelix_1", - "ELME000441", - "GTPase-binding domain (GBD) ligand", - "The C helix is an amphipatic alpha helix located in the C-terminal VCA segment of WASP and N-WASP proteins. This helix binds to the centrally located GBD region of the same protein. This intramoleular interaction hides the acidic domain in the VCA segment from the Arp2/3 complex, thus preventing Arp2/3-dependent activation of actin polymerization. Some pathogens use a mimic of this motif in a multivalent manner to hijack cell regulation and promote actin polymerisation.", - "The alpha helix has at least three turns where the positions +1, +5 and +9 need to be hydrophobic as they face a hydrophobic cavity in the GBD region of either WASP or N-WASP (Panchal,2003; Sallee,2008; Cheng,2008; Okrut,2015). The second position is also occupied by a hydrophobic residue, Val in the case of WASP, N-WASP, NCK1 and NCK2, and Ala in the two bacterial proteins.Positions +3 and +7 are completely oriented away from the GBD region, therefore no restriction exists for these two positions.Position +4 does interact with the GBD region, showing an Ala in WASP, N-WASP and the bacterial EspF. Based on the structure of the autoinhibited WASP, this Ala interacts with the Phe 293 in WASP. In the case of Nck1, an Asn is present while EspFU has an Arg that also interacts with Phe293. This indicates that the +4 position is variable but influences the binding with the target domain.Position +6 has a conserved Met in WASP and N-WASP, a Lys in Nck1, Nck2 and EspF, and different nonpolar residues in the different repeats and homologs of EspFU (Met being the most common).The structure of the autoinhibited WASP indicates that position +8 can only host a small amino acid like Val that could be interacting with Leu289, while the structure of EspFU bound to the GBD region in WASP shows that the corresponding residue in this position is slightly oriented outside the contacting area but still interacting with Asp92.Dreadlocks (or Dock), the Drosophila homolog of NCK1, as well as other arthropod homologs, contains a Phe in the +2 position and different hydrophobic residues in the +9 position (Iso, Val). Dreadlocks is known to interact with WASp, the Drosophila homolog of WASP; however, the GBD-binding activity of the Phe-containing motif remains to be shown (Kaipa,2013).", - "nan", - "[ILV][VA][^P][^P][LI][^P][^P][^P][LM]", - "0.0000979", - "Chordata", - "PBD (PF00786) P21-Rho-binding domain (Stochiometry: 1 : 1)", - "89", - "97", - "False", - "False", - "False", - "False", - "False", - "False", - "False" - ], - [ - "LIG_EH1_1", - "ELME000148", - "eh1 motif", - "The eh1 motif binds to the WD40 domain of the Groucho/TLE corepressor.", - "The original eh1 motif identified in homeodomains has been augmented by Eberhard,2000, Barolo,2002, Copley,2005 and Goldstein,2005 who found other TF families with the motif. Any eh1 motif linked to a paired box (including some with homeodomains) shows a variant motif with Y or H at the first position where the classical motif is always F. The motif in ELM is a composite of these two variants which have both been reported to bind Groucho/TLE. There should be some other functional explanation for this consistent variation but at the time of pattern creation this was not resolved. This motif is found in natively disordered polypeptide but the conserved hydrophobic amino acids will cause buried false positives in globular domains.", - "nan", - ".[FYH].[IVM][^WFYP][^WFYP][ILM][ILMV].", - "0.0000786", - "Metazoa", - "WD40 (PF00400) WD domain, G-beta repeat (Stochiometry: 1 : 1) PDB Structure: 2CE8", - "97", - "105", - "False", - "False", - "False", - "False", - "False", - "False", - "False" - ], - [ - "LIG_EH1_1", - "ELME000148", - "eh1 motif", - "The eh1 motif binds to the WD40 domain of the Groucho/TLE corepressor.", - "The original eh1 motif identified in homeodomains has been augmented by Eberhard,2000, Barolo,2002, Copley,2005 and Goldstein,2005 who found other TF families with the motif. Any eh1 motif linked to a paired box (including some with homeodomains) shows a variant motif with Y or H at the first position where the classical motif is always F. The motif in ELM is a composite of these two variants which have both been reported to bind Groucho/TLE. There should be some other functional explanation for this consistent variation but at the time of pattern creation this was not resolved. This motif is found in natively disordered polypeptide but the conserved hydrophobic amino acids will cause buried false positives in globular domains.", - "nan", - ".[FYH].[IVM][^WFYP][^WFYP][ILM][ILMV].", - "0.0000786", - "Metazoa", - "WD40 (PF00400) WD domain, G-beta repeat (Stochiometry: 1 : 1) PDB Structure: 2CE8", - "117", - "125", - "False", - "False", - "False", - "False", - "False", - "False", - "False" - ], - [ - "DEG_MDM2_SWIB_1", - "ELME000184", - "MDM2 binding motif", - "A degron motif found within the N-terminal p53 transactivation domain (TAD) (PF08563) and its relatives. The degron binds into a hydrophobic cleft in the N-terminal SWIB domain (PF02201) of the MDM2 E3 ubiquitin ligase. The sides of this pocket are formed by two helices, the bottom by two shorter helices and the ends are capped each by a three-stranded β-sheet (Kussie,1996). The p53 degron forms an amphipathic helix projecting a pair of aromatic residues deep into the MDM2 binding pocket. Regulation of p53 protein stability by Mdm2 is a key part of p53 function.", - "The MDM2-binding degron motif is located in the N-terminal transactivation domain (TAD) of p53 family members, so-called BOX-I (PF08563). The motif peptide folds as an amphipathic α-helix of about 2.5 turns, which binds in the hydrophobic cleft of the MDM2 SWIB domain (Kussie,1996). The three hydrophobic amino acids Phe-19, Trp-23 and Leu-26 are all found on the same side of the p53 degron helix and are critical for binding to MDM2 since they insert deeply into the binding pocket (1YCR). For example, substitution of residues Leu-22 and Trp-23 with Gln and Ser abolishes p53-MDM2 interaction, which leads to constitutively increased p53 levels (Chehab,2000). Since the motif adopts the α-helical fold when bound, proline residues are excluded from the non-conserved positions in the motif. When looking at p53 sequence alignments, there is sometimes an extra non-conserved residue between the Trp and Leu residues: This is likely to be due to underwinding of the end turn of the helix.", - "nan", - "F[^P]{3}W[^P]{2,3}[VIL]", - "0.0000212", - "Metazoa", - "SWIB (PF02201) SWIB/MDM2 domain (Stochiometry: 1 : 1) PDB Structure: 1YCR", - "5", - "12", - "False", - "False", - "False", - "False", - "False", - "False", - "False" - ] - ] - }, - "test5": { - "type": "error", - "args": { - "sequence": "xxx" - }, - "expected_result": "None" - } -} \ No newline at end of file