{"id":4112,"identifier":"darus-803","persistentUrl":"https://doi.org/10.18419/darus-803","protocol":"doi","authority":"10.18419","publisher":"DaRUS","publicationDate":"2020-06-01","storageIdentifier":"s3://10.18419/darus-803","datasetVersion":{"id":376,"datasetId":4112,"datasetPersistentId":"doi:10.18419/darus-803","storageIdentifier":"s3://10.18419/darus-803","versionNumber":1,"versionMinorNumber":0,"versionState":"RELEASED","lastUpdateTime":"2020-06-01T19:16:12Z","releaseTime":"2020-06-01T19:16:12Z","createTime":"2020-05-11T07:43:22Z","publicationDate":"2020-06-01","citationDate":"2020-06-01","license":{"name":"CC BY 4.0","uri":"http://creativecommons.org/licenses/by/4.0","iconUri":"https://licensebuttons.net/l/by/4.0/88x31.png"},"fileAccessRequest":false,"metadataBlocks":{"citation":{"displayName":"Citation Metadata","name":"citation","fields":[{"typeName":"title","multiple":false,"typeClass":"primitive","value":"Profile hidden Markov models of the Glycoside Hydrolase 19 Engineering Database"},{"typeName":"author","multiple":true,"typeClass":"compound","value":[{"authorName":{"typeName":"authorName","multiple":false,"typeClass":"primitive","value":"Orlando, Marco"},"authorAffiliation":{"typeName":"authorAffiliation","multiple":false,"typeClass":"primitive","value":"University of Milano Bicocca"},"authorIdentifierScheme":{"typeName":"authorIdentifierScheme","multiple":false,"typeClass":"controlledVocabulary","value":"ORCID"},"authorIdentifier":{"typeName":"authorIdentifier","multiple":false,"typeClass":"primitive","value":"0000-0002-5914-3052"}}]},{"typeName":"datasetContact","multiple":true,"typeClass":"compound","value":[{"datasetContactName":{"typeName":"datasetContactName","multiple":false,"typeClass":"primitive","value":"Pleiss, Jürgen"},"datasetContactAffiliation":{"typeName":"datasetContactAffiliation","multiple":false,"typeClass":"primitive","value":"Universität 
Stuttgart"}}]},{"typeName":"dsDescription","multiple":true,"typeClass":"compound","value":[{"dsDescriptionValue":{"typeName":"dsDescriptionValue","multiple":false,"typeClass":"primitive","value":"A starting alignment was built if other sequences with a known PDB structure were available, by performing a GH19 domain structure-based alignment generated through the mmaker command implemented in ChimeraX. Other seed sequences in the same superfamily were added to this fixed structural alignment by use of the “--add” flag option available in MAFFT. If no other structures were available other than the reference, a sequence-based alignment with other seeds was created with MAFFT “L-INS-i” strategy, improved by adding information of up to 600 close homologues obtained from a search in Uniprot non-redundant Uniref50 database using a restrictive E-value threshold of 1E-20.\r\nThe obtained alignments were manually cut with respect to the length of the GH19 domain of the reference structure and used to generate the new superfamily-specific profile HMMs."}}]},{"typeName":"subject","multiple":true,"typeClass":"controlledVocabulary","value":["Medicine, Health and Life Sciences"]},{"typeName":"keyword","multiple":true,"typeClass":"compound","value":[{"keywordValue":{"typeName":"keywordValue","multiple":false,"typeClass":"primitive","value":"profile hidden Markov model"}},{"keywordValue":{"typeName":"keywordValue","multiple":false,"typeClass":"primitive","value":"protein sequence"}},{"keywordValue":{"typeName":"keywordValue","multiple":false,"typeClass":"primitive","value":"multiple sequence alignment"}},{"keywordValue":{"typeName":"keywordValue","multiple":false,"typeClass":"primitive","value":"amino acid sequence"}}]},{"typeName":"publication","multiple":true,"typeClass":"compound","value":[{"publicationCitation":{"typeName":"publicationCitation","multiple":false,"typeClass":"primitive","value":"Orlando M, Buchholz PCF, Lotti M, Pleiss J (2020) Large-scale exploration of 
sequences, substrate specificity and evolution in glycoside hydrolase family 19: the GH19 Engineering Database (submitted)"}}]},{"typeName":"grantNumber","multiple":true,"typeClass":"compound","value":[{"grantNumberAgency":{"typeName":"grantNumberAgency","multiple":false,"typeClass":"primitive","value":"Bundesministerium für Bildung und Forschung"},"grantNumberValue":{"typeName":"grantNumberValue","multiple":false,"typeClass":"primitive","value":"031B0571A"}},{"grantNumberAgency":{"typeName":"grantNumberAgency","multiple":false,"typeClass":"primitive","value":"Deutsche Forschungsgemeinschaft"},"grantNumberValue":{"typeName":"grantNumberValue","multiple":false,"typeClass":"primitive","value":"EXC2075"}}]},{"typeName":"depositor","multiple":false,"typeClass":"primitive","value":"Buchholz, Patrick C. F."},{"typeName":"dateOfDeposit","multiple":false,"typeClass":"primitive","value":"2020-05-11"},{"typeName":"dataSources","multiple":true,"typeClass":"primitive","value":["Uniref50 database (Uniprot)\r\nftp://ftp.uniprot.org/pub/databases/uniprot/uniref/uniref50","https://gh19ed.biocatnet.de/"]}]},"process":{"displayName":"Process 
Metadata","name":"process","fields":[{"typeName":"processMethodsPar","multiple":true,"typeClass":"compound","value":[{"processMethodsParName":{"typeName":"processMethodsParName","multiple":false,"typeClass":"primitive","value":"E-value"},"processMethodsParValue":{"typeName":"processMethodsParValue","multiple":false,"typeClass":"primitive","value":"1E-20"}}]},{"typeName":"processSoftware","multiple":true,"typeClass":"compound","value":[{"processSoftwareName":{"typeName":"processSoftwareName","multiple":false,"typeClass":"primitive","value":"HMMER"},"processSoftwareVersion":{"typeName":"processSoftwareVersion","multiple":false,"typeClass":"primitive","value":"3.1b2"},"processSoftwareURL":{"typeName":"processSoftwareURL","multiple":false,"typeClass":"primitive","value":"http://www.hmmer.org"}},{"processSoftwareName":{"typeName":"processSoftwareName","multiple":false,"typeClass":"primitive","value":"ChimeraX"},"processSoftwareVersion":{"typeName":"processSoftwareVersion","multiple":false,"typeClass":"primitive","value":"0.9"},"processSoftwareCitation":{"typeName":"processSoftwareCitation","multiple":false,"typeClass":"primitive","value":"Goddard TD, Huang CC, Meng EC, Pettersen EF, Couch GS, Morris JH, Ferrin TE (2018) UCSF ChimeraX: Meeting modern challenges in visualization and analysis. Protein Sci 27(1):14-25. https://doi.org/10.1002/pro.3235"}},{"processSoftwareName":{"typeName":"processSoftwareName","multiple":false,"typeClass":"primitive","value":"MAFFT"},"processSoftwareVersion":{"typeName":"processSoftwareVersion","multiple":false,"typeClass":"primitive","value":"7.407"},"processSoftwareCitation":{"typeName":"processSoftwareCitation","multiple":false,"typeClass":"primitive","value":"Katoh K, Standley DM (2013) MAFFT multiple sequence alignment software version 7: improvements in performance and usability. Mol Biol Evol 30(4):772-780. 
https://doi.org/10.1093/molbev/mst010"}}]}]}},"files":[{"description":"Profile hidden Markov model in HMMER format for the chitinase superfamily","label":"CHITs.hmm","restricted":false,"version":2,"datasetVersionId":376,"dataFile":{"id":4118,"persistentId":"doi:10.18419/darus-803/6","pidURL":"https://doi.org/10.18419/darus-803/6","filename":"CHITs.hmm","contentType":"application/octet-stream","filesize":113564,"description":"Profile hidden Markov model in HMMER format for the chitinase superfamily","storageIdentifier":"s3://fokus-dv-prod-1:17202b00fca-a1fa63f7cc4b","rootDataFileId":-1,"md5":"9c4c77ebc1b7f7a3c4c4f9bf713aa7fe","checksum":{"type":"MD5","value":"9c4c77ebc1b7f7a3c4c4f9bf713aa7fe"},"creationDate":"2020-05-11"}},{"description":"Profile hidden Markov model in HMMER format for the endolysin superfamily","label":"ELYSs.hmm","restricted":false,"version":2,"datasetVersionId":376,"dataFile":{"id":4114,"persistentId":"doi:10.18419/darus-803/2","pidURL":"https://doi.org/10.18419/darus-803/2","filename":"ELYSs.hmm","contentType":"application/octet-stream","filesize":83322,"description":"Profile hidden Markov model in HMMER format for the endolysin superfamily","storageIdentifier":"s3://fokus-dv-prod-1:17202b0129f-28f8232542d5","rootDataFileId":-1,"md5":"61d6a4adfde821f447d18458a00e7667","checksum":{"type":"MD5","value":"61d6a4adfde821f447d18458a00e7667"},"creationDate":"2020-05-11"}},{"description":"Clustal alignment for the profile hidden Markov model of the chitinase superfamily\r\n\t","label":"Reference_alignment_CHITs.aln","restricted":false,"version":2,"datasetVersionId":376,"dataFile":{"id":4113,"persistentId":"doi:10.18419/darus-803/1","pidURL":"https://doi.org/10.18419/darus-803/1","filename":"Reference_alignment_CHITs.aln","contentType":"application/octet-stream","filesize":16272,"description":"Clustal alignment for the profile hidden Markov model of the chitinase 
superfamily\r\n\t","storageIdentifier":"s3://fokus-dv-prod-1:17202b01482-b0f4b40f7595","rootDataFileId":-1,"md5":"1de23a931f1988e2e26fa21ca210605c","checksum":{"type":"MD5","value":"1de23a931f1988e2e26fa21ca210605c"},"creationDate":"2020-05-11"}},{"description":"Stockholm alignment for the profile hidden Markov model of the chitinase superfamily","label":"Reference_alignment_CHITs.stockholm","restricted":false,"version":2,"datasetVersionId":376,"dataFile":{"id":4115,"persistentId":"doi:10.18419/darus-803/3","pidURL":"https://doi.org/10.18419/darus-803/3","filename":"Reference_alignment_CHITs.stockholm","contentType":"application/octet-stream","filesize":18605,"description":"Stockholm alignment for the profile hidden Markov model of the chitinase superfamily","storageIdentifier":"s3://fokus-dv-prod-1:17202b0164f-63b777b39aeb","rootDataFileId":-1,"md5":"116056d1f562c17e3a2b0ebe8714d0fe","checksum":{"type":"MD5","value":"116056d1f562c17e3a2b0ebe8714d0fe"},"creationDate":"2020-05-11"}},{"description":"Clustal alignment for the profile hidden Markov model of the endolysin superfamily","label":"Reference_alignment_ELYSs.aln","restricted":false,"version":2,"datasetVersionId":376,"dataFile":{"id":4116,"persistentId":"doi:10.18419/darus-803/4","pidURL":"https://doi.org/10.18419/darus-803/4","filename":"Reference_alignment_ELYSs.aln","contentType":"application/octet-stream","filesize":403941,"description":"Clustal alignment for the profile hidden Markov model of the endolysin superfamily","storageIdentifier":"s3://fokus-dv-prod-1:17202b0191d-084a4f5111b3","rootDataFileId":-1,"md5":"b30ebdc0bf4ad0af8b50c15f6339b5e7","checksum":{"type":"MD5","value":"b30ebdc0bf4ad0af8b50c15f6339b5e7"},"creationDate":"2020-05-11"}},{"description":"Stockholm alignment for the profile hidden Markov model of the endolysin 
superfamily","label":"Reference_alignment_ELYSs.stockholm","restricted":false,"version":2,"datasetVersionId":376,"dataFile":{"id":4117,"persistentId":"doi:10.18419/darus-803/5","pidURL":"https://doi.org/10.18419/darus-803/5","filename":"Reference_alignment_ELYSs.stockholm","contentType":"application/octet-stream","filesize":429406,"description":"Stockholm alignment for the profile hidden Markov model of the endolysin superfamily","storageIdentifier":"s3://fokus-dv-prod-1:17202b01bbf-5b80fd35d28b","rootDataFileId":-1,"md5":"88e1f79860f29df5626599bd48cfcff3","checksum":{"type":"MD5","value":"88e1f79860f29df5626599bd48cfcff3"},"creationDate":"2020-05-11"}}],"citation":"Orlando, Marco, 2020, \"Profile hidden Markov models of the Glycoside Hydrolase 19 Engineering Database\", https://doi.org/10.18419/darus-803, DaRUS, V1"}}