{"id":33716,"identifier":"darus-1163","persistentUrl":"https://doi.org/10.18419/darus-1163","protocol":"doi","authority":"10.18419","publisher":"DaRUS","publicationDate":"2021-05-20","storageIdentifier":"s3://10.18419/darus-1163","datasetVersion":{"id":742,"datasetId":33716,"datasetPersistentId":"doi:10.18419/darus-1163","storageIdentifier":"s3://10.18419/darus-1163","versionNumber":1,"versionMinorNumber":0,"versionState":"RELEASED","UNF":"UNF:6:zi8TRxkq1C/pCN14pXTA0Q==","lastUpdateTime":"2021-05-20T07:06:42Z","releaseTime":"2021-05-20T07:06:42Z","createTime":"2020-11-30T14:36:30Z","publicationDate":"2021-05-20","citationDate":"2021-05-20","license":{"name":"CC BY 4.0","uri":"http://creativecommons.org/licenses/by/4.0","iconUri":"https://licensebuttons.net/l/by/4.0/88x31.png"},"fileAccessRequest":false,"metadataBlocks":{"citation":{"displayName":"Citation Metadata","name":"citation","fields":[{"typeName":"title","multiple":false,"typeClass":"primitive","value":"Sequence cross-references and taxonomic lineage for glycoside hydrolase family 19"},{"typeName":"author","multiple":true,"typeClass":"compound","value":[{"authorName":{"typeName":"authorName","multiple":false,"typeClass":"primitive","value":"Buchholz, Patrick C. F."},"authorAffiliation":{"typeName":"authorAffiliation","multiple":false,"typeClass":"primitive","value":"Universität Stuttgart"},"authorIdentifierScheme":{"typeName":"authorIdentifierScheme","multiple":false,"typeClass":"controlledVocabulary","value":"ORCID"},"authorIdentifier":{"typeName":"authorIdentifier","multiple":false,"typeClass":"primitive","value":"0000-0001-5967-3777"}}]},{"typeName":"datasetContact","multiple":true,"typeClass":"compound","value":[{"datasetContactName":{"typeName":"datasetContactName","multiple":false,"typeClass":"primitive","value":"Pleiss, Jürgen"},"datasetContactAffiliation":{"typeName":"datasetContactAffiliation","multiple":false,"typeClass":"primitive","value":"Universität Stuttgart"}}]},{"typeName":"dsDescription","multiple":true,"typeClass":"compound","value":[{"dsDescriptionValue":{"typeName":"dsDescriptionValue","multiple":false,"typeClass":"primitive","value":"The Glycoside Hydrolase 19 Engineering Database (GH19ED) contains information on protein sequences and structures of glycoside hydrolases from family 19. This dataset lists cross-references to the National Center for Biotechnology Information (NCBI), cross-references to the Protein Data Bank (PDB) and the taxonomic lineage for each sequence entry in the GH19ED."}}]},{"typeName":"subject","multiple":true,"typeClass":"controlledVocabulary","value":["Medicine, Health and Life Sciences"]},{"typeName":"keyword","multiple":true,"typeClass":"compound","value":[{"keywordValue":{"typeName":"keywordValue","multiple":false,"typeClass":"primitive","value":"protein sequence"},"keywordVocabulary":{"typeName":"keywordVocabulary","multiple":false,"typeClass":"primitive","value":"EDAM"},"keywordVocabularyURI":{"typeName":"keywordVocabularyURI","multiple":false,"typeClass":"primitive","value":"http://edamontology.org/data_2976"}},{"keywordValue":{"typeName":"keywordValue","multiple":false,"typeClass":"primitive","value":"protein structure"},"keywordVocabulary":{"typeName":"keywordVocabulary","multiple":false,"typeClass":"primitive","value":"EDAM"},"keywordVocabularyURI":{"typeName":"keywordVocabularyURI","multiple":false,"typeClass":"primitive","value":"http://edamontology.org/data_1460"}},{"keywordValue":{"typeName":"keywordValue","multiple":false,"typeClass":"primitive","value":"taxonomy"},"keywordVocabulary":{"typeName":"keywordVocabulary","multiple":false,"typeClass":"primitive","value":"EDAM"},"keywordVocabularyURI":{"typeName":"keywordVocabularyURI","multiple":false,"typeClass":"primitive","value":"http://edamontology.org/data_3028"}},{"keywordValue":{"typeName":"keywordValue","multiple":false,"typeClass":"primitive","value":"lineage"}},{"keywordValue":{"typeName":"keywordValue","multiple":false,"typeClass":"primitive","value":"source organism"}},{"keywordValue":{"typeName":"keywordValue","multiple":false,"typeClass":"primitive","value":"amino acid sequence"},"keywordVocabulary":{"typeName":"keywordVocabulary","multiple":false,"typeClass":"primitive","value":"NCIT"},"keywordVocabularyURI":{"typeName":"keywordVocabularyURI","multiple":false,"typeClass":"primitive","value":"http://purl.obolibrary.org/obo/NCIT_C13187"}}]},{"typeName":"publication","multiple":true,"typeClass":"compound","value":[{"publicationCitation":{"typeName":"publicationCitation","multiple":false,"typeClass":"primitive","value":"Orlando M., Buchholz P. C. F., Lotti M. & Pleiss J. (2020). The GH19 Engineering Database: an extended classification system for exploring the properties of sequence space and protein evolution. (submitted)"}}]},{"typeName":"notesText","multiple":false,"typeClass":"primitive","value":"The tab-separated tabular file comprises nine columns:
\r\n(1) the sequence identifier from the GH19ED, integer (Sequence_id),
\r\n(2) the protein sequence accessions from the NCBI, semicolon-separated (NCBI_accessions),
\r\n(3) the PDB accessions, semicolon-separated (PDB_accessions),
\r\n(4) the name of the source or source organism (Source_name),
\r\n(5) the NCBI taxonomy identifier for the source (NCBI_taxonomy_id),
\r\n(6) the taxonomic lineage from the lowest to the highest rank, as inferred from NCBI taxonomy (Lineage),
\r\n(7) the \"protein\" identifier from the GH19ED, integer (Protein_id),
\r\n(8) the \"homologous family\" (or group) identifier from the GH19ED, integer (Homologous_family_id),
\r\n(9) the \"superfamily\" (or subfamily) identifier from the GH19ED, integer (Superfamily_id). For sequence entries assigned to more than one source organism name, only the first taxonomic lineage found in the GH19ED is listed."},{"typeName":"language","multiple":true,"typeClass":"controlledVocabulary","value":["English"]},{"typeName":"project","multiple":true,"typeClass":"compound","value":[{"projectName":{"typeName":"projectName","multiple":false,"typeClass":"primitive","value":"Bundesministerium für Bildung und Forschung: 031B0571A"}},{"projectName":{"typeName":"projectName","multiple":false,"typeClass":"primitive","value":"Deutsche Forschungsgemeinschaft: EXC2075"}}]},{"typeName":"depositor","multiple":false,"typeClass":"primitive","value":"Buchholz, Patrick C. F."},{"typeName":"dateOfDeposit","multiple":false,"typeClass":"primitive","value":"2020-11-30"},{"typeName":"dataSources","multiple":true,"typeClass":"primitive","value":["https://gh19ed.biocatnet.de/","https://www.ncbi.nlm.nih.gov/protein","https://www.rcsb.org/","https://www.ncbi.nlm.nih.gov/taxonomy"]}]},"process":{"displayName":"Process Metadata","name":"process","fields":[]}},"files":[{"label":"GH19ED.tab","restricted":false,"version":3,"datasetVersionId":742,"dataFile":{"id":33717,"persistentId":"doi:10.18419/darus-1163/1","pidURL":"https://doi.org/10.18419/darus-1163/1","filename":"GH19ED.tab","contentType":"text/tab-separated-values","filesize":5239878,"storageIdentifier":"s3://fokus-dv-prod-1:17619942dac-ebf33733d8da","originalFileFormat":"text/tsv","originalFormatLabel":"Tab-Separated Values","originalFileSize":5060317,"originalFileName":"GH19ED.tsv","UNF":"UNF:6:zi8TRxkq1C/pCN14pXTA0Q==","rootDataFileId":-1,"md5":"d8ca2043f7a4ab25792322c82037a74e","checksum":{"type":"MD5","value":"d8ca2043f7a4ab25792322c82037a74e"},"creationDate":"2020-11-30"}}],"citation":"Buchholz, Patrick C. F., 2021, \"Sequence cross-references and taxonomic lineage for glycoside hydrolase family 19\", https://doi.org/10.18419/darus-1163, DaRUS, V1, UNF:6:zi8TRxkq1C/pCN14pXTA0Q== [fileUNF]"}}