{"id":1631,"identifier":"darus-622","persistentUrl":"https://doi.org/10.18419/darus-622","protocol":"doi","authority":"10.18419","publisher":"DaRUS","publicationDate":"2020-01-30","storageIdentifier":"s3://10.18419/darus-622","datasetVersion":{"id":299,"datasetId":1631,"datasetPersistentId":"doi:10.18419/darus-622","storageIdentifier":"s3://10.18419/darus-622","versionNumber":1,"versionMinorNumber":1,"versionState":"RELEASED","UNF":"UNF:6:eFcDsa8udm3F11lB7M2q6w==","lastUpdateTime":"2021-05-03T07:35:54Z","releaseTime":"2021-05-03T07:35:54Z","createTime":"2020-03-10T07:55:40Z","publicationDate":"2020-01-30","citationDate":"2020-01-30","license":{"name":"CC BY 4.0","uri":"http://creativecommons.org/licenses/by/4.0","iconUri":"https://licensebuttons.net/l/by/4.0/88x31.png"},"fileAccessRequest":false,"metadataBlocks":{"citation":{"displayName":"Citation Metadata","name":"citation","fields":[{"typeName":"title","multiple":false,"typeClass":"primitive","value":"Query sequences for the update of the ExED"},{"typeName":"alternativeTitle","multiple":false,"typeClass":"primitive","value":"Query sequences for the update of the Expansin Engineering Database"},{"typeName":"author","multiple":true,"typeClass":"compound","value":[{"authorName":{"typeName":"authorName","multiple":false,"typeClass":"primitive","value":"Lohoff, Caroline"},"authorAffiliation":{"typeName":"authorAffiliation","multiple":false,"typeClass":"primitive","value":"Universität Stuttgart"}},{"authorName":{"typeName":"authorName","multiple":false,"typeClass":"primitive","value":"Buchholz, Patrick C. F."},"authorAffiliation":{"typeName":"authorAffiliation","multiple":false,"typeClass":"primitive","value":"Universität Stuttgart"},"authorIdentifierScheme":{"typeName":"authorIdentifierScheme","multiple":false,"typeClass":"controlledVocabulary","value":"ORCID"},"authorIdentifier":{"typeName":"authorIdentifier","multiple":false,"typeClass":"primitive","value":"0000-0001-5967-3777"}}]},{"typeName":"datasetContact","multiple":true,"typeClass":"compound","value":[{"datasetContactName":{"typeName":"datasetContactName","multiple":false,"typeClass":"primitive","value":"Pleiss, Jürgen"},"datasetContactAffiliation":{"typeName":"datasetContactAffiliation","multiple":false,"typeClass":"primitive","value":"Universität Stuttgart"}}]},{"typeName":"dsDescription","multiple":true,"typeClass":"compound","value":[{"dsDescriptionValue":{"typeName":"dsDescriptionValue","multiple":false,"typeClass":"primitive","value":"Query sequences for the individual BLAST searches used to update the Expansin Engineering Database (ExED, https://exed.biocatnet.de/)."}}]},{"typeName":"subject","multiple":true,"typeClass":"controlledVocabulary","value":["Medicine, Health and Life Sciences"]},{"typeName":"keyword","multiple":true,"typeClass":"compound","value":[{"keywordValue":{"typeName":"keywordValue","multiple":false,"typeClass":"primitive","value":"Protein sequence"},"keywordVocabulary":{"typeName":"keywordVocabulary","multiple":false,"typeClass":"primitive","value":"EDAM"},"keywordVocabularyURI":{"typeName":"keywordVocabularyURI","multiple":false,"typeClass":"primitive","value":"http://edamontology.org/data_2976"}},{"keywordValue":{"typeName":"keywordValue","multiple":false,"typeClass":"primitive","value":"Amino acid sequence"},"keywordVocabulary":{"typeName":"keywordVocabulary","multiple":false,"typeClass":"primitive","value":"NCIT"},"keywordVocabularyURI":{"typeName":"keywordVocabularyURI","multiple":false,"typeClass":"primitive","value":"http://purl.obolibrary.org/obo/NCIT_C13187"}}]},{"typeName":"publication","multiple":true,"typeClass":"compound","value":[{"publicationCitation":{"typeName":"publicationCitation","multiple":false,"typeClass":"primitive","value":"Lohoff C., Buchholz P. C. F., Le Roes-Hill M. & Pleiss J. (2020). The Expansin Engineering Database: a navigation and classification tool for expansins and homologues. Proteins: Structure, Function, and Bioinformatics 89:2."},"publicationIDType":{"typeName":"publicationIDType","multiple":false,"typeClass":"controlledVocabulary","value":"doi"},"publicationIDNumber":{"typeName":"publicationIDNumber","multiple":false,"typeClass":"primitive","value":"10.1002/prot.26001"},"publicationURL":{"typeName":"publicationURL","multiple":false,"typeClass":"primitive","value":"https://doi.org/10.1002/prot.26001"}}]},{"typeName":"depositor","multiple":false,"typeClass":"primitive","value":"Buchholz, Patrick C. F."},{"typeName":"dateOfDeposit","multiple":false,"typeClass":"primitive","value":"2020-01-27"},{"typeName":"dataSources","multiple":true,"typeClass":"primitive","value":["Expansin Engineering Database (https://exed.biocatnet.de/)","Protein Database (https://www.ncbi.nlm.nih.gov/protein/)","RCSB Protein Data Bank (https://www.rcsb.org/)","UniProt (https://www.uniprot.org/)"]}]},"process":{"displayName":"Process Metadata","name":"process","fields":[{"typeName":"processMethods","multiple":true,"typeClass":"compound","value":[{"processMethodsName":{"typeName":"processMethodsName","multiple":false,"typeClass":"primitive","value":"UCLUST"},"processMethodsDescription":{"typeName":"processMethodsDescription","multiple":false,"typeClass":"primitive","value":"sequence clustering using cluster_fast command from USEARCH"},"processMethodsPars":{"typeName":"processMethodsPars","multiple":false,"typeClass":"primitive","value":"sequence identity threshold"}}]},{"typeName":"processMethodsPar","multiple":true,"typeClass":"compound","value":[{"processMethodsParName":{"typeName":"processMethodsParName","multiple":false,"typeClass":"primitive","value":"sequence identity threshold"},"processMethodsParUnit":{"typeName":"processMethodsParUnit","multiple":false,"typeClass":"primitive","value":"0.8"}}]},{"typeName":"processSoftware","multiple":true,"typeClass":"compound","value":[{"processSoftwareName":{"typeName":"processSoftwareName","multiple":false,"typeClass":"primitive","value":"USEARCH"},"processSoftwareVersion":{"typeName":"processSoftwareVersion","multiple":false,"typeClass":"primitive","value":"11.0.667"},"processSoftwareURL":{"typeName":"processSoftwareURL","multiple":false,"typeClass":"primitive","value":"https://www.drive5.com/usearch/"},"processSoftwareCitation":{"typeName":"processSoftwareCitation","multiple":false,"typeClass":"primitive","value":"Edgar, R. C. (2010). Search and clustering orders of magnitude faster than BLAST. Bioinformatics, 26(19), 2460-2461. https://doi.org/10.1093/bioinformatics/btq461"}}]}]}},"files":[{"description":"Seed (query) sequences for the initial setup of the ExED. Protein sequences are given in FASTA format. Headers correspond to sequence identifiers in the style of UniprotKB.","label":"Update_1_Seed_sequences_Hfams_1-25.fasta","restricted":false,"version":1,"datasetVersionId":299,"dataFile":{"id":1657,"persistentId":"doi:10.18419/darus-622/3","pidURL":"https://doi.org/10.18419/darus-622/3","filename":"Update_1_Seed_sequences_Hfams_1-25.fasta","contentType":"application/octet-stream","filesize":8914,"description":"Seed (query) sequences for the initial setup of the ExED. Protein sequences are given in FASTA format. Headers correspond to sequence identifiers in the style of UniprotKB.","storageIdentifier":"s3://fokus-dv-prod-1:16ff175af84-7e84e6617c57","rootDataFileId":-1,"md5":"4f5de838715758c51a8cf33de96da0e8","checksum":{"type":"MD5","value":"4f5de838715758c51a8cf33de96da0e8"},"creationDate":"2020-01-29"}},{"description":"Seed (query) sequences for the first update of the ExED (USEARCH centroid sequences). Protein sequences are given in FASTA format with one line per sequence. Headers correspond to sequence identifiers of the Expansin Engineering Database.","label":"Update_2_Centroids_USEARCH.fasta","restricted":false,"version":1,"datasetVersionId":299,"dataFile":{"id":1654,"persistentId":"doi:10.18419/darus-622/1","pidURL":"https://doi.org/10.18419/darus-622/1","filename":"Update_2_Centroids_USEARCH.fasta","contentType":"application/octet-stream","filesize":6619,"description":"Seed (query) sequences for the first update of the ExED (USEARCH centroid sequences). Protein sequences are given in FASTA format with one line per sequence. Headers correspond to sequence identifiers of the Expansin Engineering Database.","storageIdentifier":"s3://fokus-dv-prod-1:16ff13e54bb-c236891cc8bd","rootDataFileId":-1,"md5":"5ae0036abf33f3585e5629a8337b7039","checksum":{"type":"MD5","value":"5ae0036abf33f3585e5629a8337b7039"},"creationDate":"2020-01-29"}},{"description":"Tabular file with headers: sequence identifier from the Expansin Engineering Database (1st column) and NCBI accession.version (2nd column). This file lists the sequence accessions used in the first update of the ExED.","label":"Update_2_Centroids_USEARCH.tab","restricted":false,"version":1,"datasetVersionId":299,"dataFile":{"id":1656,"persistentId":"doi:10.18419/darus-622/2","pidURL":"https://doi.org/10.18419/darus-622/2","filename":"Update_2_Centroids_USEARCH.tab","contentType":"text/tab-separated-values","filesize":505,"description":"Tabular file with headers: sequence identifier from the Expansin Engineering Database (1st column) and NCBI accession.version (2nd column). This file lists the sequence accessions used in the first update of the ExED.","storageIdentifier":"s3://fokus-dv-prod-1:16ff1736594-bf75198ec29a","originalFileFormat":"text/csv","originalFormatLabel":"Comma Separated Values","originalFileSize":471,"originalFileName":"Update_2_Centroids_USEARCH.csv","UNF":"UNF:6:eFcDsa8udm3F11lB7M2q6w==","rootDataFileId":-1,"md5":"b7bb4f4ac61adf7e13764a72e2a3af02","checksum":{"type":"MD5","value":"b7bb4f4ac61adf7e13764a72e2a3af02"},"creationDate":"2020-01-29"}}],"citation":"Lohoff, Caroline; Buchholz, Patrick C. F., 2020, \"Query sequences for the update of the ExED\", https://doi.org/10.18419/darus-622, DaRUS, V1, UNF:6:eFcDsa8udm3F11lB7M2q6w== [fileUNF]"}}