From df8a002dc5c2536ad415afc3b6ada97e7279252c Mon Sep 17 00:00:00 2001
From: Sachin Kadyan
Date: Tue, 26 Mar 2024 17:35:45 -0400
Subject: [PATCH] Add script to download embeddings for training SoloSeq

---
 scripts/download_soloseq_embeddings.sh | 34 ++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)
 create mode 100755 scripts/download_soloseq_embeddings.sh

diff --git a/scripts/download_soloseq_embeddings.sh b/scripts/download_soloseq_embeddings.sh
new file mode 100755
index 00000000..63902ec7
--- /dev/null
+++ b/scripts/download_soloseq_embeddings.sh
@@ -0,0 +1,34 @@
+#!/bin/bash
+#
+# Copyright 2024 AlQuraishi Laboratory
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Downloads ESM-1b embeddings used to train OpenFold SoloSeq single-seq model.
+#
+# Usage: bash download_soloseq_embeddings.sh /path/to/download/directory
+set -e
+
+if [[ $# -eq 0 ]]; then
+    echo "Error: download directory must be provided as an input argument."
+    exit 1
+fi
+
+if ! command -v aws &> /dev/null ; then
+    echo "Error: aws could not be found. Please install aws."
+    exit 1
+fi
+
+DOWNLOAD_DIR="${1}/soloseq_embeddings"
+mkdir -p "${DOWNLOAD_DIR}"
+aws s3 cp --no-sign-request --region us-east-1 s3://openfold/soloseq_embeddings/ "${DOWNLOAD_DIR}" --recursive