Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added multiple file download API #2962

Open
wants to merge 9 commits into
base: develop
Choose a base branch
from
116 changes: 105 additions & 11 deletions apps/api/R/workflows.R
Original file line number Diff line number Diff line change
Expand Up @@ -49,16 +49,31 @@ getWorkflows <- function(req, model_id=NA, site_id=NA, offset=0, limit=50, res){
result <- list(workflows = qry_res)
result$count <- nrow(qry_res)
if(has_next){
result$next_page <- paste0(
req$rook.url_scheme, "://",
req$HTTP_HOST,
"/api/workflows",
req$PATH_INFO,
substr(req$QUERY_STRING, 0, stringr::str_locate(req$QUERY_STRING, "offset=")[[2]]),
(as.numeric(limit) + as.numeric(offset)),
"&limit=",
limit
)
if(grepl("offset=", req$QUERY_STRING, fixed = TRUE)){
result$next_page <- paste0(
req$rook.url_scheme, "://",
req$HTTP_HOST,
"/api/workflows",
req$PATH_INFO,
substr(req$QUERY_STRING, 0, stringr::str_locate(req$QUERY_STRING, "offset=")[[2]]),
(as.numeric(limit) + as.numeric(offset)),
"&limit=",
limit
)
}
else {
result$next_page <- paste0(
req$rook.url_scheme, "://",
req$HTTP_HOST,
"/api/workflows",
req$PATH_INFO,
substr(req$QUERY_STRING, 0, stringr::str_locate(req$QUERY_STRING, "limit=")[[2]] - 6),
"offset=",
(as.numeric(limit) + as.numeric(offset)),
"&limit=",
limit
)
}
}
if(has_prev) {
result$prev_page <- paste0(
Expand Down Expand Up @@ -200,7 +215,7 @@ getWorkflowStatus <- function(req, id, res){
#* @get /<id>/file/<filename>
getWorkflowFile <- function(req, id, filename, res){
Workflow <- tbl(global_db_pool, "workflows") %>%
select(id, user_id) %>%
select(id, user_id) %>%
filter(id == !!id)

qry_res <- Workflow %>% collect()
Expand Down Expand Up @@ -229,3 +244,82 @@ getWorkflowFile <- function(req, id, filename, res){
return(bin)
}
}


#################################################################################################
#' Get the list of files in a workflow specified by the id
#'
#' Looks the workflow up in the `workflows` table and, when it exists, lists
#' the contents of its folder on this host.
#'
#' @param req Plumber request object (not read by this handler)
#' @param id Workflow id (character)
#' @param res Plumber response object; `status` is set to 404 when the
#'   workflow does not exist
#' @return List with `workflow_id` and `files` (character vector of the file
#'   names found in the workflow folder), or a list with an `error` message
#' @author Nihar Sanda
#* @get /<id>/files

getWorkflowFileDetails <- function(req, id, res){
  # No @serializer annotation on purpose: both outcomes are R lists, which an
  # "application/octet-stream" content-type serializer cannot encode, so the
  # router's default serializer is used instead.
  Workflow <- tbl(global_db_pool, "workflows") %>%
    select(id, user_id) %>%
    filter(id == !!id)

  qry_res <- Workflow %>% collect()

  if (nrow(qry_res) == 0) {
    res$status <- 404
    return(list(error="Workflow with specified ID was not found"))
  }

  # NOTE(review): the folder is derived from DATA_DIR and the id rather than
  # taken from the workflow record. Review feedback on this change asked for
  # the folder stored in BETY to be used instead, because this assumption is
  # often false -- TODO confirm the column name and switch over.
  workflow_dir <- paste0(
    Sys.getenv("DATA_DIR", "/data/"), "workflows/PEcAn_", id
  )

  # list.files() yields character(0) when the folder is empty or missing, so
  # `files` is always present in the response.
  list(workflow_id = id, files = list.files(workflow_dir))
}

#################################################################################################
#' Get the zip of specified files of the workflow specified by the id
#'
#' The file names are read from the `files` array of the JSON request body,
#' e.g. `{"files": ["pecan.xml", "workflow.R"]}`.
#'
#' @param req Plumber request object; `HTTP_CONTENT_TYPE`, `postBody` and
#'   (when AUTH_REQ is enabled) `user$userid` are read
#' @param id Workflow id (character)
#' @param filenames Ignored on input; kept so the handler signature is
#'   unchanged. The names always come from the request body.
#' @param res Plumber response object; `status` is set on failure:
#'   415 (body is not JSON), 400 (body has no usable `files` array),
#'   404 (workflow or any requested file not found), 403 (not the owner)
#' @return Raw vector with the bytes of the zip archive, or NULL on failure
#' @author Nihar Sanda
#* @serializer contentType list(type="application/octet-stream")
#* @post /<id>/file-multiple/

getWorkflowFilesAsZip <- function(req, id, filenames, res){
  # Only a JSON body is understood. A missing header is rejected as well,
  # instead of failing later on an undefined variable.
  content_type <- req$HTTP_CONTENT_TYPE
  if (length(content_type) != 1L ||
      !grepl("^application/json", content_type, ignore.case = TRUE)) {
    res$status <- 415
    return()
  }

  # Malformed JSON, or a body without a `files` array of non-empty strings,
  # is a client error rather than a server crash.
  filenames <- tryCatch(
    jsonlite::fromJSON(req$postBody)$files,
    error = function(e) NULL
  )
  if (!is.character(filenames) || length(filenames) == 0L ||
      anyNA(filenames) || !all(nzchar(filenames))) {
    res$status <- 400
    return()
  }

  Workflow <- tbl(global_db_pool, "workflows") %>%
    select(id, user_id) %>%
    filter(id == !!id)

  qry_res <- Workflow %>% collect()

  if (nrow(qry_res) == 0) {
    res$status <- 404
    return()
  }

  # Ownership does not depend on the individual file, so it is checked once
  # and before the file system is touched.
  if (Sys.getenv("AUTH_REQ") == TRUE) {
    if (qry_res$user_id != req$user$userid) {
      res$status <- 403
      return()
    }
  }

  # NOTE(review): the folder is derived from DATA_DIR and the id rather than
  # taken from the workflow record. Review feedback on this change asked for
  # the folder stored in BETY to be used, and for a check that the workflow
  # lives on this host -- TODO confirm and switch over.
  workflow_dir <- paste0(
    Sys.getenv("DATA_DIR", "/data/"), "workflows/PEcAn_", id
  )
  full_files <- file.path(workflow_dir, filenames)

  # The names come from the client: anything that resolves outside the
  # workflow folder (e.g. via "../") is reported exactly like a missing file.
  root <- normalizePath(workflow_dir, winslash = "/", mustWork = FALSE)
  resolved <- normalizePath(full_files, winslash = "/", mustWork = FALSE)
  inside <- startsWith(resolved, paste0(root, "/"))
  if (!all(file.exists(full_files)) || !all(inside)) {
    res$status <- 404
    return()
  }

  # Build the archive in a per-request temporary file (a fixed "output.zip"
  # in the working directory would be shared between requests) and hand back
  # its bytes, which is what the octet-stream serializer sends as the body.
  zip_path <- tempfile(fileext = ".zip")
  on.exit(unlink(zip_path), add = TRUE)
  zip::zipr(zip_path, full_files)
  readBin(zip_path, what = "raw", n = file.size(zip_path))
}
167 changes: 167 additions & 0 deletions apps/api/pecanapi-spec.yml
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ tags:
description: Everything about PEcAn PFTs (Plant Functional Types)
- name: inputs
description: Everything about PEcAn inputs
- name: posteriors
description: Everything about PEcAn posteriors

#####################################################################################################################
##################################################### API Endpoints #################################################
Expand Down Expand Up @@ -779,6 +781,40 @@ paths:
description: Authentication required
'403':
description: Access forbidden

/api/workflows/{id}/file-multiple/:
post:
tags:
- workflows
summary: Download multiple files
parameters:
- in: path
name: id
description: ID of the PEcAn Workflow
required: true
schema:
type: string
requestBody:
required: true
content:
application/json:
schema:
$ref: '#/components/schemas/WorkflowFiles_POST'


responses:
'200':
description: Download the zip file consisting of the desired files
content:
application/octet-stream:
schema:
type: string
format: binary
'401':
description: Authentication required
'415':
description: Unsupported request content type


/api/runs/:
get:
Expand Down Expand Up @@ -992,6 +1028,123 @@ paths:
description: Access forbidden
'404':
description: Run data not found

/api/posteriors/:
get:
tags:
- posteriors
summary: Search for the posteriors
parameters:
- in: query
name: pft_id
description: If provided, returns all posteriors for the provided pft_id
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

typo between pft_id and model_id

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes sir, I'll fix this typo.

required: false
schema:
type: string
- in: query
name: host_id
description: If provided, returns all posteriors for the provided host_id
required: false
schema:
type: string
- in: query
name: offset
description: The number of posteriors to skip before starting to collect the result set.
schema:
type: integer
minimum: 0
default: 0
required: false
- in: query
name: limit
description: The number of posteriors to return.
schema:
type: integer
default: 50
enum:
- 10
- 20
- 50
- 100
- 500
required: false
responses:
'200':
description: List of posteriors
content:
application/json:
schema:
type: object
properties:
inputs:
type: array
items:
type: object
properties:
id:
type: string
file_name:
type: string
file_path:
type: string
pft_name:
type: string
tag:
type: string
hostname:
type: string
start_date:
type: string
end_date:
type: string
count:
type: integer
next_page:
type: string
prev_page:
type: string

'401':
description: Authentication required
'403':
description: Access forbidden
'404':
description: Posteriors not found
mdietze marked this conversation as resolved.
Show resolved Hide resolved

/api/posteriors/{posterior_id}:
get:
tags:
- posteriors
summary: Download a desired PEcAn posterior file
parameters:
- in: path
name: posterior_id
description: ID of the PEcAn Posterior to be downloaded
required: true
schema:
type: string
- in: query
name: filename
description: Optional filename specified if the id points to a folder instead of file
required: false
schema:
type: string
responses:
'200':
description: Contents of the desired posterior file
content:
application/octet-stream:
schema:
type: string
format: binary
'400':
description: Bad request. Posterior ID points to a directory & filename is not specified
'401':
description: Authentication required
'403':
description: Access forbidden


#####################################################################################################################
###################################################### Components ###################################################
#####################################################################################################################
Expand Down Expand Up @@ -1275,6 +1428,20 @@ components:
dbfiles:
type: string
example: pecan/dbfiles

WorkflowFiles_POST:
type: object

properties:
files:
type: array
items:
type: string
example: [
"pecan.xml",
"workflow.R"
]

securitySchemes:
basicAuth:
type: http
Expand Down