Skip to content

Commit

Permalink
Improvements to osquery AWS logic (#7714)
Browse files Browse the repository at this point in the history
- Introduce the flag aws_imdsv2_request_attempts
  to specify how many attempts should be done
  to retrieve an IMDSv2 token to do a secure request.

- Introduce the flag aws_imdsv2_request_interval
  to specify the base seconds to wait between attempts,
  which scales quadratically with the number of attempts.

- Introduce the flag aws_disable_imdsv1_fallback
  which disables IMDSv1 as a fallback if the IMDSv2 token
  fails to be retrieved.

- Remove the automatic check to see if osquery
  is running on an EC2 instance.

- Improve the retrieval of instance id and region.
  If the retrieval keeps failing, don't cache empty values,
  keep retrying on next requests until it has success,
  then cache the values.

- Improve error message when STS credentials fail to be retrieved.
  The hardcoded error was hiding the true reason for the failure.
  • Loading branch information
Smjert committed Oct 5, 2022
1 parent c4689b5 commit 61ebbb1
Show file tree
Hide file tree
Showing 5 changed files with 179 additions and 115 deletions.
14 changes: 14 additions & 0 deletions docs/wiki/installation/cli-flags.md
Original file line number Diff line number Diff line change
Expand Up @@ -631,3 +631,17 @@ Comma separated list of tables to disable. By default no tables are disabled.
`--enable_tables=table1,table2`

Comma-separated list of tables to enable. By default every table is enabled. If a specific table is set in both `--enable_tables` and `--disable_tables`, disabling takes precedence. If `--enable_tables` is defined and `--disable_tables` is not set, every table except those listed in `--enable_tables` becomes disabled.

## AWS

`--aws_imdsv2_request_attempts=3`

Number of attempts to make when requesting an IMDSv2 token. The token is retrieved from an AWS metadata service that might not always be accessible, and it is used by plugins such as the `AWS Kinesis` and `AWS Firehose` loggers and by the EC2 tables.

`--aws_imdsv2_request_interval=3`

Base number of seconds to wait between attempts at requesting an IMDSv2 token. The wait is multiplied by this base value after every failed attempt, so it grows geometrically (3, 9, 27, ... seconds with the default).

`--aws_disable_imdsv1_fallback=false`

Whether to disable support for IMDSv1 and fail if an IMDSv2 token could not be retrieved.
29 changes: 21 additions & 8 deletions osquery/tables/cloud/aws/ec2_instance_metadata.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
namespace pt = boost::property_tree;

namespace osquery {

DECLARE_bool(aws_disable_imdsv1_fallback);
namespace tables {

/**
Expand Down Expand Up @@ -62,6 +64,12 @@ class Ec2MetaData {
*/
void get(Row& r) const {
  // Fetch the raw response body; doGet() hands back an empty string when
  // there is nothing usable to parse.
  const std::string http_body = doGet();

  if (!http_body.empty()) {
    // Let the concrete extractor fill in the row from the body.
    extractResult(http_body, r);
    return;
  }

  // Nothing came back: report it and leave the row untouched.
  LOG(ERROR) << "Failed to get instance metadata from the metadata service";
}
};
Expand Down Expand Up @@ -127,11 +135,19 @@ class JSONEc2MetaData : public Ec2MetaData {
std::string Ec2MetaData::doGet() const {
const static std::string ec2_metadata_url{kEc2MetadataUrl};

auto token = getIMDSToken();
auto opt_token = getIMDSToken();
http::Request req(ec2_metadata_url + url_suffix_);
if (!token.empty()) {
req << http::Request::Header(kImdsTokenHeader, token);

if (opt_token.has_value()) {
req << http::Request::Header(kImdsTokenHeader, *opt_token);
} else if (FLAGS_aws_disable_imdsv1_fallback) {
/* If the IMDSv2 token cannot be retrieved and we disabled IMDSv1,
we cannot attempt to do a request, so return with empty results. */
VLOG(1) << "Could not retrieve an IMDSv2 token to request the instance id "
"and region. The IMDSv1 fallback is disabled";
return {};
}

http::Client::Options options;
options.timeout(3);
http::Client client(options);
Expand All @@ -142,14 +158,14 @@ std::string Ec2MetaData::doGet() const {

// Silently ignore 404
if (http_status_code == 404) {
return "";
return {};
}

// Log "hard" errors
if (http_status_code != 200) {
VLOG(1) << "Unexpected HTTP response for: " << url_suffix_
<< " Status: " << http_status_code;
return "";
return {};
}

return res.body();
Expand Down Expand Up @@ -209,9 +225,6 @@ void JSONEc2MetaData::extractResult(const std::string& http_body,

QueryData genEc2Metadata(QueryContext& context) {
QueryData results;
if (!isEc2Instance()) {
return results;
}

const static std::vector<std::shared_ptr<Ec2MetaData>> fields(
{std::make_shared<JSONEc2MetaData>(
Expand Down
13 changes: 11 additions & 2 deletions osquery/tables/cloud/aws/ec2_instance_tags.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,18 @@ namespace model = Aws::EC2::Model;

QueryData genEc2InstanceTags(QueryContext& context) {
QueryData results;
std::string instance_id, region;
getInstanceIDAndRegion(instance_id, region);

auto opt_instance_info = getInstanceIDAndRegion();

if (!opt_instance_info.has_value()) {
LOG(WARNING) << "Failed to retrieve region and instance id";
return results;
}

const auto& [instance_id, region] = *opt_instance_info;

if (instance_id.empty() || region.empty()) {
LOG(WARNING) << "Instance id and region are empty, returning no results";
return results;
}

Expand Down
206 changes: 118 additions & 88 deletions osquery/utils/aws/aws_util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,12 @@
#include <aws/sts/model/Credentials.h>

#include <osquery/core/flags.h>
#include <osquery/core/shutdown.h>
#include <osquery/logger/data_logger.h>
#include <osquery/logger/logger.h>
#include <osquery/utils/aws/aws_util.h>
#include <osquery/utils/json/json.h>
#include <osquery/utils/system/time.h>
#include <osquery/utils/aws/aws_util.h>

namespace pt = boost::property_tree;

Expand Down Expand Up @@ -82,10 +84,31 @@ FLAG(string,
"Proxy password for use in AWS client config");
FLAG(bool, aws_debug, false, "Enable AWS SDK debug logging");

// Upper bound on the number of token requests made by getIMDSToken().
// A value of 0 is rejected by the validateIMDSV2RequestAttempts validator.
FLAG(uint32,
aws_imdsv2_request_attempts,
3,
"How many attempts to do at requesting an IMDSv2 token");

// Initial wait, in seconds, after a failed token request. getIMDSToken()
// multiplies the current wait by this same value after every failed attempt,
// so the wait grows as base, base^2, base^3, ...
FLAG(uint32,
aws_imdsv2_request_interval,
3,
"Base seconds to wait between attempts at requesting an IMDSv2 token. "
"Scales quadratically with the number of attempts");

// When set, callers that could not obtain an IMDSv2 token return without
// issuing the metadata request instead of sending it without a token header.
FLAG(bool,
aws_disable_imdsv1_fallback,
false,
"Whether to disable support for IMDSv1 and fail if an IMDSv2 token could "
"not be retrieved");

/// EC2 instance latest metadata URL
const std::string kEc2MetadataUrl =
"http://" + http::kInstanceMetadataAuthority + "/latest/";

/// EC2 instance identity document URL
const std::string kEc2IdentityDocument =
kEc2MetadataUrl + "dynamic/instance-identity/document";

/// Hypervisor UUID file
const std::string kHypervisorUuid = "/sys/hypervisor/uuid";

Expand Down Expand Up @@ -114,6 +137,26 @@ static const std::set<std::string> kAwsRegions = {
// Default AWS region to use when no region set in flags or profile
static RegionName kDefaultAWSRegion = Aws::Region::US_EAST_1;

// To protect the access to the AWS instance id and region that are being cached
static std::mutex cached_values_mutex;

namespace {
/**
 * @brief Flag validator for aws_imdsv2_request_attempts.
 *
 * Any non-zero value is accepted. A value of 0 is rejected and the reason is
 * reported both through osquery::systemLog and on stderr.
 *
 * @param flagname Name of the flag being validated; used in the error text.
 * @param value Value supplied for the flag.
 * @return true when value is greater than 0, false otherwise.
 */
bool validateIMDSV2RequestAttempts(const char* flagname, std::uint32_t value) {
  if (value != 0) {
    return true;
  }

  const std::string message =
      "Only values higher than 0 are supported for " + std::string(flagname);
  osquery::systemLog(message);
  std::cerr << message << std::endl;

  return false;
}
} // namespace

DEFINE_validator(aws_imdsv2_request_attempts, validateIMDSV2RequestAttempts);

std::shared_ptr<Aws::Http::HttpClient>
OsqueryHttpClientFactory::CreateHttpClient(
const Aws::Client::ClientConfiguration& clientConfiguration) const {
Expand Down Expand Up @@ -295,8 +338,8 @@ OsquerySTSAWSCredentialsProvider::GetAWSCredentials() {
// Calculate when our credentials will expire.
token_expire_time_ = current_time + FLAGS_aws_sts_timeout;
} else {
LOG(ERROR) << "Failed to create STS temporary credentials: "
"No STS policy exists for the AWS user/role";
LOG(ERROR) << "Failed to create STS temporary credentials, error: "
<< sts_outcome.GetError().GetMessage();
}
}
return Aws::Auth::AWSCredentials(
Expand Down Expand Up @@ -384,114 +427,101 @@ void initAwsSdk() {
}
}

void getInstanceIDAndRegion(std::string& instance_id, std::string& region) {
static std::atomic<bool> checked(false);
boost::optional<std::pair<std::string, std::string>> getInstanceIDAndRegion() {
static std::string cached_id;
static std::string cached_region;
if (checked || !isEc2Instance()) {
// Return if already checked or this is not EC2 instance
instance_id = cached_id;
region = cached_region;
return;
static bool init_successfully = false;

std::lock_guard<std::mutex> lock(cached_values_mutex);

if (init_successfully) {
return {{cached_id, cached_region}};
}

static std::once_flag once_flag;
std::call_once(once_flag, []() {
if (checked) {
return;
}
initAwsSdk();
http::Request req(kEc2IdentityDocument);
auto opt_token = getIMDSToken();
if (opt_token.has_value()) {
req << http::Request::Header(kImdsTokenHeader, *opt_token);
} else if (FLAGS_aws_disable_imdsv1_fallback) {
/* If the IMDSv2 token cannot be retrieved and we disabled IMDSv1,
we cannot attempt to do a request, so return with empty results. */
VLOG(1) << "Could not retrieve an IMDSv2 token to request the instance id "
"and region. The IMDSv1 fallback is disabled";
return boost::none;
}

initAwsSdk();
http::Request req(kEc2MetadataUrl + "dynamic/instance-identity/document");
auto token = getIMDSToken();
if (!token.empty()) {
req << http::Request::Header(kImdsTokenHeader, token);
}
http::Client::Options options;
options.timeout(3);
http::Client client(options);
http::Client::Options options;
options.timeout(3);
http::Client client(options);

try {
http::Response res = client.get(req);
if (res.status() == 200) {
pt::ptree tree;
std::stringstream ss(res.body());
pt::read_json(ss, tree);
cached_id = tree.get<std::string>("instanceId", ""),
cached_region = tree.get<std::string>("region", ""),
VLOG(1) << "EC2 instance ID: " << cached_id
<< ". Region: " << cached_region;
}
} catch (const std::system_error& e) {
// Assume that this is not EC2 instance
VLOG(1) << "Error getting EC2 instance information: " << e.what();
try {
http::Response res = client.get(req);
if (res.status() == 200) {
pt::ptree tree;
std::stringstream ss(res.body());
pt::read_json(ss, tree);
cached_id = tree.get<std::string>("instanceId", ""),
cached_region = tree.get<std::string>("region", ""),
VLOG(1) << "EC2 instance ID: " << cached_id
<< ". Region: " << cached_region;
}
checked = true;
});
} catch (const std::system_error& e) {
VLOG(1) << "Error getting EC2 instance information: " << e.what();
return boost::none;
}

init_successfully = true;

instance_id = cached_id;
region = cached_region;
return {{cached_id, cached_region}};
}

std::string getIMDSToken() {
boost::optional<std::string> getIMDSToken() {
std::string token;
http::Request req(kEc2MetadataUrl + kImdsTokenResource);
http::Client::Options options;
options.timeout(3);
http::Client client(options);
req << http::Request::Header(kImdsTokenTtlHeader, kImdsTokenTtlDefaultValue);

try {
http::Response res = client.put(req, "", "");
token = res.status() == 200 ? res.body() : "";
} catch (const std::system_error& e) {
VLOG(1) << "Request for " << kImdsTokenResource << " failed:" << e.what();
}
return token;
}

bool isEc2Instance() {
static std::atomic<bool> checked(false);
static std::atomic<bool> is_ec2_instance(false);
if (checked) {
return is_ec2_instance; // Return if already checked
}

static std::once_flag once_flag;
std::call_once(once_flag, []() {
if (checked) {
return;
}
checked = true;

std::ifstream fd(kHypervisorUuid, std::ifstream::in);
if (fd && !(fd.get() == 'e' && fd.get() == 'c' && fd.get() == '2')) {
return; // Not EC2 instance
std::uint32_t attempts = 0;
std::uint32_t interval = FLAGS_aws_imdsv2_request_interval;
while (attempts < FLAGS_aws_imdsv2_request_attempts) {
try {
http::Response res = client.put(req, "", "");
token = res.status() == 200 ? res.body() : "";
} catch (const std::system_error& e) {
VLOG(1) << "Request for " << kImdsTokenResource
<< " failed: " << e.what();
} catch (const std::runtime_error& e) {
VLOG(1) << "Request for " << kImdsTokenResource
<< " failed: " << e.what();
}

auto token = getIMDSToken();
http::Request req(kEc2MetadataUrl);
if (!token.empty()) {
req << http::Request::Header(kImdsTokenHeader, token);
}
http::Client::Options options;
options.timeout(3);
http::Client client(options);
if (token.empty()) {
if (attempts < FLAGS_aws_imdsv2_request_attempts) {
auto should_shutdown =
osquery::waitTimeoutOrShutdown(std::chrono::seconds(interval));
if (should_shutdown) {
return boost::none;
}

try {
http::Response res = client.get(req);
if (res.status() == 200) {
is_ec2_instance = true;
interval *= FLAGS_aws_imdsv2_request_interval;
++attempts;
}
} catch (const std::system_error& e) {
// Assume that this is not EC2 instance
VLOG(1) << "Error checking if this is EC2 instance: " << e.what();
} catch (const std::runtime_error& e) {
VLOG(1) << "Error checking if this is EC2 instance: " << e.what();
continue;
}
});

return is_ec2_instance;
break;
}

if (attempts == FLAGS_aws_imdsv2_request_attempts) {
LOG(ERROR) << "Failed " << FLAGS_aws_imdsv2_request_attempts
<< " attempts at retrieving an IMDSv2 token";
return boost::none;
}

return token;
}

Status getAWSRegion(std::string& region, bool sts, bool validate_region) {
Expand Down

0 comments on commit 61ebbb1

Please sign in to comment.