Skip to content

Commit

Permalink
[pdq] Replace POSIX dependent functions with STL equivalent (#1333)
Browse files Browse the repository at this point in the history
  • Loading branch information
ianwal committed Jul 17, 2023
1 parent b626064 commit c0cc68f
Show file tree
Hide file tree
Showing 10 changed files with 91 additions and 74 deletions.
19 changes: 11 additions & 8 deletions pdq/cpp/bin/clusterize256x.cpp
Expand Up @@ -6,6 +6,9 @@
#define _GNU_SOURCE
#endif

#include <fstream>
#include <iostream>

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
Expand Down Expand Up @@ -47,7 +50,7 @@ const int DEFAULT_PDQ_DISTANCE_THRESHOLD = 31;

// ----------------------------------------------------------------
static void handle_fp(
FILE* fp,
std::istream& in,
MIH256<std::string>& mih,
std::map<Hash256, int>& centersToIndices,
int distanceThreshold,
Expand Down Expand Up @@ -131,7 +134,7 @@ int main(int argc, char** argv) {
int counter = 0;
if (argi == argc) {
handle_fp(
stdin,
std::cin,
mih,
centersToIndices,
distanceThreshold,
Expand All @@ -143,15 +146,15 @@ int main(int argc, char** argv) {
} else {
for (; argi < argc; argi++) {
char* filename = argv[argi];
FILE* fp = fopen(filename, "r");
if (fp == nullptr) {
std::ifstream in(filename);
if (!in) {
perror("fopen");
fprintf(stderr, "Could not open \"%s\" for read.\n", filename);
exit(1);
}

handle_fp(
fp,
in,
mih,
centersToIndices,
distanceThreshold,
Expand All @@ -161,7 +164,7 @@ int main(int argc, char** argv) {
traceCount,
doBruteForceQuery);

fclose(fp);
in.close();
}
}

Expand All @@ -170,7 +173,7 @@ int main(int argc, char** argv) {

// ----------------------------------------------------------------
static void handle_fp(
FILE* fp,
std::istream& in,
MIH256<std::string>& mih,
std::map<Hash256, int>& centersToIndices,
int distanceThreshold,
Expand All @@ -183,7 +186,7 @@ static void handle_fp(
std::string metadata;

while (facebook::pdq::io::loadHashAndMetadataFromStream(
fp, hash, metadata, counter)) {
in, hash, metadata, counter)) {
if (traceCount > 0) {
if ((counter % traceCount) == 0) {
fprintf(stderr, "-- %d\n", counter);
Expand Down
12 changes: 5 additions & 7 deletions pdq/cpp/bin/hashtool256.cpp
Expand Up @@ -6,11 +6,11 @@
#define _GNU_SOURCE
#endif

#include <iostream>

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#include <pdq/cpp/common/pdqhashtypes.h>
#include <pdq/cpp/io/hashio.h>

Expand Down Expand Up @@ -71,9 +71,7 @@ static void usage(char* argv0, int rc) {

// ----------------------------------------------------------------
int main(int argc, char** argv) {
srandom(time(nullptr) ^ getpid()); // seed the RNG for Hash256::fuzz

// Parse command-line flags. I'm expliclily not using gflags or other such
// Parse command-line flags. I'm explicitly not using gflags or other such
// libraries, to minimize the number of external dependencies for this
// project.
if (argc < 2) {
Expand Down Expand Up @@ -127,7 +125,7 @@ static void do_slot_norms(
for (auto hash : hashes) {
printf("%s", hash.format().c_str());
for (int i = 0; i < HASH256_NUM_WORDS; i++) {
printf(" %2d", __builtin_popcount(hash.w[i]));
printf(" %2d", hammingNorm16(hash.w[i]));
}
printf("\n");
}
Expand Down Expand Up @@ -179,7 +177,7 @@ static void do_matrix(
std::vector<Hash256> hashes2;

if (argc == 0) {
loadHashesFromStream(stdin, hashes1);
loadHashesFromStream(std::cin, hashes1);
hashes2 = hashes1;
} else if (argc == 1) {
loadHashesFromFile(argv[0], hashes1);
Expand Down
4 changes: 4 additions & 0 deletions pdq/cpp/common/pdqhamming.h
Expand Up @@ -10,7 +10,11 @@
// If your compiler doesn't support __builtin_popcount then feel free to
// undefine this; it is already left undefined for MSVC and other Windows
// builds. (Experiments have shown that using builtin popcount helps
// performance by a few percent -- worth using but OK to live without.)
#if !defined(_MSC_VER) && !defined(WIN32) && !defined(_WIN32) && \
!defined(__WIN32__) && !defined(WIN64) && !defined(_WIN64) && \
!defined(__WIN64__)
#define USE_BUILTIN_POPCOUNT
#endif

namespace facebook {
namespace pdq {
Expand Down
24 changes: 14 additions & 10 deletions pdq/cpp/common/pdqhashtypes.cpp
Expand Up @@ -4,6 +4,8 @@

#include <pdq/cpp/common/pdqhashtypes.h>

#include <random>

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
Expand All @@ -18,6 +20,9 @@ const char hash256_format[] =
"%04hx%04hx%04hx%04hx%04hx%04hx%04hx%04hx"
"%04hx%04hx%04hx%04hx%04hx%04hx%04hx%04hx";

// Pseudo-random source used by Hash256::fuzz to pick bit indices. The engine
// is seeded once, during static initialization, from std::random_device.
// Both objects have internal linkage so the short names `rd` and `gen` cannot
// collide with same-named symbols in other translation units at link time.
// NOTE(review): access to `gen` is unsynchronized -- confirm fuzz is never
// called concurrently from multiple threads.
static std::random_device rd;
static std::mt19937 gen(rd());

// ================================================================
Hash256::Hash256(const char* hex_formatted_string) {
if (strlen(hex_formatted_string) != 64) {
Expand Down Expand Up @@ -50,24 +55,24 @@ Hash256::Hash256(const char* hex_formatted_string) {
}

// ----------------------------------------------------------------
Hash256 Hash256::fromLineOrDie(char* line, int linelen) {
if (line[linelen - 1] == '\n') {
line[linelen - 1] = 0;
Hash256 Hash256::fromLineOrDie(std::string& line) {
if (!line.empty() && line.back() == '\n') {
line.pop_back();
}
return Hash256::fromStringOrDie(line);
}

// ----------------------------------------------------------------
Hash256 Hash256::fromStringOrDie(char* string) {
Hash256 Hash256::fromStringOrDie(const std::string& string) {
Hash256 h;
if (strlen(string) != 64) {
if (string.size() != 64) {
// could throw; only current use is ops-tools which
// would exit anyway.
fprintf(stderr, "Scan \"%s\" failed.\n", string);
fprintf(stderr, "Scan \"%s\" failed.\n", string.c_str());
exit(1);
}
int rv = sscanf(
string,
string.c_str(),
hash256_format,
&h.w[15],
&h.w[14],
Expand All @@ -88,7 +93,7 @@ Hash256 Hash256::fromStringOrDie(char* string) {
if (rv != 16) {
// could throw; only current use is ops-tools which
// would exit anyway.
fprintf(stderr, "Scan \"%s\" failed.\n", string);
fprintf(stderr, "Scan \"%s\" failed.\n", string.c_str());
exit(1);
}
return h;
Expand Down Expand Up @@ -185,11 +190,10 @@ bool Hash256::operator==(const Hash256& that) const {
}

// ----------------------------------------------------------------
// Does not itself call srandom(); caller must.
Hash256 Hash256::fuzz(int numErrorBits) {
Hash256 rv = *this;
for (int i = 0; i < numErrorBits; i++) {
int idx = random() % 256;
int idx = std::uniform_int_distribution<int>(0, 255)(gen);
rv.flipBit(idx);
}
return rv;
Expand Down
9 changes: 5 additions & 4 deletions pdq/cpp/common/pdqhashtypes.h
Expand Up @@ -10,6 +10,7 @@
// ================================================================

#include <pdq/cpp/common/pdqbasetypes.h>
#include <pdq/cpp/common/pdqhamming.h>

#include <stdio.h>
#include <string>
Expand Down Expand Up @@ -73,14 +74,14 @@ struct Hash256 {
int hammingNorm() {
int n = 0;
for (int i = 0; i < HASH256_NUM_WORDS; i++) {
n += __builtin_popcount(this->w[i]);
n += hammingNorm16(this->w[i]);
}
return n;
}
int hammingDistance(const Hash256& that) const {
int n = 0;
for (int i = 0; i < HASH256_NUM_WORDS; i++) {
n += __builtin_popcount(this->w[i] ^ that.w[i]);
n += hammingDistance16(this->w[i], that.w[i]);
}
return n;
}
Expand Down Expand Up @@ -132,8 +133,8 @@ struct Hash256 {
bool operator>=(const Hash256& that) const;
bool operator==(const Hash256& that) const;

static Hash256 fromLineOrDie(char* line, int linelen);
static Hash256 fromStringOrDie(char* string);
static Hash256 fromLineOrDie(std::string& line);
static Hash256 fromStringOrDie(const std::string& string);

std::string format() const;
void dump() { printf("%s", this->format().c_str()); }
Expand Down

0 comments on commit c0cc68f

Please sign in to comment.