How to Get Hash Code of a String in C++

hash function for string

I've had nice results with djb2 by Dan Bernstein.

unsigned long
hash(unsigned char *str)
{
unsigned long hash = 5381;
int c;

while (c = *str++)
hash = ((hash << 5) + hash) + c; /* hash * 33 + c */

return hash;
}

how to get hash code of a string in c++

Boost provides a hash function:

boost hash

#include <boost/functional/hash.hpp>

int hashCode()
{
boost::hash<std::string> string_hash;

return string_hash("Hash me");
}

How to replicate java hashcode in C language

... and may the source be with you, Luke

                int h = 0; // added for clarity

1496 int off = offset;
1497 char val[] = value;
1498 int len = count;
1499
1500 for (int i = 0; i < len; i++) {
1501 h = 31*h + val[off++];
1502 }
1503 hash = h;

The offset and count are offset and length of the string content in the char buffer. It need not start at 0, neither span till the end of it.

The source code is from OpenJDK here but it is supposed to return identical values. It is Java, not C, but if you know C you should be able to understand and convert.

This is for String. The default hash code of an object is derived from the object address in Java Virtual Machine. There is no way (and most likely no any sense) to replicate this.

How to calculate the hash of a string literal using only the C preprocessor?

The question "How to calculate the hash of a string literal using only the C preprocessor?"
is valid, however I think you're adding a red-herring by including details about __FILE__ and logging ID's.

This means anyone answering needs to solve the problem you describe, or answer the question on hashing a string with the pre-processor (which may not be a good solution in your particular case!).

As it happens, __FILE__ expands to variable, not a literal string (GCC at least), so you will need to define the filename as a constant. You could use the build system to pass along a define for each for example.

As others have pointed out you can calculate the hash and pass this in via the build-system, although this avoids the question about hashing a sting literal.


Whatever the case, this question comes up when I searched for using the pre-processor for hashing, and none of the answers cover this, so heres is an answer that covers the string hashing part.

This is possible, albeit quite verbose

/**
* Implement compile-time string hashing on string literals.
*
* This macro implements the widely used "djb" hash apparently posted
* by Daniel Bernstein to comp.lang.c some time ago. The 32 bit
* unsigned hash value starts at 5381 and for each byte 'c' in the
* string, is updated: ``hash = hash * 33 + c``. This
* function uses the signed value of each byte.
*
* note: this is the same hash method that glib 2.34.0 uses.
*/

#define SEED 5381

#if 0
// correct but causes insane expansion
# define _SH(e, c) (((e) << 5) + (e) + (unsigned char)(c))
#elif defined(__GNUC__)
// Use statement-expression extension
# define _SH(e, c) ({ unsigned int _e = (unsigned int)(e); (_e << 5) + _e + (unsigned char)c; })
#else
// use an inline function, the compiler will be able to optimize this out.
static inline unsigned int _SH(unsigned int e, unsigned char c)
{
unsigned int _e = (unsigned int)e;
return (_e << 5) + _e + (unsigned char)c;
}
#endif

#define _SH_1(a) _SH(SEED, (a)[0])
#define _SH_2(a) _SH(_SH_1(a), (a)[1])
#define _SH_3(a) _SH(_SH_2(a), (a)[2])
#define _SH_4(a) _SH(_SH_3(a), (a)[3])
#define _SH_5(a) _SH(_SH_4(a), (a)[4])
#define _SH_6(a) _SH(_SH_5(a), (a)[5])
#define _SH_7(a) _SH(_SH_6(a), (a)[6])
#define _SH_8(a) _SH(_SH_7(a), (a)[7])
#define _SH_9(a) _SH(_SH_8(a), (a)[8])
#define _SH_10(a) _SH(_SH_9(a), (a)[9])
#define _SH_11(a) _SH(_SH_10(a), (a)[10])
#define _SH_12(a) _SH(_SH_11(a), (a)[11])
#define _SH_13(a) _SH(_SH_12(a), (a)[12])
#define _SH_14(a) _SH(_SH_13(a), (a)[13])
#define _SH_15(a) _SH(_SH_14(a), (a)[14])
#define _SH_16(a) _SH(_SH_15(a), (a)[15])
#define _SH_17(a) _SH(_SH_16(a), (a)[16])
#define _SH_18(a) _SH(_SH_17(a), (a)[17])
#define _SH_19(a) _SH(_SH_18(a), (a)[18])
#define _SH_20(a) _SH(_SH_19(a), (a)[19])
#define _SH_21(a) _SH(_SH_20(a), (a)[20])
#define _SH_22(a) _SH(_SH_21(a), (a)[21])
#define _SH_23(a) _SH(_SH_22(a), (a)[22])
#define _SH_24(a) _SH(_SH_23(a), (a)[23])
#define _SH_25(a) _SH(_SH_24(a), (a)[24])
#define _SH_26(a) _SH(_SH_25(a), (a)[25])
#define _SH_27(a) _SH(_SH_26(a), (a)[26])
#define _SH_28(a) _SH(_SH_27(a), (a)[27])
#define _SH_29(a) _SH(_SH_28(a), (a)[28])
#define _SH_30(a) _SH(_SH_29(a), (a)[29])
#define _SH_31(a) _SH(_SH_30(a), (a)[30])
#define _SH_32(a) _SH(_SH_31(a), (a)[31])

// initial check prevents too-large strings from compiling
#define STRHASH(a) ( \
(void)(sizeof(int[(sizeof(a) > 33 ? -1 : 1)])), \
(sizeof(a) == 1) ? SEED : \
(sizeof(a) == 2) ? _SH_1(a) : \
(sizeof(a) == 3) ? _SH_2(a) : \
(sizeof(a) == 4) ? _SH_3(a) : \
(sizeof(a) == 4) ? _SH_3(a) : \
(sizeof(a) == 5) ? _SH_4(a) : \
(sizeof(a) == 6) ? _SH_5(a) : \
(sizeof(a) == 7) ? _SH_6(a) : \
(sizeof(a) == 8) ? _SH_7(a) : \
(sizeof(a) == 9) ? _SH_8(a) : \
(sizeof(a) == 10) ? _SH_9(a) : \
(sizeof(a) == 11) ? _SH_10(a) : \
(sizeof(a) == 12) ? _SH_11(a) : \
(sizeof(a) == 13) ? _SH_12(a) : \
(sizeof(a) == 14) ? _SH_13(a) : \
(sizeof(a) == 15) ? _SH_14(a) : \
(sizeof(a) == 16) ? _SH_15(a) : \
(sizeof(a) == 17) ? _SH_16(a) : \
(sizeof(a) == 18) ? _SH_17(a) : \
(sizeof(a) == 19) ? _SH_18(a) : \
(sizeof(a) == 20) ? _SH_19(a) : \
(sizeof(a) == 21) ? _SH_20(a) : \
(sizeof(a) == 22) ? _SH_21(a) : \
(sizeof(a) == 23) ? _SH_22(a) : \
(sizeof(a) == 24) ? _SH_23(a) : \
(sizeof(a) == 25) ? _SH_24(a) : \
(sizeof(a) == 26) ? _SH_25(a) : \
(sizeof(a) == 27) ? _SH_26(a) : \
(sizeof(a) == 28) ? _SH_27(a) : \
(sizeof(a) == 29) ? _SH_28(a) : \
(sizeof(a) == 30) ? _SH_29(a) : \
(sizeof(a) == 31) ? _SH_30(a) : \
(sizeof(a) == 32) ? _SH_31(a) : \
(sizeof(a) == 33) ? _SH_32(a) : \
0)
// last zero is unreachable

// only for comparison
unsigned int strhash_func(const void *str)
{
const signed char *p;
unsigned int h = 5381;

for (p = str; *p != '\0'; p++) {
h = (h << 5) + h + (unsigned int)*p;
}

return h;
}

/* -------------------------------------------------------------------- */
#include <stdio.h>

#define TEST_STR1 "Hello World"
#define TEST_STR2 "Testing 123"
int main(void)
{
unsigned int A = STRHASH(TEST_STR1);
unsigned int B = STRHASH(TEST_STR2);

printf("String hash: const %u <- '%s'\n", STRHASH(TEST_STR1), TEST_STR1);
printf("String hash: const %u <- '%s'\n", STRHASH(TEST_STR2), TEST_STR2);
printf("String hash: dyn %u <- '%s'\n", strhash_func(TEST_STR1), TEST_STR1);
printf("String hash: dyn %u <- '%s'\n", strhash_func(TEST_STR2), TEST_STR2);

#if defined(__GNUC__)
printf("Is this known at compile time?, answer is: %d\n", __builtin_constant_p(A));
#endif
}

Note, for some reason Clang 5.0 prints answer is: 0, however on closer inspection is does in fact know the value at compile time, __builtin_constant_p just doesn't seem to work as GCC does.

How to get original string from the int generated by gethashcode

For all practical purposes, you can't: there are lot less hash codes than there are strings, so there is more than one original value that would give your same hash code.

Hashing is, in reality, a one-way operation. If someone refers to a reversible hash, this isn't a true hash (because a hash by definition reduces an input set into one of a smaller number of output values). The closest operation to what you describe might be an encryption function - this would allow you to reverse the operation - but this is unlikely to generate as small a number as the 10-digit output in your question.



Related Topics



Leave a reply



Submit