ethereum · pcw109550 · Apr 22, 2024 · Apr 22, 2024 · Apr 22, 2024 · Apr 22, 2024
diff --git a/Changelog.md b/Changelog.md
@@ -2,7 +2,7 @@
 
 Language Features:
  * Introduce a new overload ``require(bool, Error)`` that allows usage of ``require`` functions with custom errors. This feature is available in the ``via-ir`` pipeline only.
-
+ * Introduce support for binary literals using the ``0b`` prefix.
 
 Compiler Features:
 

diff --git a/docs/grammar/SolidityLexer.g4 b/docs/grammar/SolidityLexer.g4
@@ -221,6 +221,15 @@ fragment EvenHexDigits: HexCharacter HexCharacter ('_'? HexCharacter HexCharacte
 //@doc:inline
 fragment HexCharacter: [0-9A-Fa-f];
 
+/**
+ * Binary numbers consist of a '0b' prefix and one or more binary digits that may be delimited by underscores.
+ */
+BinNumber: '0' 'b' BinDigits;
+//@doc:inline
+fragment BinDigits: BinCharacter ('_'? BinCharacter)*;
+//@doc:inline
+fragment BinCharacter: [01];
+
 /**
  * Scanned but not used by any rule, i.e, disallowed.
  * solc parser considers number starting with '0', not immediately followed by '.' or 'x' as

diff --git a/docs/grammar/SolidityParser.g4 b/docs/grammar/SolidityParser.g4
@@ -440,9 +440,9 @@ hexStringLiteral: HexString+;
 unicodeStringLiteral: UnicodeStringLiteral+;
 
 /**
- * Number literals can be decimal or hexadecimal numbers with an optional unit.
+ * Number literals can be decimal, hexadecimal or binary numbers with an optional unit.
  */
-numberLiteral: DecimalNumber | HexNumber;
+numberLiteral: DecimalNumber | HexNumber | BinNumber;
 
 /**
  * A curly-braced block of statements. Opens its own scope.

diff --git a/docs/types/conversion.rst b/docs/types/conversion.rst
@@ -130,7 +130,7 @@ If the array is shorter than the target type, it will be padded with zeros at th
         }
     }
 
-.. index:: ! literal;conversion, literal;rational, literal;hexadecimal number
+.. index:: ! literal;conversion, literal;rational, literal;hexadecimal number, literal;binary number
 .. _types-conversion-literals:
 
 Conversions between Literals and Elementary Types
@@ -139,7 +139,7 @@ Conversions between Literals and Elementary Types
 Integer Types
 -------------
 
-Decimal and hexadecimal number literals can be implicitly converted to any integer type
+Decimal, hexadecimal and binary number literals can be implicitly converted to any integer type
 that is large enough to represent it without truncation:
 
 .. code-block:: solidity
@@ -153,15 +153,18 @@ that is large enough to represent it without truncation:
     converted to an integer type. From 0.8.0, such explicit conversions are as strict as implicit
     conversions, i.e., they are only allowed if the literal fits in the resulting range.
 
-.. index:: literal;string, literal;hexadecimal
+.. index:: literal;string, literal;hexadecimal, literal;binary
 
 Fixed-Size Byte Arrays
 ----------------------
 
 Decimal number literals cannot be implicitly converted to fixed-size byte arrays. Hexadecimal
 number literals can be, but only if the number of hex digits exactly fits the size of the bytes
-type. As an exception both decimal and hexadecimal literals which have a value of zero can be
-converted to any fixed-size bytes type:
+type. The same applies to binary number literals, which can only be implicitly converted if
+the number of binary digits exactly fits the size of the bytes type.
+
+The only exception is that zero literals in any of the above forms (binary, decimal and hexadecimal)
+can be converted to any fixed-size bytes type:
 
 .. code-block:: solidity
 

diff --git a/docs/types/value-types.rst b/docs/types/value-types.rst
@@ -451,7 +451,7 @@ The literal ``MeE`` is equivalent to ``M * 10**E``.
 Examples include ``2e10``, ``-2e10``, ``2e-10``, ``2.5e1``.
 
 Underscores can be used to separate the digits of a numeric literal to aid readability.
-For example, decimal ``123_000``, hexadecimal ``0x2eff_abde``, scientific decimal notation ``1_2e345_678`` are all valid.
+For example, decimal ``123_000``, hexadecimal ``0x2eff_abde``, binary ``0b01011111_11010000`` and scientific decimal notation ``1_2e345_678`` are all valid.
 Underscores are only allowed between two digits and only one consecutive underscore is allowed.
 There is no additional semantic meaning added to a number literal containing underscores,
 the underscores are ignored.

diff --git a/liblangutil/Common.h b/liblangutil/Common.h
@@ -30,6 +30,11 @@ inline bool isHexDigit(char c)
 		('A' <= c && c <= 'F');
 }
 
+inline bool isBinDigit(char const c)
+{
+	return '0' <= c && c <= '1'; // '0' and '1' are adjacent, so the range covers exactly the two binary digits
+}
+
 inline bool isWhiteSpace(char c)
 {
 	return c == ' ' || c == '\n' || c == '\t' || c == '\r';

diff --git a/liblangutil/Scanner.cpp b/liblangutil/Scanner.cpp
@@ -73,6 +73,7 @@ std::string to_string(ScannerError _errorCode)
 		case ScannerError::IllegalToken: return "Invalid token.";
 		case ScannerError::IllegalHexString: return "Expected even number of hex-nibbles.";
 		case ScannerError::IllegalHexDigit: return "Hexadecimal digit missing or invalid.";
+		case ScannerError::IllegalBinDigit: return "Binary digit missing or invalid.";
 		case ScannerError::IllegalCommentTerminator: return "Expected multi-line comment-terminator.";
 		case ScannerError::IllegalEscapeSequence: return "Invalid escape sequence.";
 		case ScannerError::UnicodeCharacterInNonUnicodeString: return "Invalid character in string. If you are trying to use Unicode characters, use a unicode\"...\" string literal.";
@@ -931,7 +932,7 @@ Token Scanner::scanNumber(char _charSeen)
 	else
 	{
 		solAssert(_charSeen == 0, "");
-		// if the first character is '0' we must check for octals and hex
+		// if the first character is '0' we must check for octal, hex and binary numbers
 		if (m_char == '0')
 		{
 			addLiteralCharAndAdvance();
@@ -947,6 +948,17 @@ Token Scanner::scanNumber(char _charSeen)
 				while (isHexDigit(m_char) || m_char == '_') // We keep the underscores for later validation
 					addLiteralCharAndAdvance();
 			}
+			else if (m_char == 'b')
+			{
+				// binary number
+				kind = BINARY;
+				addLiteralCharAndAdvance();
+				if (!isBinDigit(m_char))
+					return setError(ScannerError::IllegalBinDigit); // we must have at least one bin digit after 'b'
+
+				while (isBinDigit(m_char) || m_char == '_') // We keep the underscores for later validation
+					addLiteralCharAndAdvance();
+			}
 			else if (isDecimalDigit(m_char))
 				// We do not allow octal numbers
 				return setError(ScannerError::OctalNotAllowed);

diff --git a/liblangutil/Scanner.h b/liblangutil/Scanner.h
@@ -80,6 +80,7 @@ enum class ScannerError
 	IllegalToken,
 	IllegalHexString,
 	IllegalHexDigit,
+	IllegalBinDigit,
 	IllegalCommentTerminator,
 	IllegalEscapeSequence,
 	UnicodeCharacterInNonUnicodeString,

diff --git a/libsolidity/analysis/TypeChecker.cpp b/libsolidity/analysis/TypeChecker.cpp
@@ -3877,6 +3877,14 @@ void TypeChecker::endVisit(Literal const& _literal)
 			"You can use an expression of the form \"0x1234 * 1 days\" instead."
 		);
 
+	if (_literal.isBinNumber() && _literal.subDenomination() != Literal::SubDenomination::None)
+		m_errorReporter.fatalTypeError(
+			5146_error,
+			_literal.location(),
+			"Binary numbers cannot be used with unit denominations. "
+			"You can use an expression of the form \"0b1011 * 1 days\" instead."
+		);
+
 	if (_literal.subDenomination() == Literal::SubDenomination::Year)
 		m_errorReporter.typeError(
 			4820_error,

diff --git a/libsolidity/ast/AST.cpp b/libsolidity/ast/AST.cpp
@@ -1012,6 +1012,13 @@ bool Literal::isHexNumber() const
 	return boost::starts_with(value(), "0x");
 }
 
+bool Literal::isBinNumber() const
+{
+	// Only number tokens are considered; the value is inspected only when the token matches.
+	bool const isNumberToken = token() == Token::Number;
+	return isNumberToken && boost::starts_with(value(), "0b");
+}
+
 bool Literal::looksLikeAddress() const
 {
 	if (subDenomination() != SubDenomination::None)

diff --git a/libsolidity/ast/AST.h b/libsolidity/ast/AST.h
@@ -2448,6 +2448,8 @@ class Literal: public PrimaryExpression
 
 	/// @returns true if this is a number with a hex prefix.
 	bool isHexNumber() const;
+	/// @returns true if this is a number with a bin prefix.
+	bool isBinNumber() const;
 
 	/// @returns true if this looks like a checksummed address.
 	bool looksLikeAddress() const;

diff --git a/libsolidity/ast/TypeProvider.cpp b/libsolidity/ast/TypeProvider.cpp
@@ -370,6 +370,12 @@ RationalNumberType const* TypeProvider::rationalNumber(Literal const& _literal)
 			if (digitCount % 2 == 0 && (digitCount / 2) <= 32)
 				compatibleBytesType = fixedBytes(static_cast<unsigned>(digitCount / 2));
 		}
+		else if (_literal.isBinNumber())
+		{
+			size_t const digitCount = _literal.valueWithoutUnderscores().length() - 2;
+			if (digitCount % 8 == 0 && digitCount <= 256)
+				compatibleBytesType = fixedBytes(static_cast<unsigned>(digitCount / 8));
+		}
 
 		return rationalNumber(std::get<1>(validLiteral), compatibleBytesType);
 	}

diff --git a/libsolidity/ast/Types.cpp b/libsolidity/ast/Types.cpp
@@ -916,6 +916,16 @@ std::tuple<bool, rational> RationalNumberType::isValidLiteral(Literal const& _li
 			// process as hex
 			value = bigint(valueString);
 		}
+		else if (boost::starts_with(valueString, "0b"))
+		{
+			// process as bin
+			bigint tempValue;
+			size_t valueStringLength = valueString.length();
+			for (size_t i = 0; i < valueStringLength - 2; i++)
+				if (valueString[i + 2] != '0')
+					boost::multiprecision::bit_set(tempValue, static_cast<unsigned int>(valueStringLength - 3 - i));
+			value = tempValue;
+		}
 		else if (expPoint != valueString.end())
 		{
 			// Parse mantissa and exponent. Checks numeric limit.

diff --git a/test/liblangutil/Scanner.cpp b/test/liblangutil/Scanner.cpp
@@ -225,6 +225,23 @@ BOOST_AUTO_TEST_CASE(hex_numbers)
 	BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Illegal);
 }
 
+BOOST_AUTO_TEST_CASE(bin_numbers)
+{
+	TestScanner scanner("var x = 0b01001010101001;"); // binary literal embedded in a statement
+	BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Var);
+	BOOST_CHECK_EQUAL(scanner.next(), Token::Identifier);
+	BOOST_CHECK_EQUAL(scanner.next(), Token::Assign);
+	BOOST_CHECK_EQUAL(scanner.next(), Token::Number);
+	BOOST_CHECK_EQUAL(scanner.currentLiteral(), "0b01001010101001"); // prefix and leading zero stay in the literal text
+	BOOST_CHECK_EQUAL(scanner.next(), Token::Semicolon);
+	BOOST_CHECK_EQUAL(scanner.next(), Token::EOS);
+	scanner.reset("0b1011"); // binary literal as the only input
+	BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Number);
+	BOOST_CHECK_EQUAL(scanner.currentLiteral(), "0b1011");
+	scanner.reset("0B1011"); // an upper-case 'B' is expected to be rejected
+	BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Illegal);
+}
+
 BOOST_AUTO_TEST_CASE(octal_numbers)
 {
 	TestScanner scanner("07");
@@ -404,7 +421,7 @@ BOOST_AUTO_TEST_CASE(number_literals_with_trailing_underscore_at_eos)
 
 BOOST_AUTO_TEST_CASE(negative_numbers)
 {
-	TestScanner scanner("var x = -.2 + -0x78 + -7.3 + 8.9 + 2e-2;");
+	TestScanner scanner("var x = -.2 + -0x78 + -7.3 + 8.9 + 2e-2 + -0b11011;");
 	BOOST_CHECK_EQUAL(scanner.currentToken(), Token::Var);
 	BOOST_CHECK_EQUAL(scanner.next(), Token::Identifier);
 	BOOST_CHECK_EQUAL(scanner.next(), Token::Assign);
@@ -425,6 +442,10 @@ BOOST_AUTO_TEST_CASE(negative_numbers)
 	BOOST_CHECK_EQUAL(scanner.next(), Token::Add);
 	BOOST_CHECK_EQUAL(scanner.next(), Token::Number);
 	BOOST_CHECK_EQUAL(scanner.currentLiteral(), "2e-2");
+	BOOST_CHECK_EQUAL(scanner.next(), Token::Add);
+	BOOST_CHECK_EQUAL(scanner.next(), Token::Sub);
+	BOOST_CHECK_EQUAL(scanner.next(), Token::Number);
+	BOOST_CHECK_EQUAL(scanner.currentLiteral(), "0b11011");
 	BOOST_CHECK_EQUAL(scanner.next(), Token::Semicolon);
 	BOOST_CHECK_EQUAL(scanner.next(), Token::EOS);
 }

diff --git a/test/libsolidity/semanticTests/various/bin_number_literal_operator_complex.sol b/test/libsolidity/semanticTests/various/bin_number_literal_operator_complex.sol
@@ -0,0 +1,47 @@
+contract test {
+    function complex_random_arithmetics() public pure returns (uint d) {
+        uint a = ((0b1101 + 0b101010 - 0b1011) << 2) * (0b100011 & 0b11111) + ((0b1100 ^ 0b1011) >> 1) * (0b1110 % 0b101) % 0b101000011;
+        uint b = (((0b100011 * 0b11) + (0b1100 ^ 0b1011)) & 0b111111) * ((0b1010101 % (0b11001 + 0b1110)) << 3);
+        uint c = ((0b1001 + 0b111 - 0b11) * (0b1 & 0b1111) + (0b10111 ^ 0b1101) + (0b1100101 & 0b100011)) >> 2 * (0b1001 + 0b1011 << 1); // shifts bind looser than * and +: the shift amount is 2 * ((9 + 11) << 1) = 80, so c == 0
+        d = (a + b + c * (0b101 & 0b11111) + (0b11100 | 0b1010) + (0b1101 ^ (0b1010 << 2))) % 0b1000000000000000000000000000000000000000000000000000000000000001;
+    }
+    function complex_fibonacci_identity() public pure returns (uint c) {
+        // \sum_{i=0}^{n - 1} F_{i} ** 2 == F_{n - 1} * F_{n}
+        c += 0b0 * 0b0;
+        c += 0b1 * 0b1;
+        c += 0b1 * 0b1;
+        c += 0b10 * 0b10;
+        c += 0b11 * 0b11;
+        c += 0b101 * 0b101;
+        c += 0b1000 * 0b1000;
+        c += 0b1101 * 0b1101;
+        c += 0b10101 * 0b10101;
+        c += 0b100010 * 0b100010;
+        c += 0b110111 * 0b110111;
+        c += 0b1011001 * 0b1011001;
+        c += 0b10010000 * 0b10010000;
+        c += 0b11101001 * 0b11101001;
+        c += 0b101111001 * 0b101111001;
+        c += 0b1001100010 * 0b1001100010;
+        c += 0b1111011011 * 0b1111011011;
+        c += 0b11000111101 * 0b11000111101;
+        c += 0b101000011000 * 0b101000011000;
+        c += 0b1000001010101 * 0b1000001010101;
+        c += 0b1101001101101 * 0b1101001101101;
+        c += 0b10101011000010 * 0b10101011000010;
+        c -= 0b10101011000010 * 0b100010100101111; // F_21 * F_22 = 10946 * 17711, which cancels the sum of squares above
+    }
+    function complex_vandermonde_determinant() public pure returns (uint e) {
+        // [[1, 1, 1, 1], [1, 2, 4, 8], [1, 3, 9, 27], [1, 4, 16, 64]]
+        uint a = (0b10 * (0b1001 * 0b1000000 - 0b11011 * 0b10000) - 0b100 * (0b11 * 0b1000000 - 0b11011 * 0b100) + 0b1000 * (0b11 * 0b10000 - 0b1001 * 0b100));
+        uint b = (0b1 * (0b1001 * 0b1000000 - 0b11011 * 0b10000) - 0b100 * (0b1 * 0b1000000 - 0b11011 * 0b1) + 0b1000 * (0b1 * 0b10000 - 0b1001 * 0b1));
+        uint c = (0b1 * (0b11 * 0b1000000 - 0b11011 * 0b100) - 0b10 * (0b1 * 0b1000000 - 0b11011 * 0b1) + 0b1000 * (0b1 * 0b100 - 0b11 * 0b1));
+        uint d = (0b1 * (0b11 * 0b10000 - 0b1001 * 0b100) - 0b10 * (0b1 * 0b10000 - 0b1001 * 0b1) + 0b100 * (0b1 * 0b100 - 0b11 * 0b1));
+        e = a + c; // a, b, c, d are the 3x3 minors along the first row; positive terms are summed first
+        e -= b + d; // det = a - b + c - d = 48 - 52 + 18 - 2
+    }
+}
+// ----
+// complex_random_arithmetics() -> 0xcdf
+// complex_fibonacci_identity() -> 0x0
+// complex_vandermonde_determinant() -> 0xc
diff --git a/test/libsolidity/semanticTests/various/bin_number_literal_operator_simple.sol b/test/libsolidity/semanticTests/various/bin_number_literal_operator_simple.sol
@@ -0,0 +1,58 @@
+contract test {
+    function add() public pure returns (uint c) {
+        c = 0b01 + 0b110; // 1 + 6
+    }
+    function sub() public pure returns (uint c) {
+        c = 0b0111110 - 0b110; // 62 - 6
+    }
+    function mul() public pure returns (uint c) {
+        c = 0b110 * 0b01011; // 6 * 11
+    }
+    function div() public pure returns (uint c) {
+        c = 0b10111 / uint256(0b11); // 23 / 3; the cast makes this an integer division instead of a fractional literal expression
+    }
+    function mod() public pure returns (uint c) {
+        c = 0b110101010 % 0b1011; // 426 % 11
+    }
+    function xor() public pure returns (uint c) {
+        c = 0b110101 ^ 0b001010;
+    }
+    function and() public pure returns (uint c) {
+        c = 0b110010 & 0b010101;
+    }
+    function or() public pure returns (uint c) {
+        c = 0b101010 | 0b110001;
+    }
+    function shiftleft() public pure returns (uint c) {
+        c = 0b11111111 << 0b10000; // 0xff << 16
+    }
+    function shiftright() public pure returns (uint c) {
+        c = 0b111111110000000000000000 >> 0b10000; // 0xff0000 >> 16
+    }
+    function compound_add() public pure returns (uint c) {
+        c =  0b11110011000; // 1944
+        c += 0b00001100111; // 103
+    }
+    function compound_sub() public pure returns (uint c) {
+        c =  0b1111111111; // 1023
+        c -= 0b1010101010; // 682
+    }
+    function compound_mul() public pure returns (uint c) {
+        c =  0b110101011; // 427
+        c *= 0b10110101; // 181
+    }
+}
+// ----
+// add() -> 0x7
+// sub() -> 0x38
+// mul() -> 0x42
+// div() -> 0x7
+// mod() -> 0x8
+// xor() -> 0x3f
+// and() -> 0x10
+// or() -> 0x3b
+// shiftleft() -> 0xff0000
+// shiftright() -> 0xff
+// compound_add() -> 0x7ff
+// compound_sub() -> 0x155
+// compound_mul() -> 0x12de7
diff --git a/test/libsolidity/semanticTests/various/test_underscore_in_bin.sol b/test/libsolidity/semanticTests/various/test_underscore_in_bin.sol
@@ -0,0 +1,10 @@
+contract test {
+    function f(bool cond) public pure returns (uint256) {
+        uint32 x = 0b0001001000110100_10101011; // 0x1234ab; the underscore only groups digits
+        uint256 y = 0b0001001000110100_1010101111001101_0001001000110100; // 0x1234abcd1234
+        return cond ? x : y;
+    }
+}
+// ----
+// f(bool): true -> 0x1234ab
+// f(bool): false -> 0x1234abcd1234
diff --git a/test/libsolidity/syntaxTests/denominations/combining_bin_and_denomination.sol b/test/libsolidity/syntaxTests/denominations/combining_bin_and_denomination.sol
@@ -0,0 +1,5 @@
+contract C {
+	uint constant x = 0b01 wei; // binary literal combined with a unit denomination
+}
+// ----
+// TypeError 5146: (32-40): Binary numbers cannot be used with unit denominations. You can use an expression of the form "0b1011 * 1 days" instead.
diff --git a/test/libsolidity/syntaxTests/literals/invalid_bin_number_end.sol b/test/libsolidity/syntaxTests/literals/invalid_bin_number_end.sol
@@ -0,0 +1,5 @@
+contract C {
+    uint x = 0b10a; // an identifier character directly follows the binary digits
+}
+// ----
+// ParserError 8936: (26-30): Identifier-start is not allowed at end of a number.
diff --git a/test/libsolidity/syntaxTests/literals/invalid_bin_number_start.sol b/test/libsolidity/syntaxTests/literals/invalid_bin_number_start.sol
@@ -0,0 +1,5 @@
+contract C {
+    uint x = 0b20; // the first character after the prefix is not a binary digit
+}
+// ----
+// ParserError 8936: (26-28): Binary digit missing or invalid.