Delay the 2-difficulty price calculation until 21,600 blocks after the 2.7.2 hardfork (#382)

When calculating the estimated v2 price, don't start considering the PoA1 difficulty multiple until 21,600 blocks after 2.7.2 activation. Also add a fixed lower bound.

Upper bound: 340 Winston per GiB/Minute (or ~0.714 AR per GiB upload)
Lower bound: 170 Winston per GiB/Minute
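As a rough sketch of how such bounds could be applied to the estimated price (illustrative only; the function below and its constants are a hypothetical restatement of the commit message, not code from this diff):

clamp_v2_price(PricePerGiBPerMinute) ->
    %% Illustrative sketch: clamp an estimated v2 price into the bounds above.
    Lower = 170, %% Winston per GiB/Minute (lower bound)
    Upper = 340, %% Winston per GiB/Minute (upper bound)
    min(Upper, max(Lower, PricePerGiBPerMinute)).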
JamesPiechota committed Feb 29, 2024
1 parent 9010dbf commit a50f05a
Showing 3 changed files with 272 additions and 141 deletions.
358 changes: 230 additions & 128 deletions apps/arweave/src/ar_pricing.erl
@@ -47,134 +47,236 @@ get_price_per_gib_minute(Height, LockedRewards, BlockTimeHistory, Denomination)
get_v2_price_per_gib_minute(Height, LockedRewards, BlockTimeHistory, Denomination) ->
{HashRateTotal, RewardTotal} = ar_rewards:get_locked_totals(LockedRewards, Denomination),

case Height - ?BLOCK_TIME_HISTORY_BLOCKS >= ar_fork:height_2_7() of
true ->
{IntervalTotal, VDFIntervalTotal, OneChunkCount, TwoChunkCount} =
lists:foldl(
fun({BlockInterval, VDFInterval, ChunkCount}, {Acc1, Acc2, Acc3, Acc4}) ->
{
Acc1 + BlockInterval,
Acc2 + VDFInterval,
case ChunkCount of
1 -> Acc3 + 1;
_ -> Acc3
end,
case ChunkCount of
1 -> Acc4;
_ -> Acc4 + 1
end
}
end,
{0, 0, 0, 0},
BlockTimeHistory
),
%% The intent of the SolutionsPerPartitionPerVDFStep is to estimate network replica
%% count (how many copies of the weave are stored across the network).
%% The logic behind this is complex - an explanation from @vird:
%%
%% 1. Naive solution: If we assume that each miner stores 1 replica, then we
%% can trivially calculate the network replica count using the network hashrate
%% (which we have) and the weave size (which we also have). However what if on
%% average each miner only stores 50% of the weave? In that case each miner will
%% get fewer hashes per partition (because they will miss out on 2-chunk solutions
%% that fall on the partitions they don't store), and that will push *up* the
%% replica count for a given network hashrate. How much to scale up our replica
%% count is based on the average replica count per miner.
%%
%% 2. Estimate average replica count per miner. Start with this basic assumption:
%% the higher the percentage of the weave a miner stores, the more likely they are
%% to mine a 2-chunk solution. If a miner has 100% of the weave and if the PoA1 and
%% PoA2 difficulties are the same, then, on average, 50% of their solutions will be
%% 1-chunk, and 50% will be 2-chunk.
%%
%% With this we can use the ratio of observed 2-chunk to 1-chunk solutions to
%% estimate the average percentage of the weave each miner stores.
%%
%% 3. However, what happens if the PoA1 difficulty is higher than the PoA2 difficulty?
%% In that case, we'd expect a miner with 100% of the weave to have fewer 1-chunk
%% solutions than 2-chunk solutions. If the PoA1 difficulty is PoA1Mult times higher
%% than the PoA2 difficulty, we'd expect the maximum number of solutions to be:
%%
%% (PoA1Mult + 1) * ?RECALL_RANGE_SIZE div (?DATA_CHUNK_SIZE * PoA1Mult)
%%
%% Or basically 1 1-chunk solution for every PoA1Mult 2-chunk solutions in the
%% full-replica case.
%%
%% 4. Finally, what if the average miner is not mining a full replica? In that case we
%% need to arrive at an equation that weights the 1-chunk and 2-chunk solutions
%% differently - and use that to estimate the expected number of solutions per
%% partition:
%%
%% EstimatedSolutionsPerPartition =
%% (
%% ?RECALL_RANGE_SIZE div PoA1Mult +
%% ?RECALL_RANGE_SIZE * TwoChunkCount div (OneChunkCount * PoA1Mult)
%% ) div (?DATA_CHUNK_SIZE)
%%
%% The SolutionsPerPartitionPerVDFStep combines that average weave calculation
%% with the expected number of solutions per partition per VDF step to arrive at a single
%% number that can be used in the PricePerGiBPerMinute calculation.
PoA1Mult = ar_difficulty:poa1_diff_multiplier(Height),
MaxSolutionsPerPartition =
(PoA1Mult + 1) * ?RECALL_RANGE_SIZE div (?DATA_CHUNK_SIZE * PoA1Mult),
SolutionsPerPartitionPerVDFStep =
case OneChunkCount of
0 ->
MaxSolutionsPerPartition;
_ ->
%% The following is a version of the EstimatedSolutionsPerPartition
%% equation mentioned above that has been simplified to limit rounding
%% errors:
EstimatedSolutionsPerPartition =
(OneChunkCount + TwoChunkCount) * ?RECALL_RANGE_SIZE
div (?DATA_CHUNK_SIZE * OneChunkCount * PoA1Mult),
min(MaxSolutionsPerPartition, EstimatedSolutionsPerPartition)
end,
%% The following walks through the math of calculating the price per GiB per minute.
%% However, to reduce rounding errors due to divs, the uncommented equation at the
%% end is used instead. Logically they should be the same. Notably the '* 2' in
%% SolutionsPerPartitionPerBlock and the 'div 2' in PricePerGiBPerMinute cancel each
%% other out.
%%
%% SolutionsPerPartitionPerSecond =
%% (SolutionsPerPartitionPerVDFStep * VDFIntervalTotal) div IntervalTotal
%% SolutionsPerPartitionPerMinute = SolutionsPerPartitionPerSecond * 60,
%% SolutionsPerPartitionPerBlock = SolutionsPerPartitionPerMinute * 2,
%% EstimatedPartitionCount = max(1, HashRateTotal) div SolutionsPerPartitionPerBlock,
%% EstimatedDataSizeInGiB = EstimatedPartitionCount * (?PARTITION_SIZE) div (?GiB),
%% PricePerGiBPerBlock = max(1, RewardTotal) div EstimatedDataSizeInGiB,
%% PricePerGiBPerMinute = PricePerGiBPerBlock div 2,
PricePerGiBPerMinute =
(
(SolutionsPerPartitionPerVDFStep * VDFIntervalTotal) *
max(1, RewardTotal) * (?GiB) * 60
)
div
(
IntervalTotal * max(1, HashRateTotal) * (?PARTITION_SIZE)
),
log_price_metrics(Height, length(LockedRewards), HashRateTotal, RewardTotal,
IntervalTotal, VDFIntervalTotal, OneChunkCount, TwoChunkCount,
SolutionsPerPartitionPerVDFStep, PricePerGiBPerMinute),
PricePerGiBPerMinute;
false ->
%% 2 recall ranges per partition per second.
SolutionsPerPartitionPerSecond = 2 * (?RECALL_RANGE_SIZE) div (?DATA_CHUNK_SIZE),
SolutionsPerPartitionPerMinute = SolutionsPerPartitionPerSecond * 60,
SolutionsPerPartitionPerBlock = SolutionsPerPartitionPerMinute * 2,
%% Estimated partition count = hash rate / 2 / solutions per partition per minute.
%% 2 minutes is the average block time.
%% Estimated data size = estimated partition count * partition size.
%% Estimated price per gib minute = total block reward / estimated data size
%% in gibibytes.
(max(1, RewardTotal) * (?GiB) * SolutionsPerPartitionPerBlock)
div (max(1, HashRateTotal)
* (?PARTITION_SIZE)
* 2 % The reward is paid every two minutes whereas we are calculating
% the minute rate here.
)
Fork_2_7 = ar_fork:height_2_7(),
Fork_2_7_2 = ar_fork:height_2_7_2(),

case Height of
_ when Height - ?BLOCK_TIME_HISTORY_BLOCKS >= Fork_2_7_2 ->
get_v2_price_per_gib_minute_two_difficulty(
Height, LockedRewards, BlockTimeHistory, HashRateTotal, RewardTotal);
_ when Height - ?BLOCK_TIME_HISTORY_BLOCKS >= Fork_2_7 ->
%% Calculate (but ignore) the price as it will be determined after 2.7.2 - this is
%% so we can log the data to better predict how the price will move.
get_v2_price_per_gib_minute_two_difficulty(
Height, LockedRewards, BlockTimeHistory, HashRateTotal, RewardTotal),
get_v2_price_per_gib_minute_one_difficulty(
Height, LockedRewards, BlockTimeHistory, HashRateTotal, RewardTotal);
_ ->
get_v2_price_per_gib_minute_simple(HashRateTotal, RewardTotal)
end.
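
%% A worked check of the activation delay in the dispatch above, assuming
%% ?BLOCK_TIME_HISTORY_BLOCKS = 21600 (the figure given in the commit message):
%% the two-difficulty calculation only takes effect once
%% Height - 21600 >= Fork_2_7_2, i.e. 21,600 blocks after the 2.7.2 hardfork.
%% At the ~2-minute target block time that is roughly
%% 21600 * 2 = 43,200 minutes = 720 hours = ~30 days.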

get_v2_price_per_gib_minute_two_difficulty(
Height, LockedRewards, BlockTimeHistory, HashRateTotal, RewardTotal) ->
{IntervalTotal, VDFIntervalTotal, OneChunkCount, TwoChunkCount} =
lists:foldl(
fun({BlockInterval, VDFInterval, ChunkCount}, {Acc1, Acc2, Acc3, Acc4}) ->
{
Acc1 + BlockInterval,
Acc2 + VDFInterval,
case ChunkCount of
1 -> Acc3 + 1;
_ -> Acc3
end,
case ChunkCount of
1 -> Acc4;
_ -> Acc4 + 1
end
}
end,
{0, 0, 0, 0},
BlockTimeHistory
),
%% The intent of the SolutionsPerPartitionPerVDFStep is to estimate network replica
%% count (how many copies of the weave are stored across the network).
%% The logic behind this is complex - an explanation from @vird:
%%
%% 1. Naive solution: If we assume that each miner stores 1 replica, then we
%% can trivially calculate the network replica count using the network hashrate
%% (which we have) and the weave size (which we also have). However what if on
%% average each miner only stores 50% of the weave? In that case each miner will
%% get fewer hashes per partition (because they will miss out on 2-chunk solutions
%% that fall on the partitions they don't store), and that will push *up* the
%% replica count for a given network hashrate. How much to scale up our replica
%% count is based on the average replica count per miner.
%%
%% 2. Estimate average replica count per miner. Start with this basic assumption:
%% the higher the percentage of the weave a miner stores, the more likely they are
%% to mine a 2-chunk solution. If a miner has 100% of the weave and if the PoA1 and
%% PoA2 difficulties are the same, then, on average, 50% of their solutions will be
%% 1-chunk, and 50% will be 2-chunk.
%%
%% With this we can use the ratio of observed 2-chunk to 1-chunk solutions to
%% estimate the average percentage of the weave each miner stores.
%%
%% 3. However, what happens if the PoA1 difficulty is higher than the PoA2 difficulty?
%% In that case, we'd expect a miner with 100% of the weave to have fewer 1-chunk
%% solutions than 2-chunk solutions. If the PoA1 difficulty is PoA1Mult times higher
%% than the PoA2 difficulty, we'd expect the maximum number of solutions to be:
%%
%% (PoA1Mult + 1) * ?RECALL_RANGE_SIZE div (?DATA_CHUNK_SIZE * PoA1Mult)
%%
%% Or basically 1 1-chunk solution for every PoA1Mult 2-chunk solutions in the
%% full-replica case.
%%
%% 4. Finally, what if the average miner is not mining a full replica? In that case we
%% need to arrive at an equation that weights the 1-chunk and 2-chunk solutions
%% differently - and use that to estimate the expected number of solutions per
%% partition:
%%
%% EstimatedSolutionsPerPartition =
%% (
%% ?RECALL_RANGE_SIZE div PoA1Mult +
%% ?RECALL_RANGE_SIZE * TwoChunkCount div (OneChunkCount * PoA1Mult)
%% ) div (?DATA_CHUNK_SIZE)
%%
%% The SolutionsPerPartitionPerVDFStep combines that average weave calculation
%% with the expected number of solutions per partition per VDF step to arrive at a single
%% number that can be used in the PricePerGiBPerMinute calculation.
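%%
%% A small worked example with illustrative numbers (not actual protocol
%% constants): suppose ?RECALL_RANGE_SIZE div ?DATA_CHUNK_SIZE = 400 chunks per
%% recall range and PoA1Mult = 2. Then
%% MaxSolutionsPerPartition = (2 + 1) * 400 div 2 = 600 per VDF step for a
%% full replica (400 two-chunk solutions plus 400 div 2 = 200 one-chunk
%% solutions, since PoA1 is twice as hard). If the block time history shows
%% OneChunkCount = 100 and TwoChunkCount = 300, then
%% EstimatedSolutionsPerPartition = (100 + 300) * 400 div (100 * 2) = 800,
%% which is capped at the full-replica maximum of 600.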
PoA1Mult = ar_difficulty:poa1_diff_multiplier(Height),
MaxSolutionsPerPartition =
(PoA1Mult + 1) * ?RECALL_RANGE_SIZE div (?DATA_CHUNK_SIZE * PoA1Mult),
SolutionsPerPartitionPerVDFStep =
case OneChunkCount of
0 ->
MaxSolutionsPerPartition;
_ ->
%% The following is a version of the EstimatedSolutionsPerPartition
%% equation mentioned above that has been simplified to limit rounding
%% errors:
EstimatedSolutionsPerPartition =
(OneChunkCount + TwoChunkCount) * ?RECALL_RANGE_SIZE
div (?DATA_CHUNK_SIZE * OneChunkCount * PoA1Mult),
min(MaxSolutionsPerPartition, EstimatedSolutionsPerPartition)
end,
%% The following walks through the math of calculating the price per GiB per minute.
%% However, to reduce rounding errors due to divs, the uncommented equation at the
%% end is used instead. Logically they should be the same. Notably the '* 2' in
%% SolutionsPerPartitionPerBlock and the 'div 2' in PricePerGiBPerMinute cancel each
%% other out.
%%
%% SolutionsPerPartitionPerSecond =
%% (SolutionsPerPartitionPerVDFStep * VDFIntervalTotal) div IntervalTotal
%% SolutionsPerPartitionPerMinute = SolutionsPerPartitionPerSecond * 60,
%% SolutionsPerPartitionPerBlock = SolutionsPerPartitionPerMinute * 2,
%% EstimatedPartitionCount = max(1, HashRateTotal) div SolutionsPerPartitionPerBlock,
%% EstimatedDataSizeInGiB = EstimatedPartitionCount * (?PARTITION_SIZE) div (?GiB),
%% PricePerGiBPerBlock = max(1, RewardTotal) div EstimatedDataSizeInGiB,
%% PricePerGiBPerMinute = PricePerGiBPerBlock div 2,
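%%
%% Substituting those steps into one another (treating the divs as exact and
%% ignoring the max(1, _) guards) shows the equivalence with the single
%% expression used below; the '* 2' from SolutionsPerPartitionPerBlock cancels
%% against the 'div 2':
%%
%%   PricePerGiBPerMinute
%%     = RewardTotal * ?GiB * SolutionsPerPartitionPerBlock
%%         div (HashRateTotal * ?PARTITION_SIZE * 2)
%%     = SolutionsPerPartitionPerVDFStep * VDFIntervalTotal * RewardTotal * ?GiB * 60
%%         div (IntervalTotal * HashRateTotal * ?PARTITION_SIZE)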
PricePerGiBPerMinute =
(
(SolutionsPerPartitionPerVDFStep * VDFIntervalTotal) *
max(1, RewardTotal) * (?GiB) * 60
)
div
(
IntervalTotal * max(1, HashRateTotal) * (?PARTITION_SIZE)
),
log_price_metrics(get_v2_price_per_gib_minute_two_difficulty,
Height, length(LockedRewards), HashRateTotal, RewardTotal,
IntervalTotal, VDFIntervalTotal, OneChunkCount, TwoChunkCount,
SolutionsPerPartitionPerVDFStep, PricePerGiBPerMinute),
PricePerGiBPerMinute.

get_v2_price_per_gib_minute_one_difficulty(
Height, LockedRewards, BlockTimeHistory, HashRateTotal, RewardTotal) ->
{IntervalTotal, VDFIntervalTotal, OneChunkCount, TwoChunkCount} =
lists:foldl(
fun({BlockInterval, VDFInterval, ChunkCount}, {Acc1, Acc2, Acc3, Acc4}) ->
{
Acc1 + BlockInterval,
Acc2 + VDFInterval,
case ChunkCount of
1 -> Acc3 + 1;
_ -> Acc3
end,
case ChunkCount of
1 -> Acc4;
_ -> Acc4 + 1
end
}
end,
{0, 0, 0, 0},
BlockTimeHistory
),
%% The intent of the SolutionsPerPartitionPerVDFStep is to estimate network replica
%% count (how many copies of the weave are stored across the network).
%% The logic behind this is complex - an explanation from @vird:
%%
%% 1. Naive solution: If we assume that each miner stores 1 replica, then we
%% can trivially calculate the network replica count using the network hashrate
%% (which we have) and the weave size (which we also have). However what if on
%% average each miner only stores 50% of the weave? In that case each miner will
%% get fewer hashes per partition (because they will miss out on 2-chunk solutions
%% that fall on the partitions they don't store), and that will push *up* the
%% replica count for a given network hashrate. How much to scale up our replica
%% count is based on the average replica count per miner.
%% 2. Estimate average replica count per miner. Start with this basic assumption:
%% the higher the percentage of the weave a miner stores, the more likely they are
%% to mine a 2-chunk solution. If a miner has 100% of the weave, then, on average,
%% 50% of their solutions will be 1-chunk, and 50% will be 2-chunk.
%%
%% With this we can use the ratio of observed 2-chunk to 1-chunk solutions to
%% estimate the average percentage of the weave each miner stores.
%%
%% The SolutionsPerPartitionPerVDFStep combines that average weave % calculation
%% with the expected number of solutions per partition per VDF step to arrive at a single
%% number that can be used in the PricePerGiBPerMinute calculation.
SolutionsPerPartitionPerVDFStep =
case OneChunkCount of
0 ->
2 * (?RECALL_RANGE_SIZE) div (?DATA_CHUNK_SIZE);
_ ->
min(2 * ?RECALL_RANGE_SIZE,
?RECALL_RANGE_SIZE
+ ?RECALL_RANGE_SIZE * TwoChunkCount div OneChunkCount)
div ?DATA_CHUNK_SIZE
end,
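%% A worked example with illustrative numbers (not actual protocol constants):
%% with ?RECALL_RANGE_SIZE div ?DATA_CHUNK_SIZE = 400, a full replica yields at
%% most 2 * 400 = 800 solutions per partition per VDF step. If the history
%% shows OneChunkCount = 400 and TwoChunkCount = 200, the estimate is
%% min(2 * ?RECALL_RANGE_SIZE, ?RECALL_RANGE_SIZE + ?RECALL_RANGE_SIZE * 200 div 400)
%% div ?DATA_CHUNK_SIZE = 400 + 200 = 600.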
%% The following walks through the math of calculating the price per GiB per minute.
%% However, to reduce rounding errors due to divs, the uncommented equation at the
%% end is used instead. Logically they should be the same. Notably the '* 2' in
%% SolutionsPerPartitionPerBlock and the 'div 2' in PricePerGiBPerMinute cancel each
%% other out.
%%
%% SolutionsPerPartitionPerSecond =
%% (SolutionsPerPartitionPerVDFStep * VDFIntervalTotal) div IntervalTotal
%% SolutionsPerPartitionPerMinute = SolutionsPerPartitionPerSecond * 60,
%% SolutionsPerPartitionPerBlock = SolutionsPerPartitionPerMinute * 2,
%% EstimatedPartitionCount = max(1, HashRateTotal) div SolutionsPerPartitionPerBlock,
%% EstimatedDataSizeInGiB = EstimatedPartitionCount * (?PARTITION_SIZE) div (?GiB),
%% PricePerGiBPerBlock = max(1, RewardTotal) div EstimatedDataSizeInGiB,
%% PricePerGiBPerMinute = PricePerGiBPerBlock div 2,
PricePerGiBPerMinute =
(
(SolutionsPerPartitionPerVDFStep * VDFIntervalTotal) *
max(1, RewardTotal) * (?GiB) * 60
)
div
(
IntervalTotal * max(1, HashRateTotal) * (?PARTITION_SIZE)
),
log_price_metrics(get_v2_price_per_gib_minute_one_difficulty,
Height, length(LockedRewards), HashRateTotal, RewardTotal,
IntervalTotal, VDFIntervalTotal, OneChunkCount, TwoChunkCount,
SolutionsPerPartitionPerVDFStep, PricePerGiBPerMinute),
PricePerGiBPerMinute.

get_v2_price_per_gib_minute_simple(HashRateTotal, RewardTotal) ->
%% 2 recall ranges per partition per second.
SolutionsPerPartitionPerSecond = 2 * (?RECALL_RANGE_SIZE) div (?DATA_CHUNK_SIZE),
SolutionsPerPartitionPerMinute = SolutionsPerPartitionPerSecond * 60,
SolutionsPerPartitionPerBlock = SolutionsPerPartitionPerMinute * 2,
%% Estimated partition count = hash rate / 2 / solutions per partition per minute.
%% 2 minutes is the average block time.
%% Estimated data size = estimated partition count * partition size.
%% Estimated price per gib minute = total block reward / estimated data size
%% in gibibytes.
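%% Rearranged (ignoring the max(1, _) guards and integer rounding), the
%% expression returned below is equivalent to:
%%
%%   RewardTotal div (HashRateTotal div SolutionsPerPartitionPerBlock
%%       * ?PARTITION_SIZE div ?GiB) div 2
%%
%% i.e. the block reward divided by the estimated data size in GiB, halved to
%% convert the two-minute block reward into a per-minute rate.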
(max(1, RewardTotal) * (?GiB) * SolutionsPerPartitionPerBlock)
div (max(1, HashRateTotal)
* (?PARTITION_SIZE)
* 2 % The reward is paid every two minutes whereas we are calculating
% the minute rate here.
).

%% @doc Return the minimum required transaction fee for the given number of
%% total bytes stored and gibibyte minute price.
get_tx_fee(Args) ->
@@ -740,7 +842,7 @@ recalculate_usd_to_ar_rate3(#block{ height = PrevHeight, diff = Diff } = B) ->
element(2,MaxAdjustmentDown))}]),
{Rate, CappedScheduledRate}.

log_price_metrics(
log_price_metrics(Event,
Height, RewardHistoryLength, HashRateTotal, RewardTotal, IntervalTotal, VDFIntervalTotal,
OneChunkCount, TwoChunkCount, SolutionsPerPartitionPerVDFStep, PricePerGiBPerMinute) ->

@@ -753,7 +855,7 @@ log_price_metrics(
prometheus_gauge:set(v2_price_per_gibibyte_minute, PricePerGiBPerMinute),
prometheus_gauge:set(network_data_size, EstimatedDataSizeInBytes),

?LOG_DEBUG([{event, get_v2_price_per_gib_minute}, {height, Height},
?LOG_DEBUG([{event, Event}, {height, Height},
{hash_rate_total, HashRateTotal}, {average_hash_rate, AverageHashRate},
{reward_total, RewardTotal},
{interval_total, IntervalTotal}, {vdf_interval_total, VDFIntervalTotal},
