Skip to content

Commit ba3a680

Browse files
authored
Return valid POI for deterministically failed SGs (#4774)
Requesting a POI for a deterministically failed subgraph at a block past the failure now returns the latest valid POI (the one for the block at which the subgraph failed) instead of no POI at all.
1 parent 97e0adc commit ba3a680

File tree

4 files changed

+92
-19
lines changed

4 files changed

+92
-19
lines changed

core/src/subgraph/runner.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,10 @@ where
127127
// There's no point in calling it if we have no current or parent block
128128
// pointers, because there would be: no block to revert to or to search
129129
// errors from (first execution).
130+
//
131+
// We attempt to unfail deterministic errors because some of them are
132+
// caused by wrong data being consumed from the providers. This has been a
133+
// frequent case in the past, so this helps recover on a larger scale.
130134
let _outcome = self
131135
.inputs
132136
.store

store/postgres/src/deployment_store.rs

Lines changed: 24 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -918,25 +918,24 @@ impl DeploymentStore {
918918
block: BlockPtr,
919919
) -> Result<Option<[u8; 32]>, StoreError> {
920920
let indexer = *indexer;
921-
let site3 = site.cheap_clone();
922-
let site4 = site.cheap_clone();
923-
let site5 = site.cheap_clone();
921+
let site2 = site.cheap_clone();
924922
let store = self.cheap_clone();
925-
let block2 = block.cheap_clone();
926923

927-
let entities = self
924+
let entities: Option<(Vec<Entity>, BlockPtr)> = self
928925
.with_conn(move |conn, cancel| {
926+
let site = site.clone();
929927
cancel.check_cancel()?;
930928

931-
let layout = store.layout(conn, site4.cheap_clone())?;
929+
let layout = store.layout(conn, site.cheap_clone())?;
932930

933931
if !layout.supports_proof_of_indexing() {
934932
return Ok(None);
935933
}
936934

937935
conn.transaction::<_, CancelableError<anyhow::Error>, _>(move || {
936+
let mut block_ptr = block.cheap_clone();
938937
let latest_block_ptr =
939-
match Self::block_ptr_with_conn(conn, site4.cheap_clone())? {
938+
match Self::block_ptr_with_conn(conn, site.cheap_clone())? {
940939
Some(inner) => inner,
941940
None => return Ok(None),
942941
};
@@ -951,30 +950,38 @@ impl DeploymentStore {
951950
// The best we can do right now is just to make sure that the block number
952951
// is high enough.
953952
if latest_block_ptr.number < block.number {
954-
return Ok(None);
955-
}
953+
// If a subgraph has failed deterministically, then any block past its head
954+
// should return the same POI: the one for the block of the fatal error.
955+
let fatal_error = ErrorDetail::fatal(conn, &site.deployment)?;
956+
block_ptr = match fatal_error {
957+
Some(se) => TryInto::<SubgraphError>::try_into(se)?
958+
.block_ptr
959+
.unwrap_or(block_ptr),
960+
None => return Ok(None),
961+
};
962+
};
956963

957964
let query = EntityQuery::new(
958-
site4.deployment.cheap_clone(),
959-
block.number,
965+
site.deployment.cheap_clone(),
966+
block_ptr.number,
960967
EntityCollection::All(vec![(
961968
POI_OBJECT.cheap_clone(),
962969
AttributeNames::All,
963970
)]),
964971
);
965972
let entities = store
966-
.execute_query::<Entity>(conn, site4, query)
973+
.execute_query::<Entity>(conn, site, query)
967974
.map(|(entities, _)| entities)
968975
.map_err(anyhow::Error::from)?;
969976

970-
Ok(Some(entities))
977+
Ok(Some((entities, block_ptr)))
971978
})
972979
.map_err(Into::into)
973980
})
974981
.await?;
975982

976-
let entities = if let Some(entities) = entities {
977-
entities
983+
let (entities, block_ptr) = if let Some((entities, bp)) = entities {
984+
(entities, bp)
978985
} else {
979986
return Ok(None);
980987
};
@@ -995,10 +1002,10 @@ impl DeploymentStore {
9951002
})
9961003
.collect::<Result<HashMap<_, _>, anyhow::Error>>()?;
9971004

998-
let info = self.subgraph_info(&site5).map_err(anyhow::Error::from)?;
1005+
let info = self.subgraph_info(&site2).map_err(anyhow::Error::from)?;
9991006

10001007
let mut finisher =
1001-
ProofOfIndexingFinisher::new(&block2, &site3.deployment, &indexer, info.poi_version);
1008+
ProofOfIndexingFinisher::new(&block_ptr, &site2.deployment, &indexer, info.poi_version);
10021009
for (name, region) in by_causality_region.drain() {
10031010
finisher.add_causality_region(&name, &region);
10041011
}

tests/src/fixture/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -262,7 +262,7 @@ impl Drop for TestContext {
262262
pub struct Stores {
263263
network_name: String,
264264
chain_head_listener: Arc<ChainHeadUpdateListener>,
265-
network_store: Arc<Store>,
265+
pub network_store: Arc<Store>,
266266
chain_store: Arc<ChainStore>,
267267
}
268268

tests/tests/runner_tests.rs

Lines changed: 63 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ use graph_tests::helpers::run_cmd;
2424
use slog::{o, Discard, Logger};
2525

2626
struct RunnerTestRecipe {
27-
stores: Stores,
27+
pub stores: Stores,
2828
subgraph_name: SubgraphName,
2929
hash: DeploymentHash,
3030
}
@@ -503,6 +503,68 @@ async fn fatal_error() -> anyhow::Result<()> {
503503
Ok(())
504504
}
505505

506+
#[tokio::test]
507+
async fn poi_for_deterministically_failed_sg() -> anyhow::Result<()> {
508+
let RunnerTestRecipe {
509+
stores,
510+
subgraph_name,
511+
hash,
512+
} = RunnerTestRecipe::new("fatal-error").await;
513+
514+
let blocks = {
515+
let block_0 = genesis();
516+
let block_1 = empty_block(block_0.ptr(), test_ptr(1));
517+
let block_2 = empty_block(block_1.ptr(), test_ptr(2));
518+
let block_3 = empty_block(block_2.ptr(), test_ptr(3));
519+
// let block_4 = empty_block(block_3.ptr(), test_ptr(4));
520+
vec![block_0, block_1, block_2, block_3]
521+
};
522+
523+
let stop_block = blocks.last().unwrap().block.ptr();
524+
525+
let chain = chain(blocks.clone(), &stores, None).await;
526+
let ctx = fixture::setup(subgraph_name.clone(), &hash, &stores, &chain, None, None).await;
527+
528+
ctx.start_and_sync_to_error(stop_block).await;
529+
530+
// Go through the indexing status API to also test it.
531+
let status = ctx.indexing_status().await;
532+
assert!(status.health == SubgraphHealth::Failed);
533+
assert!(status.entity_count == 1.into()); // Only PoI
534+
let err = status.fatal_error.unwrap();
535+
assert!(err.block.number == 3.into());
536+
assert!(err.deterministic);
537+
538+
let sg_store = stores.network_store.subgraph_store();
539+
540+
let poi2 = sg_store
541+
.get_proof_of_indexing(&hash, &None, test_ptr(2))
542+
.await
543+
.unwrap();
544+
545+
// All POIs from the failure block (3) onwards should be the same
546+
let poi3 = sg_store
547+
.get_proof_of_indexing(&hash, &None, test_ptr(3))
548+
.await
549+
.unwrap();
550+
assert!(poi2 != poi3);
551+
552+
let poi4 = sg_store
553+
.get_proof_of_indexing(&hash, &None, test_ptr(4))
554+
.await
555+
.unwrap();
556+
assert_eq!(poi3, poi4);
557+
assert!(poi2 != poi4);
558+
559+
let poi100 = sg_store
560+
.get_proof_of_indexing(&hash, &None, test_ptr(100))
561+
.await
562+
.unwrap();
563+
assert_eq!(poi4, poi100);
564+
assert!(poi2 != poi100);
565+
566+
Ok(())
567+
}
506568
async fn build_subgraph(dir: &str) -> DeploymentHash {
507569
build_subgraph_with_yarn_cmd(dir, "deploy:test").await
508570
}

0 commit comments

Comments
 (0)