mirror of
https://github.com/moltbot/moltbot.git
synced 2026-04-23 22:55:24 +00:00
test: tighten qa live scenarios
This commit is contained in:
@@ -57,6 +57,9 @@ steps:
|
||||
- set: wakeMarker
|
||||
value:
|
||||
expr: "`QA-RESTART-${randomUUID().slice(0, 8)}`"
|
||||
- set: wakeStartIndex
|
||||
value:
|
||||
expr: "state.getSnapshot().messages.length"
|
||||
- call: applyConfig
|
||||
args:
|
||||
- env:
|
||||
@@ -65,6 +68,8 @@ steps:
|
||||
ref: nextConfig
|
||||
sessionKey:
|
||||
ref: sessionKey
|
||||
deliveryContext:
|
||||
expr: "({ channel: 'qa-channel', to: `channel:${config.channelId}` })"
|
||||
note:
|
||||
ref: wakeMarker
|
||||
- try:
|
||||
@@ -99,6 +104,8 @@ steps:
|
||||
params: [candidate]
|
||||
expr: "candidate.text.includes(wakeMarker)"
|
||||
- 60000
|
||||
- sinceIndex:
|
||||
ref: wakeStartIndex
|
||||
catchAs: wakeError
|
||||
catch:
|
||||
- throw:
|
||||
|
||||
@@ -8,7 +8,7 @@ objective: Verify the agent degrades gracefully when memory tools are unavailabl
|
||||
successCriteria:
|
||||
- Memory tools are absent from the effective tool inventory.
|
||||
- Agent does not hallucinate the hidden fact.
|
||||
- Agent says it could not confirm and surfaces the limitation.
|
||||
- Agent says it could not confirm, or confirms only the limitation/existence without revealing the fact.
|
||||
docsRefs:
|
||||
- docs/concepts/memory.md
|
||||
- docs/tools/index.md
|
||||
@@ -39,6 +39,8 @@ execution:
|
||||
- won't reveal
|
||||
- won’t reveal
|
||||
- will not reveal
|
||||
- confirmed: the hidden fact is present
|
||||
- hidden fact is present
|
||||
```
|
||||
|
||||
```yaml qa-flow
|
||||
|
||||
@@ -19,8 +19,8 @@ execution:
|
||||
kind: flow
|
||||
summary: Verify switching models preserves session context and tool use instead of dropping into plain-text only behavior.
|
||||
config:
|
||||
initialPrompt: "Read QA_KICKOFF_TASK.md and summarize the QA mission in one clause before any model switch."
|
||||
followupPrompt: "Switch models now. Tool continuity check: reread QA_KICKOFF_TASK.md and mention the handoff in one short sentence."
|
||||
initialPrompt: "Read repo/qa/scenarios/index.md and summarize the QA scenario pack mission in one clause before any model switch."
|
||||
followupPrompt: "The harness has already requested the alternate model for this turn. Do not call session_status or change models yourself. Tool continuity check: use the read tool to reread repo/qa/scenarios/index.md, then mention the model handoff and QA mission in one short sentence."
|
||||
promptSnippet: "Tool continuity check"
|
||||
```
|
||||
|
||||
@@ -64,7 +64,7 @@ steps:
|
||||
args:
|
||||
- lambda:
|
||||
expr: "state.getSnapshot().messages.slice(beforeSwitchCursor).filter((candidate) => candidate.direction === 'outbound' && candidate.conversation.id === 'qa-operator' && hasModelSwitchContinuityEvidence(candidate.text)).at(-1)"
|
||||
- 10000
|
||||
- expr: resolveQaLiveTurnTimeoutMs(env, 20000, env.alternateModel)
|
||||
- assert:
|
||||
expr: hasModelSwitchContinuityEvidence(outbound.text)
|
||||
message:
|
||||
|
||||
Reference in New Issue
Block a user