diff --git a/perf-tests/build.gradle b/perf-tests/build.gradle
new file mode 100644
index 00000000..001973a2
--- /dev/null
+++ b/perf-tests/build.gradle
@@ -0,0 +1,65 @@
+plugins {
+    id 'java'
+}
+
+java {
+    toolchain {
+        languageVersion = JavaLanguageVersion.of(17)
+        vendor = JvmVendorSpec.ADOPTIUM
+    }
+}
+
+repositories {
+    mavenCentral()
+}
+
+def langchainVersion = '1.8.0'
+
+ext {
+    otelVersion = rootProject.ext.otelVersion
+    junitVersion = rootProject.ext.junitVersion
+    slf4jVersion = rootProject.ext.slf4jVersion
+}
+
+dependencies {
+    testImplementation project(":braintrust-sdk")
+    testImplementation project(":braintrust-sdk:instrumentation:langchain_1_8_0")
+    testImplementation project(":braintrust-java-agent:instrumenter")
+    testImplementation(testFixtures(project(":test-harness")))
+
+    testImplementation "io.opentelemetry:opentelemetry-api:${otelVersion}"
+    testImplementation "io.opentelemetry:opentelemetry-sdk:${otelVersion}"
+    testImplementation "io.opentelemetry:opentelemetry-sdk-trace:${otelVersion}"
+    testImplementation "io.opentelemetry:opentelemetry-sdk-logs:${otelVersion}"
+    testImplementation "io.opentelemetry:opentelemetry-sdk-metrics:${otelVersion}"
+
+    testImplementation "dev.langchain4j:langchain4j:${langchainVersion}"
+    testImplementation "dev.langchain4j:langchain4j-http-client:${langchainVersion}"
+    testImplementation "dev.langchain4j:langchain4j-open-ai:${langchainVersion}"
+
+    testImplementation 'net.bytebuddy:byte-buddy-agent:1.17.5'
+
+    testImplementation "org.junit.jupiter:junit-jupiter:${junitVersion}"
+    testRuntimeOnly 'org.junit.platform:junit-platform-launcher'
+    testRuntimeOnly "org.slf4j:slf4j-simple:${slf4jVersion}"
+}
+
+// Disable the default test task so perf tests don't run during ./gradlew test or check.
+// Run explicitly with: ./gradlew :perf-tests:perfTest
+test {
+    enabled = false
+}
+
+task perfTest(type: Test) {
+    useJUnitPlatform()
+    workingDir = rootProject.projectDir
+
+    // Disable JUnit's per-test timeout — perf tests can take a while
+    systemProperty 'junit.jupiter.execution.timeout.default', 'disabled'
+
+    testLogging {
+        events "passed", "skipped", "failed"
+        showStandardStreams = true
+        exceptionFormat "full"
+    }
+}
diff --git a/perf-tests/src/test/java/dev/braintrust/perf/PerfResult.java b/perf-tests/src/test/java/dev/braintrust/perf/PerfResult.java
new file mode 100644
index 00000000..1b208c5b
--- /dev/null
+++ b/perf-tests/src/test/java/dev/braintrust/perf/PerfResult.java
@@ -0,0 +1,27 @@
+package dev.braintrust.perf;
+
+/**
+ * Captures the result of a single performance test run.
+ *
+ * @param config the configuration that produced this result
+ * @param payloadBytes total bytes of the OTLP HTTP request body captured at the wire
+ * @param spanCount number of spans that were exported (as observed by the server)
+ * @param requestCount number of HTTP requests received by the capture server
+ */
+public record PerfResult(PerfRunConfig config, long payloadBytes, int spanCount, int requestCount) {
+
+    /** Bytes per span (approximate). */
+    public double bytesPerSpan() {
+        return spanCount > 0 ? (double) payloadBytes / spanCount : 0;
+    }
+
+    public double payloadMB() {
+        return payloadBytes / (1024.0 * 1024.0);
+    }
+
+    public String summary() {
+        return String.format(
+                "[%s] %d request(s), %d span(s), %.3f MB total (%.1f bytes/span)",
+                config.name(), requestCount, spanCount, payloadMB(), bytesPerSpan());
+    }
+}
diff --git a/perf-tests/src/test/java/dev/braintrust/perf/PerfRunConfig.java b/perf-tests/src/test/java/dev/braintrust/perf/PerfRunConfig.java
new file mode 100644
index 00000000..8d883a01
--- /dev/null
+++ b/perf-tests/src/test/java/dev/braintrust/perf/PerfRunConfig.java
@@ -0,0 +1,24 @@
+package dev.braintrust.perf;
+
+/**
+ * Describes the configuration for a single performance test run.
+ *
+ * <p>Each field controls the shape of the multi-turn conversation that will be generated and
+ * exported. Add new fields here as you add new scenarios (e.g. tool use, streaming, etc.).
+ *
+ * @param name human-readable label for this configuration
+ * @param turns number of conversational turns (user messages sent to the AI service)
+ * @param includeImageAttachment whether to include an image attachment in the first user message
+ */
+public record PerfRunConfig(String name, int turns, boolean includeImageAttachment) {
+
+    /** A multi-turn conversation with an image attachment on the first turn. */
+    public static PerfRunConfig multiTurnWithAttachment() {
+        return new PerfRunConfig("multi-turn-with-attachment", 10, true);
+    }
+
+    /** A multi-turn conversation with text only (no attachments). */
+    public static PerfRunConfig multiTurnTextOnly() {
+        return new PerfRunConfig("multi-turn-text-only", 3, false);
+    }
+}
diff --git a/perf-tests/src/test/java/dev/braintrust/perf/TracePayloadSizeTest.java b/perf-tests/src/test/java/dev/braintrust/perf/TracePayloadSizeTest.java
new file mode 100644
index 00000000..ad6cdb70
--- /dev/null
+++ b/perf-tests/src/test/java/dev/braintrust/perf/TracePayloadSizeTest.java
@@ -0,0 +1,316 @@
+package dev.braintrust.perf;
+
+import static org.junit.jupiter.api.Assertions.*;
+
+import com.sun.net.httpserver.HttpServer;
+import dev.braintrust.TestHarness;
+import dev.braintrust.instrumentation.Instrumenter;
+import dev.langchain4j.data.message.*;
+import dev.langchain4j.memory.chat.MessageWindowChatMemory;
+import dev.langchain4j.model.chat.ChatModel;
+import dev.langchain4j.model.openai.OpenAiChatModel;
+import java.awt.image.BufferedImage;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.net.InetSocketAddress;
+import java.util.ArrayList;
+import java.util.Base64;
+import java.util.List;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.atomic.AtomicLong;
+import javax.imageio.ImageIO;
+import net.bytebuddy.agent.ByteBuddyAgent;
+import org.junit.jupiter.api.*;
+
+/**
+ * Measures the wire-format (OTLP protobuf over HTTP) payload size produced by {@code
+ * BraintrustSpanExporter} for realistic multi-turn LangChain4j conversations.
+ *
+ * <p>The test stands up a plain HTTP capture server on {@code /otel/v1/traces}, configures the
+ * Braintrust SDK to export there (while OpenAI calls go through the TestHarness VCR proxy), runs a
+ * multi-turn conversation, flushes, and records how many bytes the capture server received.
+ *
+ * <p>To add new scenarios, create additional {@link PerfRunConfig} instances and call {@link
+ * #runScenario}.
+ */
+public class TracePayloadSizeTest {
+
+    /**
+     * Base64-encoded JPEG generated at class load time. 512x512 with random pixel noise to simulate
+     * a realistic photo payload (~150KB raw, ~210K base64 chars).
+     */
+    private static final String TEST_IMAGE_BASE64 = generateTestImageBase64(512, 512);
+
+    private HttpServer captureServer;
+    private int capturePort;
+
+    /** Accumulated byte count across all requests for a single scenario. */
+    private AtomicLong capturedBytes;
+
+    /** Number of HTTP requests received. */
+    private AtomicInteger requestCount;
+
+    /**
+     * Latch that fires when at least one export request arrives. We use this to know when the
+     * exporter has sent data, then wait a grace period for any trailing batches.
+     */
+    private CountDownLatch exportLatch;
+
+    /** All results collected during the test class, printed in @AfterEach for comparison. */
+    private final List<PerfResult> results = new ArrayList<>();
+
+    @BeforeAll
+    static void installInstrumentation() {
+        var instrumentation = ByteBuddyAgent.install();
+        Instrumenter.install(instrumentation, TracePayloadSizeTest.class.getClassLoader());
+    }
+
+    @BeforeEach
+    void setUp() throws IOException {
+        capturedBytes = new AtomicLong();
+        requestCount = new AtomicInteger();
+        exportLatch = new CountDownLatch(1);
+
+        captureServer = HttpServer.create(new InetSocketAddress("localhost", 0), 0);
+        capturePort = captureServer.getAddress().getPort();
+
+        // ── OTLP trace export endpoint (the one we're measuring) ──
+        captureServer.createContext(
+                "/otel/v1/traces",
+                exchange -> {
+                    try {
+                        byte[] body = exchange.getRequestBody().readAllBytes();
+                        capturedBytes.addAndGet(body.length);
+                        requestCount.incrementAndGet();
+                        exportLatch.countDown();
+                        exchange.sendResponseHeaders(200, 0);
+                    } finally {
+                        exchange.close();
+                    }
+                });
+
+        // ── Attachment upload flow stubs ──
+        // The SDK's AttachmentProcessor extracts base64 data URIs from span attributes,
+        // replaces them with attachment references, and uploads the data via:
+        //   1. POST /api/apikey/login -> resolve org ID
+        //   2. POST /attachment -> get a signed upload URL
+        //   3. PUT /s3-upload -> upload data to the signed URL
+        //   4. POST /attachment/status -> report upload status
+
+        var loginResponse =
+                "{\"org_info\": [{\"id\": \"00000000-0000-0000-0000-000000000000\","
+                        + " \"name\": \"perf-test-org\"}]}";
+        captureServer.createContext(
+                "/api/apikey/login",
+                exchange -> {
+                    try {
+                        exchange.getRequestBody().readAllBytes(); // drain
+                        // Explicit charset: bare getBytes() uses the platform default
+                        var body = loginResponse.getBytes(java.nio.charset.StandardCharsets.UTF_8);
+                        exchange.getResponseHeaders().set("Content-Type", "application/json");
+                        exchange.sendResponseHeaders(200, body.length);
+                        exchange.getResponseBody().write(body);
+                    } finally {
+                        exchange.close();
+                    }
+                });
+
+        var signedUrl = "http://localhost:" + capturePort + "/s3-upload";
+        var attachmentResponse = "{\"signedUrl\": \"" + signedUrl + "\", \"headers\": {}}";
+        captureServer.createContext(
+                "/attachment",
+                exchange -> {
+                    try {
+                        exchange.getRequestBody().readAllBytes(); // drain
+                        // Explicit charset: bare getBytes() uses the platform default
+                        var body =
+                                attachmentResponse.getBytes(java.nio.charset.StandardCharsets.UTF_8);
+                        exchange.getResponseHeaders().set("Content-Type", "application/json");
+                        exchange.sendResponseHeaders(200, body.length);
+                        exchange.getResponseBody().write(body);
+                    } finally {
+                        exchange.close();
+                    }
+                });
+
+        captureServer.createContext(
+                "/s3-upload",
+                exchange -> {
+                    try {
+                        exchange.getRequestBody().readAllBytes(); // drain
+                        exchange.sendResponseHeaders(200, -1);
+                    } finally {
+                        exchange.close();
+                    }
+                });
+
+        captureServer.start();
+    }
+
+    @AfterEach
+    void tearDown() {
+        if (captureServer != null) {
+            captureServer.stop(0);
+        }
+        if (!results.isEmpty()) {
+            System.out.println("\n=== Perf Results ===");
+            for (PerfResult r : results) {
+                System.out.println(r.summary());
+            }
+            System.out.println("====================\n");
+        }
+    }
+
+    @Test
+    void multiTurnWithAttachment() throws Exception {
+        var result = runScenario(PerfRunConfig.multiTurnWithAttachment());
+        results.add(result);
+        System.out.println(result.summary());
+
+        assertTrue(result.requestCount() > 0, "Expected at least one HTTP request");
+        assertTrue(result.payloadBytes() > 0, "Expected non-empty payload");
+        assertTrue(result.spanCount() > 0, "Expected at least one span");
+    }
+
+    @Test
+    @Disabled
+    void multiTurnTextOnly() throws Exception {
+        var result = runScenario(PerfRunConfig.multiTurnTextOnly());
+        results.add(result);
+        System.out.println(result.summary());
+
+        assertTrue(result.requestCount() > 0, "Expected at least one HTTP request");
+        assertTrue(result.payloadBytes() > 0, "Expected non-empty payload");
+        assertTrue(result.spanCount() > 0, "Expected at least one span");
+    }
+
+    // ─── Core ───────────────────────────────────────────────────────────────────
+
+    /**
+     * Runs a single scenario: sets up the SDK with the capture server as the Braintrust API
+     * endpoint, builds a LangChain4j ChatModel via the VCR-proxied OpenAI, runs a multi-turn
+     * conversation, flushes, and returns the measured result.
+     */
+    private PerfResult runScenario(PerfRunConfig config) throws Exception {
+        capturedBytes.set(0);
+        requestCount.set(0);
+        exportLatch = new CountDownLatch(1);
+
+        // TestHarness sets up:
+        //   - VCR proxy for OpenAI (testHarness.openAiBaseUrl())
+        //   - Braintrust SDK with BraintrustSpanExporter pointing at config.apiUrl()
+        //   - UnitTestSpanExporter for in-memory span capture
+        //
+        // We override apiUrl to point at our capture server so the exporter sends there.
+        var testHarness =
+                TestHarness.setup(
+                        cfg ->
+                                cfg.apiUrl("http://localhost:" + capturePort)
+                                        .autoConvertAIAttachments(true)
+                                        .compressOtelPayload(true));
+
+        // Build the OpenAI-backed ChatModel. ByteBuddy auto-instrumentation intercepts
+        // OpenAiChatModel.Builder.build() and wraps the internal HttpClient with
+        // WrappedHttpClient, which creates OTel spans for each LLM call.
+        ChatModel model =
+                OpenAiChatModel.builder()
+                        .apiKey(testHarness.openAiApiKey())
+                        .baseUrl(testHarness.openAiBaseUrl())
+                        .modelName("gpt-4o-mini")
+                        .temperature(0.0)
+                        .build();
+
+        // Chat memory to accumulate conversation history across turns
+        var memory = MessageWindowChatMemory.withMaxMessages(20);
+
+        try {
+            runConversation(testHarness, model, memory, config);
+
+            // Flush spans through BatchSpanProcessor → BraintrustSpanExporter → capture server
+            var flushResult =
+                    testHarness
+                            .openTelemetry()
+                            .getSdkTracerProvider()
+                            .forceFlush()
+                            .join(30, TimeUnit.SECONDS);
+
+            assertTrue(flushResult.isDone());
+            assertTrue(flushResult.isSuccess());
+
+            boolean received = exportLatch.await(15, TimeUnit.SECONDS);
+            assertTrue(received, "Timed out waiting for span export for: " + config.name());
+
+            // Grace period for any trailing batch exports to be seen by the server
+            Thread.sleep(1_000);
+
+            // Get span count from the in-memory exporter
+            var spans = testHarness.awaitExportedSpans();
+            int spanCount = spans.size();
+
+            return new PerfResult(config, capturedBytes.get(), spanCount, requestCount.get());
+        } finally {
+            testHarness.openTelemetry().getSdkTracerProvider().shutdown().join(5, TimeUnit.SECONDS);
+        }
+    }
+
+    /** Runs a multi-turn conversation under a single root span. */
+    private void runConversation(
+            TestHarness testHarness,
+            ChatModel model,
+            MessageWindowChatMemory memory,
+            PerfRunConfig config) {
+        var tracer = testHarness.openTelemetry().getTracer("perf-test");
+        var rootSpan = tracer.spanBuilder("multi-turn-conversation").startSpan();
+
+        try (var ignored = rootSpan.makeCurrent()) {
+            String[] userPrompts = {
+                "Tell me a story about a wise cracking talking dog.", "tell me another story",
+            };
+
+            for (int turn = 0; turn < config.turns(); turn++) {
+                UserMessage userMessage;
+                var userPrompt = userPrompts[Math.min(turn, userPrompts.length - 1)];
+
+                if (turn == 0 && config.includeImageAttachment()) {
+                    // First turn includes an image attachment alongside the text
+                    userMessage =
+                            UserMessage.from(
+                                    TextContent.from(
+                                            userPrompt + " -- take inspiration from this picture"),
+                                    ImageContent.from(TEST_IMAGE_BASE64, "image/jpeg"));
+                } else {
+                    userMessage = UserMessage.from(userPrompt);
+                }
+
+                memory.add(userMessage);
+
+                var response = model.chat(memory.messages());
+                var aiMessage = response.aiMessage();
+
+                memory.add(aiMessage);
+            }
+        } finally {
+            rootSpan.end();
+        }
+    }
+
+    /**
+     * Generates a JPEG image with random pixel noise and returns it as a base64 string. A fixed
+     * seed ensures the output is deterministic across runs.
+     */
+    private static String generateTestImageBase64(int width, int height) {
+        var img = new BufferedImage(width, height, BufferedImage.TYPE_INT_RGB);
+        var rng = new java.util.Random(42);
+        for (int y = 0; y < height; y++) {
+            for (int x = 0; x < width; x++) {
+                // Bound is exclusive, so 0x1000000 covers the full 24-bit RGB range
+                // (0xFFFFFF would exclude the maximum color value itself).
+                img.setRGB(x, y, rng.nextInt(0x1000000));
+            }
+        }
+        try {
+            var baos = new ByteArrayOutputStream();
+            ImageIO.write(img, "JPEG", baos);
+            return Base64.getEncoder().encodeToString(baos.toByteArray());
+        } catch (IOException e) {
+            throw new RuntimeException("Failed to generate test image", e);
+        }
+    }
+}
diff --git a/settings.gradle b/settings.gradle
index f4d4f199..436211e6 100644
--- a/settings.gradle
+++ b/settings.gradle
@@ -27,3 +27,4 @@ include 'braintrust-java-agent:smoke-test:wildfly'
 include 'btx'
 include 'braintrust-api'
 include 'braintrust-otel-extension'
+include 'perf-tests'