diff --git a/dlc_developer_config.toml b/dlc_developer_config.toml index 89b740a5e315..60593de299b7 100644 --- a/dlc_developer_config.toml +++ b/dlc_developer_config.toml @@ -1,6 +1,6 @@ [dev] # Set to "huggingface", for example, if you are a huggingface developer. Default is "" -partner_developer = "" +partner_developer = "huggingface" # Please only set it to true if you are preparing an EI related PR # Do remember to revert it back to false before merging any PR (including EI dedicated PR) ei_mode = false @@ -36,12 +36,12 @@ deep_canary_mode = false [build] # Add in frameworks you would like to build. By default, builds are disabled unless you specify building an image. -# available frameworks - ["base", "vllm", "sglang", "autogluon", "huggingface_vllm", "huggingface_sglang", "huggingface_tensorflow", "huggingface_pytorch", "huggingface_tensorflow_trcomp", "huggingface_pytorch_trcomp", "pytorch_trcomp", "tensorflow", "pytorch", "stabilityai_pytorch"] -build_frameworks = [] +# available frameworks - ["base", "vllm", "sglang", "autogluon", "huggingface_vllm", "huggingface_sglang", "huggingface_llamacpp", "huggingface_tensorflow", "huggingface_pytorch", "huggingface_tensorflow_trcomp", "huggingface_pytorch_trcomp", "pytorch_trcomp", "tensorflow", "pytorch", "stabilityai_pytorch"] +build_frameworks = ["huggingface_llamacpp"] # By default we build both training and inference containers. Set true/false values to determine which to build. -build_training = true +build_training = false build_inference = true # Set do_build to "false" to skip builds and test the latest image built by this PR @@ -192,5 +192,8 @@ dlc-pr-huggingface-vllm = "" # HuggingFace SGLang dlc-pr-huggingface-sglang = "" +# Huggingface Llamacpp +dlc-pr-huggingface-llamacpp = "/huggingface/llamacpp/buildspec.yml" + # sglang dlc-pr-sglang = "" diff --git a/huggingface/llamacpp/build_artifacts/llamacpp_sagemaker_server.patch b/huggingface/llamacpp/build_artifacts/llamacpp_sagemaker_server.patch new file mode 100644 index 000000000000..a8491c93a26a --- /dev/null +++ b/huggingface/llamacpp/build_artifacts/llamacpp_sagemaker_server.patch @@ -0,0 +1,133 @@ +--- a/tools/server/server.cpp ++++ b/tools/server/server.cpp +@@ -11,7 +11,9 @@ + #include "llama.h" + #include "log.h" + ++#include + #include ++#include + #include + #include + #include +@@ -69,6 +71,81 @@ + } + return res; + }; ++} ++ ++static std::string sagemaker_header(const server_http_req & req, const std::string & name) { ++ for (const auto & h : req.headers) { ++ std::string key = h.first; ++ std::transform(key.begin(), key.end(), key.begin(), [](unsigned char c) { return std::tolower(c); }); ++ if (key == name) { ++ return h.second; ++ } ++ } ++ return ""; ++} ++ ++static std::string sagemaker_route_from_attrs(const server_http_req & req) { ++ const std::string attrs = sagemaker_header(req, "x-amzn-sagemaker-custom-attributes"); ++ const std::string key = "route="; ++ const size_t pos = attrs.find(key); ++ if (pos == std::string::npos) { ++ return ""; ++ } ++ const size_t start = pos + key.size(); ++ const size_t end = attrs.find_first_of(",; \t\r\n", start); ++ return attrs.substr(start, end == std::string::npos ? 
std::string::npos : end - start); ++} ++ ++static bool sagemaker_route_syntax_ok(const std::string & route) { ++ return !route.empty() && route[0] == '/' && route.find("..") == std::string::npos && ++ route.find("://") == std::string::npos && route.find('?') == std::string::npos && ++ route.find('#') == std::string::npos; ++} ++ ++static std::string sagemaker_default_route(const server_http_req & req) { ++ const json body = json::parse(req.body, nullptr, false); ++ if (body.is_object()) { ++ if (body.contains("messages")) { ++ return "/v1/chat/completions"; ++ } ++ if (body.contains("prompt")) { ++ return "/v1/completions"; ++ } ++ if (body.contains("input")) { ++ return "/v1/embeddings"; ++ } ++ } ++ return "/v1/chat/completions"; ++} ++ ++static server_http_res_ptr sagemaker_error(int status, const std::string & message) { ++ auto res = std::make_unique(); ++ res->status = status; ++ res->data = safe_json_to_str({ ++ { "error", { ++ { "code", status }, ++ { "message", message }, ++ { "type", "invalid_request_error" }, ++ } }, ++ }); ++ return res; ++} ++ ++static server_http_res_ptr sagemaker_invocations( ++ const server_http_req & req, ++ const std::map & routes) { ++ const std::string requested = sagemaker_route_from_attrs(req); ++ const std::string route = requested.empty() ? sagemaker_default_route(req) : requested; ++ if (!sagemaker_route_syntax_ok(route)) { ++ return sagemaker_error(400, "invalid SageMaker route: " + route); ++ } ++ const auto it = routes.find(route); ++ if (it == routes.end()) { ++ return sagemaker_error(400, "unsupported SageMaker route: " + route); ++ } ++ server_http_req routed_req = req; ++ routed_req.path = route; ++ return it->second(routed_req); + } + + int main(int argc, char ** argv) { +@@ -169,6 +246,38 @@ + ctx_http.post("/models/unload", ex_wrapper(models_routes->post_router_models_unload)); + } + ++ ++ const std::map sagemaker_routes = { ++ {"/props", routes.post_props}, ++ {"/completion", routes.post_completions}, ++ {"/completions", routes.post_completions}, ++ {"/v1/completions", routes.post_completions_oai}, ++ {"/chat/completions", routes.post_chat_completions}, ++ {"/v1/chat/completions", routes.post_chat_completions}, ++ {"/v1/responses", routes.post_responses_oai}, ++ {"/responses", routes.post_responses_oai}, ++ {"/v1/audio/transcriptions", routes.post_transcriptions_oai}, ++ {"/audio/transcriptions", routes.post_transcriptions_oai}, ++ {"/v1/messages", routes.post_anthropic_messages}, ++ {"/v1/messages/count_tokens", routes.post_anthropic_count_tokens}, ++ {"/infill", routes.post_infill}, ++ {"/embedding", routes.post_embeddings}, ++ {"/embeddings", routes.post_embeddings}, ++ {"/v1/embeddings", routes.post_embeddings_oai}, ++ {"/rerank", routes.post_rerank}, ++ {"/reranking", routes.post_rerank}, ++ {"/v1/rerank", routes.post_rerank}, ++ {"/v1/reranking", routes.post_rerank}, ++ {"/tokenize", routes.post_tokenize}, ++ {"/detokenize", routes.post_detokenize}, ++ {"/apply-template", routes.post_apply_template}, ++ {"/lora-adapters", routes.post_lora_adapters}, ++ }; ++ ++ ctx_http.get ("/ping", ex_wrapper(routes.get_health)); // SageMaker health endpoint ++ ctx_http.post("/invocations", ex_wrapper([&sagemaker_routes](const server_http_req & req) { ++ return sagemaker_invocations(req, sagemaker_routes); ++ })); + ctx_http.get ("/health", ex_wrapper(routes.get_health)); // public endpoint (no API key check) + ctx_http.get ("/v1/health", ex_wrapper(routes.get_health)); // public endpoint (no API key check) + ctx_http.get ("/metrics", 
ex_wrapper(routes.get_metrics)); diff --git a/huggingface/llamacpp/build_artifacts/sagemaker_entrypoint.sh b/huggingface/llamacpp/build_artifacts/sagemaker_entrypoint.sh new file mode 100644 index 000000000000..1d06097ef569 --- /dev/null +++ b/huggingface/llamacpp/build_artifacts/sagemaker_entrypoint.sh @@ -0,0 +1,53 @@ +#!/bin/bash +set -euo pipefail + +# Check if telemetry file exists before executing +# Execute telemetry script if it exists, suppress errors +bash /usr/local/bin/bash_telemetry.sh >/dev/null 2>&1 || true + +# Source CUDA compat for older drivers (e.g., g5 instances) +if [ -f /usr/local/bin/start_cuda_compat.sh ] \ + && command -v nvidia-smi >/dev/null 2>&1 \ + && command -v nvcc >/dev/null 2>&1; then + source /usr/local/bin/start_cuda_compat.sh +fi + +# SageMaker sends traffic to port 8080 on /ping and /invocations. The custom +# llama-server build handles those routes directly. +HOST="${LLAMACPP_SAGEMAKER_HOST:-0.0.0.0}" +PORT="${SAGEMAKER_BIND_TO_PORT:-${LLAMACPP_SAGEMAKER_PORT:-8080}}" + +PREFIX="SM_LLAMACPP_" +ARG_PREFIX="--" + +ARGS=() + +while IFS='=' read -r key value; do + arg_name=$(echo "${key#"${PREFIX}"}" | tr '[:upper:]' '[:lower:]' | tr '_' '-') + + ARGS+=("${ARG_PREFIX}${arg_name}") + if [ -n "$value" ]; then + ARGS+=("$value") + fi +done < <(env | grep "^${PREFIX}" || true) + +# Drop any user-supplied --host / --port so SageMaker can always reach the server. +normalized=() +skip_next=0 +for a in "${ARGS[@]}"; do + if [ "$skip_next" -eq 1 ]; then + skip_next=0 + continue + fi + if [ "$a" = "--host" ] || [ "$a" = "--port" ]; then + skip_next=1 + continue + fi + normalized+=("$a") +done +ARGS=("${normalized[@]}") +ARGS+=(--host "$HOST" --port "$PORT") + +echo "[sagemaker] llama-server args: ${ARGS[*]}" >&2 + +exec /app/llama-server "${ARGS[@]}" diff --git a/huggingface/llamacpp/build_artifacts/start_cuda_compat.sh b/huggingface/llamacpp/build_artifacts/start_cuda_compat.sh new file mode 100644 index 000000000000..791d355c5abe --- /dev/null +++ b/huggingface/llamacpp/build_artifacts/start_cuda_compat.sh @@ -0,0 +1,25 @@ +#!/bin/bash + +verlte() { + [ "$1" = "$2" ] && return 1 || [ "$1" = "$(echo -e "$1\n$2" | sort -V | head -n1)" ] +} + +COMPAT_FILE=/usr/local/cuda/compat/libcuda.so.1 +if [ -f $COMPAT_FILE ]; then + CUDA_COMPAT_MAX_DRIVER_VERSION=$(readlink $COMPAT_FILE | cut -d'.' 
-f 3-) + echo "CUDA compat package should be installed for NVIDIA driver smaller than ${CUDA_COMPAT_MAX_DRIVER_VERSION}" + NVIDIA_DRIVER_VERSION=$(sed -n 's/^NVRM.*Kernel Module *\([0-9.]*\).*$/\1/p' /proc/driver/nvidia/version 2>/dev/null || true) + if [ -z "$NVIDIA_DRIVER_VERSION" ]; then + NVIDIA_DRIVER_VERSION=$(nvidia-smi --query-gpu=driver_version --format=csv,noheader --id=0 2>/dev/null || true) + fi + echo "Current installed NVIDIA driver version is ${NVIDIA_DRIVER_VERSION}" + if verlte $NVIDIA_DRIVER_VERSION $CUDA_COMPAT_MAX_DRIVER_VERSION; then + echo "Adding CUDA compat to LD_LIBRARY_PATH" + export LD_LIBRARY_PATH=/usr/local/cuda/compat:$LD_LIBRARY_PATH + echo $LD_LIBRARY_PATH + else + echo "Skipping CUDA compat setup as newer NVIDIA driver is installed" + fi +else + echo "Skipping CUDA compat setup as package not found" +fi diff --git a/huggingface/llamacpp/buildspec.yml b/huggingface/llamacpp/buildspec.yml new file mode 100644 index 000000000000..8b05831cbadf --- /dev/null +++ b/huggingface/llamacpp/buildspec.yml @@ -0,0 +1,78 @@ +account_id: &ACCOUNT_ID +prod_account_id: &PROD_ACCOUNT_ID 763104351884 +region: ®ION +base_framework: &BASE_FRAMEWORK llamacpp +framework: &FRAMEWORK !join [ "huggingface_", *BASE_FRAMEWORK] +version: &VERSION "b8882" +short_version: &SHORT_VERSION "b8882" +arch_type: &ARCH_TYPE x86_64 +autopatch_build: "False" + +repository_info: + build_repository: &BUILD_REPOSITORY + image_type: &IMAGE_TYPE inference + root: huggingface/llamacpp + repository_name: &REPOSITORY_NAME !join [ "pr", "-", "huggingface", "-", *BASE_FRAMEWORK ] + repository: &REPOSITORY !join [ *ACCOUNT_ID, .dkr.ecr., *REGION, .amazonaws.com/, *REPOSITORY_NAME ] + release_repository_name: &RELEASE_REPOSITORY_NAME !join [ "huggingface", "-", *BASE_FRAMEWORK ] + release_repository: &RELEASE_REPOSITORY !join [ *PROD_ACCOUNT_ID, .dkr.ecr., *REGION, .amazonaws.com/, *RELEASE_REPOSITORY_NAME ] + +context: + build_context: &BUILD_CONTEXT + deep_learning_container: + source: ../../src/deep_learning_container.py + target: deep_learning_container.py + start_cuda_compat: + source: build_artifacts/start_cuda_compat.sh + target: start_cuda_compat.sh + sagemaker_entrypoint: + source: build_artifacts/sagemaker_entrypoint.sh + target: sagemaker_entrypoint.sh + llamacpp_sagemaker_server_patch: + source: build_artifacts/llamacpp_sagemaker_server.patch + target: llamacpp_sagemaker_server.patch + + +images: + BuildHuggingFaceLlamacppGpuCu130DockerImage: + <<: *BUILD_REPOSITORY + context: + <<: *BUILD_CONTEXT + image_size_baseline: 40000 + device_type: &DEVICE_TYPE gpu + cuda_version: &CUDA_VERSION cu130 + os_version: &OS_VERSION ubuntu24.04 + python_version: &DOCKER_PYTHON_VERSION py3 + tag_python_version: &TAG_PYTHON_VERSION py312 + transformers_version: &TRANSFORMERS_VERSION 4.57.3 + tag: !join [ *VERSION, '-', 'transformers', *TRANSFORMERS_VERSION, '-', *DEVICE_TYPE, '-', *CUDA_VERSION, '-', *OS_VERSION ] + docker_file: !join [ docker/, *SHORT_VERSION, /, *CUDA_VERSION, /Dockerfile., *DEVICE_TYPE ] + target: sagemaker + build: true + enable_common_stage_build: false + test_configs: + test_platforms: + - sanity + - security + - sagemaker + + BuildHuggingFaceLlamacppCpuDockerImage: + <<: *BUILD_REPOSITORY + context: + <<: *BUILD_CONTEXT + image_size_baseline: 40000 + device_type: &DEVICE_TYPE cpu + os_version: &OS_VERSION ubuntu24.04 + python_version: &DOCKER_PYTHON_VERSION py3 + tag_python_version: &TAG_PYTHON_VERSION py312 + transformers_version: &TRANSFORMERS_VERSION 4.57.3 + tag: !join [ *VERSION, '-', 
'transformers', *TRANSFORMERS_VERSION, '-', *DEVICE_TYPE, '-', *OS_VERSION ] + docker_file: !join [ docker/, *SHORT_VERSION, /Dockerfile., *DEVICE_TYPE ] + target: sagemaker + build: true + enable_common_stage_build: false + test_configs: + test_platforms: + - sanity + - security + - sagemaker diff --git a/huggingface/llamacpp/docker/b8882/Dockerfile.cpu b/huggingface/llamacpp/docker/b8882/Dockerfile.cpu new file mode 100644 index 000000000000..a577306960f9 --- /dev/null +++ b/huggingface/llamacpp/docker/b8882/Dockerfile.cpu @@ -0,0 +1,91 @@ +ARG UBUNTU_VERSION=24.04 +ARG LLAMACPP_VERSION=b8882 + +FROM ubuntu:${UBUNTU_VERSION} AS build + +ARG LLAMACPP_VERSION + +RUN apt-get update \ + && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ + build-essential \ + ca-certificates \ + cmake \ + gcc-14 \ + g++-14 \ + git \ + libgomp1 \ + libssl-dev \ + patch \ + python3 \ + && rm -rf /var/lib/apt/lists/* + +ENV CC=gcc-14 \ + CXX=g++-14 + +WORKDIR /src/llama.cpp + +RUN git clone --branch "${LLAMACPP_VERSION}" --depth 1 https://github.com/ggml-org/llama.cpp.git . + +COPY llamacpp_sagemaker_server.patch /tmp/llamacpp_sagemaker_server.patch + +RUN patch -p1 < /tmp/llamacpp_sagemaker_server.patch +RUN cmake -B build \ + -DGGML_NATIVE=OFF \ + -DGGML_BACKEND_DL=ON \ + -DGGML_CPU_ALL_VARIANTS=ON \ + -DLLAMA_BUILD_TESTS=OFF \ + . \ + && cmake --build build --config Release -j"$(nproc)" --target llama-server + +RUN mkdir -p /app/lib \ + && find build -name "*.so*" -exec cp -P {} /app/lib \; \ + && cp build/bin/llama-server /app/llama-server + +FROM ubuntu:${UBUNTU_VERSION} AS base + +LABEL maintainer="Amazon AI" +LABEL dlc_major_version="1" + +WORKDIR /app + +ENV LD_LIBRARY_PATH=/app:${LD_LIBRARY_PATH} + +RUN apt-get update \ + && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ + ca-certificates \ + curl \ + libgomp1 \ + && apt-get autoremove -y \ + && apt-get clean -y \ + && rm -rf /tmp/* /var/tmp/* \ + && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \ + && find /var/cache -type f -delete \ + && rm -rf /var/lib/apt/lists/* + +COPY --from=build /app/lib/ /app/ +COPY --from=build /app/llama-server /app/llama-server + +FROM base AS sagemaker + +COPY deep_learning_container.py /usr/local/bin/deep_learning_container.py +COPY bash_telemetry.sh /usr/local/bin/bash_telemetry.sh +COPY --chmod=0755 sagemaker_entrypoint.sh /usr/local/bin/sagemaker_entrypoint.sh + +# Fix several CVEs: +# CVE-2026-31789, CVE-2025-15467, CVE-2025-13151, CVE-2025-15281, +# CVE-2025-69419, CVE-2025-68973, CVE-2025-69421, CVE-2026-28390, +# CVE-2025-69420, CVE-2026-0915, CVE-2026-0861, CVE-2026-28388, +# CVE-2026-31790, CVE-2026-28387, CVE-2026-28389 +RUN apt-get update \ + && apt-get install -y --only-upgrade \ + libssl3t64 \ + openssl \ + libtasn1-6 \ + libc6 \ + libc-bin \ + gnupg \ + gpg \ + gpgv \ + && rm -rf /var/lib/apt/lists/* + +ENTRYPOINT ["/usr/local/bin/sagemaker_entrypoint.sh"] diff --git a/huggingface/llamacpp/docker/b8882/cu130/Dockerfile.gpu b/huggingface/llamacpp/docker/b8882/cu130/Dockerfile.gpu new file mode 100644 index 000000000000..c7110bd0b007 --- /dev/null +++ b/huggingface/llamacpp/docker/b8882/cu130/Dockerfile.gpu @@ -0,0 +1,100 @@ +ARG UBUNTU_VERSION=24.04 +ARG CUDA_VERSION=13.0.2 +ARG LLAMACPP_VERSION=b8882 +ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION} +ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION} + +FROM ${BASE_CUDA_DEV_CONTAINER} AS build + 
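+# Build stage: clones the pinned llama.cpp tag, applies the SageMaker routing
+# patch, and compiles llama-server with CUDA enabled (GGML_CUDA=ON); the runtime
+# stage below copies only the resulting binary and its shared libraries.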
+ARG LLAMACPP_VERSION +ARG CUDA_DOCKER_ARCH=default + +RUN apt-get update \ + && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ + build-essential \ + ca-certificates \ + cmake \ + gcc-14 \ + g++-14 \ + git \ + libgomp1 \ + libssl-dev \ + patch \ + python3 \ + && rm -rf /var/lib/apt/lists/* + +ENV CC=gcc-14 \ + CXX=g++-14 \ + CUDAHOSTCXX=g++-14 + +WORKDIR /src/llama.cpp + +RUN git clone --branch "${LLAMACPP_VERSION}" --depth 1 https://github.com/ggml-org/llama.cpp.git . +COPY llamacpp_sagemaker_server.patch /tmp/llamacpp_sagemaker_server.patch +RUN patch -p1 < /tmp/llamacpp_sagemaker_server.patch +RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \ + export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \ + fi \ + && cmake -B build \ + -DGGML_NATIVE=OFF \ + -DGGML_CUDA=ON \ + -DGGML_BACKEND_DL=ON \ + -DGGML_CPU_ALL_VARIANTS=ON \ + -DLLAMA_BUILD_TESTS=OFF \ + ${CMAKE_ARGS:-} \ + -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined \ + . \ + && cmake --build build --config Release -j"$(nproc)" --target llama-server + +RUN mkdir -p /app/lib \ + && find build -name "*.so*" -exec cp -P {} /app/lib \; \ + && cp build/bin/llama-server /app/llama-server + +FROM ${BASE_CUDA_RUN_CONTAINER} AS base + +LABEL maintainer="Amazon AI" +LABEL dlc_major_version="1" + +WORKDIR /app +ENV LD_LIBRARY_PATH=/app:${LD_LIBRARY_PATH} + +RUN apt-get update \ + && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ + ca-certificates \ + curl \ + libgomp1 \ + && apt-get autoremove -y \ + && apt-get clean -y \ + && rm -rf /var/lib/apt/lists/* \ + && rm -rf /tmp/* /var/tmp/* \ + && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \ + && find /var/cache -type f -delete + +COPY --from=build /app/lib/ /app/ +COPY --from=build /app/llama-server /app/llama-server + +FROM base AS sagemaker + +COPY deep_learning_container.py /usr/local/bin/deep_learning_container.py +COPY bash_telemetry.sh /usr/local/bin/bash_telemetry.sh +COPY --chmod=0755 start_cuda_compat.sh /usr/local/bin/start_cuda_compat.sh +COPY --chmod=0755 sagemaker_entrypoint.sh /usr/local/bin/sagemaker_entrypoint.sh + +# Fix several CVEs: +# CVE-2026-31789, CVE-2025-15467, CVE-2025-13151, CVE-2025-15281, +# CVE-2025-69419, CVE-2025-68973, CVE-2025-69421, CVE-2026-28390, +# CVE-2025-69420, CVE-2026-0915, CVE-2026-0861, CVE-2026-28388, +# CVE-2026-31790, CVE-2026-28387, CVE-2026-28389 +RUN apt-get update \ + && apt-get install -y --only-upgrade \ + libssl3t64 \ + openssl \ + libtasn1-6 \ + libc6 \ + libc-bin \ + gnupg \ + gpg \ + gpgv \ + && rm -rf /var/lib/apt/lists/* + +ENTRYPOINT ["/usr/local/bin/sagemaker_entrypoint.sh"] diff --git a/src/constants.py b/src/constants.py index 037414380bca..42275c5532f1 100644 --- a/src/constants.py +++ b/src/constants.py @@ -29,6 +29,7 @@ "sglang", "huggingface_vllm", "huggingface_sglang", + "huggingface_llamacpp", } DEVICE_TYPES = {"cpu", "gpu", "hpu", "eia", "inf", "neuron", "neuronx"} IMAGE_TYPES = {"training", "inference"} diff --git a/src/image_builder.py b/src/image_builder.py index f101f33fa7fb..cc401a6a5e11 100644 --- a/src/image_builder.py +++ b/src/image_builder.py @@ -686,6 +686,7 @@ def get_job_type(image_repo_uri): "base": "general", "vllm": "general", "sglang": "general", + "llamacpp": "general", } for key, job_type in job_type_mapping.items(): diff --git a/test/sagemaker_tests/huggingface/llamacpp/__init__.py b/test/sagemaker_tests/huggingface/llamacpp/__init__.py new file mode 100644 index 000000000000..199e66b95926 --- 
/dev/null +++ b/test/sagemaker_tests/huggingface/llamacpp/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. +from __future__ import absolute_import diff --git a/test/sagemaker_tests/huggingface/llamacpp/conftest.py b/test/sagemaker_tests/huggingface/llamacpp/conftest.py new file mode 100644 index 000000000000..57374310db49 --- /dev/null +++ b/test/sagemaker_tests/huggingface/llamacpp/conftest.py @@ -0,0 +1,391 @@ +# Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. +from __future__ import absolute_import + +import json +import logging +import os +import platform +import shutil +import sys +import tempfile + +import boto3 +import pytest + +from botocore.exceptions import ClientError +from sagemaker import LocalSession, Session +from sagemaker.pytorch import PyTorch + +from .utils import image_utils, get_ecr_registry + +NO_P4_REGIONS = [ + "af-south-1", + "ap-east-1", + "ap-northeast-3", + "ap-southeast-1", + "ap-southeast-2", + "ap-south-1", + "ca-central-1", + "eu-central-1", + "eu-north-1", + "eu-west-2", + "eu-west-3", + "eu-south-1", + "me-south-1", + "sa-east-1", + "us-west-1", + "cn-northwest-1", + "il-central-1", +] + +NO_G5_REGIONS = [ + "us-west-1", + "ca-west-1", + "mx-cental-1", + "af-south-1", + "ap-east-1", + "ap-south-2", + "ap-southeast-5", + "ap-southeast-4", + "ap-northeast-3", + "ap-southeast-1", + "ap-southeast-7", + "eu-south-1", + "eu-west-3", + "eu-south-2", + "eu-central-2", + "me-south-1", +] + + +logger = logging.getLogger(__name__) +logging.getLogger("boto").setLevel(logging.INFO) +logging.getLogger("boto3").setLevel(logging.INFO) +logging.getLogger("botocore").setLevel(logging.INFO) +logging.getLogger("factory.py").setLevel(logging.INFO) +logging.getLogger("auth.py").setLevel(logging.INFO) +logging.getLogger("connectionpool.py").setLevel(logging.INFO) + + +dir_path = os.path.dirname(os.path.realpath(__file__)) + + +def pytest_addoption(parser): + parser.addoption("--build-image", "-D", action="store_true") + parser.addoption("--build-base-image", "-B", action="store_true") + parser.addoption("--aws-id") + parser.addoption("--instance-type") + parser.addoption("--accelerator-type", default=None) + parser.addoption("--docker-base-name", default="huggingface_llamacpp") + parser.addoption("--region", default="us-west-2") + parser.addoption("--framework-version", default="") + parser.addoption( + "--py-version", + choices=["2", "3", "37", "38", "39", "310", "311", "312"], + 
default=str(sys.version_info.major), + ) + # Processor is still "cpu" for EIA tests + parser.addoption( + "--processor", choices=["gpu", "cpu", "eia", "neuron", "neuronx"], default="cpu" + ) + # If not specified, will default to {framework-version}-{processor}-py{py-version} + parser.addoption("--tag", default=None) + parser.addoption( + "--generate-coverage-doc", + default=False, + action="store_true", + help="use this option to generate test coverage doc", + ) + parser.addoption( + "--efa", + action="store_true", + default=False, + help="Run only efa tests", + ) + parser.addoption("--sagemaker-regions", default="us-west-2") + + +def pytest_configure(config): + config.addinivalue_line("markers", "efa(): explicitly mark to run efa tests") + + +def pytest_runtest_setup(item): + if item.config.getoption("--efa"): + efa_tests = [mark for mark in item.iter_markers(name="efa")] + if not efa_tests: + pytest.skip("Skipping non-efa tests") + + +def pytest_collection_modifyitems(session, config, items): + for item in items: + print(f"item {item}") + for marker in item.iter_markers(name="team"): + print(f"item {marker}") + team_name = marker.args[0] + item.user_properties.append(("team_marker", team_name)) + print(f"item.user_properties {item.user_properties}") + + if config.getoption("--generate-coverage-doc"): + from test.test_utils.test_reporting import TestReportGenerator + + report_generator = TestReportGenerator(items, is_sagemaker=True) + report_generator.generate_coverage_doc(framework="huggingface_llamacpp", job_type="inference") + + +@pytest.fixture(scope="session", name="docker_base_name") +def fixture_docker_base_name(request): + return request.config.getoption("--docker-base-name") + + +@pytest.fixture(scope="session", name="region") +def fixture_region(request): + return request.config.getoption("--region") + + +@pytest.fixture(scope="session", name="framework_version") +def fixture_framework_version(request): + return request.config.getoption("--framework-version") + + +@pytest.fixture(scope="session", name="py_version") +def fixture_py_version(request): + return "py{}".format(int(request.config.getoption("--py-version"))) + + +@pytest.fixture(scope="session", name="processor") +def fixture_processor(request): + return request.config.getoption("--processor") + + +@pytest.fixture(scope="session", name="tag") +def fixture_tag(request, framework_version, processor, py_version): + provided_tag = request.config.getoption("--tag") + default_tag = "{}-{}-{}".format(framework_version, processor, py_version) + return provided_tag if provided_tag else default_tag + + +@pytest.fixture(scope="session", name="docker_image") +def fixture_docker_image(docker_base_name, tag): + return "{}:{}".format(docker_base_name, tag) + + +@pytest.fixture +def opt_ml(): + tmp = tempfile.mkdtemp() + os.mkdir(os.path.join(tmp, "output")) + + # Docker cannot mount Mac OS /var folder properly see + # https://forums.docker.com/t/var-folders-isnt-mounted-properly/9600 + opt_ml_dir = "/private{}".format(tmp) if platform.system() == "Darwin" else tmp + yield opt_ml_dir + + shutil.rmtree(tmp, True) + + +@pytest.fixture(scope="session", name="use_gpu") +def fixture_use_gpu(processor): + return processor == "gpu" + + +@pytest.fixture(scope="session", name="build_base_image", autouse=True) +def fixture_build_base_image( + request, framework_version, py_version, processor, tag, docker_base_name +): + build_base_image = request.config.getoption("--build-base-image") + if build_base_image: + return image_utils.build_base_image( 
+ framework_name=docker_base_name, + framework_version=framework_version, + py_version=py_version, + base_image_tag=tag, + processor=processor, + cwd=os.path.join(dir_path, ".."), + ) + + return tag + + +@pytest.fixture(scope="session", name="sagemaker_session") +def fixture_sagemaker_session(region): + return Session(boto_session=boto3.Session(region_name=region)) + + +@pytest.fixture(scope="session", name="sagemaker_regions") +def fixture_sagemaker_regions(request): + sagemaker_regions = request.config.getoption("--sagemaker-regions") + return sagemaker_regions.split(",") + + +@pytest.fixture(scope="session", name="sagemaker_local_session") +def fixture_sagemaker_local_session(region): + return LocalSession(boto_session=boto3.Session(region_name=region)) + + +@pytest.fixture(name="aws_id", scope="session") +def fixture_aws_id(request): + return request.config.getoption("--aws-id") + + +@pytest.fixture(name="instance_type", scope="session") +def fixture_instance_type(request, processor): + provided_instance_type = request.config.getoption("--instance-type") + default_instance_type = "local" if processor == "cpu" else "local_gpu" + return provided_instance_type or default_instance_type + + +@pytest.fixture(name="accelerator_type", scope="session") +def fixture_accelerator_type(request): + return request.config.getoption("--accelerator-type") + + +@pytest.fixture(name="docker_registry", scope="session") +def fixture_docker_registry(aws_id, region): + return get_ecr_registry(aws_id, region) + + +@pytest.fixture(name="ecr_image", scope="session") +def fixture_ecr_image(docker_registry, docker_base_name, tag): + return "{}/{}:{}".format(docker_registry, docker_base_name, tag) + + +@pytest.fixture(autouse=True) +def skip_by_device_type(request, use_gpu, instance_type, accelerator_type): + is_gpu = use_gpu or instance_type[3] in ["g", "p"] + is_eia = accelerator_type is not None + is_neuron = instance_type.startswith("ml.inf1") + is_neuronx = instance_type.startswith("ml.inf2") or instance_type.startswith("ml.trn1") + + # Separate out cases for clearer logic. + # When running Neuron test, skip CPU and GPU test. + if request.node.get_closest_marker("neuron_test") and not is_neuron: + pytest.skip("Skipping because running on '{}' instance".format(instance_type)) + elif request.node.get_closest_marker("neuronx_test") and not is_neuronx: + pytest.skip("Skipping because running on '{}' instance".format(instance_type)) + + # When running GPU test, skip CPU and neuron test. When running CPU test, skip GPU and neuron test. + elif (request.node.get_closest_marker("gpu_test") and not is_gpu) or ( + request.node.get_closest_marker("cpu_test") and (is_gpu or is_neuron or is_neuronx) + ): + pytest.skip("Skipping because running on '{}' instance".format(instance_type)) + + # When running EIA test, skip the CPU, GPU and Neuron functions + elif ( + request.node.get_closest_marker("neuron_test") + or request.node.get_closest_marker("gpu_test") + or request.node.get_closest_marker("cpu_test") + ) and is_eia: + pytest.skip("Skipping because running on '{}' instance".format(instance_type)) + + # When running CPU or GPU or Neuron test, skip EIA test. 
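+    # (Tests declare their device expectations through markers; for example, the
+    #  SageMaker suite in this package marks GPU-only tests with @pytest.mark.gpu_test.)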
+ elif request.node.get_closest_marker("eia_test") and not is_eia: + pytest.skip("Skipping because running on '{}' instance".format(instance_type)) + + +@pytest.fixture(autouse=True) +def skip_by_py_version(request, py_version): + if request.node.get_closest_marker("skip_py2") and py_version != "py3": + pytest.skip("Skipping the test because Python 2 is not supported.") + + +@pytest.fixture(autouse=True) +def skip_gpu_instance_restricted_regions(region, instance_type): + if (region in NO_P4_REGIONS and instance_type.startswith("ml.p4")) or ( + region in NO_G5_REGIONS and instance_type.startswith("ml.g5") + ): + pytest.skip( + "Skipping GPU test in region {} with instance type {}".format(region, instance_type) + ) + + +@pytest.fixture(autouse=True) +def skip_gpu_py2(request, use_gpu, instance_type, py_version, framework_version): + is_gpu = use_gpu or instance_type[3] in ["g", "p"] + if ( + request.node.get_closest_marker("skip_gpu_py2") + and is_gpu + and py_version != "py3" + and framework_version == "1.4.0" + ): + pytest.skip("Skipping the test until mms issue resolved.") + + +def _get_remote_override_flags(): + try: + s3_client = boto3.client("s3") + sts_client = boto3.client("sts") + account_id = sts_client.get_caller_identity().get("Account") + result = s3_client.get_object( + Bucket=f"dlc-cicd-helper-{account_id}", Key="override_tests_flags.json" + ) + json_content = json.loads(result["Body"].read().decode("utf-8")) + except ClientError as e: + logger.warning("ClientError when performing S3/STS operation: {}".format(e)) + json_content = {} + return json_content + + +def _is_test_disabled(test_name, build_name, version): + """ + Expected format of remote_override_flags: + { + "CB Project Name for Test Type A": { + "CodeBuild Resolved Source Version": ["test_type_A_test_function_1", "test_type_A_test_function_2"] + }, + "CB Project Name for Test Type B": { + "CodeBuild Resolved Source Version": ["test_type_B_test_function_1", "test_type_B_test_function_2"] + } + } + + :param test_name: str Test Function node name (includes parametrized values in string) + :param build_name: str Build Project name of current execution + :param version: str Source Version of current execution + :return: bool True if test is disabled as per remote override, False otherwise + """ + remote_override_flags = _get_remote_override_flags() + remote_override_build = remote_override_flags.get(build_name, {}) + if version in remote_override_build: + return not remote_override_build[version] or any( + [test_keyword in test_name for test_keyword in remote_override_build[version]] + ) + return False + + +@pytest.fixture(autouse=True) +def disable_test(request): + test_name = request.node.name + # We do not have a regex pattern to find CB name, which means we must resort to string splitting + build_arn = os.getenv("CODEBUILD_BUILD_ARN") + build_name = build_arn.split("/")[-1].split(":")[0] if build_arn else None + version = os.getenv("CODEBUILD_RESOLVED_SOURCE_VERSION") + + if build_name and version and _is_test_disabled(test_name, build_name, version): + pytest.skip(f"Skipping {test_name} test because it has been disabled.") + + +@pytest.fixture(autouse=True) +def skip_test_successfully_executed_before(request): + """ + "cache/lastfailed" contains information about failed tests only. We're running SM tests in separate threads for each image. + So when we retry SM tests, successfully executed tests executed again because pytest doesn't have that info in /.cache. 
+ But the flag "--last-failed-no-failures all" requires pytest to execute all the available tests. + The only sign that a test passed last time - lastfailed file exists and the test name isn't in that file. + The method checks whether lastfailed file exists and the test name is not in it. + """ + test_name = request.node.name + lastfailed = request.config.cache.get("cache/lastfailed", None) + + # if lastfailed is not None and not any( + # test_name in failed_test_name for failed_test_name in lastfailed.keys() + # ): + # pytest.skip(f"Skipping {test_name} because it was successfully executed for this commit") diff --git a/test/sagemaker_tests/huggingface/llamacpp/integration/__init__.py b/test/sagemaker_tests/huggingface/llamacpp/integration/__init__.py new file mode 100644 index 000000000000..44e2b20386e5 --- /dev/null +++ b/test/sagemaker_tests/huggingface/llamacpp/integration/__init__.py @@ -0,0 +1,137 @@ +# Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. +from __future__ import absolute_import + +import json +import os +import re +import shutil +import tarfile + +import boto3 + +# Path to test resources +resources_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "resources")) + +# Model artifacts for local mode tests - downloaded from HuggingFace Hub at runtime +MODEL_ID = "unsloth/Qwen3.5-0.8B-GGUF" +MODEL_FILENAME = "Qwen3.5-0.8B-UD-IQ2_XXS.gguf" +model_dir = os.path.join(resources_path, "qwen3.5-0.8b") +model_data = "qwen3.5-0.8b.tar.gz" +model_data_path = os.path.join(model_dir, model_data) + + +def _tar_contains_expected_model(tar_path): + if not os.path.exists(tar_path): + return False + try: + with tarfile.open(tar_path, "r:gz") as tar: + return any( + os.path.basename(member.name) == MODEL_FILENAME + for member in tar.getmembers() + if member.isfile() + ) + except tarfile.TarError: + return False + + +def ensure_model_downloaded(): + """Download model from HuggingFace Hub and create tarball if not already present.""" + if _tar_contains_expected_model(model_data_path): + return model_data_path + + from huggingface_hub import hf_hub_download + + os.makedirs(model_dir, exist_ok=True) + local_model_dir = os.path.join(model_dir, "model") + if os.path.exists(local_model_dir): + shutil.rmtree(local_model_dir) + os.makedirs(local_model_dir, exist_ok=True) + + print(f"Downloading {MODEL_FILENAME} from {MODEL_ID} on HuggingFace Hub...") + hf_hub_download( + repo_id=MODEL_ID, + filename=MODEL_FILENAME, + local_dir=local_model_dir, + ) + + # Remove cache folder if present + cache_dir = os.path.join(local_model_dir, ".cache") + if os.path.exists(cache_dir): + shutil.rmtree(cache_dir) + + print(f"Creating tarball {model_data}...") + with tarfile.open(model_data_path, "w:gz") as tar: + for item in os.listdir(local_model_dir): + tar.add(os.path.join(local_model_dir, item), arcname=item) + + # Clean up extracted model + shutil.rmtree(local_model_dir) + + print(f"Model ready at {model_data_path}") + return model_data_path + + +# Role for local mode (not used 
but required by SageMaker SDK) +ROLE = "dummy/unused-role" +DEFAULT_TIMEOUT = 45 + +# Llama.cpp SageMaker images listen on port 8080 with a custom llama-server build +# that serves SageMaker-compatible /ping and /invocations routes directly. + + +class NoLogStreamFoundError(Exception): + pass + + +class SageMakerEndpointFailure(Exception): + pass + + +def dump_logs_from_cloudwatch(e, region="us-west-2"): + """ + Function to dump logs from cloudwatch during error handling. + Gracefully handles missing log groups/streams. + """ + error_hosting_endpoint_regex = re.compile(r"Error hosting endpoint ((\w|-)+):") + endpoint_url_regex = re.compile(r"/aws/sagemaker/Endpoints/((\w|-)+)") + endpoint_match = error_hosting_endpoint_regex.search(str(e)) or endpoint_url_regex.search( + str(e) + ) + if endpoint_match: + logs_client = boto3.client("logs", region_name=region) + endpoint = endpoint_match.group(1) + log_group_name = f"/aws/sagemaker/Endpoints/{endpoint}" + try: + log_stream_resp = logs_client.describe_log_streams(logGroupName=log_group_name) + all_traffic_log_stream = "" + for log_stream in log_stream_resp.get("logStreams", []): + log_stream_name = log_stream.get("logStreamName") + if log_stream_name.startswith("AllTraffic"): + all_traffic_log_stream = log_stream_name + break + if not all_traffic_log_stream: + raise NoLogStreamFoundError( + f"Cannot find all traffic log streams for endpoint {endpoint}" + ) from e + events = logs_client.get_log_events( + logGroupName=log_group_name, logStreamName=all_traffic_log_stream + ) + raise SageMakerEndpointFailure( + f"Error from endpoint {endpoint}:\n{json.dumps(events, indent=4)}" + ) from e + except logs_client.exceptions.ResourceNotFoundException: + # Log group doesn't exist yet - endpoint may have failed before creating logs + raise SageMakerEndpointFailure( + f"Endpoint {endpoint} failed. No CloudWatch logs available yet." + ) from e diff --git a/test/sagemaker_tests/huggingface/llamacpp/integration/local/__init__.py b/test/sagemaker_tests/huggingface/llamacpp/integration/local/__init__.py new file mode 100644 index 000000000000..199e66b95926 --- /dev/null +++ b/test/sagemaker_tests/huggingface/llamacpp/integration/local/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. +from __future__ import absolute_import diff --git a/test/sagemaker_tests/huggingface/llamacpp/integration/local/test_serving.py b/test/sagemaker_tests/huggingface/llamacpp/integration/local/test_serving.py new file mode 100644 index 000000000000..f4807f5c4cf0 --- /dev/null +++ b/test/sagemaker_tests/huggingface/llamacpp/integration/local/test_serving.py @@ -0,0 +1,109 @@ +# Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. 
This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. +from __future__ import absolute_import + +from contextlib import contextmanager + +import pytest +import requests +from sagemaker.model import Model +from sagemaker.predictor import Predictor +from sagemaker.serializers import JSONSerializer +from sagemaker.deserializers import JSONDeserializer + +from ...integration import ROLE, ensure_model_downloaded +from ...utils import local_mode_utils + + +@contextmanager +def _predictor(image, sagemaker_local_session, instance_type): + """Context manager for Llama.cpp model deployment and cleanup. + + Model is extracted to /opt/ml/model by SageMaker from model_data tar.gz. + The container entrypoint runs a custom llama-server build with + SageMaker-compatible /ping and /invocations routes on port 8080. + """ + # Download model from HuggingFace Hub if not already present + model_data_path = ensure_model_downloaded() + + env = { + "SM_LLAMACPP_MODEL": "/opt/ml/model/Qwen3.5-0.8B-UD-IQ2_XXS.gguf", + } + + model = Model( + model_data=f"file://{model_data_path}", + role=ROLE, + image_uri=image, + env=env, + sagemaker_session=sagemaker_local_session, + predictor_cls=Predictor, + ) + with local_mode_utils.lock(): + predictor = None + try: + predictor = model.deploy(1, instance_type) + yield predictor + finally: + if predictor is not None: + predictor.delete_endpoint() + + +def _assert_sagemaker_ping_local(): + """SageMaker contract: GET /ping on the container HTTP port (local mode: 8080).""" + response = requests.get("http://127.0.0.1:8080/ping", timeout=60) + assert response.status_code == 200 + + +def _assert_llamacpp_chat_prediction(predictor): + """Test Llama.cpp inference using OpenAI-compatible chat completions API.""" + predictor.serializer = JSONSerializer() + predictor.deserializer = JSONDeserializer() + + data = { + "messages": [{"role": "user", "content": "What is Deep Learning?"}], + "max_tokens": 50, + "temperature": 0.7, + } + output = predictor.predict(data) + + assert output is not None + assert "choices" in output + + +def _assert_llamacpp_chat_prediction_explicit_route(predictor): + """Same as chat test but forces target path via SageMaker CustomAttributes route=.""" + predictor.serializer = JSONSerializer() + predictor.deserializer = JSONDeserializer() + + data = { + "messages": [{"role": "user", "content": "Say hello in one word."}], + "max_tokens": 16, + "temperature": 0.3, + } + output = predictor.predict( + data, + custom_attributes="route=/v1/chat/completions", + ) + + assert output is not None + assert "choices" in output + + +@pytest.mark.model("qwen3.5-0.8b") +@pytest.mark.team("sagemaker-1p-algorithms") +def test_llamacpp_local_chat(docker_image, sagemaker_local_session, instance_type): + """Test Llama.cpp local deployment: /ping shim, /invocations chat, and explicit route=.""" + with _predictor(docker_image, sagemaker_local_session, instance_type) as predictor: + _assert_sagemaker_ping_local() + _assert_llamacpp_chat_prediction(predictor) + _assert_llamacpp_chat_prediction_explicit_route(predictor) diff --git a/test/sagemaker_tests/huggingface/llamacpp/integration/sagemaker/__init__.py b/test/sagemaker_tests/huggingface/llamacpp/integration/sagemaker/__init__.py new file mode 100644 index 000000000000..04fbf5d9a144 --- /dev/null +++ 
b/test/sagemaker_tests/huggingface/llamacpp/integration/sagemaker/__init__.py @@ -0,0 +1,12 @@ +# Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. diff --git a/test/sagemaker_tests/huggingface/llamacpp/integration/sagemaker/test_llamacpp.py b/test/sagemaker_tests/huggingface/llamacpp/integration/sagemaker/test_llamacpp.py new file mode 100644 index 000000000000..370ae0f51e1b --- /dev/null +++ b/test/sagemaker_tests/huggingface/llamacpp/integration/sagemaker/test_llamacpp.py @@ -0,0 +1,116 @@ +# Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. +from __future__ import absolute_import + +import json +import logging + +import pytest +import sagemaker +from sagemaker.model import Model +from sagemaker.predictor import Predictor +from sagemaker.serializers import JSONSerializer +from sagemaker.deserializers import JSONDeserializer + +from ...integration import dump_logs_from_cloudwatch +from ...integration.sagemaker.timeout import timeout_and_delete_endpoint +from ..... import invoke_sm_endpoint_helper_function + +LOGGER = logging.getLogger(__name__) + + +@pytest.mark.model("qwen3.5-0.8b") +@pytest.mark.processor("gpu") +@pytest.mark.gpu_test +@pytest.mark.team("sagemaker-1p-algorithms") +def test_llamacpp_qwen(framework_version, ecr_image, instance_type, sagemaker_regions): + invoke_sm_endpoint_helper_function( + ecr_image=ecr_image, + sagemaker_regions=sagemaker_regions, + test_function=_test_llamacpp_model, + dump_logs_from_cloudwatch=dump_logs_from_cloudwatch, + framework_version=framework_version, + instance_type=instance_type, + model_id="unsloth/Qwen3.5-0.8B-GGUF", + ) + + +def _test_llamacpp_model( + image_uri, + sagemaker_session, + instance_type, + model_id, + framework_version=None, + **kwargs, +): + """Test Llama.cpp model deployment and inference using OpenAI-compatible API format + + Uses sagemaker.model.Model for SDK v3 compatibility instead of HuggingFaceModel. + + Args: + image_uri: ECR image URI + sagemaker_session: SageMaker session + instance_type: ML instance type + model_id: HuggingFace model ID + framework_version: Optional version info + **kwargs: Additional args from helper (boto_session, sagemaker_client, etc.) 
+ """ + endpoint_name = sagemaker.utils.unique_name_from_base("sagemaker-hf-llamacpp-serving") + + env = { + "SM_LLAMACPP_HF_REPO": model_id, + } + + model = Model( + name=endpoint_name, + image_uri=image_uri, + role="SageMakerRole", + env=env, + sagemaker_session=sagemaker_session, + predictor_cls=Predictor, + ) + + with timeout_and_delete_endpoint(endpoint_name, sagemaker_session, minutes=45): + predictor = model.deploy( + initial_instance_count=1, + instance_type=instance_type, + endpoint_name=endpoint_name, + container_startup_health_check_timeout=1800, + inference_ami_version="al2-ami-sagemaker-inference-gpu-3-1", + ) + + predictor.serializer = JSONSerializer() + predictor.deserializer = JSONDeserializer() + + # Llama.cpp SageMaker uses OpenAI-compatible chat completions API format + data = { + "messages": [{"role": "user", "content": "What is Deep Learning?"}], + "max_tokens": 50, + "temperature": 0.7, + } + + LOGGER.info(f"Running inference with data: {data}") + output = predictor.predict(data) + LOGGER.info(f"Output: {json.dumps(output)}") + + assert output is not None + assert "choices" in output + + # Explicit route= uses SageMaker CustomAttributes routing in the custom llama-server build. + output_routed = predictor.predict( + data, + custom_attributes="route=/v1/chat/completions", + ) + LOGGER.info(f"Output (routed): {json.dumps(output_routed)}") + assert output_routed is not None + assert "choices" in output_routed diff --git a/test/sagemaker_tests/huggingface/llamacpp/integration/sagemaker/timeout.py b/test/sagemaker_tests/huggingface/llamacpp/integration/sagemaker/timeout.py new file mode 100644 index 000000000000..1d13878031f7 --- /dev/null +++ b/test/sagemaker_tests/huggingface/llamacpp/integration/sagemaker/timeout.py @@ -0,0 +1,66 @@ +# Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. +from __future__ import absolute_import +import signal +from contextlib import contextmanager +import logging + +from botocore.exceptions import ClientError + +LOGGER = logging.getLogger("timeout") + + +class TimeoutError(Exception): + pass + + +@contextmanager +def timeout(seconds=0, minutes=0, hours=0): + """Add a signal-based timeout to any block of code. + If multiple time units are specified, they will be added together to determine time limit. + Usage: + with timeout(seconds=5): + my_slow_function(...) + Args: + - seconds: The time limit, in seconds. + - minutes: The time limit, in minutes. + - hours: The time limit, in hours. 
+ """ + + limit = seconds + 60 * minutes + 3600 * hours + + def handler(signum, frame): + raise TimeoutError("timed out after {} seconds".format(limit)) + + try: + signal.signal(signal.SIGALRM, handler) + signal.alarm(limit) + + yield + finally: + signal.alarm(0) + + +@contextmanager +def timeout_and_delete_endpoint(endpoint_name, sagemaker_session, seconds=0, minutes=0, hours=0): + with timeout(seconds=seconds, minutes=minutes, hours=hours) as t: + try: + yield [t] + finally: + try: + sagemaker_session.delete_endpoint(endpoint_name) + LOGGER.info("deleted endpoint {}".format(endpoint_name)) + except ClientError as ce: + if ce.response["Error"]["Code"] == "ValidationException": + # avoids the inner exception to be overwritten + pass diff --git a/test/sagemaker_tests/huggingface/llamacpp/requirements.txt b/test/sagemaker_tests/huggingface/llamacpp/requirements.txt new file mode 100644 index 000000000000..890bbe499718 --- /dev/null +++ b/test/sagemaker_tests/huggingface/llamacpp/requirements.txt @@ -0,0 +1,29 @@ +boto3 +coverage +# Docker v7.0.0 breaks compatibility with Docker Compose v1 (SageMaker Local) +docker>=5,<=6.1.3 +flake8==3.7.7 +Flask==1.1.1 +mock +pytest==8.3.5 +pytest-cov +pytest-rerunfailures +pytest-xdist +PyYAML +protobuf>=3.20,<=3.20.2 +sagemaker>=2.237.0,<3 +six +requests<2.32.0 +requests_mock +Pillow +retrying==1.3.3 +urllib3>=1.26.8 +pluggy>=1.5,<2 +requests_mock +sagemaker-inference +tenacity +fabric +invoke +gitpython +toml +huggingface_hub diff --git a/test/sagemaker_tests/huggingface/llamacpp/utils/__init__.py b/test/sagemaker_tests/huggingface/llamacpp/utils/__init__.py new file mode 100644 index 000000000000..6932ed1abd5b --- /dev/null +++ b/test/sagemaker_tests/huggingface/llamacpp/utils/__init__.py @@ -0,0 +1,36 @@ +# Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. +from __future__ import absolute_import + +import boto3 +import botocore + + +def _botocore_resolver(): + """ + Get the DNS suffix for the given region. + :return: endpoint object + """ + loader = botocore.loaders.create_loader() + return botocore.regions.EndpointResolver(loader.load_data("endpoints")) + + +def get_ecr_registry(account, region): + """ + Get prefix of ECR image URI + :param account: Account ID + :param region: region where ECR repo exists + :return: AWS ECR registry + """ + endpoint_data = _botocore_resolver().construct_endpoint("ecr", region) + return "{}.dkr.{}".format(account, endpoint_data["hostname"]) diff --git a/test/sagemaker_tests/huggingface/llamacpp/utils/image_utils.py b/test/sagemaker_tests/huggingface/llamacpp/utils/image_utils.py new file mode 100644 index 000000000000..3421e6ce2b42 --- /dev/null +++ b/test/sagemaker_tests/huggingface/llamacpp/utils/image_utils.py @@ -0,0 +1,67 @@ +# Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. 
A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. +from __future__ import absolute_import + +import os +import subprocess +import sys + +CYAN_COLOR = "\033[36m" +END_COLOR = "\033[0m" + + +def build_base_image( + framework_name, framework_version, py_version, processor, base_image_tag, cwd="." +): + base_image_uri = get_base_image_uri(framework_name, base_image_tag) + + dockerfile_location = os.path.join( + "docker", framework_version, "base", "Dockerfile.{}".format(processor) + ) + + subprocess.check_call( + [ + "docker", + "build", + "-t", + base_image_uri, + "-f", + dockerfile_location, + "--build-arg", + "py_version={}".format(py_version[-1]), + cwd, + ], + cwd=cwd, + ) + print("created image {}".format(base_image_uri)) + return base_image_uri + + +def get_base_image_uri(framework_name, base_image_tag): + return "{}-base:{}".format(framework_name, base_image_tag) + + +def get_image_uri(framework_name, tag): + return "{}:{}".format(framework_name, tag) + + +def _check_call(cmd, *popenargs, **kwargs): + if isinstance(cmd, str): + cmd = cmd.split(" ") + _print_cmd(cmd) + subprocess.check_call(cmd, *popenargs, **kwargs) + + +def _print_cmd(cmd): + print("executing docker command: {}{}{}".format(CYAN_COLOR, " ".join(cmd), END_COLOR)) + sys.stdout.flush() diff --git a/test/sagemaker_tests/huggingface/llamacpp/utils/local_mode_utils.py b/test/sagemaker_tests/huggingface/llamacpp/utils/local_mode_utils.py new file mode 100644 index 000000000000..fa6b3cf00c36 --- /dev/null +++ b/test/sagemaker_tests/huggingface/llamacpp/utils/local_mode_utils.py @@ -0,0 +1,46 @@ +# Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. +from __future__ import absolute_import + +from contextlib import contextmanager +import fcntl +import os +import tarfile +import time + +from ..integration import resources_path + +LOCK_PATH = os.path.join(resources_path, "local_mode_lock") + + +@contextmanager +def lock(): + # Since Local Mode uses the same port for serving, we need a lock in order + # to allow concurrent test execution. 
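+    # fcntl.lockf takes an exclusive advisory lock (LOCK_EX) on the shared lock
+    # file, so parallel pytest workers serialize their local-mode deployments
+    # instead of racing for the container port; it is released in the finally block.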
+ local_mode_lock_fd = open(LOCK_PATH, "w") + local_mode_lock = local_mode_lock_fd.fileno() + + fcntl.lockf(local_mode_lock, fcntl.LOCK_EX) + + try: + yield + finally: + time.sleep(5) + fcntl.lockf(local_mode_lock, fcntl.LOCK_UN) + + +def assert_files_exist(output_path, directory_file_map): + for directory, files in directory_file_map.items(): + with tarfile.open(os.path.join(output_path, "{}.tar.gz".format(directory))) as tar: + for f in files: + tar.getmember(f) diff --git a/test/test_utils/__init__.py b/test/test_utils/__init__.py index d593deea76e7..571b9fb26ed3 100644 --- a/test/test_utils/__init__.py +++ b/test/test_utils/__init__.py @@ -1822,6 +1822,7 @@ def get_framework_and_version_from_tag(image_uri): "huggingface_pytorch", "huggingface_vllm", "huggingface_sglang", + "huggingface_llamacpp", "stabilityai_pytorch", "pytorch_trcomp", "tensorflow", @@ -1939,6 +1940,7 @@ def get_framework_from_image_uri(image_uri): "huggingface-pytorch": "huggingface_pytorch", "huggingface-vllm": "huggingface_vllm", "huggingface-sglang": "huggingface_sglang", + "huggingface-llamacpp": "huggingface_llamacpp", "stabilityai-pytorch": "stabilityai_pytorch", "mxnet": "mxnet", "pytorch": "pytorch", @@ -2080,6 +2082,7 @@ def get_job_type_from_image(image_uri): "base": "general", "vllm": "general", "sglang": "general", + "llamacpp": "general", } for key, job_type in job_type_mapping.items(): diff --git a/test/test_utils/sagemaker.py b/test/test_utils/sagemaker.py index 0ab4d69e4829..24f256f66253 100644 --- a/test/test_utils/sagemaker.py +++ b/test/test_utils/sagemaker.py @@ -164,6 +164,8 @@ def generate_sagemaker_pytest_cmd(image, sagemaker_test_type): path = os.path.join("test", "sagemaker_tests", "huggingface", "vllm") elif framework == "huggingface_sglang": path = os.path.join("test", "sagemaker_tests", "huggingface", "sglang") + elif framework == "huggingface_llamacpp": + path = os.path.join("test", "sagemaker_tests", "huggingface", "llamacpp") else: path = os.path.join("test", "sagemaker_tests", framework, job_type) aws_id_arg = "--aws-id" @@ -286,6 +288,8 @@ def generate_sagemaker_pytest_cmd(image, sagemaker_test_type): path = os.path.join("test", "sagemaker_tests", "huggingface", "vllm") elif "huggingface" in framework and "sglang" in framework: path = os.path.join("test", "sagemaker_tests", "huggingface", "sglang") + elif "huggingface" in framework and "llamacpp" in framework: + path = os.path.join("test", "sagemaker_tests", "huggingface", "llamacpp") elif "huggingface" in framework and job_type == "inference": path = os.path.join("test", "sagemaker_tests", "huggingface", "inference") if "trcomp" in framework: diff --git a/test/testrunner.py b/test/testrunner.py index 2d7deb2cfe24..9773d11e1604 100644 --- a/test/testrunner.py +++ b/test/testrunner.py @@ -629,6 +629,15 @@ def main(): sm_utils.generate_empty_report(report, test_type, "sglang") return + # Skip base llamacpp (not huggingface_llamacpp) - huggingface_llamacpp has local tests + if "llamacpp" in dlc_images and "huggingface" not in dlc_images: + LOGGER.info( + f"Skipping - there are no local mode tests for base Llamacpp. Images: {dlc_images}" + ) + report = os.path.join(os.getcwd(), "test", f"{test_type}.xml") + sm_utils.generate_empty_report(report, test_type, "llamacpp") + return + testing_image_list = [ image for image in standard_images_list
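For reference (not part of the diff): a minimal client-side sketch of how the CustomAttributes routing added in the llama-server patch can be exercised against a deployed endpoint. The endpoint name and payload below are placeholders; when no route= attribute is supplied, the patched server infers a default route from the request body (message-style bodies go to /v1/chat/completions).

```python
import json

import boto3

# Placeholder endpoint name; any endpoint backed by this image behaves the same way.
ENDPOINT_NAME = "my-llamacpp-endpoint"

runtime = boto3.client("sagemaker-runtime")

payload = {
    "messages": [{"role": "user", "content": "What is Deep Learning?"}],
    "max_tokens": 50,
}

# SageMaker forwards CustomAttributes as the X-Amzn-SageMaker-Custom-Attributes
# header, which the patched server scans for an explicit "route=" target.
response = runtime.invoke_endpoint(
    EndpointName=ENDPOINT_NAME,
    ContentType="application/json",
    Body=json.dumps(payload),
    CustomAttributes="route=/v1/chat/completions",
)

result = json.loads(response["Body"].read())
print(result["choices"][0]["message"]["content"])
```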