Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,14 @@ Changes for crate
Unreleased
==========

- Added gzip compression for outgoing request bodies (``compress_client=True``,
default enabled). Use ``compress_threshold`` (default: ``8192`` bytes) to
skip compression on small payloads. Server-side response compression
(``compress_server``) is available but defaults to ``False`` to avoid
BREACH-class oracle attacks on compressed HTTP responses sent over TLS; opt
in explicitly with ``compress_server=True``. Server-side response compression
also requires ``http.compression=true`` in the server configuration.

- Added named parameter support (``pyformat`` paramstyle). Passing a
:class:`py:dict` as ``parameters`` to ``cursor.execute()`` now accepts
``%(name)s`` placeholders and converts them to positional ``?`` markers
Expand Down
41 changes: 41 additions & 0 deletions docs/connect.rst
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,47 @@ with the rest of your arguments.

However, you can query any schema you like by specifying it in the query.

.. _compression:

Request and response compression
=================================

By default, ``crate-python`` compresses outgoing request bodies with gzip
(``compress_client=True``). Response compression is opt-in (``compress_server``
defaults to ``False``; see the security note below)::

>>> # compress_client=True and compress_server=False are the defaults
>>> connection = client.connect('localhost:4200')

To disable client-side request compression::

>>> connection = client.connect('localhost:4200', compress_client=False)

Compression is skipped for request bodies smaller than ``compress_threshold``
bytes (default ``8192``). This avoids CPU overhead on tiny payloads where
bandwidth savings are negligible::

>>> connection = client.connect('localhost:4200', compress_threshold=16384)

To enable server-side response compression, set ``compress_server=True``. The
server must also have ``http.compression=true``. The client
sends ``Accept-Encoding: gzip, deflate`` and urllib3 decompresses responses
transparently::

>>> connection = client.connect('localhost:4200', compress_server=True)

.. NOTE::

``compress_server`` defaults to ``False`` as a precaution against
`BREACH`_-class attacks. BREACH allows an attacker who can both observe
TLS traffic *and* inject content into requests to gradually recover secrets
from compressed HTTP responses. CrateDB SQL responses do not contain
credentials, so the practical risk is low for most deployments. Enable
``compress_server=True`` explicitly if your deployment benefits from
response compression and you have assessed the risk.

.. _BREACH: https://en.wikipedia.org/wiki/BREACH

Next steps
==========

Expand Down
23 changes: 23 additions & 0 deletions src/crate/client/connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,10 @@ def __init__(
converter=None,
time_zone=None,
jwt_token=None,
compress_client=True,
compress_threshold=8192,
compress_algorithm="gzip",
compress_server=False,
):
"""
:param servers:
Expand Down Expand Up @@ -131,6 +135,21 @@ def __init__(
converted from UTC to use the given time zone.
:param jwt_token:
the JWT token to authenticate with the server.
:param compress_client:
(optional, defaults to ``True``)
Compress outgoing request bodies with gzip. Payloads smaller than
``compress_threshold`` bytes are sent uncompressed.
:param compress_threshold:
(optional, defaults to ``8192``)
Minimum request body size in bytes to trigger client-side compression.
:param compress_algorithm:
(optional, defaults to ``"gzip"``)
Compression algorithm. Only ``"gzip"`` is supported in this version.
:param compress_server:
(optional, defaults to ``False``)
Send ``Accept-Encoding: gzip`` so the server may return compressed
responses. Disabled by default to avoid BREACH-class oracle attacks
on compressed TLS responses.
""" # noqa: E501

self._converter = converter
Expand Down Expand Up @@ -158,6 +177,10 @@ def __init__(
socket_tcp_keepintvl=socket_tcp_keepintvl,
socket_tcp_keepcnt=socket_tcp_keepcnt,
jwt_token=jwt_token,
compress_client=compress_client,
compress_threshold=compress_threshold,
compress_algorithm=compress_algorithm,
compress_server=compress_server,
)
self.lowest_server_version = self._lowest_server_version()
self._closed = False
Expand Down
31 changes: 30 additions & 1 deletion src/crate/client/http.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

import calendar
import datetime as dt
import gzip
import heapq
import io
import logging
Expand Down Expand Up @@ -463,6 +464,10 @@ def __init__(
socket_tcp_keepintvl=None,
socket_tcp_keepcnt=None,
jwt_token=None,
compress_client=True,
compress_threshold=8192,
compress_algorithm="gzip",
compress_server=False,
):
if not servers:
servers = [self.default_server]
Expand Down Expand Up @@ -516,6 +521,16 @@ def __init__(
self.jwt_token = jwt_token
self.schema = schema

if compress_algorithm != "gzip":
raise ValueError(
f"Unsupported compress_algorithm: {compress_algorithm!r}. "
"Only 'gzip' is supported."
)
self.compress_client = compress_client
self.compress_threshold = compress_threshold
self.compress_algorithm = compress_algorithm
self.compress_server = compress_server

self.path = self.SQL_PATH
if error_trace:
self.path += "&error_trace=true"
Expand Down Expand Up @@ -678,8 +693,22 @@ def _json_request(self, method, path, data):
"""
Issue request against the crate HTTP API.
"""
headers = {}

response = self._request(method, path, data=data)
if self.compress_server:
headers["Accept-Encoding"] = "gzip, deflate"

if (
self.compress_client
and self.compress_algorithm == "gzip"
and len(data) >= self.compress_threshold
):
data = gzip.compress(data, compresslevel=6)
headers["Content-Encoding"] = "gzip"

response = self._request(
method, path, data=data, headers=headers or None
)
_raise_for_status(response)
if len(response.data) > 0:
return _json_from_response(response)
Expand Down
110 changes: 110 additions & 0 deletions tests/client/test_http.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
# with Crate these terms will supersede the license and you may use the
# software solely pursuant to the terms of the relevant commercial agreement.

import gzip
import json
import os
import queue
Expand Down Expand Up @@ -735,3 +736,112 @@ def test_credentials_and_token(serve_http):
assert excinfo.match(
"Either JWT tokens are accepted, or user credentials, but not both"
)

def test_compress_client_disabled():
    """
    With ``compress_client=False`` the request carries no Content-Encoding.
    """
    seen = {}

    def record_request(method, path, **kwargs):
        # A missing or ``None`` headers argument is recorded as an empty dict.
        seen["headers"] = kwargs.get("headers") or {}
        return fake_response(200)

    options = {
        "servers": "localhost:4200",
        "compress_client": False,
        "compress_server": False,
    }
    with patch(REQUEST_PATH, side_effect=record_request):
        http_client = Client(**options)
        http_client.sql("SELECT 1")

    sent_headers = seen["headers"]
    assert "Content-Encoding" not in sent_headers


def test_compress_client_enabled():
    """
    With client compression on, the body is gzipped and labelled as such.

    ``compress_threshold=0`` is used so that even the tiny ``SELECT 1``
    payload is compressed.
    """
    seen = {}

    def record_request(method, path, **kwargs):
        # Keep both the (possibly compressed) payload and the headers.
        seen["data"] = kwargs.get("data", b"")
        seen["headers"] = kwargs.get("headers") or {}
        return fake_response(200)

    options = {
        "servers": "localhost:4200",
        "compress_client": True,
        "compress_threshold": 0,
        "compress_server": False,
    }
    with patch(REQUEST_PATH, side_effect=record_request):
        http_client = Client(**options)
        http_client.sql("SELECT 1")

    assert seen["headers"].get("Content-Encoding") == "gzip"
    # The payload must round-trip through gzip back to the JSON request body.
    plain_body = gzip.decompress(seen["data"])
    assert b'"stmt"' in plain_body


def test_compress_client_below_threshold():
    """
    A payload smaller than ``compress_threshold`` is sent without
    a Content-Encoding header, even though client compression is on.
    """
    seen = {}

    def record_request(method, path, **kwargs):
        # A missing or ``None`` headers argument is recorded as an empty dict.
        seen["headers"] = kwargs.get("headers") or {}
        return fake_response(200)

    options = {
        "servers": "localhost:4200",
        "compress_client": True,
        "compress_threshold": 999_999,
        "compress_server": False,
    }
    with patch(REQUEST_PATH, side_effect=record_request):
        http_client = Client(**options)
        http_client.sql("SELECT 1")

    sent_headers = seen["headers"]
    assert "Content-Encoding" not in sent_headers


def test_compress_server_sends_accept_encoding():
    """
    With ``compress_server=True`` the request advertises
    ``Accept-Encoding: gzip, deflate``.
    """
    seen = {}

    def record_request(method, path, **kwargs):
        # A missing or ``None`` headers argument is recorded as an empty dict.
        seen["headers"] = kwargs.get("headers") or {}
        return fake_response(200)

    options = {
        "servers": "localhost:4200",
        "compress_client": False,
        "compress_server": True,
    }
    with patch(REQUEST_PATH, side_effect=record_request):
        http_client = Client(**options)
        http_client.sql("SELECT 1")

    accept_encoding = seen["headers"].get("Accept-Encoding")
    assert accept_encoding == "gzip, deflate"


def test_compress_server_disabled():
    """
    With ``compress_server=False`` no Accept-Encoding header is sent.
    """
    seen = {}

    def record_request(method, path, **kwargs):
        # A missing or ``None`` headers argument is recorded as an empty dict.
        seen["headers"] = kwargs.get("headers") or {}
        return fake_response(200)

    options = {
        "servers": "localhost:4200",
        "compress_client": False,
        "compress_server": False,
    }
    with patch(REQUEST_PATH, side_effect=record_request):
        http_client = Client(**options)
        http_client.sql("SELECT 1")

    sent_headers = seen["headers"]
    assert "Accept-Encoding" not in sent_headers


def test_compress_server_default_disabled():
    """
    A ``Client`` built with default compression arguments sends no
    Accept-Encoding header.
    """
    seen = {}

    def record_request(method, path, **kwargs):
        # A missing or ``None`` headers argument is recorded as an empty dict.
        seen["headers"] = kwargs.get("headers") or {}
        return fake_response(200)

    with patch(REQUEST_PATH, side_effect=record_request):
        http_client = Client(servers="localhost:4200")
        http_client.sql("SELECT 1")

    sent_headers = seen["headers"]
    assert "Accept-Encoding" not in sent_headers
Loading