diff --git a/tests/fixtures/migration/cassette_with_duplicate_requests.yaml b/tests/fixtures/migration/cassette_with_duplicate_requests.yaml
new file mode 100644
index 00000000..121289ab
--- /dev/null
+++ b/tests/fixtures/migration/cassette_with_duplicate_requests.yaml
@@ -0,0 +1,37 @@
+interactions:
+- request:
+    body: null
+    headers:
+      accept: ['*/*']
+      accept-encoding: ['gzip, deflate, compress']
+      user-agent: ['python-requests/2.2.1 CPython/2.6.1 Darwin/10.8.0']
+    method: GET
+    uri: http://httpbin.org/ip
+  response:
+    body: {string: "{\n \"origin\": \"217.122.164.194\"\n}"}
+    headers:
+      access-control-allow-origin: ['*']
+      content-type: [application/json]
+      date: ['Mon, 21 Apr 2014 23:06:09 GMT']
+      server: [gunicorn/0.17.4]
+      content-length: ['32']
+      connection: [keep-alive]
+    status: {code: 200, message: OK}
+- request:
+    body: null
+    headers:
+      accept: ['*/*']
+      accept-encoding: ['gzip, deflate, compress']
+      user-agent: ['python-requests/2.2.1 CPython/2.6.1 Darwin/10.8.0']
+    method: GET
+    uri: http://httpbin.org/ip
+  response:
+    body: {string: "{\n \"origin\": \"217.122.164.194\"\n}"}
+    headers:
+      access-control-allow-origin: ['*']
+      content-type: [application/json]
+      date: ['Mon, 21 Apr 2014 23:06:09 GMT']
+      server: [gunicorn/0.17.4]
+      content-length: ['32']
+      connection: [keep-alive]
+    status: {code: 200, message: OK}
diff --git a/tests/unit/test_persist.py b/tests/unit/test_persist.py
index 025ad968..fafe6dc5 100644
--- a/tests/unit/test_persist.py
+++ b/tests/unit/test_persist.py
@@ -1,8 +1,11 @@
+from unittest.mock import patch
+
 import pytest
 
+from vcr.persisters.deduplicated_filesystem import DeduplicatedFilesystemPersister
 from vcr.persisters.filesystem import FilesystemPersister
 from vcr.serializers import jsonserializer, yamlserializer
 
 
 @pytest.mark.parametrize(
     "cassette_path, serializer",
@@ -28,3 +31,25 @@ def test_load_cassette_with_invalid_cassettes(cassette_path, serializer):
     with pytest.raises(Exception) as excinfo:
         FilesystemPersister.load_cassette(cassette_path, serializer)
     assert "run the migration script" not in excinfo.exconly()
+
+
+@pytest.mark.parametrize(
+    "cassette_path, serializer",
+    [
+        ("tests/fixtures/migration/cassette_with_duplicate_requests.yaml", yamlserializer),
+    ],
+)
+def test_load_cassette_with_duplicate_requests_cassettes(cassette_path, serializer):
+    requests, responses = DeduplicatedFilesystemPersister.load_cassette(cassette_path, serializer)
+    assert len(requests) == 2
+    cassette_dict = {"requests": requests, "responses": responses}
+
+    # Patch the parent persister so nothing is written to disk; the
+    # deduplication happens when the cassette is saved.
+    with patch.object(FilesystemPersister, "save_cassette") as mock:
+        DeduplicatedFilesystemPersister.save_cassette(cassette_path, cassette_dict, serializer)
+
+    assert mock.call_count == 1
+    saved_dict = mock.call_args[0][1]
+    assert len(saved_dict["requests"]) == 1
+    assert len(saved_dict["responses"]) == 1
diff --git a/vcr/cassette.py b/vcr/cassette.py
index 5822afac..c943bd07 100644
--- a/vcr/cassette.py
+++ b/vcr/cassette.py
@@ -12,6 +12,7 @@
 from .matchers import get_matchers_results, method, requests_match, uri
 from .patch import CassettePatcherBuilder
 from .persisters.filesystem import FilesystemPersister
+from .persisters.deduplicated_filesystem import DeduplicatedFilesystemPersister
 from .record_mode import RecordMode
 from .serializers import yamlserializer
 from .util import partition_dict
diff --git a/vcr/persisters/deduplicated_filesystem.py b/vcr/persisters/deduplicated_filesystem.py
new file mode 100644
index 00000000..2ba92b8d
--- /dev/null
+++ b/vcr/persisters/deduplicated_filesystem.py
@@ -0,0 +1,78 @@
+"""Filesystem persister that drops duplicated requests when saving a cassette.
+
+Adapted from https://github.com/Azure/azure-sdk-for-python/pull/17973/files
+"""
+
+from .filesystem import FilesystemPersister
+
+# Two requests are duplicates when every one of these attributes compares equal.
+ATTRIBUTES_TO_COMPARE = [
+    "body",
+    "headers",
+    "host",
+    "method",
+    "path",
+    "protocol",
+    "query",
+    "scheme",
+    "uri",
+    "url",
+]
+
+# Number of preceding requests each request is compared against.
+MAX_LOOKBACK = 3
+
+
+def trim_duplicates(cassette_dict):
+    """Return a new cassette dict without recently repeated requests.
+
+    A request/response pair is dropped when an equal request (see
+    ``same_requests``) occurs at most ``MAX_LOOKBACK`` positions after it, so
+    the last occurrence of a repeated request is the one that is kept.
+
+    :param cassette_dict: dict with index-aligned ``"requests"`` and
+        ``"responses"`` lists; it is not modified.
+    :return: dict with the same two keys holding the surviving pairs in order.
+    """
+    requests = cassette_dict["requests"]
+    responses = cassette_dict["responses"]
+
+    indexes_to_remove = set()
+    for i in range(1, len(requests)):
+        # ``min(i, MAX_LOOKBACK) + 1`` keeps index 0 inside the window; the
+        # exclusive ``range`` bound would otherwise never compare against it.
+        for j in range(1, min(i, MAX_LOOKBACK) + 1):
+            if same_requests(requests[i - j], requests[i]):
+                indexes_to_remove.add(i - j)
+
+    # A pair is only marked because an equal request follows it, so the last occurrence survives.
+    ret = {"requests": [], "responses": []}
+    for i, request in enumerate(requests):
+        if i not in indexes_to_remove:
+            ret["requests"].append(request)
+            ret["responses"].append(responses[i])
+
+    return ret
+
+
+def same_requests(request1, request2):
+    """Return True when the requests agree on every attribute in ``ATTRIBUTES_TO_COMPARE``.
+
+    :param request1: object exposing the compared attributes (e.g. ``vcr.Request``).
+    :param request2: object exposing the compared attributes (e.g. ``vcr.Request``).
+    """
+    for attr in ATTRIBUTES_TO_COMPARE:
+        if getattr(request1, attr) != getattr(request2, attr):
+            return False
+
+    return True
+
+
+class DeduplicatedFilesystemPersister(FilesystemPersister):
+    """``FilesystemPersister`` that trims duplicate requests before saving."""
+
+    @staticmethod
+    def save_cassette(cassette_path, cassette_dict, serializer):
+        """Deduplicate ``cassette_dict`` and pass it to ``FilesystemPersister.save_cassette``."""
+        cassette_dict = trim_duplicates(cassette_dict)
+        FilesystemPersister.save_cassette(cassette_path, cassette_dict, serializer)