From 911eb609461a5bad653e4ebcd2cbcad97d2530c9 Mon Sep 17 00:00:00 2001 From: chrislu Date: Sun, 23 Nov 2025 14:58:31 -0800 Subject: [PATCH] debug: add directory structure inspection before file download Added weed shell commands to inspect the directory structure: - List /test-spark/ to see what directories exist - List /test-spark/employees/ to see what files are there This will help diagnose why the HTTP API returns an empty response: - Are the files there but the HTTP API is not working? - Are the files in a different location? - Were the files cleaned up after the test? - Did the volume data persist after the container restart? This will show us exactly what's in SeaweedFS after a test failure. --- .github/workflows/spark-integration-tests.yml | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/.github/workflows/spark-integration-tests.yml b/.github/workflows/spark-integration-tests.yml index 434af66ca..b621dd577 100644 --- a/.github/workflows/spark-integration-tests.yml +++ b/.github/workflows/spark-integration-tests.yml @@ -153,8 +153,17 @@ jobs: # Install parquet-tools pip3 install parquet-tools - # List available files - echo "Available Parquet files:" + # First, check what's in the test-spark directory + echo "=== Checking test-spark directory structure ===" + echo -e "fs.ls /test-spark/\nexit" | docker compose exec -T seaweedfs-master weed shell + + echo "" + echo "=== Checking employees directory ===" + echo -e "fs.ls /test-spark/employees/\nexit" | docker compose exec -T seaweedfs-master weed shell + + # List available files via HTTP + echo "" + echo "=== Available Parquet files via HTTP API ===" echo "Checking: http://localhost:8888/test-spark/employees/" curl -s http://localhost:8888/test-spark/employees/?pretty=y | tee files.json