Q1.

pip version : 24.3.1

~/data-engineering-zoomcamp main* ⇡ ❯ docker run -it python:3.12.8-slim bash                                                                                                                              17s de-zoomcamp-py3.12 22:58:21
root@30b7734d5f9fe:/# pip -V
pip 24.3.1 from /usr/local/lib/python3.12/site-packages/pip (python 3.12)

Q2

services:
  db:
    container_name: postgres
    image: postgres:17-alpine
    environment:
      POSTGRES_USER: 'postgres'
      POSTGRES_PASSWORD: 'postgres'
      POSTGRES_DB: 'ny_taxi'
    ports:
      - '5433:5432'
    volumes:
      - vol-pgdata:/var/lib/postgresql/data

  pgadmin:
    container_name: pgadmin
    image: dpage/pgadmin4:latest
    environment:
      PGADMIN_DEFAULT_EMAIL: "[email protected]"
      PGADMIN_DEFAULT_PASSWORD: "pgadmin"
    ports:
      - "8080:80"
    volumes:
      - vol-pgadmin_data:/var/lib/pgadmin

volumes:
  vol-pgdata:
    name: vol-pgdata
  vol-pgadmin_data:
    name: vol-pgadmin_data

In this code. pgadmin should use postgres:5432 (or db:5432)

It must use service name or contanier_name in hostname.

Q3

SELECT 
    COUNT(*) AS cnt
FROM green_tripdata
GROUP BY 
    "VendorID", "lpep_pickup_datetime",
    "lpep_dropoff_datetime", "store_and_fwd_flag",
    "RatecodeID", "PULocationID",
    "DOLocationID", "passenger_count",
    "trip_distance", "fare_amount",
    "extra", "mta_tax",
    "tip_amount", "tolls_amount",
    "ehail_fee", "improvement_surcharge", "total_amount",
    "payment_type", "trip_type", "congestion_surcharge"
HAVING
    COUNT(*) > 1;

SELECT 
    SUM(CASE WHEN Trip_distance <= 1 THEN 1 END) AS "1. 1마일 이하",
    SUM(CASE WHEN Trip_distance > 1 AND Trip_distance <= 3 THEN 1 END) AS "2. 1마일 초과 3마일 이하",
    SUM(CASE WHEN Trip_distance > 3 AND Trip_distance <= 7 THEN 1 END) AS "3. 3마일 초과 7마일 이하",
    SUM(CASE WHEN Trip_distance > 7 AND Trip_distance <= 10 THEN 1 END) AS "4. 7마일 초과 10마일 이하",
    SUM(CASE WHEN Trip_distance > 10 THEN 1 END) AS "5. 10마일 초과"
FROM green_tripdata
WHERE lpep_pickup_datetime >= '2019-10-01' AND lpep_dropoff_datetime< '2019-11-01';

**-- 104802	198924	109603	27678	35189 // answer**

Q4