Working with CSV and JSON Data - Python Mastery

CSV Files Without pandas

Python's built-in csv module handles CSV files when you don't need the full power of pandas.

import csv

# Write CSV
data = [
    ["Name", "Age", "City"],
    ["Alice", 30, "New York"],
    ["Bob", 25, "Los Angeles"],
    ["Carol", 35, "Chicago"],
]
# newline="" hands line-ending control to the csv module; an explicit
# encoding keeps the file independent of the platform's locale.
with open("people.csv", "w", newline="", encoding="utf-8") as f:
    writer = csv.writer(f)
    writer.writerows(data)

# Read CSV as lists
# newline="" matters when reading too: without it, newlines embedded in
# quoted fields may not be interpreted correctly.
with open("people.csv", newline="", encoding="utf-8") as f:
    reader = csv.reader(f)
    header = next(reader)       # Consume the header row so the loop sees only data
    for row in reader:
        name, age, city = row   # Every field is read back as a str, including age
        print(f"{name} lives in {city}")

# Read CSV as dicts (first row becomes keys)
with open("people.csv", newline="", encoding="utf-8") as f:
    reader = csv.DictReader(f)
    for row in reader:
        print(row["Name"], row["City"])

# Write dicts
people = [
    {"Name": "Alice", "Age": 30, "City": "NYC"},
    {"Name": "Bob",   "Age": 25, "City": "LA"},
]
with open("output.csv", "w", newline="", encoding="utf-8") as f:
    # fieldnames fixes the column order and supplies the header text
    writer = csv.DictWriter(f, fieldnames=["Name", "Age", "City"])
    writer.writeheader()
    writer.writerows(people)

JSON Data

import json

# Serialization: Python → JSON
data = {
    "name": "Alice",
    "age": 30,
    "hobbies": [
        "python",
        "chess",
        "hiking",
    ],
    "address": {
        "city": "NYC",
        "zip": "10001",
    },
    "active": True,
    "score": None,  # Python None is serialized as JSON null
}

# Serialize to a string; indent=2 pretty-prints the output for human readers
json_str = json.dumps(data, indent=2)

# Persist the same JSON text to a file
with open("data.json", "w") as out_file:
    out_file.write(json_str)

print(json_str)

# Load the file back into Python objects
with open("data.json") as in_file:
    loaded = json.loads(in_file.read())

# Decode JSON held in a string
parsed = json.loads('{"name": "Bob", "age": 25}')
parsed_type = type(parsed)
print(parsed_type)    # dict

# Python → JSON type mapping
# dict → object {}
# list, tuple → array []
# str → string
# int, float → number
# True/False → true/false
# None → null

Working with Nested JSON

import json
import requests

# GitHub API response (complex nested JSON)
# A timeout stops the script from hanging on an unresponsive server, and
# raise_for_status() turns an HTTP error response into an exception here
# instead of a confusing KeyError on the lookups below.
response = requests.get("https://api.github.com/repos/python/cpython", timeout=10)
response.raise_for_status()
repo = response.json()  # Decode the JSON response body into Python objects

print(repo["name"])           # "cpython"
print(repo["stargazers_count"])
print(repo["owner"]["login"]) # Nested object
print(repo["topics"])         # Array

# Parse array of objects: a JSON array of objects decodes to a list of dicts
users_json = '[{"id": 1, "name": "Alice"}, {"id": 2, "name": "Bob"}]'
users = json.loads(users_json)
for user in users:
    user_id, user_name = user["id"], user["name"]
    print(user_id, user_name)

# Safely access nested values
def get_nested(d, *keys, default=None):
    """Follow ``keys`` through nested dicts and return the value found.

    Args:
        d: The outermost object to search (typically parsed JSON).
        *keys: Keys to follow, outermost first. With no keys, ``d`` itself
            is returned.
        default: Value returned when the path cannot be followed.

    Returns:
        The value stored at the end of the path, or ``default`` if any key
        is missing or a value along the way is not a dict. A key that is
        present with the value ``None`` yields ``None``, not ``default``.
    """
    for key in keys:
        # Stop at the first broken link so that a dict-valued ``default`` is
        # returned as-is rather than being searched with the remaining keys.
        if not isinstance(d, dict) or key not in d:
            return default
        d = d[key]
    return d

# Example lookups; the return values are discarded here, so assign or print
# them in a real script.
# Follows repo["owner"]["login"]; yields None if either key is missing.
get_nested(repo, "owner", "login")        # "python"
# Yields None when "license" is missing or its value is not a dict (e.g. JSON null).
get_nested(repo, "license", "name")       # e.g. "PSF-2.0", or None (actual value depends on the API response)

Data Transformation Patterns

import json, csv

# JSON API → CSV report
# A timeout stops the script from hanging on an unresponsive server, and
# raise_for_status() surfaces an HTTP error response here instead of as a
# KeyError on "items" below.
response = requests.get("https://api.github.com/search/repositories?q=python&sort=stars&per_page=10", timeout=10)
response.raise_for_status()
repos = response.json()["items"]

# Extract relevant fields
records = [
    {"name": r["name"], "stars": r["stargazers_count"], "url": r["html_url"]}
    for r in repos
]

# Write to CSV
# The text comes from an external API, so write it as UTF-8 explicitly rather
# than relying on the platform's default encoding.
with open("top_repos.csv", "w", newline="", encoding="utf-8") as f:
    writer = csv.DictWriter(f, fieldnames=["name", "stars", "url"])
    writer.writeheader()
    writer.writerows(records)

# CSV → JSON transformation
# newline="" lets the csv module handle line endings itself, so newlines
# embedded in quoted fields are parsed correctly.
with open("input.csv", newline="") as f:
    reader = csv.DictReader(f)
    records = list(reader)   # One dict per data row, keyed by the header row

with open("output.json", "w") as f:
    json.dump(records, f, indent=2)   # indent=2 for human-readable output

Key Takeaways

csv.DictReader/DictWriter: work with CSV as dicts — cleaner than lists
newline="" when opening CSV files (for reading or writing): lets the csv module manage line endings, which prevents blank rows between records on Windows
json.dump/load for files, json.dumps/loads for strings
indent=2 in json.dump: human-readable output for debugging
None → null, True → true: Python/JSON type mapping to remember

Practice Exercises

Write a script that converts a JSON file (array of objects) to CSV and vice versa.
Read a CSV of transactions (date, amount, category), compute total by category, and write results to a new CSV.
Fetch JSON from a public API, extract specific fields, and save as a clean CSV.

← pandas Basics Web Scraping →