Visualizing harmful PM<sub>2.5</sub> levels in the US by county

2024-12-08·
Ka Ming FUNG
Ka Ming FUNG
· 3 min read
# %pip install pandas geopandas folium matplotlib mapclassify
import pandas as pd
import geopandas as gpd
# download the data from US NIH (https://hdpulse.nimhd.nih.gov/data-portal/physical/table?age=001&age_options=ageall_1&demo=234&demo_options=air_pollution_1&physicaltopic=002&physicaltopic_options=physical_2&race=00&race_options=raceall_1&sex=0&sex_options=sexboth_1&statefips=99&statefips_options=area_states)

county_pm25: pd.DataFrame = pd.read_csv(
    "HDPulse_data_export.csv",
    skiprows=5,
)
county_pm25

CountyFIPSMicrograms per cubic meter (PM2.5)(2)
0United States0.07.4
1San Bernardino County, California6071.015.6
2Fairbanks North Star, Alaska2090.015.5
3Allegheny County, Pennsylvania42003.014.1
4San Diego County, California6073.013.8
............
3146Notes:NaNNaN
3147Source: National Environmental Public Health T...NaNNaN
3148Average daily density of fine particulate matt...NaNNaN
3149Some data are not available or suppressed due ...NaNNaN
3150Note: This website still uses Connecticut coun...NaNNaN

3151 rows × 3 columns

county_pm25_processed: pd.DataFrame = (
    county_pm25.assign(
        # make PM2.5 reading a float
        pm25_ug_per_m3=lambda x: pd.to_numeric(arg=x[x.keys()[-1]], errors="coerce"),
        # convert FIPS to a 5-digit string
        FIPS=lambda x: pd.to_numeric(x["FIPS"]),
    )
    .dropna(
        # drop rows with missing PM2.5 readings
        subset=[
            "FIPS",
            "pm25_ug_per_m3",
        ],
    )
    .assign(
        FIPS=lambda x: x["FIPS"].astype(int).astype(str).str.zfill(5),
    )
)
# optional sense check
county_pm25_processed

CountyFIPSMicrograms per cubic meter (PM2.5)(2)pm25_ug_per_m3
0United States000007.47.4
1San Bernardino County, California0607115.615.6
2Fairbanks North Star, Alaska0209015.515.5
3Allegheny County, Pennsylvania4200314.114.1
4San Diego County, California0607313.813.8
...............
3111Custer County, South Dakota460332.62.6
3112Apache County, Arizona040012.52.5
3113Campbell County, Wyoming560052.42.4
3114Converse County, Wyoming560092.22.2
3115Gallatin County, Montana300310.90.9

3116 rows × 4 columns

# download us county shape files from https://www.census.gov/geographies/mapping-files/time-series/geo/carto-boundary-file.html
counties: gpd.GeoDataFrame = gpd.read_file(
    "https://www2.census.gov/geo/tiger/GENZ2018/shp/cb_2018_us_county_500k.zip"
)
counties_processed: gpd.GeoDataFrame = counties.assign(
    FIPS=lambda x: x["STATEFP"] + x["COUNTYFP"],
)
# optional sense check
counties_processed

STATEFPCOUNTYFPCOUNTYNSAFFGEOIDGEOIDNAMELSADALANDAWATERgeometryFIPS
021007005168500500000US2100721007Ballard0663938745469473325POLYGON ((-89.18137 37.0463, -89.17938 37.0530...21007
121017005168550500000US2101721017Bourbon067504393514829777POLYGON ((-84.44266 38.28324, -84.44114 38.283...21017
221031005168620500000US2103121031Butler06110357197413943044POLYGON ((-86.94486 37.07341, -86.94346 37.074...21031
321065005168790500000US2106521065Estill066555099306516335POLYGON ((-84.12662 37.6454, -84.12483 37.6461...21065
421069005168810500000US2106921069Fleming069027271517182793POLYGON ((-83.98428 38.44549, -83.98246 38.450...21069
....................................
322831073008358580500000US3107331073Gosper06118661623711831826POLYGON ((-100.0951 40.43866, -100.08937 40.43...31073
322939075010740500500000US3907539075Holmes0610944058663695230POLYGON ((-82.22066 40.66758, -82.19327 40.667...39075
323048171013838710500000US4817148171Gillespie0627407191149012764POLYGON ((-99.304 30.49983, -99.28234 30.49967...48171
323155079015811000500000US5507955079Milwaukee066254405632455383635POLYGON ((-88.06959 42.86726, -88.06959 42.872...55079
323226139016230120500000US2613926139Ottawa0614595024082765830983POLYGON ((-86.26432 43.1183, -86.25103 43.1182...26139

3233 rows × 11 columns

# merge the two dataframes
counties_w_pm25 = counties_processed.merge(
    right=county_pm25_processed,
    on="FIPS",
    how="left",
)
# optional sense check
counties_w_pm25.STATEFP.unique()
array(['21', '17', '18', '01', '02', '05', '06', '08', '09', '11', '12',
       '13', '15', '16', '19', '20', '48', '29', '30', '31', '53', '22',
       '23', '24', '34', '35', '36', '37', '38', '39', '40', '49', '41',
       '42', '45', '46', '47', '25', '26', '51', '72', '78', '27', '28',
       '32', '33', '04', '54', '55', '56', '60', '69', '50', '10', '44',
       '66'], dtype=object)
# display PM2.5 in Contiguous US
ax = counties_w_pm25.pipe(lambda x: x[(x['STATEFP'].astype(int)<=56) & ~(x['STATEFP'].astype(int).isin([2, 15]))]).plot(column='pm25_ug_per_m3', legend=False, cmap='magma',)
ax.set_axis_off()

import matplotlib.pyplot as plt

# Customize the colorbar
plt.colorbar(ax.collections[0], orientation='horizontal', label='24-hr average PM2.5 (ug/m3) in 2018')

plt.show()

png

# display the data on map by level of PM2.5
m = counties_w_pm25.explore(
    column="pm25_ug_per_m3",
)

interactive maps