Visualizing harmful PM<sub>2.5</sub> levels in the US by county
2024-12-08··
3 min read
Ka Ming FUNG
# %pip install pandas geopandas folium matplotlib mapclassify
import pandas as pd
import geopandas as gpd
# County-level PM2.5 table exported as CSV from the US NIH HDPulse data portal:
# https://hdpulse.nimhd.nih.gov/data-portal/physical/table?age=001&age_options=ageall_1&demo=234&demo_options=air_pollution_1&physicaltopic=002&physicaltopic_options=physical_2&race=00&race_options=raceall_1&sex=0&sex_options=sexboth_1&statefips=99&statefips_options=area_states
# The first five lines of the export are skipped so that the next line is read
# as the column header (presumably they hold a title/preamble -- verify against
# the downloaded file).
county_pm25: pd.DataFrame = pd.read_csv("HDPulse_data_export.csv", skiprows=5)

# peek at the raw table
county_pm25
County | FIPS | Micrograms per cubic meter (PM2.5)(2) | |
---|---|---|---|
0 | United States | 0.0 | 7.4 |
1 | San Bernardino County, California | 6071.0 | 15.6 |
2 | Fairbanks North Star, Alaska | 2090.0 | 15.5 |
3 | Allegheny County, Pennsylvania | 42003.0 | 14.1 |
4 | San Diego County, California | 6073.0 | 13.8 |
... | ... | ... | ... |
3146 | Notes: | NaN | NaN |
3147 | Source: National Environmental Public Health T... | NaN | NaN |
3148 | Average daily density of fine particulate matt... | NaN | NaN |
3149 | Some data are not available or suppressed due ... | NaN | NaN |
3150 | Note: This website still uses Connecticut coun... | NaN | NaN |
3151 rows × 3 columns
# Clean the raw export in three steps:
#   1. coerce the PM2.5 reading and the FIPS code to numbers,
#   2. drop every row where either of them is missing (this removes the
#      notes/source footer rows, whose FIPS and reading are NaN),
#   3. render FIPS as a zero-padded 5-character string (e.g. 6071 -> "06071").
county_pm25_processed: pd.DataFrame = (
    county_pm25.assign(
        # make PM2.5 reading a float; the reading is taken from the last
        # column of the raw table and unparseable values become NaN
        pm25_ug_per_m3=lambda x: pd.to_numeric(x[x.columns[-1]], errors="coerce"),
        # make FIPS numeric; coerce like the reading so a non-numeric code
        # becomes NaN and is dropped below instead of raising ValueError
        FIPS=lambda x: pd.to_numeric(x["FIPS"], errors="coerce"),
    )
    .dropna(
        # drop rows with a missing FIPS code or a missing PM2.5 reading
        subset=[
            "FIPS",
            "pm25_ug_per_m3",
        ],
    )
    .assign(
        # convert FIPS to a 5-digit string; the cast to int is safe only
        # because rows with NaN FIPS were removed by the dropna above
        FIPS=lambda x: x["FIPS"].astype(int).astype(str).str.zfill(5),
    )
)
# optional sense check
county_pm25_processed
County | FIPS | Micrograms per cubic meter (PM2.5)(2) | pm25_ug_per_m3 | |
---|---|---|---|---|
0 | United States | 00000 | 7.4 | 7.4 |
1 | San Bernardino County, California | 06071 | 15.6 | 15.6 |
2 | Fairbanks North Star, Alaska | 02090 | 15.5 | 15.5 |
3 | Allegheny County, Pennsylvania | 42003 | 14.1 | 14.1 |
4 | San Diego County, California | 06073 | 13.8 | 13.8 |
... | ... | ... | ... | ... |
3111 | Custer County, South Dakota | 46033 | 2.6 | 2.6 |
3112 | Apache County, Arizona | 04001 | 2.5 | 2.5 |
3113 | Campbell County, Wyoming | 56005 | 2.4 | 2.4 |
3114 | Converse County, Wyoming | 56009 | 2.2 | 2.2 |
3115 | Gallatin County, Montana | 30031 | 0.9 | 0.9 |
3116 rows × 4 columns
# US county boundaries (2018 cartographic boundary shapefile, 500k variant),
# read straight from the Census Bureau zip archive. Landing page:
# https://www.census.gov/geographies/mapping-files/time-series/geo/carto-boundary-file.html
counties: gpd.GeoDataFrame = gpd.read_file(
    "https://www2.census.gov/geo/tiger/GENZ2018/shp/cb_2018_us_county_500k.zip"
)

# Add a FIPS join key by concatenating the state code and the county code.
# Both columns are strings in the sample output, so "+" joins them as text
# ("21" + "007" -> "21007").
counties_processed: gpd.GeoDataFrame = counties.assign(
    FIPS=lambda shapes: shapes["STATEFP"] + shapes["COUNTYFP"]
)

# optional sense check
counties_processed
STATEFP | COUNTYFP | COUNTYNS | AFFGEOID | GEOID | NAME | LSAD | ALAND | AWATER | geometry | FIPS | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | 21 | 007 | 00516850 | 0500000US21007 | 21007 | Ballard | 06 | 639387454 | 69473325 | POLYGON ((-89.18137 37.0463, -89.17938 37.0530... | 21007 |
1 | 21 | 017 | 00516855 | 0500000US21017 | 21017 | Bourbon | 06 | 750439351 | 4829777 | POLYGON ((-84.44266 38.28324, -84.44114 38.283... | 21017 |
2 | 21 | 031 | 00516862 | 0500000US21031 | 21031 | Butler | 06 | 1103571974 | 13943044 | POLYGON ((-86.94486 37.07341, -86.94346 37.074... | 21031 |
3 | 21 | 065 | 00516879 | 0500000US21065 | 21065 | Estill | 06 | 655509930 | 6516335 | POLYGON ((-84.12662 37.6454, -84.12483 37.6461... | 21065 |
4 | 21 | 069 | 00516881 | 0500000US21069 | 21069 | Fleming | 06 | 902727151 | 7182793 | POLYGON ((-83.98428 38.44549, -83.98246 38.450... | 21069 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
3228 | 31 | 073 | 00835858 | 0500000US31073 | 31073 | Gosper | 06 | 1186616237 | 11831826 | POLYGON ((-100.0951 40.43866, -100.08937 40.43... | 31073 |
3229 | 39 | 075 | 01074050 | 0500000US39075 | 39075 | Holmes | 06 | 1094405866 | 3695230 | POLYGON ((-82.22066 40.66758, -82.19327 40.667... | 39075 |
3230 | 48 | 171 | 01383871 | 0500000US48171 | 48171 | Gillespie | 06 | 2740719114 | 9012764 | POLYGON ((-99.304 30.49983, -99.28234 30.49967... | 48171 |
3231 | 55 | 079 | 01581100 | 0500000US55079 | 55079 | Milwaukee | 06 | 625440563 | 2455383635 | POLYGON ((-88.06959 42.86726, -88.06959 42.872... | 55079 |
3232 | 26 | 139 | 01623012 | 0500000US26139 | 26139 | Ottawa | 06 | 1459502408 | 2765830983 | POLYGON ((-86.26432 43.1183, -86.25103 43.1182... | 26139 |
3233 rows × 11 columns
# Attach the PM2.5 readings to the county shapes via the shared FIPS key.
# A left join keeps every county geometry; counties without a matching
# reading end up with NaN in the PM2.5 columns.
counties_w_pm25 = counties_processed.merge(
    county_pm25_processed,
    how="left",
    on="FIPS",
)

# optional sense check: which state codes are present after the merge
counties_w_pm25["STATEFP"].unique()
array(['21', '17', '18', '01', '02', '05', '06', '08', '09', '11', '12',
'13', '15', '16', '19', '20', '48', '29', '30', '31', '53', '22',
'23', '24', '34', '35', '36', '37', '38', '39', '40', '49', '41',
'42', '45', '46', '47', '25', '26', '51', '72', '78', '27', '28',
'32', '33', '04', '54', '55', '56', '60', '69', '50', '10', '44',
'66'], dtype=object)
# pyplot is needed below for the colorbar and for showing the figure
import matplotlib.pyplot as plt

# Static choropleth of PM2.5 for the contiguous US: rows whose state code is
# above 56, or is 02 (Alaska) or 15 (Hawaii), are left out of the plot.
ax = counties_w_pm25.loc[
    lambda gdf: ~(
        (gdf["STATEFP"].astype(int) > 56)
        | gdf["STATEFP"].astype(int).isin([2, 15])
    )
].plot(
    column="pm25_ug_per_m3",
    cmap="magma",
    legend=False,  # the colorbar is added by hand below
)
ax.set_axis_off()

# Customize the colorbar: build it from the first collection drawn on the axes
plt.colorbar(
    ax.collections[0],
    label="24-hr average PM2.5 (ug/m3) in 2018",
    orientation="horizontal",
)
plt.show()
# Interactive map of all merged counties, colored by their PM2.5 reading;
# the result is kept in `m` so it can be displayed or saved afterwards.
m = counties_w_pm25.explore(column="pm25_ug_per_m3")