Return the latitude / longitude of the address
import os
import requests
import json
import pandas as pd
apiKey = 'NpY7DwL9AxxxxxxxxxxxxxxxxN03Gvht3vYE'
inputFile = pd.read_csv('SevereInjuryBefore.csv')
outputFile = 'outputTest.csv'
results = []
url = f'https://atlas.microsoft.com/search/address/json?api-version=1.0&query={address1}" "{address2}", "{city}", "{state_cd}" "{zip}&subscription-key={apiKey}'
headers = { 'Content-type': 'application/json'}
r = requests.get(url, headers = headers).json()['results'][0]['position']
lon = r.get("lon")
lat = r.get("lat")
file1 = open(outputFile, 'a', newline='')
writer_object = writer(file1)
writer_object.writerow([ID, report_id, eventdate, latitude, longitude, elevation, weatherCode, temp_max, temp_min, temp_feel_like_min, temp_feel_like_max, temp_feel_like_avg, sunrise, sunset, precip, rain, snowfall, precip_hours, windspeed_max, wind_gust_max, wind_direction])
file1.close()
results.append([lon, lat])
output_df = pd.DataFrame(results, columns = ['longitude','latitude'])
output_df
latitude | longitude | |
---|---|---|
0 | -79.92106 | 40.45885 |
import os
import requests
import json
import pandas as pd
apiKey = 'NpY7DwLxxxxxxxxxxxxxxxxxxxxxN03Gvht3vYE'
inputFile = pd.read_csv('SevereInjuryBefore.csv')
outputFile = 'outputTest.csv'
results = []
test = pd.DataFrame(inputFile, columns = ['report_id','upa','eventdate','address1','address2','city','state_cd','zip','latitude','longitude'])
test
report_id | upa | eventdate | address1 | address2 | city | state_cd | zip | latitude | longitude | |
---|---|---|---|---|---|---|---|---|---|---|
0 | 2021087301 | 1804581 | 2021-08-26 | 12600 S. Torrence Ave. | NaN | CHICAGO | IL | 60633.0 | 41.66 | -87.56 |
1 | 2021087302 | 1812019 | 2021-08-26 | 600 N Pickaway St | NaN | CIRCLEVILLE | OH | 43113.0 | 39.61 | -82.93 |
2 | 2021087303 | 1804042 | 2021-08-26 | 11600 S. Burley Ave. | NaN | CHICAGO | IL | 60617.0 | 41.69 | -87.54 |
3 | 2020076225 | 1630100 | 2020-07-03 | 5415 Dayton Street | NaN | OMAHA | NE | 68117.0 | 41.21 | -95.99 |
4 | 2021087304 | 1803837 | 2021-08-26 | Shoppes of Aberdeen | NaN | JACKSONVILLE | FL | 32259.0 | 30.07 | -81.58 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
74019 | 2021087293 | 1804449 | 2021-08-25 | 6910 N. Whirlpool Dr. | NaN | TULSA | OK | 74117.0 | 36.25 | -95.92 |
74020 | 2021087294 | 1804872 | 2021-08-25 | 65 Broadway | NaN | DENVER | CO | 80203.0 | 39.71 | -104.98 |
74021 | 2021087296 | 1803853 | 2021-08-25 | 913 Industrial Park Circle | NaN | BESSEMER | AL | 35022.0 | 33.36 | -86.97 |
74022 | 2021087297 | 1803276 | 2021-08-25 | 10102 F Street | NaN | OMAHA | NE | 68127.0 | 41.21 | -96.07 |
74023 | 2021087300 | 1803876 | 2021-08-26 | 636 Gerard Avenue | NaN | EAST AURORA | NY | 14052.0 | 42.77 | -78.61 |
74024 rows × 10 columns
Also Changed the source to the Postgres table instead of a csv file.
from sqlalchemy import create_engine, text, inspect
import os
import requests
import json
import pandas as pd
apiKey = 'NpY7DwL9Ay3xxxxxxxxxxxxxxxxxx03Gvht3vYE'
results = []
engine = create_engine('postgresql://user:USE_REAL_PASSWORD@IP_ADDRESS:5432/oshaData')
with engine.begin() as connection:
res = pd.read_sql(
sql=text(f'SELECT address1, address2, city, state_cd, zip, latitude ,longitude FROM typing."severeInjury" ORDER BY report_id LIMIT 10'),
con=connection,
)
for index, row in res.iterrows():
address1 = row[0]
address2 = row[1]
city = row[2]
state_cd = row[3]
zip = row[4]
latitude = row[5]
longitude = row[6]
url = f'https://atlas.microsoft.com/search/address/json?api-version=1.0&query={address1}" "{address2}", "{city}", "{state_cd}" "{zip}&subscription-key={apiKey}'
#url = f'https://atlas.microsoft.com/search/address/json?api-version=1.0&query=Store 17 6320 Shakespeare Avenue, Pittsburgh, PA 15206&subscription-key={apiKey}'
headers = { 'Content-type': 'application/json'}
r = requests.get(url, headers = headers).json()['results'][0]['position']
lon = r.get("lon")
lat = r.get("lat")
results.append([address1, address2, city, state_cd, zip, latitude , lat, longitude, lon])
output_df = pd.DataFrame(results, columns = ['address1', 'address2', 'city', 'state_cd', 'zip', 'latitude','FixedLatitude', 'longitude', 'FixedLongitude'])
output_df
#output_df.to_csv('BaseDataOutput.csv', index=False)
## Data returned below compares the before and after Lat, Log
address1 | address2 | city | state_cd | zip | latitude | FixedLatitude | longitude | FixedLongitude | |
---|---|---|---|---|---|---|---|---|---|
0 | 8910 Point Six Circle | None | HOUSTON | TX | 77240 | 29.90 | 29.90659 | -95.62 | -95.62460 |
1 | 101 Plantation St | None | WORCESTER | MA | 1604 | 42.25 | 42.25797 | -71.78 | -71.78340 |
2 | 1161 BUCKEYE RD. | None | LIMA | OH | 45804 | 40.70 | 40.70860 | -84.12 | -84.12820 |
3 | 1044 Park Street | None | STOUGHTON | MA | 2072 | 42.10 | 42.10995 | -71.06 | -71.07057 |
4 | 711 N Bower Road | None | MACOMB | IL | 61455 | 40.46 | 40.48492 | -90.63 | -90.68211 |
5 | Two Mile Drive | None | OTISVILLE | NY | 10963 | 41.46 | 41.48640 | -74.53 | -74.53597 |
6 | 2901 E. Industrial Blvd | None | WACO | TX | 76705 | 31.60 | 31.60390 | -97.11 | -97.11938 |
7 | 171 Progress Drive | None | LAKE DELTON | WI | 53940 | 43.59 | 43.60309 | -89.79 | -89.81509 |
8 | 5901 S. LaGrange Road | None | COUNTRYSIDE | IL | 60525 | 41.78 | 41.78551 | -87.86 | -87.86921 |
9 | 9000 Rockville Pike | None | BETHESDA | MD | 20892 | 39.00 | 39.00197 | -77.10 | -77.10454 |