Parse JSON From External URL With Python
A few snippets to help you out
In its most basic form.
from requests import get
# Request `url`, then decode the JSON body of the response into Python objects.
response = get(url)
data = response.json()
Now let's add a URL and print out the results. Let's use https://jsonplaceholder.typicode.com, as it's free and all that jazz.
from requests import get
# Address of a single user record on the JSONPlaceholder test API.
url = 'https://jsonplaceholder.typicode.com/users/1'

# Fetch the record and decode the JSON body before printing it.
response = get(url)
data = response.json()
print(data)
We'll get some data which looks something like this.
{'id': 1, 'name': 'Leanne Graham', 'username': 'Bret', 'email': 'Sincere@april.biz', 'address': {'street': 'Kulas Light', 'suite': 'Apt. 556', 'city': 'Gwenborough', 'zipcode': '92998-3874', 'geo': {'lat': '-37.3159', 'lng': '81.1496'}}, 'phone': '1-770-736-8031 x56442', 'website': 'hildegard.org', 'company': {'name': 'Romaguera-Crona', 'catchPhrase': 'Multi-layered client-server neural-net', 'bs': 'harness real-time e-markets'}}
Let's beautify that so we can easily read it.
{
'id': 1,
'name': 'Leanne Graham',
'username': 'Bret',
'email': 'Sincere@april.biz',
'address': {
'street': 'Kulas Light',
'suite': 'Apt. 556',
'city': 'Gwenborough',
'zipcode': '92998-3874',
'geo': {
'lat': '-37.3159',
'lng': '81.1496'
}
},
'phone': '1-770-736-8031 x56442',
'website': 'hildegard.org',
'company': {
'name': 'Romaguera-Crona',
'catchPhrase': 'Multi-layered client-server neural-net',
'bs': 'harness real-time e-markets'
}
}
Next we'll want to print out specific values.
from requests import get
# Fetch user 1 and decode the JSON body.
response = get('https://jsonplaceholder.typicode.com/users/1')
data = response.json()

# Print each top-level value, one per line, in this fixed order.
for key in ('id', 'name', 'username', 'email', 'phone', 'website'):
    print(data[key])
That's just the top-level values. Now we'll get the rest of them... the nested values.
from requests import get
# Fetch user 1 and decode the JSON body.
response = get('https://jsonplaceholder.typicode.com/users/1')
data = response.json()

# Top-level values that come before the address.
for key in ('id', 'name', 'username', 'email'):
    print(data[key])

# 'address' is a nested dict which itself contains the nested 'geo' dict.
address = data['address']
for key in ('street', 'suite', 'city', 'zipcode'):
    print(address[key])
geo = address['geo']
print(geo['lat'])
print(geo['lng'])

# Remaining top-level values.
print(data['phone'])
print(data['website'])

# 'company' is another nested dict.
company = data['company']
for key in ('name', 'catchPhrase', 'bs'):
    print(company[key])
There are more entries if we change the URL to get all results.
from requests import get
# Without a trailing id the endpoint returns every user record.
response = get('https://jsonplaceholder.typicode.com/users')
datas = response.json()
print(datas)
Which returns...
[{
'id': 1,
'name': 'Leanne Graham',
'username': 'Bret',
'email': 'Sincere@april.biz',
'address': {
'street': 'Kulas Light',
'suite': 'Apt. 556',
'city': 'Gwenborough',
'zipcode': '92998-3874',
'geo': {
'lat': '-37.3159',
'lng': '81.1496'
}
},
'phone': '1-770-736-8031 x56442',
'website': 'hildegard.org',
'company': {
'name': 'Romaguera-Crona',
'catchPhrase': 'Multi-layered client-server neural-net',
'bs': 'harness real-time e-markets'
}
}, {
'id': 2,
'name': 'Ervin Howell',
'username': 'Antonette',
'email': 'Shanna@melissa.tv',
'address': {
'street': 'Victor Plains',
'suite': 'Suite 879',
'city': 'Wisokyburgh',
'zipcode': '90566-7771',
'geo': {
'lat': '-43.9509',
'lng': '-34.4618'
}
},
'phone': '010-692-6593 x09125',
'website': 'anastasia.net',
'company': {
'name': 'Deckow-Crist',
'catchPhrase': 'Proactive didactic contingency',
'bs': 'synergize scalable supply-chains'
}
}, {
'id': 3,
'name': 'Clementine Bauch',
'username': 'Samantha',
'email': 'Nathan@yesenia.net',
'address': {
'street': 'Douglas Extension',
'suite': 'Suite 847',
'city': 'McKenziehaven',
'zipcode': '59590-4157',
'geo': {
'lat': '-68.6102',
'lng': '-47.0653'
}
},
'phone': '1-463-123-4447',
'website': 'ramiro.info',
'company': {
'name': 'Romaguera-Jacobson',
'catchPhrase': 'Face to face bifurcated interface',
'bs': 'e-enable strategic applications'
}
}, {
'id': 4,
'name': 'Patricia Lebsack',
'username': 'Karianne',
'email': 'Julianne.OConner@kory.org',
'address': {
'street': 'Hoeger Mall',
'suite': 'Apt. 692',
'city': 'South Elvis',
'zipcode': '53919-4257',
'geo': {
'lat': '29.4572',
'lng': '-164.2990'
}
},
'phone': '493-170-9623 x156',
'website': 'kale.biz',
'company': {
'name': 'Robel-Corkery',
'catchPhrase': 'Multi-tiered zero tolerance productivity',
'bs': 'transition cutting-edge web services'
}
}, {
'id': 5,
'name': 'Chelsey Dietrich',
'username': 'Kamren',
'email': 'Lucio_Hettinger@annie.ca',
'address': {
'street': 'Skiles Walks',
'suite': 'Suite 351',
'city': 'Roscoeview',
'zipcode': '33263',
'geo': {
'lat': '-31.8129',
'lng': '62.5342'
}
},
'phone': '(254)954-1289',
'website': 'demarco.info',
'company': {
'name': 'Keebler LLC',
'catchPhrase': 'User-centric fault-tolerant solution',
'bs': 'revolutionize end-to-end systems'
}
}, {
'id': 6,
'name': 'Mrs. Dennis Schulist',
'username': 'Leopoldo_Corkery',
'email': 'Karley_Dach@jasper.info',
'address': {
'street': 'Norberto Crossing',
'suite': 'Apt. 950',
'city': 'South Christy',
'zipcode': '23505-1337',
'geo': {
'lat': '-71.4197',
'lng': '71.7478'
}
},
'phone': '1-477-935-8478 x6430',
'website': 'ola.org',
'company': {
'name': 'Considine-Lockman',
'catchPhrase': 'Synchronised bottom-line interface',
'bs': 'e-enable innovative applications'
}
}, {
'id': 7,
'name': 'Kurtis Weissnat',
'username': 'Elwyn.Skiles',
'email': 'Telly.Hoeger@billy.biz',
'address': {
'street': 'Rex Trail',
'suite': 'Suite 280',
'city': 'Howemouth',
'zipcode': '58804-1099',
'geo': {
'lat': '24.8918',
'lng': '21.8984'
}
},
'phone': '210.067.6132',
'website': 'elvis.io',
'company': {
'name': 'Johns Group',
'catchPhrase': 'Configurable multimedia task-force',
'bs': 'generate enterprise e-tailers'
}
}, {
'id': 8,
'name': 'Nicholas Runolfsdottir V',
'username': 'Maxime_Nienow',
'email': 'Sherwood@rosamond.me',
'address': {
'street': 'Ellsworth Summit',
'suite': 'Suite 729',
'city': 'Aliyaview',
'zipcode': '45169',
'geo': {
'lat': '-14.3990',
'lng': '-120.7677'
}
},
'phone': '586.493.6943 x140',
'website': 'jacynthe.com',
'company': {
'name': 'Abernathy Group',
'catchPhrase': 'Implemented secondary concept',
'bs': 'e-enable extensible e-tailers'
}
}, {
'id': 9,
'name': 'Glenna Reichert',
'username': 'Delphine',
'email': 'Chaim_McDermott@dana.io',
'address': {
'street': 'Dayna Park',
'suite': 'Suite 449',
'city': 'Bartholomebury',
'zipcode': '76495-3109',
'geo': {
'lat': '24.6463',
'lng': '-168.8889'
}
},
'phone': '(775)976-6794 x41206',
'website': 'conrad.com',
'company': {
'name': 'Yost and Sons',
'catchPhrase': 'Switchable contextually-based project',
'bs': 'aggregate real-time technologies'
}
}, {
'id': 10,
'name': 'Clementina DuBuque',
'username': 'Moriah.Stanton',
'email': 'Rey.Padberg@karina.biz',
'address': {
'street': 'Kattie Turnpike',
'suite': 'Suite 198',
'city': 'Lebsackbury',
'zipcode': '31428-2261',
'geo': {
'lat': '-38.2386',
'lng': '57.2232'
}
},
'phone': '024-648-3804',
'website': 'ambrose.net',
'company': {
'name': 'Hoeger LLC',
'catchPhrase': 'Centralized empowering task-force',
'bs': 'target end-to-end models'
}
}]
Let's loop over them and print them out.
from requests import get
# Fetch every user record; the decoded JSON is a list of dicts.
datas = get('https://jsonplaceholder.typicode.com/users').json()

# Print every value of every user, one value per line.
for data in datas:
    print(data['id'])
    print(data['name'])
    print(data['username'])
    print(data['email'])
    # Nested address values, including the doubly nested 'geo' coordinates.
    print(data['address']['street'])
    print(data['address']['suite'])
    print(data['address']['city'])
    print(data['address']['zipcode'])
    print(data['address']['geo']['lat'])
    print(data['address']['geo']['lng'])
    print(data['phone'])
    print(data['website'])
    # Nested company values.
    print(data['company']['name'])
    print(data['company']['catchPhrase'])
    print(data['company']['bs'])
Let's print all these out like this for some reason.
from requests import get
# Fetch every user record; the decoded JSON is a list of dicts.
datas = get('https://jsonplaceholder.typicode.com/users').json()

for data in datas:
    address = data['address']
    geo = address['geo']
    company = data['company']

    # (label, value) pairs in the order they are printed.
    fields = [
        ('id', data['id']),
        ('name', data['name']),
        ('username', data['username']),
        ('email', data['email']),
        ('address street', address['street']),
        ('address suite', address['suite']),
        ('address city', address['city']),
        ('address zipcode', address['zipcode']),
        ('address geo lat', geo['lat']),
        ('address geo lng', geo['lng']),
        ('phone', data['phone']),
        ('website', data['website']),
        ('company name', company['name']),
        ('company catchPhrase', company['catchPhrase']),
        ('company bs', company['bs']),
    ]
    for label, value in fields:
        print('{}: {}'.format(label, value))

    # Blank line separating one user from the next.
    print()
Just for fun
It looks like we have a pile of useless text. Let's go and get as much data as possible from this API and play around with it.
from requests import get
from wordcloud import WordCloud
from PIL import Image
import numpy as np
def get_json(endpoint, timeout=10):
    """Fetch a JSONPlaceholder endpoint and return its decoded JSON body.

    Args:
        endpoint: Path below https://jsonplaceholder.typicode.com/,
            for example 'users' or 'posts'.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the script indefinitely.

    Returns:
        The decoded JSON content of the response.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    response = get(
        'https://jsonplaceholder.typicode.com/{}'.format(endpoint),
        timeout=timeout,
    )
    # Fail here rather than handing an error payload to the caller as data.
    response.raise_for_status()
    return response.json()
# Every JSONPlaceholder collection whose text is fed into the word cloud.
endpoints = ['users', 'posts', 'comments', 'todos', 'photos', 'albums']

# Flat list of text fragments gathered from all endpoints.
cloud_fuel = []
for endpoint in endpoints:
    results = get_json(endpoint)
    if endpoint == 'users':
        for data in results:
            address = data['address']
            company = data['company']
            cloud_fuel.extend([
                data['name'],
                data['username'],
                data['email'],
                address['street'],
                address['suite'],
                address['city'],
                address['zipcode'],
                data['phone'],
                data['website'],
                company['name'],
                company['catchPhrase'],
                company['bs'],
            ])
    elif endpoint == 'comments':
        for data in results:
            cloud_fuel.extend([data['name'], data['email'], data['body']])
    elif endpoint == 'posts':
        for data in results:
            cloud_fuel.extend([data['title'], data['body']])
    else:
        # todos, photos and albums: only the 'title' value is collected.
        for data in results:
            cloud_fuel.append(data['title'])

# WordCloud takes a single string, so join the fragments with spaces.
fuel = ' '.join(cloud_fuel)

# All-black 1200x1200 canvas used as the word cloud mask. It is converted to
# an array directly in memory instead of being saved to 'cloud.jpg' and read
# back, which avoided nothing and went through a lossy JPEG encode/decode.
img = Image.new('RGB', (1200, 1200), color='black')
mask = np.array(img)

# 'nexa.otf' is a relative path, so the font file must be in the working
# directory when this runs.
wc = WordCloud(background_color='#000', max_words=9999,
               font_path='nexa.otf', collocations=False, mask=mask, margin=30)
wc.generate(fuel)
wc.to_file('cloud.jpg')
That'll do it I guess.
Resources
- Python: https://python.org
- Requests: http://docs.python-requests.org/en/master/
- Nexa font: https://github.com/r4in/typefaces/tree/master/Nexa
- WordCloud: https://github.com/amueller/word_cloud
- Numpy: http://www.numpy.org
Thanks for reading. x