In [1]:
import geopandas
aus_lga_shp = geopandas.read_file('LGA_2021_AUST_GDA2020.shp')
aus_lga_shp.head()
Out[1]:
LGA_CODE21 | LGA_NAME21 | STE_CODE21 | STE_NAME21 | AUS_CODE21 | AUS_NAME21 | AREASQKM21 | LOCI_URI21 | SHAPE_Leng | SHAPE_Area | geometry | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | 10050 | Albury | 1 | New South Wales | AUS | Australia | 305.6386 | http://linked.data.gov.au/dataset/asgsed3/LGA2... | 1.321768 | 0.030560 | POLYGON ((146.86566 -36.07292, 146.86512 -36.0... |
1 | 10180 | Armidale Regional | 1 | New South Wales | AUS | Australia | 7809.4406 | http://linked.data.gov.au/dataset/asgsed3/LGA2... | 6.034583 | 0.732825 | POLYGON ((151.32425 -30.26922, 151.32419 -30.2... |
2 | 10250 | Ballina | 1 | New South Wales | AUS | Australia | 484.9692 | http://linked.data.gov.au/dataset/asgsed3/LGA2... | 1.511121 | 0.044843 | MULTIPOLYGON (((153.57106 -28.87381, 153.57106... |
3 | 10300 | Balranald | 1 | New South Wales | AUS | Australia | 21690.7493 | http://linked.data.gov.au/dataset/asgsed3/LGA2... | 11.489912 | 2.115528 | POLYGON ((143.00433 -33.78164, 143.01538 -33.7... |
4 | 10470 | Bathurst Regional | 1 | New South Wales | AUS | Australia | 3817.8645 | http://linked.data.gov.au/dataset/asgsed3/LGA2... | 5.395114 | 0.370149 | POLYGON ((149.91213 -33.39581, 149.91147 -33.3... |
In [3]:
#Mean House Prices in Victoria
# import pandas lib as pd
import pandas as pd
pd.options.mode.chained_assignment = None # default='warn'
import matplotlib.pyplot as plt
# read by default 1st sheet of an excel file
house_price_df = pd.read_excel('MeanHousePricesClean-1.xlsx')
house_price_df.head()
Out[3]:
Year | Alpine Shire | Ararat Rural City | Ballarat City | Banyule City | Bass Coast Shire | Baw Baw Shire | Bayside City | Benalla Rural City | Greater Bendigo City | ... | Warrnambool City | Wellington Shire | West Wimmera Shire | Whitehorse City | Whittlesea City | Wodonga City | Wyndham City | Yarra City | Yarra Ranges Shire | Yarriambiack Shire | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1992 | 89478 | 63935 | 86980 | 156065 | 83421 | 84464 | 259197 | 82485 | 89851 | ... | 93197 | 71970 | 47391 | 151645 | 123118 | 109005 | 109223 | 166613 | 113826 | 43558 |
1 | 1993 | 93673 | 54778 | 90677 | 152532 | 86893 | 91758 | 265039 | 90076 | 91912 | ... | 99641 | 76112 | 40380 | 151953 | 120661 | 111019 | 107135 | 180305 | 114444 | 39993 |
2 | 1994 | 94228 | 58724 | 90513 | 156499 | 81986 | 94648 | 293252 | 94532 | 94314 | ... | 105397 | 78180 | 39833 | 155134 | 122274 | 112841 | 107345 | 190320 | 114052 | 46915 |
3 | 1995 | 94926 | 57494 | 92235 | 158410 | 81871 | 91836 | 290501 | 91310 | 92787 | ... | 106074 | 81626 | 46256 | 154490 | 120681 | 108251 | 106063 | 190894 | 114158 | 46114 |
4 | 1996 | 101134 | 69558 | 92064 | 157426 | 83273 | 93762 | 300624 | 95720 | 91482 | ... | 108810 | 83100 | 53365 | 154262 | 122159 | 113102 | 109181 | 201726 | 113168 | 61275 |
5 rows × 80 columns
In [4]:
# Draw map of Australia by LGA
fig, ax = plt.subplots(1, figsize=(10,8))
aus_lga_shp.plot(column='LGA_CODE21', cmap='jet', ax=ax)
plt.title('Australia by Local Government Area Code')
plt.show()
In [5]:
# Plot mean house prices for each LGA of Victoria for all the years
fig = plt.figure(figsize=(10, 8))
# Iterating the columns
for col in house_price_df.columns[1:]:
# Ignore the first column as it is the Year column
plt.plot( house_price_df['Year'], house_price_df[col])
# Year is on x-axis
plt.xlabel('Year')
# Mean House Prices of local governement areas(lga) on y-axis
plt.ylabel('House Prices')
plt.title("Mean House prices of Local Government Areas over the years")
# This graph shows that house prices have increases considerably over the years for all LGAs in Victoria
Out[5]:
Text(0.5, 1.0, 'Mean House prices of Local Government Areas over the years')
In [6]:
# Compare increase of Mean House price over the years
# Get only polygon shapes of LGA in Victoria
import numpy as np
# Get only LGAs in Victoria
vic_shp = aus_lga_shp[(aus_lga_shp['STE_NAME21'] == 'Victoria')]
#print(vic_shp['LGA_CODE21'])
#print(vic_shp.head())
#vic_shp has 82 rows
# Get row of mean house prices for every year and merge with vic_shp dataframe
# Clean column names in House price data
#Convert column names to lowercase
#print( house_price_df.columns)
house_price_df.columns = house_price_df.columns.str.lower()
# Use str.replace to rename columns by replacing substrings
house_price_df.columns = house_price_df.columns.str.replace('city', '').str.replace('shire', '').str.replace('rural', '').str.replace('borough of', '')
house_price_df.columns = house_price_df.columns.str.strip()
#Cardinia is spelled wrong, fix it
house_price_df.columns = house_price_df.columns.str.replace("cardina", "cardinia")
# Get one row from house price data and add it as a column to vic_shape data
# let year be the column name
# Make dataframe from first column of house price
#house_1992_df = house_price_df(house_price_df['year'] == '1992')
all_years = [1992,1993,1994,1995,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022]
index = 0
def get_houseprice(lga):
result=0
lga = lga.lower()
lga = lga.removesuffix('(vic.)')
lga = lga.strip()
df = house_price_df[(house_price_df['year'] == all_years[index])]
if lga in df.columns:
result = df[lga].values[0]
#print(result)
else:
#print("This lga is missing:"+lga+"*")
result = np.nan
return result
for year in all_years:
col_name = str(year)
vic_shp[col_name] = vic_shp['LGA_NAME21'].apply(get_houseprice)
#print(vic_shp.head(5))
index = index+1
print(vic_shp.head(5))
LGA_CODE21 LGA_NAME21 STE_CODE21 STE_NAME21 AUS_CODE21 AUS_NAME21 \ 131 20110 Alpine 2 Victoria AUS Australia 132 20260 Ararat 2 Victoria AUS Australia 133 20570 Ballarat 2 Victoria AUS Australia 134 20660 Banyule 2 Victoria AUS Australia 135 20740 Bass Coast 2 Victoria AUS Australia AREASQKM21 LOCI_URI21 \ 131 4788.1570 http://linked.data.gov.au/dataset/asgsed3/LGA2... 132 4211.1172 http://linked.data.gov.au/dataset/asgsed3/LGA2... 133 739.0321 http://linked.data.gov.au/dataset/asgsed3/LGA2... 134 62.5402 http://linked.data.gov.au/dataset/asgsed3/LGA2... 135 865.8094 http://linked.data.gov.au/dataset/asgsed3/LGA2... SHAPE_Leng SHAPE_Area ... 2013 2014 2015 2016 \ 131 4.532210 0.483801 ... 293760.0 294889.0 315481.0 351412.0 132 3.977757 0.428937 ... 186820.0 200452.0 219761.0 203307.0 133 1.651781 0.075296 ... 325847.0 335969.0 334881.0 353829.0 134 0.582852 0.006392 ... 689614.0 758588.0 851457.0 939152.0 135 2.908080 0.089421 ... 368200.0 372235.0 366889.0 378916.0 2017 2018 2019 2020 2021 2022 131 374240.0 408328.0 458745.0 525540.0 725924.0 837159.0 132 207351.0 210758.0 222632.0 249554.0 323385.0 312644.0 133 374289.0 412714.0 447187.0 488502.0 588140.0 642952.0 134 1070705.0 1038328.0 999074.0 1066546.0 1262926.0 1297013.0 135 419527.0 486271.0 512286.0 583758.0 774813.0 834318.0 [5 rows x 42 columns]
In [7]:
# The most difference is in Melbourne, so we will plot LGAs with code starting with 3000
#melb_shp = vic_shp.query('LGA_CODE21 >=20110 & LGA_CODE21 <= 25000')
# Remove columns where house price is null
print(vic_shp.shape)
vic_shp = vic_shp.dropna(axis=0)
print(vic_shp.shape)
# Find the difference every 3 years
vic_shp['diff1995'] = vic_shp['1995'] - vic_shp['1992']
vic_shp['diff1998'] = vic_shp['1998'] - vic_shp['1995']
vic_shp['diff2001'] = vic_shp['2001'] - vic_shp['1998']
vic_shp['diff2004'] = vic_shp['2004'] - vic_shp['2001']
vic_shp['diff2007'] = vic_shp['2007'] - vic_shp['2005']
vic_shp['diff2010'] = vic_shp['2010'] - vic_shp['2007']
vic_shp['diff2013'] = vic_shp['2013'] - vic_shp['2010']
vic_shp['diff2016'] = vic_shp['2016'] - vic_shp['2013']
vic_shp['diff2019'] = vic_shp['2019'] - vic_shp['2016']
vic_shp['diff2022'] = vic_shp['2022'] - vic_shp['2019']
fig, ((ax1, ax2), (ax3, ax4), (ax5, ax6),(ax7, ax8),(ax9, ax10)) = plt.subplots(ncols=2, nrows=5,figsize=(14, 20))
ax1.get_xaxis().set_visible(False)
ax1.get_yaxis().set_visible(False)
ax2.get_xaxis().set_visible(False)
ax2.get_yaxis().set_visible(False)
ax3.get_xaxis().set_visible(False)
ax3.get_yaxis().set_visible(False)
ax4.get_xaxis().set_visible(False)
ax4.get_yaxis().set_visible(False)
ax5.get_xaxis().set_visible(False)
ax5.get_yaxis().set_visible(False)
ax6.get_xaxis().set_visible(False)
ax6.get_yaxis().set_visible(False)
ax7.get_xaxis().set_visible(False)
ax7.get_yaxis().set_visible(False)
ax8.get_xaxis().set_visible(False)
ax8.get_yaxis().set_visible(False)
ax9.get_xaxis().set_visible(False)
ax9.get_yaxis().set_visible(False)
ax10.get_xaxis().set_visible(False)
ax10.get_yaxis().set_visible(False)
#, legend=True, vmin=0, vmax=500000
vic_shp.plot(ax=ax1, column='diff1995', cmap = 'Reds',legend=True, vmin=0, vmax=400000)
vic_shp.plot(ax=ax2, column='diff1998', cmap = 'Reds',legend=True, vmin=0, vmax=400000)
vic_shp.plot(ax=ax3, column='diff2001', cmap = 'Reds',legend=True, vmin=0, vmax=400000)
vic_shp.plot(ax=ax4, column='diff2004', cmap = 'Reds',legend=True, vmin=0, vmax=400000)
vic_shp.plot(ax=ax5, column='diff2007', cmap = 'Reds',legend=True, vmin=0, vmax=400000)
vic_shp.plot(ax=ax6, column='diff2010', cmap = 'Reds',legend=True, vmin=0, vmax=400000)
vic_shp.plot(ax=ax7, column='diff2013', cmap = 'Reds',legend=True, vmin=0, vmax=400000)
vic_shp.plot(ax=ax8, column='diff2016', cmap = 'Reds',legend=True, vmin=0, vmax=400000)
vic_shp.plot(ax=ax9, column='diff2019', cmap = 'Reds',legend=True, vmin=0, vmax=400000)
vic_shp.plot(ax=ax10, column='diff2022', cmap = 'Reds',legend=True, vmin=0, vmax=400000)
# Set title for all the subplots
ax1.set_title('Mean House price difference between Year 1992 and 1995')
ax2.set_title('Mean House price difference between Year 1998 and 1995')
ax3.set_title('Mean House price difference between Year 2001 and 1998')
ax4.set_title('Mean House price difference between Year 2004 and 2001')
ax5.set_title('Mean House price difference between Year 2007 and 2004')
ax6.set_title('Mean House price difference between Year 2010 and 2007')
ax7.set_title('Mean House price difference between Year 2013 and 2010')
ax8.set_title('Mean House price difference between Year 2016 and 2013')
ax9.set_title('Mean House price difference between Year 2019 and 2016')
ax10.set_title('Mean House price difference between Year 2022 and 2019')
(82, 42) (79, 42)
Out[7]:
Text(0.5, 1.0, 'Mean House price difference between Year 2022 and 2019')
In [9]:
import folium
# Find LGAs where house prices have increased the most
vic_shp['diffmost'] =vic_shp['2022'] - vic_shp['1992']
# Find the difference of mean house prices in percentage
vic_shp['perc_diffmost'] =((vic_shp['2022'] - vic_shp['1992'])/vic_shp['1992'])*100
# Round off the calculates percentage values
vic_shp['perc_diffmost'] = vic_shp['perc_diffmost'].round(0)
# Sort dataframe in ascending order
highprice_shp = vic_shp.sort_values(by=['diffmost'], ascending=False)
highprice_shp[['LGA_NAME21', 'diffmost','perc_diffmost']].head(20)
# Create map
# Use only relevant columns
highprice_shp = highprice_shp[['LGA_NAME21', 'diffmost','1992', '2022','perc_diffmost','geometry']]
#highprice_shp['geometry'] = highprice_shp.simplify(0.01)
map = highprice_shp.explore(column= "perc_diffmost", name="Percentage of difference", cmap= "Reds", tiles = "CartoDB positron")
folium.LayerControl().add_to(map)
map
Out[9]:
Make this Notebook Trusted to load map: File -> Trust Notebook