#import library yang dibutuhkan
import pandas as pd
import numpy as np
from bokeh.io import output_notebook
from bokeh.plotting import figure, show, reset_output
from bokeh.io import output_file
from bokeh.models.widgets import Tabs, Panel
from bokeh.models import ColumnDataSource, CategoricalColorMapper, NumeralTickFormatter, HoverTool, CDSView, GroupFilter, Div
from bokeh.layouts import gridplot, column
# Mount Google Drive at /content/drive so files stored there can be read
# from this Colab session (only works inside Google Colab).
from google.colab import drive
drive.mount("/content/drive")
Mounted at /content/drive
# Notebook `cd` automagic: switch the working directory to the project folder on the mounted Drive.
cd drive/MyDrive/Colab\ Notebooks/visdat/tubes interactive
/content/drive/MyDrive/Colab Notebooks/visdat/tubes interactive
# Notebook shell escape: list the files in the current working directory.
!ls
Andrew-1301180390.ipynb stock_market.csv
# Load the stock-market CSV (Date parsed as datetimes) and give the
# 'Adj Close' column a space-free name so it is easy to reference later.
DT = 'stock_market.csv'
dataset = pd.read_csv(DT, parse_dates=['Date']).rename(columns={'Adj Close': 'AdjClose'})

# Data exploration: preview the rows ordered by date. The sorted frame is
# only displayed, not assigned, so `dataset` keeps its original row order.
dataset.sort_values(by='Date')
Date | AdjClose | Volume | Day_Perc_Change | Name | |
---|---|---|---|---|---|
0 | 2018-10-22 | 26153.150390 | 2222812200 | 2.315014 | HANG SENG |
992 | 2018-10-22 | 22614.820310 | 63000 | 0.367211 | NIKKEI |
491 | 2018-10-22 | 7468.629883 | 2282400000 | 0.263123 | NASDAQ |
1 | 2018-10-23 | 25346.550780 | 1772912000 | -3.084139 | HANG SENG |
993 | 2018-10-23 | 22010.779300 | 80200 | -2.670996 | NIKKEI |
... | ... | ... | ... | ... | ... |
990 | 2020-10-15 | 11713.870120 | 3304490000 | -0.466154 | NASDAQ |
991 | 2020-10-16 | 11671.559570 | 3156270000 | -0.361200 | NASDAQ |
1472 | 2020-10-16 | 23410.630860 | 48100 | -0.410936 | NIKKEI |
490 | 2020-10-16 | 24386.789060 | 2697525100 | 0.944801 | HANG SENG |
1473 | 2020-10-19 | 23671.130860 | 0 | 1.112742 | NIKKEI |
1474 rows × 5 columns
# Size of the data set as (number of rows, number of columns).
dataset.shape
(1474, 5)
# Data type of each column.
dataset.dtypes
Date datetime64[ns] AdjClose float64 Volume int64 Day_Perc_Change float64 Name object dtype: object
# Check for duplicated rows: flag each row that repeats an earlier one,
# then report how many were flagged.
dupeCheck = dataset.duplicated()
_duplicate_count = dupeCheck.sum()
print('Duplikat data = %d' % _duplicate_count)
Duplikat data = 0
# Check for missing values: count nulls per column and list the columns
# from most to fewest missing entries.
_null_counts = dataset.isnull().sum()
missingdata = _null_counts.sort_values(ascending=False)
print(missingdata)
Name 0 Day_Perc_Change 0 Volume 0 AdjClose 0 Date 0 dtype: int64
Membuat figure untuk Adj Close, Volume, dan Day_Perc_Change, serta memilih tools yang akan tersedia nantinya.
# Create one figure each for AdjClose, Volume and Day_Perc_Change.
output_notebook()  # render Bokeh output inline in the notebook

# Interactive tools offered on every figure's toolbar.
select_tools = ['pan', 'box_select', 'wheel_zoom', 'tap', 'reset']

# Options common to all three figures; only the title and the y-axis label
# differ from one figure to the next.
_shared_figure_options = dict(
    x_axis_type='datetime',
    plot_height=500,
    plot_width=600,
    toolbar_location="below",
    tools=select_tools,
    x_axis_label='Date',
)
FAdj = figure(title='Adj Close', y_axis_label='Adj Close', **_shared_figure_options)
FVolume = figure(title='Volume', y_axis_label='Volume', **_shared_figure_options)
FDayPerc = figure(title='Day_Perc_Change', y_axis_label='Day_Perc_Change', **_shared_figure_options)
# Wrap the DataFrame in a ColumnDataSource so Bokeh glyphs can read it.
datasetFF = ColumnDataSource(dataset)


def _filter_and_view(index_name):
    """Return the (filter list, CDSView) pair selecting rows whose Name equals *index_name*."""
    name_filters = [GroupFilter(column_name='Name', group=index_name)]
    return name_filters, CDSView(source=datasetFF, filters=name_filters)


# One filter list and one view per stock index, all backed by the same source.
FILhangseng, VIEWhangseng = _filter_and_view('HANG SENG')
FILnikkei, VIEWnikkei = _filter_and_view('NIKKEI')
FILnasdaq, VIEWnasdaq = _filter_and_view('NASDAQ')
# Keyword arguments shared by every circle glyph: data source, marker size
# and transparency, plus how selected / unselected / muted points are drawn.
DICTcircle = {'source': datasetFF, 'size': 4, 'alpha': 0.7, 'selection_color':'red', 'nonselection_color':'lightgray', 'nonselection_alpha': 0.3, 'muted_alpha': 0.1}

# Per-index keyword arguments: the view restricting the rows to that index,
# its colour and its legend entry. `legend_label` is used instead of the
# deprecated `legend` keyword, which raised a BokehDeprecationWarning on
# every glyph call.
DICThangseng = {'view': VIEWhangseng, 'color': '#eb6777', 'legend_label': 'HANG SENG'}
DICTnikkei = {'view': VIEWnikkei, 'color': '#e0e38f', 'legend_label': 'NIKKEI'}
DICTnasdaq = {'view': VIEWnasdaq, 'color': '#073278', 'legend_label': 'NASDAQ'}

# Draw the three indices as circles on each figure. The order (figure by
# figure, HANG SENG -> NIKKEI -> NASDAQ) matches the original explicit calls,
# so renderer and legend ordering are unchanged.
for _figure, _y_column in (
    (FAdj, 'AdjClose'),
    (FVolume, 'Volume'),
    (FDayPerc, 'Day_Perc_Change'),
):
    for _index_style in (DICThangseng, DICTnikkei, DICTnasdaq):
        _figure.circle(x='Date', y=_y_column, **DICTcircle, **_index_style)
BokehDeprecationWarning: 'legend' keyword is deprecated, use explicit 'legend_label', 'legend_field', or 'legend_group' keywords instead BokehDeprecationWarning: 'legend' keyword is deprecated, use explicit 'legend_label', 'legend_field', or 'legend_group' keywords instead BokehDeprecationWarning: 'legend' keyword is deprecated, use explicit 'legend_label', 'legend_field', or 'legend_group' keywords instead BokehDeprecationWarning: 'legend' keyword is deprecated, use explicit 'legend_label', 'legend_field', or 'legend_group' keywords instead BokehDeprecationWarning: 'legend' keyword is deprecated, use explicit 'legend_label', 'legend_field', or 'legend_group' keywords instead BokehDeprecationWarning: 'legend' keyword is deprecated, use explicit 'legend_label', 'legend_field', or 'legend_group' keywords instead BokehDeprecationWarning: 'legend' keyword is deprecated, use explicit 'legend_label', 'legend_field', or 'legend_group' keywords instead BokehDeprecationWarning: 'legend' keyword is deprecated, use explicit 'legend_label', 'legend_field', or 'legend_group' keywords instead BokehDeprecationWarning: 'legend' keyword is deprecated, use explicit 'legend_label', 'legend_field', or 'legend_group' keywords instead
# Tooltip rows (label, field reference) shown by each figure's hover tool.
adjTool = [('Name', '@Name'), ('Adj Close', '@AdjClose')]
volumeTool = [('Name', '@Name'), ('Volume', '@Volume')]
daypercTool = [('Name', '@Name'), ('Day_Perc_Change', '@Day_Perc_Change')]

# Hover glyphs: fully transparent circles over every row of the source that
# become a translucent red marker while the pointer is over them.
_hover_glyph_options = dict(
    x='Date',
    source=datasetFF,
    size=7,
    alpha=0,
    hover_fill_color='red',
    hover_alpha=0.5,
)
adjHG = FAdj.circle(y='AdjClose', **_hover_glyph_options)
volumeHG = FVolume.circle(y='Volume', **_hover_glyph_options)
daypercHG = FDayPerc.circle(y='Day_Perc_Change', **_hover_glyph_options)

# Attach a HoverTool to each figure, limited to that figure's hover glyph.
for _figure, _tooltips, _hover_glyph in (
    (FAdj, adjTool, adjHG),
    (FVolume, volumeTool, volumeHG),
    (FDayPerc, daypercTool, daypercHG),
):
    _figure.add_tools(HoverTool(tooltips=_tooltips, renderers=[_hover_glyph]))
# Heading markup displayed above the tabbed plots.
html = "<h3>Visualisasi Data</h3>\n<br>\n"
judulUp = Div(text=html)
# Let the user hide a stock index's data by clicking its legend entry.
for _figure in (FAdj, FVolume, FDayPerc):
    _figure.legend.click_policy = 'hide'
# Wrap each figure in its own panel; the panel title is the tab label.
PAdjClose = Panel(title='Adj Close', child=FAdj)
PVolume = Panel(title='Volume', child=FVolume)
PDayPerc = Panel(title='Day_Perc_Change', child=FDayPerc)

# Combine the panels into clickable tabs.
tabs = Tabs(tabs=[PAdjClose, PVolume, PDayPerc])

# Display the heading with the tabs stacked below it.
_page_layout = column(judulUp, tabs)
show(_page_layout)