경기도미래기술학교 AI개발자 부트캠프 31일차 TIL - Continuous Descriptive Features
2023. 6. 21. 17:43ㆍ카테고리 없음
반응형
https://process-mining.tistory.com/42
import numpy as np
# Worked example: choosing decision-tree split thresholds for the continuous
# ELEVATION feature by entropy-based information gain.
#
# Instances sorted by elevation, with their target level (vegetation):
#   elevation_list  = [300, 1200, 1500, 3000, 3900, 4450, 5000]
#   vegetation_list = [rip, cha,  rip,  cha,  cha,  con,  con]
# The target level changes at four boundaries, giving four candidate thresholds:
#   rip / cha / rip / cha, cha / con, con


def _entropy(*probabilities):
    """Return the base-2 Shannon entropy of a class distribution.

    Args:
        *probabilities: Class proportions of one partition (expected to sum
            to 1).

    Returns:
        The entropy in bits. Zero proportions are skipped, following the
        convention 0 * log2(0) = 0, and a pure partition yields 0 instead of
        the negative zero produced by ``-1 * np.log2(1)``.
    """
    return sum(-p * np.log2(p) for p in probabilities if p > 0)


# Candidate thresholds are the (floored) midpoints between adjacent instances
# whose target level differs.
n_number = 2
target_level_differ_1 = (300 + 1200) // n_number
target_level_differ_2 = (1200 + 1500) // n_number
target_level_differ_3 = (1500 + 3000) // n_number
# NOTE: this midpoint evaluates to 4175; the "4150" in the names and labels
# below refers to this threshold (names kept for backward compatibility).
target_level_differ_4 = (3900 + 4450) // n_number
print('target_level_differ_1 : ', target_level_differ_1)
print('target_level_differ_2 : ', target_level_differ_2)
print('target_level_differ_3 : ', target_level_differ_3)
print('target_level_differ_4 : ', target_level_differ_4)

print('-------------##root node##-------------')
# Whole data set: 3 cha, 2 rip, 2 con out of 7 instances.
root_node = _entropy(3/7, 2/7, 2/7)
print('root_entropy: ', root_node)

print('-------------750 start-------------')
# Below the threshold: {rip}; above: {3 cha, 2 con, 1 rip}.
low_node_750 = _entropy(1/1)
print('low_node_750: ', low_node_750)
low_weight_node_750 = 1/7 * low_node_750
print('low_weight_node_750: ', low_weight_node_750)
high_node_750 = _entropy(3/6, 2/6, 1/6)
print('high_node_750: ', high_node_750)
high_weight_node_750 = 6/7 * high_node_750
print('high_weight_node_750: ', high_weight_node_750)
ig_750 = root_node - (high_weight_node_750 + low_weight_node_750)
print('lower than 750 ig: ', ig_750)

print('-------------1350 start-------------')
# Below the threshold: {rip, cha}; above: {rip, 2 cha, 2 con}.
low_node_1350 = _entropy(1/2, 1/2)
# Fixed label: this is the raw entropy, not the weighted one printed next.
print('low_node_1350: ', low_node_1350)
low_weight_node_1350 = 2/7 * low_node_1350
print('low_weight_node_1350: ', low_weight_node_1350)
high_node_1350 = _entropy(2/5, 2/5, 1/5)
print('high_node_1350: ', high_node_1350)
high_weight_node_1350 = 5/7 * high_node_1350
print('high_weight_node_1350: ', high_weight_node_1350)
ig_1350 = root_node - (low_weight_node_1350 + high_weight_node_1350)
print('lower than 1350 ig: ', ig_1350)

print('-------------2250 start-------------')
# Below the threshold: {2 rip, 1 cha}; above: {2 cha, 2 con}.
low_node_2250 = _entropy(2/3, 1/3)
print('low_node_2250: ', low_node_2250)
low_weight_node_2250 = 3/7 * low_node_2250
print('low_weight_node_2250: ', low_weight_node_2250)
high_node_2250 = _entropy(1/2, 1/2)
print('high_node_2250: ', high_node_2250)
high_weight_node_2250 = 4/7 * high_node_2250
print('high_weight_node_2250: ', high_weight_node_2250)
ig_2250 = root_node - (low_weight_node_2250 + high_weight_node_2250)
print('lower than 2250 ig: ', ig_2250)

print('-------------4150 start-------------')
# Below the threshold: {2 rip, 3 cha}; above: {2 con} (pure).
low_node_4150 = _entropy(2/5, 3/5)
print('low_node_4150: ', low_node_4150)
low_weight_node_4150 = 5/7 * low_node_4150
print('low_weight_node_4150: ', low_weight_node_4150)
high_node_4150 = _entropy(1)
print('high_node_4150: ', high_node_4150)
high_weight_node_4150 = 2/7 * high_node_4150
print('high_weight_node_4150: ', high_weight_node_4150)
ig_4150 = root_node - (low_weight_node_4150 + high_weight_node_4150)
print('lower than 4150 ig: ', ig_4150)

# ---- Second level: the five instances below the last threshold ------------
print('-------------4150 seperated -------------')
elevation_4150_node = _entropy(3/5, 2/5)
print('elevation_4150_node: ', elevation_4150_node)

print('-------------stream seperated test -------------')
stream_true_node_weight = 3/5 * _entropy(2/3, 1/3)
print('stream_true_node_weight: ', stream_true_node_weight)
stream_false_node_weight = 2/5 * _entropy(1)
print('stream_false_node_weight: ', stream_false_node_weight)
ig = elevation_4150_node - (stream_true_node_weight + stream_false_node_weight)
print('stream ig: ', ig)

print('-------------slope seperated test -------------')
slope_moderate_node_weight = 1/5 * _entropy(1)
print('slope_moderate_node_weight: ', slope_moderate_node_weight)
slope_steep_node_weight = 4/5 * _entropy(3/4, 1/4)
print('slope_steep_node_weight: ', slope_steep_node_weight)
ig = elevation_4150_node - (slope_moderate_node_weight + slope_steep_node_weight)
print('slope ig: ', ig)

print('-------------elevation seperated test -------------')
print('target_level_differ_1 : ', target_level_differ_1)
print('target_level_differ_2 : ', target_level_differ_2)
print('target_level_differ_3 : ', target_level_differ_3)

# NOTE: from here on the *_750 / *_1350 / *_2250 names are re-used for the
# second-level results and overwrite the root-level values computed above.
print('-------------elevation seperated 750 test -------------')
low_weight_node_750 = 1/5 * low_node_750
print('low_weight_node_750: ', low_weight_node_750)
high_node_750 = _entropy(3/4, 1/4)
print('high_node_750: ', high_node_750)
high_weight_node_750 = 4/5 * high_node_750
print('high_weight_node_750: ', high_weight_node_750)
ig_750 = elevation_4150_node - (high_weight_node_750 + low_weight_node_750)
print('lower than 750 ig: ', ig_750)

print('-------------elevation seperated 1350 test -------------')
low_weight_node_1350 = 2/5 * low_node_1350
print('low_weight_node_1350: ', low_weight_node_1350)
high_node_1350 = _entropy(1/3, 2/3)
print('high_node_1350: ', high_node_1350)
high_weight_node_1350 = 3/5 * high_node_1350
print('high_weight_node_1350: ', high_weight_node_1350)
ig_1350 = elevation_4150_node - (low_weight_node_1350 + high_weight_node_1350)
print('lower than 1350 ig: ', ig_1350)

print('-------------elevation seperated 2250 test -------------')
low_weight_node_2250 = 3/5 * low_node_2250
print('low_weight_node_2250: ', low_weight_node_2250)
high_node_2250 = _entropy(1/1)
print('high_node_2250: ', high_node_2250)
high_weight_node_2250 = 2/5 * high_node_2250
print('high_weight_node_2250: ', high_weight_node_2250)
ig_2250 = elevation_4150_node - (low_weight_node_2250 + high_weight_node_2250)
print('lower than 2250 ig: ', ig_2250)

# ---- Third level (v1): the three lowest instances {rip, cha, rip} ---------
print('-------------3rd v1 : elevation seperated 2250 -------------')
elevation_2nd_2250_node = _entropy(2/3, 1/3)
print('elevation_2nd_2250_node: ', elevation_2nd_2250_node)

print('-------------3rd : stream test --------------')
# Only the false branch is modelled; the true branch presumably contributes 0
# as well (pure) -- TODO confirm against the full data set.
stream_3rd_false_node_weight = 0
# Fixed: print the third-level weight, not the second-level variable.
print('stream_3rd_false_node_weight: ', stream_3rd_false_node_weight)
ig = elevation_2nd_2250_node - (stream_3rd_false_node_weight)
print('stream ig: ', ig)

print('-------------3rd : slope test --------------')
slope_3rd_moderate_node_weight = 0
print('slope_3rd_moderate_node_weight: ', slope_3rd_moderate_node_weight)
slope_3rd_steep_node_weight = 2/3 * _entropy(1/2, 1/2)
# Fixed: print the third-level weight, not the second-level variable.
print('slope_3rd_steep_node_weight: ', slope_3rd_steep_node_weight)
ig = elevation_2nd_2250_node - (slope_3rd_moderate_node_weight + slope_3rd_steep_node_weight)
print('slope ig: ', ig)

print('-------------3rd : elevation test start--------------')
print('target_level_differ_1 : ', target_level_differ_1)
print('target_level_differ_2 : ', target_level_differ_2)
# Fixed: compute each elevation split explicitly instead of re-printing the
# gain left over from the slope test.
print('-------------3rd : 750 test --------------')
# Below the threshold: {rip} (pure); above: {cha, rip}.
ig = elevation_2nd_2250_node - (1/3 * _entropy(1) + 2/3 * _entropy(1/2, 1/2))
print('750 ig: ', ig)
print('-------------3rd : 1350 test --------------')
# Below the threshold: {rip, cha}; above: {rip} (pure).
ig = elevation_2nd_2250_node - (2/3 * _entropy(1/2, 1/2) + 1/3 * _entropy(1))
print('1350 ig: ', ig)

# ---- Third level (v2): splitting on stream first --------------------------
print('-------------3rd v2 : stream -------------')
true_3rd_node = _entropy(2/3, 1/3)
print('true_3rd_node: ', true_3rd_node)
print('-------------3rd v2 : slope test -------------')
# Worked example: choosing decision-tree split thresholds for the continuous
# ELEVATION feature by entropy-based information gain.
# NOTE(review): this section repeats the same calculation that appears
# earlier in this file.
#
# Instances sorted by elevation, with their target level (vegetation):
#   elevation_list  = [300, 1200, 1500, 3000, 3900, 4450, 5000]
#   vegetation_list = [rip, cha,  rip,  cha,  cha,  con,  con]
# The target level changes at four boundaries, giving four candidate thresholds:
#   rip / cha / rip / cha, cha / con, con


def _entropy(*probabilities):
    """Return the base-2 Shannon entropy of a class distribution.

    Args:
        *probabilities: Class proportions of one partition (expected to sum
            to 1).

    Returns:
        The entropy in bits. Zero proportions are skipped, following the
        convention 0 * log2(0) = 0, and a pure partition yields 0 instead of
        the negative zero produced by ``-1 * np.log2(1)``.
    """
    return sum(-p * np.log2(p) for p in probabilities if p > 0)


# Candidate thresholds are the (floored) midpoints between adjacent instances
# whose target level differs.
n_number = 2
target_level_differ_1 = (300 + 1200) // n_number
target_level_differ_2 = (1200 + 1500) // n_number
target_level_differ_3 = (1500 + 3000) // n_number
# NOTE: this midpoint evaluates to 4175; the "4150" in the names and labels
# below refers to this threshold (names kept for backward compatibility).
target_level_differ_4 = (3900 + 4450) // n_number
print('target_level_differ_1 : ', target_level_differ_1)
print('target_level_differ_2 : ', target_level_differ_2)
print('target_level_differ_3 : ', target_level_differ_3)
print('target_level_differ_4 : ', target_level_differ_4)

print('-------------##root node##-------------')
# Whole data set: 3 cha, 2 rip, 2 con out of 7 instances.
root_node = _entropy(3/7, 2/7, 2/7)
print('root_entropy: ', root_node)

print('-------------750 start-------------')
# Below the threshold: {rip}; above: {3 cha, 2 con, 1 rip}.
low_node_750 = _entropy(1/1)
print('low_node_750: ', low_node_750)
low_weight_node_750 = 1/7 * low_node_750
print('low_weight_node_750: ', low_weight_node_750)
high_node_750 = _entropy(3/6, 2/6, 1/6)
print('high_node_750: ', high_node_750)
high_weight_node_750 = 6/7 * high_node_750
print('high_weight_node_750: ', high_weight_node_750)
ig_750 = root_node - (high_weight_node_750 + low_weight_node_750)
print('lower than 750 ig: ', ig_750)

print('-------------1350 start-------------')
# Below the threshold: {rip, cha}; above: {rip, 2 cha, 2 con}.
low_node_1350 = _entropy(1/2, 1/2)
# Fixed label: this is the raw entropy, not the weighted one printed next.
print('low_node_1350: ', low_node_1350)
low_weight_node_1350 = 2/7 * low_node_1350
print('low_weight_node_1350: ', low_weight_node_1350)
high_node_1350 = _entropy(2/5, 2/5, 1/5)
print('high_node_1350: ', high_node_1350)
high_weight_node_1350 = 5/7 * high_node_1350
print('high_weight_node_1350: ', high_weight_node_1350)
ig_1350 = root_node - (low_weight_node_1350 + high_weight_node_1350)
print('lower than 1350 ig: ', ig_1350)

print('-------------2250 start-------------')
# Below the threshold: {2 rip, 1 cha}; above: {2 cha, 2 con}.
low_node_2250 = _entropy(2/3, 1/3)
print('low_node_2250: ', low_node_2250)
low_weight_node_2250 = 3/7 * low_node_2250
print('low_weight_node_2250: ', low_weight_node_2250)
high_node_2250 = _entropy(1/2, 1/2)
print('high_node_2250: ', high_node_2250)
high_weight_node_2250 = 4/7 * high_node_2250
print('high_weight_node_2250: ', high_weight_node_2250)
ig_2250 = root_node - (low_weight_node_2250 + high_weight_node_2250)
print('lower than 2250 ig: ', ig_2250)

print('-------------4150 start-------------')
# Below the threshold: {2 rip, 3 cha}; above: {2 con} (pure).
low_node_4150 = _entropy(2/5, 3/5)
print('low_node_4150: ', low_node_4150)
low_weight_node_4150 = 5/7 * low_node_4150
print('low_weight_node_4150: ', low_weight_node_4150)
high_node_4150 = _entropy(1)
print('high_node_4150: ', high_node_4150)
high_weight_node_4150 = 2/7 * high_node_4150
print('high_weight_node_4150: ', high_weight_node_4150)
ig_4150 = root_node - (low_weight_node_4150 + high_weight_node_4150)
print('lower than 4150 ig: ', ig_4150)

# ---- Second level: the five instances below the last threshold ------------
print('-------------4150 seperated -------------')
elevation_4150_node = _entropy(3/5, 2/5)
print('elevation_4150_node: ', elevation_4150_node)

print('-------------stream seperated test -------------')
stream_true_node_weight = 3/5 * _entropy(2/3, 1/3)
print('stream_true_node_weight: ', stream_true_node_weight)
stream_false_node_weight = 2/5 * _entropy(1)
print('stream_false_node_weight: ', stream_false_node_weight)
ig = elevation_4150_node - (stream_true_node_weight + stream_false_node_weight)
print('stream ig: ', ig)

print('-------------slope seperated test -------------')
slope_moderate_node_weight = 1/5 * _entropy(1)
print('slope_moderate_node_weight: ', slope_moderate_node_weight)
slope_steep_node_weight = 4/5 * _entropy(3/4, 1/4)
print('slope_steep_node_weight: ', slope_steep_node_weight)
ig = elevation_4150_node - (slope_moderate_node_weight + slope_steep_node_weight)
print('slope ig: ', ig)

print('-------------elevation seperated test -------------')
print('target_level_differ_1 : ', target_level_differ_1)
print('target_level_differ_2 : ', target_level_differ_2)
print('target_level_differ_3 : ', target_level_differ_3)

# NOTE: from here on the *_750 / *_1350 / *_2250 names are re-used for the
# second-level results and overwrite the root-level values computed above.
print('-------------elevation seperated 750 test -------------')
low_weight_node_750 = 1/5 * low_node_750
print('low_weight_node_750: ', low_weight_node_750)
high_node_750 = _entropy(3/4, 1/4)
print('high_node_750: ', high_node_750)
high_weight_node_750 = 4/5 * high_node_750
print('high_weight_node_750: ', high_weight_node_750)
ig_750 = elevation_4150_node - (high_weight_node_750 + low_weight_node_750)
print('lower than 750 ig: ', ig_750)

print('-------------elevation seperated 1350 test -------------')
low_weight_node_1350 = 2/5 * low_node_1350
print('low_weight_node_1350: ', low_weight_node_1350)
high_node_1350 = _entropy(1/3, 2/3)
print('high_node_1350: ', high_node_1350)
high_weight_node_1350 = 3/5 * high_node_1350
print('high_weight_node_1350: ', high_weight_node_1350)
ig_1350 = elevation_4150_node - (low_weight_node_1350 + high_weight_node_1350)
print('lower than 1350 ig: ', ig_1350)

print('-------------elevation seperated 2250 test -------------')
low_weight_node_2250 = 3/5 * low_node_2250
print('low_weight_node_2250: ', low_weight_node_2250)
high_node_2250 = _entropy(1/1)
print('high_node_2250: ', high_node_2250)
high_weight_node_2250 = 2/5 * high_node_2250
print('high_weight_node_2250: ', high_weight_node_2250)
ig_2250 = elevation_4150_node - (low_weight_node_2250 + high_weight_node_2250)
print('lower than 2250 ig: ', ig_2250)

# ---- Third level (v1): the three lowest instances {rip, cha, rip} ---------
print('-------------3rd v1 : elevation seperated 2250 -------------')
elevation_2nd_2250_node = _entropy(2/3, 1/3)
print('elevation_2nd_2250_node: ', elevation_2nd_2250_node)

print('-------------3rd : stream test --------------')
# Only the false branch is modelled; the true branch presumably contributes 0
# as well (pure) -- TODO confirm against the full data set.
stream_3rd_false_node_weight = 0
# Fixed: print the third-level weight, not the second-level variable.
print('stream_3rd_false_node_weight: ', stream_3rd_false_node_weight)
ig = elevation_2nd_2250_node - (stream_3rd_false_node_weight)
print('stream ig: ', ig)

print('-------------3rd : slope test --------------')
slope_3rd_moderate_node_weight = 0
print('slope_3rd_moderate_node_weight: ', slope_3rd_moderate_node_weight)
slope_3rd_steep_node_weight = 2/3 * _entropy(1/2, 1/2)
# Fixed: print the third-level weight, not the second-level variable.
print('slope_3rd_steep_node_weight: ', slope_3rd_steep_node_weight)
ig = elevation_2nd_2250_node - (slope_3rd_moderate_node_weight + slope_3rd_steep_node_weight)
print('slope ig: ', ig)

print('-------------3rd : elevation test start--------------')
print('target_level_differ_1 : ', target_level_differ_1)
print('target_level_differ_2 : ', target_level_differ_2)
# Fixed: compute each elevation split explicitly instead of re-printing the
# gain left over from the slope test.
print('-------------3rd : 750 test --------------')
# Below the threshold: {rip} (pure); above: {cha, rip}.
ig = elevation_2nd_2250_node - (1/3 * _entropy(1) + 2/3 * _entropy(1/2, 1/2))
print('750 ig: ', ig)
print('-------------3rd : 1350 test --------------')
# Below the threshold: {rip, cha}; above: {rip} (pure).
ig = elevation_2nd_2250_node - (2/3 * _entropy(1/2, 1/2) + 1/3 * _entropy(1))
print('1350 ig: ', ig)

# ---- Third level (v2): splitting on stream first --------------------------
print('-------------3rd v2 : stream -------------')
true_3rd_node = _entropy(2/3, 1/3)
print('true_3rd_node: ', true_3rd_node)
print('-------------3rd v2 : slope test -------------')
import numpy as np
#season 때 많이 타냐, day 때 많이 타는지 분석하고 회귀분석
def calculate_variance(data):
    """Return the population variance of ``data``.

    The population variance is the mean of the squared deviations from the
    mean, i.e. the divisor is ``n`` rather than ``n - 1``.

    Args:
        data: Iterable of numbers. It is materialised once, so generators
            are accepted as well as lists and tuples.

    Returns:
        The variance of the values.

    Raises:
        ZeroDivisionError: If ``data`` is empty.
    """
    values = list(data)
    n = len(values)
    mean = sum(values) / n
    squared_deviations = [(x - mean) ** 2 for x in values]
    variance = sum(squared_deviations) / n
    return variance
# Rental counts for all twelve observations.
total_data = [800,826,900,2100,4740,4900,3000,5800,6200,2910,2880,2820]

# Overall variance, spelled out step by step: square each deviation from the
# mean, then average the squares.
n = len(total_data)
mean = sum(total_data) / n
squared_deviations = [(value - mean) ** 2 for value in total_data]
total_variance = sum(squared_deviations) / n
print(total_variance)

# Partition by season (three observations each).
winter_data = [800, 826, 900]
spring_data = [2100, 4740, 4900]
summer_data = [3000, 5800, 6200]
autumn_data = [2910, 2880, 2820]

winter_variance = calculate_variance(winter_data)
spring_variance = calculate_variance(spring_data)
summer_variance = calculate_variance(summer_data)
autumn_variance = calculate_variance(autumn_data)
print(f'winter_variance: {winter_variance}')
print(f'spring_variance: {spring_variance}')
print(f'summer_variance: {summer_variance}')
print(f'autumn_variance: {autumn_variance}')

# Weighted variance of the season split: each season holds 3 of the 12
# observations, hence a weight of 1/4 per partition.
season_variance = (
    0.25 * spring_variance
    + 0.25 * summer_variance
    + 0.25 * autumn_variance
    + 0.25 * winter_variance
)
print(f'season_variance: {season_variance}')

print('------------####day start#####-------------')
# Partition by the boolean day feature (six observations each).
false_data = [800,826,2100,3000,2910,2880]
true_data = [900,4740,4900,5800,6200,2820]

false_variance = calculate_variance(false_data)
true_variance = calculate_variance(true_data)
print(f'false_variance: {false_variance}')
print(f'true_variance: {true_variance}')

# Weighted variance of the day split: two partitions of equal size.
day_variance = 0.5 * false_variance + 0.5 * true_variance
print(f'day_variance: {day_variance}')
#season 때 많이 타냐, day 때 많이 타는지 분석하고 회귀분석
def calculate_variance(data):
    """Return the population variance of ``data``.

    The population variance is the mean of the squared deviations from the
    mean, i.e. the divisor is ``n`` rather than ``n - 1``.

    Args:
        data: Iterable of numbers. It is materialised once, so generators
            are accepted as well as lists and tuples.

    Returns:
        The variance of the values.

    Raises:
        ZeroDivisionError: If ``data`` is empty.
    """
    values = list(data)
    n = len(values)
    mean = sum(values) / n
    squared_deviations = [(x - mean) ** 2 for x in values]
    variance = sum(squared_deviations) / n
    return variance
# Rental counts for all twelve observations.
total_data = [800,826,900,2100,4740,4900,3000,5800,6200,2910,2880,2820]

# Overall variance, spelled out step by step: square each deviation from the
# mean, then average the squares.
n = len(total_data)
mean = sum(total_data) / n
squared_deviations = [(value - mean) ** 2 for value in total_data]
total_variance = sum(squared_deviations) / n
print(total_variance)

# Partition by season (three observations each).
winter_data = [800, 826, 900]
spring_data = [2100, 4740, 4900]
summer_data = [3000, 5800, 6200]
autumn_data = [2910, 2880, 2820]

winter_variance = calculate_variance(winter_data)
spring_variance = calculate_variance(spring_data)
summer_variance = calculate_variance(summer_data)
autumn_variance = calculate_variance(autumn_data)
print(f'winter_variance: {winter_variance}')
print(f'spring_variance: {spring_variance}')
print(f'summer_variance: {summer_variance}')
print(f'autumn_variance: {autumn_variance}')

# Weighted variance of the season split: each season holds 3 of the 12
# observations, hence a weight of 1/4 per partition.
season_variance = (
    0.25 * spring_variance
    + 0.25 * summer_variance
    + 0.25 * autumn_variance
    + 0.25 * winter_variance
)
print(f'season_variance: {season_variance}')

print('------------####day start#####-------------')
# Partition by the boolean day feature (six observations each).
false_data = [800,826,2100,3000,2910,2880]
true_data = [900,4740,4900,5800,6200,2820]

false_variance = calculate_variance(false_data)
true_variance = calculate_variance(true_data)
print(f'false_variance: {false_variance}')
print(f'true_variance: {true_variance}')

# Weighted variance of the day split: two partitions of equal size.
day_variance = 0.5 * false_variance + 0.5 * true_variance
print(f'day_variance: {day_variance}')
728x90