경기도미래기술학교 AI개발자 부트캠프 31일차 TIL - Continuous Descriptive Features

2023. 6. 21. 17:43 · 카테고리 없음

반응형

https://process-mining.tistory.com/42

 

Decision Tree(의사 결정 나무)란? (Decision tree 설명)

Decision Tree는 tree 구조를 활용하여 entropy가 최소화되는 방향으로 데이터를 분류하거나 원하는 어떤 결과값을 예측하는 분석 방법을 말한다. 이번 포스팅에서는 Decision Tree가 무엇이고, 이를 어떻게 …

process-mining.tistory.com

 

import numpy as np
# Elevation values in ascending order and the vegetation label of each instance:
# elevation_list  = [300, 1200, 1500, 3000, 3900, 4450, 5000]
# vegetation_list = [rip, cha, rip, cha, cha, con, con]
#
# The label changes at four boundaries (rip | cha | rip | cha, cha | con, con),
# so there are four candidate thresholds; the first is (300 + 1200) / 2 = 750.


def _entropy(*counts):
    """Return the base-2 Shannon entropy of a node from its per-class counts.

    Zero counts are skipped. A pure node (a single non-zero count) yields a
    positive 0.0 rather than the -0.0 produced by writing -1 * log2(1) directly.
    """
    total = sum(counts)
    return sum(-(c / total) * np.log2(c / total) for c in counts if c)


n_number = 2

# Candidate thresholds: midpoints of adjacent elevations whose labels differ.
target_level_differ_1 = (300 + 1200) // n_number
target_level_differ_2 = (1200 + 1500) // n_number
target_level_differ_3 = (1500 + 3000) // n_number
target_level_differ_4 = (3900 + 4450) // n_number

print('target_level_differ_1 : ', target_level_differ_1)
print('target_level_differ_2 : ', target_level_differ_2)
print('target_level_differ_3 : ', target_level_differ_3)
print('target_level_differ_4 : ', target_level_differ_4)

print('-------------##root node##-------------')
# Root node: 7 instances with class counts 3 (cha), 2 (rip) and 2 (con).
root_node = _entropy(3, 2, 2)
print('root_entropy: ', root_node)

print('-------------750 start-------------')
# Below 750: {rip}. Above: 3 cha, 2 con, 1 rip.
low_node_750 = _entropy(1)
print('low_node_750: ', low_node_750)
low_weight_node_750 = 1/7 * low_node_750
print('low_weight_node_750: ', low_weight_node_750)
high_node_750 = _entropy(3, 2, 1)
print('high_node_750: ', high_node_750)
high_weight_node_750 = 6/7 * high_node_750
print('high_weight_node_750: ', high_weight_node_750)
# Information gain = parent entropy - weighted entropy of the children.
ig_750 = root_node - (high_weight_node_750 + low_weight_node_750)
print('lower than 750 ig: ', ig_750)

print('-------------1350 start-------------')
# Below 1350: {rip, cha}. Above: 2 cha, 2 con, 1 rip.
low_node_1350 = _entropy(1, 1)
# The label used to read 'low_weight_node_1350' although the unweighted
# entropy is what gets printed here.
print('low_node_1350: ', low_node_1350)
low_weight_node_1350 = 2/7 * low_node_1350
print('low_weight_node_1350: ', low_weight_node_1350)
high_node_1350 = _entropy(2, 2, 1)
print('high_node_1350: ', high_node_1350)
high_weight_node_1350 = 5/7 * high_node_1350
print('high_weight_node_1350: ', high_weight_node_1350)
ig_1350 = root_node - (low_weight_node_1350 + high_weight_node_1350)
print('lower than 1350 ig: ', ig_1350)

print('-------------2250 start-------------')
# Below 2250: 2 rip, 1 cha. Above: 2 cha, 2 con.
low_node_2250 = _entropy(2, 1)
print('low_node_2250: ', low_node_2250)
low_weight_node_2250 = 3/7 * low_node_2250
print('low_weight_node_2250: ', low_weight_node_2250)
high_node_2250 = _entropy(2, 2)
print('high_node_2250: ', high_node_2250)
high_weight_node_2250 = 4/7 * high_node_2250
print('high_weight_node_2250: ', high_weight_node_2250)
ig_2250 = root_node - (low_weight_node_2250 + high_weight_node_2250)
print('lower than 2250 ig: ', ig_2250)

# NOTE: the names and labels below say 4150, but the threshold computed above
# as target_level_differ_4 is (3900 + 4450) // 2 = 4175. The names are kept
# unchanged for compatibility.
print('-------------4150 start-------------')
# Below the threshold: 2 rip, 3 cha. Above: {con, con}, a pure node.
low_node_4150 = _entropy(2, 3)
print('low_node_4150: ', low_node_4150)
low_weight_node_4150 = 5/7 * low_node_4150
print('low_weight_node_4150: ', low_weight_node_4150)
# A pure node has entropy 0; the previous expression doubled -1 * log2(1),
# which evaluated to -0.0.
high_node_4150 = _entropy(2)
print('high_node_4150: ', high_node_4150)
high_weight_node_4150 = 2/7 * high_node_4150
print('high_weight_node_4150: ', high_weight_node_4150)
ig_4150 = root_node - (low_weight_node_4150 + high_weight_node_4150)
print('lower than 4150 ig: ', ig_4150)

# ---- Second level: the 5 instances below the last threshold (3 cha, 2 rip).
print('-------------4150 seperated -------------')
elevation_4150_node = _entropy(3, 2)
print('elevation_4150_node: ', elevation_4150_node)

print('-------------stream seperated test -------------')
# One child holds 3 of the 5 instances (class counts 2 and 1); the other
# holds 2 instances of a single class.
stream_true_node_weight = 3/5 * _entropy(2, 1)
print('stream_true_node_weight: ', stream_true_node_weight)
stream_false_node_weight = 2/5 * _entropy(2)
print('stream_false_node_weight: ', stream_false_node_weight)
ig = elevation_4150_node - (stream_true_node_weight + stream_false_node_weight)
print('stream ig: ', ig)

print('-------------slope seperated test -------------')
# One child holds a single instance; the other holds 4 (class counts 3 and 1).
slope_moderate_node_weight = 1/5 * _entropy(1)
print('slope_moderate_node_weight: ', slope_moderate_node_weight)
slope_steep_node_weight = 4/5 * _entropy(3, 1)
print('slope_steep_node_weight: ', slope_steep_node_weight)
ig = elevation_4150_node - (slope_moderate_node_weight + slope_steep_node_weight)
print('slope ig: ', ig)

print('-------------elevation seperated test -------------')
print('target_level_differ_1 : ', target_level_differ_1)
print('target_level_differ_2 : ', target_level_differ_2)
print('target_level_differ_3 : ', target_level_differ_3)

print('-------------elevation seperated 750 test -------------')
# The low side is still {rip}, so low_node_750 from the root split is reused;
# only the weight changes because the parent now has 5 instances.
low_weight_node_750 = 1/5 * low_node_750
print('low_weight_node_750: ', low_weight_node_750)
high_node_750 = _entropy(3, 1)
print('high_node_750: ', high_node_750)
high_weight_node_750 = 4/5 * high_node_750
print('high_weight_node_750: ', high_weight_node_750)
ig_750 = elevation_4150_node - (high_weight_node_750 + low_weight_node_750)
print('lower than 750 ig: ', ig_750)

print('-------------elevation seperated 1350 test -------------')
# The low side is still {rip, cha}; low_node_1350 is reused.
low_weight_node_1350 = 2/5 * low_node_1350
print('low_weight_node_1350: ', low_weight_node_1350)
high_node_1350 = _entropy(1, 2)
print('high_node_1350: ', high_node_1350)
high_weight_node_1350 = 3/5 * high_node_1350
print('high_weight_node_1350: ', high_weight_node_1350)
ig_1350 = elevation_4150_node - (low_weight_node_1350 + high_weight_node_1350)
print('lower than 1350 ig: ', ig_1350)

print('-------------elevation seperated 2250 test -------------')
# The low side is still 2 rip, 1 cha; low_node_2250 is reused.
low_weight_node_2250 = 3/5 * low_node_2250
print('low_weight_node_2250: ', low_weight_node_2250)
# High side: {cha, cha}, a pure node (entropy 0, previously computed as -0.0).
high_node_2250 = _entropy(2)
print('high_node_2250: ', high_node_2250)
high_weight_node_2250 = 2/5 * high_node_2250
print('high_weight_node_2250: ', high_weight_node_2250)
ig_2250 = elevation_4150_node - (low_weight_node_2250 + high_weight_node_2250)
print('lower than 2250 ig: ', ig_2250)

# ---- Third level, variant 1: the 3 instances below 2250 (2 rip, 1 cha).
print('-------------3rd v1 : elevation seperated 2250 -------------')
elevation_2nd_2250_node = _entropy(2, 1)
print('elevation_2nd_2250_node: ', elevation_2nd_2250_node)

print('-------------3rd : stream test --------------')
# NOTE(review): only the stream = false child is included in the gain below;
# presumably the stream = true child is pure as well (weight 0) - confirm
# against the dataset.
stream_3rd_false_node_weight = 0
# Print the third-level value, not the stale second-level
# stream_false_node_weight.
print('stream_3rd_false_node_weight: ', stream_3rd_false_node_weight)
ig = elevation_2nd_2250_node - (stream_3rd_false_node_weight)
print('stream ig: ', ig)

print('-------------3rd : slope test --------------')
slope_3rd_moderate_node_weight = 0
print('slope_3rd_moderate_node_weight: ', slope_3rd_moderate_node_weight)
slope_3rd_steep_node_weight = 2/3 * _entropy(1, 1)
# Print the third-level value, not the stale second-level
# slope_steep_node_weight.
print('slope_3rd_steep_node_weight: ', slope_3rd_steep_node_weight)
ig = elevation_2nd_2250_node - (slope_3rd_moderate_node_weight + slope_3rd_steep_node_weight)
print('slope ig: ', ig)

print('-------------3rd : elevation test start--------------')
print('target_level_differ_1 : ', target_level_differ_1)
print('target_level_differ_2 : ', target_level_differ_2)

print('-------------3rd : 750 test --------------')
# Below 750: {rip} (pure). Above: {cha, rip}. Previously the left-over slope
# gain was printed here without being recomputed.
ig = elevation_2nd_2250_node - (1/3 * _entropy(1) + 2/3 * _entropy(1, 1))
print('750 ig: ', ig)

print('-------------3rd : 1350 test --------------')
# Below 1350: {rip, cha}. Above: {rip} (pure).
ig = elevation_2nd_2250_node - (2/3 * _entropy(1, 1) + 1/3 * _entropy(1))
print('1350 ig: ', ig)

# ---- Third level, variant 2: a 3-instance node with class counts 2 and 1.
print('-------------3rd v2 : stream -------------')
true_3rd_node = _entropy(2, 1)
print('true_3rd_node: ', true_3rd_node)

# TODO: this section was left unfinished; only its header is printed.
print('-------------3rd v2 : slope test -------------')
 
import numpy as np

# Analyze whether ridership depends more on season or on day, then run a regression analysis.

def calculate_variance(data):
    """Return the population variance of *data*.

    The variance is the mean of the squared deviations from the mean; the sum
    is divided by ``len(data)``, not ``len(data) - 1``.

    Args:
        data: A sized collection of numbers (it is passed to ``len`` and
            iterated twice).

    Returns:
        The mean squared deviation of the values from their mean.

    Raises:
        ZeroDivisionError: If *data* is empty.
    """
    n = len(data)
    mean = sum(data) / n
    squared_deviations = [(x - mean) ** 2 for x in data]
    variance = sum(squared_deviations) / n
    return variance


# All 12 target values; the season and day lists below are two different
# partitions of these same values.
total_data = [800, 826, 900, 2100, 4740, 4900, 3000, 5800, 6200, 2910, 2880, 2820]

n = len(total_data)

# Variance is the mean of the squared deviations from the mean. The shared
# helper is used here instead of repeating its arithmetic inline.
total_variance = calculate_variance(total_data)

print(total_variance)

winter_data = [800, 826, 900]
spring_data = [2100, 4740, 4900]
summer_data = [3000, 5800, 6200]
autumn_data = [2910, 2880, 2820]

winter_variance = calculate_variance(winter_data)
print('winter_variance:', winter_variance)

spring_variance = calculate_variance(spring_data)
print('spring_variance:', spring_variance)

summer_variance = calculate_variance(summer_data)
print('summer_variance:', summer_variance)

autumn_variance = calculate_variance(autumn_data)
print('autumn_variance:', autumn_variance)

# Weighted variance of the split: each partition's variance is weighted by its
# share of the instances (3/12 each here), derived from the data rather than
# hard-coded.
season_variance = (
    (len(spring_data) / n * spring_variance)
    + (len(summer_data) / n * summer_variance)
    + (len(autumn_data) / n * autumn_variance)
    + (len(winter_data) / n * winter_variance)
)
print('season_variance:', season_variance)

print('------------####day start#####-------------')

false_data = [800, 826, 2100, 3000, 2910, 2880]
true_data = [900, 4740, 4900, 5800, 6200, 2820]

false_variance = calculate_variance(false_data)
print('false_variance:', false_variance)

true_variance = calculate_variance(true_data)
print('true_variance:', true_variance)

# Same weighting scheme for the two-way split (6/12 each here).
day_variance = (
    (len(false_data) / n * false_variance)
    + (len(true_data) / n * true_variance)
)
print('day_variance:', day_variance)

728x90