[ R ] ch14 연습문제

yul_S2 2022. 11. 27. 08:52

1. 다음은 drinking_water_example.sav 파일의 데이터셋이 구성된 테이블이다. 전체 2 개의 요인에 의해서 7 개의 변수로 구성되어 있다. 아래에서 제시된 각 단계에 맞게 요인 분석을 수행하시오.

(1) 데이터파일 가져오기

library(memisc)
# Build the full path to the SPSS file instead of calling setwd(), so the
# session's working directory is not changed as a side effect of this script.
data_path <- file.path("C:", "drinking_water_example.sav")
if (!file.exists(data_path)) {
  stop("SPSS file not found: ", data_path, call. = FALSE)
}

# Import the SPSS system file as a memisc data.set and print it for inspection.
data.spss <- as.data.set(spss.system.file(data_path))
data.spss

# Keep the first seven columns (Q1-Q7 according to the str() output below) and
# convert them to a plain data.frame for use with factanal().
drinking_water_exam <- data.spss[1:7]
drinking_water_exam_df <- as.data.frame(drinking_water_exam)

str(drinking_water_exam_df)
# 'data.frame': 380 obs. of  7 variables:
#   $ Q1: num  3 3 3 3 3 1 2 2 2 4 ...
# ..- attr(*, "label")= chr "A&G0 D#9P551"
# $ Q2: num  2 3 3 3 3 1 2 2 2 3 ...
# ..- attr(*, "label")= chr "A&G0 D#9P552"
# $ Q3: num  3 3 3 3 2 1 2 1 1 3 ...
# ..- attr(*, "label")= chr "A&G0 D#9P553"
# $ Q4: num  3 3 4 2 2 3 4 2 3 4 ...
# ..- attr(*, "label")= chr "A&G0 88A7551"
# $ Q5: num  4 3 4 2 2 3 4 2 2 2 ...
# ..- attr(*, "label")= chr "A&G0 88A7552"
# $ Q6: num  3 2 4 2 2 3 4 2 3 3 ...
# ..- attr(*, "label")= chr "A&G0 88A7553"
# $ Q7: num  4 3 4 2 2 3 4 2 1 4 ...
# ..- attr(*, "label")= chr "A&G0 88A7554"

(2) 베리맥스 회전법, 요인수 2, 요인점수 회귀분석 방법을 적용하여 요인 분석

# Fit a two-factor model with varimax rotation; factor scores are estimated
# with the regression method and stored in result$scores.
result <- factanal(
  drinking_water_exam_df,
  factors = 2,
  rotation = "varimax",
  scores = "regression"
)
print(result)
# Call:
#   factanal(x = drinking_water_exam_df, factors = 2, scores = "regression",     rotation = "varimax")
#
# Uniquenesses:
# Q1 Q2 Q3 Q4 Q5 Q6 Q7
# 0.333 0.222 0.298 0.388 0.200 0.231 0.410
#
# Loadings:
#   Factor1 Factor2
# Q1 0.212   0.789
# Q2 0.182   0.863
# Q3 0.170   0.820
# Q4 0.724   0.296
# Q5 0.882   0.149
# Q6 0.860   0.172
# Q7 0.742   0.198
#
# Factor1 Factor2
# SS loadings      2.700   2.219
# Proportion Var   0.386   0.317
# Cumulative Var   0.386   0.703
#
# Test of the hypothesis that 2 factors are sufficient.
# The chi square statistic is 12.93 on 8 degrees of freedom.
# The p-value is 0.114

(3) 요인적재량 행렬의 컬럼명 변경

# Name each factor after the item group that loads on it. In the fitted model
# Factor1 is dominated by Q4-Q7 and Factor2 by Q1-Q3.
# NOTE(review): the variable labels printed by str() are mis-encoded; decoded
# as EUC-KR they appear to read "product familiarity" for Q1-Q3 and "product
# satisfaction" for Q4-Q7, so Factor1 is labelled satisfaction and Factor2
# familiarity. Confirm against the codebook.
loadings <- result$loadings
colnames(loadings) <- c("제품만족도", "제품친밀도")

loadings
# Loadings:
#   제품만족도 제품친밀도
# Q1 0.212      0.789
# Q2 0.182      0.863
# Q3 0.170      0.820
# Q4 0.724      0.296
# Q5 0.882      0.149
# Q6 0.860      0.172
# Q7 0.742      0.198
#
# 제품만족도 제품친밀도
# SS loadings         2.700      2.219
# Proportion Var      0.386      0.317
# Cumulative Var      0.386      0.703

(4) 요인점수를 이용한 요인적재량 시각화

# Scatter plot of the factor scores (Factor1 on x, Factor2 on y).
plot(result$scores[, c(1, 2)], main = "제품친밀도와 제품만족도 요인점수 행렬")

# Label every observation with its row index. No `name` object exists in this
# script, so the labels are derived from the score matrix itself.
text(result$scores[, 1], result$scores[, 2],
     labels = seq_len(nrow(result$scores)), cex = 0.7, pos = 3, col = "blue")

# Overlay the factor loadings of Q1-Q7 as red points on the same axes.
points(result$loadings[, c(1:2)], pch = 19, col = "red")

# Label each loading point with its variable name.
text(result$loadings[, 1], result$loadings[, 2],
     labels = rownames(result$loadings), cex = 0.7, pos = 3, col = "green3")

(5) 요인별 변수 묶기

# Split the items into the two groups indicated by the factor loadings:
# a holds Q1-Q3 and b holds Q4-Q7. Columns are named
# "drinking_water_exam_df.Q<n>" because later code addresses them by that name.
a <- setNames(
  drinking_water_exam_df[c("Q1", "Q2", "Q3")],
  paste0("drinking_water_exam_df.", c("Q1", "Q2", "Q3"))
)

b <- setNames(
  drinking_water_exam_df[c("Q4", "Q5", "Q6", "Q7")],
  paste0("drinking_water_exam_df.", c("Q4", "Q5", "Q6", "Q7"))
)

2. 1 번에서 생성된 두 개의 요인을 데이터프레임으로 생성한 후 이를 이용하여 두 요인 간의 상관관계 계수를 제시하시오.

# Composite score per respondent for each item group: the mean of the group's
# items, rounded to two decimals. rowMeans() replaces the hand-written sums so
# the divisor always matches the number of columns; unname() keeps the result
# a plain vector so the data frame below gets default row names.
x <- round(unname(rowMeans(a)), 2)
y <- round(unname(rowMeans(b)), 2)

# Combine the two composites and report their correlation matrix
# (cor() defaults to Pearson).
subject_facor_df <- data.frame(x, y)
cor(subject_facor_df)
#           x         y
# x 1.0000000 0.4047543
# y 0.4047543 1.0000000