재미 삼아 만든 국민청원 청원인 로그인 플랫폼 확인하기 코드
별 뜻 없이 재미 삼아 올려 봅니다.
요즘 뉴스도 안 보다가 국민청원 경쟁 소식에 뉴스를 보고 있습니다.
library(rvest)
library(dplyr)
library(foreach)
library(stringr)
library(doSNOW)
library(progress)
# Strongly prefer fixed notation over scientific notation when printing numbers.
options(scipen = 999)

# Petition pages whose reply lists are crawled.
url_hankook <- "https://www1.president.go.kr/petitions/579779"
url_minjoo <- "https://www1.president.go.kr/petitions/579682"

# Crawl settings: number of parallel workers and number of reply pages
# requested per petition.
NCLUSTER <- 6
PAGE_TOTAL <- 4000
# Crawl the reply pages of one petition in parallel and return the leading
# word of every reply as a single character vector.
#
# Args:
#   input_url:  Base URL of the petition page; "?page=<n>" is appended to it.
#   n_cluster:  Number of worker processes to start (defaults to NCLUSTER).
#   page_total: Number of reply pages to request (defaults to PAGE_TOTAL).
#
# Returns:
#   A character vector with one element per reply found, in page order.
#   Elements are NA where a reply does not start with a word character.
run <- function(input_url, n_cluster = NCLUSTER, page_total = PAGE_TOTAL) {
  # Download one reply page and extract the first word of each reply item.
  get_data <- function(url_base, page_id) {
    url <- paste0(url_base, "?page=", page_id)
    page <- read_html(url)
    page %>%
      html_nodes("li.Reply_Reply_list") %>%
      html_text() %>%
      str_trim() %>%
      str_extract(regex('^\\w+'))
  }

  crawler <- makeCluster(n_cluster)
  # Shut the workers down even when a page fetch fails; otherwise the worker
  # processes and their sockets would outlive the error.
  on.exit(stopCluster(crawler), add = TRUE)
  registerDoSNOW(crawler)

  # Advance the progress bar once for every finished page.
  pb <- progress_bar$new(total = page_total)
  opts <- list(progress = function(n) pb$tick())

  return_value <- foreach(
    page_id = seq_len(page_total),
    .packages = c('rvest', 'dplyr', 'stringr'),
    .options.snow = opts
  ) %dopar% {
    get_data(input_url, page_id)
  }

  # Flatten the per-page results into one vector.
  do.call(c, return_value)
}
# Crawl both petitions, pool the extracted leading words, and plot the share of
# each distinct value (labelled "Platform" on the x axis).
hankook <- run(url_hankook)
minjoo <- run(url_minjoo)

petition_all <- c(hankook, minjoo)
prop_result <- prop.table(table(petition_all))
plot(prop_result, xlab = "Platform", ylab = "Proportion")
요즘 뉴스도 안 보다가 국민청원 경쟁 소식에 뉴스를 보고 있습니다.
library(rvest)
library(dplyr)
library(foreach)
library(stringr)
library(doSNOW)
library(progress)
# Strongly prefer fixed notation over scientific notation when printing numbers.
options(scipen = 999)

# Petition pages whose reply lists are crawled.
url_hankook <- "https://www1.president.go.kr/petitions/579779"
url_minjoo <- "https://www1.president.go.kr/petitions/579682"

# Crawl settings: number of parallel workers and number of reply pages
# requested per petition.
NCLUSTER <- 6
PAGE_TOTAL <- 4000
# Crawl the reply pages of one petition in parallel and return the leading
# word of every reply as a single character vector.
#
# Args:
#   input_url:  Base URL of the petition page; "?page=<n>" is appended to it.
#   n_cluster:  Number of worker processes to start (defaults to NCLUSTER).
#   page_total: Number of reply pages to request (defaults to PAGE_TOTAL).
#
# Returns:
#   A character vector with one element per reply found, in page order.
#   Elements are NA where a reply does not start with a word character.
run <- function(input_url, n_cluster = NCLUSTER, page_total = PAGE_TOTAL) {
  # Download one reply page and extract the first word of each reply item.
  get_data <- function(url_base, page_id) {
    url <- paste0(url_base, "?page=", page_id)
    page <- read_html(url)
    page %>%
      html_nodes("li.Reply_Reply_list") %>%
      html_text() %>%
      str_trim() %>%
      str_extract(regex('^\\w+'))
  }

  crawler <- makeCluster(n_cluster)
  # Shut the workers down even when a page fetch fails; otherwise the worker
  # processes and their sockets would outlive the error.
  on.exit(stopCluster(crawler), add = TRUE)
  registerDoSNOW(crawler)

  # Advance the progress bar once for every finished page.
  pb <- progress_bar$new(total = page_total)
  opts <- list(progress = function(n) pb$tick())

  return_value <- foreach(
    page_id = seq_len(page_total),
    .packages = c('rvest', 'dplyr', 'stringr'),
    .options.snow = opts
  ) %dopar% {
    get_data(input_url, page_id)
  }

  # Flatten the per-page results into one vector.
  do.call(c, return_value)
}
# Crawl both petitions, pool the extracted leading words, and plot the share of
# each distinct value (labelled "Platform" on the x axis).
hankook <- run(url_hankook)
minjoo <- run(url_minjoo)

petition_all <- c(hankook, minjoo)
prop_result <- prop.table(table(petition_all))
plot(prop_result, xlab = "Platform", ylab = "Proportion")
댓글
댓글 쓰기