International collaborations in filmmaking offer the potential to make content more accessible to a global audience. These partnerships can introduce diverse perspectives to the production process and attract additional capital and investment. But do these advantages lead to higher viewer ratings? This study examines whether international collaboration on a single title is associated with higher IMDB ratings. Moreover, it explores the relationship between revenue and IMDB ratings, recognizing the influence of other factors, such as the COVID-19 pandemic, which increased demand for streaming services. I also focused specifically on Netflix Original movies, analyzing how their ratings have evolved as Netflix’s global subscriber base has expanded, and whether international collaboration correlates with higher ratings.
IMDB ratings typically average around 6.8, with a range from 1 to 10.
More International Collaboration:
Higher IMDB Ratings: Both in general and for Netflix Originals, international collaborations are associated with higher ratings.
More IMDB Votes: Internationally collaborative films receive more votes on IMDB, which can serve as a proxy for viewer counts, indicating broader global appeal.
Netflix Originals Are Receiving Lower Ratings: Although Netflix Originals are receiving lower ratings, this trend does not necessarily correlate with revenue or profit. Further research is needed to explore the underlying causes.
Recommendations for Further Research: More data, such as user interviews and reviews, is needed to pinpoint the factors driving these effects.
Dataset
Caveat: All data is based on titles available on Netflix as of mid-2021.
# Load data ----
# Raw inputs: the Netflix catalogue, the Netflix Originals list, and the IMDb
# title and ratings tables. Paths are relative to the working directory.
netflix <- read.csv("data/netflix_movies_shows_titles.csv")
netflix_originals <- read.csv("data/NetflixOriginals.csv")
# Attach the catalogue's `country` field to each original by matching on Title.
# merge() keeps only titles present in both tables.
netflix_originals <- merge(
  x = netflix_originals,
  y = select(netflix, Title, country),
  by = "Title"
)
imdb_titles <- read.csv("data/imdb.title.basics.csv")
imdb_ratings <- read.csv("data/imdb.title.ratings.csv")
# Clean data ----
# The literal string "\N" is treated as the missing-value marker in these three
# columns: replace it with NA first, then convert each column to numeric.
imdb_titles$startYear[imdb_titles$startYear == "\\N"] <- NA
imdb_titles$endYear[imdb_titles$endYear == "\\N"] <- NA
imdb_titles$runtimeMinutes[imdb_titles$runtimeMinutes == "\\N"] <- NA
imdb_titles$startYear <- as.numeric(imdb_titles$startYear)
imdb_titles$endYear <- as.numeric(imdb_titles$endYear)
imdb_titles$runtimeMinutes <- as.numeric(imdb_titles$runtimeMinutes)
# One row per title that has both basic information and a rating (joined on tconst).
imdb_df <- merge(imdb_titles, imdb_ratings, by = "tconst")
# Derive year columns and normalise blank countries ----
# Both date columns are split on "-" and the first field is kept as the year.
# str_split_i() is vectorised over its input, so each column is processed in a
# single call instead of growing `yr` one element at a time inside a 1:n loop
# (1:n also iterates over c(1, 0) when the table is empty).
yr <- as.numeric(str_split_i(netflix[["date_added"]], "-", 1))
netflix <- netflix %>%
  mutate(year_added_to_netflix = yr) # add a 'year added to netflix' column
netflix["country"][netflix["country"] == ""] <- NA # blank to NA

# `yr` is reused (and overwritten) for the Netflix Originals premiere dates.
yr <- as.numeric(str_split_i(netflix_originals[["Premiere"]], "-", 1))
netflix_originals <- netflix_originals %>%
  mutate(premiere_year = yr) # add a 'premiere year' column
netflix_originals["country"][netflix_originals["country"] == ""] <- NA # blank to NA
# Merge Netflix titles with IMDb ratings ----
# Rows are matched on (title, year): Netflix Title/release_year against IMDb
# primaryTitle/startYear. Afterwards only the first row is kept for each
# (Title, director, release_year) combination to remove duplicates.
netflix_ratings <- netflix %>%
  merge(
    imdb_df,
    by.x = c("Title", "release_year"),
    by.y = c("primaryTitle", "startYear")
  ) %>%
  distinct(Title, director, release_year, .keep_all = TRUE)
# sum(is.na(netflix_ratings$country)) # check the number of NAs (missing country data) N=118
Movies and shows on Netflix were produced by 122 different countries.
A total of 8,807 movies/shows are included in this analysis.
Number of Contributing Countries per Title:
Titles Produced by More Than 5 Countries:
# sum(is.na(netflix$country)) # check the number of NAs (missing country data) N=831
# Extract country data ----
# `country` holds a comma-separated list. Split it into one column per position
# (country1 .. country12); the code below relies on positions past the end of a
# title's list being NA.
temp <- netflix %>%
  mutate(
    country1 = str_split_i(country, ",", 1),
    country2 = str_split_i(country, ",", 2),
    country3 = str_split_i(country, ",", 3),
    country4 = str_split_i(country, ",", 4),
    country5 = str_split_i(country, ",", 5),
    country6 = str_split_i(country, ",", 6),
    country7 = str_split_i(country, ",", 7),
    country8 = str_split_i(country, ",", 8),
    country9 = str_split_i(country, ",", 9),
    country10 = str_split_i(country, ",", 10),
    country11 = str_split_i(country, ",", 11),
    country12 = str_split_i(country, ",", 12)
  ) %>%
  select(
    Title,
    country1, country2, country3, country4, country5, country6,
    country7, country8, country9, country10, country11, country12,
    year_added_to_netflix, Genre
  )
# Reshape to long format: one row per (title, country slot), dropping NA slots,
# then order the rows by title.
netflix_long_df <- temp %>%
  melt(id = c("Title", "year_added_to_netflix", "Genre"), na.rm = TRUE) %>%
  arrange(Title)
# Strip leading whitespace from each country piece.
netflix_long_df <- netflix_long_df %>%
  mutate(country = str_trim(value, side = "left"))
# Data engineering ----
# Manual corrections to two specific rows, kept exactly as written.
# NOTE(review): these fixes are positional and depend on the current row order
# of netflix_long_df — confirm rows 459 and 2639 are still the intended ones.
netflix_long_df[459,]['country']<-"United States"
netflix_long_df[2639,]['country']<-"United Kingdom"
# Remove rows whose country is blank. They are selected by value rather than by
# hard-coded row numbers: fixed indices go stale whenever the input or the row
# order changes, at which point they delete valid rows and leave the blank
# entries in the ranking.
netflix_long_df <- netflix_long_df[!(netflix_long_df$country %in% ""), ]
# Rank which country produced most shows/movies on Netflix
# unique(netflix_long_df$country) # movies/shows on Netflix were produced by 122 different countries
# One row per country, most frequent first. `perc` is the share of all
# (title, country) rows rounded to 3 decimals; `percentage` is the same share
# formatted as a percent string.
country_count <- netflix_long_df %>%
  count(country) %>%
  arrange(desc(n)) %>%
  mutate(
    perc = round(n / sum(n), 3),
    percentage = scales::percent(perc)
  )
# netflix_long_df %>% group_by(country) %>% count() %>%
# ungroup() %>%
# mutate(perc=round(`n`/sum(`n`),2)) %>% mutate(percentage = scales::percent(perc)) %>% arrange(desc(perc))
# Number of contributing countries per title, capped at 6 (6 means 6 or more).
# Titles with no country information (country1 is NA) get NA instead of 1, so
# missing data is not reported as a single-country production.
plot_df <- temp %>%
  mutate(
    number_of_countries = ifelse(
      is.na(country1), NA,
      ifelse(
        is.na(country2), 1,
        ifelse(
          is.na(country3), 2,
          ifelse(
            is.na(country4), 3,
            ifelse(
              is.na(country5), 4,
              ifelse(is.na(country6), 5, 6)
            )
          )
        )
      )
    )
  )
# Tally titles per country count and leave out the unknown (NA) group, so the
# bar chart only covers titles whose country list is known.
plot_df <- plot_df %>% count(number_of_countries)
plot_df <- plot_df[!is.na(plot_df$number_of_countries), ]
# Bar chart: number of titles by number of contributing countries (1 to 6+).
# Bars are black with the count printed above each bar in red.
ggplot(plot_df, aes(x = number_of_countries, y = n, fill = n)) +
  geom_col(fill = "black") +
  geom_text(
    aes(label = n, fontface = "bold"),
    vjust = -0.5, size = 3, color = "#E50914", family = "AppleGothic"
  ) +
  theme_bw() +
  theme_classic() +
  xlab(c("Number of Countries")) +
  ylab(c("Count")) +
  scale_x_discrete(limits = c(seq(1, 6, 1))) +
  ggtitle("Number of Countries that Collaborated on a Movie/Show on Netflix") +
  theme(
    plot.title = element_text(hjust = 0.5, vjust = 2.5, face = "bold", family = "AppleGothic"),
    axis.text.x = element_text(size = 10, family = "mono"),
    axis.text.y = element_text(size = 10, family = "mono"),
    axis.title.x = element_text(size = 10, face = "bold", family = "mono", hjust = 1),
    axis.title.y = element_text(size = 10, face = "bold", family = "mono", vjust = 1, hjust = 1)
  )

# Add display columns to the country ranking: readable names, the share on a
# 0-100 scale, and a rank taken from the current row order. This statement was
# previously fused onto the end of the plot call, which is not valid R.
country_count <- country_count %>%
  mutate(
    `Production Country` = country,
    `Number of Titles` = n,
    `Percentage (%)` = perc * 100,
    Rank = row_number()
  )
# plot distribution of countries in table
# Ranking rendered as a formatted table with in-cell bars: red for the title
# count, gray for the percentage. The style attribute previously ended in a
# stray `;"` after its closing quote, producing a malformed HTML attribute;
# the semicolon now sits inside the quoted value.
formattable(
  country_count %>%
    select("Rank", "Production Country", "Number of Titles", "Percentage (%)"),
  align = c("c", "r", "r", "r"),
  list(
    `Number of Titles` = color_bar("#E50914", fun = proportion),
    `Percentage (%)` = color_bar("gray")
  ),
  table.attr = 'style="font-size: 18px; font-family: AppleGothic;"'
)

| Rank | Production Country | Number of Titles | Percentage (%) |
|---|---|---|---|
| 1 | United States | 3687 | 36.8 |
| 2 | India | 1046 | 10.4 |
| 3 | United Kingdom | 806 | 8.1 |
| 4 | Canada | 445 | 4.4 |
| 5 | France | 393 | 3.9 |
| 6 | Japan | 318 | 3.2 |
| 7 | Spain | 232 | 2.3 |
| 8 | South Korea | 231 | 2.3 |
| 9 | Germany | 226 | 2.3 |
| 10 | Mexico | 169 | 1.7 |
| 11 | China | 161 | 1.6 |
| 12 | Australia | 160 | 1.6 |
| 13 | Egypt | 117 | 1.2 |
| 14 | Turkey | 112 | 1.1 |
| 15 | Hong Kong | 105 | 1.0 |
| 16 | Nigeria | 103 | 1.0 |
| 17 | Italy | 100 | 1.0 |
| 18 | Brazil | 97 | 1.0 |
| 19 | Argentina | 91 | 0.9 |
| 20 | Belgium | 90 | 0.9 |
| 21 | Indonesia | 90 | 0.9 |
| 22 | Taiwan | 89 | 0.9 |
| 23 | Philippines | 83 | 0.8 |
| 24 | Thailand | 70 | 0.7 |
| 25 | South Africa | 62 | 0.6 |
| 26 | Colombia | 51 | 0.5 |
| 27 | Netherlands | 50 | 0.5 |
| 28 | Denmark | 48 | 0.5 |
| 29 | Ireland | 46 | 0.5 |
| 30 | Sweden | 42 | 0.4 |
| 31 | Poland | 41 | 0.4 |
| 32 | Singapore | 41 | 0.4 |
| 33 | United Arab Emirates | 37 | 0.4 |
| 34 | New Zealand | 33 | 0.3 |
| 35 | Lebanon | 31 | 0.3 |
| 36 | Israel | 30 | 0.3 |
| 37 | Norway | 30 | 0.3 |
| 38 | Chile | 29 | 0.3 |
| 39 | Russia | 27 | 0.3 |
| 40 | Malaysia | 26 | 0.3 |
| 41 | Pakistan | 24 | 0.2 |
| 42 | Czech Republic | 22 | 0.2 |
| 43 | Switzerland | 19 | 0.2 |
| 44 | Romania | 14 | 0.1 |
| 45 | Uruguay | 14 | 0.1 |
| 46 | Saudi Arabia | 13 | 0.1 |
| 47 | Austria | 12 | 0.1 |
| 48 | Luxembourg | 12 | 0.1 |
| 49 | Finland | 11 | 0.1 |
| 50 | Greece | 11 | 0.1 |
| 51 | Hungary | 11 | 0.1 |
| 52 | Iceland | 11 | 0.1 |
| 53 | Bulgaria | 10 | 0.1 |
| 54 | Peru | 10 | 0.1 |
| 55 | Qatar | 10 | 0.1 |
| 56 | Jordan | 9 | 0.1 |
| 57 | Kuwait | 8 | 0.1 |
| 58 |  | 7 | 0.1 |
| 59 | Serbia | 7 | 0.1 |
| 60 | Cambodia | 6 | 0.1 |
| 61 | Kenya | 6 | 0.1 |
| 62 | Morocco | 6 | 0.1 |
| 63 | Portugal | 6 | 0.1 |
| 64 | Vietnam | 6 | 0.1 |
| 65 | Ghana | 5 | 0.0 |
| 66 | West Germany | 5 | 0.0 |
| 67 | Bangladesh | 4 | 0.0 |
| 68 | Croatia | 4 | 0.0 |
| 69 | Iran | 4 | 0.0 |
| 70 | Venezuela | 4 | 0.0 |
| 71 | Algeria | 3 | 0.0 |
| 72 | Malta | 3 | 0.0 |
| 73 | Senegal | 3 | 0.0 |
| 74 | Slovenia | 3 | 0.0 |
| 75 | Soviet Union | 3 | 0.0 |
| 76 | Syria | 3 | 0.0 |
| 77 | Ukraine | 3 | 0.0 |
| 78 | Zimbabwe | 3 | 0.0 |
| 79 | Cayman Islands | 2 | 0.0 |
| 80 | Georgia | 2 | 0.0 |
| 81 | Guatemala | 2 | 0.0 |
| 82 | Iraq | 2 | 0.0 |
| 83 | Mauritius | 2 | 0.0 |
| 84 | Namibia | 2 | 0.0 |
| 85 | Nepal | 2 | 0.0 |
| 86 | Afghanistan | 1 | 0.0 |
| 87 | Albania | 1 | 0.0 |
| 88 | Angola | 1 | 0.0 |
| 89 | Armenia | 1 | 0.0 |
| 90 | Azerbaijan | 1 | 0.0 |
| 91 | Bahamas | 1 | 0.0 |
| 92 | Belarus | 1 | 0.0 |
| 93 | Bermuda | 1 | 0.0 |
| 94 | Botswana | 1 | 0.0 |
| 95 | Burkina Faso | 1 | 0.0 |
| 96 | Cameroon | 1 | 0.0 |
| 97 | Cuba | 1 | 0.0 |
| 98 | Cyprus | 1 | 0.0 |
| 99 | Dominican Republic | 1 | 0.0 |
| 100 | East Germany | 1 | 0.0 |
| 101 | Ecuador | 1 | 0.0 |
| 102 | Ethiopia | 1 | 0.0 |
| 103 | Jamaica | 1 | 0.0 |
| 104 | Kazakhstan | 1 | 0.0 |
| 105 | Latvia | 1 | 0.0 |
| 106 | Liechtenstein | 1 | 0.0 |
| 107 | Lithuania | 1 | 0.0 |
| 108 | Malawi | 1 | 0.0 |
| 109 | Mongolia | 1 | 0.0 |
| 110 | Montenegro | 1 | 0.0 |
| 111 | Mozambique | 1 | 0.0 |
| 112 | Nicaragua | 1 | 0.0 |
| 113 | Palestine | 1 | 0.0 |
| 114 | Panama | 1 | 0.0 |
| 115 | Paraguay | 1 | 0.0 |
| 116 | Puerto Rico | 1 | 0.0 |
| 117 | Samoa | 1 | 0.0 |
| 118 | Slovakia | 1 | 0.0 |
| 119 | Somalia | 1 | 0.0 |
| 120 | Sri Lanka | 1 | 0.0 |
| 121 | Sudan | 1 | 0.0 |
| 122 | Uganda | 1 | 0.0 |
| 123 | Vatican City | 1 | 0.0 |
# Data preparation ----
# extract country information
# Split the comma-separated `country` field of the rated titles into positional
# columns country1 .. country9.
temp <- netflix_ratings %>%
  mutate(
    country1 = str_split_i(country, ",", 1),
    country2 = str_split_i(country, ",", 2),
    country3 = str_split_i(country, ",", 3),
    country4 = str_split_i(country, ",", 4),
    country5 = str_split_i(country, ",", 5),
    country6 = str_split_i(country, ",", 6),
    country7 = str_split_i(country, ",", 7),
    country8 = str_split_i(country, ",", 8),
    country9 = str_split_i(country, ",", 9)
  )
# NOTE(review): each check below counts titles whose k-th country slot is
# filled, i.e. titles with AT LEAST k countries, not exactly k. The recorded
# numbers are the author's notes and have not been re-verified here.
# sum(is.na(temp$country)) # 118 titles have no country information
# sum(is.na(temp$country1)==FALSE) # 1799 movies/shows have 1 country involved
# sum(is.na(temp$country2)==FALSE) # 318 movies/shows have 2 countries involved
# sum(is.na(temp$country3)==FALSE) # 115 movies/shows have 3 countries involved
# sum(is.na(temp$country4)==FALSE) # 49 movies/shows have 4 countries involved
# sum(is.na(temp$country5)==FALSE) # 13 movies/shows have 5 countries involved
# sum(is.na(temp$country6)==FALSE) # 3 movies/shows have 6 countries involved
# sum(is.na(temp$country7)==FALSE) # 4 movies/shows have 7 countries involved
# sum(is.na(temp$country8)==FALSE) # 1 movies/shows have 8 countries involved
# sum(is.na(temp$country9)==FALSE) # 1 movies/shows have 12 countries involved (max number)
# Keep only the columns used by the rating and vote analyses.
temp <- temp %>%
  select(
    Title, country,
    country1, country2, country3, country4, country5,
    country6, country7, country8, country9,
    year_added_to_netflix, Genre, averageRating, numVotes
  )
# Number of contributing countries per rated title, capped at 6.
# Titles with no country information (country1 is NA) are coded NA instead of
# 1: treating missing data as a single-country production would put unknown
# titles into the "no collaboration" group and bias the comparison.
nf_df_cleaned <- temp %>%
  mutate(
    number_of_countries = ifelse(
      is.na(country1), NA,
      ifelse(
        is.na(country2), 1,
        ifelse(
          is.na(country3), 2,
          ifelse(
            is.na(country4), 3,
            ifelse(
              is.na(country5), 4,
              ifelse(is.na(country6), 5, 6)
            )
          )
        )
      )
    )
  ) # 6 means 6 or more countries
# U.S. vs. other countries (binary coding: other 0 vs. U.S. 1) (collab 1 vs. single country 0)
# collab_or_not is NA when number_of_countries is NA.
nf_df_cleaned <- nf_df_cleaned %>%
  mutate(
    collab_or_not = ifelse(number_of_countries >= 2, 1, 0),
    us_or_not = ifelse(grepl("United States", country), 1, 0)
  )

# Linear trend of average IMDb rating against number of countries.
# This call was previously fused onto the end of the mutate() above, which is
# not valid R; it is now its own statement.
ggplot(data = nf_df_cleaned, aes(x = number_of_countries, y = averageRating)) +
  geom_smooth(method = lm, colour = "#E50914", size = 2, se = FALSE) +
  scale_x_discrete(limits = c(seq(1, 6, 1))) +
  theme_bw() +
  theme_classic() +
  labs(
    x = "Number of Countries",
    y = "Average IMDb Ratings",
    title = "International Collaborations Predict Higher Ratings on IMDb"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5, vjust = 2.5, size = 15, face = "bold", family = "AppleGothic"),
    axis.text.x = element_text(size = 10, family = "mono"),
    axis.text.y = element_text(size = 10, family = "mono", angle = 45),
    axis.title.x = element_text(size = 10, face = "bold", family = "mono", hjust = 1),
    axis.title.y = element_text(size = 10, face = "bold", family = "mono", vjust = 1, hjust = 1)
  )
# summary(lm(averageRating~number_of_countries,data=nf_df_cleaned))
# qqnorm(x=nf_df_cleaned$number_of_countries,y=nf_df_cleaned$averageRating) normality
# cor.test(nf_df_cleaned$numVotes,nf_df_cleaned$averageRating) # titles that have more votes tend to score higher on ratings
# Linear trend of IMDb vote count against number of countries.
# The ggplot() call had been fused onto the end of the comment line above,
# which commented it out and left the layers below without a plot object.
ggplot(data = nf_df_cleaned, aes(x = number_of_countries, y = numVotes)) +
  geom_smooth(method = lm, colour = "#E50914", size = 2, se = FALSE) +
  scale_x_discrete(limits = c(seq(1, 6, 1))) +
  theme_bw() +
  theme_classic() +
  labs(
    x = "Number of Countries",
    y = "Number of Votes",
    title = "International Collaborations Predict More Votes on IMDb"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5, vjust = 2.5, size = 15, face = "bold", family = "AppleGothic"),
    axis.text.x = element_text(size = 10, family = "mono"),
    axis.text.y = element_text(size = 10, family = "mono", angle = 45),
    axis.title.x = element_text(size = 10, face = "bold", family = "mono", hjust = 1),
    axis.title.y = element_text(size = 10, face = "bold", family = "mono", vjust = 1, hjust = 1)
  )
# summary(lm(log(numVotes)~log(number_of_countries),data=nf_df_cleaned)) # proxy for the number of viewers # log transformed for normality
# cor.test(nf_df_cleaned$numVotes,nf_df_cleaned$averageRating) # titles that have more votes tend to score higher on ratings
How about Netflix Originals, specifically?
# Data Preparation ----
# Extract country data
# Split the comma-separated `country` field of each Netflix Original into
# positional columns country1 .. country4, then keep the first row for each
# (Title, Genre, Language) combination to remove duplicates.
temp <- netflix_originals %>%
  mutate(
    country1 = str_split_i(country, ",", 1),
    country2 = str_split_i(country, ",", 2),
    country3 = str_split_i(country, ",", 3),
    country4 = str_split_i(country, ",", 4)
  ) %>%
  distinct(Title, Genre, Language, .keep_all = TRUE)
# Columns needed for the long (one row per title and country slot) table.
temp1 <- temp %>%
  select(
    Title, country1, country2, country3, country4,
    premiere_year, IMDB.Score, Genre
  )
# Reshape to long format, dropping NA country slots, ordered by title.
netflix_originals_long_df <- temp1 %>%
  melt(id = c("Title", "premiere_year", "IMDB.Score", "Genre"), na.rm = TRUE) %>%
  arrange(Title)
# Strip leading whitespace from each country piece.
netflix_originals_long_df <- netflix_originals_long_df %>%
  mutate(country = str_trim(value, side = "left"))
# Rank which country produced most Netflix Original movies
# unique(netflix_originals_long_df$country) # Netflix original movies were produced by 43 different countries
# Note: this overwrites `country_count` with the Netflix Originals ranking.
# `perc` is the share of all (title, country) rows rounded to 3 decimals;
# `percentage` is the same share formatted as a percent string.
country_count <- netflix_originals_long_df %>%
  count(country) %>%
  arrange(desc(n)) %>%
  mutate(
    perc = round(n / sum(n), 3),
    percentage = scales::percent(perc)
  )
# plot table
# The call below was previously fused with the header row of its rendered
# output, which is not valid R; the header is now on its own line.
formattable(
  country_count %>% select(country, n, percentage),
  list(`n` = color_bar(color = "lightblue"))
)

| country | n | percentage |
|---|---|---|
| United States | 327 | 56.4% |
| India | 38 | 6.6% |
| United Kingdom | 38 | 6.6% |
| Canada | 17 | 2.9% |
| France | 16 | 2.8% |
| Italy | 16 | 2.8% |
| Spain | 14 | 2.4% |
| Mexico | 11 | 1.9% |
| Indonesia | 9 | 1.6% |
| Brazil | 8 | 1.4% |
| Germany | 8 | 1.4% |
| South Korea | 8 | 1.4% |
| Japan | 7 | 1.2% |
| Argentina | 6 | 1.0% |
| Hungary | 4 | 0.7% |
| South Africa | 4 | 0.7% |
| Turkey | 4 | 0.7% |
| Australia | 3 | 0.5% |
| Belgium | 3 | 0.5% |
| Netherlands | 3 | 0.5% |
| Philippines | 3 | 0.5% |
| Poland | 3 | 0.5% |
| Thailand | 3 | 0.5% |
| Austria | 2 | 0.3% |
| China | 2 | 0.3% |
| Denmark | 2 | 0.3% |
| Ireland | 2 | 0.3% |
| Nigeria | 2 | 0.3% |
| Norway | 2 | 0.3% |
| Sweden | 2 | 0.3% |
|  | 1 | 0.2% |
| Albania | 1 | 0.2% |
| Cambodia | 1 | 0.2% |
| Chile | 1 | 0.2% |
| Greece | 1 | 0.2% |
| Iceland | 1 | 0.2% |
| Iran | 1 | 0.2% |
| Israel | 1 | 0.2% |
| Malaysia | 1 | 0.2% |
| Pakistan | 1 | 0.2% |
| Switzerland | 1 | 0.2% |
| Ukraine | 1 | 0.2% |
| United Arab Emirates | 1 | 0.2% |
# Number of contributing countries per Netflix Original, capped at 4.
# Titles with no country information (country1 is NA) are coded NA instead of
# 1, so missing data is not counted as a single-country production.
nf_originals_df_cleaned <- temp %>%
  mutate(
    number_of_countries = ifelse(
      is.na(country1), NA,
      ifelse(
        is.na(country2), 1,
        ifelse(
          is.na(country3), 2,
          ifelse(is.na(country4), 3, 4)
        )
      )
    )
  ) # 4 means 4 or more countries
# U.S. vs. other countries (binary coding: other 0 vs. U.S. 1) (collab 1 vs. single country 0)
# collab_or_not is NA when number_of_countries is NA.
nf_originals_df_cleaned <- nf_originals_df_cleaned %>%
  mutate(
    collab_or_not = ifelse(number_of_countries >= 2, 1, 0),
    us_or_not = ifelse(grepl("United States", country), 1, 0)
  )
# Country ranking for Netflix Originals, most frequent first, with the share of
# all (title, country) rows as a fraction (`perc`) and as a percent string.
originals_country_count <- netflix_originals_long_df %>%
  count(country) %>%
  arrange(desc(n)) %>%
  mutate(
    perc = round(n / sum(n), 3),
    percentage = scales::percent(perc)
  )
# Add display columns for the formatted table.
originals_country_count <- originals_country_count %>%
  mutate(
    `Production Country` = country,
    `Number of Movies/Shows` = n,
    `Percentage (%)` = perc * 100
  )

# Titles per country count for the bar chart, leaving out the unknown (NA)
# group. This statement was previously fused onto the end of the mutate()
# above, which is not valid R.
plot_originals_df <- nf_originals_df_cleaned %>% count(number_of_countries)
plot_originals_df <- plot_originals_df[!is.na(plot_originals_df$number_of_countries), ]
# Bar chart: number of Netflix Original movies by number of contributing
# countries (1 to 4). Bars are black with the count printed above in red.
ggplot(plot_originals_df, aes(x = number_of_countries, y = n, fill = n)) +
  geom_col(fill = "black") +
  geom_text(
    aes(label = n, fontface = "bold"),
    vjust = -0.5, size = 3, color = "#E50914", family = "AppleGothic"
  ) +
  theme_bw() +
  theme_classic() +
  xlab(c("Number of Countries")) +
  ylab(c("Count")) +
  scale_x_discrete(limits = c(seq(1, 4, 1))) +
  ggtitle("Number of Countries that Collaborated on a Netflix Original movie") +
  theme(
    plot.title = element_text(hjust = 0.5, vjust = 2.5, face = "bold", family = "AppleGothic"),
    axis.text.x = element_text(size = 10, family = "mono"),
    axis.text.y = element_text(size = 10, family = "mono"),
    axis.title.x = element_text(size = 10, face = "bold", family = "mono", hjust = 1),
    axis.title.y = element_text(size = 10, face = "bold", family = "mono", vjust = 3, hjust = 1)
  )
# plot distribution of countries in table
# Netflix Originals country ranking as a formatted table with in-cell bars:
# red for the title count, gray for the percentage. The call was previously
# fused with the header row of its rendered output, which is not valid R; the
# header is now on its own line.
formattable(
  originals_country_count %>%
    select("Production Country", "Number of Movies/Shows", "Percentage (%)"),
  align = c("r", "r", "r"),
  list(
    `Number of Movies/Shows` = color_bar("#E50914"),
    `Percentage (%)` = color_bar("gray")
  )
)

| Production Country | Number of Movies/Shows | Percentage (%) |
|---|---|---|
| United States | 327 | 56.4 |
| India | 38 | 6.6 |
| United Kingdom | 38 | 6.6 |
| Canada | 17 | 2.9 |
| France | 16 | 2.8 |
| Italy | 16 | 2.8 |
| Spain | 14 | 2.4 |
| Mexico | 11 | 1.9 |
| Indonesia | 9 | 1.6 |
| Brazil | 8 | 1.4 |
| Germany | 8 | 1.4 |
| South Korea | 8 | 1.4 |
| Japan | 7 | 1.2 |
| Argentina | 6 | 1.0 |
| Hungary | 4 | 0.7 |
| South Africa | 4 | 0.7 |
| Turkey | 4 | 0.7 |
| Australia | 3 | 0.5 |
| Belgium | 3 | 0.5 |
| Netherlands | 3 | 0.5 |
| Philippines | 3 | 0.5 |
| Poland | 3 | 0.5 |
| Thailand | 3 | 0.5 |
| Austria | 2 | 0.3 |
| China | 2 | 0.3 |
| Denmark | 2 | 0.3 |
| Ireland | 2 | 0.3 |
| Nigeria | 2 | 0.3 |
| Norway | 2 | 0.3 |
| Sweden | 2 | 0.3 |
|  | 1 | 0.2 |
| Albania | 1 | 0.2 |
| Cambodia | 1 | 0.2 |
| Chile | 1 | 0.2 |
| Greece | 1 | 0.2 |
| Iceland | 1 | 0.2 |
| Iran | 1 | 0.2 |
| Israel | 1 | 0.2 |
| Malaysia | 1 | 0.2 |
| Pakistan | 1 | 0.2 |
| Switzerland | 1 | 0.2 |
| Ukraine | 1 | 0.2 |
| United Arab Emirates | 1 | 0.2 |
# summary(lm(IMDB.Score~number_of_countries,data=nf_originals_df_cleaned))
# Linear trend of IMDb score against number of countries for Netflix Originals.
# The x axis is limited to 1..4 because number_of_countries is capped at 4 for
# this data set; the previous 1..6 limits showed two categories that cannot
# occur and disagreed with the Netflix Originals bar chart. The y axis is
# zoomed to the 6-7 range.
ggplot(data = nf_originals_df_cleaned, aes(x = number_of_countries, y = IMDB.Score)) +
  geom_smooth(method = lm, colour = "#E50914", size = 2, se = FALSE) +
  scale_x_discrete(limits = c(seq(1, 4, 1))) +
  coord_cartesian(ylim = c(6, 7)) +
  theme_bw() +
  theme_classic() +
  labs(
    x = "Number of Countries",
    y = "Average IMDb Ratings",
    title = "International Collaborations Predict Higher Ratings"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5, vjust = 2.5, size = 15, face = "bold", family = "AppleGothic"),
    axis.text.x = element_text(size = 10, family = "mono"),
    axis.text.y = element_text(size = 10, family = "mono", angle = 45),
    axis.title.x = element_text(size = 10, face = "bold", family = "mono", hjust = 1),
    axis.title.y = element_text(size = 10, face = "bold", family = "mono", vjust = 1, hjust = 1)
  )
# nf_originals_df_cleaned %>% group_by(premiere_year) %>% summarise(average_n_of_countries=mean(number_of_countries))
# nf_originals_df_cleaned[which(nf_originals_df_cleaned$IMDB.Score==max(nf_originals_df_cleaned$IMDB.Score))[1],] # highest rating
# nf_originals_df_cleaned[which(nf_originals_df_cleaned$IMDB.Score==min(nf_originals_df_cleaned$IMDB.Score))[1],] # lowest rating
# t.test(nf_originals_df_cleaned$IMDB.Score,nf_df_cleaned$averageRating) # netflix originals have significantly lower ratings, compared to all contents available on Netflix combined
# Mean IMDb rating (rounded to 2 decimals) for all rated Netflix titles and for
# Netflix Originals, one row per group.
bar_df <- data.frame(
  group = c("Netflix Overall", "Netflix Originals"),
  Average_Ratings = round(
    c(
      mean(nf_df_cleaned$averageRating),
      mean(nf_originals_df_cleaned$IMDB.Score)
    ),
    2
  )
)
# Two-bar comparison with the y axis zoomed to the 5-7 range and each mean
# printed above its bar.
ggplot(bar_df, aes(x = group, y = Average_Ratings, fill = Average_Ratings)) +
  geom_col(fill = "black", width = 0.5) +
  coord_cartesian(ylim = c(5, 7)) +
  geom_text(
    aes(label = Average_Ratings, fontface = "bold"),
    vjust = -0.5, size = 3, color = "#E50914", family = "AppleGothic"
  ) +
  theme_bw() +
  theme_classic() +
  ylab(c("Average IMDb Ratings")) +
  ggtitle("IMDb Ratings: Overall Content on Netflix vs. Netflix Originals") +
  theme(
    plot.title = element_text(hjust = 0.5, vjust = 2.5, face = "bold", family = "AppleGothic"),
    axis.text.x = element_text(size = 10, family = "mono"),
    axis.text.y = element_text(size = 10, family = "mono"),
    axis.title.x = element_blank(),
    axis.title.y = element_text(size = 10, face = "bold", family = "mono", vjust = 3, hjust = 1)
  )