Analysis

Libraries and connection parameters are provided here.

Show the code

library(RMySQL)
library(readr)
library(dplyr)
library(knitr)
library(ggplot2)
library(plotly)
library(leaflet)
library(tidyr)
library(httr)
library(jsonlite)
library(lubridate)

# Set up the connection parameters
db_host <- "127.0.0.1"  # Localhost address
db_user <- "root"  # MySQL username
db_password <- ""  # MySQL password
db_name <- "emu415_ciphers"  # The name of database

# Establish the connection
db_connection <- dbConnect(RMySQL::MySQL(), 
                           host = db_host,
                           user = db_user,
                           password = db_password,
                           dbname = db_name)

1 Overall Rating per Company

Show the code

# Write your SQL query to calculate overall rating per company
query <- "
SELECT 
    c.company_name,
    ROUND(AVG(r.transportation_rating), 2) AS avg_transportation_rating,
    ROUND(AVG(r.work_env_rating), 2) AS avg_work_env_rating,
    ROUND(AVG(r.learning_opp_rating), 2) AS avg_learning_opp_rating,
    ROUND(AVG(r.food_rating), 2) AS avg_food_rating,
    ROUND(AVG(r.hr_rating), 2) AS avg_hr_rating,
    ROUND(AVG(r.mentor_rating), 2) AS avg_mentor_rating,
    ROUND((AVG(r.transportation_rating) + AVG(r.work_env_rating) + AVG(r.learning_opp_rating) + AVG(r.food_rating) + AVG(r.hr_rating) + AVG(r.mentor_rating)) / 6, 2) AS overall_rating
FROM internships AS i
JOIN companies AS c ON i.company_id = c.company_id
JOIN ratings AS r ON i.internship_id = r.internship_id
GROUP BY c.company_name
ORDER BY overall_rating DESC
"

# Execute the query and fetch the results
results <- dbGetQuery(db_connection, query)

# Reverse the order of the results dataframe
results <- results[order(-results$overall_rating), ]

kable(head(results, 10))

company_name	avg_transportation_rating	avg_work_env_rating	avg_learning_opp_rating	avg_food_rating	avg_hr_rating	avg_mentor_rating	overall_rating
Hayat Kimya	5.00	5.00	5.0	5.00	5.00	5.00	5.00
Daikin	5.00	5.00	5.0	5.00	5.00	5.00	5.00
Otokar	5.00	5.00	4.0	5.00	5.00	5.00	4.83
TÜPRAŞ	5.00	5.00	5.0	4.00	4.00	5.00	4.67
Aselsan	4.75	4.50	4.5	4.25	4.75	4.75	4.58
Baykar Mak.	4.50	4.50	4.0	5.00	4.50	4.00	4.42
Hidromek	3.00	5.00	5.0	4.00	4.00	5.00	4.33
Otokoç Otomotiv	1.00	5.00	5.0	5.00	5.00	5.00	4.33
Roketsan	4.50	4.75	4.5	4.00	4.25	4.00	4.33
RND Yazılım ve E-Ticaret	4.00	4.00	5.0	4.00	4.00	5.00	4.33

Show the code

# Plot the results using plotly
plot_ly(results, x = ~company_name, y = ~overall_rating, type = 'bar', name = 'Overall Rating') %>%
  layout(title = "Overall Rating per Company",
         xaxis = list(title = "", tickangle = 45, tickfont = list(size = 10), categoryorder = "array", categoryarray = ~company_name),
         yaxis = list(title = "Overall Rating", range = c(1, 5)))

2 Number of Interns in Each Company

Show the code

# Write your SQL query to count the number of interns per company
query <- "
SELECT 
    companies.company_name,
    COUNT(*) AS num_interns,
    ROUND(AVG(r.transportation_rating), 2) AS avg_transportation_rating,
    ROUND(AVG(r.work_env_rating), 2) AS avg_work_env_rating,
    ROUND(AVG(r.learning_opp_rating), 2) AS avg_learning_opp_rating,
    ROUND(AVG(r.food_rating), 2) AS avg_food_rating,
    ROUND(AVG(r.hr_rating), 2) AS avg_hr_rating,
    ROUND(AVG(r.mentor_rating), 2) AS avg_mentor_rating,
    ROUND((AVG(r.transportation_rating) + AVG(r.work_env_rating) + AVG(r.learning_opp_rating) + AVG(r.food_rating) + AVG(r.hr_rating) + AVG(r.mentor_rating)) / 6, 2) AS overall_rating
FROM internships
JOIN companies ON internships.company_id = companies.company_id
LEFT JOIN ratings r ON internships.internship_id = r.internship_id
GROUP BY companies.company_name
HAVING COUNT(*) > 1
"

# Execute the query and fetch the results
company_results <- dbGetQuery(db_connection, query)

kable(head(company_results, 10))

company_name	num_interns	avg_transportation_rating	avg_work_env_rating	avg_learning_opp_rating	avg_food_rating	avg_hr_rating	avg_mentor_rating	overall_rating
Aselsan	4	4.75	4.50	4.50	4.25	4.75	4.75	4.58
Baykar Mak.	2	4.50	4.50	4.00	5.00	4.50	4.00	4.42
Makine ve Kimya Endüstrisi A.Ş.	5	4.20	3.20	3.40	3.40	4.20	3.60	3.67
Mercedes	2	5.00	4.00	4.50	3.50	4.50	3.50	4.17
Nurus	2	4.00	3.50	3.50	2.50	3.50	5.00	3.67
Roketsan	4	4.50	4.75	4.50	4.00	4.25	4.00	4.33
Türk Havacılık ve Uzay Sanayii(TUSAŞ)	7	4.14	4.00	3.43	4.14	3.29	3.14	3.69
Türk Traktör	5	4.60	3.20	3.80	4.40	3.40	3.00	3.73
Vestel	2	4.50	3.50	2.50	5.00	4.00	2.50	3.67

Show the code

# Plot the results using plotly
plot_ly(company_results, x = ~company_name, y = ~num_interns, type = 'bar', name = 'Number of Interns') %>%
  add_trace(x = ~company_name, y = ~overall_rating, type = 'scatter', mode = 'lines+markers', name = 'Overall Rating', yaxis = "y2", line = list(shape = 'li', connectgaps = TRUE)) %>%
  layout(title = "Number of Interns and Overall Rating per Company (with more than 1 intern)",
         xaxis = list(title = "Company Name"),
         yaxis = list(title = "Number of Interns"),
         yaxis2 = list(title = "Overall Rating", overlaying = "y", side = "right", range = c(0, max(company_results$overall_rating) + 1)))

3 Number of Interns per Industry

Show the code

# Write your SQL query to calculate the overall rating per industry
query <- "
SELECT 
    c.industry,
    COUNT(*) AS num_interns,
    ROUND(AVG(r.transportation_rating), 2) AS avg_transportation_rating,
    ROUND(AVG(r.work_env_rating), 2) AS avg_work_env_rating,
    ROUND(AVG(r.learning_opp_rating), 2) AS avg_learning_opp_rating,
    ROUND(AVG(r.food_rating), 2) AS avg_food_rating,
    ROUND(AVG(r.hr_rating), 2) AS avg_hr_rating,
    ROUND(AVG(r.mentor_rating), 2) AS avg_mentor_rating,
    ROUND((AVG(r.transportation_rating) + AVG(r.work_env_rating) + AVG(r.learning_opp_rating) + AVG(r.food_rating) + AVG(r.hr_rating) + AVG(r.mentor_rating)) / 6, 2) AS overall_rating
FROM internships i
JOIN companies c ON i.company_id = c.company_id
LEFT JOIN ratings r ON i.internship_id = r.internship_id
GROUP BY c.industry
HAVING num_interns > 1
ORDER BY num_interns DESC;
"

# Execute the query and fetch the results
results <- dbGetQuery(db_connection, query)

# Reorder the results dataframe by num_interns in descending order
results <- results[order(-results$num_interns), ]

kable(head(results, 10))

industry	num_interns	avg_transportation_rating	avg_work_env_rating	avg_learning_opp_rating	avg_food_rating	avg_hr_rating	avg_mentor_rating	overall_rating
Automotive	12	4.00	3.58	4.00	4.00	3.83	3.58	3.83
Aerospace	9	4.22	4.11	3.56	4.33	3.56	3.33	3.85
Defense	8	4.63	4.63	4.50	4.13	4.50	4.38	4.46
Ammo	5	4.20	3.20	3.40	3.40	4.20	3.60	3.67
Furniture	2	4.00	3.50	3.50	2.50	3.50	5.00	3.67
Electronics	2	4.50	3.50	2.50	5.00	4.00	2.50	3.67

Show the code

# Plot the results using plotly
plot_ly(results, x = ~reorder(industry, -num_interns), y = ~num_interns, type = 'bar', name = 'Number of Interns') %>%
  add_trace(x = ~reorder(industry, -num_interns), y = ~overall_rating, type = 'scatter', mode = 'lines+markers', name = 'Overall Rating', yaxis = "y2", connectgaps = TRUE) %>%
  layout(title = "Number of Interns and Overall Rating per Industry (with more than 1 intern)",
         xaxis = list(title = "Industry", categoryorder = "total descending"),
         yaxis = list(title = "Number of Interns"),
         yaxis2 = list(title = "Overall Rating", overlaying = "y", side = "right", range = c(0, max(results$overall_rating) + 1)))

4 Number of Interns per Department

Show the code

# Write your SQL query to count the number of interns per department and calculate the average learning opportunity rating
query <- "
SELECT 
    i.department,
    COUNT(*) AS num_interns,
    ROUND(AVG(r.learning_opp_rating), 2) AS avg_learning_opp_rating
FROM internships i
LEFT JOIN ratings r ON i.internship_id = r.internship_id
GROUP BY i.department;
"

# Execute the query and fetch the results
department_results <- dbGetQuery(db_connection, query)

kable(head(department_results, 10))

department	num_interns	avg_learning_opp_rating
Bilgi Teknolojileri	1	5.00
Çaycı	1	4.00
Dijital Dönüşüm	1	5.00
E-Ticaret	1	5.00
Finans	1	5.00
Global Order Center	1	4.00
İnsan Kaynakları	2	2.00
İş Geliştirme	3	4.67
Kalite	3	3.33
Lojistik	3	3.67

Show the code

# Plot the results using plotly
plot_ly(department_results, x = ~department, y = ~num_interns, type = 'bar', name = 'Number of Interns') %>%
  add_trace(x = ~department, y = ~avg_learning_opp_rating, type = 'scatter', mode = 'lines+markers', name = 'Avg Learning Opp Rating', yaxis = "y2", connectgaps = TRUE) %>%
  layout(title = "Number of Interns and Average Learning Opp Rating per Department",
         xaxis = list(title = "Department"),
         yaxis = list(title = "Number of Interns"),
         yaxis2 = list(title = "Avg Learning Opp Rating", overlaying = "y", side = "right", range = c(0, max(department_results$avg_learning_opp_rating) + 1)))

5 Comments

Show the code

# SQL query
query <- "
SELECT 
    s.first_name,
    s.last_name,
    c.company_name,
    ROUND((r.transportation_rating + r.work_env_rating + r.learning_opp_rating + r.food_rating + r.hr_rating + r.mentor_rating) / 6, 2) AS overall_rating,
    r.comment
FROM 
    ratings r
JOIN 
    internships i ON r.internship_id = i.internship_id
JOIN 
    students s ON i.student_id = s.student_id
JOIN 
    companies c ON i.company_id = c.company_id
WHERE 
    r.comment IS NOT NULL
ORDER BY
    s.first_name;
"

# Execute the query and fetch the results
results <- dbGetQuery(db_connection, query)

# Display the results
kable(results)

first_name	last_name	company_name	overall_rating	comment
Ahad Tanay	Uyar	Roketsan	5.00	Kendini geliştirmek ve Proje Yönetimini öğrenmek adına harika bir şikret. Mert Harika ikiside.
Ahad Tanay	Uyar	Türk Havacılık ve Uzay Sanayii(TUSAŞ)	4.50	Depo her ne kadar kötü olsada staj deneyimi açısından Türkiye’nin konuştuğu ürünleri görmek güzel bir deneyimdi.
Arda	Türkan	RND Yazılım ve E-Ticaret	4.33	E-ticaret sisteminin arkasındaki operasyonları, sunulan hizmetleri öğrenebilirsiniz.
Arda	Türkan	Daikin	5.00	Yaz sıcaklarında soğutma ekipmanı satışı, stajyerliktense çalışan olarak görünmenizi sağlıyor.
Ayşenur	Eren	Vestel	2.67	Kariyer gelişimime çok faydalı olduğunu düşünmüyorum ama yediğim en güzel şirket yemeğini burada yedim, o yüzden pişman sayılmam.
Begüm	Bahçekapılı	Nurus	4.33	Staj sürecinde kendi projemizi yapmamız ve sunmamız beklendi. Bunun bakış açısına göre olumlu ve olumsuz görülebilecek yanları oldu.
Begüm	Bahçekapılı	Hidromek	4.33	Henüz 2. sınıfı yeni bitirmiş olmama rağmen bu staj sırasında şirkette gerçekleşen tüm süreçleri yakından incelememe olanak tanındı. Çok memnun kaldım.
Dilan Su	Fırat	Türk Traktör	3.33	Mentorümden rapor için bilgiler almam çok zorlu oldu.
Dilan Su	Fırat	Kazan Soda	3.17	Kimya Mühendisleri için güzel bir staj yeri, fakat diğer bölümler için geliştirici bir staj değildi.
Doğa	Oral	ARD Grup Bilişim	3.67	Ulaşım ve yemek yoktu, o yüzden 1 verdim.
Emre	Gül	Otokoç Otomotiv	4.33	Otokoç Otomotiv gibi güzide bir şirketin listede default gelmemesini kınıyorum.
Mert	Çetinkanat	Aselsan	4.33	Güzel bir deneyim oldu, fikirlerim önemsendi ve hazırladığım yenilikler hayata geçirildi.
Mert Bekir	Kaplan	Türk Traktör	4.50	Çok güzel bir staj programıydı, benim için dolu dolu geçti yemeklerine ayrı hayran kaldım.
Mert Bekir	Kaplan	Makine ve Kimya Endüstrisi A.Ş.	2.33	Kendinizi geliştirmek istiyorsanız burayı tercih etmenizi önermem.
Sena	Enşici	Roketsan	4.50	Gittiğim birimde stajyer nefreti vardı, mobinge maruz kaldım. İş hayatını öğrendim.
Sena	Enşici	Baykar Mak.	4.50	Çok ciddi gizlilik kuralları vardı, gün sonunda disiplini öğretti.
Tuncay	Özkardeş	Türk Traktör	3.33	Mert Bekir Kaplan harika bir adam.

6 Geographical Distribution of Companies

Show the code

# Function to geocode using OpenStreetMap Nominatim API
geocode_nominatim <- function(location) {
  url <- paste0("https://nominatim.openstreetmap.org/search?q=", URLencode(location), "&format=json&addressdetails=1")
  response <- GET(url)
  if (http_error(response)) {
    stop("Error occurred while geocoding")
  }
  result <- content(response)
  if (length(result) == 0) {
    stop("No results found for the location")
  }
  # Extract latitude and longitude from the first result
  latitude <- as.numeric(result[[1]]$lat)
  longitude <- as.numeric(result[[1]]$lon)
  return(data.frame(latitude = latitude,
                    longitude = longitude,
                    location = location))
}

# Query to fetch company name, location, and overall rating
query <- "
SELECT c.company_name, c.location, 
ROUND((AVG(r.transportation_rating) + AVG(r.work_env_rating) + AVG(r.learning_opp_rating) + AVG(r.food_rating) + AVG(r.hr_rating) + AVG(r.mentor_rating)) / 6, 2) AS overall_rating
FROM internships i
JOIN companies c ON i.company_id = c.company_id
LEFT JOIN ratings r ON i.internship_id = r.internship_id
GROUP BY c.company_name, c.location
"

# Retrieve company name, location, and overall rating data
company_data <- dbGetQuery(db_connection, query)

# Geocode the locations using OpenStreetMap Nominatim
geocoded_locations <- lapply(company_data$location, geocode_nominatim)
geocoded_locations <- do.call(rbind, geocoded_locations)

# Create a Leaflet map
map <- leaflet() %>%
  addTiles() %>%
  setView(lng = 35.1686, lat = 39.9334, zoom = 6) %>%  # Center the map on Turkey
  addCircleMarkers(data = geocoded_locations, lng = ~longitude, lat = ~latitude,
                   radius = 6, color = "blue", fillOpacity = 0.8,
                   popup = paste("<b>Company:</b> ", company_data$company_name, "<br>",
                                 "<b>Overall Rating:</b> ", company_data$overall_rating),
                   stroke = FALSE, group = "markers") %>%
  addLayersControl(overlayGroups = "markers", options = layersControlOptions(collapsed = FALSE))

# Display the map
map