Analysis

Libraries and connection parameters are provided here.

Show the code
library(RMySQL)
library(readr)
library(dplyr)
library(knitr)
library(ggplot2)
library(plotly)
library(leaflet)
library(tidyr)
library(httr)
library(jsonlite)
library(lubridate)

# Set up the connection parameters
db_host <- "127.0.0.1"  # Localhost address
db_user <- "root"  # MySQL username
db_password <- ""  # MySQL password
db_name <- "emu415_ciphers"  # The name of database

# Establish the connection
db_connection <- dbConnect(RMySQL::MySQL(), 
                           host = db_host,
                           user = db_user,
                           password = db_password,
                           dbname = db_name)

1 Overall Rating per Company

Show the code
# Write your SQL query to calculate overall rating per company
query <- "
SELECT 
    c.company_name,
    ROUND(AVG(r.transportation_rating), 2) AS avg_transportation_rating,
    ROUND(AVG(r.work_env_rating), 2) AS avg_work_env_rating,
    ROUND(AVG(r.learning_opp_rating), 2) AS avg_learning_opp_rating,
    ROUND(AVG(r.food_rating), 2) AS avg_food_rating,
    ROUND(AVG(r.hr_rating), 2) AS avg_hr_rating,
    ROUND(AVG(r.mentor_rating), 2) AS avg_mentor_rating,
    ROUND((AVG(r.transportation_rating) + AVG(r.work_env_rating) + AVG(r.learning_opp_rating) + AVG(r.food_rating) + AVG(r.hr_rating) + AVG(r.mentor_rating)) / 6, 2) AS overall_rating
FROM internships AS i
JOIN companies AS c ON i.company_id = c.company_id
JOIN ratings AS r ON i.internship_id = r.internship_id
GROUP BY c.company_name
ORDER BY overall_rating DESC
"

# Execute the query and fetch the results
results <- dbGetQuery(db_connection, query)

# Reverse the order of the results dataframe
results <- results[order(-results$overall_rating), ]

kable(head(results, 10))
company_name avg_transportation_rating avg_work_env_rating avg_learning_opp_rating avg_food_rating avg_hr_rating avg_mentor_rating overall_rating
Hayat Kimya 5.00 5.00 5.0 5.00 5.00 5.00 5.00
Daikin 5.00 5.00 5.0 5.00 5.00 5.00 5.00
Otokar 5.00 5.00 4.0 5.00 5.00 5.00 4.83
TÜPRAŞ 5.00 5.00 5.0 4.00 4.00 5.00 4.67
Aselsan 4.75 4.50 4.5 4.25 4.75 4.75 4.58
Baykar Mak. 4.50 4.50 4.0 5.00 4.50 4.00 4.42
Hidromek 3.00 5.00 5.0 4.00 4.00 5.00 4.33
Otokoç Otomotiv 1.00 5.00 5.0 5.00 5.00 5.00 4.33
Roketsan 4.50 4.75 4.5 4.00 4.25 4.00 4.33
RND Yazılım ve E-Ticaret 4.00 4.00 5.0 4.00 4.00 5.00 4.33
Show the code
# Plot the results using plotly
plot_ly(results, x = ~company_name, y = ~overall_rating, type = 'bar', name = 'Overall Rating') %>%
  layout(title = "Overall Rating per Company",
         xaxis = list(title = "", tickangle = 45, tickfont = list(size = 10), categoryorder = "array", categoryarray = ~company_name),
         yaxis = list(title = "Overall Rating", range = c(1, 5)))

2 Number of Interns in Each Company

Show the code
# Write your SQL query to count the number of interns per company
query <- "
SELECT 
    companies.company_name,
    COUNT(*) AS num_interns,
    ROUND(AVG(r.transportation_rating), 2) AS avg_transportation_rating,
    ROUND(AVG(r.work_env_rating), 2) AS avg_work_env_rating,
    ROUND(AVG(r.learning_opp_rating), 2) AS avg_learning_opp_rating,
    ROUND(AVG(r.food_rating), 2) AS avg_food_rating,
    ROUND(AVG(r.hr_rating), 2) AS avg_hr_rating,
    ROUND(AVG(r.mentor_rating), 2) AS avg_mentor_rating,
    ROUND((AVG(r.transportation_rating) + AVG(r.work_env_rating) + AVG(r.learning_opp_rating) + AVG(r.food_rating) + AVG(r.hr_rating) + AVG(r.mentor_rating)) / 6, 2) AS overall_rating
FROM internships
JOIN companies ON internships.company_id = companies.company_id
LEFT JOIN ratings r ON internships.internship_id = r.internship_id
GROUP BY companies.company_name
HAVING COUNT(*) > 1
"

# Execute the query and fetch the results
company_results <- dbGetQuery(db_connection, query)

kable(head(company_results, 10))
company_name num_interns avg_transportation_rating avg_work_env_rating avg_learning_opp_rating avg_food_rating avg_hr_rating avg_mentor_rating overall_rating
Aselsan 4 4.75 4.50 4.50 4.25 4.75 4.75 4.58
Baykar Mak. 2 4.50 4.50 4.00 5.00 4.50 4.00 4.42
Makine ve Kimya Endüstrisi A.Ş. 5 4.20 3.20 3.40 3.40 4.20 3.60 3.67
Mercedes 2 5.00 4.00 4.50 3.50 4.50 3.50 4.17
Nurus 2 4.00 3.50 3.50 2.50 3.50 5.00 3.67
Roketsan 4 4.50 4.75 4.50 4.00 4.25 4.00 4.33
Türk Havacılık ve Uzay Sanayii(TUSAŞ) 7 4.14 4.00 3.43 4.14 3.29 3.14 3.69
Türk Traktör 5 4.60 3.20 3.80 4.40 3.40 3.00 3.73
Vestel 2 4.50 3.50 2.50 5.00 4.00 2.50 3.67
Show the code
# Plot the results using plotly
plot_ly(company_results, x = ~company_name, y = ~num_interns, type = 'bar', name = 'Number of Interns') %>%
  add_trace(x = ~company_name, y = ~overall_rating, type = 'scatter', mode = 'lines+markers', name = 'Overall Rating', yaxis = "y2", line = list(shape = 'li', connectgaps = TRUE)) %>%
  layout(title = "Number of Interns and Overall Rating per Company (with more than 1 intern)",
         xaxis = list(title = "Company Name"),
         yaxis = list(title = "Number of Interns"),
         yaxis2 = list(title = "Overall Rating", overlaying = "y", side = "right", range = c(0, max(company_results$overall_rating) + 1)))

3 Number of Interns per Industry

Show the code
# Write your SQL query to calculate the overall rating per industry
query <- "
SELECT 
    c.industry,
    COUNT(*) AS num_interns,
    ROUND(AVG(r.transportation_rating), 2) AS avg_transportation_rating,
    ROUND(AVG(r.work_env_rating), 2) AS avg_work_env_rating,
    ROUND(AVG(r.learning_opp_rating), 2) AS avg_learning_opp_rating,
    ROUND(AVG(r.food_rating), 2) AS avg_food_rating,
    ROUND(AVG(r.hr_rating), 2) AS avg_hr_rating,
    ROUND(AVG(r.mentor_rating), 2) AS avg_mentor_rating,
    ROUND((AVG(r.transportation_rating) + AVG(r.work_env_rating) + AVG(r.learning_opp_rating) + AVG(r.food_rating) + AVG(r.hr_rating) + AVG(r.mentor_rating)) / 6, 2) AS overall_rating
FROM internships i
JOIN companies c ON i.company_id = c.company_id
LEFT JOIN ratings r ON i.internship_id = r.internship_id
GROUP BY c.industry
HAVING num_interns > 1
ORDER BY num_interns DESC;
"

# Execute the query and fetch the results
results <- dbGetQuery(db_connection, query)

# Reorder the results dataframe by num_interns in descending order
results <- results[order(-results$num_interns), ]

kable(head(results, 10))
industry num_interns avg_transportation_rating avg_work_env_rating avg_learning_opp_rating avg_food_rating avg_hr_rating avg_mentor_rating overall_rating
Automotive 12 4.00 3.58 4.00 4.00 3.83 3.58 3.83
Aerospace 9 4.22 4.11 3.56 4.33 3.56 3.33 3.85
Defense 8 4.63 4.63 4.50 4.13 4.50 4.38 4.46
Ammo 5 4.20 3.20 3.40 3.40 4.20 3.60 3.67
Furniture 2 4.00 3.50 3.50 2.50 3.50 5.00 3.67
Electronics 2 4.50 3.50 2.50 5.00 4.00 2.50 3.67
Show the code
# Plot the results using plotly
plot_ly(results, x = ~reorder(industry, -num_interns), y = ~num_interns, type = 'bar', name = 'Number of Interns') %>%
  add_trace(x = ~reorder(industry, -num_interns), y = ~overall_rating, type = 'scatter', mode = 'lines+markers', name = 'Overall Rating', yaxis = "y2", connectgaps = TRUE) %>%
  layout(title = "Number of Interns and Overall Rating per Industry (with more than 1 intern)",
         xaxis = list(title = "Industry", categoryorder = "total descending"),
         yaxis = list(title = "Number of Interns"),
         yaxis2 = list(title = "Overall Rating", overlaying = "y", side = "right", range = c(0, max(results$overall_rating) + 1)))

4 Number of Interns per Department

Show the code
# Write your SQL query to count the number of interns per department and calculate the average learning opportunity rating
query <- "
SELECT 
    i.department,
    COUNT(*) AS num_interns,
    ROUND(AVG(r.learning_opp_rating), 2) AS avg_learning_opp_rating
FROM internships i
LEFT JOIN ratings r ON i.internship_id = r.internship_id
GROUP BY i.department;
"

# Execute the query and fetch the results
department_results <- dbGetQuery(db_connection, query)

kable(head(department_results, 10))
department num_interns avg_learning_opp_rating
Bilgi Teknolojileri 1 5.00
Çaycı 1 4.00
Dijital Dönüşüm 1 5.00
E-Ticaret 1 5.00
Finans 1 5.00
Global Order Center 1 4.00
İnsan Kaynakları 2 2.00
İş Geliştirme 3 4.67
Kalite 3 3.33
Lojistik 3 3.67
Show the code
# Plot the results using plotly
plot_ly(department_results, x = ~department, y = ~num_interns, type = 'bar', name = 'Number of Interns') %>%
  add_trace(x = ~department, y = ~avg_learning_opp_rating, type = 'scatter', mode = 'lines+markers', name = 'Avg Learning Opp Rating', yaxis = "y2", connectgaps = TRUE) %>%
  layout(title = "Number of Interns and Average Learning Opp Rating per Department",
         xaxis = list(title = "Department"),
         yaxis = list(title = "Number of Interns"),
         yaxis2 = list(title = "Avg Learning Opp Rating", overlaying = "y", side = "right", range = c(0, max(department_results$avg_learning_opp_rating) + 1)))

5 Comments

Show the code
# SQL query
query <- "
SELECT 
    s.first_name,
    s.last_name,
    c.company_name,
    ROUND((r.transportation_rating + r.work_env_rating + r.learning_opp_rating + r.food_rating + r.hr_rating + r.mentor_rating) / 6, 2) AS overall_rating,
    r.comment
FROM 
    ratings r
JOIN 
    internships i ON r.internship_id = i.internship_id
JOIN 
    students s ON i.student_id = s.student_id
JOIN 
    companies c ON i.company_id = c.company_id
WHERE 
    r.comment IS NOT NULL
ORDER BY
    s.first_name;
"

# Execute the query and fetch the results
results <- dbGetQuery(db_connection, query)

# Display the results
kable(results)
first_name last_name company_name overall_rating comment
Ahad Tanay Uyar Roketsan 5.00 Kendini geliştirmek ve Proje Yönetimini öğrenmek adına harika bir şikret. Mert Harika ikiside.
Ahad Tanay Uyar Türk Havacılık ve Uzay Sanayii(TUSAŞ) 4.50 Depo her ne kadar kötü olsada staj deneyimi açısından Türkiye’nin konuştuğu ürünleri görmek güzel bir deneyimdi.
Arda Türkan RND Yazılım ve E-Ticaret 4.33 E-ticaret sisteminin arkasındaki operasyonları, sunulan hizmetleri öğrenebilirsiniz.
Arda Türkan Daikin 5.00 Yaz sıcaklarında soğutma ekipmanı satışı, stajyerliktense çalışan olarak görünmenizi sağlıyor.
Ayşenur Eren Vestel 2.67 Kariyer gelişimime çok faydalı olduğunu düşünmüyorum ama yediğim en güzel şirket yemeğini burada yedim, o yüzden pişman sayılmam.
Begüm Bahçekapılı Nurus 4.33 Staj sürecinde kendi projemizi yapmamız ve sunmamız beklendi. Bunun bakış açısına göre olumlu ve olumsuz görülebilecek yanları oldu.
Begüm Bahçekapılı Hidromek 4.33 Henüz 2. sınıfı yeni bitirmiş olmama rağmen bu staj sırasında şirkette gerçekleşen tüm süreçleri yakından incelememe olanak tanındı. Çok memnun kaldım.
Dilan Su Fırat Türk Traktör 3.33 Mentorümden rapor için bilgiler almam çok zorlu oldu.
Dilan Su Fırat Kazan Soda 3.17 Kimya Mühendisleri için güzel bir staj yeri, fakat diğer bölümler için geliştirici bir staj değildi.
Doğa Oral ARD Grup Bilişim 3.67 Ulaşım ve yemek yoktu, o yüzden 1 verdim.
Emre Gül Otokoç Otomotiv 4.33 Otokoç Otomotiv gibi güzide bir şirketin listede default gelmemesini kınıyorum.
Mert Çetinkanat Aselsan 4.33 Güzel bir deneyim oldu, fikirlerim önemsendi ve hazırladığım yenilikler hayata geçirildi.
Mert Bekir Kaplan Türk Traktör 4.50 Çok güzel bir staj programıydı, benim için dolu dolu geçti yemeklerine ayrı hayran kaldım.
Mert Bekir Kaplan Makine ve Kimya Endüstrisi A.Ş. 2.33 Kendinizi geliştirmek istiyorsanız burayı tercih etmenizi önermem.
Sena Enşici Roketsan 4.50 Gittiğim birimde stajyer nefreti vardı, mobinge maruz kaldım. İş hayatını öğrendim.
Sena Enşici Baykar Mak. 4.50 Çok ciddi gizlilik kuralları vardı, gün sonunda disiplini öğretti.
Tuncay Özkardeş Türk Traktör 3.33 Mert Bekir Kaplan harika bir adam.

6 Geographical Distribution of Companies

Show the code
# Function to geocode using OpenStreetMap Nominatim API
geocode_nominatim <- function(location) {
  url <- paste0("https://nominatim.openstreetmap.org/search?q=", URLencode(location), "&format=json&addressdetails=1")
  response <- GET(url)
  if (http_error(response)) {
    stop("Error occurred while geocoding")
  }
  result <- content(response)
  if (length(result) == 0) {
    stop("No results found for the location")
  }
  # Extract latitude and longitude from the first result
  latitude <- as.numeric(result[[1]]$lat)
  longitude <- as.numeric(result[[1]]$lon)
  return(data.frame(latitude = latitude,
                    longitude = longitude,
                    location = location))
}

# Query to fetch company name, location, and overall rating
query <- "
SELECT c.company_name, c.location, 
ROUND((AVG(r.transportation_rating) + AVG(r.work_env_rating) + AVG(r.learning_opp_rating) + AVG(r.food_rating) + AVG(r.hr_rating) + AVG(r.mentor_rating)) / 6, 2) AS overall_rating
FROM internships i
JOIN companies c ON i.company_id = c.company_id
LEFT JOIN ratings r ON i.internship_id = r.internship_id
GROUP BY c.company_name, c.location
"

# Retrieve company name, location, and overall rating data
company_data <- dbGetQuery(db_connection, query)

# Geocode the locations using OpenStreetMap Nominatim
geocoded_locations <- lapply(company_data$location, geocode_nominatim)
geocoded_locations <- do.call(rbind, geocoded_locations)

# Create a Leaflet map
map <- leaflet() %>%
  addTiles() %>%
  setView(lng = 35.1686, lat = 39.9334, zoom = 6) %>%  # Center the map on Turkey
  addCircleMarkers(data = geocoded_locations, lng = ~longitude, lat = ~latitude,
                   radius = 6, color = "blue", fillOpacity = 0.8,
                   popup = paste("<b>Company:</b> ", company_data$company_name, "<br>",
                                 "<b>Overall Rating:</b> ", company_data$overall_rating),
                   stroke = FALSE, group = "markers") %>%
  addLayersControl(overlayGroups = "markers", options = layersControlOptions(collapsed = FALSE))

# Display the map
map
Back to top