library(tidyverse)
library(lubridate)
library(hms)
library(scales)
library(knitr)

# Make theme_bw() the default ggplot2 theme.
theme_set(theme_bw())

# Read the trip data and parse both timestamp columns with ymd_hms(),
# interpreting them in the America/Chicago time zone.
taxi <- read_csv("taxi.csv") %>%
  mutate(
    trip_start = ymd_hms(trip_start, tz = "America/Chicago"),
    trip_end = ymd_hms(trip_end, tz = "America/Chicago")
  )

How many taxi trips occurred every week?

 # Bar chart of the number of trips per week, where the week number comes
 # from lubridate::week() applied to the trip start time.
 ggplot(
   data = mutate(taxi, week = week(trip_start)),
   mapping = aes(x = week)
 ) +
   geom_bar()
## Warning: package 'bindrcpp' was built under R version 3.4.4

How many taxi trips occurred between the dates 2.1.2016 and 8.1.2016?

option 1

# Interval from 2 Jan 2016 to 8 Jan 2016, anchored at midnight Chicago time.
# The endpoints are parsed with `tz =` so they are POSIXct values at local
# midnight. Plain dmy() returns Dates, which interval() would read as UTC
# midnight and then shift into the target zone, moving both bounds to 18:00
# local time on the previous day.
# NOTE(review): a count recorded with the earlier Date-based bounds may
# differ from what this prints; re-run to refresh it.
my_interval <- interval(
  dmy("2.1.2016", tz = "America/Chicago"),
  dmy("8.1.2016", tz = "America/Chicago"),
  tzone = "America/Chicago"
)

# Number of trips whose start time falls inside the interval; rows with a
# missing start time are not counted.
sum(taxi$trip_start %within% my_interval, na.rm = TRUE)
## [1] 810

option 2 - if you want to keep those rows…

# Keep the full rows of the trips that start inside my_interval.
within_taxi_trips <- filter(taxi, trip_start %within% my_interval)

What are the taxi trip durations in hours?

# Histogram of trip durations in hours, restricted to the 0-2 hour range.
# The difftime is converted to a plain number so the continuous x scale gets
# numeric input, and the unit is spelled out in full ("hours") rather than
# relying on partial matching of "hour".
taxi %>%
  mutate(
    duration = as.numeric(difftime(trip_end, trip_start, units = "hours"))
  ) %>%
  ggplot(aes(duration)) +
  geom_histogram(bins = 25) +
  scale_x_continuous(breaks = seq(0, 2, 0.25), limits = c(0, 2))
## Warning: Removed 5 rows containing non-finite values (stat_bin).
## Warning: Removed 2 rows containing missing values (geom_bar).

Add 2 hours to each trip duration

# A two-hour lubridate period to add to every trip end.
two_hour <- as.period(2, unit = "hours")

# Histogram of the durations (in hours) after pushing each trip end two hours
# later; the x axis is limited to 0-4 hours. `taxi` itself is not modified.
# The difftime is converted to a plain number so the continuous x scale gets
# numeric input, and the unit is spelled out in full ("hours") rather than
# relying on partial matching of "hour".
taxi %>%
  mutate(
    trip_end = trip_end + two_hour,
    duration = as.numeric(difftime(trip_end, trip_start, units = "hours"))
  ) %>%
  ggplot(aes(duration)) +
  geom_histogram(bins = 25) +
  scale_x_continuous(breaks = seq(0, 4, 0.5), limits = c(0, 4))
## Warning: Removed 5 rows containing non-finite values (stat_bin).
## Warning: Removed 2 rows containing missing values (geom_bar).

Plot the trip cost versus the taxi trip end time

Can you show the x axis in hms format (e.g. 07:35:17) and show tick marks every four hours?

 # Add two columns to `taxi`:
 #   time      - trip_end converted with as.hms()
 #   fake_date - that time pasted onto the fixed date 2000-01-01 and parsed
 #               with ymd_hms(), so every trip end lands on the same day
 # NOTE(review): which time zone as.hms() uses for a POSIXct input depends on
 # the installed hms version - confirm it matches America/Chicago.
 taxi <- mutate(
   taxi,
   time = as.hms(trip_end),
   fake_date = ymd_hms(paste("2000-01-01", time))
 )
## Warning: 1 failed to parse.
   # Scatter plot of fare against the time of day the trip ended (fake_date
   # holds that time on a fixed dummy date). The x axis has a break every four
   # hours, labelled as HH:MM:SS. The label format uses "%S" for seconds; the
   # earlier "$S" was printed literally instead of the seconds value.
   ggplot(taxi, aes(fake_date, fare)) +
     geom_point(color = "blue", alpha = 0.1) +
     xlab("Trip end (hour)") +
     ylab("Trip cost ($)") +
     scale_x_datetime(
       date_breaks = "4 hours",
       labels = date_format("%H:%M:%S")
     )
## Warning: Removed 1 rows containing missing values (geom_point).