Create schema allinspections_updated; use allinspections_updated; CREATE TABLE `business` ( `Business_id` varchar(40) NOT NULL, `Name` varchar(100) DEFAULT NULL, `Address` varchar(40) DEFAULT NULL, `City` varchar(15) DEFAULT NULL, `State` varchar(4) DEFAULT NULL, `Postal_Code` varchar(10) DEFAULT NULL, `Latitude` varchar(10) DEFAULT NULL, `Longitude` varchar(10) DEFAULT NULL, `Phone_Number` varchar(15) DEFAULT NULL, PRIMARY KEY (`Business_id`) ) ENGINE=InnoDB DEFAULT CHARSET=utf8; LOAD DATA LOCAL INFILE 'C:\\businesses.csv' INTO TABLE business FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '"' LINES TERMINATED BY'\n' IGNORE 1 LINES; select count(*) from business; select * from business; CREATE TABLE`inspections` ( `business_id` varchar(36) DEFAULT NULL, `score` int(11) DEFAULT NULL, `date` date DEFAULT NULL, KEY `inspections_FK_idx` (`business_id`), CONSTRAINT `inspections_FK` FOREIGN KEY (`business_id`) REFERENCES `business` (`Business_id`) ON DELETE NO ACTION ON UPDATE NO ACTION ) ENGINE=InnoDB DEFAULT CHARSET=utf8; #### note that for this load statement, we used the "set date" function to change the date to the format that MySQL prefers: year, month, day, ##### which will allow us to join it to the date field in "violations" table once we begin linking the tables LOAD DATA LOCAL INFILE 'C:\\inspections.csv' INTO TABLE inspections FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '"' LINES TERMINATED BY'\n' IGNORE 1 LINES (business_id, score, @date) set date = str_to_date(@date, '%Y%m%d'); update inspections set date = str_to_date(left(date, 8), '%Y%m%d'); select count(*) from inspections; select * from inspections order by date desc; CREATE TABLE`violations` ( `Business_id` varchar(36) NOT NULL, `Date` date DEFAULT NULL, `Code` varchar(5) DEFAULT NULL, `Description` varchar(200) DEFAULT NULL, KEY `violations_fk_idx` (`Business_id`), CONSTRAINT `violations_fk` FOREIGN KEY (`Business_id`) REFERENCES `business` (`Business_id`) ON DELETE NO ACTION ON UPDATE NO ACTION ) ENGINE=InnoDB DEFAULT CHARSET=utf8; LOAD DATA LOCAL INFILE 'C:\\violations.csv' INTO TABLE violations FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '"' LINES TERMINATED BY'\n' IGNORE 1 LINES; select count(*) from violations; select * from violations; #### the legend contains the description for the scores, which comes in handy when using the "where" statement to filter our queries CREATE TABLE`legend` ( `Minimum` int(11) DEFAULT NULL, `Maximum` int(11) DEFAULT NULL, `Legend` varchar(20) DEFAULT NULL ) ENGINE=InnoDB DEFAULT CHARSET=utf8; select * from legend; #### this query does two things, creates a master table from the tables that we have joined using "where" statement, which is explained #### in The Data Journalists' chapter, beginning on page 100 CREATE VIEW All_Inspection_Master AS SELECT business.Business_id, business.Name, business.Address,business.Phone_Number,violations.Date, violations.Code, violations.Description, inspections.score FROM business, inspections, violations where business.Business_id = inspections.business_id AND business.Business_id = violations.business_id AND inspections.date = violations.date; select count(*) from all_inspection_master; select * from all_inspection_master order by Date desc; #### once we have created our master table, it's a good idea to run queries that group the key categories in order to determine the #### the most problematic ones. Think of these queries as performing the same task as a pivot table in Excel select Name, count(Business_id) Name from all_inspection_master group by Name order by count(Name) desc; select code, count(Business_id) 'code violations' from all_inspection_master group by Code order by count(Business_id) desc; select Description, count(Business_id) 'violation descriptions' from all_inspection_master group by Description order by count(Business_id) desc; #### this query acts like a pivot table in that is pulls the year out of the date, groups the records by year, and then counts them select Name, count(Name) name, year(Date) Year from all_inspection_master group by Name, Year order by count(Name) desc; select Name, count(Name) name, year(Date) Year from all_inspection_master where score='0' group by Name, Year order by count(Name) desc; #### now we can begin running queries to drill down. Be sure to export the results you like as csv files to continue working with them select * from all_inspection_master where name like "Red Oak%" and date >'2016-01-01' and score ='0' order by date desc; select * from all_inspection_master where date >'2016-01-01' order by date desc; select Name, count(name) Name from all_inspection_master group by name; SELECT * FROM all_inspection_master WHERE Name like '%Tim%'; SELECT * FROM all_inspection_master WHERE Name like '%Tim%' and score ='0'; select count(Business_id) 'Number of violations', Name, Address from all_inspection_master where score = '0' group by Name, Address order by count(Business_id) desc; select * from all_inspection_master where score = '0' and Name = 'DAISY NAIL & SPA - ST LAURENT'; select * from all_inspection_master where Name like '%retirement%'; select * from all_inspection_master where Name like '%retirement%' and score = '0';