创建表
hive
查看本地的文件
#Can execute local commands within CLI, place a command in between ! and ;!cat data/text.txt;文件的内容
user1,Funny Story,1343182026191user2,Cool Deal,1343182133839user4,Interesting Post,1343182154633user5,Yet Another Blog,13431839394建表
CREATE TABLE posts (user STRING, post STRING, time BIGINT) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS TEXTFILE;hive> show tables;
hive> describe posts;加载数据
把文件加载到表中
LOAD DATA LOCAL INPATH 'text.txt' OVERWRITE INTO TABLE posts;– Load data from HDFS location
hive> LOAD DATA INPATH '/training/hive/user-posts.txt'> OVERWRITE INTO TABLE posts;– Load data from a local file system
hive> LOAD DATA LOCAL INPATH 'data/user-posts.txt'> OVERWRITE INTO TABLE posts;如果在hdfs里存在大量的同类型格式的文件,可以通过已有文件来建表
hive> CREATE EXTERNAL TABLE posts
> (user STRING, post STRING, time BIGINT)> ROW FORMAT DELIMITED> FIELDS TERMINATED BY ','> STORED AS TEXTFILE> LOCATION '/training/hive/'; 测试一下是否在hdfs里hdfs dfs -cat /user/hive/warehouse/posts/text.txt查询数据
select count (1) from posts;select * from posts where user="user2";select * from posts where time<=1343182133839 limit 2;DROP TABLE posts;
将会在hdfs系统里执行删除hdfs dfs -ls /user/hive/warehouse/带分区的表
为了提高性能,还可以使用分区,如果设置了分区,用户
在插入数据时必须指定一个分区
CREATE TABLE posts1 (user STRING, post STRING, time BIGINT)
PARTITIONED BY(country STRING)ROW FORMAT DELIMITEDFIELDS TERMINATED BY ','STORED AS TEXTFILE;describe posts1;
用户在插入数据时必须指定一个分区
LOAD DATA LOCAL INPATH 'text.txt' OVERWRITE INTO TABLE posts1 PARTITION(country='US');LOAD DATA LOCAL INPATH 'text.txt'
OVERWRITE INTO TABLE posts1 PARTITION(country='AUSTRALIA');显示已有的分区
show partitions posts;hdfs dfs -ls -R /user/hive/warehouse/posts查询
select * from posts where country='US' limit 10;Join表
CREATE TABLE posts_likes (user STRING, post STRING, likes_count INT);
INSERT OVERWRITE TABLE posts_likes> SELECT p.user, p.post, l.count> FROM posts p JOIN likes l ON (p.user = l.user);Outer Join表
SELECT p.*, l.*FROM posts p LEFT OUTER JOIN likes l ON (p.user = l.user)limit 10;SELECT p.*, l.*FROM posts p RIGHT OUTER JOIN likes l ON (p.user = l.user)limit 10;SELECT p.*, l.*FROM posts p FULL OUTER JOIN likes l ON (p.user = l.user)limit 10;ResourcesJDBC Program
import java.sql.SQLException;import java.sql.Connection;import java.sql.ResultSet;import java.sql.Statement;import java.sql.DriverManager;public class HiveCreateTable {private static String driverName = "org.apache.hadoop.hive.jdbc.HiveDriver";public static void main(String[] args) throws SQLException {// Register driver and create driver instanceClass.forName(driverName);// get connectionConnection con = DriverManager.getConnection("jdbc:hive://localhost:10000/userdb", "", "");// create statementStatement stmt = con.createStatement();// execute statementstmt.executeQuery("CREATE TABLE IF NOT EXISTS "+" employee ( eid int, name String, "+" salary String, destignation String)"+" COMMENT ‘Employee details’"+" ROW FORMAT DELIMITED"+" FIELDS TERMINATED BY ‘\t’"+" LINES TERMINATED BY ‘\n’"+" STORED AS TEXTFILE;");System.out.println(“ Table employee created.”);con.close();}}
$ javac HiveCreateDb.java
$ java HiveCreateDbimport java.sql.SQLException;import java.sql.Connection;import java.sql.ResultSet;import java.sql.Statement;import java.sql.DriverManager;public class HiveQLWhere {private static String driverName = "org.apache.hadoop.hive.jdbc.HiveDriver";public static void main(String[] args) throws SQLException {// Register driver and create driver instanceClass.forName(driverName);// get connectionConnection con = DriverManager.getConnection("jdbc:hive://localhost:10000/userdb", "", "");// create statementStatement stmt = con.createStatement();// execute statementResultset res = stmt.executeQuery("SELECT * FROM employee WHERE salary>30000;");System.out.println("Result:");System.out.println(" ID \t Name \t Salary \t Designation \t Dept ");while (res.next()) {System.out.println(res.getInt(1) + " " + res.getString(2) + " " + res.getDouble(3) + " " + res.getString(4) + " " + res.getString(5));}con.close();}}
$ javac HiveQLWhere.java
$ java HiveQLWhere