Analysing
JSON (JavaScript Object Notation) Document:
you can use TEXTFILE as the input
and output format, then use a JSON SerDe to parse each JSON document
as a record.
Example 1:
1: Create a test file in Json
format
$ cat> jsont.txt
{"a" :10, "b"
:11, "c" :15}
{"a" :20, "b"
:21, "c" :25}
{"a" :30, "b"
:31, "c" :35}
{"a" :40, "b"
:41, "c" :45}
{"a" :50, "b"
:51, "c" :55}
{"a" :60, "b"
:61, "c" :65}
^d
2: Create a hive table
hive> create table
jsont1(str string);
3: Load data into hive table from
local xml file
hive> load data local
inpath 'jsont.txt' into table jsont1;
4: Create another table to
extract the json data
hive>create table jsont2(a
int, b int, c int);
5: Insert jsont1 table data into
jsont2 table
hive>insert overwrite table
jsont2 select get_json_object(str, '$.a'), get_json_object(str,
'$.b'), get_json_object(str, '$.c') from jsont1;
Example2 :
$Cat>jsonex.txt
{ "top" : [
{"table":"user",
"data":{
"name":"John
Doe","userid":"2036586","age":"74","code":"297994","status":1}},
{"table":"user",
"data":{
"name":"Mary
Ann","userid":"14294734","age":"64","code":"142798","status":1}},
{"table":"user",
"data":{
"name":"Carl
Smith","userid":"13998600","age":"36","code":"32866","status":1}},
{"table":"user",
"data":{
"name":"Anil
Kumar":"2614012","age":"69","code":"208672","status":1}},
{"table":"user",
"data":{
"name":"Kim
Lee","userid":"10471190","age":"53","code":"79365","status":1}}
]}
^d
CREATE
TABLE user (line
string)
ROW
FORMAT DELIMITED FIELDS TERMINATED
BY '\n'
STORED AS
TEXTFILE
LAOD
DATA LOCAL INPATH ‘jsonex.txt’
OVERWRITE INTO TABLE
user;
SELECT
get_json_object(col0,
'$.name')
as name,
get_json_object(col0,
'$.userid')
as uid,
get_json_object(col0,
'$.age') as
age,
get_json_object(col0,
'$.code')
as code,
get_json_object(col0,
'$.status')
as status
FROM
(SELECT
get_json_object(user.line,
'$.data')
as col0
FROM
user
WHERE
get_json_object(user.line,
'$.data')
is not null)
temp;
Note:
A string like $.user.id means to take each record,
represented by $, find the user key, which is assumed to be a JSON
map in this case, and finally extract the value for the id key inside the
user. This value for the id is used as the value for the user_id
column.