amazon-redshift Reading JSON array in Redshift Reading array elements in JSON


Example

-- Create a sample JSON with ARRAY

create table car_sample(dim_id integer, info varchar(2000)); insert into car_sample values (200, '{"cars": [ { "Manufacturer": "Nissan", "Models": [{"Name":"Sentra", "doors":4}, {"Name":"Maxima", "doors":4} ]}, {"Manufacturer": "Ford", "Models": [{"Name":"Taurus", "doors":4}, {"Name":"Escort", "doors":4} ]} ] }')

-- Create a supporting table for CROSS JOIN

create table series1_10 (number integer );
insert into series1_10 values (1);
insert into series1_10 values (2);
insert into series1_10 values (3);
insert into series1_10 values (4);
insert into series1_10 values (5);
insert into series1_10 values (6);
insert into series1_10 values (7);
insert into series1_10 values (8);
insert into series1_10 values (9);
insert into series1_10 values (10);

-- UDF for extracting JSON array into one ^ delimited string
CREATE OR REPLACE FUNCTION f_extractJson (jsonVar varchar) RETURNS varchar IMMUTABLE as $$
 def myfunc(myParm):
  import json
  cars=json.loads(jsonVar)
  parsedString=''
  for car in cars["cars"]:
   for model in car["Models"]:
    parsedString=parsedString+car["Manufacturer"]+'~'+model["Name"]+'^'
  return parsedString

 return myfunc(jsonVar)
$$ LANGUAGE plpythonu;

-- Check the data

select dim_id, f_extractJson(info) from car_sample;

-- Pivot rows

WITH w1 AS (select dim_id, f_extractJson(info) info from car_sample)
select dim_id,number, info, split_part(split_part(info,'^',number),'~', 1)
Manufacturer, split_part(split_part(info,'^',number),'~', 2) Model
from w1 cross join series1_10
where number <= regexp_count(info,'[=^=]') ;