Extracts constants in code to settings.

pull/1/head
Abhinav Sarkar 2015-12-18 01:00:32 +05:30
parent 21497269ee
commit ea9e100f8f
6 changed files with 138 additions and 87 deletions

View File

@ -39,28 +39,28 @@ writeSQLFiles outputDir env@Env{..} = forM_ sqls $ \(sqlType, table, sql) -> do
dimTables = map (\fact -> (fact, extractDimensionTables env fact)) envFacts dimTables = map (\fact -> (fact, extractDimensionTables env fact)) envFacts
factTables = map (\fact -> (fact, extractFactTable env fact)) envFacts factTables = map (\fact -> (fact, extractFactTable env fact)) envFacts
dimTableDefnSQLs = [ (Create, tableName table, unlines . map sqlStr . tableDefnSQL $ table) dimTableDefnSQLs = [ (Create, tableName table, unlines . map sqlStr . tableDefnSQL $ table)
| (_, tabs) <- dimTables | (_, tabs) <- dimTables
, table <- tabs , table <- tabs
, table `notElem` envTables ] , table `notElem` envTables ]
factTableDefnSQLs = [ (Create factTableDefnSQLs = [ (Create
, tableName table, unlines . map sqlStr $ factTableDefnSQL env fact table) , tableName table, unlines . map sqlStr $ factTableDefnSQL env fact table)
| (fact, table) <- factTables ] | (fact, table) <- factTables ]
dimTableInsertSQLs = [ (Populate dimTableInsertSQLs = [ (Populate
, tableName table , tableName table
, sqlStr $ dimensionTableInsertSQL env fact (tableName table)) , sqlStr $ dimensionTableInsertSQL env fact (tableName table))
| (fact, tabs) <- dimTables | (fact, tabs) <- dimTables
, table <- tabs , table <- tabs
, table `notElem` envTables ] , table `notElem` envTables ]
fctTableInsertSQLs = [ (Populate, tableName table, sqlStr $ factTableInsertSQL env fact) factTableInsertSQLs = [ (Populate, tableName table, sqlStr $ factTableInsertSQL env fact)
| (fact, table) <- factTables ] | (fact, table) <- factTables ]
sqls = concat [ dimTableDefnSQLs sqls = concat [ dimTableDefnSQLs
, factTableDefnSQLs , factTableDefnSQLs
, dimTableInsertSQLs , dimTableInsertSQLs
, fctTableInsertSQLs , factTableInsertSQLs
] ]
sqlStr s = Text.unpack $ s <> ";\n" sqlStr s = Text.unpack $ s <> ";\n"

View File

@ -34,6 +34,34 @@ settingsParser = let Settings {..} = defSettings
<> completeWith timeunits <> completeWith timeunits
<> help ("Time unit granularity for fact tables. Possible values: " <> help ("Time unit granularity for fact tables. Possible values: "
++ intercalate ", " timeunits)) ++ intercalate ", " timeunits))
<*> minorOption "avg-count-col-suffix"
settingAvgCountColumSuffix
"Suffix for average count columns"
<*> minorOption "avg-sum-col-suffix"
settingAvgSumColumnSuffix
"Suffix for average sum columns"
<*> minorOption "count-distinct-col-suffix"
settingCountDistinctColumSuffix
"Suffix for count distinct bucket columns"
<*> minorOption "dim-id-col-name"
settingDimTableIdColumnName
"Name of dimention table id columns"
<*> minorOption "dim-id-col-type"
settingDimTableIdColumnType
"Type of dimention table id columns"
<*> minorOption "fact-count-col-type"
settingFactCountColumnType
"Type of fact table count columns"
<*> minorOption "fact-infix"
settingFactInfix
"Infix for fact tables"
where
minorOption longDesc defValue helpTxt =
Text.pack <$> strOption (long longDesc
<> hidden
<> value (Text.unpack defValue)
<> showDefault
<> help helpTxt)
progArgsParser :: Parser ProgArgs progArgsParser :: Parser ProgArgs
progArgsParser = progArgsParser =
@ -47,10 +75,9 @@ progArgsParser =
<> help "Output directory") <> help "Output directory")
parseArgs :: IO ProgArgs parseArgs :: IO ProgArgs
parseArgs = execParser opts parseArgs = execParser $
where info (helper <*> progArgsParser)
opts = info (helper <*> progArgsParser) (fullDesc
(fullDesc <> progDesc "Transforms OLTP database schemas to OLAP database star schemas"
<> progDesc "Transforms OLTP database schemas to OLAP database star schemas" <> header "ringo - OLTP to OLAP schema transformer"
<> header "ringo - OLTP to OLAP schema transformer" <> footer "Source: http://github.com/quintype/ringo")
<> footer "Source: http://github.com/quintype/ringo")

View File

@ -18,33 +18,39 @@ extractFactTable fact = do
allDims <- extractAllDimensionTables fact allDims <- extractAllDimensionTables fact
table <- asks $ fromJust . findTable (factTableName fact) . envTables table <- asks $ fromJust . findTable (factTableName fact) . envTables
let intType = "integer" let countColType = settingFactCountColumnType
dimIdColName = settingDimTableIdColumnName
sourceColumnType colName = columnType . fromJust . findColumn colName . tableColumns $ table sourceColumnType colName = columnType . fromJust . findColumn colName . tableColumns $ table
columns = flip concatMap (factColumns fact) $ \col -> case col of columns = flip concatMap (factColumns fact) $ \col -> case col of
DimTime cName -> [ Column (timeUnitColumnName cName settingTimeUnit) intType NotNull ] DimTime cName -> [ Column (timeUnitColumnName dimIdColName cName settingTimeUnit) "integer" NotNull ]
NoDimId cName -> [ fromJust . findColumn cName . tableColumns $ table] NoDimId cName -> [ fromJust . findColumn cName . tableColumns $ table]
FactCount cName -> [ Column cName intType NotNull ] FactCount cName -> [ Column cName countColType NotNull ]
FactSum scName cName -> [ Column cName (sourceColumnType scName) NotNull ] FactSum scName cName -> [ Column cName (sourceColumnType scName) NotNull ]
FactAverage scName cName -> [ Column (averageCountColummName cName) intType NotNull FactAverage scName cName ->
, Column (averageSumColumnName cName) (sourceColumnType scName) NotNull [ Column (cName <> settingAvgCountColumSuffix) countColType NotNull
] , Column (cName <> settingAvgSumColumnSuffix) (sourceColumnType scName) NotNull
FactCountDistinct cName -> [ Column (countDistinctColumnName cName) (intType <> "[]") NotNull ] ]
FactCountDistinct cName ->
[ Column (cName <> settingCountDistinctColumSuffix) (countColType <> "[]") NotNull ]
_ -> [] _ -> []
fks = flip map allDims $ \(_, Table {..}) -> fks = flip map allDims $ \(_, Table {..}) ->
let colName = factDimFKIdColumnName settingDimPrefix tableName let colName = factDimFKIdColumnName settingDimPrefix dimIdColName tableName
colNullable = if any ((== Null) . columnNullable) tableColumns then Null else NotNull colNullable = if any ((== Null) . columnNullable) tableColumns then Null else NotNull
in (Column colName intType colNullable, ForeignKey tableName [(colName, "id")]) in ( Column colName (idColTypeToFKIdColType settingDimTableIdColumnType) colNullable
, ForeignKey tableName [(colName, dimIdColName)]
)
ukColNames = ukColNames =
(++ map (columnName . fst) fks) (++ map (columnName . fst) fks)
. flip mapMaybe (factColumns fact) $ \col -> case col of . flip mapMaybe (factColumns fact) $ \col -> case col of
DimTime cName -> Just (timeUnitColumnName cName settingTimeUnit) DimTime cName -> Just (timeUnitColumnName dimIdColName cName settingTimeUnit)
NoDimId cName -> Just cName NoDimId cName -> Just cName
_ -> Nothing _ -> Nothing
return Table { tableName = extractedFactTableName settingFactPrefix (factName fact) settingTimeUnit return Table { tableName =
extractedFactTableName settingFactPrefix settingFactInfix (factName fact) settingTimeUnit
, tableColumns = columns ++ map fst fks , tableColumns = columns ++ map fst fks
, tableConstraints = UniqueKey ukColNames : map snd fks , tableConstraints = UniqueKey ukColNames : map snd fks
} }

View File

@ -8,53 +8,55 @@ import Data.Function (on)
import Data.Maybe (mapMaybe, fromMaybe, fromJust) import Data.Maybe (mapMaybe, fromMaybe, fromJust)
import Data.Monoid ((<>)) import Data.Monoid ((<>))
import Data.List (nub, nubBy) import Data.List (nub, nubBy)
import Data.Text (Text)
import Ringo.Types import Ringo.Types
import Ringo.Utils import Ringo.Utils
dimColumnName :: Text.Text -> ColumnName -> ColumnName dimColumnName :: Text -> ColumnName -> ColumnName
dimColumnName dimName columnName = dimColumnName dimName columnName =
fromMaybe columnName . Text.stripPrefix (dimName <> "_") $ columnName fromMaybe columnName . Text.stripPrefix (dimName <> "_") $ columnName
timeUnitColumnName :: ColumnName -> TimeUnit -> ColumnName timeUnitColumnName :: Text -> ColumnName -> TimeUnit -> ColumnName
timeUnitColumnName colName timeUnit = colName <> "_" <> timeUnitName timeUnit <> "_id" timeUnitColumnName dimIdColName colName timeUnit =
colName <> "_" <> timeUnitName timeUnit <> "_" <> dimIdColName
averageCountColummName :: ColumnName -> ColumnName factDimFKIdColumnName :: Text -> Text -> TableName -> ColumnName
averageCountColummName colName = colName <> "_count" factDimFKIdColumnName dimPrefix dimIdColName dimTableName =
fromMaybe dimTableName (Text.stripPrefix dimPrefix dimTableName) <> "_" <> dimIdColName
averageSumColumnName :: ColumnName -> ColumnName extractedFactTableName :: Text -> Text -> TableName -> TimeUnit -> TableName
averageSumColumnName colName = colName <> "_sum" extractedFactTableName factPrefix factInfix factName timeUnit =
factPrefix <> factName <> factInfix <> timeUnitName timeUnit
countDistinctColumnName :: ColumnName -> ColumnName idColTypeToFKIdColType :: Text -> Text
countDistinctColumnName colName = colName <> "_hll" idColTypeToFKIdColType typ = case Text.toLower typ of
"serial" -> "integer"
factDimFKIdColumnName :: Text.Text -> TableName -> ColumnName "smallserial" -> "smallint"
factDimFKIdColumnName dimPrefix dimTableName = "bigserial" -> "bigint"
fromMaybe dimTableName (Text.stripPrefix dimPrefix dimTableName) <> "_id" _ -> typ
extractedFactTableName :: Text.Text -> TableName -> TimeUnit -> TableName
extractedFactTableName factPrefix factName timeUnit =
factPrefix <> factName <> "_by_" <> timeUnitName timeUnit
extractDimensionTables :: Fact -> Reader Env [Table] extractDimensionTables :: Fact -> Reader Env [Table]
extractDimensionTables fact = do extractDimensionTables fact = do
settings <- asks envSettings
tables <- asks envTables tables <- asks envTables
prefix <- settingDimPrefix <$> asks envSettings
let table = fromJust . findTable (factTableName fact) $ tables let table = fromJust . findTable (factTableName fact) $ tables
return $ dimsFromIds tables ++ dimsFromVals prefix (tableColumns table) return $ dimsFromIds tables ++ dimsFromVals settings (tableColumns table)
where where
dimsFromIds tables = dimsFromIds tables =
flip mapMaybe (factColumns fact) $ \fcol -> case fcol of flip mapMaybe (factColumns fact) $ \fcol -> case fcol of
DimId d _ -> findTable d tables DimId d _ -> findTable d tables
_ -> Nothing _ -> Nothing
dimsFromVals prefix tableColumns = dimsFromVals Settings {..} tableColumns =
map (\(dim, cols) -> Table { tableName = prefix <> dim map (\(dim, cols) ->
, tableColumns = Column "id" "serial" NotNull : cols Table { tableName = settingDimPrefix <> dim
, tableConstraints = [ PrimaryKey "id" , tableColumns =
, UniqueKey (map columnName cols) Column settingDimTableIdColumnName settingDimTableIdColumnType NotNull : cols
] , tableConstraints = [ PrimaryKey settingDimTableIdColumnName
}) , UniqueKey (map columnName cols)
]
})
. Map.toList . Map.toList
. Map.mapWithKey (\dim -> . Map.mapWithKey (\dim ->
map (\col@Column {..} -> col { columnName = dimColumnName dim columnName }) map (\col@Column {..} -> col { columnName = dimColumnName dim columnName })

View File

@ -52,12 +52,12 @@ factTableDefnSQL fact table = do
allDims <- extractAllDimensionTables fact allDims <- extractAllDimensionTables fact
let factCols = flip mapMaybe (factColumns fact) $ \col -> case col of let factCols = flip mapMaybe (factColumns fact) $ \col -> case col of
DimTime cName -> Just $ timeUnitColumnName cName settingTimeUnit DimTime cName -> Just $ timeUnitColumnName settingDimTableIdColumnName cName settingTimeUnit
NoDimId cName -> Just cName NoDimId cName -> Just cName
_ -> Nothing _ -> Nothing
dimCols = flip map allDims $ \(_, Table {..}) -> dimCols = flip map allDims $ \(_, Table {..}) ->
factDimFKIdColumnName settingDimPrefix tableName factDimFKIdColumnName settingDimPrefix settingDimTableIdColumnName tableName
indexSQLs = flip map (factCols ++ dimCols) $ \col -> indexSQLs = flip map (factCols ++ dimCols) $ \col ->
"CREATE INDEX ON " <> tableName table <> " USING btree (" <> col <> ")" "CREATE INDEX ON " <> tableName table <> " USING btree (" <> col <> ")"
@ -83,14 +83,15 @@ dimensionTableInsertSQL fact dimTableName = do
factTableInsertSQL :: Fact -> Reader Env Text factTableInsertSQL :: Fact -> Reader Env Text
factTableInsertSQL fact = do factTableInsertSQL fact = do
let fTableName = factTableName fact let fTableName = factTableName fact
Settings {..} <- asks envSettings Settings {..} <- asks envSettings
allDims <- extractAllDimensionTables fact allDims <- extractAllDimensionTables fact
tables <- asks envTables tables <- asks envTables
let table = fromJust . findTable fTableName $ tables let table = fromJust . findTable fTableName $ tables
dimIdColName = settingDimTableIdColumnName
let timeUnitColumnInsertSQL cName = let timeUnitColumnInsertSQL cName =
let colName = timeUnitColumnName cName settingTimeUnit let colName = timeUnitColumnName dimIdColName cName settingTimeUnit
in (colName, "floor(extract(epoch from " <> fullColName fTableName cName <> ")/" in (colName, "floor(extract(epoch from " <> fullColName fTableName cName <> ")/"
<> Text.pack (show $ timeUnitToSeconds settingTimeUnit) <> ")") <> Text.pack (show $ timeUnitToSeconds settingTimeUnit) <> ")")
@ -99,14 +100,14 @@ factTableInsertSQL fact = do
NoDimId cName -> [ (cName, fullColName fTableName cName) ] NoDimId cName -> [ (cName, fullColName fTableName cName) ]
FactCount cName -> [ (cName, "count(*)") ] FactCount cName -> [ (cName, "count(*)") ]
FactSum scName cName -> [ (cName, "sum(" <> fullColName fTableName scName <> ")") ] FactSum scName cName -> [ (cName, "sum(" <> fullColName fTableName scName <> ")") ]
FactAverage scName cName -> [ ( averageCountColummName cName FactAverage scName cName -> [ ( cName <> settingAvgCountColumSuffix
, "count(" <> fullColName fTableName scName <> ")") , "count(" <> fullColName fTableName scName <> ")")
, ( averageSumColumnName cName , ( cName <> settingAvgSumColumnSuffix
, "sum(" <> fullColName fTableName scName <> ")") ] , "sum(" <> fullColName fTableName scName <> ")") ]
_ -> [] _ -> []
dimColMap = flip map allDims $ \(dimFact, factTable@Table {..}) -> dimColMap = flip map allDims $ \(dimFact, factTable@Table {..}) ->
let colName = factDimFKIdColumnName settingDimPrefix tableName let colName = factDimFKIdColumnName settingDimPrefix dimIdColName tableName
factSourceTableName = factTableName dimFact factSourceTableName = factTableName dimFact
insertSQL = insertSQL =
if factTable `elem` tables if factTable `elem` tables
@ -116,7 +117,7 @@ factTableInsertSQL fact = do
map (\(c1, c2) -> map (\(c1, c2) ->
fullColName tableName c1 <> " = " <> fullColName factSourceTableName c2) fullColName tableName c1 <> " = " <> fullColName factSourceTableName c2)
$ dimColumnMapping settingDimPrefix dimFact tableName $ dimColumnMapping settingDimPrefix dimFact tableName
in "SELECT id FROM " <> tableName <> "\nWHERE " in "SELECT " <> dimIdColName <> " FROM " <> tableName <> "\nWHERE "
<> (Text.concat . intersperse "\n AND " $ dimLookupWhereClauses) <> (Text.concat . intersperse "\n AND " $ dimLookupWhereClauses)
in (colName, insertSQL) in (colName, insertSQL)
@ -128,7 +129,8 @@ factTableInsertSQL fact = do
. map (\(dimFact, _) -> factTableName dimFact) . map (\(dimFact, _) -> factTableName dimFact)
$ allDims $ allDims
return $ "INSERT INTO " <> extractedFactTableName settingFactPrefix (factName fact) settingTimeUnit return $ "INSERT INTO "
<> extractedFactTableName settingFactPrefix settingFactInfix (factName fact) settingTimeUnit
<> " (\n" <> Text.concat (intersperse ",\n " . map fst $ colMap) <> "\n)" <> " (\n" <> Text.concat (intersperse ",\n " . map fst $ colMap) <> "\n)"
<> "\nSELECT \n" <> Text.concat (intersperse ",\n " . map snd $ colMap) <> "\nSELECT \n" <> Text.concat (intersperse ",\n " . map snd $ colMap)
<> "\nFROM " <> fTableName <> "\n" <> Text.concat (intersperse "\n" joinClauses) <> "\nFROM " <> fTableName <> "\n" <> Text.concat (intersperse "\n" joinClauses)

View File

@ -5,7 +5,7 @@ import qualified Data.Text as Text
type ColumnName = Text type ColumnName = Text
type ColumnType = Text type ColumnType = Text
type TableName = Text type TableName = Text
data Nullable = Null | NotNull deriving (Eq, Enum, Show) data Nullable = Null | NotNull deriving (Eq, Enum, Show)
@ -67,25 +67,39 @@ factColumnName (FactAverage cName _) = Just cName
factColumnName (FactCountDistinct _) = Nothing factColumnName (FactCountDistinct _) = Nothing
data Settings = Settings data Settings = Settings
{ settingDimPrefix :: !Text { settingDimPrefix :: !Text
, settingFactPrefix :: !Text , settingFactPrefix :: !Text
, settingTimeUnit :: !TimeUnit , settingTimeUnit :: !TimeUnit
, settingAvgCountColumSuffix :: !Text
, settingAvgSumColumnSuffix :: !Text
, settingCountDistinctColumSuffix :: !Text
, settingDimTableIdColumnName :: !Text
, settingDimTableIdColumnType :: !Text
, settingFactCountColumnType :: !Text
, settingFactInfix :: !Text
} deriving (Eq, Show) } deriving (Eq, Show)
defSettings :: Settings defSettings :: Settings
defSettings = Settings defSettings = Settings
{ settingDimPrefix = "dim_" { settingDimPrefix = "dim_"
, settingFactPrefix = "fact_" , settingFactPrefix = "fact_"
, settingTimeUnit = Minute , settingTimeUnit = Minute
, settingAvgCountColumSuffix = "_count"
, settingAvgSumColumnSuffix = "_sum"
, settingCountDistinctColumSuffix = "_hll"
, settingDimTableIdColumnName = "id"
, settingDimTableIdColumnType = "serial"
, settingFactCountColumnType = "integer"
, settingFactInfix = "_by_"
} }
data ValidationError = MissingTable TableName data ValidationError = MissingTable !TableName
| MissingFact TableName | MissingFact !TableName
| MissingColumn TableName ColumnName | MissingColumn !TableName !ColumnName
deriving (Eq, Show) deriving (Eq, Show)
data Env = Env data Env = Env
{ envTables :: [Table] { envTables :: ![Table]
, envFacts :: [Fact] , envFacts :: ![Fact]
, envSettings :: Settings , envSettings :: !Settings
} deriving (Eq, Show) } deriving (Eq, Show)