Extracts constants in code to settings.
This commit is contained in:
parent
21497269ee
commit
ea9e100f8f
34
app/Main.hs
34
app/Main.hs
@ -39,28 +39,28 @@ writeSQLFiles outputDir env@Env{..} = forM_ sqls $ \(sqlType, table, sql) -> do
|
||||
dimTables = map (\fact -> (fact, extractDimensionTables env fact)) envFacts
|
||||
factTables = map (\fact -> (fact, extractFactTable env fact)) envFacts
|
||||
|
||||
dimTableDefnSQLs = [ (Create, tableName table, unlines . map sqlStr . tableDefnSQL $ table)
|
||||
| (_, tabs) <- dimTables
|
||||
, table <- tabs
|
||||
, table `notElem` envTables ]
|
||||
factTableDefnSQLs = [ (Create
|
||||
, tableName table, unlines . map sqlStr $ factTableDefnSQL env fact table)
|
||||
| (fact, table) <- factTables ]
|
||||
dimTableDefnSQLs = [ (Create, tableName table, unlines . map sqlStr . tableDefnSQL $ table)
|
||||
| (_, tabs) <- dimTables
|
||||
, table <- tabs
|
||||
, table `notElem` envTables ]
|
||||
factTableDefnSQLs = [ (Create
|
||||
, tableName table, unlines . map sqlStr $ factTableDefnSQL env fact table)
|
||||
| (fact, table) <- factTables ]
|
||||
|
||||
dimTableInsertSQLs = [ (Populate
|
||||
, tableName table
|
||||
, sqlStr $ dimensionTableInsertSQL env fact (tableName table))
|
||||
| (fact, tabs) <- dimTables
|
||||
, table <- tabs
|
||||
, table `notElem` envTables ]
|
||||
dimTableInsertSQLs = [ (Populate
|
||||
, tableName table
|
||||
, sqlStr $ dimensionTableInsertSQL env fact (tableName table))
|
||||
| (fact, tabs) <- dimTables
|
||||
, table <- tabs
|
||||
, table `notElem` envTables ]
|
||||
|
||||
fctTableInsertSQLs = [ (Populate, tableName table, sqlStr $ factTableInsertSQL env fact)
|
||||
| (fact, table) <- factTables ]
|
||||
factTableInsertSQLs = [ (Populate, tableName table, sqlStr $ factTableInsertSQL env fact)
|
||||
| (fact, table) <- factTables ]
|
||||
|
||||
sqls = concat [ dimTableDefnSQLs
|
||||
, factTableDefnSQLs
|
||||
, dimTableInsertSQLs
|
||||
, fctTableInsertSQLs
|
||||
, factTableInsertSQLs
|
||||
]
|
||||
|
||||
sqlStr s = Text.unpack $ s <> ";\n"
|
||||
sqlStr s = Text.unpack $ s <> ";\n"
|
||||
|
@ -34,6 +34,34 @@ settingsParser = let Settings {..} = defSettings
|
||||
<> completeWith timeunits
|
||||
<> help ("Time unit granularity for fact tables. Possible values: "
|
||||
++ intercalate ", " timeunits))
|
||||
<*> minorOption "avg-count-col-suffix"
|
||||
settingAvgCountColumSuffix
|
||||
"Suffix for average count columns"
|
||||
<*> minorOption "avg-sum-col-suffix"
|
||||
settingAvgSumColumnSuffix
|
||||
"Suffix for average sum columns"
|
||||
<*> minorOption "count-distinct-col-suffix"
|
||||
settingCountDistinctColumSuffix
|
||||
"Suffix for count distinct bucket columns"
|
||||
<*> minorOption "dim-id-col-name"
|
||||
settingDimTableIdColumnName
|
||||
"Name of dimention table id columns"
|
||||
<*> minorOption "dim-id-col-type"
|
||||
settingDimTableIdColumnType
|
||||
"Type of dimention table id columns"
|
||||
<*> minorOption "fact-count-col-type"
|
||||
settingFactCountColumnType
|
||||
"Type of fact table count columns"
|
||||
<*> minorOption "fact-infix"
|
||||
settingFactInfix
|
||||
"Infix for fact tables"
|
||||
where
|
||||
minorOption longDesc defValue helpTxt =
|
||||
Text.pack <$> strOption (long longDesc
|
||||
<> hidden
|
||||
<> value (Text.unpack defValue)
|
||||
<> showDefault
|
||||
<> help helpTxt)
|
||||
|
||||
progArgsParser :: Parser ProgArgs
|
||||
progArgsParser =
|
||||
@ -47,10 +75,9 @@ progArgsParser =
|
||||
<> help "Output directory")
|
||||
|
||||
parseArgs :: IO ProgArgs
|
||||
parseArgs = execParser opts
|
||||
where
|
||||
opts = info (helper <*> progArgsParser)
|
||||
(fullDesc
|
||||
<> progDesc "Transforms OLTP database schemas to OLAP database star schemas"
|
||||
<> header "ringo - OLTP to OLAP schema transformer"
|
||||
<> footer "Source: http://github.com/quintype/ringo")
|
||||
parseArgs = execParser $
|
||||
info (helper <*> progArgsParser)
|
||||
(fullDesc
|
||||
<> progDesc "Transforms OLTP database schemas to OLAP database star schemas"
|
||||
<> header "ringo - OLTP to OLAP schema transformer"
|
||||
<> footer "Source: http://github.com/quintype/ringo")
|
||||
|
@ -18,33 +18,39 @@ extractFactTable fact = do
|
||||
allDims <- extractAllDimensionTables fact
|
||||
table <- asks $ fromJust . findTable (factTableName fact) . envTables
|
||||
|
||||
let intType = "integer"
|
||||
let countColType = settingFactCountColumnType
|
||||
dimIdColName = settingDimTableIdColumnName
|
||||
sourceColumnType colName = columnType . fromJust . findColumn colName . tableColumns $ table
|
||||
|
||||
columns = flip concatMap (factColumns fact) $ \col -> case col of
|
||||
DimTime cName -> [ Column (timeUnitColumnName cName settingTimeUnit) intType NotNull ]
|
||||
DimTime cName -> [ Column (timeUnitColumnName dimIdColName cName settingTimeUnit) "integer" NotNull ]
|
||||
NoDimId cName -> [ fromJust . findColumn cName . tableColumns $ table]
|
||||
FactCount cName -> [ Column cName intType NotNull ]
|
||||
FactCount cName -> [ Column cName countColType NotNull ]
|
||||
FactSum scName cName -> [ Column cName (sourceColumnType scName) NotNull ]
|
||||
FactAverage scName cName -> [ Column (averageCountColummName cName) intType NotNull
|
||||
, Column (averageSumColumnName cName) (sourceColumnType scName) NotNull
|
||||
]
|
||||
FactCountDistinct cName -> [ Column (countDistinctColumnName cName) (intType <> "[]") NotNull ]
|
||||
FactAverage scName cName ->
|
||||
[ Column (cName <> settingAvgCountColumSuffix) countColType NotNull
|
||||
, Column (cName <> settingAvgSumColumnSuffix) (sourceColumnType scName) NotNull
|
||||
]
|
||||
FactCountDistinct cName ->
|
||||
[ Column (cName <> settingCountDistinctColumSuffix) (countColType <> "[]") NotNull ]
|
||||
_ -> []
|
||||
|
||||
fks = flip map allDims $ \(_, Table {..}) ->
|
||||
let colName = factDimFKIdColumnName settingDimPrefix tableName
|
||||
let colName = factDimFKIdColumnName settingDimPrefix dimIdColName tableName
|
||||
colNullable = if any ((== Null) . columnNullable) tableColumns then Null else NotNull
|
||||
in (Column colName intType colNullable, ForeignKey tableName [(colName, "id")])
|
||||
in ( Column colName (idColTypeToFKIdColType settingDimTableIdColumnType) colNullable
|
||||
, ForeignKey tableName [(colName, dimIdColName)]
|
||||
)
|
||||
|
||||
ukColNames =
|
||||
(++ map (columnName . fst) fks)
|
||||
. flip mapMaybe (factColumns fact) $ \col -> case col of
|
||||
DimTime cName -> Just (timeUnitColumnName cName settingTimeUnit)
|
||||
DimTime cName -> Just (timeUnitColumnName dimIdColName cName settingTimeUnit)
|
||||
NoDimId cName -> Just cName
|
||||
_ -> Nothing
|
||||
|
||||
return Table { tableName = extractedFactTableName settingFactPrefix (factName fact) settingTimeUnit
|
||||
return Table { tableName =
|
||||
extractedFactTableName settingFactPrefix settingFactInfix (factName fact) settingTimeUnit
|
||||
, tableColumns = columns ++ map fst fks
|
||||
, tableConstraints = UniqueKey ukColNames : map snd fks
|
||||
}
|
||||
|
@ -8,53 +8,55 @@ import Data.Function (on)
|
||||
import Data.Maybe (mapMaybe, fromMaybe, fromJust)
|
||||
import Data.Monoid ((<>))
|
||||
import Data.List (nub, nubBy)
|
||||
import Data.Text (Text)
|
||||
|
||||
import Ringo.Types
|
||||
import Ringo.Utils
|
||||
|
||||
dimColumnName :: Text.Text -> ColumnName -> ColumnName
|
||||
dimColumnName :: Text -> ColumnName -> ColumnName
|
||||
dimColumnName dimName columnName =
|
||||
fromMaybe columnName . Text.stripPrefix (dimName <> "_") $ columnName
|
||||
|
||||
timeUnitColumnName :: ColumnName -> TimeUnit -> ColumnName
|
||||
timeUnitColumnName colName timeUnit = colName <> "_" <> timeUnitName timeUnit <> "_id"
|
||||
timeUnitColumnName :: Text -> ColumnName -> TimeUnit -> ColumnName
|
||||
timeUnitColumnName dimIdColName colName timeUnit =
|
||||
colName <> "_" <> timeUnitName timeUnit <> "_" <> dimIdColName
|
||||
|
||||
averageCountColummName :: ColumnName -> ColumnName
|
||||
averageCountColummName colName = colName <> "_count"
|
||||
factDimFKIdColumnName :: Text -> Text -> TableName -> ColumnName
|
||||
factDimFKIdColumnName dimPrefix dimIdColName dimTableName =
|
||||
fromMaybe dimTableName (Text.stripPrefix dimPrefix dimTableName) <> "_" <> dimIdColName
|
||||
|
||||
averageSumColumnName :: ColumnName -> ColumnName
|
||||
averageSumColumnName colName = colName <> "_sum"
|
||||
extractedFactTableName :: Text -> Text -> TableName -> TimeUnit -> TableName
|
||||
extractedFactTableName factPrefix factInfix factName timeUnit =
|
||||
factPrefix <> factName <> factInfix <> timeUnitName timeUnit
|
||||
|
||||
countDistinctColumnName :: ColumnName -> ColumnName
|
||||
countDistinctColumnName colName = colName <> "_hll"
|
||||
|
||||
factDimFKIdColumnName :: Text.Text -> TableName -> ColumnName
|
||||
factDimFKIdColumnName dimPrefix dimTableName =
|
||||
fromMaybe dimTableName (Text.stripPrefix dimPrefix dimTableName) <> "_id"
|
||||
|
||||
extractedFactTableName :: Text.Text -> TableName -> TimeUnit -> TableName
|
||||
extractedFactTableName factPrefix factName timeUnit =
|
||||
factPrefix <> factName <> "_by_" <> timeUnitName timeUnit
|
||||
idColTypeToFKIdColType :: Text -> Text
|
||||
idColTypeToFKIdColType typ = case Text.toLower typ of
|
||||
"serial" -> "integer"
|
||||
"smallserial" -> "smallint"
|
||||
"bigserial" -> "bigint"
|
||||
_ -> typ
|
||||
|
||||
extractDimensionTables :: Fact -> Reader Env [Table]
|
||||
extractDimensionTables fact = do
|
||||
settings <- asks envSettings
|
||||
tables <- asks envTables
|
||||
prefix <- settingDimPrefix <$> asks envSettings
|
||||
let table = fromJust . findTable (factTableName fact) $ tables
|
||||
return $ dimsFromIds tables ++ dimsFromVals prefix (tableColumns table)
|
||||
return $ dimsFromIds tables ++ dimsFromVals settings (tableColumns table)
|
||||
where
|
||||
dimsFromIds tables =
|
||||
flip mapMaybe (factColumns fact) $ \fcol -> case fcol of
|
||||
DimId d _ -> findTable d tables
|
||||
_ -> Nothing
|
||||
|
||||
dimsFromVals prefix tableColumns =
|
||||
map (\(dim, cols) -> Table { tableName = prefix <> dim
|
||||
, tableColumns = Column "id" "serial" NotNull : cols
|
||||
, tableConstraints = [ PrimaryKey "id"
|
||||
, UniqueKey (map columnName cols)
|
||||
]
|
||||
})
|
||||
dimsFromVals Settings {..} tableColumns =
|
||||
map (\(dim, cols) ->
|
||||
Table { tableName = settingDimPrefix <> dim
|
||||
, tableColumns =
|
||||
Column settingDimTableIdColumnName settingDimTableIdColumnType NotNull : cols
|
||||
, tableConstraints = [ PrimaryKey settingDimTableIdColumnName
|
||||
, UniqueKey (map columnName cols)
|
||||
]
|
||||
})
|
||||
. Map.toList
|
||||
. Map.mapWithKey (\dim ->
|
||||
map (\col@Column {..} -> col { columnName = dimColumnName dim columnName })
|
||||
|
@ -52,12 +52,12 @@ factTableDefnSQL fact table = do
|
||||
allDims <- extractAllDimensionTables fact
|
||||
|
||||
let factCols = flip mapMaybe (factColumns fact) $ \col -> case col of
|
||||
DimTime cName -> Just $ timeUnitColumnName cName settingTimeUnit
|
||||
DimTime cName -> Just $ timeUnitColumnName settingDimTableIdColumnName cName settingTimeUnit
|
||||
NoDimId cName -> Just cName
|
||||
_ -> Nothing
|
||||
|
||||
dimCols = flip map allDims $ \(_, Table {..}) ->
|
||||
factDimFKIdColumnName settingDimPrefix tableName
|
||||
factDimFKIdColumnName settingDimPrefix settingDimTableIdColumnName tableName
|
||||
|
||||
indexSQLs = flip map (factCols ++ dimCols) $ \col ->
|
||||
"CREATE INDEX ON " <> tableName table <> " USING btree (" <> col <> ")"
|
||||
@ -83,14 +83,15 @@ dimensionTableInsertSQL fact dimTableName = do
|
||||
|
||||
factTableInsertSQL :: Fact -> Reader Env Text
|
||||
factTableInsertSQL fact = do
|
||||
let fTableName = factTableName fact
|
||||
Settings {..} <- asks envSettings
|
||||
allDims <- extractAllDimensionTables fact
|
||||
tables <- asks envTables
|
||||
let table = fromJust . findTable fTableName $ tables
|
||||
let fTableName = factTableName fact
|
||||
Settings {..} <- asks envSettings
|
||||
allDims <- extractAllDimensionTables fact
|
||||
tables <- asks envTables
|
||||
let table = fromJust . findTable fTableName $ tables
|
||||
dimIdColName = settingDimTableIdColumnName
|
||||
|
||||
let timeUnitColumnInsertSQL cName =
|
||||
let colName = timeUnitColumnName cName settingTimeUnit
|
||||
let colName = timeUnitColumnName dimIdColName cName settingTimeUnit
|
||||
in (colName, "floor(extract(epoch from " <> fullColName fTableName cName <> ")/"
|
||||
<> Text.pack (show $ timeUnitToSeconds settingTimeUnit) <> ")")
|
||||
|
||||
@ -99,14 +100,14 @@ factTableInsertSQL fact = do
|
||||
NoDimId cName -> [ (cName, fullColName fTableName cName) ]
|
||||
FactCount cName -> [ (cName, "count(*)") ]
|
||||
FactSum scName cName -> [ (cName, "sum(" <> fullColName fTableName scName <> ")") ]
|
||||
FactAverage scName cName -> [ ( averageCountColummName cName
|
||||
FactAverage scName cName -> [ ( cName <> settingAvgCountColumSuffix
|
||||
, "count(" <> fullColName fTableName scName <> ")")
|
||||
, ( averageSumColumnName cName
|
||||
, ( cName <> settingAvgSumColumnSuffix
|
||||
, "sum(" <> fullColName fTableName scName <> ")") ]
|
||||
_ -> []
|
||||
|
||||
dimColMap = flip map allDims $ \(dimFact, factTable@Table {..}) ->
|
||||
let colName = factDimFKIdColumnName settingDimPrefix tableName
|
||||
let colName = factDimFKIdColumnName settingDimPrefix dimIdColName tableName
|
||||
factSourceTableName = factTableName dimFact
|
||||
insertSQL =
|
||||
if factTable `elem` tables
|
||||
@ -116,7 +117,7 @@ factTableInsertSQL fact = do
|
||||
map (\(c1, c2) ->
|
||||
fullColName tableName c1 <> " = " <> fullColName factSourceTableName c2)
|
||||
$ dimColumnMapping settingDimPrefix dimFact tableName
|
||||
in "SELECT id FROM " <> tableName <> "\nWHERE "
|
||||
in "SELECT " <> dimIdColName <> " FROM " <> tableName <> "\nWHERE "
|
||||
<> (Text.concat . intersperse "\n AND " $ dimLookupWhereClauses)
|
||||
in (colName, insertSQL)
|
||||
|
||||
@ -128,7 +129,8 @@ factTableInsertSQL fact = do
|
||||
. map (\(dimFact, _) -> factTableName dimFact)
|
||||
$ allDims
|
||||
|
||||
return $ "INSERT INTO " <> extractedFactTableName settingFactPrefix (factName fact) settingTimeUnit
|
||||
return $ "INSERT INTO "
|
||||
<> extractedFactTableName settingFactPrefix settingFactInfix (factName fact) settingTimeUnit
|
||||
<> " (\n" <> Text.concat (intersperse ",\n " . map fst $ colMap) <> "\n)"
|
||||
<> "\nSELECT \n" <> Text.concat (intersperse ",\n " . map snd $ colMap)
|
||||
<> "\nFROM " <> fTableName <> "\n" <> Text.concat (intersperse "\n" joinClauses)
|
||||
|
@ -5,7 +5,7 @@ import qualified Data.Text as Text
|
||||
|
||||
type ColumnName = Text
|
||||
type ColumnType = Text
|
||||
type TableName = Text
|
||||
type TableName = Text
|
||||
|
||||
data Nullable = Null | NotNull deriving (Eq, Enum, Show)
|
||||
|
||||
@ -67,25 +67,39 @@ factColumnName (FactAverage cName _) = Just cName
|
||||
factColumnName (FactCountDistinct _) = Nothing
|
||||
|
||||
data Settings = Settings
|
||||
{ settingDimPrefix :: !Text
|
||||
, settingFactPrefix :: !Text
|
||||
, settingTimeUnit :: !TimeUnit
|
||||
{ settingDimPrefix :: !Text
|
||||
, settingFactPrefix :: !Text
|
||||
, settingTimeUnit :: !TimeUnit
|
||||
, settingAvgCountColumSuffix :: !Text
|
||||
, settingAvgSumColumnSuffix :: !Text
|
||||
, settingCountDistinctColumSuffix :: !Text
|
||||
, settingDimTableIdColumnName :: !Text
|
||||
, settingDimTableIdColumnType :: !Text
|
||||
, settingFactCountColumnType :: !Text
|
||||
, settingFactInfix :: !Text
|
||||
} deriving (Eq, Show)
|
||||
|
||||
defSettings :: Settings
|
||||
defSettings = Settings
|
||||
{ settingDimPrefix = "dim_"
|
||||
, settingFactPrefix = "fact_"
|
||||
, settingTimeUnit = Minute
|
||||
{ settingDimPrefix = "dim_"
|
||||
, settingFactPrefix = "fact_"
|
||||
, settingTimeUnit = Minute
|
||||
, settingAvgCountColumSuffix = "_count"
|
||||
, settingAvgSumColumnSuffix = "_sum"
|
||||
, settingCountDistinctColumSuffix = "_hll"
|
||||
, settingDimTableIdColumnName = "id"
|
||||
, settingDimTableIdColumnType = "serial"
|
||||
, settingFactCountColumnType = "integer"
|
||||
, settingFactInfix = "_by_"
|
||||
}
|
||||
|
||||
data ValidationError = MissingTable TableName
|
||||
| MissingFact TableName
|
||||
| MissingColumn TableName ColumnName
|
||||
data ValidationError = MissingTable !TableName
|
||||
| MissingFact !TableName
|
||||
| MissingColumn !TableName !ColumnName
|
||||
deriving (Eq, Show)
|
||||
|
||||
data Env = Env
|
||||
{ envTables :: [Table]
|
||||
, envFacts :: [Fact]
|
||||
, envSettings :: Settings
|
||||
{ envTables :: ![Table]
|
||||
, envFacts :: ![Fact]
|
||||
, envSettings :: !Settings
|
||||
} deriving (Eq, Show)
|
||||
|
Loading…
Reference in New Issue
Block a user