From ea9e100f8fdf2dd5196f6fe536c66a3235f6aae0 Mon Sep 17 00:00:00 2001 From: Abhinav Sarkar Date: Fri, 18 Dec 2015 01:00:32 +0530 Subject: [PATCH] Extracts constants in code to settings. --- app/Main.hs | 34 ++++++++++----------- app/Ringo/ArgParser.hs | 41 ++++++++++++++++++++----- src/Ringo/Extractor.hs | 28 ++++++++++------- src/Ringo/Extractor/Internal.hs | 54 +++++++++++++++++---------------- src/Ringo/Generator.hs | 28 +++++++++-------- src/Ringo/Types.hs | 40 ++++++++++++++++-------- 6 files changed, 138 insertions(+), 87 deletions(-) diff --git a/app/Main.hs b/app/Main.hs index d88520a..377448e 100644 --- a/app/Main.hs +++ b/app/Main.hs @@ -39,28 +39,28 @@ writeSQLFiles outputDir env@Env{..} = forM_ sqls $ \(sqlType, table, sql) -> do dimTables = map (\fact -> (fact, extractDimensionTables env fact)) envFacts factTables = map (\fact -> (fact, extractFactTable env fact)) envFacts - dimTableDefnSQLs = [ (Create, tableName table, unlines . map sqlStr . tableDefnSQL $ table) - | (_, tabs) <- dimTables - , table <- tabs - , table `notElem` envTables ] - factTableDefnSQLs = [ (Create - , tableName table, unlines . map sqlStr $ factTableDefnSQL env fact table) - | (fact, table) <- factTables ] + dimTableDefnSQLs = [ (Create, tableName table, unlines . map sqlStr . tableDefnSQL $ table) + | (_, tabs) <- dimTables + , table <- tabs + , table `notElem` envTables ] + factTableDefnSQLs = [ (Create + , tableName table, unlines . map sqlStr $ factTableDefnSQL env fact table) + | (fact, table) <- factTables ] - dimTableInsertSQLs = [ (Populate - , tableName table - , sqlStr $ dimensionTableInsertSQL env fact (tableName table)) - | (fact, tabs) <- dimTables - , table <- tabs - , table `notElem` envTables ] + dimTableInsertSQLs = [ (Populate + , tableName table + , sqlStr $ dimensionTableInsertSQL env fact (tableName table)) + | (fact, tabs) <- dimTables + , table <- tabs + , table `notElem` envTables ] - fctTableInsertSQLs = [ (Populate, tableName table, sqlStr $ factTableInsertSQL env fact) - | (fact, table) <- factTables ] + factTableInsertSQLs = [ (Populate, tableName table, sqlStr $ factTableInsertSQL env fact) + | (fact, table) <- factTables ] sqls = concat [ dimTableDefnSQLs , factTableDefnSQLs , dimTableInsertSQLs - , fctTableInsertSQLs + , factTableInsertSQLs ] - sqlStr s = Text.unpack $ s <> ";\n" + sqlStr s = Text.unpack $ s <> ";\n" diff --git a/app/Ringo/ArgParser.hs b/app/Ringo/ArgParser.hs index e68c8be..0824006 100644 --- a/app/Ringo/ArgParser.hs +++ b/app/Ringo/ArgParser.hs @@ -34,6 +34,34 @@ settingsParser = let Settings {..} = defSettings <> completeWith timeunits <> help ("Time unit granularity for fact tables. Possible values: " ++ intercalate ", " timeunits)) + <*> minorOption "avg-count-col-suffix" + settingAvgCountColumSuffix + "Suffix for average count columns" + <*> minorOption "avg-sum-col-suffix" + settingAvgSumColumnSuffix + "Suffix for average sum columns" + <*> minorOption "count-distinct-col-suffix" + settingCountDistinctColumSuffix + "Suffix for count distinct bucket columns" + <*> minorOption "dim-id-col-name" + settingDimTableIdColumnName + "Name of dimention table id columns" + <*> minorOption "dim-id-col-type" + settingDimTableIdColumnType + "Type of dimention table id columns" + <*> minorOption "fact-count-col-type" + settingFactCountColumnType + "Type of fact table count columns" + <*> minorOption "fact-infix" + settingFactInfix + "Infix for fact tables" + where + minorOption longDesc defValue helpTxt = + Text.pack <$> strOption (long longDesc + <> hidden + <> value (Text.unpack defValue) + <> showDefault + <> help helpTxt) progArgsParser :: Parser ProgArgs progArgsParser = @@ -47,10 +75,9 @@ progArgsParser = <> help "Output directory") parseArgs :: IO ProgArgs -parseArgs = execParser opts - where - opts = info (helper <*> progArgsParser) - (fullDesc - <> progDesc "Transforms OLTP database schemas to OLAP database star schemas" - <> header "ringo - OLTP to OLAP schema transformer" - <> footer "Source: http://github.com/quintype/ringo") +parseArgs = execParser $ + info (helper <*> progArgsParser) + (fullDesc + <> progDesc "Transforms OLTP database schemas to OLAP database star schemas" + <> header "ringo - OLTP to OLAP schema transformer" + <> footer "Source: http://github.com/quintype/ringo") diff --git a/src/Ringo/Extractor.hs b/src/Ringo/Extractor.hs index 7cc4302..1b73716 100644 --- a/src/Ringo/Extractor.hs +++ b/src/Ringo/Extractor.hs @@ -18,33 +18,39 @@ extractFactTable fact = do allDims <- extractAllDimensionTables fact table <- asks $ fromJust . findTable (factTableName fact) . envTables - let intType = "integer" + let countColType = settingFactCountColumnType + dimIdColName = settingDimTableIdColumnName sourceColumnType colName = columnType . fromJust . findColumn colName . tableColumns $ table columns = flip concatMap (factColumns fact) $ \col -> case col of - DimTime cName -> [ Column (timeUnitColumnName cName settingTimeUnit) intType NotNull ] + DimTime cName -> [ Column (timeUnitColumnName dimIdColName cName settingTimeUnit) "integer" NotNull ] NoDimId cName -> [ fromJust . findColumn cName . tableColumns $ table] - FactCount cName -> [ Column cName intType NotNull ] + FactCount cName -> [ Column cName countColType NotNull ] FactSum scName cName -> [ Column cName (sourceColumnType scName) NotNull ] - FactAverage scName cName -> [ Column (averageCountColummName cName) intType NotNull - , Column (averageSumColumnName cName) (sourceColumnType scName) NotNull - ] - FactCountDistinct cName -> [ Column (countDistinctColumnName cName) (intType <> "[]") NotNull ] + FactAverage scName cName -> + [ Column (cName <> settingAvgCountColumSuffix) countColType NotNull + , Column (cName <> settingAvgSumColumnSuffix) (sourceColumnType scName) NotNull + ] + FactCountDistinct cName -> + [ Column (cName <> settingCountDistinctColumSuffix) (countColType <> "[]") NotNull ] _ -> [] fks = flip map allDims $ \(_, Table {..}) -> - let colName = factDimFKIdColumnName settingDimPrefix tableName + let colName = factDimFKIdColumnName settingDimPrefix dimIdColName tableName colNullable = if any ((== Null) . columnNullable) tableColumns then Null else NotNull - in (Column colName intType colNullable, ForeignKey tableName [(colName, "id")]) + in ( Column colName (idColTypeToFKIdColType settingDimTableIdColumnType) colNullable + , ForeignKey tableName [(colName, dimIdColName)] + ) ukColNames = (++ map (columnName . fst) fks) . flip mapMaybe (factColumns fact) $ \col -> case col of - DimTime cName -> Just (timeUnitColumnName cName settingTimeUnit) + DimTime cName -> Just (timeUnitColumnName dimIdColName cName settingTimeUnit) NoDimId cName -> Just cName _ -> Nothing - return Table { tableName = extractedFactTableName settingFactPrefix (factName fact) settingTimeUnit + return Table { tableName = + extractedFactTableName settingFactPrefix settingFactInfix (factName fact) settingTimeUnit , tableColumns = columns ++ map fst fks , tableConstraints = UniqueKey ukColNames : map snd fks } diff --git a/src/Ringo/Extractor/Internal.hs b/src/Ringo/Extractor/Internal.hs index 27309d7..4449d73 100644 --- a/src/Ringo/Extractor/Internal.hs +++ b/src/Ringo/Extractor/Internal.hs @@ -8,53 +8,55 @@ import Data.Function (on) import Data.Maybe (mapMaybe, fromMaybe, fromJust) import Data.Monoid ((<>)) import Data.List (nub, nubBy) +import Data.Text (Text) import Ringo.Types import Ringo.Utils -dimColumnName :: Text.Text -> ColumnName -> ColumnName +dimColumnName :: Text -> ColumnName -> ColumnName dimColumnName dimName columnName = fromMaybe columnName . Text.stripPrefix (dimName <> "_") $ columnName -timeUnitColumnName :: ColumnName -> TimeUnit -> ColumnName -timeUnitColumnName colName timeUnit = colName <> "_" <> timeUnitName timeUnit <> "_id" +timeUnitColumnName :: Text -> ColumnName -> TimeUnit -> ColumnName +timeUnitColumnName dimIdColName colName timeUnit = + colName <> "_" <> timeUnitName timeUnit <> "_" <> dimIdColName -averageCountColummName :: ColumnName -> ColumnName -averageCountColummName colName = colName <> "_count" +factDimFKIdColumnName :: Text -> Text -> TableName -> ColumnName +factDimFKIdColumnName dimPrefix dimIdColName dimTableName = + fromMaybe dimTableName (Text.stripPrefix dimPrefix dimTableName) <> "_" <> dimIdColName -averageSumColumnName :: ColumnName -> ColumnName -averageSumColumnName colName = colName <> "_sum" +extractedFactTableName :: Text -> Text -> TableName -> TimeUnit -> TableName +extractedFactTableName factPrefix factInfix factName timeUnit = + factPrefix <> factName <> factInfix <> timeUnitName timeUnit -countDistinctColumnName :: ColumnName -> ColumnName -countDistinctColumnName colName = colName <> "_hll" - -factDimFKIdColumnName :: Text.Text -> TableName -> ColumnName -factDimFKIdColumnName dimPrefix dimTableName = - fromMaybe dimTableName (Text.stripPrefix dimPrefix dimTableName) <> "_id" - -extractedFactTableName :: Text.Text -> TableName -> TimeUnit -> TableName -extractedFactTableName factPrefix factName timeUnit = - factPrefix <> factName <> "_by_" <> timeUnitName timeUnit +idColTypeToFKIdColType :: Text -> Text +idColTypeToFKIdColType typ = case Text.toLower typ of + "serial" -> "integer" + "smallserial" -> "smallint" + "bigserial" -> "bigint" + _ -> typ extractDimensionTables :: Fact -> Reader Env [Table] extractDimensionTables fact = do + settings <- asks envSettings tables <- asks envTables - prefix <- settingDimPrefix <$> asks envSettings let table = fromJust . findTable (factTableName fact) $ tables - return $ dimsFromIds tables ++ dimsFromVals prefix (tableColumns table) + return $ dimsFromIds tables ++ dimsFromVals settings (tableColumns table) where dimsFromIds tables = flip mapMaybe (factColumns fact) $ \fcol -> case fcol of DimId d _ -> findTable d tables _ -> Nothing - dimsFromVals prefix tableColumns = - map (\(dim, cols) -> Table { tableName = prefix <> dim - , tableColumns = Column "id" "serial" NotNull : cols - , tableConstraints = [ PrimaryKey "id" - , UniqueKey (map columnName cols) - ] - }) + dimsFromVals Settings {..} tableColumns = + map (\(dim, cols) -> + Table { tableName = settingDimPrefix <> dim + , tableColumns = + Column settingDimTableIdColumnName settingDimTableIdColumnType NotNull : cols + , tableConstraints = [ PrimaryKey settingDimTableIdColumnName + , UniqueKey (map columnName cols) + ] + }) . Map.toList . Map.mapWithKey (\dim -> map (\col@Column {..} -> col { columnName = dimColumnName dim columnName }) diff --git a/src/Ringo/Generator.hs b/src/Ringo/Generator.hs index 2bad100..c4f3df1 100644 --- a/src/Ringo/Generator.hs +++ b/src/Ringo/Generator.hs @@ -52,12 +52,12 @@ factTableDefnSQL fact table = do allDims <- extractAllDimensionTables fact let factCols = flip mapMaybe (factColumns fact) $ \col -> case col of - DimTime cName -> Just $ timeUnitColumnName cName settingTimeUnit + DimTime cName -> Just $ timeUnitColumnName settingDimTableIdColumnName cName settingTimeUnit NoDimId cName -> Just cName _ -> Nothing dimCols = flip map allDims $ \(_, Table {..}) -> - factDimFKIdColumnName settingDimPrefix tableName + factDimFKIdColumnName settingDimPrefix settingDimTableIdColumnName tableName indexSQLs = flip map (factCols ++ dimCols) $ \col -> "CREATE INDEX ON " <> tableName table <> " USING btree (" <> col <> ")" @@ -83,14 +83,15 @@ dimensionTableInsertSQL fact dimTableName = do factTableInsertSQL :: Fact -> Reader Env Text factTableInsertSQL fact = do - let fTableName = factTableName fact - Settings {..} <- asks envSettings - allDims <- extractAllDimensionTables fact - tables <- asks envTables - let table = fromJust . findTable fTableName $ tables + let fTableName = factTableName fact + Settings {..} <- asks envSettings + allDims <- extractAllDimensionTables fact + tables <- asks envTables + let table = fromJust . findTable fTableName $ tables + dimIdColName = settingDimTableIdColumnName let timeUnitColumnInsertSQL cName = - let colName = timeUnitColumnName cName settingTimeUnit + let colName = timeUnitColumnName dimIdColName cName settingTimeUnit in (colName, "floor(extract(epoch from " <> fullColName fTableName cName <> ")/" <> Text.pack (show $ timeUnitToSeconds settingTimeUnit) <> ")") @@ -99,14 +100,14 @@ factTableInsertSQL fact = do NoDimId cName -> [ (cName, fullColName fTableName cName) ] FactCount cName -> [ (cName, "count(*)") ] FactSum scName cName -> [ (cName, "sum(" <> fullColName fTableName scName <> ")") ] - FactAverage scName cName -> [ ( averageCountColummName cName + FactAverage scName cName -> [ ( cName <> settingAvgCountColumSuffix , "count(" <> fullColName fTableName scName <> ")") - , ( averageSumColumnName cName + , ( cName <> settingAvgSumColumnSuffix , "sum(" <> fullColName fTableName scName <> ")") ] _ -> [] dimColMap = flip map allDims $ \(dimFact, factTable@Table {..}) -> - let colName = factDimFKIdColumnName settingDimPrefix tableName + let colName = factDimFKIdColumnName settingDimPrefix dimIdColName tableName factSourceTableName = factTableName dimFact insertSQL = if factTable `elem` tables @@ -116,7 +117,7 @@ factTableInsertSQL fact = do map (\(c1, c2) -> fullColName tableName c1 <> " = " <> fullColName factSourceTableName c2) $ dimColumnMapping settingDimPrefix dimFact tableName - in "SELECT id FROM " <> tableName <> "\nWHERE " + in "SELECT " <> dimIdColName <> " FROM " <> tableName <> "\nWHERE " <> (Text.concat . intersperse "\n AND " $ dimLookupWhereClauses) in (colName, insertSQL) @@ -128,7 +129,8 @@ factTableInsertSQL fact = do . map (\(dimFact, _) -> factTableName dimFact) $ allDims - return $ "INSERT INTO " <> extractedFactTableName settingFactPrefix (factName fact) settingTimeUnit + return $ "INSERT INTO " + <> extractedFactTableName settingFactPrefix settingFactInfix (factName fact) settingTimeUnit <> " (\n" <> Text.concat (intersperse ",\n " . map fst $ colMap) <> "\n)" <> "\nSELECT \n" <> Text.concat (intersperse ",\n " . map snd $ colMap) <> "\nFROM " <> fTableName <> "\n" <> Text.concat (intersperse "\n" joinClauses) diff --git a/src/Ringo/Types.hs b/src/Ringo/Types.hs index fb9c97a..59bc797 100644 --- a/src/Ringo/Types.hs +++ b/src/Ringo/Types.hs @@ -5,7 +5,7 @@ import qualified Data.Text as Text type ColumnName = Text type ColumnType = Text -type TableName = Text +type TableName = Text data Nullable = Null | NotNull deriving (Eq, Enum, Show) @@ -67,25 +67,39 @@ factColumnName (FactAverage cName _) = Just cName factColumnName (FactCountDistinct _) = Nothing data Settings = Settings - { settingDimPrefix :: !Text - , settingFactPrefix :: !Text - , settingTimeUnit :: !TimeUnit + { settingDimPrefix :: !Text + , settingFactPrefix :: !Text + , settingTimeUnit :: !TimeUnit + , settingAvgCountColumSuffix :: !Text + , settingAvgSumColumnSuffix :: !Text + , settingCountDistinctColumSuffix :: !Text + , settingDimTableIdColumnName :: !Text + , settingDimTableIdColumnType :: !Text + , settingFactCountColumnType :: !Text + , settingFactInfix :: !Text } deriving (Eq, Show) defSettings :: Settings defSettings = Settings - { settingDimPrefix = "dim_" - , settingFactPrefix = "fact_" - , settingTimeUnit = Minute + { settingDimPrefix = "dim_" + , settingFactPrefix = "fact_" + , settingTimeUnit = Minute + , settingAvgCountColumSuffix = "_count" + , settingAvgSumColumnSuffix = "_sum" + , settingCountDistinctColumSuffix = "_hll" + , settingDimTableIdColumnName = "id" + , settingDimTableIdColumnType = "serial" + , settingFactCountColumnType = "integer" + , settingFactInfix = "_by_" } -data ValidationError = MissingTable TableName - | MissingFact TableName - | MissingColumn TableName ColumnName +data ValidationError = MissingTable !TableName + | MissingFact !TableName + | MissingColumn !TableName !ColumnName deriving (Eq, Show) data Env = Env - { envTables :: [Table] - , envFacts :: [Fact] - , envSettings :: Settings + { envTables :: ![Table] + , envFacts :: ![Fact] + , envSettings :: !Settings } deriving (Eq, Show)