Changes count distinct algo to use a specialized int log2 function.
This commit is contained in:
parent
2d5a49f53e
commit
601eed9a3c
@ -29,7 +29,8 @@ library
|
|||||||
build-depends: base >=4.7 && <5,
|
build-depends: base >=4.7 && <5,
|
||||||
text >=1.2 && <1.3,
|
text >=1.2 && <1.3,
|
||||||
containers >=0.5 && <0.6,
|
containers >=0.5 && <0.6,
|
||||||
mtl >=2.1 && <2.3
|
mtl >=2.1 && <2.3,
|
||||||
|
raw-strings-qq >= 1.0 && <1.2
|
||||||
ghc-options: -Wall -fno-warn-unused-do-bind -funbox-strict-fields -fno-warn-orphans -O2
|
ghc-options: -Wall -fno-warn-unused-do-bind -funbox-strict-fields -fno-warn-orphans -O2
|
||||||
default-extensions: OverloadedStrings, RecordWildCards, ScopedTypeVariables, BangPatterns,
|
default-extensions: OverloadedStrings, RecordWildCards, ScopedTypeVariables, BangPatterns,
|
||||||
TupleSections, CPP, NamedFieldPuns
|
TupleSections, CPP, NamedFieldPuns
|
||||||
|
@ -4,7 +4,6 @@ import qualified Data.Map as Map
|
|||||||
import qualified Data.Text as Text
|
import qualified Data.Text as Text
|
||||||
|
|
||||||
import Data.List (find)
|
import Data.List (find)
|
||||||
import Data.Maybe (fromMaybe)
|
|
||||||
import Data.Monoid ((<>))
|
import Data.Monoid ((<>))
|
||||||
import Data.Text (Text)
|
import Data.Text (Text)
|
||||||
|
|
||||||
@ -31,8 +30,7 @@ coalesceColumn defaults tName Column{..} =
|
|||||||
fqColName = fullColumnName tName columnName
|
fqColName = fullColumnName tName columnName
|
||||||
|
|
||||||
defVal colType =
|
defVal colType =
|
||||||
fromMaybe (error $ "Default value not known for column type: " ++ Text.unpack colType)
|
maybe (error $ "Default value not known for column type: " ++ Text.unpack colType) snd
|
||||||
. fmap snd
|
|
||||||
. find (\(k, _) -> k `Text.isPrefixOf` colType)
|
. find (\(k, _) -> k `Text.isPrefixOf` colType)
|
||||||
. Map.toList
|
. Map.toList
|
||||||
$ defaults
|
$ defaults
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
|
{-# LANGUAGE QuasiQuotes #-}
|
||||||
module Ringo.Generator.Populate.Fact (factTablePopulateSQL) where
|
module Ringo.Generator.Populate.Fact (factTablePopulateSQL) where
|
||||||
|
|
||||||
import qualified Data.Text as Text
|
import qualified Data.Text as Text
|
||||||
@ -12,12 +13,38 @@ import Data.List (nub)
|
|||||||
import Data.Maybe (fromJust, fromMaybe, mapMaybe, listToMaybe)
|
import Data.Maybe (fromJust, fromMaybe, mapMaybe, listToMaybe)
|
||||||
import Data.Monoid ((<>))
|
import Data.Monoid ((<>))
|
||||||
import Data.Text (Text)
|
import Data.Text (Text)
|
||||||
|
import Text.RawString.QQ (r)
|
||||||
|
|
||||||
import Ringo.Extractor.Internal
|
import Ringo.Extractor.Internal
|
||||||
import Ringo.Generator.Internal
|
import Ringo.Generator.Internal
|
||||||
import Ringo.Types
|
import Ringo.Types
|
||||||
import Ringo.Utils
|
import Ringo.Utils
|
||||||
|
|
||||||
|
ilog2FunctionString :: Text
|
||||||
|
ilog2FunctionString = [r|CREATE OR REPLACE FUNCTION ilog2(v integer)
|
||||||
|
RETURNS integer AS
|
||||||
|
$$
|
||||||
|
DECLARE
|
||||||
|
r integer;
|
||||||
|
shift integer;
|
||||||
|
BEGIN
|
||||||
|
IF v > x'FFFF'::integer THEN r := 1 << 4; ELSE r := 0 << 4; END IF;
|
||||||
|
v := v >> r;
|
||||||
|
IF v > x'FF'::integer THEN shift := 1 << 3; ELSE shift := 0 << 3; END IF;
|
||||||
|
v := v >> shift;
|
||||||
|
r := r | shift;
|
||||||
|
IF v > x'F'::integer THEN shift := 1 << 2; ELSE shift := 0 << 2; END IF;
|
||||||
|
v := v >> shift;
|
||||||
|
r := r | shift;
|
||||||
|
IF v > x'3'::integer THEN shift := 1 << 1; ELSE shift := 0 << 1; END IF;
|
||||||
|
v := v >> shift;
|
||||||
|
r := r | shift;
|
||||||
|
r := r | (v >> 1);
|
||||||
|
RETURN r;
|
||||||
|
END;
|
||||||
|
$$
|
||||||
|
LANGUAGE 'plpgsql' IMMUTABLE|]
|
||||||
|
|
||||||
data FactTablePopulateSelectSQL = FactTablePopulateSelectSQL
|
data FactTablePopulateSelectSQL = FactTablePopulateSelectSQL
|
||||||
{ ftpsSelectCols :: ![(Text, Text)]
|
{ ftpsSelectCols :: ![(Text, Text)]
|
||||||
, ftpsSelectTable :: !Text
|
, ftpsSelectTable :: !Text
|
||||||
@ -37,7 +64,8 @@ factTableUpdateSQL fact groupByColPrefix populateSelectSQL@FactTablePopulateSele
|
|||||||
extFactTableName =
|
extFactTableName =
|
||||||
extractedFactTableName settingFactPrefix settingFactInfix (factName fact) settingTimeUnit
|
extractedFactTableName settingFactPrefix settingFactInfix (factName fact) settingTimeUnit
|
||||||
|
|
||||||
return $ for countDistinctCols $ \(FactCountDistinct scName cName) ->
|
return . (\xs -> if null xs then xs else ilog2FunctionString : xs)
|
||||||
|
$ for countDistinctCols $ \(FactCountDistinct scName cName) ->
|
||||||
let unqCol = fullColumnName fTableName (fromMaybe tablePKColName scName) <> "::text"
|
let unqCol = fullColumnName fTableName (fromMaybe tablePKColName scName) <> "::text"
|
||||||
|
|
||||||
bucketSelectCols =
|
bucketSelectCols =
|
||||||
@ -45,7 +73,7 @@ factTableUpdateSQL fact groupByColPrefix populateSelectSQL@FactTablePopulateSele
|
|||||||
<> Text.pack (show $ bucketCount settingFactCountDistinctErrorRate - 1)
|
<> Text.pack (show $ bucketCount settingFactCountDistinctErrorRate - 1)
|
||||||
, cName <> "_bnum"
|
, cName <> "_bnum"
|
||||||
)
|
)
|
||||||
, ( "31 - floor(log(2, min(hashtext(" <> unqCol <> ") & ~(1 << 31))))::int"
|
, ( "31 - ilog2(min(hashtext(" <> unqCol <> ") & ~(1 << 31)))"
|
||||||
, cName <> "_bhash"
|
, cName <> "_bhash"
|
||||||
)
|
)
|
||||||
]
|
]
|
||||||
|
@ -60,7 +60,7 @@ validateFact Fact {..} = do
|
|||||||
, let col = findColumn cName (tableColumns table)
|
, let col = findColumn cName (tableColumns table)
|
||||||
, isJust col
|
, isJust col
|
||||||
, let cType = columnType $ fromJust col
|
, let cType = columnType $ fromJust col
|
||||||
, null . filter (`Text.isPrefixOf` cType) $ defaults ]
|
, not . any (`Text.isPrefixOf` cType) $ defaults ]
|
||||||
|
|
||||||
return $ tableVs ++ parentVs ++ colVs ++ timeVs ++ notNullVs ++ typeDefaultVs
|
return $ tableVs ++ parentVs ++ colVs ++ timeVs ++ notNullVs ++ typeDefaultVs
|
||||||
where
|
where
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
# For more information, see: https://github.com/commercialhaskell/stack/blob/master/doc/yaml_configuration.md
|
# For more information, see: https://github.com/commercialhaskell/stack/blob/master/doc/yaml_configuration.md
|
||||||
|
|
||||||
# Specifies the GHC version and set of packages available (e.g., lts-3.5, nightly-2015-09-21, ghc-7.10.2)
|
# Specifies the GHC version and set of packages available (e.g., lts-3.5, nightly-2015-09-21, ghc-7.10.2)
|
||||||
resolver: lts-3.19
|
resolver: lts-3.20
|
||||||
|
|
||||||
# Local packages, usually specified by relative directory name
|
# Local packages, usually specified by relative directory name
|
||||||
packages:
|
packages:
|
||||||
|
Loading…
Reference in New Issue
Block a user