Вы находитесь на странице: 1из 13

Business Intelligence

Guidelines Conceptual
Framework
Microsoft Confidential. © 2006 Microsoft Corporation. All rights reserved. These materials are
confidential to and maintained as a trade secret by Microsoft Corporation. Information in these
materials is restricted to Microsoft authorized recipients only. Any use, distribution or public
discussion of, and any feedback to, these materials is subject to the terms of the attached
license. By providing any feedback on these materials to Microsoft, you agree to the terms of that
license.
Copyright 2006 by Microsoft Corporation. All rights reserved. By using or providing
feedback on these materials, you agree to the attached license agreement. Please provide
feedback at BI Feedback Alias.
Microsoft Corporation Technical Documentation License Agreement (Standard)
READ THIS! THIS IS A LEGAL AGREEMENT BETWEEN MICROSOFT CORPORATION ("MICROSOFT") AND
THE RECIPIENT OF THESE MATERIALS, WHETHER AN INDIVIDUAL OR AN ENTITY ("YOU"). IF YOU HAVE
ACCESSED THIS AGREEMENT IN THE PROCESS OF DOWNLOADING MATERIALS ("MATERIALS") FROM A
MICROSOFT WEB SITE, BY CLICKING "I ACCEPT", DOWNLOADING, USING OR PROVIDING FEEDBACK ON
THE MATERIALS, YOU AGREE TO THESE TERMS. IF THIS AGREEMENT IS ATTACHED TO MATERIALS, BY
ACCESSING, USING OR PROVIDING FEEDBACK ON THE ATTACHED MATERIALS, YOU AGREE TO THESE
TERMS.
1. For good and valuable consideration, the receipt and sufficiency of which are acknowledged, You and
Microsoft agree as follows:
(a) If You are an authorized representative of the corporation or other entity designated below
("Company"), and such Company has executed a Microsoft Corporation Non-Disclosure Agreement that is
not limited to a specific subject matter or event ("Microsoft NDA"), You represent that You have
authority to act on behalf of Company and agree that the Confidential Information, as defined in the
Microsoft NDA, is subject to the terms and conditions of the Microsoft NDA and that Company will treat the
Confidential Information accordingly;
(b) If You are an individual, and have executed a Microsoft NDA, You agree that the Confidential
Information, as defined in the Microsoft NDA, is subject to the terms and conditions of the Microsoft NDA
and that You will treat the Confidential Information accordingly; or
(c) If a Microsoft NDA has not been executed, You (if You are an individual), or Company (if You are an
authorized representative of Company), as applicable, agrees: (a) to refrain from disclosing or distributing
the Confidential Information to any third party for five (5) years from the date of disclosure of the
Confidential Information by Microsoft to Company/You; (b) to refrain from reproducing or summarizing the
Confidential Information; and (c) to take reasonable security precautions, at least as great as the
precautions it takes to protect its own confidential information, but no less than reasonable care, to keep
confidential the Confidential Information. You/Company, however, may disclose Confidential Information in
accordance with a judicial or other governmental order, provided You/Company either (i) gives Microsoft
reasonable notice prior to such disclosure and to allow Microsoft a reasonable opportunity to seek a
protective order or equivalent, or (ii) obtains written assurance from the applicable judicial or
governmental entity that it will afford the Confidential Information the highest level of protection afforded
under applicable law or regulation. Confidential Information shall not include any information, however
designated, that: (i) is or subsequently becomes publicly available without Your/Company's breach of any
obligation owed to Microsoft; (ii) became known to You/Company prior to Microsoft's disclosure of such
information to You/Company pursuant to the terms of this Agreement; (iii) became known to
You/Company from a source other than Microsoft other than by the breach of an obligation of
confidentiality owed to Microsoft; or (iv) is independently developed by You/Company. For purposes of this
paragraph, "Confidential Information" means nonpublic information that Microsoft designates as being
confidential or which, under the circumstances surrounding disclosure, ought to be treated as confidential
by Recipient. "Confidential Information" includes, without limitation, information in tangible or intangible
form relating to and/or including released or unreleased Microsoft software or hardware products, the
marketing or promotion of any Microsoft product, Microsoft's business policies or practices, and
information received from others that Microsoft is obligated to treat as confidential.
2. You may review these Materials only (a) as a reference to assist You in planning and designing Your
product, service or technology ("Product") to interface with a Microsoft Product as described in these
Materials; and (b) to provide feedback on these Materials to Microsoft. All other rights are retained by
Microsoft; this agreement does not give You rights under any Microsoft patents. You may not (i) duplicate
any part of these Materials, (ii) remove this agreement or any notices from these Materials, or (iii) give
any part of these Materials, or assign or otherwise provide Your rights under this agreement, to anyone
else.
3. These Materials may contain preliminary information or inaccuracies, and may not correctly represent
any associated Microsoft Product as commercially released. All Materials are provided entirely "AS IS." To
the extent permitted by law, MICROSOFT MAKES NO WARRANTY OF ANY KIND, DISCLAIMS ALL EXPRESS,
IMPLIED AND STATUTORY WARRANTIES, AND ASSUMES NO LIABILITY TO YOU FOR ANY DAMAGES OF
ANY TYPE IN CONNECTION WITH THESE MATERIALS OR ANY INTELLECTUAL PROPERTY IN THEM.
4. If You are an entity and (a) merge into another entity or (b) a controlling ownership interest in You
changes, Your right to use these Materials automatically terminates and You must destroy them.
Copyright 2006 by Microsoft Corporation. All rights reserved. By using or providing
feedback on these materials, you agree to the attached license agreement. Please provide
feedback at BI Feedback Alias.
5. You have no obligation to give Microsoft any suggestions, comments or other feedback ("Feedback")
relating to these Materials. However, any Feedback you voluntarily provide may be used in Microsoft
Products and related specifications or other documentation (collectively, "Microsoft Offerings") which in
turn may be relied upon by other third parties to develop their own Products. Accordingly, if You do give
Microsoft Feedback on any version of these Materials or the Microsoft Offerings to which they apply, You
agree: (a) Microsoft may freely use, reproduce, license, distribute, and otherwise commercialize Your
Feedback in any Microsoft Offering; (b) You also grant third parties, without charge, only those patent
rights necessary to enable other Products to use or interface with any specific parts of a Microsoft Product
that incorporate Your Feedback; and (c) You will not give Microsoft any Feedback (i) that You have reason
to believe is subject to any patent, copyright or other intellectual property claim or right of any third
party; or (ii) subject to license terms which seek to require any Microsoft Offering incorporating or derived
from such Feedback, or other Microsoft intellectual property, to be licensed to or otherwise shared with
any third party.
6. Microsoft has no obligation to maintain confidentiality of any Microsoft Offering, but otherwise the
confidentiality of Your Feedback, including Your identity as the source of such Feedback, is governed by
Your NDA.
7. This agreement is governed by the laws of the State of Washington. Any dispute involving it must be
brought in the federal or state superior courts located in King County, Washington, and You waive any
defenses allowing the dispute to be litigated elsewhere. If there is litigation, the losing party must pay the
other party's reasonable attorneys' fees, costs and other expenses. If any part of this agreement is
unenforceable, it will be considered modified to the extent necessary to make it enforceable, and the
remainder shall continue in effect. This agreement is the entire agreement between You and Microsoft
concerning these Materials; it may be changed only by a written document signed
by both You and Microsoft.
Copyright 2006 by Microsoft Corporation. All rights reserved. By using or providing
feedback on these materials, you agree to the attached license agreement. Please provide
feedback at BI Feedback Alias.
An increasing number of businesses and corporations who are using traditional online
transaction processing (OLTP) applications are incorporating analytics into the system.
Transactional systems process massive quantities of data. Turning that data into
consumable information that can facilitate decisions is not a luxury but a necessity to stay
competitive in the business. Without the help of critical indicators providing timely
information, decision makers spend their time organizing data rather than extracting
meaning from the data. When implemented well, analytic systems provide decision
makers with the necessary tools to perform analysis (such as trend and comparative
analysis) of the data at various levels of granularity, to view the details, to see
relationships between the data, and to explore new possibilities.
Conceptual Framework for Business Intelligence
The term Business Intelligence (BI) incorporates the concept of deriving useful
information from the data in an organization. Designing a BI application involves
multiple layers. The goal of this section is to provide a common framework for architects
and developers. This framework is conceptual, technology agnostic, and covers the major
phases, features, and functionality required to effectively implement a BI solution. The
conceptual architecture in Figure 1 is comprised of five major areas and a set of cross-
cutting concerns.
Figure 1
Conceptual system architecture
Data Storage is the end result of the Data Source and Data Integration layers. Data Storage
can be termed as Data Warehouse (DW)/Data Mart (DM). Data Analysis and Data
Presentation will leverage the information stored in Data Storage.
Copyright 2006 by Microsoft Corporation. All rights reserved. By using or providing
feedback on these materials, you agree to the attached license agreement. Please provide
feedback at BI Feedback Alias.
Meta data, Security, Performance & Operations
Meta data, Security, Performance & Operations
Data Source
Data Integration
Data Storage
Data Analysis
Data Presentation
Data Source
One of the challenges when working with data in a BI system is that it typically
originates in many different data storage systems. Extracting data from those different
sources and merging the data into a single consistent dataset is challenging. The
complexity of the source system makes the situation more difficult. In this section,
extraction of data sources is outlined.
Figure 2
Data source in conceptual system architecture
Figure 3 shows that as interactions between the potential elements in the data source
increase, the complexity of the system also increases.
Figure 3
Conceptual system architecture
When working with diverse data sources, the following issues may be considered:
Copyright 2006 by Microsoft Corporation. All rights reserved. By using or providing
feedback on these materials, you agree to the attached license agreement. Please provide
feedback at BI Feedback Alias.
Meta data, Security, Performance & Operations
Meta data, Security, Performance & Operations
Data Integration
Data Storage
Data Analysis
Data Presentation
Data Source
Different source environments with different systems, including different
platforms and operating systems: Heterogeneous environments are silos of
information due to differences in business and applications. Each application has its
own database, business rules, and schema. On the other hand, homogeneous
environments, where both the source and destination are using similar technologies
and versions, are much easier to deal with. For example, a homogeneous environment
may use SQL Server 2005 on both the OLTP and online analytical processing
(OLAP) systems.
Different database systems: Source data may originate in many different types of
database system, including Oracle, DB2, SQL Server, or others. The components that
read the source data are usually referred to as data adapters. Connecting to a wide
variety of data sources means choosing the right source adapter to facilitate the
connection and retrieval of data from various data sources. Consider the type of host
systems and applications; for example, the data could be read from a standard
database (SAP, DB2, Oracle, SQL Server), from a flat file such as an IIS log or a
queue, or data could be read via a third-party broker or Web service. When connecting
to a data source system, consider appropriate security precautions such as
authentication, authorization, and secure communication.
Different schemas, data formats, and naming conventions: Legacy transaction
systems may use EBCDIC string formats. Or the source database systems may use
VSAM files, which are not relational in nature. Table and column names from an
enterprise application such as SAP may be difficult to understand.
Geographically separated source locations: If the data warehouse collects data
from geographically dispersed locations, it is necessary to consider the implications for
timing requirements on timelines and bandwidth to address the latency challenges.
Source system ownership. If the same team, product, or department owns both the
OLTP and DW system, then it is relatively easy to work out issues over ownership.
Ownership pertains to both access permission and data quality. Getting permission to
read data from transaction system databases can be a complex political problem.
However, it is still critical to understand trust boundaries so that appropriate quality
gates are in place. If control over the source system is restricted, then consider all
necessary precautions for limitations or variations of the source system.
Impact on the source system. Retrieving large quantities of data from transactional
system databases can have a negative impact on the operational applications. It may be
appropriate to consider techniques that would minimize usage of source resources and
also extract all the required information in a timely manner. All precautions need to be
taken to extract only the data that is required for business analysis.
Source data volatility: The source data can be volatile as a result of streaming data,
transactional data, periodic snapshot data, stand-alone data, and replicated data.
Consider an appropriate window for data extraction from source systems, as they are
meant to change continuously with operational or batch process transactions.
Source data volume: Data volume is a major consideration for the data extraction
operation, as it would affect both processing and resource utilization. Consider
Copyright 2006 by Microsoft Corporation. All rights reserved. By using or providing
feedback on these materials, you agree to the attached license agreement. Please provide
feedback at BI Feedback Alias.
extraction techniques that would reduce the data volume on an ongoing basis to meet the
business requirements.
Data Integration
Integration is critical for connecting business functionality, process, and data. Integration
of fragmented information requires addressing a complex set of challenges. Some of
these challenges are already mentioned in the "Data Source" section. In this section,
critical factors relating to a data integration system are explained in detail.
Figure 3
Data integration in conceptual system architecture
The following issues need to be considered while integrating data from various sources:
Data profiling: Upon gaining access to a data source, studying the underlying data, its
dependencies, and rules is a complex task. The ability to profile and analyze the source
data to identify potential issues such as anomalies, outliers, dependency violations, and
redundant or orphaned data is commonly referred to as data profiling. The three
commonly cited aspects of data profiling are: column analysis, dependency analysis, and
redundancy analysis. Column or attribute analysis evaluates the distribution, range,
completeness, uniqueness, format, type, size, and frequency of data. Dependency or
referential analysis looks for relationships, integrity, and business rule dependencies.
Finally, redundancy analysis, as the name suggests, is the technique for identifying
duplicate data in addition to orphan records.
Copyright 2006 by Microsoft Corporation. All rights reserved. By using or providing
feedback on these materials, you agree to the attached license agreement. Please provide
feedback at BI Feedback Alias.
Meta data, Security, Performance & Operations
Meta data, Security, Performance & Operations
Data Storage
Data Analysis
Data Presentation
Data Integration
Data Source
Profiling, Extraction, Cleansing, Transformation, Loading, Staging
Data extraction. Once the source data is studied, extracting meaningful data is the
next big challenge with data integration. Extracting data from the source requires
connecting to heterogeneous or homogeneous data sources using a source adapter.
During the extraction, the following issues need to be addressed: What source format are
you dealing with? What is the frequency of the extraction? What is the load? What if
there is no record tracking for the transactions on the source system? How do you
select and extract only the data that has changed since your last extraction? How do
you minimize the load on the source data during extraction?
Data staging. Staging is a location where data is temporarily stored before loading
the data into the destination warehouse. Staging may not be necessary for simple
cleansing or transformation operations; however, staging may be needed for other
reasons. For example, you may want to avoid the overhead on the source system, so checks
are performed locally on the staging server for cleansing and transformations. There may
be procedural reasons causing a time lag between data extraction and loading. You
may need to perform complex transformations that require you to access multiple
sources, lookups, or fact tables.
Data transformation. Once you have connected to the source system, identified
relevant data, and extracted the data from the source, it is ready to be transformed.
During the transformation process, you could perform various actions on the data
depending on the scenario. You could sort, split, merge, lookup, address slowly
changing dimensions, audit, pivot, or aggregate the data. This process may go hand in
hand with data cleansing. Data transformation can be done at various stages
depending on the type of operation you are performing and the data load. You could
choose to do it at the source, before the staging, or before loading the data into the
destination warehouse database.
Data cleansing. Data cleansing ensures inconsistent and invalid data is cleaned
before loading it into the data warehouse. During this process, common data quality
problems such as absent data values, inconsistent values, duplicate values, primary
key reuse, and violation of business rules are detected and corrected. Correcting data
quality issues at the source is ideal; however, in most cases it is not possible. For
example, you may not have ownership or influence over the source system, or the source
system may have dependencies which increase the complexity.
Data loading. Populating the data warehouse is the last step of the Extract, Transform,
and Load (ETL) process. If you are populating the warehouse for the first time, you
will load the historical data, followed by new transactional data on a periodic basis.
For loading large transaction data into fact tables, you have to consider issues such as:
loading data during off-peak usage, loading data into temporary tables, creating
indexes on those temporary tables similar to fact tables and merging the temporary
table as a partition back into the fact table, or dropping indexes on the fact table
before loading the data.
Several of these topics are the subjects of specific guides in this series.
Copyright 2006 by Microsoft Corporation. All rights reserved. By using or providing
feedback on these materials, you agree to the attached license agreement. Please provide
feedback at BI Feedback Alias.
Data Storage
The data stored in a warehouse is typically loaded by the ETL process. The schema for
storing information in a warehouse is different from the transactional system. In this
section, you will learn critical factors relating to a data storage system.
Figure 4
Data storage in conceptual system architecture
When you manage data storage for your warehouse, you will need to think about many
issues, including the following:
Dimensional modeling. Strategies for effectively organizing data are critical to
implementing BI/DW systems. The technique used in modeling the logical data
warehouse is commonly referred to as dimensional modeling. The guidelines for
designing data warehouse solutions are different from transaction systems. As a
designer, you have to choose appropriate schema types such as star or snowflake
schema, design fact tables with measures relevant for the business and at the
appropriate level of granularity, and address attributes that change over time by
picking appropriate types of slowly changing dimensions (SCD).
Partitions. Warehouse databases typically contain millions of rows in tables.
Dividing large tables and their indexes into multiple segments and assigning them to
filegroups is called partitioning. As a designer, you have to create a scalable partition
that enables the best possible performance: you have to choose an appropriate
partitioning strategy, choose optimal partitioning functions, appropriately place
partitions in filegroups, use index alignment, and plan for data management such as
moving new data inside a partition and removing aging data out of the partition.
Copyright 2006 by Microsoft Corporation. All rights reserved. By using or providing
feedback on these materials, you agree to the attached license agreement. Please provide
feedback at BI Feedback Alias.
Meta data, Security, Performance & Operations
Meta data, Security, Performance & Operations
Data Integration
Data Analysis
Data Presentation
Data Source
Data Storage
Dimensional Modeling, Partitions, Indexes
Indexes: Designing an appropriate indexing strategy, taking into account various
factors such as usage patterns, schema design, column types, and storage needs, is
important for efficient operation.
Data Analysis
Because BI systems typically contain massive quantities of data, tools and techniques for
managing, summarizing, querying, and analyzing the data are critical. In this section, you
will learn critical factors relating to a data analysis system.
Figure 5
Data analysis in conceptual system architecture
When you design ways to analyze data from your warehouse, you will need to think
about issues including the following:
OLAP. In addition to providing data storage, an OLAP engine such as SQL Server
Analysis Services is designed for analysis of business measures, and is
optimized for bulk loads and superior performance for business intelligence and large
complex queries. Data is organized and preprocessed into multidimensional cubes
based on a dimensional model that enables you to rapidly summarize information for
analytical queries.
Data Mining. You can use sophisticated and complex data mining algorithms to
analyze the data for exposing interesting information useful for decision makers. You
can create complex models, browse and query them, perform predictions against
those models, and test the model's accuracy. Choosing an appropriate data mining
Copyright 2006 by Microsoft Corporation. All rights reserved. By using or providing
feedback on these materials, you agree to the attached license agreement. Please provide
feedback at BI Feedback Alias.
Meta data, Security, Performance & Operations
Meta data, Security, Performance & Operations
Data Integration
Data Storage
Data Presentation
Data Source
Data Analysis
OLAP
Data Mining
algorithm for a specific business problem requires testing various algorithms
separately or together and exploring the results.
Data Presentation
Presenting meaningful and visually appealing information in interesting ways is critical
to helping analysts, managers, and leaders make informed decisions. In this section, you
will learn critical factors relating to a data presentation system.
Figure 6
Data presentation in conceptual system architecture
When you plan ways to present data from your Business Intelligence warehouse, you will
need to think about many issues, including the following:
Navigation. The presentation layer provides easy access to complex query results in a
rich user interface that facilitates navigation. Users have the ability to interactively
explore the data by drilling, pivoting, and drag-drop capabilities. Visualizing and
navigating through data to analyze root cause helps organizations gain deeper insight
into what business drivers matter the most.
Format. Depending on the type of information available and the message that you
want to deliver, it is important to choose an appropriate format. Choosing the right
format, such as chart, graph, table, report, dashboard, or Key Performance Indicator
(KPI), depends on the usage patterns such as trends, behaviors, comparisons,
correlation, change, classifications, or facts.
Host. Choosing an appropriate client depends on the business needs. If the business
demands access to information at any time, anywhere, and through any device, the
Copyright 2006 by Microsoft Corporation. All rights reserved. By using or providing
feedback on these materials, you agree to the attached license agreement. Please provide
feedback at BI Feedback Alias.
Meta data, Security, Performance & Operations
Meta data, Security, Performance & Operations
Data Integration
Data Storage
Data Analysis
Data Source
Data Presentation
Navigation, Format, Host
design needs to accommodate such a requirement. Given the broad spectrum of client
devices available in the market today — desktop PCs, Tablet PCs, and handheld
mobile devices such as Pocket PCs and Smartphones — your client presentation can be
a thin client, smart client, or mobile client. However, each type of client has its
advantages and disadvantages. When designing your application, you will need to
carefully consider the specifics of your situation before you can determine which is
appropriate.
General Concerns
Regardless of which layer of the data flow you deal with, several issues must be
constantly addressed. In this section, you will learn about the critical factors: metadata,
security, performance, and operations.
Figure 6
General concerns in conceptual system architecture
When you plan ways to present data from your Business Intelligence warehouse, you will
need to think about many issues, including the following:
Metadata.
Security.
Performance.
Operations.
Copyright 2006 by Microsoft Corporation. All rights reserved. By using or providing
feedback on these materials, you agree to the attached license agreement. Please provide
feedback at BI Feedback Alias.
Metadata, Security, Performance & Operations
Metadata, Security, Performance & Operations
Data Integration
Data Storage
Data Analysis
Data Source
Data Presentation
Summary
The conceptual framework presented in this section helps simplify the complexity of the
BI system. You can use the conceptual framework to navigate scenarios, architecture and
design challenges, solutions, technical options, and also the interaction between layers
and sub-categories. This framework is a starting point and will be explored extensively in
more detail in subsequent chapters.
Copyright 2006 by Microsoft Corporation. All rights reserved. By using or providing
feedback on these materials, you agree to the attached license agreement. Please provide
feedback at BI Feedback Alias.

Вам также может понравиться