Академический Документы
Профессиональный Документы
Культура Документы
Supervisor:
Jaroslav Zdralek
Authors
Jaroslav Zdralek
Zdenka Chmelikova
Ostrava * 2014
ii VŠB-TU Ostrava
Contents
1 Introduction...................................................................................................................... 1
1.1 Basic terms ............................................................................................................... 1
1.2 Endianness................................................................................................................ 3
1.3 Binary prefixes– Standard IEC .................................................................................. 3
1.4 Reference ................................................................................................................. 4
2 Numeral systems .............................................................................................................. 5
2.1 Polynomial of numeral system ................................................................................. 6
2.2 Numeral systems used in digital systems ................................................................. 7
2.3 Conversion between numeral systems .................................................................... 8
2.4 Reference ............................................................................................................... 10
3 Boolean algebra.............................................................................................................. 13
3.1 Propositional calculus............................................................................................. 13
3.2 Definition of Boolean algebra ................................................................................ 16
3.3 Boolean function .................................................................................................... 19
3.4 Boolean expression ................................................................................................ 21
3.5 Reference ............................................................................................................... 24
4 Design of Boolean function ............................................................................................ 25
4.1 Logic gate................................................................................................................ 26
4.2 Synthesis ................................................................................................................. 30
4.3 Minimization by Karnaugh map ............................................................................. 34
4.4 Realization by NAND and NOR logic gates ............................................................. 37
4.5 Algorithm of synthesis ............................................................................................ 38
4.6 Reference ............................................................................................................... 39
5 Real numbers.................................................................................................................. 41
5.1 Some famous bugs ................................................................................................. 42
5.2 Serious problems .................................................................................................... 43
Chaotic bank ........................................................................................................... 43
Rump’s problem ..................................................................................................... 44
iv VŠB-TU Ostrava
8.8 Multiplication ......................................................................................................... 86
8.9 Division ................................................................................................................... 88
8.10 References .............................................................................................................. 89
9 Floating point numbers .................................................................................................. 91
9.1 Significand .............................................................................................................. 94
9.2 Precision ................................................................................................................. 95
9.3 Floating point values .............................................................................................. 95
9.4 Sets of floating-point data ...................................................................................... 96
9.5 Formats defined by IEEE 754-2008 ........................................................................ 97
9.6 Binary interchange format encodings .................................................................. 100
9.7 Decimal interchange floating point format .......................................................... 104
9.8 Declet and densely-packed decimal ..................................................................... 109
9.9 Rounding .............................................................................................................. 110
9.10 Not a Number ....................................................................................................... 111
9.11 Infinity .................................................................................................................. 112
9.12 Default exceptions................................................................................................ 113
9.13 Implementation .................................................................................................... 113
9.14 References ............................................................................................................ 114
9.15 Annex 09A ............................................................................................................ 117
9.16 Annex 09B............................................................................................................. 119
9.17 Annex 09C............................................................................................................. 122
10 Floating point arithmetic .......................................................................................... 127
10.1 Rounding .............................................................................................................. 129
10.2 Exception .............................................................................................................. 132
10.3 Operation on result .............................................................................................. 132
10.4 Minifloat floating point format ............................................................................ 133
10.5 Addition and subtraction...................................................................................... 135
10.6 Multiplication ....................................................................................................... 136
10.7 Division ................................................................................................................. 138
10.8 References ............................................................................................................ 138
11 Characters and Unicode ........................................................................................... 141
11.1 Terminology.......................................................................................................... 141
11.2 Fonts ..................................................................................................................... 145
11.3 Bitmap font........................................................................................................... 145
11.4 Outline fonts ......................................................................................................... 146
VŠB-TU Ostrava v
11.5 Stroke fonts .......................................................................................................... 146
11.6 ASCII...................................................................................................................... 147
11.7 Code pages ........................................................................................................... 150
11.8 C0 and C1 control codes ....................................................................................... 152
11.9 Unicode ................................................................................................................ 153
11.10 Using Unicode....................................................................................................... 155
11.11 UTF-32 .................................................................................................................. 155
11.12 UTF-16 .................................................................................................................. 155
11.13 UTF-8 .................................................................................................................... 157
11.14 Byte order mark.................................................................................................... 159
11.15 Whitespace character .......................................................................................... 160
11.16 Newline................................................................................................................. 161
11.17 Possible notations of Unicode .............................................................................. 162
11.18 References ............................................................................................................ 163
12 Finite state machine ................................................................................................. 167
12.1 Discrete time ........................................................................................................ 169
12.2 Definitions of finite state machine ....................................................................... 169
12.3 Synchronous and asynchronous machine ............................................................ 171
12.4 Block diagram of synchronous FSM ..................................................................... 172
12.5 Description of FSM behavior ................................................................................ 173
12.6 Examples of finite state machine ......................................................................... 175
12.7 Table notation of FSM .......................................................................................... 177
12.8 Synchronization of input ...................................................................................... 178
12.9 Notation in programming languages.................................................................... 179
12.10 References ............................................................................................................ 181
13 Synchronous digital system ...................................................................................... 183
13.1 Decimal adder ...................................................................................................... 184
13.2 Data unit for decimal adder ................................................................................. 185
13.3 Control unit .......................................................................................................... 187
13.4 Simulation and realization.................................................................................... 189
13.5 Reference ............................................................................................................. 190
13.6 Annex 13A ............................................................................................................ 191
vi VŠB-TU Ostrava
Digital systems for joint teaching programme of BUT and VSB-TUO
1 Introduction
Digital system and information technology are very closely related terms. The definition of
information is broad; nowadays, information is typically connected with digital data and
digital systems. A digital system can therefore be understood as a system which processes
and produces information. Typical representatives of digital systems are mobile phones,
digital television and radio, digital photos and movies, and so on. Computers cannot be
omitted because they stood at the beginning of the modern era of digital systems. Different
modifications of computers are used in the systems listed above. Also, the term computer
has broadened to include the personal computer, notebook, and tablet.
Design and description of hardware is performed by using Boolean function and finite state
machine. After that, synthesis tools are applied and these tools transfer the description to a
suitable form for realization. For this process, it is important to know the format of data
and the algorithm of processing. In this process of design, it is also required to know the
terminology.
Bit.□
A bit is the fundamental and smallest unit of information in computing and
telecommunications. A bit has two values that can be understood as logical or binary values,
depending on the usage. Typical values of a bit are 0 or 1; True or False; plus or minus; Low
or High; etc. The term bit was coined in 1947 by J. W. Tukey as a contraction of the words
binary digit. A lowercase letter b is used as an abbreviation for a bit as a unit. More
information is in literature [wiki_0102].
VŠB-TU Ostrava 1
1 Introduction
Byte.□
Byte is a unit of data. The term byte was coined by Dr. Werner Buchholz in July
1956, during the early design phase of the IBM Stretch computer. A capital letter B is used
as an abbreviation for a byte as a unit. A byte is a group of 8 bits, where each bit has its bit
position and order. It also means that a byte can contain numbers from 0 to 255 in decimal,
from 0 to FF in hexadecimal and from 0000 0000 to 1111 1111 in the binary numeral system,
[wiki_0103]. Octet is a term which is often used in telecommunications. Nowadays, the
term octet is not frequently used and it is often replaced by byte, [wiki_0104].
Nibble.□
Nibble is a group of 4 bits, which corresponds to a half of a byte. A nibble is used to store
one hexadecimal digit. One byte has two nibbles or two hexadecimal digits. One nibble may
have decimal values from 0 to 15, binary values from 0000 to 1111 and hexadecimal values
from 0 to F. More information is in literature [wiki_0105].
Word is a group of bytes. In history, one word had a different number of bits. Today, a
number of bits in a word is given by computer architecture (size of registers, memory, etc.).
Word size can be 16, 32 or 64 bits depending on the architecture of the processor. More
information is in [wiki_0106].
Bit numbering, also called position of bits or indexing of bits, is a number that indicates
the position of a bit or a coefficient, Fig. 01-02, [wiki_0109]. If a group of n bits is given,
then the preferred bit numbering is such that the leftmost position has index n-1 and the
rightmost position has index 0. If the group is an integer number, then the indexes
correspond to the orders, Fig. 01-02. This principle of indexing is not obligatory, so it is
necessary to distinguish between the value of the index and the value of the order. They do
not have to be the same, Fig. 01-02.
Preferred
Change of
7 0 0 7
position
2 VŠB-TU Ostrava
Digital systems for joint teaching programme of BUT and VSB-TUO
1.2 Endianness
Endianness is a way of storing numbers and codes in the computer memory, of organizing
bytes for serial transmission, etc. Information of n-bit width is split into smaller groups,
called atomic elements. The atomic element is typically a byte, but it can be a word or
another m-tuple. Endianness determines which atomic element is stored in the memory at a
lower address, which atomic element is transmitted first, etc. Endianness has two principles
which are called little and big endian, Fig. 01-03 and Fig. 01-04. More information is in
[wiki_0110].
is a word is a byte 0x 08 09 0A 0B 0C 0D 0E 0F
a 0809 a 08
0A0B a+1 09
0C0D a+2 0A Big endian, MSB atomic
a+3 0E0F 0B element is stored at a lower
0C
0D
address.□
0E
a+7 a+7 0F
Little endian is a principle where LSB atomic element is stored at a lower address and it has
a lower index during the transmission, Fig. 01-03. Big endian is the opposite, it is the princi-
ple where MSB atomic element is stored at a lower address and it has a lower index during
the transmission, Fig. 01-04.
VŠB-TU Ostrava 3
1 Introduction
plicated situation relates to the 3 1/2 inch floppy disk with the capacity 1.44 MB. The capac-
ity is 1 474 560 bytes ($1.44 \cdot 10^{3} \cdot 2^{10}$). Standard IEC 60027-2 solves these irregularities and
it has been valid in the Czech Republic since 1 April 2004. The standard introduces new
prefixes, which are derived from base 2 and they are called binary prefixes, [wiki_0111].
The binary prefixes are in Fig. 01-05.
1.4 Reference
[wiki_0101] Digital data; http://en.wikipedia.org/wiki/Digital_data; on line 2014-10-21
[IEC 60027-2] IEC 60027-2, International Standard, Letter symbols to be used in electrical
technology – Part 2: Telecommunications and electronics
4 VŠB-TU Ostrava
Digital systems for joint teaching programme of BUT and VSB-TUO
2 Numeral systems
A number is an arranged group of symbols called digits.□
A prehistoric man depicted numbers using the tools available - fingers, stones, notches etc.
Some tribes in Africa used the quinary system, using the fingers of one hand. The quinary
system is a system with radix 5. Because a man has twenty fingers and toes, the vigesimal
system, a system with radix 20, was also often used. Mayan Indians used this system up to
the 6th century AD. Sumerians used a positional system with the base 60. Their way of
counting time (24 hours a day, each hour has 60 minutes, each minute has 60 seconds) has
survived four thousand years. Indians are regarded as the discoverers of the positional
system we use today. The oldest numbering system originated in India in the 3rd century BC
and then it was gradually taken over by Arabs and further spread to Greece and Europe
[Internet_0201], [wiki_0201].
VŠB-TU Ostrava 5
2 Numeral systems
power of number 10 and defines the specific position of a digit in the series. Decimal point
enables us to use a negative number as the exponent of power. For example, decimal num-
ber 0.39 can be expressed like this: $0.39 = 3 \times 10^{-1} + 9 \times 10^{-2}$.
Positional notation means that all digits from a relevant numeral system are used on each
order. After using all the digits in given order, a higher order is added. In the decimal nu-
meral system it is usual that after using all digits 0, 1 … 9 with weight $10^{0}$, a higher order $10^{1}$
is added, … 8, 9, 10, 11 …. This principle of adding weight is valid in all positional numeral
systems. Fig. 02-01 also shows the principle of adding a higher order for binary, octal and
hexadecimal numeral systems. The added orders are marked by a yellow color of the back-
ground.
In the following text, different numeral systems are used. For correct interpretation of a
number, the radix of numeral system is stated with the number. Possible notations of radix
are shown in Fig. 02-02.
Where
6 VŠB-TU Ostrava
Digital systems for joint teaching programme of BUT and VSB-TUO
m is a number of orders in the fractional part where the index (-m) is the rightmost
digit and it is called the lowest-order or the least significant digit.
1234.5678
The most significant digit The least significant digit
The most and least significant digits.□
The digit in the leftmost position is also called the most significant digit, and the digit in
the rightmost position is also called the least significant digit. In the situation when the
number does not have a fractional part, the least significant digit is the digit with weight $R^{0}$.
Decimal numeral system.□
The decimal numeral system is the most common. The radix of this system is number 10
(R = 10) and decimal numbers are often marked with capital letter D. The decimal numeral
system uses the digits 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 and certain weights have special names:
$10^{1}$ is ten, $10^{2}$ is hundred, $10^{3}$ is thousand, $10^{6}$ is million, $10^{9}$ is billion, $10^{12}$ is trillion and so on.
VŠB-TU Ostrava 7
2 Numeral systems
Each number can be expressed by equation (0201). For example, number 3725 can be
expressed as the polynomial:
$3725 = 3 \cdot 10^{3} + 7 \cdot 10^{2} + 2 \cdot 10^{1} + 5 \cdot 10^{0}$
Binary numeral system.□
The binary numeral system is fundamental in digital systems. The radix of this system is
number 2 (R = 2) and binary numbers are often marked with capital letter B. The binary
numeral system only uses the digits 0 and 1. Each number can be expressed by polynomial
(0201). For example, number 1101.101 as the polynomial is:
$1101.101_{B} = 1 \cdot 2^{3} + 1 \cdot 2^{2} + 0 \cdot 2^{1} + 1 \cdot 2^{0} + 1 \cdot 2^{-1} + 0 \cdot 2^{-2} + 1 \cdot 2^{-3}$
Hexadecimal numeral system.□
The hexadecimal numeral system is used for shortening the notation of long binary numbers.
The radix of this system is number 16 (R = 16) and capital letter H is used for indicating
hexadecimal numbers. The hexadecimal numeral system uses the digits 0, 1, 2, 3, 4, 5, 6, 7,
8, 9 and letters A, B, C, D, E, F for the remaining values, where A = 10, B = 11, C = 12,
D = 13, E = 14, F = 15. Each hexadecimal number can be expressed by the polynomial, for example:
$(\mathrm{2AF8})_{H} = 2 \cdot 16^{3} + 10 \cdot 16^{2} + 15 \cdot 16^{1} + 8 \cdot 16^{0}$
Octal numeral system.□
The octal numeral system is used very little nowadays; it was also used for shortening the
notation of binary numbers. The radix of this system is number 8 (R = 8) and capital letter
O is used for indicating octal numbers. The octal numeral system uses the digits 0, 1, 2, 3, 4,
5, 6, 7. Each octal number can be expressed by the polynomial:
$(572)_{O} = 5 \cdot 8^{2} + 7 \cdot 8^{1} + 2 \cdot 8^{0}$
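Conversion to the decimal numeral system.□
$2012_{3} = 2 \cdot 3^{3} + 0 \cdot 3^{2} + 1 \cdot 3^{1} + 2 \cdot 3^{0} = 2 \cdot 27 + 0 + 3 + 2 = 59_{10}$
$110110.01_{B} = 1 \cdot 2^{5} + 1 \cdot 2^{4} + 0 \cdot 2^{3} + 1 \cdot 2^{2} + 1 \cdot 2^{1} + 0 \cdot 2^{0} + 0 \cdot 2^{-1} + 1 \cdot 2^{-2} = 32 + 16 + 4 + 2 + 0.25 = 54.25_{D}$
$456.8_{11} = 4 \cdot 11^{2} + 5 \cdot 11^{1} + 6 \cdot 11^{0} + 8 \cdot 11^{-1} \approx 484 + 55 + 6 + 0.727 = 545.727_{10}$
$\mathrm{D4C.B}_{16} = 13 \cdot 16^{2} + 4 \cdot 16^{1} + 12 \cdot 16^{0} + 11 \cdot 16^{-1} = 3328 + 64 + 12 + 0.6875 = 3404.6875_{10}$
$\mathrm{0x2AF8} = 2 \cdot 16^{3} + 10 \cdot 16^{2} + 15 \cdot 16^{1} + 8 \cdot 16^{0} = 8192 + 2560 + 240 + 8 = \mathrm{0d}11000$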
Conversion from the decimal numeral system.□
The conversion from decimal to any system is carried out separately for the integer and
fraction parts and these parts are defined by formulas (0202) and (0203). The conversion of
the integer part is performed by division and the conversion of the fractional part is
performed by multiplication. The example of conversion of an integer number is in
Fig. 02-04 and the following algorithm was used:
8 VŠB-TU Ostrava
Digital systems for joint teaching programme of BUT and VSB-TUO
Algorithm of the integer number conversion.□
The integer number N is divided by radix R, the result is quotient $Q_{0}$ and remainder $a_{0}$,
$Q_{0} = N/R = a_{n-1}R^{n-2} + a_{n-2}R^{n-3} + \dots + a_{2}R^{1} + a_{1}R^{0}$ and remainder $a_{0}$.
The next step is a division of quotient $Q_{0}$ by radix R, the result is quotient $Q_{1}$ and
remainder $a_{1}$, $Q_{1} = Q_{0}/R = a_{n-1}R^{n-3} + a_{n-2}R^{n-4} + \dots + a_{2}R^{0}$ and remainder $a_{1}$.
Division is applied until the quotient is zero and the remainder is $a_{n-1}$, $Q_{n-1} = Q_{n-2}/R = 0$ and
remainder $a_{n-1}$.
Remainders are concatenated into the string $a_{n-1}a_{n-2} \dots a_{1}a_{0}$ and it is the integer
number in the new numeral system.
Convert $(0.367)_{10}$ to the numeral system with radix R = 16. The given precision is 16 bits.
The solution is:
0.367 * 16 = 5.872; the integer part of the product is the first digit of the searched
number, $a_{-1} = 5$.
0.872 * 16 = 13.952; the integer part of the product is digit $a_{-2} = 13_{10}$ = 0xD.
0.952 * 16 = 15.232; the integer part of the product is digit $a_{-3} = 15_{10}$ = 0xF.
0.232 * 16 = 3.712; the integer part of the product is digit $a_{-4} = 3$.
The given precision is achieved, because four hexadecimal digits correspond to 16 bits.
Answer: $(0.367)_{D} \approx (0.5\mathrm{DF}3)_{H}$
VŠB-TU Ostrava 9
2 Numeral systems
The procedure for converting binary numbers to octal and hexadecimal numbers is similar.
A binary number is split into groups from the radix point. These groups are 3 bits for an
octal number and 4 bits for a hexadecimal number. Then, each group is expressed by an
octal or a hexadecimal digit. In the case when the transferred binary number cannot be
divided into groups of three or four bits, a necessary number of zeros is added on the left-
most or rightmost side of the number. Then, the number $1100\ 1011\ 1001.111_{2}$ is equal to
$\mathrm{CB9.E}_{16}$ (the fractional group 111 is padded to 1110). The basic idea of conversion is in Fig. 02-06.
Conversion between a binary number and octal or hexadecimal numbers.□
32475.55206                                Octal number
To octal number
011 010 100 111 101.101 101 010 000 110    Groups of 3 bits
11010100111101.10110101000011              Binary number
Fig. 02-06 Conversion between binary, octal and hexadecimal numeral systems
Principle 8, 4, 2, 1.□
All numbers from 0 to 15 can be written as the sum of numbers 8, 4, 2, 1. In this way, we
can quickly convert numbers from the decimal to the binary system and backwards. Numbers
8, 4, 2, and 1 are the weights of the binary numeral system and the corresponding
exponents of the powers of 2 are 3, 2, 1 and 0. These exponents are the orders of a binary
number. For example, decimal number 11 is a sum of numbers 8, 2 and 1. It means that the
binary number has ones in orders 3, 1 and 0. Then $11_{10}$ is equal to $1011_{2}$. Fig. 02-07 shows
other examples.
2.4 Reference
[Internet_0201] ČÍSELNÉ SOUSTAVY; http://www.prevod.cz/popis.php?str=564&parent=y;
on line 2014-10-21
10 VŠB-TU Ostrava
Digital systems for joint teaching programme of BUT and VSB-TUO
VŠB-TU Ostrava 11
Digital systems for joint teaching programme of BUT and VSB-TUO
3 Boolean algebra
Boolean algebra is a fundamental mathematical tool for analyzing and synthesizing logical
circuits of all types. Before we get into the theory of Boolean algebra, we will concentrate
on the question: “What is the logic?”
Example 1. We have a bucket with water. And we have to decide when water flows. There
are two cases.
VŠB-TU Ostrava 13
3 Boolean algebra
a) In the first case, water flows if at least one of the taps Tap 1 and Tap 2 is open. This
situation is in Fig. 03-01, including the corresponding truth table.
b) In the second case, water flows if both taps Tap 1 and Tap 2 are open. This situation
is in Fig. 03-02, including the corresponding truth table.
Example 2. We have an electrical circuit, Fig. 03-03. The light is turned on/off by the switch.
This means that if the switch is on, the bulb is on. Values of voltage and/or current are not
important. There are two cases again.
Switch
a) Serial connection. In this case, the bulb is on, if both switches A and B are on. Oth-
erwise, if at least one switch is off, the bulb is off, see Fig. 03-04. A logical expres-
sion of this example is: the bulb is on if the switch A AND the switch B is on.
A B Bulb lights
Off Off No
A B
Off On No
On Off No
On On Yes
14 VŠB-TU Ostrava
Digital systems for joint teaching programme of BUT and VSB-TUO
b) Parallel connection. In this case, the bulb is on if at least one of the switches A and
B is on. If both switches A and B are off, bulb is off, see Fig. 03-05. A logical expres-
sion of this example is: the bulb is on if the switch A OR the switch B is on.
A A B Bulb lights
Off Off No
Off On Yes
B On Off Yes
On On Yes
Reading via propositional word connectives: If it's raining AND I'll take a raincoat,
then I will not be soaked.
b) It is not true that John plays the guitar and the piano. John cannot play the guitar.
Reading via propositional word connectives: It isn’t true, that John plays the guitar
AND John plays the piano AND it isn’t true that John can play the guitar.
VŠB-TU Ostrava 15
3 Boolean algebra
In literature, Boolean algebra is defined in different ways, [wiki_0301]. The following defini-
tion is based on axioms and derived theorems, [Wakerly_2006], [Roth_2004]. Boolean the-
orems have their names. The definition of Boolean algebra uses the term of element. The
element can be a value, a variable, an expression.
Definition of Boolean algebra. □
Boolean algebra is a six-tuple consisting of a set A, a binary operation · (and), a binary
operation + (or), a unary operation ’ (not, complement) and two elements 0 and 1. In such a
six-tuple, the following axioms are valid for all elements a, b … of set A and elements 0 and 1:
(A1) a= 0 if a ≠ 1 (A1D) a = 1 if a ≠ 0
16 VŠB-TU Ostrava
Digital systems for joint teaching programme of BUT and VSB-TUO
De Morgan rules. □
The theorems T13 and T14 are called De Morgan rules. These rules are used to swap a
logical sum and a logical product. De Morgan rules can also be expressed by sentences:
• The negation of a logical multiplication is the logical addition of the negations.
• The negation of a logical addition is the logical multiplication of the negations.
Fig. 03-06, Fig. 03-07 and Fig. 03-08 show the application of Boolean theorems and axioms.
Fig. 03-06 and Fig. 03-07 show the application of theorems T8 and T8D in disjunctive and
conjunctive forms. Fig. 03-08 shows the application of De Morgan theorems for simplifica-
tion of an expression.
VŠB-TU Ostrava 17
3 Boolean algebra
Theorem T8 Theorem T8
= ab + a´c´e
18 VŠB-TU Ostrava
Digital systems for joint teaching programme of BUT and VSB-TUO
Expression is any combination of variables, logical values and operations, for in-
stance a ∙ b + c, and the simplest expression as 0, 1, a, a’ …
Equation has two sides equal to each other. The value of the left side equals
the value of the right side, for instance
z = a, Decode = (a + b’) · c + a ∙ d
0 = 3x - 1
Statement is an element of a programming language, typically assigning the value
of the expression on the right side to the variable on the left side. Many
programming languages use the equals sign (=) for the assignment operation. In C language,
the statement “z = a” is read as “z is assigned a”. Example:
VŠB-TU Ostrava 19
3 Boolean algebra
Truth table.□
The basic way of expressing the mapping is a table. For a Boolean function, the table is
called the truth table, [wiki_0304]. The domain of definition contains $2^{n}$ combinations of
logical values for n variables, and therefore the truth table has $2^{n}$ rows. The output value
of the Boolean function is assigned for each combination. Only the truth table describes a
Boolean function clearly and uniquely. Fig. 03-09 shows a Boolean function with 3 variables,
where the mapping is $f: \{0, 1\}^{3} \to \{0, 1\}$.
Incomplete Boolean function.□
An incomplete Boolean function can have 3 values on the output: 0, 1 and X. The value X is
called “don’t care”. An incomplete Boolean function is the mapping:
$f: \{0, 1\}^{n} \to \{0, 1, X\}$ (0302)
Where
An example of an incomplete function is in Fig. 03-10. We can often see the value “don’t
care” as an input value as well. One column is added to the truth table and it contains the
number that corresponds to the combination of variables. This number makes orientation in
the truth table easier.
No.   x1   x0   f
0     0    0    0
1     0    1    X
2     1    0    0
3     1    1    X

No.       x2   x1   x0   f
0 and 1   0    0    X    0
2 and 3   0    1    X    0
4         1    0    0    1
5         1    0    1    1
6 and 7   1    1    X    1
Not(y implies x)
x = y (x xnor y)
x ≠ y (x xor y)
Not (x and y)
Not (x or y)
x implies y
y implies x
x and y
x or y
not y
Name of
not x
zero
One
y
x
function
Where
20 VŠB-TU Ostrava
Digital systems for joint teaching programme of BUT and VSB-TUO
For two input variables, there are four possible combinations of values 0 and 1. It is possible
to define 16 different Boolean functions, see Fig. 03-11. Only the marked functions are usu-
ally used in practice. For n variables, it is possible to define a total number of Boolean func-
tions according to formula (0303).
Literal – is a variable or the complement of a variable, for instance, a1, a1’, x1, y1, z1, Tap1 …
Product term – is a single literal or logical product of two or more literals, for instance,
x·y·z', Y'·Z·X ', z …
Sum term – is a single literal or a logical sum of two or more literals, for instance a’, x + y´+
z, X '+ Y …
Normal term – is a product or sum term in which each variable must appear only once.
Minterm – an n-variable minterm is a normal product term with n literals. The result of
minterm is equal to 1 only for one combination of values for n variables. And for remaining Minterm, mi.□
combinations the result of minterm is equal to 0. There are 2^n such minterms.
The condition of assembling the minterm is such that the product must be equal to 1 only
for one combination of variables (axiom A4), and the product must be equal to 0 for re- Assembling the
maining combinations, Fig. 03-12. It means that if a variable x has value 0, then the com- minterm.□
plement of x is used in the minterm, not x. And, if a variable x has value 1, then the variable
is used in a direct form in the minterm, x.
Minterm is denoted by mi, (small letter m), with the index. The index is a decimal number
Indexing the
that corresponds to n-tuple. Each n-tuple of variables can be read as a binary number.
minterm.□
Maxterm – an n-variable maxterm is a normal sum term with n literals. The result of max-
term is equal to 0 only for one combination of values for n variables. And the result of max- Maxterm, Mi.□
term is equal to 1 for remaining combinations. There are 2^n such maxterms.
VŠB-TU Ostrava 21
3 Boolean algebra
The condition of assembling the maxterm is such that the sum must be equal to 0 only for
one combination of variables (axiom A4D), and the sum must be equal to 1 for remaining Assembling
combinations, Fig. 03-12. It means that if a variable x has value 0, then the variable is used the maxterm.□
in a direct form in the maxterm, x. And, if a variable x has value 1, then the complement of
x is used in the maxterm, not x.
Maxterm is denoted by Mi, (capital letter M), with the index. The index is a decimal number Indexing the
that corresponds to n-tuple. Each n-tuple of variables can be read as a binary number. maxterm.□
Each Boolean function can be unfolded as the addition or multiplication of the simplest
functions fi, where each simplest function fi defines the output for one row of the truth
table. The sum of the simplest function fi is in Fig. 03-13 and axiom A5D and theorem T1
were used. The function f can be expressed by formula (0304). The theorem T1 (a + 0 = a)
simplifies formula (0304) into (0305).
a b f a b f0 a b f1 a b f2 a b f3
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 1 1 = 0 1 0 + 0 1 1 + 0 1 0 + 0 1 0
1 0 1 1 0 0 1 0 0 1 0 1 1 0 0
1 1 0 1 1 0 1 1 0 1 1 0 1 1 0
Fig. 03-13 Sum-decomposition of function
f = f0 + f1 + f2 + f3 (0304)
f = f1 + f2 (0305)
The simplest functions f1 and f2 fulfil a logical multiplication, only one combination of input
has value 1 and remaining combinations have value 0. Therefore, the simplest functions f1 The exclusive-
and f2 can be expressed by the minterms. The minterm for function f1 is a’b and the or function.□
minterm for function f2 is ab’. After applying minterms to formula (0305), there is formula
(0306). Formula (0306) also defines the exclusive-or function.
Similarly, if the axiom A5 and theorem T1D are applied, then Fig. 03-14 shows the product-
decomposition of function f to the simplest function fi. The function f can be expressed by
formula (0307). The theorem T1D (a · 1 = a) simplifies formula (0307) into (0308).
a b f a b f0 a b f1 a b f2 a b f3
0 0 0 0 0 0 0 0 1 0 0 1 0 0 1
0 1 1 = 0 1 1 * 0 1 1 * 0 1 1 * 0 1 1
1 0 1 1 0 1 1 0 1 1 0 1 1 0 1
1 1 0 1 1 1 1 1 1 1 1 1 1 1 0
f = f0 * f1 * f2 * f3 (0307)
f = f0 * f3 (0308)
22 VŠB-TU Ostrava
Digital systems for joint teaching programme of BUT and VSB-TUO
The simplest functions f0 and f3 fulfil logical addition, only one combination of input has
value 0 and remaining combinations have value 1. Therefore, the simplest functions f0 and
f3 can be expressed by the maxterms. The maxterm for function f0 is a + b and the maxterm
for function f3 is a’ + b’. After applying maxterms to formula (0308), there is formula (0309).
The forms of formulas (0306) and (0309) have their names. The name of formula (0306) is
the Canonical Disjunctive Normal Form (CDNF), the minterm canonical form or the sum of
products. It means that each product contains all variables and corresponds to one row of Canonical nor-
the truth table. The name of formula (0309) is the Canonical Conjunctive Normal Form mal form.□
(CCNF), the maxterm canonical form or the product of sums. It means that each sum con-
tains all variables and corresponds to one row of the truth table, [wiki_0303]. A general
notation of canonical normal forms is given by formula (0310) for the sum of products and
by formula (0311) for the product of sums.
𝑓(𝑥𝑛−1 … 𝑥1 , 𝑥0 ) = ∑ 𝑚𝑖 (0310)
𝑓(𝑥𝑛−1 … 𝑥1 , 𝑥0 ) = ∏ 𝑀𝑖 (0311)
Where
f(xn-1, …x1, x0) is a Boolean function with the definition of the variable orders.
mi are minterms.
Mi are maxterms.
The real practice uses the minterm and the maxterm canonical forms for defining Boolean
function. The notation must take into account the incomplete Boolean function with the
value “don’t care”. The notations below, formulas (0310) and (0311), have two parts; the
first part describes the value 1 or 0 and the second one describes the value “don’t care”.
Formula (0310) uses minterms for description and formula (0311) uses maxterms.
Where
f(xn-1 … x1, x0) is a Boolean function with the definition of the variable orders.
m(i, j, …) is a list of indexes that correspond to the minterms.
d(k, l, …) is a list of indexes that correspond to the minterms for value “don’t care”.
M(o, p, …) is a list of indexes that correspond to the maxterms.
D(r, s, …) is a list of indexes that correspond to the maxterms for value “don’t care”.
Fig. 03-15 shows the definition of an incomplete Boolean function with needed derived
minterms and maxterms. The application of the minterm and the maxterm canonical forms
for the definition of Boolean function is given by formulas (0312) and (0313). The substitu-
tion of indexes by minterms or maxterms is problematic, because at this moment the
VŠB-TU Ostrava 23
3 Boolean algebra
“don’t care“ value gets a specific value, 1 or 0. After this substitution, the incomplete Bool-
ean function is not uniquely defined.
3.5 Reference
[Warkley_2006] John F. Wakerly: Digital Design, Principles and Practices, Fourth Edi-
tion; Prentice Hall 2006, ISBN 0-13-186389-4
24 VŠB-TU Ostrava
Digital systems for joint teaching programme of BUT and VSB-TUO
The basic steps of the digital system design are synthesis and realization. Synthesis is a pro-
cess that transforms a description of the digital system into a suitable form for realization.
If the small level of integration is applied to a realization, then the circuit diagram is a suita-
ble result of synthesis. Circuit diagram shows the connection of gates, flip-flops, multiplex-
es, etc. With the increasing integration, the result of synthesis changes into a suitable file
that is independent on realization. In case of the programmable logic devices, the result of
synthesis is used to generate a file that describes the state of programmable fuses. In case
of the application-specific integrated circuit - ASIC, this file is used as an input for a design
of masks for producing integrated circuits.
Synthesis is made by special programs, called synthesis tools, which are a part of electronic
Synthesis.□
design automation - EDA tools, [wiki_0401]. Files in hardware description languages – HDL
are the input of synthesis. These files can describe Boolean functions, finite state machines,
system in a higher level of description as register transfer level - RTL description, and/or
system with component instantiation. This chapter is devoted to the synthesis of simple
Boolean function and the expected result is a combinational logic, [wiki_0403]. In this case, Combinational
the combinational logic is a collection of connected logic gates. The behavior of combina- logic.□
tional logic and Boolean function is the same, in both of them the output only depends on
the current input.
a y
b’
c y = a((b’c)cb’)
b’
c
VŠB-TU Ostrava 25
4 Design of Boolean function
The preferred result of the Boolean function synthesis is a two-level combinational logic.
The importance of 2-level logic lies in:
a y
b
c’ y = a + b c’
This chapter is devoted to synthesis of small two-level combinational logics, Fig. 04-02, and Minimization
to synthesis that results in the minimum number of basic logic gates. Logic gates AND, OR, criteria.□
NOT, NAND and NOR are considered as basic logic gates.
Synthesis with other criteria is outside of this textbook. These are, for example, synthesis of
multilevel logic, synthesis by using special logic gates as XOR gates, synthesis of large Bool-
ean functions with minimum propagation delay. These principles of synthesis can be found
in literature [Ergovac_Lang_2004], [Koren_2008], [Katz_Borriello_2005], [Roth_2004],
[Warkley_2006], [Fristacky_1986] and others.
Synthesis of 64-bit adder or multiplier with the minimum propagation delay cri-
terion.
Synthesis of comparison circuit for two 128-bit numbers.
Synthesis of circuits that are based on linear algebra, i.e. coding, encryption, etc.□
The realization of logic gate can be based on pneumatic, hydraulic, electric and other prin-
ciples. However, famous logic gates are based on electronic principles and all modern digi-
tal systems are realized by these electronic logic gates.
Each logic gate can be described in several ways and there are more names of operations
for one gate. The basic description of logic gate is truth table and corresponding Karnaugh
map. Next, the gate is described by sentences and possible program statements. Some ex-
pressions are derived by using DeMorgan rules. The description also contains a graphical
symbol that is used in circuit diagram.
26 VŠB-TU Ostrava
Digital systems for joint teaching programme of BUT and VSB-TUO
NOT gate. NOT gate is usually called an inverter, [wiki_0404]. It produces the output value
that is the opposite of its input value. An alternate name for complementation is inverter,
one’s complement, negation, complement, etc. The NOT gate can be described by:
AND gate. AND gate corresponds to Boolean multiplication and alternate names are con-
junction, operation AND, logical multiplication, logical product, [wiki_0405]. AND gate can
be described by one of the sentences:
AND gate produces 1 if and only if all of its inputs are equal to 1.
The product is equal to 1 if inputs “a” and simultaneously “b” are equal to 1; else is
equal to 0.
The product is equal to 0 if input “a” or “b” equals 0; else is equal to 1.
Truth table
Karnaugh map Graphic symbol
a b y a
0 0 0
0 1 0 0 0 a y
1 0 0 b
b 0 1
1 1 1
Fig. 04-04 AND gate
OR gate. OR gate corresponds to Boolean addition and alternate names are disjunction,
operation OR, logical addition, inclusive OR, logical sum, [wiki_0406]. OR gate can be de-
scribed by one of the sentences:
OR gate produces 1 if and only if one or more of its inputs are equal to 1.
VŠB-TU Ostrava 27
4 Design of Boolean function
The sum is equal to 1 if inputs “a” or “b” are equal to 1; else sum is equal to 0.
The sum is equal to 0 if inputs “a” and simultaneously “b” are equal to 0; else is
equal to 1.
a + b; a OR b; a # b; a ∨ b; a | b; a || b
z = a || b; zz = a | b; z = !(!a && !b); zz = !(!a && !b);
If (a == 1 | b == 1) then z = 1; else z = 0;
OR gate□
If (a == 0 & b == 0) z = 0; else z = 1;
Truth table
Karnaugh map Graphic symbol
a b y a
0 0 0
0 1 1 0 1 a y
b
1 0 1 b 1 1
1 1 1
Fig. 04-05 OR gate
NAND gate. NAND gate is the negation of AND, [wiki_0407]. Alternate names are non-
conjunction, operation NAND, negation AND, negation of multiplication, non-product,
complement logical multiplication, not logical product, Sheffer stroke (↑). NAND gate can
be described by one of the sentences:
!(a . b); (a . b)’; a NAND b; ¬(a ∧ b); ~(a & b); !(a && b);
NAND gate□
z := !(a && b); zz := !(a & b); z = !a || !b; zz = !a || !b; z := !(a & b & c);
If (a == 1 & b == 1) z = 0 else z = 1; NAND gate does not
If (a == 0 | b == 0) z = 1 else z = 0; fulfil associative law.□
Truth table
Karnaugh map Graphic symbol
a b y
a
0 0 1
0 1 1 a y
1 1
b
1 0 1 b 1 0
1 1 0
28 VŠB-TU Ostrava
Digital systems for joint teaching programme of BUT and VSB-TUO
NOR gate. NOR gate is the negation of OR, [wiki_0408]. Alternate names are non-
disjunction, operation NOR, negation OR, negation of addition, Peirce's arrow (↓). NOR gate
can be described by one of the sentences:
NOR gate produces 1 if and only if all of its inputs are equal to 0.
The negation of addition is equal to 1 if variables “a” and simultaneously “b”
are equal to 0; else is equal to 0.
The negation of addition is equal to 0 if variable “a” or “b” is equal to 1; else is
equal to 1.
!(a + b); (a + b)’; a NOR b; ¬(a ∨ b); ~(a | b); !(a || b);
NOR gate□
z := !(a || b); zz := !(a | b); z = !a && !b; zz = !a & !b;
If (a == 1 | b == 1) z = 0; else z = 1;
NOR gate does not fulfil
If (a == 0 & b == 0) z = 1; else z = 0;
associative law.□
Truth table
Karnaugh map Graphic symbol
a
a b y
0 0 1 a y
1 0
b
0 1 0 b 0 0
1 0 0
1 1 0
Fig. 04-07 NOR gate
XOR gate. XOR operation corresponds to mathematical sum of modulo 2. XOR gate has
alternate names as non-equivalence, exclusive OR, [wiki_0409]. XOR gate can be described
by one of the sentences:
The output of 2-input XOR gate is equal to 1 if inputs are not equal.
The output of 2-input XOR gate is equal to 0 if inputs are equal.
XOR operation can be expressed as a XOR b = a ⊕ b=ab´+ a´b.
a ⊕ b; a XOR b; a ^ b;
z := a ^ b; z = (!a & b)||(a & !b);
If ((a & b) | (!a & !b)) z = 0; else z = 1; XOR gate□
If ((a | b) & (!a | !b)) z = 1; else z = 0;
VŠB-TU Ostrava 29
4 Design of Boolean function
XNOR gate. XNOR operation is non XOR. Alternate names are equivalence, exclusive NOR,
[wiki_0410]. XNOR gate can be described by one of the sentences:
Truth table
Karnaugh map Graphic symbol
a b y
a
0 0 1
0 1 0 1 0 a y
1 0 0 b
b 0 1
1 1 1
Fig. 04-09 XNOR gate
4.2 Synthesis
This subchapter is devoted to the synthesis of Boolean function into 2-level combinational
logic. The goal of this is to find the canonical form of Boolean expression with minimum
number of literals. The literal is a variable in direct or negation form, e.g. variable a or a’.
Therefore the result of synthesis will be either minimum Sum of Products (minimum dis-
junctive form) or minimum Product of Sums (minimum conjunctive).
For manual synthesis, two ways can be applied, minimization by using Boolean axioms and
theorems, and Karnaugh maps. Examples of minimization by Boolean theorems are in pre-
vious chapter and in Fig. 04-10. This principle is only suitable for a complete Boolean func-
tion. An incomplete Boolean function contains values 0, 1 and X – don’t care. Then, the
Boolean expression cannot work with the value X – “don’t care”. This is a reason why truth
table uniquely defines Boolean function.□
30 VŠB-TU Ostrava
Digital systems for joint teaching programme of BUT and VSB-TUO
y(c, b, a) = ∏M(0, 4, 6) = M0 · M4 · M6
y = (b + a) (c’ + a)
2 3 4 5 7 6 4 5 7 6
b c c
12 13 15 14
d 8 9 11 10
Columns and rows are labeled by the line with the name of a variable. The cells covered by
the line have value 1 for given variable and the outside cells have value 0 for given variable. Each cell has
On this basis, it is possible to derive minterm and maxterm for each cell. Therefore, the minterm and
maxterm.□
a b a b
0 1 3 2 0 1 3 2 Variable d has
Variable a has
4 5 7 6 c 4 5 7 6 value 1
c value 1
12 13 15 14 12 13 15 14 Variable d has
Variable a has
d 8 9 11 10
d 8 9 11 10 value 0
value 0
VŠB-TU Ostrava 31
4 Design of Boolean function
minterm or maxterm can be expressed as an index and then each cell has its index (green
color). Minterm for cell with index 13, binary 1101, is dcb’a. Maxterm for cell with index 3,
binary 0011, is d + c + b’ + a’.
Truth table has rows and each row corresponds to different minterm or maxterm. Karnaugh
map has cells and each cell has different minterm or maxterm. Therefore, it is possible to
transfer truth table into Karnaugh map, Fig. 04-12. Each minterm or maxterm has its index
that is the same in truth table, in Karnaugh map and in the definition of Boolean function,
formulas (0401) and (0402).
Where
f(xn-1 … x1, x0) is a Boolean function with the definition of the variable orders.
m(i, j, …) is a list of indexes that correspond to the minterms.
d(k, l, …) is a list of indexes that correspond to the minterms for value “don’t care”.
M(o, p, …) is a list of indexes that correspond to the maxterms.
D(r, s, …) is a list of indexes that correspond to the maxterms for value “don’t care”.
No. c b a f
b
0 0 0 0 0 a
1 0 0 1 X 0 1 3 2
0 X X 1
2 0 1 0 1
4 5 7 6
3 0 1 1 X c 1 0 0 1
4 1 0 0 1
5 1 0 1 0
6 1 1 0 1
7 1 1 1 0
Fig. 04-12 Enrolment of Boolean function into Karnaugh map
The arranged cells in Karnaugh map ensure that the adjacent cells only differ in the value of
one variable. Adjacent cells are arranged horizontally or vertically (not diagonally). The cells Adjacent cells.□
of outer rows and columns fulfil this condition, [Katz_Borriello_2005]. It is possible to apply
theorems T10 or T10D on adjacent cells or a group of adjacent cells. The number of adja-
cent cells is the power of 2.
Cell with index 2 has minterm d’c’ba’ and cell with index 3 has minterm d’c’ba, Fig. 04-13. If
sum of products is created, then corresponding Boolean expression is d’c’ba’ + d’c’ba. The
theorem T10 can be applied and the resulted expression is the product term d’c’b. It is a Product term.
□
blue circle in Karnaugh map, Fig. 04-13. Similarly, it is possible to derive the product term
for cells 6 and 7. These two circles are also adjacent, they only differ in one variable and the
32 VŠB-TU Ostrava
Digital systems for joint teaching programme of BUT and VSB-TUO
theorem T10 can be applied. The result is the product term d’b, it is a red circle in Kar-
naugh map, Fig. 04-13.
Similarly, it is possible to apply the above steps for maxterms, Fig. 04-13. Cells with indexes
Sum term.□
4 and 6 are adjacent and the theorems T10D can be applied on maxterms, (d + c’+ b + a) (d
+ c’+ b’ + a) = (d + c’+ a). The result is the sum term. Cells with indexes 12 and 14 are adja-
cent to cells 4 and 6 and theorem T10D can be applied on sum terms, (d + c’+ a) (d’ + c’+ a)
= (c’+ a).
a b a b
0 1 3 2
d’c’b 0 1 3 2
1 1 d + c’ + a
4 5 7 6 d’cb 4 5 7 6
c 1 X c 0 X
12 13 15 14 12 13 15 14 c’ + a
d’b 0 0
d 8 9 11 10 d 8 9 11 10 d’ + c’ + a
In case of simplification and minimization of Boolean function, the circles are drawn in Kar-
naugh map. The number of cells in the circle is 1, 2, 4, 8 …, the power of 2, and must be
placed in a square or a rectangle. If the loop is plotted on Karnaugh map, then it is possible
to derive the corresponding product term or sum term directly from Karnaugh map.
Cells on the left/right outer edges or on the top/bottom outer edges of the map
are adjacent.
Consequently, the cells in the corners of the map are adjacent. □
VŠB-TU Ostrava 33
4 Design of Boolean function
If the circle covers areas of the map where the variable is 0 as well as areas where it
is 1, then the variable does not appear in the sum term.
If Karnaugh map is for n-variables and the circle contains 2^i cells, then each term
contains n - i literals.
For example, Karnaugh map is for 4 variables.
If the circle contains 1 cell, then the term contains 4 literals.
If the circle contains 2 cells, then the term contains 3 literals.
If the circle contains 4 cells, then the term contains 2 literals.
Etc.□
Create all maximal circles that cover all 1-cells. The circle can cover suitable X-cells
that ensure maximal cells in the circle. The possible number of cells in the circle is
1, 2, 4, 8…, it is the power of 2.
Minimize the number of circles so that all 1-cells stay covered. The remaining cir-
cles must have minimum number of literals.
Express product terms for the remaining circles. Minimization for Sum of Products.□
Create Sum of Products.
All practical minimizations will be performed on incomplete Boolean function that is given
by formula (0403). Minimization for Sum of Products is shown in Fig. 04-14 and it leads to 4
possible results. All results are equivalent and correspond to given incomplete Boolean
function. However, they correspond to 4 different complete Boolean functions. The result-
ing functions are:
Function f1 contains yellow and green product terms and minimum Sum of Prod-
ucts is formula (0404).
Function f2 contains yellow and blue product terms and minimum Sum of Products
is formula (0405).
34 VŠB-TU Ostrava
Digital systems for joint teaching programme of BUT and VSB-TUO
a b
f(d, c, b, a) = ∑m(2, 3, 6, 10) + ∑d(1, 7, 9, 11, 14)
0 X 1 1
c 0 0 X 1 Product terms
0 0 0 X
b d a’b
a 0 X X 1
a b
0 0 1X 31 21
Minimization 0 X 1 1 bd’
c 40 50 7X 61 c 0 0 X 1
4 possibilities 0 0 0 X
120 130 150 14X d
0 X X 1 ac’
d 8 9 11 X 101
0 X b
a
b 0 X 1 1 bc’
a
c 0 0 X 1
0 X 1 1
0 0 0 X
c 0 0 X 1 d
0 X X 1
0 0 0 X
d Minimization for Sum of Products.□
0 X X 1
Function f3 contains yellow and red product terms and minimum Sum of Products
is formula (0406).
Function f4 contains red and green product terms and minimum Sum of Products is
formula (0407).
One of possible logical networks of combinational logic for given Boolean function is in Fig.
04-15. For realization, logic gates AND and OR are chosen. The application of these logic
gates leads to 2-level combinational logic, and this realization is also called AND-OR. This
name is created by the order of logic gates in diagram. The network AND-OR is natural for
Sum of Products.
AND-OR combinational logic.□
a
b
f1 f1 = a’b + bd’
b
d’
The next possibility is to create minimum Product of Sums. Products of Sums form is de-
rived for value 0 by the following algorithm:
VŠB-TU Ostrava 35
4 Design of Boolean function
Create all maximal circles that cover all 0-cells. The circle can cover suitable X-cells
that ensure maximal cells in the circle. The possible number of cells in the circle is
1, 2, 4, 8…, it is the power of 2.
Minimize the number of circles so that all 0-cells stay covered. The remaining cir-
cles must have minimum number of literals.
Minimization for Product of Sums.□
Express sum term for the remaining circles.
Create Product of Sums.
Minimization for Product of Sums is shown in Fig. 04-16 and it leads to 3 possible results.
The function used is the same as in the previous example, formula (0403). All results are
equivalent and correspond to given incomplete Boolean function. However, they corre-
spond to 3 different complete Boolean functions. The resulting functions are:
Function f5 contains yellow and green sum terms and minimum Product of Sums is
formula (0408).
Function f6 contains yellow and red sum terms and minimum Product of Sums is
formula (0409).
Function f7 contains yellow and blue sum terms and minimum Product of Sums is
formula (0410).
f5 = b (a’ + c’) (0408)
One of possible logical networks for Product of Sums is the application OR and AND logic
gates, in this order. Circuit diagram of combinational logic for given Boolean function is in
36 VŠB-TU Ostrava
Digital systems for joint teaching programme of BUT and VSB-TUO
Fig. 04-17. The application of these logic gates leads to 2-level combinational logic, and this
realization is also called OR-AND. This name is created by the order of logic gates in dia-
gram. The network OR-AND is natural for Product of Sums.
b f6 f6 = b (c’+ d’)
c’
d’
OR-AND combinational logic.□
One incomplete Boolean function was minimized. Let’s study “don’t care” cell, e.g. cell with
index 7. The original value of cell is “don’t care”. If the circles are created, the value 0 or 1 is
substituted into this cell. Boolean function contains more cells with “don’t care” value. The
result is 7 expressions for one incomplete Boolean function. Four expressions are minimum
Sum of Products and 3 expressions are minimum Product of Sums. All 7 expressions corre-
spond to one incomplete Boolean function. However, more complete Boolean functions
can be derived. This problem is caused by “don’t care” value.
If the complete Boolean function is minimized, then it is possible to obtain more minimum
expressions as Sum of Products or Product of Sums. Then all expressions correspond to one
complete function.
f1 = a’b + bd’
. a’
(ab) (bd) 1
b
3 f1
1st NAND 2nd NAND b
d’ 2
rd
3 NAND
NAND combinational logic.□
Fig. 04-18 NAND combinational logic
NAND two-level combinational logic is derived from Sum of Products by the application of
theorem T5 and DeMorgan rule. For an expression, e.g. f1, formula (0404), it is possible to
write:
VŠB-TU Ostrava 37
4 Design of Boolean function
$f_1 = \bar{a}b + b\bar{d} = \overline{\overline{\bar{a}b + b\bar{d}}} = \overline{\overline{(\bar{a}b)} \cdot \overline{(b\bar{d})}}$
The result is the expression that contains only NAND operations. The circuit diagram is in
Fig. 04-18 and it is 2-level combinational logic.
NOR two-level combinational logic is derived from Product of Sums by the application of
theorem T5 and DeMorgan rule. For an expression, e.g. f6, formula (0409), it is possible to
write:
$f_6 = b(\bar{c} + \bar{d}) = \overline{\overline{b(\bar{c} + \bar{d})}} = \overline{\bar{b} + \overline{(\bar{c} + \bar{d})}}$
The result is the expression that contains only NOR operations. The circuit diagram is in Fig.
04-19 and it is 2-level combinational logic.
+ (c + d) b’
b f6
c’ 2
1st NOR d’ 1
f6 = b (c’+ d’)
2nd NOR
NOR combinational logic.□
Fig. 04-19 NOR combinational logic
38 VŠB-TU Ostrava
Digital systems for joint teaching programme of BUT and VSB-TUO
4.6 Reference
[Ergovac_Lang_2004] Milos D. Ercegovac, Tomas Lang: Digital Arithmetic; Morgan Kauf-
mann Publishers, 2004, ISBN 1-55860-798-6
[Fristacky_1986] Frištacký N., Kolesár M., Kolenička J., Hlavatý J.: Logické systémy;
Alfa a SNTL 1986
[Warkley_2006] John F. Wakerly: Digital Design, Principles and Practices, Fourth Edi-
tion; Prentice Hall 2006, ISBN 0-13-186389-4
VŠB-TU Ostrava 39
4 Design of Boolean function
40 VŠB-TU Ostrava
Digital systems for joint teaching programme of BUT and VSB-TUO
5 Real numbers
All of us use numbers and this chapter deals with their distribution to groups and the possi-
ℝ, real numbers
bility to represent them in digital world, mainly in the computer. In mathematics, a real
are e.g.: +1; -1;
number is any number on the real number line from minus infinity to plus infinity,
+1.41; -5467.01;
[wiki_0501]. The symbol boldface R or ℝ (double-struck capital R, Unicode+211D) is used
…□
for the denotation of the set of real numbers. The set of real numbers is divided into two
groups, rational numbers and irrational numbers.
Rational number is any number that can be expressed as a quotient or a fraction p/q of two
ℚ, rational
integer numbers with the denominator q not equal to zero, [wiki_0502]. It means that inte-
numbers are
ger numbers are part of rational numbers with denominator equal to one, e.g. 5/1 equals 5.
e.g.: +1; -1/1;
Also numbers 25/100, b101/2^3 are rational numbers and it is possible to write them down
0.25; 2/3; …□
as 0.25, b0.101; in this case, the number of digits is finite. On the contrary, number 1/3
belongs to the second group of rational numbers, where the fraction is the only precise
denotation of a number. The notation with the radix point, e.g. 0.333…, is not precise
Irrational num-
enough. The set of rational numbers is denoted by boldface Q or ℚ (double-struck capital
bers are e.g.: √2;
Q, Unicode+211A).
π=3.14…;
Irrational numbers are the remaining numbers, opposite to rational numbers, [wiki_0503]. e=2.71…; …□
E.g. number equal to the square root of two (√2) cannot be expressed precisely as a num-
ber with radix point and finite number of digits (1.41…) or as a fraction. Another example
may be π number or e Euler value. We only use their approximate values, 3.14 or 2.71.
Integer numbers are a part of rational numbers that can be expressed by the fraction with
ℤ, integer num-
denominator equal to 1, [wiki_0504]. This means that integer number does not use the
bers are e.g.:
fractional part of a number or radix point. Integer numbers are in the range from minus
…-2; -1;
infinity to plus infinity. The set of integer numbers is denoted by boldface symbol Z or ℤ
0;
(double-struck capital Z, U+2124). As for the division of the integer set to subsets, opinion is
+1; +2…□
divided, [wiki_0504], [wiki_0505] and [wiki_0506]. One way, the set of integers consists of
the subsets of natural numbers {+1, +2, +3 …}, zero {0} and the opposites of the natural
numbers {−1, −2, −3 …} that are negative. The second way, the subset of natural numbers is ℕ, natural num-
{0, +1, +2, +3 …} and the subset of negative non-zero numbers is {-1, -2, -3 …}. The set of bers are e.g.:
natural numbers is usually denoted by boldface symbol N or ℕ (double-struck capital N, 0; +1; +2…
U+2115). or
+1; +2…□
The above mentioned summary represents the mathematical point of view on numbers but
in computer science a different terminology exists. There are also limitations that are given
by the finite quantity of bits for representing numbers. Therefore, the minimum and maxi-
mum numbers are defined and there is a space between two neighboring numbers. In
mathematics, where no limitations exist, it is possible to use numbers from the range of
VŠB-TU Ostrava 41
5 Real numbers
minus infinity to plus infinity and there is no space between two neighboring numbers. In
computer science, mainly in the definition of data types in programming language, different
terms are used:
Floating point numbers are numbers with radix point and exponent and correspond
to real numbers.
Fixed point numbers are numbers, where the position of radix point is defined be-
fore and correspond to rational numbers.
Integer numbers correspond to mathematical integer numbers and two data types
are defined, signed and unsigned integers.
o Signed integers are the set of integer numbers, ℤ.
o Unsigned integers correspond to natural numbers, set ℕ = {0, +1, +2, …}.
The problem of representing numbers and their precision is a separate science. In computer
science we can find more examples of wrong computation, some of them were produced
by people and others were produced by the theory of representing numbers. For better
understanding, some examples known from the literature are shown below.
$\frac{4195835}{3145727} = 1.333739068902037589$ (0502)
This flaw was on the first Pentium model with frequency 60, 90 and 100 MHz. The Intel
Corporation recognized this flaw in the algorithm, repaired it very quickly and continued in
the production of new Pentium processor models. Moreover, the Intel Corporation offered
each customer to replace the original processor by a new one without the flaw.
Excel bug. The flaw in Excel 2007 was in the calculation with number 65535 and/or ap-
proaching number 65536, [Microsoft_0501], [Muller_2010]. Displayed results of formula
(0503) and multiplication (0504) were wrong.
The flaw was only in displayed results but in other calculations correct numbers were used.
Microsoft explains this flaw in article [Microsoft_0501] http://blogs.office.com/b/microsoft-
excel/archive/2007/09/25/calculation-issue-update.aspx and a patch is available from
http://blogs.msdn.com/excel/archive/2007/10/09/calculation-issue-update-fix-
available.aspx.
42 VŠB-TU Ostrava
Digital systems for joint teaching programme of BUT and VSB-TUO
Chaotic bank
The beginning of the chaotic bank flaw starts as a story, literature [Muller_2010] and [In-
ternet_0501]. “Recently, Mr. Gullible went to the Chaotic Bank Society, to learn more about
the new kind of account they offer to their best customers. He was told:
You first deposit $e − 1 on your account, where e = 2.7182818 · · · is the base of the
natural logarithms. The first year, we take $1 from your account as banking charges.
The second year is better for you: We multiply your capital by 2, and we take $1 of
banking charges. The third year is even better: We multiply your capital by 3, and
we take $1 of banking charges. And so on: The n-th year, your capital is multiplied
by n and we just take $1 of charges. Interesting, isn’t it?”
The question is how much money will be on the account after 25 years.
The bank officer started thinking and tried to simulate. The program in C language is in Fig.
05-01 and was compiled with mingw32-gcc version 4.7.1; its result is in Fig. 05-02.
Interesting, isn’t it? Officer tries to check by calculation in Excel 2010 with the result equal to
$-2242373259. This indicates a problem, which result is correct? It is simple; the correct
value is approximately $0.04 on the account.
/* Chaotic bank simulation: starts from a deposit of e - 1 and applies
   account = i*account - 1 for years i = 1..25 in three floating-point
   types, then prints each final balance. */
int main()
{ float single_account = 1.71828182845904523536028747135;
double double_account = 1.71828182845904523536028747135;
/* NOTE(review): the constant has no L suffix, so it is a double constant
   converted to long double -- confirm this is intended. */
long double long___account = 1.71828182845904523536028747135;
int i;
for (i = 1; i <= 25; i++)
{
/* Year i: multiply the capital by i, then subtract the $1 charge. */
single_account = i*single_account - 1;
double_account = i*double_account - 1;
long___account = i*long___account - 1;
}
/* The float argument is promoted to double when passed to printf. */
printf("You will have $%+1.17e on your account. (Single precision)\n",
single_account);
printf("You will have $%+1.17e on your account. (Double_precision)\n",
double_account);
/* NOTE(review): %e expects a double, but long___account is a long double
   (standard C uses %Le for that) -- confirm against the compiler and
   runtime used to produce the printed result. */
printf("You will have $%+1.17e on your account. (Long precision) \n",
long___account);
}
VŠB-TU Ostrava 43
5 Real numbers
Rump’s problem
The following formula (0505) was designed by Siegfried Rump in 1988, [Rump_1988] and
processed on computer IBM 370. The C program is in Fig. 05-03. The problem is also de-
scribed by literature [Muller_2010] and [Internet_0501].
int main()
{
double a = 77617.0;
double b = 33096.0;
double b2,b4,b6,b8,a2,firstexpr,f;
b2 = b*b;
b4 = b2*b2;
b6 = b4*b2;
b8 = b4*b4;
a2 = a*a;
firstexpr = 11*a2*b2-b6-121*b4-2;
f = 333.75*b6 + a2 * firstexpr + 5.5*b8 + (a/(2.0*b));
The program was compiled with mingw32, version 4.7.1, and was run with operands
a = 77617 and b = 33096. The results are in Fig. 05-04. The checking calculation was
produced in Excel 2010 with result -1.180591621E+21. What is the correct result? The correct
result is -0.8273960599 …, [Muller_2010]. Interesting, isn’t it?
44 VŠB-TU Ostrava
Digital systems for joint teaching programme of BUT and VSB-TUO
int main()
{ float a;
int i;
A simple example
A simple example is the program in Fig. 05-05, [wiki_0508]. The questions are:
5.4 References
[Internet_0501] http://perso.ens-lyon.fr/jean-michel.muller/chapitre1.pdf; on line 2014-06-29
[Intel_0501] http://www.intel.com/support/processors/pentium/sb/CS-012748.htm; on
line 2013-06-10
[Microsoft_0501] http://blogs.office.com/b/microsoft-
excel/archive/2007/09/25/calculation-issue-update.aspx, on line 2013-06-
12
[Microsoft_0502] http://blogs.msdn.com/excel/archive/2007/10/09/calculation-issue-
update-fix-available.aspx, on line 2013-06-12
VŠB-TU Ostrava 45
5 Real numbers
46 VŠB-TU Ostrava
Digital systems for joint teaching programme of BUT and VSB-TUO
6 Integer numbers
Integer numbers with designation ℤ are numbers without a fraction part and their range is
from minus infinity to plus infinity. There are two opinions on the division of integer
numbers into subsets, but in computer science the preferred opinion is that the natural
numbers contain zero, {0, +1, +2, +3 …}, [wiktionary_0601], [wiki_0608] and [Internet_0601].
Reference [proofwiki_0601] even wrote “However, using ℕ = {0, 1, 2, 3 …} is a more modern
approach, particularly in the field of computer science, where starting the count at zero
is usual.” The application of integer numbers in computer science or informatics has some
limitations that are given by the binary numeral system and the limited word size. Firstly,
the finite word size causes a limited range of integer number representation. Secondly, the
integer numbers use a sign (plus or minus); the sign is a special glyph and it does not belong
among the digits of numeral system. However, in binary numeral system only two digits
exist and special techniques must be used to represent negative numbers.
Integer numbers contain natural numbers and negative numbers. □
The set of natural numbers is {0, +1, +2, +3 …} □
Integer numbers in the binary numeral system are the most frequently used in computers
and programming languages. Integer decimal numbers in BCD code are presented in a sep-
arate chapter below. Next description will concentrate on integer numbers in the binary
numeral system and the techniques for representing a sign.
The term of integer, or int, also relates to programming languages, where the integer be-
longs to a data type. The range of representation is defined by the number of bits used in
the word and it depends on the implementation of the programming language. It does not
have to be defined by the computer architecture because it is possible to use a 64 bit inte-
ger on 16 bit architecture. Programming languages use the terms as short integer, long,
long long, double long integer and the corresponding number of bits depends on the type
of language and its implementation.
Modern programming languages begin to use new names in the declaration; these names
contain the number of bits in the word, for example, int8_t, uint8_t and the same is valid
VŠB-TU Ostrava 47
6 Integer numbers
for 16, 32 and 64 bits. Notation int is meant for signed and uint for unsigned integer decla-
ration. These new declarations have been stated by standard ISO/IEC 9899:1999. This
standard is known as C99, C language version 1999, literature [wiki_0601]. These new dec-
larations can be used in language C++2011, [cppref_0601] and [Microsoft_0601].
Endianness or endian is also a common term and it defines the way of placing the number
in the memory. It specifies the order of atomic elements for n-bit object. For example,
there is 32 bit number and byte is atomic element, then the endian defines the order of
bytes in the memory, whether the MSB byte of 32 bit number will be placed on a lower or a
higher memory address.
MSB LSB
For 8-bit the range is from 0 to 255
n-1 0
0 1 0 1 0 0 1 1 Number 129D is coded as 1000 0001B
In the declaration of programming languages it is possible to see the notations for unsigned
integer data types:
unsigned integer or unsigned int, where the number of bits in the word depends
on the language and its implementation.
uintx_t, where x is 8, 16, 32 or 64, e.g. uint32_t is the declaration for 32 bit un-
signed integer; it is valid for C99/C++2011 version and above.
unsigned short int, unsigned long int or unsigned long long int.
48 VŠB-TU Ostrava
Digital systems for joint teaching programme of BUT and VSB-TUO
zeros there will be. Some techniques or principles have two zeros, plus and minus, and the
rest only one zero, a plus zero.
In programming languages, the integer number is a basic data type and in the declaration it
is possible to use the following notation, similar to that of an unsigned integer. This declara-
tion automatically supposes the application of two’s complement:
Signed integer, integer or int, where the number of bits in the word depends on the
language and its implementation.
intx_t, where x is 8, 16, 32 or 64, e.g. int32_t for 32 bit signed integer; it is valid for
C99/C++2011 version and above.
Signed short int, signed long int or signed long long int.
VŠB-TU Ostrava 49
6 Integer numbers
${}^{1}A = 2^{n} - 1 - A$ (0603)
${}^{1}A = {\sim}A$ (0604)
$-(2^{n-1} - 1)$ to $+(2^{n-1} - 1)$ (0605)
Range of ones’ complement: $-(2^{n-1} - 1)$ to $+(2^{n-1} - 1)$ □
Where
${}^{1}A$ is the denotation of ones’ complement; it is ones’ complement to number A
(positive).
A is a positive value, for which ones’ complement is calculated.
n is number of bits in the word.
~ is bitwise negation, operator of C language.
Where
${}^{2}A$ is the denotation of two’s complement; two’s complement of number A
(positive).
${}^{1}A$ is the denotation of ones’ complement.
A is a positive number for which two’s complement is calculated.
n is number of bits in the word.
~ is bitwise negation, operator of C language.
Range of two’s complement: $-(2^{n-1})$ to $+(2^{n-1} - 1)$ □
Only one zero, plus 0 □
50 VŠB-TU Ostrava
Digital systems for joint teaching programme of BUT and VSB-TUO
VŠB-TU Ostrava 51
6 Integer numbers
The second way, a direct conversion from two’s complement to the decimal numeral sys-
tem is given by formula (0609). It looks like a classical polynomial of the numeral system
but only the first element of the polynomial has a minus sign for MSB bit, Fig. 06-08.
$-a_{n-1} 2^{n-1} + a_{n-2} 2^{n-2} + \dots + a_{0} 2^{0} = -a_{n-1} 2^{n-1} + \sum_{i=0}^{n-2} a_{i} 2^{i}$ (0609)
Where
ai is a binary digit.
n is number of bits in the word.
52 VŠB-TU Ostrava
Digital systems for joint teaching programme of BUT and VSB-TUO
in the digital to analog or analog to digital converter and its application in floating point for
representing the exponent is commonly used.
Plus infinity
Offset, b
Offset binary
0 2n-1
For n-bit representation
Mathematical definition is given by formula (0610) and it is defined by number b, which is
called offset. Offset b may be any number and it moves the range of representation on the
number line. In computer science, two definitions of offset are used, $2^{n-1}$ and $2^{n-1} - 1$
for n-bit word. The second definition is used in floating point according to IEEE 754,
[IEEE 754-2008]. The range of representation is given by formula (0611).
Biased exponent of floating point is offset binary. □
${}^{B}A = A + b$ (0610)
${}^{B}A = A + b$. □
$-(b)$ to $+(-b + 2^{n} - 1)$ (0611)
Where
${}^{B}A$ is an offset binary number and must be a natural number ℕ, (${}^{B}A \ge 0$).
A is the integer number for which the offset binary is calculated, A is positive or
negative.
b is offset or bias, in standard IEEE 754 for floating point, the bias is $2^{n-1} - 1$, for n-bit
exponent.
Biased number ${}^{B}A$ is unsigned integer. □
The definition of offset as $2^{n-1}$ is very useful and on the binary level the MSB bit is the sign in
the opposite definition to the usual one, 0 is minus and 1 is plus. Then, it is possible to
change the offset binary to two’s complement by inverting the sign bit, MSB bit, see
Fig. 06-10. The conversion to two’s complement is suitable for arithmetic operations.
VŠB-TU Ostrava 53
6 Integer numbers
One of three fields of the floating point word is called the biased exponent and this field is
coded by the binary offset. The standard IEEE prefers the term of biased exponent and it
uses the term of bias instead of offset. The range of representation is given by formula
(0612) for n-bit word and the example of biased exponent coding for 32-bit binary floating
point word is in Fig. 06-11.
Where
54 VŠB-TU Ostrava
Digital systems for joint teaching programme of BUT and VSB-TUO
Decimal BCD
digit code
0 0000
1 0001 Sign Binary Hex
2 0010 + 1010 A
3 0011 - 1011 B
4 0100 + 1100 C Preferred
5 0101 - 1101 D Preferred
6 0110 + 1110 E
7 0111 unsigned 1111 F
8 1000
9 1001
Fig. 06-13 Definition of BCD code
BCD code is defined by the table, Fig. 06-13, where each decimal digit in the range 0 to 9 is
expressed by a 4-bit binary number. Here, ten combinations are used, the remaining
combinations are not used for coding the decimal digits but they are used for coding a possible
sign. This group of four bits is called the nibble; a byte has 2 nibbles and so on. When a
decimal number has more orders, then each decimal order is coded separately into one nibble;
all nibbles are arranged side by side and form a string, Fig. 06-14. Decoding means that the
string is divided into nibbles and each nibble is converted into a decimal digit.
Nibble is a 4-bit group and BCD is placed to the nibbles. □
BCD number has a string format. □
Coding Decoding
691 0111100000110100
In computer science, BCD numbers use the BCD code and nibbles are placed into the byte
by two ways, the packed or the unpacked format, with the sign or without. The BCD num-
bers are understood as a string, therefore the number of weights is changeable, [DEC_VAX].
The terminologies related to the BCD numbers in computer science are:
Packed BCD means that each nibble of the byte or the word is used, Fig. 06-15. A
byte has 2 decimal digits. Both nibbles are used. In the word, each nibble is used for
the BCD code of a decimal digit or a sign.
VŠB-TU Ostrava 55
6 Integer numbers
Unpacked BCD means that only one digit is placed into one byte to a lower nibble.
A higher nibble equals zero. In the word, each byte is used for one BCD code of a
decimal digit or a sign.
Sign BCD numbers. Sign BCD numbers use the principle of the sign and magnitude.
One nibble is a sign and the sign is mostly placed as the least significant nibble. For
the sign, the combination higher than 9 is used. The preferred combination for a
plus sign is hex C, and for a minus sign it is hex D, Fig. 06-13. This convention was
derived from the accounting terms (Credit and Debit).
Unsigned BCD numbers. The string usually includes the combination hex F in the
least significant nibble as the expression of the unsigned format, [IBM_370],
[DEC_PDP] and [DEC_VAX].
The range of representation is given by the numbers of used nibbles. In the unpacked for-
mat, it is the number of bytes minus the sign byte. In the unsigned format, it is necessary to
pay attention whether the value hex F is used as the sign or not. Formulas (0613) and
(0614) define the range of representation.
Where
56 VŠB-TU Ostrava
Digital systems for joint teaching programme of BUT and VSB-TUO
${}^{10}A = {}^{9}A + 1$ (0615)
${}^{9}A$: 0→9, 1→8, 2→7, 3→6, 4→5, 5→4, 6→3, 7→2, 8→1, 9→0 (0616)
${}^{10}A = 10^{n} - A$ (0617)
${}^{10}A = 10^{n} - A$ □
Where
${}^{9}A$ is nine’s complement of A, it is the representation defined by formula (0616),
where digit 0 is replaced by 9, digit 1 is replaced by 8, digit 2 is replaced by 7, …
${}^{10}A$ is ten’s complement of A.
n is the order.
Ten’s complement may be used in arithmetic or for representing a sign decimal number.
When it is used in arithmetic, then the algorithm takes this fact into account. When the
ten’s complement is used for representing a sign number, then it is necessary to define the
sign nibble; its values define a positive or a negative number. If the value is 0, 1, 2, 3 or 4,
then the sign is plus, values 5, 6, 7, 8 or 9 define a minus sign.
6.12 References
[DEC_VAX] VAX780 Architecture handbook; Digital Equipment Corporation, 1977;
(http://bitsavers.trailing-
edge.com/pdf/dec/vax/VAX_archHbkVol1_1977.pdf; on line 2013-09-24)
[IEEE 754-2008] IEEE Std 754™-2008, IEEE Standard for Floating-Point Arithmetic, 29 August
2008, revision of IEEE 754 – 1985
VŠB-TU Ostrava 57
6 Integer numbers
58 VŠB-TU Ostrava
Digital systems for joint teaching programme of BUT and VSB-TUO
The basic arithmetic operations are addition, subtraction, multiplication and division. In
computer, these operations are performed by ALU, Arithmetic Logic Unit as a part of the
processor. The algorithm for performing these operations depends on the principle of rep-
resentation of negative numbers and the finite word size of ALU. Therefore, some results
of the operations may be out of the range of representation and this situation is called the
overflow. Each processor has a status register or a similar register that contains the flags
characterizing the properties of results.
In computer, the basic arithmetic operations are realized by logical circuits as combination-
al circuits or as synchronous digital systems based on FSM – Finite State Machine. Number
of bits in ALU may be from one to n-bits. For example, one-bit arithmetic unit is used when
the operands are in a serial stream and then the arithmetic logic unit must be designed as a
synchronous digital system. The addition, subtraction and multiplication may be realized as
combinational circuits and/or synchronous digital systems, but the division is always a syn-
chronous digital system with FSM.
Operands may be unsigned or signed, in different codes. But every processor has a binary
adder and it is necessary to define the algorithm of arithmetic operations with operands in
different codes on the binary adder. For example, the operands in offset binary are added
by a binary adder, BCD numbers are also added by a binary adder and so on.
The choice of realization depends on the definition of the processor architecture and also
on the time needed for the calculation of the operations. This time is called the propagation
delay of the operation. Therefore, there are a lot of ways of possible realization.
N flag or S flag, N flag is a negative flag and S flag is a sign flag, [wiki_0702]. This flag
is always the MSB bit of the result. When the result is understood as two’s com-
Note to the range of representation
The ranges of representation are from 0 to $2^{n} - 1$ for unsigned integer and from $-(2^{n-1})$ to
$+(2^{n-1} - 1)$ for two’s complement.□
VŠB-TU Ostrava 59
7 Arithmetic operations on integer numbers
V flag is the exclusive or operation of the carry into the MSB bit and the carry out of the
MSB bit.□
1 1 0 1 1 0 1 1
MSB LSB
15 8 7 0
1 1 1 1 1 1 1 1 1 1 0 1 1 0 1 1
Sign extension
Fig. 07-01 Sign extension
60 VŠB-TU Ostrava
Digital systems for joint teaching programme of BUT and VSB-TUO
MSB LSB
15 0
1 1 0 1 1 0 1 1 0 0 0 0 0 0 0 0
arithmetic shift right by one
1 1 1 0 1 1 0 1 1 0 0 0 0 0 0 0
One possibility for calculating the sign extension is to use the arithmetic shift right. This
shift copies the sign to the new sign bit; it means the value of the sign remains the same. In
the situation where 8-bit sign number is placed to 16-bit word, first the 8-bit sign number is
placed to the higher byte of word and then the arithmetic shift right is performed on the
word 8 times, Fig. 07-02. The result is that the higher byte contains the sign extension and
the lower byte contains the original 8-bit sign number.
meral system. Just remember, that 1 + 1 equals 10 in the binary numeral system and 10
binary is 2 decimal. Each bit position adds three values, two values of the operands plus the
carry from the previous order or bit. Also, each bit position generates two outputs, the sum Carry, the value
and the carry to the next position. This scheme is the same for each bit position and this is to next bit.
□
$S = (A + B) \bmod 2^{n}$ (0701)
Where
In computer, the adder size is defined as n-bit and only these n-bits may be used to place
the numbers. Then the addition in computer is defined by formula (0701). The formula
generates the n-bit result and each addition generates the sum and flags that describe this
result. This principle does not depend on the number of bits in the word, therefore 4-bit
arithmetic and 4-bit numbers are used in following examples. Below, there are examples of
the addition with flag setting, mainly with comments for V and C flag.
VŠB-TU Ostrava 61
7 Arithmetic operations on integer numbers
In Fig. 07-03, when the operands are unsigned, then the result has the overflow,
the carry flag is set and number 16 is out of the range. When the operands are in
two’s complement, the result is correct; the overflow as V flag is not set.
Check in decimal
Binary Unsigned 2’s com.
Operand A 0111 7 7
Operand B + 1001 + 9 + -7
Sum S 0000 N=0, Z=1, V=0, C=1 ?0 0
Carry out 1
In Fig. 07-04, when the operands are unsigned, the result is correct and C flag is not
set. When the operands are in two’s complement, the result is wrong, the overflow
occurs, V flag is set. The correct result of the addition is +9, but the calculated result
is out of the range of two’s complement representation. This situation can be explained
as follows: the addition of two positive numbers generates a negative result.
And vice versa, the addition of two negative numbers generates a positive result.
E.g., consider the 4-bit binary addition 0011 + 0110 = 1001. The result is correct for
unsigned number, 3 + 6 = 9. However, the result has overflow if the numbers are
understood as two’s complement, (+3) + (+6) = -7 is wrong.
N flag is MSB bit. Z flag is set, when the result is zero. □
Check in decimal
Binary Unsigned 2’s com.
Operand A 0111 7 7
Operand B + 0010 + 2 + 2
Sum S 1001 N=1, Z=0, V=1, C=0 9 ?-7
Carry out 0
Fig. 07-05 shows the situation of the addition of 8-bit numbers on 4-bit adder. The
carry from the addition of lower nibbles must be added to the addition of higher
nibbles. The least significant addition must start with the zero carry in. The realiza-
tion of this principle of the addition is a synchronous digital system with FSM.
Binary Binary
Carry in 0 1
Operand A 1010 0100
Operand B + 1001 + 0010
Sum S 0011 N=0, Z=0, 0111 N=0, Z=0,
Carry out 1 V=1,C=1 0 V=0,C=0
62 VŠB-TU Ostrava
Digital systems for joint teaching programme of BUT and VSB-TUO
Where
Check in decimal
Binary Binary Unsigned 2’s com.
Operand A 0100 0100 4 +4
Operand B - 0010 >2’s comp> + 1110 - 2 - +2
Sum S 0010 N=0, Z=0, 2 2
Carry out 1 V=0, C=?
Check in decimal
Binary Binary Unsigned 2’s com.
Operand A 0100 0100 4 +4
Operand B - 0110 >2’s comp> + 1010 - 6 - +6
Sum S 1110 N=1, Z=0, ?? -2
Carry out 0 V=0, C=?
Check in decimal
Binary Binary Unsigned 2’s com.
Operand A 0111 0111 7 +7
Operand B - 1010 >2’s comp> + 0110 - 10 - -6
Sum S 1101 N=1, Z=0, ?? ??
Carry out 0 V=1, C=?
Figures 07-06, 07-07 and 07-08 show the subtraction, where operand B is converted by
means of the two’s complement algorithm and then added to operand A. If the operands
are in two’s complement and the overflow does not occur, the result is correct and it is in
two’s complement. When the overflow occurs, then the result is incorrect. For unsigned
VŠB-TU Ostrava 63
7 Arithmetic operations on integer numbers
operands, the carry flag defines that the result is out of the range of representation. How-
ever, the carry flag has two definitions for the subtraction and its setting is defined by the
processor architecture.
Fig. 07-09 Comparison of the sign magnitude code and two’s complement
Where
If $A_S = 0$ then ${}^{2}A = {}^{S}A$
If $A_S = 1$ then $A_S = 0$, ${}^{2}A = \mathrm{not}({}^{S}A) + 1$
${}^{S}A$ is the sign and magnitude number A
$A_S$ is the sign of the sign and magnitude number
${}^{2}A$ is two’s complement
A direct performance of the addition and/or subtraction in the sign and magnitude code is
described as a graph, Fig. 07-11, literature [Kaps_2013]. The practical realization of this
algorithm may be the combinational circuit or synchronous digital system, where the graph
describes the behavior of control unit as FSM. This graph describes the addition and the
subtraction both for the manual and the digital system. The basic steps are:
Use the correct input; in case of subtraction, the sign of operand B is inverted (BS = not BS).
Find the correct path after the branches.
First path, it is the addition of two operands with the same sign.
Second path, it is the subtraction of two operands, where the magnitude A is higher
than B.
64 VŠB-TU Ostrava
Digital systems for joint teaching programme of BUT and VSB-TUO
Third path, the magnitude A is equal to B, the result is the positive zero.
Fourth path, it is the subtraction of two operands, where the magnitude A is less
than B.
Subtraction
A-B
Where
Addition A, B are operands in the sign and magnitude.
A+B BS = not BS
S is result of the addition or subtraction.
AS, BS, SS are signs of operands.
F AM, BM, SM are magnitudes of operands.
AS = BS
F
T AM > BM
T F
AM = BM
T
SM = AM + BM SM = AM - BM SM = 0 SM = BM - AM
SS = AS SS = AS SS = 0 SS = BS
Done
Fig. 07-11 Algorithm of the addition and subtraction in the sign and magnitude
is mathematically defined by formulas (0703) and (0704). The word mathematically means
that no overflow occurs and the only result must be a natural number. The example of the
addition is in Fig. 07-12.
${}^{B}A = A + b$ and ${}^{B}A \ge 0$ □
VŠB-TU Ostrava 65
7 Arithmetic operations on integer numbers
Where
${}^{B}Sum$, ${}^{B}Sub$ are results in offset binary and must be natural numbers, (${}^{B}S \ge 0$).
A, B are operands.
${}^{B}A$, ${}^{B}B$ are operands in offset binary.
b is bias or offset.
In computer, where numbers are placed to n-bit word, the overflow may occur on
sub-results or final result. Then the overflow causes that formulas (0703) and (0704) are not
valid. The overflow may occur, e.g., in the addition ${}^{B}A + {}^{B}B$, where the result is higher than
$2^{n} - 1$, or in the subtraction ${}^{B}A - {}^{B}B$ where the result is less than zero. Therefore, it is
necessary to modify the original formulas (0703) and (0704) by taking into account the n-bit
word. Then the formulas (0705) and (0708) are valid for any bias and have no limitation in
case of the overflow of sub-results. Floating point numbers use the floating point bias
$2^{n-1} - 1$; then formulas (0706), (0707) and (0709) are valid and have no limitation in case of the
overflow of sub-results. The overflow may only occur on the final addition and may be
detected by C flag as the carry out.
Biased number is always unsigned integer. □
For the addition
${}^{B}Sum = ({}^{B}A + {}^{B}B) + \mathrm{not}(b) + 1$, valid for any bias (0705)
${}^{B}Sum = ({}^{B}A + {}^{B}B) + b + 2$, valid only for bias $2^{n-1} - 1$ (0706)
${}^{B}Sum = ({}^{B}A + {}^{B}B) + 2^{n-1} + 1$, valid only for bias $2^{n-1} - 1$ (0707)
Subtraction is replaced by addition of negative number in two’s complement. □
Where
${}^{B}Sum$ is the sum in n-bit offset binary.
${}^{B}Sub$ is the difference in n-bit offset binary.
${}^{B}A$, ${}^{B}B$ are operands in n-bit offset binary.
b is bias or offset.
not(b) is bitwise not of bias b.
n is n-bit word for representation.
$2^{n-1} - 1$ is bias for floating point according to IEEE 754.
66 VŠB-TU Ostrava
Digital systems for joint teaching programme of BUT and VSB-TUO
Fig. 07-13 presents the addition, where the first addition generates an overflow and the
result corresponds to formula S = (a + b) mod 16 = 21 mod 16 = 5. However, the addition of
the correction “not b plus 1” generates the correct result without an overflow.
It is worth mentioning, that the result of the addition has one order more than the maximal
order of the operands, formula (0710). This is a typical property of each addition. In com-
puter, integer numbers in the BCD code are represented by the string. In the case, when
the string length has no limitation, the overflow cannot occur. In case of the restricted
string length, the BCD overflow can occur. It means that the destination string does not
have a sufficient length, [DEC_VAX].
nr = max(n1,n2) + 1 (0710)
Where
VŠB-TU Ostrava 67
7 Arithmetic operations on integer numbers
When the BCD addition is performed on, e.g., 3 BCD digits, the correction must be done on
each addition, Fig. 07-15. The first correction will be performed when the sum in nibbles is
higher than 9 or when the carry to the next nibble is generated. Performing the first correc-
tion by the addition may also generate numbers higher than 9, therefore it is necessary to
continue by adding 6 in the second correction. And again, this correction may generate the
nibble higher than 9 and so the correction continues. The correction is repeated until no
nibble is higher than 9.
th nd st th
4 2 1 0 Check in
carry carry carry Note
nibble nibble nibble nibble decimal
0000 1000 0101 0110 856
+0000 +1001 +0100 +1000 +948
1 0 0 Carry to next nibble
0001 0001 1001 1110 First binary addition
+0110 +0000 +0110 First correction
0 0 1 Carry to next nibble
0001 0111 1010 0100 Second binary addition
0000 0000 0110 0000 Second correction
0 1 0 Carry to next nibble
0001 1000 0000 0100 Final result 1804
The subtraction can be performed by adding a negative number, Fig. 07-16. The negative
number is expressed by ten’s complement. For signaling, the best practice is to use the
most significant digit, where 0 is plus and 9 is negative. It means to add one nibble for the
sign. After performing the subtraction, the most significant digit determines the sign of the
result.
Subtraction is
Definition of
Carry replaced by addi-
subtraction in Operation BCD number Note
in
decimal tion of negative
47 0000 0100 0111 number in ten’s
-95 - 0000 1001 0101 complement.□
0000 0100 0111 First operand
10 9
+ 1001 0000 0100 1 A = A +1, second operand
1001 0100 1100 First addition
+ 0000 0000 0110 Correction of addition
1001 0101 0010 Result is negative
9
0000 0100 0111 1 A = A +1
-48 - 0100 1000 Final result in BCD and sign
68 VŠB-TU Ostrava
Digital systems for joint teaching programme of BUT and VSB-TUO
7.8 Multiplication
The multiplication of unsigned integers is the basic arithmetic operation and the algorithm
for the binary unsigned integers is the same as for the decimal unsigned integers, Fig. 07- Multiplication of
17. The algorithm is valid both from the mathematical and the computer point of view and unsigned inte-
the application of this algorithm must take into account the following: ger.□
nr = n1 + n2 (0711)
Where
In case of a sign number, the multiplication must be performed by a special algorithm. The
basic one is the application of the algorithm for unsigned number with modification,
Fig. 07-18:
Separate the sign of both operands and calculate the sign of the product. The sign
of the result is the xor operation with both signs, step 1.
VŠB-TU Ostrava 69
7 Arithmetic operations on integer numbers
Separate the significant bits of operands as the absolute value of operands and per-
form the multiplication for unsigned numbers, step 2.
According to the sign of the product, convert the product to the defined form of
the representation, step 3.
It is possible to directly perform the multiplication of the sign numbers in two’s comple-
ment by a special algorithm, Booth's multiplication algorithm in [wiki_0707] and more algo-
rithms in [Ercegovac_2004], [Koren_2002] and [Stine_2012].
7.9 Division
Division is a very arduous arithmetic operation because the result can take several different
forms. The division is also a time consuming operation. The result of the division is a
rational number. When the result is a number with radix point, it is the floating point division.
This subchapter describes only the division of integer numbers, the integer division. In this
case, the result can have two forms, either a fraction or a quotient and a remainder.
Fraction is a very accurate result of the division. A lot of programming languages use the
fraction as the result, and they have the rational number as the data type, [Matlab_0701] and
[wiki_0708].
Rational numbers ℚ are 1.1, 1/8 … □
Natural numbers ℕ are the set {0, 1, 2, 3 …} □
Formula (0712) defines the quotient and/or remainder as the result of division. This
formula is uniquely defined for the positive numbers only; these numbers are called the
numerator and the denominator. One of the first division algorithms is the Euclidian division for
positive integer numbers, which was extended for positive and negative numbers. The basic
idea of this algorithm is that the remainder is always positive, formula (0713), [wiki_0709].
Another algorithm of the integer division admits a positive and/or negative remainder,
[Koren_2002] and [Ercegovac_2004]. This definition is used by a lot of mathematical
systems and programming languages.
Euclidian division, remainder is always positive. □
$n = d \cdot q + r$ (0712)
$0 \le r < |d|$ (0713)
Quotient is equal to numerator/denominator. □
Where
n is the numerator or dividend as an integer number.
d is the denominator or divisor as an integer and not equal to zero (d ≠ 0).
q is the quotient as an integer.
r is the remainder.
These different approaches to the integer division are shown in Fig. 07-19. Most of the
reported systems have the integer division with truncation and the remainder can be either
positive or negative. Only Euclidian algorithm and the Python programming language give
different results. Only the MS Excel 2010 spreadsheet has no defined remainder and the
modulo operation is defined in a different way. The Octave system has a different operator
for the remainder and the modulo operation, resulting in different results. But the present
literature states that the remainder is the modulo calculation, [ISO/IEC_0701], [wiki_0710]
and [wiki_0711].
70 VŠB-TU Ostrava
Digital systems for joint teaching programme of BUT and VSB-TUO
The latest opinion on the integer division is formulated by standard ISO/IEC 10967-1:2012,
Information technology - Language independent arithmetic - Part 1: Integer and floating
point arithmetic, famous as LIA. This standard states the definition of quotient and remain-
der. The LIA standard states the integer division as:
“quotI (-3; 2) = -2 round toward minus infinity, specified in LIA-2” Floor division. □
“divtI (-3; 2) = -1 round toward zero, no longer specified by any part of LIA”
VŠB-TU Ostrava 71
7 Arithmetic operations on integer numbers
The Python programming language from version 2.2, including Python 3.x version, uses the
floor division, [Python_0701], [Python_0702] and it defines a new // operator (double
slash) as the integer floor division. The modulo operator remains the same, % (percent),
and produces the remainder. In Python 2.x the original / operator (slash) applied to integers
gives the quotient according to the floor division; in Python 3.x the / operator performs
true division and returns a floating point result.
When integers are divided, the result of the / operator is the algebraic quotient with any
fractional part discarded. If the quotient a/b is representable, the expression (a/b)*b + a%b
shall equal a. This is often called ‘‘truncation toward zero’’.
The realization of division is performed by many algorithms which are described in litera-
ture [Ercegovac_2004], [Internet_0701], [Koren_2002], [wiki_0712] or [Muller_2010]. The
realization of the algorithms of division is only by FSM – Finite State Machine.
7.10 References
[DEC_PDP11] PDP11 processor handbook, PDP11/04/34a/44/60/70, instruction set and
instruction SUB; Digital Equipment Corporation, 1979;
(http://bitsavers.informatik.uni-
stuttgart.de/pdf/dec/pdp11/handbooks/PDP11_Handbook1979.pdf; on line
2013-09-24)
72 VŠB-TU Ostrava
Digital systems for joint teaching programme of BUT and VSB-TUO
[Kaps_2013] Jens-Peter Kaps; Digital System Design, Signed Magnitude Addition – Sub-
traction Algorithm; George Mason University;
http://ece.gmu.edu/~jkaps/courses/ece331-
s07/resources/signedinteger.pdf
[Stine_2012] J. E. Stine; Digital Computer Arithmetic Datapath Design Using Verilog HDL;
Springer 2012; ISBN-13 978-1461347255
VŠB-TU Ostrava 73
7 Arithmetic operations on integer numbers
74 VŠB-TU Ostrava
Digital systems for joint teaching programme of BUT and VSB-TUO
Fixed point numbers are a subset of real numbers, not vice versa. □
The term of fixed point (or fixed point numbers) is used mainly in computer
science. Fixed point numbers can be understood as numbers with the radix point
at a place defined beforehand. From the mathematical point of view, rational num-
bers ℚ (U+211A) are the numbers that are expressed as a quotient or fraction
n/d. We begin to think about the fraction and the possibility for its writing and reading. It is
possible to write the real number 1.23 as e.g. 0.00123 * 1000 ($0.00123/10^{-3}$) … 1.23 * 1 and
also as 1.23 * 1/1 … 1230 * 1/1000 etc. The multiplier is called a scaling factor, [wiki_0803].
The multiplier is either the integer or the fraction in the form 1/denominator. Then, it is
possible to read the fraction as: 1.23 is 123 with a scaling factor of 1/100, 123 is 1.23 with a
scaling factor of 100 etc., or 123 scaled by 1/100 is 1.23 and so on, [wiki_0803]. The same
principles are valid for negative numbers and any scaling factor, Fig. 08-01.
Scaling factor.□
1.23 is 123 with a scaling factor of 1/100.□
123 scaled by 1/100 is 1.23.□
Fraction is numerator/denominator. □
1.23 = 12.3/10 = 123/100 = 1 230/1 000          -1.1 = -22/20 = -33/30
1.23 is 123 with a scaling factor of 1/100      -1.1 is -33 with a scaling factor of 1/30
123 scaled by 1/100 is 1.23                     -22 scaled by 1/20 is -1.1
Fig. 08-01 Scaling
Fixed point is the theory of the transformation of real numbers to fraction of the integer numbers. □
The useful scaling factors are the powers of 2 or 10 and they are chosen so that the
numerator is an integer number. Then, the fixed point numbers are represented by
the division of the integer numbers and the numerator of the fraction is used in the
computation. It means that the integer arithmetic is used instead of floating point or
special arithmetic unit. The basic arithmetic operations of the fixed point numbers
are based on the fraction arithmetic, more details later. The scaling factor is used to
express the fixed point numbers, e.g. the scaling factor of 1/3600 is used for the
calculation of hours from seconds or for transforming angles, where the angle 2π
radians corresponds to number $65536 = 2^{16}$.
Fixed point is faster than floating point. □
The meaning of the fixed point is in the fact that the integer arithmetic is used instead of
the floating point arithmetic. The integer arithmetic is faster than the floating point arith-
metic. Moreover, not every processor has a hardware floating point unit, e.g. DSP - digital
signal processor. In this situation, floating point operations are simulated by the software
library and this is a slower computation than the integer arithmetic. The second reason is
the precision: with a suitable definition of the fixed point it is possible to reach a higher
precision than by using the floating point with the same word size. More details later.
VŠB-TU Ostrava 75
8 Fixed point arithmetic
The fixed point numbers and binary scaling theory are mainly used in digital signal pro-
cessing and other areas, literature [wiki_0804]:
Digital signal processing – DSP. A lot of DSP processors are only of integer type,
floating point operations are only software simulated. DSP covers the applications
of the digital filter, digital image processing, speech to text and text to speech con-
version, and so on.
Binary angle, where 2π angle corresponds to e.g. $65536 = 2^{16}$.
In the 1970’s and 1980’s, the fixed point was used in the intensive real time com-
puting, such as the flight simulator.
Some programs, where DCT – Discrete Cosine Transformation is used to compress
JPEG images.
Computer graphics.
The support for a rational number is possible to find in programming languages and alge-
braic computation systems, such as Mathematica and Maple, [wiki_0809]. In the program-
ming languages, the support is based on the software libraries. The better known languages
are Common Lisp, Perl, Ruby, C/C++ and VHDL; others are mentioned in literature
[wiki_0809], [vhdl_0801]. For the languages C/C++, it is the project of GNU Multiple Preci-
sion Arithmetic Library, [wiki_0809]. The Python programming language has the module of
fractions, which provides the support for the rational number arithmetic, [Python_0801].
The libfxmath library is a platform-independent fixed point maths in the format ℚ16.16
under the license MIT, [Google_0801] and [wiki_0808].
76 VŠB-TU Ostrava
Digital systems for joint teaching programme of BUT and VSB-TUO
fraction part and they are the absolute value of a fixed point number. Fractional part is a
part after the radix point and the scaling factor defines the size in bits of this fractional part.
In the example in Fig. 08-02, the scaling factor is $1/2^{3}$, it means that the fractional part has 3
bits, counted from the radix point to the right. The weight of LSB bit is $2^{-n}$ for scaling factor
of $1/2^{n}$. The fixed point value in Fig. 08-02 is IN = 53H as an integer and FX = 53H * $1/2^{3}$ =
A.6H as a number with radix point.
7 0 7 0
0 1 0 1 0 0 1 1 0 1 0 1 0 0 1 1 Size of fraction part in
3
* 1/2 = bits is derived from
Nibble 1 Nibble 0 Integer Fraction scaling factor. □
part Radix part
Sign Sign point
Radix point is defined
Integer number - IN Scaling Fixed point number – FX by scaling factor. □
factor - SF or real number - Re
Fig. 08-03 shows the weight for the representation of the integer or fixed point in the byte.
In case of integer, the LSB has the weight of $2^{0}$ and MSB bit is a sign or not. In case of fixed
point, the LSB bit has the weight of $2^{-n}$ and MSB bit is a sign or not. The $2^{0}$ weight of fixed
point is determined by the radix point. The sign bit has minus weight and it is used in two’s
complement. It is suitable to use this minus weight for the conversion to the decimal nu-
meral system.
7 6 5 4 3 2 1 0 4 3 2 1 0 -1 -2 -3
Weights for unsigned b b b b b b b b a a a a a a a a
7 6 5 4 3 2 1 0
Weights for signed - b +b +b +b +b +b +b +b - a4 +a3 +a2 +a1 +a0 +a-1 +a-2 +a-3
- 2.4 1 1 1 0 1 1 0 1 1 1 1 0 1 1 0 1
Sign extension
Integer number - IN Fixed point - FX
VŠB-TU Ostrava 77
8 Fixed point arithmetic
It is necessary to note that the required fixed point size in bits must be less than or equal to
the computer word size. When the fixed point size in bits is less than the computer word
size, then the fixed point number is placed to the word from LSB bit and the sign extension
is performed to the highest remaining significant bits, Fig. 08-04.
The position of radix point or binary scaling factor is defined by the user or software library.
In literature, we can find a lot of notations of the fixed point definition:
78 VŠB-TU Ostrava
Digital systems for joint teaching programme of BUT and VSB-TUO
Where
Where
Both definitions use a 32-bit word, and the fixed point representation is more precise, by about
two weights, than floating point. The 32-bit floating point format uses only 23 bits for the fraction, but
the signed ℚ1.30 fixed point format uses 30 bits for the fraction part. □
VŠB-TU Ostrava 79
8 Fixed point arithmetic
Each unsigned and signed range is defined by two equivalent formulas. Between two
neighboring numbers a small gap exists and it is defined by the number of fractional bits,
$1/2^{f}$, formula (0806). This gap is also called resolution ε (epsilon), [Oberstar_082007].
The range of representation also influences the computer word size of 16, 32 or 64 bits. In
some cases, when defined ℚ format does not use all bits of the word size, then the word
can increase the range of representation. Also, each arithmetic operation changes the for-
mat of the result, typically the number of bits in the integer and fractional part increases.
These new bits increase the accuracy and are used for rounding. They are called round and
sticky bits and have the same role as in floating point.
Limited number of bits for computation can cause the overflow. The overflow means the
number is out of the representation. The overflow is signalized by the flags C – carry and V
– overflow, depending on whether the computation is signed or unsigned. The overflow of fixed point
must be derived from these flags.
Overflow.□
$UN = + a_{m-1} B^{m-1} + \dots + a_{1} B^{1} + a_{0} B^{0} + a_{-1} B^{-1} + a_{-2} B^{-2} + \dots + a_{-f} B^{-f}$ (0807)
$SN = - a_{m-1} B^{m-1} + \dots + a_{1} B^{1} + a_{0} B^{0} + a_{-1} B^{-1} + a_{-2} B^{-2} + \dots + a_{-f} B^{-f}$ (0808)
$FX = IN \cdot 1/2^{f}$ (0809)
For a signed number, the coefficient $a_{m-1}$ is the sign bit.□
Where
80 VŠB-TU Ostrava
Digital systems for joint teaching programme of BUT and VSB-TUO
Signed numbers use two’s complement that is shortly defined by words – bitwise not plus 1,
formula (0810) or mathematical definition, formula (0811) that can be used in any numeral
system.
${}^{2}A = (\sim A) + 1$ (0810)
${}^{2}A = 2^{n} - A$ (0811)
Where
${}^{2}A$ is the two’s complement of number A and corresponds to number –A.
A is a natural number.
n is the number of bits used for the representation.
The number of bits in the integer part of fixed point number is given by definition ℚm.f and
the number of bits in the integer part of the computer word can be different. Then the
fixed point number as integer also has a different value. This difference can be seen in case
of two’s complement, Fig. 08-03. However, formulas (0807), (0808) and (0809) give the
same value. Therefore in the examples of the conversion, the fixed point value and the
value derived from the computer word will also be calculated.
Conversion to fixed point.□
The conversion to fixed point means to calculate the integer number (IN) or the fixed point
number with the radix point and the corresponding value in the computer word. The con-
version to integer number and fixed point value can be performed in two ways; first by
using a scaling factor and then by using the classical conversion to the binary or hexadeci-
mal numeral system. After that, the placement to the computer word will be made and sign
extension will be used. The format ℚm.f and the word size must be known and the conver-
sion to the fixed point begins from the known decimal real value. The performance of the
conversion is based on scaling factor, formula (0809), and subsequently the placement to
computer word can be made by algorithm, Fig. 08-05.
As you see in Fig. 08-05, the conversion is not accurate and the difference is caused by the
limited number of bits in the fractional part and by rounding. Only multiples of LSB weight
are represented accurately. Fig. 08-05 also states the reverse conversion as the check made
on the base of formula (0808).
Another way of the conversion is the possibility to convert the integer and fractional part
separately to the hexadecimal or binary numeral system. The number of bits in the frac-
VŠB-TU Ostrava 81
8 Fixed point arithmetic
tional part is f, which is stated in the definition ℚm.f, and therefore the calculation of the
fractional part must be made to f+2 bit and rounding must be performed. This algorithm is
used for checking in Fig. 08-06.
Positive real number 1.23 to Q2.11 in Negative real number -2.36 to Q2.10 in the 16-bit word
the 16-bit word
- 2.36 * 210 = - 2 416.64
11
1.23 * 2 = 2 519.04 - 2 416.64 ≈ - 2 417
2 519.04 ≈ 2 519 - 2 417D = - 971H
2 519D = 09D7H, (213)D - 971H = 2000H – 971H = 2168FH,
IN = 09D7H IN = -971H, FX = - 10.0101 1100 012 = -2.5C4H
Fx = 0 0001.0011 1010 111B = In two’s complement
1.3AEH IN = 168FH, Fx = 101.1010 0000 01B = 5.A01H
Placement to word is 0x09D7 Placement to word is 0xF68F, two’s complement
Value in the word IN = F68FH
IN = 09D7H F68FH * 1/210 (scaled by) = 211 1101.1010 0011 11B = 23D.A3CH
Fx = 1.3AEH □ Fx = 3D.A3CH, 2’s complement □
Check Check
Conversion from fixed point.□
Conversion from fixed point is also given by the above-mentioned formulas, (0807) to (0811),
and the known format ℚm.f. The order of application of the formulas is arbitrary, but it is neces-
sary to calculate in the binary or hexadecimal system in the same way as in the decimal
numeral system. In the following text, two basic algorithms are described. The first possibil-
ity calculates the decimal integer number with a sign and then the scaling factor $1/2^{f}$ is ap-
plied, Fig. 08-06. The algorithm is:
Convert the given number to the signed decimal integer. Note that the sign is the
MSB bit and the theory of 2’s complement is used, formula (0810) or (0811).
Multiply the obtained integer number by scaling factor 1/2f. The result is a real
number.
A variation is to calculate two’s complement in the decimal numeral system.
The second way first uses the scaling factor $1/2^{f}$ and then the polynomial of the numeral
system, formula (0808). All calculations are made on the word, which contains the sign-
extended fixed point number, Fig. 08-07.
The given integer number is multiplied by the scaling factor of $1/2^{f}$, in the hexadec-
imal numeral system.
Apply formula (0808) and the result is a real number.
82 VŠB-TU Ostrava
Digital systems for joint teaching programme of BUT and VSB-TUO
The 16 bit number 0x062B with format ℚ3.8
Check
6D = 110B
0.167 968 75D * 16 = …
0.167 968 75D = 0.2BH
6.2BH * $2^{8}$ = 62BH
Sign extension to word it is 0x062B
Word contains 0x062B □
The 16 bit number 0xDCBA with format ℚ4.11
Check
4D = 100B
0.409 179 687 5D * 2 = …
0.409 179 687 5D = 0.68CH
- 4.409 179 687 5D = - 4.68CH
- 4.68CH * $2^{11}$ = - 100.0110 1000 110B * $2^{11}$ = -2346H
Two’s complement with sign extension
(~2346H) + 1 = ${}^{2}$DCBAH = 0xDCBA
Word contains 0xDCBA □
The 16 bit number 0x2ED8 with format ℚ7.7 The 16 bit number 0xE9AB with format ℚ5.9
Check Check
Fig. 08-07 Conversion to fixed point by means of the polynomial of numeral system
VŠB-TU Ostrava 83
8 Fixed point arithmetic
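$\frac{a}{2^{f}} + \frac{b}{2^{f}} = \frac{a+b}{2^{f}}$ (0812)
$\frac{a}{2^{f_1}} * \frac{b}{2^{f_2}} = \frac{a*b}{2^{f_1+f_2}}$ (0813)
$\frac{a/2^{f_1}}{b/2^{f_2}} = \frac{a}{b} \cdot \frac{2^{f_2}}{2^{f_1}}$ (0814)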
Where
Adjustment of the number.□
The adjustment of the number means to change the scaling factor and subsequently the
position of the radix point. The change of the position of the radix point is performed by
the multiplication or division by the power of 2. The multiplication by the power of 2 can
be performed by arithmetic shift left, formula (0815). This shift changes the position of the
radix point and can change the sign. This situation is the overflow.
$A * 2^{i} = A \ll i$ (0815)
Where
Division by the power of 2 can be performed by arithmetic shift right, formula (0816). The
quotient corresponds to floor division, where rounding is towards minus infinity,
[ISO/IEC_0801]. This shift changes the position of radix point and the sign bit stays without
changes.
84 VŠB-TU Ostrava
Digital systems for joint teaching programme of BUT and VSB-TUO
3 2
(-6/2 ) / (2/2) = -3/2
VŠB-TU Ostrava 85
8 Fixed point arithmetic
m1, m2 are sizes of the integer part of operands.
max(m1, m2) is a function that chooses the maximum value from m1 and m2.
f is the size of the fractional part of operands.
The sum is the result of addition. □
An example in which the addition changes the format of the result is in Fig. 08-09. It is the
situation (0.1b + 0.1b = 1.0b), where one bit is generated to the integer part and increases
the size of the integer part. The size of the fractional part stays without any change. An
example of the addition of the negative and positive number is in Fig. 08-10. The negative
number is represented by two’s complement.
8.8 Multiplication
$a/2^{f_1} * b/2^{f_2} = (a * b)/2^{f_1 + f_2}$ □
Fixed point multiplication is defined by the multiplication of fractions (0813), and the same scaling
factor is not required. Operands can have different formats of ℚm.f, therefore the
format of the result is calculated by formula (0818) for unsigned and formula (0819) for
86 VŠB-TU Ostrava
Digital systems for joint teaching programme of BUT and VSB-TUO
signed format, literature [Yates_082009] and [Oberstar_082007]. The result can be changed
to the desired format by first rounding and then adjustment.
Where
A basic algorithm for integer multiplication is only defined for positive operands. If the op-
erand is negative, it is necessary to change the operand to a positive one and then multiply.
The sign of the result is calculated separately; when the result is negative, then 2’s com-
plement is calculated. Special algorithms exist for the multiplication operands in 2’s com-
plement, literature [wiki_0806] and [wiki_0807].
ℚ2.5 0 0 1 0 1 0 1 1 2BH/$2^{5}$
ℚ5.10 0 0 0 0 1 1 0 1 0 0 0 1 1 0 1 0 0D1AH/$2^{10}$
Decimal rational number: $43/2^{5} * 78/2^{5} = 3354/2^{10}$ = 3.275 390 625
An example of the multiplication is in Fig. 08-12 and the product has a new format. When
the same format is expected, the adjustment with rounding is made, Fig. 08-13. The round-
ing is performed by the principle of the rounding to nearest, ties to even, [IEEE 754-2008]
and [wiki_0810].
ℚ5.10 0 0 0 0 1 1 0 1 0 0 0 1 1 0 1 0
R bit = 1
S bit = 1
Then add 1/2ulp
ℚ5.5 0 0 0 0 1 1 0 1 0 0 1
VŠB-TU Ostrava 87
8 Fixed point arithmetic
8.9 Division
$(a/2^{f_1}) / (b/2^{f_2}) = (a/b) * (2^{f_2}/2^{f_1}) = (a/b) * 1/2^{f_1 - f_2}$ □
Division is given by the formula (0814), and it changes the scaling factor. The calculation of
a new format of quotient is complicated and more information can be found in literature
[Yates_082013]. In the situation when the numerator and the denominator have the same
scaling factor, the quotient has no scaling factor. If the result is expected in the same scal-
ing factor, then formula (0814) is modified to formula (0820).
Where
Fig. 08-14 shows the example of the integer division of positive operands. The same scaling
factor for operands and result is used. In case of one negative operand, the quotient de-
pends on the algorithm used. The detailed description of the division algorithms is in litera-
ture [Ercegovac_2004], [Koren_2002] and [wiki_0811].
Calculation in hexadecimal:
(7A/$2^{5}$) ÷ (3/$2^{5}$) = ((7A * $2^{5}$)/3) * (1/$2^{5}$) = (F40/3) * (1/$2^{5}$) = 515 * 1/$2^{5}$
Check: (0x7A/$2^{5}$) / (0x03/$2^{5}$) = (122D/$2^{5}$) / (3D/$2^{5}$) = 3.812 5/0.093 75 = 40.666 666 6…
Fig. 08-14 Example of fixed point division with the same scaling factor
88 VŠB-TU Ostrava
Digital systems for joint teaching programme of BUT and VSB-TUO
8.10 References
[Ercegovac_2004] M. D. Ercegovac, M. Lang; Digital Arithmetic; Morgan Kaufmann
Publishers 2004; ISBN 1-55860-798-6
[IEEE 754-2008] IEEE Std 754™-2008, IEEE Standard for Floating-Point Arithmetic, 29
August 2008, revision of IEEE 754 – 1985
VŠB-TU Ostrava 89
8 Fixed point arithmetic
90 VŠB-TU Ostrava
Digital systems for joint teaching programme of BUT and VSB-TUO
The term of floating point or floating point numbers or floating point data is mainly used in
computing for writing and displaying real numbers, [wiki_0901]. The illustration of floating
numbers is in Fig. 09-01 and a floating point number consists of significant digits with a sign,
scaled by the base raised to the power of n. The base of scale is a number, which defines
the base of the numeral system, e.g. 10 or 2. Significant digits can be a signed integer num-
ber or a signed number with a radix point. In this context, the term of radix point is more
suitable because it does not depend on the base of the numeral system like, for example,
the decimal point or the binary point. In general, the floating point format is given by the
formula (0901).
Where
significant digits with a sign are the digits from the range of 0 to b-1.
base is a base of the numeral system.
exp is an exponent, which is an integer number.
$base^{exp}$ is a scaling factor.
Theoretic notation
VŠB-TU Ostrava 91
9 Floating point numbers
Significant digits and a base are expressed in the defined numeral system but the expo-
nent is always in the decimal numeral system. The decimal exponent is more suitable
for moving the radix point.
The terms decimal point and binary point depend on the numeral system but the radix
point is the general term for all numeral systems.□
The reason for defining the floating point number is the possibility to represent numbers in
a large range. For instance, the distance between galaxies in space is given in light-years;
however, the length of light wave is a small number, in nanometers. Both values may be
used in one computation with the maximal precision.
Scientific notation: $87.6 \times 10^{3}$, $10.01 \times 2^{7}$ □
In the formula (0901), significant digits are often called the significand or the mantissa.
Significand is the newest term, mantissa is a historical term. One problem of the floating
point numbers is how to write down these numbers in all situations. In literature, the clas-
sical typographic or mathematical convention is used but the computer science uses anoth-
er format. The following terms are connected with the floating point number notation. They
are the scientific notation, the normalized representation and the engineering notation.
Scientific notation, this format uses the sign, significant digits, base and exponent,
[wiki_0902]. The significand is any real number. Examples of scientific notation are:
$2.0 \times 10^{2}$, $0.2 \times 10^{3}$, $123 \times 10^{45}$, $12.3 \times 10^{-67}$, $0.123 \times 10^{3}$, $1.23 \times 10^{2}$, $11.01 \times 2^{4}$,
$1.101 \times 2^{5}$ ….
Engineering notation: $87.6 \times 10^{3}$, $3.01 \times 10^{-12}$ □
Engineering notation is a variant of scientific notation with the base 10, where the
E notation, Fig. 09-02, is the notation of a floating point number on one line, where all
parts are written in one row, without the superscript of the exponent, e.g. $10^{6}$. E notation
uses the letter e or E (small e or capital E) to express and separate the exponent, for
instance, 12.3e-3, 12.3E-3, 11.1e2 …. This format is used by a lot of calculators,
spreadsheets and other programs. Programming languages such as Ada, C++,
MATLAB, Scilab, Perl, Java and Python use E notation, [wiki_0902].
E notation: 8.76E4, 1.001E8 □
92 VŠB-TU Ostrava
Digital systems for joint teaching programme of BUT and VSB-TUO
When the number is shifted to the left, the exponent is decremented by one for
each position.
When the number is shifted to the right, the exponent is incremented by one
for each position. □
All these notations are used in real practice and are automatically understood as the
floating point numbers in the decimal numeral system. A notation in the numeral sys-
tems different from the decimal one is solved by special notations and differs case by case.
In the history of computer science, a lot of different definitions and representations existed
about how to represent a floating point number in bytes, words. The pioneers in this field
were Leonardo Torres y Quevedo and Konrad Zuse. In 1914, Torres y Quevedo designed an
electro-mechanical version of the Analytical Engine of Charles Babbage which included a
floating-point arithmetic. In 1938, Konrad Zuse, from Berlin, completed the Z1, the first
mechanical binary programmable computer; this was, however, unreliable in operation. It
worked with 24-bit binary floating-point numbers having a 7-bit signed exponent, a 16-bit
significand (including one implicit bit), and a sign bit. More information is in literature
[wiki_0901], [Randeli_1982] and [Rojas_1997].
IEEE 754, ISO/IEC/IEEE 60559:2011 □
After this period, new computer architectures were developed and each of them had its
own format and properties of the floating point. This led to the fact that these different
definitions were causing problems with the data exchange between users and different
computer architectures. All historical experience led to the IEEE Standard for Floating-Point
Arithmetic (IEEE 754). The first version of this standard was published in 1985 and covered
only binary floating point arithmetic. Subsequently in 1987, the standard IEEE 854-1987 was
published for the radix-independent floating point arithmetic, [wiki_0906]. The second ver-
sion of IEEE 754 was published in 2008 and it includes the original version of IEEE 754-1985
and IEEE 854-1987. The standard IEEE 754-2008 is also the international standard
ISO/IEC/IEEE 60559:2011.
Following explanation will be based on this standard where the base 2 and base 10 of the
numeral system for the floating point data are defined. Literature [wiki_0906] states the
significance of the standard IEEE-754 as follows:
arithmetic formats: sets of binary and decimal floating-point data, which consist of
finite numbers (including signed zeros and subnormal numbers), infinities, and spe-
cial "not a number" values (NaNs)
interchange formats: encodings (bit strings) that may be used to exchange floating-
point data in an efficient and compact form
rounding rules: properties to be satisfied when rounding numbers during arithmetic
and conversions
VŠB-TU Ostrava 93
9 Floating point numbers
The standard also includes extensive recommendations for advanced exception handling,
additional operations (such as trigonometric functions), expression evaluation and for
achieving reproducible results.”
Arithmetic format can be understood as a value of floating point operands and a result of
the operation. Arithmetic format is used for calculation with floating point. Interchange
format is defined by the fields in a word and by the encoding scheme for the purpose of
placing the floating number into the word. Interchange format is useful for exchanging the
floating point data between different computer architectures.
Mantissa was the first historical term for designating significant digits in the floating
point notation. Konrad Zuse used the term of mantissa in the period of 1939–1941 in the
computer Z-3, [Zuse_2008], Burks in 1946 [Burks_1946] and [RFC 0382] in 1972.
Subsequently, the term of significand or fraction is used by the standard IEEE 754-1985
but standard IEEE 754-2008 only uses the term of significand that is denoted either by
the small letters c or m. Both small letters c and m are referred to as numbers in a specif-
ic form. In the other sources, the small letter c is referred to as a coefficient.
The term of mantissa is discouraged by the IEEE floating-point standard committee, be-
cause it conflicts with the pre-existing use of mantissa for the fractional part of a loga-
rithm, [wiki_0905].□
9.1 Significand
Significant digits $\times\ b^{exp}$ □
The significand is the newest and official term for significant digits, and it
is defined by standard IEEE 754-2008. Significand can be thought of as an
integer or a fraction. Used significand digits are defined by the base and
they are from the range of 0 to b-1. The term of coefficient is also used
by this standard but mantissa is not. Mantissa was officially used in the past, and many
people in the computing field and much of the literature still use this term. More information is in
literature [wiki_0905]. Number 12.34 in the decimal numeral system can be written down
in several ways:
$1.234 * 10^{+1}$, the significand is in the form with radix point. This is a normalized
form of the notation.
$0.1234 * 10^{+2}$, the significand is in the form with radix point. This notation is al-
lowed by LIA - Language Independent Arithmetic, [ISO/IEC_0901], and several pro-
gramming language standards, [wiki_0905].
$1234 * 10^{-2}$, the significand is an integer.
$(1234/10^{4}) * 10^{+2}$, the significand is a fraction. Fixed point notation is used.
94 VŠB-TU Ostrava
Digital systems for joint teaching programme of BUT and VSB-TUO
9.2 Precision
Precision as letter p is the maximum number of digits in significand.□
The precision is the maximum number of digits in the significand and it is a
basic parameter of the interchange floating point format. The precision is
denoted by small letter p, IEEE 754. Fig. 09-03 shows the definition of the pre-
cision in the different notation of significand.
In decimal: $1.234567 \times 10^{4} = \frac{1\,234\,567}{10^{6}} \times 10^{4}$
In binary: $1.111101 \times 2^{4} = \frac{111\,1101}{2^{6}} \times 2^{4}$
$d_0 . d_1 d_2 \dots d_{p-1} \times b^{exp} = \frac{d_0 d_1 d_2 \dots d_{p-1}}{b^{p-1}} \times b^{exp}$
Where
Infinity is the value of the floating point data. It is a situation when the result is out of the
representation range. The floating point data have plus or minus infinity. It means that fi- Infinity.□
nite values have minimum and maximum values, which are defined by the interchange
floating point formats. The infinity value can also be the input value of an operation.
NaN, Not a Number.□
Not a Number, abbreviation NaN, is a special value for the situation when the result of an
operation is not defined. For instance, for operations like: a square root of a negative num-
ber with the result as a real number, e.g. $\sqrt{-2}$, or inverse sine of a number higher than 1,
VŠB-TU Ostrava 95
9 Floating point numbers
e.g. arcsin(5). The standard IEEE 754 in this situation defines the result as Not a Number -
NaN. The standard defines two values of NaN, signaling NaN and quiet NaN. More infor-
mation about infinity and NaN is in standard IEEE 754.
Significand in the scientific form.□
The formula (0902) contains a sign, a significand and an exponent. The significand can be
expressed in two ways, either as a number with radix point or as an integer number. The
former one shows the significand in the scientific form (0903); the latter one shows the
significand as a coefficient (0904). Within each format, the following value of the floating-
point data can be represented:
96 VŠB-TU Ostrava
Digital systems for joint teaching programme of BUT and VSB-TUO
Both formulas (0903) and (0904) are equivalent and describe the exactly same finite values
of the floating point number. These values are zero and non-zero numbers. The radix point
is between digits d0 and d-1 in the formula (0903) and after digit dp-1 in the formula (0904).
Formula (0905) defines the relation between the exponent e and the quantum q and for-
mula (0906) for significands.
Significand with scaling: $(-1)^{s} \times c/b^{p-1} \times b^{e}$ □
$e = q + p - 1$ (0905)
$m = c/b^{p-1}$ (0906)
$v = (-1)^{s} \, \frac{c}{b^{p-1}} \, b^{e}$ (0907)
Where
Formula (0907) expresses the finite value of floating point data which is based on a scaling
factor of $1/b^{p-1}$. Subsequently, all formulas, (0903) - scientific form, (0904) - coefficient, and
(0907) - scaling, are equivalent and they correspond to the same finite floating point value.
VŠB-TU Ostrava 97
9 Floating point numbers
for storage purposes where high precision is not needed and they cannot be used
for arithmetic operations, [wiki_0910] and [wiki_0911].
Extended and extendable precision formats whose encodings are not specified, but
may match those of interchange formats, [Muller_2010].
Basic and interchange formats are only defined by the radix, precision and maximum expo-
nent. The remaining needed parameters are in Tables 09-01 and 09-02. The standard allows
to define new formats for the word size higher than 128 bits. The new word size k is the
multiple of 32 and it must be higher than or equal to 128. In general, new names are bina-
ry{k} and decimal{k}. It means that it is possible to define a new binary or decimal format,
e.g. for 320-bit word and others.
The standard IEEE 754-2008 specifies the encoding floating point data into the sequence of
bits and it does not specify the placement in a memory. The placement of floating point
data is given by endianness that specifies the rules of how to place a long word into the
smaller atomic elements, e.g. 128-bit word into the byte oriented memory. The basic kinds of
endianness are big endian and little endian.
Note to formats
Standard defines an interchange and also an extended and an extendable precision for-
mat. Definitions of these formats according to IEEE 754-2008 are:
2.1.33 interchange format: A format that has a specific fixed-width encoding de-
fined in this standard.
2.1.20 extendable precision format: A format with precision and range that are
defined under user control.
2.1.21 extended precision format: A format that extends a supported basic format
by providing wider precision and range.□
98 VŠB-TU Ostrava
Digital systems for joint teaching programme of BUT and VSB-TUO
In the literature, it is possible to find different names of formats for floating point numbers,
some of them are: historical, used in real practice or newly defined. The decimal formats
were defined by standard IEEE 854-1987 for the base-independent numeral system;
[wiki_0906]. However, new standard IEEE 754-2008 defines the floating point data for the
base 2 and base 10. The possible names are:
Binary16, other names are half, half precision. This format is defined by IEEE-754-
2008 only for the storage purposes.
Binary32, other names are single, single precision. Programming languages use the
declaration float or real. This format was defined by IEEE-754-1985.
Binary64, other names are double, double precision. Programming languages use
the declaration double. This format was defined by IEEE-754-1985.
Binary128, other names are quad, quad precision, double-double precision. This
format is defined by IEEE-754-2008.
Decimal32, format is defined by IEEE 754-2008 only for storage purposes.
Decimal64, format is defined by IEEE 754-2008.
Decimal128, format is defined by IEEE 754-2008.
VŠB-TU Ostrava 99
9 Floating point numbers
8-bit binary format, called minifloat, is a format that is not defined by the standard.
It is mainly used for educational purposes and some special purposes, mostly in
computer graphics, [wiki_0912].
80-bit format, called extended precision, was the binary format of floating point da-
ta that was used in some processor architectures and this format is not widespread.
This format was defined by IEEE-754-1985 and it was rejected in 2008.
Significand of a binary normal number has the value of MSB digit d0 equal to 1. Sig-
nificand in the scientific form m is in the range of 1 ≤ m < 2 and the exponent is in
the range of emin ≤ e ≤ emax.
Significand of a binary subnormal number has the value of MSB digit equal to 0.
Significand m is in the range of 0 < m < 1 and the exponent is emin, emin = 1 - emax.
Hidden bit
Hidden bit
Standard IEEE 754-2008 states the representations of floating-point data in the binary in-
terchange formats, where each floating-point number has just one encoding in the binary Unique encod-
interchange format. This property is the unique encoding. ing □
In literature, it is also possible to find the term of denormal number, or denormalized num-
ber, that is the equivalent to the subnormal number. Subnormal numbers fill the underflow
gap around zero and increase the range of representation, [wiki_0909]. Subnormal num-
bers are in the range from minimum subnormal number to less than minimum normal
number.
1-bit sign S, the sign is 0 for positive and 1 for negative floating point data
w-bit biased exponent, E = e + bias
Offset binary
(t = p – 1)-bit trailing significand field digit string T = d1 d2 … dp −1; the leading bit of
representation
the significand, d0, is implicitly encoded in the biased exponent E. MSB bit of signifi-
is used for the
cand d0 is often referred to as the hidden bit.”
exponent e. □
Above, it is the citation of IEEE 754-2008.
S E T
Sign Biased exponent Trailing significand field
E0 …………..… Ew-1 d1 d2………………………………………..………..… dp-1
S E T
Sign Biased exponent Trailing significand field
E0 …………..… E7 d1 d2………………………………………..………..… d23
Fig. 09-05 Definition of fields for binary interchange floating point format
The trailing significand field contains significand without the digit d0, which is called the
hidden bit. The value of this bit is implicitly defined either by the normal or the subnormal
form. In the former one, hidden bit is 1, in the latter one, hidden bit is 0. Capital letter E is
biased exponent.
In text above, the exponent e (small letter e) was used in formulas. The format of the word
uses the biased exponent E (capital letter E). The exponent e is either a negative or positive Small letter e is
number, but the biased exponent E is only a positive number. The biased exponent E is the exponent. □
offset binary representation of an integer number.
The format of the word that is shown in Fig. 09-05, is able to represent all floating point
values as: NaN, infinity, finite value and zero. The first rule of encoding is the value of the
biased exponent E; and sometimes, the second rule is used, i.e. the value of the trailing
significand field T, Fig. 09-06. The detailed description of the encoding binary values is in
the Annex 09A of this chapter.
The standard 754 uses the term of biased exponent or biased number that corresponds
to the offset binary or excess-K representation of signed numbers. The bias is the offset
that is added to the number and it moves the signed numbers to unsigned integers.
Standard 754 defines the bias equal to $2^{w-1} - 1$, where w is the size of the exponent field.
Example:
For w = 4, the bias is equal to 7.
For number -5, the biased number is -5 + 7 = 2.□
S E T
Sign Biased exponent Trailing significand field
E0 …………..… Ew-1 d1 d2………………………………………..………..… dp-1
11… sNaN
11….11 Not a Number
01… qNaN
The Not a Number is the unique encoding with the biased exponent E having only ones and
the trailing significand field T is not zero. The NaN value has two sub-values qNaN and Encoding
sNaN, which are encoded by the value of trailing significand field T. When the trailing field T of NaN□
begins with 1 (d1 = 1), it is a quiet NaN; and when the field T begins with 0 (d1 = 0), it is a
signaling NaN. The real non zero value of trailing field T has the diagnostic purpose. The sign
bit has no influence on the value NaN.
The infinity is encoded by the combination where the biased exponent E is all 1s and the
Encoding
trailing field T is zero. The infinity is plus or minus, according to the sign bit.
of infinity□
Note to the biased exponent E
The normal value is encoded when the biased exponent E is in the range from 1 to $2^{w} - 2$. It
is not all zeros and not all ones in the biased exponent E. Then the exponent e (small letter Encoding
e) is equal to E - bias. The trailing significand field T contains the bit string d1 d2 … dp-1 and it of normal
is a part of significand. The hidden bit is 1, d0 = 1. The whole significand in the scientific numbers□
form is the concatenation of the hidden bit d0 with the radix point and the bit string from
the trailing field T. The finite value can be calculated by formula (0903) where significand m
is in the scientific form. Another possibility is to modify formula (0907) because we know
that the hidden bit d0 is 1. So we get a new formula (0908) for the calculation of the normal
value from the binary interchange format. The fraction in formula (0908) is the fixed point
number with scaling factor.
$v = (-1)^{S} \cdot \left(1 + \dfrac{T}{2^{p-1}}\right) \cdot 2^{E - bias}$ (0908)
$v = (-1)^{S} \cdot \left(0 + \dfrac{T}{2^{p-1}}\right) \cdot 2^{e_{min}}$ (0909)
Where
The subnormal value is encoded when the biased exponent E is all zeros and the trailing
Encoding of
significand field T is not zero. It means that the exponent e is equal to emin. The trailing
subnormal
significand field T contains a part of significand without leading hidden bit d0 that is equal to
numbers□
0. The whole significand in the scientific form is the concatenation of the hidden bit 0 with
the radix point and the value of T field. The value of subnormal form can be calculated by
formula (0903). Another possibility is to use formula (0909) that is derived from formula
(0907) knowing that the hidden bit is equal to 0.
Note to the border between the normal and subnormal numbers
The explanation is given for the real format binary32, where emax is 127, emin is -126
and bias is 127.
The smallest normal number with the exponent e = -126 is $1.0\ldots \times 2^{-126}$, the bi-
ased exponent is E = 1.
The following smaller number is $1.11\ldots \times 2^{-127} = 0.111\ldots \times 2^{-126}$. The second value
is the subnormal number with the exponent e = -126 and the significand is non-zero
with MSB bit equal to zero. For this situation, the biased exponent is E = 0.
When the biased exponent E = 0 and the significand is equal to zero, then the ex-
ponent e = -126 and the value is $0.0 \times 2^{-126} = 0.0$, it is a plus zero.□
Zero is encoded when the biased exponent E and the trailing significand field T are all zeros.
Encoding
Only the sign bit defines the plus zero or the minus zero. When the operation on the float-
of zero□
ing point numbers produces a zero result, the standard prefers a plus zero, so it is a zero
word.
Fig. 09-07 shows the order of values on the number line for the binary floating point data.
There are numbers that cannot be represented. These are numbers between zero and the
area of subnormal numbers. Subnormal numbers are followed by normal numbers, then by
infinity and NaNs.
NaN, Not a Number in two forms, the signaling NaN and the quiet NaN.
Infinity, plus and minus.
Finite numbers, it is the zero and the non-zero decimal numbers with a sign.
Note to the exponent and quantum
Two exponents e and q are used in the interchange floating point formats and the rela-
tion between them is e = q + p -1. For better understanding the differences between
them, the example is used for binary32 and decimal32 format. The minimum exponent is
defined by emin = 1 – emax.
The binary32 format is defined by parameters, precision p2 = 24 bit, emax2 = 127, bias2 =
127. The range of exponent e is -126 ≤ e ≤ 127 and biased exponent E has the range of
1 ≤ E ≤ 254. This range is used by normal numbers to represent finite values.
The decimal32 format is defined by parameters, precision p10 = 7 digit, emax10 = 96, bi-
as10 = 101. This means, that exponent e is in the range of -95 ≤ e ≤ +96 but the quantum
q is in the range of -95 ≤ q + p10 - 1 ≤ 96 => -101 ≤ q ≤ 90 and the biased exponent E has
values in the range of 0 ≤ E ≤ 191. This range of quantum q is used by finite values. Zero,
infinity and NaN are not coded by the biased exponent.□
0.0
minus -1.0 +1.0 plus
Fig. 09-08 Values on the real number line for the decimal format
The formula (0910) defines the finite values of the decimal interchange floating point for-
mat and it is derived from formula (0904). The significand in the decimal interchange for-
mat is in the form of the coefficient C.
Where
S G T
Sign Combination field Trailing significand field
G0 ………….……..… Gw+4 Decimal encoding: J declets give 3×J = p – 1 digits
Binary encoding: t bits give values from 0 through $2^{t} - 1$
Source IEEE 754-2008
Fig. 09-09 Definition fields for the decimal floating point format
The fields of the word, Fig. 09-09, for the decimal interchange floating point format are
defined by the standard IEEE 754-2008 as:
a) 1-bit sign S.
b) A w + 5 bit combination field G encoding classification and, if the encoded datum is
Biased represen-
a finite number, the exponent q and four significand bits (1 or 3 of which are im-
tation is used for
plied). The biased exponent E is a w + 2 bit quantity q + bias, where the value of the
quantum. □
first two bits of the biased exponent taken together is either 0, 1, or 2.
c) A t-bit trailing significand field T that contains J × 10 bits and contains the bulk of
the significand. When this field is combined with the leading significand bits from Declet is a code,
the combination field, the format encodes a total of p = 3 × J + 1 decimal digits.” where three
decimal digits
The format of the word does not contain the value of biased exponent E and significand C in
the direct form, however, these values are encoded in the combination field G and the trail- are encoded
into ten bits. □
ing significand field T. The new term “declet” is defined and used in conjunction with the
decimal encoding of significand C. The declet contains 3 decimal digits and the width of the
declet is 10 bits. Therefore, the size of the trailing significand field is always defined as a
multiple of 10 bits.
The canonical form is a new term which is related to the decimal interchange floating point
format. Canonical means that the bit combination in every field is one that is defined by the
standard IEEE 754-2008. Not all combinations in the fields or declets are used; the unused
combinations are called non-canonical. The canonical form is produced by any float-
ing point operation and the non-canonical form is accepted in operands. The canonical
form relates to all fields, not only to declets. Therefore, some combinations of the combina-
tion field G and the combinations of the field G with the trailing significand field T are non-
canonical.
The values of the decimal interchange floating point format are inferred from the sign,
combination field G and trailing significand field T. The encoding begins with the leading
bits of the combination field G, Fig. 09-10. The detailed description of the encoding of
decimal values is in Annex 09B of this chapter.
G
Combination field G0 ……G5
G0 …………….……..… Gw+4
1111 11… sNaN
Not a Number
1111 10… qNaN
G0 ……G4
E and
Finite non-zero numbers, $v = (-1)^{S} \cdot C \cdot 10^{E - bias}$
d0 or d0d1d2d3
decimal d0 = 0
Trailing significand field Zero plus or minus, (-1)S * 0 * 10q
or binary
T=0 Significand is zero.
d0d1d2d3 = 0000
The value Not a Number is encoded by the combination (G0…G4) = 11111 and the bit G5
encodes a quiet and signaling NaN. The sign and the remaining bits of the G field have no Encoding
influence on the NaN. The T field contains the payload for distinguishing various values of of NaN.□
NaN. The remaining bits (G6 to Gw+4) of NaN in canonical form are equal to zero and the
encoding of the payload is canonical.
The infinity is encoded by the combination (G0…G4) = 11110, regardless of the remaining
Encoding
bits of field G and field T. The sign determines the plus or minus infinity. The canonical form
of infinity.□
of infinity is defined in such a way that the remaining bits of the combination field
(G5…Gw+4) are equal to zero and the trailing significand field T is equal to 0.
The value zero is encoded when the significand is equal to zero, regardless of the quantum.
Encoding
The sign bit determines a plus or minus zero. The significand is equal to zero when both the
of zero. □
trailing significand field T and the leading bits or a digit are equal to zero. The leading bits or
a digit are encoded in the combination field G.
Finite value for the decimal interchange format is given by formula (0911) which is derived
Coefficient C
from formula (0910). The significand as the coefficient C can be expressed by a decimal or a
(capital C) is a
binary integer number. The form of the significand is given by the implementation or it is
significand. □
agreed beforehand. It is impossible to distinguish from the decimal interchange floating
point format whether the coefficient is a binary or a decimal number.
Where
Significand C is the
v is a finite value of the floating point number.
coefficient either in
S is a sign.
the binary or dec-
E is a biased exponent that will be encoded in the binary form.
imal numeral sys-
bias is a constant defined by the format.
tem.□
E-bias is the quantum q; it is the exponent in case the significand is an integer.
C (capital C) is the significand as a coefficient, a decimal or binary unsigned integer.
Fig. 09-11 shows the situation of the encoding decimal significand C10 as a coefficient which
has p decimal digits in the BCD code. The combination field G encodes the biased exponent
E and MSB decimal digit d0 of the significand. The trailing significand field T contains the J
declets of the significand, where each declet encodes 3 BCD digits. Therefore, the trail-
ing significand field T contains the p-1 decimal digits d1d2d3…dp-1. Then the decimal signifi-
cand C10 as a coefficient is the concatenation of MSB digit d0 and the decimal digits from
trailing field, d1d2d3…dp-1. The declet uses the densely-packed decimal encoding which is
described in the subchapter below. The detailed description of the combination field en-
coding is in Annex 09C of this chapter.
Fig. 09-12 shows a similar situation for the binary significand as a coefficient, which has t+4
bits. The combination field G encodes the biased exponent E and the leading 4 bits of the
significand, d0d1d2d3. The trailing significand field T contains the remaining bits of the binary
S G T
Sign Combination field Trailing significand field
G0 ………….……..… Gw+4 J declets give 3×J = p – 1 digits
S G T
Sign Combination field Trailing significand field
G0 ………….……..… Gw+4 t bits
$v = (-1)^{S} \times C_{2} \times 10^{(E_{2} - bias)}$
Decimal32 has the precision p = 7, which means 7 decimal digits, and the range
of the significand is from 0 to $9\,999\,999_{10}$.
The corresponding range in the binary numeral system is from 0 to $98967F_{16}$.
The higher binary numbers are non-canonical. The important point is that the
most significant 4 bits have a value only from $0000_{2}$ to $1001_{2}$.
Therefore, the combination field G of the interchange format encodes only
numbers from 0 to 9.□
The basic approach to the design of the densely packed format is described in literature
[Cowlishaw_2000]:
“The primary advantage of the encoding over a pure binary representation in ten bits is
that no arithmetic is needed for conversion to or from BCD. Only a very few Boolean opera-
tions are needed for conversions – in hardware, encoding or decoding can be achieved with
only 2–3 gate delays; in software, a simple table look-up suffices. In addition, the encoding
has other advantages, for example, the least-significant bit of each digit remains unencod-
ed, which allows bit-per-digit operations to be effected directly.”
9.9 Rounding
The floating point format can represent exactly only some numbers that can be drawn on
the real number line as points. In Fig. 09-13, they are green points and the width of the gap
depends on the precision and the value of the exponent. Between two nearest green
points, there is an infinite amount of numbers that cannot be represented. However, the
non-representable numbers can be the result of operations and it is desirable to place them
to the interchange floating point formats. Therefore, the non-representable numbers must
be rounded to one of the nearest green points.
1.0000 0000 00 x 2e
For half precision, it is:
1.0000 0000 01 x 2e
0.0000 0000 01 x 2e
In mathematics, there are more rules and also names for rounding. The basic rules for
rounding in the floating point arithmetic are in the standard IEEE 754, see [wiki_0906] and
[IEEE 754-2008].
Round to nearest. The number is rounded to the nearest possible number. The
problem occurs, when the rounded number lies in the middle, then the distance to
both nearest points is the same. In the decimal numeral system, it is number 5, e.g.
12.345, the distance to 12.34 and 12.35 is the same and equal to 0.005. There-
Round to near-
fore, two definitions exist for this case of rounding.
est, ties to even,
Round to nearest, ties to even. It means that the number is rounded to the
default rounding.
nearest value. When the number falls into the midway, it is rounded to the
□
even value. This rounding is the default for binary floating point and it is the
recommended default for decimal floating point.
Round to nearest, ties away from zero. It means that the number is round- Round to near-
ed to the nearest value. When the number falls into the midway, it is est, ties away
rounded to the nearest value in the direction from zero. For positive from 0.□
+ 20.4 + 20 + 20 + 20 + 21 + 20
+ 20.5 + 20 + 21 + 20 + 21 + 20
+20.6 + 21 + 21 + 20 + 21 + 20
+ 21.4 + 21 + 21 + 21 + 22 + 21
+ 21.5 + 22 + 22 + 21 + 22 + 21
+ 21.6 + 22 + 22 + 21 + 22 + 21
- 20.4 - 20 - 20 - 20 - 20 - 21
- 20.5 - 20 - 21 - 20 - 20 - 21
- 20.6 - 21 - 21 - 20 - 20 - 21
- 21.4 - 21 - 21 - 21 - 21 - 22
- 21.5 - 22 - 22 - 21 - 21 - 22
- 21.6 - 22 - 22 - 21 - 21 - 22
bers, the division by infinity, the multiplication by infinity and so on. Literature [wiki_0908]
and [Goldberg_1991] list the situations in which the NaN is used.
Operations with a NaN as at least one operand. In this situation the input value
NaN is produced by a previous arithmetic operation.
Indeterminate forms:
Divisions, 0/0 and ±∞/±∞. Notice that the division of a non-zero finite number by zero
results in infinity.
Multiplications, 0 × ±∞ and ±∞ × 0.
Additions, ∞ + (−∞), (−∞) + ∞ and equivalent subtractions.
The standard has alternative functions for powers:
The standard pow function and the integer exponent pown func-
tion define $0^{0}$, $1^{\infty}$, and $\infty^{0}$ as 1.
The powr function defines all three indeterminate forms as invalid
operations and so returns NaN.
Real operations with complex results, for example:
The square root of a negative number.
The logarithm of a negative number.
The inverse sine or cosine of a number that is less than −1 or greater than
+1.
The standard IEEE 754 defines two NaN values, quiet NaN and signaling NaN. The basic
difference is in setting the exception, where only signaling NaN sets the invalid exception
with following trap (interrupt service routine), in case it is enabled. More information about
traps and interrupt handlers is in literature [wiki_0914], [wiki_0915], [wiki_0916], [Mul-
ler_2010] and [Ergovac_Lang_2004]. More information about NaN is in literature [IEEE 754-
2008], [wiki_0908], [Muller_2010], [Ergovac_Lang_2004] and [Goldberg_1991].
9.11 Infinity
Infinity is a normal mathematical term and its use is related to mathematical limits. Infin-
Infinity is a normal
ity in case of the floating point can be understood as a number that is outside the finite
result of some
numbers. The symbol of infinity is ∞, U+221E. Infinity can be produced by arithmetic
operations.□
operations or functions and it can also be the input operand. The following operations
with infinity do not cause the exception with trap handler, [IEEE 754-2008]:
The exceptions are set and the following trap handler can be run when [IEEE 754-2008]:
9.13 Implementation
The implementation of the floating point arithmetic can be realized by hardware or soft-
ware. Hardware implementation is a faster way; the execution time of operations is mini-
mal. Vice versa, software implementation is a slower way and the execution time of opera-
tions is longer.
Hardware realization is known as FPU, Floating Point Unit or coprocessor. This FPU is made
by the producer of processor as a separate unit. Most of currently used processors have
FPU implemented directly. The instruction set of FPU typically contains instructions for the
basic floating point arithmetic as the addition, subtraction, multiplication and division.
More complex functions, such as logarithm or trigonometric functions, are implemented by
software.
Division is one of the slowest operations of a processor for all data types. Division has
no implementation as a combinational circuit. It is performed as a se-
quence of additions and subtractions that is given by the algorithm of the division. It is a
classical digital synchronous system and the sequence is generated by an FSM. □
The first version of standard IEEE 754 was issued in 1985 and its revised version in 2008.
Today, floating point arithmetic according to this standard is implemented in a lot of pro-
cessors and systems. The implementation of binary floating point arithmetic has been long
known and it depends on the producer of a processor. The decimal floating arithmetic ac-
cording to standard IEEE 754-2008 is newer and it has already been introduced in practice. The
website speleotrove.com lists the following implementations of decimal floating point arithmetic ac-
cording to standard IEEE 754-2008, [spel_0901]:
“The decimal-encoded formats and arithmetic described in the new standard now have
many implementations in hardware and software, including:
the hardware decimal floating-point unit in the IBM POWER6 and POWER7 proces-
sors, the firmware (with assists) in the IBM System z9 mainframe, and the hardware
decimal floating-point unit in the IBM System z10 mainframe (see this paper for de-
tails)
SilMinds’ Decimal Floating Point Arithmetic hardware IP Cores Family (see also their
presentation for some details)
Fujitsu’s decimal instructions in the SPARC64 X processor (see presentation, charts
5 & 6).
IBM XL C/C++ for AIX, Linux and z/OS, DB2 for z/OS, Linux, UNIX, and Windows, and
Enterprise PL/I for z/OS; IBM is also adding support to many other software prod-
ucts including z/VM V5.2, System i/OS, the dbx debugger, and Debug Tool Version
8.1
SAP NetWeaver 7.1, which includes the new DECFLOAT datatype in ABAP, with
support for hardware decimal floating-point on Power6
GCC 4.2 and later includes support for the proposed ISO C extensions for decimal
floating point.”
9.14 References
[Burks_1946] Burks, Arthur W.; Goldstine, Herman H.; Von Neumann, John (1946).
Preliminary discussion of the logical design of an electronic computing
instrument. Technical Report, Institute for Advanced Study, Princeton, NJ.
In Von Neumann, Collected Works, Vol. 5, A. H. Taub, ed., MacMillan, New
York, 1963, p. 42:
[Goldberg_1991] David Goldberg: What Every Computer Scientist Should Know About
Floating-Point Arithmetic; published in March, 1991 issue of Computing
Surveys. Copyright 1991, Association for Computing Machinery Inc.
[IEEE 754-2008] IEEE Std 754™-2008, IEEE Standard for Floating-Point Arithmetic, 29
August 2008, revision of IEEE 754 – 1985
[Randeli_1982] B. Randell (1982). From analytical engine to electronic digital computer: the
contributions of Ludgate, Torres, and Bush. IEEE Annals of the History of
Computing, 04(4). pp. 327–341.
[Rojas_1997] R. Rojas: "Konrad Zuse’s Legacy: The Architecture of the Z1 and Z3". IEEE
Annals of the History of Computing 19 (2): 5–15. 1997; http://ed-
thelen.org/comp-hist/Zuse_Z1_and_Z3.pdf; on line 2013-06-18
Each floating-point number has just one encoding in a binary interchange format. To make the en-
coding unique, in terms of the parameters in 3.3, the value of the significand m is maximized by
decreasing e until either e = emin or m ≥ 1. After this process is done, if e = emin and 0 < m < 1, the
floating-point number is subnormal. Subnormal numbers (and zero) are encoded with a reserved
biased exponent value.
1-bit sign S
w-bit biased exponent, E = e + bias
(t = p – 1)-bit trailing significand field digit string T = d1 d2 … dp −1; the leading bit of
the significand, d0, is implicitly encoded in the biased exponent E.
S E T
Sign Biased exponent Trailing significand field
E0 …………..… Ew-1 d1 d2………………………………………..………..… dp-1
The values of k, p, t, w, and bias for binary interchange formats are listed in Table 3.5 (see
3.6). The range of the encoding’s biased exponent E shall include:
The representation r of the floating-point datum, and value v of the floating-point datum
represented, are inferred from the constituent fields as follows:
The calculation of a value v for a normal and/or subnormal form uses the scaling factor
$1/2^{p-1}$. The second possibility is to use the scientific form of significand. The trailing signifi-
cand field contains a bit string of a part of the significand, d1 d2 … dp-1. The biased expo-
nent E determines either a normal or a subnormal form and, according to this determina-
tion, the MSB bit will be 1 or 0. Formulas for the calculation of a value v will be
$v = (-1)^{S} \times 2^{E - bias} \times (1.T)$ for normal numbers and $v = (-1)^{S} \times 2^{e_{min}} \times (0.T)$ for subnormal numbers.
For better understanding, all values of the interchange binary floating point format are
shown in Table 09-A01. The important part is the biased exponent field; if this field has all
ones, the value is NaN or plus/minus infinity, according to the trailing significand field. Val-
ue zero is a very simple combination because the biased exponent and the trailing signifi-
cand field have all zeros; only the sign defines a plus or a minus zero. The preferred combi-
nation for zero is a plus zero, in this case all three fields have all zeros. The remaining com-
binations correspond to the normal or subnormal value.
a) 1-bit sign S.
b) A w + 5 bit combination field G encoding classification and, if the encoded datum is
a finite number, the exponent q and four significand bits (1 or 3 of which are im-
plied). The biased exponent E is a w + 2 bit quantity q + bias, where the value of the
first two bits of the biased exponent taken together is either 0, 1, or 2.
c) A t-bit trailing significand field T that contains J × 10 bits and contains the bulk of
the significand. When this field is combined with the leading significand bits from
the combination field, the format encodes a total of p = 3 × J + 1 decimal digits.
S G T
Sign Combination field Trailing significand field
G0 ………….……..… Gw+4
Decimal encoding: J declets give 3×J = p – 1 digits
Binary encoding: t bits give values from 0 through $2^{t} - 1$
The representation r of the floating-point datum, and value v of the floating-point datum
represented, are inferred from the constituent fields as follows:
The NaN payload is encoded similarly to finite numbers described below, with G
treated as though all bits were zero. The payload corresponds to the significand of
finite numbers, interpreted as an integer with a maximum value of $10^{3 \times J} - 1$, and
the exponent field is ignored (it is treated as if it were zero). A NaN is in its pre-
ferred (canonical) representation if the bits G6 through Gw+4 are zero and the en-
coding of the payload is canonical.
b) If G0 through G4 are 11110 then r and $v = (-1)^{S} \times (+\infty)$. The values of the remaining
bits in G, and T, are ignored. The two canonical representations of infinity have bits
G5 through Gw+4 = 0, and T = 0.
c) For finite numbers, r is (S, E − bias, C) and $v = (-1)^{S} \times 10^{(E - bias)} \times C$, where C is the
concatenation of the leading significand digit or bits from the combination field G
and the trailing significand field T, and where the biased exponent E is encoded in
the combination field. The encoding within these fields depends on whether the
implementation uses the decimal or the binary encoding for the significand.
1. If the implementation uses the decimal encoding for the significand, then
the least significant w bits of the exponent are G5 through Gw+4. The most
significant two bits of the biased exponent and the decimal digit string d0
d1…dp−1 of the significand are formed from bits G0 through G4 and T as fol-
lows:
i. When the most significant five bits of G are 110xx or 1110x, the
leading significand digit d0 is 8 + G4, a value 8 or 9, and the leading
biased exponent bits are 2G2 + G3 , a value 0, 1, or 2.
ii. When the most significant five bits of G are 0xxxx or 10xxx, the
leading significand digit d0 is 4G2 + 2G3 + G4, a value in the range of
0−7, and the leading biased exponent bits are 2G0 + G1, a value 0, 1,
or 2. Consequently if T is 0 and the most significant five bits of G
are 00000, 01000, or 10000, then $v = (-1)^{S} \times (+0)$.
2. Alternatively, if the implementation uses the binary encoding for the signif-
icand, then:
i. If G0 and G1 together are one of 00, 01, or 10, then the biased ex-
ponent E is formed from G0 through Gw+1 and the significand is
formed from bits Gw+2 through the end of the encoding (includ-
ing T).
ii. If G0 and G1 together are 11 and G2 and G3 together are one of 00,
01, or 10, then the biased exponent E is formed from G2 through
Gw+3 and the significand is formed by prefixing the 4 bits (8 + Gw+4)
to T.
Table 09-B01 shows the encoding of the combination field G for Not a Number and infinity.
In case of NaN, the trailing significand field contains a payload, which describes NaN in de-
tails. In case of infinity, the trailing field has no reason. Table 09-B02 continues in the en-
coding for finite values. In this case, the combination field contains a biased exponent and a
leading digit or bits of significand. A decimal significand uses densely packed decimal encoding in the
trailing field and a binary significand is placed into the trailing field directly without encod-
ing. Zero is encoded by the significand equal to zero.
Table 09-B01 Values of NaN and infinity in the decimal interchange format
Combination
Sign Trailing field Significand Value
field G
v = (−1)S × C10 x 10E−bias
0/1 E d0 d1 … dp-1 C10 =d0 + d1 … dp-1
Significand C is not zero
v = (−1)S × C2 x 10E−bias
0/1 E d0d1d2d3 d4d5d6d7….dt+3 C2= d0d1d2d3 + d4….dt+3
Significand C is not zero
Plus or minus zero
0/1 0 0 C=0
Significand C is zero
Explanation:
Operator “+”is the overloaded operator and it means the concatenation
S G T
Sign Combination field Trailing significand field
G0 ………….……..… Gw+4 Decimal encoding: J declets give 3×J = p – 1 digits
Binary encoding: t bits give values from 0 through $2^{t} - 1$
Source IEEE 754-2008
Fig. 09C-01 Definition of fields in the decimal floating point format
In case that the coefficient is in the binary numeral system, the trailing significand field con-
tains the least significant t bits of the coefficient. The most significant 4 bits are encoded in
the combination field.
The following tables show the encoding scheme of the biased exponent E and leading bits
or a decimal digit. Table 09C-01 shows the detailed encoding of the leading decimal digit
and biased exponent from the combination field. Table 09C-02 shows the encoding scheme
for value zero.
Table 09C-03 shows the detailed encoding scheme of the leading bits of the binary coeffi-
cient and the biased exponent. Table 09C-04 shows the encoding scheme for value zero.
Leading
Combination field
Biased exponent E decimal
Note G Note
digit
G0 ….. Gw+4 E0 …. Ew+1 d0
G0 … G3 1111 ….. NaN or infinity
11101 … 10 + G5…Gw+4 9 Decimal digit is 8 plus G4
11100 … 10 + G5…Gw+4 8
11011 … 01 + G5…Gw+4 9 These are all combina-
G0 … G4 11010 … 01 + G5…Gw+4 8 tions for digits 8 and 9,
11001 … 00 + G5…Gw+4 9 with all combinations of
11000 … 00 + G5…Gw+4 8 the most significant 2
bits of biased exponent.
10111 … 10 + G5…Gw+4 7 Decimal digit is 0G2G3G4
10110 … 10 + G5…Gw+4 6
10101 … 10 + G5…Gw+4 5 These are all combina-
10100 … 10 + G5…Gw+4 4 tions for digits 0 to 7,
G0 … G4
10011 … 10 + G5…Gw+4 3 with one combination of
10010 … 10 + G5…Gw+4 2 102 as the most signifi-
10001 … 10 + G5…Gw+4 1 cant 2 bits of biased ex-
10000 … 10 + G5…Gw+4 0 ponent.
01111 … 01 + G5…Gw+4 7
01110 … 01 + G5…Gw+4 6 Decimal digit is 0G2G3G4
01101 … 01 + G5…Gw+4 5
These are all combina-
01100 … 01 + G5…Gw+4 4
G0 … G4 tions of digits 0 to 7, with
01011 … 01 + G5…Gw+4 3
one combination of 012
01010 … 01 + G5…Gw+4 2
as the most significant 2
01001 … 01 + G5…Gw+4 1
bits of biased exponent.
01000 … 01 + G5…Gw+4 0
00111 … 00 + G5…Gw+4 7
Decimal digit is 0G2G3G4
00110 … 00 + G5…Gw+4 6
00101 … 00 + G5…Gw+4 5
These are all combina-
00100 … 00 + G5…Gw+4 4
G0 … G4 tions of digits 0 to 7, with
00011 … 00 + G5…Gw+4 3
one combination of 002
00010 … 00 + G5…Gw+4 2
as the most significant 2
00001 … 00 + G5…Gw+4 1 bits of biased exponent.
00000 … 00 + G5…Gw+4 0
Explanation:
Blue colored bits belong to the biased exponent E.
Red colored bits determine the leading decimal digit d0.
Operator “+” is the overloaded operator and it means the concatenation of
strings.
Table 09C-01 Encoding scheme of biased exponent and leading digit for decimal coeffi-
cient
Combi- Leading
Trailing signifi-
nation decimal
Note cand field T Value Note
field G digit
G0 … Gw+4 d0
10000 …
Significand C is
G0 to G4 01000 … 0 =0 (-1)S x 0
equal to 0
00000 …
11101 … 9
through
10000 … 0 ≠0 Finite values
01111 … 9
G0 to G4 through (-1)S x C x 10E-bias Significand C is
01000 … 0 ≠0 not equal to
00111 … 9 zero
through
00000 … 0 ≠0
Note: declet 000 is encoded by 10-bit tuple (00 0000 0000)B
Explanation:
Blue colored bits belong to the biased exponent E.
Red colored bits determine the leading decimal digit d0.
Table 09C-02 Encoding scheme for zero and finite numbers for decimal coefficient
Combination Leading 4
Biased exponent E
Note field G bit binary Note
G0 …. Gw+4 E0 ….. Ew+1 d0d1d2d3
1111 ………… NaN or infinity
1110 ……… 1 1001
10 + G4 … Gw +3
1110 ……… 0 1000
G0 G1 G2 G3…… Gw+4 1101 ……… 1 1001
01 + G4 … Gw +3
1101 ……… 0 1000
1100 ……… 1 1001
00 + G4 … Gw +3
1100 ……… 0 1000
10 ……… 111 0111
10 ……… 110 0110
10 ……… 101 0101
10 ……… 100 0100
G0 G1 …… Gw+2 … Gw+4 10 + G2 … Gw+1
10 ……… 011 0011
10 ……… 010 0010
10 ……… 001 0001
10 ……… 000 0000
01 ……… 111 0111
01 ……… 110 0110
01 ……… 101 0101
01 ……… 100 0100
G0 G1 …… Gw+2 … Gw+4 01 + G2 … Gw+1
01 ……… 011 0011
01 ……… 010 0010
01 ……… 001 0001
01 ……… 000 0000
00 ……… 111 0111
00 ……… 110 0110
00 ……… 101 0101
00 ……… 100 0100
G0 G1 …… Gw+2 … Gw+4 00 + G2 … Gw+1
00 ……… 011 0011
00 ……… 010 0010
00 ……… 001 0001
00 ……… 000 0000
Explanation:
Blue colored bits belong to the biased exponent E.
Red colored bits determine the leading 4-bit tuple of binary coefficient, d0d1d2d3.
Operator “+” is the overloaded operator and it means the concatenation of
strings.
Table 09C-03 Encoding scheme of biased exponent and leading bit for binary coefficient
Explanation:
Blue colored bits belong to the biased exponent E.
Red colored bits determine the leading 4-bit tuple of binary coefficient, d0d1d2d3.
Table 09C-04 Encoding scheme for zero and finite numbers for binary coefficient
The floating point arithmetic also contains the basic mathematical operations and func-
tions. These functions are, e.g., trigonometric functions, logarithms, exponentiations and so
on. The inputs of these operations are operands in the interchange floating point format
and the result must be in the canonical interchange floating point format. During the per-
formance of the operations or functions, the own format is used to ensure the highest ac-
curacy of the result. The calculated result can have a higher precision than the interchange
floating point format requires. The increasing number of bits in the result can be seen in the
following examples. The addition of numbers can increase the size of the integer part by
one order, 1.01B + 1.01B = 10.1B, 9D + 4D = 13D. The multiplication has the maximum product Canonical format
size, which is the addition of the sizes of both operands, 1.01B * 1.01B = 1.1001B or 8D * 16D is a format de-
= 128D. These principles are valid for any numeral system. However, it is expected that the fined by IEEE
result will be in the canonical interchange floating point format. After each performance of 754.□
the floating point arithmetic, it is necessary to perform the normalization, rounding and
setting of exceptions.
$v = (-1)^{S} \times \left(1 + \frac{T}{2^{p-1}}\right) \times 2^{e}$ (1001)
$v = (-1)^{S} \times \left(0 + \frac{T}{2^{p-1}}\right) \times 2^{e_{min}}$ (1002)
Where
The normal form has the leading bit of the significand equal to 1, e.g. $1.0001 \times 2^{3}$. The sub-
normal form has the leading bit of the significand equal to 0 and the exponent e is equal to
emin. The example of a subnormal number in binary32 is $0.001 \times 2^{-126}$. □
Decimal interchange floating point format has values that are defined by formula (1003).
Significand is a coefficient that is an unsigned decimal or an unsigned binary integer. Signifi-
cand in the decimal interchange format has only the preferred forms, Fig. 10-01. When the
number of valid digits is less than precision p, then there are more preferred forms. The
recommended form depends on the operation and the standard IEEE 754-2008 recom-
mends these forms in details. When the number of digits is equal to precision p, the num-
ber stays without changes. If the number of valid digits is higher than precision p, then
there is only one preferred form. The number has to be rounded to the p digits by incre-
menting the exponent. [Internet_1001].
0123400 * 10-2
1234000 * 10-3
Fig. 10-01 Preferred significand for the decimal format with precision p = 7
After the normalization, rounding and setting exceptions are performed. Not all bits of the
calculated result are needed for performing these operations. Therefore, the calculated
result is transformed to the format with the significand with p bits or p digits and the auxil-
iary bits or digits. The auxiliary bits are called a guard bit, a round bit and a sticky bit,
[Koren_2008]. It is possible to find different names in literature, however, the meaning of
these bits is:
Additional integer bit, this bit is used by the normalization of the binary floating
point data and the result is shifted to the right. This bit is only significant, when the
integer part of the result can have 2 bits. After the right shift, a new value is as-
signed to a guard, a round and a sticky bit.
Guard bit is significant for the binary floating point format and normalization.
Guard bit is a bit on p position in the significand, it is dp. Some results of the opera- Guard, round
tions can have the integer part of the result equal to zero. In this case, the logical and sticky bit.□
left shift is performed as the normalization. After this normalization, a guard bit is
not needed and only the round and sticky bits remain. When the normalization is
not necessary, the guard bit is removed and the round and sticky bits are shifted to
the left by one position. A new value of sticky bit is calculated. More information is
in literature [wiki_1001], [Muller_2010] and [Koren_2008].
Round bit or a round digit are used for rounding, [wiki_1001] and [Koren_2008]. At
the beginning of final operations, the round bit is following the guard bit for a bina-
ry floating number. For a decimal floating number, the round digit is placed in the
p+1 position from the leading digit.
Sticky bit is calculated and it is used for rounding. The sticky bit is placed behind the
round bit. The sticky bit is always the logical OR of the remaining least significant
bits of the result behind the round bit, [wiki_1001] and [Koren_2008]. The sticky bit
determines whether the result is exactly in the middle of the ulp – unit in the last
place or not. When sticky is zero, the result is in the middle of the ulp, when sticky
is non zero, it means that the result is out of the middle of the ulp. This is a problem
of, for example, the number 2.500… that should be rounded to nearest. This num-
ber lies in the middle of the ulp, therefore rounding down to 2 or up to 3 can be
done. But the number 2.500…01 has a sticky bit equal to one, and then the nearest
number is 3.
Calculated result
Logical OR
Precision
p
MSB LSB
0 p-1 GRS
G - Guard bit
R - Round bit
Additional integer bit S - Sticky bit
Significand
Fig. 10-02 Auxiliary bits in binary format, guard, round and sticky bit
All these bits are used in the binary floating point arithmetic, where the scientific form of
significand is used and results have more bits than the precision p. The guard bit and the
round bit are created simply by adding names to the bits in the correct position. The sticky
bit is calculated by the logical OR of the remaining bits, Fig. 10-02. The decimal floating
point uses the creation of the preferred form. When the number of digits in the result is
higher than precision p, rounding to the leading p digits is applied. Therefore, the round
digit and the sticky digit are significant.
10.1 Rounding
Rounding ensures that the result in the canonical floating point form is the most accurate.
More information about rounding is in one of the previous chapters. The standard IEEE
754-2008 states 5 principles of rounding:
Formula (1004) defines ulp for binary format, where significand is expressed in scientific
form and formula (1005) defines ulp for decimal format, where significand is expressed as
coefficient. All decimal formats have the same significand of ulp.
Where
Rounding has errors, the first error is that the result loses bits and thus the accuracy of the
result. Next errors of rounding are brought by the application of the associative and distrib-
utive rules into the computation and also by the number of rounding. Fig. 10-04 shows the
example of the decimal computation with the precision p = 7. The application of the associ-
ative rule in the addition of three numbers a, b, c and the rounding after each addition gen-
erates different results. The principle round to nearest was used, [wiki_1005].
-4 -5 -6
a = 1 234 567 * 10 b = 3 456 746 * 10 c = 1 000 088 * 10
a = 123.456 7 b = 34.567 46 c = 1.000 088
a+b+c 159.024 248 a+b 158.024 16 a+c 124.456 788 b+c 35.567 548
round 158.024 2 round 124.456 8 round 35.567 5
plus c 159.024 288 plus b 159.024 248 plus a 159.024 248
round 159.024 2 round 159.024 3 round 159.024 2 round 159.024 2
The performance of rounding is made by adding 0, ½ ulp or ulp value to the result. The ad-
dition may change all digits of the significand because the carry is generated, [Koren_2008].
Therefore, after rounding it is necessary to check the interchange format and to set the Rounding as ad-
exception. Fig. 10-05 shows the table for the binary rounding to nearest, ties to even, dition.□
where the rounding depends on the LSB bit of significand, the round and sticky bit,
[Koren_2008]. Fig. 10-06 shows the table for the binary direction rounding, where the
rounding depends on the sign of significand and the round and sticky bit, [Koren_2008].
10.2 Exception
The exceptions characterize the result of floating point operation, [Muller_2010] and [IEEE
754-2008]. The practical realization uses more exceptions than the standard IEEE 754-2008
defines. The first setting of exceptions is made by operations or functions. The second set-
ting is made by normalization and rounding. More information is in the previous chapter.
The exceptions according to the standard are:
Invalid operation.
Division by zero.
Overflow.
Underflow.
Inexact.
The normalization and rounding can cause overflow or underflow. Normalization is the shift
with the correction of exponent and rounding is an addition. The result of both operations
can be out of the range of representation. Rounding always sets the inexact exception.
Post-normalization, the result is adjusted to the normal form by shifting the signifi-
cand with the correction of exponent. In some cases, the guard bit is used.
Checking exceptions.
Rounding. Post-normalized result is rounded. The standard 754 defines the possibil-
ities of rounding. The default principle of rounding is rounding to nearest, ties to
even.
Return to the first step until the result and exceptions do not change.
The normalization or the creation of preferred significand is realized by shifting with the
correction of exponent. Fig. 10-07 shows the basic principles of the normalization. The basic
principles of the normalization are:
When the number is shifted to right, the exponent is incremented by one for each
position.
When the number is shifted to left, the exponent is decremented by one for each
position.
OR
G - Guard bit R - Round bit S - Sticky bit
G R S
This is the calculated result with marking a guard, a round and a
10.011 1 0 0
sticky bit.
Normalization by shifting to right, after which the guard bit is
1.001 1 1 0 0
rejected. A new round bit and a new sticky bit are calculated.
OR
Position of the round bit is moved by one to the left and a new
1.001 1 1 sticky bit is logical OR of the remaining bits.
1.001 1
+0.000 1 Rounding to nearest, ties to even means to add 1/2ulp to
1.010 the number. It is 1 in R position.
The example of the final operation is in Fig. 10-08. The intended precision p is 4 bits. The
result has 2 bits in the integer part and a guard, a round and a sticky bit are labeled. The
first operation is the logical right shift to normalize the number. A guard bit is rejected be-
cause it is not needed. A round bit has a new shifted value. The new value of the sticky bit
is the logical OR of the previous values of the round and sticky bits. The next step is round-
ing. The result is positive and lies in the upper half of ulp. Fig. 10-05 contains the rules for
rounding to nearest, ties to even. The LSB, round and sticky bits are equal to 1, therefore
1/2ulp is added. After these operations, the auxiliary bits lose their function.
For 8 bits, p = 4
S E E E E T T T The bias is b=7, emax is +7 and emin is -6.
The precision is p = 4, trailing field has 3 bits and
Sign Biased ex- Trailing
MSB bit of the significand is hidden.
ponent
Plus numeral line
0.0 0.001 x 2-6 T=0 T≠0
Exponent. The biased exponent has 4 bits. The bias is 7, maximal exponent emax is
7, minimal exponent emin is -6, (emin = 1 - emax).
NaN. The biased exponent E is 0xF and the trailing field is not zero.
Infinity. The biased exponent E is 0xF and the trailing field is zero.
Normal form of finite numbers. The MSB bit is 1 as a hidden bit. The biased expo-
nent E is in the range from 1 to 0xE. Then, the exponent e is in the range
from -6 to +7.
Subnormal form of finite numbers. The MSB bit is 0 as a hidden bit. The biased ex-
ponent E is 0x0, the exponent e is -6.
Binary subtraction.□
The subtraction of $1.001 \times 2^{-3} - 1.101 \times 2^{-2}$ in minifloat
The mathematical definition of the addition and the subtraction is given by formulas (1006)
and (1007). In both formulas, the finite value of floating point data is supposed. The opera-
tions with infinite number and NaN are described in detail by [IEEE 754-2008].
Where
Hardware realization of the floating point addition and subtraction has two parts. The first
part deals with the exponent and the second part deals with the addition and the subtrac-
tion of significands. The binary floating point addition and subtraction use the integer bina-
ry adder and two’s complement. Description of the realization is in literature [Mul-
ler_2010], [Koren_2008] and [Ergovac_Lang_2004], in detail.
-1.010 * 2-1 Result with the exponent. The exponent was incremented.
10.6 Multiplication
Multiplication of two floating point numbers has more parts. The first one is a separate
calculation of the resulting sign. On the bit level, it is logical XOR operation of both sign bits.
The second part is a separate calculation of the exponent of the result. It is the addition of
both exponents. The next step is a separate multiplication of the significands. The fixed
point principles are used, therefore the binary multiplication of integer numbers is applied.
The operands have the scaling factor of $1/2^{(p-1)}$, then the product has the scaling factor of
$1/2^{2(p-1)}$. The scaling factor of the product determines the position of radix point. The next
steps, the normalization, rounding and setting of exception are made. Fig. 10-12 and Fig.
10-13 show the examples of multiplication in the binary and decimal numeral systems.
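$\mathrm{product} = \left((-1)^{S_1}\, m_1 \times 2^{E_1}\right) \cdot \left((-1)^{S_2}\, m_2 \times 2^{E_2}\right) = (-1)^{(S_1 \,\mathrm{xor}\, S_2)}\, (m_1 \cdot m_2) \times 2^{(E_1 + E_2)}$ (1008)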
Where
The hardware realization of the binary multiplication can be a combinational logical circuit,
which can contain p-1 binary ripple-carry adders. For the binary64 format, where the preci-
sion is p = 53, the multiplier has 52 binary ripple-carry adders. This realization has a high
propagation delay and the design of the binary multiplier with a small propagation delay is
described in literature [Muller_2010], [Koren_2008] and [Ergovac_Lang_2004].
10.7 Division
Division of floating point numbers is defined by mathematical formula (1009), where both
floating point data are finite values. The division with the zero, infinity and NaN is described
by [IEEE 754-2008].
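$\mathrm{quotient} = \left((-1)^{S_1}\, m_1 \times 2^{E_1}\right) / \left((-1)^{S_2}\, m_2 \times 2^{E_2}\right) = (-1)^{(S_1 \,\mathrm{xor}\, S_2)}\, (m_1 / m_2) \times 2^{(E_1 - E_2)}$ (1009)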
Where
The floating point division only has the quotient, not the remainder. The algorithms of the
floating point division are described in detail in literature [Muller_2010], [Koren_2008],
[Ergovac_Lang_2004] and [wiki_1007]. The hardware realization of the floating point divi-
sion is a digital synchronous system, where the algorithm is implemented by FSM – Finite
State Machine. The division is considered the slowest operation in computer.
10.8 References
[EETimes_1001] Clive Maxfield: Design How-To, An introduction to different round-
ing algorithms; EETimes 1/4/2006,
http://www.eetimes.com/document.asp?doc_id=1274485&page_number=
1; on line 2014-08-04
[IEEE 754-2008] IEEE Std 754™-2008, IEEE Standard for Floating-Point Arithmetic, 29 August
2008, revision of IEEE 754 – 1985
http://ljk.imag.fr/membres/Carine.Lucas/TPScilab/JMMuller/ulp-toms.pdf;
on line 2014-08-05
At the beginning of the communication based on the electricity principles, the text was
transferred by electrical impulses and each letter of the alphabet was defined by a ASCII – American
sequence of impulses. Morse alphabet was the first widespread code and it was used for Standard Code
transferring the text, [wiki_1101]. The following important code was the 5-bit code that for Information
was used in the telex (teletype machine) for transferring the text, [wiki_1102], [wiki_1133]. Interchange.□
And in the 1960s, the ASCII was defined and standardized. This is an important milestone in
the history of encoding the characters. The ASCII code is used in the communication and
computers. The original ASCII code only contains the American alphabet. Later, when per-
sonal computers were introduced, the ASCII code was modified by adding national alpha-
bets. Today, the Unicode is a successor.
A current text on the monitor not only has text information but a graphical meaning and
properties as well. This text can be colored and different fonts, typeface and other features
can be used. A displayed text is not only a technical matter but rather a graphical design
that comes from the printing industry. In the time of computers, some terms from the
printing industry have changed or have a new meaning.
11.1 Terminology
The typography is a predecessor of today’s display of information by a computer. In the
information technology, a new terminology is used or the old terms have a new meaning. A
lot of meanings are taken from the Unicode.
“Character is the smallest component of written language that has semantic value;
refers to the abstract meaning and/or shape, rather than a specific shape (see also
glyph), though in code tables some form of visual representation is essential for
the reader’s understanding.
Synonym for abstract character.
The basic unit of encoding for the Unicode character encoding.
The English name for the ideographic written elements of Chinese origin.”
A character has a name and a basic glyph. It carries no information about the properties,
for example fonts, color, size and so on. □
Glyph is a way of representing a character. Glyph defines the shape of a character, litera-
ture [Internet_1101], [Unicode_1102] and [wiki_1105]. The difference between the charac- Character and
ter and the glyph is shown in Fig. 11-01 and Fig.11-02, [Unicode_1103]. One character can glyph.
□
Character encoding; it is an assignment of one element from some kind of encoding sys-
tem. A character can be encoded by a number, a sequence of electrical pulses or flags and
so on. In computer, character encoding is an assignment of a number that is called the code
of a character. Then each character is defined by its code, textual definition and by a basic
glyph, [wiki_1106] and [Unicode_1104].
[Wiki_1135] defines the character encoding in this way: “Computers and communication
equipment represent characters using a character encoding that assigns each character to
something — an integer quantity represented by a sequence of bits, typically — that can be
stored or transmitted through a network.”
Character set is a collection of characters and their encoding scheme that is used for repre-
senting information. The ASCII character set is famous; the next sets are the Unicode set Character set.□
and others, [Unicode_1105]. Some literature does not make differences between the char-
acter encoding and the character set.
Font is “a collection of glyphs that are used for the visual depiction of character. A font is
Font is a file that
often associated with a set of parameters (for example, size, posture, weight, and ser-
defines glyphs
ifness), which, when set to particular values, generate a collection of imagable glyphs”,
for characters.□
[Unicode_1106]. Wikipedia defines a computer font as a file which has a set of glyphs,
[wiki_1107].
Script is “a collection of letters and other written signs or diacritics that are used to repre-
sent textual information in one or more writing systems (languages)”, [Unicode_1107]. For
example, the Czech script is defined by the Czech alphabet, German script is defined by the Script.
□
German alphabet and so on. In result, all these national scripts are subsets of one script,
the Latin script. It means that the Latin script contains the definition of all national letters in
the languages where the Latin alphabet is a base. The same applies to the Cyrillic script,
where Russian is written with a subset of the Cyrillic script; Ukrainian is written with a dif-
ferent subset. Some countries have more scripts, e.g. the Japanese writing system uses
several scripts, [Unicode_1107].
Typeface, [wiki_1108], in typography, it means more fonts, where all glyphs of a character
have the same properties, signs or slope. In other words, the typeface defines common Serif, Sans Serif,
design features that are shared by all fonts with the same typeface. Therefore more fonts Handwriting,
belong to a typical typeface and each kind of typeface has its name. Among famous type- Console.□
faces belong Serif, Sans Serif (also known as gothic), handwriting, calligraphy, console and
others. The examples of the typefaces are in Fig. 11-03.
Each font has four basic typefaces, normal, italic, bold and italic-bold, Fig. 11-04. These
typefaces are historical but, in computer, these typefaces are well-known and they are as-
Italic and bold.□
signed to each font. They are defined as separate files or by means of mathematics for vec-
tor fonts. The definition of these typefaces as separate fonts is preferred to reach a better
quality of displayed glyphs.
that each letter has a different width and a non-proportional one has a constant width.
Other terms for a non-proportional typeface are the monospaced, fixed space and console Proportional and
typeface. monospaced.□
Text Typeface
imlw imlw Proportional
imlw imlw Non-proportional or monospaced or console
Dot is the smallest element of graphics from the technical Fig. 11-06 Relief printing in the typewriter
point of view. Dot is controllable; it means each dot has its http://en.wikipedia.org/wiki/File:Typewriters.jpg
address, intensity of color and other properties. Dot has
different meanings according to the computer equipment. In printer, a dot is the smallest
point of one color that can be printed. In LCD monitor, a dot is the smallest point of one
color and a pixel has three dots, red, green and blue. A monochrome monitor only has dots.
Pixel is the smallest element of graphics, a picture or digital art. Each pixel is controllable, it
means each pixel has its address, color and other properties, [wiki_1111]. Pixels are typi-
cally used as elements of a graphic file and for defining the properties of LCD monitors,
scanners and cameras.
DPI, Dots per Inch, this parameter can be found in the specification of a printer, where DPI
means the number of dots in one inch. DPI parameter is indicated in the specification of
printers or scanners. In case of inkjet printers, dot is a drop.
PPI, Pixels per Inch, this parameter can be found in the specification of monitors, cameras
and as a parameter of raster graphic files or programs.
Pixel graphics, this term relates to the definition of graphics in the information technology,
where all graphical objects are defined by pixels. The term of raster graphics is also used for
this principle.
Vector graphics, this term relates to the definition of graphics, where all graphical objects
are defined by geometrical primitives such as points, lines, curves, circles, and so on. Every
geometrical primitive can be colored and it has other properties. It means that all graphical
objects are defined by maths as lines, vectors, Bezier curves and so on. A vector graphic
principle as an output representation is used by a cutting plotter.
3D is the newest 3-dimensional technology and it can be defined by pixels or vectors. To-
day, 3D devices are 3D scanners, 3D monitors, 3D cameras and 3D printers.
11.2 Fonts
Font is a set that defines glyphs for any character. The first fonts were used in the typogra-
phy industry, where the font is defined mechanically by a relief. In the computer area, a
Character.□
letterpress font was used at the beginning of printing for electric typewriters, raw and dai-
sywheel printers. When terminals or monitors began to be used, the font was defined by a
file. Today, it is possible to find three main definitions of fonts, a bitmap and two vector
definitions, [wiki_1107].
Bitmap fonts consist of the definition of dots or pixels in the matrix for representing
each glyph. A bitmap font is also called a raster font.
Outline fonts, each glyph in outline fonts is defined by outer curves. Bezier curves
are used for the mathematical definition of glyphs. An outline font is also called a
vector font.
Stroke fonts use a series of specified lines, shapes and additional information to de-
fine a final glyph. A glyph consists of more shapes.
more space for small and capital letters, punctuation, etc. The spaces between the glyphs
and rows are composed into the matrix. Examples of possible glyphs are in Fig. 11-08.
Each raster font has one typeface and its height of glyphs. When a different height is re-
quired, then it is suitable to define a separate font for each height. The scaling of a raster
font is problematic and it leads to the deformation of glyphs. Each glyph in the matrix has ANSI escape
properties that define color, light intensity, inversion, flashing and so on. These properties code.□
in the text terminal are defined by the ANSI escape code, [wiki_1112].
The definitions of raster fonts are typically placed into the file in a specific format. These
formats are Portable Compiled Format (PCF), Glyph Bitmap Distribution Format (BDF),
Server Normal Format (SNF) and others. Raster fonts were the first fonts in computer area
and they are used till today in terminals and a lot of dot matrix printers or inkjet printers as
default fonts.
The first collection of outline fonts was destined for desktop Fig. 11-09 Principle of outline
font
publishing in the 1980s and Adobe Systems with its Post- PostScript is a
Script Type 1 Font was the first. Today, PostScript fonts are used in pdf documents. As a language for
competition to this, Apple and Microsoft created their own format of TrueType, also in the vector graphics.□
1980s. At the beginning of the 1990s, the collection of TrueType fonts was first applied in
operating systems Mac System 7 and Windows 3.1. Later, the OpenType format was designed by OpenType is a regis-
Microsoft as a successor of TrueType and the OpenType format was issued as the standard tered trademark of
Microsoft Corpora-
ISO/IEC 14496-22:2009, Information technology – Coding of audio-visual objects – Part 22: tion.□
Open Font Format. More information is in literature [wiki_1113], [wiki_1114] and
[wiki_1115].
The main advantage of the outline fonts is their mathematical definition and some proper-
ties can be modified by the change of parameters in algorithm. Most Outline fonts are pro-
portional fonts and they are used in word processing and desktop publishing. Some Outline
fonts are monospaced and these are used in specific cases. Typically, there are emulators of
terminals, notations of programs in the articles or books, plain text editors, etc.
grams (pictograms) and CJKV symbols. The best example from literature is the CJKV charac-
ter 永 (pinyin: yǒng, "forever", "permanence"), Fig. 11-10, [wiki_1116]. This glyph is com- CJKV is the ab-
posed of eight calligraphic strokes. Each stroke has a name. Other examples are ideograms breviation for
(pictograms) which are composed of more pictograms. The ideogram “no dog”, Fig. 11-11, China, Japan,
is composed of two pictograms, dog and not allowed. Pictograms have different colors be- Korea and Vi-
cause the order is defined, the pictogram of “dog” covers the pictogram of “not allowed”, etnam.□
[wiki_1117].
11.6 ASCII
American Standard Code for Information Interchange – ASCII is a character encoding
scheme that is used by computers, communication devices and other devices with text Unicode name.□
processing. This standard was created in the 1960s and its last modification is from 1986.
The original ASCII is a 7-bit encoding scheme and it has two parts, 33 control characters and
95 printable characters. The printable characters are small and capital letters of American
alphabet, digits and special characters, Fig. 11-12.
LSB
MSB
0 1 2 3 4 5 6 7 8 9 A B C D E F
0 NUL SOH STX ETX EOT ENQ ACK BEL BS HT LF VT FF CR SO SI
1 DLE DC1 DC2 DC3 DC4 NAK SYN ETB CAN EM SUB ESC FS GS RS US
2 ! " # $ % & ' ( ) * + , - . /
3 0 1 2 3 4 5 6 7 8 9 : ; < = > ?
4 @ A B C D E F G H I J K L M N O
5 P Q R S T U V W X Y Z [ \ ] ^ _
6 ` a b c d e f g h i j k l m n o
7 p q r s t u v w x y z { | } ~ DEL
The newest standard for encoding is the Unicode, where the 7-bit ASCII code is the first
part of Unicode and it is called the Basic Latin Unicode block. In Unicode, each character
has a name, therefore, in the following text, this Unicode name will be used and in some
cases the Unicode name will be supplemented by a slang name or a very popular historical
name. In the Internet world, IANA preferred all the names on the World Wide Web in US
ASCII encoding scheme till 2007, then ASCII was surpassed by Unicode in format UTF-8, WWW uses
[wiki_1118]. UTF-8. □
The codes from 0 to 0x1F are the control codes and this area is also called as the control
code C0. The code 0x7F also belongs to the control codes and it means delete. These con-
trol codes were designed for controlling the peripheral equipment of computers and com-
munication devices, and for the flow control of transmission. In Fig. 11-13, it is possible to Control codes C0.
see the way of generating these codes as the caret notation and the notation in program- □
ming language, mainly C language. Fig. 11-13 shows the codes used and their meaning. The
codes in blue are frequently used. The unlisted codes can be considered obsolete and their
meaning can be found in literature [wiki_1118].
The ESCAPE is used in two ways, as a key on the keyboard or as an escape sequence. After
pressing the Esc key on the keyboard, the code 0x1B is sent to the operating system. The
implementation depends on the program; in some situations, ESCAPE may cause the exit,
[wiki_1137]. An escape sequence is a series of characters used to change the state of com-
puters and their attached peripheral devices. The ESC sequence begins with the ESCAPE code
which is followed by other codes, [wiki_1126]. Famous escape codes are the Hayes com-
mand set, the ANSI escape code and ESC/P.
The Hayes command set is a set of sequences which are used for controlling a modem,
[wiki_1138]. These sequences can perform actions like dialing a phone number, answering
a phone, setting parameters of a transfer and so on.
ANSI escape code or ANSI escape sequence is a method of controlling text terminals,
ANSI escape
[wiki_1112]. This sequence can change the properties of each glyph or all text or the
code. □
screen. These ANSI escape sequences are still used in the Linux and UNIX operating systems.
The codes from 0x20 to 0x7E are printable codes and a part of them is the American alpha-
bet. Also this definition is a part of Unicode with the name of Basic Latin Block. The names
for alphabet characters are shown in Fig. 11-14. The remaining names can be derived from this
table.
Fig. 11-15 shows special printable characters with official Unicode names [Unicode_1108].
But some glyphs are known by their slang names (blue word) or by names used in the past
or in another technology.
0x7F of 8-bit coding corresponds to 7-bit ASCII code. Only the range from 0 to 0x1F has two
meanings, according to the equipment which receives the code. When the code is sent to
the video adapter of PC in the text mode, the code generates a visual glyph, Fig. 11-16.
When this code is sent to the peripherals of personal computer, then the code is interpret-
ed as a control code. The upper area from 0x80 to 0xFF contains the characters with diacrit-
ics, the Greek alphabet and semi-graphic symbols. This 8-bit definition is called the code page
and Fig. 11-16 shows the code page 437, [wiki_1119]. The code page 437 is default in many
systems.
Source: http://en.wikipedia.org/wiki/File:Codepage-437.png
Fig. 11-16 Code page 437
Personal computers expanded to the whole world and a lot of national characters were
missing, as Czech diacritic, Cyrillic alphabet, etc. Therefore, other code pages were defined
and contained the missing national characters. These definitions only changed the upper
area, the range from 0x80 to 0xFF, [wiki_1120]. The code pages 437, 850, 852… were de-
fined in the era of MS-DOS operating system, [wiki_1139]. The code pages Windows 1250;
Windows 1252, … were defined in the era of Windows operating system, [wiki_1140].
These code pages were international and were issued as the standard. Outside these, a lot
of code pages were defined locally, which causes the mutual incompatibility.
International Organization for Standardization and IEC – ISO/IEC defined 8-bit codes for all world lan-
guages and these codes are called as code pages ISO 8859-1, ISO 8859-2, ISO 8859-3, etc.
This standard follows the previous definitions and ASCII 7-bit code. All the range of the code
page was divided into the areas which contain:
The range from 0x20 to 0x7E, this area contains the Latin alphabet and corresponds
to the ASCII definition. This definition is the same in all pages.
The range from 0xA0 to 0xFF, this area contains the national characters. The defini-
tion depends on the language and the world area.
The ranges from 0 to 0x1F and 0x80 to 0x9F are not defined by this standard. The
codes in these ranges correspond to the codes that are defined by standard ISO/IEC
6429. They are the control codes C0 and C1.
The problem of incompatibility also existed in the Czech Republic, where the following pag-
es were used by users:
Code page 437 contains American alphabet. This code page is called as Basic Latin
Alphabet. This page was the first code page for personal computers, [wiki_1119].
Code page 852 is the page for Central Europe languages that use Latin script. This
page contains Czech alphabet. Also known as Latin-2, [wiki_1120].
Windows 1252 and ISO8859-1 are similar pages with Latin alphabet for Western
Europe, [wiki_1121], [wiki_1123].
Windows 1250 and ISO8859-2 are similar pages with Latin alphabet for Central Eu-
rope, [wiki_1122], [wiki_1124].
Code pages defined nationally and incompatible with each other, [wiki_1145].
Each typeface has a separate font file and one code page has a lot of font files for each
typeface. When the document is written in more languages, then the corresponding font
files have to be installed. This is no problem in one personal computer. The problem occurs
when the document is sent to a recipient or is published on the Internet. The reader may
not have installed all required code pages and the document becomes unreadable. This
problem and others are solved by Unicode.
The C1 control code is a new set of codes which is located in the range from 0x80 to 0x9F of
8-bit encoding scheme. All the definitions and explanations of these new codes can be
found in [wiki_1125]. The NEL code is a code for the next line; it is an attempt to solve the
ambiguity of the CR+LF sequence. Another code, which is worth mentioning, is CSI - CONTROL
SEQUENCE INTRODUCER. This code is the leading code of ANSI escape sequence and it is fol-
lowed by parameters. ANSI escape sequence is used for controlling text terminal in Linux
and UNIX operating systems. The CSI code can be replaced by the sequence of codes,
ESC + [, hexadecimal 0x1B 0x5B. The ESC is escape code from the C0 control code and it is
followed by the left square bracket.
11.9 Unicode
Unicode is a code that aims to encode any world alphabet.
The previous attempts were very problematic. Let’s under-
stand there are live and dead languages in the world. The “Unicode is a compu-
live languages are used by people in world till today. The ting industry standard
dead languages are historical languages as Egyptian hiero- for the consistent en-
glyph, Indian languages, etc. Therefore, the universal cod- coding, representation
ing of all alphabets of the world is desirable. And also, let’s and handling of text
understand that today’s documents are not only classical expressed in most of
texts. The documents contain other symbols from science, Fig. 11-18 Logo Unicode the world's writing
graphical area, art, etc. The work on a new coding of world systems.” □
alphabet started in the 1990s. Two groups, Unicode and ISO/IEC, cooperated on the Source
work. The Unicode was also published as ISO/IEC standard. The conclusions of both http://en.wikipedia.org/wiki/Unicode
groups are very similar. But the famous name is Unicode, where the Unicode ver-
sion 7 is from 2014.
The Unicode space comprises 1,114,112 code points and the corresponding range is from 0
to 0x10FFFF. It is 21-bit space where each character is defined by Unicode point or code
point, name and basic glyph, Fig. 11-19. Each part of this definition has rules for writing and
the order of these parts is not obligatory.
The Unicode 21-bit space is divided into 17 planes and each plane has 65,536 code points. It
means that each plane uses 16 bits, $2^{16} = 65{,}536$ and $17 \times 65{,}536 = 1{,}114{,}112$ code points.
Each plane has its number and its name, abbreviation and the range, Fig. 11-20.
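Unicode planes

| Group         | Range               | Plane         | Abbreviation | Name                                |
|---------------|---------------------|---------------|--------------|-------------------------------------|
| Basic         | 0000 – FFFF         | Plane 0       | BMP          | Basic Multilingual Plane            |
| Supplementary | 1 0000 – 1 FFFF     | Plane 1       | SMP          | Supplementary Multilingual Plane    |
| Supplementary | 2 0000 – 2 FFFF     | Plane 2       | SIP          | Supplementary Ideographic Plane     |
| Supplementary | 3 0000 – D FFFF     | Plane 3 – 13  | -            | Unassigned                          |
| Supplementary | E 0000 – E FFFF     | Plane 14      | SSP          | Supplementary Special-purpose Plane |
| Supplementary | F 0000 – 10 FFFF    | Plane 15 – 16 | SPUA A/B     | Supplementary Private Use Area      |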
Basic plane 0 has the name BMP – Basic Multilingual Plane. This plane is important and
contains the scripts for all live world languages and other graphical symbols. The first 256
code points, 0 to 0x00FF, correspond to the ISO 8859-1 standard and to the C0 and C1 con-
trol codes. And also the first 128 code points, 0 to 0x7F, correspond to the ASCII 7-bit code,
because ASCII code is a subset of ISO 8859-1. The notation of code points corresponding to ASCII code is the
the ASCII range in UTF-8 is the same as the ASCII 7-bit code. This definition in UTF-8 ensures first part of
the compatibility with ASCII encoding. Unicode. □
The remaining code points contain scripts for all modern world languages. It means that
BMP plane contains scripts for Cyrillic, Greek, Arabic, Chinese, Japanese, Korean, many symbols,
etc. It means that most world documents can be written by using Basic Multilingual
Plane.
Supplementary plane 1 has the name SMP - Supplementary Multilingual Plane. This plane
BMP plane de-
contains the historical scripts as Egyptian hieroglyphs, Maya language etc. and a lot of sym-
fines only a part
bols. The symbols are mathematical alphanumeric symbols, today and past music symbols,
of CJK characters.
game symbols as playing cards, Mahjong, domino tiles, etc.
Therefore the 2nd
Supplementary plane 2 has the name SIP - Supplementary Ideographic Plane. This plane plane is deter-
contains CJK ideograms that were not included in earlier character encoding standards. mined for all CJK
ideograms. □
Supplementary planes 3 to 13 are unassigned planes.
Supplementary plane 14 has the name SSP - Supplementary Special-purpose Plane. This
plane currently contains non-graphical characters. This plane contains a block of tags that
are for old language tag characters for use when language cannot be indicated through
other protocols, [wiki_1146].
Supplementary planes 15 and 16 have the name SPUA A/B - Supplementary Private Use
Area A and B. These planes are designated for the private use by the parties that are out-
side of the ISO and the Unicode Consortium.
11.11 UTF-32
This notation uses 32-bit word. Unicode position has 21 bits and for increasing to 32 bits
the zero prefix is used. It means that the notation UTF-32 corresponds to Unicode point UTF-32
□
directly, [wiki_1127]. Programming language Python from version 3.2 uses the UTF-32 as
the unique encoding scheme, [wiki_1127].
11.12 UTF-16
UTF-16 is a variable length encoding scheme and covers all Unicode space. UTF-16 uses
UTF-16 □
16-bit words and the length of sequence is 1 or 2 words, where each of them has 16 bits,
[wiki_1128]. In case of two words, UTF-16 encoding scheme uses surrogate pair. It is two
numbers from the range 0xD800 to 0xDFFF. These values lie in the BMP plane and have no
defined characters. These values are only destined for UTF-16 encoding. UTF-16 encoding
scheme depends on the value of the code point and there are two principles.
The code points from the BMP plane are directly used as UTF-16. The first plane is a basic
plane and it lies in the range from 0 to 0xFFFF, only 16 significant bits are used. No trans-
formation is used in this case.
When the code point lies outside the basic plane, the surrogate pair is used for encoding
and the code point is transformed into two words. The lead and trail surrogates are used. Surrogate pair.
□
The leading surrogate is a number in the range from 0xD800 to 0xDBFF. The trailing surro-
gate is a number in the range from 0xDC00 to 0xDFFF. The algorithm is, [wiki_1128]:
0x010000 is subtracted from the code point, the result has maximum 20 significant
bits and it lies in the range from 0 to 0xFFFFF.
The result of subtraction is divided into two groups where each of them has 10 bits.
Both numbers are in the range from 0 to 0x3FF.
The number corresponding to top 10 bits is added to 0xD800. The sum is the first
Lead and trail
code unit or lead surrogate, which will be in the range from 0xD800 to 0xDBFF.
surrogate. □
(Previous versions of the Unicode Standard referred to these as high surrogates.)
The number corresponding to low 10 bits is added to 0xDC00, in order to obtain the
second code unit or trail surrogate, which will be in the range from 0xDC00 to
0xDFFF. (Previous versions of the Unicode Standard referred to these as low surro-
gates.)
The principle of encoding scheme is in Fig. 11-21. The first surrogate sequence 0xD800 and
0xDC00 corresponds to code point 0x010000.
Example of encoding using surrogate pairs is in Fig. 11-22 and the steps are:
The first step, subtract 0x010000 from code point. The result has only 20 significant
bits.
In the 2nd step, transform the difference to binary and divide it into two 10-bit
groups. Transform each group to hexadecimal. The top group belongs to lead surro-
gate and the low one to trail surrogate.
The 3rd step, calculate lead surrogate by adding 0xD800 to the top group.
The 4th step, calculate trail surrogate by adding 0xDC00 to the low group.
In the end, UTF-16 sequence is lead and trail surrogates.
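Code point:                    0x02 4A3D
Subtract:                     -0x01 0000
Difference:                    0x01 4A3D = 0001 0100 1010 0011 1101 (binary, 20 bits)
Top 10 bits:                   00 0101 0010 = 0x052
Low 10 bits:                   10 0011 1101 = 0x23D
Lead surrogate:                0xD800 + 0x052 = 0xD852
Trail surrogate:               0xDC00 + 0x23D = 0xDE3D
UTF-16 sequence:               0xD852 0xDE3D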
11.13 UTF-8
UTF-8 is a variable length encoding scheme and covers all Unicode space. This notation uses
8-bit byte as a basic element, [wiki_1129]. UTF-8 ensures backward compatibility with
ASCII and solves the problem of the endianness in UTF-16 and UTF-32. The number of used
bytes depends on the value of the code point. RFC 3629 from 2003 restricts the maximum
number of bytes to 4 and this is in accordance with UTF-32 and two words of UTF-16. Also 4
bytes of UTF-8 encode the code point in the range from 0 to 0x10FFFF. This range corre-
sponds to 21 bit n-tuple and 17 planes.
“Backward compatibility: One-byte codes are used only for the ASCII values 0
Backward com-
through 127. In this case the UTF-8 code has the same value as the ASCII code. The
patibility. □
high-order bit of these codes is always 0.
Clear distinction between multi-byte and single-byte characters: Code points larger
than 127 are represented by multi-byte sequences, composed of a leading byte and Unique encod-
one or more continuation bytes. The leading byte has two or more high-order 1s ing.□
followed by a 0, while continuation bytes all have '10' in the high-order position.
Self synchronization: Single bytes, leading bytes, and continuation bytes do not
share values. This makes the scheme self-synchronizing, allowing the start of a Self-
character to be found by backing up at most five bytes (three bytes in actual UTF‑8 synchronization. □
per RFC 3629 restriction, see above).
Clear indication of code sequence length: The number of high-order 1s in the lead-
ing byte of a multi-byte sequence indicates the number of bytes in the sequence, so
that the length of the sequence can be determined without examining the continu-
ation bytes.
Code structure: The remaining bits of the encoding are used for the bits of the code
point being encoded, padded with high-order 0s if necessary. The high-order bits go
in the lead byte, lower-order bits in succeeding continuation bytes. The number of
bytes in the encoding is the minimum required to hold all the significant bits of the
code point.”
UTF-8 sequence of bytes uses the names leading and continuation byte, Fig. 11-23. The first
byte of the sequence is the leading byte, followed by one or more continuation bytes. All
continuation bytes are uniquely encoded by the prefix 10B. The leading byte is uniquely
Unique encoding
encoded:
of leading byte.□
The prefix 0B is used, when the sequence has only one byte, the leading byte.
The prefixes 110B, 1110B and 11110B are used. In this case, the number of 1s in the
prefix defines the number of bytes in the sequence. It means the sum of leading
and continuation bytes.
4 21 U+1 0000 U+1F FFFF 1111 0xxx 10xx xxxx 10xx xxxx 10xx xxxx
Source: http://en.wikipedia.org/wiki/UTF-8
Fig. 11-23 UTF-8 encoding
The UTF-8 has self-synchronizing properties. It is important in the situation, when one or
more bytes in the flow of UTF-8 are missing. The decoder of UTF-8 is able to find leading
byte after the error and to continue decoding. Only a few characters in the text are lost.
UTF-8 has invalid codes. The invalid codes are derived from surrogate pair, or they are
codes outside the Unicode space, or the code is specified by a longer sequence than desira-
ble. Each code point must be encoded by the shortest sequence.
00 0001
01 0001
Continuation byte 2
“10” + “01 0011” = “1001 0011” => 0x93
01 0011 part 4
000 part 3 Continuation byte 3
part 2
Leading Continuation parts “10” + “00 0001” = “1000 0001” => 0x81
part 1 Continuation byte 4
Operator “+”means the concatenation “10” + “01 0001” = “1001 0001” => 0x91
Fig. 11-24 Example of UTF-8 encoding
The UTF-8 encoding is defined by Fig. 11-23. Example of UTF-8 encoding is in Fig. 11-24.
This example is only valid for Unicode point higher than 0x7F, outside of 7-bit ASCII code. ASCII code corre-
When the Unicode point is 7-bit ASCII, then UTF-8 is directly this code. For codes higher sponds to UTF-8.
□
The 2nd step, divide the binary string into 6-bit parts from LSB. The top part is short-
er than 6 bits. The top part belongs to the leading byte and the remaining parts be-
long to the appropriate continuation bytes.
The 3rd step, choose the appropriate line in the table according to the number of
continuation parts.
The 4th step, assemble the leading byte as the concatenation of the leading prefix
with the appropriate leading part. The number of 1s in the prefix defines the total
number of bytes in the sequence, i.e. the leading byte plus the continuation bytes.
The 5th and/or next step, assemble all continuation bytes as the concatenation of
the continuation prefix 10B with the appropriate continuation part.
UTF-8 has become the dominant character encoding for the World Wide Web, ac-
counting for more than half of all Web pages.
The Internet Mail Consortium (IMC) recommends that all e-mail programs be able
to display and create mail using UTF-8.
The W3C recommends UTF-8 as default encoding in their main standards (XML and
HTML).
UTF-8 is also increasingly being used as the default character encoding in operating
systems, programming languages, APIs, and software applications, [wiki_1129].
UTF-8 is one of possible default character sets in Internet, [oracle_1101].
specific use as a byte-order indicator, the BOM character also indicates the UTF format.
Fig. 11-25 defines the use of BOM in UTF encoding, [wiki_1130]. The figure shows only def-
inition for UTF encoding, other values for different and older encoding are not presented.
i i+1 i i+1
The examples of the application of big and little endian are in Fig. 11-26 and Fig. 11-27. The
atomic element is a byte and generated stream can be seen in the hexadecimal editor. In Big and little
case of the big endian, the MSB byte is placed in the lower address and LSB in the higher endian. □
address. In case of the little endian, it is vice versa, LSB is placed in the lower address and
MSB in the higher address.
i i+1 i i+1
Fig. 11-27 Example of big and little endian in UTF-32
11.16 Newline
Newline is a special character or a sequence of characters that signifies the end of the text
line. The synonyms for newline are line ending, end of line (EOL), or line break. The mean-
ing of newline is that cursor or head moves to the first column and by one line down. The
actual codes for newline have different behavior across the operating systems. It means
that the text file exchange between the operating systems is a problem. ASCII uses the
characters LINE FEED and CARRIAGE RETURN to perform the newline. The abbreviations are LF
and CR.
Literature [wiki_1136] states about LINE FEED and CARRIAGE RETURN following: “The concepts
of line feed (LF) and carriage return (CR) are closely associated and can be either considered Newline.
□
separately or lumped together. In the physical media of typewriters and printers, two axes
of motion, "down" and "across", are needed to create another line (a new line) on the
page. Although the design of a machine (typewriter or printer) must consider them sepa-
rately, the abstract logic of software can lump them together as one event. This is why a
newline in character encoding can be defined as LF and CR combined into one (LF+CR, LFCR,
CR+LF, CRLF).”
Programming languages have the format strings \n and \r corresponding to line feed and
carriage return. The application of these strings does not mean that the codes 0x0A and
0x0D are assigned. The codes for newline across the operating systems are different.
UNIX and UNIX-like operating system use the LINE FEED 0x0A code for the new line.
UNIX-like operating systems are e.g. Linux, OS X – operating system of Mac com-
puters, Android, etc.
Windows operating system uses the sequence CARRIAGE RETURN and LINE FEED (0x0D
0x0A) for the new line.
Internet, where the sequence CARRIAGE RETURN and LINE FEED (0x0D 0x0A) should be
used on the protocol level of the most textual Internet protocols (FTP, HTTP…). But,
it is recommended that the tolerant application recognizes the LINE FEED as the new-
line, as well.
Unicode uses the previous codes for newline and defines the new codes. New codes have
to ensure the transformation between new and old documents. The codes associated with
newline are:
LINE FEED (LF), U+000A, graphical symbol in document is ␊, U+240A - SYMBOL FOR
LINE FEED.
LINE TABULATION U+000B, original name was vertical tab (VT). Graphical symbol in
document is ␋, U+240B - SYMBOL FOR VERTICAL TABULATION.
FORM FEED (FF), U+000C, graphical symbol in text is ␌, U+240C - SYMBOL FOR FORM
FEED.
CARRIAGE RETURN (CR), U+000D, graphical symbol in text is ␍, U+240D - SYMBOL FOR
CARRIAGE RETURN.
Windows. In text documents, it is possible to use alt codes. Alt code input is entered by
holding Alt key and typing the decimal number on the numeric keypad, [wiki_1144]. The Alt code.
□
decimal number corresponds to the hexadecimal Unicode code point. The way for the input
of hexadecimal number is described in [wiki_1144].
UNIX/Linux. There are three ways for entering the Unicode characters missing on the key-
board, [wiki_1144]. Nevertheless, not all application programs allow all three methods of
entering.
Hold Ctrl + Shift keys and type U followed by up to eight hexadecimal digits (on
main keyboard or numpad). Then release Ctrl + Shift .
Hold Ctrl + Shift + U keys and type up to eight hexadecimal digits, then re-
lease Ctrl + Shift + U .
Type Ctrl + Shift + U , then type up to eight hexadecimal digits, and then
type Enter .
HTML has defined a notation that is able to write all Unicode characters, Fig. 11-29. The
notation is a string with the prefix “&#x” or “&#”, followed by a number and ending charac-
ter “;”. A number in the middle corresponds to the code point. Prefix “&#x” is for hexadec-
imal number and “&#” for decimal number corresponding to the code point, [wiki_1142].
Another possibility is to use the defined name instead of the number. Not all Unicode char-
acters have the defined name in HTML, the list of these names is in [wiki_1143].
11.18 References
[Internet_1101]Glyph; http://whatis.techtarget.com/definition/glyph; on line 2013-12-04
[Internet_1102]Character; http://searchcio-
midmarket.techtarget.com/definition/character; on line 2013-12-04
[Internet_1103]Unicode; http://searchcio-midmarket.techtarget.com/definition/Unicode;
on line 2013-12-04
[Internet_1104]Coldewey D.: A quick PSA on "dots" versus "pixels" in LCDs; Jul 21, 2010;
http://techcrunch.com/2010/07/21/a-quick-psa-on-dots-versus-pixels-in-
lcds/; on line 2013-12-12
[Internet_1105]Bigman A.: PPI vs. DPI: what’s the difference? in Design Tips and Resources,
on February 26, 2013; http://99designs.com/designer-
blog/2013/02/26/ppi-vs-dpi-whats-the-difference/; on line 2013-12-12
The FSM - Finite State Machine is a mathematical system which is possible to explain in
simple examples. Let’s imagine a counter that counts the incoming and outgoing cars
to/from the garage. The output of the counter corresponds to the number of cars in the
garage. The counter is incremented by the incoming cars and decremented by the outgoing
cars. At the beginning, it is necessary to clear the counter and to start the counting from
zero. From the technical point of view, it is a digital system that has the input impulse signal
AUTO and the input level signals DO and RESET. When the signal DO is active, the car is
incoming to the garage and the counter is incremented by one. And vice versa, when the
signal DO is inactive, the counter is decremented by one. The increment or decrement is
performed on the leading edge of the impulse AUTO. During the active RESET signal, the
counter is being re-set, regardless of other signals. The incoming or outgoing cars have no
influence on the counting; the counter stays on the zero. When it is desirable to start the
counting of cars, the signal RESET is deactivated. The time is used for the description of the
counter behavior. Fig. 12-01 shows the behavior of the counter in time and shows the situ-
ation when 3 cars have entered the garage. In the timing waveform, the interesting
areas are marked where the input signals have the same values, but the output is different.
This behavior cannot be described by a Boolean function, which defines the unambiguous
representation of the input to the output. The unambiguous definition of a Boolean function
means that the same input must always produce the same output. Another variable must be used for
the description of this behavior.
The time is used for the description of behavior.□
AUTO
AUTO OUTPUT DO
DO
Counter
RESET RESET
OUTPUT 0 1 2 3
time
Fig. 12-01 Counter of cars
The increment means to add one. The counter is incremented; it means that one is add-
ed to the contents of the counter.
The decrement means to subtract one. The counter is decremented; it means that one
is subtracted from the contents of the counter.
□
VŠB-TU Ostrava 167
12 Finite state machine
Let’s realize how many sequences can be drawn for three cars in the garage. The number of
the transitions between numbers 0, 1 and 2 of cars is not limited, Fig. 12-02. Theoretically,
it means that it is possible to design an infinite number of sequences for three cars in the
garage. The description of the counter behavior in the timing area is impossible.
AUTO
DO
The exhaustive
description in
RESET time is not real.□
OUTPUT 0 1 2 1 0 1 2 3
time
Fig. 12-02 Another sequence of cars
Therefore, another variable is defined, which allows the exhaustive description. This varia-
ble is a state and the abstract model is a finite state machine. The machine works in time
and the machine has two states, the present state and the next state. The present state
contains all information needed to plan and calculate the future. The next state will occur in
the future.
State.□
In our example, the output is equal to the present state. The output and the present state
correspond to the number of cars in the garage. This number cumulates all events in the
past. It is a result of incoming and outgoing cars. This number does not have the infor-
mation about how many cars entered into the garage or went out of the garage. The pre-
sent state has only the final information about the incoming and outgoing cars. When it is
desirable to know the number of incoming and outgoing cars, other separate machines
(counters) must be used. The next state is the future and the time is given by the moment
when the car goes into or out of the garage. The direction of cars determines the value of
the next state.
The present state contains all information needed to derive the behavior in the future.□
From the digital system point of view, the machine can have either synchronous or asyn- Synchronous
chronous behavior. The synchronous behavior uses the clock signal CLK and all actions of behavior.□
the synchronous Moore machine are performed on the edge of clock. In our example, it is
the impulse signal AUTO.
For the design of a real system, it is necessary to define the maximum number of cars in the Application of
garage. This limitation leads to the finite number of states. Then, FSM - Finite State Ma- FSM – Finite
chine is the mathematical model, which is used for the description of many systems. There State Machine.□
are computers and their parts, communication protocols, vending machines, checks of the
syntax of the programming languages - language parsing, artificial intelligence, etc. In non-
technical disciplines, FSM has been used for the description of neurological systems and in Sequential cir-
linguistics, for the description of the grammars of natural languages, [wiki_1201]. From the cuits or control
realization point of view, both hardware and software realization is possible. Hardware unit.□
realization is known as the sequential logic circuits or the control unit of the digital system.
FSM – Finite State Machine is a subset of Turing machine, which has more possibilities of
modeling, [wiki_1201]. The limited number of states is the first limitation. Literature
[Black_2008] says that “FSM is the Turing machine where head only reads and moves from
left to the right”.
FSM is subset of Turing machine.□
6 7 8 9 11
The state or the value of the state is a suitable sum of information about the past. On the
basis of this sum, the future is derived in the conjunction with the future input. The finite Present state.
□
machine is only in one state in given time. This state is called the present state. The state,
which will occur in the future, is called the next state. The change from the present state to
Next state.□
the next state is called the transition. The transition occurs on the basis of any event on the
input.
“The state of a sequential circuit is a collection of state variables whose values at any one time
contain all the information about the past necessary to account for the circuit's future behavior.”
Source: Herbert Hellerman's book on Digital Computer System Principles (McGraw-Hill, 1997).□
The theory of finite state machine distinguishes two definitions of FSM according to Mr.
Mealy and Mr. Moore. More definitions are in literature and the next definition is from
Wikipedia, [wiki_1202], [wiki_1203] and [Fristacky_1986]. Mathematical definitions that
follow have a lot in common. The difference is only in one representation and therefore
both definitions are placed in two neighboring columns.
Mealy and
Mealy machine Moore machine Moore machine.□
Mealy FSM is 6-tuple (S, S0, Σ, Λ, T, G), Moore FSM is 6-tuple (S, S0, Σ, Λ, T, G),
where where
S is a finite set of states S is a finite set of states
S0 is a start state (also called initial state) S0 is a start state (also called initial state)
which is an element of S which is an element of S
Σ is a finite set of the inputs Σ is a finite set of the inputs
Λ is a finite set of the output Λ is a finite set of the output
The representation T is the transition function which is defined in time, from the present to the
future. The set of the states is mapped into itself. It is only possible in time. In detail, the
present state and present input generate the next state:
$T: S \times \Sigma \to S$ (1201)
A new value of state occurs in dis-
crete time i+1. It means that the present state is unchanged between discrete times i and i+1.
The realization of FSM must contain an element for storing the present state. More infor-
mation is in literature [Fristacky_1986], [Katz_Borriello_2005], [Roth_2004],
[Warkley_2006] and [Divis_2008].
The representation G is the output function. The mapping is defined between the different
sets. The time is not needed, the representation is the present. Mealy and Moore machine
has different definitions of the output function G:
Mealy machine maps the present state and the present input to the present out-
put.
Moore machine maps only the present state to the present output. The simplest
representation is situation when the present output is equal to the present state.
A typical example is counter.
Mealy $G: S \times \Sigma \to \Lambda$ (1202)
Moore $G: S \to \Lambda$ (1203)
Where
The output of Mealy machine can be generated without the change of the present state.
The definition of the output function enables to generate a new output when only the input
is changed. A new output of Moore machine is derived only on basis of the change of the
state.
The synchronous finite state machine uses a special input, from which it derives the transi-
tion between the states. This input is called a clock - CLK, [wiki_1205]. The clock ensures the
synchronization and the discrete points for transitions of states are either the leading edge
or the trailing edge or both edges.
The asynchronous finite state machine does not use the clock input, [wiki_1206]. The asyn-
chronous finite state machine derives the transition between the states on basis of any
input event. Then, the discrete points are any changes of inputs. The definition of the asyn-
chronous transition function is more demanding than that of the synchronous one. The
asynchronous definition of the transition function must eliminate possible infinite cycles of
the states. More information is in literature [Fristacky_1986]. Therefore, the synchronous
finite state machines are preferred for the hardware realization.
The output function G works only in the present and the synchronous or asynchronous
behavior has no influence. Note only that a new output of Mealy machine can be derived
from a new input during one present state. This is impossible in case of Moore machine.
Mealy FSM
Mealy FSM.□
Output
Transition
function State function
register G
T Next Output
Input Present
state (present)
(present) state
Clock
CLK
Initial state
(Init) CLR
Output
Transition
function State function
T register G
Next Output
Input Present
state (present)
(present) state
Clock
CLK
Initial state
(Init) CLR
The block diagrams of Mealy and Moore are very alike and they only differ in the input of
the output function. The realization of FSM has three basic blocks: Moore FSM.□
State register stores the value of the present state. The register is synchronized by
the clock signal “Clock”. The signal “Initial state” is used for setting the initial state State register.□
on the output of the state register. This action can be synchronous or asynchro-
nous. The input of the state register is “Next state” as the output of the transition
function.
Transition function T corresponds to the representation which is given by formula
Transition
(1201). The output of this block is “Next state” which is calculated on the base of
function.□
“Input” and “Present state”. From the point of view of hardware realization, it is a
combinational circuit.
Output function G corresponds to the representation which is given by formulas
(1202) or (1203). This block calculates the “Output”. From the point of view of Output function.□
hardware realization it is a combinational circuit. The output can be defined in dif-
ferent ways, according to the type of FSM:
For Mealy machine, the representation of the output function is given by
formula (1202). The output is the function of “Present state” and “Input”.
For Moore machine, the representation of the output function is given by
formula (1203). The output is the function of only “Present state”.
If the output is equal to the present state, then it is the simplest output function of
Moore machine. A typical example is a counter. □
The state diagram is an oriented graph where nodes correspond to the states and oriented
edges represent the transition function, Fig. 12-06. Oriented edges mean that each edge
has the arrow and the condition, when the edge can be used for the transition between the
states. More information about oriented graph is in [wiki_1209] and [wiki_1210].
Beginning
Telephone rings
The names of states can correspond to a real situation. Fig. 12-06 shows the example of
picking up the telephone. The initial state is “I am home”. If somebody calls me, it is the
input “Telephone rings”; I change the state and pass to the next state “I am calling”. When
the telephone does not ring, the next state is “I am home”, I do not change the state. The
above sentences describe the transition function.
Transition function.□
The state “I am home” is the initial state, from which FSM begins all activities. Also, the
study of behavior of FSM is meaningful. The initial state is generated by activating the signal Initial State.□
“Beginning”, regardless of any state or other activities.
Beginning
Telephone rings / Pick up Slash “/” is a
No ring/ separator be-
I am home
Study I am calling tween input
and output. □
The output function has two definitions, where the first definition is for the output of
Mealy machine, Fig. 12-07. The Mealy output function depends on the input and the pre-
sent state. The output is signified on each edge, separated with a slash "/" from the input.
The output function is expressed by action “Pick up” the phone and “Study”. This output
action “Pick up” is generated when FSM is in the present state “I am home” and the input is
”Telephone rings”. The output action “Study” is generated, when FSM is in the present
state “I am home” and the input is “No ring”. The description of the transition and output
functions can be connected into one sentence: Mealy output
function.□
FSM is in the present state “I am home” and the input is “Telephone rings”, then
the output is “Pick up” and FSM passes to the next state “I am calling”.
FSM is in the present state “I am home” and the input is “No ring”, then the output
is “Study” and FSM passes to the next state “I am home”.
Beginning
Telephone Slash “/” is a
rings separator be-
No ring State / output.□
I am home/ What to do/ I am calling tween state
Study Pick up
and output. □
In case of Moore machine, the output function depends only on the present state. The out-
put is signified on each state, separated with a slash "/" from the state. The output function
is expressed by action “Pick up” the phone and “Study”. This output action “Pick up” is gen-
erated when FSM is in the present state “What to do”. The output action “Study” is gener-
ated when FSM is in the present state “I am home”. The description of the transition and
output functions can be connected into one sentence:
FSM is in the present state “I am home” and FSM generates the output “Study”,
Moore output
and when the input is “Telephone rings”, then FSM passes to the next state “What
function.□
to do”.
FSM is in the present state “I am home” and FSM generates the output “Study”,
and when the input is “No ring”, then FSM passes to the next state “I am home”.
Etc.
There is no algorithm for the transformation between Moore and Mealy machine. At the
beginning of designing, it is necessary to choose either Mealy or Moore machine. Mealy
machine, Fig. 12-07, and Moore machine, Fig. 12-08, do not have the same number of
states, even if they describe the same problem. Moore machine has three states to reach
the same state “I am calling”. The transition between the states “What to do” to “I am call-
ing” is without the input.
Init
Input slash output
11/0
2
00/0 01/0
01/1 No carry Carry 10/0
10/1 00/1 11/1
1 3
4
Green color is remarks
The state diagram contains two states with the significance of carry. The initial state is “No
carry” and the second state “Carry” means that the carry has been generated. The full
comprehension and the design of the state diagram are in the sentences, which are neces-
sary to assemble for all combinations of inputs “a”, “b” and carry. The sentences are:
FSM is in the current state “No carry”, and when the input combination is “a=0”
and “b=0”, FSM generates the output “0”, and then FSM passes to the next state
“No carry”. This sentence corresponds to the edge 1. It is a situation of the addition
where “a + b + cin = 0 + 0 + 0 = 0”, all in binary, and cin is the carry in.
FSM is in the current state “No carry”, and when the input combination is “a=1”
and “b=1”, FSM generates the output “0”, and then FSM passes to the next state
“Carry”. This sentence corresponds to the edge 2. It is a situation of the addition
where “a + b + cin = 1 + 1 + 0 = 10”, all in binary, and cin is the carry in. The output
is “0” and the carry to the next order is “1”.
FSM is in the current state “Carry”, and when the input combination is “a=1” and
“b=1”, FSM generates the output “1”, and then FSM passes to the next state “Car-
ry”. This sentence corresponds to the edge 3. It is a situation of the addition where
“a + b + cin = 1 + 1 + 1 = 11”, all in binary, and cin is the carry in. The output is “1”
and the carry to the next order is “1”.
Init
01 or 10 01
00 1 or
No carry 00 No carry 10
with 0/0 with 1/1
The following machine is Moore machine with the same task. It is necessary to realize that
the carry was generated with the output equal to 0 or 1. This thought leads to four combi-
nations, and therefore Moore machine has four states. The names of states are the concat-
enation of the carry, yes or not, with the output value equal to 0 or 1. The initial state is
“No carry with 0”, it means the carry was not generated and the output is equal to zero.
There is no algorithm for the transformation between Moore and Mealy machine.
The full comprehension and the design of the state diagram are in the sentences, which are
necessary to assemble for all combinations of inputs “a”, “b” in each state. The sentences
are:
FSM is in the current state “No carry with 0” and FSM generates the output “0”,
and when the input combination of n-tuple “ab” is “01” or “10”, then FSM passes
to the next state “No carry with 1”. This sentence corresponds to the edge 1. It is a
situation of the addition where “a + b + cin = 0 + 1 + 0 = 1” or “a + b + cin = 1 + 0 + 0
= 1”, all in binary, and cin is the carry in.
FSM is in the current state “Carry with 0” and FSM generates the output “0”, and
when the input combination of n-tuple “ab” is “00”, then FSM passes to the next
state “No carry with 1”. This sentence corresponds to the edge 2. It is a situation of
the addition where “a + b + cin = 0 + 0 + 1 = 1”, all in binary, and cin is the carry in.
FSM is in the current state “Carry with 0” and FSM generates the output “0”, and
when the input combination of n-tuple “ab” is “11”, then FSM passes to the next
state “Carry with 1”. This sentence corresponds to the edge 3. It is a situation of the
addition where “a + b + cin = 1 + 1 + 1 = 11”, all in binary, and cin is the carry in.
This is a pattern showing how to understand the state graph and how to explain it. In the
design of a state graph, it is necessary to define nodes and also the significance of each
node. Then the proposed edges make sense and explain the behavior of the finite state
machine.
Rows of the table correspond to the current state and columns correspond to the combina-
tions of inputs. The first part is the state transition table where the green area is deter-
mined for the notation of the next states. The second part is a table for the output function
and the output values are written in the blue area. The initial state is not marked in a spe-
cial way, and typically, it is the current state in the first row.
Init
11/0
00/0 01/0
01/1 No carry Carry 10/0
10/1 00/1
11/1
Fig. 12-12 Transcription of the state diagram to table notation for Mealy machine
Fig. 12-12 shows the full transcription of the state diagram to the table notation. It is Mealy
machine of the serial adder, Fig. 12-09. The table for Moore machine is simpler because the
output is only the function of the current state, Fig. 12-13.
Init
01 or 10 01
or
00
No carry 00 No carry 10
with 0 /0 with 1 /1
01 11 00
11 00
or
10 11
Carry 11 Carry
with 0/0 with 1/1
01 or 10
Fig. 12-13 Transcription of the state diagram to table notation for Moore machine
Asynchronous Synchronous
inputs FSM output
Real
world
Generator
of clock
It is suitable to synchronize the asynchronous inputs by sampling. Then the sampling regis-
ter provides the synchronized inputs. This synchronization solves problems of timing in
realization.
In software realization, non-synchronized inputs of FSM can cause the situation in which
the transition function uses two different values of input in one calculation. This situation
could lead to a wrong next state. The sampling register solves this situation and it ensures
the constant input during the calculation of the transition and output functions.
V | \ \
1+---------+ CLOSE | \
| LISTEN | ---------- | |
+---------+ delete TCB | |
rcv SYN | | SEND | |
----------- | | ------- | V
+---------+ 2 snd SYN,ACK / \ snd SYN +---------+
| |<----------------- ------------------>| |
| SYN | rcv SYN | SYN |
| RCVD |<-----------------------------------------------| SENT |
| | snd ACK | |
| |------------------ -------------------| |
+---------+ rcv ACK of SYN \ / rcv SYN,ACK +---------+
| -------------- | | -----------
| x | | snd ACK
| V V
| CLOSE +---------+
| ------- | ESTAB |
| snd FIN +---------+
| CLOSE | | rcv FIN Source: RFC 793
The second possibility is that an application requires establishing the session with the des-
tination node. The node in state “LISTEN” received the statement “SEND” from an applica-
tion. After that, the node of network sends the flag “snd_SYN” to the destination and it
goes to the state “SYN_SENT”, where it waits for “rcv_SYN” from the destination.
:
05 enum state_type {LISTEN, SYN_RECEIVED, SYN_SENT, ESTABLISH};
06 enum state_type Current_state;
07 enum state_type Next_state;
08
09 bool SEND, INIT = false;
10 bool rcv_SYN, rcv_SYN_and_ACK, rcv_ACK_of_SYN = false;
11 bool snd_SYN, snd_SYN_and_ACK, snd_ACK;
12
13 int main()
14 {
15 void input (bool *inSYN, bool *inSYN_ACK, bool *inACK_of_SYN,
16 bool *inSEND, bool *inINIT);
17
18 Current_state = LISTEN;
19 Next_state = LISTEN;
20
21 while (true)
22 { snd_SYN = false; snd_SYN_and_ACK = false; snd_ACK = false;
23 rcv_SYN=false; rcv_SYN_and_ACK=false; rcv_ACK_of_SYN=false;
24 SEND=false; INIT=false;
25
26 // sampling of inputs
27 input(&rcv_SYN, &rcv_SYN_and_ACK, &rcv_ACK_of_SYN, &SEND, &INIT);
28
29 if (INIT) { Current_state = LISTEN;continue;
30 } // initialization of FSM
31
32 switch (Current_state) // transition and output functions
33 {case LISTEN:
34 if (rcv_SYN) {snd_SYN_and_ACK = true;
35 Next_state = SYN_RECEIVED;}
36 else if (SEND) {snd_SYN = true;
37 Next_state = SYN_SENT;}
38 else Next_state = LISTEN; break;
39
40 case SYN_RECEIVED:
41 if (rcv_ACK_of_SYN) Next_state = ESTABLISH;
42 else Next_state = SYN_RECEIVED; break;
43
44 case SYN_SENT:
45 if (rcv_SYN) {snd_ACK = true;
46 Next_state = SYN_RECEIVED;}
47 else if (rcv_SYN_and_ACK) {snd_ACK = true;
48 Next_state = ESTABLISH;}
49 else Next_state = SYN_SENT; break;
50
51 case ESTABLISH: break;
52 } // end of transition and output functions
53
54 Current_state = Next_state; // state register
55 }
56 return 0;
64 }
Fig. 12-16 shows the description of the state diagram from Fig. 12-15 in C programming
language. The program uses the same names of states as in the state diagram. The declara-
tion of the enumeration type is used for these names, row 5 of program listing. The varia-
bles “Current_state” and “Next_state” have values corresponding to the names of states.
The actual program begins in row 21 by infinite cycle. Row 27, the function “input” is called
to ensure sampling inputs. After that, all inputs of FSM are constant during the following
calculation. Row 32, the statement switch according to the variable “Current_state” is the
beginning of the description of the transition and output functions. In individual cases of
the current state, the next state is assigned and the output is set up. At the end, row 54, a
new value of the next state is written to the state register and a new value of the current
state is valid for the next cycle.
Two situations are marked in the state diagram in Fig. 12-15 and they are described by rows
from 32 to 38 in listing, Fig. 12-16. The following sentences are a basic model for the de-
scription and the explanation of the state diagram in Fig.12-15. They are:
Situation 1 and rows 33 and 38. FSM is in the present state “LISTEN”, and the input
is neither “rcv_SYN” nor “SEND”, then no output is generated and FSM goes to the
next state “LISTEN”.
Situation 2 and rows 33, 34 and 35. FSM is in the present state “LISTEN”, and the
input “rcv_SYN” is received, then the output “snd SYN, ACK” is activated and FSM
goes to the next state “SYN RCVD”.
12.10 References
Literature
[Black_2008] Black, Paul. E. (12 May 2008). "Finite State Machine". Dictionary of Algo-
rithms and Data Structures (U.S. National Institute of Standards and Tech-
nology).
[Divis_2008] Zdeněk Diviš, Zdeňka Chmelíková, Jaroslav Zdrálek: Logické obvody; skripta
VŠB-TU Ostrava, ISBN 978-80-248-1734-8
[Fristacky_1986] Frištacký, N., Kolesár, M., Kolenička, J., Hlavatý, J.: Logické systémy;
ALFA 1986; ISBN 80-05-00414-1
[Warkley_2006] John F. Wakerly: Digital Design, Principles and Practices, Fourth Edi-
tion; Prentice Hall 2006, ISBN 0-13-186389-4
The synchronous digital system is a system which is synchronized by a clock. This principle
ensures the stability of the system and it is a simpler design, in comparison with asyn-
chronous systems. The basic block diagram of the synchronous digital system is in Fig.
13-01. This system has two basic blocks, the data unit and the control unit which are con-
nected by signals, both between each other and with the outside environment. The de-
scription of blocks and signals are:
The data unit is a block which can perform any operation. It contains combinational
logic circuits with registers. From the point of view of a processor, the data unit is
the arithmetic logic unit which contains combinational circuits and registers. Com-
binational circuits perform logic operations, binary arithmetic operations – addi-
tion, subtraction, multiplication, and encoding, multiplexing, etc. Registers are
used for storing inputs, outputs and auxiliary data. The composition of the data unit
depends on a concrete design. The data unit is connected with the control unit by
control and condition signals and it is connected to the outside environment by
flags and bidirectional data.
The control unit is a block which controls the data unit. The control unit is a finite
state machine which has the condition signals from the data unit as the input; and it
has the control signals to the data unit as the output. The control unit is connected
to the outside environment by flags and commands.
Control
Data Unit
Unit
Conditions
CLK
Init
which define the output of a multiplexer that can choose the register for
the output,
which define an addition or a subtraction,
which define parameters of encoding,
etc.
Condition signals are signals which are generated by the data unit and they de-
scribe the results of performed actions. For example, ”result is equal to zero”, “sign
bit”, “carry out”, “overflow”, “underflow” and so on.
Data, it is a bidirectional bus for transferring data as the input and the output of the
data unit.
Flags, they are signals which are generated by the control unit or the data unit and
they signalize some information to the outside environment. Flags are only for
reading and they signalize, for example, “give the next command”, “set input data”,
“system is busy”, “error” and so on.
Command signals, they are signals which are generated by the outside environment
of a digital system. For example, “start”, “termination of operation”, “parameters
of encoding” and so on.
CLK, it is a synchronization signal for all actions in the system. The clock signal de-
fines the transition from the present state to the next state, the clock input of reg-
isters or flip-flops. All actions are derived from an edge of the clock.
Init, it is an initialization signal which ensures the transition to the initial state of the
finite state machine, the signal which clears flags, registers and so on. It is desirable
to start the operation of the digital system from a defined state. Sometimes this
state is called the default state.
If the sub-result in the nibble is higher than 9 or the nibble generates the carry to
the next nibble, then 6 must be added to the nibble.
If the addition of 6 to the nibble generates the carry, the nibble must be correct-
ed by adding 6.
The data unit only has an 8-bit adder and 16-bit registers. The 4-order decimal digits are
stored in registers and the sum must be performed two times, sequentially and consecu-
tively with the carry. The first addition adds the low bytes with storing the carry out. The
second addition performs the addition of the high bytes with the carry from the previous
addition.
B register, 16-bit
A register, 16-bit
06
60
66
H L
A mux B mux
C8
3 8-bit binary adder
A6 Carry out Carry in C8
C4 A6
H L
Sum
Fig. 13-02 Block diagram of the data unit
The adder produces the carry outputs which can be used as the carry input of the adder, or
as the conditions in the control unit. The carry flags are:
“C8” is a carry from the 7th bit of adder, when low bytes are added. The carry “C8” is
used in the addition of high bytes as the carry in.
“A6” is a carry from the 7th bit of adder, when a low byte of the accumulator is add-
ed with one of the correction constants. The “A6” carry is used as carry in, when a
high byte of the accumulator is corrected.
“C4” is a carry from the third bit of adder. The “C4” carry expresses the carry from
the low nibble to the high nibble. It is used by the control unit as a condition for
branching.
The accumulator is a 16-bit register and the writing is performed by bytes. The accumulator
contains the circuits for decoding the situation when nibble is higher than 9. These 4 signals
“Nibbles > 9” are used as the conditions by the control unit.
B register, 16-bit
A register, 16-bit
06
60
66
H L
A mux B mux
C8
3 8-bit binary adder
A6 Carry out Carry in C8
C4 A6
H L
CLK
CLK
B register, 16-bit Load_B
Load_A A register, 16-bit
06
60
66
H L
A_sel B_sel
A mux B mux
Clear
C8
Load_xx 3 8-bit binary adder
A6 Carry out Carry in C8
CLK C4 A6
H L Cin_sel
Load_ACC
Nibbles > 9
Low/High 16-bit accumulator Synchronization
CLK
Control signals from the control unit
Following figures show the comparison with the general block diagram and the general
definition of the data unit. The data unit has two data inputs A and B, next a data output
and it produces the conditions for the control unit. These signals are marked in Fig. 13-03.
Fig. 13-04 shows the undrawn control and synchronization signals. Control signals are not
usually drawn. It is assumed that everybody knows the control signals of blocks. For exam-
ple, control signals of multiplexer are given by the knowledge of the function of multiplex-
er. And also, each register needs the enable signal and the synchronization clock signal,
where both signals define the time of writing. The data unit does not produce flags.
Flags Commands
- end_cal - Start
Load_A, Load_B, A_sel(2), B_sel(3),
Controls
CLK
Init
Fig. 13-05 Input and output of the control unit
The control unit is a finite state machine. The behavior is described by the state diagram of
Moore machine. The design and its description are:
form of the addition 999 + 25 = 1024, in decimal, from the IDE simulation. In this simula-
tion, the current state and next states show the way in the state diagram, which was used
in this example of addition.
IDE environment performs the synthesis, which is the realization in programmable logic
devices. The result of the synthesis is a file which is loaded into the PLD - Programmable
Logic Devices. PLD is a universal circuit which can realize any digital systems. Programmable
devices are CPLD and well known FPGA, [wiki_xx02], [wiki_xx03], [wiki_xx04],
[Zdralek_2006] and [Zdralek_2008].
13.5 Reference
[wiki_xx01] Integrated development environment;
http://en.wikipedia.org/wiki/Integrated_development_environment; on
line 2014-09-23
---------------------------------------
library IEEE;
use IEEE.STD_LOGIC_1164.ALL;
use IEEE.STD_LOGIC_ARITH.ALL;
use IEEE.STD_LOGIC_UNSIGNED.ALL;
-- Top-level entity of the BCD adder: two 16-bit operands in, one 16-bit result out,
-- plus a start/end handshake, a clock and a reset.
-- NOTE(review): the operands are presumably four packed BCD digits each
-- (the accompanying text simulates 999 + 25 = 1024) -- confirm against the architecture.
entity BCD_adder is
-- A, B    : 16-bit operand inputs
-- Sum     : 16-bit result output
-- Start   : command input; presumably requests a new calculation -- confirm
-- End_cal : status output; presumably signals that the calculation finished -- confirm
-- CLK     : clock input
-- Reset_L : reset input; presumably active low (per the _L suffix) -- confirm
Port ( A: in STD_LOGIC_VECTOR (15 downto 0);
B: in STD_LOGIC_VECTOR (15 downto 0);
Sum: out STD_LOGIC_VECTOR (15 downto 0);
Start: in STD_LOGIC;
End_cal: out STD_LOGIC;
CLK: in std_logic;
Reset_L: in std_logic
);
end BCD_adder;
-- Component declaration of the data unit as instantiated by the top level.
-- The port order matters: the instantiation uses positional association.
-- A, B       : 16-bit operand inputs
-- Sum        : 16-bit result output
-- CLK        : clock input
-- contr_word : 14-bit control word input (driven by the control unit)
-- flags      : 6-bit flag output (read by the control unit)
component data_unit is
Port ( A: in STD_LOGIC_VECTOR (15 downto 0);
B: in STD_LOGIC_VECTOR (15 downto 0);
Sum: out STD_LOGIC_VECTOR (15 downto 0);
CLK: in std_logic;
contr_word: in STD_LOGIC_VECTOR (13 downto 0);
flags: out std_logic_vector (5 downto 0)
);
end component data_unit;
begin
-- Control unit instance. Positional association: the actuals must stay in the
-- port order of control_unit (flags, start, contr_word, clk, reset_L, end_cal).
IO1: control_unit port map (flags, start, word, clk, reset_L, end_cal);
-- Data unit instance. Positional association in the order of the data_unit
-- component ports (A, B, Sum, CLK, contr_word, flags). The signals 'word' and
-- 'flags' are the only links between the two units.
IO2: data_unit port map (A, B, Sum, clk, word, flags);
end Behavioral;
---------------------------------------------------------------------
--- Data Unit
---------------------------------------------------------------------
library IEEE;
use IEEE.STD_LOGIC_1164.ALL;
use IEEE.STD_LOGIC_ARITH.ALL;
use IEEE.STD_LOGIC_UNSIGNED.ALL;
-- Component declaration of the 8-bit adder used inside the data unit.
-- A, B : 8-bit operand inputs
-- S    : 8-bit sum output
-- cin  : carry input
-- c4   : carry output; presumably the carry out of the low nibble (bit 3 into bit 4),
--        as needed for BCD correction -- confirm against the add_8_bit architecture
-- cout : carry output; presumably the carry out of bit 7 -- confirm
component add_8_bit is
Port ( A: in STD_LOGIC_VECTOR (7 downto 0);
B: in STD_LOGIC_VECTOR (7 downto 0);
S: out STD_LOGIC_VECTOR (7 downto 0);
cin: in STD_LOGIC;
c4: out STD_LOGIC;
cout: out STD_LOGIC);
end component add_8_bit;
begin
-- The accumulator ACC drives the Sum output directly.
Sum <= ACC;
-- A and B registers
-- Each register captures its input on the rising edge of clk while its load
-- enable is '1'. There is no else branch, so otherwise the register keeps its value.
A_reg <= A when clk='1' and clk'event and load_A_reg='1';
B_reg <= B when clk='1' and clk'event and load_B_reg='1';
-- A MUX: selects the 8-bit value assigned to A_mux.
--   A_sel = "00" -> ACC   high byte (15 downto 8)
--   A_sel = "01" -> ACC   low byte  (7 downto 0)
--   A_sel = "10" -> A_reg high byte (15 downto 8)
--   otherwise    -> A_reg low byte  (7 downto 0)
-- The last branch is deliberately unconditional. A conditional signal
-- assignment without a final else keeps the previous value whenever no
-- condition matches (e.g. A_sel contains 'U' or 'X'), which describes storage
-- instead of a purely combinational multiplexer.
A_mux <= ACC(15 downto 08) when A_sel="00" else
ACC(07 downto 00) when A_sel="01" else
A_reg (15 downto 08) when A_sel="10" else
A_reg (07 downto 00);
-- B MUX
B_mux <= B_reg (15 downto 08) when B_sel="000" else
B_reg (07 downto 00) when B_sel="001" else
"00000000" when B_sel="010" else
"00000110" when B_sel="011" else
-- cin mux
-- Carry-in select: cin is forced to '0' while cin_C8orA6 is '0';
-- otherwise it is the OR of the C8 flag and c_A6.
cin <= '0' when cin_C8orA6='0' else
C8 or c_A6;
-- C8 carry flag register, updated on the rising edge of clk.
-- load_C8 takes priority over clear_carry: when load_C8 is '1' the flag takes
-- cout, otherwise clear_carry = '1' resets it to '0'. With neither asserted
-- the flag simply keeps its value (no assignment is made).
process (clk) is
begin
    if clk'event and clk = '1' then
        if load_C8 = '1' then
            C8 <= cout;
        elsif clear_carry = '1' then
            C8 <= '0';
        end if;
    end if;
end process;
-- C4 flag register, updated on the rising edge of clk.
-- load_C4 takes priority over clear_carry: when load_C4 is '1' the flag takes
-- cc44, otherwise clear_carry = '1' resets it to '0'. With neither asserted
-- the flag simply keeps its value (no assignment is made).
process (clk) is
begin
    if clk'event and clk = '1' then
        if load_C4 = '1' then
            C4 <= cc44;
        elsif clear_carry = '1' then
            C4 <= '0';
        end if;
    end if;
end process;
-- The registered C4 flag is passed on as fC4.
fC4 <= C4;
-- "Nibble > 9" detectors: nibble(i) is '1' when the i-th 4-bit group of ACC
-- (i = 0 is bits 3..0, i = 3 is bits 15..12) is greater than "1001" (decimal 9),
-- i.e. the group is not a valid BCD digit. The ">" on std_logic_vector comes
-- from the STD_LOGIC_UNSIGNED package and compares the operands as unsigned numbers.
nibble(0) <= '1' when ACC(03 downto 00) > "1001" else '0';
nibble(1) <= '1' when ACC(07 downto 04) > "1001" else '0';
nibble(2) <= '1' when ACC(11 downto 08) > "1001" else '0';
nibble(3) <= '1' when ACC(15 downto 12) > "1001" else '0';
end Behavioral;
-- Entity of the control unit (described in the text as a Moore finite state machine).
-- The port order matters: the top level instantiates this unit with positional association.
-- flags      : 6-bit flag input (produced by the data unit)
-- start      : command input; presumably requests a new calculation -- confirm
-- contr_word : 14-bit control word output (consumed by the data unit)
-- clk        : clock input
-- reset_L    : reset input; presumably active low (per the _L suffix) -- confirm
-- end_cal    : status output; presumably signals that the calculation finished -- confirm
entity control_unit is
Port ( flags: in STD_LOGIC_VECTOR (5 downto 0);
start: in std_logic;
contr_word: out STD_LOGIC_VECTOR (13 downto 0);
clk: in STD_LOGIC;
reset_L: in STD_LOGIC;
end_cal: out std_logic);
end control_unit;
type state_type is
(First, Beginig, Low_add, Low_06, Low_60, Low_66,
High_add, High_06, High_60, High_66, End_state);
signal Next_state, Current_state: state_type;
begin
end case;
end process State_diagram;
end Behavioral;