Hugo Kornelis posted a wonderful reply on his blog to the Prime Number Challenge, which runs in 9.3 seconds on my laptop. Denis posted his solution in a reply on Hugo's blog; Denis' solution runs in 6 seconds.
WOW! Perhaps this isn't such a crazy undertaking after all.
I've been working on my previous syntax a little bit and I've got it to where I think it will run in four hours. It's reproduced at the end of this post for completeness' sake. I'll chase it down even though it stands thoroughly discredited.
I've got one potential tweak to Hugo's syntax I'm looking at that might bring it down below Denis'; I'll pursue that presently and make another post when I've got an answer. The code below will run overnight and and update provided tomorrow.
These are the major changes:
This final step did a great deal to reduce the log churn this process generates.
Here is the current state of this discredited tangent, which will finish a distant fourth in this contest. Thanks to Denis for a great brain-teaser.
-wp
--WAITFOR DELAY '00:30' -- wait for 30 minutes
USE PrimeNumberTestBeta
GO
-- log a status message
SELECT GETDATE() AS [Starting: Building and Populating Schema..]
SET NOCOUNT ON
-- remove the three tables we're going to use
-- if they're present from a previous run
IF EXISTS (SELECT 1 FROM sys.objects WHERE name = 'Candidate')
BEGIN
DROP TABLE Candidate
-- SELECT GETDATE() AS [SchemaCreate: Candidate Dropped..]
END
IF EXISTS (SELECT 1 FROM sys.objects WHERE NAME = 'ComparisonMatrix')
DROP TABLE ComparisonMatrix
-- SELECT GETDATE() AS [SchemaCreate: ComparisonMatrix Dropped..]
-- create the table to hold the set of values to test from primacy
-- (we are going to remove some low-hanging fruit for performance reasons,
-- so we need a distinct table
CREATE TABLE Candidate (
CandidateValue int --CONSTRAINT Candidate_PK PRIMARY KEY CLUSTERED WITH FILLFACTOR = 100
)
-- SELECT GETDATE() AS [SchemaCreate: Candidate Created..]
-- create the table where the calculations will be performed
-- Base is the number we're testing for primacy
-- Factor is the number we're dividing it by
-- IsQuotientInteger is a bit flag indicating whether the Quotient is an integer
CREATE TABLE ComparisonMatrix (
Base int,
Factor int,
-- Remainder AS Base % Factor,
IsQuotientInteger AS CASE
WHEN Base % Factor = 0 THEN CAST(1 AS int)
ELSE CAST(0 AS int)
END PERSISTED
-- SELECT GETDATE() AS [SchemaCreate: ComparisonMatrix Created..]
-- build indexes on the ComparisonMatrix table to improve perfomance
--CREATE UNIQUE /*CLUSTERED*/ INDEX ComparisonMatrix_PK ON ComparisonMatrix(Base, Factor) WITH FILLFACTOR = 90
--SELECT GETDATE() AS [SchemaCreate: Clustered Index On ComparisonMatrix Created..]
-- CREATE INDEX ComparisonMatrix_IDX1 ON ComparisonMatrix (IsQuotientInteger) WITH FILLFACTOR = 100
--SELECT GETDATE() AS [SchemaCreate: Index On ComparisonMatrix Created..]
-- build an INSTEAD OF INSERT trigger so we only save the integer factors
-- THIS IS PURELY A PERFORMANCE ISSUE;
-- it's cheaper to write this trigger
-- this syntax is new in SQL Server 2005; the BOL reference may be found here:
-- ms-help://MS.SQLCC.v9/MS.SQLSVR.v9.en/udb9/html/0001f651-fcfa-401b-baf2-0ec6b1671116.htm
--
-- A BRIEF DISCUSSION:
-- what we're doing here is tuning our INSERT statement for the table CompressionMatrix below to ignore
-- the transactions which do not result in an integer quotient. This will make the commiting of
-- transactions orders of magnitude faster, since only a small fraction of the values will result in
-- integer quotient
CREATE TRIGGER IOI_ComparisonMatrix
ON ComparisonMatrix
INSTEAD OF INSERT
AS
INSERT ComparisonMatrix (
Base,
Factor
SELECT TOP 3
FROM inserted
WHERE IsQuotientInteger = 1
OPTION (FAST 3)
-- SELECT GETDATE() AS [SchemaCreate: ComparisonMatrix Trigger Created..]
-- SELECT GETDATE() AS [SchemaCreate: Tables Created..]
-- using Shaun's syntax (because it doesn't have a database footprint),
-- populate Candidate
;WITH digits(i) AS (
SELECT 1 AS I UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL
SELECT 4 UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL
SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9 UNION ALL
SELECT 0
),
-- generate 1M rows each with a unique row number, i
sequence(i) AS (
SELECT d1.i + (10*d2.i) + (100*d3.i) + (1000*d4.i) + (10000*d5.i) + (100000*d6.i)
FROM digits AS d1,
digits AS d2,
digits AS d3,
digits AS d4,
digits AS d5,
digits AS d6
INSERT Candidate (CandidateValue)
SELECT i+1 -- CTE gives us 0-999999; we want 1-1000000
FROM sequence
-- SELECT GETDATE() AS [Candidate Populated..]
-- now delete the low-hanging fruit from Candidate
-- WE ARE ONLY DONG THIS FOR PERFORMANCE; the algorhy\itm can handle them
-- NOTE that these fifteen DELETE statements, which encompass the first 15 prime numbers and
-- thier multiples, together remove 86.13% of the candidate values
DELETE Candidate WHERE (CandidateValue % 2 = 0 AND CandidateValue >= 2+1)
DELETE Candidate WHERE (CandidateValue % 3 = 0 AND CandidateValue >= 3+1)
DELETE Candidate WHERE (CandidateValue % 5 = 0 AND CandidateValue >= 5+1)
DELETE Candidate WHERE (CandidateValue % 7 = 0 AND CandidateValue >= 7+1)
DELETE Candidate WHERE (CandidateValue % 11 = 0 AND CandidateValue >= 11+1)
DELETE Candidate WHERE (CandidateValue % 13 = 0 AND CandidateValue >= 13+1)
DELETE Candidate WHERE (CandidateValue % 17 = 0 AND CandidateValue >= 17+1)
DELETE Candidate WHERE (CandidateValue % 19 = 0 AND CandidateValue >= 19+1)
DELETE Candidate WHERE (CandidateValue % 23 = 0 AND CandidateValue >= 23+1)
DELETE Candidate WHERE (CandidateValue % 29 = 0 AND CandidateValue >= 29+1)
DELETE Candidate WHERE (CandidateValue % 31 = 0 AND CandidateValue >= 31+1)
DELETE Candidate WHERE (CandidateValue % 37 = 0 AND CandidateValue >= 37+1)
DELETE Candidate WHERE (CandidateValue % 41 = 0 AND CandidateValue >= 41+1)
DELETE Candidate WHERE (CandidateValue % 43 = 0 AND CandidateValue >= 43+1)
DELETE Candidate WHERE (CandidateValue % 47 = 0 AND CandidateValue >= 47+1)
-- these bear less fruit, but are included because they'll still
-- help performance; I continued to add a line as long as 1% of remaining candidates
-- was deleted
DELETE Candidate WHERE (CandidateValue % 53 = 0 AND CandidateValue >= 53+1)
DELETE Candidate WHERE (CandidateValue % 59 = 0 AND CandidateValue >= 59+1)
DELETE Candidate WHERE (CandidateValue % 61 = 0 AND CandidateValue >= 61+1)
DELETE Candidate WHERE (CandidateValue % 67 = 0 AND CandidateValue >= 67+1)
DELETE Candidate WHERE (CandidateValue % 71 = 0 AND CandidateValue >= 71+1)
DELETE Candidate WHERE (CandidateValue % 73 = 0 AND CandidateValue >= 73+1)
DELETE Candidate WHERE (CandidateValue % 79 = 0 AND CandidateValue >= 79+1)
DELETE Candidate WHERE (CandidateValue % 83 = 0 AND CandidateValue >= 83+1)
DELETE Candidate WHERE (CandidateValue % 89 = 0 AND CandidateValue >= 89+1)
DELETE Candidate WHERE (CandidateValue % 97 = 0 AND CandidateValue >= 97+1)
DELETE Candidate WHERE (CandidateValue % 101 = 0 AND CandidateValue >= 101+1)
-- SELECT GETDATE() AS [Candidate DePopulated For Performance..]
-- retune indexes for performance
ALTER INDEX ALL ON Candidate REBUILD
UPDATE STATISTICS Candidate
-- uncomment this SELECT statement to confirm that we have cut 1M candidates down to 119,590
-- SELECT COUNT(1) AS [Candidate Depopulated Count] FROM Candidate
-- SELECT GETDATE() AS [Tables Populated..]
-- keep some house for what comes..
DECLARE @CandidateValue int,
@Counter int,
@Now DATETIME,
@PreviousInterval DECIMAL(12,2),
@Interval DECIMAL(12,2)
-- initalize a counter
SET @Counter = 1
-- declare a cursor to traverse the Candidate table
DECLARE TraverseCandidateTable CURSOR FAST_FORWARD READ_ONLY FOR
SELECT CandidateValue
FROM Candidate
ORDER BY CandidateValue
--DESC
-- open the cursor and get the first record
OPEN TraverseCandidateTable
FETCH NEXT
FROM TraverseCandidateTable
INTO @CandidateValue
SELECT GETDATE() AS [Processing: Cursor Loop Starts..]
-- set timer
SET @Now = GETDATE()
-- open a transaction
-- (careful transaction control is necessary to avoid rampant growth of the transaction logs)
BEGIN TRAN
-- start a loop which will run as long as the cursor finds a record
WHILE @@FETCH_STATUS = 0
-- insert the full range of remaining possible factors for the candidate value
-- into the comparison matrix
-- (use the Candidate table to aid this work)
SELECT @CandidateValue,
CandidateValue
WHERE CandidateValue <= @CandidateValue
-- OPTION (RECOMPILE)
-- log a database commit and start a new transaction
-- if it's been 200 transactions since we did so
IF (@CandidateValue >= 20000 AND @Counter % 200 = 0)
OR (@CandidateValue <= 20000 AND @Counter % 100 = 0)
COMMIT TRAN
ALTER INDEX ALL ON ComparisonMatrix REBUILD
CHECKPOINT
-- log a status message if it's been 2000 transactions since we did so
IF @Counter % 2000 = 0
SET @Interval = (DATEDIFF (mi, @now, GETDATE()) * 60) +
DATEDIFF (ss, @now, GETDATE())
SELECT GETDATE() AS [Processing: Status Datetime..],
@Counter AS [Current Count..],
@CandidateValue AS [Current Candidate..],
@Interval AS [ElapsedSecondsThisBatch..],
(@Interval/@PreviousInterval) * 100
AS [PerformanceChangeSinceLastBatch]
-- initialize loop variables for the next batch
SET @PreviousInterval = @Interval
-- get the next candidate value
-- increment the final transaction
SET @Counter = @Counter + 1
-- we're out of the loop;
-- we still need to commit the final transaction
-- clost and deallocate the cursor
CLOSE TraverseCandidateTable
DEALLOCATE TraverseCandidateTable
SELECT GETDATE() AS [Cursor Loop Complete..]
-- generate the list of prime numbers
-- if the SUM(IsQuotientInteger)=2 (where IsQuotientInteger=1 when a particular member of the set
-- is a precise integer factor of the candidate value), then that candidate is a prime number,
-- because IsQuotientInteger will equal 1 for 1 and the candidate itself. All candidates with
-- additional factors will thus be filtered out by the HAVING cluase.
SELECT Base
FROM ComparisonMatrix
GROUP BY Base
HAVING SUM(IsQuotientInteger) = 2
ORDER BY Base