[MsSQL] Non-ascii character removal

Okay, now at work...

Do you want to identify the offending characters in string and then eyeball them before removal (ie: locate & replace?) , or just blindly obliterate all rows that contain a "lower than ASCII 32 & higher than ASCII 127"?

/*
drop table ##JGTally
drop table ##JGOuter
*/


/*
    =========================================================================
Create and populate the Tally table on the fly to cater for
running against columns up to 4,000 characters in length
250 is acceptable for most system fields, but this script is intended
for occasions when script used by Tier 2 becomes 
difficult to find the offending character

Change table & field names as appropriate to the occasion.
=========================================================================
*/
SELECT TOP 250
      IDENTITY(INT,1,1) AS N
INTO 
      ##JGTally 
FROM
      INFORMATION_SCHEMA.COLUMNS with (nolock)
/*
===== Add a PK Constraint on TempTable to maximise performance
*/
ALTER TABLE ##JGTally
  ADD CONSTRAINT PK_Tally_N 
      PRIMARY KEY CLUSTERED (N) WITH FILLFACTOR = 100


--    =========================================================================
--  Use Tally table against table to find any 'Tab' characters
--  or other nonstandard characters
--    =========================================================================
SELECT
  ROW_ID
  /*Any relevant fields from source table*/
--  , S_CASE_PER.CASE_ID
  , N AS [Char Position Within Field (As int)]
  , SUBSTRING(X_MLSTN_LG_DESC,N,1) as [Character]
  , ASCII(SUBSTRING(X_MLSTN_LG_DESC,N,1)) AS [ASCII Value]
INTO 
      ##JGOuter 
FROM
      S_INVOICE with (nolock)
  CROSS JOIN ##JGTally with (nolock)
WHERE X_MLSTN_LG_DESC is not null
/*check for only Tab character*/
-- AND ASCII(SUBSTRING(X_MLSTN_LG_DESC,N,1))= 9
/*check for ANY non-printable, special Word characters characters*/
AND (ASCII(SUBSTRING(X_MLSTN_LG_DESC,N,1))< 32 -- Anything less than ASCII 32 is non-printable.
       OR ASCII(SUBSTRING(X_MLSTN_LG_DESC,N,1)) > 127) -- Anything over 127 is most likely Latin-1 encoded, but 128-160 Block is Windows-Only

select * from ##JGOuter with (nolock)
order by ROW_ID, 2 asc

select 
      ROW_ID
      , X_MLSTN_LG_DESC 
from S_INVOICE with (nolock)

where ROW_ID 
in (select ROW_ID from ##JGOuter)
order by 1, 2 asc
/r/SQL Thread