﻿#### Checking duplicate media items

In my project, I have a big media library with a lot of media. Sometimes content editors uploaded the same image more than once, which needlessly increased the size of the library. I decided to add a small feature that prevents duplicate images from being uploaded.

The essence of my method is to compute a hash for each media item, store it in a Solr index, and add a processor that checks the hash whenever a content editor uploads a media file.

For the hash, I use System.Security.Cryptography.MD5. First, I created a method that converts a Stream to an MD5 hash.

```
/// <summary>
/// Computes the MD5 digest of a stream's content and returns it as Base64 text.
/// </summary>
/// <remarks>
/// The content is hashed exactly once. Values that were produced by a different
/// scheme (for example, by hashing the digest a second time) are not comparable
/// with the output of this method, so an index holding such values must be rebuilt.
/// </remarks>
/// <param name="stream">
/// Stream to hash. It is read from its current position to the end and is left open;
/// the caller keeps ownership and is responsible for rewinding or disposing it.
/// </param>
/// <returns>The Base64 encoding of the 16-byte MD5 digest (24 characters).</returns>
/// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
public static string StreamToMD5(Stream stream)
{
    if (stream == null)
    {
        throw new ArgumentNullException(nameof(stream));
    }

    // MD5 is IDisposable; release it deterministically instead of waiting for the GC.
    using (var md5 = MD5.Create())
    {
        byte[] digest = md5.ComputeHash(stream);
        return Convert.ToBase64String(digest);
    }
}
```

As you can see, I convert the hash to Base64, which makes it simple to store in Solr as a string.

After that, I implemented a computed field that will keep the hash value.

```
/// <summary>
/// Computed index field that stores the content hash of image media items.
/// </summary>
public class MediaHashComputedField : IComputedIndexField
{
    /// <summary>Name of the index field, supplied by the index configuration.</summary>
    public string FieldName { get; set; }

    /// <summary>Return type of the index field, supplied by the index configuration.</summary>
    public string ReturnType { get; set; }

    /// <summary>
    /// Produces the hash of the media stream for image items.
    /// </summary>
    /// <param name="indexable">The indexable being crawled.</param>
    /// <returns>
    /// The value of <c>MD5Helper.StreamToMD5</c> for the item's media stream, or
    /// <c>null</c> when the indexable is not an item, the item passes neither the
    /// unversioned nor the versioned image template check, or it has no media stream.
    /// </returns>
    public object ComputeFieldValue(IIndexable indexable)
    {
        Item source = (Item)(indexable as SitecoreIndexableItem);
        if (source == null)
        {
            return null;
        }

        // Only image items are hashed; everything else gets no value for this field.
        bool isImage = source.HasBaseTemplate(Sitecore.TemplateIDs.UnversionedImage)
            || source.HasBaseTemplate(Sitecore.TemplateIDs.VersionedImage);
        if (!isImage)
        {
            return null;
        }

        using (Stream content = new MediaItem(source).GetMediaStream())
        {
            return content != null ? MD5Helper.StreamToMD5(content) : null;
        }
    }
}
```

Only Unversioned and Versioned images will have this hash in Solr. You can extend it for other media file types.

Next, I connect the computed field through the Sitecore configuration.

```
<configuration xmlns:patch="http://www.sitecore.net/xmlconfig/"
               xmlns:env="http://www.sitecore.net/xmlconfig/env/">
  <sitecore>
    <contentSearch>
      <indexConfigurations>
        <defaultSolrIndexConfiguration>
          <documentOptions>
            <fields hint="raw:AddComputedIndexField">
              <!-- Adds the "mediahash" computed field, implemented by MediaHashComputedField,
                   to the default Solr index configuration. -->
              <field fieldName="mediahash" returnType="string">DTCM.Feature.DuplicateMedia.ComputedFileds.MediaHashComputedField, DTCM.Feature.DuplicateMedia</field>
            </fields>
          </documentOptions>
        </defaultSolrIndexConfiguration>
      </indexConfigurations>
    </contentSearch>
  </sitecore>
</configuration>
```

The second step is to create a searcher with a method for finding images by hash.

```
/// <summary>
/// Looks up indexed media items by their content hash.
/// </summary>
public interface IMediaSearcher
{
    /// <summary>
    /// Finds the media items whose indexed hash equals <paramref name="hash"/>.
    /// </summary>
    /// <param name="hash">The hash value to search for.</param>
    /// <returns>The search result documents that match the hash.</returns>
    IEnumerable<MediaSearchResultItem> GetMediaByHash(string hash);
}
```

```
/// <summary>
/// Searches the master index for media library items by their indexed content hash.
/// </summary>
[Service(typeof(IMediaSearcher))]
public class MediaSearcher : IMediaSearcher
{
    /// <summary>Name of the index that is queried.</summary>
    protected string IndexName => "sitecore_master_index";

    /// <summary>ID used to restrict results to items whose path contains it.</summary>
    protected ID mediaLibrary => new ID("{3D6658D8-A0BF-4E75-B3E2-D050FABCF4E1}");

    /// <summary>
    /// Finds the indexed items under the media library whose hash equals <paramref name="hash"/>.
    /// </summary>
    /// <param name="hash">The hash value to look for.</param>
    /// <returns>
    /// The matching documents as a materialized list. The result is never <c>null</c>:
    /// it is empty when <paramref name="hash"/> is null/blank or nothing matches.
    /// </returns>
    public IEnumerable<MediaSearchResultItem> GetMediaByHash(string hash)
    {
        if (string.IsNullOrWhiteSpace(hash))
        {
            // An empty sequence lets callers chain LINQ operators without a null check.
            return Enumerable.Empty<MediaSearchResultItem>();
        }

        // Read the ID once so the query captures a plain local value.
        ID mediaLibraryId = this.mediaLibrary;

        var index = ContentSearchManager.GetIndex(this.IndexName);
        using (var context = index.CreateSearchContext())
        {
            var results = context.GetQueryable<MediaSearchResultItem>()
                .Where(i => i.Paths.Contains(mediaLibraryId) && i.MediaHash == hash)
                .GetResults();

            if (results == null)
            {
                return Enumerable.Empty<MediaSearchResultItem>();
            }

            // Materialize while the search context is still alive, so callers never
            // enumerate a deferred projection after the context has been disposed.
            return results.Select(hit => hit.Document).ToList();
        }
    }
}
```

I use this searcher in a custom upload processor.

The custom upload processor looks like this:

```
/// <summary>
/// Upload pipeline processor that aborts an upload when a file (or a file inside an
/// archive that is going to be unpacked) has the same content hash as media that is
/// already indexed.
/// </summary>
public class CheckDuplicatesByHash : UploadProcessor
{
    // The searcher is resolved through ServiceLocator on every access.
    protected IMediaSearcher mediaSearcher => ServiceLocator.ServiceProvider.GetService<IMediaSearcher>();

    // Wrapper around the current HTTP context, created on every access.
    protected HttpContextBase httpContextBase => (HttpContextBase)new HttpContextWrapper(HttpContext.Current);

    /// <summary>
    /// Checks every uploaded file for duplicates and aborts the pipeline on the first match.
    /// </summary>
    /// <param name="args">The upload pipeline arguments.</param>
    public void Process(UploadArgs args)
    {
        Assert.ArgumentNotNull((object)args, nameof(args));
        if (args.Destination == UploadDestination.File)
        {
            return;
        }

        // Content editors can upload several files in one request.
        foreach (string key in (NameObjectCollectionBase)args.Files)
        {
            HttpPostedFile file = args.Files[key];
            if (string.IsNullOrEmpty(file.FileName))
            {
                continue;
            }

            bool duplicateFound;
            try
            {
                duplicateFound = UploadProcessor.IsUnpack(args, file)
                    ? this.ArchiveContainsDuplicate(args, file)
                    : this.RejectIfDuplicate(args, file.InputStream, file.FileName);
            }
            finally
            {
                // Hashing consumes the stream. Rewind it for BOTH upload kinds so that any
                // later pipeline processor that reads it starts from the beginning.
                file.InputStream.Position = 0L;
            }

            if (duplicateFound)
            {
                return;
            }
        }
    }

    // Checks each non-empty entry of an uploaded archive; returns true once a duplicate is reported.
    private bool ArchiveContainsDuplicate(UploadArgs args, HttpPostedFile archive)
    {
        // NOTE(review): the reader is intentionally not disposed because it wraps the
        // request's input stream, which must stay usable afterwards - confirm that
        // ZipReader.Dispose would close the underlying stream before changing this.
        ZipReader zipReader = new ZipReader(archive.InputStream);
        foreach (ZipEntry entry in zipReader.Entries)
        {
            if (entry.Size == 0)
            {
                continue;
            }

            using (var entryStream = entry.GetStream())
            {
                if (this.RejectIfDuplicate(args, entryStream, archive.FileName + "/" + entry.Name))
                {
                    return true;
                }
            }
        }

        return false;
    }

    // Hashes the content, looks it up, and on a match reports the error and aborts the pipeline.
    private bool RejectIfDuplicate(UploadArgs args, System.IO.Stream content, string displayName)
    {
        var hash = MD5Helper.StreamToMD5(content);

        // The searcher contract does not rule out a null result, so guard before enumerating.
        var duplicates = this.mediaSearcher.GetMediaByHash(hash)?.ToList();
        if (duplicates == null || duplicates.Count == 0)
        {
            return false;
        }

        string encodedName = HttpUtility.HtmlEncode(displayName);
        var duplicateItemNames = string.Join(", ", duplicates.Select(i => i.Name));
        var duplicateItemPath = string.Join(", ", duplicates.Select(i => i.Path));
        var errorText = $"The file \"{encodedName}\" is duplicate sitecore media {duplicateItemNames} ({duplicateItemPath}).";

        var context = this.httpContextBase;
        if (context.Request.Path.Contains("Upload Media/UploadMedia2.aspx"))
        {
            context.Response.Write(this.ErrorMessageScript(errorText));
        }

        args.ErrorText = errorText;
        args.AbortPipeline();
        return true;
    }

    // Builds a page that shows an alert with information about the duplicate media.
    private string ErrorMessageScript(string message)
    {
        // The text is placed inside a single-quoted JavaScript literal, so it must be
        // escaped for that context; otherwise an apostrophe or "</script>" in a file or
        // item name breaks the script or injects markup.
        string literal = HttpUtility.JavaScriptStringEncode(message);
        return $"<html><head><script type=\"text/JavaScript\" language=\"javascript\">alert('{literal}')</script></head></html>";
    }
}
```

This method is not elegant, but it works with the different upload types (an archive, several images at once, etc.).

Finally, I connect the processor through the Sitecore configuration.

```
<configuration xmlns:patch="http://www.sitecore.net/xmlconfig/"
               xmlns:env="http://www.sitecore.net/xmlconfig/env/">
  <sitecore>
    <processors>
      <uiUpload>
        <!-- Inserts the duplicate check into the uiUpload pipeline right after the CheckSize processor. -->
        <processor type="DTCM.Feature.DuplicateMedia.Pipelines.UploadProcessor.CheckDuplicatesByHash, DTCM.Feature.DuplicateMedia"
                   mode="on"
                   patch:after="*[@type='Sitecore.Pipelines.Upload.CheckSize, Sitecore.Kernel']" />
      </uiUpload>
    </processors>
  </sitecore>
</configuration>
```

**Demo**:

![](https://www.brimit.com/-/media/project/brimit/blog/tech-blogs/checking-duplicate-media-items/image3.png?h=386&amp;w=439&amp;hash=589D69B829AA5615FD6C03F45E20C5CD)

Uploading a file with the same image that I have in Sitecore.

![](https://www.brimit.com/-/media/project/brimit/blog/tech-blogs/checking-duplicate-media-items/image1.png?h=218&amp;w=454&amp;hash=25969DE5A6D10BAF404F046013201222)

![](https://www.brimit.com/-/media/project/brimit/blog/tech-blogs/checking-duplicate-media-items/image2.png?h=139&amp;w=442&amp;hash=C9AF1FC760D94D81AB829D873C4FF196)

###### Author

[![Evgeniy Triputko](https://www.brimit.com/-/jssmedia/feature/blogs/authors/evgeniy-triputko.png?h=544&amp;iar=0&amp;w=544&amp;hash=387BC483135409D00E2D8B19AD8C62E6)
Evgeniy Triputko
Senior Sitecore Developer](https://www.brimit.com/blog/author?authors=Evgeniy%20Triputko)

#### More on Sitecore

[![How Vercel Will Help You Save Effort When Deploying Sophisticated Sitecore Projects](https://www.brimit.com/-/jssmedia/project/brimit/blog/2024/vercel_cover-image.png)
#Guides #How-to DXP E-commerce
##### How Vercel Will Help You Save Effort When Deploying Sophisticated Sitecore Projects
Optimize and accelerate the development and deployment of complex multisite Sitecore projects.
Alexei Vershalovich on July 17, 2024](https://www.brimit.com/blog/how-vercel-will-help-you-save-effort-when-deploying-sophisticated-sitecore-projects)

[![Training Up Tomorrow's Sitecore MVPs: a Mentoring Success Story](https://www.brimit.com/-/jssmedia/project/brimit/blog/2023/sitecore-mentoring---cover-image.png)
#How-to DXP
##### Training Up Tomorrow's Sitecore MVPs: a Mentoring Success Story
How to participate in the Sitecore Mentor program and help younger colleagues jump-start a career in Sitecore development.
Sergey Baranov on October 2, 2023](https://www.brimit.com/blog/training-up-tomorrows-sitecore-mvps)

[![Going Headless. Part 2: When a Headless CMS Is Your Best Bet (if you have Sitecore)](https://www.brimit.com/-/jssmedia/project/brimit/blog/2022/headless/adobestock_456986731.jpg)
#How-to DXP E-commerce
##### Going Headless. Part 2: When a Headless CMS Is Your Best Bet (if you have Sitecore)
Discover how a headless CMS can benefit organizations that use Sitecore.
Daniil Raschupkin, Palina Trokhautsava on September 15, 2022](https://www.brimit.com/blog/going-headless-part-2-when-a-headless-cms-is-your-best-bet-if-you-have-sitecore)

![](https://bat.bing.net/action/0?ti=187017043&amp;tm=gtm002&amp;Ver=2&amp;mid=f41a111c-0232-4c57-b6d6-4c560f77cd17&amp;bo=2&amp;gtm_tag_source=1&amp;pi=0&amp;lg=en-US&amp;sw=800&amp;sh=600&amp;sc=24&amp;nwd=1&amp;tl=Checking%20duplicate%20media%20items&amp;kw=duplicate%20media%20items&amp;p=https%3A%2F%2Fwww.brimit.com%2Fblog%2Fchecking-duplicate-media-items&amp;r=&amp;lt=276&amp;evt=pageLoad&amp;sv=2&amp;asc=D&amp;cdb=AQAY&amp;rn=269922)